stream.rb in google-cloud-speech-0.24.0

- old
+ new

@@ -11,11 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 
-require "google/cloud/speech/v1beta1"
+require "google/cloud/speech/v1"
 require "google/cloud/speech/result"
 require "monitor"
 require "forwardable"
 
 module Google
@@ -29,27 +29,28 @@
       # @example
       #   require "google/cloud/speech"
       #
       #   speech = Google::Cloud::Speech.new
       #
-      #   stream = speech.stream encoding: :raw, sample_rate: 16000
+      #   stream = speech.stream encoding: :raw,
+      #                          language: "en-US",
+      #                          sample_rate: 16000
       #
-      #   # register callback for when a result is returned
-      #   stream.on_result do |results|
-      #     result = results.first
-      #     puts result.transcript # "how old is the Brooklyn Bridge"
-      #     puts result.confidence # 0.9826789498329163
-      #   end
-      #
       #   # Stream 5 seconds of audio from the microphone
       #   # Actual implementation of microphone input varies by platform
       #   5.times do
       #     stream.send MicrophoneInput.read(32000)
       #   end
       #
       #   stream.stop
+      #   stream.wait_until_complete!
       #
+      #   results = stream.results
+      #   result = results.first
+      #   result.transcript #=> "how old is the Brooklyn Bridge"
+      #   result.confidence #=> 0.9826789498329163
+      #
       class Stream
         include MonitorMixin
         ##
         # @private Creates a new Speech Stream instance.
         # This must always be private, since it may change as the implementation
@@ -93,32 +94,33 @@
         #
         #   speech = Google::Cloud::Speech.new
         #
         #   audio = speech.audio "path/to/audio.raw"
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
-        #   # register callback for when a result is returned
-        #   stream.on_result do |results|
-        #     result = results.first
-        #     puts result.transcript # "how old is the Brooklyn Bridge"
-        #     puts result.confidence # 0.9826789498329163
-        #   end
-        #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
         #     stream.send MicrophoneInput.read(32000)
         #   end
         #
         #   stream.stop
+        #   stream.wait_until_complete!
         #
+        #   results = stream.results
+        #   result = results.first
+        #   result.transcript #=> "how old is the Brooklyn Bridge"
+        #   result.confidence #=> 0.9826789498329163
+        #
         def send bytes
           start # lazily call start if the stream wasn't started yet
           # TODO: do not send if stopped?
           synchronize do
-            req = V1beta1::StreamingRecognizeRequest.new(
+            req = V1::StreamingRecognizeRequest.new(
               audio_content: bytes.encode("ASCII-8BIT"))
             @request_queue.push req
           end
         end
 
@@ -151,11 +153,13 @@
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
         #     stream.send MicrophoneInput.read(32000)
@@ -174,182 +178,172 @@
             @results
           end
         end
 
         ##
-        # Register to be notified on the reception of an interim result.
+        # Whether all speech recognition results have been returned.
         #
-        # @yield [callback] The block for accessing final and interim results.
-        # @yieldparam [Array<Result>] final_results The final results.
-        # @yieldparam [Array<Result>] interim_results The interim results.
+        # @return [Boolean] All speech recognition results have been returned.
         #
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
-        #   # register callback for when an interim result is returned
-        #   stream.on_interim do |final_results, interim_results|
-        #     interim_result = interim_results.first
-        #     puts interim_result.transcript # "how old is the Brooklyn Bridge"
-        #     puts interim_result.confidence # 0.9826789498329163
-        #     puts interim_result.stability # 0.8999
-        #   end
-        #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
         #     stream.send MicrophoneInput.read(32000)
         #   end
         #
         #   stream.stop
         #
-        def on_interim &block
+        #   stream.wait_until_complete!
+        #   stream.complete? #=> true
+        #
+        #   results = stream.results
+        #   results.each do |result|
+        #     puts result.transcript
+        #     puts result.confidence
+        #   end
+        #
+        def complete?
           synchronize do
-            @callbacks[:interim] << block
+            @complete
           end
         end
 
-        # @private yields two arguments, all final results and the
-        # non-final/incomplete result
-        def interim! interim_results
-          synchronize do
-            @callbacks[:interim].each { |c| c.call results, interim_results }
-          end
-        end
-
         ##
-        # Register to be notified on the reception of a final result.
+        # Blocks until all speech recognition results have been returned.
         #
-        # @yield [callback] The block for accessing final results.
-        # @yieldparam [Array<Result>] results The final results.
-        #
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
-        #   # register callback for when an interim result is returned
-        #   stream.on_result do |results|
-        #     result = results.first
-        #     puts result.transcript # "how old is the Brooklyn Bridge"
-        #     puts result.confidence # 0.9826789498329163
-        #   end
-        #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
         #     stream.send MicrophoneInput.read(32000)
         #   end
         #
         #   stream.stop
         #
-        def on_result &block
-          synchronize do
-            @callbacks[:result] << block
+        #   stream.wait_until_complete!
+        #   stream.complete? #=> true
+        #
+        #   results = stream.results
+        #   results.each do |result|
+        #     puts result.transcript
+        #     puts result.confidence
+        #   end
+        #
+        def wait_until_complete!
+          complete_check = nil
+          synchronize { complete_check = @complete }
+          while complete_check.nil?
+            sleep 1
+            synchronize { complete_check = @complete }
           end
         end
 
-        # @private add a result object, and call the callbacks
-        def add_result!result_index, result_grpc
-          synchronize do
-            @results[result_index] = Result.from_grpc result_grpc
-          end
-          # callback for final result received
-          result!
-        end
-
-        # @private yields each final results as they are received
-        def result!
-          synchronize do
-            @callbacks[:result].each { |c| c.call results }
-          end
-        end
-
         ##
-        # Register to be notified when speech has been detected in the audio
-        # stream.
+        # Register to be notified on the reception of an interim result.
         #
-        # @yield [callback] The block to be called when speech has been detected
-        #   in the audio stream.
+        # @yield [callback] The block for accessing final and interim results.
+        # @yieldparam [Array<Result>] final_results The final results.
+        # @yieldparam [Array<Result>] interim_results The interim results.
         #
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
-        #   # register callback for when speech has started.
-        #   stream.on_speech_start do
-        #     puts "Speech has started."
+        #   # register callback for when an interim result is returned
+        #   stream.on_interim do |final_results, interim_results|
+        #     interim_result = interim_results.first
+        #     puts interim_result.transcript # "how old is the Brooklyn Bridge"
+        #     puts interim_result.confidence # 0.9826789498329163
+        #     puts interim_result.stability # 0.8999
         #   end
         #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
         #     stream.send MicrophoneInput.read(32000)
         #   end
         #
         #   stream.stop
         #
-        def on_speech_start &block
+        def on_interim &block
           synchronize do
-            @callbacks[:speech_start] << block
+            @callbacks[:interim] << block
           end
         end
 
-        # @private returns single final result once :END_OF_UTTERANCE is
-        # received.
-        def speech_start!
+        ##
+        # @private yields two arguments, all final results and the
+        # non-final/incomplete result
+        def pass_interim! interim_results
           synchronize do
-            @callbacks[:speech_start].each(&:call)
+            @callbacks[:interim].each { |c| c.call results, interim_results }
           end
         end
 
         ##
-        # Register to be notified when speech has ceased to be detected in the
-        # audio stream.
+        # Register to be notified on the reception of a final result.
         #
-        # @yield [callback] The block to be called when speech has ceased to be
-        #   detected in the audio stream.
+        # @yield [callback] The block for accessing final results.
+        # @yieldparam [Array<Result>] results The final results.
         #
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
-        #   # register callback for when speech has ended.
-        #   stream.on_speech_end do
-        #     puts "Speech has ended."
-        #   end
-        #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
         #     stream.send MicrophoneInput.read(32000)
         #   end
         #
         #   stream.stop
+        #   stream.wait_until_complete!
         #
-        def on_speech_end &block
+        #   results = stream.results
+        #   result = results.first
+        #   result.transcript #=> "how old is the Brooklyn Bridge"
+        #   result.confidence #=> 0.9826789498329163
+        #
+        def on_result &block
           synchronize do
-            @callbacks[:speech_end] << block
+            @callbacks[:result] << block
           end
         end
 
-        # @private yields single final result once :END_OF_UTTERANCE is
-        # received.
-        def speech_end!
+        ##
+        # @private add a result object, and call the callbacks
+        def pass_result! result_grpc
           synchronize do
-            @callbacks[:speech_end].each(&:call)
+            @results << Result.from_grpc(result_grpc)
+            @callbacks[:result].each { |c| c.call @results }
           end
         end
 
         ##
         # Register to be notified when the end of the audio stream has been
@@ -361,15 +355,17 @@
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
-        #   # register callback for when audio has ended.
+        #   # register callback for when stream has ended.
         #   stream.on_complete do
-        #     puts "Audio has ended."
+        #     puts "Stream has ended."
         #   end
         #
         #   # Stream 5 seconds of audio from the microphone
         #   # Actual implementation of microphone input varies by platform
         #   5.times do
@@ -382,15 +378,15 @@
           synchronize do
             @callbacks[:complete] << block
           end
         end
 
-        # @private yields all final results once the recognition is completed
-        # depending on how the Stream is configured, this can be on the
-        # reception of :END_OF_AUDIO or :END_OF_UTTERANCE.
-        def complete!
+        ##
+        # @private marks the stream complete and calls the complete callbacks.
+        def pass_complete!
           synchronize do
+            @complete = true
             @callbacks[:complete].each(&:call)
           end
         end
 
         ##
@@ -407,10 +403,11 @@
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
         #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
         #                          sample_rate: 16000,
         #                          utterance: true
         #
         #   # register callback for when utterance has occurred.
         #   stream.on_utterance do
@@ -430,13 +427,14 @@
           synchronize do
             @callbacks[:utterance] << block
           end
         end
 
-        # @private returns single final result once :END_OF_UTTERANCE is
+        ##
+        # @private calls utterance callbacks once :END_OF_SINGLE_UTTERANCE is
         # received.
-        def utterance!
+        def pass_utterance!
           synchronize do
             @callbacks[:utterance].each(&:call)
           end
         end
 
@@ -449,11 +447,13 @@
         # @example
         #   require "google/cloud/speech"
         #
         #   speech = Google::Cloud::Speech.new
         #
-        #   stream = speech.stream encoding: :raw, sample_rate: 16000
+        #   stream = speech.stream encoding: :raw,
+        #                          language: "en-US",
+        #                          sample_rate: 16000
         #
         #   # register callback for when an error is returned
         #   stream.on_error do |error|
         #     puts "The following error occurred while streaming: #{error}"
         #     stream.stop
@@ -485,51 +485,48 @@
         def background_run
           response_enum = @service.recognize_stream @request_queue.each_item
           response_enum.each do |response|
             begin
               background_results response
-              background_endpointer response.endpointer_type
+              background_event_type response.speech_event_type
               background_error response.error
             rescue => e
               error! Google::Cloud::Error.from_error(e)
             end
           end
+        rescue => e
+          error! Google::Cloud::Error.from_error(e)
+        ensure
+          pass_complete!
           Thread.pass
         end
 
         def background_results response
           # Handle the results (StreamingRecognitionResult)
           return unless response.results && response.results.any?
 
           final_grpc, interim_grpcs = *response.results
-          if final_grpc && final_grpc.is_final
-            add_result! response.result_index, final_grpc
-          else
+          unless final_grpc && final_grpc.is_final
             # all results are interim
+            final_grpc = nil
             interim_grpcs = response.results
           end
 
           # convert to Speech object from GRPC object
           interim_results = Array(interim_grpcs).map do |grpc|
             InterimResult.from_grpc grpc
           end
+
           # callback for interim results received
-          interim! interim_results if interim_results.any?
+          pass_interim! interim_results if interim_results.any?
+          # callback for final results received, if any
+          pass_result! final_grpc if final_grpc
         end
 
-        def background_endpointer endpointer
-          # Handle the endpointer by raising events
-          if endpointer == :START_OF_SPEECH
-            speech_start!
-          elsif endpointer == :END_OF_SPEECH
-            speech_end!
-          elsif endpointer == :END_OF_AUDIO
-            # TODO: do we automatically call stop here?
-            complete!
-          elsif endpointer == :END_OF_UTTERANCE
-            # TODO: do we automatically call stop here?
-            utterance!
-          end
+        def background_event_type event_type
+          # Handle the event_type by calling the matching callbacks
+          # TODO: do we automatically call stop here?
+          pass_utterance! if event_type == :END_OF_SINGLE_UTTERANCE
         end
 
         def background_error error
           return if error.nil?