lib/google/cloud/speech/stream.rb in google-cloud-speech-0.23.0 vs lib/google/cloud/speech/stream.rb in google-cloud-speech-0.24.0

- old
+ new

@@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -require "google/cloud/speech/v1beta1" +require "google/cloud/speech/v1" require "google/cloud/speech/result" require "monitor" require "forwardable" module Google @@ -29,27 +29,28 @@ # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when a result is returned - # stream.on_result do |results| - # result = results.first - # puts result.transcript # "how old is the Brooklyn Bridge" - # puts result.confidence # 0.9826789498329163 - # end - # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) # end # # stream.stop + # stream.wait_until_complete! # + # results = stream.results + # result = results.first + # result.transcript #=> "how old is the Brooklyn Bridge" + # result.confidence #=> 0.9826789498329163 + # class Stream include MonitorMixin ## # @private Creates a new Speech Stream instance. # This must always be private, since it may change as the implementation @@ -93,32 +94,33 @@ # # speech = Google::Cloud::Speech.new # # audio = speech.audio "path/to/audio.raw" # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when a result is returned - # stream.on_result do |results| - # result = results.first - # puts result.transcript # "how old is the Brooklyn Bridge" - # puts result.confidence # 0.9826789498329163 - # end - # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) # end # # stream.stop + # stream.wait_until_complete! # + # results = stream.results + # result = results.first + # result.transcript #=> "how old is the Brooklyn Bridge" + # result.confidence #=> 0.9826789498329163 + # def send bytes start # lazily call start if the stream wasn't started yet # TODO: do not send if stopped? synchronize do - req = V1beta1::StreamingRecognizeRequest.new( + req = V1::StreamingRecognizeRequest.new( audio_content: bytes.encode("ASCII-8BIT")) @request_queue.push req end end @@ -151,11 +153,13 @@ # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) @@ -174,182 +178,172 @@ @results end end ## - # Register to be notified on the reception of an interim result. + # Whether all speech recognition results have been returned. # - # @yield [callback] The block for accessing final and interim results. - # @yieldparam [Array<Result>] final_results The final results. - # @yieldparam [Array<Result>] interim_results The interim results. + # @return [Boolean] All speech recognition results have been returned. # # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when an interim result is returned - # stream.on_interim do |final_results, interim_results| - # interim_result = interim_results.first - # puts interim_result.transcript # "how old is the Brooklyn Bridge" - # puts interim_result.confidence # 0.9826789498329163 - # puts interim_result.stability # 0.8999 - # end - # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) # end # # stream.stop # - def on_interim &block + # stream.wait_until_complete! + # stream.complete? #=> true + # + # results = stream.results + # results.each do |result| + # puts result.transcript + # puts result.confidence + # end + # + def complete? synchronize do - @callbacks[:interim] << block + @complete end end - # @private yields two arguments, all final results and the - # non-final/incomplete result - def interim! interim_results - synchronize do - @callbacks[:interim].each { |c| c.call results, interim_results } - end - end - ## - # Register to be notified on the reception of a final result. + # Blocks until all speech recognition results have been returned. # - # @yield [callback] The block for accessing final results. - # @yieldparam [Array<Result>] results The final results. - # # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when an interim result is returned - # stream.on_result do |results| - # result = results.first - # puts result.transcript # "how old is the Brooklyn Bridge" - # puts result.confidence # 0.9826789498329163 - # end - # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) # end # # stream.stop # - def on_result &block - synchronize do - @callbacks[:result] << block + # stream.wait_until_complete! + # stream.complete? #=> true + # + # results = stream.results + # results.each do |result| + # puts result.transcript + # puts result.confidence + # end + # + def wait_until_complete! + complete_check = nil + synchronize { complete_check = @complete } + while complete_check.nil? + sleep 1 + synchronize { complete_check = @complete } end end - # @private add a result object, and call the callbacks - def add_result!result_index, result_grpc - synchronize do - @results[result_index] = Result.from_grpc result_grpc - end - # callback for final result received - result! - end - - # @private yields each final results as they are received - def result! - synchronize do - @callbacks[:result].each { |c| c.call results } - end - end - ## - # Register to be notified when speech has been detected in the audio - # stream. + # Register to be notified on the reception of an interim result. # - # @yield [callback] The block to be called when speech has been detected - # in the audio stream. + # @yield [callback] The block for accessing final and interim results. + # @yieldparam [Array<Result>] final_results The final results. + # @yieldparam [Array<Result>] interim_results The interim results. # # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when speech has started. - # stream.on_speech_start do - # puts "Speech has started." + # # register callback for when an interim result is returned + # stream.on_interim do |final_results, interim_results| + # interim_result = interim_results.first + # puts interim_result.transcript # "how old is the Brooklyn Bridge" + # puts interim_result.confidence # 0.9826789498329163 + # puts interim_result.stability # 0.8999 # end # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) # end # # stream.stop # - def on_speech_start &block + def on_interim &block synchronize do - @callbacks[:speech_start] << block + @callbacks[:interim] << block end end - # @private returns single final result once :END_OF_UTTERANCE is - # received. - def speech_start! + ## + # @private yields two arguments, all final results and the + # non-final/incomplete result + def pass_interim! interim_results synchronize do - @callbacks[:speech_start].each(&:call) + @callbacks[:interim].each { |c| c.call results, interim_results } end end ## - # Register to be notified when speech has ceased to be detected in the - # audio stream. + # Register to be notified on the reception of a final result. # - # @yield [callback] The block to be called when speech has ceased to be - # detected in the audio stream. + # @yield [callback] The block for accessing final results. + # @yieldparam [Array<Result>] results The final results. # # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when speech has ended. - # stream.on_speech_end do - # puts "Speech has ended." - # end - # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do # stream.send MicrophoneInput.read(32000) # end # # stream.stop + # stream.wait_until_complete! # - def on_speech_end &block + # results = stream.results + # result = results.first + # result.transcript #=> "how old is the Brooklyn Bridge" + # result.confidence #=> 0.9826789498329163 + # + def on_result &block synchronize do - @callbacks[:speech_end] << block + @callbacks[:result] << block end end - # @private yields single final result once :END_OF_UTTERANCE is - # received. - def speech_end! + ## + # @private add a result object, and call the callbacks + def pass_result! result_grpc synchronize do - @callbacks[:speech_end].each(&:call) + @results << Result.from_grpc(result_grpc) + @callbacks[:result].each { |c| c.call @results } end end ## # Register to be notified when the end of the audio stream has been @@ -361,15 +355,17 @@ # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # - # # register callback for when audio has ended. + # # register callback for when stream has ended. # stream.on_complete do - # puts "Audio has ended." + # puts "Stream has ended." # end # # # Stream 5 seconds of audio from the microphone # # Actual implementation of microphone input varies by platform # 5.times do @@ -382,15 +378,15 @@ synchronize do @callbacks[:complete] << block end end - # @private yields all final results once the recognition is completed - # depending on how the Stream is configured, this can be on the - # reception of :END_OF_AUDIO or :END_OF_UTTERANCE. - def complete! + ## + # @private yields when the end of the audio stream has been reached. + def pass_complete! synchronize do + @complete = true @callbacks[:complete].each(&:call) end end ## @@ -407,10 +403,11 @@ # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # # stream = speech.stream encoding: :raw, + # language: "en-US", # sample_rate: 16000, # utterance: true # # # register callback for when utterance has occurred. # stream.on_utterance do @@ -430,13 +427,14 @@ synchronize do @callbacks[:utterance] << block end end - # @private returns single final result once :END_OF_UTTERANCE is + ## + # @private returns single final result once :END_OF_SINGLE_UTTERANCE is # received. - def utterance! + def pass_utterance! synchronize do @callbacks[:utterance].each(&:call) end end @@ -449,11 +447,13 @@ # @example # require "google/cloud/speech" # # speech = Google::Cloud::Speech.new # - # stream = speech.stream encoding: :raw, sample_rate: 16000 + # stream = speech.stream encoding: :raw, + # language: "en-US", + # sample_rate: 16000 # # # register callback for when an error is returned # stream.on_error do |error| # puts "The following error occurred while streaming: #{error}" # stream.stop @@ -485,51 +485,48 @@ def background_run response_enum = @service.recognize_stream @request_queue.each_item response_enum.each do |response| begin background_results response - background_endpointer response.endpointer_type + background_event_type response.speech_event_type background_error response.error rescue => e error! Google::Cloud::Error.from_error(e) end end + rescue => e + error! Google::Cloud::Error.from_error(e) + ensure + pass_complete! Thread.pass end def background_results response # Handle the results (StreamingRecognitionResult) return unless response.results && response.results.any? final_grpc, interim_grpcs = *response.results - if final_grpc && final_grpc.is_final - add_result! response.result_index, final_grpc - else + unless final_grpc && final_grpc.is_final # all results are interim + final_grpc = nil interim_grpcs = response.results end # convert to Speech object from GRPC object interim_results = Array(interim_grpcs).map do |grpc| InterimResult.from_grpc grpc end + # callback for interim results received - interim! interim_results if interim_results.any? + pass_interim! interim_results if interim_results.any? + # callback for final results received, if any + pass_result! final_grpc if final_grpc end - def background_endpointer endpointer - # Handle the endpointer by raising events - if endpointer == :START_OF_SPEECH - speech_start! - elsif endpointer == :END_OF_SPEECH - speech_end! - elsif endpointer == :END_OF_AUDIO - # TODO: do we automatically call stop here? - complete! - elsif endpointer == :END_OF_UTTERANCE - # TODO: do we automatically call stop here? - utterance! - end + def background_event_type event_type + # Handle the event_type by raising events + # TODO: do we automatically call stop here? + pass_utterance! if event_type == :END_OF_SINGLE_UTTERANCE end def background_error error return if error.nil?