lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb in google-cloud-speech-0.30.0 vs lib/google/cloud/speech/v1p1beta1/doc/google/cloud/speech/v1p1beta1/cloud_speech.rb in google-cloud-speech-0.30.1

- old
+ new

@@ -13,21 +13,10 @@ # limitations under the License. module Google module Cloud module Speech - ## - # # Cloud Speech API Contents - # - # | Class | Description | - # | ----- | ----------- | - # | [SpeechClient][] | Service that implements Google Cloud Speech API. | - # | [Data Types][] | Data types for Google::Cloud::Speech::V1p1beta1 | - # - # [SpeechClient]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1p1beta1/speechclient - # [Data Types]: https://googlecloudplatform.github.io/google-cloud-ruby/#/docs/google-cloud-speech/latest/google/cloud/speech/v1p1beta1/datatypes - # module V1p1beta1 # The top-level message sent by the client for the +Recognize+ method. # @!attribute [rw] config # @return [Google::Cloud::Speech::V1p1beta1::RecognitionConfig] # *Required* Provides information to the recognizer that specifies how to @@ -111,17 +100,50 @@ # 16000 is optimal. For best results, set the sampling rate of the audio # source to 16000 Hz. If that's not possible, use the native sample rate of # the audio source (instead of re-sampling). # This field is optional for +FLAC+ and +WAV+ audio files and required # for all other audio formats. For details, see {Google::Cloud::Speech::V1p1beta1::RecognitionConfig::AudioEncoding AudioEncoding}. + # @!attribute [rw] audio_channel_count + # @return [Integer] + # *Optional* The number of channels in the input audio data. + # ONLY set this for MULTI-CHANNEL recognition. + # Valid values for LINEAR16 and FLAC are +1+-+8+. + # Valid values for OGG_OPUS are '1'-'254'. + # Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only +1+. + # If +0+ or omitted, defaults to one channel (mono). + # NOTE: We only recognize the first channel by default. + # To perform independent recognition on each channel set + # enable_separate_recognition_per_channel to 'true'. 
+ # @!attribute [rw] enable_separate_recognition_per_channel + # @return [true, false] + # This needs to be set to 'true' explicitly and audio_channel_count > 1 + # to get each channel recognized separately. The recognition result will + # contain a channel_tag field to state which channel that result belongs to. + # If this is not 'true', we will only recognize the first channel. + # NOTE: The request is also billed cumulatively for all channels recognized: + # (audio_channel_count times the audio length). # @!attribute [rw] language_code # @return [String] # *Required* The language of the supplied audio as a # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. # Example: "en-US". # See [Language Support](https://cloud.google.com/speech/docs/languages) # for a list of the currently supported language codes. + # @!attribute [rw] alternative_language_codes + # @return [Array<String>] + # *Optional* A list of up to 3 additional + # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, + # listing possible alternative languages of the supplied audio. + # See [Language Support](https://cloud.google.com/speech/docs/languages) + # for a list of the currently supported language codes. + # If alternative languages are listed, the recognition result will contain + # recognition in the most likely language detected including the main + # language_code. The recognition result will include the language tag + # of the language detected in the audio. + # NOTE: This feature is only supported for Voice Command and Voice Search + # use cases and performance may vary for other use cases (e.g., phone call + # transcription). # @!attribute [rw] max_alternatives # @return [Integer] # *Optional* Maximum number of recognition hypotheses to be returned. # Specifically, the maximum number of +SpeechRecognitionAlternative+ messages # within each +SpeechRecognitionResult+. 
@@ -141,19 +163,39 @@ # @return [true, false] # *Optional* If +true+, the top result includes a list of words and # the start and end time offsets (timestamps) for those words. If # +false+, no word-level time offset information is returned. The default is # +false+. + # @!attribute [rw] enable_word_confidence + # @return [true, false] + # *Optional* If +true+, the top result includes a list of words and the + # confidence for those words. If +false+, no word-level confidence + # information is returned. The default is +false+. # @!attribute [rw] enable_automatic_punctuation # @return [true, false] # *Optional* If 'true', adds punctuation to recognition result hypotheses. # This feature is only available in select languages. Setting this for # requests in other languages has no effect at all. # The default 'false' value does not add punctuation to result hypotheses. # NOTE: "This is currently offered as an experimental service, complimentary # to all users. In the future this may be exclusively available as a # premium feature." + # @!attribute [rw] enable_speaker_diarization + # @return [true, false] + # *Optional* If 'true', enables speaker detection for each recognized word in + # the top alternative of the recognition result using a speaker_tag provided + # in the WordInfo. + # Note: When this is true, we send all the words from the beginning of the + # audio for the top alternative in every consecutive response. + # This is done in order to improve our speaker tags as our models learn to + # identify the speakers in the conversation over time. + # @!attribute [rw] diarization_speaker_count + # @return [Integer] + # *Optional* + # If set, specifies the estimated number of speakers in the conversation. + # If not set, defaults to '2'. + # Ignored unless enable_speaker_diarization is set to true. # @!attribute [rw] metadata # @return [Google::Cloud::Speech::V1p1beta1::RecognitionMetadata] # *Optional* Metadata regarding this request. 
# @!attribute [rw] model # @return [String] @@ -565,19 +607,41 @@ # Output only. An estimate of the likelihood that the recognizer will not # change its guess about this interim result. Values range from 0.0 # (completely unstable) to 1.0 (completely stable). # This field is only provided for interim results (+is_final=false+). # The default of 0.0 is a sentinel value indicating +stability+ was not set. + # @!attribute [rw] channel_tag + # @return [Integer] + # For multi-channel audio, this is the channel number corresponding to the + # recognized result for the audio from that channel. + # For audio_channel_count = N, its output values can range from '1' to 'N'. + # @!attribute [rw] language_code + # @return [String] + # Output only. The + # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the + # language in this result. This language code was detected to have the most + # likelihood of being spoken in the audio. class StreamingRecognitionResult; end # A speech recognition result corresponding to a portion of the audio. # @!attribute [rw] alternatives # @return [Array<Google::Cloud::Speech::V1p1beta1::SpeechRecognitionAlternative>] # Output only. May contain one or more recognition hypotheses (up to the # maximum specified in +max_alternatives+). # These alternatives are ordered in terms of accuracy, with the top (first) # alternative being the most probable, as ranked by the recognizer. + # @!attribute [rw] channel_tag + # @return [Integer] + # For multi-channel audio, this is the channel number corresponding to the + # recognized result for the audio from that channel. + # For audio_channel_count = N, its output values can range from '1' to 'N'. + # @!attribute [rw] language_code + # @return [String] + # Output only. The + # [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the + # language in this result. This language code was detected to have the most + # likelihood of being spoken in the audio. 
class SpeechRecognitionResult; end # Alternative hypotheses (a.k.a. n-best list). # @!attribute [rw] transcript # @return [String] @@ -616,9 +680,25 @@ # This is an experimental feature and the accuracy of the time offset can # vary. # @!attribute [rw] word # @return [String] # Output only. The word corresponding to this set of information. + # @!attribute [rw] confidence + # @return [Float] + # Output only. The confidence estimate between 0.0 and 1.0. A higher number + # indicates an estimated greater likelihood that the recognized words are + # correct. This field is set only for the top alternative of a non-streaming + # result, or of a streaming result where +is_final=true+. + # This field is not guaranteed to be accurate and users should not rely on it + # to be always provided. + # The default of 0.0 is a sentinel value indicating +confidence+ was not set. + # @!attribute [rw] speaker_tag + # @return [Integer] + # Output only. A distinct integer value is assigned for every speaker within + # the audio. This field specifies which one of those speakers was detected to + # have spoken this word. Value ranges from '1' to diarization_speaker_count. + # speaker_tag is set if enable_speaker_diarization = 'true' and only in the + # top alternative. class WordInfo; end end end end end \ No newline at end of file