# frozen_string_literal: true

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Auto-generated by gapic-generator-ruby. DO NOT EDIT!


module Google
  module Cloud
    module TextToSpeech
      module V1
        # The top-level message sent by the client for the `ListVoices` method.
        # @!attribute [rw] language_code
        #   @return [::String]
        #     Optional. Recommended.
        #     [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
        #     If not specified, the API will return all supported voices.
        #     If specified, the ListVoices call will only return voices that can be used
        #     to synthesize this language_code. For example, if you specify `"en-NZ"`,
        #     all `"en-NZ"` voices will be returned. If you specify `"no"`, both
        #     `"no-\*"` (Norwegian) and `"nb-\*"` (Norwegian Bokmal) voices will be
        #     returned.
        class ListVoicesRequest
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # The message returned to the client by the `ListVoices` method.
        # @!attribute [rw] voices
        #   @return [::Array<::Google::Cloud::TextToSpeech::V1::Voice>]
        #     The list of voices.
        class ListVoicesResponse
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Description of a voice supported by the TTS service.
        # @!attribute [rw] language_codes
        #   @return [::Array<::String>]
        #     The languages that this voice supports, expressed as
        #     [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags (e.g.
        #     "en-US", "es-419", "cmn-tw").
        # @!attribute [rw] name
        #   @return [::String]
        #     The name of this voice. Each distinct voice has a unique name.
        # @!attribute [rw] ssml_gender
        #   @return [::Google::Cloud::TextToSpeech::V1::SsmlVoiceGender]
        #     The gender of this voice.
        # @!attribute [rw] natural_sample_rate_hertz
        #   @return [::Integer]
        #     The natural sample rate (in hertz) for this voice.
        class Voice
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Used for advanced voice options.
        # @!attribute [rw] low_latency_journey_synthesis
        #   @return [::Boolean]
        #     Only for Journey voices. If false, the synthesis will be context aware
        #     and have higher latency.
        class AdvancedVoiceOptions
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # The top-level message sent by the client for the `SynthesizeSpeech` method.
        # @!attribute [rw] input
        #   @return [::Google::Cloud::TextToSpeech::V1::SynthesisInput]
        #     Required. The Synthesizer requires either plain text or SSML as input.
        # @!attribute [rw] voice
        #   @return [::Google::Cloud::TextToSpeech::V1::VoiceSelectionParams]
        #     Required. The desired voice of the synthesized audio.
        # @!attribute [rw] audio_config
        #   @return [::Google::Cloud::TextToSpeech::V1::AudioConfig]
        #     Required. The configuration of the synthesized audio.
        # @!attribute [rw] advanced_voice_options
        #   @return [::Google::Cloud::TextToSpeech::V1::AdvancedVoiceOptions]
        #     Advanced voice options.
        class SynthesizeSpeechRequest
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Pronunciation customization for a phrase.
        # @!attribute [rw] phrase
        #   @return [::String]
        #     The phrase to which the customization will be applied.
        #     The phrase can be multiple words (in the case of proper nouns etc), but
        #     should not span to a whole sentence.
        # @!attribute [rw] phonetic_encoding
        #   @return [::Google::Cloud::TextToSpeech::V1::CustomPronunciationParams::PhoneticEncoding]
        #     The phonetic encoding of the phrase.
        # @!attribute [rw] pronunciation
        #   @return [::String]
        #     The pronunciation of the phrase. This must be in the phonetic encoding
        #     specified above.
        class CustomPronunciationParams
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods

          # The phonetic encoding of the phrase.
          module PhoneticEncoding
            # Not specified.
            PHONETIC_ENCODING_UNSPECIFIED = 0

            # IPA. (e.g. apple -> ˈæpəl )
            # https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
            PHONETIC_ENCODING_IPA = 1

            # X-SAMPA (e.g. apple -> "{p@l" )
            # https://en.wikipedia.org/wiki/X-SAMPA
            PHONETIC_ENCODING_X_SAMPA = 2
          end
        end

        # A collection of pronunciation customizations.
        # @!attribute [rw] pronunciations
        #   @return [::Array<::Google::Cloud::TextToSpeech::V1::CustomPronunciationParams>]
        #     The pronunciation customizations to be applied.
        class CustomPronunciations
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # A collection of turns for multi-speaker synthesis.
        # @!attribute [rw] turns
        #   @return [::Array<::Google::Cloud::TextToSpeech::V1::MultiSpeakerMarkup::Turn>]
        #     Required. Speaker turns.
        class MultiSpeakerMarkup
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods

          # A Multi-speaker turn.
          # @!attribute [rw] speaker
          #   @return [::String]
          #     Required. The speaker of the turn, for example, 'O' or 'Q'. Please refer
          #     to documentation for available speakers.
          # @!attribute [rw] text
          #   @return [::String]
          #     Required. The text to speak.
          class Turn
            include ::Google::Protobuf::MessageExts
            extend ::Google::Protobuf::MessageExts::ClassMethods
          end
        end

        # Contains text input to be synthesized. Either `text` or `ssml` must be
        # supplied. Supplying both or neither returns
        # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. The
        # input size is limited to 5000 bytes.
        # @!attribute [rw] text
        #   @return [::String]
        #     The raw text to be synthesized.
        # @!attribute [rw] ssml
        #   @return [::String]
        #     The SSML document to be synthesized. The SSML document must be valid
        #     and well-formed. Otherwise the RPC will fail and return
        #     [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. For
        #     more information, see
        #     [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
        # @!attribute [rw] multi_speaker_markup
        #   @return [::Google::Cloud::TextToSpeech::V1::MultiSpeakerMarkup]
        #     The multi-speaker input to be synthesized. Only applicable for
        #     multi-speaker synthesis.
        # @!attribute [rw] custom_pronunciations
        #   @return [::Google::Cloud::TextToSpeech::V1::CustomPronunciations]
        #     Optional. The pronunciation customizations to be applied to the input. If
        #     this is set, the input will be synthesized using the given pronunciation
        #     customizations.
        #
        #     The initial support will be for EFIGS (English, French,
        #     Italian, German, Spanish) languages, as provided in
        #     VoiceSelectionParams. Journey and Instant Clone voices are
        #     not supported yet.
        #
        #     In order to customize the pronunciation of a phrase, there must be an exact
        #     match of the phrase in the input types. If using SSML, the phrase must not
        #     be inside a phoneme tag (entirely or partially).
        class SynthesisInput
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Description of which voice to use for a synthesis request.
        # @!attribute [rw] language_code
        #   @return [::String]
        #     Required. The language (and potentially also the region) of the voice
        #     expressed as a [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
        #     language tag, e.g. "en-US". This should not include a script tag (e.g. use
        #     "cmn-cn" rather than "cmn-Hant-cn"), because the script will be inferred
        #     from the input provided in the SynthesisInput. The TTS service
        #     will use this parameter to help choose an appropriate voice. Note that
        #     the TTS service may choose a voice with a slightly different language code
        #     than the one selected; it may substitute a different region
        #     (e.g. using en-US rather than en-CA if there isn't a Canadian voice
        #     available), or even a different language, e.g. using "nb" (Norwegian
        #     Bokmal) instead of "no" (Norwegian).
        # @!attribute [rw] name
        #   @return [::String]
        #     The name of the voice. If both the name and the gender are not set,
        #     the service will choose a voice based on the other parameters such as
        #     language_code.
        # @!attribute [rw] ssml_gender
        #   @return [::Google::Cloud::TextToSpeech::V1::SsmlVoiceGender]
        #     The preferred gender of the voice. If not set, the service will
        #     choose a voice based on the other parameters such as language_code and
        #     name. Note that this is only a preference, not requirement; if a
        #     voice of the appropriate gender is not available, the synthesizer should
        #     substitute a voice with a different gender rather than failing the request.
        # @!attribute [rw] custom_voice
        #   @return [::Google::Cloud::TextToSpeech::V1::CustomVoiceParams]
        #     The configuration for a custom voice. If [CustomVoiceParams.model] is set,
        #     the service will choose the custom voice matching the specified
        #     configuration.
        # @!attribute [rw] voice_clone
        #   @return [::Google::Cloud::TextToSpeech::V1::VoiceCloneParams]
        #     Optional. The configuration for a voice clone. If
        #     [VoiceCloneParams.voice_cloning_key] is set, the service will choose the
        #     voice clone matching the specified configuration.
        class VoiceSelectionParams
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Description of audio data to be synthesized.
        # @!attribute [rw] audio_encoding
        #   @return [::Google::Cloud::TextToSpeech::V1::AudioEncoding]
        #     Required. The format of the audio byte stream.
        # @!attribute [rw] speaking_rate
        #   @return [::Float]
        #     Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
        #     the normal native speed supported by the specific voice. 2.0 is twice as
        #     fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
        #     speed. Any other values < 0.25 or > 4.0 will return an error.
        # @!attribute [rw] pitch
        #   @return [::Float]
        #     Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means
        #     increase 20 semitones from the original pitch. -20 means decrease 20
        #     semitones from the original pitch.
        # @!attribute [rw] volume_gain_db
        #   @return [::Float]
        #     Optional. Input only. Volume gain (in dB) of the normal native volume
        #     supported by the specific voice, in the range [-96.0, 16.0]. If unset, or
        #     set to a value of 0.0 (dB), will play at normal native signal amplitude. A
        #     value of -6.0 (dB) will play at approximately half the amplitude of the
        #     normal native signal amplitude. A value of +6.0 (dB) will play at
        #     approximately twice the amplitude of the normal native signal amplitude.
        #     Strongly recommend not to exceed +10 (dB) as there's usually no effective
        #     increase in loudness for any value greater than that.
        # @!attribute [rw] sample_rate_hertz
        #   @return [::Integer]
        #     Optional. The synthesis sample rate (in hertz) for this audio. When this is
        #     specified in SynthesizeSpeechRequest, if this is different from the voice's
        #     natural sample rate, then the synthesizer will honor this request by
        #     converting to the desired sample rate (which might result in worse audio
        #     quality), unless the specified sample rate is not supported for the
        #     encoding chosen, in which case it will fail the request and return
        #     [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
        # @!attribute [rw] effects_profile_id
        #   @return [::Array<::String>]
        #     Optional. Input only. An identifier which selects 'audio effects' profiles
        #     that are applied on (post synthesized) text to speech. Effects are applied
        #     on top of each other in the order they are given. See
        #     [audio profiles](https://cloud.google.com/text-to-speech/docs/audio-profiles)
        #     for current supported profile ids.
        class AudioConfig
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Description of the custom voice to be synthesized.
        # @!attribute [rw] model
        #   @return [::String]
        #     Required. The name of the AutoML model that synthesizes the custom voice.
        # @!attribute [rw] reported_usage
        #   @deprecated This field is deprecated and may be removed in the next major version update.
        #   @return [::Google::Cloud::TextToSpeech::V1::CustomVoiceParams::ReportedUsage]
        #     Optional. Deprecated. The usage of the synthesized audio to be reported.
        class CustomVoiceParams
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods

          # Deprecated. The usage of the synthesized audio. Usage does not affect
          # billing.
          module ReportedUsage
            # Request with reported usage unspecified will be rejected.
            REPORTED_USAGE_UNSPECIFIED = 0

            # For scenarios where the synthesized audio is not downloadable and can
            # only be used once. For example, real-time request in IVR system.
            REALTIME = 1

            # For scenarios where the synthesized audio is downloadable and can be
            # reused. For example, the synthesized audio is downloaded, stored in
            # customer service system and played repeatedly.
            OFFLINE = 2
          end
        end

        # The configuration of Voice Clone feature.
        # @!attribute [rw] voice_cloning_key
        #   @return [::String]
        #     Required. Created by GenerateVoiceCloningKey.
        class VoiceCloneParams
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # The message returned to the client by the `SynthesizeSpeech` method.
        # @!attribute [rw] audio_content
        #   @return [::String]
        #     The audio data bytes encoded as specified in the request, including the
        #     header for encodings that are wrapped in containers (e.g. MP3, OGG_OPUS).
        #     For LINEAR16 audio, we include the WAV header. Note: as
        #     with all bytes fields, protobuffers use a pure binary representation,
        #     whereas JSON representations use base64.
        class SynthesizeSpeechResponse
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Provides configuration information for the StreamingSynthesize request.
        # @!attribute [rw] voice
        #   @return [::Google::Cloud::TextToSpeech::V1::VoiceSelectionParams]
        #     Required. The desired voice of the synthesized audio.
        class StreamingSynthesizeConfig
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Input to be synthesized.
        # @!attribute [rw] text
        #   @return [::String]
        #     The raw text to be synthesized. It is recommended that each input
        #     contains complete, terminating sentences, as this will likely result in
        #     better prosody in the output audio. That being said, users are free to
        #     input text however they please.
        class StreamingSynthesisInput
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Request message for the `StreamingSynthesize` method. Multiple
        # `StreamingSynthesizeRequest` messages are sent in one call.
        # The first message must contain a `streaming_config` that
        # fully specifies the request configuration and must not contain `input`. All
        # subsequent messages must only have `input` set.
        # @!attribute [rw] streaming_config
        #   @return [::Google::Cloud::TextToSpeech::V1::StreamingSynthesizeConfig]
        #     StreamingSynthesizeConfig to be used in this streaming attempt. Only
        #     specified in the first message sent in a `StreamingSynthesize` call.
        # @!attribute [rw] input
        #   @return [::Google::Cloud::TextToSpeech::V1::StreamingSynthesisInput]
        #     Input to synthesize. Specified in all messages but the first in a
        #     `StreamingSynthesize` call.
        class StreamingSynthesizeRequest
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # `StreamingSynthesizeResponse` is the only message returned to the
        # client by `StreamingSynthesize` method. A series of zero or more
        # `StreamingSynthesizeResponse` messages are streamed back to the client.
        # @!attribute [rw] audio_content
        #   @return [::String]
        #     The audio data bytes encoded as specified in the request. This is
        #     headerless LINEAR16 audio with a sample rate of 24000.
        class StreamingSynthesizeResponse
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Gender of the voice as described in
        # [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
        module SsmlVoiceGender
          # An unspecified gender.
          # In VoiceSelectionParams, this means that the client doesn't care which
          # gender the selected voice will have. In the Voice field of
          # ListVoicesResponse, this may mean that the voice doesn't fit any of the
          # other categories in this enum, or that the gender of the voice isn't known.
          SSML_VOICE_GENDER_UNSPECIFIED = 0

          # A male voice.
          MALE = 1

          # A female voice.
          FEMALE = 2

          # A gender-neutral voice. This voice is not yet supported.
          NEUTRAL = 3
        end

        # Configuration to set up audio encoder. The encoding determines the output
        # audio format that we'd like.
        module AudioEncoding
          # Not specified. Will return result
          # [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
          AUDIO_ENCODING_UNSPECIFIED = 0

          # Uncompressed 16-bit signed little-endian samples (Linear PCM).
          # Audio content returned as LINEAR16 also contains a WAV header.
          LINEAR16 = 1

          # MP3 audio at 32kbps.
          MP3 = 2

          # Opus encoded audio wrapped in an ogg container. The result will be a
          # file which can be played natively on Android, and in browsers (at least
          # Chrome and Firefox). The quality of the encoding is considerably higher
          # than MP3 while using approximately the same bitrate.
          OGG_OPUS = 3

          # 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
          # Audio content returned as MULAW also contains a WAV header.
          MULAW = 5

          # 8-bit samples that compand 14-bit audio samples using G.711 PCMA/A-law.
          # Audio content returned as ALAW also contains a WAV header.
          ALAW = 6
        end
      end
    end
  end
end