
    V                         S r SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJ	r	  SSKJ
r
  SS	KJr  SS
KJr  S r " S S5      rS rS rS rS rS rg)zFlags for speech commands.    )absolute_import)division)unicode_literals)apis)actions)arg_parsers)
exceptions)util)	arg_utilsc                     [         R                  " [        R                  U 5      n[        R
                  " SUR                  R                  SSS9$ )Nz
--encodingzencoding-unspecifiedzQThe type of encoding of the file. Required if the file format is not WAV or FLAC.)defaulthelp_str)r   GetMessagesModuler
   
SPEECH_APIr   ChoiceEnumMapperRecognitionConfigEncodingValueValuesEnumversionmessagess     1lib/googlecloudsdk/command_lib/ml/speech/flags.pyGetEncodingTypeMapperr      sE    ##DOOW=(		#	#  88$	
     c                   B    \ rS rSrSrS rS rS rS rS r	S r
S	 rS
rg)RecognizeArgsToRequestMapper'   z4Utility class to map arguments to Recognize request.c                 J    S U l         S U l        S U l        S U l        S U l        g )N)_encoding_type_mapper_original_media_type_mapper_interaction_type_mapper _microphone_distance_type_mapper_device_type_mapper)selfs    r   __init__%RecognizeArgsToRequestMapper.__init__*   s*    !%D'+D$$(D!,0D)#Dr   c                    UR                  SSS9  UR                  SSS9nUR                  SSS9  UR                  S[        R                  " SS	S
9SSS9  [	        U5      U l        U R
                  R                  R                  U5        UR                  S[        SSS9  UR                  SSS9nUR                  S[        SSS9  UR                  SSSSS9  UR                  SSSSSSSSS S!S"S#S$.S%S&9  UR                  S'[        S(S)S*9  UR                  S+[        R                  " 5       S,/ S-S.9  UR                  S/SSS0S19  UR                  S2SSS3S19  UR                  S4SS5S69  g7)8z2Add common, GA level flags for recognize commands.audiozThe location of the audio file to transcribe. Must be a local path or a Google Cloud Storage URL (in the format gs://bucket/object).)helpT)mutexrequiredz--language-codezThe language of the supplied audio as a BCP-47 (https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. Example: "en-US". See https://cloud.google.com/speech/docs/languages for a list of the currently supported language codes.z
--languagezLThe `--language` flag is deprecated. Use the `--language-code` flag instead.warn)actionhiddenr(   z--sample-rateFzThe sample rate in Hertz. For best results, set the sampling rate of the audio source to 16000 Hz. If that's not possible, use the native sample rate of the audio source (instead of re-sampling).)typer*   r(   zAudio channel settings.r*   r(   z--audio-channel-countzThe number of channels in the input audio data.  Set this for separate-channel-recognition. Valid values are: 1)LINEAR16 and FLAC are 1-8 2)OGG_OPUS are 1-254 3) MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.z--separate-channel-recognition
store_truezRecognition result will contain a `channel_tag` field to state which channel that result belongs to. If this is not true, only the first channel will be recognized.r-   r*   r(   z--modelzaudio that is not one of the specific audio models. For example, long-form audio. Ideally the audio is high-fidelity, recorded at a 16khz or greater sampling rate.z5short queries such as voice commands or voice search.a  Use this model for any kind of long form content such as media or spontaneous speech and conversations. Consider using this model in place of the video model, especially if the video model is not available in your target language. You can also use this in place of the default model.zUse this model for short utterances that are a few seconds in length. It is useful for trying to capture commands or other single shot directed speech use cases. Consider using this model instead of the command and search model.zZBest for audio that originated from a conversation between a medical provider and patient.zJBest for audio that originated from dictation notes by a medical provider.zVaudio that originated from a phone call (typically recorded at an 8khz sampling rate).zaudio that originated from a phone call (typically recorded at an 8khz sampling rate). This is a premium model and can produce better results but costs more than the standard rate.zImproved version of the "phone_call" model, best for audio that originated from a phone call, typically recorded at an 8kHz sampling rate.zDedicated version of the modern "telephony" model for short or even single-word utterances for audio that originated from a phone call, typically recorded at an 8kHz sampling rate.zaudio that originated from video or includes multiple speakers. Ideally the audio is recorded at a 16khz or greater sampling rate. This is a premium model that costs more than the standard rate.)r   command_and_searchlatest_longlatest_shortmedical_conversationmedical_dictation
phone_callphone_call_enhanced	telephonytelephony_shortvideo_enhancedad  Select the model best suited to your domain to get best results. If you do not explicitly specify a model, Speech-to-Text will auto-select a model based on your other specified parameters. Some models are premium and cost more than standard models (although you can reduce the price by opting into https://cloud.google.com/speech-to-text/docs/data-logging))choicesr(   z--max-alternatives   zMaximum number of recognition hypotheses to be returned. The server may return fewer than max_alternatives. Valid values are 0-30. A value of 0 or 1 will return a maximum of one.)r/   r   r(   z--hintsHINTa  A list of strings containing word and phrase "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words and phrases, for example, if specific commands are typically spoken by the user. This can also be used to add additional words to the vocabulary of the recognizer. See https://cloud.google.com/speech/limits#content.)r/   metavarr   r(   z--include-word-time-offsetszIf True, the top result includes a list of words with the start and end time offsets (timestamps) for those words. If False, no word-level time offset information is returned.)r-   r   r(   z--filter-profanityzIf True, the server will attempt to filter out profanities, replacing all but the initial character in each filtered word with asterisks, e.g. ```f***```.z--enable-automatic-punctuationz2Adds punctuation to recognition result hypotheses.r-   r(   N)add_argument	add_groupr   DeprecationActionr   r   
choice_argAddToParserintr   ArgList)r#   parserapi_versionlanguage_argsaudio_channel_argss        r   AddRecognizeArgsToParser5RecognizeArgsToRequestMapper.AddRecognizeArgsToParser1   sO   
.  /
 $$4$$?M5  6 ((<> 5  
6 "7{!CD))55f=
$	  %  ))6 * 8##H	 $ I ##(0	 $ 1 , H;B0%+Q
&
M
&g8
tJy  CJ 	     ">  ? %=	  > &	  ' (A  Cr   c                    UR                  UR                  (       a  UR                  OUR                  U R                  R	                  UR
                  R                  SS5      R                  5       5      UR                  UR                  UR                  UR                  UR                  UR                  UR                  UR                  S9/S9	nUR                   (       a  UR                   Ul        UR$                  ba  UR$                  S;   a  UR$                  Ul        U$ UR$                  S:X  a  SUl        SUl        U$ UR$                  S	:X  a  S
Ul        SUl        U$ )z4Make RecognitionConfig message from given arguments._-)phrases)	languageCodeencodingsampleRateHertzaudioChannelCountmaxAlternativesenableWordTimeOffsets#enableSeparateRecognitionPerChannelprofanityFilterspeechContexts)	r   r3   r8   r4   r5   r6   r7   r:   r;   r9   r8   Tr<   video)r   language_codelanguager   GetEnumForChoicerT   replacelowersample_rateaudio_channel_countmax_alternativesinclude_word_time_offsetsseparate_channel_recognitionfilter_profanitySpeechContexthintsenable_automatic_punctuationenableAutomaticPunctuationmodeluseEnhanced)r#   argsr   configs       r   MakeRecognitionConfig2RecognizeArgsToRequestMapper.MakeRecognitionConfig   sB   '' ''#'==++<<MM!!#s+1135((22--"<<,0,M,M-- ..tzz.BC ( EF ((*.*K*Kf'zz	 
 

 zz M ::..#! M ::))!Mr   c           
      \   UR                  S[        R                  " 5       / SSS9  UR                  SS9nUR                  S[        S[
        R                  " SS	S
9SS9  UR                  S[        SS9  UR                  S[        SS9  UR                  SSSSS9  UR                  SSSS9  g)zAdd beta arguments.z--additional-language-codesLANGUAGE_CODEzThe BCP-47 language tags of other languages that the speech may be in.
Up to 3 can be provided.

If alternative languages are listed, recognition result will contain recognition
in the most likely language detected including the main language-code.)r/   r   r@   r(   F)r*   --diarization-speaker-countTzThe `--diarization-speaker-count` flag is deprecated. Use the `--min-diarization-speaker-count` and/or `--max-diarization-speaker-count` flag instead.r+   zBEstimated number of speakers in the conversation being recognized.)r/   r.   r-   r(   z--min-diarization-speaker-countzJMinimum estimated number of speakers in the conversation being recognized.)r/   r(   z--max-diarization-speaker-countzJMaximum estimated number of speakers in the conversation being recognized.z--enable-speaker-diarizationr1   zEnable speaker detection for each recognized word in the top alternative of the recognition result using an integer speaker_tag provided in the WordInfo.r2   z--include-word-confidencezMInclude a list of words and the confidence for those words in the top result.rA   N)rB   r   rH   rC   rG   r   rD   )r#   rI   speaker_argss      r   AddBetaRecognizeArgsToParser9RecognizeArgsToRequestMapper.AddBetaRecognizeArgsToParser   s   
%  "J  
K ##U#3L%(()DF
  
 )  
 )  
 &0	  1 #  r   c                 x   UR                   Ul        UR                  (       d3  UR                  (       d"  UR                  (       d  UR
                  (       a  UR                  S5      R                  SS9=o2l        UR                  (       a  UR                  Ul	        UR                  (       a  UR                  Ul
        UR
                  (       a[  UR                  (       d  UR                  (       a  [        R                  " SS5      eUR
                  Ul	        UR
                  Ul
        UR                  Ul        g)z+Updates config from command line arguments.diarizationConfigT)enableSpeakerDiarizationrt   zMdeprecated flag cannot be used with --max/min_diarization_speaker_count flagsN)additional_language_codesalternativeLanguageCodesenable_speaker_diarizationmin_diarization_speaker_countmax_diarization_speaker_countdiarization_speaker_countfield_by_namemessage_typery   minSpeakerCountmaxSpeakerCountr	   InvalidArgumentExceptioninclude_word_confidenceenableWordConfidence)r#   rn   ro   speaker_configs       r   !UpdateBetaArgsInRecognitionConfig>RecognizeArgsToRequestMapper.UpdateBetaArgsInRecognitionConfig+  s    &*&D&DF#''4+M+M**d.L.L282F2F
3+|T|JKn/		+	+)-)K)K&		+	+)-)K)K&		'	'....33+:; ; *.)G)G&)-)G)G&"&">">Fr   c                    UR                  SSS9nUR                  S[        S5      [        SS9  [	        U5      U l        U R
                  R                  R                  U5        [        U5      U l	        U R                  R                  R                  U5        [        U5      U l        U R                  R                  R                  U5        [        U5      U l        U R                  R                  R                  U5        UR                  S[        S	5      S
S9  UR                  S[        S5      SS9  UR                  S[        S5      SS9  g)zAdd alpha arguments.FzDescription of audio data to be recognized. Note that the Google Cloud Speech-to-text-api does not use this information, and only passes it through back into response.r0   z--naics-codez
naics-codezTThe industry vertical to which this speech recognition request most closely applies.)r-   r/   r(   z--recording-device-namezrecording-device-namez\The device used to make the recording.  Examples: `Nexus 5X`, `Polycom SoundStation IP 6000`rA   z--original-mime-typezoriginal-mime-typezJMime type of the original audio file. Examples: `audio/m4a`,  `audio/mp3`.z--audio-topiczaudio-topiczZDescription of the content, e.g. "Recordings of federal supreme court hearings from 2012".N)rC   rB   #MakeDeprecatedRecgonitionFlagActionrG   GetOriginalMediaTypeMapperr   rE   rF   GetInteractionTypeMapperr    GetMicrophoneDistanceTypeMapperr!   GetRecordingDeviceTypeMapperr"   )r#   rI   rJ   	meta_argss       r   AddAlphaRecognizeArgsToParser:RecognizeArgsToRequestMapper.AddAlphaRecognizeArgsToParserD  sZ     F ! GI
 2<@ 	  ! (B+'ND$$$//;;IF$<[$ID!!!,,88C,K-D)))44@@K;KHD''33I>!23JK)  *
 23GH  
 2=A%  &r   c                    UR                   c\  UR                  cO  UR                  cB  UR                  c5  UR                  c(  UR
                  c  UR                  c  UR                  Gbn  UR                  c$  UR                  S5      R                  5       Ul        U R                  R                  UR                   5      UR                  l        U R                  R                  UR                  5      UR                  l        UR                  UR                  l        U R"                  R                  UR                  5      UR                  l        U R&                  R                  UR                  5      UR                  l        UR
                  UR                  l        UR                  UR                  l        UR                  UR                  l        gg)z#Update RecognitionConfig with args.Nmetadata)interaction_typeoriginal_media_type
naics_codemicrophone_distancerecording_device_typerecording_device_nameoriginal_mime_typeaudio_topicr   r   r   r    r_   interactionTyper   originalMediaTypeindustryNaicsCodeOfAudior!   microphoneDistancer"   recordingDeviceTyperecordingDeviceNameoriginalMimeType
audioTopic)r#   rn   ro   s      r   "UpdateAlphaArgsInRecognitionConfig?RecognizeArgsToRequestMapper.UpdateAlphaArgsInRecognitionConfigj  ss   )  ,0K  ,""."".+t/?/?/K		  ..z:GGI

'
'
8
89N9N
O oo% 
*
*
;
;&&( oo' 26foo.

/
/
@
@&&( oo( 
"
"
3
3D4N4N
O oo),0,F,Ffoo))-)@)@foo&#'#3#3foo ! 0Lr   )r"   r   r    r!   r   N)__name__
__module____qualname____firstlineno____doc__r$   rM   rp   rv   r   r   r   __static_attributes__ r   r   r   r   '   s.    <$aCF#J0d?2$&L4r   r   c                 P    [         R                  " SU -   SR                  U 5      S9$ )Nz--zThe `{}` flag is deprecated and will be removed. The Google Cloud Speech-to-text api does not use it, and only passes it through back into response.r+   )r   rD   format)	flag_names    r   r   r     s.    		"	"
Y..4fY.?	
A Ar   c                     [         R                  " [        R                  U 5      n[        R
                  " SUR                  R                  [        S5      SSSSSSS	.S
S S9$ )Nz--recording-device-typezrecording-device-type)
smartphonez$Speech was recorded on a smartphone.)pcz8Speech was recorded using a personal computer or tablet.)z
phone-linez&Speech was recorded over a phone line.)vehiclez!Speech was recorded in a vehicle.)outdoorzSpeech was recorded outdoors.)indoorzSpeech was recorded indoors.)
SMARTPHONEPC
PHONE_LINEVEHICLEOTHER_OUTDOOR_DEVICEOTHER_INDOOR_DEVICEzAThe device type through which the original audio was recorded on.c                 .    U R                  S5      (       + $ NUNSPECIFIEDendswithxs    r   <lambda>.GetRecordingDeviceTypeMapper.<locals>.<lambda>      1::m#<<r   r-   custom_mappingsr   include_filter)	r   r   r
   r   r   r   RecognitionMetadata"RecordingDeviceTypeValueValuesEnumr   r   s     r   r   r     sk    ##DOOW=(		#	#""EE01HINM GE"N!K	<!
> >r   c           	          [         R                  " [        R                  U 5      n[        R
                  " SUR                  R                  [        S5      SSSS.SS S	9$ )
Nz--microphone-distancezmicrophone-distance)	nearfieldzThe audio was captured from a microphone close to the speaker, generally within
 1 meter. Examples include a phone, dictaphone, or handheld microphone.)midfieldz1The speaker is within 3 meters of the microphone.)farfieldz;The speaker is more than 3 meters away from the microphone.)	NEARFIELDMIDFIELDFARFIELDzLThe distance at which the audio device is placed to record the conversation.c                 .    U R                  S5      (       + $ r   r   r   s    r   r   1GetMicrophoneDistanceTypeMapper.<locals>.<lambda>  r   r   r   )	r   r   r
   r   r   r   r   !MicrophoneDistanceValueValuesEnumr   r   s     r   r   r     se    ##DOOW=(		#	#""DD01FGL PN	<!
> >r   c                     [         R                  " [        R                  U 5      n[        R
                  " SUR                  R                  [        S5      SSSSSSS	S
S.SS S9$ )Nz--interaction-typezinteraction-type)	dictationz`Transcribe speech to text to create a written document, such as a text-message, email or report.)
discussionz0Multiple people in a conversation or discussion.)presentationzLOne or more persons lecturing or presenting to others, mostly uninterrupted.)z
phone-callzwA phone-call or video-conference in which two or more people, who are not in the same room, are actively participating.)zprofessionally-producedz5Professionally produced audio (eg. TV Show, Podcast).)zvoice-commandz<Transcribe voice commands, such as for controlling a device.)zvoice-searchz2Transcribe spoken questions and queries into text.)	voicemailz<A recorded message intended for another person to listen to.)	DICTATION
DISCUSSIONPRESENTATION
PHONE_CALLPROFESSIONALLY_PRODUCEDVOICE_COMMANDVOICE_SEARCH	VOICEMAILz5Determining the interaction type in the conversation.c                 .    U R                  S5      (       + $ r   r   r   s    r   r   *GetInteractionTypeMapper.<locals>.<lambda>  r   r   r   )	r   r   r
   r   r   r   r   InteractionTypeValueValuesEnumr   r   s     r   r   r     s    ##DOOW=(		#	#""AA01CD4M<K
HOEO16 G<A 
>  >r   c           	          [         R                  " [        R                  U 5      n[        R
                  " SUR                  R                  [        S5      SSS.SS S9$ )	Nz--original-media-typezoriginal-media-type)r'   z&The speech data is an audio recording.)r\   z/The speech data originally recorded on a video.)AUDIOVIDEOz2The media type of the original audio conversation.c                 .    U R                  S5      (       + $ r   r   r   s    r   r   ,GetOriginalMediaTypeMapper.<locals>.<lambda>  r   r   r   )	r   r   r
   r   r   r   r    OriginalMediaTypeValueValuesEnumr   r   s     r   r   r     sZ    ##DOOW=(		#	#""CC01FGFO D<	
> 	>r   N)r   
__future__r   r   r   googlecloudsdk.api_lib.utilr   googlecloudsdk.callioper   r   r	   $googlecloudsdk.command_lib.ml.speechr
   $googlecloudsdk.command_lib.util.apisr   r   r   r   r   r   r   r   r   r   r   <module>r      sR    ! &  ' , + / . 5 :Z4 Z4z
A>*>*">J>r   