
    B                        S r SSKJr  SSKJr  SSKJr  SSKrSSKJr  SSK	J
r
  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  \R>                  " \R@                  RB                  \R@                  RD                  \R@                  RF                  5      \RH                   " S S\RJ                  5      5       5       r&g)zModel Garden deploy command.    )absolute_import)division)unicode_literalsN)
exceptions)
operations)client)apis)arg_parsers)base)	constants)endpoint_util)flags)model_garden_utils)region_util)
validation)_IsDefaultUniverse)
propertiesc                   .    \ rS rSrSr\S 5       rS rSrg)Deploy*   aC  Deploy a model in Model Garden to a Vertex AI endpoint.

## EXAMPLES

To deploy a Model Garden model `google/gemma2/gemma2-9b` under project
`example` in region
`us-central1`, run:

  $ gcloud ai model-garden models deploy
  --model=google/gemma2@gemma-2-9b
  --project=example
  --region=us-central1

To deploy a Hugging Face model `meta-llama/Meta-Llama-3-8B` under project
`example` in region `us-central1`, run:

  $ gcloud ai model-garden models deploy
  --model=meta-llama/Meta-Llama-3-8B
  --hugging-face-access-token={hf_token}
  --project=example
  --region=us-central1
c           
         [         R                  " SSSS9R                  U 5        [         R                  " SSSS9R                  U 5        [         R                  " SSS	S9R                  U 5        [        R                  " U S
[
        R                  S9  [         R                  " SSSS9R                  U 5        [         R                  " SSSS9R                  U 5        [         R                  " SS[        SS9R                  U 5        [         R                  " SSSSSS9R                  U 5        [         R                  " SSSSSS9R                  U 5        [         R                  " S[        R                  " [        [        [        R                  " 5       S.S/S9SS9R                  U 5        [         R                  " S SSSS!S"9R                  U 5        [         R                  " S#SSSS$S"9R                  U 5        [         R                  " S%SSSS&S"9R                  U 5        [         R                  " S'S(S)9R                  U 5        U R                  S*S+[        R                  " 5       [        R                  S,S-9  U R                  S.[        R                  " 5       S/[        R                  S0S19  U R                  S2S3[        R                  " 5       S4S59  U R                  S6S7[        R                  " [        R                  " S8S95      S:9[        R                  S;S-9  U R                  S<S7[        R                  " [        R                  " S8S95      S:9[        R                  S=S-9  U R                  S>S?S)9  U R                  S@SAS)9  U R                  SB[        SCS9  U R                  SD[        SES9  U R                  SF[        R                  " 5       SGSHSI9  U R                  SJ[        SKS9  U R                  SL[        SMS9  U R                  SN[        R                  " 5       SOSPSI9  U R                  SQ[        SRS9  U R                  SS[        STS9  g )UN--modelTa  The model to be deployed. If it is a Model Garden model, it should be in the format of `{publisher_name}/{model_name}@{model_version_name}, e.g. `google/gemma2@gemma-2-2b`. If it is a Hugging Face model, it should be in the convention of Hugging Face models, e.g. `meta-llama/Meta-Llama-3-8B`. If it is a Custom Weights model, it should be in the format of `gs://{gcs_bucket_uri}`, e.g. `gs://-model-garden-public-us/llama3.1/Meta-Llama-3.1-8B-Instruct`.)requiredhelpz--hugging-face-access-tokenFzThe access token from Hugging Face needed to read the model artifacts of gated models. It is only needed when the Hugging Face model to deploy is gated.z--endpoint-display-namez5Display name of the endpoint with the deployed model.zto deploy the model)prompt_funcz--machine-typezThe machine type to deploy the model to. It should be a supported machine type from the deployment configurations of the model. Use `gcloud ai model-garden models list-deployment-config` to check the supported machine types.)r   r   z--accelerator-typezThe accelerator type to serve the model. It should be a supported accelerator type from the verified deployment configurations of the model. Use `gcloud ai model-garden models list-deployment-config` to check the supported accelerator types.z--accelerator-countzSThe accelerator count to serve the model. Accelerator count should be non-negative.)r   typer   z--accept-eulazNWhen set, the user accepts the End User License Agreement (EULA) of the model.
store_true)r   actiondefaultr   z--asynchronouszaIf set to true, the command will terminate immediately and not keep polling the operation status.z--reservation-affinity)reservation-affinity-typekeyvaluesr    )specrequired_keyszA ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity.)r   r   z--spotz5If true, schedule the deployment workload on Spot VM.)r   r   r   r   z--use-dedicated-endpointzIf true, the endpoint will be exposed through a dedicated DNS. Your request to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability.z--enable-fast-tryoutzIf True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types.z--container-image-uriz{      URI of the Model serving container file in the Container Registry
      (e.g. gcr.io/myproject/server:latest).
      )r   z--container-env-varsz	KEY=VALUEz8List of key-value pairs to set as environment variables.)metavarr   r   r   z--container-commandCOMMANDzm  Entrypoint for the container image. If not specified, the container
  image's default entrypoint is run.
  )r   r%   r   r   z--container-argsARGz  Comma-separated arguments passed to the command run by the container
  image. If not specified and no `--command` is provided, the container
  image's default command is used.
  )r%   r   r   z--container-portsPORT   i  )element_typezd  Container ports to receive http requests at. Must be a number between 1 and
  65535, inclusive.
  z--container-grpc-portszd  Container ports to receive grpc requests at. Must be a number between 1 and
  65535, inclusive.
  z--container-predict-routez>HTTP path to send prediction requests to inside the container.z--container-health-routez8HTTP path to send health checks to inside the container.z&--container-deployment-timeout-secondszDeployment timeout in seconds.z!--container-shared-memory-size-mbz`  The amount of the VM memory to reserve as the shared memory for the model in
  megabytes.
    z--container-startup-probe-execSTARTUP_PROBE_EXECz  Exec specifies the action to take. Used by startup probe. An example of this
  argument would be ["cat", "/tmp/healthy"].
    )r   r%   r   z(--container-startup-probe-period-secondszh  How often (in seconds) to perform the startup probe. Default to 10 seconds.
  Minimum value is 1.
    z)--container-startup-probe-timeout-secondszm  Number of seconds after which the startup probe times out. Defaults to 1 second.
  Minimum value is 1.
    z--container-health-probe-execHEALTH_PROBE_EXECz  Exec specifies the action to take. Used by health probe. An example of this
  argument would be ["cat", "/tmp/healthy"].
    z'--container-health-probe-period-secondszg  How often (in seconds) to perform the health probe. Default to 10 seconds.
  Minimum value is 1.
    z(--container-health-probe-timeout-secondszl  Number of seconds after which the health probe times out. Defaults to 1 second.
  Minimum value is 1.
    )r   ArgumentAddToParserr   AddRegionResourceArgr   PromptForOpRegionintr
   ArgDictstrArgListadd_argumentUpdateAction
BoundedInt)parsers    ,lib/surface/ai/model_garden/models/deploy.pyArgsDeploy.ArgsF   s   MML	 k&MM%:	 k&MM!D k&	%;3P3P 	MM,
 	 k&MMQ
 	 k&MM'  k&MM 	 k&MM2 	 k&MM   -0%--/
 77
#  k&MMD k&MM"P
 k&MMM
 k&MM
 k&
  "''G     "''  	   "	  	   k.D.DQ.NO''  	    k.D.DQ.NO''  	 #M   "G   0-  
 +   (  "$	   2   3   '  "#	   1   2      c           
         UR                   R                  S5      nU(       d  [        R                  " U5        [        R                  " UR
                  5        UR                  R                  R                  5       nUR                  5       S   Ul        [        R                  nSUR                   ;  n[        5       (       a  SOS n[        R                  " XFS9   U(       Ga  [        UR                   5      [        UR"                  5      s=:X  a  [        UR$                  5      :X  d  O  [&        R(                  " SS5      eS nUR                   (       a  [*        R,                  " UUR                   UR"                  UR$                  S9  [.        R0                  " [        R2                  [        R4                  U   5      nUR6                  R9                  UR                   UR6                  R8                  R;                  UR"                  5      UR$                  S	9n[        R                  " XAR                  S9   S
R=                  S[?        [@        R@                  " 5       5      RC                  S5      S   S/5      n	[D        RF                  " 5       n
[H        RJ                  " US9nUR
                  (       a  UR
                  OU	n[*        RL                  " UUUUR                   UU
5        S S S 5        GOM[D        RF                  " 5       n
U(       a^  UR                   RO                  5       RC                  S5      u  p U
RQ                  SU SU 3SS9nS
R=                  XSS/5      n	U SU 3nOrUR                   RO                  5       RC                  S5      u  nn U
RQ                  SU SU 35      nS
R=                  UURC                  S5      S   S/5      n	SU SU 3n[*        RX                  " X5      n[*        R,                  " UURZ                  R\                  R^                  [?        URZ                  R\                  R`                  5      URZ                  R\                  Rb                  S9  [d        Rf                  Rh                  Rj                  Rm                  S 5        [        R                  " XAR                  S9   [D        RF                  " 5       n
[H        RJ                  " US9nUR
                  (       a  UR
                  OU	n[*        RL                  " UURZ                  R\                  UUUU
5        S S S 5        S S S 5        g ! , (       d  f       N= f! [R        RT                   a%    [&        RV                  " SUR                    S35      ef = f! [R        RT                   a%    [&        RV                  " SUR                    S35      ef = f! , (       d  f       N= f! , (       d  f       g = f)Nzgs://locationsId@zus-central1)regionz:--machine-type, --accelerator-type and --accelerator-countz Arguments for MachineType, AcceleratorType and AcceleratorCount must either all be provided or all be empty for custom weights model deployment.)machine_typeaccelerator_typeaccelerator_count)machineTypeacceleratorTypeacceleratorCount-zcustom-weights.r   zmg-cli-deploy)version/zpublishers/z/models/T)
model_nameis_hugging_face_modelr   zF is not a supported Hugging Face model for deployment in Model Garden.hfzF is not a supported Model Garden model for deployment in Model Garden.r)   )7model
startswithr   ValidateModelGardenModelArgsValidateDisplayNameendpoint_display_nameCONCEPTSr@   ParseAsDictr   BETA_VERSIONr   r   AiplatformEndpointOverridesboolrA   rB   rC   c_exceptionsInvalidArgumentExceptionr   CheckAcceleratorQuotar	   GetClientInstanceAI_PLATFORM_API_NAMEAI_PLATFORM_API_VERSIONMESSAGES_MODULE'GoogleCloudAiplatformV1beta1MachineSpecAcceleratorTypeValueValuesEnumjoinr3   timesplit	client_mgModelGardenClientr   OperationsClientr   lowerGetPublisherModelapitools_exceptionsHttpNotFoundErrorUnknownArgumentExceptionGetDeployConfigdedicatedResourcesmachineSpecrD   rE   rF   r   VALUESapi_endpoint_overrides
aiplatformSet)selfargsis_custom_weights_model
region_refrI   is_hf_modelr@   machine_specr   default_endpoint_name	mg_clientoperation_clientendpoint_namepublisher_namerK   publisher_modelapi_model_argmodel_and_version_namedeploy_configs                      r9   Run
Deploy.RunJ  s\   "jj33G<"--d3""4#=#=>%%++-J##%m4DK$$GTZZ'K022]F		2	27	J	  ""#D))*,D**+, 55J#  

2
2,,#44 $ 6 6	 )),,//8&
  //WW++$44\\{{''  $55 X , 66KK
 #&(($))+$$S)!,, #

  113)'88I
 ++ (((  
#
#jj!
 
8 //1	'+zz'7'7'9'?'?'D
$.
'99((8M&* : O #&((4A#
 ,,Aj\:- 48::3C3C3E3K3KC3P
0.0	'99n-X6L5MNO #&(($**3/2, #
 N+84J3KL  +::

 	00&99EEQQ 00<<LL ,>>JJ[[	
 	00;;??E 66KK
  113)'88I
 ++ (((  
#
#..::
C 
K	JN
 
L %66 77::, 9 9 & %66 77::, / / B
 
C 
K	Jsr   E W%;B0U+AW%=UAW%V3DW%A8W:W%
U	W%9VW%9WW%
W"	W%%
W3 N)	__name__
__module____qualname____firstlineno____doc__staticmethodr:   r   __static_attributes__r   r<   r9   r   r   *   s$    
. A AF^r<   r   )'r   
__future__r   r   r   rc   apitools.base.pyr   rj   googlecloudsdk.api_lib.air   &googlecloudsdk.api_lib.ai.model_gardenr   re   googlecloudsdk.api_lib.utilr	   googlecloudsdk.callioper
   r   rY   googlecloudsdk.command_lib.air   r   r   r   r   r   )googlecloudsdk.command_lib.ai.region_utilr   googlecloudsdk.corer   ReleaseTracksReleaseTrackALPHABETAGAUniverseCompatibleCommandr   r   r<   r9   <module>r      s    # &  '  > 0 F , / ( > 3 7 / < 5 4 + T..33T5F5F5I5I zT\\ z zr<   