
                         d   S r SSKJr  SSKJr  SSKJr  Sr " S S\R                  5      r
 " S S	\R                  5      r " S
 S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S  S!\R                  5      r " S" S#\R                  5      r " S$ S%\R                  5      r " S& S'\R                  5      r " S( S)\R                  5      r " S* S+\R                  5      r " S, S-\R                  5      r " S. S/\R                  5      r " S0 S1\R                  5      r " S2 S3\R                  5      r  " S4 S5\R                  5      r! " S6 S7\R                  5      r" " S8 S9\R                  5      r# " S: S;\R                  5      r$ " S< S=\R                  5      r% " S> S?\R                  5      r& " S@ SA\R                  5      r' " SB SC\R                  5      r( " SD SE\R                  5      r) " SF SG\R                  5      r* " SH SI\R                  5      r+\RX                  " \(SJSK5        \RZ                  " \(R\                  SLSM5        \RZ                  " \(R\                  SNSO5        gP)QzNGenerated message classes for gkerecommender version v1.

GKE Recommender API
    )absolute_import)messages)encodinggkerecommenderc                       \ rS rSrSr\R                  " S\R                  R                  S9r	\R                  " S5      r
Srg)Amount   aW  Represents an amount of money in a specific currency.

Fields:
  nanos: Output only. Number of nano (10^-9) units of the amount. The value
    must be between -999,999,999 and +999,999,999 inclusive. If `units` is
    positive, `nanos` must be positive or zero. If `units` is zero, `nanos`
    can be positive, zero, or negative. If `units` is negative, `nanos` must
    be negative or zero. For example $-1.75 is represented as `units`=-1 and
    `nanos`=-750,000,000.
  units: Output only. The whole units of the amount. For example if
    `currencyCode` is `"USD"`, then 1 unit is one US dollar.
   variant    N)__name__
__module____qualname____firstlineno____doc__	_messagesIntegerFieldVariantINT32nanosunits__static_attributes__r       Ylib/googlecloudsdk/generated_clients/apis/gkerecommender/v1/gkerecommender_v1_messages.pyr   r      s7     
 
 I,=,=,C,C
D%

 
 
#%r   r   c                       \ rS rSrSr\R                  " SS5      r\R                  " SS5      r\R                  " S\R                  R                  S9r\R                  " S5      rS	rg
)Cost"   ab  Cost for running a model deployment on a given instance type. Currently,
only USD currency code is supported.

Fields:
  costPerMillionInputTokens: Optional. The cost per million input tokens.
    $/input token = ($/output token) / output-to-input-cost-ratio.
  costPerMillionOutputTokens: Optional. The cost per million output tokens,
    calculated as: $/output token = GPU $/s / (1/output-to-input-cost-ratio
    * input tokens/s + output tokens/s)
  outputInputCostRatio: Optional. The output-to-input cost ratio. This
    determines how the total GPU cost is split between input and output
    tokens. If not provided, `4.0` is used, assuming a 4:1 output:input cost
    ratio.
  pricingModel: Optional. The pricing model used to calculate the cost. Can
    be one of: `3-years-cud`, `1-year-cud`, `on-demand`, `spot`. If not
    provided, `spot` will be used.
r   r
   r      r      r   N)r   r   r   r   r   r   MessageFieldcostPerMillionInputTokenscostPerMillionOutputTokens
FloatFieldr   FLOAToutputInputCostRatioStringFieldpricingModelr   r   r   r   r   r   "   s^    $ (44XqA(55hB"--a9J9J9P9PQ&&q),r   r   c                       \ rS rSrSr\R                  " S5      r\R                  " SS5      r	\R                  " S5      r
\R                  " SS5      r\R                  " S	5      rS
rg)FetchBenchmarkingDataRequest;   a!  Request message for GkeInferenceQuickstart.FetchBenchmarkingData.

Fields:
  instanceType: Optional. The instance type to filter benchmarking data.
    Instance types are in the format `a2-highgpu-1g`. If not provided, all
    instance types for the given profile's `model_server_info` will be
    returned. Use GkeInferenceQuickstart.FetchProfiles to find available
    instance types.
  modelServerInfo: Required. The model server configuration to get
    benchmarking data for. Use GkeInferenceQuickstart.FetchProfiles to find
    valid configurations.
  pricingModel: Optional. The pricing model to use for the benchmarking
    data. Defaults to `spot`.
  servingStack: Optional. The serving stack to filter benchmarking data by,
    e.g. `llm-d/0.3`. If not provided, benchmarking data for all serving
    stacks that support the given model and model server will be returned.
  useCase: Optional. The use case to filter benchmarking data by. If not
    provided, all benchmarking data for the given profile's
    `model_server_info` will be returned.
r
   ModelServerInfor   r    ServingStackr!      r   N)r   r   r   r   r   r   r(   instanceTyper"   modelServerInfor)   servingStackuseCaser   r   r   r   r+   r+   ;   s^    * &&q),**+<a@/&&q),'':,!!!$'r   r+   c                   <    \ rS rSrSr\R                  " SSSS9rSrg)	FetchBenchmarkingDataResponseX   zResponse message for GkeInferenceQuickstart.FetchBenchmarkingData.

Fields:
  profile: Output only. List of profiles containing their respective
    benchmarking data.
Profiler
   Trepeatedr   N)	r   r   r   r   r   r   r"   profiler   r   r   r   r5   r5   X   s     ""9a$?'r   r5   c                   ^    \ rS rSrSr\R                  " SSS9r\R                  " S5      rSr	g)	 FetchModelServerVersionsResponsec   a  Response message for GkeInferenceQuickstart.FetchModelServerVersions.

Fields:
  modelServerVersions: Output only. A list of available model server
    versions.
  nextPageToken: Output only. A token which may be sent as page_token in a
    subsequent `FetchModelServerVersionsResponse` call to retrieve the next
    page of results. If this field is omitted or empty, then there are no
    more results to return.
r
   Tr8   r   r   N)
r   r   r   r   r   r   r(   modelServerVersionsnextPageTokenr   r   r   r   r<   r<   c   s,    	 "--a$?''*-r   r<   c                   ^    \ rS rSrSr\R                  " SSS9r\R                  " S5      rSr	g)	FetchModelServersResponses   a  Response message for GkeInferenceQuickstart.FetchModelServers.

Fields:
  modelServers: Output only. List of available model servers. Open-source
    model servers use simplified, lowercase names (e.g., `vllm`).
  nextPageToken: Output only. A token which may be sent as page_token in a
    subsequent `FetchModelServersResponse` call to retrieve the next page of
    results. If this field is omitted or empty, then there are no more
    results to return.
r
   Tr8   r   r   N)
r   r   r   r   r   r   r(   modelServersr?   r   r   r   r   rA   rA   s   s+    	 &&q48,''*-r   rA   c                   ^    \ rS rSrSr\R                  " SSS9r\R                  " S5      rSr	g)	FetchModelsResponse   a  Response message for GkeInferenceQuickstart.FetchModels.

Fields:
  models: Output only. List of available models. Open-source models follow
    the Huggingface Hub `owner/model_name` format.
  nextPageToken: Output only. A token which may be sent as page_token in a
    subsequent `FetchModelsResponse` call to retrieve the next page of
    results. If this field is omitted or empty, then there are no more
    results to return.
r
   Tr8   r   r   N)
r   r   r   r   r   r   r(   modelsr?   r   r   r   r   rE   rE      s+    	   T2&''*-r   rE   c                   d   \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S5      r	\R                  " S\R                  R                  S9r\R                  " S5      r\R                  " S	S
5      r\R                  " SS5      r\R                  " SS5      rSrg)FetchProfilesRequest   a  Request message for GkeInferenceQuickstart.FetchProfiles.

Fields:
  model: Optional. The model to filter profiles by. Open-source models
    follow the Huggingface Hub `owner/model_name` format. If not provided,
    all models are returned. Use GkeInferenceQuickstart.FetchModels to find
    available models.
  modelServer: Optional. The model server to filter profiles by. If not
    provided, all model servers are returned. Use
    GkeInferenceQuickstart.FetchModelServers to find available model servers
    for a given model.
  modelServerVersion: Optional. The model server version to filter profiles
    by. If not provided, all model server versions are returned. Use
    GkeInferenceQuickstart.FetchModelServerVersions to find available
    versions for a given model and server.
  pageSize: Optional. The target number of results to return in a single
    response. If not specified, a default value will be chosen by the
    service. Note that the response may include a partial list and a caller
    should only rely on the response's next_page_token to determine if there
    are more instances left to be queried.
  pageToken: Optional. The value of next_page_token received from a previous
    `FetchProfilesRequest` call. Provide this to retrieve the subsequent
    page in a multi-page list of results. When paginating, all other
    parameters provided to `FetchProfilesRequest` must match the call that
    provided the page token.
  performanceRequirements: Optional. The performance requirements to filter
    profiles. Profiles that do not meet these requirements are filtered out.
    If not provided, all profiles are returned.
  servingStack: Optional. The serving stack to filter profiles by. If not
    provided, profiles for all serving stacks that support the given model
    and model server will be returned.
  workloadSpec: Optional. The workload specification to filter profiles by.
    If not provided, all use cases are returned.
r
   r   r    r!   r   r/   PerformanceRequirements   r.      WorkloadSpec   r   N)r   r   r   r   r   r   r(   modelmodelServermodelServerVersionr   r   r   pageSize	pageTokenr"   performanceRequirementsr2   workloadSpecr   r   r   r   rI   rI      s    !F 


"%%%a(+ ,,Q/##Ay/@/@/F/FG(##A&)%223LaP'':,'':,r   rI   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " SS5      r
\R                  " SSS	S
9rSrg)FetchProfilesResponse   ay  Response message for GkeInferenceQuickstart.FetchProfiles.

Fields:
  comments: Output only. Additional comments related to the response.
  nextPageToken: Output only. A token which may be sent as page_token in a
    subsequent `FetchProfilesResponse` call to retrieve the next page of
    results. If this field is omitted or empty, then there are no more
    results to return.
  performanceRange: Output only. The combined range of performance values
    observed across all profiles in this response.
  profile: Output only. List of profiles that match the given model server
    info and performance requirements (if provided).
r
   r   PerformanceRanger    r7   r!   Tr8   r   N)r   r   r   r   r   r   r(   commentsr?   r"   performanceRanger:   r   r   r   r   rX   rX      sQ     ""1%(''*-++,>B""9a$?'r   rX   c                   ^    \ rS rSrSr\R                  " S5      r\R                  " SSS9rSr	g)	!FetchServingStackVersionsResponse   a  Response message for GkeInferenceQuickstart.FetchServingStackVersions.

Fields:
  nextPageToken: Output only. A token which may be sent as page_token in a
    subsequent `FetchServingStackVersionsResponse` call to retrieve the next
    page of results. If this field is omitted or empty, then there are no
    more results to return.
  servingStackVersions: Output only. A list of available serving stack
    versions.
r
   r   Tr8   r   N)
r   r   r   r   r   r   r(   r?   servingStackVersionsr   r   r   r   r^   r^      s,    	 ''*-"..q4@r   r^   c                   `    \ rS rSrSr\R                  " S5      r\R                  " SSSS9r	Sr
g	)
FetchServingStacksResponse   a  Response message for GkeInferenceQuickstart.FetchServingStacks.

Fields:
  nextPageToken: Output only. A token which may be sent as page_token in a
    subsequent `FetchServingStacksResponse` call to retrieve the next page
    of results. If this field is omitted or empty, then there are no more
    results to return.
  servingStacks: Output only. List of available serving stacks.
r
   r.   r   Tr8   r   N)r   r   r   r   r   r   r(   r?   r"   servingStacksr   r   r   r   rb   rb      s-     ''*-((TJ-r   rb   c                       \ rS rSrSrSrg)FetchUseCasesRequest   z9Request message for GkeInferenceQuickstart.FetchUseCases.r   N)r   r   r   r   r   r   r   r   r   rf   rf      s    Br   rf   c                   <    \ rS rSrSr\R                  " SSSS9rSrg)	FetchUseCasesResponse   zResponse message for GkeInferenceQuickstart.FetchUseCases.

Fields:
  workloadSpecs: Output only. The workload specifications supported by the
    service.
rN   r
   Tr8   r   N)	r   r   r   r   r   r   r"   workloadSpecsr   r   r   r   ri   ri      s     ((TJ-r   ri   c                      \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " SS5      r
\R                  " SS5      r\R                  " S	S
5      r\R                  " SS5      r\R                  " S5      rSrg) GenerateOptimizedManifestRequesti  a  Request message for GkeInferenceQuickstart.GenerateOptimizedManifest.

Fields:
  acceleratorType: Required. The accelerator type. Use
    GkeInferenceQuickstart.FetchProfiles to find valid accelerators for a
    given `model_server_info`.
  kubernetesNamespace: Optional. The kubernetes namespace to deploy the
    manifests in.
  modelServerInfo: Required. The model server configuration to generate the
    manifest for. Use GkeInferenceQuickstart.FetchProfiles to find valid
    configurations.
  performanceRequirements: Optional. The performance requirements to use for
    generating Horizontal Pod Autoscaler (HPA) resources. If provided, the
    manifest includes HPA resources to adjust the model server replica count
    to maintain the specified targets (e.g., NTPOT, TTFT) at a P50 latency.
    Cost targets are not currently supported for HPA generation. If the
    specified targets are not achievable, the HPA manifest will not be
    generated.
  servingStack: Optional. The serving stack to use for generating the
    manifest. If not provided, the latest serving stack that supports the
    given model and model server will be used.
  storageConfig: Optional. The storage configuration for the model. If not
    provided, the model is loaded from Huggingface.
  useCase: Optional. The use case of the workload. Can be one of: `advanced
    costumer support`, `code completion, `text summarization`, `chatbot`,
    `text generation`, `deep research`. If not provided, `chatbot` is used.
r
   r   r-   r    rK   r!   r.   r/   StorageConfigrL   rM   r   N)r   r   r   r   r   r   r(   acceleratorTypekubernetesNamespacer"   r1   rU   r2   storageConfigr3   r   r   r   r   rm   rm     s    8 ))!,/!--a0**+<a@/%223LaP'':,((!<-!!!$'r   rm   c                       \ rS rSrSr\R                  " SSS9r\R                  " SSSS9r	\R                  " S5      r
S	rg
)!GenerateOptimizedManifestResponsei*  a<  Response message for GkeInferenceQuickstart.GenerateOptimizedManifest.

Fields:
  comments: Output only. Comments related to deploying the generated
    manifests.
  kubernetesManifests: Output only. A list of generated Kubernetes
    manifests.
  manifestVersion: Output only. Additional information about the versioned
    dependencies used to generate the manifests. See [Run best practice
    inference with GKE Inference Quickstart
    recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-
    learning/inference/inference-quickstart) for details.
r
   Tr8   KubernetesManifestr   r    r   N)r   r   r   r   r   r   r(   r[   r"   kubernetesManifestsmanifestVersionr   r   r   r   rs   rs   *  sB     ""1t4(!../CQQUV))!,/r   rs   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S\R                  R                  S9r\R                  " S5      rSrg	)
-GkerecommenderModelServerVersionsFetchRequesti>  a  A GkerecommenderModelServerVersionsFetchRequest object.

Fields:
  model: Required. The model for which to list model server versions. Open-
    source models follow the Huggingface Hub `owner/model_name` format. Use
    GkeInferenceQuickstart.FetchModels to find available models.
  modelServer: Required. The model server for which to list versions. Open-
    source model servers use simplified, lowercase names (e.g., `vllm`). Use
    GkeInferenceQuickstart.FetchModelServers to find available model
    servers.
  pageSize: Optional. The target number of results to return in a single
    response. If not specified, a default value will be chosen by the
    service. Note that the response may include a partial list and a caller
    should only rely on the response's next_page_token to determine if there
    are more instances left to be queried.
  pageToken: Optional. The value of next_page_token received from a previous
    `FetchModelServerVersionsRequest` call. Provide this to retrieve the
    subsequent page in a multi-page list of results. When paginating, all
    other parameters provided to `FetchModelServerVersionsRequest` must
    match the call that provided the page token.
r
   r   r    r   r!   r   Nr   r   r   r   r   r   r(   rP   rQ   r   r   r   rS   rT   r   r   r   r   rx   rx   >  W    , 


"%%%a(+##Ay/@/@/F/FG(##A&)r   rx   c                       \ rS rSrSr\R                  " S5      r\R                  " S\R                  R                  S9r\R                  " S5      rSrg)	&GkerecommenderModelServersFetchRequesti[  a  A GkerecommenderModelServersFetchRequest object.

Fields:
  model: Required. The model for which to list model servers. Open-source
    models follow the Huggingface Hub `owner/model_name` format. Use
    GkeInferenceQuickstart.FetchModels to find available models.
  pageSize: Optional. The target number of results to return in a single
    response. If not specified, a default value will be chosen by the
    service. Note that the response may include a partial list and a caller
    should only rely on the response's next_page_token to determine if there
    are more instances left to be queried.
  pageToken: Optional. The value of next_page_token received from a previous
    `FetchModelServersRequest` call. Provide this to retrieve the subsequent
    page in a multi-page list of results. When paginating, all other
    parameters provided to `FetchModelServersRequest` must match the call
    that provided the page token.
r
   r   r   r    r   N)r   r   r   r   r   r   r(   rP   r   r   r   rS   rT   r   r   r   r   r|   r|   [  sG    $ 


"%##Ay/@/@/F/FG(##A&)r   r|   c                       \ rS rSrSr\R                  " S\R                  R                  S9r	\R                  " S5      rSrg) GkerecommenderModelsFetchRequestis  a  A GkerecommenderModelsFetchRequest object.

Fields:
  pageSize: Optional. The target number of results to return in a single
    response. If not specified, a default value will be chosen by the
    service. Note that the response may include a partial list and a caller
    should only rely on the response's next_page_token to determine if there
    are more instances left to be queried.
  pageToken: Optional. The value of next_page_token received from a previous
    `FetchModelsRequest` call. Provide this to retrieve the subsequent page
    in a multi-page list of results. When paginating, all other parameters
    provided to `FetchModelsRequest` must match the call that provided the
    page token.
r
   r   r   r   N)r   r   r   r   r   r   r   r   r   rS   r(   rT   r   r   r   r   r~   r~   s  s7     ##Ay/@/@/F/FG(##A&)r   r~   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S\R                  R                  S9r\R                  " S5      r\R                  " S5      rS	rg
).GkerecommenderServingStackVersionsFetchRequesti  a  A GkerecommenderServingStackVersionsFetchRequest object.

Fields:
  model: Optional. The model to filter serving stack versions by.
  modelServer: Optional. The model server to filter serving stack versions
    by.
  pageSize: Optional. The target number of results to return in a single
    response. If not specified, a default value will be chosen by the
    service. Note that the response may include a partial list and a caller
    should only rely on the response's next_page_token to determine if there
    are more instances left to be queried.
  pageToken: Optional. The value of next_page_token received from a previous
    `FetchServingStackVersionsRequest` call. Provide this to retrieve the
    subsequent page in a multi-page list of results. When paginating, all
    other parameters provided to `FetchServingStackVersionsRequest` must
    match the call that provided the page token.
  servingStack: Required. The serving stack to list versions for.
r
   r   r    r   r!   r/   r   N)r   r   r   r   r   r   r(   rP   rQ   r   r   r   rS   rT   r2   r   r   r   r   r   r     sg    & 


"%%%a(+##Ay/@/@/F/FG(##A&)&&q),r   r   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S\R                  R                  S9r\R                  " S5      rSrg	)
'GkerecommenderServingStacksFetchRequesti  a  A GkerecommenderServingStacksFetchRequest object.

Fields:
  model: Optional. The model for which to list serving stacks. Open-source
    models follow the Huggingface Hub `owner/model_name` format. Use
    GkeInferenceQuickstart.FetchModels to find available models.
  modelServer: Optional. The model server for which to list serving stacks.
    Open-source model servers use simplified, lowercase names (e.g.,
    `vllm`). Use GkeInferenceQuickstart.FetchModelServers to find available
    model servers.
  pageSize: Optional. The target number of results to return in a single
    response. If not specified, a default value will be chosen by the
    service. Note that the response may include a partial list and a caller
    should only rely on the response's next_page_token to determine if there
    are more instances left to be queried.
  pageToken: Optional. The value of next_page_token received from a previous
    `FetchServingStacksRequest` call. Provide this to retrieve the
    subsequent page in a multi-page list of results. When paginating, all
    other parameters provided to `FetchServingStacksRequest` must match the
    call that provided the page token.
r
   r   r    r   r!   r   Nry   r   r   r   r   r     rz   r   r   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S5      r	Sr
g)rt   i  zA Kubernetes manifest.

Fields:
  apiVersion: Output only. Kubernetes API version.
  content: Output only. YAML content.
  kind: Output only. Kubernetes resource kind.
r
   r   r    r   N)r   r   r   r   r   r   r(   
apiVersioncontentkindr   r   r   r   rt   rt     s9     $$Q'*!!!$'			q	!$r   rt   c                       \ rS rSrSr\R                  " S\R                  R                  S9r	\R                  " S\R                  R                  S9r
Srg)MillisecondRangei  zRepresents a range of latency values in milliseconds.

Fields:
  max: Output only. The maximum value of the range.
  min: Output only. The minimum value of the range.
r
   r   r   r   Nr   r   r   r   r   r   r   r   r   maxminr   r   r   r   r   r     E     	q)*;*;*A*AB#q)*;*;*A*AB#r   r   c                   h   \ rS rSrSr\R                  " S\R                  R                  S9r	\R                  " S\R                  R                  S9r
\R                  " S\R                  R                  S9r\R                  " S\R                  R                  S9r\R                  " S\R                  R                  S9r\R                  " S	\R                  R                  S9r\R                  " S
\R                  R                   S9r\R                  " S\R                  R                   S9rSrg)	ModelInfoi  a  Model information for a model deployment.

Fields:
  attentionHeadsCount: Output only. The number of attention heads in the
    model.
  headDimensions: Output only. The number of dimensions in the model's
    hidden layers.
  hiddenLayersCount: Output only. The number of hidden layers in the model.
  kvCacheSizePerToken: Output only. The size of the key value cache per
    token.
  kvHeadsCount: Output only. The number of key value heads in the model.
  maxContextLength: Output only. The maximum context length of the model.
  modelSizeGb: Output only. The size of the model in gigabytes.
  parametersBillionsCount: Output only. The number of parameters in
    billions.
r
   r   r   r    r!   r/   rL   rM   rO   r   N)r   r   r   r   r   r   r   r   r   attentionHeadsCountheadDimensionshiddenLayersCountkvCacheSizePerTokenkvHeadsCountmaxContextLengthr%   r&   modelSizeGbparametersBillionsCountr   r   r   r   r   r     s    " "..q):K:K:Q:QR))!Y5F5F5L5LM.,,Q	8I8I8O8OP!..q):K:K:Q:QR''93D3D3J3JK,++Ay7H7H7N7NO$$Q	0A0A0G0GH+%00I<M<M<S<STr   r   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S5      r	Sr
g)r-   i  a  Model server information gives. Valid model server info combinations can
be found using GkeInferenceQuickstart.FetchProfiles.

Fields:
  model: Required. The model. Open-source models follow the Huggingface Hub
    `owner/model_name` format. Use GkeInferenceQuickstart.FetchModels to
    find available models.
  modelServer: Required. The model server. Open-source model servers use
    simplified, lowercase names (e.g., `vllm`). Use
    GkeInferenceQuickstart.FetchModelServers to find available servers.
  modelServerVersion: Optional. The model server version. Use
    GkeInferenceQuickstart.FetchModelServerVersions to find available
    versions. If not provided, the latest available version is used.
r
   r   r    r   N)r   r   r   r   r   r   r(   rP   rQ   rR   r   r   r   r   r-   r-     s:     


"%%%a(+ ,,Q/r   r-   c                       \ rS rSrSr\R                  " SS5      r\R                  " SS5      r\R                  " SS5      r	\R                  " SS5      r
\R                  " SS	5      r\R                  " SS
5      rSrg)rZ   i
  a  Performance range for a model deployment.

Fields:
  itlRange: Output only. The range of inter-token latency (ITL) in
    milliseconds. ITL is the latency between consecutive tokens being
    generated.
  ntpotRange: Output only. The range of NTPOT (Normalized Time Per Output
    Token) in milliseconds. NTPOT is the request latency normalized by the
    number of output tokens, measured as request_latency /
    total_output_tokens.
  throughputInputRange: Output only. The range of input tokens per second.
    This is measured as the number of input tokens processed by the server
    divided by the elapsed time in seconds.
  throughputOutputRange: Output only. The range of throughput in output
    tokens per second. This is measured as
    total_output_tokens_generated_by_server / elapsed_time_in_seconds.
  throughputRange: Output only. The range of total tokens per second. This
    is measured as the sum of input and output tokens processed by the
    server divided by the elapsed time in seconds.
  ttftRange: Output only. The range of TTFT (Time To First Token) in
    milliseconds. TTFT is the time it takes to generate the first token for
    a request.
r   r
   r   TokensPerSecondRanger    r!   r/   rL   r   N)r   r   r   r   r   r   r"   itlRange
ntpotRangethroughputInputRangethroughputOutputRangethroughputRange	ttftRanger   r   r   r   rZ   rZ   
  s}    0 ##$6:(%%&8!<*"//0FJ#001GK**+A1E/$$%7;)r   rZ   c                      \ rS rSrSr\R                  " SS5      r\R                  " S\R                  R                  S9r\R                  " S\R                  R                  S9r\R                  " S\R                  R                  S9rS	rg
)rK   i+  af  Performance requirements for a profile and or model deployment.

Fields:
  targetCost: Optional. The target cost for running a profile's model
    server. If not provided, this requirement will not be enforced.
  targetItlMilliseconds: Optional. The target inter-token latency (ITL) in
    milliseconds. ITL is the latency between consecutive tokens being
    generated. If not provided, this target will not be enforced.
  targetNtpotMilliseconds: Optional. The target Normalized Time Per Output
    Token (NTPOT) in milliseconds. NTPOT is calculated as `request_latency /
    total_output_tokens`. If not provided, this target will not be enforced.
  targetTtftMilliseconds: Optional. The target Time To First Token (TTFT) in
    milliseconds. TTFT is the time it takes to generate the first token for
    a request. If not provided, this target will not be enforced.
r   r
   r   r   r    r!   r   N)r   r   r   r   r   r   r"   
targetCostr   r   r   targetItlMillisecondstargetNtpotMillisecondstargetTtftMillisecondsr   r   r   r   rK   rK   +  sx      %%fa0*#00I<M<M<S<ST%221i>O>O>U>UV$11!Y=N=N=T=TUr   rK   c                   B   \ rS rSrSr\R                  " SSSS9r\R                  " S\R                  R                  S9r\R                  " S	\R                  R                  S9r\R                  " S
\R                  R                  S9r\R                  " S\R                  R                  S9r\R                  " S\R                  R                   S9r\R                  " S\R                  R                  S9r\R                  " S\R                  R                  S9rSrg)PerformanceStatsiB  a  Performance statistics for a model deployment.

Fields:
  cost: Output only. The cost of running the model deployment.
  inputTokensPerSecond: Output only. The input tokens per second. This is
    the throughput measured as the number of input tokens processed by the
    server divided by the elapsed time in seconds.
  itlMilliseconds: Output only. The inter-token latency (ITL) in
    milliseconds. This is the latency between consecutive tokens being
    generated.
  ntpotMilliseconds: Output only. The Normalized Time Per Output Token
    (NTPOT) in milliseconds. This is the request latency normalized by the
    number of output tokens, measured as request_latency /
    total_output_tokens.
  outputTokensPerSecond: Output only. The number of output tokens per
    second. This is the throughput measured as
    total_output_tokens_generated_by_server / elapsed_time_in_seconds.
  queriesPerSecond: Output only. The number of queries per second. Note:
    This metric can vary widely based on context length and may not be a
    reliable measure of LLM throughput.
  totalTokensPerSecond: Output only. The total tokens per second. This is
    the throughput measured as the sum of input and output tokens processed
    by the server divided by the elapsed time in seconds.
  ttftMilliseconds: Output only. The Time To First Token (TTFT) in
    milliseconds. This is the time it takes to generate the first token for
    a request.
r   r
   Tr8   r   r   r    r!   r/   rL   rM   rO   r   N)r   r   r   r   r   r   r"   costr   r   r   inputTokensPerSeconditlMillisecondsntpotMillisecondsoutputTokensPerSecondr%   r&   queriesPerSecondtotalTokensPerSecondttftMillisecondsr   r   r   r   r   r   B  s    8 
		D	9$"//9;L;L;R;RS**1i6G6G6M6MN/,,Q	8I8I8O8OP#00I<M<M<S<ST))!Y5F5F5L5LM"//9;L;L;R;RS++Ay7H7H7N7NOr   r   c                   f   \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " SS5      r
\R                  " SS5      r\R                  " S	S
SS9r\R                  " SS5      r\R                  " SS5      r\R                  " S5      r\R                  " SS5      rSrg)r7   ii  a/  A profile containing information about a model deployment.

Fields:
  acceleratorType: Output only. The accelerator type. Expected format:
    `nvidia-h100-80gb`.
  instanceType: Output only. The instance type. Expected format:
    `a2-highgpu-1g`.
  modelInfo: Output only. The model information of the model in this
    profile.
  modelServerInfo: Output only. The model server configuration. Use
    GkeInferenceQuickstart.FetchProfiles to find valid configurations.
  performanceStats: Output only. The performance statistics for this
    profile.
  resourcesUsed: Output only. The resources used by the model deployment.
  servingStack: Output only. The serving stack used for this profile.
  tpuTopology: Output only. The TPU topology (if applicable).
  workloadSpec: Output only. The workload specification.
r
   r   r   r    r-   r!   r   r/   Tr8   ResourcesUsedrL   r.   rM   rO   rN   	   r   N)r   r   r   r   r   r   r(   ro   r0   r"   	modelInfor1   performanceStatsresourcesUsedr2   tpuTopologyrV   r   r   r   r   r7   r7   i  s    & ))!,/&&q),$$[!4)**+<a@/++,>DQ((!<-'':,%%a(+'':,r   r7   c                   b    \ rS rSrSr\R                  " S\R                  R                  S9r	Sr
g)r   i  zResources used by a model deployment.

Fields:
  acceleratorCount: Output only. The number of accelerators (e.g., GPUs or
    TPUs) used by the model deployment on the Kubernetes node.
r
   r   r   N)r   r   r   r   r   r   r   r   r   acceleratorCountr   r   r   r   r   r     s(     ++Ay7H7H7N7NOr   r   c                   `    \ rS rSrSr\R                  " S5      r\R                  " S5      rSr	g)r.   i  zServing stack information.

Fields:
  name: Required. The name of the serving stack.
  version: Optional. The version of the serving stack.
r
   r   r   N)
r   r   r   r   r   r   r(   nameversionr   r   r   r   r.   r.     s)     
		q	!$!!!$'r   r.   c                      \ rS rSrSr " S S\R                  5      r " S S\R                  5      r\R                  " SS5      r
\R                  " S5      r\R                  " SS	S
S9r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R$                  " SSS9r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      rSrg)StandardQueryParametersi  a  Query parameters accepted by all methods.

Enums:
  FXgafvValueValuesEnum: V1 error format.
  AltValueValuesEnum: Data format for response.

Fields:
  f__xgafv: V1 error format.
  access_token: OAuth access token.
  alt: Data format for response.
  callback: JSONP
  fields: Selector specifying which fields to include in a partial response.
  key: API key. Your API key identifies your project and provides you with
    API access, quota, and reports. Required unless you provide an OAuth 2.0
    token.
  oauth_token: OAuth 2.0 token for the current user.
  prettyPrint: Returns response with indentations and line breaks.
  quotaUser: Available to use for quota purposes for server-side
    applications. Can be any arbitrary string assigned to a user, but should
    not exceed 40 characters.
  trace: A tracing token of the form "token:<tokenid>" to include in api
    requests.
  uploadType: Legacy upload protocol for media (e.g. "media", "multipart").
  upload_protocol: Upload protocol for media (e.g. "raw", "multipart").
c                   $    \ rS rSrSrSrSrSrSrg)*StandardQueryParameters.AltValueValuesEnumi  zData format for response.

Values:
  json: Responses with Content-Type of application/json
  media: Media download with context-dependent Content-Type
  proto: Responses with Content-Type of application/x-protobuf
r   r
   r   r   N)	r   r   r   r   r   jsonmediaprotor   r   r   r   AltValueValuesEnumr     s     DEEr   r   c                        \ rS rSrSrSrSrSrg)-StandardQueryParameters.FXgafvValueValuesEnumi  zFV1 error format.

Values:
  _1: v1 error format
  _2: v2 error format
r   r
   r   N)r   r   r   r   r   _1_2r   r   r   r   FXgafvValueValuesEnumr     s     
B	
Br   r   r
   r   r    r   )defaultr!   r/   rL   rM   rO   Tr   
         r   N)r   r   r   r   r   r   Enumr   r   	EnumFieldf__xgafvr(   access_tokenaltcallbackfieldskeyoauth_tokenBooleanFieldprettyPrint	quotaUsertrace
uploadTypeupload_protocolr   r   r   r   r   r     s    4
9>> 
inn    !8!<(&&q),0!VD#""1%(  #&a #%%a(+&&q$7+##A&)



#%$$R(*))"-/r   r   c                   `    \ rS rSrSr\R                  " S5      r\R                  " S5      rSr	g)rn   i  am  Storage configuration for a model deployment.

Fields:
  modelBucketUri: Optional. The Google Cloud Storage bucket URI to load the
    model from. This URI must point to the directory containing the model's
    config file (`config.json`) and model weights. A tuned GCSFuse setup can
    improve LLM Pod startup time by more than 7x. Expected format: `gs:///`.
  xlaCacheBucketUri: Optional. The URI for the GCS bucket containing the XLA
    compilation cache. If using TPUs, the XLA cache will be written to the
    same path as `model_bucket_uri`. This can speed up vLLM model
    preparation for repeated deployments.
r
   r   r   N)
r   r   r   r   r   r   r(   modelBucketUrixlaCacheBucketUrir   r   r   r   rn   rn     s*     ((+.++A.r   rn   c                       \ rS rSrSr\R                  " S\R                  R                  S9r	\R                  " S\R                  R                  S9r
Srg)r   i  zRepresents a range of throughput values in tokens per second.

Fields:
  max: Output only. The maximum value of the range.
  min: Output only. The minimum value of the range.
r
   r   r   r   Nr   r   r   r   r   r     r   r   r   c                       \ rS rSrSr\R                  " S\R                  R                  S9r	\R                  " S\R                  R                  S9r
\R                  " S5      rSrg)	rN   i  a  Workload specification for a workload.

Fields:
  averageInputLength: Optional. The average input length of the workload.
    Only works alongside average output length.
  averageOutputLength: Optional. The average output length of the workload.
    Only works alongside average input length.
  useCase: Optional. The use case of the workload. Can be one of: `advanced
    costumer support`, `code completion, `text summarization`, `chatbot`,
    `text generation`, `deep research`.
r
   r   r   r    r   N)r   r   r   r   r   r   r   r   r   averageInputLengthaverageOutputLengthr(   r3   r   r   r   r   rN   rN     sW    
 !--a9J9J9P9PQ!..q):K:K:Q:QR!!!$'r   rN   r   z$.xgafvr   1r   2N)/r   
__future__r   apitools.base.protorpcliter   r   apitools.base.pyr   packageMessager   r   r+   r5   r<   rA   rE   rI   rX   r^   rb   rf   ri   rm   rs   rx   r|   r~   r   r   rt   r   r   r-   rZ   rK   r   r7   r   r.   r   rn   r   rN   AddCustomJsonFieldMappingAddCustomJsonEnumMappingr   r   r   r   <module>r      s   ' < % $Y $$*9 *2%9#4#4 %:@I$5$5 @+y'8'8 + +	 1 1 + +)++ + +;9,, +;\@I-- @*A	(9(9 A K!2!2 KC9,, CKI-- K#%y'8'8 #%L-	(9(9 -('I4E4E ':'Y->-> '0'y'8'8 '(*Y5F5F *6'i.?.? ':"** "	Cy(( 	CU	!! U80i'' 0*<y(( <BVi// V.$Py(( $PN;i ;>PI%% P	%9$$ 	%<.i// <.~/I%% /$	C9,, 	C%9$$ %$ 	 " "Z4  ! !114>  ! !114>r   