// File: machine_resources.proto
  // Copyright 2021 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  // http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  syntax = "proto3";

  package google.cloud.aiplatform.v1;

  import "google/api/field_behavior.proto";
  import "google/cloud/aiplatform/v1/accelerator_type.proto";
  import "google/api/annotations.proto";

  // Per-language code generation options.
  option csharp_namespace = "Google.Cloud.AIPlatform.V1";
  option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
  option java_multiple_files = true;
  option java_outer_classname = "MachineResourcesProto";
  option java_package = "com.google.cloud.aiplatform.v1";
  option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
  option ruby_package = "Google::Cloud::AIPlatform::V1";
  // Specification of a single machine.
  message MachineSpec {
    // Immutable. The type of the machine.
    //
    // See the [list of machine types supported for
    // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
    //
    // See the [list of machine types supported for custom
    // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
    //
    // For [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] this field
    // is optional, and the default value is `n1-standard-2`. For
    // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or as
    // part of [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec] this
    // field is required.
    string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The type of accelerator(s) that may be attached to the
    // machine as per
    // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
    AcceleratorType accelerator_type = 2 [(google.api.field_behavior) = IMMUTABLE];

    // The number of accelerators to attach to the machine.
    int32 accelerator_count = 3;
  }
  // A description of resources that are dedicated to a DeployedModel, and
  // that need a higher degree of manual configuration.
  message DedicatedResources {
    // Required. Immutable. The specification of a single machine used by the
    // prediction.
    MachineSpec machine_spec = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Required. Immutable. The minimum number of machine replicas this
    // DeployedModel will always be deployed on. This value must be greater
    // than or equal to 1.
    //
    // If traffic against the DeployedModel increases, it may dynamically be
    // deployed onto more replicas, and as traffic decreases, some of these
    // extra replicas may be freed.
    int32 min_replica_count = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Immutable. The maximum number of replicas this DeployedModel may be
    // deployed on when the traffic against it increases. If the requested
    // value is too large, the deployment will error, but if deployment
    // succeeds then the ability to scale the model to that many replicas is
    // guaranteed (barring service outages). If traffic against the
    // DeployedModel increases beyond what its replicas at maximum may handle,
    // a portion of the traffic will be dropped. If this value is not
    // provided, will use
    // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
    // as the default value.
    int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The metric specifications that override a resource
    // utilization metric (CPU utilization, accelerator's duty cycle, and so
    // on) target value (default to 60 if not set). At most one entry is
    // allowed per metric.
    //
    // If
    // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
    // is above 0, the autoscaling will be based on both CPU utilization and
    // accelerator's duty cycle metrics, and will scale up when either metric
    // exceeds its target value and scale down when both metrics are under
    // their target values. The default target value is 60 for both metrics.
    //
    // If
    // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
    // is 0, the autoscaling will be based on CPU utilization metric only with
    // default target value 60 if not explicitly set.
    //
    // For example, in the case of Online Prediction, if you want to override
    // target CPU utilization to 80, you should set
    // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1.AutoscalingMetricSpec.metric_name]
    // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
    // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1.AutoscalingMetricSpec.target]
    // to `80`.
    repeated AutoscalingMetricSpec autoscaling_metric_specs = 4 [(google.api.field_behavior) = IMMUTABLE];
  }
  // A description of resources that are to a large degree decided by
  // Vertex AI, and require only a modest additional configuration.
  // Each Model supporting these resources documents its specific guidelines.
  message AutomaticResources {
    // Immutable. The minimum number of replicas this DeployedModel will
    // always be deployed on. If traffic against it increases, it may
    // dynamically be deployed onto more replicas up to
    // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
    // and as traffic decreases, some of these extra replicas may be freed.
    // If the requested value is too large, the deployment will error.
    int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The maximum number of replicas this DeployedModel may be
    // deployed on when the traffic against it increases. If the requested
    // value is too large, the deployment will error, but if deployment
    // succeeds then the ability to scale the model to that many replicas is
    // guaranteed (barring service outages). If traffic against the
    // DeployedModel increases beyond what its replicas at maximum may handle,
    // a portion of the traffic will be dropped. If this value is not
    // provided, no upper bound for scaling under heavy traffic will be
    // assumed, though Vertex AI may be unable to scale beyond a certain
    // replica number.
    int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
  }
  // A description of resources that are used for performing batch operations,
  // are dedicated to a Model, and need manual configuration.
  message BatchDedicatedResources {
    // Required. Immutable. The specification of a single machine.
    MachineSpec machine_spec = 1 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Immutable. The number of machine replicas used at the start of the
    // batch operation. If not set, Vertex AI decides the starting number, not
    // greater than
    // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
    int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

    // Immutable. The maximum number of machine replicas the batch operation
    // may be scaled to. The default value is 10.
    int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
  }
  // Statistics information about resource consumption.
  message ResourcesConsumed {
    // Output only. The number of replica hours used. Note that many replicas
    // may run in parallel, and additionally any given work may be queued for
    // some time. Therefore this value is not strictly related to wall time.
    double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
  // Represents the spec of disk options.
  message DiskSpec {
    // Type of the boot disk (default is "pd-ssd").
    // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
    // "pd-standard" (Persistent Disk Hard Disk Drive).
    string boot_disk_type = 1;

    // Size in GB of the boot disk (default is 100GB).
    int32 boot_disk_size_gb = 2;
  }
  // The metric specification that defines the target resource utilization
  // (CPU utilization, accelerator's duty cycle, and so on) for calculating the
  // desired replica count.
  message AutoscalingMetricSpec {
    // Required. The resource metric name.
    // Supported metrics:
    //
    // * For Online Prediction:
    // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
    // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
    string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

    // The target resource utilization in percentage (1% - 100%) for the given
    // metric; once the real usage deviates from the target by a certain
    // percentage, the machine replicas change. The default value is 60
    // (representing 60%) if not provided.
    int32 target = 2;
  }