123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226 |
- // Copyright 2021 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/env_var.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

// Per-language code generation settings. These values are part of the
// published API surface and must not be changed.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1;aiplatform";
option java_multiple_files = true;
option java_outer_classname = "CustomJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Represents a job that runs custom workloads such as a Docker container or a
// Python package. A CustomJob can have multiple worker pools and each worker
// pool can have its own machine and input spec. A CustomJob will be cleaned up
// once the job enters terminal state (failed or succeeded).
message CustomJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/CustomJob"
    pattern: "projects/{project}/locations/{location}/customJobs/{custom_job}"
  };

  // Output only. Resource name of a CustomJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The display name of the CustomJob.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Job spec.
  CustomJobSpec job_spec = 4 [(google.api.field_behavior) = REQUIRED];

  // Output only. The detailed state of the job.
  JobState state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob for the first time entered the
  // `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob entered any of the following states:
  // `JOB_STATE_SUCCEEDED`, `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the CustomJob was most recently updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when job's state is `JOB_STATE_FAILED` or
  // `JOB_STATE_CANCELLED`.
  google.rpc.Status error = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize CustomJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 11;

  // Customer-managed encryption key options for a CustomJob. If this is set,
  // then all resources created by the CustomJob will be encrypted with the
  // provided encryption key.
  EncryptionSpec encryption_spec = 12;
}

// Represents the spec of a CustomJob.
message CustomJobSpec {
  // Required. The spec of the worker pools including machine type and Docker
  // image. All worker pools except the first one are optional and can be
  // skipped by providing an empty value.
  repeated WorkerPoolSpec worker_pool_specs = 1
      [(google.api.field_behavior) = REQUIRED];

  // Scheduling options for a CustomJob.
  Scheduling scheduling = 3;

  // Specifies the service account for workload run-as account.
  // Users submitting jobs must have act-as permission on this run-as account.
  // If unspecified, the [AI Platform Custom Code Service
  // Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
  // for the CustomJob's project is used.
  string service_account = 4;

  // The full name of the Compute Engine
  // [network](/compute/docs/networks-and-firewalls#networks) to which the Job
  // should be peered. For example, `projects/12345/global/networks/myVPC`.
  // [Format](/compute/docs/reference/rest/v1/networks/insert)
  // is of the form `projects/{project}/global/networks/{network}`.
  // Where {project} is a project number, as in `12345`, and {network} is a
  // network name.
  //
  // Private services access must already be configured for the network. If
  // left unspecified, the job is not peered with any network.
  string network = 5 [(google.api.resource_reference) = {
    type: "compute.googleapis.com/Network"
  }];

  // The Cloud Storage location to store the output of this CustomJob or
  // HyperparameterTuningJob. For HyperparameterTuningJob,
  // the baseOutputDirectory of
  // each child CustomJob backing a Trial is set to a subdirectory of name
  // [id][google.cloud.aiplatform.v1.Trial.id] under its parent
  // HyperparameterTuningJob's baseOutputDirectory.
  //
  // The following Vertex AI environment variables will be passed to
  // containers or python modules when this field is set:
  //
  //   For CustomJob:
  //
  //   * AIP_MODEL_DIR = `<base_output_directory>/model/`
  //   * AIP_CHECKPOINT_DIR = `<base_output_directory>/checkpoints/`
  //   * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/logs/`
  //
  //   For CustomJob backing a Trial of HyperparameterTuningJob:
  //
  //   * AIP_MODEL_DIR = `<base_output_directory>/<trial_id>/model/`
  //   * AIP_CHECKPOINT_DIR = `<base_output_directory>/<trial_id>/checkpoints/`
  //   * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/<trial_id>/logs/`
  GcsDestination base_output_directory = 6;
}

// Represents the spec of a worker pool in a job.
message WorkerPoolSpec {
  // The custom task to be executed in this worker pool.
  // At most one member of this oneof can be set at a time.
  oneof task {
    // The custom container task.
    ContainerSpec container_spec = 6;

    // The Python packaged task.
    PythonPackageSpec python_package_spec = 7;
  }

  // Optional. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The number of worker replicas to use for this worker pool.
  int64 replica_count = 2 [(google.api.field_behavior) = OPTIONAL];

  // Disk spec.
  DiskSpec disk_spec = 5;
}

// The spec of a Container.
message ContainerSpec {
  // Required. The URI of a container image in the Container Registry that is
  // to be run on each worker replica.
  string image_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // The command to be invoked when the container is started.
  // It overrides the entrypoint instruction in Dockerfile when provided.
  repeated string command = 2;

  // The arguments to be passed when starting the container.
  repeated string args = 3;

  // Environment variables to be passed to the container.
  repeated EnvVar env = 4;
}

// The spec of a Python packaged code.
message PythonPackageSpec {
  // Required. The URI of a container image in Artifact Registry that will run
  // the provided Python package. Vertex AI provides a wide range of executor
  // images with pre-installed packages to meet users' various use cases. See
  // the list of [pre-built containers for
  // training](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers).
  // You must use an image from this list.
  string executor_image_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Google Cloud Storage location of the Python package files
  // which are the training program and its dependent packages.
  // The maximum number of package URIs is 100.
  repeated string package_uris = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The Python module name to run after installing the packages.
  string python_module = 3 [(google.api.field_behavior) = REQUIRED];

  // Command line arguments to be passed to the Python task.
  repeated string args = 4;

  // Environment variables to be passed to the python module.
  repeated EnvVar env = 5;
}

// All parameters related to queuing and scheduling of custom jobs.
message Scheduling {
  // The maximum job running time. The default is 7 days.
  google.protobuf.Duration timeout = 1;

  // Restarts the entire CustomJob if a worker gets restarted.
  // This feature can be used by distributed training jobs that are not
  // resilient to workers leaving and joining a job.
  bool restart_job_on_worker_restart = 3;
}