jobs.proto

// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

import "google/api/annotations.proto";
import "google/dataflow/v1beta3/environment.proto";
import "google/dataflow/v1beta3/snapshots.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/api/client.proto";

option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/dataflow/v1beta3;dataflow";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Provides a method to create and modify Google Cloud Dataflow jobs.
// A Job is a multi-stage computation graph run by the Cloud Dataflow service.
service JobsV1Beta3 {
  option (google.api.default_host) = "dataflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/compute,"
      "https://www.googleapis.com/auth/compute.readonly,"
      "https://www.googleapis.com/auth/userinfo.email";

  // Creates a Cloud Dataflow job.
  //
  // To create a job, we recommend using `projects.locations.jobs.create` with a
  // [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
  // `projects.jobs.create` is not recommended, as your job will always start
  // in `us-central1`.
  rpc CreateJob(CreateJobRequest) returns (Job) {
  }

  // Gets the state of the specified Cloud Dataflow job.
  //
  // To get the state of a job, we recommend using `projects.locations.jobs.get`
  // with a [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
  // `projects.jobs.get` is not recommended, as you can only get the state of
  // jobs that are running in `us-central1`.
  rpc GetJob(GetJobRequest) returns (Job) {
  }

  // Updates the state of an existing Cloud Dataflow job.
  //
  // To update the state of an existing job, we recommend using
  // `projects.locations.jobs.update` with a [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
  // `projects.jobs.update` is not recommended, as you can only update the state
  // of jobs that are running in `us-central1`.
  rpc UpdateJob(UpdateJobRequest) returns (Job) {
  }

  // List the jobs of a project.
  //
  // To list the jobs of a project in a region, we recommend using
  // `projects.locations.jobs.list` with a [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
  // list all jobs across all regions, use `projects.jobs.aggregated`. Using
  // `projects.jobs.list` is not recommended, as you can only get the list of
  // jobs that are running in `us-central1`.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
  }

  // List the jobs of a project across all regions.
  rpc AggregatedListJobs(ListJobsRequest) returns (ListJobsResponse) {
  }

  // Check for existence of active jobs in the given project across all regions.
  rpc CheckActiveJobs(CheckActiveJobsRequest) returns (CheckActiveJobsResponse) {
  }

  // Snapshot the state of a streaming job.
  rpc SnapshotJob(SnapshotJobRequest) returns (Snapshot) {
  }
}
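
// A minimal usage sketch of CreateJob, assuming the generated Python client
// published as `google-cloud-dataflow-client` and its conventional GAPIC
// method and message names; the project ID, region, and job name below are
// hypothetical. Populating `location` corresponds to the recommended
// `projects.locations.jobs.create` form rather than `projects.jobs.create`.
//
//   from google.cloud import dataflow_v1beta3
//
//   client = dataflow_v1beta3.JobsV1Beta3Client()
//   request = dataflow_v1beta3.CreateJobRequest(
//       project_id="my-project",       # hypothetical project
//       location="europe-west1",       # regional endpoint that will own the job
//       # A real request also carries the job's type, environment, and steps.
//       job=dataflow_v1beta3.Job(name="wordcount-example"),
//   )
//   job = client.create_job(request=request)
//   print(job.id, job.current_state)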

// Defines a job to be run by the Cloud Dataflow service.
// nextID: 26
message Job {
  // The unique ID of this job.
  //
  // This field is set by the Cloud Dataflow service when the Job is
  // created, and is immutable for the life of the job.
  string id = 1;

  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 2;

  // The user-specified Cloud Dataflow job name.
  //
  // Only one Job with a given name may exist in a project at any
  // given time. If a caller attempts to create a Job with the same
  // name as an already-existing Job, the attempt returns the
  // existing Job.
  //
  // The name must match the regular expression
  // `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
  string name = 3;

  // The type of Cloud Dataflow job.
  JobType type = 4;

  // The environment for the job.
  Environment environment = 5;

  // Exactly one of step or steps_location should be specified.
  //
  // The top-level steps that constitute the entire job. Only retrieved with
  // JOB_VIEW_ALL.
  repeated Step steps = 6;

  // The Cloud Storage location where the steps are stored.
  string steps_location = 24;

  // The current state of the job.
  //
  // Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
  // specified.
  //
  // A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
  // terminal state. After a job has reached a terminal state, no
  // further state updates may be made.
  //
  // This field may be mutated by the Cloud Dataflow service;
  // callers cannot mutate it.
  JobState current_state = 7;

  // The timestamp associated with the current state.
  google.protobuf.Timestamp current_state_time = 8;

  // The job's requested state.
  //
  // `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
  // `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may
  // also be used to directly set a job's requested state to
  // `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
  // job if it has not already reached a terminal state.
  JobState requested_state = 9;

  // Deprecated.
  JobExecutionInfo execution_info = 10;

  // The timestamp when the job was initially created. Immutable and set by the
  // Cloud Dataflow service.
  google.protobuf.Timestamp create_time = 11;

  // If this job is an update of an existing job, this field is the job ID
  // of the job it replaced.
  //
  // When sending a `CreateJobRequest`, you can update a job by specifying it
  // here. The job named here is stopped, and its intermediate state is
  // transferred to this job.
  string replace_job_id = 12;

  // The map of transform name prefixes of the job to be replaced to the
  // corresponding name prefixes of the new job.
  map<string, string> transform_name_mapping = 13;

  // The client's unique identifier of the job, re-used across retried attempts.
  // If this field is set, the service will ensure its uniqueness.
  // The request to create a job will fail if the service has knowledge of a
  // previously submitted job with the same client's ID and job name.
  // The caller may use this field to ensure idempotence of job
  // creation across retried attempts to create a job.
  // By default, the field is empty and, in that case, the service ignores it.
  string client_request_id = 14;

  // If another job is an update of this job (and thus, this job is in
  // `JOB_STATE_UPDATED`), this field contains the ID of that job.
  string replaced_by_job_id = 15;

  // A set of files the system should be aware of that are used
  // for temporary storage. These temporary files will be
  // removed on job completion.
  // No duplicates are allowed.
  // No file patterns are supported.
  //
  // The supported files are:
  //
  // Google Cloud Storage:
  //
  //    storage.googleapis.com/{bucket}/{object}
  //    bucket.storage.googleapis.com/{object}
  repeated string temp_files = 16;

  // User-defined labels for this job.
  //
  // The labels map can contain no more than 64 entries. Entries of the labels
  // map are UTF-8 strings that comply with the following restrictions:
  //
  // * Keys must conform to regexp: [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}
  // * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
  // * Both keys and values are additionally constrained to be <= 128 bytes in
  //   size.
  map<string, string> labels = 17;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 18;

  // Preliminary field: The format of this data may change at any time.
  // A description of the user pipeline and stages through which it is executed.
  // Created by Cloud Dataflow service. Only retrieved with
  // JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
  PipelineDescription pipeline_description = 19;

  // This field may be mutated by the Cloud Dataflow service;
  // callers cannot mutate it.
  repeated ExecutionStageState stage_states = 20;

  // This field is populated by the Dataflow service to support filtering jobs
  // by the metadata values provided here. Populated for ListJobs and all GetJob
  // views SUMMARY and higher.
  JobMetadata job_metadata = 21;

  // The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
  // Flexible resource scheduling jobs are started with some delay after job
  // creation, so start_time is unset before start and is updated when the
  // job is started by the Cloud Dataflow service. For other jobs, start_time
  // always equals create_time and is immutable and set by the Cloud Dataflow
  // service.
  google.protobuf.Timestamp start_time = 22;

  // If this is specified, the job's initial state is populated from the given
  // snapshot.
  string created_from_snapshot_id = 23;

  // Reserved for future use. This field is set only in responses from the
  // server; it is ignored if it is set in any requests.
  bool satisfies_pzs = 25;
}
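
// A minimal sketch of the update mechanism described on `replace_job_id` and
// `transform_name_mapping`, assuming the generated Python client
// (`google.cloud.dataflow_v1beta3`); the IDs, names, and mapping are
// hypothetical, and a real replacement job would also carry its steps and
// environment.
//
//   from google.cloud import dataflow_v1beta3
//
//   client = dataflow_v1beta3.JobsV1Beta3Client()
//   replacement = dataflow_v1beta3.Job(
//       name="wordcount-example",                   # must match the documented name regex
//       replace_job_id="2021-01-01_00_00_00-1234",  # hypothetical ID of the job being replaced
//       transform_name_mapping={"OldReadStep": "NewReadStep"},
//   )
//   request = dataflow_v1beta3.CreateJobRequest(
//       project_id="my-project", location="europe-west1", job=replacement)
//   updated = client.create_job(request=request)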

// Metadata for a Datastore connector used by the job.
message DatastoreIODetails {
  // Namespace used in the connection.
  string namespace = 1;

  // ProjectId accessed in the connection.
  string project_id = 2;
}

// Metadata for a Pub/Sub connector used by the job.
message PubSubIODetails {
  // Topic accessed in the connection.
  string topic = 1;

  // Subscription used in the connection.
  string subscription = 2;
}

// Metadata for a File connector used by the job.
message FileIODetails {
  // File Pattern used to access files by the connector.
  string file_pattern = 1;
}

// Metadata for a Cloud BigTable connector used by the job.
message BigTableIODetails {
  // ProjectId accessed in the connection.
  string project_id = 1;

  // InstanceId accessed in the connection.
  string instance_id = 2;

  // TableId accessed in the connection.
  string table_id = 3;
}

// Metadata for a BigQuery connector used by the job.
message BigQueryIODetails {
  // Table accessed in the connection.
  string table = 1;

  // Dataset accessed in the connection.
  string dataset = 2;

  // Project accessed in the connection.
  string project_id = 3;

  // Query used to access data in the connection.
  string query = 4;
}

// Metadata for a Spanner connector used by the job.
message SpannerIODetails {
  // ProjectId accessed in the connection.
  string project_id = 1;

  // InstanceId accessed in the connection.
  string instance_id = 2;

  // DatabaseId accessed in the connection.
  string database_id = 3;
}

// The version of the SDK used to run the job.
message SdkVersion {
  // The support status of the SDK used to run the job.
  enum SdkSupportStatus {
    // Cloud Dataflow is unaware of this version.
    UNKNOWN = 0;

    // This is a known version of an SDK, and is supported.
    SUPPORTED = 1;

    // A newer version of the SDK family exists, and an update is recommended.
    STALE = 2;

    // This version of the SDK is deprecated and will eventually be
    // unsupported.
    DEPRECATED = 3;

    // Support for this SDK version has ended and it should no longer be used.
    UNSUPPORTED = 4;
  }

  // The version of the SDK used to run the job.
  string version = 1;

  // A readable string describing the version of the SDK.
  string version_display_name = 2;

  // The support status for this SDK version.
  SdkSupportStatus sdk_support_status = 3;
}

// Metadata available primarily for filtering jobs. Will be included in the
// ListJobs response and Job SUMMARY view.
message JobMetadata {
  // The SDK version used to run the job.
  SdkVersion sdk_version = 1;

  // Identification of a Spanner source used in the Dataflow job.
  repeated SpannerIODetails spanner_details = 2;

  // Identification of a BigQuery source used in the Dataflow job.
  repeated BigQueryIODetails bigquery_details = 3;

  // Identification of a Cloud BigTable source used in the Dataflow job.
  repeated BigTableIODetails big_table_details = 4;

  // Identification of a Pub/Sub source used in the Dataflow job.
  repeated PubSubIODetails pubsub_details = 5;

  // Identification of a File source used in the Dataflow job.
  repeated FileIODetails file_details = 6;

  // Identification of a Datastore source used in the Dataflow job.
  repeated DatastoreIODetails datastore_details = 7;
}

// A message describing the state of a particular execution stage.
message ExecutionStageState {
  // The name of the execution stage.
  string execution_stage_name = 1;

  // Execution stage states allow the same set of values as JobState.
  JobState execution_stage_state = 2;

  // The time at which the stage transitioned to this state.
  google.protobuf.Timestamp current_state_time = 3;
}

// A descriptive representation of a submitted pipeline as well as the executed
// form. This data is provided by the Dataflow service for ease of visualizing
// the pipeline and interpreting Dataflow provided metrics.
message PipelineDescription {
  // Description of each transform in the pipeline and collections between them.
  repeated TransformSummary original_pipeline_transform = 1;

  // Description of each stage of execution of the pipeline.
  repeated ExecutionStageSummary execution_pipeline_stage = 2;

  // Pipeline level display data.
  repeated DisplayData display_data = 3;
}

// Description of the type, names/ids, and input/outputs for a transform.
message TransformSummary {
  // Type of transform.
  KindType kind = 1;

  // SDK generated id of this transform instance.
  string id = 2;

  // User provided name for this transform instance.
  string name = 3;

  // Transform-specific display data.
  repeated DisplayData display_data = 4;

  // User names for all collection outputs of this transform.
  repeated string output_collection_name = 5;

  // User names for all collection inputs to this transform.
  repeated string input_collection_name = 6;
}

// Type of transform or stage operation.
enum KindType {
  // Unrecognized transform type.
  UNKNOWN_KIND = 0;

  // ParDo transform.
  PAR_DO_KIND = 1;

  // Group By Key transform.
  GROUP_BY_KEY_KIND = 2;

  // Flatten transform.
  FLATTEN_KIND = 3;

  // Read transform.
  READ_KIND = 4;

  // Write transform.
  WRITE_KIND = 5;

  // Constructs from a constant value, such as with Create.of.
  CONSTANT_KIND = 6;

  // Creates a Singleton view of a collection.
  SINGLETON_KIND = 7;

  // Opening or closing a shuffle session, often as part of a GroupByKey.
  SHUFFLE_KIND = 8;
}

// Description of the composing transforms, names/ids, and input/outputs of a
// stage of execution. Some composing transforms and sources may have been
// generated by the Dataflow service during execution planning.
message ExecutionStageSummary {
  // Description of an input or output of an execution stage.
  message StageSource {
    // Human-readable name for this source; may be user or system generated.
    string user_name = 1;

    // Dataflow service generated name for this source.
    string name = 2;

    // User name for the original user transform or collection with which this
    // source is most closely associated.
    string original_transform_or_collection = 3;

    // Size of the source, if measurable.
    int64 size_bytes = 4;
  }

  // Description of a transform executed as part of an execution stage.
  message ComponentTransform {
    // Human-readable name for this transform; may be user or system generated.
    string user_name = 1;

    // Dataflow service generated name for this transform.
    string name = 2;

    // User name for the original user transform with which this transform is
    // most closely associated.
    string original_transform = 3;
  }

  // Description of an interstitial value between transforms in an execution
  // stage.
  message ComponentSource {
    // Human-readable name for this transform; may be user or system generated.
    string user_name = 1;

    // Dataflow service generated name for this source.
    string name = 2;

    // User name for the original user transform or collection with which this
    // source is most closely associated.
    string original_transform_or_collection = 3;
  }

  // Dataflow service generated name for this stage.
  string name = 1;

  // Dataflow service generated id for this stage.
  string id = 2;

  // Type of transform this stage is executing.
  KindType kind = 3;

  // Input sources for this stage.
  repeated StageSource input_source = 4;

  // Output sources for this stage.
  repeated StageSource output_source = 5;

  // Other stages that must complete before this stage can run.
  repeated string prerequisite_stage = 8;

  // Transforms that comprise this execution stage.
  repeated ComponentTransform component_transform = 6;

  // Collections produced and consumed by component transforms of this stage.
  repeated ComponentSource component_source = 7;
}

// Data provided with a pipeline or transform to provide descriptive info.
message DisplayData {
  // The key identifying the display data.
  // This is intended to be used as a label for the display data
  // when viewed in a dax monitoring system.
  string key = 1;

  // The namespace for the key. This is usually a class name or programming
  // language namespace (e.g. a Python module) which defines the display data.
  // This allows a dax monitoring system to specially handle the data
  // and perform custom rendering.
  string namespace = 2;

  // Various value types which can be used for display data. Only one will be
  // set.
  oneof Value {
    // Contains value if the data is of string type.
    string str_value = 4;

    // Contains value if the data is of int64 type.
    int64 int64_value = 5;

    // Contains value if the data is of float type.
    float float_value = 6;

    // Contains value if the data is of java class type.
    string java_class_value = 7;

    // Contains value if the data is of timestamp type.
    google.protobuf.Timestamp timestamp_value = 8;

    // Contains value if the data is of duration type.
    google.protobuf.Duration duration_value = 9;

    // Contains value if the data is of a boolean type.
    bool bool_value = 10;
  }

  // A possible additional shorter value to display.
  // For example, a java_class_value of com.mypackage.MyDoFn
  // will be stored with MyDoFn as the short_str_value and
  // com.mypackage.MyDoFn as the java_class_value.
  // short_str_value can be displayed and java_class_value
  // will be displayed as a tooltip.
  string short_str_value = 11;

  // An optional full URL.
  string url = 12;

  // An optional label to display in a dax UI for the element.
  string label = 13;
}

// Defines a particular step within a Cloud Dataflow job.
//
// A job consists of multiple steps, each of which performs some
// specific operation as part of the overall job. Data is typically
// passed from one step to another as part of the job.
//
// Here's an example of a sequence of steps which together implement a
// Map-Reduce job:
//
//   * Read a collection of data from some source, parsing the
//     collection's elements.
//
//   * Validate the elements.
//
//   * Apply a user-defined function to map each element to some value
//     and extract an element-specific key value.
//
//   * Group elements with the same key into a single element with
//     that key, transforming a multiply-keyed collection into a
//     uniquely-keyed collection.
//
//   * Write the elements out to some data sink.
//
// Note that the Cloud Dataflow service may be used to run many different
// types of jobs, not just Map-Reduce.
message Step {
  // The kind of step in the Cloud Dataflow job.
  string kind = 1;

  // The name that identifies the step. This must be unique for each
  // step with respect to all other steps in the Cloud Dataflow job.
  string name = 2;

  // Named properties associated with the step. Each kind of
  // predefined step has its own required set of properties.
  // Must be provided on Create. Only retrieved with JOB_VIEW_ALL.
  google.protobuf.Struct properties = 3;
}

// Additional information about how a Cloud Dataflow job will be executed that
// isn't contained in the submitted job.
message JobExecutionInfo {
  // A mapping from each stage to the information about that stage.
  map<string, JobExecutionStageInfo> stages = 1;
}

// Contains information about how a particular
// [google.dataflow.v1beta3.Step][google.dataflow.v1beta3.Step] will be executed.
message JobExecutionStageInfo {
  // The steps associated with the execution stage.
  // Note that stages may have several steps, and that a given step
  // might be run by more than one stage.
  repeated string step_name = 1;
}

// Describes the overall state of a [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job].
enum JobState {
  // The job's run state isn't specified.
  JOB_STATE_UNKNOWN = 0;

  // `JOB_STATE_STOPPED` indicates that the job has not
  // yet started to run.
  JOB_STATE_STOPPED = 1;

  // `JOB_STATE_RUNNING` indicates that the job is currently running.
  JOB_STATE_RUNNING = 2;

  // `JOB_STATE_DONE` indicates that the job has successfully completed.
  // This is a terminal job state. This state may be set by the Cloud Dataflow
  // service, as a transition from `JOB_STATE_RUNNING`. It may also be set via a
  // Cloud Dataflow `UpdateJob` call, if the job has not yet reached a terminal
  // state.
  JOB_STATE_DONE = 3;

  // `JOB_STATE_FAILED` indicates that the job has failed. This is a
  // terminal job state. This state may only be set by the Cloud Dataflow
  // service, and only as a transition from `JOB_STATE_RUNNING`.
  JOB_STATE_FAILED = 4;

  // `JOB_STATE_CANCELLED` indicates that the job has been explicitly
  // cancelled. This is a terminal job state. This state may only be
  // set via a Cloud Dataflow `UpdateJob` call, and only if the job has not
  // yet reached another terminal state.
  JOB_STATE_CANCELLED = 5;

  // `JOB_STATE_UPDATED` indicates that the job was successfully updated,
  // meaning that this job was stopped and another job was started, inheriting
  // state from this one. This is a terminal job state. This state may only be
  // set by the Cloud Dataflow service, and only as a transition from
  // `JOB_STATE_RUNNING`.
  JOB_STATE_UPDATED = 6;

  // `JOB_STATE_DRAINING` indicates that the job is in the process of draining.
  // A draining job has stopped pulling from its input sources and is processing
  // any data that remains in-flight. This state may be set via a Cloud Dataflow
  // `UpdateJob` call, but only as a transition from `JOB_STATE_RUNNING`. Jobs
  // that are draining may only transition to `JOB_STATE_DRAINED`,
  // `JOB_STATE_CANCELLED`, or `JOB_STATE_FAILED`.
  JOB_STATE_DRAINING = 7;

  // `JOB_STATE_DRAINED` indicates that the job has been drained.
  // A drained job stopped pulling from its input sources, processed any data
  // that remained in-flight when draining was requested, and then terminated.
  // This state is a terminal state, may only be set by the Cloud Dataflow
  // service, and only as a transition from `JOB_STATE_DRAINING`.
  JOB_STATE_DRAINED = 8;

  // `JOB_STATE_PENDING` indicates that the job has been created but is not yet
  // running. Jobs that are pending may only transition to `JOB_STATE_RUNNING`
  // or `JOB_STATE_FAILED`.
  JOB_STATE_PENDING = 9;

  // `JOB_STATE_CANCELLING` indicates that the job has been explicitly cancelled
  // and is in the process of stopping. Jobs that are cancelling may only
  // transition to `JOB_STATE_CANCELLED` or `JOB_STATE_FAILED`.
  JOB_STATE_CANCELLING = 10;

  // `JOB_STATE_QUEUED` indicates that the job has been created but is being
  // delayed until launch. Jobs that are queued may only transition to
  // `JOB_STATE_PENDING` or `JOB_STATE_CANCELLED`.
  JOB_STATE_QUEUED = 11;

  // `JOB_STATE_RESOURCE_CLEANING_UP` indicates that the batch job's associated
  // resources are currently being cleaned up after a successful run.
  // Currently, this is an opt-in feature; please reach out to the Cloud support
  // team if you are interested.
  JOB_STATE_RESOURCE_CLEANING_UP = 12;
}
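
// A minimal sketch of polling for a terminal state with GetJob, assuming the
// generated Python client (`google.cloud.dataflow_v1beta3`); the terminal-state
// set below follows the comments above, and the project, region, and job ID
// are hypothetical.
//
//   import time
//   from google.cloud import dataflow_v1beta3
//
//   TERMINAL = {
//       dataflow_v1beta3.JobState.JOB_STATE_DONE,
//       dataflow_v1beta3.JobState.JOB_STATE_FAILED,
//       dataflow_v1beta3.JobState.JOB_STATE_CANCELLED,
//       dataflow_v1beta3.JobState.JOB_STATE_UPDATED,
//       dataflow_v1beta3.JobState.JOB_STATE_DRAINED,
//   }
//
//   client = dataflow_v1beta3.JobsV1Beta3Client()
//   request = dataflow_v1beta3.GetJobRequest(
//       project_id="my-project", location="europe-west1", job_id="<job-id>")
//   while True:
//       job = client.get_job(request=request)
//       if job.current_state in TERMINAL:
//           break
//       time.sleep(30)  # poll every 30 seconds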

// Request to create a Cloud Dataflow job.
message CreateJobRequest {
  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 1;

  // The job to create.
  Job job = 2;

  // The level of information requested in response.
  JobView view = 3;

  // Deprecated. This field is now in the Job message.
  string replace_job_id = 4;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 5;
}

// Request to get the state of a Cloud Dataflow job.
message GetJobRequest {
  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 1;

  // The job ID.
  string job_id = 2;

  // The level of information requested in response.
  JobView view = 3;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 4;
}

// Selector for how much information is returned in Job responses.
enum JobView {
  // The job view to return isn't specified, or is unknown.
  // Responses will contain at least the `JOB_VIEW_SUMMARY` information,
  // and may contain additional information.
  JOB_VIEW_UNKNOWN = 0;

  // Request summary information only:
  // Project ID, Job ID, job name, job type, job status, start/end time,
  // and Cloud SDK version details.
  JOB_VIEW_SUMMARY = 1;

  // Request all information available for this job.
  JOB_VIEW_ALL = 2;

  // Request summary info and limited job description data for steps, labels and
  // environment.
  JOB_VIEW_DESCRIPTION = 3;
}

// Request to update a Cloud Dataflow job.
message UpdateJobRequest {
  // The ID of the Cloud Platform project that the job belongs to.
  string project_id = 1;

  // The job ID.
  string job_id = 2;

  // The updated job.
  // Only the job state is updatable; other fields will be ignored.
  Job job = 3;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 4;
}
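
// A minimal sketch of cancelling a job through UpdateJob, following the
// `requested_state` comments on the Job message and assuming the generated
// Python client (`google.cloud.dataflow_v1beta3`); project, region, and job ID
// are hypothetical.
//
//   from google.cloud import dataflow_v1beta3
//
//   client = dataflow_v1beta3.JobsV1Beta3Client()
//   request = dataflow_v1beta3.UpdateJobRequest(
//       project_id="my-project",
//       location="europe-west1",
//       job_id="<job-id>",
//       # Only the state is honored here; other Job fields are ignored.
//       job=dataflow_v1beta3.Job(
//           requested_state=dataflow_v1beta3.JobState.JOB_STATE_CANCELLED),
//   )
//   job = client.update_job(request=request)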

// Request to list Cloud Dataflow jobs.
message ListJobsRequest {
  // This field filters out and returns jobs in the specified job state. The
  // order of data returned is determined by the filter used, and is subject to
  // change.
  enum Filter {
    // The filter isn't specified, or is unknown. This returns all jobs ordered
    // on descending `JobUuid`.
    UNKNOWN = 0;

    // Returns all running jobs first ordered on creation timestamp, then
    // returns all terminated jobs ordered on the termination timestamp.
    ALL = 1;

    // Filters the jobs that have a terminated state, ordered on the
    // termination timestamp. Example terminated states: `JOB_STATE_STOPPED`,
    // `JOB_STATE_UPDATED`, `JOB_STATE_DRAINED`, etc.
    TERMINATED = 2;

    // Filters the jobs that are running, ordered on the creation timestamp.
    ACTIVE = 3;
  }

  // The kind of filter to use.
  Filter filter = 5;

  // The project which owns the jobs.
  string project_id = 1;

  // Deprecated. ListJobs always returns summaries now.
  // Use GetJob for other JobViews.
  JobView view = 2 [deprecated = true];

  // If there are many jobs, limit response to at most this many.
  // The actual number of jobs returned will be the lesser of page_size
  // and an unspecified server-defined limit.
  int32 page_size = 3;

  // Set this to the 'next_page_token' field of a previous response
  // to request additional results in a long list.
  string page_token = 4;

  // The [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // contains this job.
  string location = 17;
}
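
// A minimal paging sketch for ListJobs, assuming the generated Python client
// (`google.cloud.dataflow_v1beta3`), whose `list_jobs` is assumed to return a
// pager that follows `next_page_token` automatically; project and region are
// hypothetical.
//
//   from google.cloud import dataflow_v1beta3
//
//   client = dataflow_v1beta3.JobsV1Beta3Client()
//   request = dataflow_v1beta3.ListJobsRequest(
//       project_id="my-project",
//       location="europe-west1",
//       filter=dataflow_v1beta3.ListJobsRequest.Filter.ACTIVE,
//       page_size=100,
//   )
//   for job in client.list_jobs(request=request):  # pager walks every page
//       print(job.id, job.name, job.current_state)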

// Indicates which [regional endpoint]
// (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
// to respond to a request for data.
message FailedLocation {
  // The name of the [regional endpoint]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // failed to respond.
  string name = 1;
}

// Response to a request to list Cloud Dataflow jobs in a project. This might
// be a partial response, depending on the page size in the ListJobsRequest.
// However, if the project does not have any jobs, an instance of
// ListJobsResponse is not returned and the request's response
// body is empty {}.
message ListJobsResponse {
  // A subset of the requested job information.
  repeated Job jobs = 1;

  // Set if there may be more results than fit in this response.
  string next_page_token = 2;

  // Zero or more messages describing the [regional endpoints]
  // (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
  // failed to respond.
  repeated FailedLocation failed_location = 3;
}

// Request to create a snapshot of a job.
message SnapshotJobRequest {
  // The project which owns the job to be snapshotted.
  string project_id = 1;

  // The job to be snapshotted.
  string job_id = 2;

  // TTL for the snapshot.
  google.protobuf.Duration ttl = 3;

  // The location that contains this job.
  string location = 4;

  // If true, perform snapshots for sources which support this.
  bool snapshot_sources = 5;

  // User specified description of the snapshot. May be empty.
  string description = 6;
}

// Request to check whether active jobs exist for a project.
message CheckActiveJobsRequest {
  // The project which owns the jobs.
  string project_id = 1;
}

// Response for CheckActiveJobsRequest.
message CheckActiveJobsResponse {
  // If true, active jobs exist for the project; false otherwise.
  bool active_jobs_exist = 1;
}
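
// A minimal sketch of CheckActiveJobs, assuming the generated Python client
// (`google.cloud.dataflow_v1beta3`); the project ID is hypothetical.
//
//   from google.cloud import dataflow_v1beta3
//
//   client = dataflow_v1beta3.JobsV1Beta3Client()
//   response = client.check_active_jobs(
//       request=dataflow_v1beta3.CheckActiveJobsRequest(project_id="my-project"))
//   print("Active jobs exist:", response.active_jobs_exist)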