// storage.proto
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. syntax = "proto3";
  15. package google.privacy.dlp.v2;
  16. import "google/api/resource.proto";
  17. import "google/protobuf/timestamp.proto";
  18. import "google/api/annotations.proto";
  19. option csharp_namespace = "Google.Cloud.Dlp.V2";
  20. option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp";
  21. option java_multiple_files = true;
  22. option java_outer_classname = "DlpStorage";
  23. option java_package = "com.google.privacy.dlp.v2";
  24. option php_namespace = "Google\\Cloud\\Dlp\\V2";
  25. option ruby_package = "Google::Cloud::Dlp::V2";
  26. // Type of information detected by the API.
  27. message InfoType {
  28. // Name of the information type. Either a name of your choosing when
  29. // creating a CustomInfoType, or one of the names listed
  30. // at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
  31. // a built-in type. When sending Cloud DLP results to Data Catalog, infoType
  32. // names should conform to the pattern `[A-Za-z0-9$-_]{1,64}`.
  33. string name = 1;
  34. }
  35. // A reference to a StoredInfoType to use with scanning.
  36. message StoredType {
  37. // Resource name of the requested `StoredInfoType`, for example
  38. // `organizations/433245324/storedInfoTypes/432452342` or
  39. // `projects/project-id/storedInfoTypes/432452342`.
  40. string name = 1;
  41. // Timestamp indicating when the version of the `StoredInfoType` used for
  42. // inspection was created. Output-only field, populated by the system.
  43. google.protobuf.Timestamp create_time = 2;
  44. }
  45. // Categorization of results based on how likely they are to represent a match,
  46. // based on the number of elements they contain which imply a match.
  47. enum Likelihood {
  48. // Default value; same as POSSIBLE.
  49. LIKELIHOOD_UNSPECIFIED = 0;
  50. // Few matching elements.
  51. VERY_UNLIKELY = 1;
  52. UNLIKELY = 2;
  53. // Some matching elements.
  54. POSSIBLE = 3;
  55. LIKELY = 4;
  56. // Many matching elements.
  57. VERY_LIKELY = 5;
  58. }
  59. // Custom information type provided by the user. Used to find domain-specific
  60. // sensitive information configurable to the data in question.
  61. message CustomInfoType {
  62. // Custom information type based on a dictionary of words or phrases. This can
  63. // be used to match sensitive information specific to the data, such as a list
  64. // of employee IDs or job titles.
  65. //
  66. // Dictionary words are case-insensitive and all characters other than letters
  67. // and digits in the unicode [Basic Multilingual
  68. // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
  69. // will be replaced with whitespace when scanning for matches, so the
  70. // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
  71. // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
  72. // surrounding any match must be of a different type than the adjacent
  73. // characters within the word, so letters must be next to non-letters and
  74. // digits next to non-digits. For example, the dictionary word "jen" will
  75. // match the first three letters of the text "jen123" but will return no
  76. // matches for "jennifer".
  77. //
  78. // Dictionary words containing a large number of characters that are not
  79. // letters or digits may result in unexpected findings because such characters
  80. // are treated as whitespace. The
  81. // [limits](https://cloud.google.com/dlp/limits) page contains details about
  82. // the size limits of dictionaries. For dictionaries that do not fit within
  83. // these constraints, consider using `LargeCustomDictionaryConfig` in the
  84. // `StoredInfoType` API.
  85. message Dictionary {
  86. // Message defining a list of words or phrases to search for in the data.
  87. message WordList {
  88. // Words or phrases defining the dictionary. The dictionary must contain
  89. // at least one phrase and every phrase must contain at least 2 characters
  90. // that are letters or digits. [required]
  91. repeated string words = 1;
  92. }
  93. oneof source {
  94. // List of words or phrases to search for.
  95. WordList word_list = 1;
  96. // Newline-delimited file of words in Cloud Storage. Only a single file
  97. // is accepted.
  98. CloudStoragePath cloud_storage_path = 3;
  99. }
  100. }
  101. // Message defining a custom regular expression.
  102. message Regex {
  103. // Pattern defining the regular expression. Its syntax
  104. // (https://github.com/google/re2/wiki/Syntax) can be found under the
  105. // google/re2 repository on GitHub.
  106. string pattern = 1;
  107. // The index of the submatch to extract as findings. When not
  108. // specified, the entire match is returned. No more than 3 may be included.
  109. repeated int32 group_indexes = 2;
  110. }
  111. // Message for detecting output from deidentification transformations
  112. // such as
  113. // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
  114. // These types of transformations are
  115. // those that perform pseudonymization, thereby producing a "surrogate" as
  116. // output. This should be used in conjunction with a field on the
  117. // transformation such as `surrogate_info_type`. This CustomInfoType does
  118. // not support the use of `detection_rules`.
  119. message SurrogateType {
  120. }
  121. // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
  122. // `CustomInfoType` to alter behavior under certain circumstances, depending
  123. // on the specific details of the rule. Not supported for the `surrogate_type`
  124. // custom infoType.
  125. message DetectionRule {
  126. // Message for specifying a window around a finding to apply a detection
  127. // rule.
  128. message Proximity {
  129. // Number of characters before the finding to consider.
  130. int32 window_before = 1;
  131. // Number of characters after the finding to consider.
  132. int32 window_after = 2;
  133. }
  134. // Message for specifying an adjustment to the likelihood of a finding as
  135. // part of a detection rule.
  136. message LikelihoodAdjustment {
  137. oneof adjustment {
  138. // Set the likelihood of a finding to a fixed value.
  139. Likelihood fixed_likelihood = 1;
  140. // Increase or decrease the likelihood by the specified number of
  141. // levels. For example, if a finding would be `POSSIBLE` without the
  142. // detection rule and `relative_likelihood` is 1, then it is upgraded to
  143. // `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
  144. // Likelihood may never drop below `VERY_UNLIKELY` or exceed
  145. // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
  146. // adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
  147. // a final likelihood of `LIKELY`.
  148. int32 relative_likelihood = 2;
  149. }
  150. }
  151. // The rule that adjusts the likelihood of findings within a certain
  152. // proximity of hotwords.
  153. message HotwordRule {
  154. // Regular expression pattern defining what qualifies as a hotword.
  155. Regex hotword_regex = 1;
  156. // Proximity of the finding within which the entire hotword must reside.
  157. // The total length of the window cannot exceed 1000 characters. Note that
  158. // the finding itself will be included in the window, so that hotwords may
  159. // be used to match substrings of the finding itself. For example, the
  160. // certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
  161. // adjusted upwards if the area code is known to be the local area code of
  162. // a company office using the hotword regex "\(xxx\)", where "xxx"
  163. // is the area code in question.
  164. Proximity proximity = 2;
  165. // Likelihood adjustment to apply to all matching findings.
  166. LikelihoodAdjustment likelihood_adjustment = 3;
  167. }
  168. oneof type {
  169. // Hotword-based detection rule.
  170. HotwordRule hotword_rule = 1;
  171. }
  172. }
  173. enum ExclusionType {
  174. // A finding of this custom info type will not be excluded from results.
  175. EXCLUSION_TYPE_UNSPECIFIED = 0;
  176. // A finding of this custom info type will be excluded from final results,
  177. // but can still affect rule execution.
  178. EXCLUSION_TYPE_EXCLUDE = 1;
  179. }
  180. // CustomInfoType can either be a new infoType, or an extension of built-in
  181. // infoType, when the name matches one of existing infoTypes and that infoType
  182. // is specified in `InspectContent.info_types` field. Specifying the latter
  183. // adds findings to the one detected by the system. If built-in info type is
  184. // not specified in `InspectContent.info_types` list then the name is treated
  185. // as a custom info type.
  186. InfoType info_type = 1;
  187. // Likelihood to return for this CustomInfoType. This base value can be
  188. // altered by a detection rule if the finding meets the criteria specified by
  189. // the rule. Defaults to `VERY_LIKELY` if not specified.
  190. Likelihood likelihood = 6;
  191. oneof type {
  192. // A list of phrases to detect as a CustomInfoType.
  193. Dictionary dictionary = 2;
  194. // Regular expression based CustomInfoType.
  195. Regex regex = 3;
  196. // Message for detecting output from deidentification transformations that
  197. // support reversing.
  198. SurrogateType surrogate_type = 4;
  199. // Load an existing `StoredInfoType` resource for use in
  200. // `InspectDataSource`. Not currently supported in `InspectContent`.
  201. StoredType stored_type = 5;
  202. }
  203. // Set of detection rules to apply to all findings of this CustomInfoType.
  204. // Rules are applied in order that they are specified. Not supported for the
  205. // `surrogate_type` CustomInfoType.
  206. repeated DetectionRule detection_rules = 7;
  207. // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
  208. // to be returned. It still can be used for rules matching.
  209. ExclusionType exclusion_type = 8;
  210. }
  211. // General identifier of a data field in a storage service.
  212. message FieldId {
  213. // Name describing the field.
  214. string name = 1;
  215. }
  216. // Datastore partition ID.
  217. // A partition ID identifies a grouping of entities. The grouping is always
  218. // by project and namespace, however the namespace ID may be empty.
  219. //
  220. // A partition ID contains several dimensions:
  221. // project ID and namespace ID.
  222. message PartitionId {
  223. // The ID of the project to which the entities belong.
  224. string project_id = 2;
  225. // If not empty, the ID of the namespace to which the entities belong.
  226. string namespace_id = 4;
  227. }
  228. // A representation of a Datastore kind.
  229. message KindExpression {
  230. // The name of the kind.
  231. string name = 1;
  232. }
  233. // Options defining a data set within Google Cloud Datastore.
  234. message DatastoreOptions {
  235. // A partition ID identifies a grouping of entities. The grouping is always
  236. // by project and namespace, however the namespace ID may be empty.
  237. PartitionId partition_id = 1;
  238. // The kind to process.
  239. KindExpression kind = 2;
  240. }
  241. // Message representing a set of files in a Cloud Storage bucket. Regular
  242. // expressions are used to allow fine-grained control over which files in the
  243. // bucket to include.
  244. //
  245. // Included files are those that match at least one item in `include_regex` and
  246. // do not match any items in `exclude_regex`. Note that a file that matches
  247. // items from both lists will _not_ be included. For a match to occur, the
  248. // entire file path (i.e., everything in the url after the bucket name) must
  249. // match the regular expression.
  250. //
  251. // For example, given the input `{bucket_name: "mybucket", include_regex:
  252. // ["directory1/.*"], exclude_regex:
  253. // ["directory1/excluded.*"]}`:
  254. //
  255. // * `gs://mybucket/directory1/myfile` will be included
  256. // * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
  257. // across `/`)
  258. // * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
  259. // full path doesn't match any items in `include_regex`)
  260. // * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
  261. // matches an item in `exclude_regex`)
  262. //
  263. // If `include_regex` is left empty, it will match all files by default
  264. // (this is equivalent to setting `include_regex: [".*"]`).
  265. //
  266. // Some other common use cases:
  267. //
  268. // * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
  269. // files in `mybucket` except for .pdf files
  270. // * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
  271. // include all files directly under `gs://mybucket/directory/`, without matching
  272. // across `/`
  273. message CloudStorageRegexFileSet {
  274. // The name of a Cloud Storage bucket. Required.
  275. string bucket_name = 1;
  276. // A list of regular expressions matching file paths to include. All files in
  277. // the bucket that match at least one of these regular expressions will be
  278. // included in the set of files, except for those that also match an item in
  279. // `exclude_regex`. Leaving this field empty will match all files by default
  280. // (this is equivalent to including `.*` in the list).
  281. //
  282. // Regular expressions use RE2
  283. // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  284. // under the google/re2 repository on GitHub.
  285. repeated string include_regex = 2;
  286. // A list of regular expressions matching file paths to exclude. All files in
  287. // the bucket that match at least one of these regular expressions will be
  288. // excluded from the scan.
  289. //
  290. // Regular expressions use RE2
  291. // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  292. // under the google/re2 repository on GitHub.
  293. repeated string exclude_regex = 3;
  294. }
  295. // Options defining a file or a set of files within a Google Cloud Storage
  296. // bucket.
  297. message CloudStorageOptions {
  298. // Set of files to scan.
  299. message FileSet {
  300. // The Cloud Storage url of the file(s) to scan, in the format
  301. // `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
  302. //
  303. // If the url ends in a trailing slash, the bucket or directory represented
  304. // by the url will be scanned non-recursively (content in sub-directories
  305. // will not be scanned). This means that `gs://mybucket/` is equivalent to
  306. // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
  307. // `gs://mybucket/directory/*`.
  308. //
  309. // Exactly one of `url` or `regex_file_set` must be set.
  310. string url = 1;
  311. // The regex-filtered set of files to scan. Exactly one of `url` or
  312. // `regex_file_set` must be set.
  313. CloudStorageRegexFileSet regex_file_set = 2;
  314. }
  315. // How to sample bytes if not all bytes are scanned. Meaningful only when used
  316. // in conjunction with bytes_limit_per_file. If not specified, scanning would
  317. // start from the top.
  318. enum SampleMethod {
  319. SAMPLE_METHOD_UNSPECIFIED = 0;
  320. // Scan from the top (default).
  321. TOP = 1;
  322. // For each file larger than bytes_limit_per_file, randomly pick the offset
  323. // to start scanning. The scanned bytes are contiguous.
  324. RANDOM_START = 2;
  325. }
  326. // The set of one or more files to scan.
  327. FileSet file_set = 1;
  328. // Max number of bytes to scan from a file. If a scanned file's size is bigger
  329. // than this value then the rest of the bytes are omitted. Only one
  330. // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  331. int64 bytes_limit_per_file = 4;
  332. // Max percentage of bytes to scan from a file. The rest are omitted. The
  333. // number of bytes scanned is rounded down. Must be between 0 and 100,
  334. // inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
  335. // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  336. int32 bytes_limit_per_file_percent = 8;
  337. // List of file type groups to include in the scan.
  338. // If empty, all files are scanned and available data format processors
  339. // are applied. In addition, the binary content of the selected files
  340. // is always scanned as well.
  341. // Images are scanned only as binary if the specified region
  342. // does not support image inspection and no file_types were specified.
  343. // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
  344. repeated FileType file_types = 5;
  345. SampleMethod sample_method = 6;
  346. // Limits the number of files to scan to this percentage of the input FileSet.
  347. // Number of files scanned is rounded down. Must be between 0 and 100,
  348. // inclusively. Both 0 and 100 means no limit. Defaults to 0.
  349. int32 files_limit_percent = 7;
  350. }
  351. // Message representing a set of files in Cloud Storage.
  352. message CloudStorageFileSet {
  353. // The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
  354. // path is allowed.
  355. string url = 1;
  356. }
  357. // Message representing a single file or path in Cloud Storage.
  358. message CloudStoragePath {
  359. // A url representing a file or path (no wildcards) in Cloud Storage.
  360. // Example: gs://[BUCKET_NAME]/dictionary.txt
  361. string path = 1;
  362. }
  363. // Options defining BigQuery table and row identifiers.
  364. message BigQueryOptions {
  365. // How to sample rows if not all rows are scanned. Meaningful only when used
  366. // in conjunction with either rows_limit or rows_limit_percent. If not
  367. // specified, rows are scanned in the order BigQuery reads them.
  368. enum SampleMethod {
  369. SAMPLE_METHOD_UNSPECIFIED = 0;
  370. // Scan groups of rows in the order BigQuery provides (default). Multiple
  371. // groups of rows may be scanned in parallel, so results may not appear in
  372. // the same order the rows are read.
  373. TOP = 1;
  374. // Randomly pick groups of rows to scan.
  375. RANDOM_START = 2;
  376. }
  377. // Complete BigQuery table reference.
  378. BigQueryTable table_reference = 1;
  379. // Table fields that may uniquely identify a row within the table. When
  380. // `actions.saveFindings.outputConfig.table` is specified, the values of
  381. // columns specified here are available in the output table under
  382. // `location.content_locations.record_location.record_key.id_values`. Nested
  383. // fields such as `person.birthdate.year` are allowed.
  384. repeated FieldId identifying_fields = 2;
  385. // Max number of rows to scan. If the table has more rows than this value, the
  386. // rest of the rows are omitted. If not set, or if set to 0, all rows will be
  387. // scanned. Only one of rows_limit and rows_limit_percent can be specified.
  388. // Cannot be used in conjunction with TimespanConfig.
  389. int64 rows_limit = 3;
  390. // Max percentage of rows to scan. The rest are omitted. The number of rows
  391. // scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
  392. // 100 means no limit. Defaults to 0. Only one of rows_limit and
  393. // rows_limit_percent can be specified. Cannot be used in conjunction with
  394. // TimespanConfig.
  395. int32 rows_limit_percent = 6;
  396. SampleMethod sample_method = 4;
  397. // References to fields excluded from scanning. This allows you to skip
  398. // inspection of entire columns which you know have no findings.
  399. repeated FieldId excluded_fields = 5;
  400. }
  401. // Shared message indicating Cloud storage type.
  402. message StorageConfig {
  403. // Configuration of the timespan of the items to include in scanning.
  404. // Currently only supported when inspecting Google Cloud Storage and BigQuery.
  405. message TimespanConfig {
  406. // Exclude files, tables, or rows older than this value.
  407. // If not set, no lower time limit is applied.
  408. google.protobuf.Timestamp start_time = 1;
  409. // Exclude files, tables, or rows newer than this value.
  410. // If not set, no upper time limit is applied.
  411. google.protobuf.Timestamp end_time = 2;
  412. // Specification of the field containing the timestamp of scanned items.
  413. // Used for data sources like Datastore and BigQuery.
  414. //
  415. // For BigQuery:
  416. // If this value is not specified and the table was modified between the
  417. // given start and end times, the entire table will be scanned. If this
  418. // value is specified, then rows are filtered based on the given start and
  419. // end times. Rows with a `NULL` value in the provided BigQuery column are
  420. // skipped.
  421. // Valid data types of the provided BigQuery column are: `INTEGER`, `DATE`,
  422. // `TIMESTAMP`, and `DATETIME`.
  423. //
  424. // For Datastore:
  425. // If this value is specified, then entities are filtered based on the given
  426. // start and end times. If an entity does not contain the provided timestamp
  427. // property or contains empty or invalid values, then it is included.
  428. // Valid data types of the provided timestamp property are: `TIMESTAMP`.
  429. FieldId timestamp_field = 3;
  430. // When the job is started by a JobTrigger we will automatically figure out
  431. // a valid start_time to avoid scanning files that have not been modified
  432. // since the last time the JobTrigger executed. This will be based on the
  433. // time of the execution of the last run of the JobTrigger.
  434. bool enable_auto_population_of_timespan_config = 4;
  435. }
  436. oneof type {
  437. // Google Cloud Datastore options.
  438. DatastoreOptions datastore_options = 2;
  439. // Google Cloud Storage options.
  440. CloudStorageOptions cloud_storage_options = 3;
  441. // BigQuery options.
  442. BigQueryOptions big_query_options = 4;
  443. // Hybrid inspection options.
  444. // Early access feature is in a pre-release state and might change or have
  445. // limited support. For more information, see
  446. // https://cloud.google.com/products#product-launch-stages.
  447. HybridOptions hybrid_options = 9;
  448. }
  449. TimespanConfig timespan_config = 6;
  450. }
  451. // Definitions of file type groups to scan. New types will be added to this
  452. // list.
  453. enum FileType {
  454. // Includes all files.
  455. FILE_TYPE_UNSPECIFIED = 0;
  456. // Includes all file extensions not covered by another entry. Binary
  457. // scanning attempts to convert the content of the file to utf_8 to scan
  458. // the file.
  459. // If you wish to avoid this fall back, specify one or more of the other
  460. // FileType's in your storage scan.
  461. BINARY_FILE = 1;
  462. // Included file extensions:
  463. // asc, brf, c, cc, cpp, csv, cxx, c++, cs, css, dart, eml, go, h, hh, hpp,
  464. // hxx, h++, hs, html, htm, shtml, shtm, xhtml, lhs, ini, java, js, json,
  465. // ocaml, md, mkd, markdown, m, ml, mli, pl, pm, php, phtml, pht, py, pyw,
  466. // rb, rbw, rs, rc, scala, sh, sql, tex, txt, text, tsv, vcard, vcs, wml,
  467. // xml, xsl, xsd, yml, yaml.
  468. TEXT_FILE = 2;
  469. // Included file extensions:
  470. // bmp, gif, jpg, jpeg, jpe, png.
  471. // bytes_limit_per_file has no effect on image files.
  472. // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
  473. IMAGE = 3;
  474. // Word files >30 MB will be scanned as binary files.
  475. // Included file extensions:
  476. // docx, dotx, docm, dotm
  477. WORD = 5;
  478. // PDF files >30 MB will be scanned as binary files.
  479. // Included file extensions:
  480. // pdf
  481. PDF = 6;
  482. // Included file extensions:
  483. // avro
  484. AVRO = 7;
  485. // Included file extensions:
  486. // csv
  487. CSV = 8;
  488. // Included file extensions:
  489. // tsv
  490. TSV = 9;
  491. }
  492. // Configuration to control jobs where the content being inspected is outside
  493. // of Google Cloud Platform.
  494. message HybridOptions {
  495. // A short description of where the data is coming from. Will be stored once
  496. // in the job. 256 max length.
  497. string description = 1;
  498. // These are labels that each inspection request must include within their
  499. // 'finding_labels' map. Request may contain others, but any missing one of
  500. // these will be rejected.
  501. //
  502. // Label keys must be between 1 and 63 characters long and must conform
  503. // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  504. //
  505. // No more than 10 keys can be required.
  506. repeated string required_finding_label_keys = 2;
  507. // To organize findings, these labels will be added to each finding.
  508. //
  509. // Label keys must be between 1 and 63 characters long and must conform
  510. // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  511. //
  512. // Label values must be between 0 and 63 characters long and must conform
  513. // to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
  514. //
  515. // No more than 10 labels can be associated with a given finding.
  516. //
  517. // Examples:
  518. // * `"environment" : "production"`
  519. // * `"pipeline" : "etl"`
  520. map<string, string> labels = 3;
  521. // If the container is a table, additional information to make findings
  522. // meaningful such as the columns that are primary keys.
  523. TableOptions table_options = 4;
  524. }
  525. // Row key for identifying a record in BigQuery table.
  526. message BigQueryKey {
  527. // Complete BigQuery table reference.
  528. BigQueryTable table_reference = 1;
  529. // Row number inferred at the time the table was scanned. This value is
  530. // nondeterministic, cannot be queried, and may be null for inspection
  531. // jobs. To locate findings within a table, specify
  532. // `inspect_job.storage_config.big_query_options.identifying_fields` in
  533. // `CreateDlpJobRequest`.
  534. int64 row_number = 2;
  535. }
  536. // Record key for a finding in Cloud Datastore.
  537. message DatastoreKey {
  538. // Datastore entity key.
  539. Key entity_key = 1;
  540. }
  541. // A unique identifier for a Datastore entity.
  542. // If a key's partition ID or any of its path kinds or names are
  543. // reserved/read-only, the key is reserved/read-only.
  544. // A reserved/read-only key is forbidden in certain documented contexts.
  545. message Key {
  546. // A (kind, ID/name) pair used to construct a key path.
  547. //
  548. // If either name or ID is set, the element is complete.
  549. // If neither is set, the element is incomplete.
  550. message PathElement {
  551. // The kind of the entity.
  552. // A kind matching regex `__.*__` is reserved/read-only.
  553. // A kind must not contain more than 1500 bytes when UTF-8 encoded.
  554. // Cannot be `""`.
  555. string kind = 1;
  556. // The type of ID.
  557. oneof id_type {
  558. // The auto-allocated ID of the entity.
  559. // Never equal to zero. Values less than zero are discouraged and may not
  560. // be supported in the future.
  561. int64 id = 2;
  562. // The name of the entity.
  563. // A name matching regex `__.*__` is reserved/read-only.
  564. // A name must not be more than 1500 bytes when UTF-8 encoded.
  565. // Cannot be `""`.
  566. string name = 3;
  567. }
  568. }
  569. // Entities are partitioned into subsets, currently identified by a project
  570. // ID and namespace ID.
  571. // Queries are scoped to a single partition.
  572. PartitionId partition_id = 1;
  573. // The entity path.
  574. // An entity path consists of one or more elements composed of a kind and a
  575. // string or numerical identifier, which identify entities. The first
  576. // element identifies a _root entity_, the second element identifies
  577. // a _child_ of the root entity, the third element identifies a child of the
  578. // second entity, and so forth. The entities identified by all prefixes of
  579. // the path are called the element's _ancestors_.
  580. //
  581. // A path can never be empty, and a path can have at most 100 elements.
  582. repeated PathElement path = 2;
  583. }
  584. // Message for a unique key indicating a record that contains a finding.
  585. message RecordKey {
  586. oneof type {
  587. DatastoreKey datastore_key = 2;
  588. BigQueryKey big_query_key = 3;
  589. }
  590. // Values of identifying columns in the given row. Order of values matches
  591. // the order of `identifying_fields` specified in the scanning request.
  592. repeated string id_values = 5;
  593. }
  594. // Message defining the location of a BigQuery table. A table is uniquely
  595. // identified by its project_id, dataset_id, and table_name. Within a query
  596. // a table is often referenced with a string in the format of:
  597. // `<project_id>:<dataset_id>.<table_id>` or
  598. // `<project_id>.<dataset_id>.<table_id>`.
  599. message BigQueryTable {
  600. // The Google Cloud Platform project ID of the project containing the table.
  601. // If omitted, project ID is inferred from the API call.
  602. string project_id = 1;
  603. // Dataset ID of the table.
  604. string dataset_id = 2;
  605. // Name of the table.
  606. string table_id = 3;
  607. }
  608. // Message defining a field of a BigQuery table.
  609. message BigQueryField {
  610. // Source table of the field.
  611. BigQueryTable table = 1;
  612. // Designated field in the BigQuery table.
  613. FieldId field = 2;
  614. }
  615. // An entity in a dataset is a field or set of fields that correspond to a
  616. // single person. For example, in medical records the `EntityId` might be a
  617. // patient identifier, or for financial records it might be an account
  618. // identifier. This message is used when generalizations or analysis must take
  619. // into account that multiple rows correspond to the same entity.
  620. message EntityId {
  621. // Composite key indicating which field contains the entity identifier.
  622. FieldId field = 1;
  623. }
  624. // Instructions regarding the table content being inspected.
  625. message TableOptions {
  626. // The columns that are the primary keys for table objects included in
  627. // ContentItem. A copy of this cell's value will stored alongside alongside
  628. // each finding so that the finding can be traced to the specific row it came
  629. // from. No more than 3 may be provided.
  630. repeated FieldId identifying_fields = 1;
  631. }