  // Copyright 2021 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.

  // Schema syntax and package for the definitions in this file.
  syntax = "proto3";

  package google.monitoring.v3;

  // Imported definitions: the `google.api.resource` annotation, the
  // well-known Duration and Timestamp types, and `google.type.CalendarPeriod`.
  import "google/api/resource.proto";
  import "google/protobuf/duration.proto";
  import "google/protobuf/timestamp.proto";
  import "google/type/calendar_period.proto";

  // Per-language code generation options.
  option csharp_namespace = "Google.Cloud.Monitoring.V3";
  option go_package = "google.golang.org/genproto/googleapis/monitoring/v3;monitoring";
  option java_multiple_files = true;
  option java_outer_classname = "ServiceMonitoringProto";
  option java_package = "com.google.monitoring.v3";
  option php_namespace = "Google\\Cloud\\Monitoring\\V3";
  option ruby_package = "Google::Cloud::Monitoring::V3";

  // A `Service` is a discrete, autonomous, network-accessible unit built to
  // address one individual concern
  // ([Wikipedia](https://en.wikipedia.org/wiki/Service-orientation)). Within
  // Cloud Monitoring, a `Service` is the root resource beneath which the
  // operational aspects of that service are made accessible.
  message Service {
    option (google.api.resource) = {
      type: "monitoring.googleapis.com/Service"
      pattern: "projects/{project}/services/{service}"
      pattern: "organizations/{organization}/services/{service}"
      pattern: "folders/{folder}/services/{service}"
      pattern: "*"
    };

    // Custom view of service telemetry. For now this is only a placeholder
    // until the design is finalized.
    message Custom {}

    // App Engine service. More information: https://cloud.google.com/appengine.
    message AppEngine {
      // ID of the App Engine module that underlies this service. Matches the
      // `module_id` resource label of the `gae_app` monitored resource:
      // https://cloud.google.com/monitoring/api/resources#tag_gae_app
      string module_id = 1;
    }

    // Cloud Endpoints service. More information:
    // https://cloud.google.com/endpoints.
    message CloudEndpoints {
      // Name of the Cloud Endpoints service that underlies this service.
      // Matches the `service` resource label of the `api` monitored resource:
      // https://cloud.google.com/monitoring/api/resources#tag_api
      string service = 1;
    }

    // Istio service whose scope is a single Kubernetes cluster. More
    // information: https://istio.io. Services from clusters running OSS Istio
    // are ingested as this type.
    message ClusterIstio {
      // Location of the Kubernetes cluster where this Istio service is
      // defined. Matches the `location` resource label of `k8s_cluster`
      // resources.
      string location = 1;

      // Name of the Kubernetes cluster where this Istio service is defined.
      // Matches the `cluster_name` resource label of `k8s_cluster` resources.
      string cluster_name = 2;

      // Namespace of the Istio service that underlies this service. Matches
      // the `destination_service_namespace` metric label in Istio metrics.
      string service_namespace = 3;

      // Name of the Istio service that underlies this service. Matches the
      // `destination_service_name` metric label in Istio metrics.
      string service_name = 4;
    }

    // Istio service whose scope is an Istio mesh. Services from Anthos
    // clusters running ASM < 1.6.8 are ingested as this type.
    message MeshIstio {
      // Identifier of the mesh where this Istio service is defined. Matches
      // the `mesh_uid` metric label in Istio metrics.
      string mesh_uid = 1;

      // Namespace of the Istio service that underlies this service. Matches
      // the `destination_service_namespace` metric label in Istio metrics.
      string service_namespace = 3;

      // Name of the Istio service that underlies this service. Matches the
      // `destination_service_name` metric label in Istio metrics.
      string service_name = 4;
    }

    // Canonical service whose scope is an Istio mesh. Services from Anthos
    // clusters running ASM >= 1.6.8 are ingested as this type.
    message IstioCanonicalService {
      // Identifier of the Istio mesh where this canonical service is defined.
      // Matches the `mesh_uid` metric label in
      // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
      string mesh_uid = 1;

      // Namespace of the canonical service that underlies this service.
      // Matches the `destination_canonical_service_namespace` metric label in
      // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
      string canonical_service_namespace = 3;

      // Name of the canonical service that underlies this service. Matches
      // the `destination_canonical_service_name` metric label in
      // [Istio metrics](https://cloud.google.com/monitoring/api/metrics_istio).
      string canonical_service = 4;
    }

    // Settings describing how telemetry for a Service is queried.
    message Telemetry {
      // Full name of the resource that defines this service, formatted as
      // described in https://cloud.google.com/apis/design/resource_names.
      string resource_name = 1;
    }

    // Resource name of this Service, in the format:
    //
    //     projects/[PROJECT_ID_OR_NUMBER]/services/[SERVICE_ID]
    string name = 1;

    // Name shown in UI elements that list this Service.
    string display_name = 2;

    // REQUIRED. Service-identifying atoms that specify the underlying service.
    oneof identifier {
      // Custom service type.
      Custom custom = 6;

      // Type for App Engine services.
      AppEngine app_engine = 7;

      // Type for Cloud Endpoints services.
      CloudEndpoints cloud_endpoints = 8;

      // Type for Istio services that live in a Kubernetes cluster.
      ClusterIstio cluster_istio = 9;

      // Type for Istio services scoped to an Istio mesh.
      MeshIstio mesh_istio = 10;

      // Type for canonical services scoped to an Istio mesh. Istio metrics
      // are
      // [documented here](https://istio.io/latest/docs/reference/config/metrics/).
      IstioCanonicalService istio_canonical_service = 11;
    }

    // Settings describing how telemetry for this Service is queried.
    Telemetry telemetry = 13;
  }

  // A Service-Level Objective (SLO) expresses a desired level of good service.
  // It is made up of a service-level indicator (SLI), a performance goal, and a
  // period over which the objective is evaluated against that goal. An SLO may
  // use SLIs defined in a number of different ways. Typical SLOs might include
  // "99% of requests in each rolling week have latency below 200 milliseconds"
  // or "99.5% of requests in each calendar month return successfully."
  message ServiceLevelObjective {
    option (google.api.resource) = {
      type: "monitoring.googleapis.com/ServiceLevelObjective"
      pattern: "projects/{project}/services/{service}/serviceLevelObjectives/{service_level_objective}"
      pattern: "organizations/{organization}/services/{service}/serviceLevelObjectives/{service_level_objective}"
      pattern: "folders/{folder}/services/{service}/serviceLevelObjectives/{service_level_objective}"
      pattern: "*"
      history: ORIGINALLY_SINGLE_PATTERN
    };

    // `ServiceLevelObjective.View` selects which form of
    // `ServiceLevelObjective` the `GetServiceLevelObjective`,
    // `ListServiceLevelObjectives`, and `ListServiceLevelObjectiveVersions`
    // RPCs return.
    enum View {
      // Treated the same as FULL.
      VIEW_UNSPECIFIED = 0;

      // The embedded `ServiceLevelIndicator` is returned in the form in which
      // it was defined; one defined with a `BasicSli` comes back as that
      // `BasicSli`.
      FULL = 2;

      // For a `ServiceLevelIndicator` articulated as a `BasicSli`, the
      // indicator is instead returned with its mode of computation fully
      // spelled out as a `RequestBasedSli`. A `ServiceLevelIndicator` using
      // `RequestBasedSli` or `WindowsBasedSli` is returned as it was provided.
      EXPLICIT = 1;
    }

    // Resource name of this `ServiceLevelObjective`, in the format:
    //
    //     projects/[PROJECT_ID_OR_NUMBER]/services/[SERVICE_ID]/serviceLevelObjectives/[SLO_NAME]
    string name = 1;

    // Name shown in UI elements that list this SLO.
    string display_name = 11;

    // Definition of good service, used to measure and calculate the quality of
    // the `Service`'s performance with respect to a single aspect of service
    // quality.
    ServiceLevelIndicator service_level_indicator = 3;

    // Fraction of service that must be good for this objective to be met.
    // `0 < goal <= 0.999`.
    double goal = 4;

    // Time period over which the objective is evaluated.
    oneof period {
      // Rolling time period, semantically "in the past `<rolling_period>`".
      // Must be an integer multiple of 1 day and no larger than 30 days.
      google.protobuf.Duration rolling_period = 5;

      // Calendar period, semantically "since the start of the current
      // `<calendar_period>`". Only `DAY`, `WEEK`, `FORTNIGHT`, and `MONTH` are
      // supported at this time.
      google.type.CalendarPeriod calendar_period = 6;
    }
  }

  // A Service-Level Indicator (SLI) describes the "performance" of a service.
  // For some services the SLI is well-defined, and it can then be described
  // simply by referencing the well-known SLI and supplying the needed
  // parameters. Alternatively, a "custom" SLI can be defined with a query to
  // the underlying metric store. An SLI is defined to be
  // `good_service / total_service` over any queried time interval, so the
  // value of performance always falls in the range `0 <= performance <= 1`.
  // A custom SLI describes how this ratio is computed: by dividing values from
  // a pair of time series, by cutting a `Distribution` into good and bad
  // counts, or by counting time windows in which the service complies with a
  // criterion. For separation of concerns, a single Service-Level Indicator
  // measures performance for only one aspect of service quality, such as the
  // fraction of successful queries or of fast-enough queries.
  message ServiceLevelIndicator {
    // Service level indicators are grouped by whether the "unit" of service
    // being measured is based on counts of good requests or on counts of good
    // time windows.
    oneof type {
      // Basic SLI on a well-known service type.
      BasicSli basic_sli = 4;

      // Request-based SLIs.
      RequestBasedSli request_based = 1;

      // Windows-based SLIs.
      WindowsBasedSli windows_based = 2;
    }
  }

  // An SLI that measures performance on a well-known service type.
  // Performance is computed from pre-defined metrics. The type of the
  // `service_resource` determines which metrics are used, and the
  // `service_resource.labels` and `metric_labels` are used to build a
  // monitoring filter that narrows that metric down to just the data relevant
  // to this service.
  message BasicSli {
    // Future parameters for the availability SLI.
    message AvailabilityCriteria {}

    // Parameters for a latency threshold SLI.
    message LatencyCriteria {
      // Good service is the count of requests made to this service that
      // return in no more than `threshold`.
      google.protobuf.Duration threshold = 3;
    }

    // OPTIONAL: The set of RPCs to which this SLI is relevant. Telemetry from
    // other methods is not used to calculate performance for this SLI. When
    // omitted, this SLI applies to all of the Service's methods. Setting this
    // field for a service type that doesn't support breaking down by method
    // results in an error.
    repeated string method = 7;

    // OPTIONAL: The set of locations to which this SLI is relevant. Telemetry
    // from other locations is not used to calculate performance for this SLI.
    // When omitted, this SLI applies to all locations in which the Service
    // has activity. Setting this field for a service type that doesn't
    // support breaking down by location results in an error.
    repeated string location = 8;

    // OPTIONAL: The set of API versions to which this SLI is relevant.
    // Telemetry from other API versions is not used to calculate performance
    // for this SLI. When omitted, this SLI applies to all API versions.
    // Setting this field for a service type that doesn't support breaking
    // down by version results in an error.
    repeated string version = 9;

    // This SLI can be evaluated on the basis of availability or latency.
    oneof sli_criteria {
      // Good service is the count of requests made to this service that
      // return successfully.
      AvailabilityCriteria availability = 2;

      // Good service is the count of requests made to this service that are
      // fast enough with respect to `latency.threshold`.
      LatencyCriteria latency = 3;
    }
  }

  // A range of numerical values that includes `min` and excludes `max`. For
  // the open range "< range.max", set `range.min = -infinity`. For the open
  // range ">= range.min", set `range.max = infinity`.
  message Range {
    // Minimum of the range.
    double min = 1;

    // Maximum of the range.
    double max = 2;
  }

  // Service Level Indicators for which atomic units of service are counted
  // directly.
  message RequestBasedSli {
    // How the ratio of `good_service` to `total_service` is computed.
    oneof method {
      // `good_total_ratio` applies when the ratio of `good_service` to
      // `total_service` is computed from two `TimeSeries`.
      TimeSeriesRatio good_total_ratio = 1;

      // `distribution_cut` applies when `good_service` is a count of values
      // aggregated in a `Distribution` that fall into a good range. The
      // `total_service` is then the total count of all values aggregated in
      // the `Distribution`.
      DistributionCut distribution_cut = 3;
    }
  }

  // A `TimeSeriesRatio` names two `TimeSeries` from which the
  // `good_service / total_service` ratio is computed. Each specified
  // `TimeSeries` must have `ValueType = DOUBLE` or `ValueType = INT64`, and
  // must have `MetricKind = DELTA` or `MetricKind = CUMULATIVE`. Exactly two of
  // good, bad, and total must be specified; the relationship
  // `good_service + bad_service = total_service` is assumed.
  message TimeSeriesRatio {
    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // that specifies a `TimeSeries` quantifying good service provided. Must
    // have `ValueType = DOUBLE` or `ValueType = INT64` and must have
    // `MetricKind = DELTA` or `MetricKind = CUMULATIVE`.
    string good_service_filter = 4;

    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // that specifies a `TimeSeries` quantifying bad service: either demanded
    // service that was not provided, or demanded service that was of
    // inadequate quality. Must have `ValueType = DOUBLE` or
    // `ValueType = INT64` and must have `MetricKind = DELTA` or
    // `MetricKind = CUMULATIVE`.
    string bad_service_filter = 5;

    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // that specifies a `TimeSeries` quantifying total demanded service. Must
    // have `ValueType = DOUBLE` or `ValueType = INT64` and must have
    // `MetricKind = DELTA` or `MetricKind = CUMULATIVE`.
    string total_service_filter = 6;
  }

  // A `DistributionCut` defines a `TimeSeries` together with the thresholds
  // used to measure good service and total service. The `TimeSeries` must have
  // `ValueType = DISTRIBUTION` and `MetricKind = DELTA` or
  // `MetricKind = CUMULATIVE`. The computed `good_service` is the count of
  // values x in the `Distribution` for which `range.min <= x < range.max`.
  message DistributionCut {
    // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
    // that specifies a `TimeSeries` aggregating values. Must have
    // `ValueType = DISTRIBUTION` and `MetricKind = DELTA` or
    // `MetricKind = CUMULATIVE`.
    string distribution_filter = 4;

    // The range of values considered "good." To get a one-sided range, set one
    // bound to an infinite value.
    Range range = 5;
  }

  // A `WindowsBasedSli` defines `good_service` as the count of time windows in
  // which the provided service was of good quality. The criteria that decide
  // whether service was good are embedded in the `window_criterion`.
  message WindowsBasedSli {
    // A `PerformanceThreshold` applies when a window counts as good if it has
    // a sufficiently high `performance`.
    message PerformanceThreshold {
      // How performance over a window is computed: either a request-based SLI
      // or a basic SLI.
      oneof type {
        // `RequestBasedSli` evaluated to judge window quality.
        RequestBasedSli performance = 1;

        // `BasicSli` evaluated to judge window quality.
        BasicSli basic_sli_performance = 3;
      }

      // A window is counted as good when its `performance >= threshold`.
      double threshold = 2;
    }

    // A `MetricRange` applies when a window counts as good if the value x of
    // a single `TimeSeries` satisfies `range.min <= x < range.max`. The
    // provided `TimeSeries` must have `ValueType = INT64` or
    // `ValueType = DOUBLE` and `MetricKind = GAUGE`.
    message MetricRange {
      // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
      // that specifies the `TimeSeries` used to evaluate window quality.
      string time_series = 1;

      // The range of values considered "good." To get a one-sided range, set
      // one bound to an infinite value.
      Range range = 4;
    }

    // The criterion used to evaluate window goodness.
    oneof window_criterion {
      // A [monitoring filter](https://cloud.google.com/monitoring/api/v3/filters)
      // that specifies a `TimeSeries` with `ValueType = BOOL`. The window is
      // good if any `true` values appear in the window.
      string good_bad_metric_filter = 5;

      // The window is good if its `performance` is high enough.
      PerformanceThreshold good_total_ratio_threshold = 2;

      // The window is good if the metric's value, averaged across returned
      // streams, is in a good range.
      MetricRange metric_mean_in_range = 6;

      // The window is good if the metric's value, summed across returned
      // streams, is in a good range.
      MetricRange metric_sum_in_range = 7;
    }

    // Duration over which window quality is evaluated. Must be an integer
    // fraction of a day and at least `60s`.
    google.protobuf.Duration window_period = 4;
  }