From 7062709a941b4f84a41f0790af5d3121bebd86d1 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Fri, 26 Jul 2024 10:35:55 -0400 Subject: [PATCH] Regenerate Dataflow Python client --- .../clients/dataflow/dataflow_v1b3_client.py | 81 +- .../dataflow/dataflow_v1b3_messages.py | 1497 +++++++++++++---- 2 files changed, 1224 insertions(+), 354 deletions(-) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py index cc982098797b..e42b180bbecd 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_client.py @@ -1,8 +1,5 @@ """Generated client library for dataflow version v1b3.""" # NOTE: This file is autogenerated and should not be edited by hand. - -from __future__ import absolute_import - from apitools.base.py import base_api from . import dataflow_v1b3_messages as messages @@ -17,9 +14,7 @@ class DataflowV1b3(base_api.BaseApiClient): _PACKAGE = 'dataflow' _SCOPES = [ 'https://www.googleapis.com/auth/cloud-platform', - 'https://www.googleapis.com/auth/compute', - 'https://www.googleapis.com/auth/compute.readonly', - 'https://www.googleapis.com/auth/userinfo.email' + 'https://www.googleapis.com/auth/compute' ] _VERSION = 'v1b3' _CLIENT_ID = '1042881264118.apps.googleusercontent.com' @@ -75,7 +70,6 @@ def __init__( self.projects_locations_jobs = self.ProjectsLocationsJobsService(self) self.projects_locations_snapshots = self.ProjectsLocationsSnapshotsService( self) - self.projects_locations_sql = self.ProjectsLocationsSqlService(self) self.projects_locations_templates = self.ProjectsLocationsTemplatesService( self) self.projects_locations = self.ProjectsLocationsService(self) @@ -254,7 +248,7 @@ def __init__(self, client): self._upload_configs = {} def Aggregated(self, request, global_params=None): - r"""List the jobs of a project across all regions. + r"""List the jobs of a project across all regions. **Note:** This method doesn't support filtering the list of jobs by name. Args: request: (DataflowProjectsJobsAggregatedRequest) input message @@ -270,7 +264,8 @@ def Aggregated(self, request, global_params=None): method_id='dataflow.projects.jobs.aggregated', ordered_params=['projectId'], path_params=['projectId'], - query_params=['filter', 'location', 'pageSize', 'pageToken', 'view'], + query_params= + ['filter', 'location', 'name', 'pageSize', 'pageToken', 'view'], relative_path='v1b3/projects/{projectId}/jobs:aggregated', request_field='', request_type_name='DataflowProjectsJobsAggregatedRequest', @@ -279,7 +274,7 @@ def Aggregated(self, request, global_params=None): ) def Create(self, request, global_params=None): - r"""Creates a Cloud Dataflow job. To create a job, we recommend using `projects.locations.jobs.create` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.jobs.create` is not recommended, as your job will always start in `us-central1`. + r"""Creates a Cloud Dataflow job. To create a job, we recommend using `projects.locations.jobs.create` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.jobs.create` is not recommended, as your job will always start in `us-central1`. Do not enter confidential information when you supply string values using the API. 
Args: request: (DataflowProjectsJobsCreateRequest) input message @@ -354,7 +349,7 @@ def GetMetrics(self, request, global_params=None): ) def List(self, request, global_params=None): - r"""List the jobs of a project. To list the jobs of a project in a region, we recommend using `projects.locations.jobs.list` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To list the all jobs across all regions, use `projects.jobs.aggregated`. Using `projects.jobs.list` is not recommended, as you can only get the list of jobs that are running in `us-central1`. + r"""List the jobs of a project. To list the jobs of a project in a region, we recommend using `projects.locations.jobs.list` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To list the all jobs across all regions, use `projects.jobs.aggregated`. Using `projects.jobs.list` is not recommended, because you can only get the list of jobs that are running in `us-central1`. `projects.locations.jobs.list` and `projects.jobs.list` support filtering the list of jobs by name. Filtering by name isn't supported by `projects.jobs.aggregated`. Args: request: (DataflowProjectsJobsListRequest) input message @@ -370,7 +365,8 @@ def List(self, request, global_params=None): method_id='dataflow.projects.jobs.list', ordered_params=['projectId'], path_params=['projectId'], - query_params=['filter', 'location', 'pageSize', 'pageToken', 'view'], + query_params= + ['filter', 'location', 'name', 'pageSize', 'pageToken', 'view'], relative_path='v1b3/projects/{projectId}/jobs', request_field='', request_type_name='DataflowProjectsJobsListRequest', @@ -420,7 +416,7 @@ def Update(self, request, global_params=None): method_id='dataflow.projects.jobs.update', ordered_params=['projectId', 'jobId'], path_params=['jobId', 'projectId'], - query_params=['location'], + query_params=['location', 'updateMask'], relative_path='v1b3/projects/{projectId}/jobs/{jobId}', request_field='job', request_type_name='DataflowProjectsJobsUpdateRequest', @@ -611,7 +607,7 @@ def __init__(self, client): self._upload_configs = {} def GetExecutionDetails(self, request, global_params=None): - r"""Request detailed information about the execution status of a stage of the job. + r"""Request detailed information about the execution status of a stage of the job. EXPERIMENTAL. This API is subject to change or removal without notice. Args: request: (DataflowProjectsLocationsJobsStagesGetExecutionDetailsRequest) input message @@ -710,7 +706,7 @@ def __init__(self, client): self._upload_configs = {} def Create(self, request, global_params=None): - r"""Creates a Cloud Dataflow job. To create a job, we recommend using `projects.locations.jobs.create` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.jobs.create` is not recommended, as your job will always start in `us-central1`. + r"""Creates a Cloud Dataflow job. To create a job, we recommend using `projects.locations.jobs.create` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.jobs.create` is not recommended, as your job will always start in `us-central1`. Do not enter confidential information when you supply string values using the API. 
Args: request: (DataflowProjectsLocationsJobsCreateRequest) input message @@ -761,7 +757,7 @@ def Get(self, request, global_params=None): ) def GetExecutionDetails(self, request, global_params=None): - r"""Request detailed information about the execution status of the job. + r"""Request detailed information about the execution status of the job. EXPERIMENTAL. This API is subject to change or removal without notice. Args: request: (DataflowProjectsLocationsJobsGetExecutionDetailsRequest) input message @@ -814,7 +810,7 @@ def GetMetrics(self, request, global_params=None): ) def List(self, request, global_params=None): - r"""List the jobs of a project. To list the jobs of a project in a region, we recommend using `projects.locations.jobs.list` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To list the all jobs across all regions, use `projects.jobs.aggregated`. Using `projects.jobs.list` is not recommended, as you can only get the list of jobs that are running in `us-central1`. + r"""List the jobs of a project. To list the jobs of a project in a region, we recommend using `projects.locations.jobs.list` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To list the all jobs across all regions, use `projects.jobs.aggregated`. Using `projects.jobs.list` is not recommended, because you can only get the list of jobs that are running in `us-central1`. `projects.locations.jobs.list` and `projects.jobs.list` support filtering the list of jobs by name. Filtering by name isn't supported by `projects.jobs.aggregated`. Args: request: (DataflowProjectsLocationsJobsListRequest) input message @@ -830,7 +826,7 @@ def List(self, request, global_params=None): method_id='dataflow.projects.locations.jobs.list', ordered_params=['projectId', 'location'], path_params=['location', 'projectId'], - query_params=['filter', 'pageSize', 'pageToken', 'view'], + query_params=['filter', 'name', 'pageSize', 'pageToken', 'view'], relative_path='v1b3/projects/{projectId}/locations/{location}/jobs', request_field='', request_type_name='DataflowProjectsLocationsJobsListRequest', @@ -881,7 +877,7 @@ def Update(self, request, global_params=None): method_id='dataflow.projects.locations.jobs.update', ordered_params=['projectId', 'location', 'jobId'], path_params=['jobId', 'location', 'projectId'], - query_params=[], + query_params=['updateMask'], relative_path= 'v1b3/projects/{projectId}/locations/{location}/jobs/{jobId}', request_field='job', @@ -978,41 +974,6 @@ def List(self, request, global_params=None): supports_download=False, ) - class ProjectsLocationsSqlService(base_api.BaseApiService): - """Service class for the projects_locations_sql resource.""" - - _NAME = 'projects_locations_sql' - - def __init__(self, client): - super(DataflowV1b3.ProjectsLocationsSqlService, self).__init__(client) - self._upload_configs = {} - - def Validate(self, request, global_params=None): - r"""Validates a GoogleSQL query for Cloud Dataflow syntax. Will always confirm the given query parses correctly, and if able to look up schema information from DataCatalog, will validate that the query analyzes properly as well. - - Args: - request: (DataflowProjectsLocationsSqlValidateRequest) input message - global_params: (StandardQueryParameters, default: None) global arguments - Returns: - (ValidateResponse) The response message. 
- """ - config = self.GetMethodConfig('Validate') - return self._RunMethod(config, request, global_params=global_params) - - Validate.method_config = lambda: base_api.ApiMethodInfo( - http_method='GET', - method_id='dataflow.projects.locations.sql.validate', - ordered_params=['projectId', 'location'], - path_params=['location', 'projectId'], - query_params=['query'], - relative_path= - 'v1b3/projects/{projectId}/locations/{location}/sql:validate', - request_field='', - request_type_name='DataflowProjectsLocationsSqlValidateRequest', - response_type_name='ValidateResponse', - supports_download=False, - ) - class ProjectsLocationsTemplatesService(base_api.BaseApiService): """Service class for the projects_locations_templates resource.""" @@ -1024,7 +985,7 @@ def __init__(self, client): self._upload_configs = {} def Create(self, request, global_params=None): - r"""Creates a Cloud Dataflow job from a template. + r"""Creates a Cloud Dataflow job from a template. Do not enter confidential information when you supply string values using the API. To create a job, we recommend using `projects.locations.templates.create` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.templates.create` is not recommended, because your job will always start in `us-central1`. Args: request: (DataflowProjectsLocationsTemplatesCreateRequest) input message @@ -1050,7 +1011,7 @@ def Create(self, request, global_params=None): ) def Get(self, request, global_params=None): - r"""Get the template associated with a template. + r"""Get the template associated with a template. To get the template, we recommend using `projects.locations.templates.get` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.templates.get` is not recommended, because only templates that are running in `us-central1` are retrieved. Args: request: (DataflowProjectsLocationsTemplatesGetRequest) input message @@ -1076,7 +1037,7 @@ def Get(self, request, global_params=None): ) def Launch(self, request, global_params=None): - r"""Launch a template. + r"""Launches a template. To launch a template, we recommend using `projects.locations.templates.launch` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.templates.launch` is not recommended, because jobs launched from the template will always start in `us-central1`. Args: request: (DataflowProjectsLocationsTemplatesLaunchRequest) input message @@ -1210,7 +1171,7 @@ def __init__(self, client): self._upload_configs = {} def Create(self, request, global_params=None): - r"""Creates a Cloud Dataflow job from a template. + r"""Creates a Cloud Dataflow job from a template. Do not enter confidential information when you supply string values using the API. To create a job, we recommend using `projects.locations.templates.create` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.templates.create` is not recommended, because your job will always start in `us-central1`. Args: request: (DataflowProjectsTemplatesCreateRequest) input message @@ -1235,7 +1196,7 @@ def Create(self, request, global_params=None): ) def Get(self, request, global_params=None): - r"""Get the template associated with a template. + r"""Get the template associated with a template. 
To get the template, we recommend using `projects.locations.templates.get` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.templates.get` is not recommended, because only templates that are running in `us-central1` are retrieved. Args: request: (DataflowProjectsTemplatesGetRequest) input message @@ -1260,7 +1221,7 @@ def Get(self, request, global_params=None): ) def Launch(self, request, global_params=None): - r"""Launch a template. + r"""Launches a template. To launch a template, we recommend using `projects.locations.templates.launch` with a [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using `projects.templates.launch` is not recommended, because jobs launched from the template will always start in `us-central1`. Args: request: (DataflowProjectsTemplatesLaunchRequest) input message diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py index e7cf625250d2..c0bbfa74ac1e 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py @@ -4,8 +4,6 @@ """ # NOTE: This file is autogenerated and should not be edited by hand. -from __future__ import absolute_import - from apitools.base.protorpclite import messages as _messages from apitools.base.py import encoding from apitools.base.py import extra_types @@ -169,6 +167,22 @@ class AlgorithmValueValuesEnum(_messages.Enum): maxNumWorkers = _messages.IntegerField(2, variant=_messages.Variant.INT32) +class Base2Exponent(_messages.Message): + r"""Exponential buckets where the growth factor between buckets is + `2**(2**-scale)`. e.g. for `scale=1` growth factor is + `2**(2**(-1))=sqrt(2)`. `n` buckets will have the following boundaries. - + 0th: [0, gf) - i in [1, n-1]: [gf^(i), gf^(i+1)) + + Fields: + numberOfBuckets: Must be greater than 0. + scale: Must be between -3 and 3. This forces the growth factor of the + bucket boundaries to be between `2^(1/8)` and `256`. + """ + + numberOfBuckets = _messages.IntegerField(1, variant=_messages.Variant.INT32) + scale = _messages.IntegerField(2, variant=_messages.Variant.INT32) + + class BigQueryIODetails(_messages.Message): r"""Metadata for a BigQuery connector used by the job. @@ -199,6 +213,18 @@ class BigTableIODetails(_messages.Message): tableId = _messages.StringField(3) +class BucketOptions(_messages.Message): + r"""`BucketOptions` describes the bucket boundaries used in the histogram. + + Fields: + exponential: Bucket boundaries grow exponentially. + linear: Bucket boundaries grow linearly. + """ + + exponential = _messages.MessageField('Base2Exponent', 1) + linear = _messages.MessageField('Linear', 2) + + class CPUTime(_messages.Message): r"""Modeled after information exposed by /proc/stat. @@ -288,6 +314,12 @@ class ContainerSpec(_messages.Message): Fields: defaultEnvironment: Default runtime environment for the job. image: Name of the docker container image. E.g., gcr.io/project/some-image + imageRepositoryCertPath: Cloud Storage path to self-signed certificate of + private registry. + imageRepositoryPasswordSecretId: Secret Manager secret id for password to + authenticate to private registry. + imageRepositoryUsernameSecretId: Secret Manager secret id for username to + authenticate to private registry. 
metadata: Metadata describing a template including description and validation rules. sdkInfo: Required. SDK info of the Flex Template. @@ -296,8 +328,11 @@ class ContainerSpec(_messages.Message): defaultEnvironment = _messages.MessageField( 'FlexTemplateRuntimeEnvironment', 1) image = _messages.StringField(2) - metadata = _messages.MessageField('TemplateMetadata', 3) - sdkInfo = _messages.MessageField('SDKInfo', 4) + imageRepositoryCertPath = _messages.StringField(3) + imageRepositoryPasswordSecretId = _messages.StringField(4) + imageRepositoryUsernameSecretId = _messages.StringField(5) + metadata = _messages.MessageField('TemplateMetadata', 6) + sdkInfo = _messages.MessageField('SDKInfo', 7) class CounterMetadata(_messages.Message): @@ -568,6 +603,94 @@ class DataDiskAssignment(_messages.Message): vmInstance = _messages.StringField(2) +class DataSamplingConfig(_messages.Message): + r"""Configuration options for sampling elements. + + Enums: + BehaviorsValueListEntryValuesEnum: + + Fields: + behaviors: List of given sampling behaviors to enable. For example, + specifying behaviors = [ALWAYS_ON] samples in-flight elements but does + not sample exceptions. Can be used to specify multiple behaviors like, + behaviors = [ALWAYS_ON, EXCEPTIONS] for specifying periodic sampling and + exception sampling. If DISABLED is in the list, then sampling will be + disabled and ignore the other given behaviors. Ordering does not matter. + """ + class BehaviorsValueListEntryValuesEnum(_messages.Enum): + r"""BehaviorsValueListEntryValuesEnum enum type. + + Values: + DATA_SAMPLING_BEHAVIOR_UNSPECIFIED: If given, has no effect on sampling + behavior. Used as an unknown or unset sentinel value. + DISABLED: When given, disables element sampling. Has same behavior as + not setting the behavior. + ALWAYS_ON: When given, enables sampling in-flight from all PCollections. + EXCEPTIONS: When given, enables sampling input elements when a user- + defined DoFn causes an exception. + """ + DATA_SAMPLING_BEHAVIOR_UNSPECIFIED = 0 + DISABLED = 1 + ALWAYS_ON = 2 + EXCEPTIONS = 3 + + behaviors = _messages.EnumField( + 'BehaviorsValueListEntryValuesEnum', 1, repeated=True) + + +class DataSamplingReport(_messages.Message): + r"""Contains per-worker telemetry about the data sampling feature. + + Fields: + bytesWrittenDelta: Optional. Delta of bytes written to file from previous + report. + elementsSampledBytes: Optional. Delta of bytes sampled from previous + report. + elementsSampledCount: Optional. Delta of number of elements sampled from + previous report. + exceptionsSampledCount: Optional. Delta of number of samples taken from + user code exceptions from previous report. + pcollectionsSampledCount: Optional. Delta of number of PCollections + sampled from previous report. + persistenceErrorsCount: Optional. Delta of errors counts from persisting + the samples from previous report. + translationErrorsCount: Optional. Delta of errors counts from retrieving, + or translating the samples from previous report. + """ + + bytesWrittenDelta = _messages.IntegerField(1) + elementsSampledBytes = _messages.IntegerField(2) + elementsSampledCount = _messages.IntegerField(3) + exceptionsSampledCount = _messages.IntegerField(4) + pcollectionsSampledCount = _messages.IntegerField(5) + persistenceErrorsCount = _messages.IntegerField(6) + translationErrorsCount = _messages.IntegerField(7) + + +class DataflowHistogramValue(_messages.Message): + r"""Summary statistics for a population of values. 
HistogramValue contains a + sequence of buckets and gives a count of values that fall into each bucket. + Bucket boundares are defined by a formula and bucket widths are either fixed + or exponentially increasing. + + Fields: + bucketCounts: Optional. The number of values in each bucket of the + histogram, as described in `bucket_options`. `bucket_counts` should + contain N values, where N is the number of buckets specified in + `bucket_options`. If `bucket_counts` has fewer than N values, the + remaining values are assumed to be 0. + bucketOptions: Describes the bucket boundaries used in the histogram. + count: Number of values recorded in this histogram. + outlierStats: Statistics on the values recorded in the histogram that fall + out of the bucket boundaries. + """ + + bucketCounts = _messages.IntegerField(1, repeated=True) + bucketOptions = _messages.MessageField('BucketOptions', 2) + count = _messages.IntegerField(3) + outlierStats = _messages.MessageField('OutlierStats', 4) + + class DataflowProjectsDeleteSnapshotsRequest(_messages.Message): r"""A DataflowProjectsDeleteSnapshotsRequest object. @@ -596,6 +719,7 @@ class DataflowProjectsJobsAggregatedRequest(_messages.Message): location: The [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains this job. + name: Optional. The job name. pageSize: If there are many jobs, limit response to at most this many. The actual number of jobs returned will be the lesser of max_responses and an unspecified server-defined limit. @@ -635,7 +759,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. """ @@ -646,10 +775,11 @@ class ViewValueValuesEnum(_messages.Enum): filter = _messages.EnumField('FilterValueValuesEnum', 1) location = _messages.StringField(2) - pageSize = _messages.IntegerField(3, variant=_messages.Variant.INT32) - pageToken = _messages.StringField(4) - projectId = _messages.StringField(5, required=True) - view = _messages.EnumField('ViewValueValuesEnum', 6) + name = _messages.StringField(3) + pageSize = _messages.IntegerField(4, variant=_messages.Variant.INT32) + pageToken = _messages.StringField(5) + projectId = _messages.StringField(6, required=True) + view = _messages.EnumField('ViewValueValuesEnum', 7) class DataflowProjectsJobsCreateRequest(_messages.Message): @@ -677,7 +807,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. 
For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. """ @@ -766,7 +901,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. """ @@ -794,6 +934,7 @@ class DataflowProjectsJobsListRequest(_messages.Message): location: The [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains this job. + name: Optional. The job name. pageSize: If there are many jobs, limit response to at most this many. The actual number of jobs returned will be the lesser of max_responses and an unspecified server-defined limit. @@ -833,7 +974,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. """ @@ -844,10 +990,11 @@ class ViewValueValuesEnum(_messages.Enum): filter = _messages.EnumField('FilterValueValuesEnum', 1) location = _messages.StringField(2) - pageSize = _messages.IntegerField(3, variant=_messages.Variant.INT32) - pageToken = _messages.StringField(4) - projectId = _messages.StringField(5, required=True) - view = _messages.EnumField('ViewValueValuesEnum', 6) + name = _messages.StringField(3) + pageSize = _messages.IntegerField(4, variant=_messages.Variant.INT32) + pageToken = _messages.StringField(5) + projectId = _messages.StringField(6, required=True) + view = _messages.EnumField('ViewValueValuesEnum', 7) class DataflowProjectsJobsMessagesListRequest(_messages.Message): @@ -947,12 +1094,19 @@ class DataflowProjectsJobsUpdateRequest(_messages.Message): (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains this job. projectId: The ID of the Cloud Platform project that the job belongs to. + updateMask: The list of fields to update relative to Job. If empty, only + RequestedJobState will be considered for update. 
If the FieldMask is not + empty and RequestedJobState is none/empty, The fields specified in the + update mask will be the only ones considered for update. If both + RequestedJobState and update_mask are specified, an error will be + returned as we cannot update both state and mask. """ job = _messages.MessageField('Job', 1) jobId = _messages.StringField(2, required=True) location = _messages.StringField(3) projectId = _messages.StringField(4, required=True) + updateMask = _messages.StringField(5) class DataflowProjectsJobsWorkItemsLeaseRequest(_messages.Message): @@ -1030,7 +1184,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. """ @@ -1152,7 +1311,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. """ @@ -1180,6 +1344,7 @@ class DataflowProjectsLocationsJobsListRequest(_messages.Message): location: The [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains this job. + name: Optional. The job name. pageSize: If there are many jobs, limit response to at most this many. The actual number of jobs returned will be the lesser of max_responses and an unspecified server-defined limit. @@ -1219,7 +1384,12 @@ class ViewValueValuesEnum(_messages.Enum): JOB_VIEW_SUMMARY: Request summary information only: Project ID, Job ID, job name, job type, job status, start/end time, and Cloud SDK version details. - JOB_VIEW_ALL: Request all information available for this job. + JOB_VIEW_ALL: Request all information available for this job. When the + job is in `JOB_STATE_PENDING`, the job has been created but is not yet + running, and not all job information is available. For complete job + information, wait until the job in is `JOB_STATE_RUNNING`. For more + information, see [JobState](https://cloud.google.com/dataflow/docs/ref + erence/rest/v1b3/projects.jobs#jobstate). JOB_VIEW_DESCRIPTION: Request summary info and limited job description data for steps, labels and environment. 
""" @@ -1230,10 +1400,11 @@ class ViewValueValuesEnum(_messages.Enum): filter = _messages.EnumField('FilterValueValuesEnum', 1) location = _messages.StringField(2, required=True) - pageSize = _messages.IntegerField(3, variant=_messages.Variant.INT32) - pageToken = _messages.StringField(4) - projectId = _messages.StringField(5, required=True) - view = _messages.EnumField('ViewValueValuesEnum', 6) + name = _messages.StringField(3) + pageSize = _messages.IntegerField(4, variant=_messages.Variant.INT32) + pageToken = _messages.StringField(5) + projectId = _messages.StringField(6, required=True) + view = _messages.EnumField('ViewValueValuesEnum', 7) class DataflowProjectsLocationsJobsMessagesListRequest(_messages.Message): @@ -1380,12 +1551,19 @@ class DataflowProjectsLocationsJobsUpdateRequest(_messages.Message): (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains this job. projectId: The ID of the Cloud Platform project that the job belongs to. + updateMask: The list of fields to update relative to Job. If empty, only + RequestedJobState will be considered for update. If the FieldMask is not + empty and RequestedJobState is none/empty, The fields specified in the + update mask will be the only ones considered for update. If both + RequestedJobState and update_mask are specified, an error will be + returned as we cannot update both state and mask. """ job = _messages.MessageField('Job', 1) jobId = _messages.StringField(2, required=True) location = _messages.StringField(3, required=True) projectId = _messages.StringField(4, required=True) + updateMask = _messages.StringField(5) class DataflowProjectsLocationsJobsWorkItemsLeaseRequest(_messages.Message): @@ -1472,23 +1650,6 @@ class DataflowProjectsLocationsSnapshotsListRequest(_messages.Message): projectId = _messages.StringField(3, required=True) -class DataflowProjectsLocationsSqlValidateRequest(_messages.Message): - r"""A DataflowProjectsLocationsSqlValidateRequest object. - - Fields: - location: The [regional endpoint] - (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to - which to direct the request. - projectId: Required. The ID of the Cloud Platform project that the job - belongs to. - query: The sql query to validate. - """ - - location = _messages.StringField(1, required=True) - projectId = _messages.StringField(2, required=True) - query = _messages.StringField(3) - - class DataflowProjectsLocationsTemplatesCreateRequest(_messages.Message): r"""A DataflowProjectsLocationsTemplatesCreateRequest object. @@ -1543,13 +1704,13 @@ class DataflowProjectsLocationsTemplatesLaunchRequest(_messages.Message): r"""A DataflowProjectsLocationsTemplatesLaunchRequest object. Fields: - dynamicTemplate_gcsPath: Path to dynamic template spec file on Cloud - Storage. The file must be a Json serialized DynamicTemplateFieSpec - object. + dynamicTemplate_gcsPath: Path to the dynamic template specification file + on Cloud Storage. The file must be a JSON serialized + `DynamicTemplateFileSpec` object. dynamicTemplate_stagingLocation: Cloud Storage path for staging dependencies. Must be a valid Cloud Storage URL, beginning with `gs://`. - gcsPath: A Cloud Storage path to the template from which to create the - job. Must be valid Cloud Storage URL, beginning with 'gs://'. + gcsPath: A Cloud Storage path to the template to use to create the job. + Must be valid Cloud Storage URL, beginning with `gs://`. launchTemplateParameters: A LaunchTemplateParameters resource to be passed as the request body. 
location: The [regional endpoint] @@ -1668,13 +1829,13 @@ class DataflowProjectsTemplatesLaunchRequest(_messages.Message): r"""A DataflowProjectsTemplatesLaunchRequest object. Fields: - dynamicTemplate_gcsPath: Path to dynamic template spec file on Cloud - Storage. The file must be a Json serialized DynamicTemplateFieSpec - object. + dynamicTemplate_gcsPath: Path to the dynamic template specification file + on Cloud Storage. The file must be a JSON serialized + `DynamicTemplateFileSpec` object. dynamicTemplate_stagingLocation: Cloud Storage path for staging dependencies. Must be a valid Cloud Storage URL, beginning with `gs://`. - gcsPath: A Cloud Storage path to the template from which to create the - job. Must be valid Cloud Storage URL, beginning with 'gs://'. + gcsPath: A Cloud Storage path to the template to use to create the job. + Must be valid Cloud Storage URL, beginning with `gs://`. launchTemplateParameters: A LaunchTemplateParameters resource to be passed as the request body. location: The [regional endpoint] @@ -1726,11 +1887,14 @@ class DebugOptions(_messages.Message): r"""Describes any options that have an effect on the debugging of pipelines. Fields: - enableHotKeyLogging: When true, enables the logging of the literal hot key - to the user's Cloud Logging. + dataSampling: Configuration options for sampling elements from a running + pipeline. + enableHotKeyLogging: Optional. When true, enables the logging of the + literal hot key to the user's Cloud Logging. """ - enableHotKeyLogging = _messages.BooleanField(1) + dataSampling = _messages.MessageField('DataSamplingConfig', 1) + enableHotKeyLogging = _messages.BooleanField(2) class DeleteSnapshotResponse(_messages.Message): @@ -1883,10 +2047,17 @@ class Environment(_messages.Message): r"""Describes the environment in which a Dataflow Job runs. Enums: - FlexResourceSchedulingGoalValueValuesEnum: Which Flexible Resource - Scheduling mode to run in. + FlexResourceSchedulingGoalValueValuesEnum: Optional. Which Flexible + Resource Scheduling mode to run in. ShuffleModeValueValuesEnum: Output only. The shuffle mode used for the job. + StreamingModeValueValuesEnum: Optional. Specifies the Streaming Engine + message processing guarantees. Reduces cost and latency but might result + in duplicate messages committed to storage. Designed to run simple + mapping streaming ETL jobs at the lowest cost. For example, Change Data + Capture (CDC) to BigQuery is a canonical use case. For more information, + see [Set the pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). Messages: InternalExperimentsValue: Experimental settings. @@ -1903,31 +2074,38 @@ class Environment(_messages.Message): unknown or unspecified, the service will attempt to choose a reasonable default. This should be in the form of the API service name, e.g. "compute.googleapis.com". - dataset: The dataset for the current project where various workflow - related tables are stored. The supported resource type is: Google - BigQuery: bigquery.googleapis.com/{dataset} - debugOptions: Any debugging options to be supplied to the job. + dataset: Optional. The dataset for the current project where various + workflow related tables are stored. The supported resource type is: + Google BigQuery: bigquery.googleapis.com/{dataset} + debugOptions: Optional. Any debugging options to be supplied to the job. experiments: The list of experiments to enable. This field should be used for SDK related experiments and not for service related experiments. 
The proper field for service related experiments is service_options. - flexResourceSchedulingGoal: Which Flexible Resource Scheduling mode to run - in. + flexResourceSchedulingGoal: Optional. Which Flexible Resource Scheduling + mode to run in. internalExperiments: Experimental settings. sdkPipelineOptions: The Cloud Dataflow SDK pipeline options specified by the user. These options are passed through the service and are used to recreate the SDK pipeline options on the worker in a language agnostic and platform independent way. - serviceAccountEmail: Identity to run virtual machines as. Defaults to the - default account. - serviceKmsKeyName: If set, contains the Cloud KMS key identifier used to - encrypt data at rest, AKA a Customer Managed Encryption Key (CMEK). - Format: + serviceAccountEmail: Optional. Identity to run virtual machines as. + Defaults to the default account. + serviceKmsKeyName: Optional. If set, contains the Cloud KMS key identifier + used to encrypt data at rest, AKA a Customer Managed Encryption Key + (CMEK). Format: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY - serviceOptions: The list of service options to enable. This field should - be used for service related experiments only. These experiments, when - graduating to GA, should be replaced by dedicated fields or become - default (i.e. always on). + serviceOptions: Optional. The list of service options to enable. This + field should be used for service related experiments only. These + experiments, when graduating to GA, should be replaced by dedicated + fields or become default (i.e. always on). shuffleMode: Output only. The shuffle mode used for the job. + streamingMode: Optional. Specifies the Streaming Engine message processing + guarantees. Reduces cost and latency but might result in duplicate + messages committed to storage. Designed to run simple mapping streaming + ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to + BigQuery is a canonical use case. For more information, see [Set the + pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). tempStoragePrefix: The prefix of the resources the system should use for temporary storage. The system will append the suffix "/temp-{JOBNAME} to this resource prefix, where {JOBNAME} is the value of the job_name @@ -1937,17 +2115,19 @@ class Environment(_messages.Message): The supported resource type is: Google Cloud Storage: storage.googleapis.com/{bucket}/{object} bucket.storage.googleapis.com/{object} + useStreamingEngineResourceBasedBilling: Output only. Whether the job uses + the Streaming Engine resource-based billing model. userAgent: A description of the process that generated the request. version: A structure describing which components and their versions of the service are required in order to run the job. workerPools: The worker pools. At least one "harness" worker pool must be specified in order for the job to have workers. - workerRegion: The Compute Engine region + workerRegion: Optional. The Compute Engine region (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in which worker processing should occur, e.g. "us-west1". Mutually exclusive with worker_zone. If neither worker_region nor worker_zone is specified, default to the control plane's region. - workerZone: The Compute Engine zone + workerZone: Optional. The Compute Engine zone (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in which worker processing should occur, e.g. 
"us-west1-a". Mutually exclusive with worker_region. If neither worker_region nor worker_zone @@ -1955,7 +2135,7 @@ class Environment(_messages.Message): available capacity. """ class FlexResourceSchedulingGoalValueValuesEnum(_messages.Enum): - r"""Which Flexible Resource Scheduling mode to run in. + r"""Optional. Which Flexible Resource Scheduling mode to run in. Values: FLEXRS_UNSPECIFIED: Run in the default mode. @@ -1978,6 +2158,29 @@ class ShuffleModeValueValuesEnum(_messages.Enum): VM_BASED = 1 SERVICE_BASED = 2 + class StreamingModeValueValuesEnum(_messages.Enum): + r"""Optional. Specifies the Streaming Engine message processing + guarantees. Reduces cost and latency but might result in duplicate + messages committed to storage. Designed to run simple mapping streaming + ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to + BigQuery is a canonical use case. For more information, see [Set the + pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). + + Values: + STREAMING_MODE_UNSPECIFIED: Run in the default mode. + STREAMING_MODE_EXACTLY_ONCE: In this mode, message deduplication is + performed against persistent state to make sure each message is + processed and committed to storage exactly once. + STREAMING_MODE_AT_LEAST_ONCE: Message deduplication is not performed. + Messages might be processed multiple times, and the results are + applied multiple times. Note: Setting this value also enables + Streaming Engine and Streaming Engine resource-based billing. + """ + STREAMING_MODE_UNSPECIFIED = 0 + STREAMING_MODE_EXACTLY_ONCE = 1 + STREAMING_MODE_AT_LEAST_ONCE = 2 + @encoding.MapUnrecognizedFields('additionalProperties') class InternalExperimentsValue(_messages.Message): r"""Experimental settings. @@ -2093,12 +2296,14 @@ class AdditionalProperty(_messages.Message): serviceKmsKeyName = _messages.StringField(9) serviceOptions = _messages.StringField(10, repeated=True) shuffleMode = _messages.EnumField('ShuffleModeValueValuesEnum', 11) - tempStoragePrefix = _messages.StringField(12) - userAgent = _messages.MessageField('UserAgentValue', 13) - version = _messages.MessageField('VersionValue', 14) - workerPools = _messages.MessageField('WorkerPool', 15, repeated=True) - workerRegion = _messages.StringField(16) - workerZone = _messages.StringField(17) + streamingMode = _messages.EnumField('StreamingModeValueValuesEnum', 12) + tempStoragePrefix = _messages.StringField(13) + useStreamingEngineResourceBasedBilling = _messages.BooleanField(14) + userAgent = _messages.MessageField('UserAgentValue', 15) + version = _messages.MessageField('VersionValue', 16) + workerPools = _messages.MessageField('WorkerPool', 17, repeated=True) + workerRegion = _messages.StringField(18) + workerZone = _messages.StringField(19) class ExecutionStageState(_messages.Message): @@ -2281,12 +2486,20 @@ class FlattenInstruction(_messages.Message): class FlexTemplateRuntimeEnvironment(_messages.Message): r"""The environment values to be set at runtime for flex template. + LINT.IfChange Enums: AutoscalingAlgorithmValueValuesEnum: The algorithm to use for autoscaling FlexrsGoalValueValuesEnum: Set FlexRS goal for the job. https://cloud.google.com/dataflow/docs/guides/flexrs IpConfigurationValueValuesEnum: Configuration for VM IPs. + StreamingModeValueValuesEnum: Optional. Specifies the Streaming Engine + message processing guarantees. Reduces cost and latency but might result + in duplicate messages committed to storage. 
Designed to run simple + mapping streaming ETL jobs at the lowest cost. For example, Change Data + Capture (CDC) to BigQuery is a canonical use case. For more information, + see [Set the pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). Messages: AdditionalUserLabelsValue: Additional user labels to be specified for the @@ -2304,10 +2517,15 @@ class FlexTemplateRuntimeEnvironment(_messages.Message): value pairs. Example: { "name": "wrench", "mass": "1kg", "count": "3" }. autoscalingAlgorithm: The algorithm to use for autoscaling diskSizeGb: Worker disk size, in gigabytes. - dumpHeapOnOom: If true, save a heap dump before killing a thread or - process which is GC thrashing or out of memory. The location of the heap - file will either be echoed back to the user, or the user will be given - the opportunity to download the heap file. + dumpHeapOnOom: If true, when processing time is spent almost entirely on + garbage collection (GC), saves a heap dump before ending the thread or + process. If false, ends the thread or process without saving a heap + dump. Does not save a heap dump when the Java Virtual Machine (JVM) has + an out of memory error during processing. The location of the heap file + is either echoed back to the user, or the user is given the opportunity + to download the heap file. + enableLauncherVmSerialPortLogging: If true serial port logging will be + enabled for the launcher VM. enableStreamingEngine: Whether to enable Streaming Engine for the job. flexrsGoal: Set FlexRS goal for the job. https://cloud.google.com/dataflow/docs/guides/flexrs @@ -2325,8 +2543,8 @@ class FlexTemplateRuntimeEnvironment(_messages.Message): numWorkers: The initial number of Google Compute Engine instances for the job. saveHeapDumpsToGcsPath: Cloud Storage bucket (directory) to upload heap - dumps to the given location. Enabling this implies that heap dumps - should be generated on OOM (dump_heap_on_oom is set to true). + dumps to. Enabling this field implies that `dump_heap_on_oom` is set to + true. sdkContainerImage: Docker registry location of container image to use for the 'worker harness. Default is the container for the version of the SDK. Note this field is only valid for portable pipelines. @@ -2334,6 +2552,13 @@ class FlexTemplateRuntimeEnvironment(_messages.Message): job as. stagingLocation: The Cloud Storage path for staging local files. Must be a valid Cloud Storage URL, beginning with `gs://`. + streamingMode: Optional. Specifies the Streaming Engine message processing + guarantees. Reduces cost and latency but might result in duplicate + messages committed to storage. Designed to run simple mapping streaming + ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to + BigQuery is a canonical use case. For more information, see [Set the + pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). subnetwork: Subnetwork to which VMs will be assigned, if desired. You can specify a subnetwork using either a complete URL or an abbreviated path. Expected to be of the form "https://www.googleapis.com/compute/v1/projec @@ -2397,6 +2622,29 @@ class IpConfigurationValueValuesEnum(_messages.Enum): WORKER_IP_PUBLIC = 1 WORKER_IP_PRIVATE = 2 + class StreamingModeValueValuesEnum(_messages.Enum): + r"""Optional. Specifies the Streaming Engine message processing + guarantees. Reduces cost and latency but might result in duplicate + messages committed to storage. 
Designed to run simple mapping streaming + ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to + BigQuery is a canonical use case. For more information, see [Set the + pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). + + Values: + STREAMING_MODE_UNSPECIFIED: Run in the default mode. + STREAMING_MODE_EXACTLY_ONCE: In this mode, message deduplication is + performed against persistent state to make sure each message is + processed and committed to storage exactly once. + STREAMING_MODE_AT_LEAST_ONCE: Message deduplication is not performed. + Messages might be processed multiple times, and the results are + applied multiple times. Note: Setting this value also enables + Streaming Engine and Streaming Engine resource-based billing. + """ + STREAMING_MODE_UNSPECIFIED = 0 + STREAMING_MODE_EXACTLY_ONCE = 1 + STREAMING_MODE_AT_LEAST_ONCE = 2 + @encoding.MapUnrecognizedFields('additionalProperties') class AdditionalUserLabelsValue(_messages.Message): r"""Additional user labels to be specified for the job. Keys and values @@ -2433,24 +2681,26 @@ class AdditionalProperty(_messages.Message): 'AutoscalingAlgorithmValueValuesEnum', 3) diskSizeGb = _messages.IntegerField(4, variant=_messages.Variant.INT32) dumpHeapOnOom = _messages.BooleanField(5) - enableStreamingEngine = _messages.BooleanField(6) - flexrsGoal = _messages.EnumField('FlexrsGoalValueValuesEnum', 7) - ipConfiguration = _messages.EnumField('IpConfigurationValueValuesEnum', 8) - kmsKeyName = _messages.StringField(9) - launcherMachineType = _messages.StringField(10) - machineType = _messages.StringField(11) - maxWorkers = _messages.IntegerField(12, variant=_messages.Variant.INT32) - network = _messages.StringField(13) - numWorkers = _messages.IntegerField(14, variant=_messages.Variant.INT32) - saveHeapDumpsToGcsPath = _messages.StringField(15) - sdkContainerImage = _messages.StringField(16) - serviceAccountEmail = _messages.StringField(17) - stagingLocation = _messages.StringField(18) - subnetwork = _messages.StringField(19) - tempLocation = _messages.StringField(20) - workerRegion = _messages.StringField(21) - workerZone = _messages.StringField(22) - zone = _messages.StringField(23) + enableLauncherVmSerialPortLogging = _messages.BooleanField(6) + enableStreamingEngine = _messages.BooleanField(7) + flexrsGoal = _messages.EnumField('FlexrsGoalValueValuesEnum', 8) + ipConfiguration = _messages.EnumField('IpConfigurationValueValuesEnum', 9) + kmsKeyName = _messages.StringField(10) + launcherMachineType = _messages.StringField(11) + machineType = _messages.StringField(12) + maxWorkers = _messages.IntegerField(13, variant=_messages.Variant.INT32) + network = _messages.StringField(14) + numWorkers = _messages.IntegerField(15, variant=_messages.Variant.INT32) + saveHeapDumpsToGcsPath = _messages.StringField(16) + sdkContainerImage = _messages.StringField(17) + serviceAccountEmail = _messages.StringField(18) + stagingLocation = _messages.StringField(19) + streamingMode = _messages.EnumField('StreamingModeValueValuesEnum', 20) + subnetwork = _messages.StringField(21) + tempLocation = _messages.StringField(22) + workerRegion = _messages.StringField(23) + workerZone = _messages.StringField(24) + zone = _messages.StringField(25) class FloatingPointList(_messages.Message): @@ -2556,6 +2806,46 @@ class Histogram(_messages.Message): firstBucketOffset = _messages.IntegerField(2, variant=_messages.Variant.INT32) +class HotKeyDebuggingInfo(_messages.Message): + r"""Information useful for debugging a 
hot key detection. + + Messages: + DetectedHotKeysValue: Debugging information for each detected hot key. + Keyed by a hash of the key. + + Fields: + detectedHotKeys: Debugging information for each detected hot key. Keyed by + a hash of the key. + """ + @encoding.MapUnrecognizedFields('additionalProperties') + class DetectedHotKeysValue(_messages.Message): + r"""Debugging information for each detected hot key. Keyed by a hash of + the key. + + Messages: + AdditionalProperty: An additional property for a DetectedHotKeysValue + object. + + Fields: + additionalProperties: Additional properties of type DetectedHotKeysValue + """ + class AdditionalProperty(_messages.Message): + r"""An additional property for a DetectedHotKeysValue object. + + Fields: + key: Name of the additional property. + value: A HotKeyInfo attribute. + """ + + key = _messages.StringField(1) + value = _messages.MessageField('HotKeyInfo', 2) + + additionalProperties = _messages.MessageField( + 'AdditionalProperty', 1, repeated=True) + + detectedHotKeys = _messages.MessageField('DetectedHotKeysValue', 1) + + class HotKeyDetection(_messages.Message): r"""Proto describing a hot key detected on a given WorkItem. @@ -2572,6 +2862,25 @@ class HotKeyDetection(_messages.Message): userStepName = _messages.StringField(3) +class HotKeyInfo(_messages.Message): + r"""Information about a hot key. + + Fields: + hotKeyAge: The age of the hot key measured from when it was first + detected. + key: A detected hot key that is causing limited parallelism. This field + will be populated only if the following flag is set to true: "-- + enable_hot_key_logging". + keyTruncated: If true, then the above key is truncated and cannot be + deserialized. This occurs if the key above is populated and the key size + is >5MB. + """ + + hotKeyAge = _messages.StringField(1) + key = _messages.StringField(2) + keyTruncated = _messages.BooleanField(3) + + class InstructionInput(_messages.Message): r"""An input of an instruction, as a reference to an output of a producer instruction. @@ -2676,22 +2985,25 @@ class IntegerMean(_messages.Message): class Job(_messages.Message): - r"""Defines a job to be run by the Cloud Dataflow service. + r"""Defines a job to be run by the Cloud Dataflow service. Do not enter + confidential information when you supply string values using the API. Enums: CurrentStateValueValuesEnum: The current state of the job. Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise specified. A job in the `JOB_STATE_RUNNING` state may asynchronously enter a terminal state. After a job has reached a terminal state, no further state - updates may be made. This field may be mutated by the Cloud Dataflow + updates may be made. This field might be mutated by the Dataflow service; callers cannot mutate it. - RequestedStateValueValuesEnum: The job's requested state. `UpdateJob` may - be used to switch between the `JOB_STATE_STOPPED` and - `JOB_STATE_RUNNING` states, by setting requested_state. `UpdateJob` may - also be used to directly set a job's requested state to - `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the - job if it has not already reached a terminal state. - TypeValueValuesEnum: The type of Cloud Dataflow job. + RequestedStateValueValuesEnum: The job's requested state. Applies to + `UpdateJob` requests. Set `requested_state` with `UpdateJob` requests to + switch between the states `JOB_STATE_STOPPED` and `JOB_STATE_RUNNING`. 
+ You can also use `UpdateJob` requests to change a job's state from + `JOB_STATE_RUNNING` to `JOB_STATE_CANCELLED`, `JOB_STATE_DONE`, or + `JOB_STATE_DRAINED`. These states irrevocably terminate the job if it + hasn't already reached a terminal state. This field has no effect on + `CreateJob` requests. + TypeValueValuesEnum: Optional. The type of Dataflow job. Messages: LabelsValue: User-defined labels for this job. The labels map can contain @@ -2700,8 +3012,9 @@ class Job(_messages.Message): \p{Ll}\p{Lo}{0,62} * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63} * Both keys and values are additionally constrained to be <= 128 bytes in size. - TransformNameMappingValue: The map of transform name prefixes of the job - to be replaced to the corresponding name prefixes of the new job. + TransformNameMappingValue: Optional. The map of transform name prefixes of + the job to be replaced to the corresponding name prefixes of the new + job. Fields: clientRequestId: The client's unique identifier of the job, re-used across @@ -2719,14 +3032,13 @@ class Job(_messages.Message): `JOB_STATE_STOPPED` state unless otherwise specified. A job in the `JOB_STATE_RUNNING` state may asynchronously enter a terminal state. After a job has reached a terminal state, no further state updates may - be made. This field may be mutated by the Cloud Dataflow service; - callers cannot mutate it. + be made. This field might be mutated by the Dataflow service; callers + cannot mutate it. currentStateTime: The timestamp associated with the current state. - environment: The environment for the job. + environment: Optional. The environment for the job. executionInfo: Deprecated. - id: The unique ID of this job. This field is set by the Cloud Dataflow - service when the Job is created, and is immutable for the life of the - job. + id: The unique ID of this job. This field is set by the Dataflow service + when the job is created, and is immutable for the life of the job. jobMetadata: This field is populated by the Dataflow service to support filtering jobs by the metadata values provided here. Populated for ListJobs and all GetJob views SUMMARY and higher. @@ -2736,33 +3048,44 @@ class Job(_messages.Message): \p{Ll}\p{Lo}{0,62} * Values must conform to regexp: [\p{Ll}\p{Lo}\p{N}_-]{0,63} * Both keys and values are additionally constrained to be <= 128 bytes in size. - location: The [regional endpoint] + location: Optional. The [regional endpoint] (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that contains this job. - name: The user-specified Cloud Dataflow job name. Only one Job with a - given name may exist in a project at any given time. If a caller - attempts to create a Job with the same name as an already-existing Job, - the attempt returns the existing Job. The name must match the regular - expression `[a-z]([-a-z0-9]{0,1022}[a-z0-9])?` + name: Optional. The user-specified Dataflow job name. Only one active job + with a given name can exist in a project within one region at any given + time. Jobs in different regions can have the same name. If a caller + attempts to create a job with the same name as an active job that + already exists, the attempt returns the existing job. The name must + match the regular expression `[a-z]([-a-z0-9]{0,1022}[a-z0-9])?` pipelineDescription: Preliminary field: The format of this data may change at any time. A description of the user pipeline and stages through which it is executed. Created by Cloud Dataflow service. 
Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL. - projectId: The ID of the Cloud Platform project that the job belongs to. + projectId: The ID of the Google Cloud project that the job belongs to. replaceJobId: If this job is an update of an existing job, this field is the job ID of the job it replaced. When sending a `CreateJobRequest`, you can update a job by specifying it here. The job named here is stopped, and its intermediate state is transferred to this job. replacedByJobId: If another job is an update of this job (and thus, this job is in `JOB_STATE_UPDATED`), this field contains the ID of that job. - requestedState: The job's requested state. `UpdateJob` may be used to - switch between the `JOB_STATE_STOPPED` and `JOB_STATE_RUNNING` states, - by setting requested_state. `UpdateJob` may also be used to directly set - a job's requested state to `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, - irrevocably terminating the job if it has not already reached a terminal - state. + requestedState: The job's requested state. Applies to `UpdateJob` + requests. Set `requested_state` with `UpdateJob` requests to switch + between the states `JOB_STATE_STOPPED` and `JOB_STATE_RUNNING`. You can + also use `UpdateJob` requests to change a job's state from + `JOB_STATE_RUNNING` to `JOB_STATE_CANCELLED`, `JOB_STATE_DONE`, or + `JOB_STATE_DRAINED`. These states irrevocably terminate the job if it + hasn't already reached a terminal state. This field has no effect on + `CreateJob` requests. + runtimeUpdatableParams: This field may ONLY be modified at runtime using + the projects.jobs.update method to adjust job behavior. This field has + no effect when specified at job creation. + satisfiesPzi: Output only. Reserved for future use. This field is set only + in responses from the server; it is ignored if it is set in any + requests. satisfiesPzs: Reserved for future use. This field is set only in responses from the server; it is ignored if it is set in any requests. + serviceResources: Output only. Resources used by the Dataflow Service to + run the job. stageStates: This field may be mutated by the Cloud Dataflow service; callers cannot mutate it. startTime: The timestamp when the job was started (transitioned to @@ -2781,17 +3104,17 @@ class Job(_messages.Message): The supported files are: Google Cloud Storage: storage.googleapis.com/{bucket}/{object} bucket.storage.googleapis.com/{object} - transformNameMapping: The map of transform name prefixes of the job to be - replaced to the corresponding name prefixes of the new job. - type: The type of Cloud Dataflow job. + transformNameMapping: Optional. The map of transform name prefixes of the + job to be replaced to the corresponding name prefixes of the new job. + type: Optional. The type of Dataflow job. """ class CurrentStateValueValuesEnum(_messages.Enum): r"""The current state of the job. Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise specified. A job in the `JOB_STATE_RUNNING` state may asynchronously enter a terminal state. After a job has reached a terminal state, no further state updates may be made. - This field may be mutated by the Cloud Dataflow service; callers cannot - mutate it. + This field might be mutated by the Dataflow service; callers cannot mutate + it. Values: JOB_STATE_UNKNOWN: The job's run state isn't specified. 
@@ -2859,11 +3182,13 @@ class CurrentStateValueValuesEnum(_messages.Enum): JOB_STATE_RESOURCE_CLEANING_UP = 12 class RequestedStateValueValuesEnum(_messages.Enum): - r"""The job's requested state. `UpdateJob` may be used to switch between - the `JOB_STATE_STOPPED` and `JOB_STATE_RUNNING` states, by setting - requested_state. `UpdateJob` may also be used to directly set a job's - requested state to `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably - terminating the job if it has not already reached a terminal state. + r"""The job's requested state. Applies to `UpdateJob` requests. Set + `requested_state` with `UpdateJob` requests to switch between the states + `JOB_STATE_STOPPED` and `JOB_STATE_RUNNING`. You can also use `UpdateJob` + requests to change a job's state from `JOB_STATE_RUNNING` to + `JOB_STATE_CANCELLED`, `JOB_STATE_DONE`, or `JOB_STATE_DRAINED`. These + states irrevocably terminate the job if it hasn't already reached a + terminal state. This field has no effect on `CreateJob` requests. Values: JOB_STATE_UNKNOWN: The job's run state isn't specified. @@ -2931,7 +3256,7 @@ class RequestedStateValueValuesEnum(_messages.Enum): JOB_STATE_RESOURCE_CLEANING_UP = 12 class TypeValueValuesEnum(_messages.Enum): - r"""The type of Cloud Dataflow job. + r"""Optional. The type of Dataflow job. Values: JOB_TYPE_UNKNOWN: The type of the job is unspecified, or unknown. @@ -2975,8 +3300,8 @@ class AdditionalProperty(_messages.Message): @encoding.MapUnrecognizedFields('additionalProperties') class TransformNameMappingValue(_messages.Message): - r"""The map of transform name prefixes of the job to be replaced to the - corresponding name prefixes of the new job. + r"""Optional. The map of transform name prefixes of the job to be replaced + to the corresponding name prefixes of the new job. Messages: AdditionalProperty: An additional property for a @@ -3017,14 +3342,17 @@ class AdditionalProperty(_messages.Message): replaceJobId = _messages.StringField(15) replacedByJobId = _messages.StringField(16) requestedState = _messages.EnumField('RequestedStateValueValuesEnum', 17) - satisfiesPzs = _messages.BooleanField(18) - stageStates = _messages.MessageField('ExecutionStageState', 19, repeated=True) - startTime = _messages.StringField(20) - steps = _messages.MessageField('Step', 21, repeated=True) - stepsLocation = _messages.StringField(22) - tempFiles = _messages.StringField(23, repeated=True) - transformNameMapping = _messages.MessageField('TransformNameMappingValue', 24) - type = _messages.EnumField('TypeValueValuesEnum', 25) + runtimeUpdatableParams = _messages.MessageField('RuntimeUpdatableParams', 18) + satisfiesPzi = _messages.BooleanField(19) + satisfiesPzs = _messages.BooleanField(20) + serviceResources = _messages.MessageField('ServiceResources', 21) + stageStates = _messages.MessageField('ExecutionStageState', 22, repeated=True) + startTime = _messages.StringField(23) + steps = _messages.MessageField('Step', 24, repeated=True) + stepsLocation = _messages.StringField(25) + tempFiles = _messages.StringField(26, repeated=True) + transformNameMapping = _messages.MessageField('TransformNameMappingValue', 27) + type = _messages.EnumField('TypeValueValuesEnum', 28) class JobExecutionDetails(_messages.Message): @@ -3150,6 +3478,10 @@ class JobMetadata(_messages.Message): r"""Metadata available primarily for filtering jobs. Will be included in the ListJob response and Job SUMMARY view. + Messages: + UserDisplayPropertiesValue: List of display properties to help UI filter + jobs. 
+ Fields: bigTableDetails: Identification of a Cloud Bigtable source used in the Dataflow job. @@ -3163,7 +3495,33 @@ class JobMetadata(_messages.Message): sdkVersion: The SDK version used to run the job. spannerDetails: Identification of a Spanner source used in the Dataflow job. + userDisplayProperties: List of display properties to help UI filter jobs. """ + @encoding.MapUnrecognizedFields('additionalProperties') + class UserDisplayPropertiesValue(_messages.Message): + r"""List of display properties to help UI filter jobs. + + Messages: + AdditionalProperty: An additional property for a + UserDisplayPropertiesValue object. + + Fields: + additionalProperties: Additional properties of type + UserDisplayPropertiesValue + """ + class AdditionalProperty(_messages.Message): + r"""An additional property for a UserDisplayPropertiesValue object. + + Fields: + key: Name of the additional property. + value: A string attribute. + """ + + key = _messages.StringField(1) + value = _messages.StringField(2) + + additionalProperties = _messages.MessageField( + 'AdditionalProperty', 1, repeated=True) bigTableDetails = _messages.MessageField( 'BigTableIODetails', 1, repeated=True) @@ -3175,14 +3533,18 @@ class JobMetadata(_messages.Message): pubsubDetails = _messages.MessageField('PubSubIODetails', 5, repeated=True) sdkVersion = _messages.MessageField('SdkVersion', 6) spannerDetails = _messages.MessageField('SpannerIODetails', 7, repeated=True) + userDisplayProperties = _messages.MessageField( + 'UserDisplayPropertiesValue', 8) class JobMetrics(_messages.Message): r"""JobMetrics contains a collection of metrics describing the detailed progress of a Dataflow job. Metrics correspond to user-defined and system- - defined metrics in the job. This resource captures only the most recent - values of each metric; time-series data can be queried for them (under the - same metric names) from Cloud Monitoring. + defined metrics in the job. For more information, see [Dataflow job metrics] + (https://cloud.google.com/dataflow/docs/guides/using-monitoring-intf). This + resource captures only the most recent values of each metric; time-series + data can be queried for them (under the same metric names) from Cloud + Monitoring. Fields: metricTime: Timestamp as of which metric values are current. @@ -3380,7 +3742,10 @@ class LaunchFlexTemplateResponse(_messages.Message): class LaunchTemplateParameters(_messages.Message): - r"""Parameters to provide to the template being launched. + r"""Parameters to provide to the template being launched. Note that the + [metadata in the pipeline code] + (https://cloud.google.com/dataflow/docs/guides/templates/creating- + templates#metadata) determines which runtime parameters are valid. Messages: ParametersValue: The runtime parameters to pass to the job. @@ -3390,7 +3755,8 @@ class LaunchTemplateParameters(_messages.Message): Fields: environment: The runtime environment for the job. - jobName: Required. The job name to use for the created job. + jobName: Required. The job name to use for the created job. The name must + match the regular expression `[a-z]([-a-z0-9]{0,1022}[a-z0-9])?` parameters: The runtime parameters to pass to the job. transformNameMapping: Only applicable when updating a pipeline. 
Map of transform name prefixes of the job to be replaced to the corresponding @@ -3567,6 +3933,21 @@ class AdditionalProperty(_messages.Message): workItems = _messages.MessageField('WorkItem', 2, repeated=True) +class Linear(_messages.Message): + r"""Linear buckets with the following boundaries for indices in 0 to n-1. - + i in [0, n-1]: [start + (i)*width, start + (i+1)*width) + + Fields: + numberOfBuckets: Must be greater than 0. + start: Lower bound of the first bucket. + width: Distance between bucket boundaries. Must be greater than 0. + """ + + numberOfBuckets = _messages.IntegerField(1, variant=_messages.Variant.INT32) + start = _messages.FloatField(2) + width = _messages.FloatField(3) + + class ListJobMessagesResponse(_messages.Message): r"""Response to a request to list job messages. @@ -3778,6 +4159,49 @@ class MetricUpdate(_messages.Message): updateTime = _messages.StringField(11) +class MetricValue(_messages.Message): + r"""The value of a metric along with its name and labels. + + Messages: + MetricLabelsValue: Optional. Set of metric labels for this metric. + + Fields: + metric: Base name for this metric. + metricLabels: Optional. Set of metric labels for this metric. + valueHistogram: Histogram value of this metric. + valueInt64: Integer value of this metric. + """ + @encoding.MapUnrecognizedFields('additionalProperties') + class MetricLabelsValue(_messages.Message): + r"""Optional. Set of metric labels for this metric. + + Messages: + AdditionalProperty: An additional property for a MetricLabelsValue + object. + + Fields: + additionalProperties: Additional properties of type MetricLabelsValue + """ + class AdditionalProperty(_messages.Message): + r"""An additional property for a MetricLabelsValue object. + + Fields: + key: Name of the additional property. + value: A string attribute. + """ + + key = _messages.StringField(1) + value = _messages.StringField(2) + + additionalProperties = _messages.MessageField( + 'AdditionalProperty', 1, repeated=True) + + metric = _messages.StringField(1) + metricLabels = _messages.MessageField('MetricLabelsValue', 2) + valueHistogram = _messages.MessageField('DataflowHistogramValue', 3) + valueInt64 = _messages.IntegerField(4) + + class MountedDataDisk(_messages.Message): r"""Describes mounted data disk. @@ -3843,6 +4267,24 @@ class KindValueValuesEnum(_messages.Enum): name = _messages.StringField(2) +class OutlierStats(_messages.Message): + r"""Statistics for the underflow and overflow bucket. + + Fields: + overflowCount: Number of values that are larger than the upper bound of + the largest bucket. + overflowMean: Mean of values in the overflow bucket. + underflowCount: Number of values that are smaller than the lower bound of + the smallest bucket. + underflowMean: Mean of values in the undeflow bucket. + """ + + overflowCount = _messages.IntegerField(1) + overflowMean = _messages.FloatField(2) + underflowCount = _messages.IntegerField(3) + underflowMean = _messages.FloatField(4) + + class Package(_messages.Message): r"""The packages that must be installed in order for a worker to run the steps of the Cloud Dataflow job that will be assigned to its worker pool. @@ -3964,13 +4406,32 @@ class ParameterMetadata(_messages.Message): Fields: customMetadata: Optional. Additional metadata for describing this parameter. + defaultValue: Optional. The default values will pre-populate the parameter + with the given value from the proto. If default_value is left empty, the + parameter will be populated with a default of the relevant type, e.g. 
+ false for a boolean. + enumOptions: Optional. The options shown when ENUM ParameterType is + specified. + groupName: Optional. Specifies a group name for this parameter to be + rendered under. Group header text will be rendered exactly as specified + in this field. Only considered when parent_name is NOT provided. helpText: Required. The help text to display for the parameter. + hiddenUi: Optional. Whether the parameter should be hidden in the UI. isOptional: Optional. Whether the parameter is optional. Defaults to false. label: Required. The label to display for the parameter. name: Required. The name of the parameter. paramType: Optional. The type of the parameter. Used for selecting input picker. + parentName: Optional. Specifies the name of the parent parameter. Used in + conjunction with 'parent_trigger_values' to make this parameter + conditional (will only be rendered conditionally). Should be mappable to + a ParameterMetadata.name field. + parentTriggerValues: Optional. The value(s) of the 'parent_name' parameter + which will trigger this parameter to be shown. If left empty, ANY non- + empty value in parent_name will trigger this parameter to be shown. Only + considered when this parameter is conditional (when 'parent_name' has + been provided). regexes: Optional. Regexes that the parameter must match. """ class ParamTypeValueValuesEnum(_messages.Enum): @@ -3993,6 +4454,25 @@ class ParamTypeValueValuesEnum(_messages.Enum): write to. PUBSUB_TOPIC: The parameter specifies a Pub/Sub Topic. PUBSUB_SUBSCRIPTION: The parameter specifies a Pub/Sub Subscription. + BIGQUERY_TABLE: The parameter specifies a BigQuery table. + JAVASCRIPT_UDF_FILE: The parameter specifies a JavaScript UDF in Cloud + Storage. + SERVICE_ACCOUNT: The parameter specifies a Service Account email. + MACHINE_TYPE: The parameter specifies a Machine Type. + KMS_KEY_NAME: The parameter specifies a KMS Key name. + WORKER_REGION: The parameter specifies a Worker Region. + WORKER_ZONE: The parameter specifies a Worker Zone. + BOOLEAN: The parameter specifies a boolean input. + ENUM: The parameter specifies an enum input. + NUMBER: The parameter specifies a number input. + KAFKA_TOPIC: Deprecated. Please use KAFKA_READ_TOPIC instead. + KAFKA_READ_TOPIC: The parameter specifies the fully-qualified name of an + Apache Kafka topic. This can be either a Google Managed Kafka topic or + a non-managed Kafka topic. + KAFKA_WRITE_TOPIC: The parameter specifies the fully-qualified name of + an Apache Kafka topic. This can be an existing Google Managed Kafka + topic, the name for a new Google Managed Kafka topic, or an existing + non-managed Kafka topic. 
""" DEFAULT = 0 TEXT = 1 @@ -4004,6 +4484,19 @@ class ParamTypeValueValuesEnum(_messages.Enum): GCS_WRITE_FOLDER = 7 PUBSUB_TOPIC = 8 PUBSUB_SUBSCRIPTION = 9 + BIGQUERY_TABLE = 10 + JAVASCRIPT_UDF_FILE = 11 + SERVICE_ACCOUNT = 12 + MACHINE_TYPE = 13 + KMS_KEY_NAME = 14 + WORKER_REGION = 15 + WORKER_ZONE = 16 + BOOLEAN = 17 + ENUM = 18 + NUMBER = 19 + KAFKA_TOPIC = 20 + KAFKA_READ_TOPIC = 21 + KAFKA_WRITE_TOPIC = 22 @encoding.MapUnrecognizedFields('additionalProperties') class CustomMetadataValue(_messages.Message): @@ -4031,12 +4524,33 @@ class AdditionalProperty(_messages.Message): 'AdditionalProperty', 1, repeated=True) customMetadata = _messages.MessageField('CustomMetadataValue', 1) - helpText = _messages.StringField(2) - isOptional = _messages.BooleanField(3) - label = _messages.StringField(4) - name = _messages.StringField(5) - paramType = _messages.EnumField('ParamTypeValueValuesEnum', 6) - regexes = _messages.StringField(7, repeated=True) + defaultValue = _messages.StringField(2) + enumOptions = _messages.MessageField( + 'ParameterMetadataEnumOption', 3, repeated=True) + groupName = _messages.StringField(4) + helpText = _messages.StringField(5) + hiddenUi = _messages.BooleanField(6) + isOptional = _messages.BooleanField(7) + label = _messages.StringField(8) + name = _messages.StringField(9) + paramType = _messages.EnumField('ParamTypeValueValuesEnum', 10) + parentName = _messages.StringField(11) + parentTriggerValues = _messages.StringField(12, repeated=True) + regexes = _messages.StringField(13, repeated=True) + + +class ParameterMetadataEnumOption(_messages.Message): + r"""ParameterMetadataEnumOption specifies the option shown in the enum form. + + Fields: + description: Optional. The description to display for the enum option. + label: Optional. The label to display for the enum option. + value: Required. The value of the enum option. + """ + + description = _messages.StringField(1) + label = _messages.StringField(2) + value = _messages.StringField(3) class PartialGroupByKeyInstruction(_messages.Message): @@ -4119,6 +4633,36 @@ class AdditionalProperty(_messages.Message): valueCombiningFn = _messages.MessageField('ValueCombiningFnValue', 6) +class PerStepNamespaceMetrics(_messages.Message): + r"""Metrics for a particular unfused step and namespace. A metric is + uniquely identified by the `metrics_namespace`, `original_step`, `metric + name` and `metric_labels`. + + Fields: + metricValues: Optional. Metrics that are recorded for this namespace and + unfused step. + metricsNamespace: The namespace of these metrics on the worker. + originalStep: The original system name of the unfused step that these + metrics are reported from. + """ + + metricValues = _messages.MessageField('MetricValue', 1, repeated=True) + metricsNamespace = _messages.StringField(2) + originalStep = _messages.StringField(3) + + +class PerWorkerMetrics(_messages.Message): + r"""Per worker metrics. + + Fields: + perStepNamespaceMetrics: Optional. Metrics for a particular unfused step + and namespace. + """ + + perStepNamespaceMetrics = _messages.MessageField( + 'PerStepNamespaceMetrics', 1, repeated=True) + + class PipelineDescription(_messages.Message): r"""A descriptive representation of submitted pipeline as well as the executed form. This data is provided by the Dataflow service for ease of @@ -4130,6 +4674,8 @@ class PipelineDescription(_messages.Message): pipeline. originalPipelineTransform: Description of each transform in the pipeline and collections between them. 
+ stepNamesHash: A hash value of the submitted pipeline portable graph step + names if exists. """ displayData = _messages.MessageField('DisplayData', 1, repeated=True) @@ -4137,6 +4683,7 @@ class PipelineDescription(_messages.Message): 'ExecutionStageSummary', 2, repeated=True) originalPipelineTransform = _messages.MessageField( 'TransformSummary', 3, repeated=True) + stepNamesHash = _messages.StringField(4) class Point(_messages.Message): @@ -4207,6 +4754,8 @@ class PubsubLocation(_messages.Message): Fields: dropLateData: Indicates whether the pipeline allows late-arriving data. + dynamicDestinations: If true, then this location represents dynamic + topics. idLabel: If set, contains a pubsub label from which to extract record ids. If left empty, record deduplication will be strictly best effort. subscription: A pubsub subscription, in the form of @@ -4222,12 +4771,13 @@ class PubsubLocation(_messages.Message): """ dropLateData = _messages.BooleanField(1) - idLabel = _messages.StringField(2) - subscription = _messages.StringField(3) - timestampLabel = _messages.StringField(4) - topic = _messages.StringField(5) - trackingSubscription = _messages.StringField(6) - withAttributes = _messages.BooleanField(7) + dynamicDestinations = _messages.BooleanField(2) + idLabel = _messages.StringField(3) + subscription = _messages.StringField(4) + timestampLabel = _messages.StringField(5) + topic = _messages.StringField(6) + trackingSubscription = _messages.StringField(7) + withAttributes = _messages.BooleanField(8) class PubsubSnapshotMetadata(_messages.Message): @@ -4244,31 +4794,6 @@ class PubsubSnapshotMetadata(_messages.Message): topicName = _messages.StringField(3) -class QueryInfo(_messages.Message): - r"""Information about a validated query. - - Enums: - QueryPropertyValueListEntryValuesEnum: - - Fields: - queryProperty: Includes an entry for each satisfied QueryProperty. - """ - class QueryPropertyValueListEntryValuesEnum(_messages.Enum): - r"""QueryPropertyValueListEntryValuesEnum enum type. - - Values: - QUERY_PROPERTY_UNSPECIFIED: The query property is unknown or - unspecified. - HAS_UNBOUNDED_SOURCE: Indicates this query reads from >= 1 unbounded - source. - """ - QUERY_PROPERTY_UNSPECIFIED = 0 - HAS_UNBOUNDED_SOURCE = 1 - - queryProperty = _messages.EnumField( - 'QueryPropertyValueListEntryValuesEnum', 1, repeated=True) - - class ReadInstruction(_messages.Message): r"""An instruction that reads records. Takes no inputs, produces one output. @@ -4448,69 +4973,88 @@ class ResourceUtilizationReportResponse(_messages.Message): class RuntimeEnvironment(_messages.Message): - r"""The environment values to set at runtime. + r"""The environment values to set at runtime. LINT.IfChange Enums: - IpConfigurationValueValuesEnum: Configuration for VM IPs. + IpConfigurationValueValuesEnum: Optional. Configuration for VM IPs. + StreamingModeValueValuesEnum: Optional. Specifies the Streaming Engine + message processing guarantees. Reduces cost and latency but might result + in duplicate messages committed to storage. Designed to run simple + mapping streaming ETL jobs at the lowest cost. For example, Change Data + Capture (CDC) to BigQuery is a canonical use case. For more information, + see [Set the pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). Messages: - AdditionalUserLabelsValue: Additional user labels to be specified for the - job. 
Keys and values should follow the restrictions specified in the - [labeling restrictions](https://cloud.google.com/compute/docs/labeling- + AdditionalUserLabelsValue: Optional. Additional user labels to be + specified for the job. Keys and values should follow the restrictions + specified in the [labeling + restrictions](https://cloud.google.com/compute/docs/labeling- resources#restrictions) page. An object containing a list of "key": value pairs. Example: { "name": "wrench", "mass": "1kg", "count": "3" }. Fields: - additionalExperiments: Additional experiment flags for the job, specified - with the `--experiments` option. - additionalUserLabels: Additional user labels to be specified for the job. - Keys and values should follow the restrictions specified in the + additionalExperiments: Optional. Additional experiment flags for the job, + specified with the `--experiments` option. + additionalUserLabels: Optional. Additional user labels to be specified for + the job. Keys and values should follow the restrictions specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling- resources#restrictions) page. An object containing a list of "key": value pairs. Example: { "name": "wrench", "mass": "1kg", "count": "3" }. - bypassTempDirValidation: Whether to bypass the safety checks for the job's - temporary directory. Use with caution. - enableStreamingEngine: Whether to enable Streaming Engine for the job. - ipConfiguration: Configuration for VM IPs. - kmsKeyName: Name for the Cloud KMS key for the job. Key format is: - projects//locations//keyRings//cryptoKeys/ - machineType: The machine type to use for the job. Defaults to the value - from the template if not specified. - maxWorkers: The maximum number of Google Compute Engine instances to be - made available to your pipeline during execution, from 1 to 1000. - network: Network to which VMs will be assigned. If empty or unspecified, - the service will use the network "default". - numWorkers: The initial number of Google Compute Engine instnaces for the - job. - serviceAccountEmail: The email address of the service account to run the - job as. - subnetwork: Subnetwork to which VMs will be assigned, if desired. You can - specify a subnetwork using either a complete URL or an abbreviated path. - Expected to be of the form "https://www.googleapis.com/compute/v1/projec - ts/HOST_PROJECT_ID/regions/REGION/subnetworks/SUBNETWORK" or - "regions/REGION/subnetworks/SUBNETWORK". If the subnetwork is located in - a Shared VPC network, you must use the complete URL. - tempLocation: The Cloud Storage path to use for temporary files. Must be a - valid Cloud Storage URL, beginning with `gs://`. - workerRegion: The Compute Engine region + bypassTempDirValidation: Optional. Whether to bypass the safety checks for + the job's temporary directory. Use with caution. + diskSizeGb: Optional. The disk size, in gigabytes, to use on each remote + Compute Engine worker instance. + enableStreamingEngine: Optional. Whether to enable Streaming Engine for + the job. + ipConfiguration: Optional. Configuration for VM IPs. + kmsKeyName: Optional. Name for the Cloud KMS key for the job. Key format + is: projects//locations//keyRings//cryptoKeys/ + machineType: Optional. The machine type to use for the job. Defaults to + the value from the template if not specified. + maxWorkers: Optional. The maximum number of Google Compute Engine + instances to be made available to your pipeline during execution, from 1 + to 1000. The default value is 1. 
+ network: Optional. Network to which VMs will be assigned. If empty or + unspecified, the service will use the network "default". + numWorkers: Optional. The initial number of Google Compute Engine + instances for the job. The default value is 11. + serviceAccountEmail: Optional. The email address of the service account to + run the job as. + streamingMode: Optional. Specifies the Streaming Engine message processing + guarantees. Reduces cost and latency but might result in duplicate + messages committed to storage. Designed to run simple mapping streaming + ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to + BigQuery is a canonical use case. For more information, see [Set the + pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). + subnetwork: Optional. Subnetwork to which VMs will be assigned, if + desired. You can specify a subnetwork using either a complete URL or an + abbreviated path. Expected to be of the form "https://www.googleapis.com + /compute/v1/projects/HOST_PROJECT_ID/regions/REGION/subnetworks/SUBNETWO + RK" or "regions/REGION/subnetworks/SUBNETWORK". If the subnetwork is + located in a Shared VPC network, you must use the complete URL. + tempLocation: Required. The Cloud Storage path to use for temporary files. + Must be a valid Cloud Storage URL, beginning with `gs://`. + workerRegion: Required. The Compute Engine region (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in which worker processing should occur, e.g. "us-west1". Mutually exclusive with worker_zone. If neither worker_region nor worker_zone is specified, default to the control plane's region. - workerZone: The Compute Engine zone + workerZone: Optional. The Compute Engine zone (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in which worker processing should occur, e.g. "us-west1-a". Mutually exclusive with worker_region. If neither worker_region nor worker_zone is specified, a zone in the control plane's region is chosen based on available capacity. If both `worker_zone` and `zone` are set, `worker_zone` takes precedence. - zone: The Compute Engine [availability + zone: Optional. The Compute Engine [availability zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones) for launching worker instances to run your pipeline. In the future, worker_zone will take precedence. """ class IpConfigurationValueValuesEnum(_messages.Enum): - r"""Configuration for VM IPs. + r"""Optional. Configuration for VM IPs. Values: WORKER_IP_UNSPECIFIED: The configuration is unknown, or unspecified. @@ -4521,10 +5065,33 @@ class IpConfigurationValueValuesEnum(_messages.Enum): WORKER_IP_PUBLIC = 1 WORKER_IP_PRIVATE = 2 + class StreamingModeValueValuesEnum(_messages.Enum): + r"""Optional. Specifies the Streaming Engine message processing + guarantees. Reduces cost and latency but might result in duplicate + messages committed to storage. Designed to run simple mapping streaming + ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to + BigQuery is a canonical use case. For more information, see [Set the + pipeline streaming + mode](https://cloud.google.com/dataflow/docs/guides/streaming-modes). + + Values: + STREAMING_MODE_UNSPECIFIED: Run in the default mode. + STREAMING_MODE_EXACTLY_ONCE: In this mode, message deduplication is + performed against persistent state to make sure each message is + processed and committed to storage exactly once. 
+ STREAMING_MODE_AT_LEAST_ONCE: Message deduplication is not performed. + Messages might be processed multiple times, and the results are + applied multiple times. Note: Setting this value also enables + Streaming Engine and Streaming Engine resource-based billing. + """ + STREAMING_MODE_UNSPECIFIED = 0 + STREAMING_MODE_EXACTLY_ONCE = 1 + STREAMING_MODE_AT_LEAST_ONCE = 2 + @encoding.MapUnrecognizedFields('additionalProperties') class AdditionalUserLabelsValue(_messages.Message): - r"""Additional user labels to be specified for the job. Keys and values - should follow the restrictions specified in the [labeling + r"""Optional. Additional user labels to be specified for the job. Keys and + values should follow the restrictions specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling- resources#restrictions) page. An object containing a list of "key": value pairs. Example: { "name": "wrench", "mass": "1kg", "count": "3" }. @@ -4554,19 +5121,21 @@ class AdditionalProperty(_messages.Message): additionalExperiments = _messages.StringField(1, repeated=True) additionalUserLabels = _messages.MessageField('AdditionalUserLabelsValue', 2) bypassTempDirValidation = _messages.BooleanField(3) - enableStreamingEngine = _messages.BooleanField(4) - ipConfiguration = _messages.EnumField('IpConfigurationValueValuesEnum', 5) - kmsKeyName = _messages.StringField(6) - machineType = _messages.StringField(7) - maxWorkers = _messages.IntegerField(8, variant=_messages.Variant.INT32) - network = _messages.StringField(9) - numWorkers = _messages.IntegerField(10, variant=_messages.Variant.INT32) - serviceAccountEmail = _messages.StringField(11) - subnetwork = _messages.StringField(12) - tempLocation = _messages.StringField(13) - workerRegion = _messages.StringField(14) - workerZone = _messages.StringField(15) - zone = _messages.StringField(16) + diskSizeGb = _messages.IntegerField(4, variant=_messages.Variant.INT32) + enableStreamingEngine = _messages.BooleanField(5) + ipConfiguration = _messages.EnumField('IpConfigurationValueValuesEnum', 6) + kmsKeyName = _messages.StringField(7) + machineType = _messages.StringField(8) + maxWorkers = _messages.IntegerField(9, variant=_messages.Variant.INT32) + network = _messages.StringField(10) + numWorkers = _messages.IntegerField(11, variant=_messages.Variant.INT32) + serviceAccountEmail = _messages.StringField(12) + streamingMode = _messages.EnumField('StreamingModeValueValuesEnum', 13) + subnetwork = _messages.StringField(14) + tempLocation = _messages.StringField(15) + workerRegion = _messages.StringField(16) + workerZone = _messages.StringField(17) + zone = _messages.StringField(18) class RuntimeMetadata(_messages.Message): @@ -4581,6 +5150,29 @@ class RuntimeMetadata(_messages.Message): sdkInfo = _messages.MessageField('SDKInfo', 2) +class RuntimeUpdatableParams(_messages.Message): + r"""Additional job parameters that can only be updated during runtime using + the projects.jobs.update method. These fields have no effect when specified + during job creation. + + Fields: + maxNumWorkers: The maximum number of workers to cap autoscaling at. This + field is currently only supported for Streaming Engine jobs. + minNumWorkers: The minimum number of workers to scale down to. This field + is currently only supported for Streaming Engine jobs. 
+ workerUtilizationHint: Target worker utilization, compared against the + aggregate utilization of the worker pool by autoscaler, to determine + upscaling and downscaling when absent other constraints such as backlog. + For more information, see [Update an existing + pipeline](https://cloud.google.com/dataflow/docs/guides/updating-a- + pipeline). + """ + + maxNumWorkers = _messages.IntegerField(1, variant=_messages.Variant.INT32) + minNumWorkers = _messages.IntegerField(2, variant=_messages.Variant.INT32) + workerUtilizationHint = _messages.FloatField(3) + + class SDKInfo(_messages.Message): r"""SDK Information. @@ -4598,22 +5190,75 @@ class LanguageValueValuesEnum(_messages.Enum): UNKNOWN: UNKNOWN Language. JAVA: Java. PYTHON: Python. + GO: Go. """ UNKNOWN = 0 JAVA = 1 PYTHON = 2 + GO = 3 language = _messages.EnumField('LanguageValueValuesEnum', 1) version = _messages.StringField(2) +class SdkBug(_messages.Message): + r"""A bug found in the Dataflow SDK. + + Enums: + SeverityValueValuesEnum: Output only. How severe the SDK bug is. + TypeValueValuesEnum: Output only. Describes the impact of this SDK bug. + + Fields: + severity: Output only. How severe the SDK bug is. + type: Output only. Describes the impact of this SDK bug. + uri: Output only. Link to more information on the bug. + """ + class SeverityValueValuesEnum(_messages.Enum): + r"""Output only. How severe the SDK bug is. + + Values: + SEVERITY_UNSPECIFIED: A bug of unknown severity. + NOTICE: A minor bug that that may reduce reliability or performance for + some jobs. Impact will be minimal or non-existent for most jobs. + WARNING: A bug that has some likelihood of causing performance + degradation, data loss, or job failures. + SEVERE: A bug with extremely significant impact. Jobs may fail + erroneously, performance may be severely degraded, and data loss may + be very likely. + """ + SEVERITY_UNSPECIFIED = 0 + NOTICE = 1 + WARNING = 2 + SEVERE = 3 + + class TypeValueValuesEnum(_messages.Enum): + r"""Output only. Describes the impact of this SDK bug. + + Values: + TYPE_UNSPECIFIED: Unknown issue with this SDK. + GENERAL: Catch-all for SDK bugs that don't fit in the below categories. + PERFORMANCE: Using this version of the SDK may result in degraded + performance. + DATALOSS: Using this version of the SDK may cause data loss. + """ + TYPE_UNSPECIFIED = 0 + GENERAL = 1 + PERFORMANCE = 2 + DATALOSS = 3 + + severity = _messages.EnumField('SeverityValueValuesEnum', 1) + type = _messages.EnumField('TypeValueValuesEnum', 2) + uri = _messages.StringField(3) + + class SdkHarnessContainerImage(_messages.Message): - r"""Defines a SDK harness container for executing Dataflow pipelines. + r"""Defines an SDK harness container for executing Dataflow pipelines. Fields: capabilities: The set of capabilities enumerated in the above Environment - proto. See also https://github.com/apache/beam/blob/master/model/pipelin - e/src/main/proto/beam_runner_api.proto + proto. See also [beam_runner_api.proto](https://github.com/apache/beam/b + lob/master/model/pipeline/src/main/proto/org/apache/beam/model/pipeline/ + v1/beam_runner_api.proto) containerImage: A docker container image that resides in Google Container Registry. environmentId: Environment ID for the Beam runner API proto Environment @@ -4638,6 +5283,7 @@ class SdkVersion(_messages.Message): SdkSupportStatusValueValuesEnum: The support status for this SDK version. Fields: + bugs: Output only. Known bugs found in this SDK version. sdkSupportStatus: The support status for this SDK version. 
version: The version of the SDK used to run the job. versionDisplayName: A readable string describing the version of the SDK. @@ -4661,9 +5307,10 @@ class SdkSupportStatusValueValuesEnum(_messages.Enum): DEPRECATED = 3 UNSUPPORTED = 4 - sdkSupportStatus = _messages.EnumField('SdkSupportStatusValueValuesEnum', 1) - version = _messages.StringField(2) - versionDisplayName = _messages.StringField(3) + bugs = _messages.MessageField('SdkBug', 1, repeated=True) + sdkSupportStatus = _messages.EnumField('SdkSupportStatusValueValuesEnum', 2) + version = _messages.StringField(3) + versionDisplayName = _messages.StringField(4) class SendDebugCaptureRequest(_messages.Message): @@ -4796,6 +5443,17 @@ class SeqMapTaskOutputInfo(_messages.Message): tag = _messages.StringField(2) +class ServiceResources(_messages.Message): + r"""Resources used by the Dataflow Service to run the job. + + Fields: + zones: Output only. List of Cloud Zones being used by the Dataflow Service + for this job. Example: us-central1-c + """ + + zones = _messages.StringField(1, repeated=True) + + class ShellTask(_messages.Message): r"""A task which consists of a shell command for the worker to execute. @@ -5381,6 +6039,7 @@ class StageSummary(_messages.Message): stageId: ID of this stage startTime: Start time of this stage. state: State of this stage. + stragglerSummary: Straggler summary for this stage. """ class StateValueValuesEnum(_messages.Enum): r"""State of this stage. @@ -5406,6 +6065,7 @@ class StateValueValuesEnum(_messages.Enum): stageId = _messages.StringField(4) startTime = _messages.StringField(5) state = _messages.EnumField('StateValueValuesEnum', 6) + stragglerSummary = _messages.MessageField('StragglerSummary', 7) class StandardQueryParameters(_messages.Message): @@ -5536,15 +6196,16 @@ class Step(_messages.Message): r"""Defines a particular step within a Cloud Dataflow job. A job consists of multiple steps, each of which performs some specific operation as part of the overall job. Data is typically passed from one step to another as part - of the job. Here's an example of a sequence of steps which together - implement a Map-Reduce job: * Read a collection of data from some source, - parsing the collection's elements. * Validate the elements. * Apply a user- - defined function to map each element to some value and extract an element- - specific key value. * Group elements with the same key into a single element - with that key, transforming a multiply-keyed collection into a uniquely- - keyed collection. * Write the elements out to some data sink. Note that the - Cloud Dataflow service may be used to run many different types of jobs, not - just Map-Reduce. + of the job. **Note:** The properties of this object are not stable and might + change. Here's an example of a sequence of steps which together implement a + Map-Reduce job: * Read a collection of data from some source, parsing the + collection's elements. * Validate the elements. * Apply a user-defined + function to map each element to some value and extract an element-specific + key value. * Group elements with the same key into a single element with + that key, transforming a multiply-keyed collection into a uniquely-keyed + collection. * Write the elements out to some data sink. Note that the Cloud + Dataflow service may be used to run many different types of jobs, not just + Map-Reduce. Messages: PropertiesValue: Named properties associated with the step. 
Each kind of @@ -5590,6 +6251,120 @@ class AdditionalProperty(_messages.Message): properties = _messages.MessageField('PropertiesValue', 3) +class Straggler(_messages.Message): + r"""Information for a straggler. + + Fields: + batchStraggler: Batch straggler identification and debugging information. + streamingStraggler: Streaming straggler identification and debugging + information. + """ + + batchStraggler = _messages.MessageField('StragglerInfo', 1) + streamingStraggler = _messages.MessageField('StreamingStragglerInfo', 2) + + +class StragglerDebuggingInfo(_messages.Message): + r"""Information useful for debugging a straggler. Each type will provide + specialized debugging information relevant for a particular cause. The + StragglerDebuggingInfo will be 1:1 mapping to the StragglerCause enum. + + Fields: + hotKey: Hot key debugging details. + """ + + hotKey = _messages.MessageField('HotKeyDebuggingInfo', 1) + + +class StragglerInfo(_messages.Message): + r"""Information useful for straggler identification and debugging. + + Messages: + CausesValue: The straggler causes, keyed by the string representation of + the StragglerCause enum and contains specialized debugging information + for each straggler cause. + + Fields: + causes: The straggler causes, keyed by the string representation of the + StragglerCause enum and contains specialized debugging information for + each straggler cause. + startTime: The time when the work item attempt became a straggler. + """ + @encoding.MapUnrecognizedFields('additionalProperties') + class CausesValue(_messages.Message): + r"""The straggler causes, keyed by the string representation of the + StragglerCause enum and contains specialized debugging information for + each straggler cause. + + Messages: + AdditionalProperty: An additional property for a CausesValue object. + + Fields: + additionalProperties: Additional properties of type CausesValue + """ + class AdditionalProperty(_messages.Message): + r"""An additional property for a CausesValue object. + + Fields: + key: Name of the additional property. + value: A StragglerDebuggingInfo attribute. + """ + + key = _messages.StringField(1) + value = _messages.MessageField('StragglerDebuggingInfo', 2) + + additionalProperties = _messages.MessageField( + 'AdditionalProperty', 1, repeated=True) + + causes = _messages.MessageField('CausesValue', 1) + startTime = _messages.StringField(2) + + +class StragglerSummary(_messages.Message): + r"""Summarized straggler identification details. + + Messages: + StragglerCauseCountValue: Aggregated counts of straggler causes, keyed by + the string representation of the StragglerCause enum. + + Fields: + recentStragglers: The most recent stragglers. + stragglerCauseCount: Aggregated counts of straggler causes, keyed by the + string representation of the StragglerCause enum. + totalStragglerCount: The total count of stragglers. + """ + @encoding.MapUnrecognizedFields('additionalProperties') + class StragglerCauseCountValue(_messages.Message): + r"""Aggregated counts of straggler causes, keyed by the string + representation of the StragglerCause enum. + + Messages: + AdditionalProperty: An additional property for a + StragglerCauseCountValue object. + + Fields: + additionalProperties: Additional properties of type + StragglerCauseCountValue + """ + class AdditionalProperty(_messages.Message): + r"""An additional property for a StragglerCauseCountValue object. + + Fields: + key: Name of the additional property. + value: A string attribute. 
+ """ + + key = _messages.StringField(1) + value = _messages.IntegerField(2) + + additionalProperties = _messages.MessageField( + 'AdditionalProperty', 1, repeated=True) + + recentStragglers = _messages.MessageField('Straggler', 1, repeated=True) + stragglerCauseCount = _messages.MessageField('StragglerCauseCountValue', 2) + totalStragglerCount = _messages.IntegerField(3) + + class StreamLocation(_messages.Message): r"""Describes a stream of data, either as input to be processed or as output of a streaming Dataflow job. @@ -5736,6 +6511,8 @@ class StreamingConfigTask(_messages.Message): harness to windmill. maxWorkItemCommitBytes: Maximum size for work item commit supported windmill storage layer. + operationalLimits: Operational limits for the streaming job. Can be used + by the worker to validate outputs sent to the backend. streamingComputationConfigs: Set of computation configuration information. userStepToStateFamilyNameMap: Map from user step names to state families. windmillServiceEndpoint: If present, the worker must use this endpoint to @@ -5775,12 +6552,80 @@ class AdditionalProperty(_messages.Message): commitStreamChunkSizeBytes = _messages.IntegerField(1) getDataStreamChunkSizeBytes = _messages.IntegerField(2) maxWorkItemCommitBytes = _messages.IntegerField(3) + operationalLimits = _messages.MessageField('StreamingOperationalLimits', 4) streamingComputationConfigs = _messages.MessageField( - 'StreamingComputationConfig', 4, repeated=True) + 'StreamingComputationConfig', 5, repeated=True) userStepToStateFamilyNameMap = _messages.MessageField( - 'UserStepToStateFamilyNameMapValue', 5) - windmillServiceEndpoint = _messages.StringField(6) - windmillServicePort = _messages.IntegerField(7) + 'UserStepToStateFamilyNameMapValue', 6) + windmillServiceEndpoint = _messages.StringField(7) + windmillServicePort = _messages.IntegerField(8) + + +class StreamingOperationalLimits(_messages.Message): + r"""Operational limits imposed on streaming jobs by the backend. + + Fields: + maxBagElementBytes: The maximum size for an element in bag state. + maxGlobalDataBytes: The maximum size for an element in global data. + maxKeyBytes: The maximum size allowed for a key. + maxProductionOutputBytes: The maximum size for a single output element. + maxSortedListElementBytes: The maximum size for an element in sorted list + state. + maxSourceStateBytes: The maximum size for a source state update. + maxTagBytes: The maximum size for a state tag. + maxValueBytes: The maximum size for a value state field. + """ + + maxBagElementBytes = _messages.IntegerField(1) + maxGlobalDataBytes = _messages.IntegerField(2) + maxKeyBytes = _messages.IntegerField(3) + maxProductionOutputBytes = _messages.IntegerField(4) + maxSortedListElementBytes = _messages.IntegerField(5) + maxSourceStateBytes = _messages.IntegerField(6) + maxTagBytes = _messages.IntegerField(7) + maxValueBytes = _messages.IntegerField(8) + + +class StreamingScalingReport(_messages.Message): + r"""Contains per-user worker telemetry used in streaming autoscaling. + + Fields: + activeBundleCount: A integer attribute. + activeThreadCount: Current acive thread count. + maximumBundleCount: Maximum bundle count. + maximumBytes: Maximum bytes. + maximumBytesCount: A integer attribute. + maximumThreadCount: Maximum thread count limit. + outstandingBundleCount: Current outstanding bundle count. + outstandingBytes: Current outstanding bytes. + outstandingBytesCount: A integer attribute. 
+ """ + + activeBundleCount = _messages.IntegerField(1, variant=_messages.Variant.INT32) + activeThreadCount = _messages.IntegerField(2, variant=_messages.Variant.INT32) + maximumBundleCount = _messages.IntegerField( + 3, variant=_messages.Variant.INT32) + maximumBytes = _messages.IntegerField(4) + maximumBytesCount = _messages.IntegerField(5, variant=_messages.Variant.INT32) + maximumThreadCount = _messages.IntegerField( + 6, variant=_messages.Variant.INT32) + outstandingBundleCount = _messages.IntegerField( + 7, variant=_messages.Variant.INT32) + outstandingBytes = _messages.IntegerField(8) + outstandingBytesCount = _messages.IntegerField( + 9, variant=_messages.Variant.INT32) + + +class StreamingScalingReportResponse(_messages.Message): + r"""Contains per-user-worker streaming scaling recommendation from the + backend. + + Fields: + maximumThreadCount: Maximum thread count limit; + """ + + maximumThreadCount = _messages.IntegerField( + 1, variant=_messages.Variant.INT32) class StreamingSetupTask(_messages.Message): @@ -5829,6 +6674,26 @@ class StreamingStageLocation(_messages.Message): streamId = _messages.StringField(1) +class StreamingStragglerInfo(_messages.Message): + r"""Information useful for streaming straggler identification and debugging. + + Fields: + dataWatermarkLag: The event-time watermark lag at the time of the + straggler detection. + endTime: End time of this straggler. + startTime: Start time of this straggler. + systemWatermarkLag: The system watermark lag at the time of the straggler + detection. + workerName: Name of the worker where the straggler was detected. + """ + + dataWatermarkLag = _messages.StringField(1) + endTime = _messages.StringField(2) + startTime = _messages.StringField(3) + systemWatermarkLag = _messages.StringField(4) + workerName = _messages.StringField(5) + + class StringList(_messages.Message): r"""A metric value representing a list of strings. @@ -5923,14 +6788,27 @@ class TemplateMetadata(_messages.Message): r"""Metadata describing a template. Fields: + defaultStreamingMode: Optional. Indicates the default streaming mode for a + streaming template. Only valid if both supports_at_least_once and + supports_exactly_once are true. Possible values: UNSPECIFIED, + EXACTLY_ONCE and AT_LEAST_ONCE description: Optional. A description of the template. name: Required. The name of the template. parameters: The parameters for the template. + streaming: Optional. Indicates if the template is streaming or not. + supportsAtLeastOnce: Optional. Indicates if the streaming template + supports at least once mode. + supportsExactlyOnce: Optional. Indicates if the streaming template + supports exactly once mode. """ - description = _messages.StringField(1) - name = _messages.StringField(2) - parameters = _messages.MessageField('ParameterMetadata', 3, repeated=True) + defaultStreamingMode = _messages.StringField(1) + description = _messages.StringField(2) + name = _messages.StringField(3) + parameters = _messages.MessageField('ParameterMetadata', 4, repeated=True) + streaming = _messages.BooleanField(5) + supportsAtLeastOnce = _messages.BooleanField(6) + supportsExactlyOnce = _messages.BooleanField(7) class TopologyConfig(_messages.Message): @@ -6036,19 +6914,6 @@ class KindValueValuesEnum(_messages.Enum): outputCollectionName = _messages.StringField(6, repeated=True) -class ValidateResponse(_messages.Message): - r"""Response to the validation request. - - Fields: - errorMessage: Will be empty if validation succeeds. 
- queryInfo: Information about the validated query. Not defined if - validation fails. - """ - - errorMessage = _messages.StringField(1) - queryInfo = _messages.MessageField('QueryInfo', 2) - - class WorkItem(_messages.Message): r"""WorkItem represents basic information about a WorkItem to be executed in the cloud. @@ -6110,6 +6975,7 @@ class WorkItemDetails(_messages.Message): progress: Progress of this work item. startTime: Start time of this work item attempt. state: State of this work item. + stragglerInfo: Information about straggler detections for this work item. taskId: Name of this work item. """ class StateValueValuesEnum(_messages.Enum): @@ -6136,7 +7002,8 @@ class StateValueValuesEnum(_messages.Enum): progress = _messages.MessageField('ProgressTimeseries', 4) startTime = _messages.StringField(5) state = _messages.EnumField('StateValueValuesEnum', 6) - taskId = _messages.StringField(7) + stragglerInfo = _messages.MessageField('StragglerInfo', 7) + taskId = _messages.StringField(8) class WorkItemServiceState(_messages.Message): @@ -6459,6 +7326,8 @@ class WorkerMessage(_messages.Message): not be used here. Fields: + dataSamplingReport: Optional. Contains metrics related to go/dataflow- + data-sampling-telemetry. labels: Labels are used to group WorkerMessages. For example, a worker_message about a particular container might have the labels: { "JOB_ID": "2015-04-22", "WORKER_ID": "wordcount-vm-2015..." @@ -6466,12 +7335,16 @@ class WorkerMessage(_messages.Message): typically correspond to Label enum values. However, for ease of development other strings can be used as tags. LABEL_UNSPECIFIED should not be used here. + perWorkerMetrics: System defined metrics for this worker. + streamingScalingReport: Contains per-user worker telemetry used in + streaming autoscaling. time: The timestamp of the worker_message. workerHealthReport: The health of a worker. workerLifecycleEvent: Record of worker lifecycle events. workerMessageCode: A worker message code. workerMetrics: Resource metrics reported by workers. workerShutdownNotice: Shutdown notice by workers. + workerThreadScalingReport: Thread scaling information reported by workers. 
""" @encoding.MapUnrecognizedFields('additionalProperties') class LabelsValue(_messages.Message): @@ -6502,13 +7375,18 @@ class AdditionalProperty(_messages.Message): additionalProperties = _messages.MessageField( 'AdditionalProperty', 1, repeated=True) - labels = _messages.MessageField('LabelsValue', 1) - time = _messages.StringField(2) - workerHealthReport = _messages.MessageField('WorkerHealthReport', 3) - workerLifecycleEvent = _messages.MessageField('WorkerLifecycleEvent', 4) - workerMessageCode = _messages.MessageField('WorkerMessageCode', 5) - workerMetrics = _messages.MessageField('ResourceUtilizationReport', 6) - workerShutdownNotice = _messages.MessageField('WorkerShutdownNotice', 7) + dataSamplingReport = _messages.MessageField('DataSamplingReport', 1) + labels = _messages.MessageField('LabelsValue', 2) + perWorkerMetrics = _messages.MessageField('PerWorkerMetrics', 3) + streamingScalingReport = _messages.MessageField('StreamingScalingReport', 4) + time = _messages.StringField(5) + workerHealthReport = _messages.MessageField('WorkerHealthReport', 6) + workerLifecycleEvent = _messages.MessageField('WorkerLifecycleEvent', 7) + workerMessageCode = _messages.MessageField('WorkerMessageCode', 8) + workerMetrics = _messages.MessageField('ResourceUtilizationReport', 9) + workerShutdownNotice = _messages.MessageField('WorkerShutdownNotice', 10) + workerThreadScalingReport = _messages.MessageField( + 'WorkerThreadScalingReport', 11) class WorkerMessageCode(_messages.Message): @@ -6600,20 +7478,28 @@ class WorkerMessageResponse(_messages.Message): sender. Fields: + streamingScalingReportResponse: Service's streaming scaling response for + workers. workerHealthReportResponse: The service's response to a worker's health report. workerMetricsResponse: Service's response to reporting worker metrics (currently empty). workerShutdownNoticeResponse: Service's response to shutdown notice (currently empty). + workerThreadScalingReportResponse: Service's thread scaling recommendation + for workers. """ + streamingScalingReportResponse = _messages.MessageField( + 'StreamingScalingReportResponse', 1) workerHealthReportResponse = _messages.MessageField( - 'WorkerHealthReportResponse', 1) + 'WorkerHealthReportResponse', 2) workerMetricsResponse = _messages.MessageField( - 'ResourceUtilizationReportResponse', 2) + 'ResourceUtilizationReportResponse', 3) workerShutdownNoticeResponse = _messages.MessageField( - 'WorkerShutdownNoticeResponse', 3) + 'WorkerShutdownNoticeResponse', 4) + workerThreadScalingReportResponse = _messages.MessageField( + 'WorkerThreadScalingReportResponse', 5) class WorkerPool(_messages.Message): @@ -6884,6 +7770,29 @@ class WorkerShutdownNoticeResponse(_messages.Message): r"""Service-side response to WorkerMessage issuing shutdown notice.""" +class WorkerThreadScalingReport(_messages.Message): + r"""Contains information about the thread scaling information of a worker. + + Fields: + currentThreadCount: Current number of active threads in a worker. + """ + + currentThreadCount = _messages.IntegerField( + 1, variant=_messages.Variant.INT32) + + +class WorkerThreadScalingReportResponse(_messages.Message): + r"""Contains the thread scaling recommendation for a worker from the + backend. + + Fields: + recommendedThreadCount: Recommended number of threads for a worker. + """ + + recommendedThreadCount = _messages.IntegerField( + 1, variant=_messages.Variant.INT32) + + class WriteInstruction(_messages.Message): r"""An instruction that writes records. Takes one input, produces no outputs.