From de33204bd67bc897c3a19b709becd0b9473bd907 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 30 May 2025 11:16:38 +0200 Subject: [PATCH 01/29] chore(deps): update all dependencies (#2205) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 4a5b75346..a512dbd3a 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7a0946fae..049e88237 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,8 +12,8 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.2 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 7d0c91e3d..960eb6db4 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google.cloud.bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 9f131e5b8..27eb7459a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dae43eff3..fd8bd672b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 From b86329188ba35e61871db82ae1d95d2a576eed1b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 12:36:07 -0400 Subject: [PATCH 02/29] Fix: Update type hints for various BigQuery files (#2206) * Fix: Update type hints for various BigQuery files This commit addresses Issue #2132 by updating type hints in the following files: - google/cloud/bigquery/external_config.py - google/cloud/bigquery/job/base.py - google/cloud/bigquery/routine/routine.py - google/cloud/bigquery/schema.py - google/cloud/bigquery/table.py These changes improve code clarity and maintainability by providing more accurate type information. 
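A rough sketch of the pattern these hints move toward, for context (illustrative only; `properties` below is a stand-in for an object's `_properties` dict, not code from this PR):

```python
import typing
from typing import Any, Dict

properties: Dict[str, Any] = {"schema": {"fields": [{"name": "col1"}]}}

# Narrow the untyped dict lookup for the type checker with typing.cast
# instead of silencing it with a blanket `# type: ignore` pragma.
prop: Dict[str, Any] = typing.cast(Dict[str, Any], properties.get("schema", {}))
fields = prop.get("fields", [])
```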
* updates type hints across the board --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/external_config.py | 9 +++++---- google/cloud/bigquery/job/base.py | 4 +--- google/cloud/bigquery/routine/routine.py | 12 +++--------- google/cloud/bigquery/schema.py | 6 ++---- google/cloud/bigquery/table.py | 11 ++++++----- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 6e943adf3..cb8141cd0 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +import typing from typing import Any, Dict, FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes @@ -835,10 +836,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - prop = self._properties.get("schema", {}) # type: ignore - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore + prop: Dict[str, Any] = typing.cast( + Dict[str, Any], self._properties.get("schema", {}) + ) + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] @schema.setter def schema(self, value): diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 5eb700ce7..f007b9341 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -435,9 +435,7 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - configuration = self._CONFIG_CLASS() # pytype: disable=not-callable + configuration: _JobConfig = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 7e079781d..e933fa137 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -518,23 +518,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["projectId"] # pytype: disable=typed-dict-error + return self._properties.get("projectId", "") @property def dataset_id(self): """str: ID of dataset containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["datasetId"] # pytype: disable=typed-dict-error + return self._properties.get("datasetId", "") @property def routine_id(self): """str: The routine ID.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. 
See Issue: #2132 - return self._properties["routineId"] # pytype: disable=typed-dict-error + return self._properties.get("routineId", "") @property def path(self): diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 1f1aab7a4..456730b00 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -232,11 +232,9 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() # pytype: disable=attribute-error - if policy_tags is not None + policy_tags.to_api_repr() + if isinstance(policy_tags, PolicyTagList) else None ) if isinstance(range_element_type, str): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3b1334bd3..3ffd5ca56 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -137,9 +137,9 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) -# TODO: The typehinting for this needs work. Setting this pragma to temporarily -# manage a pytype issue that came up in another PR. See Issue: #2132 -def _view_use_legacy_sql_getter(table): +def _view_use_legacy_sql_getter( + table: Union["Table", "TableListItem"] +) -> Optional[bool]: """bool: Specifies whether to execute the view with Legacy or Standard SQL. This boolean specifies whether to execute the view with Legacy SQL @@ -151,15 +151,16 @@ def _view_use_legacy_sql_getter(table): ValueError: For invalid value types. """ - view = table._properties.get("view") # type: ignore + view: Optional[Dict[str, Any]] = table._properties.get("view") if view is not None: # The server-side default for useLegacySql is True. - return view.get("useLegacySql", True) # type: ignore + return view.get("useLegacySql", True) if view is not None else True # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": # The server-side default for useLegacySql is True. return True + return None # explicit return statement to appease mypy class _TableBase: From eb9c2aff242c5107f968bbd8b6a9d30cecc877f6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 17:58:02 -0400 Subject: [PATCH 03/29] feat: Add UpdateMode to update_dataset (#2204) * feat: Add UpdateMode to update_dataset This commit introduces the `UpdateMode` enum and integrates it into the `update_dataset` method in the BigQuery client. The `UpdateMode` enum allows you to specify which parts of a dataset should be updated (metadata, ACL, or full update). The following changes were made: - Defined the `UpdateMode` enum in `google/cloud/bigquery/enums.py` with values: `UPDATE_MODE_UNSPECIFIED`, `UPDATE_METADATA`, `UPDATE_ACL`, and `UPDATE_FULL`. - Modified the `update_dataset` method in `google/cloud/bigquery/client.py` to accept an optional `update_mode` parameter. This parameter is added to the query parameters if provided. - Added unit tests in `tests/unit/test_client.py` to verify the correct handling of the `update_mode` parameter, including testing all enum values and the default case where it's not provided. 
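For context, a minimal usage sketch of the new parameter (the project and dataset IDs below are placeholders, not part of this change):

```python
from google.cloud import bigquery
from google.cloud.bigquery.enums import UpdateMode

client = bigquery.Client()
dataset = client.get_dataset("my-project.my_dataset")
dataset.description = "Nightly load target"

# Patch only the listed metadata fields; ACL entries are left untouched.
dataset = client.update_dataset(
    dataset, fields=["description"], update_mode=UpdateMode.UPDATE_METADATA
)
```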
* updates enums, client, and tests --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/client.py | 22 ++++++++ google/cloud/bigquery/enums.py | 18 +++++++ tests/unit/test_client.py | 93 ++++++++++++++++++++++++++++++++- 3 files changed, 132 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c6873545b..cc3b3eb2a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -91,6 +91,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -1198,6 +1199,7 @@ def update_dataset( fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + update_mode: Optional[UpdateMode] = None, ) -> Dataset: """Change some fields of a dataset. @@ -1237,6 +1239,20 @@ def update_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]): + Specifies the kind of information to update in a dataset. + By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are updated. This argument can + take on the following possible enum values. + + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_MODE_UNSPECIFIED`: + The default value. Behavior defaults to UPDATE_FULL. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`: + Includes metadata information for the dataset, such as friendlyName, description, labels, etc. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`: + Includes ACL information for the dataset, which defines dataset access for one or more entities. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`: + Includes both dataset metadata and ACL information. Returns: google.cloud.bigquery.dataset.Dataset: @@ -1250,6 +1266,11 @@ def update_dataset( path = dataset.path span_attributes = {"path": path, "fields": fields} + if update_mode: + query_params = {"updateMode": update_mode.value} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateDataset", @@ -1259,6 +1280,7 @@ def update_dataset( data=partial, headers=headers, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 4cb7a056d..e9cd911d0 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -409,6 +409,24 @@ class BigLakeTableFormat(object): """Apache Iceberg format.""" +class UpdateMode(enum.Enum): + """Specifies the kind of information to update in a dataset.""" + + UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED" + """The default value. 
Behavior defaults to UPDATE_FULL.""" + + UPDATE_METADATA = "UPDATE_METADATA" + """Includes metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + UPDATE_ACL = "UPDATE_ACL" + """Includes ACL information for the dataset, which defines dataset access + for one or more entities.""" + + UPDATE_FULL = "UPDATE_FULL" + """Includes both dataset metadata and ACL information.""" + + class JobCreationMode(object): """Documented values for Job Creation Mode.""" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8ce8d2cbd..ed092bcdb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,7 +60,8 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers -from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.dataset import DatasetReference, Dataset +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -2101,6 +2102,7 @@ def test_update_dataset(self): }, path="/" + PATH, timeout=7.5, + query_params={}, ) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) @@ -2114,6 +2116,94 @@ def test_update_dataset(self): client.update_dataset(ds, []) req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertEqual(req[1].get("query_params"), {}) + + def test_update_dataset_w_update_mode(self): + PATH = f"projects/{self.PROJECT}/datasets/{self.DS_ID}" + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + RESOURCE = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] + + test_cases = [ + (None, None), + (UpdateMode.UPDATE_MODE_UNSPECIFIED, "UPDATE_MODE_UNSPECIFIED"), + (UpdateMode.UPDATE_METADATA, "UPDATE_METADATA"), + (UpdateMode.UPDATE_ACL, "UPDATE_ACL"), + (UpdateMode.UPDATE_FULL, "UPDATE_FULL"), + ] + + for update_mode_arg, expected_param_value in test_cases: + with self.subTest( + update_mode_arg=update_mode_arg, + expected_param_value=expected_param_value, + ): + conn = client._connection = make_connection(RESOURCE, RESOURCE) + + new_dataset = client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=update_mode_arg, + ) + self.assertEqual(orig_dataset.description, new_dataset.description) + + if expected_param_value: + expected_query_params = {"updateMode": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="PATCH", + path="/" + PATH, + data={"description": DESCRIPTION}, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_update_dataset_w_invalid_update_mode(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + } + + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] # A 
non-empty list of fields is required + + # Mock the connection to prevent actual API calls + # and to provide a minimal valid response if the call were to proceed. + conn = client._connection = make_connection(resource) + + test_cases = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + + for invalid_update_mode in test_cases: + with self.subTest(invalid_update_mode=invalid_update_mode): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=invalid_update_mode, + ) def test_update_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not @@ -2145,6 +2235,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(dataset.dataset_id, self.DS_ID) From 28a5750d455f0381548df6f9b1f7661823837d81 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 2 Jun 2025 05:42:08 -0400 Subject: [PATCH 04/29] feat: Adds dataset_view parameter to get_dataset method (#2198) * feat: Add dataset_view parameter to get_dataset method This commit introduces a new `dataset_view` parameter to the `get_dataset` method in the BigQuery client. This allows you to specify the level of detail (METADATA, ACL, FULL) returned when fetching a dataset. The `DatasetView` enum has been added to `enums.py`. Unit tests have been added to verify: - Correct query parameter (`view`) formation for each enum value. - Correct behavior when `dataset_view` is None. - AttributeError is raised for invalid `dataset_view` types. * test edits, linting, etc. * Fixes docstring * updates docstrings * update parameter name to align with discovery doc * Update google/cloud/bigquery/client.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/client.py | 26 +++++++++++- google/cloud/bigquery/enums.py | 18 ++++++++ tests/unit/test_client.py | 70 ++++++++++++++++++++++++++++++- tests/unit/test_create_dataset.py | 7 +++- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cc3b3eb2a..bb4d80c73 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -90,8 +90,8 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.enums import UpdateMode + +from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -865,6 +865,7 @@ def get_dataset( dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + dataset_view: Optional[DatasetView] = None, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -882,7 +883,21 @@ def get_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]): + Specifies the view that determines which dataset information is + returned. By default, dataset metadata (e.g. 
friendlyName, description, + labels, etc) and ACL information are returned. This argument can + take on the following possible enum values. + * :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`: + Includes dataset metadata and the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`: + Includes all dataset metadata, including the ACL and table metadata. + This view is not supported by the `datasets.list` API method. + * :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`: + Includes basic dataset metadata, but not the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`: + The server will decide which view to use. Currently defaults to FULL. Returns: google.cloud.bigquery.dataset.Dataset: A ``Dataset`` instance. @@ -892,6 +907,12 @@ def get_dataset( dataset_ref, default_project=self.project ) path = dataset_ref.path + + if dataset_view: + query_params = {"datasetView": dataset_view.value} + else: + query_params = {} + span_attributes = {"path": path} api_response = self._call_api( retry, @@ -900,6 +921,7 @@ def get_dataset( method="GET", path=path, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e9cd911d0..9a1e4880c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -80,6 +80,24 @@ class CreateDisposition(object): returned in the job result.""" +class DatasetView(enum.Enum): + """DatasetView specifies which dataset information is returned.""" + + DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED" + """The default value. Currently maps to the FULL view.""" + + METADATA = "METADATA" + """View metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + ACL = "ACL" + """View ACL information for the dataset, which defines dataset access + for one or more entities.""" + + FULL = "FULL" + """View both dataset metadata and ACL information.""" + + class DefaultPandasDTypes(enum.Enum): """Default Pandas DataFrem DTypes to convert BigQuery data. 
These Sentinel values are used instead of None to maintain backward compatibility, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ed092bcdb..42bfc84b9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode +from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -753,7 +753,7 @@ def test_get_dataset(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 + method="GET", path="/%s" % path, timeout=7.5, query_params={} ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -819,6 +819,72 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_get_dataset_with_dataset_view(self): + path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + test_cases = [ + (None, None), + (DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"), + (DatasetView.METADATA, "METADATA"), + (DatasetView.ACL, "ACL"), + (DatasetView.FULL, "FULL"), + ] + + for dataset_view_arg, expected_param_value in test_cases: + with self.subTest( + dataset_view_arg=dataset_view_arg, + expected_param_value=expected_param_value, + ): + # Re-initialize the connection mock for each sub-test to reset side_effect + conn = client._connection = make_connection(resource) + + dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + + if expected_param_value: + expected_query_params = {"datasetView": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % path, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_get_dataset_with_invalid_dataset_view(self): + invalid_view_values = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + conn = client._connection = make_connection(resource) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + for invalid_view_value in invalid_view_values: + with self.subTest(invalid_view_value=invalid_view_value): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.get_dataset(dataset_ref, dataset_view=invalid_view_value) + def test_ensure_bqstorage_client_creating_new_instance(self): bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index bd7c6a8f8..b144471ca 100644 --- 
a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) }, timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), + mock.call( + method="GET", + path=get_path, + timeout=DEFAULT_TIMEOUT, + query_params={}, + ), ] ) From 0378caa0fdaeec23929b179ca62a7199a8a6098d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 5 Jun 2025 00:19:15 +0200 Subject: [PATCH 05/29] chore(deps): update all dependencies (#2209) * chore(deps): update all dependencies * pin geopandas===1.0.1 for python <= 3.9 --------- Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 9 +++++---- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 3ca365401..824a1df4a 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 049e88237..5ff1c0c02 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -9,8 +9,9 @@ cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 -geopandas==1.0.1 -google-api-core==2.24.2 +geopandas===1.0.1; python_version <= '3.9' +geopandas==1.1.0; python_version >= '3.10' +google-api-core==2.25.0 google-auth==2.40.2 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 @@ -18,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.71.0 +grpcio==1.72.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 @@ -38,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.2 +typing-extensions==4.14.0 typing-inspect==0.9.0 urllib3==2.4.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 27eb7459a..c3feffb35 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -3,7 +3,7 @@ db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 
google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.2.0; python_version >= '3.10' +ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 767f71fb1..d311187ec 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 From 45643a2e20ce5d503118522dd195aeca00dec3bc Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 11:01:02 -0700 Subject: [PATCH 06/29] fix: fix rows returned when both start_index and page_size are provided (#2181) * fix: fix total rows returned when both start_index and page_size are provided * use shallow copy and add comments * add docstring * add unit test * lint * add comment --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 9 +++- google/cloud/bigquery/job/query.py | 8 ++++ google/cloud/bigquery/table.py | 11 ++++- tests/unit/job/test_query.py | 72 ++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index bb4d80c73..811e9ef03 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2045,6 +2045,7 @@ def _get_query_results( location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = 0, + start_index: Optional[int] = None, ) -> _QueryResults: """Get the query results object for a query job. @@ -2063,9 +2064,12 @@ def _get_query_results( before using ``retry``. If set, this connection timeout may be increased to a minimum value. This prevents retries on what would otherwise be a successful response. - page_size (int): + page_size (Optional[int]): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. Returns: google.cloud.bigquery.query._QueryResults: @@ -2095,6 +2099,9 @@ def _get_query_results( if location is not None: extra_params["location"] = location + if start_index is not None: + extra_params["startIndex"] = start_index + path = "/projects/{}/queries/{}".format(project, job_id) # This call is typically made in a polling loop that checks whether the diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 954a46963..4d95f0e71 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1409,6 +1409,7 @@ def _reload_query_results( retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None, page_size: int = 0, + start_index: Optional[int] = None, ): """Refresh the cached query results unless already cached and complete. @@ -1421,6 +1422,9 @@ def _reload_query_results( page_size (int): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. """ # Optimization: avoid a call to jobs.getQueryResults if it's already # been fetched, e.g. from jobs.query first page of results. 
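For reference, a sketch of the call pattern this fix targets (the query and values below are placeholders, not code from this PR):

```python
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 1000"
)

# start_index applies only to the first jobs.getQueryResults request;
# subsequent pages are fetched with page_token alone, because the API
# accepts one of startIndex or pageToken, not both.
rows = job.result(page_size=100, start_index=10)
for row in rows:
    print(row)
```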
@@ -1468,6 +1472,7 @@ def _reload_query_results( location=self.location, timeout=transport_timeout, page_size=page_size, + start_index=start_index, ) def result( # type: ignore # (incompatible with supertype) @@ -1570,6 +1575,9 @@ def result( # type: ignore # (incompatible with supertype) if page_size is not None: reload_query_results_kwargs["page_size"] = page_size + if start_index is not None: + reload_query_results_kwargs["start_index"] = start_index + try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3ffd5ca56..861f806b4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1987,12 +1987,19 @@ def _get_next_page_response(self): return response params = self._get_query_params() + + # If the user has provided page_size and start_index, we need to pass + # start_index for the first page, but for all subsequent pages, we + # should not pass start_index. We make a shallow copy of params and do + # not alter the original, so if the user iterates the results again, + # start_index is preserved. + params_copy = copy.copy(params) if self._page_size is not None: if self.page_number and "startIndex" in params: - del params["startIndex"] + del params_copy["startIndex"] return self.api_request( - method=self._HTTP_METHOD, path=self.path, query_params=params + method=self._HTTP_METHOD, path=self.path, query_params=params_copy ) @property diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 1df65279d..46b802aa3 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1682,6 +1682,78 @@ def test_result_with_start_index(self): tabledata_list_request[1]["query_params"]["maxResults"], page_size ) + def test_result_with_start_index_multi_page(self): + # When there are multiple pages of response and the user has set + # start_index, we should supply start_index to the server in the first + # request. However, in the subsequent requests, we will pass only + # page_token but not start_index, because the server only allows one + # of them. + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "7", + } + + # Although the result has 7 rows, the response only returns 6, because + # start_index is 1. + tabledata_resource_1 = { + "totalRows": "7", + "pageToken": "page_token_1", + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + tabledata_resource_2 = { + "totalRows": "7", + "pageToken": None, + "rows": [ + {"f": [{"v": "jkl"}]}, + {"f": [{"v": "mno"}]}, + {"f": [{"v": "pqe"}]}, + ], + } + + connection = make_connection( + query_resource, tabledata_resource_1, tabledata_resource_2 + ) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + page_size = 3 + + result = job.result(page_size=page_size, start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 7) + + rows = list(result) + + self.assertEqual(len(rows), 6) + self.assertEqual(len(connection.api_request.call_args_list), 3) + + # First call has both startIndex and maxResults. 
+ tabledata_list_request_1 = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["startIndex"], start_index + ) + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["maxResults"], page_size + ) + + # Second call only has maxResults. + tabledata_list_request_2 = connection.api_request.call_args_list[2] + self.assertFalse("startIndex" in tabledata_list_request_2[1]["query_params"]) + self.assertEqual( + tabledata_list_request_2[1]["query_params"]["maxResults"], page_size + ) + def test_result_error(self): from google.cloud import exceptions From bd5aba8ba40c2f35fb672a68eed11d6baedb304f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 12:01:06 -0700 Subject: [PATCH 07/29] docs: Improve clarity of "Output Only" fields in Dataset class (#2201) fixes b/407210727 --- google/cloud/bigquery/dataset.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index f788275cd..ec4098511 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -574,6 +574,10 @@ class Dataset(object): A pointer to a dataset. If ``dataset_ref`` is a string, it must include both the project ID and the dataset ID, separated by ``.``. + + Note: + Fields marked as "Output Only" are populated by the server and will only be + available after calling :meth:`google.cloud.bigquery.client.Client.get_dataset`. """ _PROPERTY_TO_API_FIELD = { @@ -692,7 +696,7 @@ def access_entries(self, value): @property def created(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was created (:data:`None` until set from the server). """ creation_time = self._properties.get("creationTime") @@ -709,8 +713,8 @@ def dataset_id(self): @property def full_dataset_id(self): - """Union[str, None]: ID for the dataset resource (:data:`None` until - set from the server) + """Union[str, None]: Output only. ID for the dataset resource + (:data:`None` until set from the server). In the format ``project_id:dataset_id``. """ @@ -725,14 +729,14 @@ def reference(self): @property def etag(self): - """Union[str, None]: ETag for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. ETag for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("etag") @property def modified(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was last modified (:data:`None` until set from the server). """ modified_time = self._properties.get("lastModifiedTime") @@ -744,8 +748,8 @@ def modified(self): @property def self_link(self): - """Union[str, None]: URL for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. URL for the dataset resource + (:data:`None` until set from the server). 
""" return self._properties.get("selfLink") From 99493bfb0d6230b9a04583d2b9dc40bc84ffdc49 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 9 Jun 2025 12:31:37 -0400 Subject: [PATCH 08/29] test: remove pragma (#2212) * test: remove pragma * test: remove comment about pragma * updates to conditionals related to pandas 2.0+ tests --- google/cloud/bigquery/_pandas_helpers.py | 9 ++------- tests/unit/job/test_query_pandas.py | 12 +++++------- tests/unit/test_table_pandas.py | 14 ++++++-------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 10a5c59bb..2dab03a06 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -337,13 +337,8 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - # TODO: this section does not have a test yet OR at least not one that is - # recognized by coverage, hence the pragma. See Issue: #2132 - elif ( - range_timestamp_dtype is not None - and arrow_data_type.equals( # pragma: NO COVER - range_timestamp_dtype.pyarrow_dtype - ) + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype ): return range_timestamp_dtype diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index d82f0dfe3..a6c59b158 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -647,12 +647,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -704,7 +698,6 @@ def test_to_dataframe_column_dtypes(): exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] assert list(df) == exp_columns # verify the column names - assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" @@ -712,6 +705,11 @@ def test_to_dataframe_column_dtypes(): assert df.complete.dtype.name == "boolean" assert df.date.dtype.name == "dbdate" + if pandas.__version__.startswith("2."): + assert df.start_timestamp.dtype.name == "datetime64[us, UTC]" + else: + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 43d64d77d..a4fa3fa39 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -34,12 +34,6 @@ def class_under_test(): return RowIterator -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. 
-@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) def test_to_dataframe_nullable_scalars( monkeypatch, class_under_test ): # pragma: NO COVER @@ -113,14 +107,18 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["bytes_col"].name == "object" assert df.dtypes["date_col"].name == "dbdate" - assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["numeric_col"].name == "object" assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" - assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" assert df.dtypes["json_col"].name == "object" + if pandas.__version__.startswith("2."): + assert df.dtypes["datetime_col"].name == "datetime64[us]" + assert df.dtypes["timestamp_col"].name == "datetime64[us, UTC]" + else: + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" # Check for expected values. assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From dc374b4e22de98850c54643a58bb9e80f865dcf7 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 18:58:40 +0200 Subject: [PATCH 09/29] chore(deps): update all dependencies (#2211) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5ff1c0c02..4ebff482d 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,19 +12,19 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' google-api-core==2.25.0 -google-auth==2.40.2 +google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.72.1 +grpcio==1.73.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.2.3 +pandas==2.3.0 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 960eb6db4..d2456fc5a 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -3,4 +3,4 @@ db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.2.3 +pandas==2.3.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c3feffb35..66409e49d 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -6,4 +6,4 @@ ipython===8.18.1; python_version == '3.9' ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.2.3 +pandas==2.3.0 From 5a0fbf5cca551626d3cb49f934369049450546c1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 20:53:46 +0200 Subject: [PATCH 10/29] chore(deps): update dependency requests to v2.32.4 (#2213) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt 
b/samples/geography/requirements.txt index 4ebff482d..75a196eeb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.3 +requests==2.32.4 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' From 96b067da092836f0d8e19d5df683a0e5680caee8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 13 Jun 2025 19:49:19 +0200 Subject: [PATCH 11/29] chore(deps): update dependency google-api-core to v2.25.1 (#2215) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 75a196eeb..daaf67b9e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' -google-api-core==2.25.0 +google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 From 4fbb37595f0e148b7912f26ac8e48a996a6cbae6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 14:36:25 -0700 Subject: [PATCH 12/29] chore(deps): bump urllib3 from 2.4.0 to 2.5.0 in /samples/geography (#2220) Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.5.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index daaf67b9e..379d682b4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.14.0 typing-inspect==0.9.0 -urllib3==2.4.0 +urllib3==2.5.0 From 6e70fe2c4c0bec6d6aeb16ab5a83b01746e8c64a Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Mon, 23 Jun 2025 11:50:33 -0400 Subject: [PATCH 13/29] chore: add label job sample (#2219) * chore: add label job sample * lint * remove unnecessary api call * Apply suggestions from code review Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- samples/snippets/label_job.py | 36 ++++++++++++++++++++++++++++++ samples/snippets/label_job_test.py | 31 +++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 samples/snippets/label_job.py create mode 100644 samples/snippets/label_job_test.py diff --git a/samples/snippets/label_job.py b/samples/snippets/label_job.py new file mode 100644 index 000000000..cfd06d189 --- /dev/null +++ b/samples/snippets/label_job.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_job() -> None: + # [START bigquery_label_job] + from google.cloud import bigquery + + client = bigquery.Client() + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + labels = {"color": "green"} + + config = bigquery.QueryJobConfig() + config.labels = labels + location = "us" + job = client.query(sql, location=location, job_config=config) + job_id = job.job_id + + print(f"Added {job.labels} to {job_id}.") + # [END bigquery_label_job] diff --git a/samples/snippets/label_job_test.py b/samples/snippets/label_job_test.py new file mode 100644 index 000000000..0780db61a --- /dev/null +++ b/samples/snippets/label_job_test.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import label_job # type: ignore + + +if typing.TYPE_CHECKING: + import pytest + + +def test_label_job( + capsys: "pytest.CaptureFixture[str]", +) -> None: + label_job.label_job() + + out, _ = capsys.readouterr() + assert "color" in out + assert "green" in out From cd2e1387c98e9df74ec85b1f3a3aba371d9ad7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 23 Jun 2025 20:32:50 +0200 Subject: [PATCH 14/29] chore: update PyPI URL for official nightly pyarrow repository (#2223) Co-authored-by: Lingqing Gan --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 6807b7ee4..eb79c238d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -408,10 +408,10 @@ def prerelease_deps(session): ) # PyArrow prerelease packages are published to an alternative PyPI host. 
- # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + # https://arrow.apache.org/docs/developers/python.html#installing-nightly-packages session.install( "--extra-index-url", - "https://pypi.fury.io/arrow-nightlies/", + "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple", "--prefer-binary", "--pre", "--upgrade", From 7c9e7fde1d710641c27247fa5f5271c86a9be2b1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 25 Jun 2025 19:08:09 +0200 Subject: [PATCH 15/29] chore(deps): update all dependencies (#2216) Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 824a1df4a..ee895a4f4 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 379d682b4..f8f79a970 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.4.26 +certifi==2025.6.15 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d311187ec..d71018b3f 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 From 4941de441cb32cabeb55ec0320f305fb62551155 Mon Sep 17 00:00:00 2001 From: Prabakar <86585391+drokeye@users.noreply.github.com> Date: Thu, 26 Jun 2025 23:28:53 +0530 Subject: [PATCH 16/29] fix: make AccessEntry equality consistent with from_api_repr (#2218) * fix: make AccessEntry equality consistent for view entity type * fix: make AccessEntry equality consistent for view entity type * fix: use json.dumps() for normalization of entity_id * remove trailing whitespace and add test assertions * revert back to the original code * fix linting in `dataset.py` * fix linting in `test_dataset.py` --------- Co-authored-by: Lingqing 
Gan --- google/cloud/bigquery/dataset.py | 17 +++++++- tests/unit/test_dataset.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index ec4098511..878b77d41 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import copy +import json import typing from typing import Optional, List, Dict, Any, Union @@ -506,7 +507,20 @@ def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: def __eq__(self, other): if not isinstance(other, AccessEntry): return NotImplemented - return self._key() == other._key() + return ( + self.role == other.role + and self.entity_type == other.entity_type + and self._normalize_entity_id(self.entity_id) + == self._normalize_entity_id(other.entity_id) + and self.condition == other.condition + ) + + @staticmethod + def _normalize_entity_id(value): + """Ensure consistent equality for dicts like 'view'.""" + if isinstance(value, dict): + return json.dumps(value, sort_keys=True) + return value def __ne__(self, other): return not self == other @@ -557,7 +571,6 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``. """ - access_entry = cls() access_entry._properties = resource.copy() return access_entry diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 3fd2579af..604e5ed2e 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1767,3 +1767,70 @@ def test__hash__with_minimal_inputs(self): description=None, ) assert hash(cond1) is not None + + def test_access_entry_view_equality(self): + from google.cloud import bigquery + + entry1 = bigquery.dataset.AccessEntry( + entity_type="view", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + }, + ) + entry2 = bigquery.dataset.AccessEntry.from_api_repr( + { + "view": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + } + ) + + entry3 = bigquery.dataset.AccessEntry( + entity_type="routine", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + ) + + entry4 = bigquery.dataset.AccessEntry.from_api_repr( + { + "routine": { + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + } + ) + + entry5 = bigquery.dataset.AccessEntry( + entity_type="dataset", + entity_id={ + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + }, + ) + + entry6 = bigquery.dataset.AccessEntry.from_api_repr( + { + "dataset": { + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + } + } + ) + + assert entry1 == entry2 + assert entry3 == entry4 + assert entry5 == entry6 From 37e4e0ed8e6ffba6584a37131f03cb77b4fcfe64 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 27 Jun 2025 21:00:19 +0200 Subject: [PATCH 17/29] chore(deps): update all dependencies (#2224) --- samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f8f79a970..68f6c1662 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -4,13 +4,13 @@ cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' 
click==8.2.1; python_version >= '3.10' -click-plugins==1.1.1 +click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' -geopandas==1.1.0; python_version >= '3.10' +geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 @@ -19,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.0 +grpcio==1.73.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 From 27ff3a89a5f97305fa3ff673aa9183baa7df200f Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 2 Jul 2025 17:00:39 -0400 Subject: [PATCH 18/29] =?UTF-8?q?fix:=20adds=20magics.context.project=20to?= =?UTF-8?q?=20eliminate=20issues=20with=20unit=20tests=20=E2=80=A6=20(#222?= =?UTF-8?q?8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `magics.context.project` to eliminate issues with unit tests in an upcoming PR. Several magics unit tests fail with an error message. If the test does not have knowledge of the project, it attempts to initiate a login sequence to be able to get the project identifier. The login cannot complete because the process is running in an ipython interpreter and pytest does not capture any input. This change provides an explicit reference to a project to avoid that process. ``` Please visit this URL to authorize this application: [REDACTED DUE TO SPACE REASONS] self = <_pytest.capture.DontReadFromInput object at 0x7f55d6821bd0>, size = -1 def read(self, size: int = -1) -> str: > raise OSError( "pytest: reading from stdin while output is captured! Consider using `-s`.") E OSError: pytest: reading from stdin while output is captured! Consider using `-s`. 
.nox/unit-3-11/lib/python3.11/site-packages/_pytest/capture.py:229: OSError ``` --- tests/unit/test_magics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 814150693..c79e923f8 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -986,6 +986,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1007,6 +1008,7 @@ def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1035,6 +1037,7 @@ def test_bigquery_magic_dryrun_option_variable_error_message( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "q_job")) run_query_patch = mock.patch( @@ -1064,6 +1067,7 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1098,6 +1102,7 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "result")) client_query_patch = mock.patch( From 7ed9fd293ab1181b5b7b97e7e9ec82aade56e7ef Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 7 Jul 2025 22:52:24 +0200 Subject: [PATCH 19/29] chore(deps): update all dependencies (#2226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | Age | Confidence | |---|---|---|---| | [bigquery-magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [bigquery_magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [ipython](https://redirect.github.com/ipython/ipython) | `==9.3.0` -> `==9.4.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/ipython/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/ipython/9.3.0/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pandas](https://redirect.github.com/pandas-dev/pandas) | `==2.3.0` -> `==2.3.1` | 
[![age](https://developer.mend.io/api/mc/badges/age/pypi/pandas/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pandas/2.3.0/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pytest-xdist](https://redirect.github.com/pytest-dev/pytest-xdist) ([changelog](https://pytest-xdist.readthedocs.io/en/latest/changelog.html)) | `==3.7.0` -> `==3.8.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/pytest-xdist/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pytest-xdist/3.7.0/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://redirect.github.com/python/typing_extensions) ([changelog](https://redirect.github.com/python/typing_extensions/blob/main/CHANGELOG.md)) | `==4.14.0` -> `==4.14.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/typing-extensions/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/typing-extensions/4.14.0/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-magics (bigquery-magics) ### [`v0.10.1`](https://redirect.github.com/googleapis/python-bigquery-magics/blob/HEAD/CHANGELOG.md#0101-2025-07-07) [Compare Source](https://redirect.github.com/googleapis/python-bigquery-magics/compare/v0.10.0...v0.10.1) ##### Dependencies - Move spanner-graph-notebook back to version 1.1.6 ([#​126](https://redirect.github.com/googleapis/python-bigquery-magics/issues/126)) ([17ee695](https://redirect.github.com/googleapis/python-bigquery-magics/commit/17ee6956c8fec740440836609a9106e900b63074))
ipython/ipython (ipython) ### [`v9.4.0`](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0) [Compare Source](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0)
pandas-dev/pandas (pandas) ### [`v2.3.1`](https://redirect.github.com/pandas-dev/pandas/releases/tag/v2.3.1): Pandas 2.3.1 [Compare Source](https://redirect.github.com/pandas-dev/pandas/compare/v2.3.0...v2.3.1) We are pleased to announce the release of pandas 2.3.1. This release includes some improvements and fixes to the future string data type (preview feature for the upcoming pandas 3.0). We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/2.3.1/whatsnew/v2.3.1.html) for a list of all the changes. Pandas 2.3.1 supports Python 3.9 and higher. The release will be available on the conda-forge channel: ``` conda install pandas --channel conda-forge ``` Or via PyPI: ``` python3 -m pip install --upgrade pandas ``` Please report any issues with the release on the [pandas issue tracker](https://redirect.github.com/pandas-dev/pandas/issues). Thanks to all the contributors who made this release possible.
pytest-dev/pytest-xdist (pytest-xdist) ### [`v3.8.0`](https://redirect.github.com/pytest-dev/pytest-xdist/blob/HEAD/CHANGELOG.rst#pytest-xdist-380-2025-06-30) [Compare Source](https://redirect.github.com/pytest-dev/pytest-xdist/compare/v3.7.0...v3.8.0) ## Features - #1083: Add `--no-loadscope-reorder` and `--loadscope-reorder` options to control whether tests are automatically reordered in loadscope mode, for suites where relative ordering matters. This only applies when using `loadscope`. For example, if [test_file_1, test_file_2, ..., test_file_n] are given as input test files and `--no-loadscope-reorder` is used, then for any worker, `test_file_a` will be executed before `test_file_b` only if `a < b`. The default behavior is to reorder the tests to maximize the number of tests that can be executed in parallel.
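As a sketch of how the new flag from the entry above might be driven from Python rather than the shell (the worker count is a placeholder, and pytest-xdist 3.8.0 is assumed to be installed):

```python
import sys

import pytest

# Equivalent to: pytest -n 2 --dist loadscope --no-loadscope-reorder
# With the new flag, each worker runs its test files in their given
# relative order instead of the reordered, parallelism-maximizing order
# that loadscope uses by default.
sys.exit(pytest.main(["-n", "2", "--dist", "loadscope", "--no-loadscope-reorder"]))
```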
python/typing_extensions (typing-extensions) ### [`v4.14.1`](https://redirect.github.com/python/typing_extensions/blob/HEAD/CHANGELOG.md#Release-4141-July-4-2025) [Compare Source](https://redirect.github.com/python/typing_extensions/compare/4.14.0...4.14.1) - Fix usage of `typing_extensions.TypedDict` nested inside other types (e.g., `typing.Type[typing_extensions.TypedDict]`). This is not allowed by the type system but worked on older versions, so we maintain support.
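A small illustration of the pattern that entry describes. `Movie` is a hypothetical TypedDict; the nested reference on the last line is rejected by type checkers but, per the changelog above, is accepted again at runtime on typing_extensions 4.14.1:

```python
import typing

import typing_extensions


class Movie(typing_extensions.TypedDict):
    title: str
    year: int


# Nesting the TypedDict base itself inside another type: not allowed by
# the type system, but previously-working runtime code relied on it, so
# 4.14.1 restores the behavior.
MovieClass = typing.Type[typing_extensions.TypedDict]
```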
--- ### Configuration 📅 **Schedule**: Branch creation - At any time (no schedule defined), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://redirect.github.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index ee895a4f4..d449b373b 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 68f6c1662..5b342fe5c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.0 +pandas==2.3.1 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 @@ -39,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.0 +typing-extensions==4.14.1 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index d2456fc5a..b53a35982 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.0 +bigquery_magics==0.10.1 db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.0 +pandas==2.3.1 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 66409e49d..4b134ac9d 100644 --- a/samples/notebooks/requirements.txt +++ 
b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.0 +bigquery-magics==0.10.1 db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.3.0; python_version >= '3.10' +ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.3.0 +pandas==2.3.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d71018b3f..cef3450e1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 From b2300d032843512b7e4a5703377632fe60ef3f8d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 9 Jul 2025 19:12:23 -0400 Subject: [PATCH 20/29] feat: adds time_zone to external config and load job (#2229) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: `time_zone`: Time zone used when parsing timestamp values that do not have specific time zone information. (Applies to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`) Changes include: Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`. Updated docstrings and type hints for all new attributes. Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- google/cloud/bigquery/external_config.py | 17 +++ google/cloud/bigquery/job/load.py | 21 ++++ tests/unit/job/test_load.py | 32 ++++++ tests/unit/job/test_load_config.py | 127 +++++++++++++++++++++++ tests/unit/test_external_config.py | 7 ++ 5 files changed, 204 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cb8141cd0..fcfcaca20 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,23 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Time zone used when parsing timestamp values that do not + have specific time zone information (e.g. 2024-04-20 12:34:56). The expected + format is an IANA timezone string (e.g. America/Los_Angeles). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone + """ + + result = self._properties.get("timeZone") + return typing.cast(str, result) + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._properties["timeZone"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e56ce16f0..5d49aef18 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,20 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Default time zone that will apply when parsing timestamp + values that have no specific time zone. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone + """ + return self._get_sub_prop("timeZone") + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._set_sub_prop("timeZone", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -889,6 +903,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def time_zone(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`. + """ + return self.configuration.time_zone + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 10df46fb3..81d8e44b4 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,10 +38,14 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.TIME_ZONE = "UTC" + def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + + config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -152,6 +156,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "timeZone" in config: + self.assertEqual(job.time_zone, config["timeZone"]) + else: + self.assertIsNone(job.time_zone) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -195,6 +203,8 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.time_zone) + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.job import LoadJobConfig @@ -431,6 +441,24 @@ def test_from_api_repr_w_properties(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_to_api_repr(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource(ended=False) + + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client) + api_repr = job.to_api_repr() + + # as per the documentation in load.py -> LoadJob.to_api_repr(), + # the return value from to_api_repr should not include statistics + expected = { + "jobReference": RESOURCE["jobReference"], + "configuration": RESOURCE["configuration"], + } + + self.assertEqual(api_repr, expected) + def test_begin_w_already_running(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) @@ -571,6 +599,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() @@ -599,6 +628,9 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + + config.time_zone = self.TIME_ZONE + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 
3a681c476..6424f7e68 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_time_zone_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_zone) + + def test_time_zone_hit(self): + time_zone = "UTC" + config = self._get_target_class()() + config._properties["load"]["timeZone"] = time_zone + self.assertEqual(config.time_zone, time_zone) + + def test_time_zone_setter(self): + time_zone = "America/New_York" + config = self._get_target_class()() + config.time_zone = time_zone + self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) @@ -901,3 +917,114 @@ def test_column_name_character_map_none(self): config._properties["load"]["columnNameCharacterMap"], ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, ) + + RESOURCE = { + "load": { + "allowJaggedRows": True, + "createDisposition": "CREATE_NEVER", + "encoding": "UTF-8", + "fieldDelimiter": ",", + "ignoreUnknownValues": True, + "maxBadRecords": 10, + "nullMarker": "\\N", + "quote": '"', + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "timePartitioning": { + "type": "DAY", + "field": "transaction_date", + }, + "useAvroLogicalTypes": True, + "writeDisposition": "WRITE_TRUNCATE", + "timeZone": "America/New_York", + "parquetOptions": {"enableListInference": True}, + "columnNameCharacterMap": "V2", + "someNewField": "some-value", + } + } + + def test_from_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig.from_api_repr(self.RESOURCE) + + self.assertTrue(config.allow_jagged_rows) + self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER) + self.assertEqual(config.encoding, "UTF-8") + self.assertEqual(config.field_delimiter, ",") + self.assertTrue(config.ignore_unknown_values) + self.assertEqual(config.max_bad_records, 10) + self.assertEqual(config.null_marker, "\\N") + self.assertEqual(config.quote_character, '"') + self.assertEqual( + config.schema, + [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")], + ) + self.assertEqual(config.skip_leading_rows, 1) + self.assertEqual(config.source_format, SourceFormat.CSV) + self.assertEqual( + config.time_partitioning, + TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"), + ) + self.assertTrue(config.use_avro_logical_types) + self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.time_zone, "America/New_York") + self.assertTrue(config.parquet_options.enable_list_inference) + self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) + self.assertEqual(config._properties["load"]["someNewField"], "some-value") + + def test_to_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from 
google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + from google.cloud.bigquery.format_options import ParquetOptions + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig() + config.allow_jagged_rows = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "UTF-8" + config.field_delimiter = "," + config.ignore_unknown_values = True + config.max_bad_records = 10 + config.null_marker = r"\N" + config.quote_character = '"' + config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + config.skip_leading_rows = 1 + config.source_format = SourceFormat.CSV + config.time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, field="transaction_date" + ) + config.use_avro_logical_types = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.time_zone = "America/New_York" + parquet_options = ParquetOptions() + parquet_options.enable_list_inference = True + config.parquet_options = parquet_options + config.column_name_character_map = ColumnNameCharacterMap.V2 + config._properties["load"]["someNewField"] = "some-value" + + api_repr = config.to_api_repr() + + expected = self.RESOURCE + self.assertEqual(api_repr, expected) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7f84a9f5b..a89b7a1fb 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + TIME_ZONE = "America/Los_Angeles" + BASE_RESOURCE = { "sourceFormat": "", "sourceUris": SOURCE_URIS, @@ -33,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "timeZone": TIME_ZONE, } def test_from_api_repr_base(self): @@ -79,6 +82,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -92,6 +96,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -128,6 +133,8 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) + self.assertEqual(ec.time_zone, self.TIME_ZONE) + def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") got = ec.to_api_repr() From 3ed0a0a3d9699f1f70a616cfd06d1958b69e1f03 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 10 Jul 2025 19:44:58 +0200 Subject: [PATCH 21/29] chore(deps): update dependency certifi to v2025.7.9 (#2232) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5b342fe5c..447e92c81 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.6.15 +certifi==2025.7.9 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From d44bf0231e6e96369e4e03667a3f96618fb664e2 Mon Sep 17 00:00:00 2001 From: Huan Chen 
<142538604+Genesis929@users.noreply.github.com> Date: Fri, 11 Jul 2025 10:10:58 -0700 Subject: [PATCH 22/29] feat: add total slot ms to RowIterator (#2233) * feat: add total slot ms to RowIterator * format fix --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/client.py | 4 ++++ google/cloud/bigquery/job/query.py | 1 + google/cloud/bigquery/query.py | 14 ++++++++++++++ google/cloud/bigquery/table.py | 7 +++++++ tests/unit/job/test_query.py | 2 ++ tests/unit/test_client.py | 2 ++ tests/unit/test_query.py | 16 ++++++++++++++++ 8 files changed, 47 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 888dc1e73..73d4f6e7b 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -560,6 +560,7 @@ def do_query(): num_dml_affected_rows=query_results.num_dml_affected_rows, query=query, total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 811e9ef03..804f77ea2 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4144,6 +4144,7 @@ def _list_rows_from_query_results( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4195,6 +4196,8 @@ def _list_rows_from_query_results( The query text used. total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. + slot_millis (Optional[int]): + Number of slot ms the user is actually billed for. Returns: google.cloud.bigquery.table.RowIterator: @@ -4234,6 +4237,7 @@ def _list_rows_from_query_results( num_dml_affected_rows=num_dml_affected_rows, query=query, total_bytes_processed=total_bytes_processed, + slot_millis=slot_millis, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 4d95f0e71..ec9379ea9 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1766,6 +1766,7 @@ def is_job_done(): num_dml_affected_rows=self._query_results.num_dml_affected_rows, query=self.query, total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 8745c09f5..4a006d621 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1282,6 +1282,20 @@ def total_bytes_processed(self): if total_bytes_processed is not None: return int(total_bytes_processed) + @property + def slot_millis(self): + """Total number of slot ms the user is actually billed for. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + + Returns: + Optional[int]: Count generated on the server (None until set by the server). + """ + slot_millis = self._properties.get("totalSlotMs") + if slot_millis is not None: + return int(slot_millis) + @property def num_dml_affected_rows(self): """Total number of rows affected by a DML query. 
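To make the new field concrete: a hedged sketch of how a caller might read it once the change lands (the query text and the default-credential client are placeholders; `query_and_wait` returns the `RowIterator` that the diffs below extend with `slot_millis`):

```python
from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials and project

rows = client.query_and_wait("SELECT 1 AS x")  # placeholder query

for row in rows:
    print(row.x)

# Both values are None until the server reports them, and ints afterwards.
print("total bytes processed:", rows.total_bytes_processed)
print("slot milliseconds billed:", rows.slot_millis)
```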
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 861f806b4..dbdde36d1 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def __init__( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1841,6 +1842,7 @@ def __init__( self._num_dml_affected_rows = num_dml_affected_rows self._query = query self._total_bytes_processed = total_bytes_processed + self._slot_millis = slot_millis @property def _billing_project(self) -> Optional[str]: @@ -1898,6 +1900,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def slot_millis(self) -> Optional[int]: + """Number of slot ms the user is actually billed for.""" + return self._slot_millis + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 46b802aa3..7201adb55 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -888,6 +888,7 @@ def test_result_reloads_job_state_until_done(self): job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) + job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -969,6 +970,7 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.total_rows, 1) self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) + self.assertEqual(result.slot_millis, 5678) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 42bfc84b9..bb86ccc3c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5718,6 +5718,7 @@ def test_query_and_wait_defaults(self): "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, + "totalSlotMs": 5678, } creds = _make_credentials() http = object() @@ -5735,6 +5736,7 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.location) self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) + self.assertEqual(rows.slot_millis, 5678) # Verify the request we send is to jobs.query. 
conn.api_request.assert_called_once() diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 0d967bdb8..2b704d3c9 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -2000,6 +2000,22 @@ def test_total_bytes_processed_present_string(self): query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) + def test_slot_millis_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.slot_millis) + + def test_slot_millis_present_integer(self): + resource = self._make_resource() + resource["totalSlotMs"] = 123456 + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + + def test_slot_millis_present_string(self): + resource = self._make_resource() + resource["totalSlotMs"] = "123456" + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From 7d3182802deccfceb0646b87fc8d12275d0a569b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Jul 2025 20:29:28 -0400 Subject: [PATCH 23/29] feat: adds date_format to load job and external config (#2231) * feat: adds date_format to load job and external config * adds date_format to new to/from_api_repr tests --- google/cloud/bigquery/external_config.py | 14 ++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 14 +++++++++----- tests/unit/job/test_load_config.py | 19 +++++++++++++++++++ tests/unit/test_external_config.py | 7 +++++-- 5 files changed, 67 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index fcfcaca20..54b7bf396 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,20 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format + """ + result = self._properties.get("dateFormat") + return typing.cast(str, result) + + @date_format.setter + def date_format(self, value: Optional[str]): + self._properties["dateFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 5d49aef18..277478d81 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,19 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format + """ + return self._get_sub_prop("dateFormat") + + @date_format.setter + def date_format(self, value: Optional[str]): + self._set_sub_prop("dateFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -903,6 +916,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def date_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`. + """ + return self.configuration.date_format + @property def time_zone(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 81d8e44b4..82baa03c7 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -37,14 +37,14 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" - + self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] - + config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.reference_file_schema_uri) - if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -156,6 +155,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "dateFormat" in config: + self.assertEqual(job.date_format, config["dateFormat"]) + else: + self.assertIsNone(job.date_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -202,7 +205,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) - + self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) def test_ctor_w_config(self): @@ -599,6 +602,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION @@ -628,7 +632,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" - + config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE with mock.patch( diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 6424f7e68..5b7f8175b 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_date_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.date_format) + + def test_date_format_hit(self): + date_format = "%Y-%m-%d" + config = self._get_target_class()() + config._properties["load"]["dateFormat"] = date_format + self.assertEqual(config.date_format, 
date_format) + + def test_date_format_setter(self): + date_format = "YYYY/MM/DD" + config = self._get_target_class()() + config.date_format = date_format + self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) @@ -942,6 +958,7 @@ def test_column_name_character_map_none(self): }, "useAvroLogicalTypes": True, "writeDisposition": "WRITE_TRUNCATE", + "dateFormat": "%Y-%m-%d", "timeZone": "America/New_York", "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", @@ -983,6 +1000,7 @@ def test_from_api_repr(self): ) self.assertTrue(config.use_avro_logical_types) self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.date_format, "%Y-%m-%d") self.assertEqual(config.time_zone, "America/New_York") self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) @@ -1017,6 +1035,7 @@ def test_to_api_repr(self): ) config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.date_format = "%Y-%m-%d" config.time_zone = "America/New_York" parquet_options = ParquetOptions() parquet_options.enable_list_inference = True diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index a89b7a1fb..0f5d09504 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -25,7 +25,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] - + DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" BASE_RESOURCE = { @@ -35,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, } @@ -82,6 +83,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] @@ -96,6 +98,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -132,7 +135,7 @@ def _verify_base(self, ec): self.assertEqual(ec.ignore_unknown_values, False) self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) - + self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) def test_to_api_repr_source_format(self): From 371ad292df537278767dba71d81822ed57dd8e7d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 14 Jul 2025 12:14:58 -0400 Subject: [PATCH 24/29] feat: adds time_format and timestamp_format and associated tests (#2238) --- google/cloud/bigquery/external_config.py | 28 +++++++++++++++++ google/cloud/bigquery/job/load.py | 40 ++++++++++++++++++++++++ tests/unit/job/test_load.py | 20 ++++++++++++ tests/unit/job/test_load_config.py | 34 ++++++++++++++++++++ tests/unit/test_external_config.py | 11 +++++++ 5 files changed, 133 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 54b7bf396..370f62c0a 100644 --- a/google/cloud/bigquery/external_config.py +++ 
b/google/cloud/bigquery/external_config.py @@ -879,6 +879,34 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._properties["timeZone"] = value + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format + """ + result = self._properties.get("timeFormat") + return typing.cast(str, result) + + @time_format.setter + def time_format(self, value: Optional[str]): + self._properties["timeFormat"] = value + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format + """ + result = self._properties.get("timestampFormat") + return typing.cast(str, result) + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._properties["timestampFormat"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 277478d81..2e5a9a9bb 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -575,6 +575,32 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._set_sub_prop("timeZone", value) + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format + """ + return self._get_sub_prop("timeFormat") + + @time_format.setter + def time_format(self, value: Optional[str]): + self._set_sub_prop("timeFormat", value) + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIMESTAMP values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format + """ + return self._get_sub_prop("timestampFormat") + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._set_sub_prop("timestampFormat", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -930,6 +956,20 @@ def time_zone(self): """ return self.configuration.time_zone + @property + def time_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`. + """ + return self.configuration.time_format + + @property + def timestamp_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`. 
+ """ + return self.configuration.timestamp_format + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 82baa03c7..77adf0cc8 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -39,6 +39,8 @@ def _setUpConstants(self): self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" + self.TIME_FORMAT = "%H:%M:%S" + self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -46,6 +48,9 @@ def _make_resource(self, started=False, ended=False): config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE + config["timeFormat"] = self.TIME_FORMAT + config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -163,6 +168,14 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.time_zone, config["timeZone"]) else: self.assertIsNone(job.time_zone) + if "timeFormat" in config: + self.assertEqual(job.time_format, config["timeFormat"]) + else: + self.assertIsNone(job.time_format) + if "timestampFormat" in config: + self.assertEqual(job.timestamp_format, config["timestampFormat"]) + else: + self.assertIsNone(job.timestamp_format) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -207,6 +220,8 @@ def test_ctor(self): self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) + self.assertIsNone(job.time_format) + self.assertIsNone(job.timestamp_format) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -604,7 +619,10 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -634,6 +652,8 @@ def test_begin_w_alternate_client(self): config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE + config.time_format = self.TIME_FORMAT + config.timestamp_format = self.TIMESTAMP_FORMAT with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 5b7f8175b..b733bdda0 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -860,6 +860,40 @@ def test_time_zone_setter(self): config.time_zone = time_zone self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_time_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_format) + + def test_time_format_hit(self): + time_format = "%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["timeFormat"] = time_format + self.assertEqual(config.time_format, time_format) + + def test_time_format_setter(self): + time_format = "HH24:MI:SS" + config = self._get_target_class()() + config.time_format = time_format + self.assertEqual(config._properties["load"]["timeFormat"], time_format) + + def 
test_timestamp_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.timestamp_format) + + def test_timestamp_format_hit(self): + timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ" + config = self._get_target_class()() + config._properties["load"]["timestampFormat"] = timestamp_format + self.assertEqual(config.timestamp_format, timestamp_format) + + def test_timestamp_format_setter(self): + timestamp_format = "YYYY/MM/DD HH24:MI:SS.FF6 TZR" + config = self._get_target_class()() + config.timestamp_format = timestamp_format + self.assertEqual( + config._properties["load"]["timestampFormat"], timestamp_format + ) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 0f5d09504..8b41cd8e3 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -27,6 +27,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" + TIME_FORMAT = "HH24:MI:SS" + TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" BASE_RESOURCE = { "sourceFormat": "", @@ -37,6 +39,8 @@ class TestExternalConfig(unittest.TestCase): "compression": "compression", "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, + "timeFormat": TIME_FORMAT, + "timestampFormat": TIMESTAMP_FORMAT, } def test_from_api_repr_base(self): @@ -85,6 +89,9 @@ def test_to_api_repr_base(self): ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE + ec.time_format = self.TIME_FORMAT + ec.timestamp_format = self.TIMESTAMP_FORMAT + exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -100,6 +107,8 @@ def test_to_api_repr_base(self): "schema": exp_schema, "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } self.assertEqual(got_resource, exp_resource) @@ -137,6 +146,8 @@ def _verify_base(self, ec): self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) + self.assertEqual(ec.time_format, self.TIME_FORMAT) + self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") From 54d3dc66244d50a031e3c80d43d372d2743ecbc3 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 05:22:44 -0400 Subject: [PATCH 25/29] feat: adds datetime_format as an option (#2236) * feat: adds datetime_format as an option * updates docstrings --- google/cloud/bigquery/external_config.py | 15 +++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 9 +++++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++++ tests/unit/test_external_config.py | 5 +++++ 5 files changed, 65 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 370f62c0a..82c6a9e75 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -862,6 +862,21 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._properties["dateFormat"] = value + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATETIME values. Supports C-style + and SQL-style values. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format + """ + result = self._properties.get("datetimeFormat") + return typing.cast(str, result) + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._properties["datetimeFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 2e5a9a9bb..3be914f43 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -561,6 +561,19 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._set_sub_prop("dateFormat", value) + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATETIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format + """ + return self._get_sub_prop("datetimeFormat") + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._set_sub_prop("datetimeFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -949,6 +962,13 @@ def date_format(self): """ return self.configuration.date_format + @property + def datetime_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`. + """ + return self.configuration.datetime_format + @property def time_zone(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 77adf0cc8..7afe9cba6 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" + self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" @@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False): config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT + config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT @@ -164,6 +166,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.date_format, config["dateFormat"]) else: self.assertIsNone(job.date_format) + if "datetimeFormat" in config: + self.assertEqual(job.datetime_format, config["datetimeFormat"]) + else: + self.assertIsNone(job.datetime_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -219,6 +225,7 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) + self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) self.assertIsNone(job.time_format) self.assertIsNone(job.timestamp_format) @@ -618,6 +625,7 @@ def test_begin_w_alternate_client(self): }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -651,6 +659,7 @@ def 
test_begin_w_alternate_client(self): config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT + config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE config.time_format = self.TIME_FORMAT config.timestamp_format = self.TIMESTAMP_FORMAT diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index b733bdda0..dbb062486 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -844,6 +844,22 @@ def test_date_format_setter(self): config.date_format = date_format self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_datetime_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.datetime_format) + + def test_datetime_format_hit(self): + datetime_format = "%Y-%m-%dT%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["datetimeFormat"] = datetime_format + self.assertEqual(config.datetime_format, datetime_format) + + def test_datetime_format_setter(self): + datetime_format = "YYYY/MM/DD HH24:MI:SS" + config = self._get_target_class()() + config.datetime_format = datetime_format + self.assertEqual(config._properties["load"]["datetimeFormat"], datetime_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 8b41cd8e3..3a441d1f5 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" + DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" TIME_FORMAT = "HH24:MI:SS" TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" @@ -38,6 +39,7 @@ class TestExternalConfig(unittest.TestCase): "ignoreUnknownValues": False, "compression": "compression", "dateFormat": DATE_FORMAT, + "datetimeFormat": DATETIME_FORMAT, "timeZone": TIME_ZONE, "timeFormat": TIME_FORMAT, "timestampFormat": TIMESTAMP_FORMAT, @@ -88,6 +90,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] ec.date_format = self.DATE_FORMAT + ec.datetime_format = self.DATETIME_FORMAT ec.time_zone = self.TIME_ZONE ec.time_format = self.TIME_FORMAT ec.timestamp_format = self.TIMESTAMP_FORMAT @@ -106,6 +109,7 @@ def test_to_api_repr_base(self): "connectionId": "path/to/connection", "schema": exp_schema, "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -145,6 +149,7 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) + self.assertEqual(ec.datetime_format, self.DATETIME_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) self.assertEqual(ec.time_format, self.TIME_FORMAT) self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) From 69a2c2bfd89914605d53aefc78bd6e45c38c578f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 15 Jul 2025 11:43:28 +0200 Subject: [PATCH 26/29] chore(deps): update dependency certifi to v2025.7.14 (#2237) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 447e92c81..e932625b8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.9 +certifi==2025.7.14 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From 289446dd8c356d11a0b63b8e6275629b1ae5dc08 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 14:32:47 -0400 Subject: [PATCH 27/29] feat: Add null_markers property to LoadJobConfig and CSVOptions (#2239) * feat: Add null_markers property to LoadJobConfig and CSVOptions * feat: adds null_markers as a load and external_config option --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/external_config.py | 21 ++++++++++++++++++ google/cloud/bigquery/job/load.py | 28 ++++++++++++++++++++++++ tests/unit/job/test_load.py | 7 ++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++ tests/unit/test_external_config.py | 4 ++++ 5 files changed, 76 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 82c6a9e75..69ed72bc9 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -474,6 +474,27 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def null_markers(self) -> Optional[Iterable[str]]: + """Optional[Iterable[str]]: A list of strings that represent SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers cannot be set at the same time: + if null_marker is set, null_markers must be unset, + and if null_markers is set, null_marker must be unset; + setting both at once results in a user error. + Any string listed in null_markers, including the empty string, is interpreted as SQL NULL. + This applies to all column types. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers + """ + return self._properties.get("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[Iterable[str]]): + self._properties["nullMarkers"] = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 3be914f43..eabc12cfc 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -386,6 +386,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def null_markers(self) -> Optional[List[str]]: + """Optional[List[str]]: A list of strings that represent SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers cannot be set at the same time: + if null_marker is set, null_markers must be unset, + and if null_markers is set, null_marker must be unset; + setting both at once results in a user error. + Any string listed in null_markers, including the empty string, is interpreted as SQL NULL. + This applies to all column types.
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers + """ + return self._get_sub_prop("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[List[str]]): + self._set_sub_prop("nullMarkers", value) + @property def preserve_ascii_control_characters(self): """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV. @@ -854,6 +875,13 @@ def null_marker(self): """ return self.configuration.null_marker + @property + def null_markers(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`. + """ + return self.configuration.null_markers + @property def quote_character(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 7afe9cba6..5d52401c9 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -42,6 +42,7 @@ def _setUpConstants(self): self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" + self.NULL_MARKERS = ["", "NA"] def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -52,6 +53,7 @@ def _make_resource(self, started=False, ended=False): config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["nullMarkers"] = self.NULL_MARKERS config["destinationTable"] = { "projectId": self.PROJECT, @@ -140,6 +142,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.null_marker, config["nullMarker"]) else: self.assertIsNone(job.null_marker) + if "nullMarkers" in config: + self.assertEqual(job.null_markers, config["nullMarkers"]) + else: + self.assertIsNone(job.null_markers) if "quote" in config: self.assertEqual(job.quote_character, config["quote"]) else: @@ -211,6 +217,7 @@ def test_ctor(self): self.assertIsNone(job.ignore_unknown_values) self.assertIsNone(job.max_bad_records) self.assertIsNone(job.null_marker) + self.assertIsNone(job.null_markers) self.assertIsNone(job.quote_character) self.assertIsNone(job.skip_leading_rows) self.assertIsNone(job.source_format) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index dbb062486..8ff9244d2 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -469,6 +469,22 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_null_markers_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_markers) + + def test_null_markers_hit(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config._properties["load"]["nullMarkers"] = null_markers + self.assertEqual(config.null_markers, null_markers) + + def test_null_markers_setter(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config.null_markers = null_markers + self.assertEqual(config._properties["load"]["nullMarkers"], null_markers) + def test_preserve_ascii_control_characters_missing(self): config = self._get_target_class()() self.assertIsNone(config.preserve_ascii_control_characters) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 3a441d1f5..61532b4b8 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -277,6 +277,7 @@ def test_from_api_repr_csv(self): 
"allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, }, ) @@ -293,6 +294,7 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -314,6 +316,7 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.null_markers = ["", "NA"] ec.csv_options = options exp_resource = { @@ -326,6 +329,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, } From 6d5d23685cd457d85955356705c1101e9ec3cdcd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 18:04:24 -0400 Subject: [PATCH 28/29] feat: Adds source_column_match and associated tests (#2227) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: - `source_column_name_match_option`: Controls how source columns are matched to the schema. (Applies to LoadJobConfig, ExternalConfig, LoadJob) Changes include: - Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, `ExternalConfig`, and `CSVOptions`. - Updated docstrings and type hints for all new attributes. - Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- google/cloud/bigquery/enums.py | 18 ++++++++++ google/cloud/bigquery/external_config.py | 34 +++++++++++++++++++ google/cloud/bigquery/job/load.py | 43 +++++++++++++++++++++++- tests/unit/job/test_load.py | 15 +++++++++ tests/unit/job/test_load_config.py | 32 ++++++++++++++++++ tests/unit/test_external_config.py | 43 +++++++++++++++++++++++- 6 files changed, 183 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 9a1e4880c..1b1eb241a 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -462,3 +462,21 @@ class JobCreationMode(object): The conditions under which BigQuery can decide to not create a Job are subject to change. """ + + +class SourceColumnMatch(str, enum.Enum): + """Uses sensible defaults based on how the schema is provided. + If autodetect is used, then columns are matched by name. Otherwise, columns + are matched by position. This is done to keep the behavior backward-compatible. + """ + + SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED" + """Unspecified column name match option.""" + + POSITION = "POSITION" + """Matches by position. This assumes that the columns are ordered the same + way as the schema.""" + + NAME = "NAME" + """Matches by name. 
This reads the header row as column names and reorders + columns to match the field names in the schema.""" diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 69ed72bc9..dc7a33e6a 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -474,6 +475,39 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match + """ + + value = self._properties.get("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._properties["sourceColumnMatch"] = value if value else None + @property def null_markers(self) -> Optional[Iterable[str]]: """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index eabc12cfc..8cdb779ac 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -15,9 +15,10 @@ """Classes for load jobs.""" import typing -from typing import FrozenSet, List, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional, Union from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers @@ -569,6 +570,39 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. 
If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See: + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match + """ + value = self._get_sub_prop("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._set_sub_prop("sourceColumnMatch", value if value else None) + @property def date_format(self) -> Optional[str]: """Optional[str]: Date format used for parsing DATE values. @@ -983,6 +1017,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`. + """ + return self.configuration.source_column_match + @property def date_format(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 5d52401c9..b551d52dd 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -19,6 +19,7 @@ from .helpers import _Base from .helpers import _make_client +from google.cloud.bigquery.enums import SourceColumnMatch class TestLoadJob(_Base): @@ -37,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.SOURCE_COLUMN_MATCH = "NAME" self.DATE_FORMAT = "%Y-%m-%d" self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" @@ -48,6 +50,7 @@ def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + config["sourceColumnMatch"] = self.SOURCE_COLUMN_MATCH config["dateFormat"] = self.DATE_FORMAT config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE @@ -189,6 +192,15 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.timestamp_format) + if "sourceColumnMatch" in config: + # job.source_column_match will be an Enum, config[...] 
is a string + self.assertEqual( + job.source_column_match.value, + config["sourceColumnMatch"], + ) + else: + self.assertIsNone(job.source_column_match) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -231,6 +243,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.source_column_match) self.assertIsNone(job.date_format) self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) @@ -631,6 +644,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "dateFormat": self.DATE_FORMAT, "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, @@ -665,6 +679,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + config.source_column_match = SourceColumnMatch(self.SOURCE_COLUMN_MATCH) config.date_format = self.DATE_FORMAT config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 8ff9244d2..27d3cead1 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -844,6 +844,38 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_source_column_match_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_column_match) + + def test_source_column_match_hit(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.NAME + config = self._get_target_class()() + # Assume API stores the string value of the enum + config._properties["load"]["sourceColumnMatch"] = option_enum.value + self.assertEqual(config.source_column_match, option_enum) + + def test_source_column_match_setter(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.POSITION + config = self._get_target_class()() + config.source_column_match = option_enum + # Assert that the string value of the enum is stored + self.assertEqual( + config._properties["load"]["sourceColumnMatch"], option_enum.value + ) + option_str = "NAME" + config.source_column_match = option_str + self.assertEqual(config._properties["load"]["sourceColumnMatch"], option_str) + + def test_source_column_match_setter_invalid_type(self): + config = self._get_target_class()() + with self.assertRaises(TypeError): + config.source_column_match = 3.14 + def test_date_format_missing(self): config = self._get_target_class()() self.assertIsNone(config.date_format) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 61532b4b8..ea827a560 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,12 +19,14 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import SourceColumnMatch import pytest class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME DATE_FORMAT = "MM/DD/YYYY" DATETIME_FORMAT = 
"MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" @@ -277,6 +279,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, }, @@ -294,6 +297,10 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual( + ec.options.source_column_match, + self.SOURCE_COLUMN_MATCH, + ) self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -316,7 +323,9 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH options.null_markers = ["", "NA"] + ec.csv_options = options exp_resource = { @@ -329,6 +338,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, } @@ -881,7 +891,9 @@ def test_to_api_repr(self): ) -class CSVOptions(unittest.TestCase): +class TestCSVOptions(unittest.TestCase): + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME + def test_to_api_repr(self): options = external_config.CSVOptions() options.field_delimiter = "\t" @@ -891,6 +903,7 @@ def test_to_api_repr(self): options.allow_jagged_rows = False options.encoding = "UTF-8" options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH resource = options.to_api_repr() @@ -904,9 +917,37 @@ def test_to_api_repr(self): "allowJaggedRows": False, "encoding": "UTF-8", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, ) + def test_source_column_match_None(self): + ec = external_config.CSVOptions() + ec.source_column_match = None + expected = None + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_valid_input(self): + ec = external_config.CSVOptions() + ec.source_column_match = SourceColumnMatch.NAME + expected = "NAME" + result = ec.source_column_match + self.assertEqual(expected, result) + + ec.source_column_match = "POSITION" + expected = "POSITION" + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_invalid_input(self): + ec = external_config.CSVOptions() + with self.assertRaisesRegex( + TypeError, + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None", + ): + ec.source_column_match = 3.14 + class TestGoogleSheetsOptions(unittest.TestCase): def test_to_api_repr(self): From ef49f739998120ef348ffafdb97c22bddb323e83 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 20:29:42 -0400 Subject: [PATCH 29/29] chore(main): release 3.35.0 (#2207) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b29a6a41..e4574aa7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 
[3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) + + +### Features + +* Add null_markers property to LoadJobConfig and CSVOptions ([#2239](https://github.com/googleapis/python-bigquery/issues/2239)) ([289446d](https://github.com/googleapis/python-bigquery/commit/289446dd8c356d11a0b63b8e6275629b1ae5dc08)) +* Add total slot ms to RowIterator ([#2233](https://github.com/googleapis/python-bigquery/issues/2233)) ([d44bf02](https://github.com/googleapis/python-bigquery/commit/d44bf0231e6e96369e4e03667a3f96618fb664e2)) +* Add UpdateMode to update_dataset ([#2204](https://github.com/googleapis/python-bigquery/issues/2204)) ([eb9c2af](https://github.com/googleapis/python-bigquery/commit/eb9c2aff242c5107f968bbd8b6a9d30cecc877f6)) +* Adds dataset_view parameter to get_dataset method ([#2198](https://github.com/googleapis/python-bigquery/issues/2198)) ([28a5750](https://github.com/googleapis/python-bigquery/commit/28a5750d455f0381548df6f9b1f7661823837d81)) +* Adds date_format to load job and external config ([#2231](https://github.com/googleapis/python-bigquery/issues/2231)) ([7d31828](https://github.com/googleapis/python-bigquery/commit/7d3182802deccfceb0646b87fc8d12275d0a569b)) +* Adds datetime_format as an option ([#2236](https://github.com/googleapis/python-bigquery/issues/2236)) ([54d3dc6](https://github.com/googleapis/python-bigquery/commit/54d3dc66244d50a031e3c80d43d372d2743ecbc3)) +* Adds source_column_match and associated tests ([#2227](https://github.com/googleapis/python-bigquery/issues/2227)) ([6d5d236](https://github.com/googleapis/python-bigquery/commit/6d5d23685cd457d85955356705c1101e9ec3cdcd)) +* Adds time_format and timestamp_format and associated tests ([#2238](https://github.com/googleapis/python-bigquery/issues/2238)) ([371ad29](https://github.com/googleapis/python-bigquery/commit/371ad292df537278767dba71d81822ed57dd8e7d)) +* Adds time_zone to external config and load job ([#2229](https://github.com/googleapis/python-bigquery/issues/2229)) ([b2300d0](https://github.com/googleapis/python-bigquery/commit/b2300d032843512b7e4a5703377632fe60ef3f8d)) + + +### Bug Fixes + +* Adds magics.context.project to eliminate issues with unit tests … ([#2228](https://github.com/googleapis/python-bigquery/issues/2228)) ([27ff3a8](https://github.com/googleapis/python-bigquery/commit/27ff3a89a5f97305fa3ff673aa9183baa7df200f)) +* Fix rows returned when both start_index and page_size are provided ([#2181](https://github.com/googleapis/python-bigquery/issues/2181)) ([45643a2](https://github.com/googleapis/python-bigquery/commit/45643a2e20ce5d503118522dd195aeca00dec3bc)) +* Make AccessEntry equality consistent with from_api_repr ([#2218](https://github.com/googleapis/python-bigquery/issues/2218)) ([4941de4](https://github.com/googleapis/python-bigquery/commit/4941de441cb32cabeb55ec0320f305fb62551155)) +* Update type hints for various BigQuery files ([#2206](https://github.com/googleapis/python-bigquery/issues/2206)) ([b863291](https://github.com/googleapis/python-bigquery/commit/b86329188ba35e61871db82ae1d95d2a576eed1b)) + + +### Documentation + +* Improve clarity of "Output Only" fields in Dataset class ([#2201](https://github.com/googleapis/python-bigquery/issues/2201)) ([bd5aba8](https://github.com/googleapis/python-bigquery/commit/bd5aba8ba40c2f35fb672a68eed11d6baedb304f)) + ## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 
9e1393854..0107ae309 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.34.0" +__version__ = "3.35.0"
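Taken together, patches 25 through 28 (released in 3.35.0) add datetime_format, null_markers, and source_column_match to LoadJobConfig and to the CSV options for external tables, alongside the date_format, time_zone, time_format, and timestamp_format options added earlier in the series. The following is a minimal sketch of how these options might be combined when loading a CSV file; the client, project, table, and bucket names are hypothetical, and the SQL-style format strings are borrowed from the unit tests above, so treat it as illustrative rather than canonical:

    from google.cloud import bigquery
    from google.cloud.bigquery.enums import SourceColumnMatch

    # Hypothetical identifiers; not part of the patches above.
    client = bigquery.Client(project="my-project")
    table_id = "my-project.my_dataset.my_table"

    config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
    )
    # Match CSV columns to schema fields by header name rather than position.
    config.source_column_match = SourceColumnMatch.NAME
    # Treat these strings (including the empty string) as SQL NULL.
    config.null_markers = ["", "NA"]
    # SQL-style format strings and a default time zone for parsing
    # DATETIME/TIMESTAMP values (values taken from the tests above).
    config.datetime_format = "MM/DD/YYYY HH24:MI:SS"
    config.timestamp_format = "MM/DD/YYYY HH24:MI:SS.FF6 TZR"
    config.time_zone = "America/Los_Angeles"

    load_job = client.load_table_from_uri(
        "gs://my-bucket/data.csv", table_id, job_config=config
    )
    load_job.result()  # Wait for the load job to complete.

Because SourceColumnMatch subclasses str, the source_column_match setters also accept the plain strings "NAME" and "POSITION", as exercised in test_source_column_match_setter.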