diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cc3b3eb2a..bb4d80c73 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -90,8 +90,8 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.enums import UpdateMode + +from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -865,6 +865,7 @@ def get_dataset( dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + dataset_view: Optional[DatasetView] = None, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -882,7 +883,21 @@ def get_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]): + Specifies the view that determines which dataset information is + returned. By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are returned. This argument can + take on the following possible enum values. + * :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`: + Includes dataset metadata and the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`: + Includes all dataset metadata, including the ACL and table metadata. + This view is not supported by the `datasets.list` API method. + * :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`: + Includes basic dataset metadata, but not the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`: + The server will decide which view to use. Currently defaults to FULL. Returns: google.cloud.bigquery.dataset.Dataset: A ``Dataset`` instance. @@ -892,6 +907,12 @@ def get_dataset( dataset_ref, default_project=self.project ) path = dataset_ref.path + + if dataset_view: + query_params = {"datasetView": dataset_view.value} + else: + query_params = {} + span_attributes = {"path": path} api_response = self._call_api( retry, @@ -900,6 +921,7 @@ def get_dataset( method="GET", path=path, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e9cd911d0..9a1e4880c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -80,6 +80,24 @@ class CreateDisposition(object): returned in the job result.""" +class DatasetView(enum.Enum): + """DatasetView specifies which dataset information is returned.""" + + DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED" + """The default value. Currently maps to the FULL view.""" + + METADATA = "METADATA" + """View metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + ACL = "ACL" + """View ACL information for the dataset, which defines dataset access + for one or more entities.""" + + FULL = "FULL" + """View both dataset metadata and ACL information.""" + + class DefaultPandasDTypes(enum.Enum): """Default Pandas DataFrem DTypes to convert BigQuery data. These Sentinel values are used instead of None to maintain backward compatibility, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ed092bcdb..42bfc84b9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode +from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -753,7 +753,7 @@ def test_get_dataset(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 + method="GET", path="/%s" % path, timeout=7.5, query_params={} ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -819,6 +819,72 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_get_dataset_with_dataset_view(self): + path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + test_cases = [ + (None, None), + (DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"), + (DatasetView.METADATA, "METADATA"), + (DatasetView.ACL, "ACL"), + (DatasetView.FULL, "FULL"), + ] + + for dataset_view_arg, expected_param_value in test_cases: + with self.subTest( + dataset_view_arg=dataset_view_arg, + expected_param_value=expected_param_value, + ): + # Re-initialize the connection mock for each sub-test to reset side_effect + conn = client._connection = make_connection(resource) + + dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + + if expected_param_value: + expected_query_params = {"datasetView": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % path, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_get_dataset_with_invalid_dataset_view(self): + invalid_view_values = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + conn = client._connection = make_connection(resource) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + for invalid_view_value in invalid_view_values: + with self.subTest(invalid_view_value=invalid_view_value): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.get_dataset(dataset_ref, dataset_view=invalid_view_value) + def test_ensure_bqstorage_client_creating_new_instance(self): bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index bd7c6a8f8..b144471ca 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) }, timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), + mock.call( + method="GET", + path=get_path, + timeout=DEFAULT_TIMEOUT, + query_params={}, + ), ] )