|
15 | 15 | """Unit tests for read_gbq_table helper functions.""" |
16 | 16 |
|
17 | 17 | import datetime |
| 18 | +import unittest.mock as mock |
18 | 19 |
|
| 20 | +import google.cloud.bigquery |
19 | 21 | import google.cloud.bigquery as bigquery |
| 22 | +import pytest |
20 | 23 |
|
21 | 24 | import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table |
22 | 25 |
|
@@ -45,3 +48,71 @@ def test_get_ibis_time_travel_table_doesnt_timetravel_anonymous_datasets(): |
45 | 48 |
|
46 | 49 | # Need fully-qualified table name. |
47 | 50 | assert "my-test-project" in sql |
| 51 | + |
| 52 | + |
@pytest.mark.parametrize(
    ("index_cols", "primary_keys", "values_distinct", "expected"),
    [
        # Index columns are a strict subset of the primary key columns.
        (["col1", "col2"], ["col1", "col2", "col3"], False, False),
        # Index columns are exactly the primary key columns.
        (["col1", "col2", "col3"], ["col1", "col2", "col3"], True, True),
        # Index columns include every primary key column, in another order.
        (["col2", "col3", "col1"], ["col3", "col2"], True, True),
        # The table has no primary key columns.
        (["col1", "col2"], [], False, False),
        # No index columns are requested.
        ([], ["col1", "col2", "col3"], False, False),
        # Neither index columns nor primary key columns are given.
        ([], [], False, False),
    ],
)
def test_are_index_cols_unique(index_cols, primary_keys, values_distinct, expected):
    """Check ``are_index_cols_unique`` against a mocked table and client.

    The table is given ``primary_keys`` as its primary key constraint, and the
    mocked ``query_and_wait`` reports 3 total rows with 3 distinct values when
    ``values_distinct`` is true and 2 distinct values otherwise. The result of
    ``are_index_cols_unique`` for ``index_cols`` must equal ``expected``.

    See internal issue 335727141.
    """
    table_resource = {
        "tableReference": {
            "projectId": "my-project",
            "datasetId": "my_dataset",
            "tableId": "my_table",
        },
        "clustering": {
            "fields": ["col1", "col2"],
        },
    }
    table = bigquery.Table.from_api_repr(table_resource)
    table.schema = tuple(
        bigquery.SchemaField(column_name, "INT64")
        for column_name in ("col1", "col2", "col3", "col4")
    )

    # TODO(b/305264153): use setter for table_constraints in client library
    # when available.
    primary_key_constraint = {"columns": primary_keys}
    table._properties["tableConstraints"] = {"primaryKey": primary_key_constraint}

    # Configure the mocked client before handing it to the session factory.
    bqclient = mock.create_autospec(bigquery.Client, instance=True)
    bqclient.project = "test-project"
    bqclient.get_table.return_value = table

    if values_distinct:
        distinct_count = 3
    else:
        distinct_count = 2
    count_row = {"total_count": 3, "distinct_count": distinct_count}
    bqclient.query_and_wait.return_value = (count_row,)

    session = resources.create_bigquery_session(
        bqclient=bqclient, table_schema=table.schema
    )
    # Give the table the same location the session reports.
    table._properties["location"] = session._location

    actual = bf_read_gbq_table.are_index_cols_unique(bqclient, table, index_cols, "")

    assert actual == expected
0 commit comments