@@ -294,7 +294,7 @@ def read_gbq(
294294 self ,
295295 query_or_table : str ,
296296 * ,
297- index_col : Iterable [str ] | str = (),
297+ index_col : Iterable [str ] | str | bigframes . enums . DefaultIndexKind = (),
298298 columns : Iterable [str ] = (),
299299 configuration : Optional [Dict ] = None ,
300300 max_results : Optional [int ] = None ,
@@ -313,6 +313,9 @@ def read_gbq(
313313
314314 filters = list (filters )
315315 if len (filters ) != 0 or _is_table_with_wildcard_suffix (query_or_table ):
316+ # TODO(b/338111344): The generated query appears to omit index_cols,
317+ # which must also be selected.
318+ # TODO(b/338039517): Also, need to account for primary keys.
316319 query_or_table = self ._to_query (query_or_table , columns , filters )
317320
318321 if _is_query (query_or_table ):
@@ -326,9 +329,6 @@ def read_gbq(
326329 use_cache = use_cache ,
327330 )
328331 else :
329- # TODO(swast): Query the snapshot table but mark it as a
330- # deterministic query so we can avoid serializing if we have a
331- # unique index.
332332 if configuration is not None :
333333 raise ValueError (
334334 "The 'configuration' argument is not allowed when "
@@ -359,6 +359,8 @@ def _to_query(
359359 else f"`{ query_or_table } `"
360360 )
361361
362+ # TODO(b/338111344): Generate an index based on DefaultIndexKind if we
363+ # don't have index columns specified.
362364 select_clause = "SELECT " + (
363365 ", " .join (f"`{ column } `" for column in columns ) if columns else "*"
364366 )
@@ -488,7 +490,7 @@ def read_gbq_query(
488490 self ,
489491 query : str ,
490492 * ,
491- index_col : Iterable [str ] | str = (),
493+ index_col : Iterable [str ] | str | bigframes . enums . DefaultIndexKind = (),
492494 columns : Iterable [str ] = (),
493495 configuration : Optional [Dict ] = None ,
494496 max_results : Optional [int ] = None ,
@@ -566,7 +568,7 @@ def _read_gbq_query(
566568 self ,
567569 query : str ,
568570 * ,
569- index_col : Iterable [str ] | str = (),
571+ index_col : Iterable [str ] | str | bigframes . enums . DefaultIndexKind = (),
570572 columns : Iterable [str ] = (),
571573 configuration : Optional [Dict ] = None ,
572574 max_results : Optional [int ] = None ,
@@ -598,7 +600,9 @@ def _read_gbq_query(
598600 True if use_cache is None else use_cache
599601 )
600602
601- if isinstance (index_col , str ):
603+ if isinstance (index_col , bigframes .enums .DefaultIndexKind ):
604+ index_cols = []
605+ elif isinstance (index_col , str ):
602606 index_cols = [index_col ]
603607 else :
604608 index_cols = list (index_col )
@@ -628,7 +632,7 @@ def _read_gbq_query(
628632
629633 return self .read_gbq_table (
630634 f"{ destination .project } .{ destination .dataset_id } .{ destination .table_id } " ,
631- index_col = index_cols ,
635+ index_col = index_col ,
632636 columns = columns ,
633637 max_results = max_results ,
634638 use_cache = configuration ["query" ]["useQueryCache" ],
@@ -638,7 +642,7 @@ def read_gbq_table(
638642 self ,
639643 query : str ,
640644 * ,
641- index_col : Iterable [str ] | str = (),
645+ index_col : Iterable [str ] | str | bigframes . enums . DefaultIndexKind = (),
642646 columns : Iterable [str ] = (),
643647 max_results : Optional [int ] = None ,
644648 filters : third_party_pandas_gbq .FiltersType = (),
@@ -693,7 +697,7 @@ def _read_gbq_table(
693697 self ,
694698 query : str ,
695699 * ,
696- index_col : Iterable [str ] | str = (),
700+ index_col : Iterable [str ] | str | bigframes . enums . DefaultIndexKind = (),
697701 columns : Iterable [str ] = (),
698702 max_results : Optional [int ] = None ,
699703 api_name : str ,
@@ -821,10 +825,12 @@ def _read_bigquery_load_job(
821825 table : Union [bigquery .Table , bigquery .TableReference ],
822826 * ,
823827 job_config : bigquery .LoadJobConfig ,
824- index_col : Iterable [str ] | str = (),
828+ index_col : Iterable [str ] | str | bigframes . enums . DefaultIndexKind = (),
825829 columns : Iterable [str ] = (),
826830 ) -> dataframe .DataFrame :
827- if isinstance (index_col , str ):
831+ if isinstance (index_col , bigframes .enums .DefaultIndexKind ):
832+ index_cols = []
833+ elif isinstance (index_col , str ):
828834 index_cols = [index_col ]
829835 else :
830836 index_cols = list (index_col )
@@ -1113,7 +1119,13 @@ def read_csv(
11131119 Union [MutableSequence [Any ], np .ndarray [Any , Any ], Tuple [Any , ...], range ]
11141120 ] = None ,
11151121 index_col : Optional [
1116- Union [int , str , Sequence [Union [str , int ]], Literal [False ]]
1122+ Union [
1123+ int ,
1124+ str ,
1125+ Sequence [Union [str , int ]],
1126+ bigframes .enums .DefaultIndexKind ,
1127+ Literal [False ],
1128+ ]
11171129 ] = None ,
11181130 usecols : Optional [
11191131 Union [
@@ -1143,18 +1155,37 @@ def read_csv(
11431155 f"{ constants .FEEDBACK_LINK } "
11441156 )
11451157
1146- if index_col is not None and (
1147- not index_col or not isinstance (index_col , str )
1158+ # TODO(b/338089659): We can likely relax this single-column
1159+ # restriction by checking that the contents of an iterable are
1160+ # strings, not integers.
1161+ if (
1162+ # Empty tuples, None, and False are allowed and falsey.
1163+ index_col
1164+ and not isinstance (index_col , bigframes .enums .DefaultIndexKind )
1165+ and not isinstance (index_col , str )
11481166 ):
11491167 raise NotImplementedError (
1150- "BigQuery engine only supports a single column name for `index_col`. "
1151- f"{ constants .FEEDBACK_LINK } "
1168+ "BigQuery engine only supports a single column name for `index_col`, "
1169+ f"got: { repr ( index_col ) } . { constants .FEEDBACK_LINK } "
11521170 )
11531171
1154- # None value for index_col cannot be passed to read_gbq
1155- if index_col is None :
1172+ # None and False cannot be passed to read_gbq.
1173+ # TODO(b/338400133): When index_col is None, we should be using the
1174+ # first column of the CSV as the index to be compatible with the
1175+ # pandas engine. According to the pandas docs, only "False"
1176+ # indicates a default sequential index.
1177+ if not index_col :
11561178 index_col = ()
11571179
1180+ index_col = typing .cast (
1181+ Union [
1182+ Sequence [str ], # Falsey values
1183+ bigframes .enums .DefaultIndexKind ,
1184+ str ,
1185+ ],
1186+ index_col ,
1187+ )
1188+
11581189 # usecols should only be an iterable of strings (column names) for use as columns in read_gbq.
11591190 columns : Tuple [Any , ...] = tuple ()
11601191 if usecols is not None :
@@ -1199,6 +1230,11 @@ def read_csv(
11991230 columns = columns ,
12001231 )
12011232 else :
1233+ if isinstance (index_col , bigframes .enums .DefaultIndexKind ):
1234+ raise NotImplementedError (
1235+ f"With index_col={ repr (index_col )} , only engine='bigquery' is supported. "
1236+ f"{ constants .FEEDBACK_LINK } "
1237+ )
12021238 if any (arg in kwargs for arg in ("chunksize" , "iterator" )):
12031239 raise NotImplementedError (
12041240 "'chunksize' and 'iterator' arguments are not supported. "
0 commit comments