@@ -2302,52 +2302,19 @@ def melt(
23022302 self ._block .melt (id_col_ids , val_col_ids , var_name , value_name )
23032303 )
23042304
2305- _NUMERIC_DESCRIBE_AGGS = (
2306- "count" ,
2307- "mean" ,
2308- "std" ,
2309- "min" ,
2310- "25%" ,
2311- "50%" ,
2312- "75%" ,
2313- "max" ,
2314- )
2315- _NON_NUMERIC_DESCRIBE_AGGS = ("count" , "nunique" )
2316-
23172305 def describe (self , include : None | Literal ["all" ] = None ) -> DataFrame :
2318-
2319- allowed_non_numeric_types = {
2320- bigframes .dtypes .STRING_DTYPE ,
2321- bigframes .dtypes .BOOL_DTYPE ,
2322- bigframes .dtypes .BYTES_DTYPE ,
2323- }
2324-
23252306 if include is None :
23262307 numeric_df = self ._drop_non_numeric (permissive = False )
23272308 if len (numeric_df .columns ) == 0 :
23282309 # Describe eligible non-numeric columns
2329- result = self .select_dtypes (include = allowed_non_numeric_types ).agg (
2330- self ._NON_NUMERIC_DESCRIBE_AGGS
2331- )
2332- else :
2333- # Otherwise, only describe numeric columns
2334- result = numeric_df .agg (self ._NUMERIC_DESCRIBE_AGGS )
2335- return typing .cast (DataFrame , result )
2310+ return self ._describe_non_numeric ()
23362311
2337- elif include == "all" :
2338- numeric_result = typing .cast (
2339- DataFrame ,
2340- self ._drop_non_numeric (permissive = False ).agg (
2341- self ._NUMERIC_DESCRIBE_AGGS
2342- ),
2343- )
2312+ # Otherwise, only describe numeric columns
2313+ return self ._describe_numeric ()
23442314
2345- non_numeric_result = typing .cast (
2346- DataFrame ,
2347- self .select_dtypes (include = allowed_non_numeric_types ).agg (
2348- self ._NON_NUMERIC_DESCRIBE_AGGS
2349- ),
2350- )
2315+ elif include == "all" :
2316+ numeric_result = self ._describe_numeric ()
2317+ non_numeric_result = self ._describe_non_numeric ()
23512318
23522319 if len (numeric_result .columns ) == 0 :
23532320 return non_numeric_result
@@ -2364,6 +2331,35 @@ def describe(self, include: None | Literal["all"] = None) -> DataFrame:
23642331 else :
23652332 raise ValueError (f"Unsupported include type: { include } " )
23662333
def _describe_numeric(self) -> DataFrame:
    """Aggregate this frame's numeric columns with the ``describe`` statistics.

    Non-numeric columns are dropped first via ``_drop_non_numeric`` with
    ``permissive=False``; the remaining columns are then aggregated with
    count, mean, std, min, the 25%/50%/75% quantiles and max.

    Returns:
        DataFrame: the result of the aggregation, one entry per statistic
        in the order listed above.
    """
    summary_stats = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    numeric_only = self._drop_non_numeric(permissive=False)
    summary = numeric_only.agg(summary_stats)
    # typing.cast is a runtime no-op; it only narrows the static type of the
    # aggregation result (presumably declared wider than DataFrame).
    return typing.cast(DataFrame, summary)
2350+
def _describe_non_numeric(self) -> DataFrame:
    """Aggregate the string, bool and bytes columns with count and nunique.

    Only columns whose dtype is one of ``STRING_DTYPE``, ``BOOL_DTYPE`` or
    ``BYTES_DTYPE`` (from ``bigframes.dtypes``) are selected; every other
    column is left out of the result.

    Returns:
        DataFrame: the result of aggregating the selected columns with
        ``"count"`` and ``"nunique"``, in that order.
    """
    describable_dtypes = {
        bigframes.dtypes.STRING_DTYPE,
        bigframes.dtypes.BOOL_DTYPE,
        bigframes.dtypes.BYTES_DTYPE,
    }
    eligible = self.select_dtypes(include=describable_dtypes)
    summary = eligible.agg(["count", "nunique"])
    # typing.cast is a runtime no-op; it only narrows the static type of the
    # aggregation result (presumably declared wider than DataFrame).
    return typing.cast(DataFrame, summary)
2362+
23672363 def skew (self , * , numeric_only : bool = False ):
23682364 if not numeric_only :
23692365 frame = self ._raise_on_non_numeric ("skew" )
0 commit comments