@@ -2802,6 +2802,7 @@ def dropna(
28022802 * ,
28032803 axis : int | str = 0 ,
28042804 how : str = "any" ,
2805+ thresh : typing .Optional [int ] = None ,
28052806 subset : typing .Union [None , blocks .Label , Sequence [blocks .Label ]] = None ,
28062807 inplace : bool = False ,
28072808 ignore_index = False ,
@@ -2810,8 +2811,18 @@ def dropna(
28102811 raise NotImplementedError (
28112812 f"'inplace'=True not supported. { constants .FEEDBACK_LINK } "
28122813 )
2813- if how not in ("any" , "all" ):
2814- raise ValueError ("'how' must be one of 'any', 'all'" )
2814+
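2815+ # 'thresh' and 'how' are mutually exclusive: reject a non-default 'how' when 'thresh' is given (an explicit how="any" is indistinguishable from the default here)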
2816+ if thresh is not None :
2817+ # cannot specify both thresh and how parameters
2818+ if how != "any" :
2819+ raise TypeError (
2820+ "You cannot set both the how and thresh arguments at the same time."
2821+ )
2822+ else :
2823+ # Only validate 'how' when thresh is not provided
2824+ if how not in ("any" , "all" ):
2825+ raise ValueError ("'how' must be one of 'any', 'all'" )
28152826
28162827 axis_n = utils .get_axis_number (axis )
28172828
@@ -2833,21 +2844,38 @@ def dropna(
28332844 for id_ in self ._block .label_to_col_id [label ]
28342845 ]
28352846
2836- result = block_ops .dropna (self ._block , self ._block .value_columns , how = how , subset = subset_ids ) # type: ignore
2847+ result = block_ops .dropna (
2848+ self ._block ,
2849+ self ._block .value_columns ,
2850+ how = how ,
2851+ thresh = thresh ,
2852+ subset = subset_ids ,
2853+ ) # type: ignore
28372854 if ignore_index :
28382855 result = result .reset_index ()
28392856 return DataFrame (result )
28402857 else :
2841- isnull_block = self ._block .multi_apply_unary_op (ops .isnull_op )
2842- if how == "any" :
2843- null_locations = DataFrame (isnull_block ).any ().to_pandas ()
2844- else : # 'all'
2845- null_locations = DataFrame (isnull_block ).all ().to_pandas ()
2846- keep_columns = [
2847- col
2848- for col , to_drop in zip (self ._block .value_columns , null_locations )
2849- if not to_drop
2850- ]
2858+ if thresh is not None :
2859+ # Keep columns with at least 'thresh' non-null values
2860+ notnull_block = self ._block .multi_apply_unary_op (ops .notnull_op )
2861+ notnull_counts = DataFrame (notnull_block ).sum ().to_pandas ()
2862+
2863+ keep_columns = [
2864+ col
2865+ for col , count in zip (self ._block .value_columns , notnull_counts )
2866+ if count >= thresh
2867+ ]
2868+ else :
2869+ isnull_block = self ._block .multi_apply_unary_op (ops .isnull_op )
2870+ if how == "any" :
2871+ null_locations = DataFrame (isnull_block ).any ().to_pandas ()
2872+ else : # 'all'
2873+ null_locations = DataFrame (isnull_block ).all ().to_pandas ()
2874+ keep_columns = [
2875+ col
2876+ for col , to_drop in zip (self ._block .value_columns , null_locations )
2877+ if not to_drop
2878+ ]
28512879 return DataFrame (self ._block .select_columns (keep_columns ))
28522880
28532881 def any (
0 commit comments