@@ -75,6 +75,25 @@ def is_null(x):
7575
7676
7777def null_for_np_type (dtype ):
78+ """
79+ Return the appropriate null value for a given numpy dtype.
80+
81+ Parameters
82+ ----------
83+ dtype : np.dtype
84+ NumPy data type to get null value for
85+
86+ Returns
87+ -------
88+ scalar
89+ NaT for datetime/timedelta types, np.nan for other types
90+
91+ Notes
92+ -----
93+ For datetime64 and timedelta64 dtypes (kind 'm' or 'M'), returns
94+ the appropriate NaT (Not a Time) value. For all other dtypes,
95+ returns np.nan.
96+ """
7897 if dtype .kind in "mM" :
7998 return np .array (["NaT" ], dtype = dtype )[0 ]
8099 else :
@@ -132,6 +151,25 @@ def _get_initial_value(
132151
133152
def _njit_scalar_reduce(func):
    """
    Compile ``func`` with numba and wrap the result in a ``staticmethod``.

    Parameters
    ----------
    func : callable
        Plain Python function to compile. It has to be compilable by numba
        for the signatures listed in ``_SCALAR_SIGNATURES`` (defined
        elsewhere in this module).

    Returns
    -------
    staticmethod
        ``func`` compiled by ``nb.njit`` with ``_SCALAR_SIGNATURES`` and
        ``nogil=True``, wrapped in ``staticmethod``.

    Notes
    -----
    Explicit signatures are passed to ``nb.njit``, and ``nogil=True`` asks
    numba to release the GIL while the compiled function runs.
    """
    # Build the configured numba decorator first, then apply it to ``func``.
    compile_with_signatures = nb.njit(_SCALAR_SIGNATURES, nogil=True)
    compiled = compile_with_signatures(func)
    return staticmethod(compiled)
136174
137175
@@ -280,6 +318,26 @@ def _nb_reduce_arr_list_in_parallel(
280318def reduction_return_type_and_empty_result_for_op_and_type (
281319 dtype , op : Literal ["count" , "min" , "max" , "sum" , "sum_square" , "mean" ]
282320):
321+ """
322+ Determine the return dtype and empty result value for a reduction operation.
323+
324+ Parameters
325+ ----------
326+ dtype : np.dtype
327+ Input array dtype
328+ op : {"count", "min", "max", "sum", "sum_square", "mean"}
329+ Reduction operation to perform
330+
331+ Returns
332+ -------
333+ tuple
334+ (return_dtype, empty_result_value) for the given operation and input dtype
335+
336+ Notes
337+ -----
338+ This function defines the type promotion rules and empty result values
339+ for various reduction operations on different input dtypes.
340+ """
283341 if op == "count" :
284342 return np .int64 , 0
285343 elif op in ("min" , "max" ):
@@ -311,6 +369,28 @@ def reduction_return_type_and_empty_result_for_op_and_type(
311369
312370
313371def _nullify_below_mincount (result , count , min_count ):
372+ """
373+ Set result elements to null where count is below minimum threshold.
374+
375+ Parameters
376+ ----------
377+ result : np.ndarray
378+ Result array to modify
379+ count : np.ndarray
380+ Count of valid values for each result element
381+ min_count : int
382+ Minimum number of non-null values required
383+
384+ Returns
385+ -------
386+ np.ndarray
387+ Modified result array with nullified values
388+
389+ Notes
390+ -----
391+ For integer dtypes (signed or unsigned, kind 'i' or 'u'), uses MIN_INT as null value.
392+ For all other dtypes, uses np.nan as null value.
393+ """
314394 if result .dtype .kind in "ui" :
315395 null = MIN_INT
316396 else :
@@ -340,6 +420,62 @@ def _chunk_arr_into_arr_list(
340420 axis : Optional [int ],
341421 mask : Optional [np .ndarray ] = None ,
342422) -> NumbaList :
423+ """
424+ Split arrays into chunks for parallel processing in reduction operations.
425+
426+ Parameters
427+ ----------
428+ values : np.ndarray
429+ Input array to be chunked. Must be 1D or 2D.
430+ multi_threading : bool
431+ If True, split array into multiple chunks for parallel processing.
432+ If False, return single chunk (no parallelization).
433+ axis : int or None
434+ Reduction axis. For 2D arrays:
435+ - axis=0: transpose array so reduction operates along columns
436+ - axis=1: keep array as-is, reduction operates along rows
437+ - axis=None: flatten to 1D for scalar reduction
438+ mask : np.ndarray, optional
439+ Boolean mask indicating null values. If provided, will be split
440+ consistently with values array.
441+
442+ Returns
443+ -------
444+ tuple
445+ - arr_list : NumbaList
446+ List of array chunks ready for parallel processing
447+ - mask_list : NumbaList
448+ List of corresponding mask chunks (empty if mask=None)
449+ - final_length : int
450+ Length of the final reduction dimension. 0 for 1D arrays,
451+ number of columns/rows for 2D arrays.
452+
453+ Notes
454+ -----
455+ Thread count is determined automatically based on array size when
456+ multi_threading=True, with a maximum of 6 threads and minimum of 1.
457+ Arrays smaller than 1 million elements use single threading.
458+
459+ For 1D arrays, the array is split into n_threads chunks along axis 0.
460+ For 2D arrays, the array is either transposed (axis=0) or used as-is
461+ (axis=1) to prepare for row-wise or column-wise reductions.
462+
463+ Raises
464+ ------
465+ ValueError
466+ If input array has more than 2 dimensions.
467+
468+ Examples
469+ --------
470+ >>> arr = np.array([[1, 2, 3], [4, 5, 6]])
471+ >>> arr_list, mask_list, final_length = _chunk_arr_into_arr_list(
472+ ... arr, multi_threading=False, axis=0
473+ ... )
474+ >>> final_length
475+ 3
476+ >>> len(arr_list)  # transposed, so 3 columns become 3 arrays
477+ 3
478+ """
343479 ndim = values .ndim
344480 if multi_threading :
345481 # TODO: be smarter about this choice. numba is handling the distribution of the compute
@@ -560,6 +696,27 @@ def nb_reduce(
560696
561697
562698def _cast_to_timelike (arr , to_dtype ):
699+ """
700+ Convert a float array to timelike (datetime/timedelta) dtype.
701+
702+ Parameters
703+ ----------
704+ arr : np.ndarray
705+ Float array to convert
706+ to_dtype : np.dtype
707+ Target datetime or timedelta dtype
708+
709+ Returns
710+ -------
711+ np.ndarray
712+ Array converted to timelike dtype with NaN values replaced by MIN_INT
713+
714+ Notes
715+ -----
716+ This function is used to convert float arrays back to timelike dtypes
717+ after reduction operations. NaN values are replaced with MIN_INT before
719+ conversion to preserve null representation in integer-based time types. Note that the NaN entries of ``arr`` are overwritten in place, so the caller's array is modified.
719+ """
563720 isnan = np .isnan (arr )
564721 if isnan .any ():
565722 arr [isnan ] = MIN_INT
0 commit comments