|
20 | 20 |
|
21 | 21 | import numpy as np |
22 | 22 |
|
| 23 | +from datetime import datetime as _pydatetime |
| 24 | + |
| 25 | +from pandas.core.dtypes.dtypes import DatetimeTZDtype |
| 26 | +from pandas.core.dtypes.common import is_object_dtype |
| 27 | + |
23 | 28 | from pandas._libs import writers as libwriters |
24 | 29 | from pandas._typing import SequenceNotStr |
25 | 30 | from pandas.util._decorators import cache_readonly |
@@ -312,11 +317,63 @@ def _save_body(self) -> None: |
312 | 317 | break |
313 | 318 | self._save_chunk(start_i, end_i) |
314 | 319 |
|
| 320 | + |
| 321 | + # tz-aware CSV formatting helper |
| 322 | + @staticmethod |
| 323 | + def _csv_format_datetime_tz_ea(ser, na_rep: str): |
| 324 | + """ |
| 325 | + Consistent tz-aware formatting for ExtensionArray datetimes: |
| 326 | + 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM' |
| 327 | + """ |
| 328 | + # +HHMM → +HH:MM |
| 329 | + s = ser.dt.strftime("%Y-%m-%d %H:%M:%S.%f%z") |
| 330 | + s = s.str.replace(r"([+-]\d{2})(\d{2})$", r"\1:\2", regex=True) |
| 331 | + return s.fillna(na_rep) |
| 332 | + |
| 333 | + # tz-aware CSV formatting helper |
| 334 | + @staticmethod |
| 335 | + def _csv_format_py_tz_aware_obj(ser, na_rep: str): |
| 336 | + """ |
| 337 | + For object-dtype Series containing stdlib tz-aware datetimes, render |
| 338 | + with microseconds and colonized offset. Leave other objects untouched. |
| 339 | + """ |
| 340 | + if ser.empty: |
| 341 | + return ser.astype(str) |
| 342 | + |
| 343 | + vals = ser.to_numpy(object, copy=False) |
| 344 | + |
| 345 | + def _is_tzaware_dt(x: object) -> bool: |
| 346 | + return ( |
| 347 | + isinstance(x, _pydatetime) |
| 348 | + and getattr(x, "tzinfo", None) is not None |
| 349 | + and x.tzinfo.utcoffset(x) is not None |
| 350 | + ) |
| 351 | + |
| 352 | + mask = np.fromiter((_is_tzaware_dt(x) for x in vals), dtype=bool, count=len(vals)) |
| 353 | + if mask.any(): |
| 354 | + out = vals.copy() |
| 355 | + # isoformat gives 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM' |
| 356 | + out[mask] = [x.isoformat(sep=" ", timespec="microseconds") for x in out[mask]] |
| 357 | + ser = ser._constructor(out, index=ser.index, name=ser.name) |
| 358 | + |
| 359 | + return ser.fillna(na_rep) |
| 360 | + |
| 361 | + |
315 | 362 | def _save_chunk(self, start_i: int, end_i: int) -> None: |
316 | 363 | # create the data for a chunk |
317 | 364 | slicer = slice(start_i, end_i) |
318 | 365 | df = self.obj.iloc[slicer] |
319 | 366 |
|
| 367 | + # If user didn't set date_format, normalize tz-aware datetimes to a |
| 368 | + # single canonical string form for CSV (GH 62111). |
| 369 | + if self.date_format is None: |
| 370 | + for col in df.columns: |
| 371 | + col_dtype = df.dtypes[col] |
| 372 | + if isinstance(col_dtype, DatetimeTZDtype): |
| 373 | + df[col] = self._csv_format_datetime_tz_ea(df[col], self.na_rep) |
| 374 | + elif is_object_dtype(col_dtype): |
| 375 | + df[col] = self._csv_format_py_tz_aware_obj(df[col], self.na_rep) |
| 376 | + |
320 | 377 | res = df._get_values_for_csv(**self._number_format) |
321 | 378 | data = list(res._iter_column_arrays()) |
322 | 379 |
|
|
0 commit comments