Skip to content

Commit 7b0baa9

Browse files
committed
Fixes inconsistent format
1 parent 85d39a2 commit 7b0baa9

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

pandas/io/formats/csvs.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020

2121
import numpy as np
2222

23+
from datetime import datetime as _pydatetime
24+
25+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
26+
from pandas.core.dtypes.common import is_object_dtype
27+
2328
from pandas._libs import writers as libwriters
2429
from pandas._typing import SequenceNotStr
2530
from pandas.util._decorators import cache_readonly
@@ -312,11 +317,63 @@ def _save_body(self) -> None:
312317
break
313318
self._save_chunk(start_i, end_i)
314319

320+
321+
# tz-aware CSV formatting helper
322+
@staticmethod
def _csv_format_datetime_tz_ea(ser, na_rep: str):
    """
    Format a tz-aware datetime Series as canonical CSV strings.

    Each non-missing value is rendered as
    'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM'; an offset that carries a seconds
    component is rendered as '+HH:MM:SS', matching what
    ``datetime.isoformat`` produces for such offsets.

    Parameters
    ----------
    ser : Series
        Series exposing the ``.dt`` accessor (the caller passes columns
        whose dtype is ``DatetimeTZDtype``).
    na_rep : str
        Replacement text for missing values.

    Returns
    -------
    Series
        Formatted strings, with missing entries replaced by ``na_rep``.
    """
    # NOTE(review): "%f" is a six-digit (microsecond) field, so any
    # sub-microsecond component is presumably not rendered -- confirm
    # whether nanosecond-resolution data needs separate handling.
    formatted = ser.dt.strftime("%Y-%m-%d %H:%M:%S.%f%z")

    def _colonize_offset(match) -> str:
        # "%z" emits +HHMM, or +HHMMSS when the offset has seconds.
        sign_hours, minutes, seconds = match.groups()
        offset = f"{sign_hours}:{minutes}"
        return f"{offset}:{seconds}" if seconds else offset

    # +HHMM -> +HH:MM and +HHMMSS -> +HH:MM:SS (anchored at end of string)
    formatted = formatted.str.replace(
        r"([+-]\d{2})(\d{2})(\d{2})?$", _colonize_offset, regex=True
    )
    return formatted.fillna(na_rep)
332+
333+
# tz-aware CSV formatting helper
334+
@staticmethod
def _csv_format_py_tz_aware_obj(ser, na_rep: str):
    """
    Render tz-aware stdlib datetimes held in an object-dtype Series.

    Timezone-aware ``datetime`` instances are replaced by their
    ``isoformat(sep=" ", timespec="microseconds")`` string, missing values
    are replaced by ``na_rep``, and every other element is passed through
    as the same Python object.

    Parameters
    ----------
    ser : Series
        Object-dtype Series to format.
    na_rep : str
        Replacement text for missing values.

    Returns
    -------
    Series
        ``ser.astype(str)`` when ``ser`` is empty; otherwise a new
        object-dtype Series carrying the index and name of ``ser``.
    """
    if ser.empty:
        return ser.astype(str)

    vals = ser.to_numpy(object, copy=False)

    def _is_tzaware_dt(x: object) -> bool:
        # Test ``tzinfo`` before asking for the offset so that naive
        # datetime-like values short-circuit without calling utcoffset.
        return (
            isinstance(x, _pydatetime)
            and x.tzinfo is not None
            and x.tzinfo.utcoffset(x) is not None
        )

    aware = np.fromiter(
        (_is_tzaware_dt(x) for x in vals), dtype=bool, count=len(vals)
    )
    missing = ser.isna().to_numpy(dtype=bool)

    # Work on a copy so the caller's underlying data is never mutated.
    out = vals.copy()
    if aware.any():
        # isoformat gives 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM'
        out[aware] = [
            x.isoformat(sep=" ", timespec="microseconds") for x in vals[aware]
        ]
    # Fill on the object array and pin dtype=object below, so the result
    # dtype is never re-inferred. NOTE(review): Series.fillna on object
    # dtype has downcast results in some pandas versions -- confirm against
    # the supported version range.
    out[missing] = na_rep

    return ser._constructor(out, index=ser.index, name=ser.name, dtype=object)
360+
361+
315362
def _save_chunk(self, start_i: int, end_i: int) -> None:
316363
# create the data for a chunk
317364
slicer = slice(start_i, end_i)
318365
df = self.obj.iloc[slicer]
319366

367+
# If user didn't set date_format, normalize tz-aware datetimes to a
368+
# single canonical string form for CSV (GH 62111).
369+
if self.date_format is None:
370+
for col in df.columns:
371+
col_dtype = df.dtypes[col]
372+
if isinstance(col_dtype, DatetimeTZDtype):
373+
df[col] = self._csv_format_datetime_tz_ea(df[col], self.na_rep)
374+
elif is_object_dtype(col_dtype):
375+
df[col] = self._csv_format_py_tz_aware_obj(df[col], self.na_rep)
376+
320377
res = df._get_values_for_csv(**self._number_format)
321378
data = list(res._iter_column_arrays())
322379

0 commit comments

Comments
 (0)