@@ -1705,9 +1705,9 @@ Consider this example:
17051705 .. ipython:: python
17061706 :suppress:
17071707
1708- import os
1709- if os.path.exists(' test_precision.csv' ):
1710- os.remove(' test_precision.csv' )
1708+ import os
1709+ if os.path.exists(' test_precision.csv' ):
1710+ os.remove(' test_precision.csv' )
17111711
17121712 In this case, the slight precision loss occurs because the decimal ``0.3`` cannot be
17131713exactly represented in binary floating-point format.
@@ -1739,9 +1739,9 @@ roundtrip operations.
17391739 .. ipython:: python
17401740 :suppress:
17411741
1742- for f in [' default.csv' , ' formatted.csv' ]:
1743- if os.path.exists(f):
1744- os.remove(f)
1742+ for f in [' default.csv' , ' formatted.csv' ]:
1743+ if os.path.exists(f):
1744+ os.remove(f)
17451745
17461746 Format specifiers
17471747~~~~~~~~~~~~~~~~~
@@ -1765,24 +1765,24 @@ Different format specifiers have different effects on precision and output forma
17651765
17661766.. ipython:: python
17671767
1768- # Demonstrate different format effects
1769- df = pd.DataFrame({' number' : [123456789.123456789 ]})
1768+ # Demonstrate different format effects
1769+ df = pd.DataFrame({' number' : [123456789.123456789 ]})
17701770
1771- formats = {' %.6f ' : ' 6 decimal places' ,
1772- ' %.10g ' : ' 10 significant digits' ,
1773- ' %.6e ' : ' scientific notation' }
1771+ formats = {' %.6f ' : ' 6 decimal places' ,
1772+ ' %.10g ' : ' 10 significant digits' ,
1773+ ' %.6e ' : ' scientific notation' }
17741774
1775- for fmt, description in formats.items():
1776- df.to_csv(' temp.csv' , index = False , float_format = fmt)
1777- with open (' temp.csv' , ' r' ) as f:
1778- csv_content = f.read().strip().split(' \n ' )[1 ]
1779- print (f " { description:20 } : { csv_content} " )
1775+ for fmt, description in formats.items():
1776+ df.to_csv(' temp.csv' , index = False , float_format = fmt)
1777+ with open (' temp.csv' , ' r' ) as f:
1778+ csv_content = f.read().strip().split(' \n ' )[1 ]
1779+ print (f " { description:20 } : { csv_content} " )
17801780
17811781 .. ipython:: python
17821782 :suppress:
17831783
1784- if os.path.exists(' temp.csv' ):
1785- os.remove(' temp.csv' )
1784+ if os.path.exists(' temp.csv' ):
1785+ os.remove(' temp.csv' )
17861786
17871787 Best practices
17881788~~~~~~~~~~~~~~
@@ -1792,86 +1792,86 @@ Best practices
17921792
17931793.. ipython:: python
17941794
1795- # High precision example
1796- scientific_data = pd.DataFrame({
1797- ' measurement' : [1.23456789012345e-10 , 9.87654321098765e15 ]
1798- })
1799- scientific_data.to_csv(' scientific.csv' , index = False , float_format = ' %.17g ' )
1795+ # High precision example
1796+ scientific_data = pd.DataFrame({
1797+ ' measurement' : [1.23456789012345e-10 , 9.87654321098765e15 ]
1798+ })
1799+ scientific_data.to_csv(' scientific.csv' , index = False , float_format = ' %.17g ' )
18001800
18011801 .. ipython:: python
18021802 :suppress:
18031803
1804- if os.path.exists(' scientific.csv' ):
1805- os.remove(' scientific.csv' )
1804+ if os.path.exists(' scientific.csv' ):
1805+ os.remove(' scientific.csv' )
18061806
18071807 **For financial data**:
18081808 Use fixed decimal places like ``float_format='%.2f'``:
18091809
18101810.. ipython:: python
18111811
1812- # Financial data example
1813- financial_data = pd.DataFrame({
1814- ' price' : [19.99 , 1234.56 , 0.01 ]
1815- })
1816- financial_data.to_csv(' financial.csv' , index = False , float_format = ' %.2f ' )
1812+ # Financial data example
1813+ financial_data = pd.DataFrame({
1814+ ' price' : [19.99 , 1234.56 , 0.01 ]
1815+ })
1816+ financial_data.to_csv(' financial.csv' , index = False , float_format = ' %.2f ' )
18171817
18181818 .. ipython:: python
18191819 :suppress:
18201820
1821- if os.path.exists(' financial.csv' ):
1822- os.remove(' financial.csv' )
1821+ if os.path.exists(' financial.csv' ):
1822+ os.remove(' financial.csv' )
18231823
18241824 **For ensuring exact roundtrip**:
18251825 Test your specific data to find the minimum precision needed:
18261826
18271827.. ipython:: python
18281828
1829- def test_roundtrip_precision (df , float_format ):
1830- """ Test if a float_format preserves data during CSV roundtrip."""
1831- df.to_csv(' test.csv' , index = False , float_format = float_format)
1832- df_read = pd.read_csv(' test.csv' )
1833- return df.equals(df_read)
1829+ def test_roundtrip_precision (df , float_format ):
1830+ """ Test if a float_format preserves data during CSV roundtrip."""
1831+ df.to_csv(' test.csv' , index = False , float_format = float_format)
1832+ df_read = pd.read_csv(' test.csv' )
1833+ return df.equals(df_read)
18341834
1835- # Test data
1836- test_df = pd.DataFrame({' values' : [123.456789 , 0.000123456 , 1.23e15 ]})
1835+ # Test data
1836+ test_df = pd.DataFrame({' values' : [123.456789 , 0.000123456 , 1.23e15 ]})
18371837
1838- # Test different precisions
1839- for fmt in [' %.6g ' , ' %.10g ' , ' %.15g ' ]:
1840- success = test_roundtrip_precision(test_df, fmt)
1841- print (f " Format { fmt} : { ' ✓' if success else ' ✗' } roundtrip success " )
1838+ # Test different precisions
1839+ for fmt in [' %.6g ' , ' %.10g ' , ' %.15g ' ]:
1840+ success = test_roundtrip_precision(test_df, fmt)
1841+ print (f " Format { fmt} : { ' ✓' if success else ' ✗' } roundtrip success " )
18421842
18431843 .. ipython:: python
18441844 :suppress:
18451845
1846- if os.path.exists(' test.csv' ):
1847- os.remove(' test.csv' )
1846+ if os.path.exists(' test.csv' ):
1847+ os.remove(' test.csv' )
18481848
18491849 **dtype Preservation Note**:
18501850 Be aware that the CSV format does not preserve NumPy dtypes. By default, all numeric data
18511851 will be read back as ``float64`` or ``int64`` regardless of the original dtype:
18521852
18531853.. ipython:: python
18541854
1855- # dtype preservation example
1856- original_df = pd.DataFrame({
1857- ' float32_col' : np.array([1.23 ], dtype = np.float32),
1858- ' float64_col' : np.array([1.23 ], dtype = np.float64)
1859- })
1855+ # dtype preservation example
1856+ original_df = pd.DataFrame({
1857+ ' float32_col' : np.array([1.23 ], dtype = np.float32),
1858+ ' float64_col' : np.array([1.23 ], dtype = np.float64)
1859+ })
18601860
1861- print (" Original dtypes:" )
1862- print (original_df.dtypes)
1861+ print (" Original dtypes:" )
1862+ print (original_df.dtypes)
18631863
1864- original_df.to_csv(' dtypes.csv' , index = False )
1865- read_df = pd.read_csv(' dtypes.csv' )
1864+ original_df.to_csv(' dtypes.csv' , index = False )
1865+ read_df = pd.read_csv(' dtypes.csv' )
18661866
1867- print (" \n After CSV roundtrip:" )
1868- print (read_df.dtypes)
1867+ print (" \n After CSV roundtrip:" )
1868+ print (read_df.dtypes)
18691869
18701870 .. ipython:: python
18711871 :suppress:
18721872
1873- if os.path.exists(' dtypes.csv' ):
1874- os.remove(' dtypes.csv' )
1873+ if os.path.exists(' dtypes.csv' ):
1874+ os.remove(' dtypes.csv' )
18751875
18761876 Writing a formatted string
18771877++++++++++++++++++++++++++
0 commit comments