From 6423201b410aa35290a0083b6f52885889345d43 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 27 Mar 2025 16:12:03 -0700 Subject: [PATCH 1/3] fix: Remove unneeded local build of county FIPS dataset --- changelog_entry.yaml | 4 ++++ policyengine_us_data/geography/county_fips.py | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..2be38dae 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + removed: + - Unneeded build of county FIPS dataset to local folder \ No newline at end of file diff --git a/policyengine_us_data/geography/county_fips.py b/policyengine_us_data/geography/county_fips.py index 644bf8a2..d85be8d5 100644 --- a/policyengine_us_data/geography/county_fips.py +++ b/policyengine_us_data/geography/county_fips.py @@ -76,8 +76,6 @@ def generate_county_fips_2020_dataset(): repo_file_path="county_fips_2020.csv.gz", ) - county_fips.to_csv(LOCAL_FOLDER / "county_fips.csv.gz", compression="gzip") - if __name__ == "__main__": generate_county_fips_2020_dataset() From e30f6f8f4b6f2aeb8d1db1429cad186fdeab12f0 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 27 Mar 2025 18:24:33 -0700 Subject: [PATCH 2/3] fix: Properly select encoding for dataset --- policyengine_us_data/geography/county_fips.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/policyengine_us_data/geography/county_fips.py b/policyengine_us_data/geography/county_fips.py index d85be8d5..51ac5436 100644 --- a/policyengine_us_data/geography/county_fips.py +++ b/policyengine_us_data/geography/county_fips.py @@ -29,6 +29,7 @@ def generate_county_fips_2020_dataset(): raise ValueError( f"Failed to download county FIPS codes: {response.status_code}" ) + response.encoding = "utf-8" county_fips_raw = StringIO(response.text) @@ -42,6 +43,7 @@ def generate_county_fips_2020_dataset(): "COUNTYFP": str, "COUNTYNAME": str, }, + encoding="utf-8", ) county_fips = county_fips.rename( @@ -66,7 +68,7 @@ def generate_county_fips_2020_dataset(): csv_buffer = BytesIO() # Save CSV into buffer object and reset pointer - county_fips.to_csv(csv_buffer, index=False, compression="gzip") + county_fips.to_csv(csv_buffer, index=False, compression="gzip", encoding="utf-8") csv_buffer.seek(0) # Upload to Hugging Face From 81dabae4e5d18d3b6d75c101d982fa2fd2aa67fa Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 27 Mar 2025 18:26:54 -0700 Subject: [PATCH 3/3] chore: Remove unneeded test, lint & changelog --- changelog_entry.yaml | 2 ++ policyengine_us_data/geography/county_fips.py | 4 +++- policyengine_us_data/tests/test_datasets/test_county_fips.py | 4 ---- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index 2be38dae..83ae728d 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -1,4 +1,6 @@ - bump: patch changes: + changed: + - Explicitly specified encoding while building county FIPS dataset removed: - Unneeded build of county FIPS dataset to local folder \ No newline at end of file diff --git a/policyengine_us_data/geography/county_fips.py b/policyengine_us_data/geography/county_fips.py index 51ac5436..3e5ac518 100644 --- a/policyengine_us_data/geography/county_fips.py +++ b/policyengine_us_data/geography/county_fips.py @@ -68,7 +68,9 @@ def generate_county_fips_2020_dataset(): csv_buffer = BytesIO() # Save CSV into buffer object and reset pointer - county_fips.to_csv(csv_buffer, index=False, compression="gzip", encoding="utf-8") + county_fips.to_csv( + csv_buffer, index=False, compression="gzip", encoding="utf-8" + ) csv_buffer.seek(0) # Upload to Hugging Face diff --git a/policyengine_us_data/tests/test_datasets/test_county_fips.py b/policyengine_us_data/tests/test_datasets/test_county_fips.py index 72748a2b..ad1f10c5 100644 --- a/policyengine_us_data/tests/test_datasets/test_county_fips.py +++ b/policyengine_us_data/tests/test_datasets/test_county_fips.py @@ -93,10 +93,6 @@ def test_successful_download_and_processing( # Check that upload_to_hf was called mock_upload_to_hf.assert_called_once() - # Check that to_csv was called with the right path - local_csv_call = mock_to_csv.call_args_list[-1] - assert str(LOCAL_FOLDER / "county_fips.csv.gz") in str(local_csv_call) - def test_download_failure(): """Test handling of download failure"""