diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index f9c5b49a..dce1daf4 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -58,6 +58,10 @@ jobs: if: inputs.full_suite run: make download + - name: Create and load calibration targets database + if: inputs.full_suite + run: make database + - name: Build datasets if: inputs.full_suite run: make data @@ -90,4 +94,4 @@ jobs: with: branch: gh-pages folder: docs/_build/html - clean: true \ No newline at end of file + clean: true diff --git a/Makefile b/Makefile index 4124babc..b03e23d5 100644 --- a/Makefile +++ b/Makefile @@ -22,12 +22,6 @@ changelog: download: python policyengine_us_data/storage/download_private_prerequisites.py -targets: - python policyengine_us_data/storage/calibration_targets/pull_hardcoded_targets.py - python policyengine_us_data/storage/calibration_targets/pull_age_targets.py - python policyengine_us_data/storage/calibration_targets/pull_soi_targets.py - python policyengine_us_data/storage/calibration_targets/pull_snap_targets.py - upload: python policyengine_us_data/storage/upload_completed_datasets.py @@ -61,10 +55,12 @@ documentation-dev: database: python policyengine_us_data/db/create_database_tables.py - python policyengine_us_data/db/load_age_targets.py - -clean-database: - rm *.db + python policyengine_us_data/db/create_initial_strata.py + python policyengine_us_data/db/etl_age.py + python policyengine_us_data/db/etl_medicaid.py + python policyengine_us_data/db/etl_snap.py + python policyengine_us_data/db/etl_irs_soi.py + python policyengine_us_data/db/validate_database.py data: python policyengine_us_data/utils/uprating.py @@ -80,6 +76,7 @@ data: clean: rm -f policyengine_us_data/storage/*.h5 + rm -f policyengine_us_data/storage/*.db git clean -fX -- '*.csv' rm -rf policyengine_us_data/docs/_build diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..b3a3fb5d 100644 --- 
a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - add SQLite database for calibration targets diff --git a/policyengine_us_data/db/create_database_tables.py b/policyengine_us_data/db/create_database_tables.py index ec42fb61..df03772d 100644 --- a/policyengine_us_data/db/create_database_tables.py +++ b/policyengine_us_data/db/create_database_tables.py @@ -1,16 +1,20 @@ import logging import hashlib from typing import List, Optional +from enum import Enum from sqlalchemy import event, UniqueConstraint from sqlalchemy.orm.attributes import get_history - from sqlmodel import ( Field, Relationship, SQLModel, create_engine, ) +from policyengine_us.system import system + +from policyengine_us_data.storage import STORAGE_FOLDER + logging.basicConfig( level=logging.INFO, @@ -20,6 +24,12 @@ logger = logging.getLogger(__name__) +# An Enum type to ensure the variable exists in policyengine-us +USVariable = Enum( + "USVariable", {name: name for name in system.variables.keys()}, type=str +) + + class Stratum(SQLModel, table=True): """Represents a unique population subgroup (stratum).""" @@ -79,7 +89,7 @@ class StratumConstraint(SQLModel, table=True): __tablename__ = "stratum_constraints" stratum_id: int = Field(foreign_key="strata.stratum_id", primary_key=True) - constraint_variable: str = Field( + constraint_variable: USVariable = Field( primary_key=True, description="The variable the constraint applies to (e.g., 'age').", ) @@ -112,7 +122,7 @@ class Target(SQLModel, table=True): ) target_id: Optional[int] = Field(default=None, primary_key=True) - variable: str = Field( + variable: USVariable = Field( description="A variable defined in policyengine-us (e.g., 'income_tax')." 
def main():
    """Seed the database with one geographic stratum per ``UCGID`` member.

    Every member of the ``UCGID`` enum becomes a ``Stratum`` row in
    ``policy_data.db`` (under ``STORAGE_FOLDER``) carrying a single
    ``ucgid_str in <code>`` constraint.  The parent/child links follow the
    list returned by ``get_hierarchical_codes()``: element 0 is the member's
    own code and element 1, when present, is its parent's code.
    """
    # Collect (name, own code, parent code) for every enum member.
    hierarchy_rows = []
    for member in UCGID:
        lineage = member.get_hierarchical_codes()
        parent_code = lineage[1] if len(lineage) > 1 else None
        hierarchy_rows.append(
            {"name": member.name, "code": lineage[0], "parent": parent_code}
        )

    # Rows without a parent sort first, then by parent code and own code.
    hierarchy_df = pd.DataFrame(hierarchy_rows)
    hierarchy_df = hierarchy_df.sort_values(
        ["parent", "code"], na_position="first"
    )
    hierarchy_df = hierarchy_df.reset_index(drop=True)

    engine = create_engine(f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}")

    # ucgid code -> auto-generated stratum_id, filled in as rows are flushed.
    stratum_id_by_code: Dict[str, int] = {}

    with Session(engine) as session:
        for name, code, parent in zip(
            hierarchy_df["name"], hierarchy_df["code"], hierarchy_df["parent"]
        ):
            # A parent that has not been inserted yet resolves to None.
            parent_id = stratum_id_by_code.get(parent) if parent else None

            stratum = Stratum(
                parent_stratum_id=parent_id,
                notes=f"{name} (ucgid {code})",
                stratum_group_id=1,
            )
            stratum.constraints_rel = [
                StratumConstraint(
                    constraint_variable="ucgid_str",
                    operation="in",
                    value=code,
                )
            ]

            session.add(stratum)
            # Flush so the database assigns stratum_id before children need it.
            session.flush()
            stratum_id_by_code[code] = stratum.stratum_id

        session.commit()
def transform_age_data(age_data, docs):
    """Reshape a wide ACS S0101 age table into one row per geography and age band.

    Args:
        age_data: Wide table with ``GEO_ID``, ``NAME`` and one column per
            census variable code.
        docs: Census variable documentation; ``docs["variables"]`` maps a
            variable code to a dict with ``label``, ``group`` and ``concept``.

    Returns:
        Long DataFrame with columns ``ucgid_str``, ``age_range``, ``value``,
        ``age_greater_than``, ``age_less_than``, ``variable``, ``reform_id``,
        ``source_id`` and ``active``.  The two age bounds are strict: one
        below the band's lower edge and one above its upper edge.
    """
    # Census label -> variable code, restricted to the labels we track.
    variable_for_label = {
        meta["label"]: code
        for code, meta in docs["variables"].items()
        if meta["group"] == "S0101"
        and meta["concept"] == "Age and Sex"
        and meta["label"] in LABEL_TO_SHORT
    }

    # Variable code -> short band name such as "0-4".
    short_name_for_variable = {
        variable_for_label[label]: short_name
        for label, short_name in LABEL_TO_SHORT.items()
    }

    wide = (
        age_data.copy()
        .drop(columns="NAME")
        .rename({"GEO_ID": "ucgid_str"}, axis=1)
        .rename(columns=short_name_for_variable)
    )
    wide = wide[["ucgid_str"] + list(AGE_COLS)]

    # Drop Puerto Rico's district and state records, if present.
    is_puerto_rico = wide["ucgid_str"].isin(["5001800US7298", "0400000US72"])

    long_df = wide[~is_puerto_rico].melt(
        id_vars="ucgid_str",
        value_vars=AGE_COLS,
        var_name="age_range",
        value_name="value",
    )

    # "ge-le" band labels become strict (exclusive) integer bounds.
    bounds = long_df["age_range"].str.split("-", expand=True).astype(int)
    bounds.columns = ["ge", "le"]
    long_df["age_greater_than"] = bounds["ge"] - 1
    long_df["age_less_than"] = bounds["le"] + 1

    long_df["variable"] = "person_count"
    long_df["reform_id"] = 0
    long_df["source_id"] = 1
    long_df["active"] = True

    return long_df
"State": + assert len(set(df_long.ucgid_str)) == 51 + elif geo == "District": + assert len(set(df_long.ucgid_str)) == 436 + else: + raise ValueError('geo must be one of "National", "State", "District"') + + # Prepare to load data ----------- + DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}" + engine = create_engine(DATABASE_URL) + + if stratum_lookup is None: + if geo != "National": + raise ValueError("Include stratum_lookup unless National geo") + stratum_lookup = {"National": {}} + else: + stratum_lookup[geo] = {} + + with Session(engine) as session: + for _, row in df_long.iterrows(): + # Create the parent Stratum object. + # We will attach children to it before adding it to the session. + note = f"Age: {row['age_range']}, Geo: {row['ucgid_str']}" + parent_geo = get_parent_geo(geo) + parent_stratum_id = ( + stratum_lookup[parent_geo][row["age_range"]] + if parent_geo + else None + ) + + new_stratum = Stratum( + parent_stratum_id=parent_stratum_id, + stratum_group_id=0, + notes=note, + ) + + # Create constraints and link them to the parent's relationship attribute. + new_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value=row["ucgid_str"], + ), + StratumConstraint( + constraint_variable="age", + operation="greater_than", + value=str(row["age_greater_than"]), + ), + ] + + age_lt_value = row["age_less_than"] + if not np.isinf(age_lt_value): + new_stratum.constraints_rel.append( + StratumConstraint( + constraint_variable="age", + operation="less_than", + value=str(row["age_less_than"]), + ) + ) + + # Create the Target and link it to the parent. + new_stratum.targets_rel.append( + Target( + variable=row["variable"], + period=year, + value=row["value"], + source_id=row["source_id"], + active=row["active"], + ) + ) + + # Add ONLY the parent object to the session. + # The 'cascade' setting will handle the children automatically. 
if __name__ == "__main__":

    # --- ETL: Extract, Transform, Load ----
    year = 2023
    geographies = ("National", "State", "District")

    # ---- Extract: variable docs, then one S0101 table per geography ----
    docs = get_census_docs(year)
    raw_tables = {
        geo: pull_acs_table("S0101", geo, year) for geo in geographies
    }

    # ---- Transform: wide -> long for every geography ----
    long_tables = {
        geo: transform_age_data(raw_tables[geo], docs) for geo in geographies
    }

    # ---- Load: coarsest geography first; each load returns the lookup
    # that the next, finer geography uses to find its parent strata ----
    strata_lookup = None
    for geo in geographies:
        strata_lookup = load_age_data(
            long_tables[geo], geo, year, strata_lookup
        )
def create_records(df, breakdown_variable, target_variable):
    """Build the standard long-format record table for one target column.

    Args:
        df: Source table; must contain ``ucgid_str`` plus the two columns
            named by ``breakdown_variable`` and ``target_variable``.
        breakdown_variable: Name of the column holding the breakdown value.
        target_variable: Name of the column holding the target value.

    Returns:
        A new DataFrame (``df`` is left untouched) with the same index and
        the columns ``ucgid_str``, ``breakdown_variable``,
        ``breakdown_value``, ``target_variable`` and ``target_value``.
    """
    # ``assign`` works on a copy and appends the columns in keyword order.
    return df[["ucgid_str"]].assign(
        breakdown_variable=breakdown_variable,
        breakdown_value=df[breakdown_variable],
        target_variable=target_variable,
        target_value=df[target_variable],
    )
def convert_district_data(
    input_df: pd.DataFrame,
    mapping_matrix: np.ndarray,  # rows: old districts, columns: new districts
    new_district_codes,
) -> pd.DataFrame:
    """Re-express district rows on the post-2020-census district boundaries.

    Rows whose ``ucgid_str`` starts with ``5001800US`` are sorted by
    ``ucgid_str`` and their ``target_value`` vector is multiplied by the
    transpose of ``mapping_matrix``.  All other rows pass through unchanged.

    Args:
        input_df: Long-format table with ``ucgid_str``, ``breakdown_variable``,
            ``breakdown_value``, ``target_variable`` and ``target_value``.
        mapping_matrix: Old-to-new district weights; its first dimension must
            match the number of district rows in ``input_df``.
        new_district_codes: ``ucgid_str`` values for the new districts, in
            the column order of ``mapping_matrix``.

    Returns:
        A new DataFrame: the non-district rows followed by the remapped
        district rows, with a fresh 0..n-1 index.
    """
    is_district = input_df["ucgid_str"].str.startswith("5001800US")

    # Sort so the value vector lines up with the mapping matrix rows.
    old_districts = (
        input_df[is_district].sort_values("ucgid_str").reset_index(drop=True)
    )
    remapped_values = mapping_matrix.T @ old_districts["target_value"].to_numpy()

    # Same schema as the input; descriptive columns are carried positionally.
    remapped = pd.DataFrame(
        {
            "ucgid_str": new_district_codes,
            "breakdown_variable": old_districts["breakdown_variable"],
            "breakdown_value": old_districts["breakdown_value"],
            "target_variable": old_districts["target_variable"],
            "target_value": remapped_values,
        }
    )

    return pd.concat([input_df[~is_district], remapped], ignore_index=True)
def transform_soi_data(raw_df):
    """Turn the raw IRS SOI congressional-district file into long target tables.

    The raw file mixes national rows (``STATE == "US"``), state rows
    (``CONG_DISTRICT == 0``) and district rows.  Each row is tagged with a
    ``ucgid_str`` and every target is reshaped into the long record format
    produced by ``make_records`` / ``make_agi_long``.  District values are
    then moved onto post-2020-census districts with ``convert_district_data``.

    Args:
        raw_df: The SOI table as read by ``extract_soi_data``; must contain
            ``STATE``, ``STATEFIPS``, ``CONG_DISTRICT``, ``agi_stub`` and the
            ``N<code>`` / ``A<code>`` columns listed in ``TARGETS``.

    Returns:
        A list of long-format DataFrames in this fixed order, which the
        loader relies on:

        1. for every ``TARGETS`` entry, a counts frame then an amounts frame;
        2. one frame of adjusted gross income totals;
        3. the frames returned by ``make_agi_long`` for the AGI-band split.

    Raises:
        AssertionError: if the row counts do not match the expected
            436 districts + 51 states + 1 nation layout.
    """
    TARGETS = [
        dict(code="59661", name="eitc", breakdown=("eitc_child_count", 0)),
        dict(code="59662", name="eitc", breakdown=("eitc_child_count", 1)),
        dict(code="59663", name="eitc", breakdown=("eitc_child_count", 2)),
        dict(code="59664", name="eitc", breakdown=("eitc_child_count", "3+")),
        dict(
            code="04475",
            name="qualified_business_income_deduction",
            breakdown=None,
        ),
        dict(code="18500", name="real_estate_taxes", breakdown=None),
        dict(code="01000", name="net_capital_gain", breakdown=None),
        dict(code="01400", name="taxable_ira_distributions", breakdown=None),
        dict(code="00300", name="taxable_interest_income", breakdown=None),
        dict(code="00400", name="tax_exempt_interest_income", breakdown=None),
        dict(code="00600", name="dividend_income", breakdown=None),
        dict(code="00650", name="qualified_dividend_income", breakdown=None),
        dict(
            code="26270",
            name="tax_unit_partnership_s_corp_income",
            breakdown=None,
        ),
        dict(code="02500", name="taxable_social_security", breakdown=None),
        dict(code="02300", name="unemployment_compensation", breakdown=None),
        dict(code="17000", name="medical_expense_deduction", breakdown=None),
        dict(code="01700", name="taxable_pension_income", breakdown=None),
        dict(code="11070", name="refundable_ctc", breakdown=None),
        dict(code="18425", name="salt", breakdown=None),
        dict(code="06500", name="income_tax", breakdown=None),
    ]

    # Filter first, then copy: avoids copying the whole frame three times and
    # gives each subset its own data so the column assignments below are safe.

    # National ---------------
    national_df = raw_df.loc[raw_df.STATE == "US"].copy()
    national_df["ucgid_str"] = "0100000US"

    # State -------------------
    # These rows include agi_stub == 0, which is the one to use whenever the
    # data should not be divided by AGI class (i.e., agi_stub).
    state_df = raw_df.loc[
        (raw_df.STATE != "US") & (raw_df.CONG_DISTRICT == 0)
    ].copy()
    state_df["ucgid_str"] = "0400000US" + state_df["STATEFIPS"].astype(
        str
    ).str.zfill(2)

    # District ------------------
    # A state whose largest CONG_DISTRICT is 0 has no numbered districts, so
    # its CONG_DISTRICT == 0 rows are kept as that state's district rows too.
    max_cong_district_by_state = raw_df.groupby("STATE")[
        "CONG_DISTRICT"
    ].transform("max")
    district_df = raw_df.loc[
        ((raw_df["CONG_DISTRICT"] > 0) | (max_cong_district_by_state == 0))
        & (raw_df["STATE"] != "US")
    ].copy()
    district_df["STATEFIPS"] = (
        district_df["STATEFIPS"].astype(int).astype(str).str.zfill(2)
    )
    district_df["CONG_DISTRICT"] = (
        district_df["CONG_DISTRICT"].astype(int).astype(str).str.zfill(2)
    )
    district_df["ucgid_str"] = (
        "5001800US" + district_df["STATEFIPS"] + district_df["CONG_DISTRICT"]
    )
    district_df = district_df[~district_df["ucgid_str"].isin(TERRITORY_UCGIDS)]

    assert (
        district_df.shape[0] % 436 == 0
    ), "district rows are not a multiple of 436 districts"

    all_df = pd.concat([national_df, state_df, district_df])

    # "Marginal" over AGI bands, which this data set is organized according to
    all_marginals = all_df.loc[all_df.agi_stub == 0].copy()
    assert (
        all_marginals.shape[0] == 436 + 51 + 1
    ), "expected one agi_stub == 0 row per district, state and the nation"

    # Collect targets from the SOI file
    records = []
    for spec in TARGETS:
        df = all_marginals.copy()

        breakdown_col = None
        if spec["breakdown"] is not None:
            # Constant column: every row of this target shares the breakdown.
            breakdown_col, breakdown_value = spec["breakdown"]
            df[breakdown_col] = breakdown_value

        rec_counts, rec_amounts = make_records(
            df,
            count_col=f"N{spec['code']}",  # e.g. 'N59661'
            amount_col=f"A{spec['code']}",  # e.g. 'A59661'
            amount_name=spec["name"],
            breakdown_col=breakdown_col,
            multiplier=1_000,
        )
        records.extend([rec_counts, rec_amounts])

    # AGI Processing (separate, doesn't have a count column).
    # Built straight from all_marginals rather than from whatever frame the
    # loop above happened to leave behind.
    agi_totals = all_marginals[["ucgid_str"]].copy()
    agi_totals["breakdown_variable"] = "one"
    agi_totals["breakdown_value"] = 1
    agi_totals["target_variable"] = "adjusted_gross_income"
    agi_totals["target_value"] = all_marginals["A00100"] * 1_000

    records.append(agi_totals)

    # Note: national counts only have agi_stub = 0
    all_agi_splits = all_df.loc[all_df.agi_stub != 0].copy()
    assert (
        all_agi_splits.shape[0] % (436 + 51 + 0) == 0
    ), "AGI-band rows are not a multiple of 436 districts + 51 states"

    records.extend(make_agi_long(all_agi_splits))

    # Pre- to Post- 2020 Census redistricting
    mapping = get_district_mapping()
    converted = [
        convert_district_data(
            r, mapping["mapping_matrix"], mapping["new_codes"]
        )
        for r in records
    ]

    return converted
len(ucgid_i) == 9: # National. + new_stratum = Stratum( + parent_stratum_id=None, stratum_group_id=0, notes=note + ) + elif len(ucgid_i) == 11: # State + new_stratum = Stratum( + parent_stratum_id=stratum_lookup["National"], + stratum_group_id=0, + notes=note, + ) + elif len(ucgid_i) == 13: # District + new_stratum = Stratum( + parent_stratum_id=stratum_lookup["State"][ + "0400000US" + ucgid_i[9:11] + ], + stratum_group_id=0, + notes=note, + ) + + new_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value=ucgid_i, + ), + ] + if n_children == "3+": + new_stratum.constraints_rel.append( + StratumConstraint( + constraint_variable="eitc_child_count", + operation="greater_than", + value="2", + ) + ) + else: + new_stratum.constraints_rel.append( + StratumConstraint( + constraint_variable="eitc_child_count", + operation="equals", + value=f"{n_children}", + ) + ) + + new_stratum.targets_rel = [ + Target( + variable="eitc", + period=year, + value=eitc_amount_i.iloc[i][["target_value"]].values[0], + source_id=5, + active=True, + ) + ] + + session.add(new_stratum) + session.flush() + + if len(ucgid_i) == 9: + stratum_lookup["National"] = new_stratum.stratum_id + elif len(ucgid_i) == 11: + stratum_lookup["State"][ucgid_i] = new_stratum.stratum_id + + session.commit() + + # There are no breakdown variables used in the following set + first_agi_index = [ + i + for i in range(len(long_dfs)) + if long_dfs[i][["target_variable"]].values[0] + == "adjusted_gross_income" + and long_dfs[i][["breakdown_variable"]].values[0] == "one" + ][0] + for j in range(8, first_agi_index, 2): + count_j, amount_j = long_dfs[j], long_dfs[j + 1] + amount_variable_name = amount_j.iloc[0][["target_variable"]].values[0] + print( + f"Loading amount data for IRS SOI data on {amount_variable_name}" + ) + for i in range(count_j.shape[0]): + ucgid_i = count_j[["ucgid_str"]].iloc[i].values[0] + + # Reusing an existing stratum this time, since there is no 
breakdown + stratum = get_simple_stratum_by_ucgid(session, ucgid_i) + amount_value = amount_j.iloc[i][["target_value"]].values[0] + + stratum.targets_rel.append( + Target( + variable=amount_variable_name, + period=year, + value=amount_value, + source_id=5, + active=True, + ) + ) + + session.add(stratum) + session.flush() + + session.commit() + + # Adjusted Gross Income ------ + agi_values = long_dfs[first_agi_index] + assert agi_values[["target_variable"]].values[0] == "adjusted_gross_income" + + for i in range(agi_values.shape[0]): + ucgid_i = agi_values[["ucgid_str"]].iloc[i].values[0] + stratum = get_simple_stratum_by_ucgid(session, ucgid_i) + stratum.targets_rel.append( + Target( + variable="adjusted_gross_income", + period=year, + value=agi_values.iloc[i][["target_value"]].values[0], + source_id=5, + active=True, + ) + ) + session.add(stratum) + session.flush() + + session.commit() + + agi_person_count_dfs = [ + df + for df in long_dfs[(first_agi_index + 1) :] + if df["target_variable"].iloc[0] == "person_count" + ] + + for agi_df in agi_person_count_dfs: + agi_stub = agi_df.iloc[0][["breakdown_value"]].values[0] + agi_income_lower, agi_income_upper = AGI_STUB_TO_INCOME_RANGE[agi_stub] + + # Make a National Stratum for each AGI Stub even w/o associated national target + note = f"Geo: 0100000US, AGI > {agi_income_lower}, AGI < {agi_income_upper}" + nat_stratum = Stratum( + parent_stratum_id=None, stratum_group_id=0, notes=note + ) + nat_stratum.constraints_rel.extend( + [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value="0100000US", + ), + StratumConstraint( + constraint_variable="adjusted_gross_income", + operation="greater_than", + value=str(agi_income_lower), + ), + StratumConstraint( + constraint_variable="adjusted_gross_income", + operation="less_than", + value=str(agi_income_upper), + ), + ] + ) + session.add(nat_stratum) + session.flush() + + stratum_lookup = { + "National": nat_stratum.stratum_id, + "State": {}, + 
def main():
    """Run the IRS SOI pipeline end to end: extract, transform, then load."""
    # NOTE: this data year predates the finalization of the 2020 Census
    # redistricting, so the transform step applies a district mapping.
    soi_year = 2022

    # Extract -> Transform -> Load
    soi_raw = extract_soi_data()
    soi_long_frames = transform_soi_data(soi_raw)
    load_soi_data(soi_long_frames, soi_year)
def extract_medicaid_data(year, timeout=60):
    """Download the two raw Medicaid enrollment sources.

    Args:
        year: ACS 1-year survey year used in the Census API URL.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A ``(cd_survey_df, state_admin_df)`` tuple:

        * ``cd_survey_df`` - ACS subject table group S2704 for every
          congressional district, exactly as returned by the Census API
          (values are not converted from the JSON payload).
        * ``state_admin_df`` - the CSV behind the data.medicaid.gov dataset
          item below, read with ``pd.read_csv``.

    Raises:
        requests.exceptions.HTTPError: if either API call returns an error
            status.
        requests.exceptions.Timeout: if either API call exceeds ``timeout``.
    """
    base_url = (
        f"https://api.census.gov/data/{year}/acs/acs1/subject?get=group(S2704)"
    )
    url = f"{base_url}&for=congressional+district:*"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    data = response.json()

    # The first row of the payload is the header; the rest are records.
    headers = data[0]
    data_rows = data[1:]
    cd_survey_df = pd.DataFrame(data_rows, columns=headers)

    item = "6165f45b-ca93-5bb5-9d06-db29c692a360"
    metadata_response = requests.get(
        f"https://data.medicaid.gov/api/1/metastore/schemas/dataset/items/{item}?show-reference-ids=false",
        timeout=timeout,
    )
    # Fail here on an HTTP error instead of on a confusing JSON/KeyError below.
    metadata_response.raise_for_status()
    metadata = metadata_response.json()

    data_url = metadata["distribution"][0]["data"]["downloadURL"]
    state_admin_df = pd.read_csv(data_url)

    return cd_survey_df, state_admin_df
"0400000US" + state_df["FIPS"].astype(str) + + cd_df = cd_df.rename( + columns={ + "S2704_C02_006E": "medicaid_enrollment", + "GEO_ID": "ucgid_str", + } + ) + cd_df = cd_df.loc[cd_df.state != "72"] + + out_cols = ["ucgid_str", "medicaid_enrollment"] + return state_df[out_cols], cd_df[out_cols] + + +def load_medicaid_data(long_state, long_cd, year): + + DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}" + engine = create_engine(DATABASE_URL) + + stratum_lookup = {} + + with Session(engine) as session: + # National ---------------- + nat_stratum = Stratum( + parent_stratum_id=None, + stratum_group_id=0, + notes="Geo: 0100000US Medicaid Enrolled", + ) + nat_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value="0100000US", + ), + StratumConstraint( + constraint_variable="medicaid_enrolled", + operation="equals", + value="True", + ), + ] + # No target at the national level is provided at this time. + + session.add(nat_stratum) + session.flush() + stratum_lookup["National"] = nat_stratum.stratum_id + + # State ------------------- + stratum_lookup["State"] = {} + for _, row in long_state.iterrows(): + + note = f"Geo: {row['ucgid_str']} Medicaid Enrolled" + parent_stratum_id = nat_stratum.stratum_id + + new_stratum = Stratum( + parent_stratum_id=parent_stratum_id, + stratum_group_id=0, + notes=note, + ) + new_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value=row["ucgid_str"], + ), + StratumConstraint( + constraint_variable="medicaid_enrolled", + operation="equals", + value="True", + ), + ] + new_stratum.targets_rel.append( + Target( + variable="person_count", + period=year, + value=row["medicaid_enrollment"], + source_id=2, + active=True, + ) + ) + session.add(new_stratum) + session.flush() + stratum_lookup["State"][row["ucgid_str"]] = new_stratum.stratum_id + + # District ------------------- + for _, row in long_cd.iterrows(): + + note = f"Geo: 
{row['ucgid_str']} Medicaid Enrolled" + parent_stratum_id = stratum_lookup["State"][ + f'0400000US{row["ucgid_str"][-4:-2]}' + ] + + new_stratum = Stratum( + parent_stratum_id=parent_stratum_id, + stratum_group_id=0, + notes=note, + ) + new_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value=row["ucgid_str"], + ), + StratumConstraint( + constraint_variable="medicaid_enrolled", + operation="equals", + value="True", + ), + ] + new_stratum.targets_rel.append( + Target( + variable="person_count", + period=year, + value=row["medicaid_enrollment"], + source_id=2, + active=True, + ) + ) + session.add(new_stratum) + session.flush() + + session.commit() + + +if __name__ == "__main__": + + year = 2023 + + # Extract ------------------------------ + cd_survey_df, state_admin_df = extract_medicaid_data(year) + + # Transform ------------------- + long_state, long_cd = transform_medicaid_data( + state_admin_df, cd_survey_df, year + ) + + # Load ----------------------- + load_medicaid_data(long_state, long_cd, year) diff --git a/policyengine_us_data/db/etl_snap.py b/policyengine_us_data/db/etl_snap.py new file mode 100644 index 00000000..1fba44a4 --- /dev/null +++ b/policyengine_us_data/db/etl_snap.py @@ -0,0 +1,299 @@ +import requests +import zipfile +import io + +import pandas as pd +import numpy as np +import us +from sqlmodel import Session, create_engine + +from policyengine_us_data.storage import STORAGE_FOLDER + +from policyengine_us_data.db.create_database_tables import ( + Stratum, + StratumConstraint, + Target, +) +from policyengine_us_data.utils.census import ( + pull_acs_table, + STATE_NAME_TO_FIPS, +) + + +def extract_administrative_snap_data(year=2023): + """ + Downloads and extracts annual state-level SNAP data from the USDA FNS zip file. 
+ """ + url = "https://www.fns.usda.gov/sites/default/files/resource-files/snap-zip-fy69tocurrent-6.zip" + + # Note: extra complexity in request due to regional restrictions on downloads (e.g., Spain) + headers = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + + try: + session = requests.Session() + session.headers.update(headers) + + # Try to visit the main page first to get any necessary cookies + main_page = "https://www.fns.usda.gov/pd/supplemental-nutrition-assistance-program-snap" + try: + session.get(main_page, timeout=30) + except requests.exceptions.RequestException: + pass # Best-effort cookie warm-up: request errors on the main page are not fatal + + response = session.get(url, timeout=30, allow_redirects=True) + response.raise_for_status() + except requests.exceptions.RequestException as e: + print(f"Error downloading file: {e}") + # Retry once; alt_url is currently the same address as url + try: + alt_url = "https://www.fns.usda.gov/sites/default/files/resource-files/snap-zip-fy69tocurrent-6.zip" + response = session.get(alt_url, timeout=30, allow_redirects=True) + response.raise_for_status() + except requests.exceptions.RequestException as e2: + print(f"Alternative URL also failed: {e2}") + return None + + return zipfile.ZipFile(io.BytesIO(response.content)) + + +def extract_survey_snap_data(year): + return pull_acs_table("S2201", "District", year) + + +def transform_administrative_snap_data(zip_file, year): + filename = f"FY{str(year)[-2:]}.xlsx" + with zip_file.open(filename) as f: + xls = pd.ExcelFile(f) + tab_results = [] + for sheet_name in [ + "NERO", + "MARO", + "SERO", + "MWRO", + "SWRO", + "MPRO", + "WRO", + ]: + df_raw = pd.read_excel( + xls, sheet_name=sheet_name, header=None, dtype={0: str} + ) + + state_row_mask = (
+ df_raw[0].notna() + & df_raw[1].isna() + & ~df_raw[0].str.contains("Total", na=False) + & ~df_raw[0].str.contains("Footnote", na=False) + ) + + df_raw["State"] = df_raw.loc[state_row_mask, 0] + df_raw["State"] = df_raw["State"].ffill() + total_rows = df_raw[df_raw[0].eq("Total")].copy() + total_rows = total_rows.rename( + columns={ + 1: "Households", + 2: "Persons", + 3: "Cost", + } + ) + + state_totals = total_rows[ + [ + "State", + "Households", + "Persons", + "Cost", # Annual (Note: the CostPer* vars are monthly) + ] + ] + + tab_results.append(state_totals) + + results_df = pd.concat(tab_results) + + df_states = results_df.loc[ + results_df["State"].isin(STATE_NAME_TO_FIPS.keys()) + ].copy() + df_states["STATE_FIPS"] = df_states["State"].map(STATE_NAME_TO_FIPS) + df_states = ( + df_states.loc[~df_states["STATE_FIPS"].isna()] + .sort_values("STATE_FIPS") + .reset_index(drop=True) + ) + df_states["ucgid_str"] = "0400000US" + df_states["STATE_FIPS"] + + return df_states + + +def transform_survey_snap_data(raw_df): + df = raw_df.copy() + return df[["GEO_ID", "S2201_C03_001E"]].rename( + {"GEO_ID": "ucgid_str", "S2201_C03_001E": "snap_household_ct"}, axis=1 + )[ + ~df["GEO_ID"].isin( + [ # Puerto Rico's state and district + "0400000US72", + "5001800US7298", + ] + ) + ] + + +def load_administrative_snap_data(df_states, year): + + DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}" + engine = create_engine(DATABASE_URL) + + stratum_lookup = {} + + with Session(engine) as session: + # National ---------------- + nat_stratum = Stratum( + parent_stratum_id=None, + stratum_group_id=0, + notes="Geo: 0100000US Received SNAP Benefits", + ) + nat_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value="0100000US", + ), + StratumConstraint( + constraint_variable="snap", + operation="greater_than", + value="0", + ), + ] + # No target at the national level is provided at this time. 
Keeping it + # so that the state strata can have a parent stratum + + session.add(nat_stratum) + session.flush() + stratum_lookup["National"] = nat_stratum.stratum_id + + # State ------------------- + stratum_lookup["State"] = {} + for _, row in df_states.iterrows(): + + note = f"Geo: {row['ucgid_str']} Received SNAP Benefits" + parent_stratum_id = nat_stratum.stratum_id + + new_stratum = Stratum( + parent_stratum_id=parent_stratum_id, + stratum_group_id=0, + notes=note, + ) + new_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value=row["ucgid_str"], + ), + StratumConstraint( + constraint_variable="snap", + operation="greater_than", + value="0", + ), + ] + # Two targets now. Same data source. Same stratum + new_stratum.targets_rel.append( + Target( + variable="household_count", + period=year, + value=row["Households"], + source_id=3, + active=True, + ) + ) + new_stratum.targets_rel.append( + Target( + variable="snap", + period=year, + value=row["Cost"], + source_id=3, + active=True, + ) + ) + session.add(new_stratum) + session.flush() + stratum_lookup["State"][row["ucgid_str"]] = new_stratum.stratum_id + + session.commit() + return stratum_lookup + + +def load_survey_snap_data(survey_df, year, stratum_lookup=None): + """Use an already defined stratum_lookup to load the survey SNAP data""" + + if stratum_lookup is None: + raise ValueError("stratum_lookup must be provided") + + DATABASE_URL = f"sqlite:///{STORAGE_FOLDER / 'policy_data.db'}" + engine = create_engine(DATABASE_URL) + + with Session(engine) as session: + # Create new strata for districts whose households receive SNAP benefits + district_df = survey_df.copy() + for _, row in district_df.iterrows(): + note = f"Geo: {row['ucgid_str']} Received SNAP Benefits" + state_ucgid_str = "0400000US" + row["ucgid_str"][9:11] + state_stratum_id = stratum_lookup["State"][state_ucgid_str] + new_stratum = Stratum( + parent_stratum_id=state_stratum_id, +
stratum_group_id=0, + notes=note, + ) + + new_stratum.constraints_rel = [ + StratumConstraint( + constraint_variable="ucgid_str", + operation="in", + value=row["ucgid_str"], + ), + StratumConstraint( + constraint_variable="snap", + operation="greater_than", + value="0", + ), + ] + new_stratum.targets_rel.append( + Target( + variable="household_count", + period=year, + value=row["snap_household_ct"], + source_id=4, + active=True, + ) + ) + session.add(new_stratum) + session.flush() + + session.commit() + + return stratum_lookup + + +def main(): + year = 2023 + + # Extract --------- + zip_file_admin = extract_administrative_snap_data() + raw_survey_df = extract_survey_snap_data(year) + + # Transform ------- + state_admin_df = transform_administrative_snap_data(zip_file_admin, year) + district_survey_df = transform_survey_snap_data(raw_survey_df) + + # Load ----------- + stratum_lookup = load_administrative_snap_data(state_admin_df, year) + load_survey_snap_data(district_survey_df, year, stratum_lookup) + + +if __name__ == "__main__": + main() diff --git a/policyengine_us_data/db/load_age_targets.py b/policyengine_us_data/db/load_age_targets.py deleted file mode 100644 index f42adcf3..00000000 --- a/policyengine_us_data/db/load_age_targets.py +++ /dev/null @@ -1,319 +0,0 @@ -import logging -import requests -from pathlib import Path -import io - -import pandas as pd -import numpy as np -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker - -from policyengine_us_data.db.create_database_tables import ( - Stratum, - StratumConstraint, - Target, -) - - -logger = logging.getLogger(__name__) - -STATE_NAME_TO_ABBREV = { - "Alabama": "AL", - "Alabama": "AL", - "Alaska": "AK", - "Arizona": "AZ", - "Arkansas": "AR", - "California": "CA", - "Colorado": "CO", - "Connecticut": "CT", - "Delaware": "DE", - "District of Columbia": "DC", - "Florida": "FL", - "Georgia": "GA", - "Hawaii": "HI", - "Idaho": "ID", - "Illinois": "IL", - "Indiana": "IN", - "Iowa": 
"IA", - "Kansas": "KS", - "Kentucky": "KY", - "Louisiana": "LA", - "Maine": "ME", - "Maryland": "MD", - "Massachusetts": "MA", - "Michigan": "MI", - "Minnesota": "MN", - "Mississippi": "MS", - "Missouri": "MO", - "Montana": "MT", - "Nebraska": "NE", - "Nevada": "NV", - "New Hampshire": "NH", - "New Jersey": "NJ", - "New Mexico": "NM", - "New York": "NY", - "North Carolina": "NC", - "North Dakota": "ND", - "Ohio": "OH", - "Oklahoma": "OK", - "Oregon": "OR", - "Pennsylvania": "PA", - "Rhode Island": "RI", - "South Carolina": "SC", - "South Dakota": "SD", - "Tennessee": "TN", - "Texas": "TX", - "Utah": "UT", - "Vermont": "VT", - "Virginia": "VA", - "Washington": "WA", - "West Virginia": "WV", - "Wisconsin": "WI", - "Wyoming": "WY", -} - - -LABEL_TO_SHORT = { - "Estimate!!Total!!Total population!!AGE!!Under 5 years": "0-4", - "Estimate!!Total!!Total population!!AGE!!5 to 9 years": "5-9", - "Estimate!!Total!!Total population!!AGE!!10 to 14 years": "10-14", - "Estimate!!Total!!Total population!!AGE!!15 to 19 years": "15-19", - "Estimate!!Total!!Total population!!AGE!!20 to 24 years": "20-24", - "Estimate!!Total!!Total population!!AGE!!25 to 29 years": "25-29", - "Estimate!!Total!!Total population!!AGE!!30 to 34 years": "30-34", - "Estimate!!Total!!Total population!!AGE!!35 to 39 years": "35-39", - "Estimate!!Total!!Total population!!AGE!!40 to 44 years": "40-44", - "Estimate!!Total!!Total population!!AGE!!45 to 49 years": "45-49", - "Estimate!!Total!!Total population!!AGE!!50 to 54 years": "50-54", - "Estimate!!Total!!Total population!!AGE!!55 to 59 years": "55-59", - "Estimate!!Total!!Total population!!AGE!!60 to 64 years": "60-64", - "Estimate!!Total!!Total population!!AGE!!65 to 69 years": "65-69", - "Estimate!!Total!!Total population!!AGE!!70 to 74 years": "70-74", - "Estimate!!Total!!Total population!!AGE!!75 to 79 years": "75-79", - "Estimate!!Total!!Total population!!AGE!!80 to 84 years": "80-84", - "Estimate!!Total!!Total population!!AGE!!85 years and over": 
"85-inf", -} -AGE_COLS = list(LABEL_TO_SHORT.values()) - - -def extract_docs(year=2023): - docs_url = ( - f"https://api.census.gov/data/{year}/acs/acs1/subject/variables.json" - ) - - try: - docs_response = requests.get(docs_url) - docs_response.raise_for_status() - - docs = docs_response.json() - docs["year"] = year - - except requests.exceptions.RequestException as e: - print(f"Error during API request: {e}") - raise - except Exception as e: - print(f"An error occurred: {e}") - raise - return docs - - -def extract_age_data(geo, year=2023): - base_url = ( - f"https://api.census.gov/data/{year}/acs/acs1/subject?get=group(S0101)" - ) - - if geo == "State": - url = f"{base_url}&for=state:*" - elif geo == "District": - url = f"{base_url}&for=congressional+district:*" - elif geo == "National": - url = f"{base_url}&for=us:*" - else: - raise ValueError( - "geo must be either 'National', 'State', or 'District'" - ) - - try: - response = requests.get(url) - response.raise_for_status() - - data = response.json() - - headers = data[0] - data_rows = data[1:] - df = pd.DataFrame(data_rows, columns=headers) - - except requests.exceptions.RequestException as e: - print(f"Error during API request: {e}") - raise - except Exception as e: - print(f"An error occurred: {e}") - raise - return df - - -def transform_age_data(age_data, docs): - df = age_data.copy() - - label_to_variable_mapping = dict( - [ - (value["label"], key) - for key, value in docs["variables"].items() - if value["group"] == "S0101" - and value["concept"] == "Age and Sex" - and value["label"] in LABEL_TO_SHORT.keys() - ] - ) - - # By transitivity, map the data set variable names to short names - rename_mapping = dict( - [ - (label_to_variable_mapping[v], LABEL_TO_SHORT[v]) - for v in LABEL_TO_SHORT.keys() - ] - ) - - df = df.drop(columns="NAME") - df = df.rename({"GEO_ID": "ucgid_str"}, axis=1) - df_data = df.rename(columns=rename_mapping)[["ucgid_str"] + list(AGE_COLS)] - - # Filter out Puerto Rico's district and 
state records, if needed - df_geos = df_data[ - ~df_data["ucgid_str"].isin(["5001800US7298", "0400000US72"]) - ].copy() - - df = df_geos[["ucgid_str"] + AGE_COLS] - - df_long = df.melt( - id_vars="ucgid_str", - value_vars=AGE_COLS, - var_name="age_range", - value_name="value", - ) - age_bounds = df_long["age_range"].str.split("-", expand=True) - df_long["age_greater_than_or_equal_to"] = ( - age_bounds[0].str.replace("+", "").astype(int) - ) - df_long["age_less_than_or_equal_to"] = pd.to_numeric(age_bounds[1]) - df_long["variable"] = "person_count" - df_long["period"] = docs["year"] - df_long["reform_id"] = 0 - df_long["source_id"] = 1 - df_long["active"] = True - - return df_long - - -def get_parent_geo(geo): - return {"National": None, "State": "National", "District": "State"}[geo] - - -def load_age_data(df_long, geo, stratum_lookup={}): - - # Quick data quality check before loading ---- - if geo == "National": - assert len(set(df_long.ucgid_str)) == 1 - elif geo == "State": - assert len(set(df_long.ucgid_str)) == 51 - elif geo == "District": - assert len(set(df_long.ucgid_str)) == 436 - else: - raise ValueError('geo must be one of "National", "State", "District"') - - # Prepare to load data ----------- - DATABASE_URL = "sqlite:///policy_data.db" - engine = create_engine(DATABASE_URL) - - Session = sessionmaker(bind=engine) - session = Session() - - if not stratum_lookup: - if geo != "National": - raise ValueError("Include stratum_lookup unless National geo") - stratum_lookup = {"National": {}} - else: - stratum_lookup[geo] = {} - - for _, row in df_long.iterrows(): - - # Create the parent Stratum object. - # We will attach children to it before adding it to the session. 
- note = f"Age: {row['age_range']}, Geo: {row['ucgid_str']}" - parent_geo = get_parent_geo(geo) - parent_stratum_id = ( - stratum_lookup[parent_geo][row["age_range"]] - if parent_geo - else None - ) - - new_stratum = Stratum( - parent_stratum_id=parent_stratum_id, stratum_group_id=0, notes=note - ) - - # Create constraints and link them to the parent's relationship attribute. - new_stratum.constraints_rel = [ - StratumConstraint( - constraint_variable="ucgid_str", - operation="in", - value=row["ucgid_str"], - ), - StratumConstraint( - constraint_variable="age", - operation="greater_than_or_equal", - value=str(row["age_greater_than_or_equal_to"]), - ), - ] - - age_lt_value = row["age_less_than_or_equal_to"] - if not np.isinf(age_lt_value): - new_stratum.constraints_rel.append( - StratumConstraint( - constraint_variable="age", - operation="less_than", - value=str(age_lt_value + 1), - ) - ) - - # Create the Target and link it to the parent. - new_stratum.targets_rel.append( - Target( - variable=row["variable"], - period=row["period"], - value=row["value"], - source_id=row["source_id"], - active=row["active"], - ) - ) - - # Add ONLY the parent object to the session. - # The 'cascade' setting will handle the children automatically. - session.add(new_stratum) - - # Flush to get the id - session.flush() - stratum_lookup[geo][row["age_range"]] = new_stratum.stratum_id - - # Commit all the new objects at once. 
- session.commit() - - return stratum_lookup - - -if __name__ == "__main__": - - # --- ETL: Extract, Transform, Load ---- - - # ---- Extract ---------- - docs = extract_docs(2023) - national_df = extract_age_data("National", 2023) - state_df = extract_age_data("State", 2023) - - # --- Transform ---------- - long_national_df = transform_age_data(national_df, docs) - long_state_df = transform_age_data(state_df, docs) - - # --- Load -------- - national_strata_lku = load_age_data(long_national_df, "National") - state_strata_lku = load_age_data( - long_state_df, "State", national_strata_lku - ) diff --git a/policyengine_us_data/db/validate_database.py b/policyengine_us_data/db/validate_database.py new file mode 100644 index 00000000..fee6a49d --- /dev/null +++ b/policyengine_us_data/db/validate_database.py @@ -0,0 +1,24 @@ +""" +This is the start of a data validation pipeline. It is meant to be a separate +validation track from the unit tests in policyengine_us_data/tests in that it tests +the overall correctness of data after a full pipeline run with production data. 
+""" + +import sqlite3 + +import pandas as pd +from policyengine_us.system import system + + +conn = sqlite3.connect("policyengine_us_data/storage/policy_data.db") + +stratum_constraints_df = pd.read_sql("SELECT * FROM stratum_constraints", conn) +targets_df = pd.read_sql("SELECT * FROM targets", conn) + +for var_name in set(targets_df["variable"]): + if not var_name in system.variables.keys(): + raise ValueError(f"{var_name} not a policyengine-us variable") + +for var_name in set(stratum_constraints_df["constraint_variable"]): + if not var_name in system.variables.keys(): + raise ValueError(f"{var_name} not a policyengine-us variable") diff --git a/policyengine_us_data/storage/README.md b/policyengine_us_data/storage/README.md index 55f98ed9..d2c1f054 100644 --- a/policyengine_us_data/storage/README.md +++ b/policyengine_us_data/storage/README.md @@ -9,3 +9,9 @@ • Source: MACPAC Enrollment Tables, FFY 2024 • Date: 2024 • Location: https://www.medicaid.gov/resources-for-states/downloads/eligib-oper-and-enrol-snap-december2024.pdf#page=26 + +- **district_mapping.csv** + • Source: created by the script `policyengine_us_data/storage/calibration_targets/make_district_mapping.py` + • Notes: this script is not part of `make data` because of the length of time it takes to run and the + likelihood of timeout errors. See the script for more notes, including an alternative source. Also, + once the IRS SOI updates their data in 2026, this mapping will likely be unnecessary. diff --git a/policyengine_us_data/storage/calibration_targets/make_district_mapping.py b/policyengine_us_data/storage/calibration_targets/make_district_mapping.py new file mode 100644 index 00000000..72ff8d88 --- /dev/null +++ b/policyengine_us_data/storage/calibration_targets/make_district_mapping.py @@ -0,0 +1,258 @@ +""" +This was built before finding out about the crosswalk provided by the +Missouri Census Data Center (MCDC) at the University of Missouri.
This crosswalk can be +accessed at (https://mcdc.missouri.edu/applications/geocorr.html) and would be a logical place +to transition to, though since this is already built and new IRS SOI files may be available soon, +it may not be worth the effort to transition. + +To see the definitive "before and after" of congressional redistricting following the 2020 census, +you should compare the block-level data from the 116th Congress to the 119th Congress. + +This approach is necessary for states whose initial redistricting maps were altered due to legal +challenges and is aligned with the mapping files provided by the U.S. Census Bureau. + +- **116th Congress (The "Before"):** This session (2019-2021) used the congressional maps +based on the 2010 census data. It serves as the stable pre-redistricting baseline, as these +maps were identical to those used by the 117th Congress. The Census Bureau's most recent files +for that decade correspond to the 116th Congress. + +- **118th Congress (The "Interim" Stage):** In several states, the initial congressional maps drawn +for the 2022 elections were successfully challenged and invalidated by courts (e.g., for reasons of +partisan or racial gerrymandering). This required the use of temporary, court-ordered, or remedial +maps for the 2022 elections. Consequently, the 118th Congress (2023-2025) in these states represents +an interim stage, not the final outcome of the redistricting cycle. + +- **119th Congress (The Definitive "After"):** Following these legal resolutions, new and more permanent +congressional maps were enacted ahead of the 2024 election cycle. The elections in November 2024 were +the first to use these new maps. Therefore, the 119th Congress (2025-2027) is the first to reflect the +final, settled mapping decisions based on the 2020 census data. 
+ +By comparing the 116th and 119th Congresses, you bypass the anomalous, non-final maps of the 118th Congress, +providing a clear analysis of the redistricting cycle's ultimate impact. +""" + +import requests +import zipfile +import io +from pathlib import Path + +import pandas as pd +import numpy as np +import us + +from policyengine_us_data.storage import STORAGE_FOLDER, CALIBRATION_FOLDER + + +def fetch_block_to_district_map(congress: int) -> pd.DataFrame: + """ + Fetches the Census Block Equivalency File (BEF) for a given Congress. + + This file maps every 2020 census block (GEOID) to its corresponding + congressional district. + + Args: + congress: The congressional session number (e.g., 118 or 119). + + Returns: + A DataFrame with columns ['GEOID', f'CD{congress}']. + """ + if congress == 116: + url = "https://www2.census.gov/programs-surveys/decennial/rdo/mapping-files/2019/116-congressional-district-bef/cd116.zip" + zbytes = requests.get(url, timeout=120).content + + with zipfile.ZipFile(io.BytesIO(zbytes)) as z: + fname = "National_CD116.txt" + bef = pd.read_csv(z.open(fname), dtype=str) + bef.columns = bef.columns.str.strip() + bef = bef.rename(columns={"BLOCKID": "GEOID"}) + return bef[["GEOID", f"CD{congress}"]] + + elif congress == 118: + url = "https://www2.census.gov/programs-surveys/decennial/rdo/mapping-files/2023/118-congressional-district-bef/cd118.zip" + zbytes = requests.get(url, timeout=120).content + + with zipfile.ZipFile(io.BytesIO(zbytes)) as z: + fname = "National_CD118.txt" + bef = pd.read_csv(z.open(fname), dtype=str) + bef.columns = bef.columns.str.strip() + district_col = [c for c in bef.columns if c != "GEOID"][0] + bef = bef.rename(columns={district_col: f"CD{congress}"}) + return bef[["GEOID", f"CD{congress}"]] + + elif congress == 119: + url = "https://www2.census.gov/programs-surveys/decennial/rdo/mapping-files/2025/119-congressional-district-befs/cd119.zip" + zbytes = requests.get(url, timeout=120).content + + with 
zipfile.ZipFile(io.BytesIO(zbytes)) as z: + fname = "NationalCD119.txt" + bef = pd.read_csv(z.open(fname), sep=",", dtype=str) + bef.columns = bef.columns.str.strip() + bef = bef.rename(columns={"CDFP": f"CD{congress}"}) + return bef[["GEOID", f"CD{congress}"]] + + else: + raise ValueError( + f"Congress {congress} is not supported by this function." + ) + + +def fetch_block_population(state) -> pd.DataFrame: + """ + Download & parse the 2020 PL-94-171 “legacy” files for one state. + + Parameters + ---------- + state : str + Two-letter state/territory postal code **or** full state name + (e.g., "GA", "Georgia", "PR", "Puerto Rico"). + + Returns + ------- + pandas.DataFrame with columns GEOID (15-digit block code) and POP20. + """ + BASE = ( + "https://www2.census.gov/programs-surveys/decennial/2020/data/" + "01-Redistricting_File--PL_94-171/{dir}/{abbr}2020.pl.zip" + ) + st = us.states.lookup(state) + if st is None: + raise ValueError(f"Unrecognised state name/abbr: {state}") + + # Build URL components ----------------------------------------------------- + dir_name = st.name.replace(" ", "_") + abbr = st.abbr.lower() + url = BASE.format(dir=dir_name, abbr=abbr) + + # Download and open the zip ------------------------------------------------ + zbytes = requests.get(url, timeout=120).content + with zipfile.ZipFile(io.BytesIO(zbytes)) as z: + raw = z.read(f"{abbr}geo2020.pl") + try: + geo_lines = raw.decode("utf-8").splitlines() + except UnicodeDecodeError: + geo_lines = raw.decode("latin-1").splitlines() + + p1_lines = z.read(f"{abbr}000012020.pl").decode("utf-8").splitlines() + + # ---------------- GEO file: keep blocks (SUMLEV 750) ---------------------- + geo_records = [ + (parts[7], parts[8][-15:]) # LOGRECNO, 15-digit block GEOID + for ln in geo_lines + if (parts := ln.split("|"))[2] == "750" # summary level 750 = blocks + ] + geo_df = pd.DataFrame(geo_records, columns=["LOGRECNO", "GEOID"]) + + # ---------------- P-file: pull total-population cell 
---------------------- + p1_records = [ + (p[4], int(p[5])) for p in map(lambda x: x.split("|"), p1_lines) + ] + p1_df = pd.DataFrame(p1_records, columns=["LOGRECNO", "P0010001"]) + + # ---------------- Merge & finish ----------------------------------------- + return ( + geo_df.merge(p1_df, on="LOGRECNO", how="left") + .assign(POP20=lambda d: d["P0010001"].fillna(0).astype(int)) + .loc[:, ["GEOID", "POP20"]] + .sort_values("GEOID") + .reset_index(drop=True) + ) + + +def build_crosswalk_cd116_to_cd119(): + """Builds the crosswalk between 116th and 119th congress""" + # Pull the census block level population data one state at a time + state_pops = [] + for s in us.states.STATES_AND_TERRITORIES: + if not s.is_territory and s.abbr not in ["DC", "ZZ"]: + print(s.name) + state_pops.append(fetch_block_population(s.abbr)) + block_pop_df = pd.concat(state_pops) + + # Get census blocks for each district under the 116th and 119th congress + # Remove 'ZZ': blocks not assigned to any congressional district + df116 = fetch_block_to_district_map(116) + df116 = df116.loc[df116["CD116"] != "ZZ"] + df119 = fetch_block_to_district_map(119) + df119 = df119.loc[df119["CD119"] != "ZZ"] + + common_blocks = df116.merge(df119, on="GEOID") + + block_stats = block_pop_df.merge(common_blocks, on="GEOID") + block_stats["state_fips"] = block_stats.GEOID.str[:2] + shares = ( + block_stats.groupby(["state_fips", "CD116", "CD119"])["POP20"] + .sum() + .rename("pop_shared") + .reset_index() + ) + + def make_cd_code(state, district): + return f"5001800US{str(state).zfill(2)}{str(district).zfill(2)}" + + shares["code_old"] = shares.apply( + lambda row: make_cd_code(row.state_fips, row.CD116), axis=1 + ) + shares["code_new"] = shares.apply( + lambda row: make_cd_code(row.state_fips, row.CD119), axis=1 + ) + shares["proportion"] = shares.groupby("code_old").pop_shared.transform( + lambda s: s / s.sum() + ) + + ## add DC's district + dc_row = pd.DataFrame( + { + "state_fips": ["11"], # DC's FIPS +
"CD116": ["98"], # at-large code in the BEF files + "CD119": ["98"], + "pop_shared": [689545], + "code_old": ["5001800US1198"], + "code_new": ["5001800US1198"], + "proportion": [1.0], + } + ) + + shares = pd.concat([shares, dc_row], ignore_index=True) + + district_mapping = ( + shares[["code_old", "code_new", "proportion"]] + .sort_values(["code_old", "proportion"], ascending=[True, False]) + .reset_index(drop=True) + ) + assert len(set(district_mapping.code_old)) == 436 + assert len(set(district_mapping.code_new)) == 436 + mapping_path = Path(STORAGE_FOLDER, "district_mapping.csv") + district_mapping.to_csv(mapping_path, index=False) + + +def get_district_mapping(): + """Loads district_mapping.csv and returns a dict with the 436 by 436 - with DC - (old by new) mapping matrix plus the sorted old and new codes""" + + mapping_path = Path(STORAGE_FOLDER, "district_mapping.csv") + mapping_df = pd.read_csv(mapping_path) + + old_codes = sorted(mapping_df.code_old.unique()) + new_codes = sorted(mapping_df.code_new.unique()) + assert len(old_codes) == len(new_codes) == 436 + + old_index = {c: i for i, c in enumerate(old_codes)} + new_index = {c: j for j, c in enumerate(new_codes)} + + mapping_matrix = np.zeros((436, 436), dtype=float) + + for row in mapping_df.itertuples(index=False): + i = old_index[row.code_old] + j = new_index[row.code_new] + mapping_matrix[i, j] = row.proportion + + assert np.allclose(mapping_matrix.sum(axis=1), 1.0) + return { + "mapping_matrix": mapping_matrix, + "old_codes": old_codes, + "new_codes": new_codes, + } + + +if __name__ == "__main__": + build_crosswalk_cd116_to_cd119() + print(get_district_mapping()) diff --git a/policyengine_us_data/storage/district_mapping.csv b/policyengine_us_data/storage/district_mapping.csv new file mode 100644 index 00000000..fb5eef63 --- /dev/null +++ b/policyengine_us_data/storage/district_mapping.csv @@ -0,0 +1,1502 @@ +code_old,code_new,proportion +5001800US0101,5001800US0102,0.5011459283535776 +5001800US0101,5001800US0101,0.4820707064326126
+5001800US0101,5001800US0107,0.016783365213809763 +5001800US0102,5001800US0101,0.4678169491253603 +5001800US0102,5001800US0102,0.39705192508930826 +5001800US0102,5001800US0106,0.1351311257853314 +5001800US0103,5001800US0103,0.8198603796970461 +5001800US0103,5001800US0102,0.180139620302954 +5001800US0104,5001800US0104,0.7692270137962715 +5001800US0104,5001800US0103,0.17447390203471147 +5001800US0104,5001800US0105,0.04907806643602609 +5001800US0104,5001800US0107,0.007221017732990997 +5001800US0105,5001800US0105,0.878817662367874 +5001800US0105,5001800US0104,0.12118233763212609 +5001800US0106,5001800US0106,0.7473312551163003 +5001800US0106,5001800US0107,0.22864246122636855 +5001800US0106,5001800US0104,0.024026283657331227 +5001800US0107,5001800US0107,0.8633782301260244 +5001800US0107,5001800US0102,0.08164855122567322 +5001800US0107,5001800US0104,0.034608328493330016 +5001800US0107,5001800US0106,0.020364890154972295 +5001800US0200,5001800US0200,1.0 +5001800US0401,5001800US0402,0.6948503590446475 +5001800US0401,5001800US0406,0.29812837069923254 +5001800US0401,5001800US0407,0.007021270256119934 +5001800US0402,5001800US0406,0.7492521226985535 +5001800US0402,5001800US0407,0.25074787730144654 +5001800US0403,5001800US0407,0.7754175253607597 +5001800US0403,5001800US0403,0.12104397018340192 +5001800US0403,5001800US0409,0.08970929465623763 +5001800US0403,5001800US0406,0.013829209799600803 +5001800US0403,5001800US0402,0.0 +5001800US0404,5001800US0409,0.46093471548467213 +5001800US0404,5001800US0402,0.32851231142675985 +5001800US0404,5001800US0405,0.19748931684056112 +5001800US0404,5001800US0401,0.007724890354579596 +5001800US0404,5001800US0407,0.005338765893427278 +5001800US0404,5001800US0406,0.0 +5001800US0404,5001800US0408,0.0 +5001800US0405,5001800US0405,0.603375246029751 +5001800US0405,5001800US0404,0.3889039941038361 +5001800US0405,5001800US0401,0.007720759866412899 +5001800US0406,5001800US0401,0.7549732899257154 +5001800US0406,5001800US0408,0.24332248142885407 
+5001800US0406,5001800US0404,0.0017042286454305089 +5001800US0407,5001800US0403,0.8274880581174503 +5001800US0407,5001800US0409,0.0731090792934076 +5001800US0407,5001800US0408,0.049613133214102066 +5001800US0407,5001800US0401,0.041687081990827386 +5001800US0407,5001800US0407,0.008038587992499856 +5001800US0407,5001800US0404,6.405939171279232e-05 +5001800US0408,5001800US0408,0.7247876203185409 +5001800US0408,5001800US0409,0.27432545665102437 +5001800US0408,5001800US0403,0.0008836562789413765 +5001800US0408,5001800US0401,3.2667514933137765e-06 +5001800US0408,5001800US0407,0.0 +5001800US0409,5001800US0404,0.6942271093254387 +5001800US0409,5001800US0401,0.23192258506278385 +5001800US0409,5001800US0403,0.05066437655628968 +5001800US0409,5001800US0408,0.023060778551217485 +5001800US0409,5001800US0405,0.0001251505042702215 +5001800US0409,5001800US0402,0.0 +5001800US0501,5001800US0501,0.9636508992922538 +5001800US0501,5001800US0502,0.030829978572819945 +5001800US0501,5001800US0504,0.005519122134926303 +5001800US0502,5001800US0502,0.9185648948063828 +5001800US0502,5001800US0504,0.06575058440898465 +5001800US0502,5001800US0501,0.015684520784632585 +5001800US0503,5001800US0503,0.8207885304659498 +5001800US0503,5001800US0501,0.11114866688730685 +5001800US0503,5001800US0504,0.06806280264674333 +5001800US0504,5001800US0504,0.937181462111058 +5001800US0504,5001800US0503,0.06281853788894193 +5001800US0601,5001800US0601,0.7902007441225233 +5001800US0601,5001800US0603,0.20979925587747675 +5001800US0602,5001800US0602,0.9992455338468796 +5001800US0602,5001800US0604,0.0007544661531203383 +5001800US0603,5001800US0604,0.44723347944752295 +5001800US0603,5001800US0601,0.25560513928793605 +5001800US0603,5001800US0608,0.2287328339799995 +5001800US0603,5001800US0606,0.0501127320716451 +5001800US0603,5001800US0607,0.01653583513428961 +5001800US0603,5001800US0603,0.001779980078606797 +5001800US0604,5001800US0603,0.6014165640096494 +5001800US0604,5001800US0605,0.38940702976059494 
+5001800US0604,5001800US0620,0.009176406229755659 +5001800US0604,5001800US0613,0.0 +5001800US0605,5001800US0604,0.5395799676898223 +5001800US0605,5001800US0608,0.3792849775928005 +5001800US0605,5001800US0602,0.06123965207232369 +5001800US0605,5001800US0610,0.01989540264505353 +5001800US0606,5001800US0607,0.5752703267757333 +5001800US0606,5001800US0606,0.4243660728533943 +5001800US0606,5001800US0604,0.0003636003708723783 +5001800US0607,5001800US0606,0.5304593199540961 +5001800US0607,5001800US0607,0.2849996108440379 +5001800US0607,5001800US0603,0.18454106920186597 +5001800US0608,5001800US0623,0.7911347146383424 +5001800US0608,5001800US0633,0.09254544094547086 +5001800US0608,5001800US0603,0.07483581419702297 +5001800US0608,5001800US0628,0.02122223053332336 +5001800US0608,5001800US0625,0.020261799685840378 +5001800US0609,5001800US0609,0.7449549952013407 +5001800US0609,5001800US0610,0.12939921639709387 +5001800US0609,5001800US0608,0.058466694739002095 +5001800US0609,5001800US0607,0.03610857581347429 +5001800US0609,5001800US0613,0.03107051784908911 +5001800US0610,5001800US0605,0.46922839800977745 +5001800US0610,5001800US0613,0.36093406736538647 +5001800US0610,5001800US0609,0.1698375346248361 +5001800US0611,5001800US0610,0.5421216175756663 +5001800US0611,5001800US0608,0.4577227617261042 +5001800US0611,5001800US0609,0.00015562069822947625 +5001800US0612,5001800US0611,0.8824556656398301 +5001800US0612,5001800US0615,0.11754433436016984 +5001800US0613,5001800US0612,0.968319632028912 +5001800US0613,5001800US0614,0.03168036797108799 +5001800US0613,5001800US0617,0.0 +5001800US0614,5001800US0615,0.8147246523351065 +5001800US0614,5001800US0616,0.10011027095148078 +5001800US0614,5001800US0611,0.08516507671341275 +5001800US0615,5001800US0614,0.879209193527972 +5001800US0615,5001800US0610,0.11949548033093281 +5001800US0615,5001800US0617,0.001295326141095208 +5001800US0615,5001800US0612,0.0 +5001800US0616,5001800US0621,0.576124338019409 +5001800US0616,5001800US0613,0.4206240681965774 
+5001800US0616,5001800US0620,0.002105990066861183 +5001800US0616,5001800US0605,0.0011456037171525002 +5001800US0617,5001800US0617,0.8767390155851017 +5001800US0617,5001800US0614,0.11236828974787944 +5001800US0617,5001800US0616,0.009779211059258606 +5001800US0617,5001800US0618,0.0011134836077602653 +5001800US0618,5001800US0616,0.7235221984095743 +5001800US0618,5001800US0619,0.10660128717891398 +5001800US0618,5001800US0615,0.08535457356731285 +5001800US0618,5001800US0617,0.07351142602123051 +5001800US0618,5001800US0618,0.011010514822968352 +5001800US0619,5001800US0618,0.4510363311700647 +5001800US0619,5001800US0619,0.32974361985417794 +5001800US0619,5001800US0616,0.1975073480905326 +5001800US0619,5001800US0617,0.021712700885224725 +5001800US0620,5001800US0618,0.5925036040365209 +5001800US0620,5001800US0619,0.4074963959634791 +5001800US0621,5001800US0622,0.5329178551896014 +5001800US0621,5001800US0621,0.18150259864876814 +5001800US0621,5001800US0613,0.17215597787464262 +5001800US0621,5001800US0620,0.11342356828698776 +5001800US0622,5001800US0620,0.41246978264039647 +5001800US0622,5001800US0621,0.321831053895241 +5001800US0622,5001800US0605,0.1605561021095157 +5001800US0622,5001800US0622,0.10514306135484679 +5001800US0623,5001800US0620,0.4311170291854314 +5001800US0623,5001800US0622,0.3619284258105871 +5001800US0623,5001800US0627,0.16211411824180327 +5001800US0623,5001800US0623,0.03673538167724541 +5001800US0623,5001800US0621,0.00810504508493281 +5001800US0624,5001800US0624,0.909745972391165 +5001800US0624,5001800US0619,0.09025402760883502 +5001800US0624,5001800US0626,0.0 +5001800US0625,5001800US0627,0.7963998249467975 +5001800US0625,5001800US0626,0.16642496739205054 +5001800US0625,5001800US0623,0.03555339122674538 +5001800US0625,5001800US0632,0.001615380654904922 +5001800US0625,5001800US0628,6.435779501613235e-06 +5001800US0626,5001800US0626,0.8517580843638418 +5001800US0626,5001800US0624,0.14824191563615824 +5001800US0627,5001800US0628,0.9333023507342876 
+5001800US0627,5001800US0631,0.04593987122505196 +5001800US0627,5001800US0630,0.010313825562334173 +5001800US0627,5001800US0635,0.009475605428151353 +5001800US0627,5001800US0638,0.0009683470501749108 +5001800US0627,5001800US0634,0.0 +5001800US0628,5001800US0630,0.9093157999708373 +5001800US0628,5001800US0628,0.06209304235460142 +5001800US0628,5001800US0634,0.016399816979048125 +5001800US0628,5001800US0632,0.009668936551060667 +5001800US0628,5001800US0629,0.0022592696257288576 +5001800US0628,5001800US0627,0.0002631345187236132 +5001800US0629,5001800US0629,0.9349816769417437 +5001800US0629,5001800US0632,0.043869984424961504 +5001800US0629,5001800US0627,0.019276524536392385 +5001800US0629,5001800US0630,0.001871814096902376 +5001800US0630,5001800US0632,0.7750581173919288 +5001800US0630,5001800US0629,0.11788517210448435 +5001800US0630,5001800US0627,0.06792398640562922 +5001800US0630,5001800US0630,0.038539583302982565 +5001800US0630,5001800US0626,0.0005931407949751609 +5001800US0631,5001800US0633,0.7205763667291623 +5001800US0631,5001800US0635,0.1382415144629724 +5001800US0631,5001800US0628,0.07333507162161602 +5001800US0631,5001800US0623,0.06784704718624932 +5001800US0632,5001800US0631,0.9862601829590875 +5001800US0632,5001800US0638,0.0120510508192493 +5001800US0632,5001800US0628,0.0016887662216632747 +5001800US0632,5001800US0635,0.0 +5001800US0633,5001800US0636,0.7322890602472397 +5001800US0633,5001800US0632,0.11928339549982792 +5001800US0633,5001800US0630,0.07530486386932944 +5001800US0633,5001800US0644,0.04079176239686718 +5001800US0633,5001800US0626,0.03222115769205726 +5001800US0633,5001800US0643,0.00010976029467848605 +5001800US0634,5001800US0634,0.9419729727306151 +5001800US0634,5001800US0637,0.037979859572979904 +5001800US0634,5001800US0630,0.01977097662239838 +5001800US0634,5001800US0628,0.0002761910740066537 +5001800US0635,5001800US0635,0.817698648971904 +5001800US0635,5001800US0633,0.17846596699720388 +5001800US0635,5001800US0638,0.003835384030892129 
+5001800US0635,5001800US0631,0.0 +5001800US0635,5001800US0640,0.0 +5001800US0636,5001800US0625,0.7774587938511587 +5001800US0636,5001800US0641,0.21970444069455705 +5001800US0636,5001800US0648,0.0028367654542842463 +5001800US0637,5001800US0637,0.7674439153156702 +5001800US0637,5001800US0636,0.20994549380477542 +5001800US0637,5001800US0634,0.00766878788603953 +5001800US0637,5001800US0630,0.007550976454280881 +5001800US0637,5001800US0632,0.006917740008578145 +5001800US0637,5001800US0643,0.0004730865306558231 +5001800US0638,5001800US0638,0.6887156260676834 +5001800US0638,5001800US0645,0.153737658674189 +5001800US0638,5001800US0642,0.09233370419364165 +5001800US0638,5001800US0644,0.05784019132887717 +5001800US0638,5001800US0631,0.005413976469351461 +5001800US0638,5001800US0634,0.0011844168586671805 +5001800US0638,5001800US0628,0.0007744264075900797 +5001800US0639,5001800US0645,0.35148980358066995 +5001800US0639,5001800US0638,0.3314724177470801 +5001800US0639,5001800US0640,0.20141183792614742 +5001800US0639,5001800US0646,0.10893543902612272 +5001800US0639,5001800US0635,0.005736648593066638 +5001800US0639,5001800US0631,0.0009538531269132028 +5001800US0640,5001800US0642,0.4998035349610858 +5001800US0640,5001800US0637,0.2331520216516807 +5001800US0640,5001800US0634,0.15496950827355685 +5001800US0640,5001800US0644,0.09446356234285 +5001800US0640,5001800US0638,0.015750681460572197 +5001800US0640,5001800US0643,0.001836023054247257 +5001800US0640,5001800US0628,2.4668256007160842e-05 +5001800US0641,5001800US0639,0.9629794917777905 +5001800US0641,5001800US0641,0.03549452538231834 +5001800US0641,5001800US0625,0.0015259828398910835 +5001800US0642,5001800US0641,0.7325250092756586 +5001800US0642,5001800US0648,0.14663007740216222 +5001800US0642,5001800US0635,0.03994483608336192 +5001800US0642,5001800US0639,0.03684828289475219 +5001800US0642,5001800US0640,0.023671013975325582 +5001800US0642,5001800US0625,0.020380780368739512 +5001800US0643,5001800US0643,0.7911076363450807 
+5001800US0643,5001800US0636,0.1290098010271214 +5001800US0643,5001800US0637,0.05355938751781425 +5001800US0643,5001800US0644,0.026323175109983634 +5001800US0644,5001800US0644,0.6706043411251857 +5001800US0644,5001800US0643,0.27499939117093486 +5001800US0644,5001800US0642,0.05439626770387946 +5001800US0645,5001800US0640,0.8173764937841915 +5001800US0645,5001800US0647,0.1781247975118253 +5001800US0645,5001800US0649,0.003175014579148578 +5001800US0645,5001800US0646,0.0013236941248345381 +5001800US0646,5001800US0646,0.8446515657194924 +5001800US0646,5001800US0645,0.08557874500843862 +5001800US0646,5001800US0640,0.069769689272069 +5001800US0646,5001800US0647,0.0 +5001800US0647,5001800US0642,0.41273816684764353 +5001800US0647,5001800US0645,0.3861931582935575 +5001800US0647,5001800US0644,0.13927145747564623 +5001800US0647,5001800US0646,0.0574975197056457 +5001800US0647,5001800US0647,0.00429969767750705 +5001800US0647,5001800US0636,0.0 +5001800US0648,5001800US0647,0.6061644354137836 +5001800US0648,5001800US0645,0.17512332201809025 +5001800US0648,5001800US0649,0.0871521016978921 +5001800US0648,5001800US0640,0.07501824469389795 +5001800US0648,5001800US0646,0.0565418961763361 +5001800US0649,5001800US0649,0.9467849127978065 +5001800US0649,5001800US0650,0.049865068699184355 +5001800US0649,5001800US0640,0.0033283775566073054 +5001800US0649,5001800US0647,2.1640946401868047e-05 +5001800US0649,5001800US0648,0.0 +5001800US0650,5001800US0648,0.7026490283613446 +5001800US0650,5001800US0650,0.22277524290966386 +5001800US0650,5001800US0651,0.07349453453256302 +5001800US0650,5001800US0649,0.0010811941964285715 +5001800US0650,5001800US0652,0.0 +5001800US0651,5001800US0652,0.8287402679957188 +5001800US0651,5001800US0625,0.1625106624192056 +5001800US0651,5001800US0648,0.008269147531661273 +5001800US0651,5001800US0651,0.0003531501902483461 +5001800US0651,5001800US0650,0.00012677186316607296 +5001800US0652,5001800US0651,0.4962193959357763 +5001800US0652,5001800US0650,0.411860889626181 
+5001800US0652,5001800US0648,0.09191773246998805 +5001800US0652,5001800US0652,1.981968054638895e-06 +5001800US0653,5001800US0651,0.596359860966912 +5001800US0653,5001800US0652,0.24744097864849005 +5001800US0653,5001800US0650,0.12459892565341037 +5001800US0653,5001800US0648,0.03160023473118765 +5001800US0801,5001800US0801,0.8051210712426631 +5001800US0801,5001800US0806,0.1736981924575039 +5001800US0801,5001800US0807,0.021180736299833067 +5001800US0802,5001800US0802,0.6336205761200139 +5001800US0802,5001800US0807,0.25830170826047577 +5001800US0802,5001800US0804,0.09109034065927386 +5001800US0802,5001800US0808,0.014904476408337267 +5001800US0802,5001800US0806,0.0020828985518992265 +5001800US0803,5001800US0803,0.9294525872910616 +5001800US0803,5001800US0802,0.06644105519096945 +5001800US0803,5001800US0807,0.004106357517968887 +5001800US0804,5001800US0804,0.5029337358528304 +5001800US0804,5001800US0808,0.291207855380495 +5001800US0804,5001800US0802,0.15484187515332773 +5001800US0804,5001800US0803,0.050532135150296344 +5001800US0804,5001800US0806,0.000484398463050554 +5001800US0804,5001800US0807,0.0 +5001800US0805,5001800US0805,0.8312852988063479 +5001800US0805,5001800US0807,0.16231261698785063 +5001800US0805,5001800US0804,0.006402084205801441 +5001800US0806,5001800US0806,0.8110107505768004 +5001800US0806,5001800US0804,0.14431790290118304 +5001800US0806,5001800US0808,0.04467134652201659 +5001800US0807,5001800US0807,0.5448955821367417 +5001800US0807,5001800US0808,0.4551044178632583 +5001800US0807,5001800US0801,0.0 +5001800US0901,5001800US0901,0.9672058926598738 +5001800US0901,5001800US0902,0.03089007332450013 +5001800US0901,5001800US0905,0.001904034015626095 +5001800US0902,5001800US0902,1.0 +5001800US0903,5001800US0903,0.9781755317954713 +5001800US0903,5001800US0901,0.019992090206190017 +5001800US0903,5001800US0905,0.0018323779983385978 +5001800US0904,5001800US0904,0.9696486259458347 +5001800US0904,5001800US0903,0.030351374054165325 
+5001800US0905,5001800US0905,0.9878146024980679 +5001800US0905,5001800US0901,0.007909204505424981 +5001800US0905,5001800US0903,0.004276192996507192 +5001800US1000,5001800US1000,1.0 +5001800US1198,5001800US1198,1.0 +5001800US1201,5001800US1201,0.9747987495611197 +5001800US1201,5001800US1202,0.025201250438880357 +5001800US1202,5001800US1202,0.6993820113510333 +5001800US1202,5001800US1203,0.3006179886489667 +5001800US1203,5001800US1203,0.5792179712439726 +5001800US1203,5001800US1206,0.2533873174390422 +5001800US1203,5001800US1204,0.16739471131698533 +5001800US1204,5001800US1205,0.7624656298645562 +5001800US1204,5001800US1204,0.2245951961825509 +5001800US1204,5001800US1206,0.01293917395289291 +5001800US1205,5001800US1204,0.39587483354951214 +5001800US1205,5001800US1202,0.36349765080899793 +5001800US1205,5001800US1205,0.15146400892627834 +5001800US1205,5001800US1203,0.08916350671521157 +5001800US1206,5001800US1206,0.6101570207856056 +5001800US1206,5001800US1207,0.38717161920369225 +5001800US1206,5001800US1211,0.00237036169963713 +5001800US1206,5001800US1205,0.00030099831106503237 +5001800US1207,5001800US1207,0.7243416431150246 +5001800US1207,5001800US1210,0.25373695130119417 +5001800US1207,5001800US1209,0.021921405583781223 +5001800US1208,5001800US1208,1.0 +5001800US1209,5001800US1218,0.5605197858168995 +5001800US1209,5001800US1209,0.3590995167820295 +5001800US1209,5001800US1210,0.07936855165208306 +5001800US1209,5001800US1211,0.0010121457489878543 +5001800US1210,5001800US1210,0.5368456034843628 +5001800US1210,5001800US1211,0.2794200823797457 +5001800US1210,5001800US1209,0.18373431413589142 +5001800US1211,5001800US1212,0.5453889102810936 +5001800US1211,5001800US1211,0.2894787507853191 +5001800US1211,5001800US1206,0.13233133372514846 +5001800US1211,5001800US1203,0.032801005208438885 +5001800US1212,5001800US1212,0.5772534157521615 +5001800US1212,5001800US1213,0.23956926250122584 +5001800US1212,5001800US1215,0.17647868298593206 
+5001800US1212,5001800US1214,0.006698638760680651 +5001800US1213,5001800US1213,0.7453819831827065 +5001800US1213,5001800US1214,0.2546180168172935 +5001800US1214,5001800US1214,0.6632249170480902 +5001800US1214,5001800US1215,0.3367750829519099 +5001800US1215,5001800US1215,0.5084681430291608 +5001800US1215,5001800US1216,0.19446251879108797 +5001800US1215,5001800US1218,0.17809650953203646 +5001800US1215,5001800US1211,0.060903692402043276 +5001800US1215,5001800US1214,0.05644244504775367 +5001800US1215,5001800US1209,0.0016266911979178353 +5001800US1216,5001800US1216,0.6709346399186465 +5001800US1216,5001800US1217,0.2958556747095621 +5001800US1216,5001800US1214,0.033209685371791316 +5001800US1217,5001800US1217,0.5462582449029376 +5001800US1217,5001800US1218,0.4537417550970623 +5001800US1218,5001800US1221,0.95046338878938 +5001800US1218,5001800US1220,0.04953661121062002 +5001800US1218,5001800US1222,0.0 +5001800US1219,5001800US1219,0.8897051787745729 +5001800US1219,5001800US1217,0.09399619379737818 +5001800US1219,5001800US1226,0.01629862742804895 +5001800US1220,5001800US1220,0.8047201512884832 +5001800US1220,5001800US1222,0.07604962558569756 +5001800US1220,5001800US1224,0.07049154352527683 +5001800US1220,5001800US1221,0.0194666628254647 +5001800US1220,5001800US1223,0.016768904372591105 +5001800US1220,5001800US1225,0.01250311240248663 +5001800US1221,5001800US1222,0.887138729405424 +5001800US1221,5001800US1223,0.10680921618971338 +5001800US1221,5001800US1220,0.006052054404862603 +5001800US1222,5001800US1223,0.86342693421676 +5001800US1222,5001800US1220,0.09376805885037684 +5001800US1222,5001800US1225,0.0410140895909871 +5001800US1222,5001800US1222,0.0017909173418760554 +5001800US1223,5001800US1225,0.7424736452134326 +5001800US1223,5001800US1220,0.1538560090194546 +5001800US1223,5001800US1224,0.10367034576711273 +5001800US1223,5001800US1227,0.0 +5001800US1224,5001800US1224,0.7304388158425426 +5001800US1224,5001800US1226,0.14046409077464977 
+5001800US1224,5001800US1225,0.11044786821184337 +5001800US1224,5001800US1227,0.0186492251709643 +5001800US1225,5001800US1226,0.6776329968288068 +5001800US1225,5001800US1227,0.11529185581505136 +5001800US1225,5001800US1228,0.10399597699773193 +5001800US1225,5001800US1218,0.08066530057916181 +5001800US1225,5001800US1219,0.020929190370793552 +5001800US1225,5001800US1224,0.0014846794084544623 +5001800US1226,5001800US1228,0.8915866534230953 +5001800US1226,5001800US1227,0.10841334657690468 +5001800US1227,5001800US1227,0.7776326837521148 +5001800US1227,5001800US1224,0.20028071934331726 +5001800US1227,5001800US1226,0.022086596904567955 +5001800US1227,5001800US1228,0.0 +5001800US1301,5001800US1301,0.9561881617774692 +5001800US1301,5001800US1308,0.0422120002314413 +5001800US1301,5001800US1312,0.00159983799108951 +5001800US1302,5001800US1302,0.9535883881372637 +5001800US1302,5001800US1308,0.0425193208077129 +5001800US1302,5001800US1303,0.00389229105502341 +5001800US1303,5001800US1303,0.9356156702384453 +5001800US1303,5001800US1302,0.03246398019680113 +5001800US1303,5001800US1310,0.02951333091719719 +5001800US1303,5001800US1306,0.0024070186475563973 +5001800US1304,5001800US1304,0.5292481140839385 +5001800US1304,5001800US1313,0.34044250327086667 +5001800US1304,5001800US1305,0.12428806972660905 +5001800US1304,5001800US1310,0.0060213129185858185 +5001800US1305,5001800US1305,0.8409159109359058 +5001800US1305,5001800US1306,0.09164795107729144 +5001800US1305,5001800US1304,0.054169391087904833 +5001800US1305,5001800US1313,0.013266746898897912 +5001800US1306,5001800US1311,0.35373415889203297 +5001800US1306,5001800US1304,0.2627546017426386 +5001800US1306,5001800US1307,0.2618954230054446 +5001800US1306,5001800US1305,0.07814324003772916 +5001800US1306,5001800US1306,0.04347257632215467 +5001800US1307,5001800US1307,0.5697457009305716 +5001800US1307,5001800US1304,0.2169990136798319 +5001800US1307,5001800US1313,0.11428448904326943 +5001800US1307,5001800US1309,0.09897079634632704 
+5001800US1308,5001800US1308,0.8445285294339339 +5001800US1308,5001800US1302,0.15547147056606617 +5001800US1309,5001800US1309,0.5130770554046914 +5001800US1309,5001800US1307,0.28956600272784244 +5001800US1309,5001800US1310,0.19105955789292603 +5001800US1309,5001800US1311,0.006297383974540137 +5001800US1310,5001800US1310,0.6874925073082563 +5001800US1310,5001800US1312,0.2771235441123581 +5001800US1310,5001800US1308,0.03511959104504222 +5001800US1310,5001800US1313,0.00026435753434342503 +5001800US1311,5001800US1311,0.5762284303464563 +5001800US1311,5001800US1306,0.34235702976142557 +5001800US1311,5001800US1314,0.08141453989211819 +5001800US1312,5001800US1312,0.9080261599572386 +5001800US1312,5001800US1308,0.0680724848815151 +5001800US1312,5001800US1301,0.02390135516124637 +5001800US1313,5001800US1306,0.4755343460142492 +5001800US1313,5001800US1305,0.21698908699849687 +5001800US1313,5001800US1313,0.1867369758887436 +5001800US1313,5001800US1303,0.1207395910985103 +5001800US1314,5001800US1314,0.9248507449920454 +5001800US1314,5001800US1311,0.055233761568332276 +5001800US1314,5001800US1303,0.019915493439622286 +5001800US1501,5001800US1501,0.9875087703991304 +5001800US1501,5001800US1502,0.012491229600869588 +5001800US1502,5001800US1502,1.0 +5001800US1502,5001800US1501,0.0 +5001800US1601,5001800US1601,0.9915164906448278 +5001800US1601,5001800US1602,0.008483509355172204 +5001800US1602,5001800US1602,0.9999813547194701 +5001800US1602,5001800US1601,1.8645280529898873e-05 +5001800US1701,5001800US1701,0.7328664399960247 +5001800US1701,5001800US1706,0.16391961524214507 +5001800US1701,5001800US1707,0.05042568505630125 +5001800US1701,5001800US1702,0.0340563102878155 +5001800US1701,5001800US1704,0.014863591053235477 +5001800US1701,5001800US1714,0.0038683583644779597 +5001800US1702,5001800US1702,0.866871509273929 +5001800US1702,5001800US1701,0.13312849072607102 +5001800US1703,5001800US1706,0.42346040790348827 +5001800US1703,5001800US1704,0.38180285318279644 
+5001800US1703,5001800US1701,0.08662255730722768 +5001800US1703,5001800US1714,0.05225556482250935 +5001800US1703,5001800US1707,0.03742106541145526 +5001800US1703,5001800US1711,0.018437551372522995 +5001800US1704,5001800US1704,0.5137089622251435 +5001800US1704,5001800US1703,0.4625259850064018 +5001800US1704,5001800US1707,0.022781258287399162 +5001800US1704,5001800US1705,0.00098379448105554 +5001800US1705,5001800US1705,0.5793785972791489 +5001800US1705,5001800US1703,0.20435952079881398 +5001800US1705,5001800US1704,0.09853002092962414 +5001800US1705,5001800US1707,0.05321520449986919 +5001800US1705,5001800US1706,0.037086040376733236 +5001800US1705,5001800US1709,0.017764291008982296 +5001800US1705,5001800US1708,0.00966632510682829 +5001800US1706,5001800US1706,0.2649640408149341 +5001800US1706,5001800US1708,0.17813077219242998 +5001800US1706,5001800US1703,0.16607018985994842 +5001800US1706,5001800US1709,0.14958111245225883 +5001800US1706,5001800US1705,0.14925295544375114 +5001800US1706,5001800US1711,0.07081855679146921 +5001800US1706,5001800US1704,0.02099717492555547 +5001800US1706,5001800US1710,0.00018519751965286838 +5001800US1707,5001800US1707,0.8356214674918557 +5001800US1707,5001800US1704,0.05429995574161812 +5001800US1707,5001800US1705,0.04485768390808768 +5001800US1707,5001800US1703,0.03863103746000421 +5001800US1707,5001800US1701,0.020530084816472824 +5001800US1707,5001800US1702,0.006059770581961444 +5001800US1708,5001800US1708,0.6857132588595797 +5001800US1708,5001800US1703,0.170406761704302 +5001800US1708,5001800US1706,0.09747842433163478 +5001800US1708,5001800US1709,0.025812671788885817 +5001800US1708,5001800US1705,0.013671231035835221 +5001800US1708,5001800US1710,0.006917652279762546 +5001800US1708,5001800US1704,0.0 +5001800US1709,5001800US1709,0.7062608912123475 +5001800US1709,5001800US1705,0.1787306724199928 +5001800US1709,5001800US1708,0.06752966558791801 +5001800US1709,5001800US1710,0.04747877077974166 +5001800US1710,5001800US1710,0.8249960175825701 
+5001800US1710,5001800US1709,0.11204077995448215 +5001800US1710,5001800US1705,0.05610713570343687 +5001800US1710,5001800US1708,0.0068560667595108805 +5001800US1711,5001800US1711,0.5593862773123179 +5001800US1711,5001800US1714,0.32027369568135694 +5001800US1711,5001800US1706,0.08971903945924177 +5001800US1711,5001800US1701,0.030620987547083395 +5001800US1712,5001800US1712,0.5961217321481562 +5001800US1712,5001800US1713,0.3996578683491372 +5001800US1712,5001800US1715,0.004220399502706603 +5001800US1713,5001800US1713,0.5947948448680809 +5001800US1713,5001800US1715,0.33531142019670085 +5001800US1713,5001800US1717,0.059265515333313916 +5001800US1713,5001800US1716,0.010628219601904309 +5001800US1714,5001800US1711,0.44987551317288366 +5001800US1714,5001800US1714,0.21123631670458598 +5001800US1714,5001800US1710,0.20862351333697274 +5001800US1714,5001800US1716,0.04576508120832689 +5001800US1714,5001800US1703,0.04057103006932764 +5001800US1714,5001800US1709,0.022025806166594615 +5001800US1714,5001800US1708,0.021852250387344944 +5001800US1714,5001800US1701,5.0488953963540664e-05 +5001800US1715,5001800US1712,0.5253890530153864 +5001800US1715,5001800US1715,0.3359293734830502 +5001800US1715,5001800US1702,0.1347890821973043 +5001800US1715,5001800US1713,0.003779529041499511 +5001800US1715,5001800US1716,0.0001129622627596367 +5001800US1716,5001800US1716,0.4820501673237491 +5001800US1716,5001800US1714,0.2313222345126926 +5001800US1716,5001800US1717,0.10048715840624348 +5001800US1716,5001800US1702,0.09952826798445441 +5001800US1716,5001800US1701,0.045711200436146134 +5001800US1716,5001800US1711,0.04090097133671428 +5001800US1717,5001800US1717,0.8136161411561997 +5001800US1717,5001800US1716,0.14275098070808273 +5001800US1717,5001800US1715,0.04363287813571751 +5001800US1718,5001800US1715,0.43658111095457497 +5001800US1718,5001800US1716,0.34141233887645295 +5001800US1718,5001800US1717,0.1454151426131452 +5001800US1718,5001800US1713,0.07659140755582688 
+5001800US1801,5001800US1801,0.988531203859671 +5001800US1801,5001800US1802,0.011468796140328988 +5001800US1802,5001800US1802,0.9614751108212379 +5001800US1802,5001800US1801,0.030442216351061046 +5001800US1802,5001800US1803,0.008082672827701072 +5001800US1803,5001800US1803,0.9597306037408526 +5001800US1803,5001800US1802,0.040269396259147425 +5001800US1804,5001800US1804,0.8313791776657714 +5001800US1804,5001800US1805,0.12645125968862403 +5001800US1804,5001800US1802,0.023428810986406735 +5001800US1804,5001800US1808,0.018740751659197804 +5001800US1805,5001800US1805,0.6400148982319304 +5001800US1805,5001800US1807,0.3104134259360691 +5001800US1805,5001800US1804,0.03134544179830423 +5001800US1805,5001800US1803,0.018226234033696297 +5001800US1806,5001800US1806,0.5011702888227366 +5001800US1806,5001800US1809,0.32791859893966063 +5001800US1806,5001800US1805,0.1407964956011474 +5001800US1806,5001800US1803,0.030114616636455326 +5001800US1807,5001800US1807,0.8034097347630396 +5001800US1807,5001800US1806,0.19659026523696044 +5001800US1808,5001800US1808,1.0 +5001800US1809,5001800US1809,0.7601494094636573 +5001800US1809,5001800US1806,0.15824668034071387 +5001800US1809,5001800US1808,0.051898563431577525 +5001800US1809,5001800US1804,0.029705346764051274 +5001800US1901,5001800US1902,0.8720655061774997 +5001800US1901,5001800US1901,0.07558665679428175 +5001800US1901,5001800US1904,0.05234783702821857 +5001800US1902,5001800US1901,0.8463125152936964 +5001800US1902,5001800US1903,0.15368748470630356 +5001800US1903,5001800US1903,0.7723673464179066 +5001800US1903,5001800US1904,0.18090719991832777 +5001800US1903,5001800US1901,0.0467254536637656 +5001800US1904,5001800US1904,0.8253842778468936 +5001800US1904,5001800US1902,0.1635109839452142 +5001800US1904,5001800US1903,0.011104738207892137 +5001800US2001,5001800US2001,0.8843389638576956 +5001800US2001,5001800US2002,0.11097291686326319 +5001800US2001,5001800US2004,0.004688119279041238 +5001800US2002,5001800US2002,0.7700740811652688 
+5001800US2002,5001800US2001,0.15106556109486954 +5001800US2002,5001800US2003,0.0788603577398617 +5001800US2003,5001800US2003,0.8321204268744993 +5001800US2003,5001800US2002,0.1678795731255007 +5001800US2004,5001800US2004,1.0 +5001800US2101,5001800US2101,0.9002669684401693 +5001800US2101,5001800US2102,0.09973303155983068 +5001800US2102,5001800US2102,0.8524362905311212 +5001800US2102,5001800US2101,0.06378432398065823 +5001800US2102,5001800US2106,0.06038959650183638 +5001800US2102,5001800US2104,0.02338978898638415 +5001800US2103,5001800US2103,0.9897572842907433 +5001800US2103,5001800US2102,0.010242715709256708 +5001800US2104,5001800US2104,0.9288639599361822 +5001800US2104,5001800US2105,0.05473830922861576 +5001800US2104,5001800US2102,0.012965229702017368 +5001800US2104,5001800US2103,0.0034325011331846384 +5001800US2105,5001800US2105,0.9925275551511651 +5001800US2105,5001800US2104,0.007472444848834865 +5001800US2106,5001800US2106,0.9323193563898508 +5001800US2106,5001800US2105,0.030919473416900174 +5001800US2106,5001800US2101,0.02025572148334682 +5001800US2106,5001800US2104,0.016505448709902303 +5001800US2201,5001800US2201,0.8325783943568084 +5001800US2201,5001800US2202,0.07545747116388095 +5001800US2201,5001800US2203,0.05912838698044444 +5001800US2201,5001800US2205,0.032835747498866225 +5001800US2202,5001800US2202,0.8091613356906828 +5001800US2202,5001800US2206,0.15022270954110004 +5001800US2202,5001800US2201,0.03794810609405958 +5001800US2202,5001800US2205,0.002667848674157528 +5001800US2203,5001800US2203,0.8182390505997069 +5001800US2203,5001800US2204,0.11466251786457297 +5001800US2203,5001800US2206,0.06709843153572011 +5001800US2204,5001800US2204,0.6867822356979522 +5001800US2204,5001800US2206,0.3132177643020479 +5001800US2205,5001800US2205,0.5912365355773487 +5001800US2205,5001800US2204,0.22153779067846815 +5001800US2205,5001800US2206,0.18722567374418314 +5001800US2206,5001800US2206,0.4288051906586389 +5001800US2206,5001800US2205,0.2877383232986681 
+5001800US2206,5001800US2203,0.11320356686735997 +5001800US2206,5001800US2202,0.11290993887976647 +5001800US2206,5001800US2201,0.057342980295566504 +5001800US2301,5001800US2301,0.941166511470414 +5001800US2301,5001800US2302,0.05883348852958605 +5001800US2302,5001800US2302,0.9799833209581671 +5001800US2302,5001800US2301,0.02001667904183294 +5001800US2401,5001800US2401,0.8710774342335513 +5001800US2401,5001800US2402,0.12892256576644867 +5001800US2402,5001800US2402,0.4366817998188828 +5001800US2402,5001800US2407,0.26050547631884446 +5001800US2402,5001800US2401,0.14073831560957367 +5001800US2402,5001800US2403,0.13744035971147392 +5001800US2402,5001800US2405,0.0246340485412252 +5001800US2403,5001800US2403,0.3098054988636244 +5001800US2403,5001800US2407,0.2845048950213751 +5001800US2403,5001800US2402,0.22175483091363193 +5001800US2403,5001800US2408,0.11619308900964953 +5001800US2403,5001800US2404,0.03769359660246525 +5001800US2403,5001800US2405,0.030048089589253785 +5001800US2404,5001800US2404,0.7260962321123802 +5001800US2404,5001800US2405,0.14302711041092295 +5001800US2404,5001800US2403,0.1308471182880399 +5001800US2404,5001800US2408,2.9539188656951555e-05 +5001800US2405,5001800US2405,0.6828384316140046 +5001800US2405,5001800US2404,0.31716156838599535 +5001800US2405,5001800US2403,0.0 +5001800US2406,5001800US2406,0.763907930400518 +5001800US2406,5001800US2408,0.23609206959948204 +5001800US2407,5001800US2407,0.6091312221019334 +5001800US2407,5001800US2403,0.22031060524784374 +5001800US2407,5001800US2402,0.14582256423260606 +5001800US2407,5001800US2401,0.024735608417616784 +5001800US2408,5001800US2408,0.6534096708918524 +5001800US2408,5001800US2406,0.19547588882017042 +5001800US2408,5001800US2402,0.12317537423217105 +5001800US2408,5001800US2403,0.027939066055806077 +5001800US2408,5001800US2404,0.0 +5001800US2501,5001800US2501,0.9788216495205518 +5001800US2501,5001800US2502,0.021178350479448205 +5001800US2502,5001800US2502,0.8937180011792643 
+5001800US2502,5001800US2501,0.07845007297906001 +5001800US2502,5001800US2504,0.027831925841675716 +5001800US2503,5001800US2503,0.9435368111458736 +5001800US2503,5001800US2506,0.03571847060615648 +5001800US2503,5001800US2505,0.01977800822582346 +5001800US2503,5001800US2502,0.0009667100221464477 +5001800US2504,5001800US2504,0.9128553617716668 +5001800US2504,5001800US2505,0.03787473233404711 +5001800US2504,5001800US2508,0.03331384537360532 +5001800US2504,5001800US2502,0.015862729629212216 +5001800US2504,5001800US2509,9.333089146849995e-05 +5001800US2505,5001800US2505,0.9431400489978259 +5001800US2505,5001800US2502,0.04449642731279783 +5001800US2505,5001800US2504,0.007218581718941797 +5001800US2505,5001800US2507,0.0051449419704344595 +5001800US2505,5001800US2508,0.0 +5001800US2506,5001800US2506,0.9606618898017568 +5001800US2506,5001800US2503,0.03818849125728009 +5001800US2506,5001800US2505,0.0011496189409631033 +5001800US2507,5001800US2507,0.9583135106287732 +5001800US2507,5001800US2508,0.03649893879041418 +5001800US2507,5001800US2505,0.005187550580812559 +5001800US2508,5001800US2508,0.944787405218379 +5001800US2508,5001800US2509,0.04152162405656574 +5001800US2508,5001800US2507,0.00965431657324345 +5001800US2508,5001800US2504,0.004036654151811849 +5001800US2508,5001800US2505,0.0 +5001800US2509,5001800US2509,0.9300874102769248 +5001800US2509,5001800US2504,0.06991258972307526 +5001800US2509,5001800US2508,0.0 +5001800US2601,5001800US2601,0.9404095360933812 +5001800US2601,5001800US2602,0.05959046390661885 +5001800US2602,5001800US2603,0.486529033934378 +5001800US2602,5001800US2602,0.3071689764664076 +5001800US2602,5001800US2604,0.2063019895992144 +5001800US2603,5001800US2603,0.5487075481609025 +5001800US2603,5001800US2602,0.23468724570740748 +5001800US2603,5001800US2604,0.13520842866987057 +5001800US2603,5001800US2605,0.08139677746181939 +5001800US2604,5001800US2602,0.48116656595311563 +5001800US2604,5001800US2607,0.23333636365256855 
+5001800US2604,5001800US2608,0.19860177505405555 +5001800US2604,5001800US2601,0.08689529534026029 +5001800US2605,5001800US2608,0.9182020676890162 +5001800US2605,5001800US2601,0.05437712451879429 +5001800US2605,5001800US2609,0.027063642313728525 +5001800US2605,5001800US2607,0.00035716547846098627 +5001800US2606,5001800US2604,0.6152097283555882 +5001800US2606,5001800US2605,0.3847902716444118 +5001800US2607,5001800US2605,0.709607595652092 +5001800US2607,5001800US2607,0.17329276792946893 +5001800US2607,5001800US2606,0.10808356724375036 +5001800US2607,5001800US2602,0.009016069174688696 +5001800US2608,5001800US2607,0.6547975192781864 +5001800US2608,5001800US2609,0.23932954941294593 +5001800US2608,5001800US2610,0.10573974277537583 +5001800US2608,5001800US2611,0.00013318853349179353 +5001800US2609,5001800US2610,0.7095396396101716 +5001800US2609,5001800US2611,0.26760158451704424 +5001800US2609,5001800US2612,0.022858775872784145 +5001800US2610,5001800US2609,0.7811212928925769 +5001800US2610,5001800US2610,0.21887870710742313 +5001800US2611,5001800US2611,0.5101384973032931 +5001800US2611,5001800US2606,0.27019631562775975 +5001800US2611,5001800US2612,0.1290129337582104 +5001800US2611,5001800US2607,0.04340239196263452 +5001800US2611,5001800US2609,0.03674516844031591 +5001800US2611,5001800US2610,0.010504692907786342 +5001800US2612,5001800US2606,0.5760270363496696 +5001800US2612,5001800US2613,0.27983553028363195 +5001800US2612,5001800US2612,0.1441374333666985 +5001800US2613,5001800US2612,0.6246669596506167 +5001800US2613,5001800US2613,0.37533304034938336 +5001800US2614,5001800US2613,0.41190846300540057 +5001800US2614,5001800US2611,0.36712753000753195 +5001800US2614,5001800US2612,0.2209640069870675 +5001800US2701,5001800US2701,0.9416988690444315 +5001800US2701,5001800US2702,0.04513284884373742 +5001800US2701,5001800US2707,0.01316828211183107 +5001800US2702,5001800US2702,0.8406623603360881 +5001800US2702,5001800US2701,0.15933763966391196 
+5001800US2703,5001800US2703,0.9515062148633415 +5001800US2703,5001800US2706,0.04849378513665852 +5001800US2703,5001800US2705,0.0 +5001800US2704,5001800US2704,0.9872695337649051 +5001800US2704,5001800US2702,0.010586497371606685 +5001800US2704,5001800US2708,0.0021439688634881477 +5001800US2705,5001800US2705,0.9848856987648165 +5001800US2705,5001800US2703,0.015114301235183586 +5001800US2706,5001800US2706,0.8292238057785354 +5001800US2706,5001800US2708,0.07227300687658564 +5001800US2706,5001800US2707,0.052240016403426355 +5001800US2706,5001800US2703,0.046263170941452623 +5001800US2707,5001800US2707,0.9425849699683243 +5001800US2707,5001800US2708,0.05734855249659078 +5001800US2707,5001800US2706,6.647753508486665e-05 +5001800US2708,5001800US2708,0.870002069399742 +5001800US2708,5001800US2707,0.12999793060025802 +5001800US2801,5001800US2801,0.9572797698899 +5001800US2801,5001800US2803,0.04272023011009995 +5001800US2802,5001800US2802,0.9960763018430357 +5001800US2802,5001800US2803,0.003923698156964258 +5001800US2803,5001800US2803,0.8655147881482269 +5001800US2803,5001800US2802,0.13448521185177312 +5001800US2804,5001800US2804,0.8733773563607631 +5001800US2804,5001800US2803,0.12662264363923692 +5001800US2901,5001800US2901,0.9769804108589307 +5001800US2901,5001800US2902,0.023019589141069274 +5001800US2902,5001800US2902,0.6755620222021533 +5001800US2902,5001800US2903,0.1577787440583039 +5001800US2902,5001800US2901,0.10836278630706166 +5001800US2902,5001800US2908,0.05829644743248115 +5001800US2903,5001800US2903,0.6891206851164442 +5001800US2903,5001800US2902,0.15215873989581244 +5001800US2903,5001800US2908,0.13246115910457637 +5001800US2903,5001800US2904,0.0161691151786814 +5001800US2903,5001800US2906,0.01009030070448557 +5001800US2904,5001800US2904,0.7471718789325862 +5001800US2904,5001800US2903,0.14004405610136914 +5001800US2904,5001800US2906,0.09228840041606065 +5001800US2904,5001800US2907,0.020495664549984076 +5001800US2905,5001800US2905,0.8467785096552989 
+5001800US2905,5001800US2904,0.12422438948743988 +5001800US2905,5001800US2906,0.02899710085726119 +5001800US2906,5001800US2906,0.8902455639134994 +5001800US2906,5001800US2905,0.07760777481442337 +5001800US2906,5001800US2904,0.03214666127207722 +5001800US2907,5001800US2907,1.0 +5001800US2908,5001800US2908,0.9008372927864159 +5001800US2908,5001800US2903,0.09916270721358411 +5001800US3000,5001800US3002,0.6106969801944205 +5001800US3000,5001800US3001,0.3893030198055794 +5001800US3101,5001800US3101,0.8532079757052351 +5001800US3101,5001800US3103,0.10288864440171386 +5001800US3101,5001800US3102,0.04390337989305108 +5001800US3102,5001800US3102,0.9112527509181813 +5001800US3102,5001800US3101,0.08874724908181875 +5001800US3103,5001800US3103,1.0 +5001800US3201,5001800US3201,0.5651471999748242 +5001800US3201,5001800US3203,0.24891507875316665 +5001800US3201,5001800US3204,0.1859377212720092 +5001800US3202,5001800US3202,0.9999979321753515 +5001800US3202,5001800US3204,2.0678246484698098e-06 +5001800US3203,5001800US3203,0.596842989599895 +5001800US3203,5001800US3201,0.39709212860480453 +5001800US3203,5001800US3204,0.006064881795300441 +5001800US3204,5001800US3204,0.7958110422121486 +5001800US3204,5001800US3203,0.13791955636236355 +5001800US3204,5001800US3201,0.05101764187104722 +5001800US3204,5001800US3202,0.015251759554440665 +5001800US3301,5001800US3301,0.9951414538029315 +5001800US3301,5001800US3302,0.00485854619706852 +5001800US3302,5001800US3302,1.0 +5001800US3401,5001800US3401,0.9874253792650199 +5001800US3401,5001800US3402,0.012574620734980009 +5001800US3402,5001800US3402,0.9562124416629942 +5001800US3402,5001800US3401,0.04128904730206906 +5001800US3402,5001800US3403,0.002498511034936744 +5001800US3403,5001800US3403,0.5688511147176902 +5001800US3403,5001800US3404,0.3768787904672418 +5001800US3403,5001800US3402,0.05427009481506791 +5001800US3404,5001800US3404,0.5840949485209217 +5001800US3404,5001800US3403,0.3162222700610063 +5001800US3404,5001800US3406,0.09968278141807203 
+5001800US3405,5001800US3405,0.8014430283325896 +5001800US3405,5001800US3407,0.1225707742628345 +5001800US3405,5001800US3409,0.0759861974045759 +5001800US3406,5001800US3406,0.9214673469750344 +5001800US3406,5001800US3404,0.038562113672227685 +5001800US3406,5001800US3403,0.03432553046124326 +5001800US3406,5001800US3412,0.005645008891494691 +5001800US3407,5001800US3407,0.7117057460687819 +5001800US3407,5001800US3412,0.12228593963095023 +5001800US3407,5001800US3410,0.10259780804811121 +5001800US3407,5001800US3411,0.06341050625215662 +5001800US3408,5001800US3408,0.8651699383579543 +5001800US3408,5001800US3411,0.05875516886231227 +5001800US3408,5001800US3409,0.03899128402994481 +5001800US3408,5001800US3410,0.03708360874978863 +5001800US3409,5001800US3409,0.8068333136385843 +5001800US3409,5001800US3405,0.1931666863614157 +5001800US3410,5001800US3410,0.7555074254785711 +5001800US3410,5001800US3411,0.13674598648723596 +5001800US3410,5001800US3407,0.05987169842445893 +5001800US3410,5001800US3408,0.04787488960973397 +5001800US3411,5001800US3411,0.7817920628097502 +5001800US3411,5001800US3407,0.08850637200364114 +5001800US3411,5001800US3410,0.08514337008192577 +5001800US3411,5001800US3409,0.02606800596743198 +5001800US3411,5001800US3405,0.018490189137250937 +5001800US3412,5001800US3412,0.8600999399935214 +5001800US3412,5001800US3403,0.07852883231640938 +5001800US3412,5001800US3407,0.04225411947130575 +5001800US3412,5001800US3406,0.019117108218763442 +5001800US3501,5001800US3501,0.7854562273666081 +5001800US3501,5001800US3502,0.2145437726333918 +5001800US3502,5001800US3502,0.653947440544708 +5001800US3502,5001800US3503,0.28274241481788653 +5001800US3502,5001800US3501,0.06331014463740553 +5001800US3503,5001800US3503,0.8652971597638904 +5001800US3503,5001800US3501,0.13295054080006238 +5001800US3503,5001800US3502,0.0017522994360471634 +5001800US3601,5001800US3601,0.7777308735322014 +5001800US3601,5001800US3602,0.2222691264677986 +5001800US3602,5001800US3602,0.8105130028090965 
+5001800US3602,5001800US3603,0.13333430807948812 +5001800US3602,5001800US3604,0.05615268911141531 +5001800US3603,5001800US3603,0.7886431433183732 +5001800US3603,5001800US3601,0.21122219916322235 +5001800US3603,5001800US3614,0.00012990490010788444 +5001800US3603,5001800US3606,4.752618296629918e-06 +5001800US3604,5001800US3604,0.883769170512824 +5001800US3604,5001800US3603,0.11623082948717599 +5001800US3605,5001800US3605,0.8415889123726904 +5001800US3605,5001800US3604,0.1295387765021246 +5001800US3605,5001800US3603,0.028801614245421273 +5001800US3605,5001800US3606,7.069687976375459e-05 +5001800US3606,5001800US3606,0.7995057816525636 +5001800US3606,5001800US3607,0.08138881766120859 +5001800US3606,5001800US3605,0.06057608947824707 +5001800US3606,5001800US3603,0.05852931120798076 +5001800US3607,5001800US3607,0.551085215662451 +5001800US3607,5001800US3610,0.42588501684431435 +5001800US3607,5001800US3605,0.021521756472328075 +5001800US3607,5001800US3611,0.0008790338911286611 +5001800US3607,5001800US3608,0.000628977129777925 +5001800US3608,5001800US3608,0.7606744331212871 +5001800US3608,5001800US3607,0.12366337258642578 +5001800US3608,5001800US3609,0.050926153220319965 +5001800US3608,5001800US3605,0.0467109462122753 +5001800US3608,5001800US3610,0.01802509485969182 +5001800US3608,5001800US3611,0.0 +5001800US3609,5001800US3609,0.7456206621537796 +5001800US3609,5001800US3608,0.1622446481435032 +5001800US3609,5001800US3610,0.0921346897027172 +5001800US3610,5001800US3612,0.33220618493145404 +5001800US3610,5001800US3610,0.276788397666587 +5001800US3610,5001800US3609,0.20647622276850047 +5001800US3610,5001800US3611,0.1516949609685051 +5001800US3610,5001800US3613,0.03283423366495334 +5001800US3610,5001800US3608,0.0 +5001800US3611,5001800US3611,0.8708431322850461 +5001800US3611,5001800US3608,0.08634092972667742 +5001800US3611,5001800US3609,0.04281593798827655 +5001800US3612,5001800US3612,0.5659346745382134 +5001800US3612,5001800US3607,0.16946860446469458 
+5001800US3612,5001800US3610,0.1482147669203262 +5001800US3612,5001800US3614,0.11638195407676577 +5001800US3612,5001800US3613,0.0 +5001800US3613,5001800US3613,0.9412145148753163 +5001800US3613,5001800US3615,0.058413339615789775 +5001800US3613,5001800US3612,0.00037214550889397757 +5001800US3614,5001800US3614,0.5794431989565075 +5001800US3614,5001800US3606,0.20730444707129173 +5001800US3614,5001800US3615,0.12144132393103167 +5001800US3614,5001800US3607,0.09181103004116904 +5001800US3614,5001800US3603,0.0 +5001800US3615,5001800US3615,0.5799387820555455 +5001800US3615,5001800US3614,0.338683622056787 +5001800US3615,5001800US3613,0.08137759588766759 +5001800US3616,5001800US3616,0.7157904756373399 +5001800US3616,5001800US3615,0.28231387998877133 +5001800US3616,5001800US3613,0.0018956443738888298 +5001800US3617,5001800US3617,0.7087298045166474 +5001800US3617,5001800US3616,0.2912701954833527 +5001800US3618,5001800US3618,0.74410854131465 +5001800US3618,5001800US3617,0.25589145868535 +5001800US3619,5001800US3619,0.46916598700882584 +5001800US3619,5001800US3618,0.38997189129358306 +5001800US3619,5001800US3621,0.07942348023652093 +5001800US3619,5001800US3620,0.03867677900439836 +5001800US3619,5001800US3617,0.022761862456671812 +5001800US3620,5001800US3620,0.966826774691358 +5001800US3620,5001800US3619,0.018674768518518518 +5001800US3620,5001800US3621,0.014498456790123456 +5001800US3621,5001800US3621,0.8338834368606118 +5001800US3621,5001800US3624,0.11727619117629041 +5001800US3621,5001800US3620,0.048840371963097734 +5001800US3622,5001800US3619,0.408574246313831 +5001800US3622,5001800US3622,0.31161061676333013 +5001800US3622,5001800US3621,0.23706694854743782 +5001800US3622,5001800US3624,0.024235780587885464 +5001800US3622,5001800US3623,0.01851240778751559 +5001800US3623,5001800US3623,0.7260454178274032 +5001800US3623,5001800US3619,0.1516280509440657 +5001800US3623,5001800US3624,0.1223265312285311 +5001800US3624,5001800US3622,0.7947957839262187 
+5001800US3624,5001800US3624,0.20520421607378128 +5001800US3625,5001800US3625,1.0 +5001800US3626,5001800US3626,1.0 +5001800US3626,5001800US3623,0.0 +5001800US3627,5001800US3624,0.4975097244887349 +5001800US3627,5001800US3623,0.4278562834096933 +5001800US3627,5001800US3625,0.04418620936988708 +5001800US3627,5001800US3626,0.03044778273168475 +5001800US3701,5001800US3701,0.521492970029817 +5001800US3701,5001800US3704,0.34909429470483555 +5001800US3701,5001800US3713,0.06587311876112244 +5001800US3701,5001800US3703,0.06353961650422506 +5001800US3702,5001800US3713,0.5915759434296931 +5001800US3702,5001800US3702,0.16855733629744082 +5001800US3702,5001800US3701,0.13473024782356202 +5001800US3702,5001800US3704,0.10513647244930403 +5001800US3703,5001800US3703,0.7100253896113005 +5001800US3703,5001800US3701,0.28997461038869954 +5001800US3704,5001800US3702,0.7875290965786815 +5001800US3704,5001800US3704,0.21247090342131847 +5001800US3704,5001800US3713,0.0 +5001800US3705,5001800US3710,0.4431789962096458 +5001800US3705,5001800US3705,0.3711018167559796 +5001800US3705,5001800US3706,0.15662821853352504 +5001800US3705,5001800US3711,0.02909096850084956 +5001800US3706,5001800US3709,0.49629613747708534 +5001800US3706,5001800US3705,0.31303087802803714 +5001800US3706,5001800US3713,0.17451213716262332 +5001800US3706,5001800US3704,0.012183617242525273 +5001800US3706,5001800US3706,0.003977230089728884 +5001800US3707,5001800US3707,0.7133882206152413 +5001800US3707,5001800US3701,0.22876632486593035 +5001800US3707,5001800US3703,0.03617171459784961 +5001800US3707,5001800US3713,0.021673739920978714 +5001800US3708,5001800US3706,0.3366908737138983 +5001800US3708,5001800US3709,0.3030188382582718 +5001800US3708,5001800US3708,0.21712640402511768 +5001800US3708,5001800US3707,0.14316388400271224 +5001800US3709,5001800US3708,0.7445891182383714 +5001800US3709,5001800US3707,0.13599570539877687 +5001800US3709,5001800US3712,0.09601381009020768 +5001800US3709,5001800US3714,0.022401397850594192 
+5001800US3709,5001800US3709,0.00099996842204983 +5001800US3710,5001800US3714,0.44833967154996746 +5001800US3710,5001800US3710,0.38225843236217394 +5001800US3710,5001800US3711,0.16940189608785863 +5001800US3711,5001800US3711,0.7531361162748712 +5001800US3711,5001800US3705,0.1277896704077199 +5001800US3711,5001800US3714,0.11907421331740889 +5001800US3712,5001800US3712,0.873563091049287 +5001800US3712,5001800US3714,0.12120554112861055 +5001800US3712,5001800US3708,0.0052313678221024736 +5001800US3713,5001800US3706,0.515749821393778 +5001800US3713,5001800US3710,0.26001402204558666 +5001800US3713,5001800US3705,0.1766078306962895 +5001800US3713,5001800US3709,0.047628325864345865 +5001800US3800,5001800US3800,1.0 +5001800US3901,5001800US3901,0.6711940673083381 +5001800US3901,5001800US3908,0.3288059326916619 +5001800US3902,5001800US3902,0.524744787922358 +5001800US3902,5001800US3901,0.36714234363767073 +5001800US3902,5001800US3908,0.10811286843997124 +5001800US3903,5001800US3903,0.7221931735657225 +5001800US3903,5001800US3915,0.2778068264342774 +5001800US3904,5001800US3904,0.5038674881480202 +5001800US3904,5001800US3905,0.3343569861103114 +5001800US3904,5001800US3909,0.11068405596821558 +5001800US3904,5001800US3915,0.051091469773452844 +5001800US3905,5001800US3905,0.5013060509523489 +5001800US3905,5001800US3909,0.45066168756797337 +5001800US3905,5001800US3904,0.04803226147967777 +5001800US3906,5001800US3906,0.6051412365923513 +5001800US3906,5001800US3902,0.2667525411355085 +5001800US3906,5001800US3912,0.12810622227214016 +5001800US3907,5001800US3912,0.2310239411946553 +5001800US3907,5001800US3906,0.2139061587606911 +5001800US3907,5001800US3913,0.20307251056229175 +5001800US3907,5001800US3904,0.16239652388967127 +5001800US3907,5001800US3905,0.10656751279497131 +5001800US3907,5001800US3907,0.08303335279771923 +5001800US3908,5001800US3908,0.6526831903336879 +5001800US3908,5001800US3915,0.1947346967181333 +5001800US3908,5001800US3910,0.1255066296822856 
+5001800US3908,5001800US3905,0.027075483265893176 +5001800US3909,5001800US3909,0.3935681798678452 +5001800US3909,5001800US3911,0.2864348236505621 +5001800US3909,5001800US3907,0.19962456019909036 +5001800US3909,5001800US3905,0.12037243628250235 +5001800US3910,5001800US3910,0.9608606430757323 +5001800US3910,5001800US3915,0.039139356924267704 +5001800US3911,5001800US3911,0.773182304717457 +5001800US3911,5001800US3913,0.17935194944204988 +5001800US3911,5001800US3907,0.04746574584049311 +5001800US3912,5001800US3904,0.4063692930988564 +5001800US3912,5001800US3912,0.29986602855367334 +5001800US3912,5001800US3903,0.24525248775291145 +5001800US3912,5001800US3915,0.04851219059455882 +5001800US3913,5001800US3914,0.3893675468313271 +5001800US3913,5001800US3913,0.3339660267653348 +5001800US3913,5001800US3906,0.27666642640333805 +5001800US3914,5001800US3914,0.6613176044166261 +5001800US3914,5001800US3913,0.20643028543620895 +5001800US3914,5001800US3907,0.06744416290503918 +5001800US3914,5001800US3911,0.06480794724212575 +5001800US3915,5001800US3912,0.3399033912242208 +5001800US3915,5001800US3902,0.2715417322639428 +5001800US3915,5001800US3915,0.2244335907639035 +5001800US3915,5001800US3903,0.16412128574793292 +5001800US3916,5001800US3907,0.7033591519716375 +5001800US3916,5001800US3913,0.2373600205030363 +5001800US3916,5001800US3914,0.03914620515850698 +5001800US3916,5001800US3906,0.020134622366819253 +5001800US4001,5001800US4001,0.8842488103200041 +5001800US4001,5001800US4002,0.11561434516042611 +5001800US4001,5001800US4003,0.00013684451956973 +5001800US4002,5001800US4002,0.9971427986045532 +5001800US4002,5001800US4001,0.002857201395446731 +5001800US4003,5001800US4003,0.8040516338653957 +5001800US4003,5001800US4005,0.15793542830858967 +5001800US4003,5001800US4001,0.03801293782601462 +5001800US4004,5001800US4004,0.9535074377312492 +5001800US4004,5001800US4003,0.03701929818883963 +5001800US4004,5001800US4005,0.009473264079911115 +5001800US4005,5001800US4005,0.6984205169252319 
+5001800US4005,5001800US4003,0.25506467818681733 +5001800US4005,5001800US4004,0.04651480488795079 +5001800US4101,5001800US4101,0.6319223670158587 +5001800US4101,5001800US4106,0.36793770924258046 +5001800US4101,5001800US4105,0.00013992374156084934 +5001800US4101,5001800US4103,0.0 +5001800US4102,5001800US4102,0.9444103440623628 +5001800US4102,5001800US4105,0.032057244573711126 +5001800US4102,5001800US4103,0.023532411363926097 +5001800US4103,5001800US4103,0.820860213343091 +5001800US4103,5001800US4105,0.10341333036360599 +5001800US4103,5001800US4101,0.075726456293303 +5001800US4104,5001800US4104,0.7968913676393032 +5001800US4104,5001800US4105,0.12964253978803794 +5001800US4104,5001800US4102,0.07346609257265878 +5001800US4105,5001800US4106,0.47915371509797267 +5001800US4105,5001800US4105,0.41577139823175385 +5001800US4105,5001800US4104,0.07165227852397482 +5001800US4105,5001800US4101,0.025655027336911048 +5001800US4105,5001800US4103,0.007767580809387633 +5001800US4105,5001800US4102,0.0 +5001800US4201,5001800US4201,0.9704663963944842 +5001800US4201,5001800US4204,0.029533603605515817 +5001800US4202,5001800US4202,0.9980442399551832 +5001800US4202,5001800US4203,0.001955760044816739 +5001800US4203,5001800US4203,0.9651450324415789 +5001800US4203,5001800US4202,0.03485496755842115 +5001800US4204,5001800US4204,0.8132087976394857 +5001800US4204,5001800US4205,0.11333182922229233 +5001800US4204,5001800US4201,0.07345937313822193 +5001800US4205,5001800US4205,0.9352211556887271 +5001800US4205,5001800US4203,0.06179816385903176 +5001800US4205,5001800US4204,0.002980680452241172 +5001800US4206,5001800US4206,0.9999884570322645 +5001800US4206,5001800US4205,1.1542967735481352e-05 +5001800US4207,5001800US4207,0.9560041017339772 +5001800US4207,5001800US4208,0.04399589826602283 +5001800US4208,5001800US4208,1.0 +5001800US4209,5001800US4209,0.8270239211179662 +5001800US4209,5001800US4204,0.09447253130846436 +5001800US4209,5001800US4207,0.05684158093527296 
+5001800US4209,5001800US4208,0.014906084412248052 +5001800US4209,5001800US4206,0.006755882226048486 +5001800US4210,5001800US4210,0.9981201533812293 +5001800US4210,5001800US4211,0.0018798466187706694 +5001800US4211,5001800US4211,0.9940133511133313 +5001800US4211,5001800US4210,0.005986648886668627 +5001800US4212,5001800US4209,0.41404264144225494 +5001800US4212,5001800US4215,0.4093864763823965 +5001800US4212,5001800US4213,0.17657088217534858 +5001800US4213,5001800US4213,0.803400098291927 +5001800US4213,5001800US4214,0.16630191669257696 +5001800US4213,5001800US4210,0.030297985015496024 +5001800US4214,5001800US4214,0.8417178002193104 +5001800US4214,5001800US4212,0.15828219978068955 +5001800US4215,5001800US4215,0.6702230151650312 +5001800US4215,5001800US4213,0.146158616881186 +5001800US4215,5001800US4214,0.1102858842020305 +5001800US4215,5001800US4216,0.07333248375175226 +5001800US4216,5001800US4216,1.0 +5001800US4217,5001800US4217,0.9754036675840521 +5001800US4217,5001800US4216,0.02459633241594798 +5001800US4218,5001800US4212,0.9361369308404336 +5001800US4218,5001800US4217,0.06386306915956647 +5001800US4401,5001800US4401,0.9892810219452863 +5001800US4401,5001800US4402,0.010718978054713618 +5001800US4402,5001800US4402,0.9957674643078059 +5001800US4402,5001800US4401,0.004232535692194053 +5001800US4501,5001800US4501,0.8144612879153154 +5001800US4501,5001800US4506,0.18553871208468461 +5001800US4502,5001800US4502,0.9552158458685349 +5001800US4502,5001800US4506,0.044784154131465075 +5001800US4503,5001800US4503,0.9695316285950909 +5001800US4503,5001800US4504,0.030468371404909093 +5001800US4504,5001800US4504,0.9457216266573607 +5001800US4504,5001800US4505,0.04442757407486771 +5001800US4504,5001800US4503,0.00985079926777154 +5001800US4505,5001800US4505,0.9358826486854168 +5001800US4505,5001800US4503,0.04183548733721744 +5001800US4505,5001800US4506,0.022281863977365708 +5001800US4506,5001800US4506,0.8276732845698362 +5001800US4506,5001800US4501,0.12874787012718047 
+5001800US4506,5001800US4502,0.03427800841593945 +5001800US4506,5001800US4507,0.008498310222448154 +5001800US4506,5001800US4505,0.0008025266645956301 +5001800US4507,5001800US4507,0.9991042095720993 +5001800US4507,5001800US4506,0.0008957904279006572 +5001800US4600,5001800US4600,1.0 +5001800US4701,5001800US4701,1.0 +5001800US4702,5001800US4702,0.9579557293782648 +5001800US4702,5001800US4701,0.04204427062173518 +5001800US4703,5001800US4703,0.9350354507888885 +5001800US4703,5001800US4702,0.04586915262987514 +5001800US4703,5001800US4706,0.019095396581236365 +5001800US4704,5001800US4704,0.7687311547065842 +5001800US4704,5001800US4705,0.14059506960476245 +5001800US4704,5001800US4703,0.07852941701917332 +5001800US4704,5001800US4706,0.012144358669480102 +5001800US4705,5001800US4705,0.3956991032305144 +5001800US4705,5001800US4707,0.34082255169480097 +5001800US4705,5001800US4706,0.2634783450746847 +5001800US4706,5001800US4706,0.7452178436822422 +5001800US4706,5001800US4707,0.11167944818754638 +5001800US4706,5001800US4705,0.11013147746620294 +5001800US4706,5001800US4704,0.0329712306640085 +5001800US4707,5001800US4707,0.4736478313461206 +5001800US4707,5001800US4705,0.22107563606054137 +5001800US4707,5001800US4708,0.17320363085617865 +5001800US4707,5001800US4704,0.13207290173715935 +5001800US4708,5001800US4708,0.9002346736875378 +5001800US4708,5001800US4709,0.09958526977470926 +5001800US4708,5001800US4707,0.0001800565377528544 +5001800US4709,5001800US4709,0.9920755629102133 +5001800US4709,5001800US4708,0.007924437089786735 +5001800US4801,5001800US4801,0.7893897761480967 +5001800US4801,5001800US4817,0.18520767492239737 +5001800US4801,5001800US4805,0.025402548929505876 +5001800US4802,5001800US4802,0.359408487885635 +5001800US4802,5001800US4838,0.35927425669478064 +5001800US4802,5001800US4818,0.1757354750665563 +5001800US4802,5001800US4807,0.07756325644869012 +5001800US4802,5001800US4808,0.028018523904337903 +5001800US4803,5001800US4803,0.5176007187488897 
+5001800US4803,5001800US4804,0.44218792034881654 +5001800US4803,5001800US4832,0.040211360902293804 +5001800US4804,5001800US4804,0.5595153906279079 +5001800US4804,5001800US4801,0.2735209550749991 +5001800US4804,5001800US4803,0.1612014739271225 +5001800US4804,5001800US4805,0.005762180369970596 +5001800US4805,5001800US4805,0.4807568300439261 +5001800US4805,5001800US4832,0.2446883559945106 +5001800US4805,5001800US4806,0.2341394723353597 +5001800US4805,5001800US4824,0.02817444789215227 +5001800US4805,5001800US4830,0.01224089373405129 +5001800US4806,5001800US4825,0.48740579826057345 +5001800US4806,5001800US4806,0.40433282851606944 +5001800US4806,5001800US4833,0.07654560512282023 +5001800US4806,5001800US4830,0.019458254536260412 +5001800US4806,5001800US4812,0.01225751356427648 +5001800US4807,5001800US4838,0.47138783269961976 +5001800US4807,5001800US4807,0.3573855182674822 +5001800US4807,5001800US4809,0.11178293932881468 +5001800US4807,5001800US4808,0.03407174739626385 +5001800US4807,5001800US4822,0.025371962307819473 +5001800US4808,5001800US4802,0.3594753042668233 +5001800US4808,5001800US4817,0.3366527611834511 +5001800US4808,5001800US4808,0.14210931329863713 +5001800US4808,5001800US4810,0.1356544014904518 +5001800US4808,5001800US4838,0.02610821976063668 +5001800US4809,5001800US4809,0.7790925827668356 +5001800US4809,5001800US4807,0.19671033655130174 +5001800US4809,5001800US4822,0.024197080681862694 +5001800US4810,5001800US4810,0.37407784444107517 +5001800US4810,5001800US4838,0.23814680846455413 +5001800US4810,5001800US4837,0.19529121271849076 +5001800US4810,5001800US4822,0.10530442504710953 +5001800US4810,5001800US4808,0.0803092984470099 +5001800US4810,5001800US4802,0.003942039030518313 +5001800US4810,5001800US4827,0.0029283718512421756 +5001800US4810,5001800US4835,0.0 +5001800US4811,5001800US4811,0.7644499701265127 +5001800US4811,5001800US4825,0.1508330739141616 +5001800US4811,5001800US4819,0.08471695595932566 +5001800US4812,5001800US4812,0.8362937978608432 
+5001800US4812,5001800US4824,0.11262287629205561 +5001800US4812,5001800US4833,0.022888814480278543 +5001800US4812,5001800US4825,0.0165105446982504 +5001800US4812,5001800US4826,0.011683966668572254 +5001800US4813,5001800US4813,0.8933847583643123 +5001800US4813,5001800US4826,0.07749256505576207 +5001800US4813,5001800US4819,0.016944237918215612 +5001800US4813,5001800US4825,0.012178438661710037 +5001800US4814,5001800US4814,0.9115498013075247 +5001800US4814,5001800US4836,0.08567210934495577 +5001800US4814,5001800US4822,0.002778089347519549 +5001800US4815,5001800US4815,0.5363008110572539 +5001800US4815,5001800US4834,0.2530354657580266 +5001800US4815,5001800US4828,0.2106637231847195 +5001800US4816,5001800US4816,0.9752759215156578 +5001800US4816,5001800US4823,0.024724078484342233 +5001800US4817,5001800US4817,0.7427373353057313 +5001800US4817,5001800US4810,0.23411768768958907 +5001800US4817,5001800US4806,0.014758448405868842 +5001800US4817,5001800US4837,0.008386528598810805 +5001800US4818,5001800US4818,0.7611156199003413 +5001800US4818,5001800US4829,0.14345803771674914 +5001800US4818,5001800US4809,0.05516140010995628 +5001800US4818,5001800US4802,0.035727064080076094 +5001800US4818,5001800US4807,0.004537878192877276 +5001800US4819,5001800US4819,0.9708470161442841 +5001800US4819,5001800US4825,0.029152983855715832 +5001800US4820,5001800US4820,0.8164786861998101 +5001800US4820,5001800US4823,0.12017930752683004 +5001800US4820,5001800US4835,0.030393346873535315 +5001800US4820,5001800US4828,0.016710932711605717 +5001800US4820,5001800US4821,0.016237726688218823 +5001800US4821,5001800US4821,0.7765707422880009 +5001800US4821,5001800US4837,0.10765738483121347 +5001800US4821,5001800US4835,0.06671481004074069 +5001800US4821,5001800US4820,0.04238946436613812 +5001800US4821,5001800US4823,0.005448484001387357 +5001800US4821,5001800US4810,0.0012191144725194514 +5001800US4822,5001800US4822,0.6477084119200301 +5001800US4822,5001800US4836,0.12348642021878536 
+5001800US4822,5001800US4809,0.09349302150132026 +5001800US4822,5001800US4807,0.08792436816295737 +5001800US4822,5001800US4829,0.03448698604300264 +5001800US4822,5001800US4814,0.012900792153904187 +5001800US4823,5001800US4823,0.7146581769317389 +5001800US4823,5001800US4820,0.1716762170236079 +5001800US4823,5001800US4828,0.05921161818321338 +5001800US4823,5001800US4816,0.04323269023829197 +5001800US4823,5001800US4821,0.011221297623147853 +5001800US4824,5001800US4824,0.5527587741059345 +5001800US4824,5001800US4832,0.1824468964810402 +5001800US4824,5001800US4833,0.12638159994223194 +5001800US4824,5001800US4826,0.0776724131906835 +5001800US4824,5001800US4806,0.060740316280109855 +5001800US4825,5001800US4831,0.3023374758849657 +5001800US4825,5001800US4825,0.2401274171115797 +5001800US4825,5001800US4837,0.19086245831277013 +5001800US4825,5001800US4806,0.09325377241389624 +5001800US4825,5001800US4835,0.09159376074894941 +5001800US4825,5001800US4811,0.04402769677120254 +5001800US4825,5001800US4821,0.032500336488851006 +5001800US4825,5001800US4810,0.005297082267785305 +5001800US4826,5001800US4826,0.6296421569031847 +5001800US4826,5001800US4824,0.1817924328545776 +5001800US4826,5001800US4813,0.14163029938044994 +5001800US4826,5001800US4804,0.04693511086178781 +5001800US4827,5001800US4827,0.8794279948976444 +5001800US4827,5001800US4822,0.11994515564771459 +5001800US4827,5001800US4810,0.0006268494546409745 +5001800US4828,5001800US4828,0.6541677607453619 +5001800US4828,5001800US4835,0.2067333375587407 +5001800US4828,5001800US4815,0.1198016171237652 +5001800US4828,5001800US4821,0.019297284572132232 +5001800US4829,5001800US4829,0.8676507112362517 +5001800US4829,5001800US4809,0.05858777578215073 +5001800US4829,5001800US4836,0.054501533321307284 +5001800US4829,5001800US4818,0.01707176152866775 +5001800US4829,5001800US4802,0.002188218131622497 +5001800US4830,5001800US4830,0.81055350194274 +5001800US4830,5001800US4832,0.16845001703655557 
+5001800US4830,5001800US4833,0.0170451454406455 +5001800US4830,5001800US4805,0.0039513355800589265 +5001800US4831,5001800US4831,0.5523763873013532 +5001800US4831,5001800US4811,0.18186091958261125 +5001800US4831,5001800US4837,0.139949977262392 +5001800US4831,5001800US4817,0.07882268352936839 +5001800US4831,5001800US4810,0.04699003232427516 +5001800US4832,5001800US4832,0.41153177123086754 +5001800US4832,5001800US4805,0.3357794521236251 +5001800US4832,5001800US4824,0.21257067385109382 +5001800US4832,5001800US4830,0.03093020306694007 +5001800US4832,5001800US4803,0.009187899727473496 +5001800US4833,5001800US4833,0.7618571219504003 +5001800US4833,5001800US4806,0.1902009754880439 +5001800US4833,5001800US4825,0.021347497822785538 +5001800US4833,5001800US4812,0.012749000160742005 +5001800US4833,5001800US4830,0.009347027592444646 +5001800US4833,5001800US4824,0.004498376985583602 +5001800US4834,5001800US4834,0.7260866211307312 +5001800US4834,5001800US4827,0.21776602757401928 +5001800US4834,5001800US4815,0.05614735129524958 +5001800US4835,5001800US4835,0.4980094714394129 +5001800US4835,5001800US4828,0.27394251845319606 +5001800US4835,5001800US4821,0.13928400327303098 +5001800US4835,5001800US4837,0.056425867120029505 +5001800US4835,5001800US4827,0.018898362212602864 +5001800US4835,5001800US4820,0.013439777501727668 +5001800US4836,5001800US4836,0.7846067785752631 +5001800US4836,5001800US4814,0.1150296162296036 +5001800US4836,5001800US4802,0.057854278225642736 +5001800US4836,5001800US4808,0.04162963698126486 +5001800US4836,5001800US4829,0.0008796899882256878 +5001800US4901,5001800US4901,0.9091577680566965 +5001800US4901,5001800US4903,0.08594433513534165 +5001800US4901,5001800US4902,0.004897896807961948 +5001800US4902,5001800US4902,0.850299662104018 +5001800US4902,5001800US4901,0.1264724400185614 +5001800US4902,5001800US4904,0.023223540740321163 +5001800US4902,5001800US4903,4.357137099497404e-06 +5001800US4903,5001800US4903,0.8580554067686578 
+5001800US4903,5001800US4904,0.1419445932313423 +5001800US4904,5001800US4904,0.6890885107218001 +5001800US4904,5001800US4902,0.16057517251905068 +5001800US4904,5001800US4903,0.11050594686522255 +5001800US4904,5001800US4901,0.03983036989392667 +5001800US5000,5001800US5000,1.0 +5001800US5101,5001800US5107,0.4138336030231669 +5001800US5101,5001800US5101,0.3760757059304695 +5001800US5101,5001800US5110,0.18039576075309813 +5001800US5101,5001800US5105,0.029694930293265413 +5001800US5102,5001800US5102,0.637406523986961 +5001800US5102,5001800US5103,0.19848895092520033 +5001800US5102,5001800US5101,0.16410452508783874 +5001800US5103,5001800US5103,0.8910319597934137 +5001800US5103,5001800US5102,0.10896804020658633 +5001800US5104,5001800US5104,0.826274517976534 +5001800US5104,5001800US5102,0.1736402268167411 +5001800US5104,5001800US5101,8.525520672493071e-05 +5001800US5104,5001800US5103,0.0 +5001800US5105,5001800US5105,0.7519580459694037 +5001800US5105,5001800US5109,0.1122738043233655 +5001800US5105,5001800US5110,0.09247051348238751 +5001800US5105,5001800US5104,0.022080139787935348 +5001800US5105,5001800US5107,0.021217496436907898 +5001800US5106,5001800US5106,0.7818803439226134 +5001800US5106,5001800US5105,0.16303469094355702 +5001800US5106,5001800US5109,0.055084965133829544 +5001800US5107,5001800US5101,0.36917190007713147 +5001800US5107,5001800US5107,0.2482688386662914 +5001800US5107,5001800US5104,0.24557675656745567 +5001800US5107,5001800US5105,0.13698250468912146 +5001800US5108,5001800US5108,0.8917904053884835 +5001800US5108,5001800US5111,0.10820959461151643 +5001800US5109,5001800US5109,0.8736352440574401 +5001800US5109,5001800US5106,0.12636475594255986 +5001800US5110,5001800US5110,0.6431140976040182 +5001800US5110,5001800US5111,0.2229155152185085 +5001800US5110,5001800US5106,0.11234417068259463 +5001800US5110,5001800US5108,0.021626216494878683 +5001800US5111,5001800US5111,0.7142771189258514 +5001800US5111,5001800US5107,0.19590702534952578 
+5001800US5111,5001800US5108,0.08625647196668619 +5001800US5111,5001800US5110,0.003559383757936571 +5001800US5301,5001800US5301,0.6544521645742007 +5001800US5301,5001800US5308,0.2143462379510508 +5001800US5301,5001800US5302,0.13046452131906006 +5001800US5301,5001800US5307,0.0007370761556884516 +5001800US5302,5001800US5302,0.7274945610630916 +5001800US5302,5001800US5301,0.2674738973027913 +5001800US5302,5001800US5308,0.005031541634117044 +5001800US5303,5001800US5303,0.9919913928719961 +5001800US5303,5001800US5304,0.007950744082599498 +5001800US5303,5001800US5310,5.786304540440844e-05 +5001800US5304,5001800US5304,0.9752913669888389 +5001800US5304,5001800US5305,0.024708633011161118 +5001800US5305,5001800US5305,1.0 +5001800US5306,5001800US5306,0.9678031875376516 +5001800US5306,5001800US5310,0.03219681246234835 +5001800US5307,5001800US5307,0.93133559866906 +5001800US5307,5001800US5302,0.06398800652663977 +5001800US5307,5001800US5309,0.00374244060117506 +5001800US5307,5001800US5301,0.0009339542031251035 +5001800US5308,5001800US5308,0.7489307823424434 +5001800US5308,5001800US5309,0.1608098906527457 +5001800US5308,5001800US5310,0.039252860518832026 +5001800US5308,5001800US5304,0.03868542971201218 +5001800US5308,5001800US5306,0.01232103677396672 +5001800US5309,5001800US5309,0.856215398176304 +5001800US5309,5001800US5301,0.05388375598456842 +5001800US5309,5001800US5306,0.04078059500546384 +5001800US5309,5001800US5307,0.03738154126660535 +5001800US5309,5001800US5308,0.01173870956705836 +5001800US5310,5001800US5310,0.9179232212990777 +5001800US5310,5001800US5306,0.04179782992767838 +5001800US5310,5001800US5303,0.029684911599845782 +5001800US5310,5001800US5308,0.010594037173398184 +5001800US5401,5001800US5402,0.9959840131183196 +5001800US5401,5001800US5401,0.004015986881680378 +5001800US5402,5001800US5401,0.6415205133288331 +5001800US5402,5001800US5402,0.3584794866711669 +5001800US5403,5001800US5401,1.0 +5001800US5501,5001800US5501,0.8404202053791578 
+5001800US5501,5001800US5505,0.15536829376588288 +5001800US5501,5001800US5502,0.004211500854959236 +5001800US5502,5001800US5502,0.9055727408456167 +5001800US5502,5001800US5501,0.08796755651533779 +5001800US5502,5001800US5503,0.0064597026390455226 +5001800US5503,5001800US5503,0.9898182054912683 +5001800US5503,5001800US5507,0.01018179450873167 +5001800US5504,5001800US5504,0.9340872822066614 +5001800US5504,5001800US5501,0.06485038713631118 +5001800US5504,5001800US5505,0.0010623306570274028 +5001800US5505,5001800US5505,0.8350322664299464 +5001800US5505,5001800US5504,0.14214019024061014 +5001800US5505,5001800US5501,0.02017607729314409 +5001800US5505,5001800US5506,0.002651466036299318 +5001800US5506,5001800US5506,0.9870854663832449 +5001800US5506,5001800US5505,0.010109862174732536 +5001800US5506,5001800US5504,0.0027695503327306997 +5001800US5506,5001800US5508,3.512110929187482e-05 +5001800US5507,5001800US5507,0.9930117910625982 +5001800US5507,5001800US5503,0.006988208937401796 +5001800US5508,5001800US5508,0.976997827441602 +5001800US5508,5001800US5506,0.023002172558398034 +5001800US5600,5001800US5600,1.0 diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py index 650b9873..e302d65a 100644 --- a/policyengine_us_data/storage/upload_completed_datasets.py +++ b/policyengine_us_data/storage/upload_completed_datasets.py @@ -15,6 +15,7 @@ def upload_datasets(): Pooled_3_Year_CPS_2023.file_path, CPS_2023.file_path, STORAGE_FOLDER / "small_enhanced_cps_2024.h5", + STORAGE_FOLDER / "policy_data.db", ] # Filter to only existing files diff --git a/policyengine_us_data/tests/test_database.py b/policyengine_us_data/tests/test_database.py new file mode 100644 index 00000000..64060b48 --- /dev/null +++ b/policyengine_us_data/tests/test_database.py @@ -0,0 +1,82 @@ +import hashlib +from enum import Enum + +import pytest +from sqlalchemy.exc import IntegrityError +from sqlmodel import Session, select + +from 
@pytest.fixture
def engine(tmp_path):
    """Provide an engine for a throwaway SQLite file inside ``tmp_path``."""
    database_file = tmp_path / "test.db"
    return create_database(f"sqlite:///{database_file}")


def test_stratum_hash_and_relationships(engine):
    """A committed stratum exposes its definition hash and related rows.

    The expected hash is the SHA-256 hex digest of the constraints rendered
    as ``variable|operation|value``, sorted and joined with newlines.
    """
    canonical_lines = sorted(
        [
            "ucgid_str|equals|0400000US30",
            "age|greater_than|20",
            "age|less_than|65",
        ]
    )
    digest_input = "\n".join(canonical_lines).encode("utf-8")
    expected_hash = hashlib.sha256(digest_input).hexdigest()

    constraint_specs = [
        ("ucgid_str", "equals", "0400000US30"),
        ("age", "greater_than", "20"),
        ("age", "less_than", "65"),
    ]

    with Session(engine) as session:
        stratum = Stratum(notes="test", stratum_group_id=0)
        stratum.constraints_rel = [
            StratumConstraint(
                constraint_variable=variable,
                operation=operation,
                value=value,
            )
            for variable, operation, value in constraint_specs
        ]
        stratum.targets_rel = [
            Target(variable="person_count", period=2023, value=100.0)
        ]
        session.add(stratum)
        session.commit()

        assert stratum.definition_hash == expected_hash

        retrieved = session.get(Stratum, stratum.stratum_id)
        assert len(retrieved.constraints_rel) == 3
        assert retrieved.targets_rel[0].value == 100.0


def _make_ucgid_only_stratum():
    """Build an unsaved stratum whose only constraint is one ucgid_str."""
    stratum = Stratum(stratum_group_id=0)
    stratum.constraints_rel = [
        StratumConstraint(
            constraint_variable="ucgid_str",
            operation="equals",
            value="0400000US30",
        )
    ]
    return stratum


def test_unique_definition_hash(engine):
    """Committing a second stratum with identical constraints must fail."""
    with Session(engine) as session:
        session.add(_make_ucgid_only_stratum())
        session.commit()

        # Same constraint set as the stratum already stored above.
        session.add(_make_ucgid_only_stratum())
        with pytest.raises(IntegrityError):
            session.commit()
# One row per jurisdiction: (full name, USPS abbreviation, two-digit FIPS
# code as a zero-padded string), ordered alphabetically by full name.
_STATE_TABLE = (
    ("Alabama", "AL", "01"),
    ("Alaska", "AK", "02"),
    ("Arizona", "AZ", "04"),
    ("Arkansas", "AR", "05"),
    ("California", "CA", "06"),
    ("Colorado", "CO", "08"),
    ("Connecticut", "CT", "09"),
    ("Delaware", "DE", "10"),
    ("District of Columbia", "DC", "11"),
    ("Florida", "FL", "12"),
    ("Georgia", "GA", "13"),
    ("Hawaii", "HI", "15"),
    ("Idaho", "ID", "16"),
    ("Illinois", "IL", "17"),
    ("Indiana", "IN", "18"),
    ("Iowa", "IA", "19"),
    ("Kansas", "KS", "20"),
    ("Kentucky", "KY", "21"),
    ("Louisiana", "LA", "22"),
    ("Maine", "ME", "23"),
    ("Maryland", "MD", "24"),
    ("Massachusetts", "MA", "25"),
    ("Michigan", "MI", "26"),
    ("Minnesota", "MN", "27"),
    ("Mississippi", "MS", "28"),
    ("Missouri", "MO", "29"),
    ("Montana", "MT", "30"),
    ("Nebraska", "NE", "31"),
    ("Nevada", "NV", "32"),
    ("New Hampshire", "NH", "33"),
    ("New Jersey", "NJ", "34"),
    ("New Mexico", "NM", "35"),
    ("New York", "NY", "36"),
    ("North Carolina", "NC", "37"),
    ("North Dakota", "ND", "38"),
    ("Ohio", "OH", "39"),
    ("Oklahoma", "OK", "40"),
    ("Oregon", "OR", "41"),
    ("Pennsylvania", "PA", "42"),
    ("Rhode Island", "RI", "44"),
    ("South Carolina", "SC", "45"),
    ("South Dakota", "SD", "46"),
    ("Tennessee", "TN", "47"),
    ("Texas", "TX", "48"),
    ("Utah", "UT", "49"),
    ("Vermont", "VT", "50"),
    ("Virginia", "VA", "51"),
    ("Washington", "WA", "53"),
    ("West Virginia", "WV", "54"),
    ("Wisconsin", "WI", "55"),
    ("Wyoming", "WY", "56"),
)

# Full jurisdiction name -> FIPS code, in table order.
STATE_NAME_TO_FIPS = {name: fips for name, _, fips in _STATE_TABLE}

# USPS abbreviation -> FIPS code. "DC" is appended after the 50 states so the
# mapping keeps the key order it has always had.
STATE_ABBREV_TO_FIPS = {
    abbrev: fips for _, abbrev, fips in _STATE_TABLE if abbrev != "DC"
}
STATE_ABBREV_TO_FIPS["DC"] = STATE_NAME_TO_FIPS["District of Columbia"]
# Maps the ``geo`` argument of ``pull_acs_table`` to the Census API ``for=``
# geography predicate.
_ACS_GEO_QUERIES = {
    "National": "us:*",
    "State": "state:*",
    "District": "congressional+district:*",
}


def get_census_docs(year, *, timeout: float = 60.0):
    """Fetch the variable documentation for the ACS 1-year subject tables.

    Args:
        year: Survey year interpolated into the API path (e.g., 2023).
        timeout: Seconds to wait on the Census API before giving up.

    Returns:
        The decoded JSON body of the ``variables.json`` response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not respond within ``timeout``.
    """
    docs_url = (
        f"https://api.census.gov/data/{year}/acs/acs1/subject/variables.json"
    )
    # NOTE: The URL for detail tables, should we ever need it is:
    # "https://api.census.gov/data/2023/acs/acs1/variables.json"

    # requests has no default timeout, so pass one explicitly; otherwise a
    # stalled connection would block the caller forever.
    docs_response = requests.get(docs_url, timeout=timeout)
    docs_response.raise_for_status()

    return docs_response.json()


def pull_acs_table(
    group: str, geo: str, year: int, *, timeout: float = 60.0
) -> pd.DataFrame:
    """Download one ACS 1-year table group as a DataFrame.

    Args:
        group: Table group ID, e.g., 'S2201'. IDs starting with "S" are
            requested from the ``/subject`` endpoint.
        geo: 'National' | 'State' | 'District'.
        year: Survey year, e.g., 2023.
        timeout: Seconds to wait on the Census API before giving up.

    Returns:
        A DataFrame whose column names come from the first row of the API
        response and whose rows are the remaining rows, with values left
        exactly as the API returned them.

    Raises:
        KeyError: If ``geo`` is not one of the supported levels.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not respond within ``timeout``.
    """
    # Validate the geography before any network traffic so a typo fails fast
    # with a message listing the accepted values.
    try:
        geo_q = _ACS_GEO_QUERIES[geo]
    except KeyError:
        raise KeyError(
            f"Unknown geo {geo!r}; expected one of {sorted(_ACS_GEO_QUERIES)}"
        ) from None

    base = f"https://api.census.gov/data/{year}/acs/acs1"
    # startswith (rather than group[0]) so an empty group does not raise
    # IndexError here.
    if group.startswith("S"):
        base = base + "/subject"

    url = f"{base}?get=group({group})&for={geo_q}"

    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures directly instead of as a JSON decode error or a
    # malformed frame built from an error payload.
    response.raise_for_status()
    data = response.json()

    headers, rows = data[0], data[1:]
    return pd.DataFrame(rows, columns=headers)
def get_root_strata(session: Session) -> List[Stratum]:
    """Return every stratum whose ``parent_stratum_id`` is NULL.

    Args:
        session: Open session used to run the query.

    Returns:
        All strata that have no parent.
    """
    # ``.is_(None)`` renders the same ``IS NULL`` predicate as ``== None``
    # while stating the intent explicitly and avoiding the E711 lint.
    statement = select(Stratum).where(Stratum.parent_stratum_id.is_(None))
    return session.exec(statement).all()


def get_stratum_children(session: Session, stratum_id: int) -> List[Stratum]:
    """Return the direct children of the stratum with the given ID.

    Args:
        session: Open session used for the lookup.
        stratum_id: Primary key of the parent stratum.

    Returns:
        The parent's ``children_rel`` collection, or an empty list when no
        stratum with ``stratum_id`` exists.
    """
    parent_stratum = get_stratum_by_id(session, stratum_id)
    if parent_stratum is None:
        return []
    return parent_stratum.children_rel


def get_stratum_parent(session: Session, stratum_id: int) -> Optional[Stratum]:
    """Return the direct parent of the stratum with the given ID.

    Args:
        session: Open session used for the lookup.
        stratum_id: Primary key of the child stratum.

    Returns:
        The child's ``parent_rel``, or ``None`` when no stratum with
        ``stratum_id`` exists.
    """
    child_stratum = get_stratum_by_id(session, stratum_id)
    if child_stratum is None:
        return None
    return child_stratum.parent_rel