diff --git a/policyengine_us/variables/household/demographic/geographic/county/county.py b/policyengine_us/variables/household/demographic/geographic/county/county.py index e75c4ced0e3..41493fba4b8 100644 --- a/policyengine_us/variables/household/demographic/geographic/county/county.py +++ b/policyengine_us/variables/household/demographic/geographic/county/county.py @@ -36,22 +36,25 @@ def formula(household, period, parameters): # First look if county FIPS is provided; if so, map to county name county_fips: "pd.Series[str]" | None = household("county_fips", period) - COUNTY_FIPS_DATASET: "pd.DataFrame" = load_county_fips_dataset() - - # Decode FIPS codes - county_fips_codes = COUNTY_FIPS_DATASET.set_index("county_fips") result = household("first_county_in_state", period) county_fips = np.asarray(county_fips).astype(str) known_fips = county_fips != "" - valid_fips = known_fips & np.isin(county_fips, county_fips_codes.index) - if valid_fips.any(): - county_name = county_fips_codes.loc[ - county_fips[valid_fips], - "county_name", - ] - state_code = county_fips_codes.loc[county_fips[valid_fips], "state"] + if known_fips.any(): + COUNTY_FIPS_DATASET: "pd.DataFrame" = load_county_fips_dataset() + counties = pd.merge( + pd.DataFrame({"county_fips": county_fips[known_fips]}), + COUNTY_FIPS_DATASET, + on="county_fips", + how="left", + ) + valid_fips = counties["county_name"].notna().to_numpy() + if not valid_fips.any(): + return result + known_indices = np.where(known_fips)[0] + county_name = counties.loc[valid_fips, "county_name"] + state_code = counties.loc[valid_fips, "state"] result = np.array(result, copy=True) - result[valid_fips] = map_county_string_to_enum( + result[known_indices[valid_fips]] = map_county_string_to_enum( county_name, state_code, ).to_numpy()