Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ node_modules
!policyengine_us_data/storage/social_security_aux.csv
!policyengine_us_data/storage/SSPopJul_TR2024.csv
!policyengine_us_data/storage/national_and_district_rents_2023.csv
!policyengine_us_data/storage/calibration_targets/aca_marketplace_state_metal_selection_2024.csv
docs/.ipynb_checkpoints/

## ACA PTC state-level uprating factors
Expand Down
2 changes: 2 additions & 0 deletions changelog.d/618.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add an ACA marketplace ETL that loads state-level HC.gov bronze-plan
selection targets for APTC recipients into the calibration database.
1 change: 1 addition & 0 deletions policyengine_us_data/db/create_field_valid_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def populate_field_valid_values(session: Session) -> None:
("source", "Census ACS S0101", "survey"),
("source", "IRS SOI", "administrative"),
("source", "CMS Medicaid", "administrative"),
("source", "CMS 2024 OEP state metal status PUF", "administrative"),
("source", "Census ACS S2704", "survey"),
("source", "USDA FNS SNAP", "administrative"),
("source", "Census ACS S2201", "survey"),
Expand Down
213 changes: 213 additions & 0 deletions policyengine_us_data/db/etl_aca_marketplace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
from __future__ import annotations

import logging
from pathlib import Path

import pandas as pd
from policyengine_us.system import system
from sqlmodel import Session, create_engine

from policyengine_us_data.calibration.calibration_utils import STATE_CODES
from policyengine_us_data.db.create_database_tables import (
Stratum,
StratumConstraint,
Target,
)
from policyengine_us_data.storage import CALIBRATION_FOLDER, STORAGE_FOLDER
from policyengine_us_data.utils.db import etl_argparser, get_geographic_strata

logger = logging.getLogger(__name__)

# Threshold compared against `selected_marketplace_plan_benchmark_ratio` when
# the bronze stratum is defined below (the constraint is "ratio < this value").
BENCHMARK_SILVER_RATIO = 1.0

# Default input CSV for this ETL; overridable via the --state-metal-csv option.
STATE_METAL_SELECTION_PATH = (
    CALIBRATION_FOLDER / "aca_marketplace_state_metal_selection_2024.csv"
)

# policyengine-us variables referenced by the stratum constraints in this
# module; require_marketplace_variables() checks they exist before loading.
REQUIRED_POLICYENGINE_US_VARIABLES = {
    "selected_marketplace_plan_benchmark_ratio",
    "used_aca_ptc",
}

# Inverse of STATE_CODES (values become keys). Presumably STATE_CODES maps
# FIPS code -> state abbreviation, as the loop variable names suggest — confirm.
STATE_ABBR_TO_FIPS = {abbr: fips for fips, abbr in STATE_CODES.items()}


def _extra_args(parser) -> None:
    """Register this ETL's extra command-line option on ``parser``.

    Adds ``--state-metal-csv``, parsed as a ``Path`` and defaulting to
    ``STATE_METAL_SELECTION_PATH``.

    Args:
        parser: Object exposing ``add_argument``; presumably the
            ``argparse.ArgumentParser`` built by ``etl_argparser`` — confirm.
    """
    help_text = "State-metal CMS OEP proxy CSV. Default: %(default)s"
    parser.add_argument(
        "--state-metal-csv",
        help=help_text,
        default=STATE_METAL_SELECTION_PATH,
        type=Path,
    )


def require_marketplace_variables() -> None:
    """Fail fast if policyengine-us lacks the variables this ETL constrains on.

    Raises:
        RuntimeError: If any name in ``REQUIRED_POLICYENGINE_US_VARIABLES`` is
            absent from ``system.variables``. The message lists the missing
            names in sorted order.
    """
    available = system.variables
    missing = sorted(
        name for name in REQUIRED_POLICYENGINE_US_VARIABLES if name not in available
    )
    if not missing:
        return

    missing_list = ", ".join(missing)
    raise RuntimeError(
        "ACA marketplace ETL requires policyengine-us variables that are not "
        f"available in the current environment: {missing_list}"
    )


def build_state_marketplace_bronze_aptc_targets(
    state_metal_df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Derive per-state HC.gov bronze-selection targets among APTC consumers.

    Only rows with ``platform == "HC.gov"`` are considered. Per the 2024 CMS
    state-metal-status PUF layout this function was written against:
    - metal rows (`B`, `G`, `S`) carry enrollment_status=`All`
    - aggregate rows (metal_level `All`) are split by enrollment status
      (`01-atv`, etc.)

    Two quantities are combined per state:
    - total APTC consumers: the sum of `aptc_consumers` over the
      `metal_level == "All"` rows, across enrollment statuses
    - bronze APTC consumers: `aptc_consumers` on the bronze (`B`) row with
      enrollment_status `All`

    Rows with a missing `aptc_consumers` value are ignored, states without
    both a total and a bronze row are dropped (inner join), and states whose
    `state_code` is not in ``STATE_ABBR_TO_FIPS`` are dropped.

    Args:
        state_metal_df: Frame with at least the columns `platform`,
            `metal_level`, `enrollment_status`, `state_code`,
            `aptc_consumers` and `consumers`. It is not modified.

    Returns:
        One row per state, sorted by `state_code`, with columns `year`,
        `source`, `state_code`, `marketplace_aptc_consumers`,
        `marketplace_consumers`, `bronze_aptc_consumers`, `bronze_consumers`,
        `state_fips` and `bronze_aptc_share`.
    """
    hc_gov = state_metal_df[state_metal_df["platform"] == "HC.gov"].copy()

    # Row selectors, built once up front and reused below.
    is_aggregate = hc_gov["metal_level"] == "All"
    has_aptc = hc_gov["aptc_consumers"].notna()
    is_bronze = (hc_gov["metal_level"] == "B") & (
        hc_gov["enrollment_status"] == "All"
    )

    # State totals: sum the aggregate rows across enrollment statuses.
    state_totals = (
        hc_gov[is_aggregate & has_aptc]
        .groupby("state_code", as_index=False)
        .agg(
            marketplace_aptc_consumers=("aptc_consumers", "sum"),
            marketplace_consumers=("consumers", "sum"),
        )
    )

    # Bronze counts: taken directly from the bronze row(s).
    state_bronze = hc_gov.loc[
        is_bronze & has_aptc, ["state_code", "aptc_consumers", "consumers"]
    ].rename(
        columns={
            "aptc_consumers": "bronze_aptc_consumers",
            "consumers": "bronze_consumers",
        }
    )

    targets = state_totals.merge(state_bronze, on="state_code", how="inner")

    # Attach FIPS codes and discard rows whose abbreviation is unknown.
    targets["state_fips"] = targets["state_code"].map(STATE_ABBR_TO_FIPS)
    targets = targets.loc[targets["state_fips"].notna()].copy()
    targets["state_fips"] = targets["state_fips"].astype(int)

    targets["bronze_aptc_share"] = (
        targets["bronze_aptc_consumers"] / targets["marketplace_aptc_consumers"]
    )

    # Provenance columns go first, in this order.
    targets.insert(0, "year", 2024)
    targets.insert(1, "source", "cms_2024_oep_state_metal_status_puf")

    ordered = targets.sort_values("state_code")
    return ordered.reset_index(drop=True)


def load_state_marketplace_bronze_aptc_targets(
    targets_df: pd.DataFrame,
    year: int,
) -> None:
    """Write per-state APTC and bronze-APTC strata with targets to the DB.

    Opens the SQLite database at ``STORAGE_FOLDER / "calibration" /
    "policy_data.db"``. For each row of ``targets_df`` whose state has a
    geographic stratum, two strata are added:

    1. An "APTC recipients" stratum, child of the state stratum, constrained
       by ``state_fips == <fips>`` and ``used_aca_ptc > 0``, with a
       ``tax_unit_count`` target equal to ``marketplace_aptc_consumers``.
    2. A "bronze APTC recipients" stratum, child of the first, adding the
       constraint ``selected_marketplace_plan_benchmark_ratio <
       BENCHMARK_SILVER_RATIO``, with a ``tax_unit_count`` target equal to
       ``bronze_aptc_consumers``.

    States without a geographic stratum are skipped with a warning. All
    inserts are committed together after the loop.

    Args:
        targets_df: Frame with columns ``state_fips``,
            ``marketplace_aptc_consumers`` and ``bronze_aptc_consumers``.
        year: Value stored as ``period`` on every created target.
    """
    source_label = "CMS 2024 OEP state metal status PUF"

    def aptc_constraints(fips: int) -> list:
        # Constraints shared by both strata: the state, and PTC use.
        return [
            StratumConstraint(
                constraint_variable="state_fips",
                operation="==",
                value=str(fips),
            ),
            StratumConstraint(
                constraint_variable="used_aca_ptc",
                operation=">",
                value="0",
            ),
        ]

    def count_target(value, notes: str):
        # Active tax_unit_count target for the requested period.
        return Target(
            variable="tax_unit_count",
            period=year,
            value=float(value),
            active=True,
            source=source_label,
            notes=notes,
        )

    db_path = STORAGE_FOLDER / "calibration" / "policy_data.db"
    engine = create_engine(f"sqlite:///{db_path}")

    with Session(engine) as session:
        geo_strata = get_geographic_strata(session)

        for record in targets_df.itertuples(index=False):
            fips = int(record.state_fips)
            state_stratum_id = geo_strata["state"].get(fips)
            if state_stratum_id is None:
                logger.warning(
                    "No state geographic stratum for FIPS %s, skipping", fips
                )
                continue

            recipients = Stratum(
                parent_stratum_id=state_stratum_id,
                notes=f"State FIPS {fips} Marketplace APTC recipients",
            )
            recipients.constraints_rel = aptc_constraints(fips)
            recipients.targets_rel.append(
                count_target(
                    record.marketplace_aptc_consumers,
                    "HC.gov APTC consumers across all enrollment statuses",
                )
            )
            session.add(recipients)
            # Flush before reading recipients.stratum_id for the child below.
            session.flush()

            bronze = Stratum(
                parent_stratum_id=recipients.stratum_id,
                notes=f"State FIPS {fips} Marketplace bronze APTC recipients",
            )
            bronze.constraints_rel = aptc_constraints(fips) + [
                StratumConstraint(
                    constraint_variable="selected_marketplace_plan_benchmark_ratio",
                    operation="<",
                    value=str(BENCHMARK_SILVER_RATIO),
                ),
            ]
            bronze.targets_rel.append(
                count_target(
                    record.bronze_aptc_consumers,
                    "HC.gov bronze plan selections among APTC consumers",
                )
            )
            session.add(bronze)
            session.flush()

        session.commit()


def main() -> None:
    """Run the ACA marketplace bronze/APTC ETL from the command line.

    Parses arguments via ``etl_argparser`` (with ``--state-metal-csv`` added
    by ``_extra_args``), verifies the required policyengine-us variables,
    builds the state targets from the CSV and loads them into the database.

    Raises:
        RuntimeError: If required policyengine-us variables are missing, or
            if the CSV yields no targets.
    """
    args, year = etl_argparser(
        "ETL for ACA marketplace bronze-selection calibration targets",
        extra_args_fn=_extra_args,
    )
    require_marketplace_variables()

    csv_path = args.state_metal_csv
    targets = build_state_marketplace_bronze_aptc_targets(pd.read_csv(csv_path))
    if targets.empty:
        raise RuntimeError("No HC.gov marketplace bronze/APTC targets were generated.")

    state_count = len(targets)
    print(
        "Loading ACA marketplace bronze/APTC state targets for "
        f"{state_count} states from {csv_path}"
    )
    load_state_marketplace_bronze_aptc_targets(targets, year)
    print("ACA marketplace bronze/APTC targets loaded.")


# Run the ETL only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading
Loading