diff --git a/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_after_2000.csv b/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_after_2000.csv new file mode 100644 index 0000000000..844c4e4585 --- /dev/null +++ b/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_after_2000.csv @@ -0,0 +1,7 @@ +"StatVar","Units","MeasurementMethods","MinDate","ScalingFactors","observationPeriods","NumPlaces" +"Count_Person_WhiteAlone","[]","[CensusPEPSurvey_Race2000Onwards]","2000","[]","[P1Y]","3156" +"Count_Person_AsianAlone","[]","[CensusPEPSurvey_Race2000Onwards]","2000","[]","[P1Y]","3156" +"Count_Person_NativeHawaiianAndOtherPacificIslanderAlone","[]","[CensusPEPSurvey_Race2000Onwards]","2000","[]","[P1Y]","3156" +"Count_Person_TwoOrMoreRaces","[]","[CensusPEPSurvey_Race2000Onwards]","2000","[]","[P1Y]","3156" +"Count_Person_BlackOrAfricanAmericanAlone","[]","[CensusPEPSurvey_Race2000Onwards]","2000","[]","[P1Y]","3156" +"Count_Person_AmericanIndianAndAlaskaNativeAlone","[]","[CensusPEPSurvey_Race2000Onwards]","2000","[]","[P1Y]","3156" diff --git a/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_before_2000.csv b/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_before_2000.csv new file mode 100644 index 0000000000..aee7759bb9 --- /dev/null +++ b/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_before_2000.csv @@ -0,0 +1,6 @@ +"MeasurementMethods","MinDate","Units","ScalingFactors","observationPeriods","NumPlaces","StatVar" +"[dcAggregate/CensusPEPSurvey_PartialAggregate_RaceUpto1999]","1900","[]","[]","[P1Y]","3213","Count_Person_WhiteAlone" +"[CensusPEPSurvey_RaceUpto1999]","1990","[]","[]","[P1Y]","3193","Count_Person_AsianOrPacificIslander" +"[CensusPEPSurvey_RaceUpto1999]","1900","[]","[]","[P1Y]","1","Count_Person_NonWhite" +"[dcAggregate/CensusPEPSurvey_PartialAggregate_RaceUpto1999]","1960","[]","[]","[P1Y]","3213","Count_Person_BlackOrAfricanAmericanAlone" +"[CensusPEPSurvey_RaceUpto1999]","1990","[]","[]","[P1Y]","3193","Count_Person_AmericanIndianAndAlaskaNativeAlone" diff --git a/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_national.csv b/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_national.csv new file mode 100644 index 0000000000..b7a282b9d2 --- /dev/null +++ b/scripts/us_census/pep/population_estimate_by_race/golden_data/golden_summary_report_national.csv @@ -0,0 +1,7 @@ +"MinDate","StatVar","ScalingFactors","MeasurementMethods","observationPeriods","Units","NumPlaces" +"2000","Count_Person_WhiteAlone","[]","[dcAggregate/CensusPEPSurvey_PartialAggregate_Race2000Onwards]","[P1Y]","[]","52" +"2000","Count_Person_AsianAlone","[]","[dcAggregate/CensusPEPSurvey_PartialAggregate_Race2000Onwards]","[P1Y]","[]","52" +"2000","Count_Person_NativeHawaiianAndOtherPacificIslanderAlone","[]","[dcAggregate/CensusPEPSurvey_PartialAggregate_Race2000Onwards]","[P1Y]","[]","52" +"2000","Count_Person_TwoOrMoreRaces","[]","[dcAggregate/CensusPEPSurvey_PartialAggregate_Race2000Onwards]","[P1Y]","[]","52" +"2000","Count_Person_BlackOrAfricanAmericanAlone","[]","[dcAggregate/CensusPEPSurvey_PartialAggregate_Race2000Onwards]","[P1Y]","[]","52" +"2000","Count_Person_AmericanIndianAndAlaskaNativeAlone","[]","[dcAggregate/CensusPEPSurvey_PartialAggregate_Race2000Onwards]","[P1Y]","[]","52" diff --git a/scripts/us_census/pep/population_estimate_by_race/manifest.json b/scripts/us_census/pep/population_estimate_by_race/manifest.json index 3792d57c28..8c76657f86 100644 --- a/scripts/us_census/pep/population_estimate_by_race/manifest.json +++ b/scripts/us_census/pep/population_estimate_by_race/manifest.json @@ -27,7 +27,8 @@ "cleaned_csv": "output/USA_Population_Count_by_Race_county_after_2000.csv" } ], - "cron_schedule": "0 04 * * 1" + "cron_schedule": "0 04 * * 1", + "validation_config_file": "validation_config.json" } ] } \ No newline at end of file diff --git a/scripts/us_census/pep/population_estimate_by_race/validation_config.json b/scripts/us_census/pep/population_estimate_by_race/validation_config.json new file mode 100644 index 0000000000..a1ef3d4ae3 --- /dev/null +++ b/scripts/us_census/pep/population_estimate_by_race/validation_config.json @@ -0,0 +1,38 @@ +{ + "schema_version": "1.0", + "rules": [ + { + "rule_id": "check_deleted_records_percent", + "description": "Checks that the percentage of deleted records for the entire import is within threshold.", + "validator": "DELETED_RECORDS_PERCENT", + "params": { "threshold": 0.15} + }, + { + "rule_id": "check_goldens_national", + "description": "Validates national and state-level 2000+ data against its golden summary report.", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_summary_report_national.csv", + "input_files": "USCensusPEP_PopulationEstimatebyRace/*/input0/genmcf/summary_report.csv" + } + }, + { + "rule_id": "check_goldens_before_2000", + "description": "Validates data before 2000 against its golden summary report.", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_summary_report_before_2000.csv", + "input_files": "USCensusPEP_PopulationEstimatebyRace/*/input1/genmcf/summary_report.csv" + } + }, + { + "rule_id": "check_goldens_after_2000", + "description": "Validates county-level 2000+ data against its golden summary report.", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_summary_report_after_2000.csv", + "input_files": "USCensusPEP_PopulationEstimatebyRace/*/input2/genmcf/summary_report.csv" + } + } + ] +} \ No newline at end of file