Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
parameter,value
header_rows,1
#input_rows,10000
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
key,property1,value1,property2,value2,property3,value3

# Global properties from DATAFLOW
"DATAFLOW:ESTAT:DEMO_PJANGROUP(1.0)",populationType,dcs:Person,measuredProperty,dcs:count,statType,dcs:measuredValue

# --- AUTOMATED CLEANUP: Ignored Columns ---
LAST UPDATE,#ignore,"",,,,
OBS_FLAG,#ignore,"",,,,
CONF_STATUS,#ignore,"",,,,

# Frequency
freq:A,observationPeriod,P1Y,,,,

# Unit
unit:NR,unit,dcs:Person,,,,

# Sex
sex:T,gender,"",,,,
sex:M,gender,dcs:Male,,,,
sex:F,gender,dcs:Female,,,,

# --- Broad Age Group Mappings --- (dcs:Years0To4 corrected to dcs:YearsUpto4)
age:TOTAL,age,"",,,,
age:UNK,age,dcs:USC_AgeNotStated,,,,
age:Y_LT5,age,dcs:YearsUpto4,,,,
age:Y_GE75,age,dcs:Years75Onwards,,,,
age:Y_GE80,age,dcs:Years80Onwards,,,,
age:Y_GE85,age,dcs:Years85Onwards,,,,

# Explicit Ranges (Replaces the Regex)
age:Y5-9,age,dcs:Years5To9,,,,
age:Y10-14,age,dcs:Years10To14,,,,
age:Y15-19,age,dcs:Years15To19,,,,
age:Y20-24,age,dcs:Years20To24,,,,
age:Y25-29,age,dcs:Years25To29,,,,
age:Y30-34,age,dcs:Years30To34,,,,
age:Y35-39,age,dcs:Years35To39,,,,
age:Y40-44,age,dcs:Years40To44,,,,
age:Y45-49,age,dcs:Years45To49,,,,
age:Y50-54,age,dcs:Years50To54,,,,
age:Y55-59,age,dcs:Years55To59,,,,
age:Y60-64,age,dcs:Years60To64,,,,
age:Y65-69,age,dcs:Years65To69,,,,
age:Y70-74,age,dcs:Years70To74,,,,
age:Y75-79,age,dcs:Years75To79,,,,
age:Y80-84,age,dcs:Years80To84,,,,

# --- Geopolitical Entity Translation (Bypasses offline API call failures) ---
geo,observationAbout,{Data},,,,

# Time
TIME_PERIOD,observationDate,{Data},,,,

# Value
OBS_VALUE,value,{Number},,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Eurostat Population By Age Group And Sex On January 1 Import

## Overview

This dataset contains annual population stock data broken down by age groups and sex at the national level, sourced from Eurostat. The data tracks demographic distributions on January 1st across various European countries to support long-term social, economic, and institutional planning.

- type of place: Country
- years: Historical data to present (1960-2025)
- place_resolution: Resolved to DCIDs (e.g., dcid:country/FRA, dcid:country/DEU)

## Data Source
**Source URL:**
https://ec.europa.eu/eurostat/databrowser/view/demo_pjangroup/default/table

**Provenance Description:**
This dataset is produced and harmonized by Eurostat using demographic data provided by national statistical institutes across European countries. It breaks down the annual population stock on January 1st by specific age groups and sex to support EU socioeconomic policy and long-term planning.

### Script Details:
- **Download**: Uses `curl` to fetch the latest SDMX-CSV data from Eurostat's dissemination API.
- **Processing**: Uses `stat_var_processor.py` to map raw data to Data Commons StatVarObservations using the PV map and metadata configuration.

## Processing
To process the Eurostat Population By Age Group And Sex On January 1 data and generate statistical variables, use the following commands from your current import data directory:

### Download input file

```bash
mkdir -p source_files
curl -L --retry 3 "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/DEMO_PJANGROUP/?format=SDMX-CSV&compressed=false" -o ./source_files/Population_on_1_January_by_age_group_and_sex_data_input.csv
Comment thread
shvngisingh marked this conversation as resolved.

## For Test Data Run

python3 ../../../tools/statvar_importer/stat_var_processor.py \
"--input_data=./testdata/Population_on_1_January_by_age_group_and_sex_data_input.csv" \
"--pv_map=./Population_on_1_January_by_age_group_and_sex_pvmap.csv" \
"--output_path=./testdata/Population_on_1_January_by_age_group_and_sex_output" \
"--config_file=./Population_on_1_January_by_age_group_and_sex_metadata.csv" \
"--existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"

## For Main data run

python3 ../../../tools/statvar_importer/stat_var_processor.py \
"--input_data=./source_files/*.csv" \
"--pv_map=./Population_on_1_January_by_age_group_and_sex_pvmap.csv" \
"--config_file=./Population_on_1_January_by_age_group_and_sex_metadata.csv" \
"--generate_statvar_name=True" \
"--skip_constant_csv_columns=False" \
"--output_columns=observationDate,observationAbout,variableMeasured,value,observationPeriod,unit" \
"--output_path=./Population_on_1_January_by_age_group_and_sex_output" \
"--places_resolved_csv=./places_resolved_runtime.csv" \
"--existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"import_specifications": [
{
"import_name": "EuroStat_Population_On_1_January_By_Age_Group_And_Sex",
"curator_emails": [
"support@datacommons.org"
],
"provenance_url": "https://ec.europa.eu/eurostat/databrowser/view/demo_pjangroup/default/table",
"provenance_description": "Annual European population stock on January 1st by age group and sex, harmonized by Eurostat.",
"scripts": [
"run.sh"
],
"import_inputs": [
{
"template_mcf": "Population_on_1_January_by_age_group_and_sex_output.tmcf",
"cleaned_csv": "Population_on_1_January_by_age_group_and_sex_output.csv"
}
],
"source_files": [
"source_files/*.csv"
],
"cron_schedule": "5 1 1,15 * *",
"resource_limits": {"cpu": 4, "memory": 8, "disk":100},
"config_override": {
"invoke_import_validation": true,
"invoke_import_tool": true,
"invoke_differ_tool": true,
"skip_input_upload": false,
"skip_gcs_upload": false,
"cleanup_gcs_volume_mount": false
}
}
]
}
Loading