Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
81a632b
improve validation path resolution and sanitize CSV parsing
niveditasing Jun 8, 2026
903e031
fixed tests
niveditasing Jun 8, 2026
20e1292
docs: add detailed docstrings for path resolution helpers in runner
niveditasing Jun 8, 2026
14409c4
modified validation
niveditasing Jun 8, 2026
194a3bc
testing
niveditasing Jun 9, 2026
aabebba
fix: strip dcid: namespace prefix from CSV values in golden loading
niveditasing Jun 9, 2026
6e36dcb
docs: add descriptive comments to CSV writer configuration
niveditasing Jun 10, 2026
1161f17
testing
niveditasing Jun 10, 2026
ddf793b
testing
niveditasing Jun 14, 2026
5315183
revert: undo changes to validator_goldens_test.py
niveditasing Jun 14, 2026
8d80c46
style: clean up whitespace in validator_goldens.py
niveditasing Jun 14, 2026
c3eff23
refactor: reduce base directory search depth limit to 8 in runner.py
niveditasing Jun 14, 2026
ed220f7
Merge branch 'master' into validation_golden_fix
niveditasing Jun 14, 2026
cab980d
revert: restore tab character in delim_chars in file_util.py
niveditasing Jun 14, 2026
2701c54
style: restore literal tab character in delim_chars in file_util.py
niveditasing Jun 14, 2026
b3d2053
testing
niveditasing Jun 14, 2026
8f7e9e7
testing
niveditasing Jun 14, 2026
298cc57
testing
niveditasing Jun 15, 2026
a889f0e
revert: restore original runner_test.py to remove it from PR
niveditasing Jun 15, 2026
44bdc78
refactor: inline path resolution in runner.py and remove util.py from PR
niveditasing Jun 15, 2026
800087e
feat: add path-resolution helper functions to util.py
niveditasing Jun 15, 2026
41ee2bd
testing
niveditasing Jun 15, 2026
8958e7e
testing
niveditasing Jun 15, 2026
14b5630
testing
niveditasing Jun 15, 2026
d0fdae5
testing
niveditasing Jun 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tools/import_validation/Validations.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ To generate goldens for the summary_report.csv to verify that all the expected
StatVars are generated with the corresponding number of places and dates, run
the following:

The command below compares the golden files, using summary_report.csv as the default input:
Comment thread
niveditasing marked this conversation as resolved.

```shell
python3 validator_goldens.py \
--validate_goldens_input=summary_report.csv \
Expand Down
43 changes: 43 additions & 0 deletions tools/import_validation/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class ValidationRunner:

def __init__(self, validation_config_path: str, differ_output: str,
stats_summary: str, lint_report: str, validation_output: str):
self.validation_config_path = validation_config_path
self.stats_summary = stats_summary
self.config = ValidationConfig(validation_config_path)
self.validation_output = validation_output
self.validator = Validator()
Expand Down Expand Up @@ -212,6 +214,47 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
if output_dir:
rule_params.setdefault('output_path', output_dir)

# Resolve paths relative to the directory of the validation config.
if validator_name == 'GOLDENS_CHECK':
config_dir = os.path.dirname(
os.path.abspath(self.validation_config_path))
# We walk up to find where the golden_data folder is situated.
curr = config_dir
while curr and curr != os.path.dirname(curr):
if os.path.exists(os.path.join(curr, 'golden_data')):
config_dir = curr
break
curr = os.path.dirname(curr)

print(
f"DEBUG: Found GOLDENS_CHECK rule: '{rule.get('rule_id')}'"
)
print(
f"DEBUG: Config directory resolved to: '{config_dir}'")
Comment thread
niveditasing marked this conversation as resolved.
for path_key in list(rule_params.keys()):
# Check any key in rule_params that equals 'golden_files' or 'input_files' or ends with '_file' or '_files'
if path_key in (
'golden_files',
'input_files') or path_key.endswith(
'_file') or path_key.endswith('_files'):
val = rule_params[path_key]
print(
f"DEBUG: Before resolve '{path_key}': '{val}'")
Comment thread
niveditasing marked this conversation as resolved.
if isinstance(val, str):
if val and not os.path.isabs(val):
rule_params[path_key] = os.path.join(
config_dir, val)
elif isinstance(val, list):
rule_params[path_key] = [
os.path.join(config_dir, item)
if isinstance(item, str) and item and
not os.path.isabs(item) else item
for item in val
]
print(
f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'"
)
Comment thread
niveditasing marked this conversation as resolved.

if validator_name == 'SQL_VALIDATOR':
result = validation_func(self.data_sources['stats'],
self.data_sources['differ'],
Expand Down
8 changes: 7 additions & 1 deletion tools/import_validation/validator_goldens.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,13 @@ def load_nodes_from_file(files: str) -> dict:
file_nodes = file_util.file_load_csv_dict(input_file,
key_index=True)
for node in file_nodes.values():
nodes[len(nodes)] = node
# Clean up "dcid:" prefixes from values (column headers are kept as is)
clean_node = {}
for k, v in node.items():
clean_val = v.removeprefix("dcid:") if isinstance(
v, str) else v
clean_node[k] = clean_val
Comment thread
niveditasing marked this conversation as resolved.
nodes[len(nodes)] = clean_node
else:
# For MCF or JSON, we assume nodes are already keyed by DCID.
file_nodes = mcf_file_util.load_mcf_nodes(input_file)
Expand Down
10 changes: 6 additions & 4 deletions util/file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ def file_write_csv_dict(py_dict: dict,
filename: str,
columns: list = None,
key_column_name: str = 'key') -> list:
"""Returns the filename after writing py_dict with a csv row per item.
"""Returns the list of columns after writing py_dict with a csv row per item.

Each dictionary item is written as a row in the CSV file.

Expand Down Expand Up @@ -674,9 +674,11 @@ def file_write_csv_dict(py_dict: dict,
if col not in columns:
columns.append(col)
if len(columns) == 1:
# Value is not a dict. Write it as a column name value.
value_column_name = 'value'
columns.append(value_column_name)
# Check if values are dicts. If they are, it's not a primitive value.
if not any(isinstance(value, dict) for value in py_dict.values()):
# Value is not a dict. Write it as a column name value.
value_column_name = 'value'
columns.append(value_column_name)
# Use the first column for the key.
if key_column_name == '':
key_column_name = columns[0]
Expand Down
Loading