datacommonsorg · niveditasing · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/tools/import_validation/Validations.md b/tools/import_validation/Validations.md
@@ -72,6 +72,8 @@ To generate goldens for the summary_report.csv to verify that all the expected
 StatVars are generated with the corresponding number of places and dates, run
 the following:
 
+The command below compares against the golden files, using summary_report.csv as the default input:
+
 ```shell
     python3 validator_goldens.py \
       --validate_goldens_input=summary_report.csv \

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
@@ -41,6 +41,8 @@ class ValidationRunner:
 
     def __init__(self, validation_config_path: str, differ_output: str,
                  stats_summary: str, lint_report: str, validation_output: str):
+        self.validation_config_path = validation_config_path
+        self.stats_summary = stats_summary
         self.config = ValidationConfig(validation_config_path)
         self.validation_output = validation_output
         self.validator = Validator()
@@ -212,6 +214,47 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
                 if output_dir:
                     rule_params.setdefault('output_path', output_dir)
 
+                # Resolve relative paths against the validation config's directory, or its nearest ancestor that contains 'golden_data'.
+                if validator_name == 'GOLDENS_CHECK':
+                    config_dir = os.path.dirname(
+                        os.path.abspath(self.validation_config_path))
+                    # Walk up to the nearest ancestor containing a 'golden_data' folder; if none is found, keep the config's own directory.
+                    curr = config_dir
+                    while curr and curr != os.path.dirname(curr):
+                        if os.path.exists(os.path.join(curr, 'golden_data')):
+                            config_dir = curr
+                            break
+                        curr = os.path.dirname(curr)
+
+                    print(
+                        f"DEBUG: Found GOLDENS_CHECK rule: '{rule.get('rule_id')}'"
+                    )
+                    print(
+                        f"DEBUG: Config directory resolved to: '{config_dir}'")
+                    for path_key in list(rule_params.keys()):
+                        # Check any key in rule_params that equals 'golden_files' or 'input_files' or ends with '_file' or '_files'
+                        if path_key in (
+                                'golden_files',
+                                'input_files') or path_key.endswith(
+                                    '_file') or path_key.endswith('_files'):
+                            val = rule_params[path_key]
+                            print(
+                                f"DEBUG: Before resolve '{path_key}': '{val}'")
+                            if isinstance(val, str):
+                                if val and not os.path.isabs(val):
+                                    rule_params[path_key] = os.path.join(
+                                        config_dir, val)
+                            elif isinstance(val, list):
+                                rule_params[path_key] = [
+                                    os.path.join(config_dir, item)
+                                    if isinstance(item, str) and item and
+                                    not os.path.isabs(item) else item
+                                    for item in val
+                                ]
+                            print(
+                                f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'"
+                            )
+
             if validator_name == 'SQL_VALIDATOR':
                 result = validation_func(self.data_sources['stats'],
                                          self.data_sources['differ'],

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
@@ -298,7 +298,13 @@ def load_nodes_from_file(files: str) -> dict:
             file_nodes = file_util.file_load_csv_dict(input_file,
                                                       key_index=True)
             for node in file_nodes.values():
-                nodes[len(nodes)] = node
+                # Clean up "dcid:" prefixes from values (column headers are kept as is)
+                clean_node = {}
+                for k, v in node.items():
+                    clean_val = v.removeprefix("dcid:") if isinstance(
+                        v, str) else v
+                    clean_node[k] = clean_val
+                nodes[len(nodes)] = clean_node
         else:
             # For MCF or JSON, we assume nodes are already keyed by DCID.
             file_nodes = mcf_file_util.load_mcf_nodes(input_file)

diff --git a/util/file_util.py b/util/file_util.py
@@ -619,7 +619,7 @@ def file_write_csv_dict(py_dict: dict,
                         filename: str,
                         columns: list = None,
                         key_column_name: str = 'key') -> list:
-    """Returns the filename after writing py_dict with a csv row per item.
+    """Returns the list of columns after writing py_dict with a csv row per item.
 
   Each dictionary items is written as a row in the CSV file.
 
@@ -674,9 +674,11 @@ def file_write_csv_dict(py_dict: dict,
                     if col not in columns:
                         columns.append(col)
     if len(columns) == 1:
-        # Value is not a dict. Write it as a column name value.
-        value_column_name = 'value'
-        columns.append(value_column_name)
+        # Only the key column was found. Add a 'value' column unless some value in py_dict is a dict.
+        if not any(isinstance(value, dict) for value in py_dict.values()):
+            # Value is not a dict. Write it as a column name value.
+            value_column_name = 'value'
+            columns.append(value_column_name)
     # Use the first column for the key.
     if key_column_name == '':
         key_column_name = columns[0]