From 81a632b74de84db2cb05de6ce75f4b43432a520c Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 8 Jun 2026 10:48:30 +0000
Subject: [PATCH 01/24] improve validation path resolution and sanitize CSV
 parsing

---
 tools/import_validation/runner.py             |  71 +++++++++++
 tools/import_validation/runner_test.py        | 116 ++++++++++++++++++
 tools/import_validation/validator_goldens.py  |  39 +++++-
 .../validator_goldens_test.py                 |  19 +++
 4 files changed, 242 insertions(+), 3 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index f1364518e6..aca77b0233 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -14,6 +14,7 @@
 """Module for the ValidationRunner class."""
 
 import os
+import logging
 from absl import app
 from absl import flags
 from absl import logging
@@ -34,6 +35,30 @@
 _FLAGS = flags.FLAGS
 
 
+def _is_relative_local(path_val: str) -> bool:
+    """Checks if a path is a relative, local file path."""
+    if not isinstance(path_val, str) or not path_val:
+        return False
+    return (not os.path.isabs(path_val) and not path_val.startswith('gs://') and
+            not path_val.startswith('http://') and
+            not path_val.startswith('https://'))
+
+
+def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
+    """Helper to find a base directory containing target_sub_path by walking up."""
+    if not start_path:
+        return None
+    curr = os.path.abspath(start_path)
+    for _ in range(10):  # limit to 10 levels up
+        if os.path.exists(os.path.join(curr, target_sub_path)):
+            return curr
+        parent = os.path.dirname(curr)
+        if parent == curr:
+            break
+        curr = parent
+    return None
+
+
 class ValidationRunner:
     """
   Orchestrates the validation process based on the new schema.
@@ -41,6 +66,8 @@ class ValidationRunner:
 
     def __init__(self, validation_config_path: str, differ_output: str,
                  stats_summary: str, lint_report: str, validation_output: str):
+        self.validation_config_path = validation_config_path
+        self.stats_summary = stats_summary
         self.config = ValidationConfig(validation_config_path)
         self.validation_output = validation_output
         self.validator = Validator()
@@ -212,6 +239,50 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
                 if output_dir:
                     rule_params.setdefault('output_path', output_dir)
 
+                # Resolve paths relative to the directory of the validation config.
+                if validator_name == 'GOLDENS_CHECK':
+                    config_dir = None
+                    # Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives
+                    for start in [
+                            self.validation_config_path, self.stats_summary,
+                            os.getcwd()
+                    ]:
+                        config_dir = _find_base_dir(start, 'golden_data')
+                        if config_dir:
+                            break
+
+                    if not config_dir:
+                        config_dir = os.path.dirname(
+                            os.path.abspath(self.validation_config_path))
+
+                    print(
+                        f"DEBUG: Found GOLDENS_CHECK rule: '{rule.get('rule_id')}'"
+                    )
+                    print(
+                        f"DEBUG: Config directory resolved to: '{config_dir}'")
+                    for path_key in list(rule_params.keys()):
+                        # Check any key in rule_params that equals 'golden_files' or 'input_files' or ends with '_file' or '_files'
+                        if path_key in (
+                                'golden_files',
+                                'input_files') or path_key.endswith(
+                                    '_file') or path_key.endswith('_files'):
+                            val = rule_params[path_key]
+                            print(
+                                f"DEBUG: Before resolve '{path_key}': '{val}'")
+                            if isinstance(val, str):
+                                if _is_relative_local(val):
+                                    rule_params[path_key] = os.path.join(
+                                        config_dir, val)
+                            elif isinstance(val, list):
+                                rule_params[path_key] = [
+                                    os.path.join(config_dir, item)
+                                    if _is_relative_local(item) else item
+                                    for item in val
+                                ]
+                            print(
+                                f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'"
+                            )
+
             if validator_name == 'SQL_VALIDATOR':
                 result = validation_func(self.data_sources['stats'],
                                          self.data_sources['differ'],
diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
index 76c5bc1d0b..049f5a9f60 100644
--- a/tools/import_validation/runner_test.py
+++ b/tools/import_validation/runner_test.py
@@ -350,3 +350,119 @@ def test_init_raises_error_if_required_file_is_missing(self):
                 lint_report=self.report_path,
                 validation_output=self.output_path)
         self.assertIn("'stats' data source", str(context.exception))
+
+    @patch('tools.import_validation.runner.Validator')
+    def test_runner_resolves_goldens_check_paths(self, MockValidator):
+        """Tests that GOLDENS_CHECK validator resolves relative paths correctly."""
+        # 1. Setup the mock
+        mock_validator_instance = MockValidator.return_value
+        mock_validator_instance.validate_goldens.return_value = ValidationResult(
+            ValidationStatus.PASSED, 'GOLDENS_CHECK')
+
+        # 2. Create the config file with relative paths
+        with open(self.config_path, 'w') as f:
+            json.dump(
+                {
+                    'rules': [{
+                        'rule_id': 'check_goldens_output_csv',
+                        'validator': 'GOLDENS_CHECK',
+                        'scope': {},
+                        'params': {
+                            'golden_files': 'golden_data/golden_file.csv',
+                            'input_files': 'output/input_file.csv'
+                        }
+                    }]
+                }, f)
+        pd.DataFrame({'StatVar': ['sv1']}).to_csv(self.stats_path, index=False)
+
+        # 3. Setup a dummy golden_data folder inside the temp dir to trigger base dir resolution
+        golden_dir = os.path.join(self.test_dir.name, 'golden_data')
+        os.makedirs(golden_dir, exist_ok=True)
+
+        runner = ValidationRunner(validation_config_path=self.config_path,
+                                  stats_summary=self.stats_path,
+                                  differ_output=self.differ_path,
+                                  lint_report=self.report_path,
+                                  validation_output=self.output_path)
+        runner.run_validations()
+
+        # 4. Assert that validate_goldens was called with resolved paths
+        mock_validator_instance.validate_goldens.assert_called_once()
+        args, kwargs = mock_validator_instance.validate_goldens.call_args
+        rule_params = args[1]
+        expected_golden = os.path.join(self.test_dir.name,
+                                       'golden_data/golden_file.csv')
+        expected_input = os.path.join(self.test_dir.name,
+                                      'output/input_file.csv')
+        self.assertEqual(rule_params['golden_files'], expected_golden)
+        self.assertEqual(rule_params['input_files'], expected_input)
+
+    @patch('tools.import_validation.runner.Validator')
+    def test_path_resolution_helpers_and_robustness(self, MockValidator):
+        """Tests that relative local path helpers work and robust path resolution handles ends-with keys."""
+        from tools.import_validation.runner import _is_relative_local, _find_base_dir
+
+        # Test helper functions
+        self.assertTrue(_is_relative_local("some/relative/path.csv"))
+        self.assertFalse(_is_relative_local("/absolute/path.csv"))
+        self.assertFalse(_is_relative_local("gs://bucket/path.csv"))
+        self.assertFalse(_is_relative_local("http://example.com/path.csv"))
+        self.assertFalse(_is_relative_local("https://example.com/path.csv"))
+        self.assertFalse(_is_relative_local(None))
+        self.assertFalse(_is_relative_local(123))
+
+        # Setup mock validator
+        mock_validator_instance = MockValidator.return_value
+        mock_validator_instance.validate_goldens.return_value = ValidationResult(
+            ValidationStatus.PASSED, 'GOLDENS_CHECK')
+
+        # Create config file with various parameter keys (like output_file) that contain paths
+        with open(self.config_path, 'w') as f:
+            json.dump(
+                {
+                    'rules': [{
+                        'rule_id': 'check_goldens_output_csv',
+                        'validator': 'GOLDENS_CHECK',
+                        'scope': {},
+                        'params': {
+                            'golden_files':
+                                'golden_data/golden_file.csv',
+                            'input_files':
+                                'output/input_file.csv',
+                            'output_file':
+                                'output/output_file.csv',
+                            'custom_files':
+                                ['output/custom_file.csv', 'gs://skip/this.csv']
+                        }
+                    }]
+                }, f)
+        pd.DataFrame({'StatVar': ['sv1']}).to_csv(self.stats_path, index=False)
+
+        # Setup dummy golden_data folder
+        golden_dir = os.path.join(self.test_dir.name, 'golden_data')
+        os.makedirs(golden_dir, exist_ok=True)
+
+        runner = ValidationRunner(validation_config_path=self.config_path,
+                                  stats_summary=self.stats_path,
+                                  differ_output=self.differ_path,
+                                  lint_report=self.report_path,
+                                  validation_output=self.output_path)
+        runner.run_validations()
+
+        # Check path resolution for the parameters
+        args, kwargs = mock_validator_instance.validate_goldens.call_args
+        rule_params = args[1]
+
+        self.assertEqual(
+            rule_params['golden_files'],
+            os.path.join(self.test_dir.name,
+                         'golden_data/golden_file.csv'))
+        self.assertEqual(
+            rule_params['input_files'],
+            os.path.join(self.test_dir.name, 'output/input_file.csv'))
+        self.assertEqual(rule_params['output_file'],
+                         os.path.join(self.test_dir.name, 'output/output_file.csv'))
+        self.assertEqual(rule_params['custom_files'], [
+            os.path.join(self.test_dir.name, 'output/custom_file.csv'),
+            'gs://skip/this.csv'
+        ])
diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 7b19b783fe..c6f3efe599 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -68,6 +68,7 @@
       --generate_goldens=goldens_data/generated_goldens.csv
 """
 
+import csv
 import os
 import sys
 import tempfile
@@ -297,15 +298,47 @@ def load_nodes_from_file(files: str) -> dict:
             # Nodes are keyed by their index in the combined loaded set.
             file_nodes = file_util.file_load_csv_dict(input_file,
                                                       key_index=True)
+            # Check if any loaded node has None as a key, indicating an incorrect delimiter
+            # was auto-detected (e.g. splitting 'dcid:Earth' on colon ':')
+            has_delimiter_error = False
             for node in file_nodes.values():
-                nodes[len(nodes)] = node
+                if None in node:
+                    has_delimiter_error = True
+                    break
+
+            if has_delimiter_error:
+                import csv
+                with file_util.FileIO(input_file) as csvfile:
+                    rawdata = csvfile.read()
+                    if isinstance(rawdata, bytes):
+                        encoding = file_util.file_get_encoding(input_file)
+                        data_str = rawdata.decode(encoding)
+                    else:
+                        data_str = rawdata
+                    reader = csv.DictReader(data_str.splitlines(),
+                                            delimiter=',')
+                    file_nodes = {}
+                    for row in reader:
+                        file_nodes[len(file_nodes)] = dict(row)
+
+            for node in file_nodes.values():
+                # Clean up None/empty keys and strip whitespace from headers/keys and values to ensure robust parsing
+                cleaned_node = {
+                    k.strip(): (v.strip() if isinstance(v, str) else v)
+                    for k, v in node.items()
+                    if k is not None and isinstance(k, str) and k.strip() != ''
+                }
+                nodes[len(nodes)] = cleaned_node
         else:
             # For MCF or JSON, we assume nodes are already keyed by DCID.
             file_nodes = mcf_file_util.load_mcf_nodes(input_file)
             for dcid, node in file_nodes.items():
-                # Ensure the dcid is present in the node dictionary itself.
+                # Ensure the dcid is present in the node dictionary itself with a null-safe check.
                 if 'dcid' not in node:
-                    node['dcid'] = mcf_file_util.strip_namespace(dcid)
+                    if dcid and isinstance(dcid, str):
+                        node['dcid'] = mcf_file_util.strip_namespace(dcid)
+                    else:
+                        node['dcid'] = ''
                 mcf_file_util.add_mcf_node(node, nodes)
 
     logging.info(f'Loaded {len(nodes)} nodes from {input_files}')
diff --git a/tools/import_validation/validator_goldens_test.py b/tools/import_validation/validator_goldens_test.py
index e486a8fe91..6fa4ab09ac 100644
--- a/tools/import_validation/validator_goldens_test.py
+++ b/tools/import_validation/validator_goldens_test.py
@@ -190,6 +190,25 @@ def test_validate_goldens(self, mock_file, mock_compare, mock_load):
         self.assertEqual(missing, [])
         mock_compare.assert_called_once()
 
+    def test_load_nodes_from_file_with_colon_values(self):
+        import tempfile
+        # Create a temporary CSV file with values containing colons and no commas
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv',
+                                         delete=False) as f:
+            f.write("header1\n")
+            f.write("value:1\n")
+            f.write("value:2\n")
+            temp_name = f.name
+
+        try:
+            nodes = validator_goldens.load_nodes_from_file(temp_name)
+            # The nodes should be parsed using comma as delimiter, which correctly preserves values with colons
+            self.assertEqual(len(nodes), 2)
+            self.assertEqual(nodes[0], {'header1': 'value:1'})
+            self.assertEqual(nodes[1], {'header1': 'value:2'})
+        finally:
+            os.remove(temp_name)
+
 
 if __name__ == '__main__':
     unittest.main()

From 903e0314c0eed3f9f3942ada3052dec8f554cae2 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 8 Jun 2026 11:05:28 +0000
Subject: [PATCH 02/24] fixed tests

---
 tools/import_validation/runner_test.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
index 049f5a9f60..80e7508020 100644
--- a/tools/import_validation/runner_test.py
+++ b/tools/import_validation/runner_test.py
@@ -431,8 +431,9 @@ def test_path_resolution_helpers_and_robustness(self, MockValidator):
                                 'output/input_file.csv',
                             'output_file':
                                 'output/output_file.csv',
-                            'custom_files':
-                                ['output/custom_file.csv', 'gs://skip/this.csv']
+                            'custom_files': [
+                                'output/custom_file.csv', 'gs://skip/this.csv'
+                            ]
                         }
                     }]
                 }, f)
@@ -455,13 +456,13 @@ def test_path_resolution_helpers_and_robustness(self, MockValidator):
 
         self.assertEqual(
             rule_params['golden_files'],
-            os.path.join(self.test_dir.name,
-                         'golden_data/golden_file.csv'))
+            os.path.join(self.test_dir.name, 'golden_data/golden_file.csv'))
         self.assertEqual(
             rule_params['input_files'],
             os.path.join(self.test_dir.name, 'output/input_file.csv'))
-        self.assertEqual(rule_params['output_file'],
-                         os.path.join(self.test_dir.name, 'output/output_file.csv'))
+        self.assertEqual(
+            rule_params['output_file'],
+            os.path.join(self.test_dir.name, 'output/output_file.csv'))
         self.assertEqual(rule_params['custom_files'], [
             os.path.join(self.test_dir.name, 'output/custom_file.csv'),
             'gs://skip/this.csv'

From 20e1292b928ab911104a18ba0b256443d13c2097 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 8 Jun 2026 11:32:14 +0000
Subject: [PATCH 03/24] docs: add detailed docstrings for path resolution
 helpers in runner

---
 tools/import_validation/runner.py | 32 +++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index aca77b0233..0b33f7abb0 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -36,7 +36,19 @@
 
 
 def _is_relative_local(path_val: str) -> bool:
-    """Checks if a path is a relative, local file path."""
+    """Checks if a path is a relative, local file path.
+
+    This function identifies path strings that represent local relative files
+    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths or remote/cloud 
+    URIs. It filters out non-strings, empty strings, absolute local paths,
+    Google Cloud Storage URIs (gs://), and web URLs (http://, https://).
+
+    Args:
+        path_val: The file path string to evaluate.
+
+    Returns:
+        True if the path represents a relative, local file path; False otherwise.
+    """
     if not isinstance(path_val, str) or not path_val:
         return False
     return (not os.path.isabs(path_val) and not path_val.startswith('gs://') and
@@ -45,7 +57,23 @@ def _is_relative_local(path_val: str) -> bool:
 
 
 def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
-    """Helper to find a base directory containing target_sub_path by walking up."""
+    """Helper to find a base directory containing a target sub-path by walking up.
+
+    Starting from the absolute directory of `start_path`, this function recursively
+    checks if `target_sub_path` exists in the current folder. If not, it walks up the 
+    parent directory tree up to 10 levels. This is crucial for resolving paths relative 
+    to import-specific golden directories when tests/validation are run from
+    different working directories (such as the repository root in CI/CD).
+
+    Args:
+        start_path: The file or directory path to start the upward search from.
+        target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
+            to search for within the parent tree.
+
+    Returns:
+        The absolute path of the directory containing `target_sub_path` if found,
+        or None if the root was reached or the 10-level limit was exceeded.
+    """
     if not start_path:
         return None
     curr = os.path.abspath(start_path)

From 14409c496c517d8ddac67a37a64b8225f172cfcb Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 8 Jun 2026 13:41:18 +0000
Subject: [PATCH 04/24] modified validation

---
 tools/import_validation/Validations.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/import_validation/Validations.md b/tools/import_validation/Validations.md
index 4efebb3a55..d46ece74fc 100644
--- a/tools/import_validation/Validations.md
+++ b/tools/import_validation/Validations.md
@@ -72,6 +72,8 @@ To generate goldens for the summary_report.csv to verify that all the expected
 StatVars are generated with the corresponding number of places and dates, run
 the following:
 
+This will compare the golden files using summary_report.csv as the default input:
+
 ```shell
     python3 validator_goldens.py \
       --validate_goldens_input=summary_report.csv \

From 194a3bc311a19c4e617c96a7234ac74d0771a1a2 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Tue, 9 Jun 2026 16:10:32 +0000
Subject: [PATCH 05/24] testing

---
 tools/import_validation/runner.py             |  9 +--
 tools/import_validation/runner_test.py        |  7 +--
 tools/import_validation/validator_goldens.py  | 59 +++++++------------
 .../validator_goldens_test.py                 | 19 ------
 4 files changed, 25 insertions(+), 69 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index 0b33f7abb0..37ce44a5a6 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -39,9 +39,8 @@ def _is_relative_local(path_val: str) -> bool:
     """Checks if a path is a relative, local file path.
 
     This function identifies path strings that represent local relative files
-    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths or remote/cloud 
-    URIs. It filters out non-strings, empty strings, absolute local paths,
-    Google Cloud Storage URIs (gs://), and web URLs (http://, https://).
+    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters
+    out non-strings, empty strings, and absolute local paths.
 
     Args:
         path_val: The file path string to evaluate.
@@ -51,9 +50,7 @@ def _is_relative_local(path_val: str) -> bool:
     """
     if not isinstance(path_val, str) or not path_val:
         return False
-    return (not os.path.isabs(path_val) and not path_val.startswith('gs://') and
-            not path_val.startswith('http://') and
-            not path_val.startswith('https://'))
+    return not os.path.isabs(path_val)
 
 
 def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
index 80e7508020..e1201eca69 100644
--- a/tools/import_validation/runner_test.py
+++ b/tools/import_validation/runner_test.py
@@ -405,9 +405,6 @@ def test_path_resolution_helpers_and_robustness(self, MockValidator):
         # Test helper functions
         self.assertTrue(_is_relative_local("some/relative/path.csv"))
         self.assertFalse(_is_relative_local("/absolute/path.csv"))
-        self.assertFalse(_is_relative_local("gs://bucket/path.csv"))
-        self.assertFalse(_is_relative_local("http://example.com/path.csv"))
-        self.assertFalse(_is_relative_local("https://example.com/path.csv"))
         self.assertFalse(_is_relative_local(None))
         self.assertFalse(_is_relative_local(123))
 
@@ -432,7 +429,7 @@ def test_path_resolution_helpers_and_robustness(self, MockValidator):
                             'output_file':
                                 'output/output_file.csv',
                             'custom_files': [
-                                'output/custom_file.csv', 'gs://skip/this.csv'
+                                'output/custom_file.csv', '/skip/this.csv'
                             ]
                         }
                     }]
@@ -465,5 +462,5 @@ def test_path_resolution_helpers_and_robustness(self, MockValidator):
             os.path.join(self.test_dir.name, 'output/output_file.csv'))
         self.assertEqual(rule_params['custom_files'], [
             os.path.join(self.test_dir.name, 'output/custom_file.csv'),
-            'gs://skip/this.csv'
+            '/skip/this.csv'
         ])
diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index c6f3efe599..4f8d8b8b32 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -298,53 +298,20 @@ def load_nodes_from_file(files: str) -> dict:
             # Nodes are keyed by their index in the combined loaded set.
             file_nodes = file_util.file_load_csv_dict(input_file,
                                                       key_index=True)
-            # Check if any loaded node has None as a key, indicating an incorrect delimiter
-            # was auto-detected (e.g. splitting 'dcid:Earth' on colon ':')
-            has_delimiter_error = False
             for node in file_nodes.values():
-                if None in node:
-                    has_delimiter_error = True
-                    break
-
-            if has_delimiter_error:
-                import csv
-                with file_util.FileIO(input_file) as csvfile:
-                    rawdata = csvfile.read()
-                    if isinstance(rawdata, bytes):
-                        encoding = file_util.file_get_encoding(input_file)
-                        data_str = rawdata.decode(encoding)
-                    else:
-                        data_str = rawdata
-                    reader = csv.DictReader(data_str.splitlines(),
-                                            delimiter=',')
-                    file_nodes = {}
-                    for row in reader:
-                        file_nodes[len(file_nodes)] = dict(row)
-
-            for node in file_nodes.values():
-                # Clean up None/empty keys and strip whitespace from headers/keys and values to ensure robust parsing
-                cleaned_node = {
-                    k.strip(): (v.strip() if isinstance(v, str) else v)
-                    for k, v in node.items()
-                    if k is not None and isinstance(k, str) and k.strip() != ''
-                }
-                nodes[len(nodes)] = cleaned_node
+                nodes[len(nodes)] = node
         else:
             # For MCF or JSON, we assume nodes are already keyed by DCID.
             file_nodes = mcf_file_util.load_mcf_nodes(input_file)
             for dcid, node in file_nodes.items():
-                # Ensure the dcid is present in the node dictionary itself with a null-safe check.
+                # Ensure the dcid is present in the node dictionary itself.
                 if 'dcid' not in node:
-                    if dcid and isinstance(dcid, str):
-                        node['dcid'] = mcf_file_util.strip_namespace(dcid)
-                    else:
-                        node['dcid'] = ''
+                    node['dcid'] = mcf_file_util.strip_namespace(dcid)
                 mcf_file_util.add_mcf_node(node, nodes)
 
     logging.info(f'Loaded {len(nodes)} nodes from {input_files}')
     return nodes
 
-
 def generate_goldens(input_files: str,
                      property_sets: list,
                      output_file: str = None,
@@ -473,9 +440,23 @@ def generate_goldens(input_files: str,
     if golden_nodes and output_file:
         logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}')
         if file_util.file_is_csv(output_file):
-            file_util.file_write_csv_dict(golden_nodes,
-                                          output_file,
-                                          key_column_name=None)
+            headers = []
+            for node in golden_nodes.values():
+                for prop in node.keys():
+                    if prop not in headers:
+                        headers.append(prop)
+            with file_util.FileIO(output_file, mode='w') as csvfile:
+                writer = csv.DictWriter(
+                    csvfile,
+                    fieldnames=headers,
+                    escapechar='\\',
+                    extrasaction='ignore',
+                    quotechar='"',
+                    quoting=csv.QUOTE_NONNUMERIC,
+                )
+                writer.writeheader()
+                for node in golden_nodes.values():
+                    writer.writerow(node)
         else:
             mcf_file_util.write_mcf_nodes([golden_nodes], output_file)
 
diff --git a/tools/import_validation/validator_goldens_test.py b/tools/import_validation/validator_goldens_test.py
index 6fa4ab09ac..e486a8fe91 100644
--- a/tools/import_validation/validator_goldens_test.py
+++ b/tools/import_validation/validator_goldens_test.py
@@ -190,25 +190,6 @@ def test_validate_goldens(self, mock_file, mock_compare, mock_load):
         self.assertEqual(missing, [])
         mock_compare.assert_called_once()
 
-    def test_load_nodes_from_file_with_colon_values(self):
-        import tempfile
-        # Create a temporary CSV file with values containing colons and no commas
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv',
-                                         delete=False) as f:
-            f.write("header1\n")
-            f.write("value:1\n")
-            f.write("value:2\n")
-            temp_name = f.name
-
-        try:
-            nodes = validator_goldens.load_nodes_from_file(temp_name)
-            # The nodes should be parsed using comma as delimiter, which correctly preserves values with colons
-            self.assertEqual(len(nodes), 2)
-            self.assertEqual(nodes[0], {'header1': 'value:1'})
-            self.assertEqual(nodes[1], {'header1': 'value:2'})
-        finally:
-            os.remove(temp_name)
-
 
 if __name__ == '__main__':
     unittest.main()

From aabebba716833239a6518b440260884ee24fcc33 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Tue, 9 Jun 2026 20:45:51 +0000
Subject: [PATCH 06/24] fix: strip dcid: namespace prefix from CSV values in
 golden loading

---
 tools/import_validation/validator_goldens.py  |  7 +++++-
 .../validator_goldens_test.py                 | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 4f8d8b8b32..54653367a8 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -299,7 +299,12 @@ def load_nodes_from_file(files: str) -> dict:
             file_nodes = file_util.file_load_csv_dict(input_file,
                                                       key_index=True)
             for node in file_nodes.values():
-                nodes[len(nodes)] = node
+                # Clean up "dcid:" prefixes from values (column headers are kept as is)
+                clean_node = {}
+                for k, v in node.items():
+                    clean_val = v[5:] if (isinstance(v, str) and v.startswith("dcid:")) else v
+                    clean_node[k] = clean_val
+                nodes[len(nodes)] = clean_node
         else:
             # For MCF or JSON, we assume nodes are already keyed by DCID.
             file_nodes = mcf_file_util.load_mcf_nodes(input_file)
diff --git a/tools/import_validation/validator_goldens_test.py b/tools/import_validation/validator_goldens_test.py
index e486a8fe91..c2fa1b9ada 100644
--- a/tools/import_validation/validator_goldens_test.py
+++ b/tools/import_validation/validator_goldens_test.py
@@ -190,6 +190,28 @@ def test_validate_goldens(self, mock_file, mock_compare, mock_load):
         self.assertEqual(missing, [])
         mock_compare.assert_called_once()
 
+    @patch('validator_goldens.file_util')
+    def test_load_nodes_from_file_with_dcid_prefix_values(self, mock_file):
+        mock_file.file_get_matching.return_value = ['dummy.csv']
+        mock_file.file_is_csv.return_value = True
+        mock_file.file_load_csv_dict.return_value = {
+            0: {
+                'observationAbout': 'dcid:Earth',
+                'variableMeasured': 'dcid:Count_Person',
+                'value': '10'
+            }
+        }
+
+        nodes = validator_goldens.load_nodes_from_file('dummy.csv')
+
+        self.assertEqual(len(nodes), 1)
+        # Verify that prefixes 'dcid:' have been stripped from values only
+        self.assertIn('observationAbout', nodes[0])
+        self.assertEqual(nodes[0]['observationAbout'], 'Earth')
+        self.assertIn('variableMeasured', nodes[0])
+        self.assertEqual(nodes[0]['variableMeasured'], 'Count_Person')
+        self.assertEqual(nodes[0]['value'], '10')
+
 
 if __name__ == '__main__':
     unittest.main()

From 6e36dcb848bfd630e2ba53384b5455a4bdb638ce Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Wed, 10 Jun 2026 05:24:37 +0000
Subject: [PATCH 07/24] docs: add descriptive comments to CSV writer
 configuration

---
 tools/import_validation/validator_goldens.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 54653367a8..62030d489b 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -445,12 +445,19 @@ def generate_goldens(input_files: str,
     if golden_nodes and output_file:
         logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}')
         if file_util.file_is_csv(output_file):
+            # Gather all unique column headers across all generated golden nodes.
+            # This ensures that even if some nodes have differing schema properties,
+            # every single column is fully represented in the CSV header.
             headers = []
             for node in golden_nodes.values():
                 for prop in node.keys():
                     if prop not in headers:
                         headers.append(prop)
             with file_util.FileIO(output_file, mode='w') as csvfile:
+                # Use standard-compliant csv.DictWriter with QUOTE_NONNUMERIC.
+                # This automatically wraps all string fields (e.g. text containing commas)
+                # in double quotes to prevent column misalignment/corruption, while leaving
+                # numbers (like float and int values) unquoted.
                 writer = csv.DictWriter(
                     csvfile,
                     fieldnames=headers,

From 1161f179694283ac1d1698267b92c92a3fdd737d Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Wed, 10 Jun 2026 05:36:27 +0000
Subject: [PATCH 08/24] testing

---
 tools/import_validation/runner_test.py       | 5 ++---
 tools/import_validation/validator_goldens.py | 4 +++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
index e1201eca69..ace339a81e 100644
--- a/tools/import_validation/runner_test.py
+++ b/tools/import_validation/runner_test.py
@@ -428,9 +428,8 @@ def test_path_resolution_helpers_and_robustness(self, MockValidator):
                                 'output/input_file.csv',
                             'output_file':
                                 'output/output_file.csv',
-                            'custom_files': [
-                                'output/custom_file.csv', '/skip/this.csv'
-                            ]
+                            'custom_files':
+                                ['output/custom_file.csv', '/skip/this.csv']
                         }
                     }]
                 }, f)
diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 62030d489b..0e78a334de 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -302,7 +302,8 @@ def load_nodes_from_file(files: str) -> dict:
                 # Clean up "dcid:" prefixes from values (column headers are kept as is)
                 clean_node = {}
                 for k, v in node.items():
-                    clean_val = v[5:] if (isinstance(v, str) and v.startswith("dcid:")) else v
+                    clean_val = v[5:] if (isinstance(v, str) and
+                                          v.startswith("dcid:")) else v
                     clean_node[k] = clean_val
                 nodes[len(nodes)] = clean_node
         else:
@@ -317,6 +318,7 @@ def load_nodes_from_file(files: str) -> dict:
     logging.info(f'Loaded {len(nodes)} nodes from {input_files}')
     return nodes
 
+
 def generate_goldens(input_files: str,
                      property_sets: list,
                      output_file: str = None,

From ddf793b4b485aaad8a65b1b56b75246934437e24 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 11:36:04 +0000
Subject: [PATCH 09/24] testing

---
 tools/import_validation/validator_goldens.py | 32 +++-----------------
 util/file_util.py                            | 11 ++++---
 2 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 0e78a334de..478f3a7853 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -68,7 +68,7 @@
       --generate_goldens=goldens_data/generated_goldens.csv
 """
 
-import csv
+
 import os
 import sys
 import tempfile
@@ -302,8 +302,7 @@ def load_nodes_from_file(files: str) -> dict:
                 # Clean up "dcid:" prefixes from values (column headers are kept as is)
                 clean_node = {}
                 for k, v in node.items():
-                    clean_val = v[5:] if (isinstance(v, str) and
-                                          v.startswith("dcid:")) else v
+                    clean_val = v[5:] if (isinstance(v, str) and v.startswith("dcid:")) else v
                     clean_node[k] = clean_val
                 nodes[len(nodes)] = clean_node
         else:
@@ -447,30 +446,9 @@ def generate_goldens(input_files: str,
     if golden_nodes and output_file:
         logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}')
         if file_util.file_is_csv(output_file):
-            # Gather all unique column headers across all generated golden nodes.
-            # This ensures that even if some nodes have differing schema properties,
-            # every single column is fully represented in the CSV header.
-            headers = []
-            for node in golden_nodes.values():
-                for prop in node.keys():
-                    if prop not in headers:
-                        headers.append(prop)
-            with file_util.FileIO(output_file, mode='w') as csvfile:
-                # Use standard-compliant csv.DictWriter with QUOTE_NONNUMERIC.
-                # This automatically wraps all string fields (e.g. text containing commas)
-                # in double quotes to prevent column misalignment/corruption, while leaving
-                # numbers (like float and int values) unquoted.
-                writer = csv.DictWriter(
-                    csvfile,
-                    fieldnames=headers,
-                    escapechar='\\',
-                    extrasaction='ignore',
-                    quotechar='"',
-                    quoting=csv.QUOTE_NONNUMERIC,
-                )
-                writer.writeheader()
-                for node in golden_nodes.values():
-                    writer.writerow(node)
+            file_util.file_write_csv_dict(golden_nodes,
+                                          output_file,
+                                          key_column_name=None)
         else:
             mcf_file_util.write_mcf_nodes([golden_nodes], output_file)
 
diff --git a/util/file_util.py b/util/file_util.py
index c255bc365a..7ff1c9c2f3 100644
--- a/util/file_util.py
+++ b/util/file_util.py
@@ -674,9 +674,12 @@ def file_write_csv_dict(py_dict: dict,
                     if col not in columns:
                         columns.append(col)
     if len(columns) == 1:
-        # Value is not a dict. Write it as a column name value.
-        value_column_name = 'value'
-        columns.append(value_column_name)
+        # Check if values are dicts. If they are, it's not a primitive value.
+        is_value_dict = any(isinstance(value, dict) for value in py_dict.values())
+        if not is_value_dict:
+            # Value is not a dict. Write it as a column name value.
+            value_column_name = 'value'
+            columns.append(value_column_name)
     # Use the first column for the key.
     if key_column_name == '':
         key_column_name = columns[0]
@@ -1052,7 +1055,7 @@ def file_get_csv_reader_options(
         default_options: dict = {},
         data: str = None,
         encoding: str = None,
-        delim_chars: list = [',', '	', ';', '|', ':']) -> dict:
+        delim_chars: list = [',', ' ', ';', '|', ':']) -> dict:
     """Returns a dictionary with options for the CSV file reader.
 
     Args:

From 5315183f2f6fec2a6075aa958e1a9c1e31ec497b Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 11:40:49 +0000
Subject: [PATCH 10/24] revert: undo changes to validator_goldens_test.py

---
 .../validator_goldens_test.py                 | 22 -------------------
 1 file changed, 22 deletions(-)

diff --git a/tools/import_validation/validator_goldens_test.py b/tools/import_validation/validator_goldens_test.py
index c2fa1b9ada..e486a8fe91 100644
--- a/tools/import_validation/validator_goldens_test.py
+++ b/tools/import_validation/validator_goldens_test.py
@@ -190,28 +190,6 @@ def test_validate_goldens(self, mock_file, mock_compare, mock_load):
         self.assertEqual(missing, [])
         mock_compare.assert_called_once()
 
-    @patch('validator_goldens.file_util')
-    def test_load_nodes_from_file_with_dcid_prefix_values(self, mock_file):
-        mock_file.file_get_matching.return_value = ['dummy.csv']
-        mock_file.file_is_csv.return_value = True
-        mock_file.file_load_csv_dict.return_value = {
-            0: {
-                'observationAbout': 'dcid:Earth',
-                'variableMeasured': 'dcid:Count_Person',
-                'value': '10'
-            }
-        }
-
-        nodes = validator_goldens.load_nodes_from_file('dummy.csv')
-
-        self.assertEqual(len(nodes), 1)
-        # Verify that prefixes 'dcid:' have been stripped from values only
-        self.assertIn('observationAbout', nodes[0])
-        self.assertEqual(nodes[0]['observationAbout'], 'Earth')
-        self.assertIn('variableMeasured', nodes[0])
-        self.assertEqual(nodes[0]['variableMeasured'], 'Count_Person')
-        self.assertEqual(nodes[0]['value'], '10')
-
 
 if __name__ == '__main__':
     unittest.main()

From 8d80c46037580678fae7f18cefaa9f5980aa1795 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 11:50:35 +0000
Subject: [PATCH 11/24] style: clean up whitespace in validator_goldens.py

---
 tools/import_validation/validator_goldens.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 478f3a7853..c8ba12f44e 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -68,7 +68,6 @@
       --generate_goldens=goldens_data/generated_goldens.csv
 """
 
-
 import os
 import sys
 import tempfile

From c3eff23b6ff5d6e2c10bcb4002655ace53a66384 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 11:51:14 +0000
Subject: [PATCH 12/24] refactor: reduce base directory search depth limit to 8
 in runner.py

---
 tools/import_validation/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index 37ce44a5a6..a6c588faf9 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -74,7 +74,7 @@ def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
     if not start_path:
         return None
     curr = os.path.abspath(start_path)
-    for _ in range(10):  # limit to 10 levels up
+    for _ in range(8):  # limit to 8 levels up
         if os.path.exists(os.path.join(curr, target_sub_path)):
             return curr
         parent = os.path.dirname(curr)

From cab980d97d9113848c7bd26e504afb23899590d6 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 11:57:41 +0000
Subject: [PATCH 13/24] revert: restore tab character in delim_chars in
 file_util.py

---
 util/file_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/file_util.py b/util/file_util.py
index 7ff1c9c2f3..9c0f0d744f 100644
--- a/util/file_util.py
+++ b/util/file_util.py
@@ -1055,7 +1055,7 @@ def file_get_csv_reader_options(
         default_options: dict = {},
         data: str = None,
         encoding: str = None,
-        delim_chars: list = [',', ' ', ';', '|', ':']) -> dict:
+        delim_chars: list = [',', '\t', ';', '|', ':']) -> dict:
     """Returns a dictionary with options for the CSV file reader.
 
     Args:

From 2701c54e0ede24141c8965039355d4f610a199e6 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 12:00:59 +0000
Subject: [PATCH 14/24] style: restore literal tab character in delim_chars in
 file_util.py

---
 util/file_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/file_util.py b/util/file_util.py
index 9c0f0d744f..1e57a4c7ef 100644
--- a/util/file_util.py
+++ b/util/file_util.py
@@ -1055,7 +1055,7 @@ def file_get_csv_reader_options(
         default_options: dict = {},
         data: str = None,
         encoding: str = None,
-        delim_chars: list = [',', '\t', ';', '|', ':']) -> dict:
+        delim_chars: list = [',', '	', ';', '|', ':']) -> dict:
     """Returns a dictionary with options for the CSV file reader.
 
     Args:

From b3d2053453359d21304341eb16e1f8b2a54b1f31 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 17:47:20 +0000
Subject: [PATCH 15/24] testing

---
 tools/import_validation/validator_goldens.py | 3 ++-
 util/file_util.py                            | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index c8ba12f44e..b8f0b7f53b 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -301,7 +301,8 @@ def load_nodes_from_file(files: str) -> dict:
                 # Clean up "dcid:" prefixes from values (column headers are kept as is)
                 clean_node = {}
                 for k, v in node.items():
-                    clean_val = v[5:] if (isinstance(v, str) and v.startswith("dcid:")) else v
+                    clean_val = v[5:] if (isinstance(v, str) and
+                                          v.startswith("dcid:")) else v
                     clean_node[k] = clean_val
                 nodes[len(nodes)] = clean_node
         else:
diff --git a/util/file_util.py b/util/file_util.py
index 1e57a4c7ef..fae21b6c72 100644
--- a/util/file_util.py
+++ b/util/file_util.py
@@ -675,7 +675,8 @@ def file_write_csv_dict(py_dict: dict,
                         columns.append(col)
     if len(columns) == 1:
         # Check if values are dicts. If they are, it's not a primitive value.
-        is_value_dict = any(isinstance(value, dict) for value in py_dict.values())
+        is_value_dict = any(
+            isinstance(value, dict) for value in py_dict.values())
         if not is_value_dict:
             # Value is not a dict. Write it as a column name value.
             value_column_name = 'value'

From 8f7e9e71953159d2a68c9d2e728d22e1bda9e574 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Sun, 14 Jun 2026 18:18:45 +0000
Subject: [PATCH 16/24] testing

---
 tools/import_validation/runner.py            | 5 ++---
 tools/import_validation/validator_goldens.py | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index a6c588faf9..6e6a61d089 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -14,7 +14,6 @@
 """Module for the ValidationRunner class."""
 
 import os
-import logging
 from absl import app
 from absl import flags
 from absl import logging
@@ -58,7 +57,7 @@ def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
 
     Starting from the absolute directory of `start_path`, this function recursively
     checks if `target_sub_path` exists in the current folder. If not, it walks up the 
-    parent directory tree up to 10 levels. This is crucial for resolving paths relative 
+    parent directory tree up to 8 levels. This is crucial for resolving paths relative 
     to import-specific golden directories when tests/validation are run from
     different working directories (such as the repository root in CI/CD).
 
@@ -69,7 +68,7 @@ def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
 
     Returns:
         The absolute path of the directory containing `target_sub_path` if found,
-        or None if the root was reached or the 10-level limit was exceeded.
+        or None if the root was reached or the 8-level limit was exceeded.
     """
     if not start_path:
         return None
diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index b8f0b7f53b..48e2937968 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -301,8 +301,7 @@ def load_nodes_from_file(files: str) -> dict:
                 # Clean up "dcid:" prefixes from values (column headers are kept as is)
                 clean_node = {}
                 for k, v in node.items():
-                    clean_val = v[5:] if (isinstance(v, str) and
-                                          v.startswith("dcid:")) else v
+                    clean_val = v.removeprefix("dcid:") if isinstance(v, str) else v
                     clean_node[k] = clean_val
                 nodes[len(nodes)] = clean_node
         else:

From 298cc57f3bee7311621cd5edae1dce41ec3ca971 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 07:01:34 +0000
Subject: [PATCH 17/24] testing

---
 tools/import_validation/runner.py      | 51 +-------------------------
 tools/import_validation/runner_test.py |  2 +-
 tools/import_validation/util.py        | 50 +++++++++++++++++++++++++
 util/file_util.py                      |  6 +--
 4 files changed, 54 insertions(+), 55 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index 6e6a61d089..8517753f2a 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -29,60 +29,11 @@
 from report_generator import ReportGenerator
 from validator import Validator
 from result import ValidationResult, ValidationStatus
-from util import filter_dataframe
+from util import filter_dataframe, _is_relative_local, _find_base_dir
 
 _FLAGS = flags.FLAGS
 
 
-def _is_relative_local(path_val: str) -> bool:
-    """Checks if a path is a relative, local file path.
-
-    This function identifies path strings that represent local relative files
-    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters
-    out non-strings, empty strings, and absolute local paths.
-
-    Args:
-        path_val: The file path string to evaluate.
-
-    Returns:
-        True if the path represents a relative, local file path; False otherwise.
-    """
-    if not isinstance(path_val, str) or not path_val:
-        return False
-    return not os.path.isabs(path_val)
-
-
-def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
-    """Helper to find a base directory containing a target sub-path by walking up.
-
-    Starting from the absolute directory of `start_path`, this function recursively
-    checks if `target_sub_path` exists in the current folder. If not, it walks up the 
-    parent directory tree up to 8 levels. This is crucial for resolving paths relative 
-    to import-specific golden directories when tests/validation are run from
-    different working directories (such as the repository root in CI/CD).
-
-    Args:
-        start_path: The file or directory path to start the upward search from.
-        target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
-            to search for within the parent tree.
-
-    Returns:
-        The absolute path of the directory containing `target_sub_path` if found,
-        or None if the root was reached or the 8-level limit was exceeded.
-    """
-    if not start_path:
-        return None
-    curr = os.path.abspath(start_path)
-    for _ in range(8):  # limit to 8 levels up
-        if os.path.exists(os.path.join(curr, target_sub_path)):
-            return curr
-        parent = os.path.dirname(curr)
-        if parent == curr:
-            break
-        curr = parent
-    return None
-
-
 class ValidationRunner:
     """
   Orchestrates the validation process based on the new schema.
diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
index ace339a81e..690e6dd9ef 100644
--- a/tools/import_validation/runner_test.py
+++ b/tools/import_validation/runner_test.py
@@ -400,7 +400,7 @@ def test_runner_resolves_goldens_check_paths(self, MockValidator):
     @patch('tools.import_validation.runner.Validator')
     def test_path_resolution_helpers_and_robustness(self, MockValidator):
         """Tests that relative local path helpers work and robust path resolution handles ends-with keys."""
-        from tools.import_validation.runner import _is_relative_local, _find_base_dir
+        from tools.import_validation.util import _is_relative_local, _find_base_dir
 
         # Test helper functions
         self.assertTrue(_is_relative_local("some/relative/path.csv"))
diff --git a/tools/import_validation/util.py b/tools/import_validation/util.py
index c26675eb8e..ca2f8d9ddb 100644
--- a/tools/import_validation/util.py
+++ b/tools/import_validation/util.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Utility functions for the import validation tool."""
 
+import os
 import pandas as pd
 import re
 
@@ -108,3 +109,52 @@ def filter_dataframe(df: pd.DataFrame,
         matching_indices.update(df[combined_condition].index)
 
     return df.loc[sorted(list(matching_indices))]
+
+
+def _is_relative_local(path_val: str) -> bool:
+    """Checks if a path is a relative, local file path.
+
+    This function identifies path strings that represent local relative files
+    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters
+    out non-strings, empty strings, and absolute local paths.
+
+    Args:
+        path_val: The file path string to evaluate.
+
+    Returns:
+        True if the path represents a relative, local file path; False otherwise.
+    """
+    if not isinstance(path_val, str) or not path_val:
+        return False
+    return not os.path.isabs(path_val)
+
+
+def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
+    """Helper to find a base directory containing a target sub-path by walking up.
+
+    Starting from the absolute directory of `start_path`, this function recursively
+    checks if `target_sub_path` exists in the current folder. If not, it walks up the 
+    parent directory tree up to 8 levels. This is crucial for resolving paths relative 
+    to import-specific golden directories when tests/validation are run from
+    different working directories (such as the repository root in CI/CD).
+
+    Args:
+        start_path: The file or directory path to start the upward search from.
+        target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
+            to search for within the parent tree.
+
+    Returns:
+        The absolute path of the directory containing `target_sub_path` if found,
+        or None if the root was reached or the 8-level limit was exceeded.
+    """
+    if not start_path:
+        return None
+    curr = os.path.abspath(start_path)
+    for _ in range(8):  # limit to 8 levels up
+        if os.path.exists(os.path.join(curr, target_sub_path)):
+            return curr
+        parent = os.path.dirname(curr)
+        if parent == curr:
+            break
+        curr = parent
+    return None
diff --git a/util/file_util.py b/util/file_util.py
index fae21b6c72..0371a67adc 100644
--- a/util/file_util.py
+++ b/util/file_util.py
@@ -619,7 +619,7 @@ def file_write_csv_dict(py_dict: dict,
                         filename: str,
                         columns: list = None,
                         key_column_name: str = 'key') -> list:
-    """Returns the filename after writing py_dict with a csv row per item.
+    """Returns the list of columns after writing py_dict with a csv row per item.
 
   Each dictionary items is written as a row in the CSV file.
 
@@ -675,9 +675,7 @@ def file_write_csv_dict(py_dict: dict,
                         columns.append(col)
     if len(columns) == 1:
         # Check if values are dicts. If they are, it's not a primitive value.
-        is_value_dict = any(
-            isinstance(value, dict) for value in py_dict.values())
-        if not is_value_dict:
+        if not any(isinstance(value, dict) for value in py_dict.values()):
             # Value is not a dict. Write it as a column name value.
             value_column_name = 'value'
             columns.append(value_column_name)

From a889f0e57dd27291c5aed70a1c17a2e50d92bc9a Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 07:10:27 +0000
Subject: [PATCH 18/24] revert: restore original runner_test.py to remove it
 from PR

---
 tools/import_validation/runner_test.py | 113 -------------------------
 1 file changed, 113 deletions(-)

diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
index 690e6dd9ef..76c5bc1d0b 100644
--- a/tools/import_validation/runner_test.py
+++ b/tools/import_validation/runner_test.py
@@ -350,116 +350,3 @@ def test_init_raises_error_if_required_file_is_missing(self):
                 lint_report=self.report_path,
                 validation_output=self.output_path)
         self.assertIn("'stats' data source", str(context.exception))
-
-    @patch('tools.import_validation.runner.Validator')
-    def test_runner_resolves_goldens_check_paths(self, MockValidator):
-        """Tests that GOLDENS_CHECK validator resolves relative paths correctly."""
-        # 1. Setup the mock
-        mock_validator_instance = MockValidator.return_value
-        mock_validator_instance.validate_goldens.return_value = ValidationResult(
-            ValidationStatus.PASSED, 'GOLDENS_CHECK')
-
-        # 2. Create the config file with relative paths
-        with open(self.config_path, 'w') as f:
-            json.dump(
-                {
-                    'rules': [{
-                        'rule_id': 'check_goldens_output_csv',
-                        'validator': 'GOLDENS_CHECK',
-                        'scope': {},
-                        'params': {
-                            'golden_files': 'golden_data/golden_file.csv',
-                            'input_files': 'output/input_file.csv'
-                        }
-                    }]
-                }, f)
-        pd.DataFrame({'StatVar': ['sv1']}).to_csv(self.stats_path, index=False)
-
-        # 3. Setup a dummy golden_data folder inside the temp dir to trigger base dir resolution
-        golden_dir = os.path.join(self.test_dir.name, 'golden_data')
-        os.makedirs(golden_dir, exist_ok=True)
-
-        runner = ValidationRunner(validation_config_path=self.config_path,
-                                  stats_summary=self.stats_path,
-                                  differ_output=self.differ_path,
-                                  lint_report=self.report_path,
-                                  validation_output=self.output_path)
-        runner.run_validations()
-
-        # 4. Assert that validate_goldens was called with resolved paths
-        mock_validator_instance.validate_goldens.assert_called_once()
-        args, kwargs = mock_validator_instance.validate_goldens.call_args
-        rule_params = args[1]
-        expected_golden = os.path.join(self.test_dir.name,
-                                       'golden_data/golden_file.csv')
-        expected_input = os.path.join(self.test_dir.name,
-                                      'output/input_file.csv')
-        self.assertEqual(rule_params['golden_files'], expected_golden)
-        self.assertEqual(rule_params['input_files'], expected_input)
-
-    @patch('tools.import_validation.runner.Validator')
-    def test_path_resolution_helpers_and_robustness(self, MockValidator):
-        """Tests that relative local path helpers work and robust path resolution handles ends-with keys."""
-        from tools.import_validation.util import _is_relative_local, _find_base_dir
-
-        # Test helper functions
-        self.assertTrue(_is_relative_local("some/relative/path.csv"))
-        self.assertFalse(_is_relative_local("/absolute/path.csv"))
-        self.assertFalse(_is_relative_local(None))
-        self.assertFalse(_is_relative_local(123))
-
-        # Setup mock validator
-        mock_validator_instance = MockValidator.return_value
-        mock_validator_instance.validate_goldens.return_value = ValidationResult(
-            ValidationStatus.PASSED, 'GOLDENS_CHECK')
-
-        # Create config file with various parameter keys (like output_file) that contain paths
-        with open(self.config_path, 'w') as f:
-            json.dump(
-                {
-                    'rules': [{
-                        'rule_id': 'check_goldens_output_csv',
-                        'validator': 'GOLDENS_CHECK',
-                        'scope': {},
-                        'params': {
-                            'golden_files':
-                                'golden_data/golden_file.csv',
-                            'input_files':
-                                'output/input_file.csv',
-                            'output_file':
-                                'output/output_file.csv',
-                            'custom_files':
-                                ['output/custom_file.csv', '/skip/this.csv']
-                        }
-                    }]
-                }, f)
-        pd.DataFrame({'StatVar': ['sv1']}).to_csv(self.stats_path, index=False)
-
-        # Setup dummy golden_data folder
-        golden_dir = os.path.join(self.test_dir.name, 'golden_data')
-        os.makedirs(golden_dir, exist_ok=True)
-
-        runner = ValidationRunner(validation_config_path=self.config_path,
-                                  stats_summary=self.stats_path,
-                                  differ_output=self.differ_path,
-                                  lint_report=self.report_path,
-                                  validation_output=self.output_path)
-        runner.run_validations()
-
-        # Check path resolution for the parameters
-        args, kwargs = mock_validator_instance.validate_goldens.call_args
-        rule_params = args[1]
-
-        self.assertEqual(
-            rule_params['golden_files'],
-            os.path.join(self.test_dir.name, 'golden_data/golden_file.csv'))
-        self.assertEqual(
-            rule_params['input_files'],
-            os.path.join(self.test_dir.name, 'output/input_file.csv'))
-        self.assertEqual(
-            rule_params['output_file'],
-            os.path.join(self.test_dir.name, 'output/output_file.csv'))
-        self.assertEqual(rule_params['custom_files'], [
-            os.path.join(self.test_dir.name, 'output/custom_file.csv'),
-            '/skip/this.csv'
-        ])

From 44bdc78251c2d8b854d1b43c91a2f4b5ab3d16a5 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 10:01:20 +0000
Subject: [PATCH 19/24] refactor: inline path resolution in runner.py and
 remove util.py from PR

---
 tools/import_validation/runner.py | 26 +++++++---------
 tools/import_validation/util.py   | 49 -------------------------------
 2 files changed, 11 insertions(+), 64 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index 8517753f2a..a852ff812d 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -29,7 +29,7 @@
 from report_generator import ReportGenerator
 from validator import Validator
 from result import ValidationResult, ValidationStatus
-from util import filter_dataframe, _is_relative_local, _find_base_dir
+from util import filter_dataframe
 
 _FLAGS = flags.FLAGS
 
@@ -216,19 +216,15 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
 
                 # Resolve paths relative to the directory of the validation config.
                 if validator_name == 'GOLDENS_CHECK':
-                    config_dir = None
-                    # Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives
-                    for start in [
-                            self.validation_config_path, self.stats_summary,
-                            os.getcwd()
-                    ]:
-                        config_dir = _find_base_dir(start, 'golden_data')
-                        if config_dir:
+                    config_dir = os.path.dirname(
+                        os.path.abspath(self.validation_config_path))
+                    # We walk up to find where the golden_data folder is situated.
+                    curr = config_dir
+                    while curr and curr != os.path.dirname(curr):
+                        if os.path.exists(os.path.join(curr, 'golden_data')):
+                            config_dir = curr
                             break
-
-                    if not config_dir:
-                        config_dir = os.path.dirname(
-                            os.path.abspath(self.validation_config_path))
+                        curr = os.path.dirname(curr)
 
                     print(
                         f"DEBUG: Found GOLDENS_CHECK rule: '{rule.get('rule_id')}'"
@@ -245,13 +241,13 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
                             print(
                                 f"DEBUG: Before resolve '{path_key}': '{val}'")
                             if isinstance(val, str):
-                                if _is_relative_local(val):
+                                if val and not os.path.isabs(val):
                                     rule_params[path_key] = os.path.join(
                                         config_dir, val)
                             elif isinstance(val, list):
                                 rule_params[path_key] = [
                                     os.path.join(config_dir, item)
-                                    if _is_relative_local(item) else item
+                                    if isinstance(item, str) and item and not os.path.isabs(item) else item
                                     for item in val
                                 ]
                             print(
diff --git a/tools/import_validation/util.py b/tools/import_validation/util.py
index ca2f8d9ddb..7cd5ecc6da 100644
--- a/tools/import_validation/util.py
+++ b/tools/import_validation/util.py
@@ -109,52 +109,3 @@ def filter_dataframe(df: pd.DataFrame,
         matching_indices.update(df[combined_condition].index)
 
     return df.loc[sorted(list(matching_indices))]
-
-
-def _is_relative_local(path_val: str) -> bool:
-    """Checks if a path is a relative, local file path.
-
-    This function identifies path strings that represent local relative files
-    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters
-    out non-strings, empty strings, and absolute local paths.
-
-    Args:
-        path_val: The file path string to evaluate.
-
-    Returns:
-        True if the path represents a relative, local file path; False otherwise.
-    """
-    if not isinstance(path_val, str) or not path_val:
-        return False
-    return not os.path.isabs(path_val)
-
-
-def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
-    """Helper to find a base directory containing a target sub-path by walking up.
-
-    Starting from the absolute directory of `start_path`, this function recursively
-    checks if `target_sub_path` exists in the current folder. If not, it walks up the 
-    parent directory tree up to 8 levels. This is crucial for resolving paths relative 
-    to import-specific golden directories when tests/validation are run from
-    different working directories (such as the repository root in CI/CD).
-
-    Args:
-        start_path: The file or directory path to start the upward search from.
-        target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
-            to search for within the parent tree.
-
-    Returns:
-        The absolute path of the directory containing `target_sub_path` if found,
-        or None if the root was reached or the 8-level limit was exceeded.
-    """
-    if not start_path:
-        return None
-    curr = os.path.abspath(start_path)
-    for _ in range(8):  # limit to 8 levels up
-        if os.path.exists(os.path.join(curr, target_sub_path)):
-            return curr
-        parent = os.path.dirname(curr)
-        if parent == curr:
-            break
-        curr = parent
-    return None

From 800087ebde257dd48b7a96618b5ed46dc14d7b10 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 10:05:07 +0000
Subject: [PATCH 20/24] feat: add path-resolution helper functions to util.py

---
 tools/import_validation/util.py | 49 +++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tools/import_validation/util.py b/tools/import_validation/util.py
index 7cd5ecc6da..ca2f8d9ddb 100644
--- a/tools/import_validation/util.py
+++ b/tools/import_validation/util.py
@@ -109,3 +109,52 @@ def filter_dataframe(df: pd.DataFrame,
         matching_indices.update(df[combined_condition].index)
 
     return df.loc[sorted(list(matching_indices))]
+
+
+def _is_relative_local(path_val: str) -> bool:
+    """Checks if a path is a relative, local file path.
+
+    This function identifies path strings that represent local relative files
+    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters
+    out non-strings, empty strings, and absolute local paths.
+
+    Args:
+        path_val: The file path string to evaluate.
+
+    Returns:
+        True if the path represents a relative, local file path; False otherwise.
+    """
+    if not isinstance(path_val, str) or not path_val:
+        return False
+    return not os.path.isabs(path_val)
+
+
+def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
+    """Helper to find a base directory containing a target sub-path by walking up.
+
+    Starting from the absolute directory of `start_path`, this function recursively
+    checks if `target_sub_path` exists in the current folder. If not, it walks up the 
+    parent directory tree up to 8 levels. This is crucial for resolving paths relative 
+    to import-specific golden directories when tests/validation are run from
+    different working directories (such as the repository root in CI/CD).
+
+    Args:
+        start_path: The file or directory path to start the upward search from.
+        target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
+            to search for within the parent tree.
+
+    Returns:
+        The absolute path of the directory containing `target_sub_path` if found,
+        or None if the root was reached or the 8-level limit was exceeded.
+    """
+    if not start_path:
+        return None
+    curr = os.path.abspath(start_path)
+    for _ in range(8):  # limit to 8 levels up
+        if os.path.exists(os.path.join(curr, target_sub_path)):
+            return curr
+        parent = os.path.dirname(curr)
+        if parent == curr:
+            break
+        curr = parent
+    return None

From 41ee2bdabf05be68520005d5923cbd87440a8ed0 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 10:10:47 +0000
Subject: [PATCH 21/24] testing

---
 tools/import_validation/util.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/import_validation/util.py b/tools/import_validation/util.py
index ca2f8d9ddb..74d970c6a5 100644
--- a/tools/import_validation/util.py
+++ b/tools/import_validation/util.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 """Utility functions for the import validation tool."""
 
-import os
 import pandas as pd
 import re
 

From 8958e7e1e6949aeb566247c164c64637b4a8654e Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 10:12:19 +0000
Subject: [PATCH 22/24] testing

---
 tools/import_validation/util.py | 51 +--------------------------------
 1 file changed, 1 insertion(+), 50 deletions(-)

diff --git a/tools/import_validation/util.py b/tools/import_validation/util.py
index 74d970c6a5..5d1a1703ae 100644
--- a/tools/import_validation/util.py
+++ b/tools/import_validation/util.py
@@ -107,53 +107,4 @@ def filter_dataframe(df: pd.DataFrame,
                                                              case=False)
         matching_indices.update(df[combined_condition].index)
 
-    return df.loc[sorted(list(matching_indices))]
-
-
-def _is_relative_local(path_val: str) -> bool:
-    """Checks if a path is a relative, local file path.
-
-    This function identifies path strings that represent local relative files
-    (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters
-    out non-strings, empty strings, and absolute local paths.
-
-    Args:
-        path_val: The file path string to evaluate.
-
-    Returns:
-        True if the path represents a relative, local file path; False otherwise.
-    """
-    if not isinstance(path_val, str) or not path_val:
-        return False
-    return not os.path.isabs(path_val)
-
-
-def _find_base_dir(start_path: str, target_sub_path: str) -> str | None:
-    """Helper to find a base directory containing a target sub-path by walking up.
-
-    Starting from the absolute directory of `start_path`, this function recursively
-    checks if `target_sub_path` exists in the current folder. If not, it walks up the 
-    parent directory tree up to 8 levels. This is crucial for resolving paths relative 
-    to import-specific golden directories when tests/validation are run from
-    different working directories (such as the repository root in CI/CD).
-
-    Args:
-        start_path: The file or directory path to start the upward search from.
-        target_sub_path: The name of the subdirectory or file (e.g., 'golden_data')
-            to search for within the parent tree.
-
-    Returns:
-        The absolute path of the directory containing `target_sub_path` if found,
-        or None if the root was reached or the 8-level limit was exceeded.
-    """
-    if not start_path:
-        return None
-    curr = os.path.abspath(start_path)
-    for _ in range(8):  # limit to 8 levels up
-        if os.path.exists(os.path.join(curr, target_sub_path)):
-            return curr
-        parent = os.path.dirname(curr)
-        if parent == curr:
-            break
-        curr = parent
-    return None
+    return df.loc[sorted(list(matching_indices))]
\ No newline at end of file

From 14b56309995a5c25a2a9a125883f1e8d321de899 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 10:13:54 +0000
Subject: [PATCH 23/24] testing

---
 tools/import_validation/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/import_validation/util.py b/tools/import_validation/util.py
index 5d1a1703ae..c26675eb8e 100644
--- a/tools/import_validation/util.py
+++ b/tools/import_validation/util.py
@@ -107,4 +107,4 @@ def filter_dataframe(df: pd.DataFrame,
                                                              case=False)
         matching_indices.update(df[combined_condition].index)
 
-    return df.loc[sorted(list(matching_indices))]
\ No newline at end of file
+    return df.loc[sorted(list(matching_indices))]

From d0fdae5d4db7f3391ce84348eedc802ed26824b1 Mon Sep 17 00:00:00 2001
From: Nivedita Singh <niveditasing@google.com>
Date: Mon, 15 Jun 2026 11:48:32 +0000
Subject: [PATCH 24/24] testing

---
 tools/import_validation/runner.py            | 3 ++-
 tools/import_validation/validator_goldens.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
index a852ff812d..964ed98285 100644
--- a/tools/import_validation/runner.py
+++ b/tools/import_validation/runner.py
@@ -247,7 +247,8 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
                             elif isinstance(val, list):
                                 rule_params[path_key] = [
                                     os.path.join(config_dir, item)
-                                    if isinstance(item, str) and item and not os.path.isabs(item) else item
+                                    if isinstance(item, str) and item and
+                                    not os.path.isabs(item) else item
                                     for item in val
                                 ]
                             print(
diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
index 48e2937968..82c1781ad9 100644
--- a/tools/import_validation/validator_goldens.py
+++ b/tools/import_validation/validator_goldens.py
@@ -301,7 +301,8 @@ def load_nodes_from_file(files: str) -> dict:
                 # Clean up "dcid:" prefixes from values (column headers are kept as is)
                 clean_node = {}
                 for k, v in node.items():
-                    clean_val = v.removeprefix("dcid:") if isinstance(v, str) else v
+                    clean_val = v.removeprefix("dcid:") if isinstance(
+                        v, str) else v
                     clean_node[k] = clean_val
                 nodes[len(nodes)] = clean_node
         else: