From e10247f1b818661be25b6209bc8c097745d55ac5 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 27 Apr 2026 09:24:43 -0400 Subject: [PATCH 1/2] Add CI-generated tests. --- src/modelarrayio/storage/tiledb_storage.py | 3 +- test/conftest.py | 2 +- test/test_cifti_to_h5_unit.py | 83 ++++++++++ test/test_cli_main_and_s3_utils.py | 118 ++++++++++++++ test/test_cli_utils.py | 172 +++++++++++++++++++++ test/test_mif_format_utils.py | 58 +++++++ test/test_mif_helpers.py | 143 +++++++++++++++++ test/test_mif_image_unit.py | 111 +++++++++++++ test/test_mif_to_h5_unit.py | 82 ++++++++++ test/test_tiledb_storage.py | 126 +++++++++++++++ 10 files changed, 896 insertions(+), 2 deletions(-) create mode 100644 test/test_cifti_to_h5_unit.py create mode 100644 test/test_cli_main_and_s3_utils.py create mode 100644 test/test_cli_utils.py create mode 100644 test/test_mif_format_utils.py create mode 100644 test/test_mif_helpers.py create mode 100644 test/test_mif_image_unit.py create mode 100644 test/test_mif_to_h5_unit.py create mode 100644 test/test_tiledb_storage.py diff --git a/src/modelarrayio/storage/tiledb_storage.py b/src/modelarrayio/storage/tiledb_storage.py index 3347d1a..98e8e0d 100644 --- a/src/modelarrayio/storage/tiledb_storage.py +++ b/src/modelarrayio/storage/tiledb_storage.py @@ -352,7 +352,8 @@ def write_parcel_names(base_uri: str, array_path: str, names: Sequence[str]): name='idx', domain=(0, max(n - 1, 0)), tile=max(1, min(n, 1024)), dtype=np.int64 ) dom = tiledb.Domain(dim_idx) - attr_values = tiledb.Attr(name='values', dtype=np.unicode_) + # np.unicode_ was removed in NumPy 2.0; np.str_ is the compatible string scalar. + attr_values = tiledb.Attr(name='values', dtype=np.str_) schema = tiledb.ArraySchema(domain=dom, attrs=[attr_values], sparse=False) if tiledb.object_type(uri): diff --git a/test/conftest.py b/test/conftest.py index 7b0c290..c017321 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -51,4 +51,4 @@ def downloaded_fixel_data_dir(tmp_path_factory: pytest.TempPathFactory) -> Path: try: return _download_and_extract_fixel_test_data(destination_dir) except (FileNotFoundError, OSError, URLError, tarfile.TarError) as exc: - raise RuntimeError(f'Downloaded fixel test data unavailable: {exc}') from exc + pytest.skip(f'Downloaded fixel test data unavailable: {exc}') diff --git a/test/test_cifti_to_h5_unit.py b/test/test_cifti_to_h5_unit.py new file mode 100644 index 0000000..9fc3a68 --- /dev/null +++ b/test/test_cifti_to_h5_unit.py @@ -0,0 +1,83 @@ +"""Focused unit tests for cifti_to_h5 branch coverage.""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest + +from modelarrayio.cli import cifti_to_h5 + + +def test_cifti_to_h5_raises_when_scalar_sources_missing(monkeypatch) -> None: + monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: {}) + with pytest.raises(ValueError, match='Unable to derive scalar sources'): + cifti_to_h5.cifti_to_h5(pd.DataFrame(), output=Path('out.h5')) + + +def test_cifti_to_h5_tiledb_split_outputs_and_parcels(monkeypatch, tmp_path: Path) -> None: + scalar_sources = {'FA': ['fa1.nii'], 'MD': ['md1.nii']} + write_calls = [] + parcel_calls = [] + + monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: scalar_sources) + monkeypatch.setattr( + cifti_to_h5, '_get_cifti_parcel_info', lambda _first: ('pscalar', {'parcel_id': np.array(['P1'])}) + ) + monkeypatch.setattr( + cifti_to_h5, + 'extract_cifti_scalar_data', + lambda source_file, 
reference_brain_names=None: ( + np.array([1.0, 2.0], dtype=np.float32), + ['brain-a'], + ), + ) + monkeypatch.setattr( + cifti_to_h5.cli_utils, + 'write_tiledb_scalar_matrices', + lambda output, scalars, sources, **kwargs: write_calls.append((Path(output), scalars, sources)), + ) + monkeypatch.setattr( + cifti_to_h5.cli_utils, + 'write_tiledb_parcel_arrays', + lambda output, parcels: parcel_calls.append((Path(output), parcels)), + ) + + status = cifti_to_h5.cifti_to_h5( + cohort_long=pd.DataFrame({'scalar_name': ['FA', 'MD'], 'source_file': ['fa1.nii', 'md1.nii']}), + backend='tiledb', + output=tmp_path / 'store.tdb', + workers=2, + split_outputs=True, + ) + + assert status == 0 + assert len(write_calls) == 2 + assert sorted(path.name for path, _, _ in write_calls) == ['FA_store.tdb', 'MD_store.tdb'] + assert len(parcel_calls) == 2 + assert sorted(path.name for path, _ in parcel_calls) == ['FA_store.tdb', 'MD_store.tdb'] + + +def test_cifti_to_h5_hdf5_split_outputs_for_dscalar(monkeypatch, tmp_path: Path) -> None: + scalar_sources = {'FA': ['fa1.nii']} + scalars = {'FA': [np.array([1.0, 2.0], dtype=np.float32)]} + + monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: scalar_sources) + monkeypatch.setattr(cifti_to_h5, '_get_cifti_parcel_info', lambda _first: ('dscalar', {})) + monkeypatch.setattr(cifti_to_h5, 'load_cohort_cifti', lambda _cohort, _workers: (scalars, ['Left', 'Right'])) + monkeypatch.setattr( + cifti_to_h5, + 'brain_names_to_dataframe', + lambda _brain_names: (pd.DataFrame({'i': [0, 1]}), ['Ctx']), + ) + + status = cifti_to_h5.cifti_to_h5( + cohort_long=pd.DataFrame({'scalar_name': ['FA'], 'source_file': ['fa1.nii']}), + backend='hdf5', + output=tmp_path / 'grey.h5', + split_outputs=True, + ) + assert status == 0 + assert (tmp_path / 'FA_grey.h5').exists() diff --git a/test/test_cli_main_and_s3_utils.py b/test/test_cli_main_and_s3_utils.py new file mode 100644 index 0000000..87a0a21 --- /dev/null +++ b/test/test_cli_main_and_s3_utils.py @@ -0,0 +1,118 @@ +"""Unit tests for CLI entrypoint and S3 utilities.""" + +from __future__ import annotations + +import gzip +import sys +import types + +import nibabel as nb +import numpy as np +import pytest + +from modelarrayio.cli import main as cli_main +from modelarrayio.utils import s3_utils + + +def test_main_prints_help_and_returns_1(capsys) -> None: + status = cli_main.main([]) + captured = capsys.readouterr() + assert status == 1 + assert 'usage:' in captured.out + + +def test_main_dispatches_to_selected_subcommand(monkeypatch) -> None: + def _fake_run(**kwargs): + assert kwargs['value'] == 'ok' + return 7 + + parser = cli_main._get_parser() + parser.add_argument('--value', required=True) + parser.set_defaults(func=_fake_run) + monkeypatch.setattr(cli_main, '_get_parser', lambda: parser) + + assert cli_main.main(['--value', 'ok']) == 7 + + +def test_get_version_fallbacks(monkeypatch) -> None: + fake_about = types.ModuleType('modelarrayio.__about__') + monkeypatch.setitem(sys.modules, 'modelarrayio.__about__', fake_about) + monkeypatch.setattr(cli_main, 'version', lambda _: '1.2.3') + assert cli_main._get_version() == '1.2.3' + + monkeypatch.setattr( + cli_main, + 'version', + lambda _: (_ for _ in ()).throw(cli_main.PackageNotFoundError('missing')), + ) + assert cli_main._get_version() == '0+unknown' + + +def test_make_s3_client_anon_and_signed(monkeypatch) -> None: + calls = [] + + class _FakeBoto3: + @staticmethod + def client(service, **kwargs): + calls.append((service, kwargs)) + return ('client', kwargs) + 
+ fake_config_module = types.SimpleNamespace(Config=lambda **kwargs: ('cfg', kwargs)) + fake_botocore = types.SimpleNamespace(UNSIGNED='unsigned') + monkeypatch.setitem(__import__('sys').modules, 'boto3', _FakeBoto3) + monkeypatch.setitem(__import__('sys').modules, 'botocore', fake_botocore) + monkeypatch.setitem(__import__('sys').modules, 'botocore.config', fake_config_module) + + monkeypatch.setenv('MODELARRAYIO_S3_ANON', '1') + s3_utils._make_s3_client() + assert calls[0][0] == 's3' + assert 'config' in calls[0][1] + + monkeypatch.setenv('MODELARRAYIO_S3_ANON', '0') + s3_utils._make_s3_client() + assert calls[1] == ('s3', {}) + + +def test_make_s3_client_requires_boto3(monkeypatch) -> None: + import builtins + + real_import = builtins.__import__ + + def _fake_import(name, *args, **kwargs): + if name == 'boto3': + raise ImportError('no boto3') + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, '__import__', _fake_import) + with pytest.raises(ImportError, match='boto3 is required'): + s3_utils._make_s3_client() + + +def test_load_nibabel_local_path(monkeypatch, tmp_path) -> None: + nifti_path = tmp_path / 'image.nii.gz' + data = np.zeros((2, 2, 2), dtype=np.float32) + nb.Nifti1Image(data, np.eye(4)).to_filename(nifti_path) + loaded = s3_utils.load_nibabel(str(nifti_path)) + np.testing.assert_array_equal(loaded.get_fdata(), data) + + +def test_load_nibabel_from_s3_bytes(monkeypatch, tmp_path) -> None: + data = np.arange(8, dtype=np.float32).reshape(2, 2, 2) + image = nb.Nifti1Image(data, np.eye(4)) + file_path = tmp_path / 'local.nii' + image.to_filename(file_path) + raw = gzip.compress(file_path.read_bytes()) + + class _FakeBody: + def read(self): + return raw + + class _FakeClient: + def get_object(self, **kwargs): + assert kwargs['Bucket'] == 'bucket' + assert kwargs['Key'] == 'key.nii.gz' + return {'Body': _FakeBody()} + + monkeypatch.setattr(s3_utils, '_make_s3_client', lambda: _FakeClient()) + loaded = s3_utils.load_nibabel('s3://bucket/key.nii.gz') + np.testing.assert_array_equal(loaded.get_fdata(), data) diff --git a/test/test_cli_utils.py b/test/test_cli_utils.py new file mode 100644 index 0000000..fb94da4 --- /dev/null +++ b/test/test_cli_utils.py @@ -0,0 +1,172 @@ +"""Unit tests for shared CLI helper utilities.""" + +from __future__ import annotations + +import logging +from pathlib import Path + +import h5py +import numpy as np +import pandas as pd +import pytest + +from modelarrayio.cli import utils as cli_utils + + +def test_prepare_output_directory_warns_for_existing_path(tmp_path: Path, caplog) -> None: + output_dir = tmp_path / 'results' + output_dir.mkdir() + logger = logging.getLogger('test_cli_utils') + + with caplog.at_level(logging.WARNING): + result = cli_utils.prepare_output_directory(output_dir, logger) + + assert result == output_dir + assert output_dir.exists() + assert any('Output directory exists' in record.message for record in caplog.records) + + +def test_prefixed_output_path_sanitizes_prefix(tmp_path: Path) -> None: + output_path = tmp_path / 'stats.h5' + prefixed = cli_utils.prefixed_output_path(output_path, 'p.value/result') + assert prefixed.name == 'p.value_result_stats.h5' + + +def test_write_table_dataset_writes_transposed_data_and_attrs(tmp_path: Path) -> None: + table = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + h5_path = tmp_path / 'table.h5' + + with h5py.File(h5_path, 'w') as h5_file: + dataset = cli_utils.write_table_dataset( + h5_file, + 'table', + table, + extra_attrs={'labels': ['left', 'right']}, + ) + 
np.testing.assert_array_equal(dataset[...], np.array([[1, 2], [3, 4]])) + assert [str(value) for value in dataset.attrs['column_names']] == ['a', 'b'] + assert [str(value) for value in dataset.attrs['labels']] == ['left', 'right'] + + +def test_write_hdf5_scalar_matrices_skips_empty_rows_and_writes_values(tmp_path: Path) -> None: + h5_path = tmp_path / 'scalars.h5' + with h5py.File(h5_path, 'w') as h5_file: + cli_utils.write_hdf5_scalar_matrices( + h5_file, + scalars={ + 'FA': [np.array([1.0, 2.0], dtype=np.float32), np.array([3.0, 4.0], dtype=np.float32)], + 'MD': [], + }, + sources_by_scalar={'FA': ['sub-1', 'sub-2'], 'MD': []}, + storage_dtype='float32', + compression='gzip', + compression_level=1, + shuffle=True, + chunk_voxels=0, + target_chunk_mb=0.5, + ) + + with h5py.File(h5_path, 'r') as h5_file: + assert 'scalars/FA/values' in h5_file + assert 'scalars/MD/values' not in h5_file + values = h5_file['scalars/FA/values'][...] + np.testing.assert_array_equal(values, np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)) + + +def test_write_tiledb_scalar_matrices_calls_column_name_writer(monkeypatch, tmp_path: Path) -> None: + called = {'create': [], 'columns': [], 'write': []} + + def _fake_create(*args, **kwargs): + called['create'].append((args, kwargs)) + + def _fake_write_columns(*args, **kwargs): + called['columns'].append((args, kwargs)) + + def _fake_write_rows(*args, **kwargs): + called['write'].append((args, kwargs)) + + monkeypatch.setattr(cli_utils.tiledb_storage, 'create_empty_scalar_matrix_array', _fake_create) + monkeypatch.setattr(cli_utils.tiledb_storage, 'write_column_names', _fake_write_columns) + monkeypatch.setattr(cli_utils.tiledb_storage, 'write_rows_in_column_stripes', _fake_write_rows) + + cli_utils.write_tiledb_scalar_matrices( + tmp_path / 'out.tdb', + scalars={'FA': [np.array([1.0, 2.0], dtype=np.float32)], 'MD': []}, + sources_by_scalar={'FA': ['sub-1.nii.gz'], 'MD': []}, + storage_dtype='float32', + compression='zstd', + compression_level=3, + shuffle=False, + chunk_voxels=16, + target_chunk_mb=1.0, + write_column_name_arrays=True, + ) + + assert len(called['create']) == 1 + assert len(called['columns']) == 1 + assert len(called['write']) == 1 + + +def test_write_hdf5_and_tiledb_parcel_arrays(monkeypatch, tmp_path: Path) -> None: + parcel_arrays = {'parcel_id': np.array(['A', 'B'])} + h5_path = tmp_path / 'parcels.h5' + with h5py.File(h5_path, 'w') as h5_file: + cli_utils.write_hdf5_parcel_arrays(h5_file, parcel_arrays) + + with h5py.File(h5_path, 'r') as h5_file: + np.testing.assert_array_equal(h5_file['parcels/parcel_id'][...].astype(str), np.array(['A', 'B'])) + + calls = [] + + def _fake_write_parcel_names(base_uri, array_path, names): + calls.append((base_uri, array_path, names)) + + monkeypatch.setattr(cli_utils.tiledb_storage, 'write_parcel_names', _fake_write_parcel_names) + cli_utils.write_tiledb_parcel_arrays(tmp_path / 'out.tdb', {'parcel_id': np.array(['1', '2'])}) + assert calls == [(str(tmp_path / 'out.tdb'), 'parcels/parcel_id', ['1', '2'])] + + +def test_decode_names_handles_scalar_and_bytes() -> None: + decoded = cli_utils._decode_names(np.array([b'a\x00', b' b ', b''])) + assert decoded == ['a', 'b'] + assert cli_utils._decode_names(' value ') == ['value'] + + +def test_read_result_names_prefers_attrs_then_fallback_paths(tmp_path: Path, caplog) -> None: + h5_path = tmp_path / 'results.h5' + logger = logging.getLogger('test_cli_utils.read_result_names') + + with h5py.File(h5_path, 'w') as h5_file: + group = 
h5_file.require_group('results/lm')
+        matrix = group.create_dataset(
+            'results_matrix',
+            data=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
+        )
+
+        matrix.attrs['colnames'] = np.array([b'first', b'second'])
+        assert cli_utils.read_result_names(h5_file, 'lm', matrix, logger=logger) == ['first', 'second']
+
+        del matrix.attrs['colnames']
+        group.create_dataset(
+            'column_names',
+            data=np.array(['alpha', 'beta'], dtype=h5py.string_dtype('utf-8')),
+        )
+        assert cli_utils.read_result_names(h5_file, 'lm', matrix, logger=logger) == ['alpha', 'beta']
+
+    with h5py.File(h5_path, 'a') as h5_file:
+        group = h5_file.require_group('results/lm_nested')
+        matrix = group.create_dataset(
+            'matrix',
+            data=np.array([[1.0, 2.0]], dtype=np.float32),
+        )
+        group.require_group('results_matrix').create_dataset(
+            'column_names',
+            data=np.array(['gamma'], dtype=h5py.string_dtype('utf-8')),
+        )
+        assert cli_utils.read_result_names(h5_file, 'lm_nested', matrix, logger=logger) == ['gamma']
+
+        del h5_file['results/lm_nested/results_matrix/column_names']
+        with caplog.at_level(logging.WARNING):
+            fallback = cli_utils.read_result_names(h5_file, 'lm_nested', matrix, logger=logger)
+        assert fallback == ['component001']
+        assert any('Unable to read column names' in record.message for record in caplog.records)
diff --git a/test/test_mif_format_utils.py b/test/test_mif_format_utils.py
new file mode 100644
index 0000000..74e7750
--- /dev/null
+++ b/test/test_mif_format_utils.py
@@ -0,0 +1,58 @@
+"""Unit tests for low-level MIF format parsing helpers."""
+
+from __future__ import annotations
+
+import io
+import sys
+
+import numpy as np
+import pytest
+
+from modelarrayio.utils import _mif_format
+
+
+def test_readline_reads_until_newline() -> None:
+    fileobj = io.BytesIO(b'first line\nsecond line')
+    assert _mif_format._readline(fileobj) == b'first line\n'
+    assert _mif_format._readline(fileobj) == b'second line'
+
+
+@pytest.mark.parametrize(
+    ('dtype_str', 'expected'),
+    [
+        ('Float32LE', np.dtype('<f4')),
+        ('Float64BE', np.dtype('>f8')),
+        ('UInt8', np.dtype('u1')),
+    ],
+)
+def test_parse_dtype_valid(dtype_str: str, expected: np.dtype) -> None:
+    parsed = _mif_format._mif_parse_dtype(dtype_str)
+    assert parsed == expected
+
+
+def test_parse_dtype_uses_native_endian_without_suffix(monkeypatch) -> None:
+    monkeypatch.setattr(sys, 'byteorder', 'little')
+    assert _mif_format._mif_parse_dtype('Int16') == np.dtype('<i2')
+
+
+def test_parse_dtype_unknown_raises() -> None:
+    with pytest.raises(ValueError, match='Unknown MIF datatype'):
+        _mif_format._mif_parse_dtype('Bogus')
+
+
+def test_dtype_to_str_round_trip_and_errors() -> None:
+    assert _mif_format._mif_dtype_to_str(np.dtype('<f4')) == 'Float32LE'
+    with pytest.raises(ValueError):
+        _mif_format._mif_dtype_to_str(np.dtype('O'))
+
+
+def test_layout_round_trip() -> None:
+    layout = _mif_format._mif_parse_layout('-0,+1,2', ndim=3)
+    assert layout == [-1, 2, 3]
+    assert _mif_format._mif_layout_to_str(layout) == '-0,+1,+2'
+
+
+def test_layout_parse_rejects_wrong_axis_count() -> None:
+    with pytest.raises(ValueError, match='Layout has 2 axes but dim has 3'):
+        _mif_format._mif_parse_layout('0,1', ndim=3)
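The dtype tests above pin down the naming scheme the parser must honor: a base type name ('Float32', 'Int16', ...) with an optional 'LE'/'BE' endianness suffix, and native byte order when the suffix is absent. A minimal sketch of a parser consistent with those expectations — `parse_mif_dtype` and `_BASE_DTYPES` are illustrative names, not the module's actual internals:

    import numpy as np

    # Base MRtrix datatype names mapped to NumPy type codes (illustrative
    # subset; the real table lives in modelarrayio.utils._mif_format).
    _BASE_DTYPES = {'Float64': 'f8', 'Float32': 'f4', 'Int32': 'i4', 'Int16': 'i2', 'UInt8': 'u1'}


    def parse_mif_dtype(name: str) -> np.dtype:
        """Sketch: 'Float32LE' -> dtype('<f4'); a bare name means native byte order."""
        order, base = '=', name
        if name.endswith('LE'):
            order, base = '<', name[:-2]
        elif name.endswith('BE'):
            order, base = '>', name[:-2]
        if base not in _BASE_DTYPES:
            raise ValueError(f'Unknown MIF datatype: {name}')
        # NumPy ignores the byte-order prefix for single-byte types, so 'u1'
        # round-trips the same way regardless of suffix.
        return np.dtype(order + _BASE_DTYPES[base])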
diff --git a/test/test_mif_helpers.py b/test/test_mif_helpers.py
new file mode 100644
index 0000000..59112c6
--- /dev/null
+++ b/test/test_mif_helpers.py
@@ -0,0 +1,143 @@
+"""Unit tests for MIF helper modules and CLI writer logic."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+
+import h5py
+import numpy as np
+import pandas as pd
+import pytest
+
+from modelarrayio.cli import h5_to_mif as h5_to_mif_module
+from modelarrayio.utils import mif
+
+
+def test_mif_to_image_uses_mifimage_loader(monkeypatch, tmp_path: Path) -> None:
+    class _FakeImage:
+        def get_fdata(self, dtype=None):
+            return np.array([[1.0, 2.0]], dtype=np.float32 if dtype is None else dtype)
+
+    def _fake_from_filename(path):
+        assert path == str(tmp_path / 'input.mif')
+        return _FakeImage()
+
+    monkeypatch.setattr(mif.MifImage, 'from_filename', staticmethod(_fake_from_filename))
+    img, data = mif.mif_to_image(tmp_path / 'input.mif')
+    assert isinstance(img, _FakeImage)
+    np.testing.assert_array_equal(data, np.array([1.0, 2.0], dtype=np.float32))
+
+
+def test_load_cohort_mif_sequential_and_parallel(monkeypatch) -> None:
+    cohort = pd.DataFrame(
+        {
+            'scalar_name': ['FA', 'FA', 'MD'],
+            'source_file': ['a', 'b', 'c'],
+        }
+    )
+
+    def _fake_mif_to_image(path):
+        mapping = {
+            'a': np.array([1.0, 2.0], dtype=np.float32),
+            'b': np.array([3.0, 4.0], dtype=np.float32),
+            'c': np.array([5.0, 6.0], dtype=np.float32),
+        }
+        return object(), mapping[str(path)]
+
+    monkeypatch.setattr(mif, 'mif_to_image', _fake_mif_to_image)
+    serial_scalars, serial_sources = mif.load_cohort_mif(cohort, s3_workers=1)
+    parallel_scalars, parallel_sources = mif.load_cohort_mif(cohort, s3_workers=2)
+
+    assert list(serial_sources['FA']) == ['a', 'b']
+    assert list(parallel_sources['MD']) == ['c']
+    np.testing.assert_array_equal(np.array(serial_scalars['FA']), np.array(parallel_scalars['FA']))
+    np.testing.assert_array_equal(np.array(serial_scalars['MD']), np.array(parallel_scalars['MD']))
+
+
+def test_gather_fixels_builds_tables(monkeypatch) -> None:
+    index_data = np.zeros((1, 2, 1, 2), dtype=np.float32)
+    index_data[0, 0, 0, 0] = 1
+    index_data[0, 0, 0, 1] = 0
+    index_data[0, 1, 0, 0] = 2
+    index_data[0, 1, 0, 1] = 1
+    directions = np.array(
+        [
+            [1.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0],
+            [0.0, 0.0, 1.0],
+        ],
+        dtype=np.float32,
+    )
+
+    def _fake_mif_to_image(path):
+        if str(path).endswith('index.mif'):
+            return object(), index_data
+        return object(), directions
+
+    monkeypatch.setattr(mif, 'mif_to_image', _fake_mif_to_image)
+    fixel_table, voxel_table = mif.gather_fixels('index.mif', 'directions.mif')
+
+    assert list(voxel_table.columns) == ['voxel_id', 'i', 'j', 'k']
+    assert list(fixel_table.columns) == ['fixel_id', 'voxel_id', 'x', 'y', 'z']
+    assert len(fixel_table) == 3
+    assert len(voxel_table) == 2
+
+
+def test_gather_fixels_raises_when_terminal_count_missing(monkeypatch) -> None:
+    index_data = np.zeros((1, 1, 1, 2), dtype=np.float32)
+    index_data[..., 1] = np.nan
+    directions = np.zeros((1, 3), dtype=np.float32)
+
+    def _fake_mif_to_image(path):
+        return object(), index_data if str(path).endswith('index.mif') else directions
+
+    monkeypatch.setattr(mif, 'mif_to_image', _fake_mif_to_image)
+    with pytest.raises(ValueError, match='Could not determine the final fixel count'):
+        mif.gather_fixels('index.mif', 'directions.mif')
+
+
+def test_write_mif_does_not_overwrite_existing_file(tmp_path: Path, caplog) -> None:
+    existing = tmp_path / 'already.mif'
+    existing.write_bytes(b'test')
+    template = SimpleNamespace(shape=(2,), header=mif.MifHeader(shape=(2,)), affine=np.eye(4))
+
+    with caplog.at_level('WARNING'):
+        h5_to_mif_module.write_mif(np.array([1.0, 2.0], dtype=np.float32), template, existing)
+
+    assert any('Output file already exists' in record.message for record in caplog.records)
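The gather_fixels tests above rely on the MRtrix fixel-directory layout of index.mif: a 4D image whose last axis holds, per voxel, the fixel count and the offset of that voxel's first row in the directions file. A short restatement of the arithmetic behind the expected table sizes (variable names are illustrative):

    import numpy as np

    # Same fabricated index as test_gather_fixels_builds_tables: the last axis
    # is (count, offset) per voxel, following the MRtrix fixel directory
    # convention.
    index_data = np.zeros((1, 2, 1, 2), dtype=np.float32)
    index_data[0, 0, 0] = (1, 0)  # voxel (0, 0, 0): one fixel starting at row 0
    index_data[0, 1, 0] = (2, 1)  # voxel (0, 1, 0): two fixels starting at row 1

    counts = index_data[..., 0]
    assert int(counts.sum()) == 3        # rows expected in fixel_table
    assert int((counts > 0).sum()) == 2  # rows expected in voxel_table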
+def test_h5_to_mif_writes_pvalue_and_inverse(monkeypatch, tmp_path: Path) -> None:
+    h5_path = tmp_path / 'results.h5'
+    with h5py.File(h5_path, 'w') as h5_file:
+        group = h5_file.require_group('results/lm')
+        group.create_dataset(
+            'results_matrix',
+            data=np.array([[0.25, 0.75], [2.0, 3.0]], dtype=np.float32),
+        )
+        group.create_dataset(
+            'column_names',
+            data=np.array(['p.value', 'effect size'], dtype=h5py.string_dtype('utf-8')),
+        )
+
+    template_img = SimpleNamespace(shape=(2,), header=mif.MifHeader(shape=(2,)), affine=np.eye(4))
+    monkeypatch.setattr(h5_to_mif_module, 'mif_to_image', lambda _: (template_img, np.array([0, 0])))
+
+    calls: list[tuple[np.ndarray, Path]] = []
+
+    def _fake_write_mif(arr, template_img, out_file):
+        calls.append((np.array(arr), Path(out_file)))
+
+    monkeypatch.setattr(h5_to_mif_module, 'write_mif', _fake_write_mif)
+    status = h5_to_mif_module.h5_to_mif(
+        example_mif='template.mif',
+        in_file=h5_path,
+        analysis_name='lm',
+        compress=False,
+        output_dir=tmp_path / 'out',
+    )
+
+    assert status == 0
+    written_names = sorted(path.name for _, path in calls)
+    assert written_names == ['lm_1m.p.value.mif', 'lm_effect_size.mif', 'lm_p.value.mif']
diff --git a/test/test_mif_image_unit.py b/test/test_mif_image_unit.py
new file mode 100644
index 0000000..2c7d530
--- /dev/null
+++ b/test/test_mif_image_unit.py
@@ -0,0 +1,111 @@
+"""Focused tests for MIF header/image read-write behavior."""
+
+from __future__ import annotations
+
+import io
+from pathlib import Path
+
+import nibabel as nb
+import numpy as np
+import pytest
+
+from modelarrayio.utils.mif_image import MifHeader, MifImage
+
+
+def test_mif_header_write_to_and_from_fileobj_round_trip() -> None:
+    transform = np.array(
+        [
+            [1.0, 0.0, 0.0, 10.0],
+            [0.0, 2.0, 0.0, 20.0],
+            [0.0, 0.0, 3.0, 30.0],
+        ],
+        dtype=np.float64,
+    )
+    header = MifHeader(
+        shape=(2, 3, 4),
+        zooms=(1.0, 2.0, 3.0),
+        layout=[-1, 2, 3],
+        dtype=np.dtype('<f4'),
+        transform=transform,
+    )
+
+    buffer = io.BytesIO()
+    header.write_to(buffer)
+    buffer.seek(0)
+    parsed = MifHeader.from_fileobj(buffer)
+
+    assert parsed.data_offset >= 0
+    assert parsed.get_data_shape() == (2, 3, 4)
+    assert parsed.get_layout() == [-1, 2, 3]
+    assert parsed.get_data_dtype() == np.dtype('<f4')
+    assert parsed.get_zooms()[:3] == (1.0, 2.0, 3.0)
+    np.testing.assert_allclose(parsed.get_transform(), transform)
+
+
+@pytest.mark.parametrize(
+    'header_text',
+    [
+        b'mrtrix image\nvox: 1\nlayout: +0\ndatatype: Float32LE\nEND\n',
+        b'mrtrix image\ndim: 2\nlayout: +0\ndatatype: Float32LE\nEND\n',
+        b'mrtrix image\ndim: 2\nvox: 1\ndatatype: Float32LE\nEND\n',
+        b'mrtrix image\ndim: 2\nvox: 1\nlayout: +0\nEND\n',
+    ],
+)
+def test_mif_header_missing_required_fields_raise(header_text: bytes) -> None:
+    with pytest.raises(ValueError):
+        MifHeader.from_fileobj(io.BytesIO(header_text))
+
+
+def test_mif_header_rejects_non_mif_magic() -> None:
+    with pytest.raises(ValueError, match='Not a MIF file'):
+        MifHeader.from_fileobj(io.BytesIO(b'not mif\n'))
+
+
+def test_mif_image_round_trip(tmp_path: Path) -> None:
+    data = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
+    affine = np.array(
+        [
+            [2.0, 0.0, 0.0, 1.0],
+            [0.0, 3.0, 0.0, 2.0],
+            [0.0, 0.0, 4.0, 3.0],
+            [0.0, 0.0, 0.0, 1.0],
+        ],
+        dtype=np.float64,
+    )
+    image = MifImage(data, affine)
+    out_file = tmp_path / 'roundtrip.mif'
+    image.to_filename(out_file)
+
+    loaded = MifImage.from_filename(str(out_file))
+    np.testing.assert_array_equal(loaded.get_fdata(dtype=np.float32), data)
+    np.testing.assert_allclose(loaded.affine, affine)
+
+
+def test_mif_image_from_file_map_requires_data_offset(tmp_path: Path) -> None:
+    bad = tmp_path / 'bad.mif'
+    bad.write_bytes(
+        b'mrtrix image\ndim: 1\nvox: 1\nlayout: +0\ndatatype: Float32LE\ntransform: 1,0,0,0\nEND\n'
+    )
+    file_map = {'image': nb.FileHolder(filename=str(bad))}
+    with pytest.raises(ValueError, match='Could not determine data offset'):
+        MifImage.from_file_map(file_map)
+
+
+def test_affine2header_updates_zooms_and_transform() -> None:
+    data = np.ones((2, 2, 2), dtype=np.float32)
+    affine = np.array(
+        [
+            [2.0, 0.0, 0.0, 5.0],
+            [0.0, 3.0, 0.0, 6.0],
+            [0.0, 0.0, 4.0, 7.0],
+            [0.0, 0.0, 0.0, 1.0],
+        ]
+    )
+    image = MifImage(data, affine)
+    image._affine2header()
+    zooms = image.header.get_zooms()
+    assert zooms[:3] == (2.0, 3.0, 4.0)
+    transform = image.header.get_transform()
+    np.testing.assert_allclose(transform[:, 3], [5.0, 6.0, 7.0])
diff --git
a/test/test_mif_to_h5_unit.py b/test/test_mif_to_h5_unit.py new file mode 100644 index 0000000..d33343e --- /dev/null +++ b/test/test_mif_to_h5_unit.py @@ -0,0 +1,82 @@ +"""Unit tests for mif_to_h5 branching behavior.""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest + +from modelarrayio.cli import mif_to_h5 + + +def _cohort() -> pd.DataFrame: + return pd.DataFrame( + { + 'scalar_name': ['FA', 'MD'], + 'source_file': ['fa.mif', 'md.mif'], + } + ) + + +def test_mif_to_h5_raises_when_sources_missing(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr( + mif_to_h5, + 'gather_fixels', + lambda index_file, directions_file: (pd.DataFrame(), pd.DataFrame()), + ) + monkeypatch.setattr(mif_to_h5, 'load_cohort_mif', lambda cohort_long, s3_workers: ({}, {})) + + with pytest.raises(ValueError, match='Unable to derive scalar sources'): + mif_to_h5.mif_to_h5('index.mif', 'directions.mif', _cohort(), output=tmp_path / 'out.h5') + + +def test_mif_to_h5_hdf5_split_outputs(monkeypatch, tmp_path: Path) -> None: + fixel_table = pd.DataFrame({'fixel_id': [0], 'voxel_id': [0], 'x': [1], 'y': [0], 'z': [0]}) + voxel_table = pd.DataFrame({'voxel_id': [0], 'i': [0], 'j': [0], 'k': [0]}) + scalars = {'FA': [np.array([1.0], dtype=np.float32)], 'MD': [np.array([2.0], dtype=np.float32)]} + sources = {'FA': ['fa.mif'], 'MD': ['md.mif']} + + monkeypatch.setattr(mif_to_h5, 'gather_fixels', lambda *_args, **_kwargs: (fixel_table, voxel_table)) + monkeypatch.setattr(mif_to_h5, 'load_cohort_mif', lambda *_args, **_kwargs: (scalars, sources)) + + status = mif_to_h5.mif_to_h5( + 'index.mif', + 'directions.mif', + _cohort(), + backend='hdf5', + output=tmp_path / 'fixels.h5', + split_outputs=True, + ) + assert status == 0 + assert (tmp_path / 'FA_fixels.h5').exists() + assert (tmp_path / 'MD_fixels.h5').exists() + + +def test_mif_to_h5_tiledb_parallel_and_split(monkeypatch, tmp_path: Path) -> None: + fixel_table = pd.DataFrame({'fixel_id': [0], 'voxel_id': [0], 'x': [1], 'y': [0], 'z': [0]}) + voxel_table = pd.DataFrame({'voxel_id': [0], 'i': [0], 'j': [0], 'k': [0]}) + scalars = {'FA': [np.array([1.0], dtype=np.float32)], 'MD': [np.array([2.0], dtype=np.float32)]} + sources = {'FA': ['fa.mif'], 'MD': ['md.mif']} + calls = [] + + monkeypatch.setattr(mif_to_h5, 'gather_fixels', lambda *_args, **_kwargs: (fixel_table, voxel_table)) + monkeypatch.setattr(mif_to_h5, 'load_cohort_mif', lambda *_args, **_kwargs: (scalars, sources)) + monkeypatch.setattr( + mif_to_h5.cli_utils, + 'write_tiledb_scalar_matrices', + lambda output, scalars, sources, **kwargs: calls.append((Path(output), scalars, sources)), + ) + + status = mif_to_h5.mif_to_h5( + 'index.mif', + 'directions.mif', + _cohort(), + backend='tiledb', + output=tmp_path / 'fixels.tdb', + workers=2, + split_outputs=True, + ) + assert status == 0 + assert sorted(path.name for path, _, _ in calls) == ['FA_fixels.tdb', 'MD_fixels.tdb'] diff --git a/test/test_tiledb_storage.py b/test/test_tiledb_storage.py new file mode 100644 index 0000000..073491d --- /dev/null +++ b/test/test_tiledb_storage.py @@ -0,0 +1,126 @@ +"""Unit tests for TileDB storage helpers.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import numpy as np +import pytest +import tiledb + +from modelarrayio.storage import tiledb_storage + + +def test_build_filter_list_variants() -> None: + no_filters = tiledb_storage._build_filter_list(None, None, shuffle=False) + assert isinstance(no_filters, 
tiledb.FilterList) + assert len(no_filters) == 0 + + zstd = tiledb_storage._build_filter_list('zstd', 9, shuffle=True) + assert len(zstd) >= 2 + + fallback = tiledb_storage._build_filter_list('not-a-codec', 'bad', shuffle=False) + assert len(fallback) == 0 + + +def test_create_empty_scalar_matrix_array_writes_metadata_and_overwrites(tmp_path: Path) -> None: + base = tmp_path / 'store.tdb' + uri = tiledb_storage.create_empty_scalar_matrix_array( + str(base), + 'scalars/FA/values', + n_files=2, + n_elements=3, + storage_dtype='float32', + compression='gzip', + compression_level=1, + shuffle=True, + tile_voxels=2, + target_tile_mb=0.5, + sources_list=['s1', 's2'], + ) + assert tiledb.object_type(uri) == 'array' + with tiledb.open(uri, 'r') as array: + assert json.loads(array.meta['column_names']) == ['s1', 's2'] + + uri_again = tiledb_storage.create_empty_scalar_matrix_array( + str(base), + 'scalars/FA/values', + n_files=2, + n_elements=3, + ) + assert uri_again == uri + assert tiledb.object_type(uri_again) == 'array' + + +def test_write_rows_in_column_stripes_round_trip(tmp_path: Path) -> None: + base = tmp_path / 'store.tdb' + uri = tiledb_storage.create_empty_scalar_matrix_array( + str(base), + 'scalars/FA/values', + n_files=3, + n_elements=5, + storage_dtype='float32', + tile_voxels=2, + ) + + rows = [ + np.array([1, 2, 3, 4, 5], dtype=np.float32), + np.array([6, 7, 8, 9, 10], dtype=np.float32), + np.array([11, 12, 13, 14, 15], dtype=np.float32), + ] + tiledb_storage.write_rows_in_column_stripes(uri, rows) + + with tiledb.open(uri, 'r') as array: + np.testing.assert_array_equal(array[:]['values'], np.vstack(rows)) + + +def test_write_rows_in_column_stripes_rejects_wrong_row_count(tmp_path: Path) -> None: + base = tmp_path / 'store.tdb' + uri = tiledb_storage.create_empty_scalar_matrix_array( + str(base), + 'scalars/FA/values', + n_files=2, + n_elements=3, + ) + with pytest.raises(ValueError, match='rows length does not match'): + tiledb_storage.write_rows_in_column_stripes(uri, [np.array([1, 2, 3], dtype=np.float32)]) + + +def test_write_parcel_names_and_column_names(tmp_path: Path) -> None: + base = tmp_path / 'store.tdb' + with pytest.raises(ValueError, match='must not be empty'): + tiledb_storage.write_parcel_names(str(base), 'parcels/parcel_id', []) + + tiledb_storage.write_parcel_names(str(base), 'parcels/parcel_id', ['P1', 'P2']) + parcel_uri = base / 'parcels' / 'parcel_id' + assert tiledb.object_type(str(parcel_uri)) == 'array' + with tiledb.open(str(parcel_uri), 'r') as array: + np.testing.assert_array_equal(array[:]['values'], np.array(['P1', 'P2'], dtype=object)) + + tiledb.group_create(str(base / 'scalars' / 'FA')) + tiledb_storage.write_column_names(str(base), 'FA', ['sub-1', 'sub-2']) + column_uri = base / 'scalars' / 'FA' / 'column_names' + with tiledb.open(str(column_uri), 'r') as array: + np.testing.assert_array_equal( + array[:]['values'], np.array(['sub-1', 'sub-2'], dtype=object) + ) + with tiledb.Group(str(base / 'scalars' / 'FA'), 'r') as group: + assert json.loads(group.meta['column_names']) == ['sub-1', 'sub-2'] + + +def test_create_scalar_matrix_array_writes_values_and_metadata(tmp_path: Path) -> None: + base = tmp_path / 'store.tdb' + values = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) + uri = tiledb_storage.create_scalar_matrix_array( + str(base), + 'scalars/MD/values', + values, + ['first', 'second'], + storage_dtype='float32', + compression='zstd', + compression_level=3, + ) + with tiledb.open(uri, 'r') as array: + 
np.testing.assert_array_equal(array[:]['values'], values) + assert json.loads(array.meta['column_names']) == ['first', 'second'] From b6fc79db10eb3890ba6ce638b1ea79e617ebf962 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Mon, 27 Apr 2026 09:35:15 -0400 Subject: [PATCH 2/2] Run ruff. --- test/test_cifti_to_h5_unit.py | 16 ++++++++++++---- test/test_cli_main_and_s3_utils.py | 5 ++++- test/test_cli_utils.py | 28 +++++++++++++++++++++------- test/test_mif_helpers.py | 4 +++- test/test_mif_image_unit.py | 2 +- test/test_mif_to_h5_unit.py | 18 ++++++++++++++---- test/test_tiledb_storage.py | 2 ++ 7 files changed, 57 insertions(+), 18 deletions(-) diff --git a/test/test_cifti_to_h5_unit.py b/test/test_cifti_to_h5_unit.py index 9fc3a68..a449c30 100644 --- a/test/test_cifti_to_h5_unit.py +++ b/test/test_cifti_to_h5_unit.py @@ -24,7 +24,9 @@ def test_cifti_to_h5_tiledb_split_outputs_and_parcels(monkeypatch, tmp_path: Pat monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: scalar_sources) monkeypatch.setattr( - cifti_to_h5, '_get_cifti_parcel_info', lambda _first: ('pscalar', {'parcel_id': np.array(['P1'])}) + cifti_to_h5, + '_get_cifti_parcel_info', + lambda _first: ('pscalar', {'parcel_id': np.array(['P1'])}), ) monkeypatch.setattr( cifti_to_h5, @@ -37,7 +39,9 @@ def test_cifti_to_h5_tiledb_split_outputs_and_parcels(monkeypatch, tmp_path: Pat monkeypatch.setattr( cifti_to_h5.cli_utils, 'write_tiledb_scalar_matrices', - lambda output, scalars, sources, **kwargs: write_calls.append((Path(output), scalars, sources)), + lambda output, scalars, sources, **kwargs: write_calls.append( + (Path(output), scalars, sources) + ), ) monkeypatch.setattr( cifti_to_h5.cli_utils, @@ -46,7 +50,9 @@ def test_cifti_to_h5_tiledb_split_outputs_and_parcels(monkeypatch, tmp_path: Pat ) status = cifti_to_h5.cifti_to_h5( - cohort_long=pd.DataFrame({'scalar_name': ['FA', 'MD'], 'source_file': ['fa1.nii', 'md1.nii']}), + cohort_long=pd.DataFrame( + {'scalar_name': ['FA', 'MD'], 'source_file': ['fa1.nii', 'md1.nii']} + ), backend='tiledb', output=tmp_path / 'store.tdb', workers=2, @@ -66,7 +72,9 @@ def test_cifti_to_h5_hdf5_split_outputs_for_dscalar(monkeypatch, tmp_path: Path) monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: scalar_sources) monkeypatch.setattr(cifti_to_h5, '_get_cifti_parcel_info', lambda _first: ('dscalar', {})) - monkeypatch.setattr(cifti_to_h5, 'load_cohort_cifti', lambda _cohort, _workers: (scalars, ['Left', 'Right'])) + monkeypatch.setattr( + cifti_to_h5, 'load_cohort_cifti', lambda _cohort, _workers: (scalars, ['Left', 'Right']) + ) monkeypatch.setattr( cifti_to_h5, 'brain_names_to_dataframe', diff --git a/test/test_cli_main_and_s3_utils.py b/test/test_cli_main_and_s3_utils.py index 87a0a21..16805d1 100644 --- a/test/test_cli_main_and_s3_utils.py +++ b/test/test_cli_main_and_s3_utils.py @@ -113,6 +113,9 @@ def get_object(self, **kwargs): assert kwargs['Key'] == 'key.nii.gz' return {'Body': _FakeBody()} - monkeypatch.setattr(s3_utils, '_make_s3_client', lambda: _FakeClient()) + def _fake_make_s3_client(): + return _FakeClient() + + monkeypatch.setattr(s3_utils, '_make_s3_client', _fake_make_s3_client) loaded = s3_utils.load_nibabel('s3://bucket/key.nii.gz') np.testing.assert_array_equal(loaded.get_fdata(), data) diff --git a/test/test_cli_utils.py b/test/test_cli_utils.py index fb94da4..7d7aeee 100644 --- a/test/test_cli_utils.py +++ b/test/test_cli_utils.py @@ -8,7 +8,6 @@ import h5py import numpy as np import pandas as pd -import pytest from 
modelarrayio.cli import utils as cli_utils @@ -54,7 +53,10 @@ def test_write_hdf5_scalar_matrices_skips_empty_rows_and_writes_values(tmp_path: cli_utils.write_hdf5_scalar_matrices( h5_file, scalars={ - 'FA': [np.array([1.0, 2.0], dtype=np.float32), np.array([3.0, 4.0], dtype=np.float32)], + 'FA': [ + np.array([1.0, 2.0], dtype=np.float32), + np.array([3.0, 4.0], dtype=np.float32), + ], 'MD': [], }, sources_by_scalar={'FA': ['sub-1', 'sub-2'], 'MD': []}, @@ -73,7 +75,9 @@ def test_write_hdf5_scalar_matrices_skips_empty_rows_and_writes_values(tmp_path: np.testing.assert_array_equal(values, np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)) -def test_write_tiledb_scalar_matrices_calls_column_name_writer(monkeypatch, tmp_path: Path) -> None: +def test_write_tiledb_scalar_matrices_calls_column_name_writer( + monkeypatch, tmp_path: Path +) -> None: called = {'create': [], 'columns': [], 'write': []} def _fake_create(*args, **kwargs): @@ -114,7 +118,9 @@ def test_write_hdf5_and_tiledb_parcel_arrays(monkeypatch, tmp_path: Path) -> Non cli_utils.write_hdf5_parcel_arrays(h5_file, parcel_arrays) with h5py.File(h5_path, 'r') as h5_file: - np.testing.assert_array_equal(h5_file['parcels/parcel_id'][...].astype(str), np.array(['A', 'B'])) + np.testing.assert_array_equal( + h5_file['parcels/parcel_id'][...].astype(str), np.array(['A', 'B']) + ) calls = [] @@ -144,14 +150,20 @@ def test_read_result_names_prefers_attrs_then_fallback_paths(tmp_path: Path, cap ) matrix.attrs['colnames'] = np.array([b'first', b'second']) - assert cli_utils.read_result_names(h5_file, 'lm', matrix, logger=logger) == ['first', 'second'] + assert cli_utils.read_result_names(h5_file, 'lm', matrix, logger=logger) == [ + 'first', + 'second', + ] del matrix.attrs['colnames'] group.create_dataset( 'column_names', data=np.array(['alpha', 'beta'], dtype=h5py.string_dtype('utf-8')), ) - assert cli_utils.read_result_names(h5_file, 'lm', matrix, logger=logger) == ['alpha', 'beta'] + assert cli_utils.read_result_names(h5_file, 'lm', matrix, logger=logger) == [ + 'alpha', + 'beta', + ] with h5py.File(h5_path, 'a') as h5_file: group = h5_file.require_group('results/lm_nested') @@ -163,7 +175,9 @@ def test_read_result_names_prefers_attrs_then_fallback_paths(tmp_path: Path, cap 'column_names', data=np.array(['gamma'], dtype=h5py.string_dtype('utf-8')), ) - assert cli_utils.read_result_names(h5_file, 'lm_nested', matrix, logger=logger) == ['gamma'] + assert cli_utils.read_result_names(h5_file, 'lm_nested', matrix, logger=logger) == [ + 'gamma' + ] del h5_file['results/lm_nested/results_matrix/column_names'] with caplog.at_level(logging.WARNING): diff --git a/test/test_mif_helpers.py b/test/test_mif_helpers.py index 59112c6..2f70ba5 100644 --- a/test/test_mif_helpers.py +++ b/test/test_mif_helpers.py @@ -122,7 +122,9 @@ def test_h5_to_mif_writes_pvalue_and_inverse(monkeypatch, tmp_path: Path) -> Non ) template_img = SimpleNamespace(shape=(2,), header=mif.MifHeader(shape=(2,)), affine=np.eye(4)) - monkeypatch.setattr(h5_to_mif_module, 'mif_to_image', lambda _: (template_img, np.array([0, 0]))) + monkeypatch.setattr( + h5_to_mif_module, 'mif_to_image', lambda _: (template_img, np.array([0, 0])) + ) calls: list[tuple[np.ndarray, Path]] = [] diff --git a/test/test_mif_image_unit.py b/test/test_mif_image_unit.py index 2c7d530..e166e3b 100644 --- a/test/test_mif_image_unit.py +++ b/test/test_mif_image_unit.py @@ -54,7 +54,7 @@ def test_mif_header_write_to_and_from_fileobj_round_trip() -> None: ], ) def 
test_mif_header_missing_required_fields_raise(header_text: bytes) -> None: - with pytest.raises(ValueError): + with pytest.raises(ValueError, match=r'Missing "(dim|vox|datatype|layout)" in MIF header'): MifHeader.from_fileobj(io.BytesIO(header_text)) diff --git a/test/test_mif_to_h5_unit.py b/test/test_mif_to_h5_unit.py index d33343e..23b9784 100644 --- a/test/test_mif_to_h5_unit.py +++ b/test/test_mif_to_h5_unit.py @@ -35,10 +35,15 @@ def test_mif_to_h5_raises_when_sources_missing(monkeypatch, tmp_path: Path) -> N def test_mif_to_h5_hdf5_split_outputs(monkeypatch, tmp_path: Path) -> None: fixel_table = pd.DataFrame({'fixel_id': [0], 'voxel_id': [0], 'x': [1], 'y': [0], 'z': [0]}) voxel_table = pd.DataFrame({'voxel_id': [0], 'i': [0], 'j': [0], 'k': [0]}) - scalars = {'FA': [np.array([1.0], dtype=np.float32)], 'MD': [np.array([2.0], dtype=np.float32)]} + scalars = { + 'FA': [np.array([1.0], dtype=np.float32)], + 'MD': [np.array([2.0], dtype=np.float32)], + } sources = {'FA': ['fa.mif'], 'MD': ['md.mif']} - monkeypatch.setattr(mif_to_h5, 'gather_fixels', lambda *_args, **_kwargs: (fixel_table, voxel_table)) + monkeypatch.setattr( + mif_to_h5, 'gather_fixels', lambda *_args, **_kwargs: (fixel_table, voxel_table) + ) monkeypatch.setattr(mif_to_h5, 'load_cohort_mif', lambda *_args, **_kwargs: (scalars, sources)) status = mif_to_h5.mif_to_h5( @@ -57,11 +62,16 @@ def test_mif_to_h5_hdf5_split_outputs(monkeypatch, tmp_path: Path) -> None: def test_mif_to_h5_tiledb_parallel_and_split(monkeypatch, tmp_path: Path) -> None: fixel_table = pd.DataFrame({'fixel_id': [0], 'voxel_id': [0], 'x': [1], 'y': [0], 'z': [0]}) voxel_table = pd.DataFrame({'voxel_id': [0], 'i': [0], 'j': [0], 'k': [0]}) - scalars = {'FA': [np.array([1.0], dtype=np.float32)], 'MD': [np.array([2.0], dtype=np.float32)]} + scalars = { + 'FA': [np.array([1.0], dtype=np.float32)], + 'MD': [np.array([2.0], dtype=np.float32)], + } sources = {'FA': ['fa.mif'], 'MD': ['md.mif']} calls = [] - monkeypatch.setattr(mif_to_h5, 'gather_fixels', lambda *_args, **_kwargs: (fixel_table, voxel_table)) + monkeypatch.setattr( + mif_to_h5, 'gather_fixels', lambda *_args, **_kwargs: (fixel_table, voxel_table) + ) monkeypatch.setattr(mif_to_h5, 'load_cohort_mif', lambda *_args, **_kwargs: (scalars, sources)) monkeypatch.setattr( mif_to_h5.cli_utils, diff --git a/test/test_tiledb_storage.py b/test/test_tiledb_storage.py index 073491d..603e80e 100644 --- a/test/test_tiledb_storage.py +++ b/test/test_tiledb_storage.py @@ -98,6 +98,8 @@ def test_write_parcel_names_and_column_names(tmp_path: Path) -> None: with tiledb.open(str(parcel_uri), 'r') as array: np.testing.assert_array_equal(array[:]['values'], np.array(['P1', 'P2'], dtype=object)) + # Some TileDB builds do not implicitly create missing parent directories. + (base / 'scalars').mkdir(parents=True, exist_ok=True) tiledb.group_create(str(base / 'scalars' / 'FA')) tiledb_storage.write_column_names(str(base), 'FA', ['sub-1', 'sub-2']) column_uri = base / 'scalars' / 'FA' / 'column_names'
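For reference, the one non-test change in this series is the NumPy 2.0 compatibility fix in src/modelarrayio/storage/tiledb_storage.py: np.unicode_ was removed in NumPy 2.0, while np.str_ is spelled the same way on both NumPy 1.x and 2.x. The declaration the new TileDB tests exercise, shown standalone:

    import numpy as np
    import tiledb

    # Variable-length string attribute for the parcel/column name arrays;
    # np.str_ maps to TileDB's string type and, unlike np.unicode_, still
    # exists under NumPy 2.0.
    attr_values = tiledb.Attr(name='values', dtype=np.str_)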