PennLINC · tsalo · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/src/modelarrayio/storage/tiledb_storage.py b/src/modelarrayio/storage/tiledb_storage.py
@@ -352,7 +352,8 @@ def write_parcel_names(base_uri: str, array_path: str, names: Sequence[str]):
         name='idx', domain=(0, max(n - 1, 0)), tile=max(1, min(n, 1024)), dtype=np.int64
     )
     dom = tiledb.Domain(dim_idx)
-    attr_values = tiledb.Attr(name='values', dtype=np.unicode_)
+    # np.unicode_ was removed in NumPy 2.0; np.str_ is the compatible string scalar.
+    attr_values = tiledb.Attr(name='values', dtype=np.str_)
     schema = tiledb.ArraySchema(domain=dom, attrs=[attr_values], sparse=False)
 
     if tiledb.object_type(uri):

diff --git a/test/conftest.py b/test/conftest.py
@@ -51,4 +51,4 @@ def downloaded_fixel_data_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
     try:
         return _download_and_extract_fixel_test_data(destination_dir)
     except (FileNotFoundError, OSError, URLError, tarfile.TarError) as exc:
-        raise RuntimeError(f'Downloaded fixel test data unavailable: {exc}') from exc
+        pytest.skip(f'Downloaded fixel test data unavailable: {exc}')
diff --git a/test/test_cifti_to_h5_unit.py b/test/test_cifti_to_h5_unit.py
@@ -0,0 +1,91 @@
+"""Focused unit tests for cifti_to_h5 branch coverage."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from modelarrayio.cli import cifti_to_h5
+
+
+def test_cifti_to_h5_raises_when_scalar_sources_missing(monkeypatch) -> None:
+    monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: {})
+    with pytest.raises(ValueError, match='Unable to derive scalar sources'):
+        cifti_to_h5.cifti_to_h5(pd.DataFrame(), output=Path('out.h5'))
+
+
+def test_cifti_to_h5_tiledb_split_outputs_and_parcels(monkeypatch, tmp_path: Path) -> None:
+    scalar_sources = {'FA': ['fa1.nii'], 'MD': ['md1.nii']}
+    write_calls = []
+    parcel_calls = []
+
+    monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: scalar_sources)
+    monkeypatch.setattr(
+        cifti_to_h5,
+        '_get_cifti_parcel_info',
+        lambda _first: ('pscalar', {'parcel_id': np.array(['P1'])}),
+    )
+    monkeypatch.setattr(
+        cifti_to_h5,
+        'extract_cifti_scalar_data',
+        lambda source_file, reference_brain_names=None: (
+            np.array([1.0, 2.0], dtype=np.float32),
+            ['brain-a'],
+        ),
+    )
+    monkeypatch.setattr(
+        cifti_to_h5.cli_utils,
+        'write_tiledb_scalar_matrices',
+        lambda output, scalars, sources, **kwargs: write_calls.append(
+            (Path(output), scalars, sources)
+        ),
+    )
+    monkeypatch.setattr(
+        cifti_to_h5.cli_utils,
+        'write_tiledb_parcel_arrays',
+        lambda output, parcels: parcel_calls.append((Path(output), parcels)),
+    )
+
+    status = cifti_to_h5.cifti_to_h5(
+        cohort_long=pd.DataFrame(
+            {'scalar_name': ['FA', 'MD'], 'source_file': ['fa1.nii', 'md1.nii']}
+        ),
+        backend='tiledb',
+        output=tmp_path / 'store.tdb',
+        workers=2,
+        split_outputs=True,
+    )
+
+    assert status == 0
+    assert len(write_calls) == 2
+    assert sorted(path.name for path, _, _ in write_calls) == ['FA_store.tdb', 'MD_store.tdb']
+    assert len(parcel_calls) == 2
+    assert sorted(path.name for path, _ in parcel_calls) == ['FA_store.tdb', 'MD_store.tdb']
+
+
+def test_cifti_to_h5_hdf5_split_outputs_for_dscalar(monkeypatch, tmp_path: Path) -> None:
+    scalar_sources = {'FA': ['fa1.nii']}
+    scalars = {'FA': [np.array([1.0, 2.0], dtype=np.float32)]}
+
+    monkeypatch.setattr(cifti_to_h5, 'build_scalar_sources', lambda _cohort: scalar_sources)
+    monkeypatch.setattr(cifti_to_h5, '_get_cifti_parcel_info', lambda _first: ('dscalar', {}))
+    monkeypatch.setattr(
+        cifti_to_h5, 'load_cohort_cifti', lambda _cohort, _workers: (scalars, ['Left', 'Right'])
+    )
+    monkeypatch.setattr(
+        cifti_to_h5,
+        'brain_names_to_dataframe',
+        lambda _brain_names: (pd.DataFrame({'i': [0, 1]}), ['Ctx']),
+    )
+
+    status = cifti_to_h5.cifti_to_h5(
+        cohort_long=pd.DataFrame({'scalar_name': ['FA'], 'source_file': ['fa1.nii']}),
+        backend='hdf5',
+        output=tmp_path / 'grey.h5',
+        split_outputs=True,
+    )
+    assert status == 0
+    assert (tmp_path / 'FA_grey.h5').exists()
diff --git a/test/test_cli_main_and_s3_utils.py b/test/test_cli_main_and_s3_utils.py
@@ -0,0 +1,121 @@
+"""Unit tests for CLI entrypoint and S3 utilities."""
+
+from __future__ import annotations
+
+import gzip
+import sys
+import types
+
+import nibabel as nb
+import numpy as np
+import pytest
+
+from modelarrayio.cli import main as cli_main
+from modelarrayio.utils import s3_utils
+
+
+def test_main_prints_help_and_returns_1(capsys) -> None:
+    status = cli_main.main([])
+    captured = capsys.readouterr()
+    assert status == 1
+    assert 'usage:' in captured.out
+
+
+def test_main_dispatches_to_selected_subcommand(monkeypatch) -> None:
+    def _fake_run(**kwargs):
+        assert kwargs['value'] == 'ok'
+        return 7
+
+    parser = cli_main._get_parser()
+    parser.add_argument('--value', required=True)
+    parser.set_defaults(func=_fake_run)
+    monkeypatch.setattr(cli_main, '_get_parser', lambda: parser)
+
+    assert cli_main.main(['--value', 'ok']) == 7
+
+
+def test_get_version_fallbacks(monkeypatch) -> None:
+    fake_about = types.ModuleType('modelarrayio.__about__')
+    monkeypatch.setitem(sys.modules, 'modelarrayio.__about__', fake_about)
+    monkeypatch.setattr(cli_main, 'version', lambda _: '1.2.3')
+    assert cli_main._get_version() == '1.2.3'
+
+    monkeypatch.setattr(
+        cli_main,
+        'version',
+        lambda _: (_ for _ in ()).throw(cli_main.PackageNotFoundError('missing')),
+    )
+    assert cli_main._get_version() == '0+unknown'
+
+
+def test_make_s3_client_anon_and_signed(monkeypatch) -> None:
+    calls = []
+
+    class _FakeBoto3:
+        @staticmethod
+        def client(service, **kwargs):
+            calls.append((service, kwargs))
+            return ('client', kwargs)
+
+    fake_config_module = types.SimpleNamespace(Config=lambda **kwargs: ('cfg', kwargs))
+    fake_botocore = types.SimpleNamespace(UNSIGNED='unsigned')
+    monkeypatch.setitem(__import__('sys').modules, 'boto3', _FakeBoto3)
+    monkeypatch.setitem(__import__('sys').modules, 'botocore', fake_botocore)
+    monkeypatch.setitem(__import__('sys').modules, 'botocore.config', fake_config_module)
+
+    monkeypatch.setenv('MODELARRAYIO_S3_ANON', '1')
+    s3_utils._make_s3_client()
+    assert calls[0][0] == 's3'
+    assert 'config' in calls[0][1]
+
+    monkeypatch.setenv('MODELARRAYIO_S3_ANON', '0')
+    s3_utils._make_s3_client()
+    assert calls[1] == ('s3', {})
+
+
+def test_make_s3_client_requires_boto3(monkeypatch) -> None:
+    import builtins
+
+    real_import = builtins.__import__
+
+    def _fake_import(name, *args, **kwargs):
+        if name == 'boto3':
+            raise ImportError('no boto3')
+        return real_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, '__import__', _fake_import)
+    with pytest.raises(ImportError, match='boto3 is required'):
+        s3_utils._make_s3_client()
+
+
+def test_load_nibabel_local_path(monkeypatch, tmp_path) -> None:
+    nifti_path = tmp_path / 'image.nii.gz'
+    data = np.zeros((2, 2, 2), dtype=np.float32)
+    nb.Nifti1Image(data, np.eye(4)).to_filename(nifti_path)
+    loaded = s3_utils.load_nibabel(str(nifti_path))
+    np.testing.assert_array_equal(loaded.get_fdata(), data)
+
+
+def test_load_nibabel_from_s3_bytes(monkeypatch, tmp_path) -> None:
+    data = np.arange(8, dtype=np.float32).reshape(2, 2, 2)
+    image = nb.Nifti1Image(data, np.eye(4))
+    file_path = tmp_path / 'local.nii'
+    image.to_filename(file_path)
+    raw = gzip.compress(file_path.read_bytes())
+
+    class _FakeBody:
+        def read(self):
+            return raw
+
+    class _FakeClient:
+        def get_object(self, **kwargs):
+            assert kwargs['Bucket'] == 'bucket'
+            assert kwargs['Key'] == 'key.nii.gz'
+            return {'Body': _FakeBody()}
+
+    def _fake_make_s3_client():
+        return _FakeClient()
+
+    monkeypatch.setattr(s3_utils, '_make_s3_client', _fake_make_s3_client)
+    loaded = s3_utils.load_nibabel('s3://bucket/key.nii.gz')
+    np.testing.assert_array_equal(loaded.get_fdata(), data)