From aae286d3c5b5a006921007cd4c00bdc7ee7beef9 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Tue, 19 May 2026 18:44:13 +0200 Subject: [PATCH] Add autoskip option to hexdump --- dissect/cstruct/utils.py | 42 ++++++++++++++++--- tests/test_utils.py | 88 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 6 deletions(-) diff --git a/dissect/cstruct/utils.py b/dissect/cstruct/utils.py index 6c35fd0..30855ca 100644 --- a/dissect/cstruct/utils.py +++ b/dissect/cstruct/utils.py @@ -94,7 +94,13 @@ def _human_colors() -> dict[str, str]: def _hexdump( - data: bytes, palette: Palette | None = None, offset: int = 0, prefix: str = "", pretty: bool | None = False + data: bytes, + *, + palette: Palette | None = None, + offset: int = 0, + prefix: str = "", + pretty: bool | None = False, + autoskip: bool = False, ) -> Iterator[str]: """Hexdump some data. @@ -104,6 +110,7 @@ def _hexdump( offset: Byte offset of the hexdump. prefix: Optional prefix. pretty: Use pretty colors, mutual exclusive with palette. + autoskip: Collapse repeated interior NUL-lines into a single '*' (each run's first NUL-line is kept). """ if palette: palette = palette[::-1] @@ -114,6 +121,9 @@ def _hexdump( remaining = 0 active = None + in_null_run = False + in_collapsed_null_run = False + last_offset = len(data) - 16 for i in range(0, len(data), 16): values = "" @@ -166,17 +176,32 @@ def _hexdump( if j == 7: values += " " + if autoskip and 0 < i < last_offset and data[i : i + 16] == b"\x00" * 16: + if in_null_run: + if not in_collapsed_null_run: + yield "*" + in_collapsed_null_run = True + continue + + # Keep the first interior NUL line visible, collapse from the second onwards. 
+ in_null_run = True + else: + in_null_run = False + in_collapsed_null_run = False + chars = "".join(chars) yield f"{prefix}{offset + i:08x} {values:48s} {chars}" def hexdump( data: bytes, + *, palette: Palette | None = None, offset: int = 0, prefix: str = "", output: str = "print", pretty: bool | None = None, + autoskip: bool = False, ) -> Iterator[str] | str | None: """Hexdump some data. @@ -190,6 +215,7 @@ def hexdump( prefix: Optional prefix. output: Output format, can be 'print', 'generator' or 'string'. pretty: Use pretty colors for improved human readability. + autoskip: Collapse repeated interior NUL-lines into a single '*' (each run's first NUL-line is kept). """ # Enable pretty colors by default if ... if ( @@ -200,7 +226,7 @@ def hexdump( ): pretty = True - generator = _hexdump(data, palette, offset, prefix, pretty) + generator = _hexdump(data, palette=palette, offset=offset, prefix=prefix, pretty=pretty, autoskip=autoskip) if output == "print": print("\n".join(generator)) return None @@ -217,6 +243,7 @@ def _dumpstruct( offset: int, color: bool, output: str, + autoskip: bool, ) -> str | None: palette = [] colors = [ @@ -258,11 +285,11 @@ def _dumpstruct( if output == "print": print() - hexdump(data, palette, offset=offset) + hexdump(data, palette=palette, offset=offset, autoskip=autoskip) print() print(out) elif output == "string": - return f"\n{hexdump(data, palette, offset=offset, output='string')}\n\n{out}" + return f"\n{hexdump(data, palette=palette, offset=offset, output='string', autoskip=autoskip)}\n\n{out}" return None @@ -271,6 +298,7 @@ def dumpstruct( data: bytes | None = None, offset: int = 0, color: bool = True, + autoskip: bool = False, output: str = "print", ) -> str | None: """Dump a structure or parsed structure instance. @@ -281,15 +309,17 @@ def dumpstruct( obj: Structure to dump. data: Bytes to parse the Structure on, if obj is not a parsed Structure already. offset: Byte offset of the hexdump. + color: Colorize the hexdump and structure output. 
+ autoskip: Collapse repeated interior NUL-lines into a single '*' (each run's first NUL-line is kept). output: Output format, can be 'print' or 'string'. """ if output not in ("print", "string"): raise ValueError(f"Invalid output argument: {output!r} (should be 'print' or 'string').") if isinstance(obj, Structure): - return _dumpstruct(obj, obj.dumps(), offset, color, output) + return _dumpstruct(obj, obj.dumps(), offset, color, output, autoskip) if issubclass(obj, Structure) and data is not None: - return _dumpstruct(obj(data), data, offset, color, output) + return _dumpstruct(obj(data), data, offset, color, output, autoskip) raise ValueError("Invalid arguments") diff --git a/tests/test_utils.py b/tests/test_utils.py index 0daa790..db10d3a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -53,6 +53,94 @@ def test_hexdump_pretty(capsys: pytest.CaptureFixture) -> None: ) +def test_hexdump_autoskip_collapses_middle_null_run() -> None: + """Keep first interior NUL line, then collapse the rest of that run to '*'.""" + # Layout: [A data line] [3 NUL lines] [B data line] + # Expected: [A line] [first NUL line] [*] [B line] = 4 lines + data = (b"A" * 16) + (b"\x00" * 48) + (b"B" * 16) + + out = utils.hexdump(data, output="string", pretty=False, autoskip=True) + assert out is not None + + lines = out.splitlines() + assert len(lines) == 4 + assert lines[0].startswith("00000000") # A line + assert lines[1].startswith("00000010") # First NUL line is kept + assert lines[2] == "*" # Remaining NUL lines collapsed + assert lines[3].startswith("00000040") # B line + + +def test_hexdump_autoskip_keeps_edge_null_lines() -> None: + """Do not collapse first/last hexdump lines even when they are all NUL bytes.""" + # Layout: [3 NUL lines] + # Expected: all lines are kept (only one interior line, so nothing is repeated there) + data = b"\x00" * 48 + + out = utils.hexdump(data, output="string", pretty=False, autoskip=True) + assert out is not None + + lines = out.splitlines() + assert len(lines) == 3 + assert 
lines[0].startswith("00000000") # First line kept (edge) + assert lines[1].startswith("00000010") # Single interior NUL line is kept + assert lines[2].startswith("00000020") # Last line kept (edge) + + +def test_hexdump_autoskip_separate_null_runs() -> None: + """Emit one '*' per interior NUL run, after keeping each run's first interior NUL line.""" + # Layout: [A data] [2 NUL lines] [B data] [2 NUL lines] [C data] + # Expected: [A line] [NUL line] [*] [B line] [NUL line] [*] [C line] = 7 lines + data = (b"A" * 16) + (b"\x00" * 32) + (b"B" * 16) + (b"\x00" * 32) + (b"C" * 16) + + out = utils.hexdump(data, output="string", pretty=False, autoskip=True) + assert out is not None + + lines = out.splitlines() + assert len(lines) == 7 + assert lines[0].startswith("00000000") # A line + assert lines[1].startswith("00000010") # First NUL line of first run kept + assert lines[2] == "*" # Remaining NUL lines of first run collapsed + assert lines[3].startswith("00000030") # B line + assert lines[4].startswith("00000040") # First NUL line of second run kept + assert lines[5] == "*" # Remaining NUL lines of second run collapsed + assert lines[6].startswith("00000060") # C line + + +def test_hexdump_autoskip_single_interior_null_line_is_not_collapsed() -> None: + """Keep a single interior all-NUL line visible when autoskip is enabled.""" + # Layout: [A data line] [1 NUL line] [B data line] + # Expected: [A line] [NUL line] [B line] = 3 lines (no repeated interior NUL line) + data = (b"A" * 16) + (b"\x00" * 16) + (b"B" * 16) + + out = utils.hexdump(data, output="string", pretty=False, autoskip=True) + assert out is not None + + lines = out.splitlines() + assert len(lines) == 3 + assert lines[0].startswith("00000000") # A line + assert lines[1].startswith("00000010") # Single NUL line is kept + assert lines[2].startswith("00000020") # B line + + +def test_hexdump_autoskip_false_does_not_collapse() -> None: + """Keep all lines expanded and never emit '*' when autoskip is 
disabled.""" + # Layout: [A data line] [3 NUL lines] [B data line] + # Expected: [A line] [NUL line] [NUL line] [NUL line] [B line] = 5 lines (no * when disabled) + data = (b"A" * 16) + (b"\x00" * 48) + (b"B" * 16) + + out = utils.hexdump(data, output="string", pretty=False, autoskip=False) + assert out is not None + + lines = out.splitlines() + assert len(lines) == 5 + assert all(line != "*" for line in lines) # No * when autoskip disabled + assert lines[0].startswith("00000000") # A line + assert lines[1].startswith("00000010") # First NUL line (expanded) + assert lines[2].startswith("00000020") # Second NUL line (expanded) + assert lines[3].startswith("00000030") # Third NUL line (expanded) + assert lines[4].startswith("00000040") # B line + + def test_hexdump_pretty_print_conditions(capsys: pytest.CaptureFixture, monkeypatch: pytest.MonkeyPatch) -> None: """Test if we respec the ``NO_COLOR`` environment variable and ``pretty=False`` argument.""" # Test regular print output behavior