diff --git a/CHANGELOG.md b/CHANGELOG.md index 684a09dbd6..ed84733985 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Added - **GFQL schema effects (#1485)**: Added an internal typed schema-effect model for graph-growing GFQL calls so bound experimental `GraphSchema` snapshots are updated after successful degree, PageRank-style node-property writes, and edge-property write calls. Later local validation can see properties added by those calls without exposing a public `SchemaEffect` API or changing remote GFQL transport. +- **GFQL schema pretty-printer (#1633)**: Added experimental `GraphSchema.pretty()`, `NodeType.pretty()`, `EdgeType.pretty()`, `EdgeTopology.pretty()`, compact `__repr__` output, and `graphistry.schema.pretty_print_schema()` for Cypher-style, YAML-style, and single-line schema rendering in prompts, logs, and notebooks. - **GFQL NetworkX CALL parity (#1058)**: Expanded the local Cypher `graphistry.nx.*` CALL surface with explicit NetworkX dispatch for `degree_centrality`, `closeness_centrality`, `eigenvector_centrality`, `katz_centrality`, `connected_components`, `strongly_connected_components`, `core_number`, and multi-output `hits`, including row and `.write()` coverage. - **NetworkX/SciPy optional dependency policy (#1618)**: Declared supported `networkx>=2.5,<4` and optional `scipy>=1.5,<2` ranges for NetworkX-backed GFQL CALL procedures, with runtime version guards and a focused lower/current-upper CI matrix. - **GFQL schema Arrow boundary APIs (#1339)**: Added experimental public schema↔Arrow import/export helpers, graph-level Arrow declaration payloads, and opt-in `schema_validate='strict'|'autofix'` enforcement for `plot()`, `upload()`, `to_arrow()`, and `validate_arrow_schema()` when a `GraphSchema` is bound. diff --git a/docs/source/gfql/schema.rst b/docs/source/gfql/schema.rst index 9d99b41f41..e4dcdeaf61 100644 --- a/docs/source/gfql/schema.rst +++ b/docs/source/gfql/schema.rst @@ -112,6 +112,54 @@ Schema Objects are useful for dataframe boundary validation; the per-type entries preserve type names and edge topology. +Pretty-Printing Schemas +----------------------- + +Use ``schema.pretty()`` or ``repr(schema)`` when a prompt, log, or notebook +cell needs the declared graph contract without the verbose dataclass wrapper. +The default ``"cypher"`` format is compact and LLM-friendly: + +.. code-block:: python + + from graphistry.schema import pretty_print_schema + + print(schema.pretty()) + print(pretty_print_schema(schema)) + +.. code-block:: text + + (:Person {id: int64!, name: string}) + (:Company {id: int64!, name: string}) + (:Person)-[:WORKS_AT {since: int64!}]->(:Company) + +The ``!`` suffix marks declared non-null Arrow fields. + +``schema.pretty("yaml")`` renders an indented debugging view: + +.. code-block:: yaml + + strict: true + node_id_column: id + edge_columns: + source: src + destination: dst + nodes: + Person: + labels: Person + properties: + id: int64! + name: string + relationships: + WORKS_AT: + from: Person + to: Company + properties: + since: int64! + +``schema.pretty("compact")`` returns a single-line summary such as +``GraphSchema(2 node types, 1 edge type, 5 properties)``. ``NodeType``, +``EdgeType``, and ``EdgeTopology`` support the same three formats. + What Preflight Checks --------------------- diff --git a/graphistry/__init__.py b/graphistry/__init__.py index 988cf0f10d..93e5f3e9f6 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -138,6 +138,7 @@ EdgeType, GraphSchema, NodeType, + pretty_print_schema, ) from graphistry.privacy import ( diff --git a/graphistry/schema.py b/graphistry/schema.py index 203e08c771..7cc3f6382f 100644 --- a/graphistry/schema.py +++ b/graphistry/schema.py @@ -7,7 +7,8 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Dict, FrozenSet, Iterable, Mapping, Optional, Tuple, Union, cast +import re +from typing import Any, Dict, FrozenSet, Iterable, Literal, Mapping, Optional, Tuple, Union, cast from graphistry.compute.gfql.ir.compilation import GraphSchemaCatalog from graphistry.compute.gfql.ir.arrow_bridge import CoercionMode, from_arrow, to_arrow @@ -18,6 +19,113 @@ NodeRefInput = Union["NodeType", str, Iterable[str]] PropertySchemaInput = Union[Mapping[str, Any], RowSchema, Any] GraphArrowDeclaration = Mapping[str, Any] +SchemaPrettyFormat = Literal["cypher", "yaml", "compact"] + +_PRETTY_FORMATS: Tuple[SchemaPrettyFormat, ...] = ("cypher", "yaml", "compact") +_CYPHER_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") +_YAML_PLAIN_SCALAR = re.compile(r"^[A-Za-z_][A-Za-z0-9_.-]*$") +_YAML_RESERVED = {"false", "null", "off", "on", "true", "yes", "no", "~"} + + +def _validate_pretty_format(format: str) -> SchemaPrettyFormat: + if format not in _PRETTY_FORMATS: + expected = ", ".join(repr(value) for value in _PRETTY_FORMATS) + raise ValueError(f"schema pretty format must be one of {expected}; got {format!r}") + return cast(SchemaPrettyFormat, format) + + +def _plural(count: int, noun: str) -> str: + if count == 1: + return f"{count} {noun}" + if noun.endswith("y"): + return f"{count} {noun[:-1]}ies" + return f"{count} {noun}s" + + +def _ordered_labels(primary: Optional[str], labels: Iterable[str]) -> Tuple[str, ...]: + ordered = tuple(sorted(str(label) for label in labels if str(label))) + if primary is None or primary not in ordered: + return ordered + return (primary,) + tuple(label for label in ordered if label != primary) + + +def _format_logical_type(logical_type: LogicalType) -> str: + if isinstance(logical_type, ScalarType): + suffix = "" if logical_type.nullable else "!" + return f"{logical_type.kind}{suffix}" + if isinstance(logical_type, ListType): + return f"list<{_format_logical_type(logical_type.element_type)}>" + if isinstance(logical_type, NodeRef): + labels = _ordered_labels(None, logical_type.labels) + return "node" if not labels else f"node<{ '|'.join(labels) }>" + if isinstance(logical_type, EdgeRef): + parts = [part for part in (logical_type.src_label, logical_type.type, logical_type.dst_label) if part] + return "edge" if not parts else f"edge<{ '->'.join(parts) }>" + if isinstance(logical_type, PathType): + return f"path<{logical_type.min_hops}..{logical_type.max_hops}>" + return "unknown" + + +def _cypher_identifier(name: str) -> str: + value = str(name) + if _CYPHER_IDENTIFIER.match(value): + return value + return f"`{value.replace('`', '``')}`" + + +def _cypher_label_pattern(labels: Iterable[str]) -> str: + ordered = tuple(labels) + return "".join(f":{_cypher_identifier(label)}" for label in ordered) + + +def _cypher_property_map(properties: Mapping[str, LogicalType]) -> str: + if not properties: + return "" + body = ", ".join( + f"{_cypher_identifier(str(name))}: {_format_logical_type(logical_type)}" + for name, logical_type in properties.items() + ) + return f" {{{body}}}" + + +def _inline_labels(labels: Iterable[str]) -> str: + ordered = tuple(labels) + if not ordered: + return "[]" + if len(ordered) == 1: + return ordered[0] + return "[" + ", ".join(ordered) + "]" + + +def _yaml_atom(value: str) -> str: + text = str(value) + if _YAML_PLAIN_SCALAR.match(text) and text.lower() not in _YAML_RESERVED: + return text + return '"' + text.replace("\\", "\\\\").replace('"', '\\"') + '"' + + +def _yaml_inline_labels(labels: Iterable[str]) -> str: + ordered = tuple(labels) + if not ordered: + return "[]" + if len(ordered) == 1: + return _yaml_atom(ordered[0]) + return "[" + ", ".join(_yaml_atom(label) for label in ordered) + "]" + + +def _yaml_property_lines(properties: Mapping[str, LogicalType], indent: str) -> Tuple[str, ...]: + if not properties: + return (f"{indent}properties: {{}}",) + lines = [f"{indent}properties:"] + lines.extend( + f"{indent} {_yaml_atom(str(name))}: {_format_logical_type(logical_type)}" + for name, logical_type in properties.items() + ) + return tuple(lines) + + +def _indent_lines(lines: Iterable[str], indent: str) -> Tuple[str, ...]: + return tuple(f"{indent}{line}" for line in lines) def _is_arrow_schema(value: Any) -> bool: @@ -217,6 +325,27 @@ def from_arrow( properties = _strip_label_properties(properties, normalized_labels) return cls(name, properties=properties, labels=normalized_labels) + def pretty(self, format: SchemaPrettyFormat = "cypher") -> str: + """Render this node contract as compact schema text. + + Example: + ``NodeType("Person", {"id": int}).pretty()`` returns + ``(:Person {id: int64})``. + """ + pretty_format = _validate_pretty_format(format) + labels = _ordered_labels(self.name, self.labels) or (self.name,) + if pretty_format == "cypher": + return f"({_cypher_label_pattern(labels)}{_cypher_property_map(self.properties)})" + if pretty_format == "compact": + return f"NodeType({self.name}, {_plural(len(self.properties), 'property')})" + + lines = [f"{_yaml_atom(self.name)}:", f" labels: {_yaml_inline_labels(labels)}"] + lines.extend(_yaml_property_lines(self.properties, " ")) + return "\n".join(lines) + + def __repr__(self) -> str: + return self.pretty("cypher") + @dataclass(frozen=True) class EdgeTopology: @@ -233,6 +362,34 @@ def as_metadata(self) -> Dict[str, object]: "destination_labels": tuple(sorted(self.destination_labels)), } + def pretty(self, format: SchemaPrettyFormat = "cypher") -> str: + """Render this relationship topology as compact schema text.""" + pretty_format = _validate_pretty_format(format) + source = _ordered_labels(None, self.source_labels) + destination = _ordered_labels(None, self.destination_labels) + if pretty_format == "cypher": + return ( + f"({_cypher_label_pattern(source)})" + f"-[:{_cypher_identifier(self.relationship_type)}]->" + f"({_cypher_label_pattern(destination)})" + ) + if pretty_format == "compact": + return ( + f"EdgeTopology({self.relationship_type}, " + f"{_inline_labels(source)} -> {_inline_labels(destination)})" + ) + + return "\n".join( + ( + f"{_yaml_atom(self.relationship_type)}:", + f" from: {_yaml_inline_labels(source)}", + f" to: {_yaml_inline_labels(destination)}", + ) + ) + + def __repr__(self) -> str: + return self.pretty("cypher") + @classmethod def from_metadata(cls, value: Mapping[str, object]) -> "EdgeTopology": """Import topology from the metadata shape emitted by ``as_metadata()``.""" @@ -320,6 +477,39 @@ def from_arrow( properties.pop(_label_column(name), None) return cls(name, source=source, destination=destination, properties=properties) + def pretty(self, format: SchemaPrettyFormat = "cypher") -> str: + """Render this edge contract as compact schema text. + + Example: + ``EdgeType("WORKS_AT", "Person", "Company", {"since": int}).pretty()`` + returns ``(:Person)-[:WORKS_AT {since: int64}]->(:Company)``. + """ + pretty_format = _validate_pretty_format(format) + source = _ordered_labels(None, self.source) + destination = _ordered_labels(None, self.destination) + if pretty_format == "cypher": + return ( + f"({_cypher_label_pattern(source)})" + f"-[:{_cypher_identifier(self.name)}{_cypher_property_map(self.properties)}]->" + f"({_cypher_label_pattern(destination)})" + ) + if pretty_format == "compact": + return ( + f"EdgeType({self.name}, {_inline_labels(source)} -> {_inline_labels(destination)}, " + f"{_plural(len(self.properties), 'property')})" + ) + + lines = [ + f"{_yaml_atom(self.name)}:", + f" from: {_yaml_inline_labels(source)}", + f" to: {_yaml_inline_labels(destination)}", + ] + lines.extend(_yaml_property_lines(self.properties, " ")) + return "\n".join(lines) + + def __repr__(self) -> str: + return self.pretty("cypher") + @dataclass(frozen=True) class GraphSchema: @@ -524,6 +714,51 @@ def to_catalog( metadata=metadata, ) + def pretty(self, format: SchemaPrettyFormat = "cypher") -> str: + """Render this schema for prompts, logs, or debugging. + + ``format="cypher"`` is the compact default for LLM prompts, + ``format="yaml"`` is indented for human debugging, and + ``format="compact"`` returns a one-line summary. + """ + pretty_format = _validate_pretty_format(format) + if pretty_format == "cypher": + patterns = tuple(node_type.pretty("cypher") for node_type in self.node_types) + patterns += tuple(edge_type.pretty("cypher") for edge_type in self.edge_types) + return "\n".join(patterns) if patterns else self.pretty("compact") + if pretty_format == "compact": + property_count = sum(len(node_type.properties) for node_type in self.node_types) + property_count += sum(len(edge_type.properties) for edge_type in self.edge_types) + return ( + f"GraphSchema({_plural(len(self.node_types), 'node type')}, " + f"{_plural(len(self.edge_types), 'edge type')}, " + f"{_plural(property_count, 'property')})" + ) + + lines = [ + f"strict: {str(self.strict).lower()}", + f"node_id_column: {self.node_id_column if self.node_id_column is not None else 'null'}", + "edge_columns:", + f" source: {self.edge_source_column if self.edge_source_column is not None else 'null'}", + f" destination: {self.edge_destination_column if self.edge_destination_column is not None else 'null'}", + "nodes:", + ] + if self.node_types: + for node_type in self.node_types: + lines.extend(_indent_lines(node_type.pretty("yaml").splitlines(), " ")) + else: + lines[-1] = "nodes: {}" + lines.append("relationships:") + if self.edge_types: + for edge_type in self.edge_types: + lines.extend(_indent_lines(edge_type.pretty("yaml").splitlines(), " ")) + else: + lines[-1] = "relationships: {}" + return "\n".join(lines) + + def __repr__(self) -> str: + return self.pretty("cypher") + def _merge_arrow_schemas(schemas: Iterable[Any], *, kind: str) -> Any: import pyarrow as pa @@ -561,4 +796,27 @@ def _edge_type_from_arrow_entry( return EdgeType.from_arrow(name, source, destination, schema, coercion=coercion) -__all__ = ["EdgeTopology", "EdgeType", "GraphSchema", "NodeType"] +PrettySchemaInput = Union[EdgeTopology, EdgeType, GraphSchema, NodeType] + + +def pretty_print_schema( + schema: PrettySchemaInput, + format: SchemaPrettyFormat = "cypher", +) -> str: + """Render a public graph schema declaration. + + This ergonomic wrapper mirrors each schema dataclass's ``pretty()`` method. + Example: + ``pretty_print_schema(schema, format="compact")`` returns a one-line + summary such as ``GraphSchema(2 node types, 1 edge type, 5 properties)``. + """ + return schema.pretty(format) + + +__all__ = [ + "EdgeTopology", + "EdgeType", + "GraphSchema", + "NodeType", + "pretty_print_schema", +] diff --git a/graphistry/tests/compute/gfql/test_public_schema.py b/graphistry/tests/compute/gfql/test_public_schema.py index 163ab90877..e5bb7d4960 100644 --- a/graphistry/tests/compute/gfql/test_public_schema.py +++ b/graphistry/tests/compute/gfql/test_public_schema.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Any, cast + import pandas as pd import pytest @@ -8,7 +10,7 @@ from graphistry.compute.exceptions import ErrorCode, GFQLValidationError from graphistry.compute.gfql.ir.logical_plan import RowSchema from graphistry.compute.gfql.ir.types import ScalarType -from graphistry.schema import EdgeType, GraphSchema, NodeType +from graphistry.schema import EdgeTopology, EdgeType, GraphSchema, NodeType, pretty_print_schema def _schema(*, strict: bool = True) -> GraphSchema: @@ -26,6 +28,20 @@ def _schema(*, strict: bool = True) -> GraphSchema: ) +def _pretty_schema() -> GraphSchema: + person = NodeType("Person", {"id": int, "name": str, "age": int}) + company = NodeType("Company", {"id": int, "name": str}) + works_at = EdgeType("WORKS_AT", source=person, destination=company, properties={"since": int}) + contracts = EdgeType("CONTRACTS", source=company, destination=person, properties={"fee": float}) + return GraphSchema( + node_types=[person, company], + edge_types=[works_at, contracts], + node_id_column="id", + edge_source_column="src", + edge_destination_column="dst", + ) + + def _graph(schema: GraphSchema): nodes = pd.DataFrame( [ @@ -45,6 +61,124 @@ def test_public_schema_imports_are_stable() -> None: assert graphistry.NodeType is NodeType assert graphistry.EdgeType is EdgeType assert graphistry.GraphSchema is GraphSchema + assert graphistry.pretty_print_schema is pretty_print_schema + + +def test_graph_schema_pretty_cypher_and_repr_are_compact() -> None: + schema = _pretty_schema() + + expected = "\n".join( + ( + "(:Person {id: int64, name: string, age: int64})", + "(:Company {id: int64, name: string})", + "(:Person)-[:WORKS_AT {since: int64}]->(:Company)", + "(:Company)-[:CONTRACTS {fee: float64}]->(:Person)", + ) + ) + + assert schema.pretty() == expected + assert repr(schema) == expected + assert pretty_print_schema(schema) == expected + + +def test_graph_schema_pretty_yaml_is_stable() -> None: + schema = _pretty_schema() + + expected = "\n".join( + ( + "strict: true", + "node_id_column: id", + "edge_columns:", + " source: src", + " destination: dst", + "nodes:", + " Person:", + " labels: Person", + " properties:", + " id: int64", + " name: string", + " age: int64", + " Company:", + " labels: Company", + " properties:", + " id: int64", + " name: string", + "relationships:", + " WORKS_AT:", + " from: Person", + " to: Company", + " properties:", + " since: int64", + " CONTRACTS:", + " from: Company", + " to: Person", + " properties:", + " fee: float64", + ) + ) + + assert schema.pretty("yaml") == expected + assert pretty_print_schema(schema, format="yaml") == expected + + +def test_graph_schema_pretty_compact_counts_public_contracts() -> None: + schema = _pretty_schema() + + assert schema.pretty("compact") == "GraphSchema(2 node types, 2 edge types, 7 properties)" + assert GraphSchema().pretty() == "GraphSchema(0 node types, 0 edge types, 0 properties)" + + +def test_schema_dataclass_pretty_outputs_are_anchored() -> None: + person = NodeType("Person", {"id": int, "name": str}) + edge = EdgeType("WORKS_AT", "Person", "Company", {"since": int}) + topology = EdgeTopology("WORKS_AT", frozenset({"Person"}), frozenset({"Company"})) + + assert person.pretty() == "(:Person {id: int64, name: string})" + assert person.pretty("compact") == "NodeType(Person, 2 properties)" + assert edge.pretty() == "(:Person)-[:WORKS_AT {since: int64}]->(:Company)" + assert edge.pretty("compact") == "EdgeType(WORKS_AT, Person -> Company, 1 property)" + assert topology.pretty() == "(:Person)-[:WORKS_AT]->(:Company)" + assert topology.pretty("compact") == "EdgeTopology(WORKS_AT, Person -> Company)" + + +def test_schema_pretty_prints_arrow_dtypes_without_dataclass_noise() -> None: + pa = pytest.importorskip("pyarrow") + + person = NodeType( + "Person", + pa.schema( + [ + pa.field("id", pa.int64(), nullable=False), + pa.field("name", pa.large_string()), + ] + ), + ) + + assert person.pretty() == "(:Person {id: int64!, name: string})" + + +def test_schema_pretty_escapes_non_identifier_names() -> None: + node = NodeType("Person Type", {"display name": str}) + edge = EdgeType("WORKS-AT", "Person Type", "Company", {"start year": int}) + + assert node.pretty() == "(:`Person Type` {`display name`: string})" + assert edge.pretty() == "(:`Person Type`)-[:`WORKS-AT` {`start year`: int64}]->(:Company)" + assert node.pretty("yaml") == "\n".join( + ( + '"Person Type":', + ' labels: "Person Type"', + " properties:", + ' "display name": string', + ) + ) + + +def test_schema_pretty_rejects_unknown_format_with_anchored_message() -> None: + with pytest.raises( + ValueError, + match="schema pretty format must be one of 'cypher', 'yaml', 'compact'; got 'json'", + ): + pretty_print_schema(_pretty_schema(), format=cast(Any, "json")) def test_graph_schema_adapts_to_internal_catalog() -> None: