From 19289d82bf207a573de250c91f80e87fab55ada4 Mon Sep 17 00:00:00 2001 From: lfischerr Date: Wed, 25 Mar 2026 16:59:30 +0100 Subject: [PATCH 01/10] add second stage and tests to DoublyStochastic --- .../operators/doubly_stochastic.py | 45 ++++++++- tests/test_doubly_stochastic.py | 96 ++++++++++++++++--- 2 files changed, 124 insertions(+), 17 deletions(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 0213d7a..99afa93 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -2,8 +2,11 @@ import numpy as np from ..graph import Graph from .base import GraphOperator +import networkx as nx +# Z. 48-57: +# https://gitlab.liris.cnrs.fr/coregraphie/netbone/-/blob/main/netbone/structural/doubly_stochastic.py?ref_type=heads @dataclass(slots=True) class DoublyStochastic(GraphOperator): """ @@ -30,6 +33,7 @@ class DoublyStochastic(GraphOperator): Copy metadata frame if present. Default True. 
""" + backbone_method: bool = False tolerance: float = 1e-5 max_iter: int = 10_000 copy_meta: bool = True @@ -84,14 +88,12 @@ def apply(self, G: Graph) -> Graph: # Only check rows/cols that have edges (others stay 0 and are irrelevant) if row_has_edges.any(): - rows_ok = np.all((row_sums[row_has_edges] >= min_thres) & - (row_sums[row_has_edges] <= max_thres)) + rows_ok = np.all((row_sums[row_has_edges] >= min_thres) & (row_sums[row_has_edges] <= max_thres)) else: rows_ok = True if col_has_edges.any(): - cols_ok = np.all((col_sums[col_has_edges] >= min_thres) & - (col_sums[col_has_edges] <= max_thres)) + cols_ok = np.all((col_sums[col_has_edges] >= min_thres) & (col_sums[col_has_edges] <= max_thres)) else: cols_ok = True @@ -105,6 +107,41 @@ def apply(self, G: Graph) -> Graph: # col scaling A_scaled.data *= c[A_scaled.indices] + # step 2 + if self.backbone_method: + i = 0 + + rows, cols = A_scaled.nonzero() + vals = A_scaled.data + + order = np.argsort(vals)[::-1] + rows = rows[order] + cols = cols[order] + vals = vals[order] + print(rows, cols, order) + + if not G.directed: + G_filtered = nx.Graph() + while ( + nx.number_connected_components(G_filtered) != 1 + or len(G_filtered) < A_scaled.shape[0] + or not nx.is_connected(G_filtered) + ): + if i == A_scaled.shape[0]: + break + G_filtered.add_edge(rows[i], cols[i], weight=vals[i]) + i += 1 + G_csr = nx.to_scipy_sparse_array(G_filtered) + + return Graph.from_csr( + G_csr, + directed=G.directed, + weighted=True, + mode=G.mode, + # meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), + sym_op="max", + ) + return Graph.from_csr( A_scaled, directed=G.directed, diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index c0febd5..bfa99b2 100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -16,12 +16,15 @@ def _csr(data, rows, cols, n): # ----------------- Positive dense matrix: converges to ~doubly stochastic ----------------- def 
test_doubly_stochastic_converges_on_positive_dense(): # Strictly positive, symmetric 4x4 (undirected) - M = np.array([ - [0.2, 0.8, 0.5, 0.3], - [0.7, 0.1, 0.4, 0.6], - [0.3, 0.9, 0.2, 0.5], - [0.5, 0.2, 0.7, 0.4], - ], dtype=float) + M = np.array( + [ + [0.2, 0.8, 0.5, 0.3], + [0.7, 0.1, 0.4, 0.6], + [0.3, 0.9, 0.2, 0.5], + [0.5, 0.2, 0.7, 0.4], + ], + dtype=float, + ) # Zero the diagonal (typical adjacency semantics) np.fill_diagonal(M, 0.0) @@ -43,6 +46,49 @@ def test_doubly_stochastic_converges_on_positive_dense(): assert not G.directed and G.weighted +def test_doubly_stochastic_with_backbone_method(): + # Strictly positive, asymmetric matrix (to make backbone relevant) + M = np.array( + [ + [0.2, 0.8, 0.5, 0.3], + [0.7, 0.1, 0.4, 0.6], + [0.3, 0.9, 0.2, 0.5], + [0.5, 0.2, 0.7, 0.4], + ], + dtype=float, + ) + + # Zero diagonal + np.fill_diagonal(M, 0.0) + + G0 = Graph.from_dense(M, directed=False, weighted=True, mode="similarity", sym_op="max") + + op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, backbone_method=True) + + G = op.apply(G0) + A = G.adj + + # No NaNs or infs + assert np.isfinite(A.data).all() + + # Check that only backbone edges remain (sparser than original) + assert A.nnz <= G0.adj.nnz + + # Rows/cols should still approximately sum to 1 (on non-isolated nodes) + row_sums = np.asarray(A.sum(axis=1)).ravel() + col_sums = np.asarray(A.sum(axis=0)).ravel() + + # Only check nodes that still have edges + nonzero_rows = row_sums > 0 + nonzero_cols = col_sums > 0 + + assert np.allclose(row_sums[nonzero_rows], row_sums[nonzero_rows][0], atol=1e-6) + assert np.allclose(col_sums[nonzero_cols], col_sums[nonzero_cols][0], atol=1e-6) + + # Graph properties preserved + assert not G.directed and G.weighted + + # ----------------- Sparse graph with isolates: zero rows/cols remain zero, others ~1 ----------------- def test_doubly_stochastic_sparse_with_isolates(): # 5 nodes, node 4 is isolated @@ -65,7 +111,7 @@ def 
test_doubly_stochastic_sparse_with_isolates(): col_sums = np.asarray(A2.sum(axis=0)).ravel() # Indices with edges - rows_with = (np.diff(A2.indptr) > 0) + rows_with = np.diff(A2.indptr) > 0 cols_with = (sp.csc_matrix(A2).indptr[1:] - sp.csc_matrix(A2).indptr[:-1]) > 0 # Non-isolated rows/cols sum ~1 @@ -81,6 +127,35 @@ def test_doubly_stochastic_sparse_with_isolates(): assert not G.directed and G.weighted +def test_doubly_stochastic_sparse_with_isolates_backbone(): + A = _csr( + data=[0.4, 0.6, 0.3, 0.7, 0.2, 0.5], + rows=[0, 0, 1, 1, 2, 3], + cols=[1, 2, 2, 3, 3, 2], + n=5, + ) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") + + op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, backbone_method=True) + G = op.apply(G0) + A2 = G.adj + + assert np.isfinite(A2.data).all() + + row_sums = np.asarray(A2.sum(axis=1)).ravel() + col_sums = np.asarray(A2.sum(axis=0)).ravel() + rows_with = np.diff(A2.indptr) > 0 + cols_with = (sp.csc_matrix(A2).indptr[1:] - sp.csc_matrix(A2).indptr[:-1]) > 0 + + if rows_with.any(): + assert np.all(row_sums[rows_with] > 0) + if cols_with.any(): + assert np.all(col_sums[cols_with] > 0) + + # Isolated node (4) stays isolated (not in the graph) + assert len(row_sums) == 4 + + # ----------------- Directed case: rows and cols ~1 for nonzero rows/cols ----------------- def test_doubly_stochastic_directed_graph_unsolvable(): # Directed 4x4 with zeros on diagonal, not symmetric @@ -100,12 +175,7 @@ def test_doubly_stochastic_directed_graph_unsolvable(): # No NaNs or infs assert np.isfinite(A2.data).all() - expected_result = np.array([ - [0. , 0.5, 0.5, 0. ], - [0. , 0. , 1. , 0. ], - [0.5, 0. , 0. , 0.5], - [0. , 1. , 0. , 0. 
] - ]) + expected_result = np.array([[0.0, 0.5, 0.5, 0.0], [0.0, 0.0, 1.0, 0.0], [0.5, 0.0, 0.0, 0.5], [0.0, 1.0, 0.0, 0.0]]) assert np.allclose(A2.toarray(), expected_result, atol=1e-4) # Directed flag preserved From 701fa0b8eaf732504a86b29bb62a4be01f7fdaf7 Mon Sep 17 00:00:00 2001 From: lfischerr Date: Wed, 25 Mar 2026 17:02:29 +0100 Subject: [PATCH 02/10] fix linting --- graphconstructor/operators/doubly_stochastic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 99afa93..a4622b0 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -1,8 +1,8 @@ from dataclasses import dataclass +import networkx as nx import numpy as np from ..graph import Graph from .base import GraphOperator -import networkx as nx # Z. 48-57: From ebbe3a6d669595ecbb8099201f66827f5f36c72b Mon Sep 17 00:00:00 2001 From: lfischerr Date: Sun, 12 Apr 2026 20:10:46 +0200 Subject: [PATCH 03/10] fix break statement, add metadata and adjust test --- graphconstructor/operators/doubly_stochastic.py | 9 +++++---- tests/test_doubly_stochastic.py | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index a4622b0..89341eb 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -118,8 +118,6 @@ def apply(self, G: Graph) -> Graph: rows = rows[order] cols = cols[order] vals = vals[order] - print(rows, cols, order) - if not G.directed: G_filtered = nx.Graph() while ( @@ -127,10 +125,13 @@ def apply(self, G: Graph) -> Graph: or len(G_filtered) < A_scaled.shape[0] or not nx.is_connected(G_filtered) ): - if i == A_scaled.shape[0]: + if i == len(rows) or G_filtered.number_of_nodes() == len(set(rows)): break G_filtered.add_edge(rows[i], cols[i], 
weight=vals[i]) i += 1 + + # add isolated nodes + G_filtered.add_nodes_from(range(G.n_nodes)) G_csr = nx.to_scipy_sparse_array(G_filtered) return Graph.from_csr( @@ -138,7 +139,7 @@ def apply(self, G: Graph) -> Graph: directed=G.directed, weighted=True, mode=G.mode, - # meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), + meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index bfa99b2..a3ffffd 100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -152,8 +152,11 @@ def test_doubly_stochastic_sparse_with_isolates_backbone(): if cols_with.any(): assert np.all(col_sums[cols_with] > 0) - # Isolated node (4) stays isolated (not in the graph) - assert len(row_sums) == 4 + # Isolated node (4) stays isolated but is still part of the graph + G_nx = G.to_networkx() + + assert len(row_sums) == 5 + assert len(G_nx.edges(4)) == 0 # ----------------- Directed case: rows and cols ~1 for nonzero rows/cols ----------------- From 0f60b381f40cc8226c6a1a1f3b5e6db4610c3269 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 16:52:57 +0200 Subject: [PATCH 04/10] break into two classes (normalize and backbone) --- .../operators/doubly_stochastic.py | 145 ++++++++++++------ 1 file changed, 102 insertions(+), 43 deletions(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 89341eb..3ca0505 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -5,10 +5,8 @@ from ..graph import Graph from .base import GraphOperator -# Z.
48-57: -# https://gitlab.liris.cnrs.fr/coregraphie/netbone/-/blob/main/netbone/structural/doubly_stochastic.py?ref_type=heads @dataclass(slots=True) -class DoublyStochastic(GraphOperator): +class DoublyStochasticNormalize(GraphOperator): """ Alternating row/column normalization (Sinkhorn-Knopp) to make the adjacency approximately doubly stochastic. Works with CSR without densifying. @@ -33,7 +31,6 @@ class DoublyStochastic(GraphOperator): Copy metadata frame if present. Default True. """ - backbone_method: bool = False tolerance: float = 1e-5 max_iter: int = 10_000 copy_meta: bool = True @@ -44,9 +41,9 @@ def apply(self, G: Graph) -> Graph: A = G.adj.tocsr(copy=False) if A.shape[0] != A.shape[1]: - raise TypeError("DoublyStochastic requires a square adjacency.") + raise TypeError("DoublyStochasticNormalize requires a square adjacency.") if (A.data < 0).any(): - raise ValueError("DoublyStochastic requires nonnegative weights.") + raise ValueError("DoublyStochasticNormalize requires nonnegative weights.") n = A.shape[0] if A.nnz == 0: @@ -67,18 +64,20 @@ def apply(self, G: Graph) -> Graph: # Precompute nnz masks for convergence checks under sparsity row_has_edges = np.array(A.sum(axis=1) > 0).T[0] col_has_edges = np.array(A.sum(axis=0) > 0)[0] + # The csc conversions above could be heavy; build once A_T = A.T.tocsr(copy=False) min_thres = 1.0 - self.tolerance max_thres = 1.0 + self.tolerance - # Parameter for stabilitation + # Parameter for stabilization MAX_FACTOR = 1e50 for _ in range(self.max_iter): c[col_has_edges] = 1 / A_T.dot(r)[col_has_edges] r[row_has_edges] = 1 / A.dot(c)[row_has_edges] + if np.any(np.abs(r) > MAX_FACTOR) or np.any(np.abs(c) > MAX_FACTOR): break # avoid overflow; close enough @@ -88,12 +87,18 @@ def apply(self, G: Graph) -> Graph: # Only check rows/cols that have edges (others stay 0 and are irrelevant) if row_has_edges.any(): - rows_ok = np.all((row_sums[row_has_edges] >= min_thres) & (row_sums[row_has_edges] <= max_thres)) + rows_ok = 
np.all( + (row_sums[row_has_edges] >= min_thres) + & (row_sums[row_has_edges] <= max_thres) + ) else: rows_ok = True if col_has_edges.any(): - cols_ok = np.all((col_sums[col_has_edges] >= min_thres) & (col_sums[col_has_edges] <= max_thres)) + cols_ok = np.all( + (col_sums[col_has_edges] >= min_thres) + & (col_sums[col_has_edges] <= max_thres) + ) else: cols_ok = True @@ -102,49 +107,103 @@ def apply(self, G: Graph) -> Graph: # Apply scaling once: A' = diag(r) * A * diag(c) (CSR-friendly) A_scaled = A.copy() + # row scaling A_scaled.data *= np.repeat(r, np.diff(A_scaled.indptr)) + # col scaling A_scaled.data *= c[A_scaled.indices] - # step 2 - if self.backbone_method: - i = 0 - - rows, cols = A_scaled.nonzero() - vals = A_scaled.data - - order = np.argsort(vals)[::-1] - rows = rows[order] - cols = cols[order] - vals = vals[order] - if not G.directed: - G_filtered = nx.Graph() - while ( - nx.number_connected_components(G_filtered) != 1 - or len(G_filtered) < A_scaled.shape[0] - or not nx.is_connected(G_filtered) - ): - if i == len(rows) or G_filtered.number_of_nodes() == len(set(rows)): - break - G_filtered.add_edge(rows[i], cols[i], weight=vals[i]) - i += 1 - - # add isolated nodes - G_filtered.add_nodes_from(range(G.n_nodes)) - G_csr = nx.to_scipy_sparse_array(G_filtered) + return Graph.from_csr( + A_scaled, + directed=G.directed, + weighted=True, + mode=G.mode, + meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), + sym_op="max", + ) - return Graph.from_csr( - G_csr, - directed=G.directed, - weighted=True, - mode=G.mode, - meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), - sym_op="max", + +@dataclass(slots=True) +class DoublyStochasticBackbone(GraphOperator): + """ + Backbone extraction based on doubly-stochastic edge scores. 
+ + First applies DoublyStochasticNormalize, then sorts edges by normalized score + and adds strongest edges until the graph becomes connected, following the + behavior of the previous DoublyStochastic(backbone_method=True) branch. + + References + ---------- + - Coscia, M. (2025). "The Atlas for the Inspiring Network Scientist." + - Yassin, A. (2023). "An evaluation tool for backbone extraction techniques + in weighted complex networks." Scientific Reports. + + Parameters + ---------- + tolerance : float + Band tolerance passed to DoublyStochasticNormalize. Default 1e-5. + max_iter : int + Maximum Sinkhorn iterations passed to DoublyStochasticNormalize. + Default 10_000. + copy_meta : bool + Copy metadata frame if present. Default True. + """ + + tolerance: float = 1e-5 + max_iter: int = 10_000 + copy_meta: bool = True + supported_modes = ["similarity"] + + def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) + + normalized = DoublyStochasticNormalize( + tolerance=self.tolerance, + max_iter=self.max_iter, + copy_meta=self.copy_meta, + ).apply(G) + + A_scaled = normalized.adj.tocsr(copy=False) + + i = 0 + rows, cols = A_scaled.nonzero() + vals = A_scaled.data + + order = np.argsort(vals)[::-1] + rows = rows[order] + cols = cols[order] + vals = vals[order] + + if not G.directed: + G_filtered = nx.Graph() + + while ( + nx.number_connected_components(G_filtered) != 1 + or len(G_filtered) < A_scaled.shape[0] + or not nx.is_connected(G_filtered) + ): + if i == len(rows) or G_filtered.number_of_nodes() == len(set(rows)): + break + + G_filtered.add_edge(rows[i], cols[i], weight=vals[i]) + i += 1 + + # add isolated nodes + G_filtered.add_nodes_from(range(G.n_nodes)) + + else: + raise NotImplementedError( + "DoublyStochasticBackbone currently supports only undirected graphs." 
) + G_csr = nx.to_scipy_sparse_array( + G_filtered, + nodelist=list(range(G.n_nodes)), + ) + return Graph.from_csr( - A_scaled, + G_csr, directed=G.directed, weighted=True, mode=G.mode, From 6b482cf9c89e06e068ce19cfe51ae3841b01428a Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 16:56:50 +0200 Subject: [PATCH 05/10] adjust tests --- graphconstructor/operators/__init__.py | 5 +++-- tests/test_doubly_stochastic.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/graphconstructor/operators/__init__.py b/graphconstructor/operators/__init__.py index 9e3c09e..7838bec 100644 --- a/graphconstructor/operators/__init__.py +++ b/graphconstructor/operators/__init__.py @@ -1,6 +1,6 @@ from .base import GraphOperator from .disparity import DisparityFilter -from .doubly_stochastic import DoublyStochastic +from .doubly_stochastic import DoublyStochasticNormalize, DoublyStochasticBackbone from .knn_selector import KNNSelector from .locally_adaptive_sparsification import LocallyAdaptiveSparsification from .marginal_likelihood import MarginalLikelihoodFilter @@ -11,7 +11,8 @@ __all__ = [ "DisparityFilter", - "DoublyStochastic", + "DoublyStochasticNormalize", + "DoublyStochasticBackbone", "GraphOperator", "KNNSelector", "LocallyAdaptiveSparsification", diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index a3ffffd..02538a0 100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -3,7 +3,7 @@ import pytest import scipy.sparse as sp from graphconstructor import Graph -from graphconstructor.operators import DoublyStochastic +from graphconstructor.operators import DoublyStochasticNormalize, DoublyStochasticBackbone def _csr(data, rows, cols, n): @@ -29,7 +29,7 @@ def test_doubly_stochastic_converges_on_positive_dense(): np.fill_diagonal(M, 0.0) G0 = Graph.from_dense(M, directed=False, weighted=True, mode="similarity", sym_op="max") - op = DoublyStochastic(tolerance=1e-6, 
max_iter=10_000) + op = DoublyStochasticNormalize(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) A = G.adj @@ -63,7 +63,7 @@ def test_doubly_stochastic_with_backbone_method(): G0 = Graph.from_dense(M, directed=False, weighted=True, mode="similarity", sym_op="max") - op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, backbone_method=True) + op = DoublyStochasticBackbone(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) A = G.adj @@ -100,7 +100,7 @@ def test_doubly_stochastic_sparse_with_isolates(): ) G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") - op = DoublyStochastic(tolerance=1e-6, max_iter=10_000) + op = DoublyStochasticNormalize(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) A2 = G.adj @@ -136,7 +136,7 @@ def test_doubly_stochastic_sparse_with_isolates_backbone(): ) G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") - op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, backbone_method=True) + op = DoublyStochasticBackbone(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) A2 = G.adj @@ -171,7 +171,7 @@ def test_doubly_stochastic_directed_graph_unsolvable(): ) G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") - op = DoublyStochastic(tolerance=1e-6, max_iter=10_000) + op = DoublyStochasticNormalize(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) A2 = G.adj @@ -189,7 +189,7 @@ def test_doubly_stochastic_directed_graph_unsolvable(): def test_doubly_stochastic_rejects_negative_weights(): A = _csr([-0.2, 0.5], [0, 1], [1, 0], 2) G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity", sym_op="max") - op = DoublyStochastic() + op = DoublyStochasticNormalize() with pytest.raises(ValueError, match="nonnegative"): op.apply(G0) @@ -197,7 +197,7 @@ def test_doubly_stochastic_rejects_negative_weights(): def test_doubly_stochastic_rejects_distances(): A = _csr([0.2, 0.5], [0, 1], [1, 0], 2) G0 = Graph.from_csr(A, directed=True, weighted=True, 
mode="distance", sym_op="max") - op = DoublyStochastic() + op = DoublyStochasticNormalize() with pytest.raises(ValueError, match="only supports modes"): op.apply(G0) @@ -206,7 +206,7 @@ def test_doubly_stochastic_rejects_distances(): def test_doubly_stochastic_all_zero_matrix_noop(): A = sp.csr_matrix((4, 4), dtype=float) G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") - op = DoublyStochastic() + op = DoublyStochasticNormalize() G = op.apply(G0) assert G.adj.nnz == 0 assert G.adj.shape == (4, 4) @@ -219,7 +219,7 @@ def test_doubly_stochastic_preserves_flags_and_copies_metadata(): A = _csr([0.4, 0.6, 0.3], [0, 1, 2], [1, 2, 0], 3) G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", meta=meta, sym_op="max") - op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, copy_meta=True) + op = DoublyStochasticNormalize(tolerance=1e-6, max_iter=10_000, copy_meta=True) G = op.apply(G0) # Flags From ddf37c1763e6f73c863b97f8a3d8ab6b98b8209b Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 16:58:42 +0200 Subject: [PATCH 06/10] linting --- graphconstructor/operators/__init__.py | 2 +- tests/test_doubly_stochastic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/graphconstructor/operators/__init__.py b/graphconstructor/operators/__init__.py index 7838bec..88475b6 100644 --- a/graphconstructor/operators/__init__.py +++ b/graphconstructor/operators/__init__.py @@ -1,6 +1,6 @@ from .base import GraphOperator from .disparity import DisparityFilter -from .doubly_stochastic import DoublyStochasticNormalize, DoublyStochasticBackbone +from .doubly_stochastic import DoublyStochasticBackbone, DoublyStochasticNormalize from .knn_selector import KNNSelector from .locally_adaptive_sparsification import LocallyAdaptiveSparsification from .marginal_likelihood import MarginalLikelihoodFilter diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index 02538a0..6772bdf 
100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -3,7 +3,7 @@ import pytest import scipy.sparse as sp from graphconstructor import Graph -from graphconstructor.operators import DoublyStochasticNormalize, DoublyStochasticBackbone +from graphconstructor.operators import DoublyStochasticBackbone, DoublyStochasticNormalize def _csr(data, rows, cols, n): From 180383d7ba478640e574a80c54ee0919ccdc5a28 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 17:24:50 +0200 Subject: [PATCH 07/10] add tests --- tests/test_doubly_stochastic.py | 80 ++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index 6772bdf..2a5c792 100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -61,7 +61,13 @@ def test_doubly_stochastic_with_backbone_method(): # Zero diagonal np.fill_diagonal(M, 0.0) - G0 = Graph.from_dense(M, directed=False, weighted=True, mode="similarity", sym_op="max") + G0 = Graph.from_dense( + M, + directed=False, + weighted=True, + mode="similarity", + sym_op="max", + ) op = DoublyStochasticBackbone(tolerance=1e-6, max_iter=10_000) @@ -71,19 +77,17 @@ def test_doubly_stochastic_with_backbone_method(): # No NaNs or infs assert np.isfinite(A.data).all() - # Check that only backbone edges remain (sparser than original) + # Backbone should be no denser than original graph assert A.nnz <= G0.adj.nnz - # Rows/cols should still approximately sum to 1 (on non-isolated nodes) - row_sums = np.asarray(A.sum(axis=1)).ravel() - col_sums = np.asarray(A.sum(axis=0)).ravel() + # For this connected 4-node graph, the backbone should connect all nodes. 
+ assert G.is_connected() - # Only check nodes that still have edges - nonzero_rows = row_sums > 0 - nonzero_cols = col_sums > 0 - - assert np.allclose(row_sums[nonzero_rows], row_sums[nonzero_rows][0], atol=1e-6) - assert np.allclose(col_sums[nonzero_cols], col_sums[nonzero_cols][0], atol=1e-6) + # The backbone implementation adds strongest edges until + # connected, a connected undirected graph should end up with at least n-1 + # and at most the original number of edges. + assert G.n_edges >= G.n_nodes - 1 + assert G.n_edges <= G0.n_edges # Graph properties preserved assert not G.directed and G.weighted @@ -159,6 +163,60 @@ def test_doubly_stochastic_sparse_with_isolates_backbone(): assert len(G_nx.edges(4)) == 0 +def test_doubly_stochastic_backbone_keeps_adding_edges_until_connected(): + # This graph has two very strong edges, 0--1 and 2--3, plus one weak + # bridge, 1--2. + A = _csr( + data=[100.0, 100.0, 100.0, 100.0, 1.0, 1.0], + rows=[0, 1, 2, 3, 1, 2], + cols=[1, 0, 3, 2, 2, 1], + n=4, + ) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") + + op = DoublyStochasticBackbone(tolerance=1e-6, max_iter=10_000) + G = op.apply(G0) + + assert G.is_connected() + assert G.n_edges == 3 + + # The weak bridge is required for connectivity and should not be skipped. + assert G.adj[1, 2] > 0 + assert G.adj[2, 1] > 0 + + +def test_doubly_stochastic_backbone_preserves_node_order_after_networkx_conversion(): + # Node 4 is part of the strongest edge. Without an explicit nodelist in + # nx.to_scipy_sparse_array, NetworkX insertion order can remap this edge + # to different matrix indices. 
+ A = _csr( + data=[10.0, 10.0, 1.0, 1.0, 1.0, 1.0], + rows=[0, 4, 0, 1, 1, 4], + cols=[4, 0, 1, 0, 4, 1], + n=5, + ) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") + + op = DoublyStochasticBackbone(tolerance=1e-6, max_iter=10_000) + G = op.apply(G0) + + assert G.adj.shape == (5, 5) + + # Node labels should still correspond to the original matrix indices. + # In particular, node 4 should still be connected to the component, + # not silently remapped to another row/column. + assert G.adj[0, 4] > 0 or G.adj[1, 4] > 0 + assert np.asarray(G.adj[4].sum()).ravel()[0] > 0 + + # Node 2 and node 3 were isolated in the input and should remain isolated. + row_sums = np.asarray(G.adj.sum(axis=1)).ravel() + col_sums = np.asarray(G.adj.sum(axis=0)).ravel() + + assert row_sums[2] == 0.0 + assert col_sums[2] == 0.0 + assert row_sums[3] == 0.0 + assert col_sums[3] == 0.0 + # ----------------- Directed case: rows and cols ~1 for nonzero rows/cols ----------------- def test_doubly_stochastic_directed_graph_unsolvable(): # Directed 4x4 with zeros on diagonal, not symmetric From 7776bba19a56a6d125f25347bde116a9208fc3ae Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 17:27:26 +0200 Subject: [PATCH 08/10] refactor backbone part --- .../operators/doubly_stochastic.py | 76 +++++++++++-------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 3ca0505..997fbeb 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -1,6 +1,7 @@ from dataclasses import dataclass import networkx as nx import numpy as np +import scipy.sparse as sp from ..graph import Graph from .base import GraphOperator @@ -158,6 +159,11 @@ class DoublyStochasticBackbone(GraphOperator): def apply(self, G: Graph) -> Graph: self._check_mode_supported(G) + if G.directed: + raise 
NotImplementedError( + "DoublyStochasticBackbone currently supports only undirected graphs." + ) + normalized = DoublyStochasticNormalize( tolerance=self.tolerance, max_iter=self.max_iter, @@ -165,37 +171,7 @@ def apply(self, G: Graph) -> Graph: ).apply(G) A_scaled = normalized.adj.tocsr(copy=False) - - i = 0 - rows, cols = A_scaled.nonzero() - vals = A_scaled.data - - order = np.argsort(vals)[::-1] - rows = rows[order] - cols = cols[order] - vals = vals[order] - - if not G.directed: - G_filtered = nx.Graph() - - while ( - nx.number_connected_components(G_filtered) != 1 - or len(G_filtered) < A_scaled.shape[0] - or not nx.is_connected(G_filtered) - ): - if i == len(rows) or G_filtered.number_of_nodes() == len(set(rows)): - break - - G_filtered.add_edge(rows[i], cols[i], weight=vals[i]) - i += 1 - - # add isolated nodes - G_filtered.add_nodes_from(range(G.n_nodes)) - - else: - raise NotImplementedError( - "DoublyStochasticBackbone currently supports only undirected graphs." - ) + G_filtered = self._extract_undirected_backbone(A_scaled, G.n_nodes) G_csr = nx.to_scipy_sparse_array( G_filtered, @@ -210,3 +186,41 @@ def apply(self, G: Graph) -> Graph: meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) + + @staticmethod + def _extract_undirected_backbone(A_scaled, n_nodes: int) -> nx.Graph: + """ + Extract an undirected backbone from normalized edge scores. + + Uses only the upper triangle of the adjacency matrix so that each + undirected edge is considered once. + """ + G_filtered = nx.Graph() + G_filtered.add_nodes_from(range(n_nodes)) + + if n_nodes <= 1 or A_scaled.nnz == 0: + return G_filtered + + # Use only one orientation per undirected edge and ignore self-loops. 
+ A_upper = sp.triu(A_scaled, k=1).tocoo() + + if A_upper.nnz == 0: + return G_filtered + + rows = A_upper.row + cols = A_upper.col + vals = A_upper.data + + order = np.argsort(vals)[::-1] + + for idx in order: + if nx.is_connected(G_filtered): + break + + G_filtered.add_edge( + int(rows[idx]), + int(cols[idx]), + weight=float(vals[idx]), + ) + + return G_filtered From b373fb6730c32dc837e74a415c66676a3e5a7853 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 17:38:32 +0200 Subject: [PATCH 09/10] add warnings and some linting --- .../operators/doubly_stochastic.py | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 997fbeb..d80c527 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -2,6 +2,7 @@ import networkx as nx import numpy as np import scipy.sparse as sp +import warnings from ..graph import Graph from .base import GraphOperator @@ -66,7 +67,7 @@ def apply(self, G: Graph) -> Graph: row_has_edges = np.array(A.sum(axis=1) > 0).T[0] col_has_edges = np.array(A.sum(axis=0) > 0)[0] - # The csc conversions above could be heavy; build once + # Build transposed CSR once for repeated column-sum products A_T = A.T.tocsr(copy=False) min_thres = 1.0 - self.tolerance @@ -75,12 +76,18 @@ def apply(self, G: Graph) -> Graph: # Parameter for stabilization MAX_FACTOR = 1e50 + converged = False for _ in range(self.max_iter): c[col_has_edges] = 1 / A_T.dot(r)[col_has_edges] r[row_has_edges] = 1 / A.dot(c)[row_has_edges] if np.any(np.abs(r) > MAX_FACTOR) or np.any(np.abs(c) > MAX_FACTOR): - break # avoid overflow; close enough + warnings.warn( + "DoublyStochasticNormalize stopped early because scaling factors " + "became very large. 
Result may not be doubly stochastic.", + RuntimeWarning, + ) + break # Convergence check (band and tol) row_sums = r * (A.dot(c)) @@ -104,8 +111,15 @@ def apply(self, G: Graph) -> Graph: cols_ok = True if rows_ok and cols_ok: + converged = True break + if not converged: + warnings.warn( + "DoublyStochasticNormalize did not converge within max_iter.", + RuntimeWarning, + ) + # Apply scaling once: A' = diag(r) * A * diag(c) (CSR-friendly) A_scaled = A.copy() @@ -115,6 +129,9 @@ def apply(self, G: Graph) -> Graph: # col scaling A_scaled.data *= c[A_scaled.indices] + # TODO: For undirected graphs, Graph.from_csr symmetrizes A_scaled again. + # This may change row/column sums after normalization. A future revision should + # either use symmetric scaling or allow Graph construction without re-symmetrizing. return Graph.from_csr( A_scaled, directed=G.directed, @@ -131,8 +148,9 @@ class DoublyStochasticBackbone(GraphOperator): Backbone extraction based on doubly-stochastic edge scores. First applies DoublyStochasticNormalize, then sorts edges by normalized score - and adds strongest edges until the graph becomes connected, following the - behavior of the previous DoublyStochastic(backbone_method=True) branch. + and adds strongest edges until the graph becomes connected, or until no candidate + edges remain. For disconnected inputs, the result is a strongest-edge forest + over the available components. 
References ---------- @@ -176,6 +194,8 @@ def apply(self, G: Graph) -> Graph: G_csr = nx.to_scipy_sparse_array( G_filtered, nodelist=list(range(G.n_nodes)), + weight="weight", + format="csr", ) return Graph.from_csr( @@ -214,7 +234,7 @@ def _extract_undirected_backbone(A_scaled, n_nodes: int) -> nx.Graph: order = np.argsort(vals)[::-1] for idx in order: - if nx.is_connected(G_filtered): + if nx.is_connected(G_filtered): # TODO: For large graphs, this check could be expensive break G_filtered.add_edge( From 27aef7b7fc9fcfbdcd53343457163d0e023e5733 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Fri, 24 Apr 2026 17:42:08 +0200 Subject: [PATCH 10/10] linting --- graphconstructor/operators/doubly_stochastic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index d80c527..406525c 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -1,8 +1,8 @@ +import warnings from dataclasses import dataclass import networkx as nx import numpy as np import scipy.sparse as sp -import warnings from ..graph import Graph from .base import GraphOperator