From 2ca3384342a3df21fdbc439fb95aeff01955eaa9 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Sun, 25 Feb 2024 13:09:47 -0500 Subject: [PATCH 01/10] Add issue test --- test/test_distances.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/test_distances.py b/test/test_distances.py index 48dbbd3..df93547 100644 --- a/test/test_distances.py +++ b/test/test_distances.py @@ -2,8 +2,7 @@ import pytest import scipy.sparse as sps -from persim import (bottleneck, gromov_hausdorff, heat, sliced_wasserstein, - wasserstein) +from persim import bottleneck, gromov_hausdorff, heat, sliced_wasserstein, wasserstein class TestBottleneck: @@ -103,6 +102,13 @@ def test_repeated(self): dist = bottleneck(G, H) assert dist == 0.5 + def test_one_diagonal(self): + # Issue #70: https://github.com/scikit-tda/persim/issues/70 + dgm1 = np.array([[0, 10]]) + dgm2 = np.array([[5, 5]]) + dist = bottleneck(dgm1, dgm2) + assert dist == 5.0 + class TestWasserstein: def test_single(self): From 9b00b19e0aac1f88be05cf66785552ec1783b830 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Sun, 25 Feb 2024 13:10:05 -0500 Subject: [PATCH 02/10] Remove modificatin of ds --- persim/bottleneck.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/persim/bottleneck.py b/persim/bottleneck.py index 16add2b..edb1f32 100644 --- a/persim/bottleneck.py +++ b/persim/bottleneck.py @@ -27,9 +27,9 @@ def bottleneck(dgm1, dgm2, matching=False): Parameters ----------- - dgm1: Mx(>=2) + dgm1: Mx(>=2) array of birth/death pairs for PD 1 - dgm2: Nx(>=2) + dgm2: Nx(>=2) array of birth/death paris for PD 2 matching: bool, default False if True, return matching infromation and cross-similarity matrix @@ -47,15 +47,13 @@ def bottleneck(dgm1, dgm2, matching=False): """ return_matching = matching - S = np.array(dgm1) M = min(S.shape[0], S.size) if S.size > 0: S = S[np.isfinite(S[:, 1]), :] if S.shape[0] < M: warnings.warn( - "dgm1 has points with non-finite death times;"+ - "ignoring those points" + "dgm1 has points with non-finite death times;" + "ignoring those points" ) M = S.shape[0] T = np.array(dgm2) @@ -64,8 +62,7 @@ def bottleneck(dgm1, dgm2, matching=False): T = T[np.isfinite(T[:, 1]), :] if T.shape[0] < N: warnings.warn( - "dgm2 has points with non-finite death times;"+ - "ignoring those points" + "dgm2 has points with non-finite death times;" + "ignoring those points" ) N = T.shape[0] @@ -101,7 +98,7 @@ def bottleneck(dgm1, dgm2, matching=False): # Step 2: Perform a binary search + Hopcroft Karp to find the # bottleneck distance - ds = np.sort(np.unique(D.flatten()))[0:-1] # Everything but np.inf + ds = np.sort(np.unique(D.flatten())) # [0:-1] # Everything but np.inf bdist = ds[-1] matching = {} while len(ds) >= 1: @@ -118,18 +115,18 @@ def bottleneck(dgm1, dgm2, matching=False): matching = res ds = ds[0:idx] else: - ds = ds[idx + 1::] + ds = ds[idx + 1 : :] if return_matching: matchidx = [] - for i in range(M+N): + for i in range(M + N): j = matching["{}".format(i)] d = D[i, j] if i < M: if j >= N: - j = -1 # Diagonal match from first persistence diagram + j = -1 # Diagonal match from first persistence diagram else: - if j >= N: # Diagonal to diagonal, so don't include this + if j >= N: # Diagonal to diagonal, so don't include this continue i = -1 matchidx.append([i, j, d]) From 6ba80431ab4d9cd41b3bb4b0ab7ee83def760454 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Sun, 25 Feb 2024 14:17:21 -0500 Subject: [PATCH 03/10] Bump version, fix broken badges in docs --- RELEASE.txt | 3 +++ docs/index.rst | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/RELEASE.txt b/RELEASE.txt index 876b141..8b1253f 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -1,3 +1,6 @@ +0.3.4 + - Fix bug of Issue #70 (https://github.com/scikit-tda/persim/issues/70). + 0.3.3 - Fix plotting methods of Persistence Landscapes, add doc strings. - Update to notebooks. diff --git a/docs/index.rst b/docs/index.rst index a8c12e3..bdb277e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -50,12 +50,10 @@ Documentation notebooks/Persistence Landscapes and Machine Learning -.. |Downloads| image:: https://pypip.in/download/persim/badge.svg +.. |Downloads| image:: https://img.shields.io/pypi/dm/persim :target: https://pypi.python.org/pypi/persim/ .. |PyPI version| image:: https://badge.fury.io/py/persim.svg :target: https://badge.fury.io/py/persim -.. |Build Status| image:: https://travis-ci.org/scikit-tda/persim.svg?branch=master - :target: https://travis-ci.org/scikit-tda/persim .. |Codecov| image:: https://codecov.io/gh/scikit-tda/persim/branch/master/graph/badge.svg :target: https://codecov.io/gh/scikit-tda/persim .. |License: MIT| image:: https://img.shields.io/badge/License-MIT-yellow.svg From 04165d1bd35a97f68ae87e8db5b7b916dfb1eb9d Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Sun, 25 Feb 2024 14:27:58 -0500 Subject: [PATCH 04/10] Bump version, add tests --- persim/_version.py | 2 +- test/test_distances.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/persim/_version.py b/persim/_version.py index e19434e..334b899 100644 --- a/persim/_version.py +++ b/persim/_version.py @@ -1 +1 @@ -__version__ = "0.3.3" +__version__ = "0.3.4" diff --git a/test/test_distances.py b/test/test_distances.py index df93547..370102c 100644 --- a/test/test_distances.py +++ b/test/test_distances.py @@ -106,8 +106,11 @@ def test_one_diagonal(self): # Issue #70: https://github.com/scikit-tda/persim/issues/70 dgm1 = np.array([[0, 10]]) dgm2 = np.array([[5, 5]]) - dist = bottleneck(dgm1, dgm2) + dist, returned_matching = bottleneck(dgm1, dgm2, matching=True) assert dist == 5.0 + np.testing.assert_array_equal( + returned_matching, np.array([[0.0, -1.0, 5.0], [-1.0, 0.0, 0.0]]) + ) class TestWasserstein: From cac22e02e4965096d81b721359b03bb3943db6a2 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Sun, 25 Feb 2024 16:15:22 -0500 Subject: [PATCH 05/10] Modify test for matching shape --- test/test_distances.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_distances.py b/test/test_distances.py index 370102c..9434691 100644 --- a/test/test_distances.py +++ b/test/test_distances.py @@ -108,9 +108,7 @@ def test_one_diagonal(self): dgm2 = np.array([[5, 5]]) dist, returned_matching = bottleneck(dgm1, dgm2, matching=True) assert dist == 5.0 - np.testing.assert_array_equal( - returned_matching, np.array([[0.0, -1.0, 5.0], [-1.0, 0.0, 0.0]]) - ) + assert returned_matching.shape[1] == 3 class TestWasserstein: From 19c28d566234c7238f086f6e737b344ec4d49517 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Sun, 25 Feb 2024 16:16:19 -0500 Subject: [PATCH 06/10] Remove old travis badge --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index bdb277e..ee96155 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,4 +1,4 @@ -|PyPI version| |Downloads| |Build Status| |Codecov| |License: MIT| +|PyPI version| |Downloads| |Codecov| |License: MIT| Persim is a Python package for many tools used in analyzing Persistence Diagrams. It currently includes implementations of most of the popular methods of working with persistence diagrams, including From 68d1c89e930055bb46234b2467368cda705f1e40 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Wed, 1 Oct 2025 16:50:00 -0400 Subject: [PATCH 07/10] Update github action to use ruff, codecov token Add ruff dep Run ruff format, lint Run ruff More linting, update pyproject.toml --- .github/workflows/python-test.yml | 50 ++- ...assification with persistence images.ipynb | 39 ++- ...ntiation with Persistence Landscapes.ipynb | 76 +++-- ...ence Landscapes and Machine Learning.ipynb | 142 ++++---- .../Persistence barcode measure.ipynb | 21 +- docs/notebooks/Persistence images.ipynb | 113 ++++--- docs/notebooks/Persistence landscapes.ipynb | 37 +-- docs/notebooks/distances.ipynb | 79 +++-- persim/__init__.py | 31 +- persim/bottleneck.py | 6 +- persim/gromov_hausdorff.py | 302 +++++++++++------- persim/heat.py | 5 +- persim/images.py | 2 - persim/images_kernels.py | 216 +++++++++---- persim/images_weights.py | 25 +- persim/landscapes/__init__.py | 15 +- persim/landscapes/approximate.py | 2 +- persim/landscapes/auxiliary.py | 18 +- persim/landscapes/base.py | 9 +- persim/landscapes/exact.py | 5 +- persim/landscapes/tools.py | 20 +- persim/landscapes/transformer.py | 5 +- persim/landscapes/visuals.py | 20 +- persim/persistent_entropy.py | 61 ++-- persim/sliced_wasserstein.py | 41 +-- persim/visuals.py | 78 +++-- persim/wasserstein.py | 35 +- pyproject.toml | 9 + test/__init__.py | 3 +- test/test_landscapes.py | 7 +- test/test_persim.py | 13 +- test/test_persistence_imager.py | 1 - test/test_persistent_entropy.py | 1 - test/test_visuals.py | 1 - 34 files changed, 885 insertions(+), 603 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index ccf085f..9d20980 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,41 +1,63 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Python application +name: Tests and coverage on: push: branches: [master] pull_request: branches: [master] + workflow_dispatch: jobs: + lint_and_format_check_with_ruff: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - uses: astral-sh/ruff-action@v3 + with: + args: "format --check" + + - uses: astral-sh/ruff-action@v3 + with: + args: "check" + test: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 + - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pip install -e ".[testing]" - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest run: | - pytest --cov persim + pytest --cov=persim --cov-report=xml + - name: Upload coverage results - run: | - bash <(curl -s https://codecov.io/bash) + if: matrix.python-version == '3.12' + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + name: persim + verbose: true + fail_ci_if_error: true diff --git a/docs/notebooks/Classification with persistence images.ipynb b/docs/notebooks/Classification with persistence images.ipynb index d301140..4e51278 100644 --- a/docs/notebooks/Classification with persistence images.ipynb +++ b/docs/notebooks/Classification with persistence images.ipynb @@ -28,7 +28,6 @@ "import matplotlib.pyplot as plt\n", "\n", "from ripser import Rips\n", - "from persim import PersImage\n", "from persim import PersistenceImager" ] }, @@ -51,17 +50,23 @@ "N_per_class = int(N / 2)\n", "N_in_class = 400\n", "\n", + "\n", "def noise(N, scale):\n", " return scale * np.random.random((N, 2))\n", "\n", + "\n", "def circle(N, scale, offset):\n", - " return offset + scale * datasets.make_circles(n_samples=N, factor=0.4, noise=0.05)[0]\n", - " \n", + " return (\n", + " offset + scale * datasets.make_circles(n_samples=N, factor=0.4, noise=0.05)[0]\n", + " )\n", + "\n", + "\n", "just_noise = [noise(N_in_class, 150) for _ in range(N_per_class)]\n", "\n", "half = int(N_in_class / 2)\n", - "with_circle = [np.concatenate((circle(half, 50, 70), noise(half, 150)))\n", - " for _ in range(N_per_class)]\n", + "with_circle = [\n", + " np.concatenate((circle(half, 50, 70), noise(half, 150))) for _ in range(N_per_class)\n", + "]\n", "\n", "datas = []\n", "datas.extend(just_noise)\n", @@ -93,17 +98,17 @@ "source": [ "# Visualize the data\n", "fig, axs = plt.subplots(1, 2)\n", - "fig.set_size_inches(10,5)\n", + "fig.set_size_inches(10, 5)\n", "\n", - "xs, ys = just_noise[0][:,0], just_noise[0][:,1]\n", + "xs, ys = just_noise[0][:, 0], just_noise[0][:, 1]\n", "axs[0].scatter(xs, ys)\n", "axs[0].set_title(\"Example noise dataset\")\n", - "axs[0].set_aspect('equal', 'box')\n", + "axs[0].set_aspect(\"equal\", \"box\")\n", "\n", - "xs_, ys_ = with_circle[0][:,0], with_circle[0][:,1]\n", + "xs_, ys_ = with_circle[0][:, 0], with_circle[0][:, 1]\n", "axs[1].scatter(xs_, ys_)\n", "axs[1].set_title(\"Example noise with circle dataset\")\n", - "axs[1].set_aspect('equal', 'box')\n", + "axs[1].set_aspect(\"equal\", \"box\")\n", "\n", "fig.tight_layout()" ] @@ -155,7 +160,7 @@ } ], "source": [ - "plt.figure(figsize=(12,6))\n", + "plt.figure(figsize=(12, 6))\n", "plt.subplot(121)\n", "\n", "rips.plot(diagrams_h1[0], show=False)\n", @@ -234,16 +239,16 @@ } ], "source": [ - "plt.figure(figsize=(15,7.5))\n", + "plt.figure(figsize=(15, 7.5))\n", "\n", "for i in range(4):\n", - " ax = plt.subplot(240+i+1)\n", + " ax = plt.subplot(240 + i + 1)\n", " pimgr.plot_image(imgs[i], ax)\n", " plt.title(\"PI of $H_1$ for noise\")\n", "\n", "for i in range(4):\n", - " ax = plt.subplot(240+i+5)\n", - " pimgr.plot_image(imgs[-(i+1)], ax)\n", + " ax = plt.subplot(240 + i + 5)\n", + " pimgr.plot_image(imgs[-(i + 1)], ax)\n", " plt.title(\"PI of $H_1$ for circle w/ noise\")" ] }, @@ -260,7 +265,9 @@ "metadata": {}, "outputs": [], "source": [ - "X_train, X_test, y_train, y_test = train_test_split(imgs_array, labels, test_size=0.40, random_state=42)" + "X_train, X_test, y_train, y_test = train_test_split(\n", + " imgs_array, labels, test_size=0.40, random_state=42\n", + ")" ] }, { diff --git a/docs/notebooks/Differentiation with Persistence Landscapes.ipynb b/docs/notebooks/Differentiation with Persistence Landscapes.ipynb index 037c767..b9146d8 100644 --- a/docs/notebooks/Differentiation with Persistence Landscapes.ipynb +++ b/docs/notebooks/Differentiation with Persistence Landscapes.ipynb @@ -37,16 +37,15 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", "import random\n", "\n", "from ripser import ripser\n", "from persim.landscapes import (\n", - " PersLandscapeApprox, \n", - " average_approx, \n", - " snap_pl, \n", - " plot_landscape, \n", - " plot_landscape_simple\n", + " PersLandscapeApprox,\n", + " average_approx,\n", + " snap_pl,\n", + " plot_landscape,\n", + " plot_landscape_simple,\n", ")\n", "from tadasets import dsphere" ] @@ -100,15 +99,19 @@ "sph3_pl1 = []\n", "\n", "for _ in range(num_runs):\n", - " sph2 = dsphere(n=num_pts, d=2)/1.3333 # sample points, scaling appropriately\n", - " sph2_dgm = ripser(sph2, maxdim=2)['dgms'] # compute PH0, PH1, PH2\n", - " sph2_pl = PersLandscapeApprox(dgms=sph2_dgm, hom_deg=1) # compute persistence landscape\n", + " sph2 = dsphere(n=num_pts, d=2) / 1.3333 # sample points, scaling appropriately\n", + " sph2_dgm = ripser(sph2, maxdim=2)[\"dgms\"] # compute PH0, PH1, PH2\n", + " sph2_pl = PersLandscapeApprox(\n", + " dgms=sph2_dgm, hom_deg=1\n", + " ) # compute persistence landscape\n", " sph2_pl1.append(sph2_pl)\n", - " \n", - " sph3 = dsphere(n=num_pts, d=3)/1.3581 # sample points, scaling appropriately\n", - " sph3_dgm = ripser(sph3, maxdim=2)['dgms'] # compute PH0, PH1, PH2\n", - " sph3_pl = PersLandscapeApprox(dgms=sph3_dgm, hom_deg=1) # compute persistence landscape\n", - " sph3_pl1.append(sph3_pl)\n" + "\n", + " sph3 = dsphere(n=num_pts, d=3) / 1.3581 # sample points, scaling appropriately\n", + " sph3_dgm = ripser(sph3, maxdim=2)[\"dgms\"] # compute PH0, PH1, PH2\n", + " sph3_pl = PersLandscapeApprox(\n", + " dgms=sph3_dgm, hom_deg=1\n", + " ) # compute persistence landscape\n", + " sph3_pl1.append(sph3_pl)" ] }, { @@ -138,7 +141,7 @@ "source": [ "avg_sph2 = average_approx(sph2_pl1)\n", "avg_sph3 = average_approx(sph3_pl1)\n", - "print(avg_sph2, '\\n') \n", + "print(avg_sph2, \"\\n\")\n", "print(avg_sph3)" ] }, @@ -169,7 +172,7 @@ "true_diff_pl = avg_sph2_snapped - avg_sph3_snapped\n", "significance = true_diff_pl.sup_norm()\n", "\n", - "print(f'The threshold for significance is {significance}.')" + "print(f\"The threshold for significance is {significance}.\")" ] }, { @@ -201,9 +204,11 @@ ], "source": [ "# view figures in notebook\n", - "# %matplotlib inline \n", + "# %matplotlib inline\n", "\n", - "plot_landscape_simple(avg_sph2_snapped,title='Average $PL_1$ for $S^2$.',depth_range=range(13))" + "plot_landscape_simple(\n", + " avg_sph2_snapped, title=\"Average $PL_1$ for $S^2$.\", depth_range=range(13)\n", + ")" ] }, { @@ -226,7 +231,9 @@ } ], "source": [ - "plot_landscape_simple(avg_sph3_snapped,title='Average $PL_1$ for $S^2$.',depth_range=range(13))" + "plot_landscape_simple(\n", + " avg_sph3_snapped, title=\"Average $PL_1$ for $S^2$.\", depth_range=range(13)\n", + ")" ] }, { @@ -249,7 +256,9 @@ } ], "source": [ - "plot_landscape_simple(true_diff_pl,title='Difference of average PLs',depth_range=range(13))" + "plot_landscape_simple(\n", + " true_diff_pl, title=\"Difference of average PLs\", depth_range=range(13)\n", + ")" ] }, { @@ -280,7 +289,7 @@ } ], "source": [ - "plot_landscape(true_diff_pl,title='Difference of average PLs')" + "plot_landscape(true_diff_pl, title=\"Difference of average PLs\")" ] }, { @@ -310,24 +319,27 @@ "sig_count = 0\n", "\n", "for shuffle in range(num_perms):\n", - " A_indices = random.sample(range(2*num_runs),num_runs)\n", - " B_indices = [_ for _ in range(2*num_runs) if _ not in A_indices]\n", - " \n", + " A_indices = random.sample(range(2 * num_runs), num_runs)\n", + " B_indices = [_ for _ in range(2 * num_runs) if _ not in A_indices]\n", + "\n", " A_pl = [comb_pl[i] for i in A_indices]\n", " B_pl = [comb_pl[j] for j in B_indices]\n", - " \n", + "\n", " A_avg = average_approx(A_pl)\n", " B_avg = average_approx(B_pl)\n", - " [A_avg_sn, B_avg_sn] = snap_pl([A_avg,B_avg])\n", - " \n", + " [A_avg_sn, B_avg_sn] = snap_pl([A_avg, B_avg])\n", + "\n", " shuff_diff = A_avg_sn - B_avg_sn\n", - " if (shuff_diff.sup_norm() >= significance): sig_count += 1\n", + " if shuff_diff.sup_norm() >= significance:\n", + " sig_count += 1\n", "\n", - "pval = sig_count/num_perms\n", + "pval = sig_count / num_perms\n", "\n", - "print(f'There were {sig_count} shuffles out of {num_perms} that',\n", - " 'were more significant than the true labelling. Thus, the',\n", - " f'p-value is {pval}.')" + "print(\n", + " f\"There were {sig_count} shuffles out of {num_perms} that\",\n", + " \"were more significant than the true labelling. Thus, the\",\n", + " f\"p-value is {pval}.\",\n", + ")" ] }, { diff --git a/docs/notebooks/Persistence Landscapes and Machine Learning.ipynb b/docs/notebooks/Persistence Landscapes and Machine Learning.ipynb index d2aa00d..df93f6b 100644 --- a/docs/notebooks/Persistence Landscapes and Machine Learning.ipynb +++ b/docs/notebooks/Persistence Landscapes and Machine Learning.ipynb @@ -27,7 +27,6 @@ "source": [ "# Import general utilities\n", "import numpy as np\n", - "import matplotlib\n", "import matplotlib.pyplot as plt\n", "\n", "# Import TDA utilities\n", @@ -71,7 +70,7 @@ ], "source": [ "# Instantiate Vietoris-Rips solver\n", - "rips = Rips(maxdim = 2)" + "rips = Rips(maxdim=2)" ] }, { @@ -136,11 +135,17 @@ "fig, axs = plt.subplots(1, 2, dpi=300)\n", "fig.set_size_inches(10, 5)\n", "\n", - "plot_landscape_simple(PersLandscapeExact(dgms_torus, hom_deg=1),\n", - " title=\"Degree 1 Persistence Landscape of Torus\", ax=axs[0])\n", + "plot_landscape_simple(\n", + " PersLandscapeExact(dgms_torus, hom_deg=1),\n", + " title=\"Degree 1 Persistence Landscape of Torus\",\n", + " ax=axs[0],\n", + ")\n", "\n", - "plot_landscape_simple(PersLandscapeExact(dgms_sphere, hom_deg=1),\n", - " title=\"Degree 1 Persistence Landscape of Sphere\", ax=axs[1])\n", + "plot_landscape_simple(\n", + " PersLandscapeExact(dgms_sphere, hom_deg=1),\n", + " title=\"Degree 1 Persistence Landscape of Sphere\",\n", + " ax=axs[1],\n", + ")\n", "\n", "fig.tight_layout()" ] @@ -178,33 +183,37 @@ " # Resample data\n", " _data_torus = torus(n=100, c=2, a=1)\n", " _data_sphere = sphere(n=100, r=2)\n", - " \n", + "\n", " # Compute persistence diagrams\n", " dgm_torus = rips.fit_transform(_data_torus)\n", - " \n", + "\n", " # Instantiate persistence landscape transformer\n", - " torus_landscaper = PersistenceLandscaper(hom_deg=1, start=0, stop=2.0, num_steps=500, flatten=True)\n", - " \n", + " torus_landscaper = PersistenceLandscaper(\n", + " hom_deg=1, start=0, stop=2.0, num_steps=500, flatten=True\n", + " )\n", + "\n", " # Compute flattened persistence landscape\n", " torus_flat = torus_landscaper.fit_transform(dgm_torus)\n", - " \n", + "\n", " landscapes_torus.append(torus_flat)\n", - " \n", + "\n", " # Compute persistence diagrams\n", " dgm_sphere = rips.fit_transform(_data_sphere)\n", - " \n", + "\n", " # Instantiate persistence landscape transformer\n", - " sphere_landscaper = PersistenceLandscaper(hom_deg=1, start=0, stop=2.0, num_steps=500, flatten=True)\n", - " \n", + " sphere_landscaper = PersistenceLandscaper(\n", + " hom_deg=1, start=0, stop=2.0, num_steps=500, flatten=True\n", + " )\n", + "\n", " # Compute flattened persistence landscape\n", " sphere_flat = sphere_landscaper.fit_transform(dgm_sphere)\n", - " \n", - " landscapes_sphere.append(sphere_flat) \n", + "\n", + " landscapes_sphere.append(sphere_flat)\n", "\n", "landscapes_torus = np.array(landscapes_torus, dtype=object)\n", "landscapes_sphere = np.array(landscapes_sphere, dtype=object)\n", - "print('Torus:', np.shape(landscapes_torus))\n", - "print('Sphere:', np.shape(landscapes_sphere))" + "print(\"Torus:\", np.shape(landscapes_torus))\n", + "print(\"Sphere:\", np.shape(landscapes_sphere))" ] }, { @@ -235,7 +244,7 @@ "u = np.max([len(a) for a in landscapes_torus])\n", "v = np.max([len(a) for a in landscapes_sphere])\n", "\n", - "w = np.max([u,v])\n", + "w = np.max([u, v])\n", "\n", "# Instantiate zero-padded arrays\n", "ls_torus = np.zeros((100, w))\n", @@ -243,12 +252,12 @@ "\n", "# Populate arrays\n", "for i in range(len(landscapes_torus)):\n", - " ls_torus[i, 0:len(landscapes_torus[i])] = landscapes_torus[i]\n", - " ls_sphere[i, 0:len(landscapes_sphere[i])] = landscapes_sphere[i]\n", + " ls_torus[i, 0 : len(landscapes_torus[i])] = landscapes_torus[i]\n", + " ls_sphere[i, 0 : len(landscapes_sphere[i])] = landscapes_sphere[i]\n", "\n", "\n", - "print('Torus:', ls_torus.shape)\n", - "print('Sphere:', ls_sphere.shape)" + "print(\"Torus:\", ls_torus.shape)\n", + "print(\"Sphere:\", ls_sphere.shape)" ] }, { @@ -305,8 +314,8 @@ } ], "source": [ - "print('Singular values for torus dataset:', pca_torus.singular_values_)\n", - "print('Singular values for sphere dataset:', pca_sphere.singular_values_)" + "print(\"Singular values for torus dataset:\", pca_torus.singular_values_)\n", + "print(\"Singular values for sphere dataset:\", pca_sphere.singular_values_)" ] }, { @@ -338,10 +347,10 @@ "source": [ "# Plot projection of data onto the first two principal components\n", "plt.figure()\n", - "plt.scatter(comp_sphere[0], comp_sphere[1], label='Sphere', alpha=0.4)\n", - "plt.scatter(comp_torus[0], comp_torus[1], label='Torus', alpha=0.4)\n", - "plt.xlabel('Principal Component 1')\n", - "plt.ylabel('Principal Component 2')\n", + "plt.scatter(comp_sphere[0], comp_sphere[1], label=\"Sphere\", alpha=0.4)\n", + "plt.scatter(comp_torus[0], comp_torus[1], label=\"Torus\", alpha=0.4)\n", + "plt.xlabel(\"Principal Component 1\")\n", + "plt.ylabel(\"Principal Component 2\")\n", "plt.legend()" ] }, @@ -359,8 +368,10 @@ "outputs": [], "source": [ "# Produce lists of points\n", - "pts_torus = [[comp_torus[0,i], comp_torus[1,i]] for i in range(len(comp_torus[0]))]\n", - "pts_sphere = [[comp_sphere[0,i], comp_sphere[1,i]] for i in range(len(comp_sphere[0]))]\n", + "pts_torus = [[comp_torus[0, i], comp_torus[1, i]] for i in range(len(comp_torus[0]))]\n", + "pts_sphere = [\n", + " [comp_sphere[0, i], comp_sphere[1, i]] for i in range(len(comp_sphere[0]))\n", + "]\n", "\n", "# Instantiate indicator functions\n", "chi_torus = np.zeros(len(pts_torus))\n", @@ -371,7 +382,7 @@ "\n", "for p in pts_torus:\n", " pts.append(p)\n", - " \n", + "\n", "for p in pts_sphere:\n", " pts.append(p)\n", "\n", @@ -403,7 +414,7 @@ ], "source": [ "# Split points and indicator arrays\n", - "P_train, P_test, c_train, c_test = train_test_split(pts, chi, train_size=.8)\n", + "P_train, P_test, c_train, c_test = train_test_split(pts, chi, train_size=0.8)\n", "\n", "# Instantiate support vector classifier\n", "clf = svm.SVC()\n", @@ -412,7 +423,7 @@ "clf.fit(P_train, c_train)\n", "\n", "# Evaluate model performance using accuracy between ground truth data and predicted data\n", - "print(f'Model accuracy: {metrics.accuracy_score(c_test, clf.predict(P_test)):.2f}')" + "print(f\"Model accuracy: {metrics.accuracy_score(c_test, clf.predict(P_test)):.2f}\")" ] }, { @@ -464,8 +475,8 @@ " ls_sphere_trim[i, 0:1000] = np.zeros((1000,))\n", "\n", "\n", - "print('Torus:', ls_torus_trim.shape)\n", - "print('Sphere:', ls_sphere_trim.shape)\n" + "print(\"Torus:\", ls_torus_trim.shape)\n", + "print(\"Sphere:\", ls_sphere_trim.shape)" ] }, { @@ -515,11 +526,11 @@ "\n", "# Plot projection of data onto the first two principal components\n", "plt.figure()\n", - "plt.scatter(comp_sphere_trim[0], comp_sphere_trim[1], label='Sphere', alpha=0.4)\n", - "plt.scatter(comp_torus_trim[0], comp_torus_trim[1], label='Torus', alpha=0.4)\n", - "plt.xlabel('Principal Component 1')\n", - "plt.ylabel('Principal Component 2')\n", - "plt.title('PCA Projection with Trimmed Diagrams')\n", + "plt.scatter(comp_sphere_trim[0], comp_sphere_trim[1], label=\"Sphere\", alpha=0.4)\n", + "plt.scatter(comp_torus_trim[0], comp_torus_trim[1], label=\"Torus\", alpha=0.4)\n", + "plt.xlabel(\"Principal Component 1\")\n", + "plt.ylabel(\"Principal Component 2\")\n", + "plt.title(\"PCA Projection with Trimmed Diagrams\")\n", "plt.legend()" ] }, @@ -537,8 +548,10 @@ "outputs": [], "source": [ "# Produce lists of points\n", - "pts_torus = [[comp_torus[0,i], comp_torus[1,i]] for i in range(len(comp_torus[0]))]\n", - "pts_sphere = [[comp_sphere[0,i], comp_sphere[1,i]] for i in range(len(comp_sphere[0]))]\n", + "pts_torus = [[comp_torus[0, i], comp_torus[1, i]] for i in range(len(comp_torus[0]))]\n", + "pts_sphere = [\n", + " [comp_sphere[0, i], comp_sphere[1, i]] for i in range(len(comp_sphere[0]))\n", + "]\n", "\n", "# Instantiate indicator functions\n", "chi_torus = np.zeros(len(pts_torus))\n", @@ -549,7 +562,7 @@ "\n", "for p in pts_torus:\n", " pts.append(p)\n", - " \n", + "\n", "for p in pts_sphere:\n", " pts.append(p)\n", "\n", @@ -581,8 +594,14 @@ ], "source": [ "# Produce lists of points\n", - "pts_torus_trim = [[comp_torus_trim[0,i], comp_torus_trim[1,i]] for i in range(len(comp_torus_trim[0]))]\n", - "pts_sphere_trim = [[comp_sphere_trim[0,i], comp_sphere_trim[1,i]] for i in range(len(comp_sphere_trim[0]))]\n", + "pts_torus_trim = [\n", + " [comp_torus_trim[0, i], comp_torus_trim[1, i]]\n", + " for i in range(len(comp_torus_trim[0]))\n", + "]\n", + "pts_sphere_trim = [\n", + " [comp_sphere_trim[0, i], comp_sphere_trim[1, i]]\n", + " for i in range(len(comp_sphere_trim[0]))\n", + "]\n", "\n", "# Instantiate indicator functions\n", "chi_torus_trim = np.zeros(len(pts_torus_trim))\n", @@ -593,7 +612,7 @@ "\n", "for p in pts_torus_trim:\n", " pts_trim.append(p)\n", - " \n", + "\n", "for p in pts_sphere_trim:\n", " pts_trim.append(p)\n", "\n", @@ -603,7 +622,9 @@ "chi_trim = np.hstack((chi_torus_trim, chi_sphere_trim))\n", "\n", "# Split points and indicator arrays\n", - "P_train_trim, P_test_trim, c_train_trim, c_test_trim = train_test_split(pts_trim, chi_trim, train_size=.8)\n", + "P_train_trim, P_test_trim, c_train_trim, c_test_trim = train_test_split(\n", + " pts_trim, chi_trim, train_size=0.8\n", + ")\n", "\n", "# Instantiate support vector classifier\n", "clf_trim = svm.SVC()\n", @@ -612,7 +633,9 @@ "clf_trim.fit(P_train_trim, c_train_trim)\n", "\n", "# Evaluate model performance using accuracy between ground truth data and predicted data\n", - "print(f'Model accuracy: {metrics.accuracy_score(c_test_trim, clf_trim.predict(P_test_trim)):.2f}')" + "print(\n", + " f\"Model accuracy: {metrics.accuracy_score(c_test_trim, clf_trim.predict(P_test_trim)):.2f}\"\n", + ")" ] }, { @@ -660,8 +683,13 @@ "\n", "\n", "# Produce lists of points\n", - "pts_torus_mcomp = [[comp_torus_mcomp[j,i] for j in range(6)] for i in range(len(comp_torus_mcomp[0]))]\n", - "pts_sphere_mcomp = [[comp_sphere_mcomp[j,i] for j in range(6)] for i in range(len(comp_sphere_mcomp[0]))]\n", + "pts_torus_mcomp = [\n", + " [comp_torus_mcomp[j, i] for j in range(6)] for i in range(len(comp_torus_mcomp[0]))\n", + "]\n", + "pts_sphere_mcomp = [\n", + " [comp_sphere_mcomp[j, i] for j in range(6)]\n", + " for i in range(len(comp_sphere_mcomp[0]))\n", + "]\n", "\n", "# Instantiate indicator functions\n", "chi_torus_mcomp = np.zeros(len(pts_torus_mcomp))\n", @@ -672,7 +700,7 @@ "\n", "for p in pts_torus_mcomp:\n", " pts_mcomp.append(p)\n", - " \n", + "\n", "for p in pts_sphere_mcomp:\n", " pts_mcomp.append(p)\n", "\n", @@ -682,7 +710,9 @@ "chi_mcomp = np.hstack((chi_torus_mcomp, chi_sphere_mcomp))\n", "\n", "# Split points and indicator arrays\n", - "P_train_mcomp, P_test_mcomp, c_train_mcomp, c_test_mcomp = train_test_split(pts_mcomp, chi_mcomp, train_size=.8)\n", + "P_train_mcomp, P_test_mcomp, c_train_mcomp, c_test_mcomp = train_test_split(\n", + " pts_mcomp, chi_mcomp, train_size=0.8\n", + ")\n", "\n", "# Instantiate support vector classifier\n", "clf_mcomp = svm.SVC()\n", @@ -691,7 +721,9 @@ "clf_mcomp.fit(P_train_mcomp, c_train_mcomp)\n", "\n", "# Evaluate model performance using accuracy between ground truth data and predicted data\n", - "print(f'Model accuracy: {metrics.accuracy_score(c_test_mcomp, clf_mcomp.predict(P_test_mcomp)):.2f}')\n" + "print(\n", + " f\"Model accuracy: {metrics.accuracy_score(c_test_mcomp, clf_mcomp.predict(P_test_mcomp)):.2f}\"\n", + ")" ] } ], diff --git a/docs/notebooks/Persistence barcode measure.ipynb b/docs/notebooks/Persistence barcode measure.ipynb index f6e3d2d..9245935 100644 --- a/docs/notebooks/Persistence barcode measure.ipynb +++ b/docs/notebooks/Persistence barcode measure.ipynb @@ -16,7 +16,6 @@ "import numpy as np\n", "import ripser\n", "import matplotlib.pyplot as plt\n", - "import random\n", "from persim.persistent_entropy import *\n", "from scipy import stats" ] @@ -26,9 +25,7 @@ "execution_count": 2, "metadata": {}, "outputs": [], - "source": [ - "import cechmate" - ] + "source": [] }, { "cell_type": "markdown", @@ -58,12 +55,12 @@ "sigma = 0.25\n", "l1 = []\n", "for i in range(10):\n", - " d1 = np.random.normal(mu, sigma, (50,2))\n", + " d1 = np.random.normal(mu, sigma, (50, 2))\n", " l1.append(d1)\n", "# Uniform point clouds\n", "l2 = []\n", "for i in range(10):\n", - " d2 = np.random.random((50,2))\n", + " d2 = np.random.random((50, 2))\n", " l2.append(d2)" ] }, @@ -87,11 +84,11 @@ ], "source": [ "# Example of normal and uniform point clouds\n", - "plt.scatter(d1[:,0], d1[:,1], label=\"Normal distribution\")\n", - "plt.scatter(d2[:,0], d2[:,1], label=\"Uniform distribution\")\n", - "plt.axis('equal')\n", + "plt.scatter(d1[:, 0], d1[:, 1], label=\"Normal distribution\")\n", + "plt.scatter(d2[:, 0], d2[:, 1], label=\"Uniform distribution\")\n", + "plt.axis(\"equal\")\n", "plt.legend()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -113,8 +110,8 @@ "dgm_d1 = []\n", "dgm_d2 = []\n", "for i in range(len(l1)):\n", - " dgm_d1.append(ripser.ripser(l1[i])['dgms'][p])\n", - " dgm_d2.append(ripser.ripser(l2[i])['dgms'][p])\n", + " dgm_d1.append(ripser.ripser(l1[i])[\"dgms\"][p])\n", + " dgm_d2.append(ripser.ripser(l2[i])[\"dgms\"][p])\n", "# Calculate their persistent entropy.\n", "e1 = persistent_entropy(dgm_d1)\n", "e2 = persistent_entropy(dgm_d2)" diff --git a/docs/notebooks/Persistence images.ipynb b/docs/notebooks/Persistence images.ipynb index 4be119a..d1a8f32 100644 --- a/docs/notebooks/Persistence images.ipynb +++ b/docs/notebooks/Persistence images.ipynb @@ -20,12 +20,9 @@ "metadata": {}, "outputs": [], "source": [ - "from itertools import product\n", - "\n", "import time\n", "import numpy as np\n", "from sklearn import datasets\n", - "from scipy.stats import multivariate_normal as mvn\n", "import matplotlib.pyplot as plt\n", "\n", "from ripser import Rips\n", @@ -54,7 +51,7 @@ ], "source": [ "# Printing a PersistenceImager() object will print its defining attributes\n", - "pimgr = PersistenceImager(pixel_size=0.2, birth_range=(0,1))\n", + "pimgr = PersistenceImager(pixel_size=0.2, birth_range=(0, 1))\n", "print(pimgr)" ] }, @@ -96,13 +93,15 @@ } ], "source": [ - "# The `fit()` method can be called on one or more (*,2) numpy arrays to automatically determine the miniumum birth and \n", - "# persistence ranges needed to capture all persistence pairs. The ranges and resolution are automatically adjusted to \n", + "# The `fit()` method can be called on one or more (*,2) numpy arrays to automatically determine the miniumum birth and\n", + "# persistence ranges needed to capture all persistence pairs. The ranges and resolution are automatically adjusted to\n", "# accomodate the specified pixel size.\n", "pimgr = PersistenceImager(pixel_size=0.5)\n", - "pdgms = [np.array([[0.5, 0.8], [0.7, 2.2], [2.5, 4.0]]),\n", - " np.array([[0.1, 0.2], [3.1, 3.3], [1.6, 2.9]]),\n", - " np.array([[0.2, 1.5], [0.4, 0.6], [0.2, 2.6]])]\n", + "pdgms = [\n", + " np.array([[0.5, 0.8], [0.7, 2.2], [2.5, 4.0]]),\n", + " np.array([[0.1, 0.2], [3.1, 3.3], [1.6, 2.9]]),\n", + " np.array([[0.2, 1.5], [0.4, 0.6], [0.2, 2.6]]),\n", + "]\n", "pimgr.fit(pdgms, skew=True)\n", "print(pimgr)\n", "print(pimgr.resolution)" @@ -131,7 +130,7 @@ ], "source": [ "# The `transform()` method can then be called on one or more (*,2) numpy arrays to generate persistence images from diagrams.\n", - "# The option `skew=True` specifies that the diagrams are currently in birth-death coordinates and must first be transformed \n", + "# The option `skew=True` specifies that the diagrams are currently in birth-death coordinates and must first be transformed\n", "# to birth-persistence coordinates.\n", "pimgs = pimgr.transform(pdgms, skew=True)\n", "pimgs[0]" @@ -157,7 +156,7 @@ ], "source": [ "# The `plot_diagram()` and `plot_image()` methods can be used to visualize persistence diagrams and images\n", - "fig, axs = plt.subplots(1, 3, figsize=(10,5))\n", + "fig, axs = plt.subplots(1, 3, figsize=(10, 5))\n", "\n", "axs[0].set_title(\"Original Diagram\")\n", "pimgr.plot_diagram(pdgms[0], skew=False, ax=axs[0])\n", @@ -205,18 +204,22 @@ ], "source": [ "# lots of random noise and 2 circles\n", - "data = np.concatenate([150 * np.random.random((300,2)), \n", - " 10 + 10 * datasets.make_circles(n_samples=100)[0],\n", - " 100 + 20 * datasets.make_circles(n_samples=100)[0]])\n", + "data = np.concatenate(\n", + " [\n", + " 150 * np.random.random((300, 2)),\n", + " 10 + 10 * datasets.make_circles(n_samples=100)[0],\n", + " 100 + 20 * datasets.make_circles(n_samples=100)[0],\n", + " ]\n", + ")\n", "\n", "rips = Rips()\n", "dgms = rips.fit_transform(data)\n", "H0_dgm = dgms[0]\n", "H1_dgm = dgms[1]\n", "\n", - "plt.figure(figsize=(10,5))\n", + "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", - "plt.scatter(data[:,0], data[:,1], s=4)\n", + "plt.scatter(data[:, 0], data[:, 1], s=4)\n", "plt.title(\"Scatter plot of noisy data with some circles\")\n", "\n", "plt.subplot(122)\n", @@ -253,16 +256,16 @@ "pimgr = PersistenceImager(pixel_size=1)\n", "pimgr.fit(H1_dgm)\n", "\n", - "fig, axs = plt.subplots(1, 3, figsize=(20,5))\n", + "fig, axs = plt.subplots(1, 3, figsize=(20, 5))\n", "pimgr.plot_diagram(H1_dgm, skew=True, ax=axs[0])\n", - "axs[0].set_title('Diagram', fontsize=16)\n", + "axs[0].set_title(\"Diagram\", fontsize=16)\n", "\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[1])\n", - "axs[1].set_title('Pixel Size: 1', fontsize=16)\n", + "axs[1].set_title(\"Pixel Size: 1\", fontsize=16)\n", "\n", - "pimgr.pixel_size = 0.1 \n", + "pimgr.pixel_size = 0.1\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[2])\n", - "axs[2].set_title('Pixel Size: 0.1', fontsize=16)\n", + "axs[2].set_title(\"Pixel Size: 0.1\", fontsize=16)\n", "\n", "plt.tight_layout()" ] @@ -294,20 +297,20 @@ "# We first import one of the implemented weighting functions, a peicewise linear ramp\n", "from persim.images_weights import linear_ramp\n", "\n", - "pimgr.pixel_size = 1 \n", + "pimgr.pixel_size = 1\n", "pimgr.weight = linear_ramp\n", - "pimgr.weight_params = {'low':0.0, 'high':1.0, 'start':0.0, 'end':10.0}\n", + "pimgr.weight_params = {\"low\": 0.0, \"high\": 1.0, \"start\": 0.0, \"end\": 10.0}\n", "\n", - "fig, axs = plt.subplots(1, 3, figsize=(20,5))\n", + "fig, axs = plt.subplots(1, 3, figsize=(20, 5))\n", "pimgr.plot_diagram(H1_dgm, skew=True, ax=axs[0])\n", - "axs[0].set_title('Diagram', fontsize=16)\n", + "axs[0].set_title(\"Diagram\", fontsize=16)\n", "\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[1])\n", - "axs[1].set_title('Linear Ramp Weighting', fontsize=16)\n", + "axs[1].set_title(\"Linear Ramp Weighting\", fontsize=16)\n", "\n", - "pimgr.weight_params = {'low':0.0, 'high':1.0, 'start':2.0, 'end':10.0} \n", + "pimgr.weight_params = {\"low\": 0.0, \"high\": 1.0, \"start\": 2.0, \"end\": 10.0}\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[2])\n", - "axs[2].set_title('Linear Ramp Weighting, Remove Small Persistence Pairs', fontsize=16)\n", + "axs[2].set_title(\"Linear Ramp Weighting, Remove Small Persistence Pairs\", fontsize=16)\n", "\n", "plt.tight_layout()" ] @@ -338,28 +341,28 @@ } ], "source": [ - "# For the default bivariate normal Gaussian kernel, the parameter controlling the spread (sigma) may be specified \n", + "# For the default bivariate normal Gaussian kernel, the parameter controlling the spread (sigma) may be specified\n", "# either by a float or a 2x2 covariance matrix\n", "pimgr = PersistenceImager(pixel_size=1)\n", "pimgr.fit(H1_dgm)\n", "\n", - "fig, axs = plt.subplots(1, 4, figsize=(20,5))\n", - "pimgr.kernel_params = {'sigma': .1}\n", + "fig, axs = plt.subplots(1, 4, figsize=(20, 5))\n", + "pimgr.kernel_params = {\"sigma\": 0.1}\n", "pimgr.plot_diagram(H1_dgm, skew=True, ax=axs[0])\n", - "axs[0].set_title('Diagram', fontsize=16)\n", + "axs[0].set_title(\"Diagram\", fontsize=16)\n", "\n", - "pimgr.kernel_params = {'sigma': 0.1}\n", + "pimgr.kernel_params = {\"sigma\": 0.1}\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[1])\n", - "axs[1].set_title('Kernel Spread: 0.1', fontsize=16)\n", + "axs[1].set_title(\"Kernel Spread: 0.1\", fontsize=16)\n", "\n", - "pimgr.kernel_params = {'sigma': .5}\n", + "pimgr.kernel_params = {\"sigma\": 0.5}\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[2])\n", - "axs[2].set_title('Kernel Spread: 0.5', fontsize=16)\n", + "axs[2].set_title(\"Kernel Spread: 0.5\", fontsize=16)\n", "\n", "# Non-isotropic, standard bivariate Gaussian with greater spread along the persistence axis\n", - "pimgr.kernel_params = {'sigma': np.array([[1, 0],[0, 6]])}\n", + "pimgr.kernel_params = {\"sigma\": np.array([[1, 0], [0, 6]])}\n", "pimgr.plot_image(pimgr.transform(H1_dgm), ax=axs[3])\n", - "axs[3].set_title('Kernel Spread: 2', fontsize=16)\n", + "axs[3].set_title(\"Kernel Spread: 2\", fontsize=16)\n", "\n", "plt.tight_layout()" ] @@ -391,24 +394,27 @@ } ], "source": [ - "# A valid kernel is a python function of the form kernel(x, y, mu=(birth, persistence), **kwargs) defining a \n", - "# cumulative distribution function such that kernel(x, y) = P(X <= x, Y <=y), where x and y are numpy arrays of equal length. \n", - "# The required parameter mu defines the dependance of the kernel on the location of a persistence pair and is usually \n", + "# A valid kernel is a python function of the form kernel(x, y, mu=(birth, persistence), **kwargs) defining a\n", + "# cumulative distribution function such that kernel(x, y) = P(X <= x, Y <=y), where x and y are numpy arrays of equal length.\n", + "# The required parameter mu defines the dependance of the kernel on the location of a persistence pair and is usually\n", "# taken to be the mean of the probability distribution function associated to kernel CDF.\n", "\n", - "# Example of a custom kernel which defines the cumulative distribution function for the uniform probability density \n", + "# Example of a custom kernel which defines the cumulative distribution function for the uniform probability density\n", "# with value 1/(width*height) over the region [mu[0]-width/2, mu[0]+width/2] x [mu[1]-height/2, mu[1]+height/2]\n", "def uniform_kernel(x, y, mu=None, width=1, height=1):\n", - " w1 = np.maximum(x - (mu[0] - width/2), 0)\n", - " h1 = np.maximum(y - (mu[1] - height/2), 0)\n", - " \n", + " w1 = np.maximum(x - (mu[0] - width / 2), 0)\n", + " h1 = np.maximum(y - (mu[1] - height / 2), 0)\n", + "\n", " w = np.minimum(w1, width)\n", " h = np.minimum(h1, height)\n", "\n", - " return w*h / (width*height)\n", + " return w * h / (width * height)\n", + "\n", "\n", "# Construct a PersistenceImager() object that uses the uniform distribution kernel supported on a rectangular region\n", - "pimgr = PersistenceImager(pixel_size=.5, kernel=uniform_kernel, kernel_params={'width': 3, 'height': 1})\n", + "pimgr = PersistenceImager(\n", + " pixel_size=0.5, kernel=uniform_kernel, kernel_params={\"width\": 3, \"height\": 1}\n", + ")\n", "pimgr.fit(H1_dgm)\n", "pimgr.plot_image(pimgr.transform(H1_dgm))" ] @@ -437,13 +443,16 @@ "source": [ "# For diagrams with small numbers of persistence pairs, overhead costs may not justify parallelization\n", "# Also, initial run of job in parallel is very costly. Run twice to see speed gains.\n", - "import time\n", + "\n", "num_diagrams = 100\n", "min_pairs = 50\n", "max_pairs = 100\n", "\n", "pimgr = PersistenceImager()\n", - "dgms = [np.random.rand(np.random.randint(min_pairs, max_pairs), 2) for _ in range(num_diagrams)]\n", + "dgms = [\n", + " np.random.rand(np.random.randint(min_pairs, max_pairs), 2)\n", + " for _ in range(num_diagrams)\n", + "]\n", "\n", "pimgr.fit(dgms)\n", "\n", @@ -473,12 +482,16 @@ "source": [ "# For larger diagrams, speed up can be significant\n", "import time\n", + "\n", "num_diagrams = 100\n", "min_pairs = 500\n", "max_pairs = 1000\n", "\n", "pimgr = PersistenceImager()\n", - "dgms = [np.random.rand(np.random.randint(min_pairs, max_pairs), 2) for _ in range(num_diagrams)]\n", + "dgms = [\n", + " np.random.rand(np.random.randint(min_pairs, max_pairs), 2)\n", + " for _ in range(num_diagrams)\n", + "]\n", "\n", "pimgr.fit(dgms)\n", "\n", diff --git a/docs/notebooks/Persistence landscapes.ipynb b/docs/notebooks/Persistence landscapes.ipynb index 842b051..6f26046 100644 --- a/docs/notebooks/Persistence landscapes.ipynb +++ b/docs/notebooks/Persistence landscapes.ipynb @@ -61,7 +61,7 @@ "metadata": {}, "outputs": [], "source": [ - "pd = [np.array([[0,3], [1,4]]), np.array([[1,4]])]" + "pd = [np.array([[0, 3], [1, 4]]), np.array([[1, 4]])]" ] }, { @@ -90,7 +90,7 @@ } ], "source": [ - "ple = PersLandscapeExact(dgms=pd,hom_deg=0)\n", + "ple = PersLandscapeExact(dgms=pd, hom_deg=0)\n", "ple" ] }, @@ -199,8 +199,8 @@ } ], "source": [ - "pd2 = [ np.array([[0.5,7],[3,5],[4.1,6.5]]), np.array([[1,4]])]\n", - "ple2 = PersLandscapeExact(dgms=pd2,hom_deg=0)\n", + "pd2 = [np.array([[0.5, 7], [3, 5], [4.1, 6.5]]), np.array([[1, 4]])]\n", + "ple2 = PersLandscapeExact(dgms=pd2, hom_deg=0)\n", "ple2" ] }, @@ -296,7 +296,7 @@ } ], "source": [ - "(7*ple).critical_pairs" + "(7 * ple).critical_pairs" ] }, { @@ -451,7 +451,7 @@ } ], "source": [ - "pla = PersLandscapeApprox(dgms=pd,hom_deg=0)\n", + "pla = PersLandscapeApprox(dgms=pd, hom_deg=0)\n", "pla" ] }, @@ -481,7 +481,9 @@ } ], "source": [ - "pla_manual_grid = PersLandscapeApprox(dgms=pd, hom_deg=0, start=-1, stop=5, num_steps=10000)\n", + "pla_manual_grid = PersLandscapeApprox(\n", + " dgms=pd, hom_deg=0, start=-1, stop=5, num_steps=10000\n", + ")\n", "pla_manual_grid" ] }, @@ -541,7 +543,7 @@ "source": [ "from persim.landscapes import snap_pl\n", "\n", - "[snapped_pla, snapped_pla_manual_grid] = snap_pl([pla,pla_manual_grid])" + "[snapped_pla, snapped_pla_manual_grid] = snap_pl([pla, pla_manual_grid])" ] }, { @@ -559,7 +561,7 @@ } ], "source": [ - "print(snapped_pla + 4*snapped_pla_manual_grid)" + "print(snapped_pla + 4 * snapped_pla_manual_grid)" ] }, { @@ -795,8 +797,8 @@ "source": [ "from persim.landscapes import average_approx, lc_approx\n", "\n", - "lc_landscape = lc_approx(landscapes=[pla,2*pla,5*pla], coeffs = [1,1/2,1/5])\n", - "np.testing.assert_almost_equal(lc_landscape[0],pla[0]*3)" + "lc_landscape = lc_approx(landscapes=[pla, 2 * pla, 5 * pla], coeffs=[1, 1 / 2, 1 / 5])\n", + "np.testing.assert_almost_equal(lc_landscape[0], pla[0] * 3)" ] }, { @@ -814,8 +816,8 @@ "metadata": {}, "outputs": [], "source": [ - "avg_landscape = average_approx([pla,pla,pla,pla])\n", - "np.testing.assert_almost_equal(avg_landscape[0],pla[0])" + "avg_landscape = average_approx([pla, pla, pla, pla])\n", + "np.testing.assert_almost_equal(avg_landscape[0], pla[0])" ] }, { @@ -927,11 +929,10 @@ "\n", "random_pds = [\n", " [\n", - " np.array([\n", - " [rng.random(), rng.random()+1] for _ in range(500)\n", - " ])\n", - " ] # extra bracketing to mimic the output of ripser.py\n", - " for _ in range(100)]" + " np.array([[rng.random(), rng.random() + 1] for _ in range(500)])\n", + " ] # extra bracketing to mimic the output of ripser.py\n", + " for _ in range(100)\n", + "]" ] }, { diff --git a/docs/notebooks/distances.ipynb b/docs/notebooks/distances.ipynb index bb9f6d2..431dfb7 100644 --- a/docs/notebooks/distances.ipynb +++ b/docs/notebooks/distances.ipynb @@ -54,9 +54,9 @@ } ], "source": [ - "plt.scatter(data_clean[:,0], data_clean[:,1], label=\"clean data\")\n", - "plt.scatter(data_noisy[:,0], data_noisy[:,1], label=\"noisy data\")\n", - "plt.axis('equal')\n", + "plt.scatter(data_clean[:, 0], data_clean[:, 1], label=\"clean data\")\n", + "plt.scatter(data_noisy[:, 0], data_noisy[:, 1], label=\"noisy data\")\n", + "plt.axis(\"equal\")\n", "plt.legend()\n", "plt.show()" ] @@ -74,8 +74,8 @@ "metadata": {}, "outputs": [], "source": [ - "dgm_clean = ripser.ripser(data_clean)['dgms'][1]\n", - "dgm_noisy = ripser.ripser(data_noisy)['dgms'][1]" + "dgm_clean = ripser.ripser(data_clean)[\"dgms\"][1]\n", + "dgm_noisy = ripser.ripser(data_noisy)[\"dgms\"][1]" ] }, { @@ -97,7 +97,7 @@ } ], "source": [ - "persim.plot_diagrams([dgm_clean, dgm_noisy] , labels=['Clean $H_1$', 'Noisy $H_1$'])\n", + "persim.plot_diagrams([dgm_clean, dgm_noisy], labels=[\"Clean $H_1$\", \"Noisy $H_1$\"])\n", "plt.show()" ] }, @@ -136,7 +136,9 @@ } ], "source": [ - "persim.bottleneck_matching(dgm_clean, dgm_noisy, matching, labels=['Clean $H_1$', 'Noisy $H_1$'])\n", + "persim.bottleneck_matching(\n", + " dgm_clean, dgm_noisy, matching, labels=[\"Clean $H_1$\", \"Noisy $H_1$\"]\n", + ")\n", "plt.show()" ] }, @@ -199,23 +201,12 @@ } ], "source": [ - "dgm1 = np.array([\n", - " [0.6, 1.05],\n", - " [0.53, 1],\n", - " [0.5, 0.51]\n", - "])\n", - "dgm2 = np.array([\n", - " [0.55, 1.1],\n", - " [0.8,0.9]\n", - "])\n", + "dgm1 = np.array([[0.6, 1.05], [0.53, 1], [0.5, 0.51]])\n", + "dgm2 = np.array([[0.55, 1.1], [0.8, 0.9]])\n", "\n", - "d, matching = persim.bottleneck(\n", - " dgm1,\n", - " dgm2,\n", - " matching=True\n", - ")\n", + "d, matching = persim.bottleneck(dgm1, dgm2, matching=True)\n", "\n", - "persim.bottleneck_matching(dgm1, dgm2, matching, labels=['Clean $H_1$', 'Noisy $H_1$'])\n", + "persim.bottleneck_matching(dgm1, dgm2, matching, labels=[\"Clean $H_1$\", \"Noisy $H_1$\"])\n", "plt.title(\"Distance {:.3f}\".format(d))\n", "plt.show()" ] @@ -413,7 +404,7 @@ "outputs": [], "source": [ "data_clean = tadasets.dsphere(d=1, n=100, noise=0.0)\n", - "dgm_clean = ripser.ripser(data_clean)['dgms'][1]" + "dgm_clean = ripser.ripser(data_clean)[\"dgms\"][1]" ] }, { @@ -426,20 +417,20 @@ "noise_levels = np.linspace(0.0, 0.9, 30)\n", "samples = 15\n", "\n", - "dists_bottleneck=[]\n", - "dists_sliced=[]\n", + "dists_bottleneck = []\n", + "dists_sliced = []\n", "\n", "for n in noise_levels:\n", " for i in range(samples):\n", " ds_clean = tadasets.dsphere(d=1, n=100, noise=0.0)\n", - " dgm_clean = ripser.ripser(ds_clean)['dgms'][1]\n", - " \n", + " dgm_clean = ripser.ripser(ds_clean)[\"dgms\"][1]\n", + "\n", " ds = tadasets.dsphere(d=1, n=100, noise=n)\n", - " dgm = ripser.ripser(ds)['dgms'][1]\n", - " \n", + " dgm = ripser.ripser(ds)[\"dgms\"][1]\n", + "\n", " dists_bottleneck.append((n, persim.bottleneck(dgm_clean, dgm)))\n", - " dists_sliced.append((n, persim.sliced_wasserstein(dgm_clean, dgm))) \n", - " \n", + " dists_sliced.append((n, persim.sliced_wasserstein(dgm_clean, dgm)))\n", + "\n", "dists_sliced = np.array(dists_sliced)\n", "dists_bottleneck = np.array(dists_bottleneck)" ] @@ -461,8 +452,12 @@ } ], "source": [ - "plt.scatter(dists_bottleneck[:,0], dists_bottleneck[:,1], label=\"Bottleneck\", alpha=0.3)\n", - "plt.scatter(dists_sliced[:,0], dists_sliced[:,1], label=\"Sliced Wasserstein\", alpha=0.3)\n", + "plt.scatter(\n", + " dists_bottleneck[:, 0], dists_bottleneck[:, 1], label=\"Bottleneck\", alpha=0.3\n", + ")\n", + "plt.scatter(\n", + " dists_sliced[:, 0], dists_sliced[:, 1], label=\"Sliced Wasserstein\", alpha=0.3\n", + ")\n", "plt.legend()\n", "plt.show()" ] @@ -491,23 +486,21 @@ } ], "source": [ - "plt.figure(figsize=(9,9))\n", + "plt.figure(figsize=(9, 9))\n", "for i, n in enumerate([0.0, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.6, 0.8]):\n", - " plt.subplot(331+i)\n", + " plt.subplot(331 + i)\n", "\n", " ds_clean = tadasets.dsphere(d=1, n=100, noise=0.0)\n", - " dgm_clean = ripser.ripser(ds_clean)['dgms'][1]\n", + " dgm_clean = ripser.ripser(ds_clean)[\"dgms\"][1]\n", "\n", " ds = tadasets.dsphere(d=1, n=100, noise=n)\n", - " dgm = ripser.ripser(ds)['dgms'][1]\n", + " dgm = ripser.ripser(ds)[\"dgms\"][1]\n", + "\n", + " d, matching = persim.bottleneck(dgm_clean, dgm, matching=True)\n", "\n", - " d, matching = persim.bottleneck(\n", - " dgm_clean,\n", - " dgm,\n", - " matching=True\n", + " persim.bottleneck_matching(\n", + " dgm_clean, dgm, matching, labels=[\"Clean $H_1$\", \"Noisy $H_1$\"]\n", " )\n", - " \n", - " persim.bottleneck_matching(dgm_clean, dgm, matching, labels=['Clean $H_1$', 'Noisy $H_1$'])\n", "\n", " plt.title(\"Noise:{} Distance:{:.3f}\".format(n, d))\n", "\n", diff --git a/persim/__init__.py b/persim/__init__.py index a92216a..787dcfa 100644 --- a/persim/__init__.py +++ b/persim/__init__.py @@ -1,13 +1,26 @@ -from ._version import __version__ -from .bottleneck import * -from .gromov_hausdorff import * -from .heat import * -from .images import * +from .bottleneck import bottleneck +from .gromov_hausdorff import gromov_hausdorff +from .heat import heat +from .images import PersistenceImager, PersImage from .landscapes.approximate import PersLandscapeApprox from .landscapes.exact import PersLandscapeExact from .landscapes.transformer import PersistenceLandscaper -from .sliced_wasserstein import * -from .visuals import * -from .wasserstein import * +from .sliced_wasserstein import sliced_wasserstein +from .visuals import plot_diagrams, wasserstein_matching, bottleneck_matching +from .wasserstein import wasserstein -__all__ = ["PersLandscapeApprox", "PersistenceLandscaper", "PersLandscapeExact"] +__all__ = [ + "bottleneck", + "gromov_hausdorff", + "heat", + "PersImage", + "PersistenceImager", + "PersLandscapeApprox", + "PersistenceLandscaper", + "PersLandscapeExact", + "sliced_wasserstein", + "plot_diagrams", + "wasserstein_matching", + "bottleneck_matching", + "wasserstein", +] diff --git a/persim/bottleneck.py b/persim/bottleneck.py index edb1f32..286e668 100644 --- a/persim/bottleneck.py +++ b/persim/bottleneck.py @@ -1,9 +1,9 @@ """ - Implementation of the bottleneck distance using binary - search and the Hopcroft-Karp algorithm +Implementation of the bottleneck distance using binary +search and the Hopcroft-Karp algorithm - Author: Chris Tralie +Author: Chris Tralie """ diff --git a/persim/gromov_hausdorff.py b/persim/gromov_hausdorff.py index e1a2ea9..024721d 100644 --- a/persim/gromov_hausdorff.py +++ b/persim/gromov_hausdorff.py @@ -1,105 +1,106 @@ # -*- coding: utf-8 -*- """ - Implementation of the modified Gromov–Hausdorff (mGH) distance - between compact metric spaces induced by unweighted graphs. This - code complements the results from "Efficient estimation of a - Gromov–Hausdorff distance between unweighted graphs" by V. Oles et - al. (https://arxiv.org/pdf/1909.09772). The mGH distance was first - defined in "Some properties of Gromov–Hausdorff distances" by F. - Mémoli (Discrete & Computational Geometry, 2012). +Implementation of the modified Gromov–Hausdorff (mGH) distance +between compact metric spaces induced by unweighted graphs. This +code complements the results from "Efficient estimation of a +Gromov–Hausdorff distance between unweighted graphs" by V. Oles et +al. (https://arxiv.org/pdf/1909.09772). The mGH distance was first +defined in "Some properties of Gromov–Hausdorff distances" by F. +Mémoli (Discrete & Computational Geometry, 2012). - Author: Vladyslav Oles +Author: Vladyslav Oles - =================================================================== +=================================================================== - Usage examples: +Usage examples: - 1) Estimating the mGH distance between 4-clique and single-vertex - graph from their adjacency matrices. Note that it suffices to fill - only the upper triangle of an adjacency matrix. +1) Estimating the mGH distance between 4-clique and single-vertex +graph from their adjacency matrices. Note that it suffices to fill +only the upper triangle of an adjacency matrix. - >>> AG = [[0, 1, 1, 1], [0, 0, 1, 1], [0, 0, 0, 1], [0, 0, 0, 0]] - >>> AH = [[0]] - >>> lb, ub = gromov_hausdorff(AG, AH) - >>> lb, ub - (0.5, 0.5) +>>> AG = [[0, 1, 1, 1], [0, 0, 1, 1], [0, 0, 0, 1], [0, 0, 0, 0]] +>>> AH = [[0]] +>>> lb, ub = gromov_hausdorff(AG, AH) +>>> lb, ub +(0.5, 0.5) - 2) Estimating the mGH distance between cycle graphs of length 2 and - 5 from their adjacency matrices. Note that the adjacency matrices - can be given in both dense and sparse SciPy formats. +2) Estimating the mGH distance between cycle graphs of length 2 and +5 from their adjacency matrices. Note that the adjacency matrices +can be given in both dense and sparse SciPy formats. - >>> AI = np.array([[0, 1], [0, 0]]) - >>> AJ = sps.csr_matrix(([1] * 5, ([0, 0, 1, 2, 3], [1, 4, 2, 3, 4])), shape=(5, 5)) - >>> lb, ub = gromov_hausdorff(AI, AJ) - >>> lb, ub - (0.5, 1.0) +>>> AI = np.array([[0, 1], [0, 0]]) +>>> AJ = sps.csr_matrix(([1] * 5, ([0, 0, 1, 2, 3], [1, 4, 2, 3, 4])), shape=(5, 5)) +>>> lb, ub = gromov_hausdorff(AI, AJ) +>>> lb, ub +(0.5, 1.0) - 3) Estimating all pairwise mGH distances between multiple graphs - from their adjacency matrices as an iterable. +3) Estimating all pairwise mGH distances between multiple graphs +from their adjacency matrices as an iterable. - >>> As = [AG, AH, AI, AJ] - >>> lbs, ubs = gromov_hausdorff(As) - >>> lbs - array([[0. , 0.5, 0.5, 0.5], - [0.5, 0. , 0.5, 1. ], - [0.5, 0.5, 0. , 0.5], - [0.5, 1. , 0.5, 0. ]]) - >>> ubs - array([[0. , 0.5, 0.5, 0.5], - [0.5, 0. , 0.5, 1. ], - [0.5, 0.5, 0. , 1. ], - [0.5, 1. , 1. , 0. ]]) +>>> As = [AG, AH, AI, AJ] +>>> lbs, ubs = gromov_hausdorff(As) +>>> lbs +array([[0. , 0.5, 0.5, 0.5], + [0.5, 0. , 0.5, 1. ], + [0.5, 0.5, 0. , 0.5], + [0.5, 1. , 0.5, 0. ]]) +>>> ubs +array([[0. , 0.5, 0.5, 0.5], + [0.5, 0. , 0.5, 1. ], + [0.5, 0.5, 0. , 1. ], + [0.5, 1. , 1. , 0. ]]) - =================================================================== +=================================================================== - Notations: +Notations: - |X| denotes the number of elements in set X. +|X| denotes the number of elements in set X. - X → Y denotes the set of all mappings of set X into set Y. +X → Y denotes the set of all mappings of set X into set Y. - V(G) denotes vertex set of graph G. +V(G) denotes vertex set of graph G. - mGH(X, Y) denotes the modified Gromov–Hausdorff distance between - compact metric spaces X and Y. +mGH(X, Y) denotes the modified Gromov–Hausdorff distance between +compact metric spaces X and Y. - row_i(A) denotes the i-th row of matrix A. +row_i(A) denotes the i-th row of matrix A. - PSPS^n(A) denotes the set of all permutation similarities of - all n×n principal submatrices of square matrix A. +PSPS^n(A) denotes the set of all permutation similarities of +all n×n principal submatrices of square matrix A. - PSPS^n_{i←j}(A) denotes the set of all permutation similarities of - all n×n principal submatrices of square matrix A whose i-th row is - comprised of the entries in row_j(A). +PSPS^n_{i←j}(A) denotes the set of all permutation similarities of +all n×n principal submatrices of square matrix A whose i-th row is +comprised of the entries in row_j(A). - =================================================================== +=================================================================== - Glossary: +Glossary: - Distance matrix of metric space X is a |X|×|X| matrix whose - (i, j)-th entry holds the distance between i-th and j-th points of - X. By the properties of a metric, distance matrices are symmetric - and non-negative, their diagonal entries are 0 and off-diagonal - entries are positive. +Distance matrix of metric space X is a |X|×|X| matrix whose +(i, j)-th entry holds the distance between i-th and j-th points of +X. By the properties of a metric, distance matrices are symmetric +and non-negative, their diagonal entries are 0 and off-diagonal +entries are positive. - Curvature is a generalization of distance matrix that allows - repetitions in the underlying points of a metric space. Curvature - of an n-tuple of points from metric space X is an n×n matrix whose - (i, j)-th entry holds the distance between the points from i-th and - j-th positions of the tuple. Since these points need not be - distinct, the off-diagonal entries of a curvature can equal 0. +Curvature is a generalization of distance matrix that allows +repetitions in the underlying points of a metric space. Curvature +of an n-tuple of points from metric space X is an n×n matrix whose +(i, j)-th entry holds the distance between the points from i-th and +j-th positions of the tuple. Since these points need not be +distinct, the off-diagonal entries of a curvature can equal 0. - n-th curvature set of metric space X is the set of all curvatures - of X that are of size n×n. +n-th curvature set of metric space X is the set of all curvatures +of X that are of size n×n. - d-bounded curvature for some d > 0 is a curvature whose - off-diagonal entries are all ≥ d. +d-bounded curvature for some d > 0 is a curvature whose +off-diagonal entries are all ≥ d. - Positive-bounded curvature is a curvature whose off-diagonal - entries are all positive, i.e. the points in the underlying tuple - are distinct. Equivalently, positive-bounded curvatures are - distance matrices on the subsets of a metric space. +Positive-bounded curvature is a curvature whose off-diagonal +entries are all positive, i.e. the points in the underlying tuple +are distinct. Equivalently, positive-bounded curvatures are +distance matrices on the subsets of a metric space. """ + import numpy as np import warnings import scipy.sparse as sps @@ -110,11 +111,12 @@ # To sample √|X| * log (|X| + 1) mappings from X → Y by default. -DEFAULT_MAPPING_SAMPLE_SIZE_ORDER = np.array([.5, 1]) +DEFAULT_MAPPING_SAMPLE_SIZE_ORDER = np.array([0.5, 1]) def gromov_hausdorff( - AG, AH=None, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER): + AG, AH=None, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER +): """ Estimate the mGH distance between simple unweighted graphs, represented as compact metric spaces based on their shortest @@ -127,7 +129,7 @@ def gromov_hausdorff( adjacency matrices if AH=None. AH: (M,M) np.array (Sparse) adjacency matrix of graph H with M vertices, or None. - mapping_sample_size_order: (2,) np.array + mapping_sample_size_order: (2,) np.array Parameter that regulates the number of mappings to sample when tightening upper bound of the mGH distance. @@ -143,8 +145,10 @@ def gromov_hausdorff( # Form iterable with adjacency matrices. if AH is None: if len(AG) < 2: - raise ValueError("'estimate_between_unweighted_graphs' needs at least" - "2 graphs to discriminate") + raise ValueError( + "'estimate_between_unweighted_graphs' needs at least" + "2 graphs to discriminate" + ) As = AG else: As = (AG, AH) @@ -163,7 +167,8 @@ def gromov_hausdorff( # Find lower and upper bounds of the mGH distance between # the pair of graphs. lbs[i, j], ubs[i, j] = estimate( - DX, DY, mapping_sample_size_order=mapping_sample_size_order) + DX, DY, mapping_sample_size_order=mapping_sample_size_order + ) if AH is None: # Symmetrize matrices with lower and upper bounds of pairwise @@ -203,10 +208,14 @@ def make_distance_matrix_from_adjacency_matrix(AG): # Ensure compactness of metric space, represented by distance # matrix. if np.any(np.isinf(DG)): - warnings.warn("disconnected graph is approximated by its largest connected component") + warnings.warn( + "disconnected graph is approximated by its largest connected component" + ) # Extract largest connected component of the graph. _, components_by_vertex = connected_components(AG, directed=False) - components, component_sizes = np.unique(components_by_vertex, return_counts=True) + components, component_sizes = np.unique( + components_by_vertex, return_counts=True + ) largest_component = components[np.argmax(component_sizes)] DG = DG[components_by_vertex == largest_component] @@ -253,8 +262,11 @@ def determine_optimal_int_type(value): optimal_int_type: np.dtype Optimal signed integer type to hold the value. """ - feasible_int_types = (int_type for int_type in [np.int8, np.int16, np.int32, np.int64] - if value <= np.iinfo(int_type).max) + feasible_int_types = ( + int_type + for int_type in [np.int8, np.int16, np.int32, np.int64] + if value <= np.iinfo(int_type).max + ) try: optimal_int_type = next(feasible_int_types) except StopIteration: @@ -286,7 +298,9 @@ def estimate(DX, DY, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER Upper bound of mGH(X, Y). """ # Ensure distance matrices are of integer type. - if not np.issubdtype(DX.dtype, np.integer) or not np.issubdtype(DY.dtype, np.integer): + if not np.issubdtype(DX.dtype, np.integer) or not np.issubdtype( + DY.dtype, np.integer + ): raise ValueError("non-integer metrics are not yet supported") # Cast distance matrices to signed integer type to allow # subtractions. @@ -297,7 +311,8 @@ def estimate(DX, DY, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER # Estimate mGH(X, Y). double_lb = find_lb(DX, DY) double_ub = find_ub( - DX, DY, mapping_sample_size_order=mapping_sample_size_order, double_lb=double_lb) + DX, DY, mapping_sample_size_order=mapping_sample_size_order, double_lb=double_lb + ) return 0.5 * double_lb, 0.5 * double_ub @@ -372,8 +387,10 @@ def find_largest_size_bounded_curvature(DX, diam_X, d): # Pick a row (and column) with highest number of off-diagonal # distances < d, then with smallest sum of off-diagonal # distances ≥ d. - K_rows_sortkeys = -np.sum(K < d, axis=0) * (len(K) * diam_X) + \ - np.sum(np.ma.masked_less(K, d), axis=0).data + K_rows_sortkeys = ( + -np.sum(K < d, axis=0) * (len(K) * diam_X) + + np.sum(np.ma.masked_less(K, d), axis=0).data + ) row_to_remove = np.argmin(K_rows_sortkeys) # Remove the row and column from K. K = np.delete(K, row_to_remove, axis=0) @@ -406,8 +423,9 @@ def confirm_lb_using_bounded_curvature(d, K, DY, max_diam): # If K exceeds DY in size, the Hausdorff distance between the n-th # curvature sets of X and Y is ≥ d, entailing 2*mGH(X, Y) ≥ d (from # Theorem A). - lb_is_confirmed = len(K) > len(DY) or \ - confirm_lb_using_bounded_curvature_row(d, K, DY, max_diam) + lb_is_confirmed = len(K) > len(DY) or confirm_lb_using_bounded_curvature_row( + d, K, DY, max_diam + ) return lb_is_confirmed @@ -438,9 +456,12 @@ def confirm_lb_using_bounded_curvature_row(d, K, DY, max_diam): # Represent row of K as distance distributions, and retain those # that are maximal by the entry-wise partial order. K_max_rows_distance_distributions = find_unique_max_distributions( - represent_distance_matrix_rows_as_distributions(K, max_diam)) + represent_distance_matrix_rows_as_distributions(K, max_diam) + ) # Represent rows of DY as distance distributions. - DY_rows_distance_distributions = represent_distance_matrix_rows_as_distributions(DY, max_diam) + DY_rows_distance_distributions = represent_distance_matrix_rows_as_distributions( + DY, max_diam + ) # For each i ∈ 1,...,n, check if ||row_i(K) - row_i(L)||_∞ ≥ d # ∀L ∈ PSPS^n(DY), which entails that the Hausdorff distance # between the n-th curvature sets of X and Y is ≥ d, and therefore @@ -458,13 +479,17 @@ def confirm_lb_using_bounded_curvature_row(d, K, DY, max_diam): # (bottleneck) assignment feasibility problem between the # entries of row_i(K) and row_j(DY). lb_is_confirmed = not check_assignment_feasibility( - K_max_rows_distance_distributions[i], DY_rows_distance_distributions[j], d) + K_max_rows_distance_distributions[i], + DY_rows_distance_distributions[j], + d, + ) j += 1 i += 1 return lb_is_confirmed + def represent_distance_matrix_rows_as_distributions(DX, max_d): """ Given a metric space X induced by simple unweighted graph, @@ -488,15 +513,17 @@ def represent_distance_matrix_rows_as_distributions(DX, max_d): # Add imaginary part to distinguish identical distances from # different rows of D. unique_distances, distance_frequencies = np.unique( - DX + 1j * np.arange(len(DX))[:, None], return_counts=True) + DX + 1j * np.arange(len(DX))[:, None], return_counts=True + ) # Type is signed integer to allow subtractions. optimal_int_type = determine_optimal_int_type(len(DX)) DX_rows_distributons = np.zeros((len(DX), max_d + 1), dtype=optimal_int_type) # Construct index pairs for distance frequencies, so that the # frequencies of larger distances appear on the left. - distance_frequencies_index_pairs = \ - (np.imag(unique_distances).astype(optimal_int_type), - max_d - np.real(unique_distances).astype(max_d.dtype)) + distance_frequencies_index_pairs = ( + np.imag(unique_distances).astype(optimal_int_type), + max_d - np.real(unique_distances).astype(max_d.dtype), + ) # Fill frequency distributions of the rows of DX. DX_rows_distributons[distance_frequencies_index_pairs] = distance_frequencies # Remove (unit) frequency of distance 0 from each row. @@ -524,18 +551,26 @@ def find_unique_max_distributions(distributions): unique_max_distributions: np.array (m×max_d) Unique frequency distributions of the maximal vectors; m ≤ M. """ - pairwise_distribution_differences = \ - np.cumsum(distributions - distributions[:, None, :], axis=2) + pairwise_distribution_differences = np.cumsum( + distributions - distributions[:, None, :], axis=2 + ) pairwise_distribution_less_thans = np.logical_and( np.all(pairwise_distribution_differences >= 0, axis=2), - np.any(pairwise_distribution_differences > 0, axis=2)) + np.any(pairwise_distribution_differences > 0, axis=2), + ) distributions_are_max = ~np.any(pairwise_distribution_less_thans, axis=1) try: - unique_max_distributions = np.unique(distributions[distributions_are_max], axis=0) + unique_max_distributions = np.unique( + distributions[distributions_are_max], axis=0 + ) except AttributeError: # `np.unique` is not implemented in NumPy 1.12 (Python 3.4). unique_max_distributions = np.vstack( - {tuple(distribution) for distribution in distributions[distributions_are_max]}) + { + tuple(distribution) + for distribution in distributions[distributions_are_max] + } + ) return unique_max_distributions @@ -562,13 +597,17 @@ def check_assignment_feasibility(v_distribution, u_distribution, d): is_assignment_feasible: bool Whether such injective f: {1,...,p} → {1,...,q} exists. """ + def next_i_and_j(min_i, min_j): # Find reversed v distribution index of smallest v entries yet # to be assigned. Then find index in reversed u distribution of # smallest u entries to which the b entries can be assigned to. try: - i = next(i for i in range(min_i, len(reversed_v_distribution)) - if reversed_v_distribution[i] > 0) + i = next( + i + for i in range(min_i, len(reversed_v_distribution)) + if reversed_v_distribution[i] > 0 + ) except StopIteration: # All v entries are assigned. i = None @@ -583,9 +622,13 @@ def next_j(i, min_j): # which v entries, corresponding to a given reversed v # distribution index, can be assigned to. try: - j = next(j for j in range(min_j, min(i + (d - 1), - len(reversed_u_distribution) - 1) + 1) - if reversed_u_distribution[j] > 0) + j = next( + j + for j in range( + min_j, min(i + (d - 1), len(reversed_u_distribution) - 1) + 1 + ) + if reversed_u_distribution[j] > 0 + ) except StopIteration: # No u entries left to assign the particular v entries to. j = None @@ -617,7 +660,10 @@ def next_j(i, min_j): return is_assignment_feasible -def find_ub(DX, DY, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER, double_lb=0): + +def find_ub( + DX, DY, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER, double_lb=0 +): """ For X, Y metric spaces, find an upper bound of mGH(X, Y). @@ -640,18 +686,28 @@ def find_ub(DX, DY, mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER, """ # Find upper bound of smallest distortion of a mapping in X → Y. ub_of_X_to_Y_min_distortion = find_ub_of_min_distortion( - DX, DY, mapping_sample_size_order=mapping_sample_size_order, goal_distortion=double_lb) + DX, + DY, + mapping_sample_size_order=mapping_sample_size_order, + goal_distortion=double_lb, + ) # Find upper bound of smallest distortion of a mapping in Y → X. ub_of_Y_to_X_min_distortion = find_ub_of_min_distortion( - DY, DX, mapping_sample_size_order=mapping_sample_size_order, - goal_distortion=ub_of_X_to_Y_min_distortion) + DY, + DX, + mapping_sample_size_order=mapping_sample_size_order, + goal_distortion=ub_of_X_to_Y_min_distortion, + ) return max(ub_of_X_to_Y_min_distortion, ub_of_Y_to_X_min_distortion) -def find_ub_of_min_distortion(DX, DY, - mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER, - goal_distortion=0): +def find_ub_of_min_distortion( + DX, + DY, + mapping_sample_size_order=DEFAULT_MAPPING_SAMPLE_SIZE_ORDER, + goal_distortion=0, +): """ For X, Y metric spaces, find an upper bound of smallest distortion of a mapping in X → Y by heuristically constructing some mappings @@ -675,13 +731,20 @@ def find_ub_of_min_distortion(DX, DY, Upper bound of smallest distortion of a mapping in X → Y. """ # Compute the numper of mappings to sample. - n_mappings_to_sample = int(np.ceil(np.prod( - np.array([len(DX), np.log(len(DX) + 1)]) ** mapping_sample_size_order))) + n_mappings_to_sample = int( + np.ceil( + np.prod( + np.array([len(DX), np.log(len(DX) + 1)]) ** mapping_sample_size_order + ) + ) + ) # Construct each mapping in X → Y in |X| steps by choosing the image # of π(i)-th point in X at i-th step, where π is randomly sampled # |X|-permutation. Image of each point is chosen to minimize the # intermediate distortion at each step. - permutations_generator = (np.random.permutation(len(DX)) for _ in range(n_mappings_to_sample)) + permutations_generator = ( + np.random.permutation(len(DX)) for _ in range(n_mappings_to_sample) + ) ub_of_min_distortion = np.inf goal_distortion_is_matched = False all_sampled_permutations_are_tried = False @@ -731,11 +794,12 @@ def construct_mapping(DX, DY, pi): # Choose point in Y that minimizes the distortion after # mapping π(i)-th point in X to it. bottlenecks_from_mapping_x = np.max( - np.abs(DX[x, mapped_xs] - DY[:, mapped_xs_images]), axis=1) + np.abs(DX[x, mapped_xs] - DY[:, mapped_xs_images]), axis=1 + ) y = np.argmin(bottlenecks_from_mapping_x) # Map π(i)-th point in X to the chosen point in Y. mapped_xs.append(x) mapped_xs_images.append(y) distortion = max(bottlenecks_from_mapping_x[y], distortion) - + return mapped_xs_images, distortion diff --git a/persim/heat.py b/persim/heat.py index f1bee93..d313789 100644 --- a/persim/heat.py +++ b/persim/heat.py @@ -1,7 +1,7 @@ """ - Implementation of the "multiscale heat kernel" (CVPR 2015), +Implementation of the "multiscale heat kernel" (CVPR 2015), - Author: Chris Tralie +Author: Chris Tralie """ @@ -9,6 +9,7 @@ __all__ = ["heat"] + def evalHeatKernel(dgm1, dgm2, sigma): """ Evaluate the continuous heat-based kernel between dgm1 and dgm2 (more correct than L2 on the discretized version above but may be slower because can't exploit fast matrix multiplication when evaluating many, many kernels) diff --git a/persim/images.py b/persim/images.py index f1b7956..be43c93 100644 --- a/persim/images.py +++ b/persim/images.py @@ -1,12 +1,10 @@ from __future__ import division import copy -from itertools import product from typing import Iterable import matplotlib.pyplot as plt import numpy as np -import scipy.spatial as spatial from deprecated.sphinx import deprecated from joblib import Parallel, delayed from matplotlib.collections import LineCollection diff --git a/persim/images_kernels.py b/persim/images_kernels.py index 010e28e..8521869 100644 --- a/persim/images_kernels.py +++ b/persim/images_kernels.py @@ -1,30 +1,32 @@ """ Kernel functions for PersistenceImager() transformer: -A valid kernel is a Python function of the form +A valid kernel is a Python function of the form -kernel(x, y, mu=(birth, persistence), **kwargs) +kernel(x, y, mu=(birth, persistence), **kwargs) -defining a cumulative distribution function(CDF) such that kernel(x, y) = P(X <= x, Y <=y), where x and y are numpy arrays of equal length. +defining a cumulative distribution function(CDF) such that kernel(x, y) = P(X <= x, Y <=y), where x and y are numpy arrays of equal length. The required parameter mu defines the dependance of the kernel on the location of a persistence pair and is usually taken to be the mean of the probability distribution function associated to kernel CDF. """ + import numpy as np from scipy.special import erfc + def uniform(x, y, mu=None, width=1, height=1): - w1 = np.maximum(x - (mu[0] - width/2), 0) - h1 = np.maximum(y - (mu[1] - height/2), 0) - + w1 = np.maximum(x - (mu[0] - width / 2), 0) + h1 = np.maximum(y - (mu[1] - height / 2), 0) + w = np.minimum(w1, width) h = np.minimum(h1, height) - return w*h / (width*height) + return w * h / (width * height) def gaussian(birth, pers, mu=None, sigma=None): - """ Optimized bivariate normal cumulative distribution function for computing persistence images using a Gaussian kernel. - + """Optimized bivariate normal cumulative distribution function for computing persistence images using a Gaussian kernel. + Parameters ---------- birth : (M,) numpy.ndarray @@ -33,9 +35,9 @@ def gaussian(birth, pers, mu=None, sigma=None): Persistence coordinates of pixel corners. mu : (2,) numpy.ndarray Coordinates of the distribution mean (birth-persistence pairs). - sigma : float or (2,2) numpy.ndarray + sigma : float or (2,2) numpy.ndarray Distribution's covariance matrix or the equal variances if the distribution is standard isotropic. - + Returns ------- float @@ -47,21 +49,34 @@ def gaussian(birth, pers, mu=None, sigma=None): sigma = np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float64) if sigma[0][1] == 0.0: - return sbvn_cdf(birth, pers, - mu_x=mu[0], mu_y=mu[1], sigma_x=sigma[0][0], sigma_y=sigma[1][1]) + return sbvn_cdf( + birth, + pers, + mu_x=mu[0], + mu_y=mu[1], + sigma_x=sigma[0][0], + sigma_y=sigma[1][1], + ) else: - return bvn_cdf(birth, pers, - mu_x=mu[0], mu_y=mu[1], sigma_xx=sigma[0][0], sigma_yy=sigma[1][1], sigma_xy=sigma[0][1]) + return bvn_cdf( + birth, + pers, + mu_x=mu[0], + mu_y=mu[1], + sigma_xx=sigma[0][0], + sigma_yy=sigma[1][1], + sigma_xy=sigma[0][1], + ) def norm_cdf(x): - """ Univariate normal cumulative distribution function (CDF) with mean 0.0 and standard deviation 1.0. - + """Univariate normal cumulative distribution function (CDF) with mean 0.0 and standard deviation 1.0. + Parameters ---------- x : float Value at which to evaluate the CDF (upper limit). - + Returns ------- float @@ -71,13 +86,13 @@ def norm_cdf(x): def sbvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_x=1.0, sigma_y=1.0): - """ Standard bivariate normal cumulative distribution function with specified mean and variances. - + """Standard bivariate normal cumulative distribution function with specified mean and variances. + Parameters ---------- x : float or numpy.ndarray of floats x-coordinate(s) at which to evaluate the CDF (upper limit). - y : float or numpy.ndarray of floats + y : float or numpy.ndarray of floats y-coordinate(s) at which to evaluate the CDF (upper limit). mu_x : float x-coordinate of the mean. @@ -87,7 +102,7 @@ def sbvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_x=1.0, sigma_y=1.0): Variance in x. sigma_y : float Variance in y. - + Returns ------- float @@ -99,13 +114,13 @@ def sbvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_x=1.0, sigma_y=1.0): def bvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_xx=1.0, sigma_yy=1.0, sigma_xy=0.0): - """ Bivariate normal cumulative distribution function with specified mean and covariance matrix. - + """Bivariate normal cumulative distribution function with specified mean and covariance matrix. + Parameters ---------- x : float or numpy.ndarray of floats x-coordinate(s) at which to evaluate the CDF (upper limit). - y : float or numpy.ndarray of floats + y : float or numpy.ndarray of floats y-coordinate(s) at which to evaluate the CDF (upper limit). mu_x : float x-coordinate of the mean. @@ -117,12 +132,12 @@ def bvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_xx=1.0, sigma_yy=1.0, sigma_xy=0.0): Variance in y. sigma_xy : float Covariance of x and y. - + Returns ------- float Value of joint CDF at (x, y), i.e., P(X <= birth, Y <= pers). - + Notes ----- Based on the Matlab implementations by Thomas H. Jørgensen (http://www.tjeconomics.com/code/) and Alan Genz (http://www.math.wsu.edu/math/faculty/genz/software/matlab/bvnl.m) using the approach described by Drezner and Wesolowsky (https://doi.org/10.1080/00949659008811236). @@ -151,11 +166,27 @@ def bvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_xx=1.0, sigma_yy=1.0, sigma_xy=0.0): dim1sn2 = np.outer(dim1, sn2) sn12 = np.multiply(sn1, sn1) sn22 = np.multiply(sn2, sn2) - bvn = asr * np.sum(np.multiply(dim1w, np.exp(np.divide(np.multiply(dim1sn1, hkdim2) - hsdim2, - (1 - np.outer(dim1, sn12))))) + - np.multiply(dim1w, np.exp(np.divide(np.multiply(dim1sn2, hkdim2) - hsdim2, - (1 - np.outer(dim1, sn22))))), axis=1) / (4 * np.pi) \ - + np.multiply(norm_cdf(-dh), norm_cdf(-dk)) + bvn = asr * np.sum( + np.multiply( + dim1w, + np.exp( + np.divide( + np.multiply(dim1sn1, hkdim2) - hsdim2, + (1 - np.outer(dim1, sn12)), + ) + ), + ) + + np.multiply( + dim1w, + np.exp( + np.divide( + np.multiply(dim1sn2, hkdim2) - hsdim2, + (1 - np.outer(dim1, sn22)), + ) + ), + ), + axis=1, + ) / (4 * np.pi) + np.multiply(norm_cdf(-dh), norm_cdf(-dk)) else: if r < 0: dk = -dk @@ -171,16 +202,28 @@ def bvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_xx=1.0, sigma_yy=1.0, sigma_xy=0.0): asr = -1.0 * (np.divide(xmy2, opmr) + hk) / 2.0 ind = asr > 100 - bvn[ind] = sopmr * np.multiply(np.exp(asr[ind]), - 1.0 - np.multiply(np.multiply(rhk8[ind], xmy2[ind] - opmr), - (1.0 - np.multiply(rhk16[ind], xmy2[ind]) / 5.0) / 3.0) - + np.multiply(rhk8[ind], rhk16[ind]) * opmr * opmr / 5.0) + bvn[ind] = sopmr * np.multiply( + np.exp(asr[ind]), + 1.0 + - np.multiply( + np.multiply(rhk8[ind], xmy2[ind] - opmr), + (1.0 - np.multiply(rhk16[ind], xmy2[ind]) / 5.0) / 3.0, + ) + + np.multiply(rhk8[ind], rhk16[ind]) * opmr * opmr / 5.0, + ) ind = hk > -100 ncdfxmyt = np.sqrt(2.0 * np.pi) * norm_cdf(-xmy / sopmr) - bvn[ind] = bvn[ind] - np.multiply(np.multiply(np.multiply(np.exp(-hk[ind] / 2.0), ncdfxmyt[ind]), xmy[ind]), - 1.0 - np.multiply(np.multiply(rhk8[ind], xmy2[ind]), - (1.0 - np.multiply(rhk16[ind], xmy2[ind]) / 5.0) / 3.0)) + bvn[ind] = bvn[ind] - np.multiply( + np.multiply( + np.multiply(np.exp(-hk[ind] / 2.0), ncdfxmyt[ind]), xmy[ind] + ), + 1.0 + - np.multiply( + np.multiply(rhk8[ind], xmy2[ind]), + (1.0 - np.multiply(rhk16[ind], xmy2[ind]) / 5.0) / 3.0, + ), + ) sopmr = sopmr / 2 for ix in [-1, 1]: xs = np.multiply(sopmr + sopmr * ix * x, sopmr + sopmr * ix * x) @@ -195,11 +238,26 @@ def bvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_xx=1.0, sigma_yy=1.0, sigma_xy=0.0): ind1 = asr1 > -100 cdim2 = np.outer(rhk8, dim2) - sp1 = 1.0 + np.multiply(np.multiply(cdim2, dim1xs), 1.0 + np.multiply(rhk16dim2, dim1xs)) - ep1 = np.divide(np.exp(np.divide(-np.multiply(hkdim2, (1.0 - dim1rs)), - 2.0 * (1.0 + dim1rs))), dim1rs) - bvn = bvn + np.sum(np.multiply(np.multiply(np.multiply(sopmr, dim1w), np.exp(np.multiply(asr1, ind1))), - np.multiply(ep1, ind1) - np.multiply(sp1, ind1)), axis=1) + sp1 = 1.0 + np.multiply( + np.multiply(cdim2, dim1xs), 1.0 + np.multiply(rhk16dim2, dim1xs) + ) + ep1 = np.divide( + np.exp( + np.divide( + -np.multiply(hkdim2, (1.0 - dim1rs)), 2.0 * (1.0 + dim1rs) + ) + ), + dim1rs, + ) + bvn = bvn + np.sum( + np.multiply( + np.multiply( + np.multiply(sopmr, dim1w), np.exp(np.multiply(asr1, ind1)) + ), + np.multiply(ep1, ind1) - np.multiply(sp1, ind1), + ), + axis=1, + ) bvn = -bvn / (2.0 * np.pi) if r > 0: @@ -211,13 +269,13 @@ def bvn_cdf(x, y, mu_x=0.0, mu_y=0.0, sigma_xx=1.0, sigma_yy=1.0, sigma_xy=0.0): def gauss_legendre_quad(r): - """ Return weights and abscissae for the Legendre-Gauss quadrature integral approximation. - + """Return weights and abscissae for the Legendre-Gauss quadrature integral approximation. + Parameters ---------- r : float Correlation - + Returns ------- tuple @@ -229,19 +287,55 @@ def gauss_legendre_quad(r): x = np.array([0.9324695142031522, 0.6612093864662647, 0.2386191860831970]) elif np.abs(r) < 0.75: lg = 6 - w = np.array([.04717533638651177, 0.1069393259953183, 0.1600783285433464, - 0.2031674267230659, 0.2334925365383547, 0.2491470458134029]) - x = np.array([0.9815606342467191, 0.9041172563704750, 0.7699026741943050, - 0.5873179542866171, 0.3678314989981802, 0.1252334085114692]) + w = np.array( + [ + 0.04717533638651177, + 0.1069393259953183, + 0.1600783285433464, + 0.2031674267230659, + 0.2334925365383547, + 0.2491470458134029, + ] + ) + x = np.array( + [ + 0.9815606342467191, + 0.9041172563704750, + 0.7699026741943050, + 0.5873179542866171, + 0.3678314989981802, + 0.1252334085114692, + ] + ) else: lg = 10 - w = np.array([0.01761400713915212, 0.04060142980038694, 0.06267204833410906, - 0.08327674157670475, 0.1019301198172404, 0.1181945319615184, - 0.1316886384491766, 0.1420961093183821, 0.1491729864726037, - 0.1527533871307259]) - x = np.array([0.9931285991850949, 0.9639719272779138, 0.9122344282513259, - 0.8391169718222188, 0.7463319064601508, 0.6360536807265150, - 0.5108670019508271, 0.3737060887154196, 0.2277858511416451, - 0.07652652113349733]) - - return lg, w, x \ No newline at end of file + w = np.array( + [ + 0.01761400713915212, + 0.04060142980038694, + 0.06267204833410906, + 0.08327674157670475, + 0.1019301198172404, + 0.1181945319615184, + 0.1316886384491766, + 0.1420961093183821, + 0.1491729864726037, + 0.1527533871307259, + ] + ) + x = np.array( + [ + 0.9931285991850949, + 0.9639719272779138, + 0.9122344282513259, + 0.8391169718222188, + 0.7463319064601508, + 0.6360536807265150, + 0.5108670019508271, + 0.3737060887154196, + 0.2277858511416451, + 0.07652652113349733, + ] + ) + + return lg, w, x diff --git a/persim/images_weights.py b/persim/images_weights.py index 43b0144..31a4ee0 100644 --- a/persim/images_weights.py +++ b/persim/images_weights.py @@ -1,17 +1,19 @@ """ Weight Functions for PersistenceImager() transformer: -A valid weight function is a Python function of the form +A valid weight function is a Python function of the form -weight(birth, persistence, **kwargs) +weight(birth, persistence, **kwargs) defining a scalar-valued function over the birth-persistence plane, where birth and persistence are assumed to be numpy arrays of equal length. To ensure stability, functions should vanish continuously at the line persistence = 0. """ + import numpy as np + def linear_ramp(birth, pers, low=0.0, high=1.0, start=0.0, end=1.0): - """ Continuous peicewise linear ramp function which is constant below and above specified input values. - + """Continuous peicewise linear ramp function which is constant below and above specified input values. + Parameters ---------- birth : (N,) numpy.ndarray @@ -19,14 +21,14 @@ def linear_ramp(birth, pers, low=0.0, high=1.0, start=0.0, end=1.0): pers : (N,) numpy.ndarray Persistence coordinates of a collection of persistence pairs. low : float - Minimum weight. + Minimum weight. high : float Maximum weight. start : float Start persistence value of linear transition from low to high weight. end : float End persistence value of linear transition from low to high weight. - + Returns ------- (N,) numpy.ndarray @@ -34,7 +36,7 @@ def linear_ramp(birth, pers, low=0.0, high=1.0, start=0.0, end=1.0): """ try: n = len(birth) - except: + except TypeError: n = 1 birth = [birth] pers = [pers] @@ -50,9 +52,10 @@ def linear_ramp(birth, pers, low=0.0, high=1.0, start=0.0, end=1.0): return w + def persistence(birth, pers, n=1.0): - """ Continuous monotonic function which weight a persistence pair (b,p) by p^n for some n > 0. - + """Continuous monotonic function which weight a persistence pair (b,p) by p^n for some n > 0. + Parameters ---------- birth : (N,) numpy.ndarray @@ -61,10 +64,10 @@ def persistence(birth, pers, n=1.0): Persistence coordinates of a collection of persistence pairs. n : positive float Exponent of persistence weighting function. - + Returns ------- (N,) numpy.ndarray Weights at the persistence pairs. """ - return pers ** n \ No newline at end of file + return pers**n diff --git a/persim/landscapes/__init__.py b/persim/landscapes/__init__.py index d664a77..ce98e7a 100644 --- a/persim/landscapes/__init__.py +++ b/persim/landscapes/__init__.py @@ -2,4 +2,17 @@ from .approximate import PersLandscapeApprox from .transformer import PersistenceLandscaper from .visuals import plot_landscape, plot_landscape_simple -from .tools import * +from .tools import death_vector, vectorize, snap_pl, lc_approx, average_approx + +__all__ = [ + "average_approx", + "death_vector", + "lc_approx", + "PersLandscapeExact", + "PersLandscapeApprox", + "PersistenceLandscaper", + "plot_landscape", + "plot_landscape_simple", + "snap_pl", + "vectorize", +] diff --git a/persim/landscapes/approximate.py b/persim/landscapes/approximate.py index 34b26a9..db1c541 100644 --- a/persim/landscapes/approximate.py +++ b/persim/landscapes/approximate.py @@ -1,5 +1,5 @@ """ - Persistence Landscape Approximate class +Persistence Landscape Approximate class """ import numpy as np diff --git a/persim/landscapes/auxiliary.py b/persim/landscapes/auxiliary.py index 5bd7ec8..ce40082 100644 --- a/persim/landscapes/auxiliary.py +++ b/persim/landscapes/auxiliary.py @@ -1,5 +1,5 @@ """ - Auxilary functions for working with persistence diagrams. +Auxilary functions for working with persistence diagrams. """ import itertools @@ -53,7 +53,7 @@ def union_crit_pairs(A, B): return result_pairs -def pos_to_slope_interp(l: list) -> list: +def pos_to_slope_interp(input_list: list) -> list: """Convert positions of critical pairs to (x-value, slope) pairs. Intended for internal use. Inverse function of `slope_to_pos_interp`. @@ -66,14 +66,14 @@ def pos_to_slope_interp(l: list) -> list: output = [] # for sequential pairs in landscape function - for [[x0, y0], [x1, y1]] in zip(l, l[1:]): + for [[x0, y0], [x1, y1]] in zip(input_list, input_list[1:]): slope = (y1 - y0) / (x1 - x0) output.append([x0, slope]) - output.append([l[-1][0], 0]) + output.append([input_list[-1][0], 0]) return output -def slope_to_pos_interp(l: list) -> list: +def slope_to_pos_interp(input_list: list) -> list: """Convert positions of (x-value, slope) pairs to critical pairs. Intended @@ -84,9 +84,9 @@ def slope_to_pos_interp(l: list) -> list: list [(xi, yi)]_i for i in len(function in landscape) """ - output = [[l[0][0], 0]] + output = [[input_list[0][0], 0]] # for sequential pairs in [(xi,mi)]_i - for [[x0, m], [x1, _]] in zip(l, l[1:]): + for [[x0, m], [x1, _]] in zip(input_list, input_list[1:]): # uncover y0 and y1 from slope formula y0 = output[-1][1] y1 = y0 + (x1 - x0) * m @@ -148,8 +148,8 @@ def _p_norm(p: float, critical_pairs: list = []): critical pairs. """ result = 0.0 - for l in critical_pairs: - for [[x0, y0], [x1, y1]] in zip(l, l[1:]): + for crit_pair in critical_pairs: + for [[x0, y0], [x1, y1]] in zip(crit_pair, crit_pair[1:]): if y0 == y1: # horizontal line segment result += (np.abs(y0) ** p) * (x1 - x0) diff --git a/persim/landscapes/base.py b/persim/landscapes/base.py index e318c15..4a2119a 100644 --- a/persim/landscapes/base.py +++ b/persim/landscapes/base.py @@ -1,8 +1,9 @@ """ - Base Persistence Landscape class. +Base Persistence Landscape class. - authors: Gabrielle Angeloro, Michael Catanzaro +authors: Gabrielle Angeloro, Michael Catanzaro """ + from abc import ABC, abstractmethod import numpy as np @@ -66,9 +67,7 @@ def __sub__(self, other): @abstractmethod def __mul__(self, other): if not isinstance(other, (int, float)): - raise TypeError( - "Can only multiply persistence landscapes" "by real numbers" - ) + raise TypeError("Can only multiply persistence landscapesby real numbers") @abstractmethod def __truediv__(self, other): diff --git a/persim/landscapes/exact.py b/persim/landscapes/exact.py index 3ae5b91..97d1061 100644 --- a/persim/landscapes/exact.py +++ b/persim/landscapes/exact.py @@ -1,5 +1,5 @@ """ - Persistence Landscape Exact class +Persistence Landscape Exact class """ import itertools @@ -7,7 +7,6 @@ import numpy as np -from .approximate import PersLandscapeApprox from .auxiliary import _p_norm, union_crit_pairs from .base import PersLandscape @@ -270,7 +269,7 @@ def compute_landscape(self, verbose: bool = False) -> list: A = sorted(A, key=lambda x: [x[0], -x[1]]) while A: - verboseprint(f"computing landscape index {landscape_idx+1}...") + verboseprint(f"computing landscape index {landscape_idx + 1}...") # add a 0 element to begin count of lamda_k # size_landscapes = np.append(size_landscapes, [0]) diff --git a/persim/landscapes/tools.py b/persim/landscapes/tools.py index d13eb39..fb06a4a 100644 --- a/persim/landscapes/tools.py +++ b/persim/landscapes/tools.py @@ -1,5 +1,5 @@ """ - Tools for working with exact and approximate persistence landscapes. +Tools for working with exact and approximate persistence landscapes. """ import numpy as np @@ -34,8 +34,7 @@ def death_vector(dgms: list, hom_deg: int = 0): """ if hom_deg != 0: raise NotImplementedError( - "The death vector is not defined for " - "homological degrees greater than zero." + "The death vector is not defined for homological degrees greater than zero." ) return sorted(dgms[hom_deg][:, 1], reverse=True) @@ -164,7 +163,10 @@ def average_approx( def vectorize( - l: PersLandscapeExact, start: float = None, stop: float = None, num_steps: int = 500 + landscape: PersLandscapeExact, + start: float | None = None, + stop: float | None = None, + num_steps: int = 500, ) -> PersLandscapeApprox: """Converts a `PersLandscapeExact` type to a `PersLandscapeApprox` type. @@ -183,21 +185,21 @@ def vectorize( """ - l.compute_landscape() + landscape.compute_landscape() if start is None: - start = min(l.critical_pairs[0], key=itemgetter(0))[0] + start = min(landscape.critical_pairs[0], key=itemgetter(0))[0] if stop is None: - stop = max(l.critical_pairs[0], key=itemgetter(0))[0] + stop = max(landscape.critical_pairs[0], key=itemgetter(0))[0] grid = np.linspace(start, stop, num_steps) result = [] # creates sequential pairs of points for each lambda in critical_pairs - for depth in l.critical_pairs: + for depth in landscape.critical_pairs: xs, ys = zip(*depth) result.append(np.interp(grid, xs, ys)) return PersLandscapeApprox( start=start, stop=stop, num_steps=num_steps, - hom_deg=l.hom_deg, + hom_deg=landscape.hom_deg, values=np.array(result), ) diff --git a/persim/landscapes/transformer.py b/persim/landscapes/transformer.py index 4dbe5db..9efeb4d 100644 --- a/persim/landscapes/transformer.py +++ b/persim/landscapes/transformer.py @@ -1,7 +1,8 @@ """ - Implementation of scikit-learn transformers for persistence - landscapes. +Implementation of scikit-learn transformers for persistence +landscapes. """ + from operator import itemgetter import numpy as np diff --git a/persim/landscapes/visuals.py b/persim/landscapes/visuals.py index 720dc72..0e91d33 100644 --- a/persim/landscapes/visuals.py +++ b/persim/landscapes/visuals.py @@ -1,5 +1,5 @@ """ - Visualization methods for plotting persistence landscapes. +Visualization methods for plotting persistence landscapes. """ import itertools @@ -224,11 +224,11 @@ def plot_landscape_exact( # for each landscape function if not depth_range: depth_range = range(landscape.max_depth + 1) - for depth, l in enumerate(landscape): + for depth, lscape in enumerate(landscape): if depth not in depth_range: continue # sequential pairs in landscape - xs, zs = zip(*l) + xs, zs = zip(*lscape) image = np.interp(domain, xs, zs) for x, z in zip(domain, image): if z == 0.0: @@ -303,10 +303,10 @@ def plot_landscape_exact_simple( landscape.compute_landscape() if not depth_range: depth_range = range(landscape.max_depth + 1) - for depth, l in enumerate(landscape): + for depth, lscape in enumerate(landscape): if depth not in depth_range: continue - ls = np.array(l) + ls = np.array(lscape) ax.plot(ls[:, 0], ls[:, 1], label=f"$\lambda_{{{depth}}}$", alpha=alpha) ax.legend() ax.margins(padding) @@ -371,7 +371,7 @@ def plot_landscape_approx( # for each landscape function if not depth_range: depth_range = range(landscape.max_depth + 1) - for depth, l in enumerate(landscape): + for depth, lscape in enumerate(landscape): if depth not in depth_range: continue # sequential pairs in landscape @@ -381,7 +381,7 @@ def plot_landscape_approx( np.linspace( start=landscape.start, stop=landscape.stop, num=landscape.num_steps ), - l, + lscape, ) for x, z in zip(domain, image): if z == 0.0: @@ -462,11 +462,11 @@ def plot_landscape_approx_simple( landscape.compute_landscape() if not depth_range: depth_range = range(landscape.max_depth + 1) - for depth, l in enumerate(landscape): + for depth, lscape in enumerate(landscape): if depth not in depth_range: continue - domain = np.linspace(landscape.start, landscape.stop, num=len(l)) - ax.plot(domain, l, label=f"$\lambda_{{{depth}}}$", alpha=alpha) + domain = np.linspace(landscape.start, landscape.stop, num=len(lscape)) + ax.plot(domain, lscape, label=f"$\lambda_{{{depth}}}$", alpha=alpha) ax.legend() ax.margins(padding) if title: diff --git a/persim/persistent_entropy.py b/persim/persistent_entropy.py index 36fa3dc..8b6b409 100644 --- a/persim/persistent_entropy.py +++ b/persim/persistent_entropy.py @@ -2,20 +2,20 @@ # -*- coding: utf-8 -*- """ - - The persistent entropy has been defined in [1]. A precursor of this definition was given in [2] - to measure how different bars of the barcode are in length. - - [1] M. Rucco, F. Castiglione, E. Merelli, M. Pettini, Characterisation of the - idiotypic immune network through persistent entropy, in: Proc. Complex, 2015. - [2] H. Chintakunta, T. Gentimis, R. Gonzalez-Diaz, M.-J. Jimenez, - H. Krim, An entropy-based persistence barcode, Pattern Recognition - 48 (2) (2015) 391–401. - - Implementation of persistent entropy - - Author: Eduardo Paluzo Hidalgo (cimagroup, University of Seville) - contact: epaluzo@us.es + +The persistent entropy has been defined in [1]. A precursor of this definition was given in [2] +to measure how different bars of the barcode are in length. + +[1] M. Rucco, F. Castiglione, E. Merelli, M. Pettini, Characterisation of the +idiotypic immune network through persistent entropy, in: Proc. Complex, 2015. +[2] H. Chintakunta, T. Gentimis, R. Gonzalez-Diaz, M.-J. Jimenez, +H. Krim, An entropy-based persistence barcode, Pattern Recognition +48 (2) (2015) 391–401. + +Implementation of persistent entropy + +Author: Eduardo Paluzo Hidalgo (cimagroup, University of Seville) +contact: epaluzo@us.es """ @@ -26,9 +26,7 @@ __all__ = ["persistent_entropy"] -def persistent_entropy( - dgms, keep_inf=False, val_inf=None, normalize=False -): +def persistent_entropy(dgms, keep_inf=False, val_inf=None, normalize=False): """ Perform the persistent entropy values of a family of persistence barcodes (or persistence diagrams). Assumes that the input diagrams are from a determined dimension. If the infinity bars have any meaning @@ -36,7 +34,7 @@ def persistent_entropy( Parameters ----------- - dgms: ndarray (n_pairs, 2) or list of diagrams + dgms: ndarray (n_pairs, 2) or list of diagrams array or list of arrays of birth/death pairs of a persistence barcode of a determined dimension. keep_inf: bool, default False if False, the infinity bars are removed. @@ -46,7 +44,7 @@ def persistent_entropy( normalize: bool, default False if False, the persistent entropy values are not normalized. if True, the persistent entropy values are normalized. - + Returns -------- @@ -55,19 +53,16 @@ def persistent_entropy( """ - if isinstance(dgms, list) == False: + if not isinstance(dgms, list): dgms = [dgms] # Step 1: Remove infinity bars if keep_inf = False. If keep_inf = True, infinity value is substituted by val_inf. - if keep_inf == False: + if not keep_inf: dgms = [(dgm[dgm[:, 1] != np.inf]) for dgm in dgms] - if keep_inf == True: - if val_inf != None: - dgms = [ - np.where(dgm == np.inf, val_inf, dgm) - for dgm in dgms - ] + if keep_inf: + if val_inf is not None: + dgms = [np.where(dgm == np.inf, val_inf, dgm) for dgm in dgms] else: raise Exception( "Remember: You need to provide a value to infinity bars if you want to keep them." @@ -76,13 +71,13 @@ def persistent_entropy( # Step 2: Persistent entropy computation. ps = [] for dgm in dgms: - l = dgm[:, 1] - dgm[:, 0] - if all(l > 0): - L = np.sum(l) - p = l / L + ll = dgm[:, 1] - dgm[:, 0] + if all(ll > 0): + L = np.sum(ll) + p = ll / L E = -np.sum(p * np.log(p)) - if normalize == True: - E = E / np.log(len(l)) + if normalize: + E = E / np.log(len(ll)) ps.append(E) else: raise Exception("A bar is born after dying") diff --git a/persim/sliced_wasserstein.py b/persim/sliced_wasserstein.py index 91fcbaf..20529ae 100644 --- a/persim/sliced_wasserstein.py +++ b/persim/sliced_wasserstein.py @@ -3,25 +3,26 @@ __all__ = ["sliced_wasserstein"] + def sliced_wasserstein(PD1, PD2, M=50): - """ Implementation of Sliced Wasserstein distance as described in - Sliced Wasserstein Kernel for Persistence Diagrams by Mathieu Carriere, Marco Cuturi, Steve Oudot (https://arxiv.org/abs/1706.03358) - - - Parameters - ----------- - - PD1: np.array size (m,2) - Persistence diagram - PD2: np.array size (n,2) - Persistence diagram - M: int, default is 50 - Iterations to run approximation. - - Returns - -------- - sw: float - Sliced Wasserstein distance between PD1 and PD2 + """Implementation of Sliced Wasserstein distance as described in + Sliced Wasserstein Kernel for Persistence Diagrams by Mathieu Carriere, Marco Cuturi, Steve Oudot (https://arxiv.org/abs/1706.03358) + + + Parameters + ----------- + + PD1: np.array size (m,2) + Persistence diagram + PD2: np.array size (n,2) + Persistence diagram + M: int, default is 50 + Iterations to run approximation. + + Returns + -------- + sw: float + Sliced Wasserstein distance between PD1 and PD2 """ diag_theta = np.array( @@ -34,8 +35,8 @@ def sliced_wasserstein(PD1, PD2, M=50): if (len(l_theta1) != PD1.shape[0]) or (len(l_theta2) != PD2.shape[0]): raise ValueError("The projected points and origin do not match") - PD_delta1 = [[np.sqrt(x ** 2 / 2.0)] * 2 for x in l_theta1] - PD_delta2 = [[np.sqrt(x ** 2 / 2.0)] * 2 for x in l_theta2] + PD_delta1 = [[np.sqrt(x**2 / 2.0)] * 2 for x in l_theta1] + PD_delta2 = [[np.sqrt(x**2 / 2.0)] * 2 for x in l_theta2] # i have the input now to compute the sw sw = 0 diff --git a/persim/visuals.py b/persim/visuals.py index 9b6aeba..4431c9c 100644 --- a/persim/visuals.py +++ b/persim/visuals.py @@ -17,28 +17,28 @@ def plot_diagrams( lifetime=False, legend=True, show=False, - ax=None + ax=None, ): - """A helper function to plot persistence diagrams. + """A helper function to plot persistence diagrams. Parameters ---------- diagrams: ndarray (n_pairs, 2) or list of diagrams - A diagram or list of diagrams. If diagram is a list of diagrams, + A diagram or list of diagrams. If diagram is a list of diagrams, then plot all on the same plot using different colors. plot_only: list of numeric If specified, an array of only the diagrams that should be plotted. title: string, default is None If title is defined, add it as title of the plot. xy_range: list of numeric [xmin, xmax, ymin, ymax] - User provided range of axes. This is useful for comparing + User provided range of axes. This is useful for comparing multiple persistence diagrams. labels: string or list of strings - Legend labels for each diagram. + Legend labels for each diagram. If none are specified, we use H_0, H_1, H_2,... by default. colormap: string, default is 'default' - Any of matplotlib color palettes. - Some options are 'default', 'seaborn', 'sequential'. + Any of matplotlib color palettes. + Some options are 'default', 'seaborn', 'sequential'. See all available styles with .. code:: python @@ -48,17 +48,17 @@ def plot_diagrams( size: numeric, default is 20 Pixel size of each point plotted. - ax_color: any valid matplotlib color type. + ax_color: any valid matplotlib color type. See [https://matplotlib.org/api/colors_api.html](https://matplotlib.org/api/colors_api.html) for complete API. diagonal: bool, default is True Plot the diagonal x=y line. lifetime: bool, default is False. If True, diagonal is turned to False. - Plot life time of each point instead of birth and death. + Plot life time of each point instead of birth and death. Essentially, visualize (x, y-x). legend: bool, default is True If true, show the legend. show: bool, default is False - Call plt.show() after plotting. If you are using self.plot() as part + Call plt.show() after plotting. If you are using self.plot() as part of a subplot, set show=False and call plt.show() only once at the end. """ @@ -73,7 +73,7 @@ def plot_diagrams( if labels is None: # Provide default labels for diagrams if using self.dgm_ - labels = ["$H_{{{}}}$".format(i) for i , _ in enumerate(diagrams)] + labels = ["$H_{{{}}}$".format(i) for i, _ in enumerate(diagrams)] if plot_only: diagrams = [diagrams[i] for i in plot_only] @@ -111,7 +111,6 @@ def plot_diagrams( yr = y_up - y_down if lifetime: - # Don't plot landscape and diagonal at the same time. diagonal = False @@ -145,7 +144,6 @@ def plot_diagrams( # Plot each diagram for dgm, label in zip(diagrams, labels): - # plot persistence pairs ax.scatter(dgm[:, 0], dgm[:, 1], size, label=label, edgecolor="none") @@ -154,7 +152,7 @@ def plot_diagrams( ax.set_xlim([x_down, x_up]) ax.set_ylim([y_down, y_up]) - ax.set_aspect('equal', 'box') + ax.set_aspect("equal", "box") if title is not None: ax.set_title(title) @@ -165,18 +163,20 @@ def plot_diagrams( if show is True: plt.show() -def plot_a_bar(p, q, c='b', linestyle='-'): + +def plot_a_bar(p, q, c="b", linestyle="-"): plt.plot([p[0], q[0]], [p[1], q[1]], c=c, linestyle=linestyle, linewidth=1) + def bottleneck_matching(dgm1, dgm2, matching, labels=["dgm1", "dgm2"], ax=None): - """ Visualize bottleneck matching between two diagrams + """Visualize bottleneck matching between two diagrams Parameters =========== - dgm1: Mx(>=2) + dgm1: Mx(>=2) array of birth/death pairs for PD 1 - dgm2: Nx(>=2) + dgm2: Nx(>=2) array of birth/death paris for PD 2 matching: ndarray(Mx+Nx, 3) A list of correspondences in an optimal matching, as well as their distance, where: @@ -184,7 +184,7 @@ def bottleneck_matching(dgm1, dgm2, matching, labels=["dgm1", "dgm2"], ax=None): * Second column is index of point in second persistence diagram, or -1 if diagonal * Third column is the distance of each matching labels: list of strings - names of diagrams for legend. Default = ["dgm1", "dgm2"], + names of diagrams for legend. Default = ["dgm1", "dgm2"], ax: matplotlib Axis object For plotting on a particular axis. @@ -212,28 +212,46 @@ def bottleneck_matching(dgm1, dgm2, matching, labels=["dgm1", "dgm2"], ax=None): for idx, [i, j, d] in enumerate(matching): i = int(i) j = int(j) - linestyle = '--' + linestyle = "--" linewidth = 1 - c = 'C2' + c = "C2" if idx == max_idx: - linestyle = '-' + linestyle = "-" linewidth = 2 - c = 'C3' - if i != -1 or j != -1: # At least one point is a non-diagonal point + c = "C3" + if i != -1 or j != -1: # At least one point is a non-diagonal point if i == -1: diagElem = np.array([dgm2Rot[j, 0], 0]) diagElem = diagElem.dot(R.T) - plt.plot([dgm2[j, 0], diagElem[0]], [dgm2[j, 1], diagElem[1]], c, linewidth=linewidth, linestyle=linestyle) + plt.plot( + [dgm2[j, 0], diagElem[0]], + [dgm2[j, 1], diagElem[1]], + c, + linewidth=linewidth, + linestyle=linestyle, + ) elif j == -1: diagElem = np.array([dgm1Rot[i, 0], 0]) diagElem = diagElem.dot(R.T) - ax.plot([dgm1[i, 0], diagElem[0]], [dgm1[i, 1], diagElem[1]], c, linewidth=linewidth, linestyle=linestyle) + ax.plot( + [dgm1[i, 0], diagElem[0]], + [dgm1[i, 1], diagElem[1]], + c, + linewidth=linewidth, + linestyle=linestyle, + ) else: - ax.plot([dgm1[i, 0], dgm2[j, 0]], [dgm1[i, 1], dgm2[j, 1]], c, linewidth=linewidth, linestyle=linestyle) + ax.plot( + [dgm1[i, 0], dgm2[j, 0]], + [dgm1[i, 1], dgm2[j, 1]], + c, + linewidth=linewidth, + linestyle=linestyle, + ) def wasserstein_matching(dgm1, dgm2, matching, labels=["dgm1", "dgm2"], ax=None): - """ Visualize bottleneck matching between two diagrams + """Visualize bottleneck matching between two diagrams Parameters =========== @@ -248,7 +266,7 @@ def wasserstein_matching(dgm1, dgm2, matching, labels=["dgm1", "dgm2"], ax=None) * Second column is index of point in second persistence diagram, or -1 if diagonal * Third column is the distance of each matching labels: list of strings - names of diagrams for legend. Default = ["dgm1", "dgm2"], + names of diagrams for legend. Default = ["dgm1", "dgm2"], ax: matplotlib Axis object For plotting on a particular axis. @@ -273,7 +291,7 @@ def wasserstein_matching(dgm1, dgm2, matching, labels=["dgm1", "dgm2"], ax=None) for [i, j, d] in matching: i = int(i) j = int(j) - if i != -1 or j != -1: # At least one point is a non-diagonal point + if i != -1 or j != -1: # At least one point is a non-diagonal point if i == -1: diagElem = np.array([dgm2Rot[j, 0], 0]) diagElem = diagElem.dot(R.T) diff --git a/persim/wasserstein.py b/persim/wasserstein.py index 5f18038..907810a 100644 --- a/persim/wasserstein.py +++ b/persim/wasserstein.py @@ -1,11 +1,12 @@ """ - Implementation of the Wasserstein distance using - the Hungarian algorithm +Implementation of the Wasserstein distance using +the Hungarian algorithm - Author: Chris Tralie +Author: Chris Tralie """ + import numpy as np from sklearn import metrics from scipy import optimize @@ -26,14 +27,14 @@ def wasserstein(dgm1, dgm2, matching=False): Parameters ------------ - dgm1: Mx(>=2) + dgm1: Mx(>=2) array of birth/death pairs for PD 1 - dgm2: Nx(>=2) + dgm2: Nx(>=2) array of birth/death paris for PD 2 matching: bool, default False if True, return matching information and cross-similarity matrix - Returns + Returns --------- d: float @@ -49,8 +50,7 @@ def wasserstein(dgm1, dgm2, matching=False): S = S[np.isfinite(S[:, 1]), :] if S.shape[0] < M: warnings.warn( - "dgm1 has points with non-finite death times;"+ - "ignoring those points" + "dgm1 has points with non-finite death times;" + "ignoring those points" ) M = S.shape[0] T = np.array(dgm2) @@ -59,8 +59,7 @@ def wasserstein(dgm1, dgm2, matching=False): T = T[np.isfinite(T[:, 1]), :] if T.shape[0] < N: warnings.warn( - "dgm2 has points with non-finite death times;"+ - "ignoring those points" + "dgm2 has points with non-finite death times;" + "ignoring those points" ) N = T.shape[0] @@ -76,20 +75,20 @@ def wasserstein(dgm1, dgm2, matching=False): # Put diagonal elements into the matrix # Rotate the diagrams to make it easy to find the straight line # distance to the diagonal - cp = np.cos(np.pi/4) - sp = np.sin(np.pi/4) + cp = np.cos(np.pi / 4) + sp = np.sin(np.pi / 4) R = np.array([[cp, -sp], [sp, cp]]) S = S[:, 0:2].dot(R) T = T[:, 0:2].dot(R) - D = np.zeros((M+N, M+N)) + D = np.zeros((M + N, M + N)) np.fill_diagonal(D, 0) D[0:M, 0:N] = DUL - UR = np.inf*np.ones((M, M)) + UR = np.inf * np.ones((M, M)) np.fill_diagonal(UR, S[:, 1]) - D[0:M, N:N+M] = UR - UL = np.inf*np.ones((N, N)) + D[0:M, N : N + M] = UR + UL = np.inf * np.ones((N, N)) np.fill_diagonal(UL, T[:, 1]) - D[M:N+M, 0:N] = UL + D[M : N + M, 0:N] = UL # Step 2: Run the hungarian algorithm matchi, matchj = optimize.linear_sum_assignment(D) @@ -104,7 +103,7 @@ def wasserstein(dgm1, dgm2, matching=False): ret[ret[:, 0] >= M, 0] = -1 ret[ret[:, 1] >= N, 1] = -1 # Exclude diagonal to diagonal - ret = ret[ret[:, 0] + ret[:, 1] != -2, :] + ret = ret[ret[:, 0] + ret[:, 1] != -2, :] return matchdist, ret return matchdist diff --git a/pyproject.toml b/pyproject.toml index fad7cd7..c1e27dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,9 +61,18 @@ testing = ["pytest", "pytest-cov"] docs = ["ripser", "sktda_docs_config"] +ruff = ["ruff<0.14.0"] + [project.urls] Homepage = "https://persim.scikit-tda.org" Documentation = "https://persim.scikit-tda.org" Repository = "https://github.com/scikit-tda/persim" Issues = "https://github.com/scikit-tda/persim/issues" Changelog = "https://github.com/scikit-tda/persim/blob/master/RELEASE.txt" + +[tool.ruff] +# Exclude test directory +extend-exclude = ["test"] + +[tool.ruff.lint] +extend-ignore = ["F403","F405"] diff --git a/test/__init__.py b/test/__init__.py index b3e1f49..290cc21 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,2 +1,3 @@ import matplotlib -matplotlib.use('Agg') \ No newline at end of file + +matplotlib.use("Agg") diff --git a/test/test_landscapes.py b/test/test_landscapes.py index 038f0f8..a163fc8 100644 --- a/test/test_landscapes.py +++ b/test/test_landscapes.py @@ -174,14 +174,13 @@ def test_approx_grid_params(self): Q = PersLandscapeApprox(values=np.array([[2, 6], [4, 10]]), start=0, stop=5) assert Q.start == 0 assert Q.stop == 5 - + def test_max_depth(self): - P1 = PersLandscapeApprox(dgms=[np.array([[2,6],[4,10]])]) + P1 = PersLandscapeApprox(dgms=[np.array([[2, 6], [4, 10]])]) assert P1.max_depth == 2 - P2 = PersLandscapeApprox(dgms=[np.array([[1,3],[5,7]])]) + P2 = PersLandscapeApprox(dgms=[np.array([[1, 3], [5, 7]])]) assert P2.max_depth == 1 - def test_approx_compute_landscape(self): diagrams1 = [np.array([[2, 6], [4, 10]])] P1 = PersLandscapeApprox( diff --git a/test/test_persim.py b/test/test_persim.py index 2888aa1..eaac3b7 100644 --- a/test/test_persim.py +++ b/test/test_persim.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from persim import PersImage @@ -82,12 +81,12 @@ def test_kernel_mean(self): kf = pim.kernel(2) data = np.array([[0, 0]]) - assert kf(np.array([[0, 0]]), [0, 0]) >= kf( - np.array([[1, 1]]), [0, 0] - ), "decreasing away" - assert kf(np.array([[0, 0]]), [1, 1]) == kf( - np.array([[1, 1]]), [0, 0] - ), "symmetric" + assert kf(np.array([[0, 0]]), [0, 0]) >= kf(np.array([[1, 1]]), [0, 0]), ( + "decreasing away" + ) + assert kf(np.array([[0, 0]]), [1, 1]) == kf(np.array([[1, 1]]), [0, 0]), ( + "symmetric" + ) class TestTransforms: diff --git a/test/test_persistence_imager.py b/test/test_persistence_imager.py index 7c01336..f3a095c 100644 --- a/test/test_persistence_imager.py +++ b/test/test_persistence_imager.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from persim import PersistenceImager, images_kernels, images_weights diff --git a/test/test_persistent_entropy.py b/test/test_persistent_entropy.py index fe230a5..10ed307 100644 --- a/test/test_persistent_entropy.py +++ b/test/test_persistent_entropy.py @@ -1,6 +1,5 @@ import numpy as np -import pytest from persim import persistent_entropy as pe diff --git a/test/test_visuals.py b/test/test_visuals.py index 0afa9fc..fb8f5a7 100644 --- a/test/test_visuals.py +++ b/test/test_visuals.py @@ -1,4 +1,3 @@ -import pytest import numpy as np import matplotlib.pyplot as plt From 884520e74ac0c9eca26ae0dd40e755ef7f5d2400 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Wed, 1 Oct 2025 18:40:36 -0400 Subject: [PATCH 08/10] Add version to docs --- docs/conf.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 682b108..82633ac 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,18 +1,29 @@ # -*- coding: utf-8 -*- +import re import os import sys sys.path.insert(0, os.path.abspath(".")) from sktda_docs_config import * -from persim import __version__ + +def get_version(): + VERSIONFILE = "persim/_version.py" + verstrline = open(VERSIONFILE, "rt").read() + VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" + mo = re.search(VSRE, verstrline, re.M) + if mo: + return mo.group(1) + else: + raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,)) + project = "Persim" copyright = "2019, Nathaniel Saul" author = "Nathaniel Saul" -version = __version__ -release = __version__ +version = get_version() +release = get_version() language = "en" From feccf34babcfccdeb12c8ff033d1a93fda21441e Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Wed, 1 Oct 2025 18:43:55 -0400 Subject: [PATCH 09/10] Support python 3.8 type hints --- persim/landscapes/tools.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/persim/landscapes/tools.py b/persim/landscapes/tools.py index fb06a4a..8dfabb1 100644 --- a/persim/landscapes/tools.py +++ b/persim/landscapes/tools.py @@ -2,6 +2,8 @@ Tools for working with exact and approximate persistence landscapes. """ +from typing import Optional + import numpy as np from operator import itemgetter, attrgetter @@ -164,8 +166,8 @@ def average_approx( def vectorize( landscape: PersLandscapeExact, - start: float | None = None, - stop: float | None = None, + start: Optional[float] = None, + stop: Optional[float] = None, num_steps: int = 500, ) -> PersLandscapeApprox: """Converts a `PersLandscapeExact` type to a `PersLandscapeApprox` type. From fe0c696b08441b6b4e6128570f0f2b21a228b8c1 Mon Sep 17 00:00:00 2001 From: Michael Catanzaro Date: Wed, 1 Oct 2025 18:48:45 -0400 Subject: [PATCH 10/10] Try grabbing version using importlib --- docs/conf.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 171a82b..e1fd171 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,29 +1,18 @@ # -*- coding: utf-8 -*- -import re import os import sys +from importlib import metadata sys.path.insert(0, os.path.abspath(".")) from sktda_docs_config import * -def get_version(): - VERSIONFILE = "persim/_version.py" - verstrline = open(VERSIONFILE, "rt").read() - VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" - mo = re.search(VSRE, verstrline, re.M) - if mo: - return mo.group(1) - else: - raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,)) - - project = "Persim" copyright = "2019, Nathaniel Saul" author = "Nathaniel Saul" -version = get_version() -release = get_version() +version = metadata.version("persim") +release = metadata.version("persim") language = "en"