diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8277ba1..d9b66a6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,12 +17,12 @@ repos:
rev: v1.10.0
hooks:
- id: blacken-docs
- additional_dependencies: [black==19.3b0]
+ additional_dependencies: [black]
language: python
language_version: python
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v1.2.3
+ rev: v3.4.0
hooks:
- id: check-yaml
language: python
@@ -32,8 +32,6 @@ repos:
language: python
- id: check-docstring-first
language: python
- - id: flake8
- language: python
- id: end-of-file-fixer
language: python
exclude: docs/notebooks/
diff --git a/CHANGELOG b/CHANGELOG
index 9ce1dcb..04a9be8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,7 @@ v0.2.0
What's new!
'''''''''''
+- Added :class:`~mrinversion.linear_model.SmoothLassoLS` class to evaluate the optimum alpha and lambda values using least-squares analysis.
- Added :func:`~mrinversion.utils.to_Haeberlen_grid` function to convert the 3D :math:`\rho(\delta_\text{iso}, x, y)`
distribution to :math:`\rho(\delta_\text{iso}, \zeta_\sigma, \eta_\sigma)` distribution.
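
A minimal sketch of the new utility, assuming to_Haeberlen_grid accepts the 3D
distribution together with CSDM dimensions for the target zeta_sigma and
eta_sigma grids (the grid values below are placeholders):

    import csdmpy as cp
    import numpy as np
    from mrinversion.utils import to_Haeberlen_grid

    # f_sol is the 3D rho(delta_iso, x, y) distribution from an inversion.
    zeta = cp.as_dimension(np.arange(40) * 4.0 - 40, unit="ppm")  # placeholder grid
    eta = cp.as_dimension(np.arange(16) / 15)                     # placeholder grid
    f_haeberlen = to_Haeberlen_grid(f_sol, zeta, eta)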
diff --git a/docs/api/SmoothLassoLS.rst b/docs/api/SmoothLassoLS.rst
new file mode 100644
index 0000000..82664a4
--- /dev/null
+++ b/docs/api/SmoothLassoLS.rst
@@ -0,0 +1,13 @@
+Smooth Lasso cross-validation with Least-squares
+================================================
+
+.. currentmodule:: mrinversion.linear_model
+
+.. autoclass:: SmoothLassoLS
+ :show-inheritance:
+
+ .. rubric:: Methods Documentation
+
+ .. automethod:: fit
+ .. automethod:: predict
+ .. automethod:: residuals
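
A sketch of the documented workflow, assuming SmoothLassoLS forwards its
keywords to the GeneralL2LassoLS optimizer introduced later in this diff
(the numeric values are placeholders):

    from mrinversion.linear_model import SmoothLassoLS

    s_lasso_ls = SmoothLassoLS(
        hyperparameters={"alpha": 1e-5, "lambda": 1e-6},  # starting guess
        inverse_dimension=inverse_dimension,
        sigma=0.005,  # noise standard deviation
        folds=10,     # cross-validation folds
    )
    s_lasso_ls.fit(K=compressed_K, s=compressed_s)  # least-squares search
    f_sol = s_lasso_ls.f                            # optimum solution
    residuals = s_lasso_ls.residuals(compressed_K, compressed_s)  # s - K f*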
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index 740dd23..07b1922 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -151,7 +151,7 @@ csdmpy library as shown below:
Here, the anisotropic dimension is sampled at 625 Hz for 32 points with an offset of
-10 kHz.
-Similarly, we can create the CSDM dimensions needed for the *x*-*y* inversion grid as
+Similarly, we can create the CSDM dimensions needed for the *x-y* inversion grid as
shown below:
.. plot::
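
For reference, the two dimension types can be written with csdmpy as, for
example (the inversion-grid increment below is a placeholder):

    import csdmpy as cp

    # anisotropic (kernel) dimension: 32 points at 625 Hz with a -10 kHz offset
    anisotropic_dimension = cp.LinearDimension(
        count=32, increment="625 Hz", coordinates_offset="-10 kHz"
    )
    # a 25 x 25 x-y inversion grid; the 370 Hz increment is a placeholder
    inverse_dimension = [
        cp.LinearDimension(count=25, increment="370 Hz") for _ in range(2)
    ]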
@@ -228,7 +228,7 @@ instance to generate the spinning sideband kernel, as follows,
Here, ``K`` is the :math:`32\times 625` kernel, where the 32 is the number of samples
(sideband amplitudes), and 625 is the number of features (subspectra) on the
-:math:`25 \times 25` *x*-*y* grid. The argument *supersampling* is the supersampling
+:math:`25 \times 25` *x-y* grid. The argument *supersampling* is the supersampling
factor. In a supersampling scheme, each grid cell is averaged over a :math:`n\times n`
sub-grid, where :math:`n` is the supersampling factor. A supersampling factor of 1 is
equivalent to no sub-grid averaging.
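
In code, the factor enters through the kernel call; a sketch, where `lineshape`
stands for the ShieldingPALineshape instance from this guide:

    # supersampling=1: no sub-grid averaging.
    # supersampling=4: each of the 25 x 25 grid cells is averaged over a
    # 4 x 4 sub-grid, i.e., 16 sub-spectra per cell.
    K = lineshape.kernel(supersampling=1)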
@@ -425,7 +425,7 @@ The arguments of the :py:class:`~mrinversion.linear_model.SmoothLassoCV` is a li
of the *alpha* and *lambda* values, along with the standard deviation of the
noise, *sigma*. The value of the argument *folds* is the number of folds used in the
cross-validation. As before, to solve the problem, use the
-:meth:`~mrinversion.linear_model.SmoothLassoCV.fit` method, whose arguments are
+:py:meth:`~mrinversion.linear_model.SmoothLassoCV.fit` method, whose arguments are
the kernel and signal.
The optimum hyperparameters
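
Putting the arguments together, a sketch of the cross-validation setup (the
hyperparameter grids are placeholders):

    import numpy as np
    from mrinversion.linear_model import SmoothLassoCV

    alphas = 10 ** (-4 - 2 * (np.arange(5) / 4))   # placeholder grid
    lambdas = 10 ** (-5 - 2 * (np.arange(5) / 4))  # placeholder grid
    s_lasso_cv = SmoothLassoCV(
        alphas=alphas,
        lambdas=lambdas,
        inverse_dimension=inverse_dimension,
        sigma=0.005,
        folds=10,
    )
    s_lasso_cv.fit(K, s)  # kernel and signal
    print(s_lasso_cv.hyperparameters)  # optimum {"alpha": ..., "lambda": ...}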
diff --git a/docs/introduction.rst b/docs/introduction.rst
index b98f0d6..255e5a1 100644
--- a/docs/introduction.rst
+++ b/docs/introduction.rst
@@ -8,10 +8,6 @@ In ``mrinversion``, we solve for the distribution of the second-rank traceless
symmetric tensor principal components, through an inversion of a pure anisotropic
NMR spectrum.
-.. whose frequency
-.. contributions are assumed to arise predominantly from the second-rank traceless
-.. symmetric tensors.
-
In the case of the shielding tensors, the pure anisotropic frequency spectra correspond
to the cross-sections of the 2D isotropic *vs.* anisotropic correlation spectrum, such as
the 2D One Pulse (TOP) MAS, phase adjusted spinning sidebands (PASS), magic-angle turning
@@ -201,7 +197,7 @@ second-rank traceless tensor parameters, :math:`\zeta`-:math:`\eta`, defined as
.. math::
:label: zeta_eta_def
- r_\zeta = | \zeta_ | ~~~~\text{and}~~~~
+ r_\zeta = | \zeta | ~~~~\text{and}~~~~
\theta = \left\{ \begin{array}{l r}
\frac{\pi}{4} \eta &: \zeta \le 0, \\
\frac{\pi}{2} \left(1 - \frac{\eta}{2} \right) &: \zeta > 0.
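
A direct transcription of Eq. :eq:`zeta_eta_def` as a sketch:

    import numpy as np

    # polar mapping of the second-rank tensor parameters; scalar inputs assumed
    def zeta_eta_to_polar(zeta, eta):
        r_zeta = abs(zeta)
        theta = np.pi / 4 * eta if zeta <= 0 else np.pi / 2 * (1 - eta / 2)
        return r_zeta, theta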
diff --git a/docs/notebooks/auto_examples/sideband/plot_2D_KMg0.5O4SiO2.ipynb b/docs/notebooks/auto_examples/sideband/plot_2D_KMg0.5O4SiO2.ipynb
index dee96b3..585f547 100644
--- a/docs/notebooks/auto_examples/sideband/plot_2D_KMg0.5O4SiO2.ipynb
+++ b/docs/notebooks/auto_examples/sideband/plot_2D_KMg0.5O4SiO2.ipynb
@@ -400,7 +400,7 @@
},
"outputs": [],
"source": [
- "# Isotropic chemical shift projection of the 2D MAT dataset.\ndata_iso = data_object_truncated.sum(axis=0)\ndata_iso /= data_iso.max() # normalize the projection\n\n# Isotropic chemical shift projection of the tensor distribution dataset.\nf_sol_iso = f_sol.sum(axis=(0, 1))\n\n# Isotropic chemical shift projection of the tensor distribution for the Q4 region.\nQ4_region_iso = Q4_region.sum(axis=(0, 1))\n\n# Isotropic chemical shift projection of the tensor distribution for the Q3 region.\nQ3_region_iso = Q3_region.sum(axis=(0, 1))\n\n# Normalize the three projections.\nf_sol_iso_max = f_sol_iso.max()\nf_sol_iso /= f_sol_iso_max\nQ4_region_iso /= f_sol_iso_max\nQ3_region_iso /= f_sol_iso_max\n\n# The plot of the different projections.\nplt.figure(figsize=(5.5, 3.5))\nax = plt.subplot(projection=\"csdm\")\nax.plot(f_sol_iso, \"--k\", label=\"tensor\")\nax.plot(Q4_region_iso, \"r\", label=\"Q4\")\nax.plot(Q3_region_iso, \"b\", label=\"Q3\")\nax.plot(data_iso, \"-k\", label=\"MAF\")\nax.plot(data_iso - f_sol_iso - 0.1, \"gray\", label=\"residuals\")\nax.set_title(\"Isotropic projection\")\nax.invert_xaxis()\nplt.legend()\nplt.tight_layout()\nplt.show()"
+ "# Isotropic chemical shift projection of the 2D MAT dataset.\ndata_iso = data_object_truncated.sum(axis=0)\ndata_iso /= data_iso.max() # normalize the projection\n\n# Isotropic chemical shift projection of the tensor distribution dataset.\nf_sol_iso = f_sol.sum(axis=(0, 1))\n\n# Isotropic chemical shift projection of the tensor distribution for the Q4 region.\nQ4_region_iso = Q4_region.sum(axis=(0, 1))\n\n# Isotropic chemical shift projection of the tensor distribution for the Q3 region.\nQ3_region_iso = Q3_region.sum(axis=(0, 1))\n\n# Normalize the three projections.\nf_sol_iso_max = f_sol_iso.max()\nf_sol_iso /= f_sol_iso_max\nQ4_region_iso /= f_sol_iso_max\nQ3_region_iso /= f_sol_iso_max\n\n# The plot of the different projections.\nplt.figure(figsize=(5.5, 3.5))\nax = plt.subplot(projection=\"csdm\")\nax.plot(f_sol_iso, \"--k\", label=\"tensor\")\nax.plot(Q4_region_iso, \"r\", label=\"Q4\")\nax.plot(Q3_region_iso, \"b\", label=\"Q3\")\nax.plot(data_iso, \"-k\", label=\"MAT\")\nax.plot(data_iso - f_sol_iso - 0.1, \"gray\", label=\"residuals\")\nax.set_title(\"Isotropic projection\")\nax.invert_xaxis()\nplt.legend()\nplt.tight_layout()\nplt.show()"
]
},
{
diff --git a/docs/notebooks/auto_examples/synthetic/plot_1D_1_sideband.ipynb b/docs/notebooks/auto_examples/synthetic/plot_1D_1_sideband.ipynb
index f0c2b3b..d61413f 100644
--- a/docs/notebooks/auto_examples/synthetic/plot_1D_1_sideband.ipynb
+++ b/docs/notebooks/auto_examples/synthetic/plot_1D_1_sideband.ipynb
@@ -195,7 +195,7 @@
},
"outputs": [],
"source": [
- "# guess alpha and lambda values.\ns_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
+ "# guess alpha and lambda values.\ns_lasso = SmoothLasso(\n hyperparameters={\"alpha\": 5e-5, \"lambda\": 5e-6},\n inverse_dimension=inverse_dimension,\n)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
]
},
{
diff --git a/docs/notebooks/auto_examples/synthetic/plot_1D_2_sideband_bimodal.ipynb b/docs/notebooks/auto_examples/synthetic/plot_1D_2_sideband_bimodal.ipynb
index e16f64c..6354aef 100644
--- a/docs/notebooks/auto_examples/synthetic/plot_1D_2_sideband_bimodal.ipynb
+++ b/docs/notebooks/auto_examples/synthetic/plot_1D_2_sideband_bimodal.ipynb
@@ -177,7 +177,7 @@
},
"outputs": [],
"source": [
- "# guess alpha and lambda values.\ns_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
+ "# guess alpha and lambda values.\ns_lasso = SmoothLasso(\n hyperparameters={\"alpha\": 5e-5, \"lambda\": 5e-6},\n inverse_dimension=inverse_dimension,\n)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
]
},
{
diff --git a/docs/notebooks/auto_examples/synthetic/plot_1D_3_MAF.ipynb b/docs/notebooks/auto_examples/synthetic/plot_1D_3_MAF.ipynb
index 2baf2c2..7dc65cc 100644
--- a/docs/notebooks/auto_examples/synthetic/plot_1D_3_MAF.ipynb
+++ b/docs/notebooks/auto_examples/synthetic/plot_1D_3_MAF.ipynb
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
- "import csdmpy as cp\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom mrinversion.kernel.nmr import ShieldingPALineshape\nfrom mrinversion.linear_model import SmoothLasso, SmoothLassoCV, TSVDCompression\nfrom mrinversion.utils import get_polar_grids\n\n# Setup for the matplotlib figures\n\n\n# function for 2D x-y plot.\ndef plot2D(ax, csdm_object, title=\"\"):\n # convert the dimension coordinates of the csdm_object from Hz to pmm.\n _ = [item.to(\"ppm\", \"nmr_frequency_ratio\") for item in csdm_object.dimensions]\n\n levels = (np.arange(9) + 1) / 10\n plt.figure(figsize=(4.5, 3.5))\n ax.contourf(csdm_object, cmap=\"gist_ncar\", levels=levels)\n ax.grid(None)\n ax.set_title(title)\n get_polar_grids(ax)\n ax.set_aspect(\"equal\")"
+ "import csdmpy as cp\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom mrinversion.kernel.nmr import ShieldingPALineshape\nfrom mrinversion.linear_model import SmoothLasso, SmoothLassoCV, TSVDCompression\nfrom mrinversion.utils import get_polar_grids\n\n# Setup for the matplotlib figures\n\n\n# function for 2D x-y plot.\ndef plot2D(ax, csdm_object, title=\"\"):\n # convert the dimension coordinates of the csdm_object from Hz to pmm.\n _ = [item.to(\"ppm\", \"nmr_frequency_ratio\") for item in csdm_object.dimensions]\n\n levels = (np.arange(9) + 1) / 10\n plt.figure(figsize=(4.5, 3.5))\n ax.contourf(csdm_object, cmap=\"gist_ncar\", levels=levels)\n ax.set_xlim(0, 100)\n ax.set_ylim(0, 100)\n ax.set_title(title)\n get_polar_grids(ax)\n ax.set_aspect(\"equal\")"
]
},
{
@@ -195,7 +195,7 @@
},
"outputs": [],
"source": [
- "# guess alpha and lambda values.\ns_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
+ "# guess alpha and lambda values.\ns_lasso = SmoothLasso(\n hyperparameters={\"alpha\": 5e-5, \"lambda\": 5e-6},\n inverse_dimension=inverse_dimension,\n)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
]
},
{
@@ -249,7 +249,7 @@
},
"outputs": [],
"source": [
- "lambdas = 10 ** (-5.2 - 1 * (np.arange(6) / 5))\nalphas = 10 ** (-4 - 2 * (np.arange(6) / 5))\n\n# set up cross validation smooth lasso method\ns_lasso_cv = SmoothLassoCV(\n alphas=alphas,\n lambdas=lambdas,\n inverse_dimension=inverse_dimension,\n sigma=0.005,\n folds=10,\n)\n# run the fit using the compressed kernel and compressed signal.\ns_lasso_cv.fit(compressed_K, compressed_s)"
+ "lambdas = 10 ** (-4.8 - 1.5 * (np.arange(6) / 5))\nalphas = 10 ** (-4 - 2 * (np.arange(6) / 5))\n\n# set up cross validation smooth lasso method\ns_lasso_cv = SmoothLassoCV(\n alphas=alphas,\n lambdas=lambdas,\n inverse_dimension=inverse_dimension,\n sigma=0.005,\n folds=10,\n)\n# run the fit using the compressed kernel and compressed signal.\ns_lasso_cv.fit(compressed_K, compressed_s)"
]
},
{
diff --git a/docs/notebooks/auto_examples/synthetic/plot_1D_4_MAF_bimodal.ipynb b/docs/notebooks/auto_examples/synthetic/plot_1D_4_MAF_bimodal.ipynb
index 2f896eb..8b3f1bf 100644
--- a/docs/notebooks/auto_examples/synthetic/plot_1D_4_MAF_bimodal.ipynb
+++ b/docs/notebooks/auto_examples/synthetic/plot_1D_4_MAF_bimodal.ipynb
@@ -177,7 +177,7 @@
},
"outputs": [],
"source": [
- "# guess alpha and lambda values.\ns_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
+ "# guess alpha and lambda values.\ns_lasso = SmoothLasso(\n hyperparameters={\"alpha\": 5e-5, \"lambda\": 5e-6},\n inverse_dimension=inverse_dimension,\n)\ns_lasso.fit(K=compressed_K, s=compressed_s)\nf_sol = s_lasso.f"
]
},
{
diff --git a/docs/referenceAPI.rst b/docs/referenceAPI.rst
index b25b901..6b18184 100644
--- a/docs/referenceAPI.rst
+++ b/docs/referenceAPI.rst
@@ -8,5 +8,6 @@ API-Reference
api/shielding_kernel
api/SmoothLasso
api/SmoothLassoCV
+ api/SmoothLassoLS
api/TSVDCompression
api/utils
diff --git a/docs/requirement.rst b/docs/requirement.rst
index 5a76662..8544980 100644
--- a/docs/requirement.rst
+++ b/docs/requirement.rst
@@ -12,7 +12,7 @@ The ``mrinversion`` library depends on the following packages:
- `csdmpy>=0.4 `_
- `mrsimulator>=0.6 `_ (for generating
the NMR line-shape)
-- `scikit-learn>=0.22.1 `_ (for statistical leaning methods)
+- `scikit-learn>=0.24.1 `_ (for statistical learning methods)
**Other packages**
diff --git a/docs/requirements.txt b/docs/requirements.txt
index ccb73f8..dfdd94f 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,4 +3,4 @@ sphinx>=2.1.2
sphinx-gallery
pillow
matplotlib
-scikit-learn>=0.22
+scikit-learn>=0.24.1
diff --git a/examples/sideband/plot_2D_KMg0.5O4SiO2.py b/examples/sideband/plot_2D_KMg0.5O4SiO2.py
index fbb283d..6b10d3e 100644
--- a/examples/sideband/plot_2D_KMg0.5O4SiO2.py
+++ b/examples/sideband/plot_2D_KMg0.5O4SiO2.py
@@ -394,7 +394,7 @@ def plot2D(csdm_object, **kwargs):
ax.plot(f_sol_iso, "--k", label="tensor")
ax.plot(Q4_region_iso, "r", label="Q4")
ax.plot(Q3_region_iso, "b", label="Q3")
-ax.plot(data_iso, "-k", label="MAF")
+ax.plot(data_iso, "-k", label="MAT")
ax.plot(data_iso - f_sol_iso - 0.1, "gray", label="residuals")
ax.set_title("Isotropic projection")
ax.invert_xaxis()
diff --git a/examples/synthetic/plot_1D_1_sideband.py b/examples/synthetic/plot_1D_1_sideband.py
index bd776fc..3abb431 100644
--- a/examples/synthetic/plot_1D_1_sideband.py
+++ b/examples/synthetic/plot_1D_1_sideband.py
@@ -160,7 +160,10 @@ def plot2D(ax, csdm_object, title=""):
# accordingly.
# guess alpha and lambda values.
-s_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)
+s_lasso = SmoothLasso(
+ hyperparameters={"alpha": 5e-5, "lambda": 5e-6},
+ inverse_dimension=inverse_dimension,
+)
s_lasso.fit(K=compressed_K, s=compressed_s)
f_sol = s_lasso.f
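
Under this diff, the legacy keywords remain valid and seed the hyperparameters
dict when it is not supplied (see GeneralL2Lasso.__init__ later in this diff),
so the following call should be equivalent:

    s_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)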
diff --git a/examples/synthetic/plot_1D_2_sideband_bimodal.py b/examples/synthetic/plot_1D_2_sideband_bimodal.py
index aca155c..2e0e32e 100644
--- a/examples/synthetic/plot_1D_2_sideband_bimodal.py
+++ b/examples/synthetic/plot_1D_2_sideband_bimodal.py
@@ -143,7 +143,10 @@ def plot2D(ax, csdm_object, title=""):
# accordingly.
# guess alpha and lambda values.
-s_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)
+s_lasso = SmoothLasso(
+ hyperparameters={"alpha": 5e-5, "lambda": 5e-6},
+ inverse_dimension=inverse_dimension,
+)
s_lasso.fit(K=compressed_K, s=compressed_s)
f_sol = s_lasso.f
diff --git a/examples/synthetic/plot_1D_3_MAF.py b/examples/synthetic/plot_1D_3_MAF.py
index 8efe6db..e21e022 100644
--- a/examples/synthetic/plot_1D_3_MAF.py
+++ b/examples/synthetic/plot_1D_3_MAF.py
@@ -33,7 +33,8 @@ def plot2D(ax, csdm_object, title=""):
levels = (np.arange(9) + 1) / 10
plt.figure(figsize=(4.5, 3.5))
ax.contourf(csdm_object, cmap="gist_ncar", levels=levels)
- ax.grid(None)
+ ax.set_xlim(0, 100)
+ ax.set_ylim(0, 100)
ax.set_title(title)
get_polar_grids(ax)
ax.set_aspect("equal")
@@ -159,7 +160,10 @@ def plot2D(ax, csdm_object, title=""):
# accordingly.
# guess alpha and lambda values.
-s_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)
+s_lasso = SmoothLasso(
+ hyperparameters={"alpha": 5e-5, "lambda": 5e-6},
+ inverse_dimension=inverse_dimension,
+)
s_lasso.fit(K=compressed_K, s=compressed_s)
f_sol = s_lasso.f
@@ -209,7 +213,7 @@ def plot2D(ax, csdm_object, title=""):
# hyperparameters.
# The following code generates a range of :math:`\lambda` and :math:`\alpha` values
# that are uniformly sampled on the log scale.
-lambdas = 10 ** (-5.2 - 1 * (np.arange(6) / 5))
+lambdas = 10 ** (-4.8 - 1.5 * (np.arange(6) / 5))
alphas = 10 ** (-4 - 2 * (np.arange(6) / 5))
# set up cross validation smooth lasso method
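
The same log-uniform grids can be written with np.logspace:

    lambdas = np.logspace(-4.8, -6.3, 6)  # equals 10 ** (-4.8 - 1.5 * (np.arange(6) / 5))
    alphas = np.logspace(-4.0, -6.0, 6)   # equals 10 ** (-4 - 2 * (np.arange(6) / 5))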
diff --git a/examples/synthetic/plot_1D_4_MAF_bimodal.py b/examples/synthetic/plot_1D_4_MAF_bimodal.py
index 908ef23..454bcba 100644
--- a/examples/synthetic/plot_1D_4_MAF_bimodal.py
+++ b/examples/synthetic/plot_1D_4_MAF_bimodal.py
@@ -142,7 +142,10 @@ def plot2D(ax, csdm_object, title=""):
# accordingly.
# guess alpha and lambda values.
-s_lasso = SmoothLasso(alpha=5e-5, lambda1=5e-6, inverse_dimension=inverse_dimension)
+s_lasso = SmoothLasso(
+ hyperparameters={"alpha": 5e-5, "lambda": 5e-6},
+ inverse_dimension=inverse_dimension,
+)
s_lasso.fit(K=compressed_K, s=compressed_s)
f_sol = s_lasso.f
diff --git a/mrinversion/__init__.py b/mrinversion/__init__.py
index 3e129c8..e47d359 100644
--- a/mrinversion/__init__.py
+++ b/mrinversion/__init__.py
@@ -1,3 +1,3 @@
# -*- coding: utf-8 -*-
-__version__ = "0.2.0"
+__version__ = "0.3.0dev0"
diff --git a/mrinversion/kernel/base.py b/mrinversion/kernel/base.py
index 51bc57d..6eb2804 100644
--- a/mrinversion/kernel/base.py
+++ b/mrinversion/kernel/base.py
@@ -15,12 +15,27 @@
class BaseModel:
"""Base kernel class."""
- def __init__(self, kernel_dimension, inverse_kernel_dimension, n_dir, n_inv):
+ def __init__(
+ self,
+ kernel_dimension: list,
+ inverse_kernel_dimension: list,
+ n_dir: int,
+ n_inv: int,
+ ):
+ """Initialize the class
+
+ Args:
+ kernel_dimension: A list of kernel CSDM dimensions.
+ inverse_kernel_dimension: A list of inverse kernel CSDM dimensions.
+ n_dir: Number of kernel dimensions.
+ n_inv: Number of inverse dimensions.
+ """
kernel = self.__class__.__name__
message = (
f"Exactly {n_inv} inverse dimension(s) is/are required for the "
f"{kernel} kernel."
)
+
if isinstance(inverse_kernel_dimension, list):
if len(inverse_kernel_dimension) != n_inv:
raise ValueError(message)
@@ -158,10 +173,9 @@ def __init__(
def _get_zeta_eta(self, supersampling):
"""Return zeta and eta coordinates over x-y grid"""
- zeta, eta = _x_y_to_zeta_eta_distribution(
+ return _x_y_to_zeta_eta_distribution(
self.inverse_kernel_dimension, supersampling
)
- return zeta, eta
def _check_csdm_dimension(dimensions, dimension_id):
diff --git a/mrinversion/kernel/csa_aniso.py b/mrinversion/kernel/csa_aniso.py
index b8d4e3e..e4fca33 100644
--- a/mrinversion/kernel/csa_aniso.py
+++ b/mrinversion/kernel/csa_aniso.py
@@ -178,7 +178,7 @@ def __init__(
inverse_dimension,
channel,
magnetic_flux_density,
- "54.735 deg",
+ "54.7356 deg",
None,
None,
)
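
The extra digit follows from the magic angle, arccos(1/sqrt(3)) ≈ 54.7356 deg;
a quick check:

    import numpy as np

    print(np.degrees(np.arccos(1 / np.sqrt(3))))  # 54.73561031724535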
diff --git a/mrinversion/kernel/quad_aniso.py b/mrinversion/kernel/quad_aniso.py
new file mode 100644
index 0000000..911166a
--- /dev/null
+++ b/mrinversion/kernel/quad_aniso.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# from copy import deepcopy
+# import numpy as np
+# from mrsimulator import Simulator
+# from mrsimulator import SpinSystem
+# from mrsimulator.methods import Method1D
+# from mrinversion.kernel.base import LineShape
+# class MQMAS(LineShape):
+# """
+# A generalized class for simulating the pure anisotropic NMR nuclear shielding
+# line-shape kernel.
+# Args:
+# anisotropic_dimension: A Dimension object, or an equivalent dictionary
+# object. This dimension must represent the pure anisotropic
+# dimension.
+# inverse_dimension: A list of two Dimension objects, or equivalent
+# dictionary objects representing the `x`-`y` coordinate grid.
+# channel: The channel is an isotope symbol of the nuclei given as the atomic
+# number followed by the atomic symbol, for example, `1H`, `13C`, and
+# `29Si`. This nucleus must correspond to the recorded frequency
+# resonances.
+# magnetic_flux_density: The magnetic flux density of the external static
+# magnetic field. The default value is 9.4 T.
+# """
+# def __init__(
+# self,
+# anisotropic_dimension,
+# inverse_dimension,
+# channel,
+# magnetic_flux_density="9.4 T",
+# ):
+# super().__init__(
+# anisotropic_dimension,
+# inverse_dimension,
+# channel,
+# magnetic_flux_density,
+# rotor_angle=54.7356 * np.pi / 180,
+# rotor_frequency=1e9,
+# number_of_sidebands=1,
+# )
+# def kernel(self, supersampling=1):
+# """
+# Return the quadrupolar anisotropic line-shape kernel.
+# Args:
+# supersampling: An integer. Each cell is supersampled by the factor
+# `supersampling` along every dimension.
+# Returns:
+# A numpy array containing the line-shape kernel.
+# """
+# self.inverse_kernel_dimension[0].application["half"] = True
+# args_ = deepcopy(self.method_args)
+# args_["spectral_dimensions"][0]["events"] = [
+# {"fraction": 27 / 17, "freq_contrib": ["Quad2_0"]},
+# {"fraction": 1, "freq_contrib": ["Quad2_4"]},
+# ]
+# method = Method1D.parse_dict_with_units(args_)
+# isotope = args_["channels"][0]
+# zeta, eta = self._get_zeta_eta(supersampling)
+# # new_size = zeta.size
+# # n1, n2 = [item.count for item in self.inverse_kernel_dimension]
+# # index = []
+# # for i in range(n2):
+# # i_ = i * supersampling
+# # for j in range(supersampling):
+# # index = np.append(index, np.arange(n1 - i_) + (i_ + j) * n1 + i_)
+# # index = np.asarray(index, dtype=int)
+# # print(index)
+# # zeta = zeta[index]
+# # eta = eta[index]
+# # larmor frequency from method.
+# B0 = method.spectral_dimensions[0].events[0].magnetic_flux_density # in T
+# gamma = method.channels[0].gyromagnetic_ratio # in MHz/T
+# larmor_frequency = -gamma * B0 # in MHz
+# for dim_i in self.inverse_kernel_dimension:
+# if dim_i.origin_offset.value == 0:
+# dim_i.origin_offset = f"{abs(larmor_frequency)} MHz"
+# spin_systems = [
+# SpinSystem(sites=[dict(isotope=isotope, quadrupolar=dict(Cq=z, eta=e))])
+# for z, e in zip(zeta, eta)
+# ]
+# dim = method.spectral_dimensions[0]
+# if dim.origin_offset == 0:
+# dim.origin_offset = larmor_frequency * 1e6 # in Hz
+# sim = Simulator()
+# sim.config.number_of_sidebands = self.number_of_sidebands
+# sim.config.decompose_spectrum = "spin_system"
+# sim.spin_systems = spin_systems
+# sim.methods = [method]
+# sim.run(pack_as_csdm=False)
+# amp = sim.methods[0].simulation
+# # amp2 = np.zeros((new_size, amp.shape[1]))
+# # amp2 = (amp.T + amp) / 2.0
+# # amp2[index] = amp
+# # print(amp2.shape, amp.shape)
+# kernel = self._averaged_kernel(amp, supersampling)
+# # print(kernel.shape)
+# n1, n2 = [item.count for item in self.inverse_kernel_dimension]
+# shape = kernel.shape[0]
+# kernel.shape = (shape, n1, n2)
+# for i in range(n1):
+# for j in range(n2):
+# if i > j:
+# kernel[:, i, j] = 0
+# kernel.shape = (shape, n1 * n2)
+# # if not half:
+# # return kernel
+# index = np.where(kernel.sum(axis=0) != 0)[0]
+# self.inverse_kernel_dimension[0].application["index"] = index.tolist()
+# return kernel[:, index]
+# #
+# # return kernel.reshape(shape, n1 * n2)
diff --git a/mrinversion/kernel/relaxation.py b/mrinversion/kernel/relaxation.py
index 24020a1..1a28894 100644
--- a/mrinversion/kernel/relaxation.py
+++ b/mrinversion/kernel/relaxation.py
@@ -6,8 +6,7 @@
class T2(BaseModel):
- r"""
- A class for simulating the kernel of T2 decaying functions,
+ r"""A class for simulating the kernel of T2 decaying functions,
.. math::
y = \exp(-x/x_\text{inv}).
@@ -19,20 +18,23 @@ class T2(BaseModel):
dictionary objects representing the `x`-`y` coordinate grid.
"""
- def __init__(self, kernel_dimension, inverse_kernel_dimension):
- super().__init__(kernel_dimension, inverse_kernel_dimension, 1, 1)
+ def __init__(self, kernel_dimension, inverse_dimension):
+ # unit = kernel_dimension.coordinates.to('s')
+ # inverse_dimension = inverse_dimension * cp.ScalarQuantity("s")
+ super().__init__(kernel_dimension, inverse_dimension, 1, 1)
def kernel(self, supersampling=1):
- """
- Return the kernel of T2 decaying functions.
+ """Return the kernel of T2 decaying functions.
- Args:
- supersampling: An integer. Each cell is supersampled by the factor
- `supersampling`.
Returns:
A numpy array.
"""
x = self.kernel_dimension.coordinates
+ # x_inverse = self.inverse_kernel_dimension.coordinates
+ # x = self.kernel_dimension.coordinates.to("s").value
+ # self.inverse_kernel_dimension = (
+ # self.inverse_kernel_dimension / cp.ScalarQuantity("s")
+ # )
x_inverse = _supersampled_coordinates(
self.inverse_kernel_dimension, supersampling=supersampling
)
@@ -41,8 +43,7 @@ def kernel(self, supersampling=1):
class T1(BaseModel):
- r"""
- A class for simulating the kernel of T1 recovery functions,
+ r"""A class for simulating the kernel of T1 recovery functions,
.. math::
y = 1 - \exp(-x/x_\text{inv}).
@@ -54,11 +55,12 @@ class T1(BaseModel):
dictionary objects representing the `x`-`y` coordinate grid.
"""
- def __init__(self, kernel_dimension, inverse_kernel_dimension):
- super().__init__(kernel_dimension, inverse_kernel_dimension, 1, 1)
+ def __init__(self, kernel_dimension, inverse_dimension):
+ super().__init__(kernel_dimension, inverse_dimension, 1, 1)
def kernel(self, supersampling=1):
x = self.kernel_dimension.coordinates
+
x_inverse = _supersampled_coordinates(
self.inverse_kernel_dimension, supersampling=supersampling
)
diff --git a/mrinversion/kernel/tests/other_kernel_test.py b/mrinversion/kernel/tests/other_kernel_test.py
index c1e4185..f6ee134 100644
--- a/mrinversion/kernel/tests/other_kernel_test.py
+++ b/mrinversion/kernel/tests/other_kernel_test.py
@@ -7,34 +7,28 @@
kernel_dimension = cp.Dimension(type="linear", count=96, increment="20 ms")
-inverse_kernel_dimension = cp.Dimension(
+inverse_dimension = cp.Dimension(
type="monotonic", coordinates=["1ms", "10ms", "100ms", "1s", "2s"]
)
def test_T2_kernel():
- T2_obj = T2(
- kernel_dimension=kernel_dimension,
- inverse_kernel_dimension=inverse_kernel_dimension,
- )
- K = T2_obj.kernel(supersampling=1)
+ T2_obj = T2(kernel_dimension=kernel_dimension, inverse_dimension=inverse_dimension)
+ K = T2_obj.kernel()
x = kernel_dimension.coordinates
- x_inverse = inverse_kernel_dimension.coordinates
+ x_inverse = inverse_dimension.coordinates
amp = np.exp(np.tensordot(-x, (1 / x_inverse), 0))
amp /= amp[:, 0].sum()
assert np.allclose(K, amp)
def test_T1_kernel():
- T1_obj = T1(
- kernel_dimension=kernel_dimension,
- inverse_kernel_dimension=inverse_kernel_dimension,
- )
- K = T1_obj.kernel(supersampling=1)
+ T1_obj = T1(kernel_dimension=kernel_dimension, inverse_dimension=inverse_dimension)
+ K = T1_obj.kernel()
x = kernel_dimension.coordinates
- x_inverse = inverse_kernel_dimension.coordinates
+ x_inverse = inverse_dimension.coordinates
amp = 1 - np.exp(np.tensordot(-x, (1 / x_inverse), 0))
amp /= amp[:, 0].sum()
assert np.allclose(K, amp)
diff --git a/mrinversion/kernel/utils.py b/mrinversion/kernel/utils.py
index 6fc15ed..49924d0 100644
--- a/mrinversion/kernel/utils.py
+++ b/mrinversion/kernel/utils.py
@@ -130,9 +130,7 @@ def _x_y_to_zeta_eta_distribution(grid, supersampling):
# y_coordinates = (y_coordinates * dimension.larmor_frequency).to("").value
# offset = grid.x.coordinates_offset.to("").value
- x_mesh, y_mesh = np.meshgrid(
- np.abs(x_coordinates), np.abs(y_coordinates), indexing="xy"
- )
+ x_mesh, y_mesh = np.meshgrid(x_coordinates, y_coordinates, indexing="xy")
# y_offset = y_coordinates[0]
# x_offset = x_coordinates[0]
diff --git a/mrinversion/linear_model/__init__.py b/mrinversion/linear_model/__init__.py
index 316427f..8e45b45 100644
--- a/mrinversion/linear_model/__init__.py
+++ b/mrinversion/linear_model/__init__.py
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from .smooth_lasso import SmoothLasso # noqa: F401
from .smooth_lasso import SmoothLassoCV # noqa: F401
+from .smooth_lasso import SmoothLassoLS # noqa: F401
from .tsvd_compression import TSVDCompression # noqa: F401
diff --git a/mrinversion/linear_model/_base_l1l2.py b/mrinversion/linear_model/_base_l1l2.py
index 58c56b3..6fcf3e6 100644
--- a/mrinversion/linear_model/_base_l1l2.py
+++ b/mrinversion/linear_model/_base_l1l2.py
@@ -1,166 +1,192 @@
# -*- coding: utf-8 -*-
from copy import deepcopy
+from typing import Any
+from typing import List
+from typing import Union
import csdmpy as cp
import numpy as np
from joblib import delayed
from joblib import Parallel
+from pydantic import BaseModel
+from pydantic import PrivateAttr
+from scipy.optimize import least_squares
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoLars
from sklearn.linear_model import MultiTaskLasso
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
+from typing_extensions import Literal
from mrinversion.linear_model.tsvd_compression import TSVDCompression # noqa: F401
+# from scipy.optimize import minimize
+
__author__ = "Deepansh J. Srivastava"
__email__ = "srivastava.89@osu.edu"
-class GeneralL2Lasso:
- r"""
- The Minimizer class solves the following equation,
+class Optimizer(BaseModel):
+ inverse_dimension: List[
+ Union[cp.Dimension, cp.LinearDimension, cp.MonotonicDimension]
+ ]
+ max_iterations: int = 20000
+ tolerance: float = 1e-5
+ positive: bool = True
+ regularizer: Literal["smooth lasso", "sparse ridge fusion"] = "smooth lasso"
+ method: Literal["multi-task", "gradient_decent", "lars"] = "gradient_decent"
- .. math::
- {\bf f} = \underset{{\bf f}}{\text{argmin}} \left( \frac{1}{m} \|
- {\bf Kf - s} \|^2_2 +
- \alpha \sum_{i=1}^{d} \| {\bf J}_i {\bf f} \|_2^2 +
- \lambda \| {\bf f} \|_1 \right),
+ folds: int = 10
+ sigma: float = 0.0
+ randomize: bool = False
+ times: int = 2
- where :math:`{\bf K} \in \mathbb{R}^{m \times n}` is the kernel,
- :math:`{\bf s} \in \mathbb{R}^{m \times m_\text{count}}` is the known signal
- containing noise, and :math:`{\bf f} \in \mathbb{R}^{n \times m_\text{count}}`
- is the desired solution matrix.
+ f: Union[cp.CSDM, np.ndarray] = None
+ n_iter: int = None
+ _scale: float = PrivateAttr(1.0)
+ _estimator: Any = PrivateAttr()
- Based on the regularization literal, the above problem is constraint
+ class Config:
+ arbitrary_types_allowed = True
- Args:
- alpha: Float, the hyperparameter, :math:`\alpha`.
- lambda1: Float, the hyperparameter, :math:`\lambda`.
- max_iterations: Interger, the maximum number of iterations allowed when
- solving the problem. The default value is 10000.
- tolerance: Float, the tolerance at which the solution is
- considered converged. The default value is 1e-5.
- positive: Boolean. If True, the amplitudes in the solution,
- :math:`{\bf f}` is all positive, else the solution may contain
- positive and negative amplitudes. The default is True.
- regularizer: String, a literal specifying the form of matrix
- :math:`{\bf J}_i`. The allowed literals are `smooth lasso`
- and `sparse ridge fusion`.
- f_shape: The shape of the solution, :math:`{\bf f}`, given as a tuple
- (n1, n2, ..., nd)
- Attributes:
- """
+ def predict(self, K):
+ r"""Predict the signal using the linear model.
+
+ Args
+ ----
- def __init__(
- self,
- alpha=1e-3,
- lambda1=1e-6,
- max_iterations=10000,
- tolerance=1e-5,
- positive=True,
- regularizer=None,
- inverse_dimension=None,
- method="gradient_decent",
- ):
-
- self.hyperparameters = {"lambda": lambda1, "alpha": alpha}
- self.max_iterations = max_iterations
- self.tolerance = tolerance
- self.positive = positive
- self.regularizer = regularizer
- self.inverse_dimension = inverse_dimension
- self.f_shape = tuple([item.count for item in inverse_dimension])[::-1]
- self.method = method
-
- # attributes
- self.f = None
- self.n_iter = None
+ K: ndarray
+ A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of shape
+ (m, n).
- def fit(self, K, s):
- r"""Fit the model using the coordinate descent method from scikit-learn.
+ Return
+ ------
+ ndarray
+ A numpy array of shape (m, m_count) with the predicted values
+ """
+ return self._estimator.predict(K) * self._scale
+
+ def residuals(self, K, s):
+ r"""Return the residual as the difference the data and the prediced data(fit),
+ following
+
+ .. math::
+ \text{residuals} = {\bf s - Kf^*}
+
+ where :math:`{\bf f^*}` is the optimum solution.
Args
----
+ K: ndarray.
+ A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of shape
+ (m, n).
+ s: ndarray or CSDM object.
+ A csdm object or a :math:`m \times m_\text{count}` signal matrix,
+ :math:`{\bf s}`.
- K: ndarray
- The :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of
- shape (m, n).
- s: ndarray or CSDM object.
- A csdm object or an equivalent numpy array holding the signal,
- :math:`{\bf s}`, as a :math:`m \times m_\text{count}` matrix.
+ Return
+ ------
+ ndarray or CSDM object.
+ If `s` is a csdm object, returns a csdm object with the residuals. If `s`
+ is a numpy array, returns a :math:`m \times m_\text{count}` residual matrix.
"""
- if isinstance(s, cp.CSDM):
- self.s = s
- s_ = s.dependent_variables[0].components[0].T
- else:
- s_ = s
-
- if s_.ndim == 1:
- s_ = s_[:, np.newaxis]
- prod = np.asarray(self.f_shape).prod()
- if K.shape[1] != prod:
- raise ValueError(
- "The product of the shape, `f_shape`, must be equal to the length of "
- f"the axis 1 of kernel, K, {K.shape[1]} != {prod}."
- )
+ s_ = s.y[0].components[0].T if isinstance(s, cp.CSDM) else s
+ predict = np.squeeze(self._estimator.predict(K)) * self._scale
+ residue = s_ - predict
- self.scale = s_.real.max()
- Ks, ss = _get_augmented_data(
- K=K,
- s=s_ / self.scale,
- alpha=s_.size * self.hyperparameters["alpha"],
- regularizer=self.regularizer,
- f_shape=self.f_shape,
- )
+ if not isinstance(s, cp.CSDM):
+ return residue
+
+ residue = cp.as_csdm(residue.T.copy())
+ residue._dimensions = s._dimensions
+ return residue
+
+ def score(self, K, s, sample_weights=None):
+ """The coefficient of determination, :math:`R^2`, of the prediction.
+ For more information, read scikit-learn documentation.
+ """
+ return self._estimator.score(K, s / self._scale, sample_weights)
+ def get_optimizer(self, alpha):
+ """Return the scikit-learn estimator for the method."""
# The factor 0.5 for alpha in the Lasso/LassoLars problem is to compensate
- # 1/(2 * n_sample) factor in OLS term
+ # 1/(2 * n_sample) factor in OLS term.
if self.method == "multi-task":
- estimator = MultiTaskLasso(
- alpha=self.hyperparameters["lambda"] / 2.0,
+ return MultiTaskLasso(
+ alpha=alpha / 2.0, # self.cv_lambdas[0] / 2.0,
fit_intercept=False,
- copy_X=True,
+ normalize=False,
+ # precompute=True,
max_iter=self.max_iterations,
tol=self.tolerance,
- warm_start=False,
+ copy_X=True,
+ # positive=self.positive,
random_state=None,
+ warm_start=False,
selection="random",
- # positive=self.positive,
)
+ kwargs = dict(
+ alpha=alpha / 2.0,
+ fit_intercept=False,
+ normalize=False,
+ max_iter=self.max_iterations,
+ copy_X=True,
+ positive=self.positive,
+ random_state=45,
+ )
+
if self.method == "gradient_decent":
- estimator = Lasso(
- alpha=self.hyperparameters["lambda"] / 2.0,
- fit_intercept=False,
- copy_X=True,
- max_iter=self.max_iterations,
+ return Lasso(
+ precompute=True,
tol=self.tolerance,
warm_start=False,
- random_state=None,
selection="random",
- positive=self.positive,
+ **kwargs,
)
if self.method == "lars":
- estimator = LassoLars(
- alpha=self.hyperparameters["lambda"] / 2.0,
- fit_intercept=False,
- verbose=True,
- normalize=False,
- precompute=True,
- max_iter=self.max_iterations,
- eps=2.220446049250313e-16,
- copy_X=True,
+ return LassoLars(
+ precompute="auto",
fit_path=False,
- positive=True,
- jitter=None,
- random_state=None,
+ jitter=self.sigma,
+ verbose=True,
+ **kwargs,
)
- estimator.fit(Ks, ss)
- f = estimator.coef_.copy()
+ @property
+ def f_shape(self):
+ """Shape of solution grid"""
+ return tuple([item.count for item in self.inverse_dimension])[::-1]
+
+ def pack_solution_as_csdm(self, f, s):
+ """Pack the solution as a CSDM object.
+
+ Args:
+ np.ndarray f: Solution numpy array.
+ cp.csdm s: CSDM dataset
+ """
+ f = cp.as_csdm(f)
+
+ for i, item in enumerate(self.inverse_dimension):
+ f.x[i] = item
+
+ if len(s.x) > 1:
+ f.x[i + 1] = s.x[1]
+ return f
+
+ def shape_solution(self, f, s_, half=False):
+ if len(self.f_shape) == 1:
+ f *= self._scale
+ return f
+
+ if half:
+ index = self.inverse_dimension[0].application["index"]
+ f_new = np.zeros((s_.shape[1], np.prod(self.f_shape)), dtype=float)
+ f_new[:, index] = f
+ f = f_new
if s_.shape[1] > 1:
f.shape = (s_.shape[1],) + self.f_shape
f[:, :, 0] /= 2.0
@@ -170,135 +196,140 @@ def fit(self, K, s):
f[:, 0] /= 2.0
f[0, :] /= 2.0
- f *= self.scale
+ f *= self._scale
+ return f
- if isinstance(s, cp.CSDM):
- f = cp.as_csdm(f)
+ def _fit_setup(self, K, s, alpha, get_cv_indexes=False):
+ """Compress the kernel and dataset, and optionally calculate the
+ cross-validation indexes (test-train sets)."""
+ s_ = _get_proper_data(s)
+ self._scale = s_.max().real
+ s_ = s_ / self._scale
- if len(s.dimensions) > 1:
- f.dimensions[2] = s.dimensions[1]
- f.dimensions[1] = self.inverse_dimension[1]
- f.dimensions[0] = self.inverse_dimension[0]
+ # prod = np.asarray(self.f_shape).prod()
+ # if K.shape[1] != prod:
+ # raise ValueError(
+ # "The product of the shape, `f_shape`, must be equal to the length of "
+ # f"the axis 1 of kernel, K, {K.shape[1]} != {prod}."
+ # )
+ half = False
+ if "half" in self.inverse_dimension[0].application:
+ half = self.inverse_dimension[0].application["half"]
- self.estimator = estimator
- self.f = f
- self.n_iter = estimator.n_iter_
+ args = {"regularizer": self.regularizer, "f_shape": self.f_shape, "half": half}
+ Ks, ss = _get_augmented_data(K=K, s=s_, alpha=s_.size * alpha, **args)
- def predict(self, K):
- r"""
- Predict the signal using the linear model.
+ if not get_cv_indexes:
+ return Ks, ss, (half, s_)
- Args
- ----
+ args = (self.folds, self.regularizer, self.f_shape, self.randomize, self.times)
+ cv_indexes = _get_cv_indexes(K, *args)
- K: ndarray
- A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of shape
- (m, n).
+ return Ks, ss, cv_indexes
- Return
- ------
- ndarray
- A numpy array of shape (m, m_count) with the predicted values
- """
- predict = self.estimator.predict(K) * self.scale
+ def _get_bootstrap_cv_indexes(self, cv_indexes, n_repeats, arr_length):
+ if n_repeats == 1:
+ return [cv_indexes]
- return predict
+ a = np.arange(arr_length)
+ np.random.shuffle(a)
+ list_ = a[:n_repeats]
- def residuals(self, K, s):
- r"""
- Return the residual as the difference the data and the prediced data(fit),
- following
+ return [
+ [
+ [np.delete(t_index, np.where(t_index == list_[i])) for t_index in items]
+ for items in cv_indexes
+ ]
+ for i in range(n_repeats)
+ ]
- .. math::
- \text{residuals} = {\bf s - Kf^*}
- where :math:`{\bf f^*}` is the optimum solution.
+class GeneralL2Lasso(Optimizer):
+ r"""The Minimizer class solves the following equation,
+
+ .. math::
+ {\bf f} = \underset{{\bf f}}{\text{argmin}} \left( \frac{1}{m} \|
+ {\bf Kf - s} \|^2_2 +
+ \alpha \sum_{i=1}^{d} \| {\bf J}_i {\bf f} \|_2^2 +
+ \lambda \| {\bf f} \|_1 \right),
+
+ where :math:`{\bf K} \in \mathbb{R}^{m \times n}` is the kernel,
+ :math:`{\bf s} \in \mathbb{R}^{m \times m_\text{count}}` is the known signal
+ containing noise, and :math:`{\bf f} \in \mathbb{R}^{n \times m_\text{count}}`
+ is the desired solution matrix.
+
+
+ Based on the regularization literal, the above problem is constrained accordingly.
+
+ Args:
+ alpha: Float, the hyperparameter, :math:`\alpha`.
+ lambda1: Float, the hyperparameter, :math:`\lambda`.
+ hyperparameters: Dict, a python dictionary of hyperparameters.
+ max_iterations: Integer, the maximum number of iterations allowed when
+ solving the problem. The default value is 20000.
+ tolerance: Float, the tolerance at which the solution is
+ considered converged. The default value is 1e-5.
+ positive: Boolean. If True, the amplitudes in the solution,
+ :math:`{\bf f}`, are all positive, else the solution may contain
+ positive and negative amplitudes. The default is True.
+ regularizer: String, a literal specifying the form of matrix
+ :math:`{\bf J}_i`. The allowed literals are `smooth lasso`
+ and `sparse ridge fusion`.
+ f_shape: The shape of the solution, :math:`{\bf f}`, given as a tuple
+ (n1, n2, ..., nd)
+ """
+
+ alpha: float = 1e-3
+ lambda1: float = 1e-6
+ hyperparameters: dict = None
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ if self.hyperparameters is None:
+ self.hyperparameters = {"lambda": self.lambda1, "alpha": self.alpha}
+
+ def fit(self, K, s):
+ r"""Fit the model using the coordinate descent method from scikit-learn.
Args
----
- K: ndarray.
- A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of shape
- (m, n).
- s: ndarray ot CSDM object.
- A csdm object or a :math:`m \times m_\text{count}` signal matrix,
- :math:`{\bf s}`.
- Return
- ------
- ndarray or CSDM object.
- If `s` is a csdm object, returns a csdm object with the residuals. If `s`
- is a numpy array, return a :math:`m \times m_\text{count}` residue matrix.
- csdm object
+ K: ndarray
+ The :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of
+ shape (m, n).
+ s: ndarray or CSDM object.
+ A csdm object or an equivalent numpy array holding the signal,
+ :math:`{\bf s}`, as a :math:`m \times m_\text{count}` matrix.
"""
+
+ Ks, ss, (half, s_) = self._fit_setup(K, s, self.hyperparameters["alpha"], False)
+
+ _estimator = self.get_optimizer(self.hyperparameters["lambda"])
+ _estimator.fit(Ks, ss)
+ f = _estimator.coef_.copy()
+
+ f = self.shape_solution(f, s_, half)
+
if isinstance(s, cp.CSDM):
- s_ = s.dependent_variables[0].components[0].T
- else:
- s_ = s
- predict = np.squeeze(self.estimator.predict(K)) * self.scale
- residue = s_ - predict
+ f = self.pack_solution_as_csdm(f, s)
+ # f = cp.as_csdm(f)
- if not isinstance(s, cp.CSDM):
- return residue
+ # if len(s.x) > 1:
+ # f.x[2] = s.x[1]
+ # f.x[1] = self.inverse_dimension[1]
+ # f.x[0] = self.inverse_dimension[0]
- residue = cp.as_csdm(residue.T.copy())
- residue._dimensions = s._dimensions
- return residue
+ self._estimator = _estimator
+ self.f = f
+ self.n_iter = _estimator.n_iter_
- def score(self, K, s, sample_weights=None):
- """
- The coefficient of determination, :math:`R^2`, of the prediction.
- For more information, read scikit-learn documentation.
- """
- return self.estimator.score(K, s / self.scale, sample_weights)
-
-
-class GeneralL2LassoCV:
- def __init__(
- self,
- alphas=None,
- lambdas=None,
- folds=10,
- max_iterations=10000,
- tolerance=1e-5,
- positive=True,
- sigma=0.0,
- regularizer=None,
- randomize=False,
- times=2,
- verbose=False,
- inverse_dimension=None,
- n_jobs=-1,
- method="gradient_decent",
- ):
-
- if alphas is None:
- self.cv_alphas = 10 ** ((np.arange(5) / 4) * 2 - 4)[::-1]
- else:
- self.cv_alphas = np.asarray(alphas).ravel()
- if lambdas is None:
- self.cv_lambdas = 10 ** ((np.arange(10) / 9) * 5 - 9)[::-1]
- else:
- self.cv_lambdas = np.asarray(lambdas).ravel()
-
- self.method = method
- self.folds = folds
-
- self.n_jobs = n_jobs
- self.max_iterations = max_iterations
- self.tolerance = tolerance
- self.positive = positive
- self.sigma = sigma
- self.regularizer = regularizer
- self.hyperparameters = {}
- self.f = None
- self.randomize = randomize
- self.times = times
- self.verbose = verbose
- self.inverse_dimension = inverse_dimension
- self.f_shape = tuple([item.count for item in inverse_dimension])[::-1]
+class GeneralL2LassoLS(GeneralL2Lasso):
+ verbose: bool = False
+ n_jobs: int = -1
+ _path: Any = PrivateAttr()
- def fit(self, K, s):
+ def fit(self, K, s, n_repeats=1, **kwargs):
r"""Fit the model using the coordinate descent method from scikit-learn for
all alpha and lambda values using the `n`-folds cross-validation technique.
The cross-validation metric is the mean squared error.
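
The fit below searches log10(alpha) and log10(lambda) by least squares on the
noise-corrected cross-validation error; the standalone pattern, with a
hypothetical objective function, is:

    import numpy as np
    from scipy.optimize import least_squares

    def cv_rmse(x, sigma=0.005):
        # x = [log10(alpha), log10(lambda)]; cross_validate_mse is a placeholder
        # for the k-fold mean-squared-error evaluation (cf. fnc() below).
        alpha, lam = 10 ** x[0], 10 ** x[1]
        mse = cross_validate_mse(alpha, lam)
        return np.sqrt(2 * (mse - sigma ** 2))

    res = least_squares(cv_rmse, x0=np.log10([1e-5, 1e-6]), xtol=1e-4)
    alpha_opt, lambda_opt = 10 ** res.x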
@@ -308,95 +339,185 @@ def fit(self, K, s):
shape (m, n).
s: A :math:`m \times m_\text{count}` signal matrix, :math:`{\bf s}` as a
csdm object or a numpy array or shape (m, m_count).
+ n_repeats: int. Repeat the cross-validation by adding noise to the input
+ data and evaluate the mean cross-validation error.
"""
+ Ks, ss, cv_indexes = self._fit_setup(
+ K, s, self.hyperparameters["alpha"], get_cv_indexes=True
+ )
+ l1 = self.get_optimizer(self.hyperparameters["lambda"])
- if isinstance(s, cp.CSDM):
- self.s = s
- s_ = s.dependent_variables[0].components[0].T
- else:
- s_ = s
-
- s_ = s_[:, np.newaxis] if s_.ndim == 1 else s_
- prod = np.asarray(self.f_shape).prod()
- if K.shape[1] != prod:
- raise ValueError(
- "The product of the shape, `f_shape`, must be equal to the length of "
- f"the axis 1 of kernel, K, {K.shape[1]} != {prod}."
- )
+ ks0 = K.shape[0]
+ new_indexes = self._get_bootstrap_cv_indexes(cv_indexes, n_repeats, ks0)
+
+ self._path = []
+ sigma_sq = self.sigma ** 2
+
+ def fnc(x_):
+ x = [10 ** x_[0], 10 ** x_[1]]
+ self._path.append(x)
+ Ks[ks0:] *= np.sqrt(x[0] / self.hyperparameters["alpha"])
+ l1.alpha = x[1] / 2.0
- self.scale = s_.max().real
- s_ = s_ / self.scale
- cv_indexes = _get_cv_indexes(
- K,
- self.folds,
- self.regularizer,
- f_shape=self.f_shape,
- random=self.randomize,
- times=self.times,
+ mse = np.sum(
+ [cv(l1, Ks, ss, indexes, n_jobs=self.n_jobs) for indexes in new_indexes]
+ )
+ mse /= -n_repeats
+ mse -= sigma_sq
+ return np.sqrt(2 * mse)
+
+ x0 = np.log10([self.hyperparameters["alpha"], self.hyperparameters["lambda"]])
+
+ kwargs["xtol"] = 1e-4 if "xtol" not in kwargs else kwargs["xtol"]
+ res = least_squares(
+ fnc,
+ x0,
+ # jac="3-point",
+ # loss="cauchy",
+ # method="dogbox",
+ bounds=[(-15, -15), (-4, -4)],
+ # diff_step=[5e-5, 5e-5],
+ verbose=2,
+ **kwargs,
)
- self.cv_map = np.zeros((self.cv_alphas.size, self.cv_lambdas.size))
- alpha_ratio = np.ones(self.cv_alphas.size)
- if self.cv_alphas.size != 1 and self.cv_alphas[0] != 0:
- alpha_ratio[1:] = np.sqrt(self.cv_alphas[1:] / self.cv_alphas[:-1])
+ self.hyperparameters["alpha"] = 10 ** res.x[0]
+ self.hyperparameters["lambda"] = 10 ** res.x[1]
- Ks, ss = _get_augmented_data(
- K=K,
- s=s_,
- alpha=s_.size * self.cv_alphas[0],
- regularizer=self.regularizer,
- f_shape=self.f_shape,
+ print(res.message)
+ print(res.status)
+
+ # Calculate the solution using the complete data at the optimized lambda and
+ # alpha values
+ super().fit(K, s)
+
+ @property
+ def path(self):
+ alpha_ = [item[0] for item in self._path]
+ lambda_ = [item[1] for item in self._path]
+ return {"alpha": alpha_, "lambda": lambda_}
+
+
+class GeneralL2LassoCV(Optimizer):
+ alphas: Union[List[float], np.ndarray] = None
+ lambdas: Union[List[float], np.ndarray] = None
+ hyperparameters: dict = None
+
+ verbose: bool = False
+ n_jobs: int = -1
+ f_std: Union[cp.CSDM, np.ndarray] = None
+
+ _path: Any = PrivateAttr()
+ _cv_alphas: Any = PrivateAttr()
+ _cv_lambdas: Any = PrivateAttr()
+ _cv_map: Any = PrivateAttr()
+
+ class Config:
+ arbitrary_types_allowed = True
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self._cv_alphas = (
+ 10 ** ((np.arange(5) / 4) * 2 - 4)[::-1]
+ if self.alphas is None
+ else np.asarray(self.alphas).ravel()
)
- start_index = K.shape[0]
- l1 = self._get_minimizer()
- # l1.fit(Ks, ss)
+ self._cv_lambdas = (
+ 10 ** ((np.arange(10) / 9) * 5 - 9)[::-1]
+ if self.lambdas is None
+ else np.asarray(self.lambdas).ravel()
+ )
+ if self.hyperparameters is None:
+ self.hyperparameters = {"lambda": None, "alpha": None}
+
+ def get_optimizer_jobs(self, Ks, ss, cv_indexes, start_index, n_repeats=1):
+ """Generate a grid of n_alpha x n_lambda optimizers (jobs) over the given
+ grid of hyperparameters.
+
+ Args:
+ np.ndarray Ks: The augmented kernel
+ cp.CSDM ss: The augmented dataset
+ (list of tuple) cv_indexes: (test, train) indexes for cross-validation
+ int start_index: The index in the augmented kernel (Ks), where the
+ smoothing matrix starts.
+ int n_repeats: Repeat the cross-validation by adding noise to the input
+ data and evaluate the mean cross-validation error.
+ """
+ # create an l1 over all lambda values
+ l1 = self.get_optimizer(self._cv_lambdas[0])
l1_array = []
- for lambda_ in self.cv_lambdas:
- l1_array.append(deepcopy(l1))
+ for lambda_ in self._cv_lambdas:
+ l1_array += [deepcopy(l1)]
l1_array[-1].alpha = lambda_ / 2.0
- j = 0
- for alpha_ratio_ in alpha_ratio:
- if alpha_ratio_ != 0:
- Ks[start_index:] *= alpha_ratio_
- jobs = (
- delayed(cv)(l1_array[i], Ks, ss, cv_indexes)
- for i in range(self.cv_lambdas.size)
- )
- self.cv_map[j] = Parallel(
- n_jobs=self.n_jobs,
- verbose=self.verbose,
- **{
- "backend": {
- "threads": "threading",
- "processes": "multiprocessing",
- None: None,
- }["threads"]
- },
- )(jobs)
- j += 1
+ # l2 optimizer per l1 over alpha values
+ alpha_ratio = np.ones(self._cv_alphas.size)
+ if self._cv_alphas.size != 1 and self._cv_alphas[0] != 0:
+ alpha_ratio[1:] = np.sqrt(self._cv_alphas[1:] / self._cv_alphas[:-1])
+
+ def alpha_lambda_grid_jobs(new_indexes):
+ """Create optimization jobs over alpha-lambda grid
+ Args:
+ ss: Augmented dataset.
+ """
+ jobs_ = []
+ for alpha_ratio_ in alpha_ratio:
+ if alpha_ratio_ != 0:
+ Ks[start_index:] *= alpha_ratio_
+ Ks_copy = deepcopy(Ks)
+ jobs_ += [
+ delayed(cv)(l1_, Ks_copy, ss, new_indexes) for l1_ in l1_array
+ ]
+ return jobs_
+
+ new_indexes = self._get_bootstrap_cv_indexes(cv_indexes, n_repeats, start_index)
+ return [
+ item for indexes in new_indexes for item in alpha_lambda_grid_jobs(indexes)
+ ]
+
+ def fit(self, K, s, scoring="neg_mean_squared_error", n_repeats=1):
+ r"""Fit the model using the coordinate descent method from scikit-learn for
+ all alpha and lambda values using the `n`-folds cross-validation technique.
+ The cross-validation metric is the mean squared error.
- # cv_map contains negated mean square errors, therefore multiply by -1.
- self.cv_map *= -1
- # subtract the variance.
- self.cv_map -= self.sigma ** 2
+ Args:
+ K: A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of
+ shape (m, n).
+ s: A :math:`m \times m_\text{count}` signal matrix, :math:`{\bf s}` as a
+ csdm object or a numpy array or shape (m, m_count).
+ n_repeats: int. Repeat the cross-validation by adding noise to the input
+ data and evaluate the mean cross-validation error.
+ """
+ # get augmented kernel, dataset, and cv-indexes.
+ Ks, ss, cv_indexes = self._fit_setup(K, s, self._cv_alphas[0], True)
+
+ # optimizers as jobs over the hyperparameter grid.
+ jobs = self.get_optimizer_jobs(Ks, ss, cv_indexes, K.shape[0], n_repeats)
+
+ # start parallel processes for running jobs
+ processes = Parallel(
+ n_jobs=self.n_jobs,
+ verbose=self.verbose,
+ **{
+ "backend": {
+ "threads": "threading",
+ "processes": "multiprocessing",
+ None: None,
+ }["threads"]
+ },
+ )
+ result = processes(jobs) # run jobs
- # After subtracting the variance, any negative values in the cv grid is a
- # result of fitting noise. Take the absolute value of cv to avoid such
- # models.
- self.cv_map = np.abs(self.cv_map)
+ cv_map = self.get_cv_map(result, n_repeats) # get cross-validation error metric
- # The argmin of the minimum value is the selected model as it has the least
- # prediction error.
- index = np.unravel_index(self.cv_map.argmin(), self.cv_map.shape)
- self.hyperparameters["alpha"] = self.cv_alphas[index[0]]
- self.hyperparameters["lambda"] = self.cv_lambdas[index[1]]
+ # find the minima of the curve to get the optimum hyperparameters.
+ self.find_optimum_hyperparameters(cv_map, scoring)
# Calculate the solution using the complete data at the optimized lambda and
- # alpha values
- self.opt = GeneralL2Lasso(
+ # alpha values.
+ opt = GeneralL2Lasso(
alpha=self.hyperparameters["alpha"],
lambda1=self.hyperparameters["lambda"],
max_iterations=self.max_iterations,
@@ -406,114 +527,65 @@ def fit(self, K, s):
inverse_dimension=self.inverse_dimension,
method=self.method,
)
- self.opt.fit(K, s)
- self.f = self.opt.f
-
- # convert cv_map to csdm
- self.cv_map = cp.as_csdm(np.squeeze(self.cv_map.T.copy()))
- if len(self.cv_alphas) != 1:
- d0 = cp.as_dimension(-np.log10(self.cv_alphas), label="-log(α)")
- self.cv_map.dimensions[0] = d0
+ opt.fit(K, s)
+ self._estimator = opt._estimator
+ self.f = opt.f
+ self.n_iter = opt.n_iter
- if len(self.cv_lambdas) == 1:
- return
+ def get_cv_map(self, cv_map, n_repeats=1, scoring="neg_mean_squared_error"):
+ # cv_map = np.asarray([item["test_score"].mean() for item in result])
+ cv_map = np.asarray(cv_map)
+ cv_map.shape = (n_repeats, self._cv_alphas.size, self._cv_lambdas.size)
- d1 = cp.as_dimension(-np.log10(self.cv_lambdas), label="-log(λ)")
- if len(self.cv_alphas) != 1:
- self.cv_map.dimensions[1] = d1
- else:
- self.cv_map.dimensions[0] = d1
+ # cv_map contains negated mean square errors, therefore multiply by -1.
+ cv_map *= -1
- def _get_minimizer(self):
- """Return the estimator for the method"""
- # The factor 0.5 for alpha in the Lasso/LassoLars problem is to compensate
- # 1/(2 * n_sample) factor in OLS term.
- if self.method == "multi-task":
- return MultiTaskLasso(
- alpha=self.cv_lambdas[0] / 2.0,
- fit_intercept=False,
- normalize=False,
- # precompute=True,
- max_iter=self.max_iterations,
- tol=self.tolerance,
- copy_X=True,
- # positive=self.positive,
- random_state=None,
- warm_start=True,
- selection="random",
- )
+ # subtract the variance.
+ if scoring == "neg_mean_squared_error":
+ cv_map -= self.sigma ** 2
- if self.method == "gradient_decent":
- return Lasso(
- alpha=self.cv_lambdas[0] / 2.0,
- fit_intercept=False,
- normalize=False,
- precompute=True,
- max_iter=self.max_iterations,
- tol=self.tolerance,
- copy_X=True,
- positive=self.positive,
- random_state=None,
- warm_start=True,
- selection="random",
- )
+ # After subtracting the variance, any negative values in the cv grid are a
+ # result of fitting noise. Substitute negative values with np.nan to avoid
+ # selecting such models.
+ index = np.where(cv_map < 0)
+ cv_map[index] = np.nan
- if self.method == "lars":
- return LassoLars(
- alpha=self.cv_lambdas[0] / 2.0,
- fit_intercept=False,
- verbose=True,
- normalize=False,
- precompute="auto",
- max_iter=self.max_iterations,
- eps=2.220446049250313e-16,
- copy_X=True,
- fit_path=False,
- positive=self.positive,
- jitter=None,
- random_state=None,
- )
+ return cv_map.mean(axis=0)
- def predict(self, K):
- r"""
- Predict the signal using the linear model.
+ def find_optimum_hyperparameters(self, cv_map, scoring="neg_mean_squared_error"):
+ """Find the global minimal of the cross-validation error metric and update
+ the hyperparameter values.
Args:
- K: A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of
- shape (m, n).
-
- Return:
- A numpy array of shape (m, m_count) with the predicted values.
+ np.ndarray cv_map: cross-validation error metric array.
+ str scoring: Scoring criteria for error metric.
"""
- return self.opt.predict(K)
- def residuals(self, K, s):
- r"""
- Return the residual as the difference the data and the prediced data(fit),
- following
-
- .. math::
- \text{residuals} = {\bf s - Kf^*}
+ # The argmin of the minimum value is the selected model as it has the least
+ # prediction error.
+ cv_shape = (self._cv_alphas.size, self._cv_lambdas.size)
+ index = np.unravel_index(cv_map.argmin(), cv_shape)
+ self.hyperparameters["alpha"] = self._cv_alphas[index[0]]
+ self.hyperparameters["lambda"] = self._cv_lambdas[index[1]]
+ self.pack_cv_map_as_csdm(cv_map)
- where :math:`{\bf f^*}` is the optimum solution.
+ def pack_cv_map_as_csdm(self, cv_map):
+ """Convert the cross-validation error metric as a CSDM object.
Args:
- K: A :math:`m \times n` kernel matrix, :math:`{\bf K}`. A numpy array of
- shape (m, n).
- s: A csdm object or a :math:`m \times m_\text{count}` signal matrix,
- :math:`{\bf s}`.
- Return:
- If `s` is a csdm object, returns a csdm object with the residuals. If `s`
- is a numpy array, return a :math:`m \times m_\text{count}` residue matrix.
+ np.ndarray cv_map: cross-validation error metric array.
"""
- return self.opt.residuals(K, s)
+ cv_map = cp.as_csdm(cv_map.T.copy())
+ cv_map.x[0] = cp.as_dimension(-np.log10(self._cv_alphas), label="-log(α)")
+ cv_map.x[1] = cp.as_dimension(-np.log10(self._cv_lambdas), label="-log(λ)")
- def score(self, K, s, sample_weights=None):
- """
- Return the coefficient of determination, :math:`R^2`, of the prediction.
- For more information, read scikit-learn documentation.
- """
- return self.opt.score(K, s, sample_weights)
+ if self._cv_alphas.size == 1:
+ cv_map = cv_map[0]
+
+ if self._cv_lambdas.size == 1:
+ cv_map = cv_map[:, 0]
+
+ self._cv_map = cv_map
@property
def cross_validation_curve(self):
@@ -521,10 +593,10 @@ def cross_validation_curve(self):
Returns: A two-dimensional CSDM object.
"""
- return self.cv_map
+ return self._cv_map
-def cv(l1, X, y, cv):
+def cv(l1, X, y, cv_indexes, n_jobs=1):
"""Return the cross-validation score as negative of mean square error."""
if isinstance(l1, (Lasso, MultiTaskLasso)):
fit_params = {"check_input": False}
@@ -535,13 +607,26 @@ def cv(l1, X, y, cv):
l1,
X=X,
y=y,
- scoring="neg_mean_squared_error", # 'neg_mean_absolute_error",
- cv=cv,
+ scoring="neg_mean_absolute_error",
+ cv=cv_indexes,
fit_params=fit_params,
- n_jobs=1,
+ n_jobs=n_jobs,
verbose=0,
)
return cv_score["test_score"].mean()
def _get_smooth_size(f_shape, regularizer, max_size):
@@ -555,11 +640,11 @@ def _get_smooth_size(f_shape, regularizer, max_size):
"""
shape = np.asarray(f_shape)
shape_prod = shape.prod()
- if shape_prod != max_size:
- raise ValueError(
- "The product of the shape must be equal to the length of axis 1 of the "
- "kernel, K"
- )
if regularizer == "smooth lasso":
smooth_array_size = [int(shape_prod * (i - 1) / i) for i in shape]
smooth_size = np.asarray(smooth_array_size).sum()
@@ -568,7 +653,6 @@ def _get_smooth_size(f_shape, regularizer, max_size):
smooth_size = np.asarray(smooth_array_size).sum()
else:
smooth_size = 0
-
return smooth_size
@@ -579,10 +663,8 @@ def _get_cv_indexes(K, folds, regularizer, f_shape=None, random=False, times=1):
"""
cv_indexes = []
ks0, ks1 = K.shape
- if isinstance(f_shape, int):
- f_shape = (f_shape,)
+ f_shape = (f_shape,) if isinstance(f_shape, int) else f_shape
- tr_ = []
smooth_size = _get_smooth_size(f_shape, regularizer, ks1)
tr_ = (np.arange(smooth_size) + ks0).tolist()
@@ -621,7 +703,7 @@ def generate_J_i(Ai, alpha, f_shape):
return J
-def _get_augmented_data(K, s, alpha, regularizer, f_shape=None):
+def _get_augmented_data(K, s, alpha, regularizer, f_shape=None, half=False):
"""Creates a smooth kernel, K, with alpha regularization parameter."""
if alpha == 0:
return np.asfortranarray(K), np.asfortranarray(s)
@@ -629,8 +711,7 @@ def _get_augmented_data(K, s, alpha, regularizer, f_shape=None):
ks0, ks1 = K.shape
ss0, ss1 = s.shape
- if isinstance(f_shape, int):
- f_shape = (f_shape,)
+ f_shape = (f_shape,) if isinstance(f_shape, int) else f_shape
smooth_size = _get_smooth_size(f_shape, regularizer, ks1)
@@ -658,6 +739,12 @@ def Ai_sparse_ridge_fusion(i):
K_ = np.empty((ks0 + smooth_size, ks1))
+ if half:
+ row1, col, row0 = get_indexes(f_shape)
+ J[0] = J[0][row0][:, col]
+ J[1] = J[1][row1][:, col]
+
K_[:ks0] = K
start = ks0
end = ks0
@@ -670,3 +757,35 @@ def Ai_sparse_ridge_fusion(i):
s_[:ss0] = s.real
return np.asfortranarray(K_), np.asfortranarray(s_)
+
+
+def get_indexes(f_shape):
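+ """Return the index arrays (row1, col, row0) used to subsample the smoothing
+ matrices J[0] and J[1] when the kernel is augmented with half=True."""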
+ s0, s1 = f_shape[0], f_shape[1]
+
+ def arr_j1(s0, s1):
+ arr = []
+ lst = [np.arange(s1 - i) + (s1 + 1) * i for i in range(s0)]
+ for item in lst:
+ arr = np.append(arr, item)
+ return np.asarray(arr, dtype=int)
+
+ arr1 = arr_j1(s0, s1 - 1)
+ arr2 = arr_j1(min(s0, s1), s1)
+
+ def arr_j0(s0, s1):
+ arr = []
+ lst = [np.arange(s1 - i) + (s1 + 2) * i + 1 for i in range(s0)]
+ for item in lst:
+ arr = np.append(arr, item)
+ return np.asarray(arr, dtype=int)
+
+ arr3 = arr_j0(min(s0 - 1, s1), s1 - 1)
+
+ return arr1, arr2, arr3
+
+
+def _get_proper_data(s):
+ """Extract the numpy array from the csdm, if csdm and returns a 2D array"""
+ s_ = deepcopy(s.y[0].components[0].T) if isinstance(s, cp.CSDM) else deepcopy(s)
+ s_ = s_[:, np.newaxis] if s_.ndim == 1 else s_
+ return s_
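
The hunks above compress several steps: subtracting the known noise variance from the cross-validation error map, masking noise-dominated cells, averaging over folds, and locating the grid minimum. The following standalone sketch restates that pipeline as a reading aid. The function name and arguments are illustrative, not part of the library API; cv_map is assumed to hold per-fold mean-squared errors (sign-flipped from scikit-learn's "neg_mean_squared_error"), and np.nanargmin is used so the masked cells are skipped.

    import numpy as np

    def select_hyperparameters(cv_map, alphas, lambdas, sigma):
        """Reduce a (folds, n_alpha * n_lambda) error map to the optimum pair."""
        cv_map = cv_map - sigma ** 2      # remove the noise-variance floor
        cv_map[cv_map < 0] = np.nan       # negative cells are noise; exclude them
        mean_error = cv_map.mean(axis=0)  # average over folds; NaN propagates
        index = np.unravel_index(
            np.nanargmin(mean_error), (alphas.size, lambdas.size)
        )
        return alphas[index[0]], lambdas[index[1]]
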
diff --git a/mrinversion/linear_model/smooth_lasso.py b/mrinversion/linear_model/smooth_lasso.py
index 6988f79..ec979b6 100644
--- a/mrinversion/linear_model/smooth_lasso.py
+++ b/mrinversion/linear_model/smooth_lasso.py
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
from ._base_l1l2 import GeneralL2Lasso
from ._base_l1l2 import GeneralL2LassoCV
+from ._base_l1l2 import GeneralL2LassoLS
__author__ = "Deepansh J. Srivastava"
__email__ = "srivastava.89@osu.edu"
class SmoothLasso(GeneralL2Lasso):
- r"""
- The linear model trained with the combined l1 and l2 priors as the regularizer.
+ r"""The linear model trained with the combined l1 and l2 priors as the regularizer.
The method minimizes the objective function,
.. math::
@@ -72,32 +72,19 @@ class SmoothLasso(GeneralL2Lasso):
The number of iterations required to reach the specified tolerance.
"""
- def __init__(
- self,
- alpha,
- lambda1,
- inverse_dimension,
- max_iterations=10000,
- tolerance=1e-5,
- positive=True,
- method="gradient_decent",
- ):
- super().__init__(
- alpha=alpha,
- lambda1=lambda1,
- max_iterations=max_iterations,
- tolerance=tolerance,
- positive=positive,
- regularizer="smooth lasso",
- inverse_dimension=inverse_dimension,
- method=method,
- )
+ def __init__(self, **kwargs):
+ if "regularizer" in kwargs:
+ raise KeyError("regularizer is an immutable property of SmoothLasso")
+ super().__init__(regularizer="smooth lasso", **kwargs)
class SmoothLassoCV(GeneralL2LassoCV):
- r"""
- The linear model trained with the combined l1 and l2 priors as the
- regularizer. The method minimizes the objective function,
+ r"""The linear model trained with the combined l1 and l2 priors as the regularizer
+ with in-built cross-validation. The cross-validation is carried out using a
+ stratified splitting of the signal over the given range of :math:`alpha` and
+ :math:`lambda` values.
+
+ The method minimizes the objective function,
.. math::
\| {\bf Kf - s} \|^2_2 + \alpha \sum_{i=1}^{d} \| {\bf J}_i {\bf f} \|_2^2
@@ -131,8 +118,6 @@ class SmoothLassoCV(GeneralL2LassoCV):
respective dimensions, with the constraint that :math:`\prod_{i=1}^{d}n_i = n`,
where :math:`d` is the total number of dimensions.
- The cross-validation is carried out using a stratified splitting of the signal.
-
Parameters
----------
@@ -156,6 +141,98 @@ class SmoothLassoCV(GeneralL2LassoCV):
The default is True.
sigma: float
The standard deviation of the noise in the signal. The default is 0.0.
+ randomize: bool
+ If true, the folds are created by randomly assigning the samples to each fold.
+ If false, stratified sampling is used to generate the folds. The default is
+ False.
+ times: int
+ The number of times the randomized n-folds are created. Only applicable when
+ the `randomize` attribute is True.
+ verbose: bool
+ If true, prints the progress of the computation.
+ n_jobs: int
+ The number of CPUs used for computation. The default is -1, that is, all
+ available CPUs are used.
+
+ Attributes
+ ----------
+ f: ndarray or CSDM object.
+ A ndarray of shape (m_count, nd, ..., n1, n0). The solution,
+ :math:`{\bf f} \in \mathbb{R}^{m_\text{count} \times n_d \times \cdots n_1
+ \times n_0}` or an equivalent CSDM object.
+ n_iter: int.
+ The number of iterations required to reach the specified tolerance.
+ hyperparameters: dict.
+ A dictionary with the :math:`\alpha` and :math:`\lambda` hyperparameters.
+ cross_validation_curve: CSDM object.
+ The cross-validation error metric determined as the mean square error.
+ """
+
+ def __init__(self, **kwargs):
+ if "regularizer" in kwargs:
+ raise KeyError("regularizer is an immutable property of SmoothLasso")
+ super().__init__(regularizer="smooth lasso", **kwargs)
+
+
+class SmoothLassoLS(GeneralL2LassoLS):
+ r"""The linear model trained with the combined l1 and l2 priors as the regularizer
+ with in-built cross-validation. The cross-validation is carried out using a
+ stratified splitting of the signal using the least-squares analysis.
+
+ The method minimizes the objective function,
+
+ .. math::
+ \| {\bf Kf - s} \|^2_2 + \alpha \sum_{i=1}^{d} \| {\bf J}_i {\bf f} \|_2^2
+ + \lambda \| {\bf f} \|_1 ,
+
+ where :math:`{\bf K} \in \mathbb{R}^{m \times n}` is the kernel,
+ :math:`{\bf s} \in \mathbb{R}^{m \times m_\text{count}}` is the known signal
+ containing noise, and :math:`{\bf f} \in \mathbb{R}^{n \times m_\text{count}}`
+ is the desired solution. The parameters, :math:`\alpha` and :math:`\lambda`,
+ are the hyperparameters controlling the smoothness and sparsity of the
+ solution :math:`{\bf f}`.
+ The matrix :math:`{\bf J}_i` is given as
+
+ .. math::
+ {\bf J}_i = {\bf I}_{n_1} \otimes \cdots \otimes {\bf A}_{n_i}
+ \otimes \cdots \otimes {\bf I}_{n_{d}},
+
+ where :math:`{\bf I}_{n_i} \in \mathbb{R}^{n_i \times n_i}` is the identity
+ matrix,
+
+ .. math::
+ {\bf A}_{n_i} = \left(\begin{array}{ccccc}
+ 1 & -1 & 0 & \cdots & \vdots \\
+ 0 & 1 & -1 & \cdots & \vdots \\
+ \vdots & \vdots & \vdots & \vdots & 0 \\
+ 0 & \cdots & 0 & 1 & -1
+ \end{array}\right) \in \mathbb{R}^{(n_i-1)\times n_i},
+
+ and the symbol :math:`\otimes` is the Kronecker product. The terms,
+ :math:`\left(n_1, n_2, \cdots, n_d\right)`, are the number of points along the
+ respective dimensions, with the constraint that :math:`\prod_{i=1}^{d}n_i = n`,
+ where :math:`d` is the total number of dimensions.
+
+ Parameters
+ ----------
+
+ alpha: float
+ A starting guess for the :math:`\alpha` hyperparameter.
+ lambda1: float
+ A starting guess for the :math:`\lambda` hyperparameter.
+ inverse_dimension: list
+ A list of csdmpy Dimension objects representing the inverse space.
+ folds: int
+ The number of folds used in cross-validation. The default is 10.
+ max_iterations: int
+ The maximum number of iterations allowed when solving the problem. The default
+ value is 10000.
+ tolerance: float
+ The tolerance at which the solution is considered converged. The default value
+ is 1e-5.
+ positive: bool
+ If True, the amplitudes in the solution, :math:`{\bf f}`, are constrained to
+ only positive values; otherwise, the solution may contain both positive and
+ negative amplitudes. The default is True.
sigma: float
The standard deviation of the noise in the signal. The default is 0.0.
randomize: bool
@@ -170,7 +247,6 @@ class SmoothLassoCV(GeneralL2LassoCV):
The number of CPUs used for computation. The default is -1, that is, all
available CPUs are used.
-
Attributes
----------
f: ndarray or CSDM object.
@@ -185,35 +261,7 @@ class SmoothLassoCV(GeneralL2LassoCV):
The cross-validation error metric determined as the mean square error.
"""
- def __init__(
- self,
- alphas,
- lambdas,
- inverse_dimension,
- folds=10,
- max_iterations=10000,
- tolerance=1e-5,
- positive=True,
- sigma=0.0,
- randomize=False,
- times=2,
- verbose=False,
- n_jobs=-1,
- method="gradient_decent",
- ):
- super().__init__(
- alphas=alphas,
- lambdas=lambdas,
- inverse_dimension=inverse_dimension,
- folds=folds,
- max_iterations=max_iterations,
- tolerance=tolerance,
- positive=positive,
- sigma=sigma,
- regularizer="smooth lasso",
- randomize=randomize,
- times=times,
- verbose=verbose,
- n_jobs=n_jobs,
- method=method,
- )
+ def __init__(self, **kwargs):
+ if "regularizer" in kwargs:
+ raise KeyError("regularizer is an immutable property of SmoothLasso")
+ super().__init__(regularizer="smooth lasso", **kwargs)
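
With the constructors reduced to keyword passthroughs, a brief usage sketch may be helpful; it mirrors the integration test further below, and the same pattern applies to SmoothLassoCV. The kernel and the compressed signal are assumed to be prepared beforehand (see the TSVDCompression step), so compressed_K and compressed_s are placeholders rather than runnable inputs.

    import csdmpy as cp
    from mrinversion.linear_model import SmoothLassoLS

    # x-y inverse grid, as in the integration test below.
    inverse_dimensions = [
        cp.LinearDimension(count=25, increment="400 Hz", label="x"),
        cp.LinearDimension(count=25, increment="400 Hz", label="y"),
    ]

    s_lasso_ls = SmoothLassoLS(
        alpha=2.07e-7,    # starting guess for the smoothness hyperparameter
        lambda1=7.85e-6,  # starting guess for the sparsity hyperparameter
        inverse_dimension=inverse_dimensions,
    )
    s_lasso_ls.fit(K=compressed_K, s=compressed_s)

    f_sol = s_lasso_ls.f               # the solution, as a CSDM object
    print(s_lasso_ls.hyperparameters)  # optimized {"alpha": ..., "lambda": ...}
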
diff --git a/mrinversion/linear_model/tsvd_compression.py b/mrinversion/linear_model/tsvd_compression.py
index 48580a9..fb37320 100644
--- a/mrinversion/linear_model/tsvd_compression.py
+++ b/mrinversion/linear_model/tsvd_compression.py
@@ -34,7 +34,7 @@ def __init__(self, K, s, r=None):
self.truncation_index = r
if isinstance(s, cp.CSDM):
- signal = s.dependent_variables[0].components[0].T
+ signal = s.y[0].components[0].T
else:
signal = s
(
@@ -48,7 +48,7 @@ def __init__(self, K, s, r=None):
if isinstance(s, cp.CSDM):
self.compressed_s = cp.as_csdm(compressed_signal.T.copy())
- if len(s.dimensions) > 1:
- self.compressed_s.dimensions[1] = s.dimensions[1]
+ if len(s.x) > 1:
+ self.compressed_s.x[1] = s.x[1]
else:
self.compressed_s = compressed_signal
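
For context, the attribute renames above sit inside the compression step that precedes every fit in the tests below. A minimal sketch of its use, assuming a kernel K and a CSDM signal s already exist; the compressed_K/compressed_s attribute names follow the source and test variables in this patch.

    from mrinversion.linear_model import TSVDCompression

    # With r=None, the truncation index is estimated from the data.
    new_system = TSVDCompression(K=K, s=s, r=None)
    compressed_K = new_system.compressed_K
    compressed_s = new_system.compressed_s
    print("truncation index =", new_system.truncation_index)
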
diff --git a/mrinversion/tests/integration_test.py b/mrinversion/tests/integration_test.py
index 4f6ee80..43dce8f 100644
--- a/mrinversion/tests/integration_test.py
+++ b/mrinversion/tests/integration_test.py
@@ -5,20 +5,21 @@
from mrinversion.kernel.nmr import ShieldingPALineshape
from mrinversion.linear_model import SmoothLasso
+from mrinversion.linear_model import SmoothLassoLS
from mrinversion.linear_model import TSVDCompression
-def test_01():
+# Named setup_system (not setup) so pytest does not invoke it as a nose-style hook.
+def setup_system():
# The 2D MAF dataset in csdm format
filename = "https://osu.box.com/shared/static/8lnwmg0dr7y6egk40c2orpkmmugh9j7c.csdf"
data_object = cp.load(filename)
data_object = data_object.real
- _ = [item.to("ppm", "nmr_frequency_ratio") for item in data_object.dimensions]
+ _ = [item.to("ppm", "nmr_frequency_ratio") for item in data_object.x]
data_object = data_object.T
data_object_truncated = data_object[:, 155:180]
- anisotropic_dimension = data_object_truncated.dimensions[0]
+ anisotropic_dimension = data_object_truncated.x[0]
inverse_dimensions = [
cp.LinearDimension(count=25, increment="400 Hz", label="x"),
cp.LinearDimension(count=25, increment="400 Hz", label="y"),
@@ -41,19 +42,20 @@ def test_01():
assert new_system.truncation_index == 87
- s_lasso = SmoothLasso(
- alpha=2.07e-7, lambda1=7.85e-6, inverse_dimension=inverse_dimensions
- )
- s_lasso.fit(K=compressed_K, s=compressed_s)
- f_sol = s_lasso.f
+ return inverse_dimensions, compressed_K, compressed_s, K, data_object_truncated
- residuals = s_lasso.residuals(K=K, s=data_object_truncated)
+
+def run_test(s_lasso, K, data_object_truncated):
+ residuals1 = s_lasso.residuals(K=K, s=data_object_truncated)
+ residuals2 = s_lasso.residuals(K=K, s=data_object_truncated.y[0].components[0].T)
+ assert np.allclose(residuals1.y[0].components[0].T, residuals2)
# assert np.allclose(residuals.mean().value, 0.00048751)
- np.testing.assert_almost_equal(residuals.std().value, 0.00336372, decimal=2)
+ np.testing.assert_almost_equal(residuals1.std().value, 0.00336372, decimal=2)
+ f_sol = s_lasso.f
f_sol /= f_sol.max()
- [item.to("ppm", "nmr_frequency_ratio") for item in f_sol.dimensions]
+ _ = [item.to("ppm", "nmr_frequency_ratio") for item in f_sol.x]
Q4_region = f_sol[0:8, 0:8, 3:18]
Q4_region.description = "Q4 region"
@@ -97,3 +99,23 @@ def test_01():
np.asarray([6.138761032132587, 7.837190479891721, 4.210912435356488]),
decimal=0,
)
+
+
+def test_01():
+ inverse_dimensions, compressed_K, compressed_s, K, data_object_truncated = setup_system()
+
+ s_lasso = SmoothLasso(
+ alpha=2.07e-7, lambda1=7.85e-6, inverse_dimension=inverse_dimensions
+ )
+ s_lasso.fit(K=compressed_K, s=compressed_s)
+ run_test(s_lasso, K, data_object_truncated)
+
+
+def test_ls():
+ inverse_dimensions, compressed_K, compressed_s, K, data_object_truncated = setup_system()
+
+ s_lasso_ls = SmoothLassoLS(
+ alpha=2.07e-7, lambda1=7.85e-6, inverse_dimension=inverse_dimensions
+ )
+ s_lasso_ls.fit(K=compressed_K, s=compressed_s)
+ run_test(s_lasso_ls, K, data_object_truncated)
diff --git a/mrinversion/utils.py b/mrinversion/utils.py
index 47a7947..4ba2683 100644
--- a/mrinversion/utils.py
+++ b/mrinversion/utils.py
@@ -26,11 +26,11 @@ def to_Haeberlen_grid(csdm_object, zeta, eta, n=5):
n: int
An integer used in linear interpolation of the data. The default is 5.
"""
- [item.to("ppm", "nmr_frequency_ratio") for item in csdm_object.dimensions]
- data = csdm_object.dependent_variables[0].components[0]
- iso = csdm_object.dimensions[2].coordinates.value
+ _ = [item.to("ppm", "nmr_frequency_ratio") for item in csdm_object.x]
+ data = csdm_object.y[0].components[0]
+ iso = csdm_object.x[2].coordinates.value
- reg_x, reg_y = [csdm_object.dimensions[i].coordinates.value for i in range(2)]
+ reg_x, reg_y = [csdm_object.x[i].coordinates.value for i in range(2)]
dx = reg_x[1] - reg_x[0]
dy = reg_y[1] - reg_y[0]
sol = np.zeros((data.shape[0], zeta.count, eta.count))
@@ -79,9 +79,9 @@ def to_Haeberlen_grid(csdm_object, zeta, eta, n=5):
del zeta_grid, eta_grid, index, x_, y_, avg_range_x, avg_range_y
csdm_new = cp.as_csdm(sol)
- csdm_new.dimensions[0] = eta
- csdm_new.dimensions[1] = zeta
- csdm_new.dimensions[2] = csdm_object.dimensions[2]
+ csdm_new.x[0] = eta
+ csdm_new.x[1] = zeta
+ csdm_new.x[2] = csdm_object.x[2]
return csdm_new
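
Since the refactor above leaves the to_Haeberlen_grid signature unchanged, a short sketch of a call may help; the zeta/eta grids below are illustrative placeholders built with csdmpy, and f_sol is assumed to be a 3D CSDM solution whose third dimension is the isotropic shift.

    import numpy as np
    import csdmpy as cp
    from mrinversion.utils import to_Haeberlen_grid

    # Illustrative Haeberlen grids; the ranges and counts are placeholders.
    zeta = cp.as_dimension(np.arange(40) * 4 - 40, unit="ppm", label="zeta")
    eta = cp.as_dimension(np.arange(16) / 15, label="eta")

    f_haeberlen = to_Haeberlen_grid(f_sol, zeta=zeta, eta=eta, n=5)
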
@@ -152,7 +152,7 @@ def plot_3d(
alpha=0.15,
**kwargs,
):
- """Generate a 3D density plot with 2D contour and 1D projections.
+ r"""Generate a 3D density plot with 2D contour and 1D projections.
Args:
ax: Matplotlib Axes to render the plot.
@@ -189,10 +189,10 @@ def plot_3d(
lw = linewidth
if isinstance(csdm_object, cp.CSDM):
- f = csdm_object.dependent_variables[0].components[0].T
+ f = csdm_object.y[0].components[0].T
label = csdm_object.description
- a_, b_, c_ = [item for item in csdm_object.dimensions]
+ a_, b_, c_ = [item for item in csdm_object.x]
a = a_.coordinates.value
b = b_.coordinates.value
@@ -282,14 +282,14 @@ def plot_3d(
if max_1d[0] is None:
max_1d[0] = proj_x.max()
proj_x /= max_1d[0]
- ax.plot(a, sign * 14 * proj_x + offz, offy, zdir="y", c=ck, linewidth=lw)
+ ax.plot(a, 4 * sign * proj_x + offz, offy, zdir="y", c=ck, linewidth=lw)
# 1D z-axis projection from 2D x-z projection
proj_z = dist.sum(axis=0)
if max_1d[2] is None:
max_1d[2] = proj_z.max()
proj_z /= max_1d[2]
- ax.plot(-20 * proj_z + offy_n, c, offx, zdir="x", c=ck, linewidth=lw)
+ ax.plot(sign * proj_z / 4 + offy_n, c, offx, zdir="x", c=ck, linewidth=lw)
ax.set_xlim(z_lim)
# 2D y-z contour projection
@@ -308,7 +308,7 @@ def plot_3d(
max_1d[1] = proj_y.max()
proj_y /= max_1d[1]
ax.plot(
- b, sign * 14 * proj_y + offz, offx, zdir="x", c=ck, linewidth=lw, label=label
+ b, sign * proj_y * 4 + offz, offx, zdir="x", c=ck, linewidth=lw, label=label
)
ax.set_xlim(x_lim)
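
The projection-scaling changes above alter plot_3d's 1D overlays. A hedged sketch of a typical call, assuming the limit keywords referenced in the hunk (x_lim and z_lim, with y_lim by symmetry) and a CSDM solution f_sol from the inversion:

    import matplotlib.pyplot as plt
    from mrinversion.utils import plot_3d

    # Render the 3D density with its 2D contours and rescaled 1D projections.
    fig = plt.figure(figsize=(5, 4.4))
    ax = fig.add_subplot(projection="3d")
    plot_3d(ax, f_sol, x_lim=[0, 140], y_lim=[0, 140], z_lim=[-50, -150])
    plt.tight_layout()
    plt.show()
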
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d26b730
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[project]
+requires-python = ">=3.6"
+
+[tool.pylint.messages_control]
+extension-pkg-whitelist = ["pydantic", "mrsimulator"]
+disable = "R0201"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index a7378bd..5ac0ca0 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,8 +3,10 @@ numpy>=1.20; python_version > '3.6'
matplotlib>=3.0
csdmpy>=0.3.1
joblib>=0.13.2
-mrsimulator>=0.3.0a0
-scikit-learn>=0.22
+mrsimulator>=0.5
+scikit-learn>=0.24.1
+pydantic>=1.8.1
+typing-extensions>=3.7
# development packages
pytest-runner>=5.0
diff --git a/requirements.txt b/requirements.txt
index 9a0f616..9401100 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,6 @@ matplotlib>=3.0
csdmpy>=0.4
joblib>=0.13.2
mrsimulator>=0.6
-scikit-learn>=0.22
+scikit-learn>=0.24.1
+pydantic>=1.8.1
+typing-extensions>=3.7