diff --git a/gpfa/__init__.py b/blockinvgpfa/__init__.py similarity index 75% rename from gpfa/__init__.py rename to blockinvgpfa/__init__.py index 646526a..856eeb7 100644 --- a/gpfa/__init__.py +++ b/blockinvgpfa/__init__.py @@ -2,10 +2,10 @@ :copyright: Copyright 2021 Brooks M. Musangu and Jan Drugowitsch. :license: Modified BSD, see LICENSE.txt for details. """ -from .gpfa import GPFA +from .blockinvgpfa import BlockInvGPFA from .preprocessing import EventTimesToCounts __all__ = [ - "GPFA", + "BlockInvGPFA", "EventTimesToCounts" ] diff --git a/gpfa/gpfa.py b/blockinvgpfa/blockinvgpfa.py similarity index 94% rename from gpfa/gpfa.py rename to blockinvgpfa/blockinvgpfa.py index 2d1b544..cd99d94 100644 --- a/gpfa/gpfa.py +++ b/blockinvgpfa/blockinvgpfa.py @@ -25,17 +25,18 @@ from tqdm import trange __all__ = [ - "GPFA" + "BlockInvGPFA" ] -class GPFA(sklearn.base.BaseEstimator): - """Gaussian Process Factor Analysis (GPFA) is a probabilistic - dimensionality reduction technique that extracts smooth latent - trajectories from noisy, high-dimensional time series data. - It combines Factor Analysis (FA) for dimensionality reduction with Gaussian - Processes (GPs) to model the time courses of the latent factors in a - unified, probabilistic framework. +class BlockInvGPFA(sklearn.base.BaseEstimator): + """BlockInvGPFA is a high-performance implementation of Gaussian + Process Factor Analysis (GPFA), a probabilistic dimensionality + reduction technique that extracts smooth latent trajectories from + noisy, high-dimensional time series data. GPFA combines Factor + Analysis (FA) for dimensionality reduction with Gaussian Processes + (GPs) to model the time courses of the latent factors in a unified, + probabilistic framework. GPFA operates on a set of time-series data, where each time series is given by an ``x_dim`` x ``bins`` matrix. ``x_dim`` needs to be the same @@ -47,6 +48,16 @@ class GPFA(sklearn.base.BaseEstimator): Please consult the :ref:`main page ` for more details on the underlying probabilistic model. + This implementation, BlockInvGPFA, builds on the core model described + in [Yu et al., 2009] and is adapted from the Elephant toolkit, with + substantial algorithmic enhancements: + + 1. It uses block-matrix identities and Schur complement updates to + efficiently share kernel inversion computations across + variable-length trials. + 2. It introduces a new variance-explained metric to evaluate BlockInvGPFA + model fits. + Parameters ---------- bin_size : float, optional, default=0.02 @@ -194,7 +205,7 @@ class GPFA(sklearn.base.BaseEstimator): Examples -------- >>> import numpy as np - >>> from gpfa import GPFA + >>> from blockinvgpfa import BlockInvGPFA >>> X = np.array([[ ... [3, 1, 0, 4, 1, 2, 1, 3, 4, 2, 2, 1, 2, 0, 0, 2, 2, 5, 1, 3], @@ -202,15 +213,15 @@ class GPFA(sklearn.base.BaseEstimator): ... [6, 5, 6, 5, 3, 5, 4, 5, 3, 6, 4, 5, 6, 4, 3, 3, 5, 1, 3, 3], ... [2, 2, 1, 1, 3, 2, 3, 2, 2, 0, 3, 4, 1, 2, 3, 1, 4, 1, 0, 1] ... ]]) >>> - >>> gpfa_model = GPFA(z_dim=1) + >>> blockinv_gpfa = BlockInvGPFA(z_dim=1) >>> # Fit the model - >>> gpfa_model.fit(X) + >>> blockinv_gpfa.fit(X) Initializing parameters using factor analysis... - Fitting GPFA model... + Fitting GP parameters by EM... 
>>> # Infer latent variable time-courses for the same data - >>> Zs, _ = gpfa_model.predict() + >>> Zs, _ = blockinv_gpfa.predict() >>> print(Zs) [array([[-1.18405633, -2.00878 , -0.01470251, -2.3143544 , 0.03651376, -1.06948736, 2.05355342, -0.16920794, -2.26437342, -1.21934552, 0.89241818, 0.03509708, -1.3936327 , 0.84781358, -1.2281484 ]])] >>> # Display the loading matrix (C_) and observation mean (d_) parameters - >>> print(gpfa_model.C_) + >>> print(blockinv_gpfa.C_) [[-0.78376501] [ 1.77773876] [ 0.51134474]] - >>> print(gpfa_model.d_) + >>> print(blockinv_gpfa.d_) [1.95470037 2.08933859 1.89693338] >>> # Obtaining log-likelihood scores - >>> llhs = gpfa_model.score() + >>> llhs = blockinv_gpfa.score() >>> print(llhs) [-117.54588379661148, -107.17193271370158, ..., -100.13200154180569] >>> # Evaluate the total explained variance regression score - >>> print(gpfa_model.variance_explained()) + >>> print(blockinv_gpfa.variance_explained()) (0.6581475357501596, array([0.65814754])) @@ -761,7 +772,7 @@ def _em(self, X): if np.any(np.diag(self.R_) == var_floor): warnings.warn('Private variance floor used for one or more ' - 'observed dimensions in GPFA.') + 'observed dimensions in BlockInvGPFA.') self.fit_info_ = {'iteration_time': iter_time, 'log_likelihoods': lls} @@ -1188,7 +1199,7 @@ def _fill_p_auto_sum(self, Seqs, precomp): on the caller (which should be :func:`_learn_gp_params()`) to make sure this is called correctly. - Finally, see the notes in the GPFA README. + Finally, see the notes in the BlockInvGPFA README. """ ############################################################ # Fill out PautoSum diff --git a/gpfa/preprocessing.py b/blockinvgpfa/preprocessing.py similarity index 100% rename from gpfa/preprocessing.py rename to blockinvgpfa/preprocessing.py diff --git a/docs/_static/log-likelihoods.png b/docs/_static/log-likelihoods.png deleted file mode 100644 index a4f9d97..0000000 Binary files a/docs/_static/log-likelihoods.png and /dev/null differ diff --git a/docs/_static/reach_trajectories_48.gif b/docs/_static/reach_trajectories_48.gif deleted file mode 100644 index 16d3366..0000000 Binary files a/docs/_static/reach_trajectories_48.gif and /dev/null differ diff --git a/docs/_static/reach_trajectories_48_mult_params.gif b/docs/_static/reach_trajectories_48_mult_params.gif deleted file mode 100644 index 40edfcc..0000000 Binary files a/docs/_static/reach_trajectories_48_mult_params.gif and /dev/null differ diff --git a/docs/blockinv_gpfa_class.rst b/docs/blockinv_gpfa_class.rst new file mode 100644 index 0000000..dab9f05 --- /dev/null +++ b/docs/blockinv_gpfa_class.rst @@ -0,0 +1,7 @@ +================== +BlockInvGPFA Class +================== + +.. currentmodule:: blockinvgpfa +.. autoclass:: BlockInvGPFA + :members: diff --git a/docs/conf.py b/docs/conf.py index 0d7c7b6..fb78a23 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,7 +7,7 @@ import os from datetime import date -# include root path to allow autodoc to find gpfa module +# include root path to allow autodoc to find blockinvgpfa module sys.path.insert(0, os.path.abspath('../')) # Path to the source directory (where your .rst files are) @@ -22,7 +22,7 @@ # -- General configuration ----------------------------------------------- -project = 'GPFA Documentation' +project = 'BlockInvGPFA Documentation' authors = 'Brooks M. 
Musangu and Jan Drugowitsch' copyright = "2021-{this_year}, {authors}".format( this_year=date.today().year, authors=authors) diff --git a/docs/contribute.rst b/docs/contribute.rst index 70bc891..2bf71c1 100755 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -4,7 +4,7 @@ Contribution ============== -We welcome contributions to the GPFA package, whether they involve enhancements, bug fixes, +We welcome contributions to the BlockInvGPFA package, whether they involve enhancements, bug fixes, or documentation improvements. To ensure the codebase's quality and maintainability, please follow this guide carefully. @@ -33,9 +33,9 @@ include tests for any module, class, or function you contribute. **Version Control Workflow** -When contributing to the GPFA package, please follow these steps: +When contributing to the BlockInvGPFA package, please follow these steps: -1. Fork the GPFA repository on GitHub. +1. Fork the BlockInvGPFA repository on GitHub. 2. Clone your forked repository to your local machine. @@ -50,12 +50,12 @@ When contributing to the GPFA package, please follow these steps: 7. Push your branch to your GitHub fork. -8. Create a pull request (PR) from your branch to the GPFA main repository. Provide a +8. Create a pull request (PR) from your branch to the BlockInvGPFA main repository. Provide a detailed description of your changes in the PR. 9. We will review your PR, and if necessary, provide feedback or request changes. -10. Once your PR is approved, it will be merged into the main GPFA repository. +10. Once your PR is approved, it will be merged into the main BlockInvGPFA repository. -Thank you for your contribution to the GPFA package. Your efforts help improve +Thank you for your contribution to the BlockInvGPFA package. Your efforts help improve and maintain this valuable resource for the community. diff --git a/docs/gpfa_class.rst b/docs/gpfa_class.rst deleted file mode 100644 index 92fbc9b..0000000 --- a/docs/gpfa_class.rst +++ /dev/null @@ -1,7 +0,0 @@ -========== -GPFA Class -========== - -.. currentmodule:: gpfa -.. autoclass:: GPFA - :members: diff --git a/docs/index.rst b/docs/index.rst index 5dfcf51..7270405 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,6 +6,8 @@ Gaussian-process factor analysis (GPFA) is a dimensionality reduction method [Yu GPFA applies `factor analysis (FA) `_ to observed data to simultaneously reduce their dimensionality and smooth the resulting low-dimensional trajectories by fitting a `Gaussian process (GP) `_ model to them. See the below :ref:`notes` for its main differences to the popular PCA dimensionality reduction method. +This documentation describes `BlockInvGPFA`, a Python implementation of the GPFA model that incorporates several performance and usability improvements over existing implementations (e.g., from the Elephant toolbox [Denker2018]_). + The essential ingredients of the generative model underlying GPFA are *observations* :math:`\boldsymbol{X}` and *latent variables* :math:`\boldsymbol{Z}`. For each high-dimensional time series in a dataset, the :math:`x_\textrm{dim}`-dimensional time series is split into :math:`T` time bins of size :math:`\delta t`, leading to the :math:`x_\textrm{dim} \times T` matrix :math:`\boldsymbol{X}` that contains these data. The observations in each time bin are assumed to be stochastically generated by a :math:`z_\textrm{dim}`-dimensional latent variable (:math:`z_\textrm{dim} < x_\textrm{dim}`) through some noisy linear mapping. @@ -49,15 +51,15 @@ Examples .. 
_example_1: -(i) Applying GPFA to synthetic data ----------------------------------- +(i) Applying BlockInvGPFA to synthetic data +------------------------------------------- -This example illustrates the application of GPFA to data analysis by encompassing synthetic data generation, GPFA model fitting, and latent variable extraction. It emphasizes a step-by-step explanation of utilizing GPFA for data analysis, highlighting key functionalities of the library. +This example illustrates the application of BlockInvGPFA to data analysis, covering synthetic data generation, BlockInvGPFA model fitting, and latent variable extraction. It provides a step-by-step walkthrough that highlights key functionalities of the library. .. code-block:: python import numpy as np - from gpfa import GPFA + from blockinvgpfa import BlockInvGPFA from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import ConstantKernel, RBF, WhiteKernel @@ -95,18 +97,18 @@ x = C @ z + np.random.normal(0, sqrtR[:, np.newaxis] , (x_dim, T_per_obs)) X.append(x) - gpfa = GPFA(bin_size=bin_size, z_dim=z_dim, em_tol=1e-3, verbose=True) - gpfa.fit(X) # this will take a while + blockinv_gpfa = BlockInvGPFA(bin_size=bin_size, z_dim=z_dim, em_tol=1e-3, verbose=True) + blockinv_gpfa.fit(X) # this will take a while - results = gpfa.predict(returned_data=['Z_mu_orth', 'Z_mu']) + results = blockinv_gpfa.predict(returned_data=['Z_mu_orth', 'Z_mu']) - print(gpfa.variance_explained()) + print(blockinv_gpfa.variance_explained()) .. code-block:: console Initializing parameters using factor analysis... - Fitting GPFA model... + Fitting GP parameters by EM... Fitting has converged after 135 EM iterations. (0.9834339959622203, array([0.80069798, 0.18273602])) @@ -123,7 +125,7 @@ Let us now visualize the true orthogonalized latents against the inferred orthog # orthogonalize true latents using inferred C true_Z = Z - true_Z_orth = [gpfa.OrthTrans_ @ Zn for Zn in true_Z] + true_Z_orth = [blockinv_gpfa.OrthTrans_ @ Zn for Zn in true_Z] # Extract Z_orth and pZ_mu from results Z_orth = results[0]['Z_mu_orth'] @@ -158,9 +160,9 @@ Let us now visualize the true orthogonalized latents against the inferred orthog .. _examples_2: -(ii) Applying GPFA to real neural Data -------------------------------------- -You can find a more complex example that applies GPFA to neural data used by [Even2019]_ (available at [Even2022]_) in an :doc:`neural_example`. To run this example yourself, download the :download:`Jupyter notebook containing the code `. +(ii) Applying BlockInvGPFA to real neural data +---------------------------------------------- +You can find a more complex example that applies BlockInvGPFA to neural data used by [Even2019]_ (available at [Even2022]_) in the :doc:`neural_example`. To run this example yourself, download the :download:`Jupyter notebook containing the code `. Original code ------------- @@ -175,6 +177,12 @@ References In Journal of Neurophysiology, Vol. 102, Issue 1. pp. 614-635. `_ +.. [Denker2018] `Denker, M., Yegenoglu, A., and Grün, S. + "Collaborative HPC-enabled workflows on the HBP Collaboratory using the Elephant framework." + In Neuroinformatics 2018, 2018, p. P19. + `_ + + .. 
[Even2022] `Even-Chen, Nir and Sheffer, Blue and Vyas, Saurabh and Ryu, Stephen and Shenoy, Krishna (2022) "Structure and variability of delay activity in premotor cortex" @@ -190,7 +198,7 @@ References :maxdepth: 2 :hidden: - gpfa_class + blockinv_gpfa_class preprocessing_class neural_example installation diff --git a/docs/installation.rst b/docs/installation.rst index 9bd9378..1f43a55 100755 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -5,62 +5,75 @@ Installation Guide ==================== Clone the Project Locally --------------------------- +------------------------- -Begin by cloning the project's repository locally using the `git` command within a Python virtual environment. Ensure that you have `Python virtual environment`_ installed and activated before proceeding. Open a terminal and run the following commands: +Begin by cloning the project repository locally. It is strongly recommended to do this inside a `Python virtual environment`_: .. code-block:: bash - $ git clone https://github.com/CausalityInMotion/GPFA_for_sklearn - $ cd GPFA_for_sklearn + $ git clone https://github.com/CausalityInMotion/BlockInverseGPFA.git + $ cd BlockInverseGPFA -Install Required Packages -------------------------- +Install the Package +------------------- -Once you are in the project's working directory, install the required Python packages using `pip` and the `requirements.txt` file: +Use `pip` to install the package and its core dependencies: .. code-block:: bash - $ pip install -r requirements.txt + $ pip install . -Your environment is now properly configured to use the GPFA package. +If you also want to run tests or build documentation, you can install the appropriate optional dependencies: -Building the Documentation --------------------------- +- For testing: -To build the documentation, you will need the following Python packages: + .. code-block:: bash -- `Sphinx`_: A documentation generator. -- `Read the Docs Sphinx Theme`_: A theme for Sphinx that provides an elegant and readable documentation format. + $ pip install .[test] -You can install these packages using `pip`: +- For documentation: -.. code-block:: bash + .. code-block:: bash - $ pip install sphinx - $ pip install sphinx-rtd-theme + $ pip install .[docs] -After installing the required packages, navigate to the "docs" directory: +Building the Documentation +-------------------------- -.. code-block:: bash +To build the documentation, you will need the following packages: - $ cd docs +- Sphinx_: A documentation generator. +- `Read the Docs Sphinx Theme`_: A theme for Sphinx that provides an elegant and readable documentation format. -To generate the documentation in HTML format, run the following command: +After installing the documentation dependencies, generate the HTML documentation as follows: .. code-block:: bash + $ cd docs $ make html -To view the generated documentation, open the HTML index page in your web browser: +To view the generated documentation in your browser: .. code-block:: bash - $ open _build/html/index.html + $ open _build/html/index.html # On macOS + + # OR + $ xdg-open _build/html/index.html # On Linux + + # OR + $ start _build/html/index.html # On Windows (in cmd) Your documentation is now available for reference. -This guide provides step-by-step instructions for installing the GPFA package and generating its documentation. 
+Alternatively, you can view the latest published documentation online: + +📖 https://blockinversegpfa.readthedocs.io/en/latest/ + +About This Guide +---------------- + +This guide provides step-by-step instructions for installing the BlockInvGPFA package, running its test suite, and building its documentation using Python packaging standards. .. _Python virtual environment: https://docs.python.org/3/library/venv.html .. _Sphinx: http://www.sphinx-doc.org diff --git a/docs/neural_example.ipynb b/docs/neural_example.ipynb index 3459fa0..db12665 100644 --- a/docs/neural_example.ipynb +++ b/docs/neural_example.ipynb @@ -5,7 +5,7 @@ "id": "aa4e07c6-7055-41ae-ae7f-2e20819bed88", "metadata": {}, "source": [ - "# Extended GPFA example" + "# Extended BlockInvGPFA example" ] }, { @@ -13,7 +13,7 @@ "id": "0307a67b-8f85-46fd-a2fa-22169fa0d838", "metadata": {}, "source": [ - "In this example, we apply GPFA to neural data of reaching monkeys from [Even-Chen et al. (2019)](https://doi.org/10.1371/journal.pcbi.1006808), as available from the [DANDI Archive](https://doi.org/10.48324/dandi.000121/0.220124.2156). In order to run this example, please download the two `sub-JenkindsC_ses-2016*.nwb` files from the `sub-JenkinsC` folder of the archive and place them in a local directory.\n", + "In this example, we apply BlockInvGPFA to neural data of reaching monkeys from [Even-Chen et al. (2019)](https://doi.org/10.1371/journal.pcbi.1006808), as available from the [DANDI Archive](https://doi.org/10.48324/dandi.000121/0.220124.2156). In order to run this example, please download the two `sub-JenkinsC_ses-2016*.nwb` files from the `sub-JenkinsC` folder of the archive and place them in a local directory.\n", "\n", "The data originates from recordings of motor cortex area M1 and dorsal pre-motor cortex of monkey J performing the 3-ring task, with the following details:\n", "\n", @@ -47,12 +47,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "40f75a11-7129-41ab-a9e5-7066ba724dc8", "metadata": {}, "outputs": [], "source": [ - "# include root path to access gpfa module\n", + "# include root path to access blockinvgpfa module\n", "import sys\n", "import os\n", "sys.path.insert(0, os.path.abspath('../'))" ] }, @@ -209,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "d3891d06-dab4-4749-b0cf-15b73b6e0538", "metadata": {}, "outputs": [ ], "source": [ - "from gpfa import EventTimesToCounts\n", + "from blockinvgpfa import EventTimesToCounts\n", "from sklearn.preprocessing import FunctionTransformer\n", "from sklearn.pipeline import make_pipeline\n", @@ -311,43 +311,43 @@ "id": "f18c4b15-ccde-4f7c-93b4-0cd39e0e9e2f", "metadata": {}, "source": [ - "## Fitting GPFA and assessing explained variance\n", - "Now that have completed data pre-processing, let us fit GPFA with 48 latent dimensions. As this takes a while, it is recommended to save the fitted GPFA instance for later re-use." + "## Fitting BlockInvGPFA and assessing explained variance\n", + "Now that we have completed data pre-processing, let us fit BlockInvGPFA with 48 latent dimensions. As this takes a while, it is recommended to save the fitted BlockInvGPFA instance for later re-use."
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "9a445134-f602-48a5-863f-10bf8f335745", "metadata": {}, "outputs": [], "source": [ - "from gpfa import GPFA\n", + "from blockinvgpfa import BlockInvGPFA\n", "import pickle\n", "\n", - "def load_or_fit_gpfa(X_train, gpfa_params, gpfa_filename):\n", + "def load_or_fit_gpfa(X_train, blockinv_gpfa_params, blockinv_gpfa_filename):\n", " try:\n", - " with open(gpfa_filename, 'rb') as f:\n", - " gpfa = pickle.load(f)\n", - " print(f'Loaded GPFA instance from {gpfa_filename}')\n", + " with open(blockinv_gpfa_filename, 'rb') as f:\n", + " blockinv_gpfa = pickle.load(f)\n", + " print(f'Loaded BlockInvGPFA instance from {blockinv_gpfa_filename}')\n", " except FileNotFoundError:\n", - " gpfa = None\n", + " blockinv_gpfa = None\n", "\n", - " if not gpfa:\n", - " # Initialize & fit GPFA\n", - " gpfa = GPFA(**gpfa_params)\n", - " gpfa.fit(X_train)\n", + " if not blockinv_gpfa:\n", + " # Initialize & fit BlockInvGPFA\n", + " blockinv_gpfa = BlockInvGPFA(**blockinv_gpfa_params)\n", + " blockinv_gpfa.fit(X_train)\n", "\n", - " with open(gpfa_filename, 'wb') as f:\n", - " pickle.dump(gpfa, f, protocol=5)\n", - " print(f'Saved trained GPFA instance to {gpfa_filename}') \n", + " with open(blockinv_gpfa_filename, 'wb') as f:\n", + " pickle.dump(blockinv_gpfa, f, protocol=5)\n", + " print(f'Saved trained BlockInvGPFA instance to {blockinv_gpfa_filename}') \n", "\n", - " return gpfa" + " return blockinv_gpfa" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "893c2755-14f7-44bf-b01a-393cb56fae7a", "metadata": {}, "outputs": [ ], "source": [ "n_latents = 48 # number of latent states\n", - "gpfa_params = {\n", + "blockinv_gpfa_params = {\n", " 'bin_size': bin_size,\n", " 'z_dim': n_latents,\n", " 'verbose': True\n", "}\n", - "gpfa = load_or_fit_gpfa(X_train, gpfa_params, f'gpfa_trained_z{n_latents}.pkl')" + "blockinv_gpfa = load_or_fit_gpfa(X_train, blockinv_gpfa_params, f'blockinv_gpfa_trained_z{n_latents}.pkl')" ] }, { "cell_type": "markdown", "id": "1c80c100-58a0-40e3-878e-3404728b4385", "metadata": {}, "source": [ - "We can now check how much of the training and test set variance GPFA is able to explain. This tells us the overall variance explained, as well as how each latent dimension contributes to this. Please note that there is no guarantee to get the exact same numbers across different machines. This is due to differences in the numberical libraries used by `numpy` for different machine types." + "We can now check how much of the training and test set variance BlockInvGPFA is able to explain. This tells us the overall variance explained, as well as how each latent dimension contributes to this. Please note that there is no guarantee to get the exact same numbers across different machines. This is due to differences in the numerical libraries used by `numpy` for different machine types."
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "f1d75c01-922a-4167-bbe4-2dab7fe5fca8", "metadata": {}, "outputs": [ ], "source": [ - "variance_explained_train = gpfa.variance_explained()\n", + "variance_explained_train = blockinv_gpfa.variance_explained()\n", "print(f\"Variance Explained on Training Set: \\n{variance_explained_train}\")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "886fa743-0437-4694-b5d3-e4fd54272c58", "metadata": {}, "outputs": [ ], "source": [ - "variance_explained_test = gpfa.variance_explained(X_test)\n", + "variance_explained_test = blockinv_gpfa.variance_explained(X_test)\n", "print(f\"Variance Explained on Test Set: \\n{variance_explained_test}\")" ] }, "source": [ "## Plotting latent trajectories\n", "\n", - "We will now inspect inferred (orthogonalized) latent trajectories by plotting them in different ways. For this we will focus on the test set, for which we can get the latent trajectory predictions by calling `gpfa.predict(.)`." + "We will now inspect inferred (orthogonalized) latent trajectories by plotting them in different ways. For this we will focus on the test set, for which we can get the latent trajectory predictions by calling `blockinv_gpfa.predict(.)`." ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "d5b94c64-b5a6-4eac-8bc6-1a04d48f8288", "metadata": {}, "outputs": [], "source": [ - "outs, ll = gpfa.predict(X=X_test ,returned_data=['Z_mu', 'Z_mu_orth'])\n", + "outs, ll = blockinv_gpfa.predict(X=X_test, returned_data=['Z_mu', 'Z_mu_orth'])\n", "Z_mu_orth = outs['Z_mu_orth']\n", "\n", "max_bins = np.max([Zi.shape[1] for Zi in Z_mu_orth])\n", @@ -721,14 +721,14 @@ "id": "0d677985-22e2-4b75-9ab6-5ee4739d8c60", "metadata": {}, "source": [ - "## Tuning more GPFA kernel parameters\n", + "## Tuning more BlockInvGPFA kernel parameters\n", "\n", - "So far we used the standard GPFA kernels, which are weighted sums of RBF and white kernels, and where the only parameters that are being learned are the length scales of the RBF kernel per latent dimension. Let us now demonstrate the use of a slightly more flexible kernel that additionally learns the noise level of the white kernels." + "So far we used the standard BlockInvGPFA kernels, which are weighted sums of RBF and white kernels, where the only learned parameters are the length scales of the RBF kernel per latent dimension. Let us now demonstrate the use of a slightly more flexible kernel that additionally learns the noise level of the white kernels."
] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "0bcf348f-095f-4887-9d18-2f3c9f0465f7", "metadata": {}, "outputs": [ ], "source": [ "multi_param_kernel = \\\n", " ConstantKernel(1-0.001, constant_value_bounds='fixed') * RBF(length_scale=0.1) + \\\n", " ConstantKernel(0.001, constant_value_bounds='fixed') * WhiteKernel(noise_level=1.0)\n", - "gpfa_params = {\n", + "blockinv_gpfa_params = {\n", " 'bin_size': bin_size,\n", " 'z_dim': n_latents,\n", " 'gp_kernel': multi_param_kernel,\n", " 'verbose': True\n", "}\n", - "gpfa = load_or_fit_gpfa(X_train, gpfa_params, f'gpfa_wnoise_trained_z{n_latents}.pkl')" + "blockinv_gpfa = load_or_fit_gpfa(X_train, blockinv_gpfa_params, f'gpfa_wnoise_trained_z{n_latents}.pkl')" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "9e624c24", "metadata": {}, "outputs": [ ], "source": [ - "variance_explained_train = gpfa.variance_explained()\n", + "variance_explained_train = blockinv_gpfa.variance_explained()\n", "print(f\"Variance Explained on Training Set: \\n{variance_explained_train}\")\n", - "variance_explained_test = gpfa.variance_explained(X_test)\n", + "variance_explained_test = blockinv_gpfa.variance_explained(X_test)\n", "print(f\"Variance Explained on Test Set: \\n{variance_explained_test}\")" ] }, "source": [ "We see a minor decrease in the variance explained for the training set, but the variance explained for the test set remains practically unchanged. Thus, the mild increase in the adjustable GP kernel parameters did not substantially increase the fit quality in this example.\n", "\n", - "Note that this does not imply that using different GPFA kernels would not lead to better fits, or that the same fitting applied to different data does not improve the fit." + "Note that this does not imply that using different BlockInvGPFA kernels would not lead to better fits, or that the same fitting applied to different data does not improve the fit." ] }, "source": [ "## Determining the best latent space dimensionality\n", "\n", - "So far we fixed the dimensionality of the latent space to 48. Let us now ask how to best tune the number of dimensions. For this we fit multiple GPFA models with different latent space dimensionality to the data, and then compare their fit quality. Note that this might take a very long time." + "So far we fixed the dimensionality of the latent space to 48. Let us now ask how to best tune the number of dimensions. For this we fit multiple BlockInvGPFA models with different latent space dimensionality to the data, and then compare their fit quality. Note that this might take a very long time."
] }, { @@ -842,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "0644dff3-94fb-4d16-a77f-59fb1c523787", "metadata": {}, "outputs": [ @@ -878,7 +878,7 @@ "log_likelihoods = np.empty((cv_folds, len(n_latents)))\n", "var_explained_train = np.empty((cv_folds, len(n_latents)))\n", "var_explained_test = np.empty((cv_folds, len(n_latents)))\n", - "gpfa_params = {\n", + "blockinv_gpfa_params = {\n", " 'bin_size': bin_size,\n", " 'z_dim': n_latents[0],\n", " 'verbose': True\n", @@ -891,13 +891,13 @@ " # assemble train/test spike counts\n", " X_train = [spike_counts[i] for i in itrain]\n", " X_test = [spike_counts[i] for i in itest]\n", - " # load or fit gpfa\n", - " gpfa_params['z_dim'] = z_dim\n", - " gpfa = load_or_fit_gpfa(X_train, gpfa_params, f'gpfa_trained_z{z_dim}_cv{cv_fold}.pkl')\n", + " # load or fit blockinv_gpfa\n", + " blockinv_gpfa_params['z_dim'] = z_dim\n", + " blockinv_gpfa = load_or_fit_gpfa(X_train, blockinv_gpfa_params, f'blockinv_gpfa_trained_z{z_dim}_cv{cv_fold}.pkl')\n", " # compute various quality-of-fit metrics\n", - " var_explained_train[cv_fold, in_latents], _ = gpfa.variance_explained()\n", - " var_explained_test[cv_fold, in_latents], _ = gpfa.variance_explained(X_test)\n", - " log_likelihoods[cv_fold, in_latents] = gpfa.score(X_test)" + " var_explained_train[cv_fold, in_latents], _ = blockinv_gpfa.variance_explained()\n", + " var_explained_test[cv_fold, in_latents], _ = blockinv_gpfa.variance_explained(X_test)\n", + " log_likelihoods[cv_fold, in_latents] = blockinv_gpfa.score(X_test)" ] }, { @@ -986,7 +986,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.10" + "version": "3.13.1" } }, "nbformat": 4, diff --git a/docs/preprocessing_class.rst b/docs/preprocessing_class.rst index 9dce1e9..87f06ec 100644 --- a/docs/preprocessing_class.rst +++ b/docs/preprocessing_class.rst @@ -2,7 +2,6 @@ Preprocessing ============= -.. currentmodule:: gpfa +.. currentmodule:: blockinvgpfa .. autoclass:: EventTimesToCounts :members: - diff --git a/test/test_gpfa.py b/test/test_blockinvgpfa.py similarity index 90% rename from test/test_gpfa.py rename to test/test_blockinvgpfa.py index fe528c4..1348a06 100644 --- a/test/test_gpfa.py +++ b/test/test_blockinvgpfa.py @@ -3,19 +3,19 @@ # license Modified BSD, see LICENSE.txt for details. # ... """ -GPFA Unittests +BlockInvGPFA Unittests """ import unittest import numpy as np from scipy import linalg -from gpfa import GPFA +from blockinvgpfa import BlockInvGPFA from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel class TestGPFA(unittest.TestCase): """ - Unit tests for the GPFA analysis. + Unit tests for the BlockInvGPFA analysis. 
""" def setUp(self): """ @@ -113,7 +113,7 @@ def gen_test_data(trial_lens, rates_a, rates_b, use_sqrt=True): self.t_half = int(np.ceil(self.T[0] / 2.0)) # ================================================== - # initialize GPFA + # initialize BlockInvGPFA # ================================================== multi_params_kernel = ConstantKernel( 1-0.001, constant_value_bounds='fixed' @@ -136,29 +136,29 @@ def gen_test_data(trial_lens, rates_a, rates_b, use_sqrt=True): ) * WhiteKernel( noise_level=1, noise_level_bounds='fixed' )] - self.gpfa = GPFA( + self.blockinv_gpfa = BlockInvGPFA( bin_size=self.bin_size, z_dim=self.z_dim, em_max_iters=self.n_iters ) - self.gpfa_with_seq_kernel = GPFA( + self.gpfa_with_seq_kernel = BlockInvGPFA( bin_size=self.bin_size, z_dim=self.z_dim, gp_kernel=seq_kernel, em_max_iters=self.n_iters ) - self.gpfa_with_multi_params_kernel = GPFA( + self.gpfa_with_multi_params_kernel = BlockInvGPFA( bin_size=self.bin_size, z_dim=self.z_dim, gp_kernel=multi_params_kernel, em_max_iters=self.n_iters ) # fit the model - self.gpfa.fit(self.X) + self.blockinv_gpfa.fit(self.X) self.gpfa_with_multi_params_kernel.fit(self.X) self.gpfa_with_seq_kernel.fit(self.X) - self.results, _ = self.gpfa.predict( + self.results, _ = self.blockinv_gpfa.predict( returned_data=['Z_mu', 'Z_mu_orth']) # get latents sequence and data log_likelihood - self.latent_seqs, self.ll = self.gpfa._infer_latents(self.X) + self.latent_seqs, self.ll = self.blockinv_gpfa._infer_latents(self.X) self.latent_seqs_multiparamskern, self.ll_multiparams_kernel = \ self.gpfa_with_multi_params_kernel._infer_latents(self.X) self.latent_seqs_seqkernel, self.ll_seq_kernel = \ @@ -173,7 +173,7 @@ def create_mu_and_cov(self, gpfa_inst): and covaraince is (K_inv + C'R_invC) Paramters: - gpfa_inst : GPFA instance + gpfa_inst : BlockInvGPFA instance Each istance is different based on the input params Returns: test_latent_seqs: numpy.ndarray @@ -220,10 +220,10 @@ def create_mu_and_cov(self, gpfa_inst): def test_infer_latents(self): """ - Test the mean and cov for different GPFA instances + Test the mean and cov for different BlockInvGPFA instances """ - # get test mean and cov for different GPFA instances - test_latent_seqs_gpfa = self.create_mu_and_cov(self.gpfa) + # get test mean and cov for different BlockInvGPFA instances + test_latent_seqs_gpfa = self.create_mu_and_cov(self.blockinv_gpfa) test_latent_seqs_seq_kern = self.create_mu_and_cov( self.gpfa_with_seq_kernel ) @@ -262,28 +262,29 @@ def test_data_loglikelihood(self): def test_orthonormalized_transform(self): """ - Test GPFA orthonormalization transform of the parameter `C`. + Test BlockInvGPFA orthonormalization transform of the parameter `C`. """ - corth = self.gpfa.Corth_ - c_orth = linalg.orth(self.gpfa.C_) + corth = self.blockinv_gpfa.Corth_ + c_orth = linalg.orth(self.blockinv_gpfa.C_) # Assert self.assertTrue(np.allclose(c_orth, corth)) def test_orthonormalized_latents(self): """ - Test GPFA orthonormalization functions applied in `gpfa.predict`. + Test BlockInvGPFA orthonormalization functions applied in + `blockinv_gpfa.predict`. 
""" Z_mu = self.results['Z_mu'][0] Z_mu_orth = self.results['Z_mu_orth'][0] - test_Z_mu_orth = np.dot(self.gpfa.OrthTrans_, Z_mu) + test_Z_mu_orth = np.dot(self.blockinv_gpfa.OrthTrans_, Z_mu) # Assert self.assertTrue(np.allclose(Z_mu_orth, test_Z_mu_orth)) def test_variance_explained(self): """ - Test GPFA explained_variance + Test BlockInvGPFA explained_variance """ test_r2_score = 0.6648115733320232 - r2_t1 = self.gpfa.variance_explained()[0] + r2_t1 = self.blockinv_gpfa.variance_explained()[0] # Assert self.assertAlmostEqual(test_r2_score, r2_t1) diff --git a/test/test_preprocessing.py b/test/test_preprocessing.py index 49fd6de..51556e2 100644 --- a/test/test_preprocessing.py +++ b/test/test_preprocessing.py @@ -8,7 +8,7 @@ import unittest import numpy as np -from gpfa.preprocessing import EventTimesToCounts +from blockinvgpfa.preprocessing import EventTimesToCounts try: import neo neo_imported = True