|
9 | 9 | import einops |
10 | 10 | import xarray as xr |
11 | 11 |
|
12 | | -__all__ = ["rearrange", "raw_rearrange", "reduce", "raw_reduce"] |
| 12 | +__all__ = ["rearrange", "raw_rearrange", "reduce", "raw_reduce", "DaskBackend"] |
13 | 13 |
|
14 | 14 |
|
15 | 15 | class DimHandler: |
@@ -223,8 +223,15 @@ def rearrange(da, out_dims, in_dims=None, **kwargs): |
223 | 223 | missing_in_dims = [dim for dim in da_dims if dim not in in_names] |
224 | 224 | expected_missing = set(out_dims).union(in_names).difference(in_dims) |
225 | 225 | missing_out_dims = [dim for dim in da_dims if dim not in expected_missing] |
226 | | - pattern = f"{handler.get_names(missing_in_dims)} {in_pattern} ->\ |
227 | | - {handler.get_names(missing_out_dims)} {out_pattern}" |
| 226 | + |
| 227 | + # avoid using dimensions as core dims unnecessarily |
| 228 | + non_core_dims = [dim for dim in missing_in_dims if dim in missing_out_dims] |
| 229 | + missing_in_dims = [dim for dim in missing_in_dims if dim not in non_core_dims] |
| 230 | + missing_out_dims = [dim for dim in missing_out_dims if dim not in non_core_dims] |
| 231 | + |
| 232 | + non_core_pattern = handler.get_names(non_core_dims) |
| 233 | + pattern = f"{non_core_pattern} {handler.get_names(missing_in_dims)} {in_pattern} ->\ |
| 234 | + {non_core_pattern} {handler.get_names(missing_out_dims)} {out_pattern}" |
228 | 235 |
|
229 | 236 | axes_lengths = { |
230 | 237 | handler.rename_kwarg(k): v for k, v in kwargs.items() if k in out_names + out_dims |
@@ -395,3 +402,58 @@ def raw_reduce(da, pattern, reduction, **kwargs): |
395 | 402 | in_dims = None |
396 | 403 | out_dims = translate_pattern(out_pattern) |
397 | 404 | return reduce(da, reduction, out_dims=out_dims, in_dims=in_dims, **kwargs) |
| 405 | + |
| 406 | + |
| 407 | +class DaskBackend(einops._backends.AbstractBackend): # pylint: disable=protected-access |
| 408 | + """Dask backend class for einops. |
| 409 | +
|
| 410 | + It should be imported before using functions of :mod:`xarray_einstats.einops` |
| 411 | + on Dask-backed DataArrays. |
| 412 | + It doesn't need to be initialized or used explicitly. |
| 413 | +
|
| 414 | + Notes |
| 415 | + ----- |
| 416 | + Class created following the advice in |
| 417 | + `issue einops#120 <https://github.com/arogozhnikov/einops/issues/120>`_ about Dask support |
| 418 | + and from reading |
| 419 | + `einops/_backends <https://github.com/arogozhnikov/einops/blob/master/einops/_backends.py>`_, |
| 420 | + the source of the AbstractBackend class of which DaskBackend is a subclass. |
| 421 | + """ |
| 422 | + |
| 423 | + # pylint: disable=no-self-use |
| 424 | + framework_name = "dask" |
| 425 | + |
| 426 | + def __init__(self): |
| 427 | + """Initialize DaskBackend. |
| 428 | +
|
| 429 | + Contains the dask import to avoid errors when dask is not installed. |
| 430 | + """ |
| 431 | + import dask.array as dsar |
| 432 | + |
| 433 | + self.dsar = dsar |
| 434 | + |
| 435 | + def is_appropriate_type(self, tensor): |
| 436 | + """Recognizes tensors it can handle.""" |
| 437 | + return isinstance(tensor, self.dsar.core.Array) |
| 438 | + |
| 439 | + def from_numpy(self, x): # noqa: D102 |
| 440 | + return self.dsar.array(x) |
| 441 | + |
| 442 | + def to_numpy(self, x): # noqa: D102 |
| 443 | + return x.compute() |
| 444 | + |
| 445 | + def arange(self, start, stop): # noqa: D102 |
| 446 | + # supplementary method used only in testing, so should implement CPU version |
| 447 | + return self.dsar.arange(start, stop) |
| 448 | + |
| 449 | + def stack_on_zeroth_dimension(self, tensors: list): # noqa: D102 |
| 450 | + return self.dsar.stack(tensors) |
| 451 | + |
| 452 | + def tile(self, x, repeats): # noqa: D102 |
| 453 | + return self.dsar.tile(x, repeats) |
| 454 | + |
| 455 | + def is_float_type(self, x): # noqa: D102 |
| 456 | + return x.dtype in ("float16", "float32", "float64", "float128") |
| 457 | + |
| 458 | + def add_axis(self, x, new_position): # noqa: D102 |
| 459 | + return self.dsar.expand_dims(x, new_position) |
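
For reference, a minimal usage sketch of the new backend, not part of the PR: importing DaskBackend is all that is needed, since einops discovers the subclass on its own once dask is loaded. The dimension names, sizes, chunking, and the dict-based grouping in the pattern list below are illustrative assumptions layered on the rearrange API defined earlier in this file.

    # minimal sketch, assuming hypothetical dimension names and chunk sizes
    import dask.array as dsar
    import xarray as xr

    # importing DaskBackend is enough; it is never instantiated explicitly here
    from xarray_einstats.einops import DaskBackend, rearrange  # noqa: F401

    da = xr.DataArray(
        dsar.ones((4, 100, 3), chunks=(1, 50, 3)),
        dims=["chain", "draw", "team"],
    )

    # stack "chain" and "draw" into a hypothetical "sample" dimension;
    # the dask-backed data is handled by einops through DaskBackend
    out = rearrange(da, [{"sample": ["chain", "draw"]}, "team"])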