diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..90ffbf9 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,7 @@ +# When making commits that are strictly formatting/style changes, add the +# commit hash here, so git blame can ignore the change. +# +# For more details, see: +# https://git-scm.com/docs/git-config#Documentation/git-config.txt-blameignoreRevsFile + +7c44d41878b36b6f058ba448a4762757c3b4c0da # initial autoformat with black diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..e50b9e5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Create a report to help us improve python-rasterstats +title: "" +labels: '' +assignees: '' + +--- + +Welcome to the `python-rasterstats` issue tracker. Thanks for putting together a bug report! By following the template below, we'll be better able to reproduce the problem on our end. + +If you don't have a bug specifically but a general support question, please visit https://gis.stackexchange.com/ + +**Describe the bug** +A clear and concise description of what the bug is. What you expected to happen vs. what did happen. + +**To Reproduce** +Steps to reproduce the behavior: +1. How did you install rasterstats and its dependencies? +2. What datasets are necessary to reproduce the bug? Please provide links to example data if necessary. +3. What code is necessary to reproduce the bug? Provide the code directly below or provide links to it. + +```python +# Code to reproduce the error +``` + +**Feature Requests** + +`python-rasterstats` is not currently accepting any feature requests via the issue tracker. If you'd like to add a backwards-compatible feature, please open a pull request - it doesn't need to be 100% ready but should include a working proof-of-concept, tests, and should not break the existing API. 
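Local ``git blame`` does not pick up ``.git-blame-ignore-revs`` automatically; it has to be pointed at the file once per clone, or per invocation, using the ``blame.ignoreRevsFile`` option from the git-config documentation linked above. A minimal setup:

.. code-block:: bash

    # One-time, per clone: have git blame skip the commits listed in the file
    git config blame.ignoreRevsFile .git-blame-ignore-revs

    # Or ad hoc, for a single blame run
    git blame --ignore-revs-file .git-blame-ignore-revs src/rasterstats/main.py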
diff --git a/.github/workflows/test-rasterstats.yml b/.github/workflows/test-rasterstats.yml new file mode 100644 index 0000000..bc979fe --- /dev/null +++ b/.github/workflows/test-rasterstats.yml @@ -0,0 +1,26 @@ +name: Rasterstats Python package + +on: + pull_request: + push: + branches: [ $default-branch ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install pip --upgrade + python -m pip install -e ".[dev]" + - name: Test all packages + run: | + pytest diff --git a/.gitignore b/.gitignore index b2e04b2..0de38f5 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,7 @@ Vagrantfile venv .eggs .cache + +# no uv lockfile until this is fixed: +# https://github.com/astral-sh/uv/issues/10845 +uv.lock diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 6f2d1fd..0000000 --- a/.travis.yml +++ /dev/null @@ -1,31 +0,0 @@ -language: python -sudo: false -cache: - directories: - - ~/.cache/pip -env: - global: - - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels - - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels -addons: - apt: - packages: - - libgdal1h - - gdal-bin - - libgdal-dev -python: - - 2.7 - - 3.4 - - 3.5 - - 3.6 -before_install: - - pip install -U pip setuptools --upgrade - - pip install wheel -install: - - pip install numpy>=1.9 Cython - - pip install -r requirements_dev.txt - - pip install coveralls - - pip install -e . -script: py.test --cov rasterstats --cov-report term-missing -after_success: - - coveralls diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 2184375..7e9d042 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,50 @@ +0.20.0 +- Progress bar for interactive use (#300) +- Fixes to support Fiona 1.10 (#301) +- Drop Python 3.8 support, 3.9+ is the minimum version + +0.19.0 +- Drop Python 2 support, Python 3 style updates #283 +- Sort imports #284 +- Add support for Fiona>1.9 #287 + +0.18.0 +- Move project metadata to pyproject.toml #277 +- Black formatting #278 +- Don't cast integers to int64, use numpy accumulator dtype #279 +- Warn about nodata only once, io.NodataWarning type #280 + +0.17.1 +- Fixes to keep up with recent versions of dependencies: #275 fiona, #266 shapely, #264 click +- Added a pyproject.toml #265 +- Added CI testing for python 3.7 through 3.11 + +0.17.0 +- Fix performance regression due to platform.architecture performance #258 + +0.16.0 +- Fix deprecation warning with shapely 1.8+ #250 + +0.15.0 +- Fix deprecation warning with Affine #211 +- Avoid unnecessary memory copy operation #213 + +0.14.0 +- Add support return statement to zone_func #203 +- Take into account per dataset mask #198 +- Accessing geometry properties for user-defined stats #193 +- Updated method calls for numpy v1.16 #184 + +0.13.1 +- Bug fix for io.read_features with Fiona 1.8+ + +0.13.0 +- Require Rasterio>=1.0 +- Fix buffer logic for boxify_points (#171) + +0.12.1 +- Cast all integer data to int64 if we're on a 64 bit platform (#159) + 0.12.0 - zone_func argument to apply a function to the masked array before computing stats - support shapely 1.6 exceptions @@ -21,7 +68,7 @@ 0.10.0 - Added a generator variant of zonal_stats (gen_zonal_stats) and point_query (gen_point_query) which yield results instead of returning a list -- Dependency on cligj to standardize the geojson 
input/output args and opts +- Dependency on cligj to standardize the geojson input/output args and opts - Input/Output can be geojson sequences; allows for stream processing 0.9.2 diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 0837708..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include LICENSE.txt -include README.rst -include requirements.txt -exclude MANIFEST.in -exclude Vagrantfile diff --git a/README.rst b/README.rst index 7943a31..906c36e 100644 --- a/README.rst +++ b/README.rst @@ -2,17 +2,16 @@ rasterstats =========== |BuildStatus|_ -|CoverageStatus|_ ``rasterstats`` is a Python module for summarizing geospatial raster datasets based on vector geometries. It includes functions for **zonal statistics** and interpolated **point queries**. The command-line interface allows for -easy interoperability with other GeoJSON tools. +easy interoperability with other GeoJSON tools. Documentation ------------- For details on installation and usage, visit the documentation at `http://pythonhosted.org/rasterstats `_. -What does it do? +What does it do? ---------------- Given a vector layer and a raster band, calculate the summary statistics of each vector geometry. For example, with a polygon vector layer and a digital elevation model (DEM) raster, compute the @@ -25,39 +24,39 @@ mean elevation of each polygon. Command Line Quick Start ------------------------ -The command line interfaces to zonalstats and point_query +The command line interfaces to zonalstats and point_query are `rio` subcommands which read and write geojson features .. code-block:: bash - $ fio cat polygon.shp | rio zonalstats -r elevation.tif + $ fio cat polygon.shp | rio zonalstats -r elevation.tif $ fio cat points.shp | rio pointquery -r elevation.tif See the `CLI Docs `_. for more detail. Python Quick Start ------------ +------------------ For zonal statistics .. code-block:: python >>> from rasterstats import zonal_stats - >>> stats = zonal_stats("tests/data/polygons.shp", "tests/data/elevation.tif") - >>> stats[1].keys() - ['count', 'min', 'max', 'mean'] + >>> stats = zonal_stats("tests/data/polygons.shp", "tests/data/slope.tif") + >>> stats[0].keys() + dict_keys(['min', 'max', 'mean', 'count']) >>> [f['mean'] for f in stats] - [756.6057470703125, 114.660084635416666] + [14.660084635416666, 56.60576171875] and for point queries .. code-block:: python >>> from rasterstats import point_query - >>> point = "POINT(245309 1000064)" - >>> point_query(point, "tests/data/elevation.tif") - [723.9872347624] + >>> point = {'type': 'Point', 'coordinates': (245309.0, 1000064.0)} + >>> point_query(point, "tests/data/slope.tif") + [74.09817594635244] Issues @@ -69,8 +68,5 @@ Find a bug? Report it via github issues by providing - python code or command to reproduce the error - information on your environment: versions of python, gdal and numpy and system memory -.. |BuildStatus| image:: https://api.travis-ci.org/perrygeo/python-rasterstats.svg -.. _BuildStatus: https://travis-ci.org/perrygeo/python-rasterstats - -.. |CoverageStatus| image:: https://coveralls.io/repos/github/perrygeo/python-rasterstats/badge.svg?branch=master -.. _CoverageStatus: https://coveralls.io/github/perrygeo/python-rasterstats?branch=master +.. |BuildStatus| image:: https://github.com/perrygeo/python-rasterstats/workflows/Rasterstats%20Python%20package/badge.svg +.. 
_BuildStatus: https://github.com/perrygeo/python-rasterstats/actions diff --git a/docs/cli.rst b/docs/cli.rst index 2d396aa..a98d2cb 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -85,7 +85,7 @@ for performing zonal statistics and point_queries at the command line. Example ----------- -In the following examples we use a polygon shapefile representing countries (``countries.shp``) and a raster digitial elevation model (``dem.tif``). The data are assumed to be in the same spatial reference system. +In the following examples we use a polygon shapefile representing countries (``countries.shp``) and a raster digital elevation model (``dem.tif``). The data are assumed to be in the same spatial reference system. GeoJSON inputs ^^^^^^^^^^^^^^ @@ -97,7 +97,7 @@ This will print the GeoJSON Features to the terminal (stdout) with Features like {"type": Feature, "geometry": {...} ,"properties": {...}} -We'll use unix pipes to pass this data directly into our zonal stats command without an intemediate file. +We'll use unix pipes to pass this data directly into our zonal stats command without an intermediate file. Specifying the Raster ^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/conf.py b/docs/conf.py index f10f52a..42bc13b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # rasterstats documentation build configuration file, created by # sphinx-quickstart on Mon Aug 31 09:59:38 2015. @@ -13,55 +12,54 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os -import shlex import re # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'numpydoc', + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", + "numpydoc", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'rasterstats' -copyright = '2015, Matthew T. Perry' -author = 'Matthew T. Perry' +project = "rasterstats" +copyright = "2015, Matthew T. Perry" +author = "Matthew T. 
Perry" + # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -70,17 +68,19 @@ # The short X.Y version. def get_version(): vfile = os.path.join( - os.path.dirname(__file__), "..", "src", "rasterstats", "_version.py") - with open(vfile, "r") as vfh: + os.path.dirname(__file__), "..", "src", "rasterstats", "_version.py" + ) + with open(vfile) as vfh: vline = vfh.read() vregex = r"^__version__ = ['\"]([^'\"]*)['\"]" match = re.search(vregex, vline, re.M) if match: return match.group(1) else: - raise RuntimeError("Unable to find version string in {}.".format(vfile)) + raise RuntimeError(f"Unable to find version string in {vfile}.") + -version = '.'.join(get_version().split(".")[0:2]) +version = ".".join(get_version().split(".")[0:2]) # The full version, including alpha/beta/rc tags. release = get_version() @@ -93,37 +93,37 @@ def get_version(): # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -133,156 +133,155 @@ def get_version(): # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'alabaster' +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'rasterstatsdoc' +htmlhelp_basename = "rasterstatsdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. 
-#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'rasterstats.tex', 'rasterstats Documentation', - 'Matthew T. Perry', 'manual'), + ( + master_doc, + "rasterstats.tex", + "rasterstats Documentation", + "Matthew T. Perry", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'rasterstats', 'rasterstats Documentation', - [author], 1) -] +man_pages = [(master_doc, "rasterstats", "rasterstats Documentation", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -291,21 +290,27 @@ def get_version(): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'rasterstats', 'rasterstats Documentation', - author, 'rasterstats', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "rasterstats", + "rasterstats Documentation", + author, + "rasterstats", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" diff --git a/docs/index.rst b/docs/index.rst index 766dffe..ca44d47 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,7 +25,7 @@ Install:: pip install rasterstats -Given a polygon vector layer and a digitial elevation model (DEM) raster: +Given a polygon vector layer and a digital elevation model (DEM) raster: .. 
figure:: https://github.com/perrygeo/python-raster-stats/raw/master/docs/img/zones_elevation.png :align: center diff --git a/docs/manual.rst b/docs/manual.rst index fd26cc0..d508d74 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -105,7 +105,7 @@ to a coordinate reference system:: >>> import rasterio >>> with rasterio.open('tests/data/slope.tif') as src: - ... affine = src.affine + ... affine = src.transform ... array = src.read(1) >>> zs = zonal_stats('tests/data/polygons.shp', array, affine=affine) @@ -149,7 +149,7 @@ You can also specify as a space-delimited string:: Note that certain statistics (majority, minority, and unique) require significantly more processing -due to expensive counting of unique occurences for each pixel value. +due to expensive counting of unique occurrences for each pixel value. You can also use a percentile statistic by specifying ``percentile_`` where ```` can be a floating point number between 0 and 100. @@ -174,6 +174,20 @@ then use it in your ``zonal_stats`` call like so:: ... add_stats={'mymean':mymean}) [{'count': 75, 'mymean': 14.660084635416666}, {'count': 50, 'mymean': 56.605761718750003}] +To have access to geometry properties, a dictionary can be passed to the user-defined function:: + + >>> def mymean_prop(x,prop): + ... return np.ma.mean(x) * prop['id'] + +then use it in your ``zonal_stats`` call like so:: + + >>> zonal_stats("tests/data/polygons.shp", + ... "tests/data/slope.tif", + ... stats="count", + ... add_stats={'mymean_prop':mymean_prop}, + ... properties=['id']) + [{'count': 75, 'mymean_prop': 14.660084635416666}, {'count': 50, 'mymean_prop': 113.2115234375}] + GeoJSON output ^^^^^^^^^^^^^^ @@ -207,7 +221,7 @@ There is no right or wrong way to rasterize a vector. The default strategy is to The figure above illustrates the difference; the default ``all_touched=False`` is on the left while the ``all_touched=True`` option is on the right. -Both approaches are valid and there are tradeoffs to consider. Using the default rasterizer may miss polygons that are smaller than your cell size resulting in ``None`` stats for those geometries. Using the ``all_touched`` strategy includes many cells along the edges that may not be representative of the geometry and may give severly biased results in some cases. +Both approaches are valid and there are tradeoffs to consider. Using the default rasterizer may miss polygons that are smaller than your cell size resulting in ``None`` stats for those geometries. Using the ``all_touched`` strategy includes many cells along the edges that may not be representative of the geometry and may give severely biased results in some cases. Working with categorical rasters @@ -274,7 +288,7 @@ and standard interfaces like GeoJSON are employed to keep the core library lean. History -------- -This work grew out of a need to have a native python implementation (based on numpy) for zonal statisics. +This work grew out of a need to have a native python implementation (based on numpy) for zonal statistics. I had been `using starspan `_, a C++ command line tool, as well as GRASS's `r.statistics `_ for many years. They were suitable for offline analyses but were rather clunky to deploy in a large python application. 
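The ``add_stats`` examples in the manual above use one- and two-argument functions; the dispatch added to ``src/rasterstats/main.py`` further down in this diff inspects the function signature and also supports a three-argument form that additionally receives the rasterized-geometry array. A minimal sketch (the ``valid_fraction`` name and the metric itself are illustrative, not part of the library; fixture paths as in the manual):

.. code-block:: python

    from rasterstats import zonal_stats

    def valid_fraction(masked, properties, rv_array):
        # masked: np.ma.MaskedArray of raster values inside the zone
        # properties: the feature's GeoJSON properties dict (unused here)
        # rv_array: boolean array of cells rasterized from the geometry
        total = int(rv_array.sum())
        # fraction of the geometry's cells that hold valid (non-nodata) data
        return masked.count() / total if total else None

    stats = zonal_stats(
        "tests/data/polygons.shp",
        "tests/data/slope.tif",
        stats="count",
        add_stats={"valid_fraction": valid_fraction},
    )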
diff --git a/examples/benchmark.py b/examples/benchmark.py index 49f5ee4..34f6227 100644 --- a/examples/benchmark.py +++ b/examples/benchmark.py @@ -1,4 +1,3 @@ -from __future__ import print_function """ First, download the data and place in `benchmark_data` @@ -16,16 +15,20 @@ 1bc8711 130.93s MacBook Pro (Retina, 15-inch, Mid 2014) 2.2GHz i7, 16GB RAM 2277962 80.68s MacBook Pro (Retina, 15-inch, Mid 2014) 2.2GHz i7, 16GB RAM """ -from rasterstats import zonal_stats + import time -class Timer(): +from rasterstats import zonal_stats + + +class Timer: def __enter__(self): self.start = time.time() def __exit__(self, *args): print("Time:", time.time() - self.start) + countries = "./benchmark_data/ne_50m_admin_0_countries.shp" elevation = "./benchmark_data/SRTM_1km.tif" diff --git a/examples/multiproc.py b/examples/multiproc.py index 7c73734..1cb955b 100644 --- a/examples/multiproc.py +++ b/examples/multiproc.py @@ -2,9 +2,9 @@ import itertools import multiprocessing -from rasterstats import zonal_stats import fiona +from rasterstats import zonal_stats shp = "benchmark_data/ne_50m_admin_0_countries.shp" tif = "benchmark_data/srtm.tif" @@ -13,7 +13,7 @@ def chunks(data, n): """Yield successive n-sized chunks from a slice-able iterable.""" for i in range(0, len(data), n): - yield data[i:i+n] + yield data[i : i + n] def zonal_stats_partial(feats): @@ -22,7 +22,6 @@ def zonal_stats_partial(feats): if __name__ == "__main__": - with fiona.open(shp) as src: features = list(src) diff --git a/examples/simple.py b/examples/simple.py index adba80d..af24258 100644 --- a/examples/simple.py +++ b/examples/simple.py @@ -1,8 +1,9 @@ +from pprint import pprint + from rasterstats import zonal_stats polys = "../tests/data/multilines.shp" raster = "../tests/data/slope.tif" stats = zonal_stats(polys, raster, stats="*") -from pprint import pprint pprint(stats) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..23c76b4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,103 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "rasterstats" +description = "Summarize geospatial raster datasets based on vector geometries" +authors = [ + {name = "Matthew Perry", email = "perrygeo@gmail.com"}, +] +readme = "README.rst" +keywords = ["gis", "geospatial", "geographic", "raster", "vector", "zonal statistics"] +dynamic = ["version"] +license = {text = "BSD-3-Clause"} +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Utilities", + "Topic :: Scientific/Engineering :: GIS", +] +requires-python = ">=3.9" +dependencies = [ + "affine", + "click >7.1, !=8.2.1", + "cligj >=0.4", + "fiona", + "numpy >=1.9", + "rasterio >=1.0", + "simplejson", + "shapely", +] + +[project.optional-dependencies] +progress = [ + "tqdm" +] +docs = [ + "numpydoc", + "sphinx", + "sphinx-rtd-theme", +] +test = [ + "coverage", + "geopandas", + "pyshp >=1.1.4", + "pytest >=4.6", + "pytest-cov >=2.2.0", + "simplejson", +] +dev = [ + "rasterstats[test]", + "ruff", + "twine", +] + +[project.entry-points."rasterio.rio_plugins"] +zonalstats = "rasterstats.cli:zonalstats" +pointquery = 
"rasterstats.cli:pointquery" + +[project.urls] +Documentation = "https://pythonhosted.org/rasterstats/" +"Source Code" = "https://github.com/perrygeo/python-rasterstats" + +[tool.hatch.build.targets.sdist] +only-include = ["src", "tests"] + +[tool.pytest.ini_options] +filterwarnings = [ + "error", + "ignore::UserWarning", +] +testpaths = ["tests"] +# addopts = "--verbose -rf --ipdb --maxfail=1" + +[tool.setuptools.dynamic] +version = {attr = "rasterstats._version.__version__"} + +[tool.hatch.version] +path = "src/rasterstats/_version.py" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle + "F", # Pyflakes + "I", # isort + "RUF", # Ruff-specific rules + "UP", # pyupgrade +] +ignore = [ + "RUF005", # Consider iterable unpacking instead of concatenation +] + +[tool.ruff] +# TODO: files in docs/notebooks/ use old versions and are incompatible with modern tools +extend-exclude = ["*.ipynb"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ad82f14..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -shapely -numpy>=1.9 -rasterio>=0.27 -cligj>=0.4 -fiona -simplejson diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 267e341..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,9 +0,0 @@ -# https://github.com/pytest-dev/pytest/issues/1043 and 1032 -pytest>=3.0 - -coverage -simplejson -git+git://github.com/mverteuil/pytest-ipdb.git -twine -numpydoc -pytest-cov diff --git a/scripts/release.sh b/scripts/release.sh index 4fc1512..fabec1e 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -1,4 +1,11 @@ -python setup.py sdist --formats=gztar,zip bdist_wheel +#!/bin/bash + +python -m build +# Redirect any warnings and check for failures +if [[ -n $(twine check --strict dist/* 2>/dev/null | grep "Failed") ]]; then + echo "Detected invalid markup, exiting!" + exit 1 +fi twine upload dist/* echo "Don't forget to publish the docs..." 
diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ef0c067..0000000 --- a/setup.cfg +++ /dev/null @@ -1,5 +0,0 @@ -# content of setup.cfg -[pytest] -norecursedirs = examples* src* scripts* docs* -# addopts = --verbose -rf --ipdb --maxfail=1 - diff --git a/setup.py b/setup.py deleted file mode 100644 index a87a1b1..0000000 --- a/setup.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import sys -import re -from setuptools import setup -from setuptools.command.test import test as TestCommand - - -def read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() - - -def get_version(): - vfile = os.path.join( - os.path.dirname(__file__), "src", "rasterstats", "_version.py") - with open(vfile, "r") as vfh: - vline = vfh.read() - vregex = r"^__version__ = ['\"]([^'\"]*)['\"]" - match = re.search(vregex, vline, re.M) - if match: - return match.group(1) - else: - raise RuntimeError("Unable to find version string in {}.".format(vfile)) - -class PyTest(TestCommand): - def finalize_options(self): - TestCommand.finalize_options(self) - self.test_args = [] - self.test_suite = True - - def run_tests(self): - import pytest - errno = pytest.main(self.test_args) - sys.exit(errno) - - -setup( - name="rasterstats", - version=get_version(), - author="Matthew Perry", - author_email="perrygeo@gmail.com", - description="Summarize geospatial raster datasets based on vector geometries", - license="BSD", - keywords="gis geospatial geographic raster vector zonal statistics", - url="https://github.com/perrygeo/python-raster-stats", - package_dir={'': 'src'}, - packages=['rasterstats'], - long_description=read('README.rst'), - install_requires=read('requirements.txt').splitlines(), - tests_require=['pytest', 'pytest-cov>=2.2.0', 'pyshp>=1.1.4', - 'coverage', 'simplejson'], - cmdclass={'test': PyTest}, - classifiers=[ - "Development Status :: 4 - Beta", - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - "License :: OSI Approved :: BSD License", - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - "Topic :: Utilities", - 'Topic :: Scientific/Engineering :: GIS', - ], - entry_points=""" - [rasterio.rio_plugins] - zonalstats=rasterstats.cli:zonalstats - pointquery=rasterstats.cli:pointquery - """) diff --git a/src/rasterstats/__init__.py b/src/rasterstats/__init__.py index 9dc58e9..8133fb4 100644 --- a/src/rasterstats/__init__.py +++ b/src/rasterstats/__init__.py @@ -1,12 +1,15 @@ -# -*- coding: utf-8 -*- -from .main import gen_zonal_stats, raster_stats, zonal_stats -from .point import gen_point_query, point_query +# isort: skip_file +from rasterstats.main import gen_zonal_stats, raster_stats, zonal_stats +from rasterstats.point import gen_point_query, point_query from rasterstats import cli from rasterstats._version import __version__ -__all__ = ['gen_zonal_stats', - 'gen_point_query', - 'raster_stats', - 'zonal_stats', - 'point_query', - 'cli'] +__all__ = [ + "__version__", + "cli", + "gen_point_query", + "gen_zonal_stats", + "point_query", + "raster_stats", + "zonal_stats", +] diff --git a/src/rasterstats/_version.py b/src/rasterstats/_version.py index ea370a8..5f4bb0b 100644 --- a/src/rasterstats/_version.py +++ b/src/rasterstats/_version.py @@ -1 +1 @@ -__version__ = "0.12.0" +__version__ = "0.20.0" diff --git a/src/rasterstats/cli.py b/src/rasterstats/cli.py index d365439..0a413bf 100644 --- a/src/rasterstats/cli.py +++ 
b/src/rasterstats/cli.py @@ -1,40 +1,51 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import division import logging import click import cligj import simplejson as json -from rasterstats import gen_zonal_stats, gen_point_query +from rasterstats import gen_point_query, gen_zonal_stats from rasterstats._version import __version__ as version -SETTINGS = dict(help_option_names=['-h', '--help']) +SETTINGS = dict(help_option_names=["-h", "--help"]) + @click.command(context_settings=SETTINGS) @cligj.features_in_arg -@click.version_option(version=version, message='%(version)s') -@click.option('--raster', '-r', required=True) -@click.option('--all-touched/--no-all-touched', default=False) -@click.option('--band', type=int, default=1) -@click.option('--categorical/--no-categorical', default=False) -@click.option('--indent', type=int, default=None) -@click.option('--info/--no-info', default=False) -@click.option('--nodata', type=int, default=None) -@click.option('--prefix', type=str, default='_') -@click.option('--stats', type=str, default=None) -@cligj.sequence_opt +@click.version_option(version=version, message="%(version)s") +@click.option("--raster", "-r", required=True) +@click.option("--all-touched/--no-all-touched", default=False) +@click.option("--band", type=int, default=1) +@click.option("--categorical/--no-categorical", default=False) +@click.option("--indent", type=int, default=None) +@click.option("--info/--no-info", default=False) +@click.option("--nodata", type=int, default=None) +@click.option("--prefix", type=str, default="_") +@click.option("--stats", type=str, default=None) +@click.option("--sequence/--no-sequence", type=bool, default=False) @cligj.use_rs_opt -def zonalstats(features, raster, all_touched, band, categorical, - indent, info, nodata, prefix, stats, sequence, use_rs): - '''zonalstats generates summary statistics of geospatial raster datasets +def zonalstats( + features, + raster, + all_touched, + band, + categorical, + indent, + info, + nodata, + prefix, + stats, + sequence, + use_rs, +): + """zonalstats generates summary statistics of geospatial raster datasets based on vector features. The input arguments to zonalstats should be valid GeoJSON Features. (see cligj) - The output GeoJSON will be mostly unchanged but have additional properties per feature - describing the summary statistics (min, max, mean, etc.) of the underlying raster dataset. + The output GeoJSON will be mostly unchanged but have additional properties per + feature describing the summary statistics (min, max, mean, etc.) of the underlying + raster dataset. The raster is specified by the required -r/--raster argument. 
@@ -42,14 +53,13 @@ def zonalstats(features, raster, all_touched, band, categorical, \b rio zonalstats states.geojson -r rainfall.tif > mean_rainfall_by_state.geojson - ''' - + """ if info: logging.basicConfig(level=logging.INFO) if stats is not None: stats = stats.split(" ") - if 'all' in [x.lower() for x in stats]: + if "all" in [x.lower() for x in stats]: stats = "ALL" zonal_results = gen_zonal_stats( @@ -61,41 +71,43 @@ def zonalstats(features, raster, all_touched, band, categorical, nodata=nodata, stats=stats, prefix=prefix, - geojson_out=True) + geojson_out=True, + ) if sequence: for feature in zonal_results: if use_rs: - click.echo(b'\x1e', nl=False) + click.echo(b"\x1e", nl=False) click.echo(json.dumps(feature)) else: - click.echo(json.dumps( - {'type': 'FeatureCollection', - 'features': list(zonal_results)})) + click.echo( + json.dumps({"type": "FeatureCollection", "features": list(zonal_results)}) + ) @click.command(context_settings=SETTINGS) @cligj.features_in_arg -@click.version_option(version=version, message='%(version)s') -@click.option('--raster', '-r', required=True) -@click.option('--band', type=int, default=1) -@click.option('--nodata', type=int, default=None) -@click.option('--indent', type=int, default=None) -@click.option('--interpolate', type=str, default='bilinear') -@click.option('--property-name', type=str, default='value') -@cligj.sequence_opt +@click.version_option(version=version, message="%(version)s") +@click.option("--raster", "-r", required=True) +@click.option("--band", type=int, default=1) +@click.option("--nodata", type=int, default=None) +@click.option("--indent", type=int, default=None) +@click.option("--interpolate", type=str, default="bilinear") +@click.option("--property-name", type=str, default="value") +@click.option("--sequence/--no-sequence", type=bool, default=False) @cligj.use_rs_opt -def pointquery(features, raster, band, indent, nodata, - interpolate, property_name, sequence, use_rs): +def pointquery( + features, raster, band, indent, nodata, interpolate, property_name, sequence, use_rs +): """ Queries the raster values at the points of the input GeoJSON Features. The raster values are added to the features properties and output as GeoJSON Feature Collection. - If the Features are Points, the point geometery is used. - For other Feauture types, all of the verticies of the geometry will be queried. + If the Features are Points, the point geometry is used. + For other Feature types, all of the vertices of the geometry will be queried. For example, you can provide a linestring and get the profile along the line - if the verticies are spaced properly. + if the vertices are spaced properly. You can use either bilinear (default) or nearest neighbor interpolation.
""" @@ -107,14 +119,13 @@ def pointquery(features, raster, band, indent, nodata, nodata=nodata, interpolate=interpolate, property_name=property_name, - geojson_out=True) + geojson_out=True, + ) if sequence: for feature in results: if use_rs: - click.echo(b'\x1e', nl=False) + click.echo(b"\x1e", nl=False) click.echo(json.dumps(feature)) else: - click.echo(json.dumps( - {'type': 'FeatureCollection', - 'features': list(results)})) + click.echo(json.dumps({"type": "FeatureCollection", "features": list(results)})) diff --git a/src/rasterstats/io.py b/src/rasterstats/io.py index 4486f85..7ae8d87 100644 --- a/src/rasterstats/io.py +++ b/src/rasterstats/io.py @@ -1,115 +1,133 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import division -import sys import json import math +import warnings +from collections.abc import Iterable, Mapping +from json import JSONDecodeError +from os import PathLike + import fiona +import numpy as np import rasterio -import warnings -from rasterio.transform import guard_transform from affine import Affine -import numpy as np +from fiona.errors import DriverError +from rasterio.enums import MaskFlags +from rasterio.transform import guard_transform +from shapely import wkb, wkt + +try: + from shapely.errors import ShapelyError +except ImportError: # pragma: no cover + from shapely.errors import ReadingError as ShapelyError + + +geom_types = [ + "Point", + "LineString", + "Polygon", + "MultiPoint", + "MultiLineString", + "MultiPolygon", +] + try: - from shapely.errors import ReadingError -except: - from shapely.geos import ReadingError -from shapely import wkt, wkb -from collections import Iterable, Mapping + # Fiona 1.9+ + import fiona.model + def fiona_generator(obj, layer=0): + with fiona.open(obj, "r", layer=layer) as src: + for feat in src: + yield fiona.model.to_dict(feat) -geom_types = ["Point", "LineString", "Polygon", - "MultiPoint", "MultiLineString", "MultiPolygon"] +except ModuleNotFoundError: + # Fiona <1.9 + def fiona_generator(obj, layer=0): + with fiona.open(obj, "r", layer=layer) as src: + yield from src -PY3 = sys.version_info[0] >= 3 -if PY3: - string_types = str, # pragma: no cover -else: - string_types = basestring, # pragma: no cover def wrap_geom(geom): - """ Wraps a geometry dict in an GeoJSON Feature - """ - return {'type': 'Feature', - 'properties': {}, - 'geometry': geom} + """Wraps a geometry dict in an GeoJSON Feature""" + return {"type": "Feature", "properties": {}, "geometry": geom} def parse_feature(obj): - """ Given a python object + """Given a python object attemp to a GeoJSON-like Feature from it """ # object implementing geo_interface - if hasattr(obj, '__geo_interface__'): + if hasattr(obj, "__geo_interface__"): gi = obj.__geo_interface__ - if gi['type'] in geom_types: + if gi["type"] in geom_types: return wrap_geom(gi) - elif gi['type'] == 'Feature': + elif gi["type"] == "Feature": return gi # wkt try: shape = wkt.loads(obj) return wrap_geom(shape.__geo_interface__) - except (ReadingError, TypeError, AttributeError): + except (ShapelyError, TypeError, AttributeError): pass # wkb try: shape = wkb.loads(obj) return wrap_geom(shape.__geo_interface__) - except (ReadingError, TypeError): + except (ShapelyError, TypeError): pass # geojson-like python mapping try: - if obj['type'] in geom_types: + if obj["type"] in geom_types: return wrap_geom(obj) - elif obj['type'] == 'Feature': + elif obj["type"] == "Feature": return obj except (AssertionError, TypeError): pass - raise ValueError("Can't parse %s as a 
geojson Feature object" % obj) + raise ValueError(f"Can't parse {obj} as a geojson Feature object") def read_features(obj, layer=0): features_iter = None - if isinstance(obj, string_types): + if isinstance(obj, (str, PathLike)): + obj = str(obj) try: # test it as fiona data source - with fiona.open(obj, 'r', layer=layer) as src: + with fiona.open(obj, "r", layer=layer) as src: assert len(src) > 0 - def fiona_generator(obj): - with fiona.open(obj, 'r', layer=layer) as src: - for feature in src: - yield feature - - features_iter = fiona_generator(obj) - except (AssertionError, TypeError, IOError, OSError): + features_iter = fiona_generator(obj, layer) + except ( + AssertionError, + DriverError, + OSError, + TypeError, + UnicodeDecodeError, + ValueError, + ): try: mapping = json.loads(obj) - if 'type' in mapping and mapping['type'] == 'FeatureCollection': - features_iter = mapping['features'] - elif mapping['type'] in geom_types + ['Feature']: + if "type" in mapping and mapping["type"] == "FeatureCollection": + features_iter = mapping["features"] + elif mapping["type"] in geom_types + ["Feature"]: features_iter = [parse_feature(mapping)] - except ValueError: + except (ValueError, JSONDecodeError): # Single feature-like string features_iter = [parse_feature(obj)] elif isinstance(obj, Mapping): - if 'type' in obj and obj['type'] == 'FeatureCollection': - features_iter = obj['features'] + if "type" in obj and obj["type"] == "FeatureCollection": + features_iter = obj["features"] else: features_iter = [parse_feature(obj)] elif isinstance(obj, bytes): # Single binary object, probably a wkb features_iter = [parse_feature(obj)] - elif hasattr(obj, '__geo_interface__'): + elif hasattr(obj, "__geo_interface__"): mapping = obj.__geo_interface__ - if mapping['type'] == 'FeatureCollection': - features_iter = mapping['features'] + if mapping["type"] == "FeatureCollection": + features_iter = mapping["features"] else: features_iter = [parse_feature(mapping)] elif isinstance(obj, Iterable): @@ -123,22 +141,20 @@ def fiona_generator(obj): def read_featurecollection(obj, layer=0): features = read_features(obj, layer=layer) - fc = {'type': 'FeatureCollection', 'features': []} - fc['features'] = [f for f in features] + fc = {"type": "FeatureCollection", "features": []} + fc["features"] = [f for f in features] return fc def rowcol(x, y, affine, op=math.floor): - """ Get row/col for a x/y - """ + """Get row/col for a x/y""" r = int(op((y - affine.f) / affine.e)) c = int(op((x - affine.c) / affine.a)) return r, c def bounds_window(bounds, affine): - """Create a full cover rasterio-style window - """ + """Create a full cover rasterio-style window""" w, s, e, n = bounds row_start, col_start = rowcol(w, n, affine) row_stop, col_stop = rowcol(e, s, affine, op=math.ceil) @@ -147,11 +163,17 @@ def bounds_window(bounds, affine): def window_bounds(window, affine): (row_start, row_stop), (col_start, col_stop) = window - w, s = (col_start, row_stop) * affine - e, n = (col_stop, row_start) * affine + w, s = affine * (col_start, row_stop) + e, n = affine * (col_stop, row_start) return w, s, e, n +def beyond_extent(window, shape): + """Checks if window references pixels beyond the raster extent""" + (wr_start, wr_stop), (wc_start, wc_stop) = window + return wr_start < 0 or wc_start < 0 or wr_stop > shape[0] or wc_stop > shape[1] + + def boundless_array(arr, window, nodata, masked=False): dim3 = False if len(arr.shape) == 3: @@ -176,7 +198,8 @@ def boundless_array(arr, window, nodata, masked=False): window_shape = (wr_stop - 
wr_start, wc_stop - wc_start) # create an array of nodata values - out = np.ones(shape=window_shape) * nodata + out = np.empty(shape=window_shape, dtype=arr.dtype) + out[:] = nodata # Fill with data where overlapping nr_start = olr_start - wr_start @@ -184,11 +207,13 @@ def boundless_array(arr, window, nodata, masked=False): nc_start = olc_start - wc_start nc_stop = nc_start + overlap_shape[1] if dim3: - out[:, nr_start:nr_stop, nc_start:nc_stop] = \ - arr[:, olr_start:olr_stop, olc_start:olc_stop] + out[:, nr_start:nr_stop, nc_start:nc_stop] = arr[ + :, olr_start:olr_stop, olc_start:olc_stop + ] else: - out[nr_start:nr_stop, nc_start:nc_stop] = \ - arr[olr_start:olr_stop, olc_start:olc_stop] + out[nr_start:nr_stop, nc_start:nc_stop] = arr[ + olr_start:olr_stop, olc_start:olc_stop + ] if masked: out = np.ma.MaskedArray(out, mask=(out == nodata)) @@ -196,8 +221,18 @@ def boundless_array(arr, window, nodata, masked=False): return out -class Raster(object): - """ Raster abstraction for data access to 2/3D array-like things +class NodataWarning(UserWarning): + pass + + +# *should* limit NodataWarnings to once, but doesn't! Bug in CPython. +# warnings.filterwarnings("once", category=NodataWarning) +# instead we resort to a global bool +already_warned_nodata = False + + +class Raster: + """Raster abstraction for data access to 2/3D array-like things Use as a context manager to ensure dataset gets closed properly:: @@ -238,7 +273,7 @@ def __init__(self, raster, affine=None, nodata=None, band=1): self.shape = raster.shape self.nodata = nodata else: - self.src = rasterio.open(raster, 'r') + self.src = rasterio.open(raster, "r") self.affine = guard_transform(self.src.transform) self.shape = (self.src.height, self.src.width) self.band = band @@ -250,13 +285,12 @@ def __init__(self, raster, affine=None, nodata=None, band=1): self.nodata = self.src.nodata def index(self, x, y): - """ Given (x, y) in crs, return the (row, column) on the raster - """ - col, row = [math.floor(a) for a in (~self.affine * (x, y))] + """Given (x, y) in crs, return the (row, column) on the raster""" + col, row = (math.floor(a) for a in (~self.affine * (x, y))) return row, col - def read(self, bounds=None, window=None, masked=False): - """ Performs a boundless read against the underlying array source + def read(self, bounds=None, window=None, masked=False, boundless=True): + """Performs a read against the underlying array source Parameters ---------- @@ -267,7 +301,9 @@ def read(self, bounds=None, window=None, masked=False): specifying both or neither will raise exception masked: boolean return a masked numpy array, default: False - bounds OR window are required, specifying both or neither will raise exception + boundless: boolean + allow window/bounds that extend beyond the dataset's extent, default: True + partially or completely filled arrays will be returned as appropriate. 
Returns ------- @@ -284,6 +320,11 @@ def read(self, bounds=None, window=None, masked=False): else: raise ValueError("Specify either bounds or window") + if not boundless and beyond_extent(win, self.shape): + raise ValueError( + "Window/bounds is outside dataset extent, boundless reads are disabled" + ) + c, _, _, f = window_bounds(win, self.affine) # c ~ west, f ~ north a, b, _, d, e, _, _, _, _ = tuple(self.affine) new_affine = Affine(a, b, c, d, e, f) @@ -291,16 +332,32 @@ def read(self, bounds=None, window=None, masked=False): nodata = self.nodata if nodata is None: nodata = -999 - warnings.warn("Setting nodata to -999; specify nodata explicitly") + global already_warned_nodata + if not already_warned_nodata: + warnings.warn( + "Setting nodata to -999; specify nodata explicitly", NodataWarning + ) + already_warned_nodata = True if self.array is not None: # It's an ndarray already new_array = boundless_array( - self.array, window=win, nodata=nodata, masked=masked) + self.array, window=win, nodata=nodata, masked=masked + ) elif self.src: # It's an open rasterio dataset + if all( + MaskFlags.per_dataset in flags for flags in self.src.mask_flag_enums + ): + if not masked: + masked = True + warnings.warn( + "Setting masked to True because dataset mask has been detected" + ) + new_array = self.src.read( - self.band, window=win, boundless=True, masked=masked) + self.band, window=win, boundless=boundless, masked=masked + ) return Raster(new_array, new_affine, nodata) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 540ed20..43d9dc1 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -1,19 +1,33 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import division -import numpy as np +import inspect +import sys import warnings + +import numpy as np from affine import Affine from shapely.geometry import shape -from .io import read_features, Raster -from .utils import (rasterize_geom, get_percentile, check_stats, - remap_categories, key_assoc_val, boxify_points) + +from rasterstats.io import Raster, read_features +from rasterstats.utils import ( + boxify_points, + check_stats, + get_percentile, + key_assoc_val, + rasterize_geom, + remap_categories, +) + +try: + from tqdm import tqdm +except ImportError: + tqdm = None def raster_stats(*args, **kwargs): """Deprecated. Use zonal_stats instead.""" - warnings.warn("'raster_stats' is an alias to 'zonal_stats'" - " and will disappear in 1.0", DeprecationWarning) + warnings.warn( + "'raster_stats' is an alias to 'zonal_stats' and will disappear in 1.0", + DeprecationWarning, + ) return zonal_stats(*args, **kwargs) @@ -25,35 +39,53 @@ def zonal_stats(*args, **kwargs): The only difference is that ``zonal_stats`` will return a list rather than a generator.""" - return list(gen_zonal_stats(*args, **kwargs)) + progress = kwargs.get("progress") + if progress: + if tqdm is None: + raise ValueError( + "You specified progress=True, but tqdm is not installed in " + "the environment. " + "You can do pip install rasterstats[progress] to install tqdm!" 
+ ) + stats = gen_zonal_stats(*args, **kwargs) + total = len(args[0]) + return [stat for stat in tqdm(stats, total=total)] + else: + return list(gen_zonal_stats(*args, **kwargs)) def gen_zonal_stats( - vectors, raster, - layer=0, - band=1, - nodata=None, - affine=None, - stats=None, - all_touched=False, - categorical=False, - category_map=None, - add_stats=None, - zone_func=None, - raster_out=False, - prefix=None, - geojson_out=False, **kwargs): + vectors, + raster, + layer=0, + band=1, + nodata=None, + affine=None, + stats=None, + all_touched=False, + categorical=False, + category_map=None, + add_stats=None, + zone_func=None, + raster_out=False, + prefix=None, + geojson_out=False, + boundless=True, + **kwargs, +): """Zonal statistics of raster values aggregated to vector geometries. Parameters ---------- - vectors: path to an vector source or geo-like python objects + vectors : str or PathLike + Path to a vector source or geo-like python objects. - raster: ndarray or path to a GDAL raster source + raster: array_like, str or PathLike + NumPy array or path to a GDAL raster source. If ndarray is passed, the ``affine`` kwarg is required. layer: int or string, optional - If `vectors` is a path to an fiona source, + If `vectors` is a path to a fiona source, specify the vector layer to use either by name or number. defaults to 0 @@ -109,6 +141,10 @@ def gen_zonal_stats( with zonal stats appended as additional properties. Use with `prefix` to ensure unique and meaningful property names. + boundless: boolean + Allow features that extend beyond the raster dataset's extent, default: True + Cells outside dataset extents are treated as nodata. + Returns ------- generator of dicts (if geojson_out is False) @@ -121,20 +157,23 @@ def gen_zonal_stats( stats, run_count = check_stats(stats, categorical) # Handle 1.0 deprecations - transform = kwargs.get('transform') + transform = kwargs.get("transform") if transform: - warnings.warn("GDAL-style transforms will disappear in 1.0. " - "Use affine=Affine.from_gdal(*transform) instead", - DeprecationWarning) + warnings.warn( + "GDAL-style transforms will disappear in 1.0. 
" + "Use affine=Affine.from_gdal(*transform) instead", + DeprecationWarning, + ) if not affine: affine = Affine.from_gdal(*transform) - cp = kwargs.get('copy_properties') + cp = kwargs.get("copy_properties") if cp: - warnings.warn("Use `geojson_out` to preserve feature properties", - DeprecationWarning) + warnings.warn( + "Use `geojson_out` to preserve feature properties", DeprecationWarning + ) - band_num = kwargs.get('band_num') + band_num = kwargs.get("band_num") if band_num: warnings.warn("Use `band` to specify band number", DeprecationWarning) band = band_num @@ -142,52 +181,71 @@ def gen_zonal_stats( with Raster(raster, affine, nodata, band) as rast: features_iter = read_features(vectors, layer) for _, feat in enumerate(features_iter): - geom = shape(feat['geometry']) + geom = shape(feat["geometry"]) - if 'Point' in geom.type: + if "Point" in geom.geom_type: geom = boxify_points(geom, rast) geom_bounds = tuple(geom.bounds) - fsrc = rast.read(bounds=geom_bounds) + fsrc = rast.read(bounds=geom_bounds, boundless=boundless) # rasterized geometry rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) # nodata mask - isnodata = (fsrc.array == fsrc.nodata) + isnodata = fsrc.array == fsrc.nodata # add nan mask (if necessary) - has_nan = (np.issubdtype(fsrc.array.dtype, float) - and np.isnan(fsrc.array.min())) + has_nan = np.issubdtype(fsrc.array.dtype, np.floating) and np.isnan( + fsrc.array.min() + ) if has_nan: - isnodata = (isnodata | np.isnan(fsrc.array)) + isnodata = isnodata | np.isnan(fsrc.array) # Mask the source data array # mask everything that is not a valid value or not within our geom - masked = np.ma.MaskedArray( - fsrc.array, - mask=(isnodata | ~rv_array)) + masked = np.ma.MaskedArray(fsrc.array, mask=(isnodata | ~rv_array)) + + # If we're on 64 bit platform and the array is an integer type + # make sure we cast to 64 bit to avoid overflow for certain numpy ops + if sys.maxsize > 2**32 and issubclass(masked.dtype.type, np.integer): + accum_dtype = "int64" + else: + accum_dtype = None # numpy default # execute zone_func on masked zone ndarray if zone_func is not None: if not callable(zone_func): - raise TypeError(('zone_func must be a callable ' - 'which accepts function a ' - 'single `zone_array` arg.')) - zone_func(masked) + raise TypeError( + "zone_func must be a callable function " + "which accepts a single `zone_array` arg." 
+ ) + value = zone_func(masked) + + # check if zone_func has return statement + if value is not None: + masked = value if masked.compressed().size == 0: # nothing here, fill with None and move on - feature_stats = dict([(stat, None) for stat in stats]) - if 'count' in stats: # special case, zero makes sense here - feature_stats['count'] = 0 + feature_stats = {stat: None for stat in stats} + if "count" in stats: # special case, zero makes sense here + feature_stats["count"] = 0 else: if run_count: keys, counts = np.unique(masked.compressed(), return_counts=True) - pixel_count = dict(zip([np.asscalar(k) for k in keys], - [np.asscalar(c) for c in counts])) - + try: + pixel_count = dict( + zip([k.item() for k in keys], [c.item() for c in counts]) + ) + except AttributeError: + pixel_count = dict( + zip( + [np.asscalar(k) for k in keys], + [np.asscalar(c) for c in counts], + ) + ) if categorical: feature_stats = dict(pixel_count) @@ -196,72 +254,84 @@ def gen_zonal_stats( else: feature_stats = {} - if 'min' in stats: - feature_stats['min'] = float(masked.min()) - if 'max' in stats: - feature_stats['max'] = float(masked.max()) - if 'mean' in stats: - feature_stats['mean'] = float(masked.mean()) - if 'count' in stats: - feature_stats['count'] = int(masked.count()) + if "min" in stats: + feature_stats["min"] = float(masked.min()) + if "max" in stats: + feature_stats["max"] = float(masked.max()) + if "mean" in stats: + feature_stats["mean"] = float(masked.mean(dtype=accum_dtype)) + if "count" in stats: + feature_stats["count"] = int(masked.count()) # optional - if 'sum' in stats: - feature_stats['sum'] = float(masked.sum()) - if 'std' in stats: - feature_stats['std'] = float(masked.std()) - if 'median' in stats: - feature_stats['median'] = float(np.median(masked.compressed())) - if 'majority' in stats: - feature_stats['majority'] = float(key_assoc_val(pixel_count, max)) - if 'minority' in stats: - feature_stats['minority'] = float(key_assoc_val(pixel_count, min)) - if 'unique' in stats: - feature_stats['unique'] = len(list(pixel_count.keys())) - if 'range' in stats: + if "sum" in stats: + feature_stats["sum"] = float(masked.sum(dtype=accum_dtype)) + if "std" in stats: + feature_stats["std"] = float(masked.std()) + if "median" in stats: + feature_stats["median"] = float(np.median(masked.compressed())) + if "majority" in stats: + feature_stats["majority"] = float(key_assoc_val(pixel_count, max)) + if "minority" in stats: + feature_stats["minority"] = float(key_assoc_val(pixel_count, min)) + if "unique" in stats: + feature_stats["unique"] = len(list(pixel_count.keys())) + if "range" in stats: try: - rmin = feature_stats['min'] + rmin = feature_stats["min"] except KeyError: rmin = float(masked.min()) try: - rmax = feature_stats['max'] + rmax = feature_stats["max"] except KeyError: rmax = float(masked.max()) - feature_stats['range'] = rmax - rmin + feature_stats["range"] = rmax - rmin - for pctile in [s for s in stats if s.startswith('percentile_')]: + for pctile in [s for s in stats if s.startswith("percentile_")]: q = get_percentile(pctile) pctarr = masked.compressed() - feature_stats[pctile] = np.percentile(pctarr, q) + feature_stats[pctile] = float(np.percentile(pctarr, q)) - if 'nodata' in stats or 'nan' in stats: + if "nodata" in stats or "nan" in stats: featmasked = np.ma.MaskedArray(fsrc.array, mask=(~rv_array)) - if 'nodata' in stats: - feature_stats['nodata'] = float((featmasked == fsrc.nodata).sum()) - if 'nan' in stats: - feature_stats['nan'] = float(np.isnan(featmasked).sum()) if has_nan 
else 0 + if "nodata" in stats: + feature_stats["nodata"] = float((featmasked == fsrc.nodata).sum()) + if "nan" in stats: + feature_stats["nan"] = ( + float(np.isnan(featmasked).sum()) if has_nan else 0 + ) if add_stats is not None: for stat_name, stat_func in add_stats.items(): - feature_stats[stat_name] = stat_func(masked) + n_params = len(inspect.signature(stat_func).parameters.keys()) + if n_params == 3: + feature_stats[stat_name] = stat_func( + masked, feat["properties"], rv_array + ) + # backwards compatible with two-argument function + elif n_params == 2: + feature_stats[stat_name] = stat_func(masked, feat["properties"]) + # backwards compatible with single-argument function + else: + feature_stats[stat_name] = stat_func(masked) if raster_out: - feature_stats['mini_raster_array'] = masked - feature_stats['mini_raster_affine'] = fsrc.affine - feature_stats['mini_raster_nodata'] = fsrc.nodata + feature_stats["mini_raster_array"] = masked + feature_stats["mini_raster_affine"] = fsrc.affine + feature_stats["mini_raster_nodata"] = fsrc.nodata if prefix is not None: prefixed_feature_stats = {} for key, val in feature_stats.items(): - newkey = "{}{}".format(prefix, key) + newkey = f"{prefix}{key}" prefixed_feature_stats[newkey] = val feature_stats = prefixed_feature_stats if geojson_out: for key, val in feature_stats.items(): - if 'properties' not in feat: - feat['properties'] = {} - feat['properties'][key] = val + if "properties" not in feat: + feat["properties"] = {} + feat["properties"][key] = val yield feat else: yield feature_stats diff --git a/src/rasterstats/point.py b/src/rasterstats/point.py index ebeb15b..f262c6c 100644 --- a/src/rasterstats/point.py +++ b/src/rasterstats/point.py @@ -1,14 +1,12 @@ -from __future__ import absolute_import -from __future__ import division -from shapely.geometry import shape -from shapely import wkt from numpy.ma import masked -from numpy import asscalar -from .io import read_features, Raster +from shapely.geometry import shape +from shapely.ops import transform + +from rasterstats.io import Raster, read_features def point_window_unitxy(x, y, affine): - """ Given an x, y and a geotransform + """Given an x, y and a geotransform Returns - rasterio window representing 2x2 window whose center points encompass point - the cartesian x, y coordinates of the point on the unit square @@ -17,20 +15,19 @@ def point_window_unitxy(x, y, affine): ((row1, row2), (col1, col2)), (unitx, unity) """ fcol, frow = ~affine * (x, y) - r, c = int(round(frow)), int(round(fcol)) + r, c = round(frow), round(fcol) # The new source window for our 2x2 array new_win = ((r - 1, r + 1), (c - 1, c + 1)) # the new x, y coords on the unit square - unitxy = (0.5 - (c - fcol), - 0.5 + (r - frow)) + unitxy = (0.5 - (c - fcol), 0.5 + (r - frow)) return new_win, unitxy def bilinear(arr, x, y): - """ Given a 2x2 array, an x, and y, treat center points as a unit square + """Given a 2x2 array, an x, and y, treat center points as a unit square return the value for the fractional row/col using bilinear interpolation between the cells @@ -50,20 +47,22 @@ def bilinear(arr, x, y): assert 0.0 <= x <= 1.0 assert 0.0 <= y <= 1.0 - if hasattr(arr, 'count') and arr.count() != 4: + if hasattr(arr, "count") and arr.count() != 4: # a masked array with at least one nodata # fall back to nearest neighbor - val = arr[int(round(1 - y)), int(round(x))] + val = arr[round(1 - y), round(x)] if val is masked: return None else: - return asscalar(val) + return val.item() # bilinear interp on unit square - return 
((llv * (1 - x) * (1 - y)) + - (lrv * x * (1 - y)) + - (ulv * (1 - x) * y) + - (urv * x * y)) + return ( + (llv * (1 - x) * (1 - y)) + + (lrv * x * (1 - y)) + + (ulv * (1 - x) * y) + + (urv * x * y) + ) def geom_xys(geom): @@ -71,9 +70,8 @@ def geom_xys(geom): generate a flattened series of 2D points as x,y tuples """ if geom.has_z: - # hack to convert to 2D, https://gist.github.com/ThomasG77/cad711667942826edc70 - geom = wkt.loads(geom.to_wkt()) - assert not geom.has_z + # convert to 2D + geom = transform(lambda x, y, z=None: (x, y), geom) if hasattr(geom, "geoms"): geoms = geom.geoms @@ -81,9 +79,12 @@ def geom_xys(geom): geoms = [geom] for g in geoms: - arr = g.array_interface_base['data'] - for pair in zip(arr[::2], arr[1::2]): - yield pair + if hasattr(g, "exterior"): + yield from geom_xys(g.exterior) + for interior in g.interiors: + yield from geom_xys(interior) + else: + yield from g.coords def point_query(*args, **kwargs): @@ -104,9 +105,11 @@ def gen_point_query( layer=0, nodata=None, affine=None, - interpolate='bilinear', - property_name='value', - geojson_out=False): + interpolate="bilinear", + property_name="value", + geojson_out=False, + boundless=True, +): """ Given a set of vector features and a raster, generate raster values at each vertex of the geometry @@ -126,7 +129,7 @@ def gen_point_query( If ndarray is passed, the `transform` kwarg is required. layer: int or string, optional - If `vectors` is a path to an fiona source, + If `vectors` is a path to a fiona source, specify the vector layer to use either by name or number. defaults to 0 @@ -154,44 +157,51 @@ def gen_point_query( original feature geometry and properties will be retained point query values appended as additional properties. + boundless: boolean + Allow features that extend beyond the raster dataset's extent, default: True + Cells outside dataset extents are treated as nodata. 
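In practice the interpolation and boundless options combine like this; a minimal sketch against the repo's test fixtures (the WKT literal is one of the accepted vector inputs):

```python
from rasterstats import point_query

# Bilinear (the default) blends the four cells whose centers
# surround the point.
vals = point_query("POINT(245309 1000064)", "tests/data/slope.tif")

# Nearest-neighbour reads the single cell under the point; with
# boundless=True (the default), cells outside the raster extent are
# treated as nodata.
vals = point_query(
    "POINT(245309 1000064)", "tests/data/slope.tif", interpolate="nearest"
)
```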
+ Returns ------- generator of arrays (if ``geojson_out`` is False) generator of geojson features (if ``geojson_out`` is True) """ - if interpolate not in ['nearest', 'bilinear']: + if interpolate not in ["nearest", "bilinear"]: raise ValueError("interpolate must be nearest or bilinear") features_iter = read_features(vectors, layer) with Raster(raster, nodata=nodata, affine=affine, band=band) as rast: - for feat in features_iter: - geom = shape(feat['geometry']) + geom = shape(feat["geometry"]) vals = [] for x, y in geom_xys(geom): - if interpolate == 'nearest': + if interpolate == "nearest": r, c = rast.index(x, y) - window = ((int(r), int(r+1)), (int(c), int(c+1))) - src_array = rast.read(window=window, masked=True).array + window = ((int(r), int(r + 1)), (int(c), int(c + 1))) + src_array = rast.read( + window=window, masked=True, boundless=boundless + ).array val = src_array[0, 0] if val is masked: vals.append(None) else: - vals.append(asscalar(val)) + vals.append(val.item()) - elif interpolate == 'bilinear': + elif interpolate == "bilinear": window, unitxy = point_window_unitxy(x, y, rast.affine) - src_array = rast.read(window=window, masked=True).array + src_array = rast.read( + window=window, masked=True, boundless=boundless + ).array vals.append(bilinear(src_array, *unitxy)) if len(vals) == 1: vals = vals[0] # flatten single-element lists if geojson_out: - if 'properties' not in feat: - feat['properties'] = {} - feat['properties'][property_name] = vals + if "properties" not in feat: + feat["properties"] = {} + feat["properties"][property_name] = vals yield feat else: yield vals diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index c3ad76f..57fb11f 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -1,27 +1,32 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import division -import sys from rasterio import features -from shapely.geometry import box, MultiPolygon -from .io import window_bounds - - -DEFAULT_STATS = ['count', 'min', 'max', 'mean'] -VALID_STATS = DEFAULT_STATS + \ - ['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range', 'nodata', 'nan'] +from shapely.geometry import MultiPolygon, box + +from rasterstats.io import window_bounds + +DEFAULT_STATS = ["count", "min", "max", "mean"] +VALID_STATS = DEFAULT_STATS + [ + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "range", + "nodata", + "nan", +] # also percentile_{q} but that is handled as special case def get_percentile(stat): - if not stat.startswith('percentile_'): + if not stat.startswith("percentile_"): raise ValueError("must start with 'percentile_'") - qstr = stat.replace("percentile_", '') + qstr = stat.replace("percentile_", "") q = float(qstr) if q > 100.0: - raise ValueError('percentiles must be <= 100') + raise ValueError("percentiles must be <= 100") if q < 0.0: - raise ValueError('percentiles must be >= 0') + raise ValueError("percentiles must be >= 0") return q @@ -43,21 +48,18 @@ def rasterize_geom(geom, like, all_touched=False): out_shape=like.shape, transform=like.affine, fill=0, - dtype='uint8', - all_touched=all_touched) + dtype="uint8", + all_touched=all_touched, + ) return rv_array.astype(bool) def stats_to_csv(stats): - if sys.version_info[0] >= 3: - from io import StringIO as IO # pragma: no cover - else: - from cStringIO import StringIO as IO # pragma: no cover import csv + from io import StringIO - csv_fh = IO() + csv_fh = StringIO() keys = set() for stat in stats: @@ -66,8 +68,8 @@ def 
stats_to_csv(stats): fieldnames = sorted(list(keys), key=str) - csvwriter = csv.DictWriter(csv_fh, delimiter=str(","), fieldnames=fieldnames) - csvwriter.writerow(dict((fn, fn) for fn in fieldnames)) + csvwriter = csv.DictWriter(csv_fh, delimiter=",", fieldnames=fieldnames) + csvwriter.writerow({fn: fn for fn in fieldnames}) for row in stats: csvwriter.writerow(row) contents = csv_fh.getvalue() @@ -83,7 +85,7 @@ def check_stats(stats, categorical): stats = [] else: if isinstance(stats, str): - if stats in ['*', 'ALL']: + if stats in ["*", "ALL"]: stats = VALID_STATS else: stats = stats.split() @@ -91,12 +93,10 @@ def check_stats(stats, categorical): if x.startswith("percentile_"): get_percentile(x) elif x not in VALID_STATS: - raise ValueError( - "Stat `%s` not valid; " - "must be one of \n %r" % (x, VALID_STATS)) + raise ValueError(f"Stat {x!r} not valid; must be one of \n {VALID_STATS}") run_count = False - if categorical or 'majority' in stats or 'minority' in stats or 'unique' in stats: + if categorical or "majority" in stats or "minority" in stats or "unique" in stats: # run the counter once, only if needed run_count = True @@ -105,20 +105,17 @@ def check_stats(stats, categorical): def remap_categories(category_map, stats): def lookup(m, k): - """ Dict lookup but returns original key if not found - """ + """Dict lookup but returns original key if not found""" try: return m[k] except KeyError: return k - return {lookup(category_map, k): v - for k, v in stats.items()} + return {lookup(category_map, k): v for k, v in stats.items()} def key_assoc_val(d, func, exclude=None): - """return the key associated with the value returned by func - """ + """return the key associated with the value returned by func""" vs = list(d.values()) ks = list(d.keys()) key = ks[vs.index(func(vs))] @@ -130,14 +127,14 @@ def boxify_points(geom, rast): Point and MultiPoint don't play well with GDALRasterize convert them into box polygons 99% cellsize, centered on the raster cell """ - if 'Point' not in geom.type: + if "Point" not in geom.geom_type: raise ValueError("Points or multipoints only") - buff = -0.01 * min(rast.affine.a, rast.affine.e) + buff = -0.01 * abs(min(rast.affine.a, rast.affine.e)) - if geom.type == 'Point': + if geom.geom_type == "Point": pts = [geom] - elif geom.type == "MultiPoint": + elif geom.geom_type == "MultiPoint": pts = geom.geoms geoms = [] for pt in pts: diff --git a/tests/conftest.py b/tests/conftest.py index b554816..fde21cd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ import logging import sys + logging.basicConfig(stream=sys.stderr, level=logging.INFO) diff --git a/tests/data/dataset_mask.tif b/tests/data/dataset_mask.tif new file mode 100644 index 0000000..0207d12 Binary files /dev/null and b/tests/data/dataset_mask.tif differ diff --git a/tests/myfunc.py b/tests/myfunc.py index 0018cf5..3c6e7da 100755 --- a/tests/myfunc.py +++ b/tests/myfunc.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Additional functions to be used in raster stat computation -from __future__ import division import numpy as np + def mymean(x): return np.ma.mean(x) diff --git a/tests/test_cli.py b/tests/test_cli.py index b6834aa..1fd9418 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,132 +1,152 @@ -import os.path import json import warnings +from pathlib import Path + +from click.testing import CliRunner + +from rasterstats.cli import pointquery, zonalstats + # Some warnings must be ignored to parse output properly # 
https://github.com/pallets/click/issues/371#issuecomment-223790894 -from click.testing import CliRunner -from rasterstats.cli import zonalstats, pointquery +data_dir = Path(__file__).parent / "data" def test_cli_feature(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "feature.geojson") runner = CliRunner() - warnings.simplefilter('ignore') - result = runner.invoke(zonalstats, [vector, - '--raster', raster, - '--stats', 'mean', - '--prefix', 'test_']) + warnings.simplefilter("ignore") + result = runner.invoke( + zonalstats, [vector, "--raster", raster, "--stats", "mean", "--prefix", "test_"] + ) assert result.exit_code == 0 outdata = json.loads(result.output) - assert len(outdata['features']) == 1 - feature = outdata['features'][0] - assert 'test_mean' in feature['properties'] - assert round(feature['properties']['test_mean'], 2) == 14.66 - assert 'test_count' not in feature['properties'] + assert len(outdata["features"]) == 1 + feature = outdata["features"][0] + assert "test_mean" in feature["properties"] + assert round(feature["properties"]["test_mean"], 2) == 14.66 + assert "test_count" not in feature["properties"] def test_cli_feature_stdin(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson') + raster = str(data_dir / "slope.tif") + vector_pth = data_dir / "feature.geojson" runner = CliRunner() - warnings.simplefilter('ignore') - result = runner.invoke(zonalstats, - ['--raster', raster, - '--stats', 'all', - '--prefix', 'test_'], - input=open(vector, 'r').read()) + warnings.simplefilter("ignore") + result = runner.invoke( + zonalstats, + ["--raster", raster, "--stats", "all", "--prefix", "test_"], + input=vector_pth.read_text(), + ) assert result.exit_code == 0 outdata = json.loads(result.output) - assert len(outdata['features']) == 1 - feature = outdata['features'][0] - assert 'test_mean' in feature['properties'] - assert 'test_std' in feature['properties'] + assert len(outdata["features"]) == 1 + feature = outdata["features"][0] + assert "test_mean" in feature["properties"] + assert "test_std" in feature["properties"] def test_cli_features_sequence(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "featurecollection.geojson") runner = CliRunner() - result = runner.invoke(zonalstats, [vector, - '--raster', raster, - '--stats', 'mean', - '--prefix', 'test_', - '--sequence']) + result = runner.invoke( + zonalstats, + [ + vector, + "--raster", + raster, + "--stats", + "mean", + "--prefix", + "test_", + "--sequence", + ], + ) assert result.exit_code == 0 results = result.output.splitlines() for r in results: outdata = json.loads(r) - assert outdata['type'] == 'Feature' + assert outdata["type"] == "Feature" def test_cli_features_sequence_rs(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "featurecollection.geojson") runner = CliRunner() - result = runner.invoke(zonalstats, [vector, - '--raster', raster, - '--stats', 'mean', - '--prefix', 'test_', - '--sequence', '--rs']) + result = runner.invoke( + 
zonalstats, + [ + vector, + "--raster", + raster, + "--stats", + "mean", + "--prefix", + "test_", + "--sequence", + "--rs", + ], + ) assert result.exit_code == 0 # assert result.output.startswith(b'\x1e') - assert result.output[0] == '\x1e' + assert result.output[0] == "\x1e" def test_cli_featurecollection(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "featurecollection.geojson") runner = CliRunner() - result = runner.invoke(zonalstats, [vector, - '--raster', raster, - '--stats', 'mean', - '--prefix', 'test_']) + result = runner.invoke( + zonalstats, [vector, "--raster", raster, "--stats", "mean", "--prefix", "test_"] + ) assert result.exit_code == 0 outdata = json.loads(result.output) - assert len(outdata['features']) == 2 - feature = outdata['features'][0] - assert 'test_mean' in feature['properties'] - assert round(feature['properties']['test_mean'], 2) == 14.66 - assert 'test_count' not in feature['properties'] + assert len(outdata["features"]) == 2 + feature = outdata["features"][0] + assert "test_mean" in feature["properties"] + assert round(feature["properties"]["test_mean"], 2) == 14.66 + assert "test_count" not in feature["properties"] def test_cli_pointquery(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "featurecollection.geojson") runner = CliRunner() - result = runner.invoke(pointquery, [vector, - '--raster', raster, - '--property-name', 'slope']) + result = runner.invoke( + pointquery, [vector, "--raster", raster, "--property-name", "slope"] + ) assert result.exit_code == 0 outdata = json.loads(result.output) - assert len(outdata['features']) == 2 - feature = outdata['features'][0] - assert 'slope' in feature['properties'] + assert len(outdata["features"]) == 2 + feature = outdata["features"][0] + assert "slope" in feature["properties"] + def test_cli_point_sequence(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "featurecollection.geojson") runner = CliRunner() - result = runner.invoke(pointquery, [vector, - '--raster', raster, - '--property-name', 'slope', - '--sequence']) + result = runner.invoke( + pointquery, + [vector, "--raster", raster, "--property-name", "slope", "--sequence"], + ) assert result.exit_code == 0 results = result.output.splitlines() for r in results: outdata = json.loads(r) - assert outdata['type'] == 'Feature' + assert outdata["type"] == "Feature" def test_cli_point_sequence_rs(): - raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') - vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') + raster = str(data_dir / "slope.tif") + vector = str(data_dir / "featurecollection.geojson") runner = CliRunner() - result = runner.invoke(pointquery, [vector, - '--raster', raster, - '--property-name', 'slope', - '--sequence', '--rs']) + result = runner.invoke( + pointquery, + [vector, "--raster", raster, "--property-name", "slope", "--sequence", "--rs"], + ) assert result.exit_code == 0 - assert result.output[0] == '\x1e' + assert result.output[0] == "\x1e" diff --git a/tests/test_io.py b/tests/test_io.py 
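The `--sequence --rs` assertions above check for RFC 8142 GeoJSON text sequences, where each feature is prefixed with an ASCII record separator (0x1e). A minimal sketch of a consumer for that output; the helper name is hypothetical:

```python
import json

def iter_rs_features(text):
    # Split an RS-delimited (RFC 8142) GeoJSON text sequence, as emitted
    # by the zonalstats/pointquery CLIs with --sequence --rs.
    for chunk in text.split("\x1e"):
        chunk = chunk.strip()
        if chunk:
            yield json.loads(chunk)
```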
index b624133..3199658 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,50 +1,54 @@ -import sys -import os -import fiona -import rasterio import json +from pathlib import Path + +import fiona +import numpy as np import pytest +import rasterio from shapely.geometry import shape -from rasterstats.io import read_features, read_featurecollection, Raster # todo parse_feature -from rasterstats.io import boundless_array, window_bounds, bounds_window, rowcol +from rasterstats.io import ( # todo parse_feature + Raster, + boundless_array, + bounds_window, + fiona_generator, + read_featurecollection, + read_features, + rowcol, + window_bounds, +) -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") -polygons = os.path.join(DATA, 'polygons.shp') -raster = os.path.join(DATA, 'slope.tif') +data_dir = Path(__file__).parent / "data" +polygons = data_dir / "polygons.shp" +raster = data_dir / "slope.tif" -import numpy as np -arr = np.array([[1, 1, 1], - [1, 1, 1], - [1, 1, 1]]) +arr = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]]) + +arr3d = np.array([[[1, 1, 1], [1, 1, 1], [1, 1, 1]]]) -arr3d = np.array([[[1, 1, 1], - [1, 1, 1], - [1, 1, 1]]]) +eps = 1e-6 -with fiona.open(polygons, 'r') as src: - target_features = [f for f in src] +target_features = [f for f in fiona_generator(polygons)] -target_geoms = [shape(f['geometry']) for f in target_features] +target_geoms = [shape(f["geometry"]) for f in target_features] def _compare_geomlists(aa, bb): for a, b in zip(aa, bb): - assert a.almost_equals(b) + assert a.equals_exact(b, eps) def _test_read_features(indata): features = list(read_features(indata)) # multi - geoms = [shape(f['geometry']) for f in features] + geoms = [shape(f["geometry"]) for f in features] _compare_geomlists(geoms, target_geoms) def _test_read_features_single(indata): # single (first target geom) - geom = shape(list(read_features(indata))[0]['geometry']) - assert geom.almost_equals(target_geoms[0]) + geom = shape(next(iter(read_features(indata)))["geometry"]) + assert geom.equals_exact(target_geoms[0], eps) def test_fiona_path(): @@ -52,12 +56,12 @@ def test_fiona_path(): def test_layer_index(): - layer = fiona.listlayers(DATA).index('polygons') - assert list(read_features(DATA, layer=layer)) == target_features + layer = fiona.listlayers(data_dir).index("polygons") + assert list(read_features(data_dir, layer=layer)) == target_features def test_layer_name(): - assert list(read_features(DATA, layer='polygons')) == target_features + assert list(read_features(data_dir, layer="polygons")) == target_features def test_path_unicode(): @@ -70,112 +74,115 @@ def test_path_unicode(): def test_featurecollection(): - assert read_featurecollection(polygons)['features'] == \ - list(read_features(polygons)) == \ - target_features + assert ( + read_featurecollection(polygons)["features"] + == list(read_features(polygons)) + == target_features + ) def test_shapely(): - with fiona.open(polygons, 'r') as src: - indata = [shape(f['geometry']) for f in src] + indata = [shape(f["geometry"]) for f in fiona_generator(polygons)] _test_read_features(indata) _test_read_features_single(indata[0]) def test_wkt(): - with fiona.open(polygons, 'r') as src: - indata = [shape(f['geometry']).wkt for f in src] + indata = [shape(f["geometry"]).wkt for f in fiona_generator(polygons)] _test_read_features(indata) _test_read_features_single(indata[0]) def test_wkb(): - with fiona.open(polygons, 'r') as src: - indata = [shape(f['geometry']).wkb 
for f in src] + indata = [shape(f["geometry"]).wkb for f in fiona_generator(polygons)] _test_read_features(indata) _test_read_features_single(indata[0]) def test_mapping_features(): # list of Features - with fiona.open(polygons, 'r') as src: - indata = [f for f in src] + indata = [f for f in fiona_generator(polygons)] _test_read_features(indata) def test_mapping_feature(): # list of Features - with fiona.open(polygons, 'r') as src: - indata = [f for f in src] + indata = [f for f in fiona_generator(polygons)] _test_read_features(indata[0]) def test_mapping_geoms(): - with fiona.open(polygons, 'r') as src: - indata = [f for f in src] - _test_read_features(indata[0]['geometry']) + indata = [f for f in fiona_generator(polygons)] + _test_read_features(indata[0]["geometry"]) def test_mapping_collection(): - indata = {'type': "FeatureCollection"} - with fiona.open(polygons, 'r') as src: - indata['features'] = [f for f in src] + indata = {"type": "FeatureCollection"} + indata["features"] = [f for f in fiona_generator(polygons)] _test_read_features(indata) def test_jsonstr(): # Feature str - with fiona.open(polygons, 'r') as src: - indata = [f for f in src] + indata = [f for f in fiona_generator(polygons)] indata = json.dumps(indata[0]) _test_read_features(indata) def test_jsonstr_geom(): # geojson geom str - with fiona.open(polygons, 'r') as src: - indata = [f for f in src] - indata = json.dumps(indata[0]['geometry']) + indata = [f for f in fiona_generator(polygons)] + indata = json.dumps(indata[0]["geometry"]) _test_read_features(indata) def test_jsonstr_collection(): - indata = {'type': "FeatureCollection"} - with fiona.open(polygons, 'r') as src: - indata['features'] = [f for f in src] + indata = {"type": "FeatureCollection"} + indata["features"] = [f for f in fiona_generator(polygons)] indata = json.dumps(indata) _test_read_features(indata) +def test_jsonstr_collection_without_features(): + indata = {"type": "FeatureCollection", "features": []} + indata = json.dumps(indata) + with pytest.raises(ValueError): + _test_read_features(indata) + + +def test_invalid_jsonstr(): + indata = {"type": "InvalidGeometry", "coordinates": [30, 10]} + indata = json.dumps(indata) + with pytest.raises(ValueError): + _test_read_features(indata) + + class MockGeoInterface: def __init__(self, f): self.__geo_interface__ = f def test_geo_interface(): - with fiona.open(polygons, 'r') as src: - indata = [MockGeoInterface(f) for f in src] + indata = [MockGeoInterface(f) for f in fiona_generator(polygons)] _test_read_features(indata) def test_geo_interface_geom(): - with fiona.open(polygons, 'r') as src: - indata = [MockGeoInterface(f['geometry']) for f in src] + indata = [MockGeoInterface(f["geometry"]) for f in fiona_generator(polygons)] _test_read_features(indata) def test_geo_interface_collection(): # geointerface for featurecollection? 
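These cases exercise the `__geo_interface__` protocol: any object exposing that attribute, such as a geopandas GeoDataFrame, can be passed directly as a vector source. A minimal sketch with a hand-rolled wrapper (the `Wrapped` class is illustrative only, mirroring `MockGeoInterface` above):

```python
from rasterstats import zonal_stats

class Wrapped:
    # Anything exposing __geo_interface__ is accepted by read_features.
    def __init__(self, mapping):
        self.__geo_interface__ = mapping

feature = {
    "type": "Feature",
    "properties": {},
    "geometry": {"type": "Point", "coordinates": [245309, 1000064]},
}
stats = zonal_stats(Wrapped(feature), "tests/data/slope.tif")
```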
- indata = {'type': "FeatureCollection"} - with fiona.open(polygons, 'r') as src: - indata['features'] = [f for f in src] + indata = {"type": "FeatureCollection"} + indata["features"] = [f for f in fiona_generator(polygons)] indata = MockGeoInterface(indata) _test_read_features(indata) def test_notafeature(): with pytest.raises(ValueError): - list(read_features(['foo', 'POINT(-122 42)'])) + list(read_features(["foo", "POINT(-122 42)"])) with pytest.raises(ValueError): list(read_features(Exception())) @@ -224,26 +231,30 @@ def test_boundless_masked(): def test_window_bounds(): with rasterio.open(raster) as src: win = ((0, src.shape[0]), (0, src.shape[1])) - assert src.bounds == window_bounds(win, src.affine) + assert src.bounds == window_bounds(win, src.transform) win = ((5, 10), (5, 10)) - assert src.window_bounds(win) == window_bounds(win, src.affine) + assert src.window_bounds(win) == window_bounds(win, src.transform) def test_bounds_window(): with rasterio.open(raster) as src: - assert bounds_window(src.bounds, src.affine) == \ - ((0, src.shape[0]), (0, src.shape[1])) + assert bounds_window(src.bounds, src.transform) == ( + (0, src.shape[0]), + (0, src.shape[1]), + ) def test_rowcol(): import math + with rasterio.open(raster) as src: x, _, _, y = src.bounds x += 1.0 y -= 1.0 - assert rowcol(x, y, src.affine, op=math.floor) == (0, 0) - assert rowcol(x, y, src.affine, op=math.ceil) == (1, 1) + assert rowcol(x, y, src.transform, op=math.floor) == (0, 0) + assert rowcol(x, y, src.transform, op=math.ceil) == (1, 1) + def test_Raster_index(): x, y = 245114, 1000968 @@ -263,19 +274,54 @@ def test_Raster(): with rasterio.open(raster) as src: arr = src.read(1) - affine = src.affine + affine = src.transform nodata = src.nodata r2 = Raster(arr, affine, nodata, band=1).read(bounds) with pytest.raises(ValueError): - r3 = Raster(arr, affine, nodata, band=1).read() + Raster(arr, affine, nodata, band=1).read() with pytest.raises(ValueError): - r4 = Raster(arr, affine, nodata, band=1).read(bounds=1, window=1) + Raster(arr, affine, nodata, band=1).read(bounds=1, window=1) # If the abstraction is correct, the arrays are equal assert np.array_equal(r1.array, r2.array) + +def test_Raster_boundless_disabled(): + import numpy as np + + bounds = ( + 244300.61494985913, + 998877.8262535353, + 246444.72726211764, + 1000868.7876863468, + ) + outside_bounds = (244156, 1000258, 245114, 1000968) + + # rasterio src fails outside extent + with pytest.raises(ValueError): + Raster(raster, band=1).read(outside_bounds, boundless=False) + + # rasterio src works inside extent + r2 = Raster(raster, band=1).read(bounds, boundless=False) + + with rasterio.open(raster) as src: + arr = src.read(1) + affine = src.transform + nodata = src.nodata + + # ndarray works inside extent + r3 = Raster(arr, affine, nodata, band=1).read(bounds, boundless=False) + + # ndarray src fails outside extent + with pytest.raises(ValueError): + Raster(arr, affine, nodata, band=1).read(outside_bounds, boundless=False) + + # If the abstraction is correct, the arrays are equal + assert np.array_equal(r2.array, r3.array) + + def test_Raster_context(): # Assigned a regular name, stays open r1 = Raster(raster, band=1) @@ -289,11 +335,9 @@ def test_Raster_context(): def test_geointerface(): - class MockGeo(object): + class MockGeo: def __init__(self, features): - self.__geo_interface__ = { - 'type': "FeatureCollection", - 'features': features} + self.__geo_interface__ = {"type": "FeatureCollection", "features": features} # Make it iterable just to ensure 
that geo interface # takes precendence over iterability @@ -306,18 +350,21 @@ def __next__(self): def next(self): pass - features = [{ - "type": "Feature", - "properties": {}, - "geometry": { - "type": "Point", - "coordinates": [0, 0]} - }, { - "type": "Feature", - "properties": {}, - "geometry": { - "type": "Polygon", - "coordinates": [[[-50, -10], [-40, 10], [-30, -10], [-50, -10]]]}}] + features = [ + { + "type": "Feature", + "properties": {}, + "geometry": {"type": "Point", "coordinates": [0, 0]}, + }, + { + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [[[-50, -10], [-40, 10], [-30, -10], [-50, -10]]], + }, + }, + ] geothing = MockGeo(features) assert list(read_features(geothing)) == features @@ -325,11 +372,15 @@ def next(self): # Optional tests def test_geodataframe(): - try: - import geopandas as gpd - df = gpd.read_file(polygons) - if not hasattr(df, '__geo_interface__'): - pytest.skip("This version of geopandas doesn't support df.__geo_interface__") - except ImportError: - pytest.skip("Can't import geopands") + gpd = pytest.importorskip("geopandas") + + df = gpd.read_file(polygons) + if not hasattr(df, "__geo_interface__"): + pytest.skip("This version of geopandas doesn't support df.__geo_interface__") assert list(read_features(df)) + + +# TODO # io.parse_features on a feature-only geo_interface +# TODO # io.parse_features on a feature-only geojson-like object +# TODO # io.read_features on a feature-only +# TODO # io.Raster.read() on an open rasterio dataset diff --git a/tests/test_point.py b/tests/test_point.py index de75fbb..9a50119 100644 --- a/tests/test_point.py +++ b/tests/test_point.py @@ -1,13 +1,16 @@ import os + import rasterio -from rasterstats.point import point_window_unitxy, bilinear, geom_xys + from rasterstats import point_query +from rasterstats.point import bilinear, geom_xys, point_window_unitxy -raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') -raster_nodata = os.path.join(os.path.dirname(__file__), 'data/slope_nodata.tif') +raster = os.path.join(os.path.dirname(__file__), "data/slope.tif") +raster_nodata = os.path.join(os.path.dirname(__file__), "data/slope_nodata.tif") with rasterio.open(raster) as src: - affine = src.affine + affine = src.transform + def test_unitxy_ul(): win, unitxy = point_window_unitxy(245300, 1000073, affine) @@ -17,6 +20,7 @@ def test_unitxy_ul(): assert x > 0.5 assert y < 0.5 + def test_unitxy_ur(): win, unitxy = point_window_unitxy(245318, 1000073, affine) assert win == ((30, 32), (39, 41)) @@ -32,6 +36,7 @@ def test_unitxy_ur(): assert x < 0.5 assert y < 0.5 + def test_unitxy_lr(): win, unitxy = point_window_unitxy(245318, 1000056, affine) assert win == ((31, 33), (39, 41)) @@ -40,6 +45,7 @@ def test_unitxy_lr(): assert x < 0.5 assert y > 0.5 + def test_unitxy_ll(): win, unitxy = point_window_unitxy(245300, 1000056, affine) assert win == ((31, 33), (38, 40)) @@ -48,10 +54,11 @@ def test_unitxy_ll(): assert x > 0.5 assert y > 0.5 + def test_bilinear(): import numpy as np - arr = np.array([[1.0, 2.0], - [3.0, 4.0]]) + + arr = np.array([[1.0, 2.0], [3.0, 4.0]]) assert bilinear(arr, 0, 0) == 3.0 assert bilinear(arr, 1, 0) == 4.0 @@ -63,8 +70,7 @@ def test_bilinear(): def test_xy_array_bilinear_window(): - """ integration test - """ + """integration test""" x, y = (245309, 1000064) with rasterio.open(raster) as src: @@ -85,8 +91,8 @@ def test_point_query_geojson(): point = "POINT(245309 1000064)" features = point_query(point, raster, property_name="TEST", geojson_out=True) 
for feature in features: - assert 'TEST' in feature['properties'] - assert round(feature['properties']['TEST']) == 74 + assert "TEST" in feature["properties"] + assert round(feature["properties"]["TEST"]) == 74 def test_point_query_nodata(): @@ -112,23 +118,49 @@ def test_point_query_nodata(): def test_geom_xys(): - from shapely.geometry import (Point, MultiPoint, - LineString, MultiLineString, - Polygon, MultiPolygon) + from shapely.geometry import ( + LineString, + MultiLineString, + MultiPoint, + MultiPolygon, + Point, + Polygon, + ) + pt = Point(0, 0) assert list(geom_xys(pt)) == [(0, 0)] + mpt = MultiPoint([(0, 0), (1, 1)]) assert list(geom_xys(mpt)) == [(0, 0), (1, 1)] + line = LineString([(0, 0), (1, 1)]) assert list(geom_xys(line)) == [(0, 0), (1, 1)] + mline = MultiLineString([((0, 0), (1, 1)), ((-1, 0), (1, 0))]) assert list(geom_xys(mline)) == [(0, 0), (1, 1), (-1, 0), (1, 0)] - poly = Polygon([(0, 0), (1, 1), (1, 0)]) + + poly = Polygon([(0, 0), (1, 1), (1, 0), (0, 0)]) assert list(geom_xys(poly)) == [(0, 0), (1, 1), (1, 0), (0, 0)] + ring = poly.exterior assert list(geom_xys(ring)) == [(0, 0), (1, 1), (1, 0), (0, 0)] + mpoly = MultiPolygon([poly, Polygon([(2, 2), (3, 3), (3, 2)])]) - assert list(geom_xys(mpoly)) == [(0, 0), (1, 1), (1, 0), (0, 0), - (2, 2), (3, 3), (3, 2), (2, 2)] + assert list(geom_xys(mpoly)) == [ + (0, 0), + (1, 1), + (1, 0), + (0, 0), + (2, 2), + (3, 3), + (3, 2), + (2, 2), + ] + mpt3d = MultiPoint([(0, 0, 1), (1, 1, 2)]) assert list(geom_xys(mpt3d)) == [(0, 0), (1, 1)] + + +# TODO # gen_point_query(interpolation="fake") +# TODO # gen_point_query(interpolation="bilinear") +# TODO # gen_point_query() diff --git a/tests/test_utils.py b/tests/test_utils.py index a6edc06..0f88d35 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,61 +1,65 @@ -import sys -import os +from pathlib import Path + import pytest from shapely.geometry import LineString -from rasterstats.utils import \ - stats_to_csv, get_percentile, remap_categories, boxify_points -from rasterstats import zonal_stats -from rasterstats.utils import VALID_STATS +from rasterstats import zonal_stats +from rasterstats.utils import ( + VALID_STATS, + boxify_points, + get_percentile, + remap_categories, + stats_to_csv, +) -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") -raster = os.path.join(DATA, 'slope.tif') +data_dir = Path(__file__).parent / "data" +raster = data_dir / "slope.tif" def test_csv(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster, stats="*") csv = stats_to_csv(stats) - assert csv.split()[0] == ','.join(sorted(VALID_STATS)) + assert csv.split()[0] == ",".join(sorted(VALID_STATS)) def test_categorical_csv(): - polygons = os.path.join(DATA, 'polygons.shp') - categorical_raster = os.path.join(DATA, 'slope_classes.tif') + polygons = data_dir / "polygons.shp" + categorical_raster = data_dir / "slope_classes.tif" stats = zonal_stats(polygons, categorical_raster, categorical=True) csv = stats_to_csv(stats) assert csv.split()[0] == "1.0,2.0,5.0" def test_get_percentile(): - assert get_percentile('percentile_0') == 0.0 - assert get_percentile('percentile_100') == 100.0 - assert get_percentile('percentile_13.2') == 13.2 + assert get_percentile("percentile_0") == 0.0 + assert get_percentile("percentile_100") == 100.0 + assert get_percentile("percentile_13.2") == 13.2 + def test_get_bad_percentile(): with 
pytest.raises(ValueError): - get_percentile('foo') + get_percentile("foo") with pytest.raises(ValueError): - get_percentile('percentile_101') + get_percentile("percentile_101") with pytest.raises(ValueError): - get_percentile('percentile_101') + get_percentile("percentile_101") with pytest.raises(ValueError): - get_percentile('percentile_-1') + get_percentile("percentile_-1") with pytest.raises(ValueError): - get_percentile('percentile_foobar') + get_percentile("percentile_foobar") def test_remap_categories(): feature_stats = {1: 22.343, 2: 54.34, 3: 987.5} - category_map = {1: 'grassland', 2: 'forest'} + category_map = {1: "grassland", 2: "forest"} new_stats = remap_categories(category_map, feature_stats) assert 1 not in new_stats.keys() - assert 'grassland' in new_stats.keys() + assert "grassland" in new_stats.keys() assert 3 in new_stats.keys() @@ -63,3 +67,7 @@ def test_boxify_non_point(): line = LineString([(0, 0), (1, 1)]) with pytest.raises(ValueError): boxify_points(line, None) + + +# TODO # def test_boxify_multi_point +# TODO # def test_boxify_point diff --git a/tests/test_zonal.py b/tests/test_zonal.py index 48babc9..c906a53 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -1,132 +1,131 @@ # test zonal stats -import os -import pytest -import simplejson import json -import sys +from pathlib import Path + import numpy as np +import pytest import rasterio -from rasterstats import zonal_stats, raster_stats -from rasterstats.utils import VALID_STATS -from rasterstats.io import read_featurecollection, read_features -from shapely.geometry import Polygon +import simplejson from affine import Affine +from shapely.geometry import Polygon -sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from rasterstats import raster_stats, zonal_stats +from rasterstats.io import read_featurecollection, read_features +from rasterstats.utils import VALID_STATS -DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") -raster = os.path.join(DATA, 'slope.tif') +data_dir = Path(__file__).parent / "data" +raster = data_dir / "slope.tif" def test_main(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster) - for key in ['count', 'min', 'max', 'mean']: + for key in ["count", "min", "max", "mean"]: assert key in stats[0] assert len(stats) == 2 - assert stats[0]['count'] == 75 - assert stats[1]['count'] == 50 - assert round(stats[0]['mean'], 2) == 14.66 + assert stats[0]["count"] == 75 + assert stats[1]["count"] == 50 + assert round(stats[0]["mean"], 2) == 14.66 # remove after band_num alias is removed def test_band_alias(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats_a = zonal_stats(polygons, raster) stats_b = zonal_stats(polygons, raster, band=1) with pytest.deprecated_call(): stats_c = zonal_stats(polygons, raster, band_num=1) - assert stats_a[0]['count'] == stats_b[0]['count'] == stats_c[0]['count'] + assert stats_a[0]["count"] == stats_b[0]["count"] == stats_c[0]["count"] def test_zonal_global_extent(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster) global_stats = zonal_stats(polygons, raster, global_src_extent=True) assert stats == global_stats def test_zonal_nodata(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster, nodata=0) assert len(stats) == 2 - assert stats[0]['count'] == 75 - assert 
stats[1]['count'] == 50 + assert stats[0]["count"] == 75 + assert stats[1]["count"] == 50 def test_doesnt_exist(): - nonexistent = os.path.join(DATA, 'DOESNOTEXIST.shp') + nonexistent = data_dir / "DOESNOTEXIST.shp" with pytest.raises(ValueError): zonal_stats(nonexistent, raster) def test_nonsense(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" with pytest.raises(ValueError): zonal_stats("blaghrlargh", raster) - with pytest.raises(IOError): + with pytest.raises(OSError): zonal_stats(polygons, "blercherlerch") with pytest.raises(ValueError): - zonal_stats(["blaghrlargh", ], raster) + zonal_stats(["blaghrlargh"], raster) # Different geometry types def test_points(): - points = os.path.join(DATA, 'points.shp') + points = data_dir / "points.shp" stats = zonal_stats(points, raster) # three features assert len(stats) == 3 # three pixels - assert sum([x['count'] for x in stats]) == 3 - assert round(stats[0]['mean'], 3) == 11.386 - assert round(stats[1]['mean'], 3) == 35.547 + assert sum([x["count"] for x in stats]) == 3 + assert round(stats[0]["mean"], 3) == 11.386 + assert round(stats[1]["mean"], 3) == 35.547 def test_points_categorical(): - points = os.path.join(DATA, 'points.shp') - categorical_raster = os.path.join(DATA, 'slope_classes.tif') + points = data_dir / "points.shp" + categorical_raster = data_dir / "slope_classes.tif" stats = zonal_stats(points, categorical_raster, categorical=True) # three features assert len(stats) == 3 - assert 'mean' not in stats[0] + assert "mean" not in stats[0] assert stats[0][1.0] == 1 assert stats[1][2.0] == 1 def test_lines(): - lines = os.path.join(DATA, 'lines.shp') + lines = data_dir / "lines.shp" stats = zonal_stats(lines, raster) assert len(stats) == 2 - assert stats[0]['count'] == 58 - assert stats[1]['count'] == 32 + assert stats[0]["count"] == 58 + assert stats[1]["count"] == 32 # Test multigeoms def test_multipolygons(): - multipolygons = os.path.join(DATA, 'multipolygons.shp') + multipolygons = data_dir / "multipolygons.shp" stats = zonal_stats(multipolygons, raster) assert len(stats) == 1 - assert stats[0]['count'] == 125 + assert stats[0]["count"] == 125 def test_multilines(): - multilines = os.path.join(DATA, 'multilines.shp') + multilines = data_dir / "multilines.shp" stats = zonal_stats(multilines, raster) assert len(stats) == 1 # can differ slightly based on platform/gdal version - assert stats[0]['count'] in [89, 90] + assert stats[0]["count"] in [89, 90] def test_multipoints(): - multipoints = os.path.join(DATA, 'multipoints.shp') + multipoints = data_dir / "multipoints.shp" stats = zonal_stats(multipoints, raster) assert len(stats) == 1 - assert stats[0]['count'] == 3 + assert stats[0]["count"] == 3 def test_categorical(): - polygons = os.path.join(DATA, 'polygons.shp') - categorical_raster = os.path.join(DATA, 'slope_classes.tif') + polygons = data_dir / "polygons.shp" + categorical_raster = data_dir / "slope_classes.tif" stats = zonal_stats(polygons, categorical_raster, categorical=True) assert len(stats) == 2 assert stats[0][1.0] == 75 @@ -134,221 +133,269 @@ def test_categorical(): def test_categorical_map(): - polygons = os.path.join(DATA, 'polygons.shp') - categorical_raster = os.path.join(DATA, 'slope_classes.tif') - catmap = {5.0: 'cat5'} - stats = zonal_stats(polygons, categorical_raster, - categorical=True, category_map=catmap) + polygons = data_dir / "polygons.shp" + categorical_raster = data_dir / "slope_classes.tif" + catmap = {5.0: "cat5"} + stats = zonal_stats( + polygons, 
categorical_raster, categorical=True, category_map=catmap + ) assert len(stats) == 2 assert stats[0][1.0] == 75 assert 5.0 not in stats[1] - assert 'cat5' in stats[1] + assert "cat5" in stats[1] def test_specify_stats_list(): - polygons = os.path.join(DATA, 'polygons.shp') - stats = zonal_stats(polygons, raster, stats=['min', 'max']) - assert sorted(stats[0].keys()) == sorted(['min', 'max']) - assert 'count' not in list(stats[0].keys()) + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, stats=["min", "max"]) + assert sorted(stats[0].keys()) == sorted(["min", "max"]) + assert "count" not in list(stats[0].keys()) def test_specify_all_stats(): - polygons = os.path.join(DATA, 'polygons.shp') - stats = zonal_stats(polygons, raster, stats='ALL') + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, stats="ALL") assert sorted(stats[0].keys()) == sorted(VALID_STATS) - stats = zonal_stats(polygons, raster, stats='*') + stats = zonal_stats(polygons, raster, stats="*") assert sorted(stats[0].keys()) == sorted(VALID_STATS) def test_specify_stats_string(): - polygons = os.path.join(DATA, 'polygons.shp') - stats = zonal_stats(polygons, raster, stats='min max') - assert sorted(stats[0].keys()) == sorted(['min', 'max']) - assert 'count' not in list(stats[0].keys()) + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, stats="min max") + assert sorted(stats[0].keys()) == sorted(["min", "max"]) + assert "count" not in list(stats[0].keys()) def test_specify_stats_invalid(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" with pytest.raises(ValueError): - zonal_stats(polygons, raster, stats='foo max') + zonal_stats(polygons, raster, stats="foo max") def test_optional_stats(): - polygons = os.path.join(DATA, 'polygons.shp') - stats = zonal_stats(polygons, raster, - stats='min max sum majority median std') - assert stats[0]['min'] <= stats[0]['median'] <= stats[0]['max'] + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, stats="min max sum majority median std") + assert stats[0]["min"] <= stats[0]["median"] <= stats[0]["max"] def test_range(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster, stats="range min max") for stat in stats: - assert stat['range'] == stat['max'] - stat['min'] - ranges = [x['range'] for x in stats] + assert stat["range"] == stat["max"] - stat["min"] + ranges = [x["range"] for x in stats] # without min/max specified stats = zonal_stats(polygons, raster, stats="range") - assert 'min' not in stats[0] - assert ranges == [x['range'] for x in stats] + assert "min" not in stats[0] + assert ranges == [x["range"] for x in stats] def test_nodata(): - polygons = os.path.join(DATA, 'polygons.shp') - categorical_raster = os.path.join(DATA, 'slope_classes.tif') - stats = zonal_stats(polygons, categorical_raster, stats="*", - categorical=True, nodata=1.0) - assert stats[0]['majority'] is None - assert stats[0]['count'] == 0 # no pixels; they're all null - assert stats[1]['minority'] == 2.0 - assert stats[1]['count'] == 49 # used to be 50 if we allowed 1.0 - assert '1.0' not in stats[0] + polygons = data_dir / "polygons.shp" + categorical_raster = data_dir / "slope_classes.tif" + stats = zonal_stats( + polygons, categorical_raster, stats="*", categorical=True, nodata=1.0 + ) + assert stats[0]["majority"] is None + assert stats[0]["count"] == 0 # no pixels; they're all null + assert 
stats[1]["minority"] == 2.0 + assert stats[1]["count"] == 49 # used to be 50 if we allowed 1.0 + assert "1.0" not in stats[0] + + +def test_dataset_mask(): + polygons = data_dir / "polygons.shp" + raster = data_dir / "dataset_mask.tif" + stats = zonal_stats(polygons, raster, stats="*") + assert stats[0]["count"] == 75 + assert stats[1]["count"] == 0 def test_partial_overlap(): - polygons = os.path.join(DATA, 'polygons_partial_overlap.shp') + polygons = data_dir / "polygons_partial_overlap.shp" stats = zonal_stats(polygons, raster, stats="count") for res in stats: # each polygon should have at least a few pixels overlap - assert res['count'] > 0 + assert res["count"] > 0 def test_no_overlap(): - polygons = os.path.join(DATA, 'polygons_no_overlap.shp') + polygons = data_dir / "polygons_no_overlap.shp" stats = zonal_stats(polygons, raster, stats="count") for res in stats: # no polygon should have any overlap - assert res['count'] is 0 + assert res["count"] == 0 + def test_all_touched(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster, all_touched=True) - assert stats[0]['count'] == 95 # 75 if ALL_TOUCHED=False - assert stats[1]['count'] == 73 # 50 if ALL_TOUCHED=False + assert stats[0]["count"] == 95 # 75 if ALL_TOUCHED=False + assert stats[1]["count"] == 73 # 50 if ALL_TOUCHED=False def test_ndarray_without_affine(): with rasterio.open(raster) as src: - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" with pytest.raises(ValueError): zonal_stats(polygons, src.read(1)) # needs affine kwarg def _assert_dict_eq(a, b): - """Assert that dicts a and b similar within floating point precision - """ + """Assert that dicts a and b similar within floating point precision""" err = 1e-5 for k in set(a.keys()).union(set(b.keys())): if a[k] == b[k]: continue try: - if abs(a[k]-b[k]) > err: - raise AssertionError("{}: {} != {}".format(k, a[k], b[k])) + if abs(a[k] - b[k]) > err: + raise AssertionError(f"{k}: {a[k]} != {b[k]}") except TypeError: # can't take abs, nan - raise AssertionError("{} != {}".format(a[k], b[k])) + raise AssertionError(f"{a[k]} != {b[k]}") def test_ndarray(): with rasterio.open(raster) as src: arr = src.read(1) - affine = src.affine + affine = src.transform - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, arr, affine=affine) stats2 = zonal_stats(polygons, raster) for s1, s2 in zip(stats, stats2): _assert_dict_eq(s1, s2) with pytest.raises(AssertionError): _assert_dict_eq(stats[0], stats[1]) - assert stats[0]['count'] == 75 - assert stats[1]['count'] == 50 + assert stats[0]["count"] == 75 + assert stats[1]["count"] == 50 - points = os.path.join(DATA, 'points.shp') + points = data_dir / "points.shp" stats = zonal_stats(points, arr, affine=affine) assert stats == zonal_stats(points, raster) - assert sum([x['count'] for x in stats]) == 3 - assert round(stats[0]['mean'], 3) == 11.386 - assert round(stats[1]['mean'], 3) == 35.547 + assert sum([x["count"] for x in stats]) == 3 + assert round(stats[0]["mean"], 3) == 11.386 + assert round(stats[1]["mean"], 3) == 35.547 def test_alias(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster) - stats2 = raster_stats(polygons, raster) + with pytest.deprecated_call(): + stats2 = raster_stats(polygons, raster) assert stats == stats2 - pytest.deprecated_call(raster_stats, polygons, raster) def 
test_add_stats(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" def mymean(x): return np.ma.mean(x) - stats = zonal_stats(polygons, raster, add_stats={'mymean': mymean}) + stats = zonal_stats(polygons, raster, add_stats={"mymean": mymean}) for i in range(len(stats)): - assert stats[i]['mean'] == stats[i]['mymean'] + assert stats[i]["mean"] == stats[i]["mymean"] + + +def test_add_stats_prop(): + polygons = data_dir / "polygons.shp" + + def mymean_prop(x, prop): + return np.ma.mean(x) * prop["id"] + + stats = zonal_stats(polygons, raster, add_stats={"mymean_prop": mymean_prop}) + for i in range(len(stats)): + assert stats[i]["mymean_prop"] == stats[i]["mean"] * (i + 1) + + +def test_add_stats_prop_and_array(): + polygons = data_dir / "polygons.shp" + + def mymean_prop_and_array(x, prop, rv_array): + # confirm that the object exists and is accessible. + assert rv_array is not None + return np.ma.mean(x) * prop["id"] + + stats = zonal_stats( + polygons, raster, add_stats={"mymean_prop_and_array": mymean_prop_and_array} + ) + for i in range(len(stats)): + assert stats[i]["mymean_prop_and_array"] == stats[i]["mean"] * (i + 1) def test_mini_raster(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" stats = zonal_stats(polygons, raster, raster_out=True) - stats2 = zonal_stats(polygons, stats[0]['mini_raster_array'], - raster_out=True, affine=stats[0]['mini_raster_affine']) - assert (stats[0]['mini_raster_array'] == stats2[0]['mini_raster_array']).sum() == \ - stats[0]['count'] + stats2 = zonal_stats( + polygons, + stats[0]["mini_raster_array"], + raster_out=True, + affine=stats[0]["mini_raster_affine"], + ) + assert ( + stats[0]["mini_raster_array"] == stats2[0]["mini_raster_array"] + ).sum() == stats[0]["count"] def test_percentile_good(): - polygons = os.path.join(DATA, 'polygons.shp') - stats = zonal_stats(polygons, raster, - stats="median percentile_50 percentile_90") - assert 'percentile_50' in stats[0].keys() - assert 'percentile_90' in stats[0].keys() - assert stats[0]['percentile_50'] == stats[0]['median'] - assert stats[0]['percentile_50'] <= stats[0]['percentile_90'] + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, stats="median percentile_50 percentile_90") + assert "percentile_50" in stats[0].keys() + assert "percentile_90" in stats[0].keys() + assert stats[0]["percentile_50"] == stats[0]["median"] + assert stats[0]["percentile_50"] <= stats[0]["percentile_90"] -def test_zone_func_good(): +def test_zone_func_has_return(): + def example_zone_func(zone_arr): + return np.ma.masked_array(np.full(zone_arr.shape, 1)) + + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, zone_func=example_zone_func) + assert stats[0]["max"] == 1 + assert stats[0]["min"] == 1 + assert stats[0]["mean"] == 1 + +def test_zone_func_good(): def example_zone_func(zone_arr): zone_arr[:] = 0 - polygons = os.path.join(DATA, 'polygons.shp') - stats = zonal_stats(polygons, - raster, - zone_func=example_zone_func) - assert stats[0]['max'] == 0 - assert stats[0]['min'] == 0 - assert stats[0]['mean'] == 0 + polygons = data_dir / "polygons.shp" + stats = zonal_stats(polygons, raster, zone_func=example_zone_func) + assert stats[0]["max"] == 0 + assert stats[0]["min"] == 0 + assert stats[0]["mean"] == 0 + def test_zone_func_bad(): - not_a_func = 'jar jar binks' - polygons = os.path.join(DATA, 'polygons.shp') + not_a_func = "jar jar binks" + polygons = data_dir / "polygons.shp" with 
-def test_zone_func_good():
+def test_zone_func_has_return():
+    def example_zone_func(zone_arr):
+        return np.ma.masked_array(np.full(zone_arr.shape, 1))
+
+    polygons = data_dir / "polygons.shp"
+    stats = zonal_stats(polygons, raster, zone_func=example_zone_func)
+    assert stats[0]["max"] == 1
+    assert stats[0]["min"] == 1
+    assert stats[0]["mean"] == 1
+
 
+def test_zone_func_good():
     def example_zone_func(zone_arr):
         zone_arr[:] = 0
 
-    polygons = os.path.join(DATA, 'polygons.shp')
-    stats = zonal_stats(polygons,
-                        raster,
-                        zone_func=example_zone_func)
-    assert stats[0]['max'] == 0
-    assert stats[0]['min'] == 0
-    assert stats[0]['mean'] == 0
+    polygons = data_dir / "polygons.shp"
+    stats = zonal_stats(polygons, raster, zone_func=example_zone_func)
+    assert stats[0]["max"] == 0
+    assert stats[0]["min"] == 0
+    assert stats[0]["mean"] == 0
+
 
 def test_zone_func_bad():
-    not_a_func = 'jar jar binks'
-    polygons = os.path.join(DATA, 'polygons.shp')
+    not_a_func = "jar jar binks"
+    polygons = data_dir / "polygons.shp"
     with pytest.raises(TypeError):
         zonal_stats(polygons, raster, zone_func=not_a_func)
+
 
 def test_percentile_nodata():
-    polygons = os.path.join(DATA, 'polygons.shp')
-    categorical_raster = os.path.join(DATA, 'slope_classes.tif')
+    polygons = data_dir / "polygons.shp"
+    categorical_raster = data_dir / "slope_classes.tif"
     # By setting nodata to 1, one of our polygons is within the raster extent
     # but has an empty masked array
-    stats = zonal_stats(polygons, categorical_raster,
-                        stats=["percentile_90"], nodata=1)
-    assert 'percentile_90' in stats[0].keys()
-    assert [None, 5.0] == [x['percentile_90'] for x in stats]
+    stats = zonal_stats(polygons, categorical_raster, stats=["percentile_90"], nodata=1)
+    assert "percentile_90" in stats[0].keys()
+    assert [None, 5.0] == [x["percentile_90"] for x in stats]
 
 
 def test_percentile_bad():
-    polygons = os.path.join(DATA, 'polygons.shp')
+    polygons = data_dir / "polygons.shp"
     with pytest.raises(ValueError):
         zonal_stats(polygons, raster, stats="percentile_101")
 
 
 def test_json_serializable():
-    polygons = os.path.join(DATA, 'polygons.shp')
-    stats = zonal_stats(polygons, raster,
-                        stats=VALID_STATS + ["percentile_90"],
-                        categorical=True)
+    polygons = data_dir / "polygons.shp"
+    stats = zonal_stats(
+        polygons, raster, stats=VALID_STATS + ["percentile_90"], categorical=True
+    )
     try:
         json.dumps(stats)
         simplejson.dumps(stats)
@@ -357,7 +404,7 @@ def test_json_serializable():
 
 
 def test_direct_features_collections():
-    polygons = os.path.join(DATA, 'polygons.shp')
+    polygons = data_dir / "polygons.shp"
     features = read_features(polygons)
     collection = read_featurecollection(polygons)
@@ -369,169 +416,162 @@ def test_direct_features_collections():
 
 
 def test_all_nodata():
-    polygons = os.path.join(DATA, 'polygons.shp')
-    raster = os.path.join(DATA, 'all_nodata.tif')
-    stats = zonal_stats(polygons, raster, stats=['nodata', 'count'])
-    assert stats[0]['nodata'] == 75
-    assert stats[0]['count'] == 0
-    assert stats[1]['nodata'] == 50
-    assert stats[1]['count'] == 0
+    polygons = data_dir / "polygons.shp"
+    raster = data_dir / "all_nodata.tif"
+    stats = zonal_stats(polygons, raster, stats=["nodata", "count"])
+    assert stats[0]["nodata"] == 75
+    assert stats[0]["count"] == 0
+    assert stats[1]["nodata"] == 50
+    assert stats[1]["count"] == 0
+
 
 def test_some_nodata():
-    polygons = os.path.join(DATA, 'polygons.shp')
-    raster = os.path.join(DATA, 'slope_nodata.tif')
-    stats = zonal_stats(polygons, raster, stats=['nodata', 'count'])
-    assert stats[0]['nodata'] == 36
-    assert stats[0]['count'] == 39
-    assert stats[1]['nodata'] == 19
-    assert stats[1]['count'] == 31
+    polygons = data_dir / "polygons.shp"
+    raster = data_dir / "slope_nodata.tif"
+    stats = zonal_stats(polygons, raster, stats=["nodata", "count"])
+    assert stats[0]["nodata"] == 36
+    assert stats[0]["count"] == 39
+    assert stats[1]["nodata"] == 19
+    assert stats[1]["count"] == 31
 
 
 # update this if nan end up being incorporated into nodata
 def test_nan_nodata():
     polygon = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]])
-    arr = np.array([
-        [np.nan, 12.25],
-        [-999, 12.75]
-    ])
-    affine = Affine(1, 0, 0,
-                    0, -1, 2)
+    arr = np.array([[np.nan, 12.25], [-999, 12.75]])
+    affine = Affine(1, 0, 0, 0, -1, 2)
 
-    stats = zonal_stats(polygon, arr, affine=affine, nodata=-999,
-                        stats='nodata count sum mean min max')
+    stats = zonal_stats(
+        polygon, arr, affine=affine, nodata=-999, stats="nodata count sum mean min max"
+    )
 
-    assert stats[0]['nodata'] == 1
-    assert stats[0]['count'] == 2
-    assert stats[0]['mean'] == 12.5
-    assert stats[0]['min'] == 12.25
-    assert stats[0]['max'] == 12.75
+    assert stats[0]["nodata"] == 1
+    assert stats[0]["count"] == 2
+    assert stats[0]["mean"] == 12.5
+    assert stats[0]["min"] == 12.25
+    assert stats[0]["max"] == 12.75
 
 
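+# ndarray input carries no metadata, so nodata must be passed explicitly;
+# without it, fill values such as -9999 are treated as valid data.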
 def test_some_nodata_ndarray():
-    polygons = os.path.join(DATA, 'polygons.shp')
-    raster = os.path.join(DATA, 'slope_nodata.tif')
+    polygons = data_dir / "polygons.shp"
+    raster = data_dir / "slope_nodata.tif"
     with rasterio.open(raster) as src:
         arr = src.read(1)
-        affine = src.affine
+        affine = src.transform
 
     # without nodata
-    stats = zonal_stats(polygons, arr, affine=affine, stats=['nodata', 'count', 'min'])
-    assert stats[0]['min'] == -9999.0
-    assert stats[0]['nodata'] == 0
-    assert stats[0]['count'] == 75
+    stats = zonal_stats(polygons, arr, affine=affine, stats=["nodata", "count", "min"])
+    assert stats[0]["min"] == -9999.0
+    assert stats[0]["nodata"] == 0
+    assert stats[0]["count"] == 75
 
     # with nodata
-    stats = zonal_stats(polygons, arr, affine=affine,
-                        nodata=-9999.0, stats=['nodata', 'count', 'min'])
-    assert stats[0]['min'] >= 0.0
-    assert stats[0]['nodata'] == 36
-    assert stats[0]['count'] == 39
+    stats = zonal_stats(
+        polygons, arr, affine=affine, nodata=-9999.0, stats=["nodata", "count", "min"]
+    )
+    assert stats[0]["min"] >= 0.0
+    assert stats[0]["nodata"] == 36
+    assert stats[0]["count"] == 39
 
 
 def test_transform():
     with rasterio.open(raster) as src:
         arr = src.read(1)
-        affine = src.affine
-    polygons = os.path.join(DATA, 'polygons.shp')
+        affine = src.transform
+    polygons = data_dir / "polygons.shp"
     stats = zonal_stats(polygons, arr, affine=affine)
-    stats2 = zonal_stats(polygons, arr, transform=affine.to_gdal())
+    with pytest.deprecated_call():
+        stats2 = zonal_stats(polygons, arr, transform=affine.to_gdal())
     assert stats == stats2
-    pytest.deprecated_call(zonal_stats, polygons, raster, transform=affine.to_gdal())
 
 
 def test_prefix():
-    polygons = os.path.join(DATA, 'polygons.shp')
+    polygons = data_dir / "polygons.shp"
     stats = zonal_stats(polygons, raster, prefix="TEST")
-    for key in ['count', 'min', 'max', 'mean']:
+    for key in ["count", "min", "max", "mean"]:
         assert key not in stats[0]
-    for key in ['TESTcount', 'TESTmin', 'TESTmax', 'TESTmean']:
+    for key in ["TESTcount", "TESTmin", "TESTmax", "TESTmean"]:
         assert key in stats[0]
 
 
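+# geojson_out=True returns each result as a GeoJSON-like Feature mapping,
+# with the computed stats merged into the feature's "properties" dict.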
"mean"]: + assert key in stats[0]["properties"] - assert stats[0]['properties']['mean'] == 34 + assert stats[0]["properties"]["mean"] == 34 # remove when copy_properties alias is removed def test_copy_properties_warn(): - polygons = os.path.join(DATA, 'polygons.shp') + polygons = data_dir / "polygons.shp" # run once to trigger any other unrelated deprecation warnings # so the test does not catch them instead stats_a = zonal_stats(polygons, raster) with pytest.deprecated_call(): stats_b = zonal_stats(polygons, raster, copy_properties=True) assert stats_a == stats_b - + def test_nan_counts(): from affine import Affine + transform = Affine(1, 0, 1, 0, -1, 3) - data = np.array([ - [np.nan, np.nan, np.nan], - [0, 0, 0], - [1, 4, 5] - ]) + data = np.array([[np.nan, np.nan, np.nan], [0, 0, 0], [1, 4, 5]]) # geom extends an additional row to left - geom = 'POLYGON ((1 0, 4 0, 4 3, 1 3, 1 0))' + geom = "POLYGON ((1 0, 4 0, 4 3, 1 3, 1 0))" # nan stat is requested stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*") for res in stats: - assert res['count'] == 3 # 3 pixels of valid data - assert res['nodata'] == 3 # 3 pixels of nodata - assert res['nan'] == 3 # 3 pixels of nans + assert res["count"] == 3 # 3 pixels of valid data + assert res["nodata"] == 3 # 3 pixels of nodata + assert res["nan"] == 3 # 3 pixels of nans # nan are ignored if nan stat is not requested stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="count nodata") for res in stats: - assert res['count'] == 3 # 3 pixels of valid data - assert res['nodata'] == 3 # 3 pixels of nodata - assert 'nan' not in res + assert res["count"] == 3 # 3 pixels of valid data + assert res["nodata"] == 3 # 3 pixels of nodata + assert "nan" not in res # Optional tests def test_geodataframe_zonal(): - polygons = os.path.join(DATA, 'polygons.shp') + gpd = pytest.importorskip("geopandas") - try: - import geopandas as gpd - df = gpd.read_file(polygons) - if not hasattr(df, '__geo_interface__'): - pytest.skip("This version of geopandas doesn't support df.__geo_interface__") - except ImportError: - pytest.skip("Can't import geopands") + polygons = data_dir / "polygons.shp" + df = gpd.read_file(polygons) + if not hasattr(df, "__geo_interface__"): + pytest.skip("This version of geopandas doesn't support df.__geo_interface__") expected = zonal_stats(polygons, raster) assert zonal_stats(df, raster) == expected + +# TODO # gen_zonal_stats() +# TODO # gen_zonal_stats(stats=nodata) +# TODO # gen_zonal_stats() +# TODO # gen_zonal_stats(transform AND affine>)