diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 5c9eb68..48e19b8 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -10,7 +10,9 @@ from .io import read_features, Raster from .utils import (rasterize_geom, get_percentile, check_stats, - remap_categories, key_assoc_val, boxify_points) + remap_categories, key_assoc_val, boxify_points, + rasterize_pctcover_geom, + rs_mean, rs_count, rs_sum) def raster_stats(*args, **kwargs): @@ -39,6 +41,9 @@ def gen_zonal_stats( affine=None, stats=None, all_touched=False, + percent_cover_selection=None, + percent_cover_weighting=False, + percent_cover_scale=None, categorical=False, category_map=None, add_stats=None, @@ -83,6 +88,29 @@ def gen_zonal_stats( those having a center point within the polygon. defaults to `False` + percent_cover_selection: float, optional + Include only raster cells that have at least the given percent + covered by the vector feature. Requires percent_cover_scale argument + be used to specify scale at which to generate percent coverage + estimates + + percent_cover_weighting: bool, optional + whether or not to use percent coverage of cells during calculations + to adjust stats (only applies to mean, count and sum) + + percent_cover_scale: int, optional + Scale used when generating percent coverage estimates of each + raster cell by vector feature. Percent coverage is generated by + rasterizing the feature at a finer resolution than the raster + (based on percent_cover_scale value) then using a summation to aggregate + to the raster resolution and dividing by the square of percent_cover_scale + to get percent coverage value for each cell. Increasing percent_cover_scale + will increase the accuracy of percent coverage values; three orders + magnitude finer resolution (percent_cover_scale=1000) is usually enough to + get coverage estimates with <1% error in individual edge cells coverage + estimates, though much smaller values (e.g., percent_cover_scale=10) are often + sufficient (<10% error) and require less memory. + categorical: bool, optional category_map: dict @@ -142,6 +170,49 @@ def gen_zonal_stats( warnings.warn("Use `band` to specify band number", DeprecationWarning) band = band_num + # check inputs related to percent coverage + percent_cover = False + if percent_cover_weighting or percent_cover_selection is not None: + percent_cover = True + if percent_cover_scale is None: + warnings.warn('No value for `percent_cover_scale` was given. ' + 'Using default value of 10.') + percent_cover_scale = 10 + + try: + if percent_cover_scale != int(percent_cover_scale): + warnings.warn('Value for `percent_cover_scale` given ({0}) ' + 'was converted to int ({1}) but does not ' + 'match original value'.format( + percent_cover_scale, int(percent_cover_scale))) + + percent_cover_scale = int(percent_cover_scale) + + if percent_cover_scale <= 1: + raise Exception('Value for `percent_cover_scale` must be ' + 'greater than one ({0})'.format( + percent_cover_scale)) + + except: + raise Exception('Invalid value for `percent_cover_scale` ' + 'provided ({0}). Must be type int.'.format( + percent_cover_scale)) + + if percent_cover_selection is not None: + try: + percent_cover_selection = float(percent_cover_selection) + except: + raise Exception('Invalid value for `percent_cover_selection` ' + 'provided ({0}). Must be able to be converted ' + 'to a float.'.format(percent_cover_selection)) + + # if not all_touched: + # warnings.warn('`all_touched` was not enabled but an option requiring ' + # 'percent_cover calculations was selected. Automatically ' + # 'enabling `all_touched`.') + # all_touched = True + + with Raster(raster, affine, nodata, band) as rast: features_iter = read_features(vectors, layer) for _, feat in enumerate(features_iter): @@ -149,13 +220,24 @@ def gen_zonal_stats( if 'Point' in geom.type: geom = boxify_points(geom, rast) + percent_cover = False geom_bounds = tuple(geom.bounds) fsrc = rast.read(bounds=geom_bounds) # rasterized geometry - rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) + if percent_cover: + cover_weights = rasterize_pctcover_geom( + geom, like=fsrc, + scale=percent_cover_scale, + all_touched=all_touched) + rv_array = cover_weights > (percent_cover_selection or 0) + else: + cover_weights = None + rv_array = rasterize_geom( + geom, like=fsrc, + all_touched=all_touched) # nodata mask isnodata = (fsrc.array == fsrc.nodata) @@ -169,10 +251,12 @@ def gen_zonal_stats( # Mask the source data array # mask everything that is not a valid value or not within our geom + masked = np.ma.MaskedArray( fsrc.array, mask=(isnodata | ~rv_array)) + # If we're on 64 bit platform and the array is an integer type # make sure we cast to 64 bit to avoid overflow. # workaround for https://github.com/numpy/numpy/issues/8433 @@ -212,12 +296,12 @@ def gen_zonal_stats( if 'max' in stats: feature_stats['max'] = float(masked.max()) if 'mean' in stats: - feature_stats['mean'] = float(masked.mean()) + feature_stats['mean'] = rs_mean(masked, cover_weights) if 'count' in stats: - feature_stats['count'] = int(masked.count()) + feature_stats['count'] = rs_count(masked, cover_weights) # optional if 'sum' in stats: - feature_stats['sum'] = float(masked.sum()) + feature_stats['sum'] = rs_sum(masked, cover_weights) if 'std' in stats: feature_stats['std'] = float(masked.std()) if 'median' in stats: diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index cf57b94..07e1bcd 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -2,7 +2,10 @@ from __future__ import absolute_import from __future__ import division import sys +import numpy as np from rasterio import features +from affine import Affine +from numpy import min_scalar_type from shapely.geometry import box, MultiPolygon from .io import window_bounds @@ -45,10 +48,54 @@ def rasterize_geom(geom, like, all_touched=False): fill=0, dtype='uint8', all_touched=all_touched) - return rv_array.astype(bool) +# https://stackoverflow.com/questions/8090229/ +# resize-with-averaging-or-rebin-a-numpy-2d-array/8090605#8090605 +def rebin_sum(a, shape, dtype): + sh = shape[0],a.shape[0]//shape[0],shape[1],a.shape[1]//shape[1] + return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype) + + +class objectview(object): + def __init__(self, d): + self.__dict__ = d + +def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False): + """ + Parameters + ---------- + geom: GeoJSON geometry + like: raster object with desired shape and transform + scale: scale at which to generate percent cover estimate + + Returns + ------- + ndarray: float32 + """ + scale = scale if scale is not None else 10 + min_dtype = min_scalar_type(scale**2) + + pixel_size_lon = like.affine[0]/scale + pixel_size_lat = like.affine[4]/scale + + topleftlon = like.affine[2] + topleftlat = like.affine[5] + + new_affine = Affine(pixel_size_lon, 0, topleftlon, + 0, pixel_size_lat, topleftlat) + + new_shape = (like.shape[0]*scale, like.shape[1]*scale) + + new_like = objectview({'shape': new_shape, 'affine': new_affine}) + + rv_array = rasterize_geom(geom, new_like, all_touched=all_touched) + rv_array = rebin_sum(rv_array, like.shape, min_dtype) + + return rv_array.astype('float32') / (scale**2) + + def stats_to_csv(stats): if sys.version_info[0] >= 3: from io import StringIO as IO # pragma: no cover @@ -146,3 +193,30 @@ def boxify_points(geom, rast): geoms.append(box(*window_bounds(win, rast.affine)).buffer(buff)) return MultiPolygon(geoms) + + + +def rs_mean(masked, cover_weights=None): + if cover_weights is not None: + val = float( + np.sum(masked * cover_weights) / + np.sum(~masked.mask * cover_weights)) + else: + val = float(masked.mean()) + return val + + +def rs_count(masked, cover_weights=None): + if cover_weights is not None: + val = float(np.sum(~masked.mask * cover_weights)) + else: + val = int(masked.count()) + return val + + +def rs_sum(masked, cover_weights=None): + if cover_weights is not None: + val = float(np.sum(masked * cover_weights)) + else: + val = float(masked.sum()) + return val diff --git a/tests/test_cli.py b/tests/test_cli.py index b6834aa..3c6c6e2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -26,6 +26,25 @@ def test_cli_feature(): assert 'test_count' not in feature['properties'] +def test_cli_feature_info(): + raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') + vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson') + runner = CliRunner() + warnings.simplefilter('ignore') + result = runner.invoke(zonalstats, [vector, + '--raster', raster, + '--stats', 'mean', + '--prefix', 'test_', + '--info']) + assert result.exit_code == 0 + outdata = json.loads(result.output) + assert len(outdata['features']) == 1 + feature = outdata['features'][0] + assert 'test_mean' in feature['properties'] + assert round(feature['properties']['test_mean'], 2) == 14.66 + assert 'test_count' not in feature['properties'] + + def test_cli_feature_stdin(): raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson') diff --git a/tests/test_point.py b/tests/test_point.py index 0ea48ab..725fa00 100644 --- a/tests/test_point.py +++ b/tests/test_point.py @@ -1,4 +1,5 @@ import os +import pytest import rasterio from rasterstats.point import point_window_unitxy, bilinear, geom_xys from rasterstats import point_query @@ -86,6 +87,12 @@ def test_point_query(): assert round(val) == 74 +def test_point_query_invalid_interp(): + point = "POINT(245309 1000064)" + with pytest.raises(ValueError): + point_query(point, raster, interpolate="invalid_type") + + def test_point_query_geojson(): point = "POINT(245309 1000064)" features = point_query(point, raster, property_name="TEST", geojson_out=True) diff --git a/tests/test_utils.py b/tests/test_utils.py index a6edc06..d205ca6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,9 +1,12 @@ import sys import os import pytest -from shapely.geometry import LineString +import numpy as np +from affine import Affine +from shapely.geometry import LineString, Polygon from rasterstats.utils import \ - stats_to_csv, get_percentile, remap_categories, boxify_points + stats_to_csv, get_percentile, remap_categories, boxify_points, \ + rebin_sum, rasterize_pctcover_geom from rasterstats import zonal_stats from rasterstats.utils import VALID_STATS @@ -63,3 +66,53 @@ def test_boxify_non_point(): line = LineString([(0, 0), (1, 1)]) with pytest.raises(ValueError): boxify_points(line, None) + + +def test_rebin_sum(): + test_input = np.array( + [ + [1, 1, 2, 2], + [1, 1, 2, 2], + [3, 3, 4, 4], + [3, 3, 4, 4] + ]) + test_output = rebin_sum(test_input, (2,2), np.int32) + correct_output = np.array([[4, 8],[12, 16]]) + assert np.array_equal(test_output, correct_output) + + +def test_rasterize_pctcover_geom(): + # https://goodcode.io/articles/python-dict-object/ + class objectview(object): + def __init__(self, d): + self.__dict__ = d + + polygon_a = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]]) + shape_a = (2, 2) + affine_a = Affine(1, 0, 0, + 0, -1, 2) + like_a = objectview({'shape': shape_a, 'affine': affine_a}) + + pct_cover_a = rasterize_pctcover_geom(polygon_a, like_a, scale=10, all_touched=False) + correct_output_a = np.array([[1, 1], [1, 1]]) + assert np.array_equal(pct_cover_a, correct_output_a) + + polygon_b = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]]) + shape_b = (2, 2) + affine_b = Affine(1, 0, 0, + 0, -1, 2) + like_b = objectview({'shape': shape_b, 'affine': affine_b}) + + pct_cover_b = rasterize_pctcover_geom(polygon_b, like_b, scale=10, all_touched=False) + correct_output_b = np.array([[0.25, 0.25], [0.25, 0.25]]) + assert np.array_equal(pct_cover_b, correct_output_b) + + polygon_c = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]]) + shape_c = (2, 2) + affine_c = Affine(1, 0, 0, + 0, -1, 2) + like_c = objectview({'shape': shape_c, 'affine': affine_c}) + + pct_cover_c = rasterize_pctcover_geom(polygon_c, like_c, scale=100, all_touched=False) + correct_output_c = np.array([[0.25, 0.25], [0.25, 0.25]]) + assert np.array_equal(pct_cover_c, correct_output_c) diff --git a/tests/test_zonal.py b/tests/test_zonal.py index b5c3014..fd33d8c 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -519,6 +519,44 @@ def test_nan_counts(): assert 'nan' not in res +def test_percent_cover_zonal_stats(): + polygon = Polygon([[0, 0], [0, 0,5], [1, 1.5], [1.5, 2], [2, 2], [2, 0]]) + arr = np.array([ + [100, 1], + [100, 1] + ]) + affine = Affine(1, 0, 0, + 0, -1, 2) + + stats_options = 'min max mean count sum nodata' + + # run base case + stats_a = zonal_stats(polygon, arr, affine=affine, stats=stats_options) + assert stats_a[0]['mean'] == 34 + + # test selection + stats_b = zonal_stats(polygon, arr, affine=affine, percent_cover_selection=0.75, percent_cover_scale=10, stats=stats_options) + assert stats_b[0]['mean'] == 1 + + # test weighting + stats_c = zonal_stats(polygon, arr, affine=affine, percent_cover_weighting=True, percent_cover_scale=10, stats=stats_options) + assert round(stats_c[0]['count'], 2) == 2.6 + assert round(stats_c[0]['mean'], 2) == 29.56 + assert round(stats_c[0]['sum'], 2) == 76.85 + + # check that percent_cover_scale is set to 10 when not provided by user + stats_d = zonal_stats(polygon, arr, affine=affine, percent_cover_weighting=True, stats=stats_options) + assert round(stats_d[0]['mean'], 2) == round(stats_c[0]['mean'], 2) + + # check invalid percent_cover_scale value + with pytest.raises(Exception): + zonal_stats(polygon, arr, affine=affine, percent_cover_selection=0.75, percent_cover_scale=0.5) + + # check invalid percent_cover_selection value + with pytest.raises(Exception): + zonal_stats(polygon, arr, affine=affine, percent_cover_selection='one', percent_cover_scale=10) + + # Optional tests def test_geodataframe_zonal(): polygons = os.path.join(DATA, 'polygons.shp')