From 34dff525f2e70ed7f5a3c02ac090cbbd614cf7ef Mon Sep 17 00:00:00 2001 From: sgoodm Date: Mon, 21 Nov 2016 13:17:11 -0500 Subject: [PATCH 01/19] Add cell percent coverage selection method and stats weighting (with some test coverage). --- src/rasterstats/main.py | 111 +++++++++++++++++++++++++++++++++++---- src/rasterstats/utils.py | 51 ++++++++++++++++-- tests/test_utils.py | 18 +++++++ 3 files changed, 167 insertions(+), 13 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 8b2b450..dab8ec2 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -7,7 +7,8 @@ from shapely.geometry import shape from .io import read_features, Raster from .utils import (rasterize_geom, get_percentile, check_stats, - remap_categories, key_assoc_val, boxify_points) + remap_categories, key_assoc_val, boxify_points, + rasterize_pctcover_geom) def raster_stats(*args, **kwargs): @@ -36,6 +37,9 @@ def gen_zonal_stats( affine=None, stats=None, all_touched=False, + percent_cover_selection=None, + percent_cover_weighting=False, + percent_cover_scale=None, categorical=False, category_map=None, add_stats=None, @@ -80,6 +84,29 @@ def gen_zonal_stats( those having a center point within the polygon. defaults to `False` + percent_cover_selection: float, optional + Include only raster cells that have at least the given percent + covered by the vector feature. Requires percent_cover_scale argument + be used to specify scale at which to generate percent coverage + estimates + + percent_cover_weighting: bool, optional + whether or not to use percent coverage of cells during calculations + to adjust stats (only applies to mean, count and sum) + + percent_cover_scale: int, optional + Scale used when generating percent coverage estimates of each + raster cell by vector feature. Percent coverage is generated by + rasterizing the feature at a finer resolution than the raster + (based on percent_cover_scale value) then using a summation to aggregate + to the raster resolution and dividing by the square of percent_cover_scale + to get percent coverage value for each cell. Increasing percent_cover_scale + will increase the accuracy of percent coverage values; three orders + magnitude finer resolution (percent_cover_scale=1000) is usually enough to + get coverage estimates with <1% error in individual edge cells coverage + estimates, though much smaller values (e.g., percent_cover_scale=10) are often + sufficient (<10% error) and require less memory. + categorical: bool, optional category_map: dict @@ -139,6 +166,49 @@ def gen_zonal_stats( warnings.warn("Use `band` to specify band number", DeprecationWarning) band = band_num + # check inputs related to percent coverage + percent_cover = False + if percent_cover_weighting or percent_cover_selection is not None: + percent_cover = True + if percent_cover_scale is None: + warnings.warn('No value for `percent_cover_scale` was given. ' + 'Using default value of 10.') + percent_cover_scale = 10 + + try: + if percent_cover_scale != int(percent_cover_scale): + warnings.warn('Value for `percent_cover_scale` given ({0}) ' + 'was converted to int ({1}) but does not ' + 'match original value'.format( + percent_cover_scale, int(percent_cover_scale))) + + percent_cover_scale = int(percent_cover_scale) + + if percent_cover_scale <= 1: + raise Exception('Value for `percent_cover_scale` must be ' + 'greater than one ({0})'.format( + percent_cover_scale)) + + except: + raise Exception('Invalid value for `percent_cover_scale` ' + 'provided ({0}). Must be type int.'.format( + percent_cover_scale)) + + if percent_cover_selection is not None: + try: + percent_cover_selection = float(percent_cover_selection) + except: + raise Exception('Invalid value for `percent_cover_selection` ' + 'provided ({0}). Must be able to be converted ' + 'to a float.'.format(percent_cover_selection)) + + if not all_touched: + warnings.warn('`all_touched` was not enabled but an option requiring ' + 'percent_cover calculations was selected. Automatically ' + 'enabling `all_touched`.') + all_touched = True + + with Raster(raster, affine, nodata, band) as rast: features_iter = read_features(vectors, layer) for _, feat in enumerate(features_iter): @@ -146,13 +216,21 @@ def gen_zonal_stats( if 'Point' in geom.type: geom = boxify_points(geom, rast) + percent_cover = False geom_bounds = tuple(geom.bounds) fsrc = rast.read(bounds=geom_bounds) # rasterized geometry - rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) + if percent_cover: + rv_array = rasterize_pctcover_geom( + geom, shape=fsrc.shape, affine=fsrc.affine, + scale=percent_cover_scale) + else: + rv_array = rasterize_geom( + geom, shape=fsrc.shape, affine=fsrc.affine, + all_touched=all_touched) # nodata mask isnodata = (fsrc.array == fsrc.nodata) @@ -164,9 +242,14 @@ def gen_zonal_stats( # Mask the source data array # mask everything that is not a valid value or not within our geom - masked = np.ma.MaskedArray( - fsrc.array, - mask=(isnodata | ~rv_array)) + if percent_cover_selection is not None: + masked = np.ma.MaskedArray( + fsrc.array, + mask=(isnodata | ~rv_array | percent_cover > percent_cover_selection)) + else: + masked = np.ma.MaskedArray( + fsrc.array, + mask=(isnodata | ~rv_array)) # execute zone_func on masked zone ndarray if zone_func is not None: @@ -187,7 +270,6 @@ def gen_zonal_stats( pixel_count = dict(zip([np.asscalar(k) for k in keys], [np.asscalar(c) for c in counts])) - if categorical: feature_stats = dict(pixel_count) if category_map: @@ -200,12 +282,23 @@ def gen_zonal_stats( if 'max' in stats: feature_stats['max'] = float(masked.max()) if 'mean' in stats: - feature_stats['mean'] = float(masked.mean()) + if percent_cover_weighting: + feature_stats['mean'] = float( + np.sum(masked * rv_array) / + np.sum(~masked.mask * rv_array)) + else: + feature_stats['mean'] = float(masked.mean()) if 'count' in stats: - feature_stats['count'] = int(masked.count()) + if percent_cover_weighting: + feature_stats['count'] = float(np.sum(~masked.mask * rv_array)) + else: + feature_stats['count'] = int(masked.count()) # optional if 'sum' in stats: - feature_stats['sum'] = float(masked.sum()) + if percent_cover_weighting: + feature_stats['sum'] = float(np.sum(masked * rv_array)) + else: + feature_stats['sum'] = float(masked.sum()) if 'std' in stats: feature_stats['std'] = float(masked.std()) if 'median' in stats: diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 9987789..f8664a1 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -3,6 +3,8 @@ from __future__ import division import sys from rasterio import features +from affine import Affine +from numpy import min_scalar_type from shapely.geometry import box, MultiPolygon from .io import window_bounds @@ -25,12 +27,13 @@ def get_percentile(stat): return q -def rasterize_geom(geom, like, all_touched=False): +def rasterize_geom(geom, shape, affine, all_touched=False): """ Parameters ---------- geom: GeoJSON geometry - like: raster object with desired shape and transform + shape: desired shape + affine: desired transform all_touched: rasterization strategy Returns @@ -40,8 +43,8 @@ def rasterize_geom(geom, like, all_touched=False): geoms = [(geom, 1)] rv_array = features.rasterize( geoms, - out_shape=like.shape, - transform=like.affine, + out_shape=shape, + transform=affine, fill=0, dtype='uint8', all_touched=all_touched) @@ -49,6 +52,46 @@ def rasterize_geom(geom, like, all_touched=False): return rv_array.astype(bool) +# https://stackoverflow.com/questions/8090229/ +# resize-with-averaging-or-rebin-a-numpy-2d-array/8090605#8090605 +def rebin_sum(a, shape, dtype): + sh = shape[0],a.shape[0]//shape[0],shape[1],a.shape[1]//shape[1] + return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype) + + +def rasterize_pctcover_geom(geom, shape, affine, scale=None): + """ + Parameters + ---------- + geom: GeoJSON geometry + shape: desired shape + affine: desired transform + scale: scale at which to generate percent cover estimate + + Returns + ------- + ndarray: float32 + """ + if scale is None: + scale = 10 + + min_dtype = min_scalar_type(scale**2) + + pixel_size = affine[0]/scale + topleftlon = affine[2] + topleftlat = affine[5] + + new_affine = Affine(pixel_size, 0, topleftlon, + 0, -pixel_size, topleftlat) + + new_shape = (shape[0]*scale, shape[1]*scale) + + rv_array = rasterize_geom(geom, new_shape, new_affine, True) + rv_array = rebin_sum(rv_array, shape, min_dtype) + + return rv_array.astype('float32') / (scale**2) + + def stats_to_csv(stats): if sys.version_info[0] >= 3: from io import StringIO as IO # pragma: no cover diff --git a/tests/test_utils.py b/tests/test_utils.py index a6edc06..ebfc057 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,7 @@ import sys import os import pytest +import numpy as np from shapely.geometry import LineString from rasterstats.utils import \ stats_to_csv, get_percentile, remap_categories, boxify_points @@ -63,3 +64,20 @@ def test_boxify_non_point(): line = LineString([(0, 0), (1, 1)]) with pytest.raises(ValueError): boxify_points(line, None) + + +def test_rebin_sum(): + + test_input = np.array( + [ + [1, 1, 2, 2], + [1, 1, 2, 2], + [3, 3, 4, 4], + [3, 3, 4, 4] + ]) + + test_output = rebin_sum(test_input, (2,2), np.int32) + + correct_output = np.array([[4, 8],[12, 16]]) + + assert np.array_equal(test_output, correct_output) From ba627d793de2a57f81b759302bfcc9ada50f0e63 Mon Sep 17 00:00:00 2001 From: sgoodm Date: Mon, 21 Nov 2016 13:41:44 -0500 Subject: [PATCH 02/19] Fix tests. --- tests/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index ebfc057..24b20ed 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,8 @@ import numpy as np from shapely.geometry import LineString from rasterstats.utils import \ - stats_to_csv, get_percentile, remap_categories, boxify_points + stats_to_csv, get_percentile, remap_categories, boxify_points, \ + rebin_sum from rasterstats import zonal_stats from rasterstats.utils import VALID_STATS From 6725da1834cee629a617c7a50c0faae935a25c24 Mon Sep 17 00:00:00 2001 From: sgoodm Date: Wed, 11 Jan 2017 11:04:59 -0500 Subject: [PATCH 03/19] Modify all_touched for percent cover to rely on user input --- src/rasterstats/main.py | 13 +++++++------ src/rasterstats/utils.py | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index dab8ec2..7185100 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -202,11 +202,11 @@ def gen_zonal_stats( 'provided ({0}). Must be able to be converted ' 'to a float.'.format(percent_cover_selection)) - if not all_touched: - warnings.warn('`all_touched` was not enabled but an option requiring ' - 'percent_cover calculations was selected. Automatically ' - 'enabling `all_touched`.') - all_touched = True + # if not all_touched: + # warnings.warn('`all_touched` was not enabled but an option requiring ' + # 'percent_cover calculations was selected. Automatically ' + # 'enabling `all_touched`.') + # all_touched = True with Raster(raster, affine, nodata, band) as rast: @@ -226,7 +226,8 @@ def gen_zonal_stats( if percent_cover: rv_array = rasterize_pctcover_geom( geom, shape=fsrc.shape, affine=fsrc.affine, - scale=percent_cover_scale) + scale=percent_cover_scale, + all_touched=all_touched) else: rv_array = rasterize_geom( geom, shape=fsrc.shape, affine=fsrc.affine, diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index f8664a1..49372a5 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -59,7 +59,7 @@ def rebin_sum(a, shape, dtype): return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype) -def rasterize_pctcover_geom(geom, shape, affine, scale=None): +def rasterize_pctcover_geom(geom, shape, affine, scale=None, all_touched=False): """ Parameters ---------- @@ -86,7 +86,7 @@ def rasterize_pctcover_geom(geom, shape, affine, scale=None): new_shape = (shape[0]*scale, shape[1]*scale) - rv_array = rasterize_geom(geom, new_shape, new_affine, True) + rv_array = rasterize_geom(geom, new_shape, new_affine, all_touched=all_touched) rv_array = rebin_sum(rv_array, shape, min_dtype) return rv_array.astype('float32') / (scale**2) From 4ffe2ad2868cb3fd5a05ed7687a0b0366e1a532c Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Fri, 3 Mar 2017 14:10:39 +0100 Subject: [PATCH 04/19] Clearly distinguish between raster mask and weight arrays --- src/rasterstats/main.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 7185100..eea4d6b 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -224,10 +224,11 @@ def gen_zonal_stats( # rasterized geometry if percent_cover: - rv_array = rasterize_pctcover_geom( + cover_weights = rasterize_pctcover_geom( geom, shape=fsrc.shape, affine=fsrc.affine, scale=percent_cover_scale, all_touched=all_touched) + rv_array = cover_weights > (percent_cover_selection or 0) else: rv_array = rasterize_geom( geom, shape=fsrc.shape, affine=fsrc.affine, @@ -243,14 +244,9 @@ def gen_zonal_stats( # Mask the source data array # mask everything that is not a valid value or not within our geom - if percent_cover_selection is not None: - masked = np.ma.MaskedArray( - fsrc.array, - mask=(isnodata | ~rv_array | percent_cover > percent_cover_selection)) - else: - masked = np.ma.MaskedArray( - fsrc.array, - mask=(isnodata | ~rv_array)) + masked = np.ma.MaskedArray( + fsrc.array, + mask=(isnodata | ~rv_array)) # execute zone_func on masked zone ndarray if zone_func is not None: @@ -285,19 +281,19 @@ def gen_zonal_stats( if 'mean' in stats: if percent_cover_weighting: feature_stats['mean'] = float( - np.sum(masked * rv_array) / - np.sum(~masked.mask * rv_array)) + np.sum(masked * cover_weights) / + np.sum(~masked.mask * cover_weights)) else: feature_stats['mean'] = float(masked.mean()) if 'count' in stats: if percent_cover_weighting: - feature_stats['count'] = float(np.sum(~masked.mask * rv_array)) + feature_stats['count'] = float(np.sum(~masked.mask * cover_weights)) else: feature_stats['count'] = int(masked.count()) # optional if 'sum' in stats: if percent_cover_weighting: - feature_stats['sum'] = float(np.sum(masked * rv_array)) + feature_stats['sum'] = float(np.sum(masked * cover_weights)) else: feature_stats['sum'] = float(masked.sum()) if 'std' in stats: From c0d9bc36902aab0a32594b8e5bb3ee8844d9c9fa Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Fri, 10 Mar 2017 21:30:07 +0100 Subject: [PATCH 05/19] Pixels aren't always square --- src/rasterstats/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 49372a5..b0f1d95 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -77,14 +77,13 @@ def rasterize_pctcover_geom(geom, shape, affine, scale=None, all_touched=False): min_dtype = min_scalar_type(scale**2) - pixel_size = affine[0]/scale + pixel_size_lon = affine[0]/scale + pixel_size_lat = affine[4]/scale topleftlon = affine[2] topleftlat = affine[5] - new_affine = Affine(pixel_size, 0, topleftlon, - 0, -pixel_size, topleftlat) - - new_shape = (shape[0]*scale, shape[1]*scale) + new_affine = Affine(pixel_size_lon, 0, topleftlon, + 0, pixel_size_lat, topleftlat) rv_array = rasterize_geom(geom, new_shape, new_affine, all_touched=all_touched) rv_array = rebin_sum(rv_array, shape, min_dtype) From a03eb04c1ddb1d69050f14de6073d59569747a01 Mon Sep 17 00:00:00 2001 From: sgoodm Date: Tue, 21 Mar 2017 11:40:13 -0400 Subject: [PATCH 06/19] Rename rasterize array for pct cover case, fix masked array mask definitions Create separate var name for pct cover rasterized array (rv_pct_array, float) to distinguish from basic rv_array (bool). Update masks for these to use np.logical_not to handle non bool data. Fix bug in pct cover selection mask (was using wrong var and wrong logical check. Updated all rv_array references related to pct cover cases to use new rv_pct_array --- src/rasterstats/main.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 7185100..552ef24 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -224,7 +224,7 @@ def gen_zonal_stats( # rasterized geometry if percent_cover: - rv_array = rasterize_pctcover_geom( + rv_pct_array = rasterize_pctcover_geom( geom, shape=fsrc.shape, affine=fsrc.affine, scale=percent_cover_scale, all_touched=all_touched) @@ -246,11 +246,15 @@ def gen_zonal_stats( if percent_cover_selection is not None: masked = np.ma.MaskedArray( fsrc.array, - mask=(isnodata | ~rv_array | percent_cover > percent_cover_selection)) + mask=(isnodata | np.logical_not(rv_pct_array) | rv_pct_array < percent_cover_selection)) + elif percent_cover: + masked = np.ma.MaskedArray( + fsrc.array, + mask=(isnodata | np.logical_not(rv_pct_array))) else: masked = np.ma.MaskedArray( fsrc.array, - mask=(isnodata | ~rv_array)) + mask=(isnodata | np.logical_not(rv_array))) # execute zone_func on masked zone ndarray if zone_func is not None: @@ -285,19 +289,19 @@ def gen_zonal_stats( if 'mean' in stats: if percent_cover_weighting: feature_stats['mean'] = float( - np.sum(masked * rv_array) / - np.sum(~masked.mask * rv_array)) + np.sum(masked * rv_pct_array) / + np.sum(~masked.mask * rv_pct_array)) else: feature_stats['mean'] = float(masked.mean()) if 'count' in stats: if percent_cover_weighting: - feature_stats['count'] = float(np.sum(~masked.mask * rv_array)) + feature_stats['count'] = float(np.sum(~masked.mask * rv_pct_array)) else: feature_stats['count'] = int(masked.count()) # optional if 'sum' in stats: if percent_cover_weighting: - feature_stats['sum'] = float(np.sum(masked * rv_array)) + feature_stats['sum'] = float(np.sum(masked * rv_pct_array)) else: feature_stats['sum'] = float(masked.sum()) if 'std' in stats: @@ -327,7 +331,11 @@ def gen_zonal_stats( feature_stats[pctile] = np.percentile(pctarr, q) if 'nodata' in stats: - featmasked = np.ma.MaskedArray(fsrc.array, mask=np.logical_not(rv_array)) + if percent_cover: + featmasked = np.ma.MaskedArray(fsrc.array, mask=np.logical_not(rv_pct_array)) + else: + featmasked = np.ma.MaskedArray(fsrc.array, mask=np.logical_not(rv_array)) + feature_stats['nodata'] = float((featmasked == fsrc.nodata).sum()) if add_stats is not None: From 795d86e76cc6051065ee05c27d043ba8541dc6e4 Mon Sep 17 00:00:00 2001 From: sgoodm Date: Tue, 21 Mar 2017 13:48:31 -0400 Subject: [PATCH 07/19] Add tests for percent cover functions --- .gitignore | 3 ++- src/rasterstats/utils.py | 9 ++------- tests/test_utils.py | 35 ++++++++++++++++++++++++++------ tests/test_zonal.py | 43 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 432f35f..b2e04b2 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ Vagrantfile *.ipynb_checkpoints* .idea venv -.eggs \ No newline at end of file +.eggs +.cache diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 49372a5..2b9db67 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -48,7 +48,6 @@ def rasterize_geom(geom, shape, affine, all_touched=False): fill=0, dtype='uint8', all_touched=all_touched) - return rv_array.astype(bool) @@ -74,21 +73,15 @@ def rasterize_pctcover_geom(geom, shape, affine, scale=None, all_touched=False): """ if scale is None: scale = 10 - min_dtype = min_scalar_type(scale**2) - pixel_size = affine[0]/scale topleftlon = affine[2] topleftlat = affine[5] - new_affine = Affine(pixel_size, 0, topleftlon, 0, -pixel_size, topleftlat) - new_shape = (shape[0]*scale, shape[1]*scale) - rv_array = rasterize_geom(geom, new_shape, new_affine, all_touched=all_touched) rv_array = rebin_sum(rv_array, shape, min_dtype) - return rv_array.astype('float32') / (scale**2) @@ -189,3 +182,5 @@ def boxify_points(geom, rast): geoms.append(box(*window_bounds(win, rast.affine)).buffer(buff)) return MultiPolygon(geoms) + + diff --git a/tests/test_utils.py b/tests/test_utils.py index 24b20ed..7836473 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,10 +2,11 @@ import os import pytest import numpy as np -from shapely.geometry import LineString +from affine import Affine +from shapely.geometry import LineString, Polygon from rasterstats.utils import \ stats_to_csv, get_percentile, remap_categories, boxify_points, \ - rebin_sum + rebin_sum, rasterize_pctcover_geom from rasterstats import zonal_stats from rasterstats.utils import VALID_STATS @@ -68,7 +69,6 @@ def test_boxify_non_point(): def test_rebin_sum(): - test_input = np.array( [ [1, 1, 2, 2], @@ -76,9 +76,32 @@ def test_rebin_sum(): [3, 3, 4, 4], [3, 3, 4, 4] ]) - test_output = rebin_sum(test_input, (2,2), np.int32) - correct_output = np.array([[4, 8],[12, 16]]) - assert np.array_equal(test_output, correct_output) + + +def test_rasterize_pctcover_geom(): + polygon_a = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]]) + shape_a = (2, 2) + affine_a = Affine(1, 0, 0, + 0, -1, 2) + pct_cover_a = rasterize_pctcover_geom(polygon_a, shape_a, affine_a, scale=10, all_touched=False) + correct_output_a = np.array([[1, 1], [1, 1]]) + assert np.array_equal(pct_cover_a, correct_output_a) + + polygon_b = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]]) + shape_b = (2, 2) + affine_b = Affine(1, 0, 0, + 0, -1, 2) + pct_cover_b = rasterize_pctcover_geom(polygon_b, shape_b, affine_b, scale=10, all_touched=False) + correct_output_b = np.array([[0.25, 0.25], [0.25, 0.25]]) + assert np.array_equal(pct_cover_b, correct_output_b) + + polygon_c = Polygon([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]]) + shape_c = (2, 2) + affine_c = Affine(1, 0, 0, + 0, -1, 2) + pct_cover_c = rasterize_pctcover_geom(polygon_c, shape_c, affine_c, scale=100, all_touched=False) + correct_output_c = np.array([[0.25, 0.25], [0.25, 0.25]]) + assert np.array_equal(pct_cover_c, correct_output_c) diff --git a/tests/test_zonal.py b/tests/test_zonal.py index c91b207..018ab53 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -9,6 +9,8 @@ from rasterstats import zonal_stats, raster_stats from rasterstats.utils import VALID_STATS from rasterstats.io import read_featurecollection, read_features +from shapely.geometry import Polygon +from affine import Affine sys.path.append(os.path.dirname(os.path.abspath(__file__))) @@ -424,6 +426,45 @@ def test_geojson_out(): assert 'id' in feature['properties'] # from orig assert 'count' in feature['properties'] # from zonal stats + +def test_percent_cover_zonal_stats() + polygon = Polygon([[0, 0], [0, 0,5], [1, 1.5], [1.5, 2], [2, 2], [2, 0]]) + arr = np.array([ + [100, 1], + [100, 1] + ]) + affine = Affine(1, 0, 0, + 0, -1, 2) + + stats_options = 'min max mean count sum nodata' + + # run base case + stats_a = zonal_stats(polygon, arr, affine=affine, stats=stats_options) + assert stats_a[0]['mean'] == 34 + + # test selection + stats_b = zonal_stats(polygon, arr, affine=affine, percent_cover_selection=0.75, percent_cover_scale=10, stats=stats_options) + assert stats_b[0]['mean'] == 1 + + # test weighting + stats_c = zonal_stats(polygon, arr, affine=affine, percent_cover_weighting=True, percent_cover_scale=10, stats=stats_options) + assert round(stats_c[0]['count'], 2) == 2.6 + assert round(stats_c[0]['mean'], 2) == 29.56 + assert round(stats_c[0]['sum'], 2) == 76.85 + + # check that percent_cover_scale is required + with pytest.raises(Exception): + zonal_stats(polygon, arr, affine=affine, percent_cover_selection=0.75) + + # check invalid percent_cover_scale value + with pytest.raises(Exception): + zonal_stats(polygon, arr, affine=affine, percent_cover_selection=0.75, percent_cover_scale=0.5) + + # check invalid percent_cover_selection value + with pytest.raises(Exception): + zonal_stats(polygon, arr, affine=affine, percent_cover_selection='one', percent_cover_scale=10) + + # Optional tests def test_geodataframe_zonal(): polygons = os.path.join(DATA, 'polygons.shp') @@ -438,3 +479,5 @@ def test_geodataframe_zonal(): expected = zonal_stats(polygons, raster) assert zonal_stats(df, raster) == expected + + From a9a4a3cb571fd4c6ffcbff592b8146425859ebe4 Mon Sep 17 00:00:00 2001 From: sgoodm Date: Tue, 21 Mar 2017 13:53:26 -0400 Subject: [PATCH 08/19] Fix --- tests/test_zonal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_zonal.py b/tests/test_zonal.py index 018ab53..d1070af 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -427,7 +427,7 @@ def test_geojson_out(): assert 'count' in feature['properties'] # from zonal stats -def test_percent_cover_zonal_stats() +def test_percent_cover_zonal_stats(): polygon = Polygon([[0, 0], [0, 0,5], [1, 1.5], [1.5, 2], [2, 2], [2, 0]]) arr = np.array([ [100, 1], From cfa198ac5c39406bbf55a031db1b59054a03c5db Mon Sep 17 00:00:00 2001 From: sgoodm Date: Tue, 21 Mar 2017 14:01:16 -0400 Subject: [PATCH 09/19] Fix mask conditionals for non-bool arrays --- src/rasterstats/main.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 552ef24..c8e6bc8 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -246,15 +246,25 @@ def gen_zonal_stats( if percent_cover_selection is not None: masked = np.ma.MaskedArray( fsrc.array, - mask=(isnodata | np.logical_not(rv_pct_array) | rv_pct_array < percent_cover_selection)) + mask=np.logical_or.reduce(( + isnodata, + np.logical_not(rv_pct_array), + rv_pct_array < percent_cover_selection + )) + ) elif percent_cover: masked = np.ma.MaskedArray( fsrc.array, - mask=(isnodata | np.logical_not(rv_pct_array))) + mask=np.logical_or( + isnodata, + np.logical_not(rv_pct_array) + ) + ) else: masked = np.ma.MaskedArray( fsrc.array, - mask=(isnodata | np.logical_not(rv_array))) + mask=(isnodata | ~rv_array) + ) # execute zone_func on masked zone ndarray if zone_func is not None: From 7000632805cad9083867712064bcbe1814ac1d17 Mon Sep 17 00:00:00 2001 From: sgoodm Date: Tue, 21 Mar 2017 14:08:39 -0400 Subject: [PATCH 10/19] Update old version of test case --- tests/test_zonal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_zonal.py b/tests/test_zonal.py index d1070af..6cb197e 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -452,9 +452,9 @@ def test_percent_cover_zonal_stats(): assert round(stats_c[0]['mean'], 2) == 29.56 assert round(stats_c[0]['sum'], 2) == 76.85 - # check that percent_cover_scale is required - with pytest.raises(Exception): - zonal_stats(polygon, arr, affine=affine, percent_cover_selection=0.75) + # check that percent_cover_scale is set to 10 when not provided by user + stats_d = zonal_stats(polygon, arr, affine=affine, percent_cover_weighting=True, stats=stats_options) + assert round(stats_d[0]['mean'], 2) == round(stats_c[0]['mean'], 2) # check invalid percent_cover_scale value with pytest.raises(Exception): From 644ddc3905bbffcd5f226315bc37443ecca36334 Mon Sep 17 00:00:00 2001 From: userw Date: Fri, 24 Mar 2017 13:29:41 -0400 Subject: [PATCH 11/19] Update nodata stat for percent cover changes (may need review) --- src/rasterstats/main.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 0de293d..259e8a3 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -326,11 +326,7 @@ def gen_zonal_stats( feature_stats[pctile] = np.percentile(pctarr, q) if 'nodata' in stats: - if percent_cover: - featmasked = np.ma.MaskedArray(fsrc.array, mask=np.logical_not(rv_pct_array)) - else: - featmasked = np.ma.MaskedArray(fsrc.array, mask=np.logical_not(rv_array)) - + featmasked = np.ma.MaskedArray(fsrc.array, mask=np.logical_not(rv_array)) feature_stats['nodata'] = float((featmasked == fsrc.nodata).sum()) if add_stats is not None: From 69554769d8bfd1435cd941d31559137a3b96b2f4 Mon Sep 17 00:00:00 2001 From: userz Date: Fri, 31 Aug 2018 12:55:43 -0400 Subject: [PATCH 12/19] Use 'like' object for rasterize funcs to keep inline with original code --- src/rasterstats/main.py | 4 ++-- src/rasterstats/utils.py | 30 ++++++++++++++---------------- tests/test_utils.py | 17 ++++++++++++++--- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 38d3b9e..7271a6c 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -228,13 +228,13 @@ def gen_zonal_stats( # rasterized geometry if percent_cover: cover_weights = rasterize_pctcover_geom( - geom, shape=fsrc.shape, affine=fsrc.affine, + geom, like=fsrc, scale=percent_cover_scale, all_touched=all_touched) rv_array = cover_weights > (percent_cover_selection or 0) else: rv_array = rasterize_geom( - geom, shape=fsrc.shape, affine=fsrc.affine, + geom, like=fsrc, all_touched=all_touched) # nodata mask diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 4770e8d..cc466f4 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -27,13 +27,12 @@ def get_percentile(stat): return q -def rasterize_geom(geom, shape, affine, all_touched=False): +def rasterize_geom(geom, like, all_touched=False): """ Parameters ---------- geom: GeoJSON geometry - shape: desired shape - affine: desired transform + like: raster object with desired shape and transform all_touched: rasterization strategy Returns @@ -43,8 +42,8 @@ def rasterize_geom(geom, shape, affine, all_touched=False): geoms = [(geom, 1)] rv_array = features.rasterize( geoms, - out_shape=shape, - transform=affine, + out_shape=like.shape, + transform=like.affine, fill=0, dtype='uint8', all_touched=all_touched) @@ -58,13 +57,12 @@ def rebin_sum(a, shape, dtype): return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype) -def rasterize_pctcover_geom(geom, shape, affine, scale=None, all_touched=False): +def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False): """ Parameters ---------- geom: GeoJSON geometry - shape: desired shape - affine: desired transform + like: raster object with desired shape and transform scale: scale at which to generate percent cover estimate Returns @@ -75,20 +73,20 @@ def rasterize_pctcover_geom(geom, shape, affine, scale=None, all_touched=False): scale = 10 min_dtype = min_scalar_type(scale**2) - pixel_size_lon = affine[0]/scale - pixel_size_lat = affine[4]/scale - - topleftlon = affine[2] - topleftlat = affine[5] + pixel_size_lon = like.affine[0]/scale + pixel_size_lat = like.affine[4]/scale + + topleftlon = like.affine[2] + topleftlat = like.affine[5] new_affine = Affine(pixel_size_lon, 0, topleftlon, 0, pixel_size_lat, topleftlat) - new_shape = (shape[0]*scale, shape[1]*scale) + new_shape = (like.shape[0]*scale, like.shape[1]*scale) rv_array = rasterize_geom(geom, new_shape, new_affine, all_touched=all_touched) - rv_array = rebin_sum(rv_array, shape, min_dtype) - + rv_array = rebin_sum(rv_array, like.shape, min_dtype) + return rv_array.astype('float32') / (scale**2) diff --git a/tests/test_utils.py b/tests/test_utils.py index 7836473..d205ca6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -82,11 +82,18 @@ def test_rebin_sum(): def test_rasterize_pctcover_geom(): + # https://goodcode.io/articles/python-dict-object/ + class objectview(object): + def __init__(self, d): + self.__dict__ = d + polygon_a = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]]) shape_a = (2, 2) affine_a = Affine(1, 0, 0, 0, -1, 2) - pct_cover_a = rasterize_pctcover_geom(polygon_a, shape_a, affine_a, scale=10, all_touched=False) + like_a = objectview({'shape': shape_a, 'affine': affine_a}) + + pct_cover_a = rasterize_pctcover_geom(polygon_a, like_a, scale=10, all_touched=False) correct_output_a = np.array([[1, 1], [1, 1]]) assert np.array_equal(pct_cover_a, correct_output_a) @@ -94,7 +101,9 @@ def test_rasterize_pctcover_geom(): shape_b = (2, 2) affine_b = Affine(1, 0, 0, 0, -1, 2) - pct_cover_b = rasterize_pctcover_geom(polygon_b, shape_b, affine_b, scale=10, all_touched=False) + like_b = objectview({'shape': shape_b, 'affine': affine_b}) + + pct_cover_b = rasterize_pctcover_geom(polygon_b, like_b, scale=10, all_touched=False) correct_output_b = np.array([[0.25, 0.25], [0.25, 0.25]]) assert np.array_equal(pct_cover_b, correct_output_b) @@ -102,6 +111,8 @@ def test_rasterize_pctcover_geom(): shape_c = (2, 2) affine_c = Affine(1, 0, 0, 0, -1, 2) - pct_cover_c = rasterize_pctcover_geom(polygon_c, shape_c, affine_c, scale=100, all_touched=False) + like_c = objectview({'shape': shape_c, 'affine': affine_c}) + + pct_cover_c = rasterize_pctcover_geom(polygon_c, like_c, scale=100, all_touched=False) correct_output_c = np.array([[0.25, 0.25], [0.25, 0.25]]) assert np.array_equal(pct_cover_c, correct_output_c) From 1c3906787edaddc54a8925d73b41a72832c18e77 Mon Sep 17 00:00:00 2001 From: userz Date: Fri, 31 Aug 2018 13:03:30 -0400 Subject: [PATCH 13/19] Add correct usage of in rasterize_pctcover_geom func --- src/rasterstats/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index cc466f4..e579a60 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -57,6 +57,10 @@ def rebin_sum(a, shape, dtype): return a.reshape(sh).sum(-1, dtype=dtype).sum(1, dtype=dtype) +class objectview(object): + def __init__(self, d): + self.__dict__ = d + def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False): """ Parameters @@ -84,7 +88,9 @@ def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False): new_shape = (like.shape[0]*scale, like.shape[1]*scale) - rv_array = rasterize_geom(geom, new_shape, new_affine, all_touched=all_touched) + new_like = objectview({'shape': new_shape, 'affine': new_affine}) + + rv_array = rasterize_geom(geom, new_like, all_touched=all_touched) rv_array = rebin_sum(rv_array, like.shape, min_dtype) return rv_array.astype('float32') / (scale**2) From 2fe6d6691ffec125c7eb567d8b31ed43c17de381 Mon Sep 17 00:00:00 2001 From: userz Date: Fri, 31 Aug 2018 13:24:54 -0400 Subject: [PATCH 14/19] Reduce main function complexity by moving percent cover stat calc logic to separate util functions (could expand this to class that covers all stat options later on) --- src/rasterstats/main.py | 22 ++++++---------------- src/rasterstats/utils.py | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 7271a6c..48e19b8 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -11,7 +11,8 @@ from .io import read_features, Raster from .utils import (rasterize_geom, get_percentile, check_stats, remap_categories, key_assoc_val, boxify_points, - rasterize_pctcover_geom) + rasterize_pctcover_geom, + rs_mean, rs_count, rs_sum) def raster_stats(*args, **kwargs): @@ -233,6 +234,7 @@ def gen_zonal_stats( all_touched=all_touched) rv_array = cover_weights > (percent_cover_selection or 0) else: + cover_weights = None rv_array = rasterize_geom( geom, like=fsrc, all_touched=all_touched) @@ -294,24 +296,12 @@ def gen_zonal_stats( if 'max' in stats: feature_stats['max'] = float(masked.max()) if 'mean' in stats: - if percent_cover_weighting: - feature_stats['mean'] = float( - np.sum(masked * cover_weights) / - np.sum(~masked.mask * cover_weights)) - else: - feature_stats['mean'] = float(masked.mean()) + feature_stats['mean'] = rs_mean(masked, cover_weights) if 'count' in stats: - if percent_cover_weighting: - feature_stats['count'] = float(np.sum(~masked.mask * cover_weights)) - else: - feature_stats['count'] = int(masked.count()) + feature_stats['count'] = rs_count(masked, cover_weights) # optional if 'sum' in stats: - if percent_cover_weighting: - feature_stats['sum'] = float(np.sum(masked * cover_weights)) - - else: - feature_stats['sum'] = float(masked.sum()) + feature_stats['sum'] = rs_sum(masked, cover_weights) if 'std' in stats: feature_stats['std'] = float(masked.std()) if 'median' in stats: diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index e579a60..488c16d 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -2,6 +2,7 @@ from __future__ import absolute_import from __future__ import division import sys +import numpy as np from rasterio import features from affine import Affine from numpy import min_scalar_type @@ -195,3 +196,28 @@ def boxify_points(geom, rast): return MultiPolygon(geoms) + +def rs_mean(masked, cover_weights=None): + if cover_weights: + val = float( + np.sum(masked * cover_weights) / + np.sum(~masked.mask * cover_weights)) + else: + val = float(masked.mean()) + return val + + +def rs_count(masked, cover_weights=None): + if cover_weights: + val = float(np.sum(~masked.mask * cover_weights)) + else: + val = int(masked.count()) + return val + + + def rs_sum(masked, cover_weights=None): + if cover_weights: + val = float(np.sum(masked * cover_weights)) + else: + val = float(masked.sum()) + return val From d986437d679100a1d0087ec3aaf186a4923ffb6a Mon Sep 17 00:00:00 2001 From: userz Date: Fri, 31 Aug 2018 13:28:28 -0400 Subject: [PATCH 15/19] Fix indent --- src/rasterstats/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 488c16d..1bc62b7 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -215,7 +215,7 @@ def rs_count(masked, cover_weights=None): return val - def rs_sum(masked, cover_weights=None): +def rs_sum(masked, cover_weights=None): if cover_weights: val = float(np.sum(masked * cover_weights)) else: From 46fcebcf8ec9f0df1063921bec7092bba7d8d9e1 Mon Sep 17 00:00:00 2001 From: userz Date: Fri, 31 Aug 2018 13:33:55 -0400 Subject: [PATCH 16/19] Fix check for percent cover usage in stat functions --- src/rasterstats/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 1bc62b7..288475a 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -198,7 +198,7 @@ def boxify_points(geom, rast): def rs_mean(masked, cover_weights=None): - if cover_weights: + if cover_weights is not None: val = float( np.sum(masked * cover_weights) / np.sum(~masked.mask * cover_weights)) @@ -208,7 +208,7 @@ def rs_mean(masked, cover_weights=None): def rs_count(masked, cover_weights=None): - if cover_weights: + if cover_weights is not None: val = float(np.sum(~masked.mask * cover_weights)) else: val = int(masked.count()) @@ -216,7 +216,7 @@ def rs_count(masked, cover_weights=None): def rs_sum(masked, cover_weights=None): - if cover_weights: + if cover_weights is not None: val = float(np.sum(masked * cover_weights)) else: val = float(masked.sum()) From fc558c2b390ce202437b033c4d26b278cc88335b Mon Sep 17 00:00:00 2001 From: userz Date: Tue, 4 Sep 2018 11:07:44 -0400 Subject: [PATCH 17/19] Reduce lines to get test coverage --- src/rasterstats/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index 288475a..07e1bcd 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -74,8 +74,7 @@ def rasterize_pctcover_geom(geom, like, scale=None, all_touched=False): ------- ndarray: float32 """ - if scale is None: - scale = 10 + scale = scale if scale is not None else 10 min_dtype = min_scalar_type(scale**2) pixel_size_lon = like.affine[0]/scale From c475b5831a24987238d79262bad404b40cfc0ec2 Mon Sep 17 00:00:00 2001 From: userz Date: Tue, 4 Sep 2018 11:21:40 -0400 Subject: [PATCH 18/19] Add in some easy test coverage --- tests/test_cli.py | 19 +++++++++++++++++++ tests/test_point.py | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index b6834aa..3c6c6e2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -26,6 +26,25 @@ def test_cli_feature(): assert 'test_count' not in feature['properties'] +def test_cli_feature_info(): + raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') + vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson') + runner = CliRunner() + warnings.simplefilter('ignore') + result = runner.invoke(zonalstats, [vector, + '--raster', raster, + '--stats', 'mean', + '--prefix', 'test_', + '--info']) + assert result.exit_code == 0 + outdata = json.loads(result.output) + assert len(outdata['features']) == 1 + feature = outdata['features'][0] + assert 'test_mean' in feature['properties'] + assert round(feature['properties']['test_mean'], 2) == 14.66 + assert 'test_count' not in feature['properties'] + + def test_cli_feature_stdin(): raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') vector = os.path.join(os.path.dirname(__file__), 'data/feature.geojson') diff --git a/tests/test_point.py b/tests/test_point.py index 0ea48ab..a7f5591 100644 --- a/tests/test_point.py +++ b/tests/test_point.py @@ -86,6 +86,12 @@ def test_point_query(): assert round(val) == 74 +def test_point_query_invalid_interp(): + point = "POINT(245309 1000064)" + with pytest.raises(ValueError): + point_query(point, raster, interpolate="invalid_type") + + def test_point_query_geojson(): point = "POINT(245309 1000064)" features = point_query(point, raster, property_name="TEST", geojson_out=True) From a52055f74458b9f88ccfd5e8e136b3a36385503b Mon Sep 17 00:00:00 2001 From: userz Date: Tue, 4 Sep 2018 11:23:42 -0400 Subject: [PATCH 19/19] Add missing import for tests --- tests/test_point.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_point.py b/tests/test_point.py index a7f5591..725fa00 100644 --- a/tests/test_point.py +++ b/tests/test_point.py @@ -1,4 +1,5 @@ import os +import pytest import rasterio from rasterstats.point import point_window_unitxy, bilinear, geom_xys from rasterstats import point_query