diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 540ed20..adb0442 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -33,6 +33,7 @@ def gen_zonal_stats( layer=0, band=1, nodata=None, + no_overlap=None, affine=None, stats=None, all_touched=False, @@ -139,7 +140,31 @@ def gen_zonal_stats( warnings.warn("Use `band` to specify band number", DeprecationWarning) band = band_num - with Raster(raster, affine, nodata, band) as rast: + + if 'no_overlap' in stats: + + if 'nodata' in stats and nodata is None: + nodata = -999 + warnings.warn("Setting nodata to -999; specify nodata explicitly " + "when requesting no_overlap stat") + + if no_overlap is None: + no_overlap = -998 + if no_overlap == nodata: + no_overlap = -997 + + warnings.warn("Setting no_overlap to {0}; specify no_overlap " + "explicitly when request the `no_overlap` stat".format( + no_overlap)) + elif no_overlap == nodata: + raise Exception("`no_overlap` value is equal to `nodata` value. " + "Values must be distinct to calculate `no_overlap`") + + tmp_nodata = no_overlap if 'no_overlap' in stats else nodata + + + with Raster(raster, affine=affine, nodata=tmp_nodata, band=band) as rast: + features_iter = read_features(vectors, layer) for _, feat in enumerate(features_iter): geom = shape(feat['geometry']) @@ -149,26 +174,49 @@ def gen_zonal_stats( geom_bounds = tuple(geom.bounds) - fsrc = rast.read(bounds=geom_bounds) + fsrc = rast.read(bounds=geom_bounds, masked=False) # rasterized geometry rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) + if nodata is None and no_overlap is None: + nodata = fsrc.nodata + # nodata mask isnodata = (fsrc.array == fsrc.nodata) + # include actual nodata val when no_overlap is used + if nodata is not None and no_overlap is not None: + isnodata = (isnodata | (fsrc.array == nodata)) + # add nan mask (if necessary) has_nan = (np.issubdtype(fsrc.array.dtype, float) and np.isnan(fsrc.array.min())) if has_nan: isnodata = (isnodata | np.isnan(fsrc.array)) + # Mask the source data array # mask everything that is not a valid value or not within our geom masked = np.ma.MaskedArray( fsrc.array, mask=(isnodata | ~rv_array)) + # print stats + # print "no_overlap: {0}".format(no_overlap) + # print "nodata: {0}".format(nodata) + # print "tmp_nodata: {0}".format(tmp_nodata) + # print "fsrc.nodata: {0}".format(fsrc.nodata) + + # print "fsrc.array" + # print fsrc.array + # print "isnodata" + # print isnodata + # print "rv_array" + # print rv_array + # print "masked" + # print masked + # execute zone_func on masked zone ndarray if zone_func is not None: if not callable(zone_func): @@ -233,13 +281,16 @@ def gen_zonal_stats( pctarr = masked.compressed() feature_stats[pctile] = np.percentile(pctarr, q) - if 'nodata' in stats or 'nan' in stats: + + if any(i in stats for i in ['nodata', 'nan', 'no_overlap']): featmasked = np.ma.MaskedArray(fsrc.array, mask=(~rv_array)) if 'nodata' in stats: - feature_stats['nodata'] = float((featmasked == fsrc.nodata).sum()) + feature_stats['nodata'] = float((featmasked == nodata).sum()) if 'nan' in stats: feature_stats['nan'] = float(np.isnan(featmasked).sum()) if has_nan else 0 + if 'no_overlap' in stats: + feature_stats['no_overlap'] = float((featmasked == no_overlap).sum()) if add_stats is not None: for stat_name, stat_func in add_stats.items(): diff --git a/src/rasterstats/utils.py b/src/rasterstats/utils.py index c3ad76f..03f012b 100644 --- a/src/rasterstats/utils.py +++ b/src/rasterstats/utils.py @@ -9,7 +9,7 @@ DEFAULT_STATS = ['count', 'min', 'max', 'mean'] VALID_STATS = DEFAULT_STATS + \ - ['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range', 'nodata', 'nan'] + ['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range', 'nodata', 'nan', 'no_overlap'] # also percentile_{q} but that is handled as special case diff --git a/tests/data/single_polygon_partial_overlap.cpg b/tests/data/single_polygon_partial_overlap.cpg new file mode 100644 index 0000000..3ad133c --- /dev/null +++ b/tests/data/single_polygon_partial_overlap.cpg @@ -0,0 +1 @@ +UTF-8 \ No newline at end of file diff --git a/tests/data/single_polygon_partial_overlap.dbf b/tests/data/single_polygon_partial_overlap.dbf new file mode 100644 index 0000000..063d6f7 Binary files /dev/null and b/tests/data/single_polygon_partial_overlap.dbf differ diff --git a/tests/data/single_polygon_partial_overlap.prj b/tests/data/single_polygon_partial_overlap.prj new file mode 100644 index 0000000..f2e7a2c --- /dev/null +++ b/tests/data/single_polygon_partial_overlap.prj @@ -0,0 +1 @@ +PROJCS["Albers",GEOGCS["GCS_GRS 1980(IUGG, 1980)",DATUM["D_unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_origin",34],PARAMETER["central_meridian",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]] \ No newline at end of file diff --git a/tests/data/single_polygon_partial_overlap.qpj b/tests/data/single_polygon_partial_overlap.qpj new file mode 100644 index 0000000..a656ae9 --- /dev/null +++ b/tests/data/single_polygon_partial_overlap.qpj @@ -0,0 +1 @@ +PROJCS["unnamed",GEOGCS["GRS 1980(IUGG, 1980)",DATUM["unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_center",34],PARAMETER["longitude_of_center",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]] diff --git a/tests/data/single_polygon_partial_overlap.shp b/tests/data/single_polygon_partial_overlap.shp new file mode 100644 index 0000000..7b7268f Binary files /dev/null and b/tests/data/single_polygon_partial_overlap.shp differ diff --git a/tests/data/single_polygon_partial_overlap.shx b/tests/data/single_polygon_partial_overlap.shx new file mode 100644 index 0000000..412eebf Binary files /dev/null and b/tests/data/single_polygon_partial_overlap.shx differ diff --git a/tests/test_zonal.py b/tests/test_zonal.py index 48babc9..5c52556 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -457,8 +457,6 @@ def test_geojson_out(): assert 'id' in feature['properties'] # from orig assert 'count' in feature['properties'] # from zonal stats - - # do not think this is actually testing the line i wanted it to # since the read_features func for this data type is generating # the properties field @@ -488,10 +486,9 @@ def test_copy_properties_warn(): with pytest.deprecated_call(): stats_b = zonal_stats(polygons, raster, copy_properties=True) assert stats_a == stats_b - + def test_nan_counts(): - from affine import Affine transform = Affine(1, 0, 1, 0, -1, 3) data = np.array([ @@ -504,7 +501,7 @@ def test_nan_counts(): geom = 'POLYGON ((1 0, 4 0, 4 3, 1 3, 1 0))' # nan stat is requested - stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*") + stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="count nodata nan") for res in stats: assert res['count'] == 3 # 3 pixels of valid data @@ -519,6 +516,100 @@ def test_nan_counts(): assert res['nodata'] == 3 # 3 pixels of nodata assert 'nan' not in res + # nan stat is not impacted by no_overlap + stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*") + + for res in stats: + assert res['count'] == 3 # 3 pixels of valid data + assert res['nodata'] == 3 # 3 pixels of nodata + assert res['nan'] == 3 # 3 pixels of nans + assert res['no_overlap'] == 0 # 3 pixels of nans + + +def test_array_overlap_counts(): + nodata = -9999 + no_overlap = -8888 + + transform = Affine(1, 0, 1, 0, -1, 3) + + # data = np.array([ + # [-9999, 0.0, 516.2840576171875, 524.4825439453125], + # [-9999, 178.74169921875, 573.80126953125, 415.345947265625], + # [-9999, 397.3150939941406, 568.3016357421875, 185.3491973876953] + # ]) + + data = np.array([ + [-9999, 0.0, 516.2840576171875, np.nan], + [-9999, 178.74169921875, 573.80126953125, 415.345947265625], + [-9999, 397.3150939941406, 568.3016357421875, 185.3491973876953] + ]) + + geom = 'POLYGON ((0 0, 5 0, 5 3, 0 3, 0 0))' + + stats = zonal_stats(geom, data, affine=transform, stats="*", nodata=nodata, no_overlap=no_overlap) + + for res in stats: + assert res['count'] == 8 # Two pixels of valid data + assert res['nodata'] == 3 # Two pixels of nodata + assert res['no_overlap'] == 3 # Three pixels of no overlap + assert res['nan'] == 1 # One pixel of nan + + +def test_missing_no_overlap_logic(): + nodata = -998 + no_overlap = None + + transform = Affine(1, 0, 1, 0, -1, 3) + + data = np.array([ + [-998, 0.0, 516.2840576171875, np.nan], + [-998, 178.74169921875, 573.80126953125, 415.345947265625], + [-998, 397.3150939941406, 568.3016357421875, 185.3491973876953] + ]) + + geom = 'POLYGON ((0 0, 5 0, 5 3, 0 3, 0 0))' + + stats = zonal_stats(geom, data, affine=transform, stats="*", nodata=nodata, no_overlap=no_overlap) + + for res in stats: + assert res['count'] == 8 # Two pixels of valid data + assert res['nodata'] == 3 # Two pixels of nodata + assert res['no_overlap'] == 3 # Three pixels of no overlap + assert res['nan'] == 1 # One pixel of nan + + +def test_nodata_and_no_overlap_check(): + nodata = -9999 + no_overlap = -9999 + + transform = Affine(1, 0, 1, 0, -1, 3) + + data = np.array([ + [-998, 0.0, 516.2840576171875, np.nan], + [-998, 178.74169921875, 573.80126953125, 415.345947265625], + [-998, 397.3150939941406, 568.3016357421875, 185.3491973876953] + ]) + + geom = 'POLYGON ((0 0, 5 0, 5 3, 0 3, 0 0))' + + with pytest.raises(Exception): + stats = zonal_stats(geom, data, affine=transform, stats="*", nodata=nodata, no_overlap=no_overlap) + + +def test_raster_overlap_counts(): + nodata = -9999 + no_overlap = -8888 + + # same shape/overlap/nodata-pixel as test_array_overlap_counts + polygons = os.path.join(DATA, 'single_polygon_partial_overlap.shp') + + stats = zonal_stats(polygons, raster, stats="*", nodata=nodata, no_overlap=no_overlap) + + for res in stats: + assert res['count'] == 9 # Two pixels of valid data + assert res['nodata'] == 3 # Two pixels of nodata + assert res['no_overlap'] == 3 # Three pixels of no overlap + # Optional tests def test_geodataframe_zonal():