From cc1f5b8c9d40687dd64f710ec373ec9602e0d237 Mon Sep 17 00:00:00 2001 From: Yifan Yu Date: Thu, 10 Feb 2022 00:32:24 -0800 Subject: [PATCH 1/4] added option to process multi-polygon cases by reading in individual polygons one at a time to conserve memory usage --- src/rasterstats/main.py | 75 +++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index ddee0c4..3a9b34c 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -46,7 +46,8 @@ def gen_zonal_stats( raster_out=False, prefix=None, geojson_out=False, - boundless=True, **kwargs): + boundless=True, + split_multi_geom=False, **kwargs): """Zonal statistics of raster values aggregated to vector geometries. Parameters @@ -155,28 +156,66 @@ def gen_zonal_stats( if 'Point' in geom.type: geom = boxify_points(geom, rast) - geom_bounds = tuple(geom.bounds) + #Multiple polygons found, and split_multi_geom option is enabled + #In this case, we read the polygons in one at a time and concact the raster + #values together. In order to save memory + if ('MultiPolygon' in geom.type) and (split_multi_geom): + fsrc = None + rv_array = None + isnodata = None + + #iterate through individual polygons + for singlePolygon in geom: + + polygon_bounds = tuple(singlePolygon.bounds) + polygon_fsrc = rast.read(bounds=polygon_bounds) + + # rasterized geometry + polygon_rv_array = rasterize_geom(singlePolygon, like=polygon_fsrc, all_touched=all_touched) + + # nodata mask + polygon_isnodata = (polygon_fsrc.array == polygon_fsrc.nodata) + + # add nan mask (if necessary) + has_nan = ( + np.issubdtype(polygon_fsrc.array.dtype, np.floating) + and np.isnan(polygon_fsrc.array.min())) + if has_nan: + polygon_isnodata = (polygon_isnodata | np.isnan(polygon_fsrc.array)) + + if fsrc is None: + fsrc = np.ravel(polygon_fsrc.array) + rv_array = np.ravel(polygon_rv_array) + isnodata = np.ravel(polygon_isnodata) + else: + np.append(fsrc, polygon_fsrc.array) + np.append(rv_array, polygon_rv_array) + np.append(isnodata, polygon_isnodata) + masked = np.ma.MaskedArray(fsrc, mask = (isnodata | ~rv_array)) + else: + + geom_bounds = tuple(geom.bounds) - fsrc = rast.read(bounds=geom_bounds, boundless=boundless) + fsrc = rast.read(bounds=geom_bounds, boundless=boundless) - # rasterized geometry - rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) + # rasterized geometry + rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) - # nodata mask - isnodata = (fsrc.array == fsrc.nodata) + # nodata mask + isnodata = (fsrc.array == fsrc.nodata) - # add nan mask (if necessary) - has_nan = ( - np.issubdtype(fsrc.array.dtype, np.floating) - and np.isnan(fsrc.array.min())) - if has_nan: - isnodata = (isnodata | np.isnan(fsrc.array)) + # add nan mask (if necessary) + has_nan = ( + np.issubdtype(fsrc.array.dtype, np.floating) + and np.isnan(fsrc.array.min())) + if has_nan: + isnodata = (isnodata | np.isnan(fsrc.array)) - # Mask the source data array - # mask everything that is not a valid value or not within our geom - masked = np.ma.MaskedArray( - fsrc.array, - mask=(isnodata | ~rv_array)) + # Mask the source data array + # mask everything that is not a valid value or not within our geom + masked = np.ma.MaskedArray( + fsrc.array, + mask=(isnodata | ~rv_array)) # If we're on 64 bit platform and the array is an integer type # make sure we cast to 64 bit to avoid overflow. From 6b25e4bd401704a5573234a8607d95bc9ff26865 Mon Sep 17 00:00:00 2001 From: Yifan Yu Date: Thu, 10 Feb 2022 01:11:09 -0800 Subject: [PATCH 2/4] update to remove deprecated part in Shapely 1.8 --- src/rasterstats/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index 3a9b34c..f6736be 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -165,7 +165,7 @@ def gen_zonal_stats( isnodata = None #iterate through individual polygons - for singlePolygon in geom: + for singlePolygon in geom.geoms: polygon_bounds = tuple(singlePolygon.bounds) polygon_fsrc = rast.read(bounds=polygon_bounds) From cd8a88e062780ded0a887de7e6cb2d7d09daaa57 Mon Sep 17 00:00:00 2001 From: Yifan Yu Date: Thu, 10 Feb 2022 01:36:23 -0800 Subject: [PATCH 3/4] Copy over old variable. numpy.append does not append in-place --- src/rasterstats/main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index f6736be..e5ceaec 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -152,7 +152,6 @@ def gen_zonal_stats( features_iter = read_features(vectors, layer) for _, feat in enumerate(features_iter): geom = shape(feat['geometry']) - if 'Point' in geom.type: geom = boxify_points(geom, rast) @@ -166,7 +165,6 @@ def gen_zonal_stats( #iterate through individual polygons for singlePolygon in geom.geoms: - polygon_bounds = tuple(singlePolygon.bounds) polygon_fsrc = rast.read(bounds=polygon_bounds) @@ -188,9 +186,9 @@ def gen_zonal_stats( rv_array = np.ravel(polygon_rv_array) isnodata = np.ravel(polygon_isnodata) else: - np.append(fsrc, polygon_fsrc.array) - np.append(rv_array, polygon_rv_array) - np.append(isnodata, polygon_isnodata) + fsrc = np.append(fsrc, polygon_fsrc.array) + rv_array = np.append(rv_array, polygon_rv_array) + isnodata = np.append(isnodata, polygon_isnodata) masked = np.ma.MaskedArray(fsrc, mask = (isnodata | ~rv_array)) else: From 39fe2786b5fc5d994fb02866a3bd6164f28000c7 Mon Sep 17 00:00:00 2001 From: Yifan Yu Date: Mon, 14 Feb 2022 23:17:54 -0800 Subject: [PATCH 4/4] changed variable name to follow PEP8 convention --- src/rasterstats/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rasterstats/main.py b/src/rasterstats/main.py index e5ceaec..fbb0fe2 100644 --- a/src/rasterstats/main.py +++ b/src/rasterstats/main.py @@ -164,12 +164,12 @@ def gen_zonal_stats( isnodata = None #iterate through individual polygons - for singlePolygon in geom.geoms: - polygon_bounds = tuple(singlePolygon.bounds) + for single_polygon in geom.geoms: + polygon_bounds = tuple(single_polygon.bounds) polygon_fsrc = rast.read(bounds=polygon_bounds) # rasterized geometry - polygon_rv_array = rasterize_geom(singlePolygon, like=polygon_fsrc, all_touched=all_touched) + polygon_rv_array = rasterize_geom(single_polygon, like=polygon_fsrc, all_touched=all_touched) # nodata mask polygon_isnodata = (polygon_fsrc.array == polygon_fsrc.nodata)