From d3b3f3d13ebabd432d3b566e02070d067e83cf7b Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 20 Jan 2025 11:44:08 +0100 Subject: [PATCH 1/8] feat: sequences are now in direction of travel --- mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py index 3cafb5362..a35271fe3 100644 --- a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py +++ b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py @@ -146,4 +146,7 @@ def spatial_sampling(df, interval_length): [sampled_sequence_df, filtered_sorted_sub_df], axis=0 ) + # reverse order such that sequence are in direction of travel + sampled_sequence_df = sampled_sequence_df.iloc[::-1] + return sampled_sequence_df From 417a5fae9cd2a1599017851ad8bdb8e5bf8448f2 Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 20 Jan 2025 11:47:59 +0100 Subject: [PATCH 2/8] fix: just use one dataframe to save memory --- .../mapswipe_workers/utils/process_mapillary.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py index 1faf0b23b..35dd96e30 100644 --- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py +++ b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py @@ -245,27 +245,27 @@ def get_image_metadata( downloaded_metadata["geometry"].apply(lambda geom: isinstance(geom, Point)) ] - filtered_metadata = filter_results( + downloaded_metadata = filter_results( downloaded_metadata, creator_id, is_pano, organization_id, start_time, end_time ) if ( - filtered_metadata is None - or filtered_metadata.empty - or filtered_metadata.isna().all().all() + downloaded_metadata is None + or downloaded_metadata.empty + or downloaded_metadata.isna().all().all() ): raise 
ValueError("No Mapillary Features in the AoI match the filter criteria.") if sampling_threshold is not None: - filtered_metadata = spatial_sampling(filtered_metadata, sampling_threshold) + downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold) - total_images = len(filtered_metadata) + total_images = len(downloaded_metadata) if total_images > 100000: raise ValueError( f"Too many Images with selected filter options for the AoI: {total_images}" ) return { - "ids": filtered_metadata["id"].tolist(), - "geometries": filtered_metadata["geometry"].tolist(), + "ids": downloaded_metadata["id"].tolist(), + "geometries": downloaded_metadata["geometry"].tolist(), } From 3f09ae7fff3821e1422a9016edad0d623a0b0e82 Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 20 Jan 2025 14:59:50 +0100 Subject: [PATCH 3/8] feat: add option to randomize order of images --- .../project_types/street/project.py | 1 + .../mapswipe_workers/utils/process_mapillary.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/mapswipe_workers/mapswipe_workers/project_types/street/project.py b/mapswipe_workers/mapswipe_workers/project_types/street/project.py index cc122c409..4d0fe8c5b 100644 --- a/mapswipe_workers/mapswipe_workers/project_types/street/project.py +++ b/mapswipe_workers/mapswipe_workers/project_types/street/project.py @@ -51,6 +51,7 @@ def __init__(self, project_draft): start_time=project_draft.get("startTimestamp", None), end_time=project_draft.get("endTimestamp", None), organization_id=project_draft.get("organizationId", None), + randomize_order=project_draft.get("randomizeOrder", None), sampling_threshold=project_draft.get("samplingThreshold", None), ) diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py index 35dd96e30..1c9159059 100644 --- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py +++ 
b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py @@ -189,6 +189,7 @@ def filter_results( organization_id: str = None, start_time: str = None, end_time: str = None, + randomize_order: bool = None, ): df = results_df.copy() if creator_id is not None: @@ -221,6 +222,9 @@ def filter_results( return None df = filter_by_timerange(df, start_time, end_time) + if randomize_order is not None: + df.sample(frac=1).reset_index(drop=True) + return df @@ -233,6 +237,7 @@ def get_image_metadata( organization_id: str = None, start_time: str = None, end_time: str = None, + randomize_order=False, sampling_threshold=None, ): aoi_polygon = geojson_to_polygon(aoi_geojson) @@ -246,7 +251,13 @@ def get_image_metadata( ] downloaded_metadata = filter_results( - downloaded_metadata, creator_id, is_pano, organization_id, start_time, end_time + downloaded_metadata, + creator_id, + is_pano, + organization_id, + start_time, + end_time, + randomize_order, ) if ( From b9e0a495ef7a0e49764ff629bd6b1038b012edc2 Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 20 Jan 2025 15:37:00 +0100 Subject: [PATCH 4/8] fix: change test to match new code --- .../tests/unittests/test_spatial_sampling.py | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/mapswipe_workers/tests/unittests/test_spatial_sampling.py b/mapswipe_workers/tests/unittests/test_spatial_sampling.py index f43597c89..8c5aea418 100644 --- a/mapswipe_workers/tests/unittests/test_spatial_sampling.py +++ b/mapswipe_workers/tests/unittests/test_spatial_sampling.py @@ -1,29 +1,32 @@ import os - import unittest + import numpy as np import pandas as pd from shapely import wkt from shapely.geometry import Point -from mapswipe_workers.utils.spatial_sampling import distance_on_sphere, filter_points, spatial_sampling +from mapswipe_workers.utils.spatial_sampling import ( + distance_on_sphere, + filter_points, + spatial_sampling, +) class TestDistanceCalculations(unittest.TestCase): - @classmethod def 
setUpClass(cls): with open( - os.path.join( - os.path.dirname(os.path.abspath(__file__)), - "..", - "fixtures", - "mapillary_sequence.csv", - ), - "r", + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "..", + "fixtures", + "mapillary_sequence.csv", + ), + "r", ) as file: df = pd.read_csv(file) - df['geometry'] = df['geometry'].apply(wkt.loads) + df["geometry"] = df["geometry"].apply(wkt.loads) cls.fixture_df = df @@ -42,41 +45,43 @@ def test_filter_points(self): "POINT (-74.006 40.7128)", "POINT (-75.006 41.7128)", "POINT (-76.006 42.7128)", - "POINT (-77.006 43.7128)" + "POINT (-77.006 43.7128)", ] } df = pd.DataFrame(data) - df['geometry'] = df['geometry'].apply(wkt.loads) + df["geometry"] = df["geometry"].apply(wkt.loads) - df['long'] = df['geometry'].apply(lambda geom: geom.x if geom.geom_type == 'Point' else None) - df['lat'] = df['geometry'].apply(lambda geom: geom.y if geom.geom_type == 'Point' else None) + df["long"] = df["geometry"].apply( + lambda geom: geom.x if geom.geom_type == "Point" else None + ) + df["lat"] = df["geometry"].apply( + lambda geom: geom.y if geom.geom_type == "Point" else None + ) threshold_distance = 100 filtered_df = filter_points(df, threshold_distance) self.assertIsInstance(filtered_df, pd.DataFrame) self.assertLessEqual(len(filtered_df), len(df)) - def test_spatial_sampling_ordering(self): data = { "geometry": [ "POINT (-74.006 40.7128)", "POINT (-75.006 41.7128)", "POINT (-76.006 42.7128)", - "POINT (-77.006 43.7128)" + "POINT (-77.006 43.7128)", ], - 'captured_at': [1, 2, 3, 4], - 'sequence_id': ['1', '1', '1', '1'] + "captured_at": [1, 2, 3, 4], + "sequence_id": ["1", "1", "1", "1"], } df = pd.DataFrame(data) - df['geometry'] = df['geometry'].apply(wkt.loads) + df["geometry"] = df["geometry"].apply(wkt.loads) interval_length = 0.1 filtered_gdf = spatial_sampling(df, interval_length) - self.assertTrue(filtered_gdf['captured_at'].is_monotonic_increasing) - + 
self.assertTrue(filtered_gdf["captured_at"].is_monotonic_decreasing) def test_spatial_sampling_with_sequence(self): threshold_distance = 0.01 @@ -86,15 +91,13 @@ def test_spatial_sampling_with_sequence(self): filtered_df.reset_index(drop=True, inplace=True) for i in range(len(filtered_df) - 1): - geom1 = filtered_df.loc[i, 'geometry'] - geom2 = filtered_df.loc[i + 1, 'geometry'] + geom1 = filtered_df.loc[i, "geometry"] + geom2 = filtered_df.loc[i + 1, "geometry"] distance = geom1.distance(geom2) self.assertLess(distance, threshold_distance) - - if __name__ == "__main__": unittest.main() From cb12624d050546fd8a438d958ad255a629d1e14e Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 27 Jan 2025 15:56:01 +0100 Subject: [PATCH 5/8] fix: tasks are only randomized if randomize_order is True --- mapswipe_workers/mapswipe_workers/utils/process_mapillary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py index 1c9159059..17d592c77 100644 --- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py +++ b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py @@ -222,7 +222,7 @@ def filter_results( return None df = filter_by_timerange(df, start_time, end_time) - if randomize_order is not None: + if randomize_order is True: df.sample(frac=1).reset_index(drop=True) return df From 50cb94fe0680fdef8a61fe062fcf3806f5425d31 Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 3 Feb 2025 11:33:01 +0100 Subject: [PATCH 6/8] fix: randomize after spatial sampling --- .../mapswipe_workers/utils/process_mapillary.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py index 17d592c77..7067115c2 100644 --- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py +++ 
b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py @@ -189,7 +189,6 @@ def filter_results( organization_id: str = None, start_time: str = None, end_time: str = None, - randomize_order: bool = None, ): df = results_df.copy() if creator_id is not None: @@ -222,9 +221,6 @@ def filter_results( return None df = filter_by_timerange(df, start_time, end_time) - if randomize_order is True: - df.sample(frac=1).reset_index(drop=True) - return df @@ -257,7 +253,6 @@ def get_image_metadata( organization_id, start_time, end_time, - randomize_order, ) if ( @@ -270,6 +265,9 @@ def get_image_metadata( if sampling_threshold is not None: downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold) + if randomize_order is True: + downloaded_metadata.sample(frac=1).reset_index(drop=True) + total_images = len(downloaded_metadata) if total_images > 100000: raise ValueError( From 8b372843f6ad646b926fcec16f3784c84f01667b Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Mon, 3 Feb 2025 17:49:10 +0100 Subject: [PATCH 7/8] fix: reassign downloaded metadata --- mapswipe_workers/mapswipe_workers/utils/process_mapillary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py index 7067115c2..30828d77b 100644 --- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py +++ b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py @@ -266,7 +266,7 @@ def get_image_metadata( downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold) if randomize_order is True: - downloaded_metadata.sample(frac=1).reset_index(drop=True) + downloaded_metadata = downloaded_metadata.sample(frac=1).reset_index(drop=True) total_images = len(downloaded_metadata) if total_images > 100000: From 3bdc562852dd600700ee00f3b09ae51d3c1395e4 Mon Sep 17 00:00:00 2001 From: Levi Szamek Date: Tue, 4 Feb 2025 17:04:17 +0100 Subject: 
[PATCH 8/8] feat: images are in sequential order if randomizeOrder is false or none --- .../mapswipe_workers/utils/process_mapillary.py | 3 +-- .../mapswipe_workers/utils/spatial_sampling.py | 7 +++---- mapswipe_workers/tests/unittests/test_process_mapillary.py | 6 +++++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py index 30828d77b..bc0cf737d 100644 --- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py +++ b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py @@ -262,8 +262,7 @@ def get_image_metadata( ): raise ValueError("No Mapillary Features in the AoI match the filter criteria.") - if sampling_threshold is not None: - downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold) + downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold) if randomize_order is True: downloaded_metadata = downloaded_metadata.sample(frac=1).reset_index(drop=True) diff --git a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py index a35271fe3..97302b945 100644 --- a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py +++ b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py @@ -141,10 +141,9 @@ def spatial_sampling(df, interval_length): for sequence in sorted_df["sequence_id"].unique(): sequence_df = sorted_df[sorted_df["sequence_id"] == sequence] - filtered_sorted_sub_df = filter_points(sequence_df, interval_length) - sampled_sequence_df = pd.concat( - [sampled_sequence_df, filtered_sorted_sub_df], axis=0 - ) + if interval_length: + sequence_df = filter_points(sequence_df, interval_length) + sampled_sequence_df = pd.concat([sampled_sequence_df, sequence_df], axis=0) # reverse order such that sequence are in direction of travel sampled_sequence_df = sampled_sequence_df.iloc[::-1] diff --git 
a/mapswipe_workers/tests/unittests/test_process_mapillary.py b/mapswipe_workers/tests/unittests/test_process_mapillary.py index 32c1bad46..ec19cbf78 100644 --- a/mapswipe_workers/tests/unittests/test_process_mapillary.py +++ b/mapswipe_workers/tests/unittests/test_process_mapillary.py @@ -343,7 +343,11 @@ def test_get_image_metadata_empty_response(self, mock_coordinate_download): def test_get_image_metadata_size_restriction( self, mock_coordinate_download, mock_filter_results ): - mock_filter_results.return_value = pd.DataFrame({"ID": range(1, 100002)}) + mock_df = pd.DataFrame({"ID": range(1, 100002)}) + mock_df["geometry"] = self.test_polygon + mock_df["captured_at"] = range(1, 100002) + mock_df["sequence_id"] = 1 + mock_filter_results.return_value = mock_df mock_coordinate_download.return_value = self.fixture_df with self.assertRaises(ValueError):