diff --git a/mapswipe_workers/mapswipe_workers/project_types/street/project.py b/mapswipe_workers/mapswipe_workers/project_types/street/project.py
index cc122c409..4d0fe8c5b 100644
--- a/mapswipe_workers/mapswipe_workers/project_types/street/project.py
+++ b/mapswipe_workers/mapswipe_workers/project_types/street/project.py
@@ -51,6 +51,7 @@ def __init__(self, project_draft):
             start_time=project_draft.get("startTimestamp", None),
             end_time=project_draft.get("endTimestamp", None),
             organization_id=project_draft.get("organizationId", None),
+            randomize_order=project_draft.get("randomizeOrder", None),
             sampling_threshold=project_draft.get("samplingThreshold", None),
         )
 
diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
index 1faf0b23b..bc0cf737d 100644
--- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
+++ b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
@@ -233,6 +233,7 @@ def get_image_metadata(
     organization_id: str = None,
     start_time: str = None,
     end_time: str = None,
+    randomize_order=False,
     sampling_threshold=None,
 ):
     aoi_polygon = geojson_to_polygon(aoi_geojson)
@@ -245,27 +246,34 @@ def get_image_metadata(
         downloaded_metadata["geometry"].apply(lambda geom: isinstance(geom, Point))
     ]
 
-    filtered_metadata = filter_results(
-        downloaded_metadata, creator_id, is_pano, organization_id, start_time, end_time
+    downloaded_metadata = filter_results(
+        downloaded_metadata,
+        creator_id,
+        is_pano,
+        organization_id,
+        start_time,
+        end_time,
     )
 
     if (
-        filtered_metadata is None
-        or filtered_metadata.empty
-        or filtered_metadata.isna().all().all()
+        downloaded_metadata is None
+        or downloaded_metadata.empty
+        or downloaded_metadata.isna().all().all()
     ):
         raise ValueError("No Mapillary Features in the AoI match the filter criteria.")
 
-    if sampling_threshold is not None:
-        filtered_metadata = spatial_sampling(filtered_metadata, sampling_threshold)
+    downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold)
 
-    total_images = len(filtered_metadata)
+    if randomize_order is True:
+        downloaded_metadata = downloaded_metadata.sample(frac=1).reset_index(drop=True)
+
+    total_images = len(downloaded_metadata)
     if total_images > 100000:
         raise ValueError(
             f"Too many Images with selected filter options for the AoI: {total_images}"
         )
 
     return {
-        "ids": filtered_metadata["id"].tolist(),
-        "geometries": filtered_metadata["geometry"].tolist(),
+        "ids": downloaded_metadata["id"].tolist(),
+        "geometries": downloaded_metadata["geometry"].tolist(),
     }
diff --git a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
index 3cafb5362..97302b945 100644
--- a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
+++ b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
@@ -141,9 +141,11 @@ def spatial_sampling(df, interval_length):
 
     for sequence in sorted_df["sequence_id"].unique():
         sequence_df = sorted_df[sorted_df["sequence_id"] == sequence]
-        filtered_sorted_sub_df = filter_points(sequence_df, interval_length)
-        sampled_sequence_df = pd.concat(
-            [sampled_sequence_df, filtered_sorted_sub_df], axis=0
-        )
+        if interval_length:
+            sequence_df = filter_points(sequence_df, interval_length)
+        sampled_sequence_df = pd.concat([sampled_sequence_df, sequence_df], axis=0)
+
+    # reverse order so that sequences are in the direction of travel
+    sampled_sequence_df = sampled_sequence_df.iloc[::-1]
 
     return sampled_sequence_df
diff --git a/mapswipe_workers/tests/unittests/test_process_mapillary.py b/mapswipe_workers/tests/unittests/test_process_mapillary.py
index 32c1bad46..ec19cbf78 100644
--- a/mapswipe_workers/tests/unittests/test_process_mapillary.py
+++ b/mapswipe_workers/tests/unittests/test_process_mapillary.py
@@ -343,7 +343,11 @@ def test_get_image_metadata_empty_response(self, mock_coordinate_download):
     def test_get_image_metadata_size_restriction(
         self, mock_coordinate_download, mock_filter_results
     ):
-        mock_filter_results.return_value = pd.DataFrame({"ID": range(1, 100002)})
+        mock_df = pd.DataFrame({"ID": range(1, 100002)})
+        mock_df["geometry"] = self.test_polygon
+        mock_df["captured_at"] = range(1, 100002)
+        mock_df["sequence_id"] = 1
+        mock_filter_results.return_value = mock_df
         mock_coordinate_download.return_value = self.fixture_df
 
         with self.assertRaises(ValueError):
diff --git a/mapswipe_workers/tests/unittests/test_spatial_sampling.py b/mapswipe_workers/tests/unittests/test_spatial_sampling.py
index f43597c89..8c5aea418 100644
--- a/mapswipe_workers/tests/unittests/test_spatial_sampling.py
+++ b/mapswipe_workers/tests/unittests/test_spatial_sampling.py
@@ -1,29 +1,32 @@
 import os
-
 import unittest
+
 import numpy as np
 import pandas as pd
 from shapely import wkt
 from shapely.geometry import Point
 
-from mapswipe_workers.utils.spatial_sampling import distance_on_sphere, filter_points, spatial_sampling
+from mapswipe_workers.utils.spatial_sampling import (
+    distance_on_sphere,
+    filter_points,
+    spatial_sampling,
+)
 
 
 class TestDistanceCalculations(unittest.TestCase):
-
     @classmethod
     def setUpClass(cls):
         with open(
-                os.path.join(
-                    os.path.dirname(os.path.abspath(__file__)),
-                    "..",
-                    "fixtures",
-                    "mapillary_sequence.csv",
-                ),
-                "r",
+            os.path.join(
+                os.path.dirname(os.path.abspath(__file__)),
+                "..",
+                "fixtures",
+                "mapillary_sequence.csv",
+            ),
+            "r",
         ) as file:
             df = pd.read_csv(file)
-            df['geometry'] = df['geometry'].apply(wkt.loads)
+            df["geometry"] = df["geometry"].apply(wkt.loads)
         cls.fixture_df = df
 
@@ -42,41 +45,43 @@ def test_filter_points(self):
                 "POINT (-74.006 40.7128)",
                 "POINT (-75.006 41.7128)",
                 "POINT (-76.006 42.7128)",
-                "POINT (-77.006 43.7128)"
+                "POINT (-77.006 43.7128)",
             ]
         }
         df = pd.DataFrame(data)
-        df['geometry'] = df['geometry'].apply(wkt.loads)
+        df["geometry"] = df["geometry"].apply(wkt.loads)
 
-        df['long'] = df['geometry'].apply(lambda geom: geom.x if geom.geom_type == 'Point' else None)
-        df['lat'] = df['geometry'].apply(lambda geom: geom.y if geom.geom_type == 'Point' else None)
+        df["long"] = df["geometry"].apply(
+            lambda geom: geom.x if geom.geom_type == "Point" else None
+        )
+        df["lat"] = df["geometry"].apply(
+            lambda geom: geom.y if geom.geom_type == "Point" else None
+        )
 
         threshold_distance = 100
         filtered_df = filter_points(df, threshold_distance)
 
         self.assertIsInstance(filtered_df, pd.DataFrame)
         self.assertLessEqual(len(filtered_df), len(df))
 
-
     def test_spatial_sampling_ordering(self):
         data = {
             "geometry": [
                 "POINT (-74.006 40.7128)",
                 "POINT (-75.006 41.7128)",
                 "POINT (-76.006 42.7128)",
-                "POINT (-77.006 43.7128)"
+                "POINT (-77.006 43.7128)",
             ],
-            'captured_at': [1, 2, 3, 4],
-            'sequence_id': ['1', '1', '1', '1']
+            "captured_at": [1, 2, 3, 4],
+            "sequence_id": ["1", "1", "1", "1"],
         }
         df = pd.DataFrame(data)
-        df['geometry'] = df['geometry'].apply(wkt.loads)
+        df["geometry"] = df["geometry"].apply(wkt.loads)
 
         interval_length = 0.1
         filtered_gdf = spatial_sampling(df, interval_length)
-        self.assertTrue(filtered_gdf['captured_at'].is_monotonic_increasing)
-
+        self.assertTrue(filtered_gdf["captured_at"].is_monotonic_decreasing)
 
     def test_spatial_sampling_with_sequence(self):
         threshold_distance = 0.01
@@ -86,15 +91,13 @@ def test_spatial_sampling_with_sequence(self):
         filtered_df.reset_index(drop=True, inplace=True)
 
         for i in range(len(filtered_df) - 1):
-            geom1 = filtered_df.loc[i, 'geometry']
-            geom2 = filtered_df.loc[i + 1, 'geometry']
+            geom1 = filtered_df.loc[i, "geometry"]
+            geom2 = filtered_df.loc[i + 1, "geometry"]
             distance = geom1.distance(geom2)
             self.assertLess(distance, threshold_distance)
 
 
-
-
 if __name__ == "__main__":
     unittest.main()