|
5 | 5 | import urllib.parse |
6 | 6 | from typing import Any, Dict, Iterator, List, Optional |
7 | 7 |
|
| 8 | +from dlt.common import logger |
8 | 9 | from dlt.common.schema.typing import TColumnSchema |
9 | 10 | from dlt.sources.helpers import requests |
10 | 11 |
|
@@ -63,11 +64,10 @@ def search_pagination( |
63 | 64 | headers: Dict[str, Any], |
64 | 65 | params: Optional[Dict[str, Any]] = None, |
65 | 66 | ) -> Optional[Dict[str, Any]]: |
66 | | - _next = _data.get("paging", {}).get("next", False) |
67 | | - if _next: |
68 | | - after = _next["after"] |
| 67 | + _after = _data.get("paging", {}).get("next", {}).get("after", False) |
| 68 | + if _after and _after != "10000": |
69 | 69 | # Get the next page response |
70 | | - r = requests.post(url, headers=headers, json={**params, "after": after}) |
| 70 | + r = requests.post(url, headers=headers, json={**params, "after": _after}) |
71 | 71 | return r.json() # type: ignore |
72 | 72 | else: |
73 | 73 | return None |
@@ -151,35 +151,70 @@ def fetch_property_history( |
151 | 151 | _data = None |
152 | 152 |
|
153 | 153 |
|
154 | | -def search_data( |
| 154 | +def search_data_since( |
155 | 155 | endpoint: str, |
156 | 156 | api_key: str, |
| 157 | + last_modified: str, |
| 158 | + last_modified_prop: str, |
| 159 | + props: List[str], |
157 | 160 | associations: Optional[List[str]] = None, |
158 | | - params: Optional[Dict[str, Any]] = None, |
159 | 161 | context: Optional[Dict[str, Any]] = None, |
160 | 162 | ) -> Iterator[List[Dict[str, Any]]]: |
161 | 163 | # Construct the URL and headers for the API request |
162 | 164 | url = get_url(CRM_SEARCH_ENDPOINT.format(crm_endpoint=endpoint)) |
163 | 165 | headers = _get_headers(api_key) |
| 166 | + body: Dict[str, Any] = { |
| 167 | + "properties": sorted(props), |
| 168 | + "limit": 200, |
| 169 | + "filterGroups": [ |
| 170 | + { |
| 171 | + "filters": [ |
| 172 | + { |
| 173 | + "propertyName": last_modified_prop, |
| 174 | + "operator": "GTE", |
| 175 | + "value": last_modified, |
| 176 | + } |
| 177 | + ] |
| 178 | + } |
| 179 | + ], |
| 180 | + "sorts": [{"propertyName": last_modified_prop, "direction": "ASCENDING"}], |
| 181 | + } |
164 | 182 |
|
165 | 183 | # Make the API request |
166 | | - r = requests.post(url, headers=headers, json=params) |
| 184 | + r = requests.post(url, headers=headers, json=body) |
167 | 185 | # Parse the API response and yield the properties of each result |
168 | 186 | # Parse the response JSON data |
169 | 187 | _data = r.json() |
170 | 188 |
|
171 | | - if _data.get("total", 0) > 9999: |
172 | | - raise SearchOutOfBoundsException |
173 | | - else: |
174 | | - # Yield the properties of each result in the API response |
175 | | - while _data is not None: |
176 | | - if "results" in _data: |
177 | | - yield _data_to_objects( |
178 | | - _data, endpoint, headers, associations=associations, context=context |
179 | | - ) |
| 189 | + _total = _data.get("total", 0) |
| 190 | + logger.info(f"Getting {_total} new objects from {url} starting at {last_modified}") |
| 191 | + _max_last_modified = last_modified |
| 192 | + # Yield the properties of each result in the API response |
| 193 | + while _data is not None: |
| 194 | + if "results" in _data: |
| 195 | + for _result in _data["results"]: |
| 196 | + if _result["updatedAt"]: |
| 197 | + _max_last_modified = max(_max_last_modified, _result["updatedAt"]) |
| 198 | + yield _data_to_objects( |
| 199 | + _data, endpoint, headers, associations=associations, context=context |
| 200 | + ) |
180 | 201 |
|
181 | | - # Follow pagination links if they exist |
182 | | - _data = search_pagination(url, _data, headers, params) |
| 202 | + # Follow pagination links if they exist |
| 203 | + _data = search_pagination(url, _data, headers, body) |
| 204 | + |
| 205 | + if _total > 9999: |
| 206 | + if _max_last_modified == last_modified: |
| 207 | + raise SearchOutOfBoundsException |
| 208 | + logger.info(f"Starting new search iteration at {_max_last_modified}") |
| 209 | + yield from search_data_since( |
| 210 | + endpoint, |
| 211 | + api_key, |
| 212 | + _max_last_modified, |
| 213 | + last_modified_prop, |
| 214 | + props, |
| 215 | + associations, |
| 216 | + context, |
| 217 | + ) |
183 | 218 |
|
184 | 219 |
|
185 | 220 | def fetch_data( |
|
0 commit comments