3030 List ,
3131 Literal ,
3232 Optional ,
33- Sequence ,
3433)
3534from urllib .parse import quote
3635
3736import dlt
38- from dlt .common import pendulum
37+ from dlt .common import logger , pendulum
3938from dlt .common .typing import TDataItems
4039from dlt .sources import DltResource
4140
4241from .helpers import (
4342 _get_property_names_types ,
4443 _to_dlt_columns_schema ,
44+ search_data ,
4545 fetch_data ,
4646 fetch_property_history ,
4747 get_properties_labels ,
48+ SearchOutOfBoundsException ,
4849)
4950from .settings import (
5051 ALL_OBJECTS ,
5152 ARCHIVED_PARAM ,
5253 CRM_OBJECT_ENDPOINTS ,
5354 CRM_PIPELINES_ENDPOINT ,
55+ CRM_SEARCH_OBJECT_ENDPOINTS ,
5456 ENTITY_PROPERTIES ,
57+ LAST_MODIFIED_PROPERTY ,
58+ HUBSPOT_CREATION_DATE ,
5559 MAX_PROPS_LENGTH ,
5660 OBJECT_TYPE_PLURAL ,
5761 OBJECT_TYPE_SINGULAR ,
@@ -73,6 +77,7 @@ def fetch_data_for_properties(
7377 api_key : str ,
7478 object_type : str ,
7579 soft_delete : bool ,
80+ last_modified : str = None ,
7681) -> Iterator [TDataItems ]:
7782 """
7883 Fetch data for a given set of properties from the HubSpot API.
@@ -82,20 +87,56 @@ def fetch_data_for_properties(
8287 api_key (str): HubSpot API key for authentication.
8388 object_type (str): The type of HubSpot object (e.g., 'company', 'contact').
8489 soft_delete (bool): Flag to fetch soft-deleted (archived) records.
90+ last_modified (str): The date from which to fetch records. If None, get all records.
8591
8692 Yields:
8793 Iterator[TDataItems]: Data retrieved from the HubSpot API.
8894 """
95+ logger .info (f"Fetching data for { object_type } ." )
8996 # The Hubspot API expects a comma separated string as properties
9097 joined_props = "," .join (sorted (props ))
9198 params : Dict [str , Any ] = {"properties" : joined_props , "limit" : 100 }
9299 context : Optional [Dict [str , Any ]] = (
93100 {SOFT_DELETE_KEY : False } if soft_delete else None
94101 )
95102
96- yield from fetch_data (
97- CRM_OBJECT_ENDPOINTS [object_type ], api_key , params = params , context = context
98- )
103+ if last_modified is not None :
104+ logger .info (f"Attempting search starting at { last_modified } ." )
105+ search_params : Dict [str , Any ] = {
106+ "properties" : sorted (props ),
107+ "limit" : 200 ,
108+ "filterGroups" : [
109+ {
110+ "filters" : [
111+ {
112+ "propertyName" : LAST_MODIFIED_PROPERTY [object_type ],
113+ "operator" : "GTE" ,
114+ "value" : last_modified ,
115+ }
116+ ]
117+ }
118+ ],
119+ }
120+
121+ try :
122+ yield from search_data (
123+ CRM_SEARCH_OBJECT_ENDPOINTS [object_type ],
124+ api_key ,
125+ params = search_params ,
126+ context = context ,
127+ )
128+ except SearchOutOfBoundsException :
129+ logger .info ("Search out of bounds, fetching all data" )
130+ yield from fetch_data (
131+ CRM_OBJECT_ENDPOINTS [object_type ],
132+ api_key ,
133+ params = params ,
134+ context = context ,
135+ )
136+ else :
137+ yield from fetch_data (
138+ CRM_OBJECT_ENDPOINTS [object_type ], api_key , params = params , context = context
139+ )
99140 if soft_delete :
100141 yield from fetch_data (
101142 CRM_OBJECT_ENDPOINTS [object_type ],
@@ -109,6 +150,7 @@ def crm_objects(
109150 object_type : str ,
110151 api_key : str ,
111152 props : List [str ],
153+ last_modified : dlt .sources .incremental [str ],
112154 include_custom_props : bool = True ,
113155 archived : bool = False ,
114156) -> Iterator [TDataItems ]:
@@ -119,6 +161,7 @@ def crm_objects(
119161 object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
120162 api_key (str): API key for HubSpot authentication.
121163 props (List[str]): List of properties to retrieve.
164+ last_modified (str): The date from which to fetch records
122165 include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
123166 archived (bool, optional): Fetch archived (soft-deleted) objects. Defaults to False.
124167
@@ -135,8 +178,17 @@ def crm_objects(
135178 prop : _to_dlt_columns_schema ({prop : hb_type })
136179 for prop , hb_type in props_to_type .items ()
137180 }
181+ last_modified_on = (
182+ None
183+ if last_modified .start_value == last_modified .initial_value
184+ else last_modified .start_value
185+ )
138186 for batch in fetch_data_for_properties (
139- list (props_to_type .keys ()), api_key , object_type , archived
187+ list (props_to_type .keys ()),
188+ api_key ,
189+ object_type ,
190+ archived ,
191+ last_modified_on ,
140192 ):
141193 yield dlt .mark .with_hints (batch , dlt .mark .make_hints (columns = col_type_hints ))
142194
@@ -176,7 +228,7 @@ def crm_object_history(
176228 # This is especially relevant for columns of type "number" in Hubspot
177229 # that are returned as strings by the API
178230 for batch in fetch_property_history (
179- CRM_OBJECT_ENDPOINTS [object_type ],
231+ CRM_SEARCH_OBJECT_ENDPOINTS [object_type ],
180232 api_key ,
181233 "," .join (sorted (props_to_type .keys ())),
182234 ):
@@ -411,6 +463,10 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
411463 object_type = obj ,
412464 api_key = api_key ,
413465 props = properties .get (obj ),
466+ last_modified = dlt .sources .incremental (
467+ LAST_MODIFIED_PROPERTY [obj ],
468+ initial_value = HUBSPOT_CREATION_DATE .isoformat (),
469+ ),
414470 include_custom_props = include_custom_props ,
415471 archived = soft_delete ,
416472 )
0 commit comments