Skip to content

Commit 17dfca8

Browse files
authored
Merge pull request #1125 from Martozar/c.mze-CQ-1579
feat: optimize memory allocation when converting execution response to dataframe
2 parents 82a1c2d + ac0189f commit 17dfca8

File tree

4 files changed

+358
-68
lines changed

4 files changed

+358
-68
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ One logical change is done in one commit.
6666

6767
### Documenting new features
6868

69-
To document a new feature, you need to create a new `.md` file in one of the subsections. These subsections reresent the left navigation menu and are in a hierarchical directories.
69+
To document a new feature, you need to create a new `.md` file in one of the subsections. These subsections represent the left navigation menu and are organized in hierarchical directories.
7070

7171
e.g.:
7272

gooddata-pandas/gooddata_pandas/dataframe.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ def for_created_visualization(
238238
created_visualizations_response: dict,
239239
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
240240
is_cancellable: bool = False,
241+
optimized: bool = False,
241242
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
242243
"""
243244
Creates a data frame using a created visualization.
@@ -247,6 +248,10 @@ def for_created_visualization(
247248
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
248249
submitted to the backend.
249250
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
251+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
252+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
253+
Optimized accumulator stores only unique values and stores only references to them in the list,
254+
which can significantly reduce memory usage.
250255
251256
Returns:
252257
pandas.DataFrame: A DataFrame instance.
@@ -257,6 +262,7 @@ def for_created_visualization(
257262
return self.for_exec_def(
258263
exec_def=execution_definition,
259264
on_execution_submitted=on_execution_submitted,
265+
optimized=optimized,
260266
)
261267

262268
def result_cache_metadata_for_exec_result_id(self, result_id: str) -> ResultCacheMetadata:
@@ -279,6 +285,7 @@ def for_exec_def(
279285
result_size_bytes_limit: Optional[int] = None,
280286
page_size: int = _DEFAULT_PAGE_SIZE,
281287
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
288+
optimized: bool = False,
282289
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
283290
"""
284291
Creates a data frame using an execution definition.
@@ -311,6 +318,10 @@ def for_exec_def(
311318
page_size (int): Number of records per page.
312319
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
313320
submitted to the backend.
321+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
322+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
323+
Optimized accumulator stores only unique values and stores only references to them in the list,
324+
which can significantly reduce memory usage.
314325
315326
Returns:
316327
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -331,6 +342,7 @@ def for_exec_def(
331342
result_size_dimensions_limits=result_size_dimensions_limits,
332343
result_size_bytes_limit=result_size_bytes_limit,
333344
page_size=page_size,
345+
optimized=optimized,
334346
)
335347

336348
def for_exec_result_id(
@@ -343,6 +355,7 @@ def for_exec_result_id(
343355
use_local_ids_in_headers: bool = False,
344356
use_primary_labels_in_attributes: bool = False,
345357
page_size: int = _DEFAULT_PAGE_SIZE,
358+
optimized: bool = False,
346359
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
347360
"""
348361
Retrieves a DataFrame and DataFrame metadata for a given execution result identifier.
@@ -373,6 +386,10 @@ def for_exec_result_id(
373386
use_local_ids_in_headers (bool): Use local identifier in headers.
374387
use_primary_labels_in_attributes (bool): Use primary labels in attributes.
375388
page_size (int): Number of records per page.
389+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
390+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
391+
Optimized accumulator stores only unique values and stores only references to them in the list,
392+
which can significantly reduce memory usage.
376393
377394
Returns:
378395
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -398,4 +415,5 @@ def for_exec_result_id(
398415
use_local_ids_in_headers=use_local_ids_in_headers,
399416
use_primary_labels_in_attributes=use_primary_labels_in_attributes,
400417
page_size=page_size,
418+
optimized=optimized,
401419
)

0 commit comments

Comments
 (0)