Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions gooddata-pandas/gooddata_pandas/data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ObjId,
TableDimension,
)
from gooddata_sdk.utils import IdObjType, filter_for_attributes_labels
from gooddata_sdk.utils import IdObjType

from gooddata_pandas.utils import (
ColumnsDef,
Expand All @@ -26,6 +26,7 @@
_to_attribute,
_to_item,
_typed_attribute_value,
get_catalog_attributes_for_extract,
)


Expand Down Expand Up @@ -446,12 +447,7 @@ def compute_and_extract(
if not exec_def.has_attributes():
return _extract_for_metrics_only(response, cols, col_to_metric_idx), dict()
else:
filter_query = filter_for_attributes_labels(exec_def.attributes)
# if there are too many labels, all attributes are fetched and no RSQL filter is used
# this prevents a 414 Request-URI Too Long error
attributes = sdk.catalog_workspace_content.get_attributes_catalog(
workspace_id, include=["labels", "datasets"], rsql_filter=filter_query
)
attributes = get_catalog_attributes_for_extract(sdk, workspace_id, exec_def.attributes)
return _extract_from_attributes_and_maybe_metrics(
response,
attributes,
Expand Down
21 changes: 21 additions & 0 deletions gooddata-pandas/gooddata_pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
from gooddata_sdk import (
Attribute,
CatalogAttribute,
GoodDataSdk,
Metric,
ObjId,
SimpleMetric,
VisualizationAttribute,
VisualizationMetric,
)
from gooddata_sdk.type_converter import AttributeConverterStore, DateConverter, DatetimeConverter, IntegerConverter
from gooddata_sdk.utils import filter_for_attributes_labels
from pandas import Index, MultiIndex

LabelItemDef = Union[Attribute, ObjId, str]
Expand All @@ -29,6 +31,25 @@
DatetimeConverter.set_external_fnc(lambda self, value: pandas.to_datetime(value))


def get_catalog_attributes_for_extract(
    sdk: GoodDataSdk, workspace_id: str, attributes: list[Attribute], character_limit: int = 1500
) -> list[CatalogAttribute]:
    """
    Fetch only the catalog attributes needed for the given execution attributes.

    The label filter is built by ``filter_for_attributes_labels``, which yields one or
    more RSQL queries. Each query is sent as a separate catalog request, so the whole
    attribute catalog never has to be loaded.

    Args:
        sdk: SDK instance whose ``catalog_workspace_content`` service is queried.
        workspace_id: Identifier of the workspace to read attributes from.
        attributes: Execution attributes whose labels drive the RSQL filter.
        character_limit: Forwarded unchanged to ``filter_for_attributes_labels``;
            presumably caps the length of each generated RSQL query so the request
            URI stays short (confirm against the SDK helper).

    Returns:
        Catalog attributes (with labels and datasets included) matched by the queries,
        each attribute listed once, in the order it was first returned.
    """
    rsql_queries = filter_for_attributes_labels(attributes, character_limit)

    # An attribute may own several labels; when those labels end up in different
    # queries the same attribute comes back more than once. Key by id to keep only
    # the first occurrence (dict preserves insertion order).
    unique_attributes: dict[str, CatalogAttribute] = {}
    for query in rsql_queries:
        batch = sdk.catalog_workspace_content.get_attributes_catalog(
            workspace_id, include=["labels", "datasets"], rsql_filter=query
        )
        for attr in batch:
            unique_attributes.setdefault(attr.id, attr)
    return list(unique_attributes.values())


def _unique_local_id() -> str:
"""
Generate unique local ID of a DataItem without dashes.
Expand Down
1 change: 1 addition & 0 deletions gooddata-pandas/tests/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# (C) 2025 GoodData Corporation
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
# (C) 2025 GoodData Corporation
version: 1
interactions:
- request:
method: GET
uri: http://localhost:3000/api/v1/entities/workspaces/demo/attributes?include=labels%2Cdatasets&filter=labels.id%3Din%3D%28campaign_name%29&page=0&size=500
body: null
headers:
Accept:
- application/vnd.gooddata.api+json
Accept-Encoding:
- br, gzip, deflate
X-GDC-VALIDATE-RELATIONS:
- 'true'
X-Requested-With:
- XMLHttpRequest
response:
status:
code: 200
message: OK
headers:
Access-Control-Allow-Credentials:
- 'true'
Access-Control-Expose-Headers:
- Content-Disposition, Content-Length, Content-Range, Set-Cookie
Cache-Control:
- no-cache, no-store, max-age=0, must-revalidate
Connection:
- keep-alive
Content-Length:
- '1541'
Content-Security-Policy:
- 'default-src ''self'' *.wistia.com *.wistia.net; script-src ''self'' ''unsafe-inline''
''unsafe-eval'' *.wistia.com *.wistia.net *.hsforms.net *.hsforms.com
src.litix.io matomo.anywhere.gooddata.com *.jquery.com unpkg.com cdnjs.cloudflare.com;
img-src * data: blob:; style-src ''self'' ''unsafe-inline'' fonts.googleapis.com
cdn.jsdelivr.net fast.fonts.net; font-src ''self'' data: fonts.gstatic.com
*.alicdn.com *.wistia.com cdn.jsdelivr.net info.gooddata.com; frame-src
''self'' *.hsforms.net *.hsforms.com; object-src ''none''; worker-src
''self'' blob:; child-src blob:; connect-src ''self'' *.tiles.mapbox.com
*.mapbox.com *.litix.io *.wistia.com *.hsforms.net *.hsforms.com embedwistia-a.akamaihd.net
matomo.anywhere.gooddata.com; media-src ''self'' blob: data: *.wistia.com
*.wistia.net embedwistia-a.akamaihd.net'
Content-Type:
- application/vnd.gooddata.api+json
DATE: &id001
- PLACEHOLDER
Expires:
- '0'
GoodData-Deployment:
- aio
Permission-Policy:
- geolocation 'none'; midi 'none'; sync-xhr 'none'; microphone 'none'; camera
'none'; magnetometer 'none'; gyroscope 'none'; fullscreen 'none'; payment
'none';
Pragma:
- no-cache
Referrer-Policy:
- no-referrer
Server:
- nginx
Vary:
- Origin
- Access-Control-Request-Method
- Access-Control-Request-Headers
X-Content-Type-Options:
- nosniff
X-GDC-TRACE-ID: *id001
X-XSS-Protection:
- '0'
set-cookie:
- SPRING_REDIRECT_URI=; Max-Age=0; Expires=Mon, 20 Jan 2025 10:18:23 GMT;
Path=/; HTTPOnly; SameSite=Lax
body:
string:
data:
- id: campaign_name
type: attribute
attributes:
title: Campaign name
description: Campaign name
tags:
- Campaigns
areRelationsValid: true
sourceColumn: campaign_name
sourceColumnDataType: STRING
relationships:
dataset:
data:
id: campaigns
type: dataset
labels:
data:
- id: campaign_name
type: label
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/attributes/campaign_name
meta:
origin:
originType: NATIVE
originId: demo
included:
- id: campaigns
type: dataset
attributes:
title: Campaigns
description: Campaigns
tags:
- Campaigns
grain:
- id: campaign_id
type: attribute
dataSourceTableId: demo-test-ds:campaigns
dataSourceTablePath:
- demo
- campaigns
type: NORMAL
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/datasets/campaigns
- id: campaign_name
type: label
attributes:
title: Campaign name
description: Campaign name
tags:
- Campaigns
primary: true
sourceColumn: campaign_name
sourceColumnDataType: STRING
valueType: TEXT
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/labels/campaign_name
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/attributes?include=labels%2Cdatasets&filter=labels.id%3D%3D%27campaign_name%27&page=0&size=500
next: http://localhost:3000/api/v1/entities/workspaces/demo/attributes?include=labels%2Cdatasets&filter=labels.id%3D%3D%27campaign_name%27&page=1&size=500
- request:
method: GET
uri: http://localhost:3000/api/v1/entities/workspaces/demo/attributes?include=labels%2Cdatasets&filter=labels.id%3Din%3D%28region%29&page=0&size=500
body: null
headers:
Accept:
- application/vnd.gooddata.api+json
Accept-Encoding:
- br, gzip, deflate
X-GDC-VALIDATE-RELATIONS:
- 'true'
X-Requested-With:
- XMLHttpRequest
response:
status:
code: 200
message: OK
headers:
Access-Control-Allow-Credentials:
- 'true'
Access-Control-Expose-Headers:
- Content-Disposition, Content-Length, Content-Range, Set-Cookie
Cache-Control:
- no-cache, no-store, max-age=0, must-revalidate
Connection:
- keep-alive
Content-Length:
- '1450'
Content-Security-Policy:
- 'default-src ''self'' *.wistia.com *.wistia.net; script-src ''self'' ''unsafe-inline''
''unsafe-eval'' *.wistia.com *.wistia.net *.hsforms.net *.hsforms.com
src.litix.io matomo.anywhere.gooddata.com *.jquery.com unpkg.com cdnjs.cloudflare.com;
img-src * data: blob:; style-src ''self'' ''unsafe-inline'' fonts.googleapis.com
cdn.jsdelivr.net fast.fonts.net; font-src ''self'' data: fonts.gstatic.com
*.alicdn.com *.wistia.com cdn.jsdelivr.net info.gooddata.com; frame-src
''self'' *.hsforms.net *.hsforms.com; object-src ''none''; worker-src
''self'' blob:; child-src blob:; connect-src ''self'' *.tiles.mapbox.com
*.mapbox.com *.litix.io *.wistia.com *.hsforms.net *.hsforms.com embedwistia-a.akamaihd.net
matomo.anywhere.gooddata.com; media-src ''self'' blob: data: *.wistia.com
*.wistia.net embedwistia-a.akamaihd.net'
Content-Type:
- application/vnd.gooddata.api+json
DATE: *id001
Expires:
- '0'
GoodData-Deployment:
- aio
Permission-Policy:
- geolocation 'none'; midi 'none'; sync-xhr 'none'; microphone 'none'; camera
'none'; magnetometer 'none'; gyroscope 'none'; fullscreen 'none'; payment
'none';
Pragma:
- no-cache
Referrer-Policy:
- no-referrer
Server:
- nginx
Vary:
- Origin
- Access-Control-Request-Method
- Access-Control-Request-Headers
X-Content-Type-Options:
- nosniff
X-GDC-TRACE-ID: *id001
X-XSS-Protection:
- '0'
set-cookie:
- SPRING_REDIRECT_URI=; Max-Age=0; Expires=Mon, 20 Jan 2025 10:18:23 GMT;
Path=/; HTTPOnly; SameSite=Lax
body:
string:
data:
- id: region
type: attribute
attributes:
title: Region
description: Region
tags:
- Customers
areRelationsValid: true
sourceColumn: region
sourceColumnDataType: STRING
relationships:
dataset:
data:
id: customers
type: dataset
labels:
data:
- id: region
type: label
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/attributes/region
meta:
origin:
originType: NATIVE
originId: demo
included:
- id: customers
type: dataset
attributes:
title: Customers
description: Customers
tags:
- Customers
grain:
- id: customer_id
type: attribute
dataSourceTableId: demo-test-ds:customers
dataSourceTablePath:
- demo
- customers
type: NORMAL
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/datasets/customers
- id: region
type: label
attributes:
title: Region
description: Region
tags:
- Customers
primary: true
sourceColumn: region
sourceColumnDataType: STRING
valueType: TEXT
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/labels/region
links:
self: http://localhost:3000/api/v1/entities/workspaces/demo/attributes?include=labels%2Cdatasets&filter=labels.id%3D%3D%27region%27&page=0&size=500
next: http://localhost:3000/api/v1/entities/workspaces/demo/attributes?include=labels%2Cdatasets&filter=labels.id%3D%3D%27region%27&page=1&size=500
24 changes: 24 additions & 0 deletions gooddata-pandas/tests/utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# (C) 2025 GoodData Corporation
from pathlib import Path

from gooddata_pandas.utils import get_catalog_attributes_for_extract
from gooddata_sdk import (
Attribute,
GoodDataSdk,
)
from tests_support.vcrpy_utils import get_vcr

# Shared VCR object from the test-support package; its use_cassette decorator is
# applied to the test below (presumably replaying the recorded HTTP traffic).
gd_vcr = get_vcr()

# Cassette files are looked up in the "fixtures" directory next to this test module.
_current_dir = Path(__file__).parent.absolute()
_fixtures_dir = _current_dir / "fixtures"


@gd_vcr.use_cassette(str(_fixtures_dir / "test_get_catalog_attributes_for_extract.yaml"))
def test_get_catalog_attributes_for_extract(test_config):
    """
    Check that catalog attributes are fetched for every requested label.

    A small character_limit is passed so the label filter is split; the recorded
    cassette holds one request per label.
    """
    expected_ids = ["campaign_name", "region"]
    attributes = [
        Attribute(local_id="0", label="campaign_name"),
        Attribute(local_id="1", label="region"),
    ]
    sdk = GoodDataSdk.create(host_=test_config["host"], token_=test_config["token"])

    catalog_attributes = get_catalog_attributes_for_extract(sdk, "demo", attributes, character_limit=28)

    assert len(catalog_attributes) == 2
    returned_ids = [ca.id for ca in catalog_attributes]
    assert returned_ids == expected_ids
Loading
Loading