72 commits
7db4eb3
Added function calls to the ktype service
Sep 16, 2024
5b139c0
Added backend check for existing ktypes
Sep 18, 2024
87b27b5
Merge branch 'main' of github.com:MI-FraunhoferIWM/dsms-python-sdk in…
Sep 18, 2024
ed618b5
Added additional methods to support ktype functionalities
Sep 20, 2024
d8d2e67
Example for ktype usage has been added.
Sep 24, 2024
c96381e
Fixed pylint errors
Sep 24, 2024
3d0f850
Fixed pylint errors
Sep 25, 2024
6bd9835
Delete testktype.py
arjungkk Sep 26, 2024
997549d
update ktype retrieval
MBueschelberger Sep 27, 2024
46daa6e
update pytests
MBueschelberger Sep 27, 2024
c70ba76
minorly refactor ktype enum for direct retrieval of ktype model
MBueschelberger Oct 1, 2024
5ade860
update ktype validation
MBueschelberger Oct 1, 2024
4284256
update setup.cfg and bring back underscore for private variable
MBueschelberger Oct 2, 2024
9c0b6c4
remove unneeded utils
MBueschelberger Oct 2, 2024
6dc696f
update jupyter notebook for docs
MBueschelberger Oct 2, 2024
a70ad66
update jupyter notebook for docs
MBueschelberger Oct 2, 2024
db4509b
remove unneeded private property assignment
MBueschelberger Oct 2, 2024
ffdac4a
update ktype validator
MBueschelberger Oct 2, 2024
a0e1e69
update jupyter notebooks for docs
MBueschelberger Oct 2, 2024
cffaeb3
bring back accidentally deleted ktype.setter
MBueschelberger Oct 2, 2024
23c819f
Merge pull request #37 from MI-FraunhoferIWM/review/ktype-integration
arjungkk Oct 7, 2024
901dc73
Mapped webform to pydantic model
Oct 21, 2024
2b81fae
Delete test.py
arjungkk Oct 24, 2024
2af08a7
Added alias generator to webform inputs
Oct 24, 2024
97ef7e2
Merge branch 'dev/ktype-integration' of github.com:MI-FraunhoferIWM/d…
Oct 24, 2024
d2b4c5f
Updated README and removed duplicate files
Oct 24, 2024
a3b7c2e
Added pylint fixes
Oct 24, 2024
aac1be4
adapt sdk to new platform backend
MBueschelberger Nov 21, 2024
aa4f3e6
Bump version v2.0.4 -> v2.1.0dev0
MBueschelberger Nov 21, 2024
7f4bff9
make upper restriction for pydantic
MBueschelberger Nov 21, 2024
761c3e3
set max length of string-field names
MBueschelberger Nov 27, 2024
8796d2e
bump dev version tag
MBueschelberger Nov 27, 2024
02ab43e
Support for webform changes
Dec 2, 2024
aab928f
Support for webform
Dec 2, 2024
e5b3396
Merge pull request #41 from MI-FraunhoferIWM/dev/webform-support
arjungkk Dec 2, 2024
6efc6e5
add schema transformation function
MBueschelberger Dec 2, 2024
1977ed9
bump dev version
MBueschelberger Dec 2, 2024
63d7be2
make temporary compatibility with old webform model
MBueschelberger Dec 9, 2024
0d06906
temporary fix for custom properties
MBueschelberger Dec 10, 2024
79a7f0d
set upper limit for pydantic
MBueschelberger Dec 10, 2024
b61f472
merge from dev/ktype-integration
MBueschelberger Dec 12, 2024
d6c23b9
update custom properties model
MBueschelberger Dec 12, 2024
c06d45d
update webform kitem assignment and validation
MBueschelberger Dec 13, 2024
ddf2458
fix printing of values
MBueschelberger Dec 16, 2024
78c534b
debug custom properties and dataframe
MBueschelberger Dec 16, 2024
e9f3f58
update config
MBueschelberger Dec 16, 2024
1f60b4d
debug buffer context
MBueschelberger Dec 17, 2024
00999e4
bump dev version
MBueschelberger Dec 17, 2024
3893290
fix minor problems
MBueschelberger Dec 17, 2024
ae25109
remove 'NumericalDatatype', debug unit conversion
MBueschelberger Dec 18, 2024
69ba134
bump dev version
MBueschelberger Dec 18, 2024
9738986
debug type check in entry
MBueschelberger Dec 18, 2024
75ff828
bump version
MBueschelberger Dec 18, 2024
0a40c39
debug serialization
MBueschelberger Dec 18, 2024
3cc313a
update webform and custom properties aliases
MBueschelberger Dec 18, 2024
a666346
bump version
MBueschelberger Dec 18, 2024
c2170f6
remove unneeded union
MBueschelberger Dec 18, 2024
7e83491
move function to sectionize metadata to sdk
MBueschelberger Dec 19, 2024
8871d71
bump version
MBueschelberger Dec 19, 2024
2c28db7
kitem export to hdf5
Jan 29, 2025
e5f556f
Merge branch 'main' of github.com:MI-FraunhoferIWM/dsms-python-sdk in…
Jan 29, 2025
aefd516
kitem export
Jan 29, 2025
bbb1776
fixed merge issues
Jan 29, 2025
d5fbf7c
fixed merge issues
Jan 29, 2025
cc04d7d
Export function for ktype
Feb 11, 2025
6070f8a
Import kitems/ktypes implemented
Mar 20, 2025
95feed3
Pylint error fixes
Mar 20, 2025
8f134fe
Pylint fixes
Mar 20, 2025
1f1aec6
Updated setup requirements
Mar 20, 2025
0563bdc
pre-commit errors fixed
Mar 20, 2025
d09b49f
Duplicate key error fixed for hdf5
Mar 25, 2025
4befbde
Pre-commit hook errors fixed
Mar 25, 2025
11 changes: 11 additions & 0 deletions dsms/knowledge/data_format.py
@@ -0,0 +1,11 @@
"""Data Formats"""

from enum import Enum


class DataFormat(Enum):
"""Data formats"""

JSON = "json"
YAML = "yaml"
HDF5 = "hdf5"
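
A quick illustration of the new enum (not part of the diff): each member wraps the lowercase format name, so a member can also be looked up from a plain string. A minimal sketch:

    from dsms.knowledge.data_format import DataFormat

    fmt = DataFormat("hdf5")   # look up a member by its value
    assert fmt is DataFormat.HDF5
    print(fmt.value)           # -> "hdf5"
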
42 changes: 42 additions & 0 deletions dsms/knowledge/kitem.py
@@ -1,5 +1,6 @@
"""Knowledge Item implementation of the DSMS"""

import json
import logging
import warnings
from datetime import datetime
@@ -9,6 +10,7 @@
from uuid import UUID, uuid4

import pandas as pd
import yaml
from rdflib import Graph

from pydantic import ( # isort:skip
@@ -48,6 +50,8 @@
UserGroupsProperty,
)

from dsms.knowledge.data_format import DataFormat # isort:skip

from dsms.knowledge.ktype import KType # isort:skip

from dsms.knowledge.utils import ( # isort:skip
@@ -655,3 +659,41 @@ def is_a(self, to_be_compared: KType) -> bool:
def refresh(self) -> None:
"""Refresh the KItem"""
_refresh_kitem(self)

def export(self, data_format: DataFormat) -> Any:
"""Export kitems to different formats"""

if data_format == DataFormat.HDF5:
from dsms.knowledge.knowledge_wrapper import ( # isort:skip
data_to_dict,
dict_to_hdf5,
)

return dict_to_hdf5(data_to_dict(self))

if data_format == DataFormat.JSON:
from dsms.knowledge.knowledge_wrapper import data_to_dict

return json.dumps(data_to_dict(self))

if data_format == DataFormat.YAML:
from dsms.knowledge.knowledge_wrapper import data_to_dict

return yaml.dump(data_to_dict(self), default_flow_style=False)

raise ValueError(f"Unsupported data format: {data_format}")

def import_kitem(data, data_format: DataFormat) -> Any:
"""Import objects in different formats to KItem"""

if data_format == DataFormat.HDF5:
from dsms.knowledge.knowledge_wrapper import hdf5_to_dict

return hdf5_to_dict(data)

if data_format == DataFormat.JSON:
return json.load(data)
if data_format == DataFormat.YAML:
return yaml.safe_load(data)

raise ValueError(f"Unsupported data format: {data_format}")
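
A hedged usage sketch of the new KItem export/import surface follows; it is not part of the diff. The `item` object is assumed to be an existing KItem obtained elsewhere (e.g. via a DSMS session). Note that, as written, import_kitem takes the raw data as its first positional parameter (there is no self), so the sketch calls it through the class, and it returns the parsed dictionary rather than a KItem instance.

    import io

    from dsms.knowledge.data_format import DataFormat
    from dsms.knowledge.kitem import KItem

    # `item` is an existing KItem (assumption for illustration, not shown in this diff)
    json_str = item.export(DataFormat.JSON)    # JSON string
    yaml_str = item.export(DataFormat.YAML)    # YAML string
    hdf5_bytes = item.export(DataFormat.HDF5)  # bytes of an in-memory HDF5 file

    # Importing yields a plain dict, not a KItem instance
    from_json = KItem.import_kitem(io.StringIO(json_str), DataFormat.JSON)
    from_hdf5 = KItem.import_kitem(io.BytesIO(hdf5_bytes), DataFormat.HDF5)
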
189 changes: 189 additions & 0 deletions dsms/knowledge/knowledge_wrapper.py
@@ -0,0 +1,189 @@
"""Wrapper for data conversion to and from different data formats"""

import base64
import io
from typing import Any

import h5py
import numpy as np
from pydantic import BaseModel


def data_to_dict(data) -> Any:
"""Convert data to python dictionary"""

data_dict = {}

def handle_value(key, value):
"""Handles the values under different scenarios"""

result = None # Default value for result

# Handle special cases based on 'key' and 'value'
if not isinstance(
value, (int, float, str, bytes, bool, type(None))
) and hasattr(value, "__dict__"):
result = data_to_dict(value)

elif key == "id":
result = str(value)

elif key == "summary":
summary = getattr(data, "summary", None)
result = summary.text if summary else None

elif key == "dataframe":
dataframe = getattr(data, "dataframe", None)
if dataframe:
result = dataframe.to_df().to_json()

elif key == "file":
avatar = getattr(data, "avatar", None)
if avatar:
image = avatar.download()
image_bytes = io.BytesIO()
image.save(image_bytes, format="PNG")
image_bytes.seek(0)
result = base64.b64encode(image_bytes.getvalue()).decode(
"utf-8"
)

elif key == "subgraph" and value is not None:
result = value.serialize()

elif key == "content":
content = data.download().encode("utf-8")
bytes_io = io.BytesIO(content) if content else None
result = base64.b64encode(bytes_io.getvalue()).decode("utf-8")

# Process the value for other cases (lists, dicts, models, etc.)
if result is None:
if isinstance(value, (int, float, str, bytes, bool, type(None))):
result = str(value)
elif isinstance(value, list):
result = [handle_value(key, v) for v in value]
elif isinstance(value, dict):
result = {k: handle_value(k, v) for k, v in value.items()}
elif isinstance(value, BaseModel):
result = {
k: handle_value(k, v)
for k, v in value.model_dump().items()
}
elif isinstance(value, io.BytesIO):
result = base64.b64encode(value.getvalue()).decode("utf-8")

return result

for k, v in data.model_dump().items():
if k == "attachments":
for attachment in getattr(data, "attachments"):
data_dict.setdefault("attachments", []).append(
handle_value(k, attachment)
)
continue
if k == "linked_kitems":
for linked_kitem in getattr(data, "linked_kitems"):
item = {}
for key in ["id", "name", "slug", "ktype_id"]:
value = getattr(linked_kitem, key)
item[key] = str(value)
data_dict.setdefault("linked_kitems", []).append(item)
continue
data_dict[k] = handle_value(k, v)

return data_dict


def dict_to_hdf5(dict_data):
"""Converts data from a dictionary to HDF5"""
byte_data = io.BytesIO()

# Create an HDF5 file in memory
with h5py.File(byte_data, "w") as f:
# Recursively add dictionary contents
def add_to_hdf5(data, group):
for key, value in data.items():
if isinstance(value, dict):
# Handle nested dictionaries recursively
subgroup = group.create_group(key)
add_to_hdf5(value, subgroup)
elif isinstance(value, list):
# Handle lists, check if the list contains dictionaries
subgroup = group.create_group(key)
for idx, item in enumerate(value):
if isinstance(item, dict):
item_group = subgroup.create_group(f"item_{idx}")
add_to_hdf5(item, item_group)
else:
subgroup.create_dataset(f"item_{idx}", data=item)
elif value is not None:
group.create_dataset(key, data=value)
else:
group.create_dataset(key, data="")

# Add data to the root group
add_to_hdf5(dict_data, f)

# Get the bytes data from the memory buffer
byte_data.seek(0)
return byte_data.read()


def hdf5_to_dict(hdf5_file: io.BytesIO) -> dict:
"""Convert an HDF5 file into a Python dictionary."""

def decode_if_bytes(value):
"""Decode bytes to string if needed."""
if isinstance(value, bytes):
return value.decode("utf-8")
if isinstance(value, np.ndarray) and value.dtype.type is np.bytes_:
return [elem.decode("utf-8") for elem in value.tolist()]
return value

def convert_numpy(obj):
"""Convert numpy data types to native Python types."""
if isinstance(obj, np.generic):
return obj.item()
if isinstance(obj, dict):
return {key: convert_numpy(value) for key, value in obj.items()}
if isinstance(obj, list):
return [convert_numpy(item) for item in obj]
return obj

def read_group(group):
"""Recursively read HDF5 groups, grouping 'item_X' keys into lists efficiently."""
data_dict = {}
grouped_items = []

for key, value in group.attrs.items():
data_dict[key] = decode_if_bytes(value)

for key, dataset in group.items():
if isinstance(dataset, h5py.Dataset):
data = dataset[()]
if isinstance(data, np.ndarray) and data.dtype == np.uint8:
try:
value = data.tobytes().decode()
except UnicodeDecodeError:
value = data.tobytes()
elif isinstance(data, np.ndarray):
value = decode_if_bytes(data.tolist())
else:
value = decode_if_bytes(data)

elif isinstance(dataset, h5py.Group):
value = read_group(dataset)

if key.startswith("item_") and key[5:].isdigit():
grouped_items.append(value)
else:
data_dict[key] = value

# If there are grouped items, store them correctly
if grouped_items:
return grouped_items

return convert_numpy(data_dict)

with h5py.File(hdf5_file, "r") as hdf:
return read_group(hdf)
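
A minimal round-trip sketch for the two HDF5 helpers; the payload below is made up for illustration and is not taken from a real KItem:

    import io

    from dsms.knowledge.knowledge_wrapper import dict_to_hdf5, hdf5_to_dict

    payload = {
        "name": "specimen-42",
        "properties": {"material": "steel", "thickness_mm": 2},
        "tags": ["tensile", "batch-7"],
    }

    blob = dict_to_hdf5(payload)               # bytes of an in-memory HDF5 file
    restored = hdf5_to_dict(io.BytesIO(blob))  # back to a plain Python dict

    assert restored["properties"]["material"] == "steel"
    assert restored["tags"] == ["tensile", "batch-7"]

Lists are written as groups of item_<n> datasets and are reassembled into Python lists on the way back, so simple payloads like this one round-trip losslessly.
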
42 changes: 42 additions & 0 deletions dsms/knowledge/ktype.py
@@ -1,13 +1,16 @@
"""KItem types"""

import json
import logging
from datetime import datetime
from typing import TYPE_CHECKING, Any, Optional, Union
from uuid import UUID

import yaml
from pydantic import BaseModel, Field, model_serializer

from dsms.core.logging import handler
from dsms.knowledge.data_format import DataFormat
from dsms.knowledge.utils import _ktype_exists, _refresh_ktype, print_ktype
from dsms.knowledge.webform import Webform

@@ -137,3 +140,42 @@ def serialize(self):
)
for key, value in self.__dict__.items()
}

def export(self, data_format: DataFormat) -> Any:
"""Export ktypes to different formats"""

if data_format == DataFormat.HDF5:
from dsms.knowledge.knowledge_wrapper import ( # isort:skip
data_to_dict,
dict_to_hdf5,
)

return dict_to_hdf5(data_to_dict(self))

if data_format == DataFormat.JSON:
from dsms.knowledge.knowledge_wrapper import data_to_dict

return json.dumps(data_to_dict(self))

if data_format == DataFormat.YAML:
from dsms.knowledge.knowledge_wrapper import data_to_dict

return yaml.dump(data_to_dict(self), default_flow_style=False)

raise ValueError(f"Unsupported data format: {data_format}")

def import_ktype(data, data_format: DataFormat) -> Any:
"""Import objects in different formats to KType"""

if data_format == DataFormat.HDF5:
from dsms.knowledge.knowledge_wrapper import hdf5_to_dict

return hdf5_to_dict(data)

if data_format == DataFormat.JSON:
return json.load(data)

if data_format == DataFormat.YAML:
return yaml.safe_load(data)

raise ValueError(f"Unsupported data format: {data_format}")
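
The KType methods mirror the KItem ones; a minimal sketch, assuming `ktype` is an existing KType instance retrieved elsewhere (not shown in this diff):

    from dsms.knowledge.data_format import DataFormat

    yaml_str = ktype.export(DataFormat.YAML)    # YAML string of the ktype
    hdf5_bytes = ktype.export(DataFormat.HDF5)  # bytes of an in-memory HDF5 file
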
1 change: 1 addition & 0 deletions setup.cfg
@@ -21,6 +21,7 @@ packages = find:
install_requires =
PyYAML>=6,<7
click>=8,<9
h5py>=3,<4
html5lib>=1,<2
lru-cache<1
oyaml==1