From da136c3ce0351f92d6fc47f1b4d50ac372227b8c Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 17 Oct 2025 13:48:18 +0200 Subject: [PATCH 01/26] changed ld_list according to issue #439 --- src/hermes/model/types/__init__.py | 24 ++++------ src/hermes/model/types/ld_list.py | 49 +++++++------------- test/hermes_test/model/types/test_ld_list.py | 48 ++++++++++--------- 3 files changed, 51 insertions(+), 70 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 6c02662c..4bb63152 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -21,14 +21,14 @@ "ld_container": lambda c, **_: c, "json": lambda c, **_: c.compact(), "expanded_json": lambda c, **_: c.ld_value, - }, + } ), # Wrap item from ld_dict in ld_list (ld_list.is_ld_list, dict(ld_container=ld_list)), ( - lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), - dict(ld_container=lambda c, **kw: ld_list([{"@list": c}], **kw)) + lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), + dict(ld_container=lambda c, **kw: ld_list([{"@list": c}], **kw)) ), # pythonize items from lists (expanded set is already handled above) @@ -44,33 +44,25 @@ (ld_dict.is_ld_dict, dict(expanded_json=lambda c, **kw: ld_dict.from_dict(c[0], **kw).ld_value)), ( ld_list.is_container, - dict( - expanded_json=lambda c, **kw: ld_list.from_list( - ld_list([c]).item_list, container=ld_list([c]).container, **kw - ).ld_value - ), + dict(expanded_json=lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c), **kw).ld_value) ), ( ld_list.is_ld_list, - dict( - expanded_json=lambda c, **kw: ld_list.from_list( - ld_list(c).item_list, container=ld_list(c).container, **kw - ).ld_value - ), + dict(expanded_json=lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c[0]), **kw).ld_value) ), (lambda c: isinstance(c, list), dict(expanded_json=lambda c, **kw: ld_list.from_list(c, 
**kw).ld_value)), (lambda v: isinstance(v, (int, float, str, bool)), dict(expanded_json=lambda v, **_: [{"@value": v}])), ( lambda v: isinstance(v, datetime), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]), + dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]) ), ( lambda v: isinstance(v, date), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]), + dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]) ), ( lambda v: isinstance(v, time), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]), + dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]) ), ] diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index d915842a..17145374 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -10,27 +10,12 @@ class ld_list(ld_container): """ An JSON-LD container resembling a list. """ - container_types = ['@list', '@set', '@graph'] - def __init__(self, data, *, parent=None, key=None, index=None, context=None): - """ Create a new ld_list.py container. - - # FIXME: #439 there is no parameter container - :param container: The container type for this list. 
- """ - # FIXME: #439 A set container does not contain "@set" in the expected data format (expanded json ld) - # Instead it is just a list of dicts and therefor would raise a ValueError here (and fail ld_list.is_ld_list) - + if not (self.is_ld_list(data) and "@list" in data[0]): + raise ValueError("The given data does not represent a ld_list.") super().__init__(data, parent=parent, key=key, index=index, context=context) - # Determine container and correct item list - for container in self.container_types: - if container in self._data[0]: - self.item_list = self._data[0][container] - self.container = container - break - else: - raise ValueError(f"Unexpected dict: {data}") + self.item_list = data[0]["@list"] def __getitem__(self, index): if isinstance(index, slice): @@ -44,15 +29,7 @@ def __getitem__(self, index): def __setitem__(self, index, value): # FIXME: #439 what should your_ld_list[index] = [{"@type": "foo", "name": "bar"}] mean? # set your_ld_list[index] to the dict {"@type": "foo", "name": "bar"} given in expanded form or - # set your_ld_list[index] to the list [{"@type": "foo", "name": "bar"}] given in non expanded form or - # set your_ld_list[index] to the set [{"@type": "foo", "name": "bar"}] given in expanded form - # (ld_list.fromlist([{"@type": "foo", "name": "bar"}]) defaults to container type list - # which would have the object as an expanded form whereas the expanded form of a list would be - # ["@list": [{"@type": "foo", "name": "bar"}]] - # This is relevent because nested sets get unnested when being expanded and lists not. 
- # Moreover a set inside a list gets automaticaly converted to a list when expanded) - - # FIXME: #439 what happens when a ld_list is put inside another also depends on their container types + # set your_ld_list[index] to the list [{"@type": "foo", "name": "bar"}] given in non expanded form if not isinstance(index, slice): self.item_list[index] = val[0] if isinstance(val := self._to_expanded_json(self.key, value), list) else val @@ -81,7 +58,7 @@ def __contains__(self, value): def __eq__(self, other): if isinstance(other, ld_list): # FIXME: #439 When are ld_lists equal? - return self.item_list == other.item_list and self.container == other.container + return self.item_list == other.item_list if isinstance(other, list): return self.item_list == self._to_expanded_json(self.key, other)[0]["@list"] return NotImplemented @@ -108,19 +85,25 @@ def to_python(self): @classmethod def is_ld_list(cls, ld_value): - # FIXME: #439 every python list that contains at least one dict can be considerd a set in expanded json form return cls.is_ld_node(ld_value) and cls.is_container(ld_value[0]) @classmethod def is_container(cls, value): - # FIXME: #439 "@set" will never be inside a dictionary of an expanded json ld object return ( isinstance(value, dict) - and len([1 for ct in cls.container_types if isinstance(value.get(ct, None), list)]) == 1 + and [*value.keys()] in [["@list"], ["@set"], ["@graph"]] + and any(isinstance(value.get(cont, None), list) for cont in {"@list", "@set", "@graph"}) ) @classmethod - def from_list(cls, value, *, parent=None, key=None, context=None, container=None): - new_list = cls([{container or "@list": []}], parent=parent, key=key, context=context) + def from_list(cls, value, *, parent=None, key=None, context=None): + new_list = cls([{"@list": []}], parent=parent, key=key, context=context) new_list.extend(value) return new_list + + @classmethod + def get_item_list_from_container(cls, ld_value): + for cont in {"@list", "@set", "@graph"}: + if cont in 
ld_value: + return ld_value[cont] + raise ValueError("The given data does not represent a container.") diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index 377815a8..fcae0459 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -16,6 +16,8 @@ def test_undefined_list(): ld_list([{}]) with pytest.raises(ValueError): ld_list([{"spam": [{"@value": "bacon"}]}]) + with pytest.raises(ValueError): + ld_list([{"@list": [0], "spam": [{"@value": "bacon"}]}]) with pytest.raises(ValueError): ld_list([{"@list": ["a", "b"], "@set": ["foo", "bar"]}]) with pytest.raises(ValueError): @@ -23,16 +25,15 @@ def test_undefined_list(): def test_list_basics(): - li = ld_list([{"@list": [0], "spam": [{"@value": "bacon"}]}]) - assert li._data == [{"@list": [0], "spam": [{"@value": "bacon"}]}] - assert li.container == '@list' + li = ld_list([{"@list": [0]}]) + assert li._data == [{"@list": [0]}] assert li.item_list == [0] def test_build_in_get(): li = ld_list([{"@list": [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}]}], key="name") assert li[0] == "foo" and li[-1] == "foobar" - assert li[:2] == ["foo", "bar"] and li[1:-1] == ["bar"] # FIXME: maybe ld_list instead? 
+ assert li[:2] == ["foo", "bar"] and li[1:-1] == ["bar"] assert li[::2] == ["foo", "foobar"] and li[::-1] == ["foobar", "bar", "foo"] li = ld_list([{"@list": [{"@type": "A", "schema:name": "a"}, {"@list": [{"@type": "A", "schema:name": "a"}]}]}]) @@ -66,7 +67,7 @@ def test_build_in_set(): li[0] = {"@type": "schema:Thing", "schema:name": "b"} assert isinstance(li[0], ld_dict) assert li[0].data_dict == {"@type": ["https://schema.org/Thing"], "https://schema.org/name": [{"@value": "b"}]} - li[0] = ld_list.from_list([{"@type": "schema:Thing", "schema:name": "a"}], parent=li, key=li.key, container="@set") + li[0] = ld_list.from_list([{"@type": "schema:Thing", "schema:name": "a"}], parent=li, key=li.key) assert isinstance(li[0], ld_list) assert li[0].item_list == [{"@type": ["https://schema.org/Thing"], "https://schema.org/name": [{"@value": "a"}]}] li[0] = {"@set": [{"@type": "schema:Thing", "schema:name": "b"}]} @@ -117,19 +118,18 @@ def test_build_in_contains(): def test_build_in_comparison(): li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) - li2 = ld_list([{"@set": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) - li3 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema2": "https://schema.org/"}]) - assert li == [] and li2 == [] and [] == li and [] == li2 - assert li != li2 and li == li3 + li2 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema2": "https://schema.org/"}]) + assert li == [] and [] == li + assert li == li2 li.append("foo") li.append({"@type": "A", "schema:name": "a"}) - assert li != li3 and ["foo", {"@type": "A", "schema:name": "a"}] == li and ["foo"] != li3 + assert li != li2 and ["foo", {"@type": "A", "schema:name": "a"}] == li and ["foo"] != li2 assert ["foo", {"@type": "A", "https://schema.org/name": "a"}] == li - li3.extend(["foo", {"@type": "A", "schema2:name": "a"}]) - assert li == li3 - li4 = 
ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) - li4.extend([{"@type": "A", "schema:name": "a"}, "foo"]) - assert li != li4 + li2.extend(["foo", {"@type": "A", "schema2:name": "a"}]) + assert li == li2 + li3 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li3.extend([{"@type": "A", "schema:name": "a"}, "foo"]) + assert li != li3 def test_extend(): @@ -170,13 +170,19 @@ def test_is_container(): def test_from_list(): li = ld_list.from_list([]) - assert li.container == "@list" and li.item_list == li.context == [] and li.parent is li.key is li.index is None - li = ld_list.from_list([], parent=li, key="schema:name", context=[{"schema": "https://schema.org/"}], - container="@set") - assert li.container == "@set" and li.item_list == [] and li.parent is not None and li.key == "schema:name" + assert li.item_list == li.context == [] and li.parent is li.key is li.index is None + assert li._data == [{"@list": []}] + li = ld_list.from_list([], parent=li, key="schema:name", context=[{"schema": "https://schema.org/"}]) + assert li.item_list == [] and li.parent is not None and li.key == "schema:name" assert li.index is None and li.context == [{"schema": "https://schema.org/"}] li = ld_list.from_list(["a", {"@value": "b"}], parent=None, key="https://schema.org/name", - context=[{"schema": "https://schema.org/"}], container="@graph") - assert li.container == "@graph" and li.item_list == [{"@value": "a"}, {"@value": "b"}] and li.parent is None + context=[{"schema": "https://schema.org/"}]) + assert li.item_list == [{"@value": "a"}, {"@value": "b"}] and li.parent is None assert li.key == "https://schema.org/name" and li.index is None assert li.context == [{"schema": "https://schema.org/"}] + + +def test_get_item_list_from_container(): + assert ld_list.get_item_list_from_container({"@list": ["a"]}) == ["a"] + assert ld_list.get_item_list_from_container({"@set": ["a"]}) == 
["a"] + assert ld_list.get_item_list_from_container({"@graph": ["a"]}) == ["a"] From 03ebd32fd36df233a0ffec169e35c95b40aa263a Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 20 Oct 2025 10:06:24 +0200 Subject: [PATCH 02/26] implemented tests for problem 2 of the issue and more tests --- test/hermes_test/model/types/test_ld_list.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index fcae0459..f27b36a1 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -103,9 +103,9 @@ def test_append(): assert li.item_list[2] == li.item_list[3] li.append(ld_list([{"@list": [{"@type": ["A"], "https://schema.org/name": [{"@value": "a"}]}]}], parent=li, key=li.key)) - li.append([{"@type": "A", "schema:name": "a"}]) # FIXME: should that be interpreted as a list or expanded dict? + li.append([{"@type": "A", "schema:name": "a"}]) li.append(2 * [{"@type": "A", "schema:name": "a"}]) - assert 2 * li[4].item_list == 2 * [li[5].data_dict] == li[6].item_list + assert 2 * li[4].item_list == 2 * li[5].item_list == li[6].item_list def test_build_in_contains(): @@ -130,6 +130,8 @@ def test_build_in_comparison(): li3 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) li3.extend([{"@type": "A", "schema:name": "a"}, "foo"]) assert li != li3 + assert not li == 3 + assert li != 3 def test_extend(): @@ -154,6 +156,14 @@ def test_extend(): assert li[0:2] == ["foo", "bar"] and li.item_list[0:2] == [{"@value": "foo"}, {"@value": "bar"}] assert li[-1].data_dict == {"@type": ["A"], "https://schema.org/name": [{"@value": "a"}]} and len(li) == 3 +def test_to_python(): + li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li.append("foo") + li.append(ld_dict([{"@type": ["A"], "https://schema.org/name": 
[{"@value": "a"}]}])) + li.append(["a"]) + assert li[1]["@type"].item_list == ["A"] + assert li.to_python() == ["foo", {"@type": ["A"], "schema:name": ["a"]}, ["a"]] + def test_is_ld_list(): assert not any(ld_list.is_ld_list(item) for item in [1, "", [], {}, {"@list": []}, [{}], [{"a": "b"}]]) @@ -186,3 +196,5 @@ def test_get_item_list_from_container(): assert ld_list.get_item_list_from_container({"@list": ["a"]}) == ["a"] assert ld_list.get_item_list_from_container({"@set": ["a"]}) == ["a"] assert ld_list.get_item_list_from_container({"@graph": ["a"]}) == ["a"] + with pytest.raises(ValueError): + ld_list.get_item_list_from_container(["a"]) From d1eb6128eec251edc4e97ab11367f3baf7be0691 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 20 Oct 2025 10:14:37 +0200 Subject: [PATCH 03/26] added a few missing conversions and formatted the file a little bit different --- src/hermes/model/types/__init__.py | 37 ++++++++++++++++-------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 4bb63152..37203ce4 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -25,44 +25,47 @@ ), # Wrap item from ld_dict in ld_list - (ld_list.is_ld_list, dict(ld_container=ld_list)), + (ld_list.is_ld_list, {"ld_container": ld_list}), ( lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), - dict(ld_container=lambda c, **kw: ld_list([{"@list": c}], **kw)) + {"ld_container": lambda c, **kw: ld_list([{"@list": c}], **kw)} ), # pythonize items from lists (expanded set is already handled above) - (ld_container.is_json_id, dict(python=lambda c, **_: c["@id"])), - (ld_container.is_typed_json_value, dict(python=ld_container.typed_ld_to_py)), - (ld_container.is_json_value, dict(python=lambda c, **_: c["@value"])), - (ld_list.is_container, dict(ld_container=lambda c, **kw: ld_list([c], **kw))), - (ld_dict.is_json_dict, 
dict(ld_container=lambda c, **kw: ld_dict([c], **kw))), + (ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), + (ld_container.is_typed_json_value, {"python": ld_container.typed_ld_to_py}), + (ld_container.is_json_value, {"python": lambda c, **_: c["@value"]}), + (ld_list.is_container, {"ld_container": lambda c, **kw: ld_list([c], **kw)}), + (ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}), # Convert internal data types to expanded_json - (lambda c: ld_container.is_json_id(c) or ld_container.is_json_value(c), dict(expanded_json=lambda c, **_: [c])), - (ld_dict.is_json_dict, dict(expanded_json=lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value)), - (ld_dict.is_ld_dict, dict(expanded_json=lambda c, **kw: ld_dict.from_dict(c[0], **kw).ld_value)), + (lambda c: ld_container.is_json_id(c), {"expanded_json": lambda c, **_: c}), + (lambda c: ld_container.is_ld_id(c), {"expanded_json": lambda c, **_: c[0]}), + (lambda c: ld_container.is_json_value(c), {"expanded_json": lambda c, **_: [c]}), + (lambda c: ld_container.is_ld_value(c), {"expanded_json": lambda c, **_: c}), + (ld_dict.is_json_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value}), + (ld_dict.is_ld_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c[0], **kw).ld_value}), ( ld_list.is_container, - dict(expanded_json=lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c), **kw).ld_value) + {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c), **kw).ld_value} ), ( ld_list.is_ld_list, - dict(expanded_json=lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c[0]), **kw).ld_value) + {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c[0]), **kw).ld_value} ), - (lambda c: isinstance(c, list), dict(expanded_json=lambda c, **kw: ld_list.from_list(c, **kw).ld_value)), - (lambda v: isinstance(v, (int, float, str, bool)), 
dict(expanded_json=lambda v, **_: [{"@value": v}])), + (lambda c: isinstance(c, list), {"expanded_json": lambda c, **kw: ld_list.from_list(c, **kw).ld_value}), + (lambda v: isinstance(v, (int, float, str, bool)), {"expanded_json": lambda v, **_: [{"@value": v}]}), ( lambda v: isinstance(v, datetime), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]) + {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:DateTime"]}]} ), ( lambda v: isinstance(v, date), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]) + {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Date"]}]} ), ( lambda v: isinstance(v, time), - dict(expanded_json=lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]) + {"expanded_json": lambda v, **_: [{"@value": v.isoformat(), "@type": iri_map["schema:Time"]}]} ), ] From d28dc10d048be521abf50935fd43019832d36e8f Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 20 Oct 2025 10:16:34 +0200 Subject: [PATCH 04/26] removed one conversion, added one and changed _to_python to fix conversion of @type values --- src/hermes/model/types/__init__.py | 7 ++----- src/hermes/model/types/ld_container.py | 7 ------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 37203ce4..8b6a2930 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -26,10 +26,7 @@ # Wrap item from ld_dict in ld_list (ld_list.is_ld_list, {"ld_container": ld_list}), - ( - lambda c: isinstance(c, list) and all(isinstance(item, dict) for item in c), - {"ld_container": lambda c, **kw: ld_list([{"@list": c}], **kw)} - ), + (lambda c: isinstance(c, list), {"ld_container": lambda c, **kw: ld_list([{"@list": c}], **kw)}), # pythonize items from lists (expanded set is already handled above) 
(ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), @@ -37,6 +34,7 @@ (ld_container.is_json_value, {"python": lambda c, **_: c["@value"]}), (ld_list.is_container, {"ld_container": lambda c, **kw: ld_list([c], **kw)}), (ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}), + (lambda v: isinstance(v, str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}), # Convert internal data types to expanded_json (lambda c: ld_container.is_json_id(c), {"expanded_json": lambda c, **_: c}), @@ -44,7 +42,6 @@ (lambda c: ld_container.is_json_value(c), {"expanded_json": lambda c, **_: [c]}), (lambda c: ld_container.is_ld_value(c), {"expanded_json": lambda c, **_: c}), (ld_dict.is_json_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value}), - (ld_dict.is_ld_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c[0], **kw).ld_value}), ( ld_list.is_container, {"expanded_json": lambda c, **kw: ld_list.from_list(ld_list.get_item_list_from_container(c), **kw).ld_value} diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 55b62cd3..86b42088 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -83,13 +83,6 @@ def ld_value(self): def _to_python(self, full_iri, ld_value): if full_iri == "@id": value = self.ld_proc.compact_iri(self.active_ctx, ld_value, vocab=False) - elif full_iri == "@type": - value = [ - self.ld_proc.compact_iri(self.active_ctx, ld_type) - for ld_type in ld_value - ] - if len(value) == 1: - value = value[0] else: value, ld_output = self.ld_proc.apply_typemap(ld_value, "python", "ld_container", parent=self, key=full_iri) From fb5aa6474c45a3c448e8b35289f34c2c1baee3f4 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 20 Oct 2025 10:19:40 +0200 Subject: [PATCH 05/26] fixed formatting errors --- test/hermes_test/model/types/test_ld_list.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index f27b36a1..98bd7b68 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -156,6 +156,7 @@ def test_extend(): assert li[0:2] == ["foo", "bar"] and li.item_list[0:2] == [{"@value": "foo"}, {"@value": "bar"}] assert li[-1].data_dict == {"@type": ["A"], "https://schema.org/name": [{"@value": "a"}]} and len(li) == 3 + def test_to_python(): li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) li.append("foo") @@ -163,7 +164,7 @@ def test_to_python(): li.append(["a"]) assert li[1]["@type"].item_list == ["A"] assert li.to_python() == ["foo", {"@type": ["A"], "schema:name": ["a"]}, ["a"]] - + def test_is_ld_list(): assert not any(ld_list.is_ld_list(item) for item in [1, "", [], {}, {"@list": []}, [{}], [{"a": "b"}]]) From 524885382489850d1a1039a61d7fa403e7775748 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 24 Oct 2025 09:51:16 +0200 Subject: [PATCH 06/26] fixed ld_container tests, a type conversion and equals of ld_list --- src/hermes/model/types/__init__.py | 2 +- src/hermes/model/types/ld_list.py | 4 +++- .../model/types/test_ld_container.py | 17 ++++++++--------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 8b6a2930..8f47fe44 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -30,7 +30,7 @@ # pythonize items from lists (expanded set is already handled above) (ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), - (ld_container.is_typed_json_value, {"python": ld_container.typed_ld_to_py}), + (ld_container.is_typed_json_value, {"python": lambda c, **kw: ld_container.typed_ld_to_py([c], **kw)}), (ld_container.is_json_value, {"python": lambda c, **_: c["@value"]}), (ld_list.is_container, 
{"ld_container": lambda c, **kw: ld_list([c], **kw)}), (ld_dict.is_json_dict, {"ld_container": lambda c, **kw: ld_dict([c], **kw)}), diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 17145374..fc93f074 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -60,7 +60,9 @@ def __eq__(self, other): # FIXME: #439 When are ld_lists equal? return self.item_list == other.item_list if isinstance(other, list): - return self.item_list == self._to_expanded_json(self.key, other)[0]["@list"] + if ld_list.is_ld_list(other): + other = ld_list.get_item_list_from_container(other) + return self.item_list == self.from_list(other, key=self.key, context=self.full_context).item_list return NotImplemented def __ne__(self, other): diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index 9da5b461..2d6687f1 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -96,27 +96,26 @@ def test_to_python_id_with_prefix(self, mock_context): def test_to_python_type(self, mock_context): cont = ld_container([{}], context=[mock_context]) - assert cont._to_python("@type", ["@id"]) == '@id' + assert cont._to_python("@type", ["@id"]) == ['@id'] assert cont._to_python("@type", ["@id", "http://spam.eggs/Egg"]) == ["@id", "Egg"] def test_to_python_id_value(self, mock_context): cont = ld_container([{}], context=[mock_context]) + assert cont._to_python("http://spam.eggs/ham", [{"@id": "http://spam.eggs/spam"}]) == ["http://spam.eggs/spam"] assert cont._to_python("http://spam.eggs/ham", - [{"@id": "http://spam.eggs/spam"}]) == "http://spam.eggs/spam" - assert cont._to_python("http://spam.eggs/ham", - [{"@id": "http://spam.eggs/identifier"}]) == "http://spam.eggs/identifier" + {"@id": "http://spam.eggs/identifier"}) == "http://spam.eggs/identifier" def test_to_python_basic_value(self, mock_context): cont = 
ld_container([{}], context=[mock_context]) - assert cont._to_python("http://soam.eggs/spam", [{"@value": "bacon"}]) == 'bacon' - assert cont._to_python("http://spam.eggs/spam", [{"@value": True}]) is True - assert cont._to_python("http://spam.eggs/spam", [{"@value": 123}]) == 123 + assert cont._to_python("http://soam.eggs/spam", {"@value": "bacon"}) == 'bacon' + assert cont._to_python("http://spam.eggs/spam", {"@value": True}) is True + assert cont._to_python("http://spam.eggs/spam", {"@value": 123}) == 123 def test_to_python_datetime_value(self, mock_context): cont = ld_container([{}], context=[mock_context]) - assert cont._to_python("http://spam.eggs/eggs", [{ + assert cont._to_python("http://spam.eggs/eggs", { "@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime" - }]) == "2022-02-22T00:00:00" + }) == "2022-02-22T00:00:00" # TODO: #434 typed date is returned as string instead of date def test_to_expanded_id(self, mock_context): cont = ld_container([{}], context=[mock_context]) From e2c1bba599237bafab6240ab77b3fcae7cf320d5 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 7 Nov 2025 13:42:06 +0100 Subject: [PATCH 07/26] added detailed comparison for ld_list, ld_dict and json_values and added tests for them --- src/hermes/model/types/__init__.py | 4 +- src/hermes/model/types/ld_container.py | 11 ++++++ src/hermes/model/types/ld_dict.py | 37 ++++++++++++++++++ src/hermes/model/types/ld_list.py | 39 ++++++++++++++----- .../model/types/test_ld_container.py | 11 ++++++ test/hermes_test/model/types/test_ld_dict.py | 26 +++++++++++++ test/hermes_test/model/types/test_ld_list.py | 18 +++++++++ 7 files changed, 134 insertions(+), 12 deletions(-) create mode 100644 test/hermes_test/model/types/test_ld_dict.py diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 8f47fe44..497ee44a 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -37,8 +37,8 @@ (lambda v: isinstance(v, 
str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}), # Convert internal data types to expanded_json - (lambda c: ld_container.is_json_id(c), {"expanded_json": lambda c, **_: c}), - (lambda c: ld_container.is_ld_id(c), {"expanded_json": lambda c, **_: c[0]}), + (lambda c: ld_container.is_json_id(c), {"expanded_json": lambda c, **_: [c]}), + (lambda c: ld_container.is_ld_id(c), {"expanded_json": lambda c, **_: c}), (lambda c: ld_container.is_json_value(c), {"expanded_json": lambda c, **_: [c]}), (lambda c: ld_container.is_ld_value(c), {"expanded_json": lambda c, **_: c}), (ld_dict.is_json_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value}), diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 86b42088..8b0ef437 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -182,3 +182,14 @@ def typed_ld_to_py(cls, data, **kwargs): ld_value = data[0]['@value'] return ld_value + + @classmethod + def are_values_equal(cls, first, second): + if "@id" in first and "@id" in second: + return first["@id"] == second["@id"] + for key in {"@value", "@type"}: + if (key in first) ^ (key in second): + return False + if key in first and key in second and first[key] != second[key]: + return False + return True diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index d134b99e..1686b9a6 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche from .ld_container import ld_container @@ -35,6 +36,42 @@ def __contains__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) return full_iri in self.data_dict + def __eq__(self, other): + if not isinstance(other, (dict, ld_dict)): + return NotImplemented + if 
ld_container.is_json_id(other): + if "@id" in self: + return self["@id"] == other["@id"] + return self.data_dict == {} + if ld_container.is_json_value(other): + if {*self.keys()}.issubset({"@id", *other.keys()}): + return ld_container.are_values_equal(self.data_dict, other) + return False + if isinstance(other, dict): + other = self.from_dict(other, parent=self.parent, key=self.key, context=self.context) + if "@id" in self and "@id" in other: + return self["@id"] == other["@id"] + keys_self = {*self.keys()} + keys_other = {*other.keys()} + unique_keys = keys_self.symmetric_difference(keys_other) + if unique_keys and unique_keys != {"@id"}: + return False + for key in keys_self.intersection(keys_other): + item = self[key] + other_item = other[key] + res = item.__eq__(other_item) + if res == NotImplemented: + res = other_item.__eq__(item) + if res is False or res == NotImplemented: # res is not True + return False + return True + + def __ne__(self, other): + x = self.__eq__(other) + if x is NotImplemented: + return NotImplemented + return not x + def get(self, key, default=_NO_DEFAULT): try: value = self[key] diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index fc93f074..18e3cdff 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche from .ld_container import ld_container @@ -27,10 +28,6 @@ def __getitem__(self, index): return item def __setitem__(self, index, value): - # FIXME: #439 what should your_ld_list[index] = [{"@type": "foo", "name": "bar"}] mean? 
- # set your_ld_list[index] to the dict {"@type": "foo", "name": "bar"} given in expanded form or - # set your_ld_list[index] to the list [{"@type": "foo", "name": "bar"}] given in non expanded form - if not isinstance(index, slice): self.item_list[index] = val[0] if isinstance(val := self._to_expanded_json(self.key, value), list) else val return @@ -56,14 +53,36 @@ def __contains__(self, value): return expanded_value in self.item_list def __eq__(self, other): - if isinstance(other, ld_list): - # FIXME: #439 When are ld_lists equal? - return self.item_list == other.item_list + if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): + return NotImplemented + if isinstance(other, dict): + other = [other] if isinstance(other, list): if ld_list.is_ld_list(other): - other = ld_list.get_item_list_from_container(other) - return self.item_list == self.from_list(other, key=self.key, context=self.full_context).item_list - return NotImplemented + other = ld_list.get_item_list_from_container(other[0]) + other = self.from_list(other, parent=self.parent, key=self.key, context=self.context) + if len(self.item_list) != len(other.item_list): + return False + if (self.key == "@type") ^ (other.key == "@type"): + return False + if self.key == other.key == "@type": + return self.item_list == other.item_list + for index, (item, other_item) in enumerate(zip(self.item_list, other.item_list)): + if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and + (ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))): + if not ld_container.are_values_equal(item, other_item): + return False + continue + if "@id" in item and "@id" in other_item: + return item["@id"] == other_item["@id"] + item = self[index] + other_item = other[index] + res = item.__eq__(other_item) + if res == NotImplemented: + res = other_item.__eq__(item) + if res is False or res == NotImplemented: # res is not True + return False + return True def 
__ne__(self, other): x = self.__eq__(other) diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index 2d6687f1..53cb15c8 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -4,6 +4,7 @@ # SPDX-FileContributor: Sophie Kernchen # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche from datetime import datetime @@ -152,3 +153,13 @@ def test_to_expanded_datetime_value(self, mock_context): assert cont._to_expanded_json("eggs", datetime(2022, 2, 22)) == [ {"@value": "2022-02-22T00:00:00", "@type": "http://schema.org/DateTime"} ] + + def test_are_values_equal(self): + assert ld_container.are_values_equal({"@id": "foo"}, {"@id": "foo"}) + assert not ld_container.are_values_equal({"@id": "foo"}, {"@id": "bar"}) + assert ld_container.are_values_equal({"@id": "foo"}, {"@id": "foo", "@value": "bar"}) + assert ld_container.are_values_equal({"@value": "foo"}, {"@value": "foo"}) + assert ld_container.are_values_equal({"@value": "bar"}, {"@id": "foo", "@value": "bar"}) + assert not ld_container.are_values_equal({"@value": "foo"}, {"@value": "bar"}) + assert not ld_container.are_values_equal({"@type": "bar", "@value": "foo"}, {"@value": "foo"}) + assert ld_container.are_values_equal({"@type": "bar", "@value": "foo"}, {"@type": "bar", "@value": "foo"}) diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py new file mode 100644 index 00000000..b696ff5b --- /dev/null +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Michael Fritzsche + +from hermes.model.types.ld_dict import ld_dict + + +def test_build_in_comparison(): + di = ld_dict([{}], context={"schema": "https://schema.org/"}) + assert di != 1 and di != [] and di != "" + 
di["@id"] = "foo" + di["schema:name"] = "bar" + assert di == {"@id": "foo"} + # Fail probably because of bug in ld_dict + # that is fixed on refactor/data-model after merge of refactor/384-test-ld_dict + assert di == {"@id": "foo", "schema:name": "bar"} + assert di == {"@id": "foo", "name": "b"} + assert di == {"schema:name": "bar"} + di = ld_dict([{}], context={"schema": "https://schema.org/"}) + di["schema:Person"] = {"schema:name": "foo"} + assert di == {"schema:Person": {"schema:name": "foo"}} + di["schema:Person"].append({"schema:name": "bar"}) + assert di == {"schema:Person": [{"schema:name": "foo"}, {"schema:name": "bar"}]} + assert di != {"schema:name": "foo"} diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index 98bd7b68..8a5bd7d2 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -73,6 +73,9 @@ def test_build_in_set(): li[0] = {"@set": [{"@type": "schema:Thing", "schema:name": "b"}]} assert isinstance(li[0], ld_list) assert li[0].item_list == [{"@type": ["https://schema.org/Thing"], "https://schema.org/name": [{"@value": "b"}]}] + li[0] = [{"@type": "schema:Thing", "schema:name": "b"}] + assert isinstance(li[0], ld_list) + assert li[0].item_list == [{"@type": ["https://schema.org/Thing"], "https://schema.org/name": [{"@value": "b"}]}] def test_build_in_len(): @@ -132,6 +135,21 @@ def test_build_in_comparison(): assert li != li3 assert not li == 3 assert li != 3 + li = ld_list([{"@list": []}], key="https://schema.org/Person", context=[{"schema": "https://schema.org/"}]) + li.append({"@id": "foo"}) + assert li == [{"@id": "foo"}] and li == [{"@id": "foo", "schema:name": "bar"}] and li == {"@list": [{"@id": "foo"}]} + li2 = ld_list([{"@list": []}], key="@type", context=[{"schema": "https://schema.org/"}]) + li2.append("schema:name") + assert li != li2 + li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": 
"https://schema.org/"}]) + li2 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema2": "https://schema.org/"}]) + li.append("foo") + li2.append("bar") + assert li != li2 + li[0] = {"@type": "foo", "@value": "bar"} + assert li != li2 + li[0] = {"@type": "foobar", "@value": "bar"} + assert li != li2 def test_extend(): From aeb88fb8f0172e609b3e4e1261e1f5061db1c579 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 21 Nov 2025 10:57:32 +0100 Subject: [PATCH 08/26] added support for graph and set again and improved expansion of json-ld --- src/hermes/model/types/__init__.py | 2 +- src/hermes/model/types/ld_container.py | 92 +++++++++++++++++++- src/hermes/model/types/ld_dict.py | 6 +- src/hermes/model/types/ld_list.py | 65 +++++++++++--- test/hermes_test/model/types/test_ld_dict.py | 2 +- test/hermes_test/model/types/test_ld_list.py | 54 ++++++++---- 6 files changed, 187 insertions(+), 34 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 497ee44a..bf47bdfb 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -26,7 +26,7 @@ # Wrap item from ld_dict in ld_list (ld_list.is_ld_list, {"ld_container": ld_list}), - (lambda c: isinstance(c, list), {"ld_container": lambda c, **kw: ld_list([{"@list": c}], **kw)}), + (lambda c: isinstance(c, list), {"ld_container": lambda c, **kw: ld_list(c, **kw)}), # pythonize items from lists (expanded set is already handled above) (ld_container.is_json_id, {"python": lambda c, **_: c["@id"]}), diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 8b0ef437..50e38df4 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -6,6 +6,8 @@ from .pyld_util import JsonLdProcessor, bundled_loader +from datetime import date, time, datetime + class ld_container: """ @@ -91,7 +93,95 @@ def _to_python(self, full_iri, ld_value): return value - def 
_to_expanded_json(self, key, value): + def _to_expanded_json(self, value): + """ + The item_lists contents/ the data_dict will be substituted with value. + Value can be an ld_container or contain zero or more. + Then the _data of the inner most ld_dict that contains or is self will be expanded. + If self is not an ld_dict and none of self's parents is, use the key from ld_list to generate a minimal dict + + The result of this function is what value has turned into + (always a list for type(self) == ld_dict and list or dict for type(self) == ld_list). + If self is an ld_list and value was assimilated by self the returned value is list otherwise it is a dict + (e.g. in a set the inner sets values are put directly into the outer one). + """ + if self.__class__.__name__ == "ld_list": + value = [value] + parent = self + path = [] + while parent.__class__.__name__ != "ld_dict": + if parent.container_type == "@list": + path.extend(["@list", 0]) + elif parent.container_type == "@graph": + path.extend(["@graph", 0]) + path.append(self.ld_proc.expand_iri(parent.active_ctx, parent.key) if self.index is None else self.index) + if parent.parent is None: + break + parent = parent.parent + if parent.__class__.__name__ != "ld_dict": + key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) + parent = ld_container([{key: parent._data}]) + path.append(0) + + key_and_reference_todo_list = [] + if isinstance(value, ld_container): + if parent.__class__.__name__ == "ld_list" and parent.container_type == "@set": + value = value._data + else: + value = value._data[0] + elif isinstance(value, date): + value = {"@value": value.isoformat(), "@type": "schema:Date"} + elif isinstance(value, datetime): + value = {"@value": value.isoformat(), "@type": "schema:DateTime"} + elif isinstance(value, time): + value = {"@value": value.isoformat(), "@type": "schema:Time"} + else: + key_and_reference_todo_list = [(0, [value])] + special_types = (list, dict, ld_container, date, datetime, time) + 
while True: + if len(key_and_reference_todo_list) == 0: + break + key, ref = key_and_reference_todo_list.pop() + temp = ref[key] + if isinstance(temp, list): + key_and_reference_todo_list.extend([(index, temp) for index, val in enumerate(temp) if isinstance(val, special_types)]) + elif isinstance(temp, dict): + key_and_reference_todo_list.extend([(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)]) + elif isinstance(temp, ld_container): + ref[key] = temp._data[0] + elif isinstance(temp, date): + ref[key] = {"@value": temp.isoformat(), "@type": "schema:Date"} + elif isinstance(temp, datetime): + ref[key] = {"@value": temp.isoformat(), "@type": "schema:DateTime"} + elif isinstance(temp, time): + ref[key] = {"@value": temp.isoformat(), "@type": "schema:Time"} + + current_data = parent._data + for index in range(len(path) - 1, 0, -1): + current_data = current_data[path[index]] + if current_data == []: + self_data = None + current_data.append(value) + else: + self_data = current_data[path[0]] + current_data[path[0]] = value + expanded_data = self.ld_proc.expand(parent._data, {"expandContext": self.full_context, + "documentLoader": bundled_loader, + "keepFreeFloatingNodes": True}) + if self_data is not None: + current_data[path[0]] = self_data + else: + current_data.clear() + for index in range(len(path) - 1, -1, -1): + expanded_data = expanded_data[path[index]] + + if self.__class__.__name__ == "ld_dict": + return expanded_data + if self.__class__.__name__ == "ld_list" and len(expanded_data) != 1: + return expanded_data + return expanded_data[0] + + def _to_expanded_json_deprecated(self, key, value): if key == "@id": ld_value = self.ld_proc.expand_iri(self.active_ctx, value, vocab=False) elif key == "@type": diff --git a/src/hermes/model/types/ld_dict.py b/src/hermes/model/types/ld_dict.py index 1686b9a6..e2848869 100644 --- a/src/hermes/model/types/ld_dict.py +++ b/src/hermes/model/types/ld_dict.py @@ -25,8 +25,8 @@ def 
__getitem__(self, key): def __setitem__(self, key, value): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) - ld_value = self._to_expanded_json(full_iri, value) - self.data_dict.update({full_iri: ld_value}) + ld_value = self._to_expanded_json({full_iri: value}) + self.data_dict.update(ld_value) def __delitem__(self, key): full_iri = self.ld_proc.expand_iri(self.active_ctx, key) @@ -127,7 +127,7 @@ def from_dict(cls, value, *, parent=None, key=None, context=None, ld_type=None): full_context[:0] = parent.full_context ld_value = cls.ld_proc.expand(ld_data, {"expandContext": full_context, "documentLoader": bundled_loader}) - ld_value = cls(ld_value, parent=parent, key=key, context=data_context) + ld_value = cls(ld_value, parent=parent, key=key, context=full_context) return ld_value diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 18e3cdff..a636bcb0 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -12,12 +12,28 @@ class ld_list(ld_container): """ An JSON-LD container resembling a list. 
""" def __init__(self, data, *, parent=None, key=None, index=None, context=None): - if not (self.is_ld_list(data) and "@list" in data[0]): - raise ValueError("The given data does not represent a ld_list.") + if not isinstance(key, str): + raise ValueError("The key is not a string or was omitted.") + if not isinstance(data, list): + raise ValueError("The given data does not represent an ld_list.") + if self.is_ld_list(data): + if "@list" in data[0]: + self.container_type = "@list" + self.item_list = data[0]["@list"] + elif "@graph" in data[0]: + self.container_type = "@graph" + self.item_list = data[0]["@graph"] + else: + raise ValueError("The given @set is not fully expanded.") + else: + self.container_type = "@set" + self.item_list = data + if key == "@type" and not all(isinstance(item, str) for item in self.item_list): + raise ValueError("A given value for @type is not a string.") + if key != "@type" and not all(isinstance(item, dict) for item in self.item_list): + raise ValueError("A given value is not properly expanded.") super().__init__(data, parent=parent, key=key, index=index, context=context) - self.item_list = data[0]["@list"] - def __getitem__(self, index): if isinstance(index, slice): return [self[i] for i in [*range(len(self))][index]] @@ -29,13 +45,22 @@ def __getitem__(self, index): def __setitem__(self, index, value): if not isinstance(index, slice): - self.item_list[index] = val[0] if isinstance(val := self._to_expanded_json(self.key, value), list) else val + value = self._to_expanded_json(value) + if not isinstance(value, list): + self.item_list[index] = value + return + if index < 0: + self.item_list[index-1:index] = value + else: + self.item_list[index:index+1] = value return try: iter(value) except TypeError as exc: raise TypeError("must assign iterable to extended slice") from exc - expanded_value = [self._to_expanded_json(self.key, val) for val in value] + expanded_value = [self._to_expanded_json(val) for val in value] + # TODO: the slice 
should work if all items including assimilated ones in the given order can be set via slice + # Implement this self.item_list[index] = [val[0] if isinstance(val, list) else val for val in expanded_value] def __len__(self): @@ -49,7 +74,9 @@ def __iter__(self): yield item def __contains__(self, value): - expanded_value = val[0] if isinstance(val := self._to_expanded_json(self.key, value), list) else val + # TODO: Update to use new _to_expanded_json + # and return True if value would be assimilated by self and all those items are in self + expanded_value = val[0] if isinstance(val := self._to_expanded_json_deprecated(self.key, value), list) else val return expanded_value in self.item_list def __eq__(self, other): @@ -91,7 +118,7 @@ def __ne__(self, other): return not x def append(self, value): - ld_value = self._to_expanded_json(self.key, value) + ld_value = val if isinstance(val:= self._to_expanded_json(value), list) else [val] self.item_list.extend(ld_value) def extend(self, value): @@ -117,10 +144,24 @@ def is_container(cls, value): ) @classmethod - def from_list(cls, value, *, parent=None, key=None, context=None): - new_list = cls([{"@list": []}], parent=parent, key=key, context=context) - new_list.extend(value) - return new_list + def from_list(cls, value, *, parent=None, key=None, context=None, container_type="@set"): + if key == "@type": + container_type = "@set" + if container_type == "@set": + temp_list = [] + else: + value = [{container_type: value}] + temp_list = [{container_type: value}] + if parent is not None: + expanded_value = parent._to_expanded_json(value) + # TODO: what should happen if value is assimilated by parent? 
+ # -> return parent with added values + else: + expanded_value = cls([], parent=None, key=key, context=context)._to_expanded_json(value) + # we don't care if it is assimilated by the temporary object as expanded_value is its replacement + if not isinstance(expanded_value, list): + expanded_value = [expanded_value] + return cls(expanded_value, parent=parent, key=key, context=context) @classmethod def get_item_list_from_container(cls, ld_value): diff --git a/test/hermes_test/model/types/test_ld_dict.py b/test/hermes_test/model/types/test_ld_dict.py index b696ff5b..bc5c9916 100644 --- a/test/hermes_test/model/types/test_ld_dict.py +++ b/test/hermes_test/model/types/test_ld_dict.py @@ -16,7 +16,7 @@ def test_build_in_comparison(): # Fail probably because of bug in ld_dict # that is fixed on refactor/data-model after merge of refactor/384-test-ld_dict assert di == {"@id": "foo", "schema:name": "bar"} - assert di == {"@id": "foo", "name": "b"} + assert di == {"@id": "foo", "schema:name": "b"} assert di == {"schema:name": "bar"} di = ld_dict([{}], context={"schema": "https://schema.org/"}) di["schema:Person"] = {"schema:name": "foo"} diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index 8a5bd7d2..7e0fa390 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -5,6 +5,7 @@ # SPDX-FileContributor: Sophie Kernchen # SPDX-FileContributor: Michael Fritzsche +from datetime import date import pytest from hermes.model.types.ld_list import ld_list @@ -20,14 +21,17 @@ def test_undefined_list(): ld_list([{"@list": [0], "spam": [{"@value": "bacon"}]}]) with pytest.raises(ValueError): ld_list([{"@list": ["a", "b"], "@set": ["foo", "bar"]}]) + with pytest.raises(ValueError): + ld_list([{"@list": ["a", "b"]}]) # no given key with pytest.raises(ValueError): ld_list([{"@list": ["a", "b"]}, {"@set": ["foo", "bar"]}]) def test_list_basics(): - li = ld_list([{"@list": [0]}]) - 
assert li._data == [{"@list": [0]}] - assert li.item_list == [0] + li_data = [{"@list": [{"@value": "bar"}]}] + li = ld_list(li_data, key="foo") + assert li._data is li_data + assert li.item_list is li_data[0]["@list"] def test_build_in_get(): @@ -36,7 +40,8 @@ def test_build_in_get(): assert li[:2] == ["foo", "bar"] and li[1:-1] == ["bar"] assert li[::2] == ["foo", "foobar"] and li[::-1] == ["foobar", "bar", "foo"] - li = ld_list([{"@list": [{"@type": "A", "schema:name": "a"}, {"@list": [{"@type": "A", "schema:name": "a"}]}]}]) + li = ld_list([{"@list": [{"@type": "A", "schema:name": "a"}, {"@list": [{"@type": "A", "schema:name": "a"}]}]}], + key="schema:person") assert isinstance(li[0], ld_dict) and li[0].data_dict == {"@type": "A", "schema:name": "a"} and li[0].index == 0 assert isinstance(li[1], ld_list) and li[1].item_list == [{"@type": "A", "schema:name": "a"}] and li[1].index == 1 assert li[1].key == li.key @@ -78,9 +83,21 @@ def test_build_in_set(): assert li[0].item_list == [{"@type": ["https://schema.org/Thing"], "https://schema.org/name": [{"@value": "b"}]}] +def test_build_in_set_complex(): + di = ld_dict([{"https://schema.org/name": [{"@list": [{"@value": "c"}]}]}], + context=[{"schema": "https://schema.org/"}]) + temp = di["schema:name"] + di["schema:name"][0] = {"@list": ["a", "b"]} + assert di["schema:name"][0] == ["a", "b"] and temp._data is di["schema:name"]._data + li = ld_list([{"@list": []}], key="schema:time", context=[{"schema": "https://schema.org/"}]) + date_obj = date(year=2025, month=12, day=31) + li.append(date_obj) + assert li.item_list == [{"@value": date_obj.isoformat(), "@type": "https://schema.org/Date"}] + + def test_build_in_len(): - assert len(ld_list([{"@list": []}])) == 0 - assert len(ld_list([{"@list": [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}]}])) == 3 + assert len(ld_list([{"@list": []}], key="foo")) == 0 + assert len(ld_list([{"@list": [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}]}], 
key="foo")) == 3 def test_build_in_iter(): @@ -126,7 +143,9 @@ def test_build_in_comparison(): assert li == li2 li.append("foo") li.append({"@type": "A", "schema:name": "a"}) - assert li != li2 and ["foo", {"@type": "A", "schema:name": "a"}] == li and ["foo"] != li2 + assert li != li2 + assert ["foo", {"@type": "A", "schema:name": "a"}] == li + assert ["foo"] != li2 assert ["foo", {"@type": "A", "https://schema.org/name": "a"}] == li li2.extend(["foo", {"@type": "A", "schema2:name": "a"}]) assert li == li2 @@ -138,7 +157,7 @@ def test_build_in_comparison(): li = ld_list([{"@list": []}], key="https://schema.org/Person", context=[{"schema": "https://schema.org/"}]) li.append({"@id": "foo"}) assert li == [{"@id": "foo"}] and li == [{"@id": "foo", "schema:name": "bar"}] and li == {"@list": [{"@id": "foo"}]} - li2 = ld_list([{"@list": []}], key="@type", context=[{"schema": "https://schema.org/"}]) + li2 = ld_list([], key="@type", context=[{"schema": "https://schema.org/"}]) li2.append("schema:name") assert li != li2 li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) @@ -146,9 +165,9 @@ def test_build_in_comparison(): li.append("foo") li2.append("bar") assert li != li2 - li[0] = {"@type": "foo", "@value": "bar"} + li[0] = {"@type": "schema:foo", "@value": "bar"} assert li != li2 - li[0] = {"@type": "foobar", "@value": "bar"} + li[0] = {"@type": "schema:foobar", "@value": "bar"} assert li != li2 @@ -178,7 +197,7 @@ def test_extend(): def test_to_python(): li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) li.append("foo") - li.append(ld_dict([{"@type": ["A"], "https://schema.org/name": [{"@value": "a"}]}])) + li.append(ld_dict([{"@type": ["A"], "https://schema.org/name": [{"@value": "a"}]}], parent=li)) li.append(["a"]) assert li[1]["@type"].item_list == ["A"] assert li.to_python() == ["foo", {"@type": ["A"], "schema:name": ["a"]}, ["a"]] @@ -192,23 +211,26 @@ def 
test_is_ld_list(): def test_is_container(): assert not any(ld_list.is_container(item) for item in [1, "", [], {}, {"a": "b"}]) - assert not any(ld_list.is_container(item) for item in [ld_dict([{"a": "b"}]), ld_list([{"@list": ["a"]}])]) + assert not any(ld_list.is_container(item) for item in [ld_dict([{"a": "b"}]), + ld_list([{"@list": [{"@value": "a"}]}], key="foo")]) assert not any(ld_list.is_container({"@list": value}) for value in ["", 1, {}]) assert all(ld_list.is_container({container_type: []}) for container_type in ["@list", "@graph", "@set"]) def test_from_list(): - li = ld_list.from_list([]) - assert li.item_list == li.context == [] and li.parent is li.key is li.index is None - assert li._data == [{"@list": []}] + li = ld_list.from_list([], key="schema:foo") + assert li.item_list == li.context == [] and li.parent is li.index is None and li.key == "schema:foo" + assert li._data == [] and li.container_type == "@set" li = ld_list.from_list([], parent=li, key="schema:name", context=[{"schema": "https://schema.org/"}]) assert li.item_list == [] and li.parent is not None and li.key == "schema:name" assert li.index is None and li.context == [{"schema": "https://schema.org/"}] - li = ld_list.from_list(["a", {"@value": "b"}], parent=None, key="https://schema.org/name", + li_data = ["a", {"@value": "b"}] + li = ld_list.from_list(li_data, parent=None, key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) assert li.item_list == [{"@value": "a"}, {"@value": "b"}] and li.parent is None assert li.key == "https://schema.org/name" and li.index is None assert li.context == [{"schema": "https://schema.org/"}] + assert li.item_list is not li_data # as li_data is expected to change they should not be the same object def test_get_item_list_from_container(): From 007e3a32efd6f097a263c505e36a68a3fe3b2479 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 21 Nov 2025 10:58:06 +0100 Subject: [PATCH 09/26] removed unnecessary lambda --- 
src/hermes/model/types/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index bf47bdfb..3cf51f35 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -37,10 +37,10 @@ (lambda v: isinstance(v, str), {"python": lambda v, parent, **_: parent.ld_proc.compact_iri(parent.active_ctx, v)}), # Convert internal data types to expanded_json - (lambda c: ld_container.is_json_id(c), {"expanded_json": lambda c, **_: [c]}), - (lambda c: ld_container.is_ld_id(c), {"expanded_json": lambda c, **_: c}), - (lambda c: ld_container.is_json_value(c), {"expanded_json": lambda c, **_: [c]}), - (lambda c: ld_container.is_ld_value(c), {"expanded_json": lambda c, **_: c}), + (ld_container.is_json_id, {"expanded_json": lambda c, **_: [c]}), + (ld_container.is_ld_id, {"expanded_json": lambda c, **_: c}), + (ld_container.is_json_value, {"expanded_json": lambda c, **_: [c]}), + (ld_container.is_ld_value, {"expanded_json": lambda c, **_: c}), (ld_dict.is_json_dict, {"expanded_json": lambda c, **kw: ld_dict.from_dict(c, **kw).ld_value}), ( ld_list.is_container, From 3c9ea6184fb6af97d782349b21c53d5f89bd1378 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 21 Nov 2025 12:03:02 +0100 Subject: [PATCH 10/26] fixed set and from_list and added del --- src/hermes/model/types/ld_list.py | 21 ++++++++++---------- test/hermes_test/model/types/test_ld_list.py | 19 +++++++++++++++--- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index a636bcb0..1fca70e6 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -58,11 +58,12 @@ def __setitem__(self, index, value): iter(value) except TypeError as exc: raise TypeError("must assign iterable to extended slice") from exc - expanded_value = [self._to_expanded_json(val) for val in value] - # TODO: 
the slice should work if all items including assimilated ones in the given order can be set via slice - # Implement this + expanded_value = ld_container.merge_to_list(*[self._to_expanded_json(val) for val in value]) self.item_list[index] = [val[0] if isinstance(val, list) else val for val in expanded_value] + def __delitem__(self, index): + del self.item_list[index] + def __len__(self): return len(self.item_list) @@ -147,18 +148,18 @@ def is_container(cls, value): def from_list(cls, value, *, parent=None, key=None, context=None, container_type="@set"): if key == "@type": container_type = "@set" - if container_type == "@set": - temp_list = [] - else: + if container_type != "@set": value = [{container_type: value}] - temp_list = [{container_type: value}] if parent is not None: expanded_value = parent._to_expanded_json(value) - # TODO: what should happen if value is assimilated by parent? - # -> return parent with added values + if isinstance(expanded_value, list) or not cls.is_container(expanded_value): + # parent has to be an ld_list because an ld_dict won't assimilate an list + parent.extend(expanded_value if isinstance(expanded_value, list) else [expanded_value]) + # TODO: is there a need to add the context to the parent as well? 
+ return parent else: expanded_value = cls([], parent=None, key=key, context=context)._to_expanded_json(value) - # we don't care if it is assimilated by the temporary object as expanded_value is its replacement + # the object has to be a list for further use but does not have to be returned by _to_expanded_json as a list if not isinstance(expanded_value, list): expanded_value = [expanded_value] return cls(expanded_value, parent=parent, key=key, context=context) diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index 7e0fa390..d1d52484 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -89,10 +89,23 @@ def test_build_in_set_complex(): temp = di["schema:name"] di["schema:name"][0] = {"@list": ["a", "b"]} assert di["schema:name"][0] == ["a", "b"] and temp._data is di["schema:name"]._data - li = ld_list([{"@list": []}], key="schema:time", context=[{"schema": "https://schema.org/"}]) + li = ld_list([], key="schema:time", context=[{"schema": "https://schema.org/"}]) date_obj = date(year=2025, month=12, day=31) li.append(date_obj) assert li.item_list == [{"@value": date_obj.isoformat(), "@type": "https://schema.org/Date"}] + del li[0] + li[0:1] = ["a", "b", "c"] + assert li == ["a", "b", "c"] + li[0:3:2] = [["aa", "bb"]] + assert li == ["aa", "b", "bb"] + +def test_build_in_del(): + li = ld_list([{"@list": [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}]}], + key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + del li[0:3:2] + assert li == ["bar"] + del li[0] + assert li == [] def test_build_in_len(): @@ -222,8 +235,8 @@ def test_from_list(): assert li.item_list == li.context == [] and li.parent is li.index is None and li.key == "schema:foo" assert li._data == [] and li.container_type == "@set" li = ld_list.from_list([], parent=li, key="schema:name", context=[{"schema": "https://schema.org/"}]) - assert li.item_list == [] 
and li.parent is not None and li.key == "schema:name" - assert li.index is None and li.context == [{"schema": "https://schema.org/"}] + assert li.item_list == [] and li.parent is None and li.key == "schema:foo" + assert li.index is None and li.context == [] li_data = ["a", {"@value": "b"}] li = ld_list.from_list(li_data, parent=None, key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) From 66e03dffaf87f927e84eccbc3e0b7291577d1cc0 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 21 Nov 2025 13:05:21 +0100 Subject: [PATCH 11/26] updated tests of ld_container and fixed found bugs --- src/hermes/model/types/ld_container.py | 14 ++--- src/hermes/model/types/ld_list.py | 8 +-- test/hermes_test/model/types/conftest.py | 1 + .../model/types/test_ld_container.py | 52 ++++++++++--------- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 50e38df4..ecff840b 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -129,15 +129,15 @@ def _to_expanded_json(self, value): value = value._data else: value = value._data[0] - elif isinstance(value, date): - value = {"@value": value.isoformat(), "@type": "schema:Date"} elif isinstance(value, datetime): value = {"@value": value.isoformat(), "@type": "schema:DateTime"} + elif isinstance(value, date): + value = {"@value": value.isoformat(), "@type": "schema:Date"} elif isinstance(value, time): value = {"@value": value.isoformat(), "@type": "schema:Time"} else: key_and_reference_todo_list = [(0, [value])] - special_types = (list, dict, ld_container, date, datetime, time) + special_types = (list, dict, ld_container, datetime, date, time) while True: if len(key_and_reference_todo_list) == 0: break @@ -149,10 +149,10 @@ def _to_expanded_json(self, value): key_and_reference_todo_list.extend([(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], 
special_types)]) elif isinstance(temp, ld_container): ref[key] = temp._data[0] - elif isinstance(temp, date): - ref[key] = {"@value": temp.isoformat(), "@type": "schema:Date"} elif isinstance(temp, datetime): ref[key] = {"@value": temp.isoformat(), "@type": "schema:DateTime"} + elif isinstance(temp, date): + ref[key] = {"@value": temp.isoformat(), "@type": "schema:Date"} elif isinstance(temp, time): ref[key] = {"@value": temp.isoformat(), "@type": "schema:Time"} @@ -177,7 +177,7 @@ def _to_expanded_json(self, value): if self.__class__.__name__ == "ld_dict": return expanded_data - if self.__class__.__name__ == "ld_list" and len(expanded_data) != 1: + if len(expanded_data) != 1: return expanded_data return expanded_data[0] @@ -213,7 +213,7 @@ def _to_expanded_json_deprecated(self, key, value): return ld_value def __repr__(self): - return f'{type(self).__name__}({self._data[0]})' + return f'{type(self).__name__}({self._data})' def __str__(self): return str(self.to_python()) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 1fca70e6..b102f477 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -151,9 +151,11 @@ def from_list(cls, value, *, parent=None, key=None, context=None, container_type if container_type != "@set": value = [{container_type: value}] if parent is not None: - expanded_value = parent._to_expanded_json(value) - if isinstance(expanded_value, list) or not cls.is_container(expanded_value): - # parent has to be an ld_list because an ld_dict won't assimilate an list + if isinstance(parent, ld_list): + expanded_value = parent._to_expanded_json(value) + else: + expanded_value = parent._to_expanded_json({key: value})[cls.ld_proc.expand_iri(parent.active_ctx, key)] + if isinstance(parent, cls) and (isinstance(expanded_value, list) or not cls.is_container(expanded_value)): parent.extend(expanded_value if isinstance(expanded_value, list) else [expanded_value]) # TODO: is there a need to 
add the context to the parent as well? return parent diff --git a/test/hermes_test/model/types/conftest.py b/test/hermes_test/model/types/conftest.py index 8a1c7c2e..7d7e52d5 100644 --- a/test/hermes_test/model/types/conftest.py +++ b/test/hermes_test/model/types/conftest.py @@ -25,6 +25,7 @@ def vocabulary(cls, base_url: str = "http://spam.eggs/") -> dict: "ham": {"@id": f"{base_url}ham", "@type": "@id"}, "eggs": {"@id": f"{base_url}eggs", "@container": "@list"}, "use_until": {"@id": f"{base_url}use_until", "@type": "http://schema.org/DateTime"}, + "schema": "https://schema.org/", "Egg": {"@id": f"{base_url}Egg"}, } diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index 53cb15c8..dc5d8f8f 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -6,11 +6,12 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from datetime import datetime +from datetime import date, datetime, time import pytest from hermes.model.types.ld_container import ld_container +from hermes.model.types.ld_dict import ld_dict '''we expect user of this class to give the right input data types @@ -83,7 +84,7 @@ def test_container_full_context_and_path(self, httpserver): def test_container_str_and_repr(self): cont = ld_container([{"spam": [{"@value": "bacon"}]}]) - assert repr(cont) == "ld_container({'spam': [{'@value': 'bacon'}]})" + assert repr(cont) == "ld_container([{'spam': [{'@value': 'bacon'}]}])" with pytest.raises(NotImplementedError): str(cont) @@ -96,13 +97,14 @@ def test_to_python_id_with_prefix(self, mock_context): assert cont._to_python("@id", f"{self.url}identifier") == "prefix:identifier" def test_to_python_type(self, mock_context): - cont = ld_container([{}], context=[mock_context]) + cont = ld_dict([{"@type": ["@id"]}], context=[mock_context]) assert cont._to_python("@type", ["@id"]) == ['@id'] + cont = 
ld_dict([{"@type": ["@id", "http://spam.eggs/Egg"]}], context=[mock_context]) assert cont._to_python("@type", ["@id", "http://spam.eggs/Egg"]) == ["@id", "Egg"] def test_to_python_id_value(self, mock_context): - cont = ld_container([{}], context=[mock_context]) - assert cont._to_python("http://spam.eggs/ham", [{"@id": "http://spam.eggs/spam"}]) == ["http://spam.eggs/spam"] + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_python("http://spam.eggs/ham", [{"@id": "http://spam.eggs/spam"}]) == [{"@id": "http://spam.eggs/spam"}] assert cont._to_python("http://spam.eggs/ham", {"@id": "http://spam.eggs/identifier"}) == "http://spam.eggs/identifier" @@ -119,40 +121,40 @@ def test_to_python_datetime_value(self, mock_context): }) == "2022-02-22T00:00:00" # TODO: #434 typed date is returned as string instead of date def test_to_expanded_id(self, mock_context): - cont = ld_container([{}], context=[mock_context]) - assert cont._to_expanded_json("@id", f"{self.url}identifier") == f"{self.url}identifier" + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_expanded_json({"@id": f"{self.url}identifier"}) == {"@id": f"{self.url}identifier"} # Regression test: "ham" is vocabulary and must not be expanded. - assert cont._to_expanded_json("@id", "ham") == "ham" + assert cont._to_expanded_json({"@id": "ham"}) == {"@id": "ham"} def test_to_expanded_id_with_prefix(self, mock_context): - cont = ld_container([{}], context=[mock_context, {"prefix": self.url}]) - assert cont._to_expanded_json("@id", "prefix:identifier") == f"{self.url}identifier" + cont = ld_dict([{}], context=[mock_context, {"prefix": self.url}]) + assert cont._to_expanded_json({"@id": "prefix:identifier"}) == {"@id": f"{self.url}identifier"} # Regression test: "ham" should still not be expaned, but "prefix:ham" should be. 
- assert cont._to_expanded_json("@id", "ham") == "ham" - assert cont._to_expanded_json("@id", "prefix:ham") == f"{self.url}ham" + assert cont._to_expanded_json({"@id": "ham"}) == {"@id": "ham"} + assert cont._to_expanded_json({"@id": "prefix:ham"}) == {"@id": f"{self.url}ham"} def test_to_expanded_type(self, mock_context): - cont = ld_container([{}], context=[mock_context]) - assert cont._to_expanded_json("@type", "Egg") == ["http://spam.eggs/Egg"] - assert cont._to_expanded_json("@type", ["Egg", "@id"]) == ["http://spam.eggs/Egg", "@id"] + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_expanded_json({"@type": "Egg"}) == {"@type": ["http://spam.eggs/Egg"]} + assert cont._to_expanded_json({"@type": ["Egg", "@id"]}) =={"@type": ["http://spam.eggs/Egg", "@id"]} def test_to_expanded_id_value(self, mock_context): - cont = ld_container([{}], context=[mock_context]) - assert cont._to_expanded_json("ham", "spam") == [{"@id": "spam"}] + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_expanded_json({"ham": "spam"}) == {"http://spam.eggs/ham": [{"@id": "spam"}]} def test_to_expanded_basic_value(self, mock_context): - cont = ld_container([{}], context=[mock_context]) - assert cont._to_expanded_json("spam", "bacon") == [{"@value": "bacon"}] - assert cont._to_expanded_json("spam", 123) == [{"@value": 123}] - assert cont._to_expanded_json("spam", True) == [{"@value": True}] + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_expanded_json({"spam": "bacon"}) == {"http://spam.eggs/spam": [{"@value": "bacon"}]} + assert cont._to_expanded_json({"spam": 123}) == {"http://spam.eggs/spam": [{"@value": 123}]} + assert cont._to_expanded_json({"spam": True}) == {"http://spam.eggs/spam": [{"@value": True}]} def test_to_expanded_datetime_value(self, mock_context): - cont = ld_container([{}], context=[mock_context]) - assert cont._to_expanded_json("eggs", datetime(2022, 2, 22)) == [ - {"@value": "2022-02-22T00:00:00", "@type": 
"http://schema.org/DateTime"} - ] + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_expanded_json({"eggs": datetime(2022, 2, 22)}) == {"http://spam.eggs/eggs": [{"@list": [ + {"@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime"} + ]}]} def test_are_values_equal(self): assert ld_container.are_values_equal({"@id": "foo"}, {"@id": "foo"}) From c4aea657b8893c3850081dbbfc0a97a5cffc88e4 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 21 Nov 2025 13:10:50 +0100 Subject: [PATCH 12/26] pacified flake8 --- src/hermes/model/types/ld_container.py | 10 ++++++---- src/hermes/model/types/ld_list.py | 2 +- test/hermes_test/model/types/test_ld_container.py | 7 ++++--- test/hermes_test/model/types/test_ld_list.py | 1 + 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index ecff840b..c136d476 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -144,9 +144,11 @@ def _to_expanded_json(self, value): key, ref = key_and_reference_todo_list.pop() temp = ref[key] if isinstance(temp, list): - key_and_reference_todo_list.extend([(index, temp) for index, val in enumerate(temp) if isinstance(val, special_types)]) + key_and_reference_todo_list.extend([(index, temp) for index, val in enumerate(temp) + if isinstance(val, special_types)]) elif isinstance(temp, dict): - key_and_reference_todo_list.extend([(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)]) + key_and_reference_todo_list.extend([(new_key, temp) for new_key in temp.keys() + if isinstance(temp[new_key], special_types)]) elif isinstance(temp, ld_container): ref[key] = temp._data[0] elif isinstance(temp, datetime): @@ -166,8 +168,8 @@ def _to_expanded_json(self, value): self_data = current_data[path[0]] current_data[path[0]] = value expanded_data = self.ld_proc.expand(parent._data, {"expandContext": self.full_context, 
- "documentLoader": bundled_loader, - "keepFreeFloatingNodes": True}) + "documentLoader": bundled_loader, + "keepFreeFloatingNodes": True}) if self_data is not None: current_data[path[0]] = self_data else: diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index b102f477..ac8af6b5 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -119,7 +119,7 @@ def __ne__(self, other): return not x def append(self, value): - ld_value = val if isinstance(val:= self._to_expanded_json(value), list) else [val] + ld_value = val if isinstance(val := self._to_expanded_json(value), list) else [val] self.item_list.extend(ld_value) def extend(self, value): diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index dc5d8f8f..ddc98405 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -6,7 +6,7 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from datetime import date, datetime, time +from datetime import datetime import pytest @@ -104,7 +104,8 @@ def test_to_python_type(self, mock_context): def test_to_python_id_value(self, mock_context): cont = ld_dict([{}], context=[mock_context]) - assert cont._to_python("http://spam.eggs/ham", [{"@id": "http://spam.eggs/spam"}]) == [{"@id": "http://spam.eggs/spam"}] + assert cont._to_python("http://spam.eggs/ham", + [{"@id": "http://spam.eggs/spam"}]) == [{"@id": "http://spam.eggs/spam"}] assert cont._to_python("http://spam.eggs/ham", {"@id": "http://spam.eggs/identifier"}) == "http://spam.eggs/identifier" @@ -138,7 +139,7 @@ def test_to_expanded_id_with_prefix(self, mock_context): def test_to_expanded_type(self, mock_context): cont = ld_dict([{}], context=[mock_context]) assert cont._to_expanded_json({"@type": "Egg"}) == {"@type": ["http://spam.eggs/Egg"]} - assert cont._to_expanded_json({"@type": ["Egg", "@id"]}) 
=={"@type": ["http://spam.eggs/Egg", "@id"]} + assert cont._to_expanded_json({"@type": ["Egg", "@id"]}) == {"@type": ["http://spam.eggs/Egg", "@id"]} def test_to_expanded_id_value(self, mock_context): cont = ld_dict([{}], context=[mock_context]) diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index d1d52484..5f29017c 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -99,6 +99,7 @@ def test_build_in_set_complex(): li[0:3:2] = [["aa", "bb"]] assert li == ["aa", "b", "bb"] + def test_build_in_del(): li = ld_list([{"@list": [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}]}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) From 7d30b169561a5583e747aec82d74352218894fd1 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 24 Nov 2025 09:02:38 +0100 Subject: [PATCH 13/26] improved ld_list.__contains__ and added tests for it --- src/hermes/model/types/ld_list.py | 13 +++++++++---- test/hermes_test/model/types/test_ld_list.py | 8 ++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index ac8af6b5..ea693053 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -75,10 +75,15 @@ def __iter__(self): yield item def __contains__(self, value): - # TODO: Update to use new _to_expanded_json - # and return True if value would be assimilated by self and all those items are in self - expanded_value = val[0] if isinstance(val := self._to_expanded_json_deprecated(self.key, value), list) else val - return expanded_value in self.item_list + expanded_value = self._to_expanded_json(value) + if isinstance(expanded_value, list): + return all(val in self for val in expanded_value) + self_attributes = {"parent": self.parent, "key": self.key, "index": self.index, "context": self.full_context} + if 
self.container_type == "@set": + temp_list = ld_list([expanded_value], **self_attributes) + return any(temp_list == ld_list([val], **self_attributes) for val in self.item_list) + temp_list = ld_list([{self.container_type: [expanded_value]}], **self_attributes) + return any(temp_list == ld_list([{self.container_type: [val]}], **self_attributes) for val in self.item_list) def __eq__(self, other): if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index 5f29017c..1b0a4127 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -93,7 +93,6 @@ def test_build_in_set_complex(): date_obj = date(year=2025, month=12, day=31) li.append(date_obj) assert li.item_list == [{"@value": date_obj.isoformat(), "@type": "https://schema.org/Date"}] - del li[0] li[0:1] = ["a", "b", "c"] assert li == ["a", "b", "c"] li[0:3:2] = [["aa", "bb"]] @@ -143,11 +142,16 @@ def test_append(): def test_build_in_contains(): - li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li = ld_list([], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) li.append("foo") li.append({"@type": "A", "schema:name": "a"}) assert "foo" in li and {"@type": "A", "schema:name": "a"} in li assert {"@value": "foo"} in li and {"@type": "A", "https://schema.org/name": "a"} in li + assert ["foo", {"@type": "A", "schema:name": "a"}] in li + assert [{"@list": ["foo", {"@type": "A", "schema:name": "a"}]}] not in li + li.append({"@id": "schema:foo", "schema:name": "foo"}) + assert {"@id": "schema:foo"} in li and {"@id": "schema:foo", "schema:name": "foobar"} in li + assert {"schema:name": "foo"} in li def test_build_in_comparison(): From f8d259fca9862ddef681d435387b4746e2482e08 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 24 Nov 2025 10:56:23 +0100 
Subject: [PATCH 14/26] added one TODO item to ld_list --- src/hermes/model/types/ld_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index ea693053..6766f1c9 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -86,6 +86,7 @@ def __contains__(self, value): return any(temp_list == ld_list([{self.container_type: [val]}], **self_attributes) for val in self.item_list) def __eq__(self, other): + # TODO: ld_lists with container_type "@set" have to be considered unordered if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): return NotImplemented if isinstance(other, dict): From 2b98627b2687f87fc4c49e7abada6435baa15944 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 24 Nov 2025 10:56:58 +0100 Subject: [PATCH 15/26] began adding doc strings, type hints and comments for ld_container --- src/hermes/model/types/ld_container.py | 108 ++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 11 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index c136d476..359a4ebe 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -7,6 +7,9 @@ from .pyld_util import JsonLdProcessor, bundled_loader from datetime import date, time, datetime +from typing import Self + +JSON_LD_CONTEXT_DICT = dict[str, str | 'JSON_LD_CONTEXT_DICT'] class ld_container: @@ -20,14 +23,33 @@ class ld_container: ld_proc = JsonLdProcessor() - def __init__(self, data, *, parent=None, key=None, index=None, context=None): + def __init__( + self: Self, + data: list, + *, + parent: "ld_container" | None = None, + key: str | None = None, + index: int | None = None, + context: list[str | JSON_LD_CONTEXT_DICT] | None = None, + ) -> None: """ Create a new instance of an ld_container. + :param self: The instance of ld_container to be initialized. 
+ :type self: Self :param data: The expanded json-ld data that is mapped. - :param parent: Optional parent node of this container. - :param key: Optional key into the parent container. - :param context: Optional local context for this container. + :type data: list + :param parent: parent node of this container. + :type parent: ld_container | None + :param key: key into the parent container. + :type key: str | None + :param key: index into the parent container. + :type index: int | None + :param context: local context for this container. + :type context: list[str | JSON_LD_CONTEXT_DICT] + + :return: + :rtype: None """ # Store basic data @@ -54,7 +76,18 @@ def __init__(self, data, *, parent=None, key=None, index=None, context=None): {"documentLoader": bundled_loader} ) - def add_context(self, context): + def add_context(self: Self, context: list[str | JSON_LD_CONTEXT_DICT]) -> None: + """ + Add the given context to the ld_container. + + :param self: The ld_container the context should be added to. + :type self: Self + :param context: The context to be added to self. + :type context: list[str | JSON_LD_CONTEXT_DICT] + + :return: + :rtype: None + """ self.context = self.merge_to_list(self.context, context) self.active_ctx = self.ld_proc.process_context( self.active_ctx, @@ -63,31 +96,84 @@ def add_context(self, context): ) @property - def full_context(self): + def full_context(self: Self) -> list[str, JSON_LD_CONTEXT_DICT] | None: + """ + Return the context of the ld_container merged with the full_context of its parent. + + :param self: The ld_container whose full_context is returned + :type self: Self + + :return: The context of the ld_container merged with the full_context of its parent via + ld_container.merge_to_list or just the context of this ld_container if self.parent is None. 
+ :rtype: list[str | JSON_LD_CONTEXT_DICT] | None + """ if self.parent is not None: return self.merge_to_list(self.parent.full_context, self.context) else: return self.context @property - def path(self): - """ Create a path representation for this item. """ + def path(self: Self) -> list[str | int]: + """ + Create a path representation for this item. + + :param self: The ld_container the path leads to from its outer most parent container. + :type self: Self + + :return: The path from selfs outer most parent to it self. + Let parent be the outer most parent of self. + Start with index = 1 and iteratively set parent to parent[path[index]] and then increment index + until index == len(path) to get parent is self == true. + :rtype: list[str | int] + """ if self.parent: return self.parent.path + [self.key if self.index is None else self.index] else: return ['$'] @property - def ld_value(self): - """ Retrun a representation that is suitable as a value in expanded JSON-LD. """ + def ld_value(self: Self) -> list: + """ + Return a representation that is suitable as a value in expanded JSON-LD of this ld_container. + + :param self: The ld_container whose expanded JSON-LD representation is returned. + :type self: Self + + :return: The expanded JSON-LD value of this container. + This value is the basis of all operations and a reference to the original is returned and not a copy. + Do **not** modify unless strictly necessary and you know what you do. + Otherwise unexpected behavior may occur. + :rtype: list + """ return self._data - def _to_python(self, full_iri, ld_value): + def _to_python( + self: Self, + full_iri: str, + ld_value: list | dict | str + ) -> "ld_container" | str | int | float | bool | date | datetime | time: + """ + Returns a pythonized version of the given value pretending the value is in self and full_iri its key. + + :param self: the ld_container ld_value is considered to be in. 
+ :type self: Self + :param full_iri: The expanded iri of the key of ld_value / self (later if self is not a dictionary). + :type full_iri: str + :param ld_value: The value thats pythonized value is requested. ld_value has to be valid expanded JSON-LD if it + was embeded in self._data. + :type ld_value: list | dict | str + + :return: The pythonized value of the ld_value. + :rtype: ld_container | str | int | float | bool | date | datetime | time + """ if full_iri == "@id": + # values of key "@id" only have to be compacted value = self.ld_proc.compact_iri(self.active_ctx, ld_value, vocab=False) else: + # use the type map from src/hermes/model/types/__init__.py to convert all other values. value, ld_output = self.ld_proc.apply_typemap(ld_value, "python", "ld_container", parent=self, key=full_iri) + # check if conversion was successful if ld_output is None: raise TypeError(full_iri, ld_value) From 1958518bc914d1fc9e1e192146352ea4590627c7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 28 Nov 2025 11:14:02 +0100 Subject: [PATCH 16/26] improved _to_expanded_json --- src/hermes/model/types/ld_container.py | 27 ++++++++---------- src/hermes/model/types/ld_list.py | 39 +++++++++++--------------- 2 files changed, 29 insertions(+), 37 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 359a4ebe..4f0d6f41 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -181,18 +181,19 @@ def _to_python( def _to_expanded_json(self, value): """ - The item_lists contents/ the data_dict will be substituted with value. - Value can be an ld_container or contain zero or more. - Then the _data of the inner most ld_dict that contains or is self will be expanded. - If self is not an ld_dict and none of self's parents is, use the key from ld_list to generate a minimal dict + Returns an expanded version of the given value. 
- The result of this function is what value has turned into - (always a list for type(self) == ld_dict and list or dict for type(self) == ld_list). - If self is an ld_list and value was assimilated by self the returned value is list otherwise it is a dict - (e.g. in a set the inner sets values are put directly into the outer one). + The item_list/ data_dict of self will be substituted with value. + Value can be an ld_container or contain zero or more. + Then the _data of the inner most ld_dict that contains or is self will be expanded + using the JSON_LD-Processor. + If self and none of self's parents is an ld_dict, use the key from outer most ld_list + to generate a minimal dict. + + The result of this function is what value has turned into: + - If type(self) == ld_dict: the returned value is a dict + - If type(self) == ld_list: the returned value is a list """ - if self.__class__.__name__ == "ld_list": - value = [value] parent = self path = [] while parent.__class__.__name__ != "ld_dict": @@ -263,11 +264,7 @@ def _to_expanded_json(self, value): for index in range(len(path) - 1, -1, -1): expanded_data = expanded_data[path[index]] - if self.__class__.__name__ == "ld_dict": - return expanded_data - if len(expanded_data) != 1: - return expanded_data - return expanded_data[0] + return expanded_data def _to_expanded_json_deprecated(self, key, value): if key == "@id": diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 6766f1c9..5402f85d 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -45,20 +45,17 @@ def __getitem__(self, index): def __setitem__(self, index, value): if not isinstance(index, slice): - value = self._to_expanded_json(value) - if not isinstance(value, list): - self.item_list[index] = value - return - if index < 0: - self.item_list[index-1:index] = value - else: + value = self._to_expanded_json([value]) + if index != -1: self.item_list[index:index+1] = value + else: + 
self.item_list[index:] = value return try: iter(value) except TypeError as exc: raise TypeError("must assign iterable to extended slice") from exc - expanded_value = ld_container.merge_to_list(*[self._to_expanded_json(val) for val in value]) + expanded_value = ld_container.merge_to_list(*[self._to_expanded_json([val]) for val in value]) self.item_list[index] = [val[0] if isinstance(val, list) else val for val in expanded_value] def __delitem__(self, index): @@ -75,14 +72,16 @@ def __iter__(self): yield item def __contains__(self, value): - expanded_value = self._to_expanded_json(value) - if isinstance(expanded_value, list): + expanded_value = self._to_expanded_json([value]) + if len(expanded_value) == 0: + return True + if len(expanded_value) > 1: return all(val in self for val in expanded_value) self_attributes = {"parent": self.parent, "key": self.key, "index": self.index, "context": self.full_context} if self.container_type == "@set": - temp_list = ld_list([expanded_value], **self_attributes) + temp_list = ld_list(expanded_value, **self_attributes) return any(temp_list == ld_list([val], **self_attributes) for val in self.item_list) - temp_list = ld_list([{self.container_type: [expanded_value]}], **self_attributes) + temp_list = ld_list([{self.container_type: expanded_value}], **self_attributes) return any(temp_list == ld_list([{self.container_type: [val]}], **self_attributes) for val in self.item_list) def __eq__(self, other): @@ -125,8 +124,7 @@ def __ne__(self, other): return not x def append(self, value): - ld_value = val if isinstance(val := self._to_expanded_json(value), list) else [val] - self.item_list.extend(ld_value) + self.item_list.extend(self._to_expanded_json([value])) def extend(self, value): for item in value: @@ -158,18 +156,15 @@ def from_list(cls, value, *, parent=None, key=None, context=None, container_type value = [{container_type: value}] if parent is not None: if isinstance(parent, ld_list): - expanded_value = 
parent._to_expanded_json(value) + expanded_value = parent._to_expanded_json([value]) + if (len(expanded_value) != 1 or + not (isinstance(expanded_value[0], list) or cls.is_container(expanded_value[0]))): + parent.extend(expanded_value) + return parent else: expanded_value = parent._to_expanded_json({key: value})[cls.ld_proc.expand_iri(parent.active_ctx, key)] - if isinstance(parent, cls) and (isinstance(expanded_value, list) or not cls.is_container(expanded_value)): - parent.extend(expanded_value if isinstance(expanded_value, list) else [expanded_value]) - # TODO: is there a need to add the context to the parent as well? - return parent else: expanded_value = cls([], parent=None, key=key, context=context)._to_expanded_json(value) - # the object has to be a list for further use but does not have to be returned by _to_expanded_json as a list - if not isinstance(expanded_value, list): - expanded_value = [expanded_value] return cls(expanded_value, parent=parent, key=key, context=context) @classmethod From 4acf8de1a1f81c2349ce325130bff710ebdc4614 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 28 Nov 2025 12:48:47 +0100 Subject: [PATCH 17/26] added doc-strings and comments and pacified flake8 --- src/hermes/model/types/ld_container.py | 228 ++++++++++++++++--------- src/hermes/model/types/ld_list.py | 2 +- 2 files changed, 145 insertions(+), 85 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 4f0d6f41..b443ae1d 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -7,9 +7,22 @@ from .pyld_util import JsonLdProcessor, bundled_loader from datetime import date, time, datetime -from typing import Self +from typing import Union, Self, Any -JSON_LD_CONTEXT_DICT = dict[str, str | 'JSON_LD_CONTEXT_DICT'] + +JSON_LD_CONTEXT_DICT = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] +BASIC_TYPE = Union[str, float, int, bool] +EXPANDED_JSON_LD_VALUE = Union[ + 
list[Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]] +] +COMPACTED_JSON_LD_VALUE = Union[ + list[Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]] +] +TIME_TYPE = Union[datetime, date, time] +JSON_LD_VALUE = Union[ + list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]], + dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]], +] class ld_container: @@ -27,10 +40,10 @@ def __init__( self: Self, data: list, *, - parent: "ld_container" | None = None, - key: str | None = None, - index: int | None = None, - context: list[str | JSON_LD_CONTEXT_DICT] | None = None, + parent: Union["ld_container", None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, ) -> None: """ Create a new instance of an ld_container. @@ -46,7 +59,7 @@ def __init__( :param key: index into the parent container. :type index: int | None :param context: local context for this container. - :type context: list[str | JSON_LD_CONTEXT_DICT] + :type context: list[str | JSON_LD_CONTEXT_DICT] | None :return: :rtype: None @@ -65,18 +78,14 @@ def __init__( if self.parent: if self.context: self.active_ctx = self.ld_proc.process_context( - self.parent.active_ctx, - self.context, - {"documentLoader": bundled_loader}) + self.parent.active_ctx, self.context, {"documentLoader": bundled_loader} + ) else: self.active_ctx = parent.active_ctx else: - self.active_ctx = self.ld_proc.initial_ctx( - self.full_context, - {"documentLoader": bundled_loader} - ) + self.active_ctx = self.ld_proc.initial_ctx(self.full_context, {"documentLoader": bundled_loader}) - def add_context(self: Self, context: list[str | JSON_LD_CONTEXT_DICT]) -> None: + def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> None: """ Add the given context to the ld_container. 
@@ -89,14 +98,10 @@ def add_context(self: Self, context: list[str | JSON_LD_CONTEXT_DICT]) -> None: :rtype: None """ self.context = self.merge_to_list(self.context, context) - self.active_ctx = self.ld_proc.process_context( - self.active_ctx, - context, - {"documentLoader": bundled_loader} - ) + self.active_ctx = self.ld_proc.process_context(self.active_ctx, context, {"documentLoader": bundled_loader}) @property - def full_context(self: Self) -> list[str, JSON_LD_CONTEXT_DICT] | None: + def full_context(self: Self) -> Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None]: """ Return the context of the ld_container merged with the full_context of its parent. @@ -113,7 +118,7 @@ def full_context(self: Self) -> list[str, JSON_LD_CONTEXT_DICT] | None: return self.context @property - def path(self: Self) -> list[str | int]: + def path(self: Self) -> list[Union[str, int]]: """ Create a path representation for this item. @@ -129,10 +134,10 @@ def path(self: Self) -> list[str | int]: if self.parent: return self.parent.path + [self.key if self.index is None else self.index] else: - return ['$'] + return ["$"] @property - def ld_value(self: Self) -> list: + def ld_value(self: Self) -> EXPANDED_JSON_LD_VALUE: """ Return a representation that is suitable as a value in expanded JSON-LD of this ld_container. @@ -143,15 +148,13 @@ def ld_value(self: Self) -> list: This value is the basis of all operations and a reference to the original is returned and not a copy. Do **not** modify unless strictly necessary and you know what you do. Otherwise unexpected behavior may occur. 
- :rtype: list + :rtype: EXPANDED_JSON_LD_VALUE """ return self._data def _to_python( - self: Self, - full_iri: str, - ld_value: list | dict | str - ) -> "ld_container" | str | int | float | bool | date | datetime | time: + self: Self, full_iri: str, ld_value: Union[list, dict, str] + ) -> Union["ld_container", BASIC_TYPE, TIME_TYPE]: """ Returns a pythonized version of the given value pretending the value is in self and full_iri its key. @@ -164,36 +167,49 @@ def _to_python( :type ld_value: list | dict | str :return: The pythonized value of the ld_value. - :rtype: ld_container | str | int | float | bool | date | datetime | time + :rtype: ld_container | BASIC_TYPE | TIME_TYPE """ if full_iri == "@id": # values of key "@id" only have to be compacted value = self.ld_proc.compact_iri(self.active_ctx, ld_value, vocab=False) else: # use the type map from src/hermes/model/types/__init__.py to convert all other values. - value, ld_output = self.ld_proc.apply_typemap(ld_value, "python", "ld_container", - parent=self, key=full_iri) + value, ld_output = self.ld_proc.apply_typemap(ld_value, "python", "ld_container", parent=self, key=full_iri) # check if conversion was successful if ld_output is None: raise TypeError(full_iri, ld_value) return value - def _to_expanded_json(self, value): + def _to_expanded_json(self: Self, value: JSON_LD_VALUE) -> EXPANDED_JSON_LD_VALUE: """ - Returns an expanded version of the given value. - - The item_list/ data_dict of self will be substituted with value. - Value can be an ld_container or contain zero or more. - Then the _data of the inner most ld_dict that contains or is self will be expanded - using the JSON_LD-Processor. - If self and none of self's parents is an ld_dict, use the key from outer most ld_list - to generate a minimal dict. 
- - The result of this function is what value has turned into: - - If type(self) == ld_dict: the returned value is a dict - - If type(self) == ld_list: the returned value is a list + Returns an expanded version of the given value. + + The item_list/ data_dict of self will be substituted with value. + Value can be an ld_container or contain zero or more. + Then the _data of the inner most ld_dict that contains or is self will be expanded + using the JSON_LD-Processor. + If self and none of self's parents is an ld_dict, use the key from outer most ld_list + to generate a minimal dict. + + The result of this function is what value has turned into. + + :param self: The ld_dict or ld_list in which value gets expanded + :type self: Self + :param value: The value that is to be expanded. Different types are expected based on the type of self: +
  • If type(self) == ld_dict: value must be a dict
  • +
  • If type(self) == ld_list: value must be a list
+ value will be exapnded as if it was the data_dict/ the item_list of self. + :type value: JSON_LD_VALUE + + :return: The expanded version of value i.e. the data_dict/ item_list of self if it had been value. + The return type is based on the type of self: +
  • If type(self) == ld_dict: the returned values type is dict
  • +
  • If type(self) == ld_list: the returned values type is list
+ :rtype: EXPANDED_JSON_LD_VALUE """ + # search for an ld_dict that is either self or the inner most parents parent of self that is an ld_dict + # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list parent = self path = [] while parent.__class__.__name__ != "ld_dict": @@ -205,37 +221,36 @@ def _to_expanded_json(self, value): if parent.parent is None: break parent = parent.parent + + # if neither self nor any of its parents is a ld_dict: + # create a dict with the key of the outer most parent of self and this parents ld_value as a value + # this dict is stored in an ld_container and simulates the most minimal JSON-LD object possible if parent.__class__.__name__ != "ld_dict": key = self.ld_proc.expand_iri(parent.active_ctx, parent.key) parent = ld_container([{key: parent._data}]) path.append(0) - key_and_reference_todo_list = [] - if isinstance(value, ld_container): - if parent.__class__.__name__ == "ld_list" and parent.container_type == "@set": - value = value._data - else: - value = value._data[0] - elif isinstance(value, datetime): - value = {"@value": value.isoformat(), "@type": "schema:DateTime"} - elif isinstance(value, date): - value = {"@value": value.isoformat(), "@type": "schema:Date"} - elif isinstance(value, time): - value = {"@value": value.isoformat(), "@type": "schema:Time"} - else: - key_and_reference_todo_list = [(0, [value])] + # all ld_container (ld_dicts and ld_lists) and datetime, date as well as time objects in value have to dissolved + # because the JSON-LD processor can't handle them + # to do this traverse value in a BFS and replace all items with a type in 'special_types' with a usable values + key_and_reference_todo_list = [(0, [value])] special_types = (list, dict, ld_container, datetime, date, time) while True: + # check if ready if len(key_and_reference_todo_list) == 0: break + # get next item key, ref = key_and_reference_todo_list.pop() temp = ref[key] + # replace item if 
necessary and add childs to the todo list if isinstance(temp, list): - key_and_reference_todo_list.extend([(index, temp) for index, val in enumerate(temp) - if isinstance(val, special_types)]) + key_and_reference_todo_list.extend( + [(index, temp) for index, val in enumerate(temp) if isinstance(val, special_types)] + ) elif isinstance(temp, dict): - key_and_reference_todo_list.extend([(new_key, temp) for new_key in temp.keys() - if isinstance(temp[new_key], special_types)]) + key_and_reference_todo_list.extend( + [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)] + ) elif isinstance(temp, ld_container): ref[key] = temp._data[0] elif isinstance(temp, datetime): @@ -245,27 +260,40 @@ def _to_expanded_json(self, value): elif isinstance(temp, time): ref[key] = {"@value": temp.isoformat(), "@type": "schema:Time"} + # traverse the ld_value of parent with the previously generated path current_data = parent._data for index in range(len(path) - 1, 0, -1): current_data = current_data[path[index]] + # replace the data_dict/ item_list so that value is now inside of the ld_value of parent and store the old value if current_data == []: + # itemlist of an empty ld_list: + # The item_list can't be replaced like in all other cases self_data = None current_data.append(value) else: self_data = current_data[path[0]] current_data[path[0]] = value - expanded_data = self.ld_proc.expand(parent._data, {"expandContext": self.full_context, - "documentLoader": bundled_loader, - "keepFreeFloatingNodes": True}) + + # expand the ld_value of parent to implicitly expand value + # important the ld_value of parent is not modified because the processor makes a deep copy + expanded_data = self.ld_proc.expand( + parent._data, + {"expandContext": self.full_context, "documentLoader": bundled_loader, "keepFreeFloatingNodes": True}, + ) + + # restore the data_dict/ item_list to its former state if self_data is not None: current_data[path[0]] = self_data else: 
current_data.clear() + + # use the path to get the expansion of value for index in range(len(path) - 1, -1, -1): expanded_data = expanded_data[path[index]] return expanded_data + # TODO: remove this method and all other unused methods/ functions def _to_expanded_json_deprecated(self, key, value): if key == "@id": ld_value = self.ld_proc.expand_iri(self.active_ctx, value, vocab=False) @@ -275,11 +303,12 @@ def _to_expanded_json_deprecated(self, key, value): ld_value = [self.ld_proc.expand_iri(self.active_ctx, ld_type) for ld_type in value] else: short_key = self.ld_proc.compact_iri(self.active_ctx, key) - if ':' in short_key: - prefix, short_key = short_key.split(':', 1) + if ":" in short_key: + prefix, short_key = short_key.split(":", 1) ctx_value = self.ld_proc.get_context_value(self.active_ctx, prefix, "@id") - active_ctx = self.ld_proc.process_context(self.active_ctx, [ctx_value], - {"documentLoader": bundled_loader}) + active_ctx = self.ld_proc.process_context( + self.active_ctx, [ctx_value], {"documentLoader": bundled_loader} + ) else: active_ctx = self.active_ctx ld_type = self.ld_proc.get_context_value(active_ctx, short_key, "@type") @@ -287,38 +316,50 @@ def _to_expanded_json_deprecated(self, key, value): ld_value = [{"@id": value}] ld_output = "expanded_json" else: - ld_value, ld_output = self.ld_proc.apply_typemap(value, "expanded_json", "json", - parent=self, key=key) + ld_value, ld_output = self.ld_proc.apply_typemap(value, "expanded_json", "json", parent=self, key=key) if ld_output == "json": - ld_value = self.ld_proc.expand(ld_value, {"expandContext": self.full_context, - "documentLoader": bundled_loader}) + ld_value = self.ld_proc.expand( + ld_value, {"expandContext": self.full_context, "documentLoader": bundled_loader} + ) elif ld_output != "expanded_json": raise TypeError(f"Cannot convert {type(value)}") return ld_value - def __repr__(self): - return f'{type(self).__name__}({self._data})' + def __repr__(self: Self) -> str: + return 
f"{type(self).__name__}({self._data})" - def __str__(self): + def __str__(self: Self) -> str: return str(self.to_python()) - def compact(self, context=None): + def compact( + self: Self, context: Union[list[JSON_LD_CONTEXT_DICT], JSON_LD_CONTEXT_DICT] = None + ) -> COMPACTED_JSON_LD_VALUE: return self.ld_proc.compact( - self.ld_value, - context or self.context, - {"documentLoader": bundled_loader, "skipExpand": True} + self.ld_value, context or self.context, {"documentLoader": bundled_loader, "skipExpand": True} ) def to_python(self): raise NotImplementedError() @classmethod - def merge_to_list(cls, *args): + def merge_to_list(cls: Self, *args: tuple[Any]) -> list[Any]: + """ + Returns a list that is contains all non-list items from args and all items in the lists in args. + + :param *args: The items that should be put into one list. + :type *args: tuple[Any] + + :return: A list containing all non-list items and all items from lists in args. (Same order as in args) + :rytpe: list[Any] + """ + # base case for recursion if not args: return [] + # split args into first and all other items head, *tail = args + # recursion calls if isinstance(head, list): return [*head, *cls.merge_to_list(*tail)] else: @@ -354,14 +395,33 @@ def is_typed_json_value(cls, ld_value): @classmethod def typed_ld_to_py(cls, data, **kwargs): - ld_value = data[0]['@value'] + ld_value = data[0]["@value"] return ld_value @classmethod - def are_values_equal(cls, first, second): + def are_values_equal( + cls: Self, first: dict[str, Union[BASIC_TYPE, TIME_TYPE]], second: dict[str, Union[BASIC_TYPE, TIME_TYPE]] + ) -> bool: + """ + Returns whether or not the given expanded JSON-LD values are considered equal. + The comparison compares the "@id" values first and returns the result if it is conclusive. + + If the comparison is inconclusive i.e. exactly one or zero of both values have an "@id" value: + Return whether or not all other keys exist in both values and all values of the keys are the same. 
+ + :param first: The first value of the comparison + :type first: dict[str, Union[BASIC_TYPE, TIME_TYPE]] + :param second: The second value of the comparison + :type second: dict[str, Union[BASIC_TYPE, TIME_TYPE]] + + :return: Whether the values are considered equal or not. + :rtype: bool + """ + # compare @id's if "@id" in first and "@id" in second: return first["@id"] == second["@id"] + # compare all other values and keys (@id-comparison was inconclusive) for key in {"@value", "@type"}: if (key in first) ^ (key in second): return False diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 5402f85d..fd958b65 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -158,7 +158,7 @@ def from_list(cls, value, *, parent=None, key=None, context=None, container_type if isinstance(parent, ld_list): expanded_value = parent._to_expanded_json([value]) if (len(expanded_value) != 1 or - not (isinstance(expanded_value[0], list) or cls.is_container(expanded_value[0]))): + not (isinstance(expanded_value[0], list) or cls.is_container(expanded_value[0]))): parent.extend(expanded_value) return parent else: From b751130f303378c6f59a01d8fd16e5c854501d33 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 28 Nov 2025 13:52:45 +0100 Subject: [PATCH 18/26] began commenting and adding doc-strings to ld_list --- src/hermes/model/types/__init__.py | 1 + src/hermes/model/types/ld_container.py | 8 +-- src/hermes/model/types/ld_list.py | 97 +++++++++++++++++++++++--- 3 files changed, 93 insertions(+), 13 deletions(-) diff --git a/src/hermes/model/types/__init__.py b/src/hermes/model/types/__init__.py index 3cf51f35..8ab05171 100644 --- a/src/hermes/model/types/__init__.py +++ b/src/hermes/model/types/__init__.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche from datetime import date, time, datetime diff --git 
a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index b443ae1d..36a37e9e 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileContributor: Michael Meinel +# SPDX-FileContributor: Michael Fritzsche from .pyld_util import JsonLdProcessor, bundled_loader @@ -38,7 +39,7 @@ class ld_container: def __init__( self: Self, - data: list, + data: list[EXPANDED_JSON_LD_VALUE], *, parent: Union["ld_container", None] = None, key: Union[str, None] = None, @@ -51,12 +52,12 @@ def __init__( :param self: The instance of ld_container to be initialized. :type self: Self :param data: The expanded json-ld data that is mapped. - :type data: list + :type data: list[EXPANDED_JSON_LD_VALUE] :param parent: parent node of this container. :type parent: ld_container | None :param key: key into the parent container. :type key: str | None - :param key: index into the parent container. + :param index: index into the parent container. :type index: int | None :param context: local context for this container. :type context: list[str | JSON_LD_CONTEXT_DICT] | None @@ -64,7 +65,6 @@ def __init__( :return: :rtype: None """ - # Store basic data self.parent = parent self.key = key diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index fd958b65..9fec82fc 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -5,17 +5,56 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from .ld_container import ld_container +from .ld_container import ( + ld_container, + JSON_LD_CONTEXT_DICT, + EXPANDED_JSON_LD_VALUE, + COMPACTED_JSON_LD_VALUE, + JSON_LD_VALUE, + TIME_TYPE, + BASIC_TYPE, +) +from typing import Union, Self, Any -class ld_list(ld_container): - """ An JSON-LD container resembling a list. 
""" - def __init__(self, data, *, parent=None, key=None, index=None, context=None): +class ld_list(ld_container): + """ An JSON-LD container resembling a list ("@set", "@list" or "@graph"). """ + + def __init__( + self: Self, + data: Union[list[str], list[dict[str, Union[BASIC_TYPE, EXPANDED_JSON_LD_VALUE]]]], + *, + parent: Union["ld_container", None] = None, + key: Union[str, None] = None, + index: Union[int, None] = None, + context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + ) -> None: + """ + Create a new ld_list.py container. + + :param self: The instance of ld_list to be initialized. + :type self: Self + :param data: The expanded json-ld data that is mapped (must be valid for @set, @list or @graph) + :type data: list[str] | list[dict[str, BASIC_TYPE | EXPANDED_JSON_LD_VALUE]] + :param parent: parent node of this container. + :type parent: ld_container | None + :param key: key into the parent container. + :type key: str | None + :param index: index into the parent container. + :type index: int | None + :param context: local context for this container. 
+ :type context: list[str | JSON_LD_CONTEXT_DICT] | None + + :return: + :rtype: None + """ + # check for validity of data if not isinstance(key, str): raise ValueError("The key is not a string or was omitted.") if not isinstance(data, list): raise ValueError("The given data does not represent an ld_list.") + # infer the container type and item_list from data if self.is_ld_list(data): if "@list" in data[0]: self.container_type = "@list" @@ -28,34 +67,74 @@ def __init__(self, data, *, parent=None, key=None, index=None, context=None): else: self.container_type = "@set" self.item_list = data - if key == "@type" and not all(isinstance(item, str) for item in self.item_list): - raise ValueError("A given value for @type is not a string.") - if key != "@type" and not all(isinstance(item, dict) for item in self.item_list): + # further validity checks + if key == "@type": + if any(not isinstance(item, str) for item in self.item_list) or self.container_type != "@set": + raise ValueError("A given value for @type is not a string.") + elif any(not isinstance(item, dict) for item in self.item_list): raise ValueError("A given value is not properly expanded.") + # call super constructor super().__init__(data, parent=parent, key=key, index=index, context=context) - def __getitem__(self, index): + def __getitem__( + self: Self, index: Union[int, slice] + ) -> Union[BASIC_TYPE, TIME_TYPE, ld_container, list[Union[BASIC_TYPE, TIME_TYPE, ld_container]]]: + """ + Get the item(s) at position index in a pythonized form. + + :param self: The ld_list the items are taken from. + :type self: Self + :param index: The positon(s) from which the item(s) is/ are taken. + :type index: int | slice + + :return: The pythonized item(s) at index. 
+ :rtype: BASIC_TYPE | TIME_TYPE | ld_container | list[BASIC_TYPE | TIME_TYPE | ld_container]] + """ + # handle slices by applying them to a list of indices and then getting the items at those if isinstance(index, slice): return [self[i] for i in [*range(len(self))][index]] + # get the item from the item_list and pythonize it. If necessary add the index. item = self._to_python(self.key, self.item_list[index]) if isinstance(item, ld_container): item.index = index return item - def __setitem__(self, index, value): + def __setitem__( + self: Self, index: Union[int, slice], value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] + ) -> None: + """ + Set the item(s) at position index to the given value(s). + All given values are expanded. If any are assimilated by self all items that would be added by this are added. + + :param self: The ld_list the items are set in. + :type self: Self + :param index: The positon(s) at which the item(s) is/ are set. + :type index: int | slice + :param value: The new value(s). 
+ :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] + + :return: + :rtype: None + """ if not isinstance(index, slice): + # expand the value value = self._to_expanded_json([value]) + # the returned value is always a list but my contain more then one item + # therefor a slice on the item_list is used to add the expanded value(s) if index != -1: self.item_list[index:index+1] = value else: self.item_list[index:] = value return + # check if the given values can be iterated (value does not have to be a list) try: iter(value) except TypeError as exc: raise TypeError("must assign iterable to extended slice") from exc + # expand the values and merge all expanded values into one list expanded_value = ld_container.merge_to_list(*[self._to_expanded_json([val]) for val in value]) + # set the values at index to the expanded values self.item_list[index] = [val[0] if isinstance(val, list) else val for val in expanded_value] def __delitem__(self, index): From 3c243f281964799df65071f83aac54329e473632 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 5 Dec 2025 09:14:03 +0100 Subject: [PATCH 19/26] fixed bug in _to_expanded_json --- src/hermes/model/types/ld_container.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index 36a37e9e..feff2742 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -252,7 +252,10 @@ def _to_expanded_json(self: Self, value: JSON_LD_VALUE) -> EXPANDED_JSON_LD_VALU [(new_key, temp) for new_key in temp.keys() if isinstance(temp[new_key], special_types)] ) elif isinstance(temp, ld_container): - ref[key] = temp._data[0] + if temp.__class__.__name__ == "ld_list" and temp.container_type == "@set": + ref[key] = temp._data + else: + ref[key] = temp._data[0] elif isinstance(temp, datetime): ref[key] = {"@value": temp.isoformat(), "@type": "schema:DateTime"} elif isinstance(temp, date): From 
e0f0fc9b4fd1800f92412c8e871fc4afc424504d Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 5 Dec 2025 09:14:29 +0100 Subject: [PATCH 20/26] added a few test cases to delete of ld_list --- test/hermes_test/model/types/test_ld_list.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index 1b0a4127..aaeb548f 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -106,6 +106,16 @@ def test_build_in_del(): assert li == ["bar"] del li[0] assert li == [] + di = ld_dict([{}], context=[{"schema": "https://schema.org/"}]) + di["schema:name"] = [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}] + li = di["schema:name"] + del li[0] + assert len(di["schema:name"]) == 2 + di = ld_dict([{}], context=[{"schema": "https://schema.org/"}]) + di["schema:name"] = [{"@list": [{"@value": "foo"}, {"@value": "bar"}, {"@value": "foobar"}]}] + li = di["schema:name"] + del di["schema:name"][0:3:2] + assert len(di["schema:name"]) == 1 and len(li) == 1 def test_build_in_len(): From 1b5656ed0b5c79102c634f7979d1ffcedaeb2963 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 5 Dec 2025 09:14:44 +0100 Subject: [PATCH 21/26] added more comments to ld_list --- src/hermes/model/types/ld_list.py | 150 +++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 12 deletions(-) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 9fec82fc..e26a16ee 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -5,6 +5,7 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche +from types import NotImplementedType from .ld_container import ( ld_container, JSON_LD_CONTEXT_DICT, @@ -15,7 +16,7 @@ BASIC_TYPE, ) -from typing import Union, Self, Any +from typing import Generator, Union, Self class ld_list(ld_container): @@ -58,15 
+59,15 @@ def __init__( if self.is_ld_list(data): if "@list" in data[0]: self.container_type = "@list" - self.item_list = data[0]["@list"] + self.item_list: list = data[0]["@list"] elif "@graph" in data[0]: self.container_type = "@graph" - self.item_list = data[0]["@graph"] + self.item_list: list = data[0]["@graph"] else: raise ValueError("The given @set is not fully expanded.") else: self.container_type = "@set" - self.item_list = data + self.item_list: list = data # further validity checks if key == "@type": if any(not isinstance(item, str) for item in self.item_list) or self.container_type != "@set": @@ -137,67 +138,192 @@ def __setitem__( # set the values at index to the expanded values self.item_list[index] = [val[0] if isinstance(val, list) else val for val in expanded_value] - def __delitem__(self, index): + def __delitem__(self: Self, index: Union[int, slice]) -> None: + """ + Delete the item(s) at position index. + Note that if a deleted object is represented by an ld_container druing this process it will still exist + and not be modified afterwards. + + :param self: The ld_list the items are deleted from. + :type self: Self + :param index: The positon(s) at which the item(s) is/ are deleted. + :type index: int | slice + + :return: + :rtype: None + """ del self.item_list[index] - def __len__(self): + def __len__(self: Self) -> int: + """ + Returns the number of items in this ld_list. + + :param self: The ld_list whose length is to be returned. + :type self: Self + + :return: The length of self. + :rtype: int + """ return len(self.item_list) - def __iter__(self): + def __iter__(self: Self) -> Generator[Union[BASIC_TYPE | TIME_TYPE | ld_container], None, None]: + """ + Returns an iterator over the pythonized values contained in self. + + :param self: The ld_list over whose items is iterated. + :type self: Self + + :return: The Iterator over self's values. 
+ :rtype: Generator[Union[BASIC_TYPE | TIME_TYPE | ld_container], None, None] + """ + # return an Iterator over each value in self in its pythonized from for index, value in enumerate(self.item_list): item = self._to_python(self.key, value) + # add which entry an ld_container is stored at, if item is an ld_container if isinstance(item, ld_container): item.index = index yield item - def __contains__(self, value): + def __contains__(self: Self, value: JSON_LD_VALUE) -> bool: + """ + Returns whether or not value is contained in self. + Note that it is not directly checked if value is in self.item_list. + First value is expanded then it is checked if value is in self.item_list. + If however value is assimilated by self it is checked if all values are contained in self.item_list. + Also note that the checks whether the expanded value is in self.item_list is based on ld_list.__eq__. + That means that this value is 'contained' in self.item_list if any object in self.item_list + has the same @id like it or it xor the object in the item_list has an id an all other values are the same. + + :param self: The ld_list that is checked if it contains value. + :type self: Self + :param value: The object being checked whether or not it is in self. + :type value: JSON_LD_VALUE + + :return: Whether or not value is being considered to be contained in self. 
+ :rtype: bool + """ + # expand value expanded_value = self._to_expanded_json([value]) + # empty list -> no value to check if len(expanded_value) == 0: return True + # call contains on all items in the expanded list if it contains more then one item + # and return true only if all calls return true if len(expanded_value) > 1: return all(val in self for val in expanded_value) self_attributes = {"parent": self.parent, "key": self.key, "index": self.index, "context": self.full_context} + # create a temporary list containing the expanded value + # check for equality with a list containg exactly one item from self.item_list for every item in self.item_list + # return true if for any item in self.item_list this check returns true if self.container_type == "@set": temp_list = ld_list(expanded_value, **self_attributes) return any(temp_list == ld_list([val], **self_attributes) for val in self.item_list) temp_list = ld_list([{self.container_type: expanded_value}], **self_attributes) return any(temp_list == ld_list([{self.container_type: [val]}], **self_attributes) for val in self.item_list) - def __eq__(self, other): + def __eq__( + self: Self, + other: Union[ + "ld_list", + list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], + dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]], + ], + ) -> Union[bool, NotImplementedType]: + """ + Returns wheter or not self is considered to be equal to other. + If other is not an ld_list, it is converted first. + For each index it is checked if the ids of the items at index in self and other match if both have one, + if only one has an id all other values are compared. + If self or other is considered unordered the comparison is more difficult and ... + + :param self: The ld_list other is compared to. + :type self: Self + :param other: The list/ container/ ld_list self is compared to. 
+ :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container] + | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]] + + :return: Whether or not self and other are considered equal. + If other is of the wrong type return NotImplemented instead. + :rtype: bool | NotImplementedType + """ # TODO: ld_lists with container_type "@set" have to be considered unordered + # check if other has an acceptable type if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): return NotImplemented + + # convert other into an ld_list if it isn't one already if isinstance(other, dict): other = [other] if isinstance(other, list): if ld_list.is_ld_list(other): other = ld_list.get_item_list_from_container(other[0]) other = self.from_list(other, parent=self.parent, key=self.key, context=self.context) + + # check if the length matches if len(self.item_list) != len(other.item_list): return False + + # check for special case (= key is @type) if (self.key == "@type") ^ (other.key == "@type"): return False if self.key == other.key == "@type": + # lists will only contain string return self.item_list == other.item_list + + # check if at each index the items are considered equal for index, (item, other_item) in enumerate(zip(self.item_list, other.item_list)): + # check if items are values if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and (ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))): if not ld_container.are_values_equal(item, other_item): return False continue + # check if both contain an id and compare if "@id" in item and "@id" in other_item: - return item["@id"] == other_item["@id"] + if item["@id"] != other_item["@id"]: + return False + continue + # get the 'real' items (i.e. 
can also be ld_dicts or ld_lists) item = self[index] other_item = other[index] + # compare using the correct equals method res = item.__eq__(other_item) if res == NotImplemented: + # swap order if first try returned NotImplemented res = other_item.__eq__(item) - if res is False or res == NotImplemented: # res is not True + # return false if the second comparison also fails or one of them returned false + if res is False or res == NotImplemented: return False + + # return true because no unequal elements where found return True - def __ne__(self, other): + def __ne__( + self: Self, + other: Union[ + "ld_list", + list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]], + dict[str, list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]], + ], + ) -> Union[bool, NotImplementedType]: + """ + Returns whether or not self and other not considered to be equal. + (Returns not self.__eq__(other) if the return type is bool. + See ld_list.__eq__ for more details on the comparison.) + + :param self: The ld_list other is compared to. + :type self: Self + :param other: The list/ container/ ld_list self is compared to. + :type other: ld_list | list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container] + | dict[str, list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE | ld_container]] + + :return: Whether or not self and other are not considered equal. + If other is of the wrong type return NotImplemented instead. 
+ :rtype: bool | NotImplementedType + """ + # compare self and other using __eq__ x = self.__eq__(other) + # return NotImplemented if __eq__ did so and else the inverted result of __eq__ if x is NotImplemented: return NotImplemented return not x From 071630ce03d52f5c5a6839fb3260837154c678a9 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 5 Dec 2025 14:04:09 +0100 Subject: [PATCH 22/26] added more doc strings --- src/hermes/model/types/ld_container.py | 45 +++++-- src/hermes/model/types/ld_list.py | 167 ++++++++++++++++++++++--- 2 files changed, 187 insertions(+), 25 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index feff2742..b1b55af4 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -13,17 +13,20 @@ JSON_LD_CONTEXT_DICT = dict[str, Union[str, "JSON_LD_CONTEXT_DICT"]] BASIC_TYPE = Union[str, float, int, bool] -EXPANDED_JSON_LD_VALUE = Union[ - list[Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]], dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]] -] +EXPANDED_JSON_LD_VALUE = list[dict[str, Union["EXPANDED_JSON_LD_VALUE", BASIC_TYPE]]] COMPACTED_JSON_LD_VALUE = Union[ - list[Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]] + list[Union[dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], BASIC_TYPE]], + dict[str, Union["COMPACTED_JSON_LD_VALUE", BASIC_TYPE]], ] TIME_TYPE = Union[datetime, date, time] JSON_LD_VALUE = Union[ list[Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]], dict[str, Union["JSON_LD_VALUE", BASIC_TYPE, TIME_TYPE, "ld_container"]], ] +PYTHONIZED_LD_CONTAINER = Union[ + list[Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], + dict[str, Union["PYTHONIZED_LD_CONTAINER", BASIC_TYPE, TIME_TYPE]], +] class ld_container: @@ -33,13 +36,33 @@ class ld_container: A linked data container impelements a view on the expanded form of an JSON-LD document. 
It allows to easily interacts them by hinding all the nesting and automatically mapping between different forms. + + :ivar active_ctx: The active context that is used by the json-ld processor. + :ivar context: The context exclusive to this ld_container and all its childs + (it can still be the same as e.g. parent.context) + :ivartype context: list[str | JSON_LD_CONTEXT_DICT] + :ivar full_context: The context of this ld_container and all its parents merged into one list. + :ivartype full_context: list[str | JSON_LD_CONTEXT_DICT] + :ivar index: The index into the parent container if it is a list. + :ivartype index: int + :ivar key: The key into the inner most parent that is a dict of this ld_container. + :ivartype key: str + :ivar ld_value: The expanded JSON-LD value this object represents. + :ivartype ld_value: EXPANDED_JSON_LD_VALUE + :ivar parent: The ld_container this one is directly contained in. + :ivartype parent: ld_container + :ivar path: The path from the outer most parent to this ld_container. + :ivartype path: list[str | int] + + :cvar ld_proc: The JSON-LD processor object for all ld_container. + :cvartype ld_proc: JsonLdProcessor """ ld_proc = JsonLdProcessor() def __init__( self: Self, - data: list[EXPANDED_JSON_LD_VALUE], + data: EXPANDED_JSON_LD_VALUE, *, parent: Union["ld_container", None] = None, key: Union[str, None] = None, @@ -52,7 +75,7 @@ def __init__( :param self: The instance of ld_container to be initialized. :type self: Self :param data: The expanded json-ld data that is mapped. - :type data: list[EXPANDED_JSON_LD_VALUE] + :type data: EXPANDED_JSON_LD_VALUE :param parent: parent node of this container. :type parent: ld_container | None :param key: key into the parent container. 
@@ -101,7 +124,7 @@ def add_context(self: Self, context: list[Union[str | JSON_LD_CONTEXT_DICT]]) -> self.active_ctx = self.ld_proc.process_context(self.active_ctx, context, {"documentLoader": bundled_loader}) @property - def full_context(self: Self) -> Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None]: + def full_context(self: Self) -> list[Union[str, JSON_LD_CONTEXT_DICT]]: """ Return the context of the ld_container merged with the full_context of its parent. @@ -110,7 +133,7 @@ def full_context(self: Self) -> Union[list[Union[str, JSON_LD_CONTEXT_DICT]], No :return: The context of the ld_container merged with the full_context of its parent via ld_container.merge_to_list or just the context of this ld_container if self.parent is None. - :rtype: list[str | JSON_LD_CONTEXT_DICT] | None + :rtype: list[str | JSON_LD_CONTEXT_DICT] """ if self.parent is not None: return self.merge_to_list(self.parent.full_context, self.context) @@ -181,7 +204,9 @@ def _to_python( return value - def _to_expanded_json(self: Self, value: JSON_LD_VALUE) -> EXPANDED_JSON_LD_VALUE: + def _to_expanded_json( + self: Self, value: JSON_LD_VALUE + ) -> Union[EXPANDED_JSON_LD_VALUE, dict[str, EXPANDED_JSON_LD_VALUE]]: """ Returns an expanded version of the given value. @@ -206,7 +231,7 @@ def _to_expanded_json(self: Self, value: JSON_LD_VALUE) -> EXPANDED_JSON_LD_VALU The return type is based on the type of self:
  • If type(self) == ld_dict: the returned values type is dict
  • If type(self) == ld_list: the returned values type is list
- :rtype: EXPANDED_JSON_LD_VALUE + :rtype: EXPANDED_JSON_LD_VALUE | dict[str, EXPANDED_JSON_LD_VALUE] """ # search for an ld_dict that is either self or the inner most parents parent of self that is an ld_dict # while searching build a path such that it leads from the found ld_dicts ld_value to selfs data_dict/ item_list diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index e26a16ee..c10a6aa8 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -10,21 +10,29 @@ ld_container, JSON_LD_CONTEXT_DICT, EXPANDED_JSON_LD_VALUE, - COMPACTED_JSON_LD_VALUE, + PYTHONIZED_LD_CONTAINER, JSON_LD_VALUE, TIME_TYPE, BASIC_TYPE, ) -from typing import Generator, Union, Self +from typing import Generator, Union, Self, Any class ld_list(ld_container): - """ An JSON-LD container resembling a list ("@set", "@list" or "@graph"). """ + """ + An JSON-LD container resembling a list ("@set", "@list" or "@graph"). + See also :class:`ld_container` + + :ivar container_type: The type of JSON-LD container the list is representing. ("@set", "@list", "graph") + :ivartype container_type: str + :ivar item_list: The list of items (in expanded JSON-LD form) that are contained in this ld_list. + :ivartype item_list: EXPANDED_JSON_LD_VALUE + """ def __init__( self: Self, - data: Union[list[str], list[dict[str, Union[BASIC_TYPE, EXPANDED_JSON_LD_VALUE]]]], + data: Union[list[str], list[dict[str, EXPANDED_JSON_LD_VALUE]]], *, parent: Union["ld_container", None] = None, key: Union[str, None] = None, @@ -32,7 +40,7 @@ def __init__( context: Union[list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, ) -> None: """ - Create a new ld_list.py container. + Create a new ld_list container. :param self: The instance of ld_list to be initialized. 
:type self: Self @@ -49,6 +57,9 @@ def __init__( :return: :rtype: None + + :raises ValueError: bla + :raises ValueError: bla """ # check for validity of data if not isinstance(key, str): @@ -278,7 +289,7 @@ def __eq__( if not ld_container.are_values_equal(item, other_item): return False continue - # check if both contain an id and compare + # check if both contain an id and compare if "@id" in item and "@id" in other_item: if item["@id"] != other_item["@id"]: return False @@ -328,25 +339,78 @@ def __ne__( return NotImplemented return not x - def append(self, value): + def append(self: Self, value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]) -> None: + """ + Append the item to the given ld_list self. + The given value is expanded. If it is assimilated by self all items that would be added by this are added. + + :param self: The ld_list the item is appended to. + :type self: Self + :param value: The new value. + :type value: Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container] + + :return: + :rtype: None + """ self.item_list.extend(self._to_expanded_json([value])) - def extend(self, value): + def extend(self: Self, value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]]) -> None: + """ + Append the items in value to the given ld_list self. + The given values are expanded. If any are assimilated by self all items that would be added by this are added. + + :param self: The ld_list the items are appended to. + :type self: Self + :param value: The new values. + :type value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE, ld_container]] + + :return: + :rtype: None + """ for item in value: self.append(item) - def to_python(self): + def to_python(self: Self) -> list[PYTHONIZED_LD_CONTAINER]: + """ + Return a fully pythonized version of this object where all ld_container are replaced by lists and dicts. + + :param self: The ld_list whose fully pythonized version is returned. 
+ :type self: Self + + :return: The fully pythonized version of self. + :rtype: list[PYTHONIZED_LD_CONTAINER] + """ return [ item.to_python() if isinstance(item, ld_container) else item for item in self ] @classmethod - def is_ld_list(cls, ld_value): + def is_ld_list(cls: Self, ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing an ld_list.
+ I.e. if ld_value is of the form [{container_type: [...]}] where container_type is '@set', '@list' or '@graph'. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent an ld_list. + :rtype: bool + """ return cls.is_ld_node(ld_value) and cls.is_container(ld_value[0]) @classmethod def is_container(cls, value): + """ + Returns wheter the given value is considered to be possible of representing an json-ld container.
+ I.e. if ld_value is of the form {container_type: [...]} where container_type is '@set', '@list' or '@graph'. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent a json-ld container. + :rtype: bool + """ return ( isinstance(value, dict) and [*value.keys()] in [["@list"], ["@set"], ["@graph"]] @@ -354,27 +418,100 @@ def is_container(cls, value): ) @classmethod - def from_list(cls, value, *, parent=None, key=None, context=None, container_type="@set"): + def from_list( + cls: Self, + value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE]], + *, + parent: Union[ld_container, None] = None, + key: Union[str, None] = None, + context: Union[str, JSON_LD_CONTEXT_DICT, list[Union[str, JSON_LD_CONTEXT_DICT]], None] = None, + container_type: str = "@set" + ) -> "ld_list": + """ + Creates a ld_list from the given list with the given parent, key, context and container_type.
+ Note that only container_type '@set' is valid for key '@type'.
+ Further more note that if parent would assimilate the values in value no new ld_list is created + and the given values are appended to parent instead and parent is returned. + + :param value: The list of values the ld_list should be created from. + :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE] + :param parent: The parent container of the new ld_list.
If value is assimilated by parent during JSON-LD + expansion parent is extended by value and parent is returned. + :type parent: ld_container | None + :param key: The key into the inner most parent container representing a dict of the new ld_list. + :type key: str | None + :param context: The context for the new list (it will also inherit the context of parent).
+ Note that this context won't be added to parent if value is assimilated by parent and parent is returned. + :type context: str | JSON_LD_CONTEXT_DICT | list[str | JSON_LD_CONTEXT_DICT] | None + :param container_type: The container type of the new list valid are '@set', '@list' and '@graph'.
+ If value is assimilated by parent and parent is returned the given container_type won't affect + the container type of parent.
Also note that only '@set' is valid if key is '@type'. + :type container_type: str + + :return: The new ld_list build from value or if value is assimilated by parent, parent extended by value. + :rtype: ld_list + + :raises ValueError: If key is '@type' and container_type is not '@set'. + """ + # TODO: handle context if not of type list or None + # validate container_type if key == "@type": - container_type = "@set" - if container_type != "@set": + if container_type != "@set": + raise ValueError(f"The given container type is {container_type} which is invalid for a list" + " containing values for '@type' (valid is only '@set').") + if container_type in {"@list", "@graph"}: + # construct json-ld container that indicates the container type value = [{container_type: value}] + elif container_type != "@set": + raise ValueError(f"Invalid container type: {container_type}. (valid are only '@set', '@list' and '@graph')") + if parent is not None: + # expand value in the "context" of parent if isinstance(parent, ld_list): expanded_value = parent._to_expanded_json([value]) if (len(expanded_value) != 1 or not (isinstance(expanded_value[0], list) or cls.is_container(expanded_value[0]))): + # parent assimilated value druing expansion. 
Therefor the values are appended and parent returned + # if value is assimilated but contained only one list after expansion this list is used for + # the new list instead of expanding parent parent.extend(expanded_value) return parent else: expanded_value = parent._to_expanded_json({key: value})[cls.ld_proc.expand_iri(parent.active_ctx, key)] else: + # create a temporary ld_list which is necessary for expansion + # value is not passed in a list as usual because value should be treated like the item list of the + # temporary object and not like a item in it expanded_value = cls([], parent=None, key=key, context=context)._to_expanded_json(value) + + # construct and return the final ld_list from the expanded_value return cls(expanded_value, parent=parent, key=key, context=context) @classmethod - def get_item_list_from_container(cls, ld_value): + def get_item_list_from_container(cls: Self, ld_value: dict[str, list[Any]]) -> list[Any]: + """ + Returns the item list from a container, the given ld_value, (i.e. {container_type: item_list}).
+ Only '@set', '@list' and '@graph' are valid container types. + + :param ld_value: The container whose item list is to be returned. + :type ld_value: dict[str, list[Any]] + + :returns: The list the container holds. + :rtype: list[Any] + + :raises ValueError: If the item_container is not a dict. + :raises ValueError: If the container_type is not exactly one of '@set', '@list' and '@graph'. + :raises ValueError: If the item_list is no list. + """ + if type(ld_value) != dict: + raise ValueError(f"The given data {ld_value} is not a dictionary and therefor no container.") + if len(ld_value.keys()) != 1: + raise ValueError(f"The given data contains two many or few entries ({len(ld_value.keys())})." + " It should be only one entry: '@set', '@list' or '@graph' as key and a list as value.") + # find the container type to return the item_list for cont in {"@list", "@set", "@graph"}: if cont in ld_value: + if type(ld_value[cont]) != list: + raise ValueError(f"The item list of {ld_value} is of type {type(ld_value[cont])} and not list.") return ld_value[cont] - raise ValueError("The given data does not represent a container.") + raise ValueError(f"The given data {ld_value} does not represent a container.") From d4b34b41922fc02230ea1cd3c588a490b8b0aea3 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 8 Dec 2025 08:39:21 +0100 Subject: [PATCH 23/26] corrected type hints --- src/hermes/model/types/ld_list.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index c10a6aa8..bba5afcb 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -386,7 +386,7 @@ def to_python(self: Self) -> list[PYTHONIZED_LD_CONTAINER]: ] @classmethod - def is_ld_list(cls: Self, ld_value: Any) -> bool: + def is_ld_list(cls: type[Self], ld_value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an ld_list.
I.e. if ld_value is of the form [{container_type: [...]}] where container_type is '@set', '@list' or '@graph'. @@ -400,7 +400,7 @@ def is_ld_list(cls: Self, ld_value: Any) -> bool: return cls.is_ld_node(ld_value) and cls.is_container(ld_value[0]) @classmethod - def is_container(cls, value): + def is_container(cls: type[Self], value: Any) -> bool: """ Returns wheter the given value is considered to be possible of representing an json-ld container.
I.e. if ld_value is of the form {container_type: [...]} where container_type is '@set', '@list' or '@graph'. @@ -419,7 +419,7 @@ def is_container(cls, value): @classmethod def from_list( - cls: Self, + cls: type[Self], value: list[Union[JSON_LD_VALUE, BASIC_TYPE, TIME_TYPE]], *, parent: Union[ld_container, None] = None, @@ -488,7 +488,7 @@ def from_list( return cls(expanded_value, parent=parent, key=key, context=context) @classmethod - def get_item_list_from_container(cls: Self, ld_value: dict[str, list[Any]]) -> list[Any]: + def get_item_list_from_container(cls: type[Self], ld_value: dict[str, list[Any]]) -> list[Any]: """ Returns the item list from a container, the given ld_value, (i.e. {container_type: item_list}).
Only '@set', '@list' and '@graph' are valid container types. From f0f18188037599fe4db69af58723929760110b45 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 8 Dec 2025 09:02:33 +0100 Subject: [PATCH 24/26] documented the rest of the methods in ld_container --- src/hermes/model/types/ld_container.py | 136 +++++++++++++++++++++++-- 1 file changed, 125 insertions(+), 11 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index b1b55af4..a0a0bfa5 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -355,14 +355,43 @@ def _to_expanded_json_deprecated(self, key, value): return ld_value def __repr__(self: Self) -> str: + """ + Returns a short string representation of this object. + + :param self: The object whose representation is returned. + :type self: Self + + :returns: The short representation of self. + :rtype: str + """ return f"{type(self).__name__}({self._data})" def __str__(self: Self) -> str: + """ + Returns a string representation of this object. + + :param self: The object whose representation is returned. + :type self: Self + + :returns: The representation of self. + :rtype: str + """ return str(self.to_python()) def compact( - self: Self, context: Union[list[JSON_LD_CONTEXT_DICT], JSON_LD_CONTEXT_DICT] = None + self: Self, context: Union[list[Union[JSON_LD_CONTEXT_DICT, str]], JSON_LD_CONTEXT_DICT, str, None] = None ) -> COMPACTED_JSON_LD_VALUE: + """ + Returns the compacted version of the given ld_container using its context only if none was supplied. + + :param self: The ld_container that is to be compacted. + :type self: Self + :param context: The context to use for the compaction. If None the context of self is used. + :type context: list[JSON_LD_CONTEXT_DICT | str] | JSON_LD_CONTEXT_DICT | str | None + + :returns: The compacted version of selfs JSON-LD representation. 
+ :rtype: COMPACTED_JSON_LD_VALUE + """ return self.ld_proc.compact( self.ld_value, context or self.context, {"documentLoader": bundled_loader, "skipExpand": True} ) @@ -371,7 +400,7 @@ def to_python(self): raise NotImplementedError() @classmethod - def merge_to_list(cls: Self, *args: tuple[Any]) -> list[Any]: + def merge_to_list(cls: type[Self], *args: tuple[Any]) -> list[Any]: """ Returns a list that is contains all non-list items from args and all items in the lists in args. @@ -394,42 +423,127 @@ def merge_to_list(cls: Self, *args: tuple[Any]) -> list[Any]: return [head, *cls.merge_to_list(*tail)] @classmethod - def is_ld_node(cls, ld_value): + def is_ld_node(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node.
+ I.e. if ld_value is of the form [{a: b, ..., y: z}]. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent an expanded JSON-LD node. + :rtype: bool + """ return isinstance(ld_value, list) and len(ld_value) == 1 and isinstance(ld_value[0], dict) @classmethod - def is_ld_id(cls, ld_value): + def is_ld_id(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing an expanded JSON-LD node + containing only an @id value.
+ I.e. if ld_value is of the form [{"@id": ...}]. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent an expanded JSON-LD node containing only an @id value. + :rtype: bool + """ return cls.is_ld_node(ld_value) and cls.is_json_id(ld_value[0]) @classmethod - def is_ld_value(cls, ld_value): + def is_ld_value(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value.
+ I.e. if ld_value is of the form [{"@value": a, ..., x: z}]. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent an expanded JSON-LD value. + :rtype: bool + """ return cls.is_ld_node(ld_value) and "@value" in ld_value[0] @classmethod - def is_typed_ld_value(cls, ld_value): + def is_typed_ld_value(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing an expanded JSON-LD value + containing a value type.
+ I.e. if ld_value is of the form [{"@value": a, "@type": b, ..., x: z}]. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent an expanded JSON-LD value containing a value type. + :rtype: bool + """ return cls.is_ld_value(ld_value) and "@type" in ld_value[0] @classmethod - def is_json_id(cls, ld_value): + def is_json_id(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD node + containing only an @id value.
+ I.e. if ld_value is of the form {"@id": ...}. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent a non-expanded JSON-LD node containing only an @id value. + :rtype: bool + """ return isinstance(ld_value, dict) and ["@id"] == [*ld_value.keys()] @classmethod - def is_json_value(cls, ld_value): + def is_json_value(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value.
+ I.e. if ld_value is of the form {"@value": b, ..., x: z}. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent a non-expanded JSON-LD value. + :rtype: bool + """ return isinstance(ld_value, dict) and "@value" in ld_value @classmethod - def is_typed_json_value(cls, ld_value): + def is_typed_json_value(cls: type[Self], ld_value: Any) -> bool: + """ + Returns wheter the given value is considered to be possible of representing a non-expanded JSON-LD value + containing a value type.
+ I.e. if ld_value is of the form {"@value": a, "@type": b, ..., x: z}. + + :param ld_value: The value that is checked. + :type ld_value: Any + + :returns: Wheter or not ld_value could represent a non-expanded JSON-LD value containing a value type. + :rtype: bool + """ return cls.is_json_value(ld_value) and "@type" in ld_value @classmethod - def typed_ld_to_py(cls, data, **kwargs): + def typed_ld_to_py(cls: type[Self], data: list[dict[str, BASIC_TYPE]], **kwargs) -> Union[BASIC_TYPE, TIME_TYPE]: + """ + Returns the value of the given expanded JSON-LD value containing a value type converted into that type. + Meaning the pythonized version of the JSON-LD value data is returned.
+ ld_container.is_typed_ld_value(data) must return True. + + :param data: The value that is that is converted into its pythonized from. + :type data: list[dict[str, BASIC_TYPE]] + + :returns: The pythonized version of data. + :rtype: BASIC_TYPE | TIME_TYPE + """ ld_value = data[0]["@value"] return ld_value @classmethod def are_values_equal( - cls: Self, first: dict[str, Union[BASIC_TYPE, TIME_TYPE]], second: dict[str, Union[BASIC_TYPE, TIME_TYPE]] + cls: type[Self], first: dict[str, Union[BASIC_TYPE, TIME_TYPE]], second: dict[str, Union[BASIC_TYPE, TIME_TYPE]] ) -> bool: """ Returns whether or not the given expanded JSON-LD values are considered equal. From bcc233d0f76afef417cedb72f1cf94c79aba8aa7 Mon Sep 17 00:00:00 2001 From: notactuallyfinn Date: Fri, 12 Dec 2025 14:00:57 +0100 Subject: [PATCH 25/26] implemented list comparison and added tests for it --- src/hermes/model/types/ld_list.py | 247 ++++++++++++++++--- test/hermes_test/model/types/test_ld_list.py | 23 ++ 2 files changed, 240 insertions(+), 30 deletions(-) diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index bba5afcb..490ac4b3 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -5,6 +5,7 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche +from collections import deque from types import NotImplementedType from .ld_container import ( ld_container, @@ -16,7 +17,7 @@ BASIC_TYPE, ) -from typing import Generator, Union, Self, Any +from typing import Generator, Hashable, Union, Self, Any class ld_list(ld_container): @@ -241,11 +242,16 @@ def __eq__( ], ) -> Union[bool, NotImplementedType]: """ - Returns wheter or not self is considered to be equal to other. + Returns wheter or not self is considered to be equal to other.
If other is not an ld_list, it is converted first. For each index it is checked if the ids of the items at index in self and other match if both have one, - if only one has an id all other values are compared. - If self or other is considered unordered the comparison is more difficult and ... + if only one has or neither have an id all other values are compared.
+ Note that due to those circumstances equality is not transitive + meaning if a == b and b == c it is not guaranteed that a == c.
+ If self or other is considered unordered the comparison is more difficult. All items in self are compared + with all items in other. On the resulting graph given by the realtion == the Hopcroft-Karp algoritm is used + to determine if there exists a bijection reordering self so that the ordered comparison of self with other + returns true. :param self: The ld_list other is compared to. :type self: Self @@ -257,7 +263,6 @@ def __eq__( If other is of the wrong type return NotImplemented instead. :rtype: bool | NotImplementedType """ - # TODO: ld_lists with container_type "@set" have to be considered unordered # check if other has an acceptable type if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)): return NotImplemented @@ -281,33 +286,215 @@ def __eq__( # lists will only contain string return self.item_list == other.item_list - # check if at each index the items are considered equal - for index, (item, other_item) in enumerate(zip(self.item_list, other.item_list)): - # check if items are values - if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and - (ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))): - if not ld_container.are_values_equal(item, other_item): + if self.container_type == other.container_type == "@list": + # check if at each index the items are considered equal + for index, (item, other_item) in enumerate(zip(self.item_list, other.item_list)): + # check if items are values + if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and + (ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))): + if not ld_container.are_values_equal(item, other_item): + return False + continue + # check if both contain an id and compare + if "@id" in item and "@id" in other_item: + if item["@id"] != other_item["@id"]: + return False + continue + # get the 'real' items (i.e. 
can also be ld_dicts or ld_lists) + item = self[index] + other_item = other[index] + # compare using the correct equals method + res = item.__eq__(other_item) + if res == NotImplemented: + # swap order if first try returned NotImplemented + res = other_item.__eq__(item) + # return false if the second comparison also fails or one of them returned false + if res is False or res == NotImplemented: return False - continue - # check if both contain an id and compare - if "@id" in item and "@id" in other_item: - if item["@id"] != other_item["@id"]: + # return true because no unequal elements where found + return True + else: + # check which items in self are equal the which in other + equality_pairs = [[] for i in range(len(self))] # j in equality_pairs[i] <=> self[i] == other[j] + for index, item in enumerate(self.item_list): + for other_index, other_item in enumerate(other.item_list): + # check if items are values + if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and + (ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))): + if ld_container.are_values_equal(item, other_item): + equality_pairs[index] += [other_index] + continue + # check if both contain an id and compare + if "@id" in item and "@id" in other_item: + if item["@id"] == other_item["@id"]: + equality_pairs[index] += [other_index] + continue + # get the 'real' items (i.e. can also be ld_dicts or ld_lists) + item = self[index] + other_item = other[index] + # compare using the correct equals method + res = item.__eq__(other_item) + if res == NotImplemented: + # swap order if first try returned NotImplemented + res = other_item.__eq__(item) + # if one of both comparisons returned true the elements are equal + if res: + equality_pairs[index] += [other_index] + if len(equality_pairs[index]) == 0: + # there exists no element in other that is equal to item return False - continue - # get the 'real' items (i.e. 
can also be ld_dicts or ld_lists) - item = self[index] - other_item = other[index] - # compare using the correct equals method - res = item.__eq__(other_item) - if res == NotImplemented: - # swap order if first try returned NotImplemented - res = other_item.__eq__(item) - # return false if the second comparison also fails or one of them returned false - if res is False or res == NotImplemented: - return False - - # return true because no unequal elements where found - return True + # check if there is a way to chose one index from equality_pairs[i] for every i + # so that there are no two i's with the same chosen index. + # If such a way exists self and other are considered equal. If not they are considered to be not equal. + # solved via a Hopcroft-Karp algorithm variant: + # The bipartite graph is the disjoint union of the vertices 1 to len(self) and + # freely chosen ids for each list in equality_pairs. + # The graph has an edge from i to the id of a list if i is contained in the list. + item_count = len(self) + verticies_set1 = {*range(item_count)} + verticies_set2 = {*range(item_count, 2 * item_count)} + edges = {i: tuple(j for j in verticies_set2 if i in equality_pairs[j - item_count]) for i in verticies_set1} + return ld_list._hopcroft_karp(verticies_set1, verticies_set2, edges) == len(self) + + @classmethod + def _bfs_step( + cls: Self, verticies1: set[Hashable], edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable], + distances: dict[Hashable, Union[int, float]] + ) -> bool: + """ + Completes the BFS step of Hopcroft-Karp. I.e.:
+ Finds the shortest path from all unmatched vertices in verticies1 to any unmatched vertex in any value in edges + where the connecting paths are alternating between matches and its complement.
+ It also marks each vertex in verticies1 with how few verticies from verticies1 have to be passed + to reach the vertex from an unmatched one in verticies1. This is stored in distances. + + :param verticies1: The set of verticies in the left partition of the bipartite graph. + :type verticies1: set[Hashable] + :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in + this format: Dictionary with keys being the vertices in the left partition and values being tuples + of verticies in the right partition.) + :type edges: dict[Hashable, tuple[Hashable]] + :param matches: The current matching of verticies in the left partition with the ones in the right partition. + :type matches: dict[Hashable, Hashable] + :param distances: The reference to the dictionary mapping verticies of the left partition to the minimal + number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left + partition to the vertex that is the key. + :type distances: dict[Hashable, Union[int, float]] + + :returns: Wheter or not a alternating path from an unmatched vertex in the left partition to an unmatched vertex + in the right partition exists. 
+ :rtype: bool + """ + # initialize the queue and set the distances to zero for unmatched vertices and to inf for all others + queue = deque() + for ver in verticies1: + if matches[ver] is None: + distances[ver] = 0 + queue.append(ver) + else: + distances[ver] = float("inf") + distances[None] = float("inf") + # begin BFS + while len(queue) != 0: + ver1 = queue.popleft() + # if the current vertex has a distance less then the current minimal one from an unmatched vertex in the + # left partition to an unmatched one in the right partition + if distances[ver1] < distances[None]: + # iterate over all vertices in the right partition connected to ver1 + for ver2 in edges[ver1]: + # if the vertex ver2 is matched with (or None if not matched) wasn't visited yet + if distances[matches[ver2]] == float("inf"): + # initialize the distance and queue the vertex for further search + distances[matches[ver2]] = distances[ver1] + 1 + queue.append(matches[ver2]) + # if a path to None i.e. an unmatched vertex in the right partition was found return true otherwise false + return distances[None] != float("inf") + + @classmethod + def _dfs_step( + cls: Self, ver: Hashable, edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable], + distances: dict[Hashable, Union[int, float]] + ) -> bool: + """ + Completes the DFS step of Hopcroft-Karp. I.e.:
+ Adds all edges on every path with the minimal path length to matches if they would be in the symmetric + difference of matches and the set of edges on the union of the paths. + + :param ver: The vertex in the left partition of the bipartite graph the search starts from. + :type ver: Hashable + :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in + this format: Dictionary with keys being the vertices in the left partition and values being tuples + of vertices in the right partition.) + :type edges: dict[Hashable, tuple[Hashable]] + :param matches: The current matching of vertices in the left partition with the ones in the right partition. + :type matches: dict[Hashable, Hashable] + :param distances: The reference to the dictionary mapping vertices of the left partition to the minimal + number of vertices in the left partition that will be passed on a path from an unmatched vertex of the left + partition to the vertex that is the key. The values will be replaced with float("inf") to mark already + visited vertices. + :type distances: dict[Hashable, Union[int, float]] + + :returns: Whether or not a path from the unmatched vertex ver in the left partition to an unmatched vertex + in the right partition could still exist.
+ :rtype: bool + """ + # recursion base case: None always has a shortest possible path to itself + if ver is None: + return True + # iterate over all vertices connected to ver in the right partition + for ver2 in edges[ver]: + # if ver2 is on a path with minimal length and not all subtrees have been searched already + if distances[matches[ver2]] == distances[ver] + 1: + if cls._dfs_step(matches[ver], edges, matches, distances): + # add the edge to the matches and return true + matches[ver2] = ver + matches[ver] = ver2 + return True + # mark this vertex as completly searched + distances[ver] = float("inf") + return False + + @classmethod + def _hopcroft_karp( + cls: Self, verticies1: set[Hashable], verticies2: set[Hashable], edges: dict[Hashable, tuple[Hashable]] + ) -> int: + """ + Implementation of Hopcroft-Karp. I.e.:
+ Finds the maximal number of edges with the property that no two edges share an endpoint (and startpoint) in the given bipartite graph.
+ Note that verticies1 and verticies2 have to be disjoint. + + :param verticies1: The set of verticies in the left partition of the bipartite graph. + :type verticies1: set[Hashable] + :param verticies2: The set of verticies in the right partition of the bipartite graph. + :type verticies2: set[Hashable] + :param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in + this format: Dictionary with keys being the vertices in the left partition and values being tuples + of verticies in the right partition.) + :type edges: dict[Hashable, tuple[Hashable]] + + :returns: The number of edges. + :rtype: int + """ + # initializes the first matching. None is a imaginary vertex to denote unmatched vertices. + matches = dict() + for ver in verticies1: + matches[ver] = None + for ver in verticies2: + matches[ver] = None + matching_size = 0 + distances = dict() + while cls._bfs_step(verticies1, edges, matches, distances): + # while a alternating path from an unmatched vertex in the left partition exits + # recalculate the distances and + # iterate over all unmatched vertices in the left partition. 
+ for ver in verticies1: + if matches[ver] is None: + # create the new matches dict and if a new edge was added increase the size of the matching + if cls._dfs_step(ver, edges, matches, distances): + matching_size += 1 + # return the size of the matching + return matching_size def __ne__( self: Self, diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index aaeb548f..e1785b7a 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -197,6 +197,29 @@ def test_build_in_comparison(): assert li != li2 li[0] = {"@type": "schema:foobar", "@value": "bar"} assert li != li2 + li = ld_list([], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li2 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema2": "https://schema.org/"}]) + li.extend(["foo", "bar"]) + li2.extend(["bar", "foo"]) + assert li == li2 + li.append("bar") + li2.append("foo") + assert li != li2 + + +def test_hopcroft_karp(): + ver1 = {0, 1, 2, 3, 4} + ver2 = {10, 11, 12, 13, 14} + edges = {0: (10, 11), 1: (10, 14), 2: (12, 13), 3: (10, 14), 4: tuple([11])} + assert ld_list._hopcroft_karp(ver1, ver2, edges) == 4 + edges[4] = (11, 13) + assert ld_list._hopcroft_karp(ver1, ver2, edges) == 5 + ver1 = {0, 1, 2, 3, 4} + ver2 = {(0, 1, 3), (0, 4), (2, ), (2, 4), (1, 3)} + edges = { + 0: ((0, 1, 3), (0, 4)), 1: ((0, 1, 3), (1, 3)), 2: ((2,), (2, 4)), 3: ((0, 1, 3), (1, 3)), 4: ((0, 4), (2, 4)) + } + assert ld_list._hopcroft_karp(ver1, ver2, edges) == 5 def test_extend(): From 287d37a3af7aa1c495bacd2b4c600c13a9ad2795 Mon Sep 17 00:00:00 2001 From: Michael Fritzsche Date: Mon, 15 Dec 2025 10:55:06 +0100 Subject: [PATCH 26/26] added tests and fixed small bug and fixed typos --- src/hermes/model/types/ld_container.py | 17 ++----- src/hermes/model/types/ld_list.py | 24 +++++++--- .../model/types/test_ld_container.py | 36 ++++++++++++++- 
test/hermes_test/model/types/test_ld_list.py | 45 ++++++++++++++++--- 4 files changed, 96 insertions(+), 26 deletions(-) diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index a0a0bfa5..8ddb0876 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -224,7 +224,7 @@ def _to_expanded_json( :param value: The value that is to be expanded. Different types are expected based on the type of self:
  • If type(self) == ld_dict: value must be a dict
  • If type(self) == ld_list: value must be a list
- value will be exapnded as if it was the data_dict/ the item_list of self. + value will be expanded as if it was the data_dict/ the item_list of self. :type value: JSON_LD_VALUE :return: The expanded version of value i.e. the data_dict/ item_list of self if it had been value. @@ -293,14 +293,8 @@ def _to_expanded_json( for index in range(len(path) - 1, 0, -1): current_data = current_data[path[index]] # replace the data_dict/ item_list so that value is now inside of the ld_value of parent and store the old value - if current_data == []: - # itemlist of an empty ld_list: - # The item_list can't be replaced like in all other cases - self_data = None - current_data.append(value) - else: - self_data = current_data[path[0]] - current_data[path[0]] = value + self_data = current_data[path[0]] + current_data[path[0]] = value # expand the ld_value of parent to implicitly expand value # important the ld_value of parent is not modified because the processor makes a deep copy @@ -310,10 +304,7 @@ def _to_expanded_json( ) # restore the data_dict/ item_list to its former state - if self_data is not None: - current_data[path[0]] = self_data - else: - current_data.clear() + current_data[path[0]] = self_data # use the path to get the expansion of value for index in range(len(path) - 1, -1, -1): diff --git a/src/hermes/model/types/ld_list.py b/src/hermes/model/types/ld_list.py index 490ac4b3..07cf3248 100644 --- a/src/hermes/model/types/ld_list.py +++ b/src/hermes/model/types/ld_list.py @@ -59,8 +59,12 @@ def __init__( :return: :rtype: None - :raises ValueError: bla - :raises ValueError: bla + :raises ValueError: If the given key is not a string or None was given. + :raises ValueError: If the given data is not a list. + :raises ValueError: If the data represents an unexpanded @set. I.e. is of the form [{"@set": [...]}] + :raises ValueError: If the given key is "@type" but the container_type not "@set" + or a value in the item_list not a string. 
+ :raises ValueError: If the given key is not "@type" and any value in the item_list not a dict. """ # check for validity of data if not isinstance(key, str): @@ -272,8 +276,16 @@ def __eq__( other = [other] if isinstance(other, list): if ld_list.is_ld_list(other): - other = ld_list.get_item_list_from_container(other[0]) - other = self.from_list(other, parent=self.parent, key=self.key, context=self.context) + if "@list" in other[0]: + cont = "@list" + elif "@graph" in other[0]: + cont = "@graph" + else: + cont = "@set" + other = other[0][cont] + else: + cont = "@set" + other = self.from_list(other, parent=self.parent, key=self.key, context=self.context, container_type=cont) # check if the length matches if len(self.item_list) != len(other.item_list): @@ -338,7 +350,7 @@ def __eq__( # swap order if first try returned NotImplemented res = other_item.__eq__(item) # if one of both comparisons returned true the elements are equal - if res: + if res is not NotImplemented and res: equality_pairs[index] += [other_index] if len(equality_pairs[index]) == 0: # there exists no element in other that is equal to item @@ -624,7 +636,7 @@ def from_list( :type value: list[JSON_LD_VALUE | BASIC_TYPE | TIME_TYPE] :param parent: The parent container of the new ld_list.
If value is assimilated by parent druing JSON-LD expansion parent is extended by value and parent is returned. - :type parent: ls_container | None + :type parent: ld_container | None :param key: The key into the inner most parent container representing a dict of the new ld_list. :type: key: str | None :param context: The context for the new list (is will also inherit the context of parent).
diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py index ddc98405..f73fdcd9 100644 --- a/test/hermes_test/model/types/test_ld_container.py +++ b/test/hermes_test/model/types/test_ld_container.py @@ -6,7 +6,7 @@ # SPDX-FileContributor: Michael Meinel # SPDX-FileContributor: Michael Fritzsche -from datetime import datetime +from datetime import datetime, time import pytest @@ -121,6 +121,11 @@ def test_to_python_datetime_value(self, mock_context): "@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime" }) == "2022-02-22T00:00:00" # TODO: #434 typed date is returned as string instead of date + def test_to_python_error(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + with pytest.raises(TypeError): + cont._to_python("http://spam.eggs/eggs", set()) + def test_to_expanded_id(self, mock_context): cont = ld_dict([{}], context=[mock_context]) assert cont._to_expanded_json({"@id": f"{self.url}identifier"}) == {"@id": f"{self.url}identifier"} @@ -156,6 +161,35 @@ def test_to_expanded_datetime_value(self, mock_context): assert cont._to_expanded_json({"eggs": datetime(2022, 2, 22)}) == {"http://spam.eggs/eggs": [{"@list": [ {"@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime"} ]}]} + cont = ld_dict([{}], context=[mock_context]) + assert cont._to_expanded_json({"eggs": time(5, 4, 3)}) == {"http://spam.eggs/eggs": [{"@list": [ + {"@value": "05:04:03", "@type": "https://schema.org/Time"} + ]}]} + + def test_compact(self, mock_context): + cont = ld_container([{"http://spam.eggs/eggs": [{"@list": [{"@value": "a"}]}], + "http://spam.eggs/spam": [{"@value": "bacon"}]}]) + assert cont.compact([mock_context]) == {"@context": mock_context, "spam": "bacon", "eggs": ["a"]} + + def test_is_ld_id(self): + assert ld_container.is_ld_id([{"@id": "foo"}]) + assert not ld_container.is_ld_id([{"@id": "foo", "bar": "barfoo"}]) + assert not ld_container.is_ld_id({"@id": 
"foo"}) + assert not ld_container.is_ld_id([{"bar": "foo"}]) + + def test_is_ld_value(self): + assert ld_container.is_ld_value([{"@value": "foo"}]) + assert ld_container.is_ld_value([{"@value": "foo", "bar": "barfoo"}]) + assert not ld_container.is_ld_value({"@value": "foo"}) + assert not ld_container.is_ld_value([{"bar": "foo"}]) + + def test_is_typed_ld_value(self): + assert ld_container.is_typed_ld_value([{"@value": "foo", "@type": "bar"}]) + assert ld_container.is_typed_ld_value([{"@value": "foo", "@type": "bar", "bar": "barfoo"}]) + assert not ld_container.is_typed_ld_value([{"@type": "bar"}]) + assert not ld_container.is_typed_ld_value([{"@value": "foo"}]) + assert not ld_container.is_typed_ld_value({"@value": "foo", "@type": "bar"}) + assert not ld_container.is_typed_ld_value([{"bar": "foo"}]) def test_are_values_equal(self): assert ld_container.are_values_equal({"@id": "foo"}, {"@id": "foo"}) diff --git a/test/hermes_test/model/types/test_ld_list.py b/test/hermes_test/model/types/test_ld_list.py index e1785b7a..fc9ca6a5 100644 --- a/test/hermes_test/model/types/test_ld_list.py +++ b/test/hermes_test/model/types/test_ld_list.py @@ -14,17 +14,15 @@ def test_undefined_list(): with pytest.raises(ValueError): - ld_list([{}]) + ld_list({}, key="foo") with pytest.raises(ValueError): - ld_list([{"spam": [{"@value": "bacon"}]}]) + ld_list([{"@set": [{"@value": "bacon"}]}], key="foo") with pytest.raises(ValueError): - ld_list([{"@list": [0], "spam": [{"@value": "bacon"}]}]) + ld_list([{"@value": "bacon"}], key="@type") with pytest.raises(ValueError): - ld_list([{"@list": ["a", "b"], "@set": ["foo", "bar"]}]) + ld_list(["bacon"], key="eggs") with pytest.raises(ValueError): ld_list([{"@list": ["a", "b"]}]) # no given key - with pytest.raises(ValueError): - ld_list([{"@list": ["a", "b"]}, {"@set": ["foo", "bar"]}]) def test_list_basics(): @@ -32,6 +30,15 @@ def test_list_basics(): li = ld_list(li_data, key="foo") assert li._data is li_data assert li.item_list is 
li_data[0]["@list"] + li_data = [{"@graph": [{"@value": "bar"}]}] + li = ld_list(li_data, key="foo") + assert li._data is li_data + assert li.item_list is li_data[0]["@graph"] + li_data = [{"@value": "bar"}] + li = ld_list(li_data, key="foo") + assert li._data is li_data + assert li.item_list is li_data + assert li.container_type == "@set" def test_build_in_get(): @@ -135,6 +142,12 @@ def test_build_in_iter(): def test_append(): + li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li.append(ld_list([{"@value": "foo"}], key="https://schema.org/name")) + assert isinstance(li[0], ld_list) and li[0].container_type == "@list" + li = ld_list([{"@graph": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li.append({"schema:name": "foo"}) + assert li[0] == {"https://schema.org/name": "foo"} and len(li) == 1 li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) li.append("foo") assert li[0] == "foo" and li.item_list[0] == {"@value": "foo"} and len(li) == 1 @@ -153,6 +166,7 @@ def test_append(): def test_build_in_contains(): li = ld_list([], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + assert [] in li li.append("foo") li.append({"@type": "A", "schema:name": "a"}) assert "foo" in li and {"@type": "A", "schema:name": "a"} in li @@ -162,9 +176,18 @@ def test_build_in_contains(): li.append({"@id": "schema:foo", "schema:name": "foo"}) assert {"@id": "schema:foo"} in li and {"@id": "schema:foo", "schema:name": "foobar"} in li assert {"schema:name": "foo"} in li + li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li.append("foo") + assert "foo" in li def test_build_in_comparison(): + li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) + li.append({"@id": "foo", "schema:bar": "foobar"}) + assert 
[{"@list": [{"@id": "foo", "schema:bar": "barfoo"}]}] == li + assert [{"@list": [{"@id": "bar", "schema:bar": "foobar"}]}] != li + assert [{"@set": [{"@id": "foo", "schema:bar": "barfoo"}]}] == li + assert [{"@graph": [{"@id": "foo", "schema:bar": "barfoo"}]}] == li li = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}]) li2 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema2": "https://schema.org/"}]) assert li == [] and [] == li @@ -269,6 +292,10 @@ def test_is_container(): def test_from_list(): + with pytest.raises(ValueError): + ld_list.from_list([], key="@type", container_type="@list") + with pytest.raises(ValueError): + ld_list.from_list([], container_type="foo") li = ld_list.from_list([], key="schema:foo") assert li.item_list == li.context == [] and li.parent is li.index is None and li.key == "schema:foo" assert li._data == [] and li.container_type == "@set" @@ -290,3 +317,9 @@ def test_get_item_list_from_container(): assert ld_list.get_item_list_from_container({"@graph": ["a"]}) == ["a"] with pytest.raises(ValueError): ld_list.get_item_list_from_container(["a"]) + with pytest.raises(ValueError): + ld_list.get_item_list_from_container({"@list": [], "@set": []}) + with pytest.raises(ValueError): + ld_list.get_item_list_from_container({"@list": {}}) + with pytest.raises(ValueError): + ld_list.get_item_list_from_container({"foo": []})