diff --git a/.github/workflows/integration-tests-mssql.yml b/.github/workflows/integration-tests-mssql.yml
index ba2ea6b..bd7bb87 100644
--- a/.github/workflows/integration-tests-mssql.yml
+++ b/.github/workflows/integration-tests-mssql.yml
@@ -9,7 +9,7 @@ on:
jobs:
integration-tests:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-22.04
services:
mssql:
image: mcr.microsoft.com/mssql/server:2019-latest
diff --git a/.github/workflows/integration-tests-mysql.yml b/.github/workflows/integration-tests-mysql.yml
index f03ef24..2adef2d 100644
--- a/.github/workflows/integration-tests-mysql.yml
+++ b/.github/workflows/integration-tests-mysql.yml
@@ -9,7 +9,7 @@ on:
jobs:
integration-tests:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-22.04
services:
mysql:
image: mysql:8.0
diff --git a/.github/workflows/integration-tests-postgres.yml b/.github/workflows/integration-tests-postgres.yml
index de1f12f..b74b2ca 100644
--- a/.github/workflows/integration-tests-postgres.yml
+++ b/.github/workflows/integration-tests-postgres.yml
@@ -9,7 +9,7 @@ on:
jobs:
integration-tests:
- runs-on: ubuntu-latest
+ runs-on: ubuntu-22.04
container: python:3.12-bookworm
services:
postgres:
diff --git a/docs/how_it_works.md b/docs/how_it_works.md
index 8704e82..c21dfc7 100644
--- a/docs/how_it_works.md
+++ b/docs/how_it_works.md
@@ -123,8 +123,10 @@ original one.
### Recursive XSD
-Recursive XML schemas are not supported, because most of the time they will result in cycles in foreign key constraints
-dependencies, which we cannot handle easily.
+Recursive XML schemas are not fully supported, because they result in cycles in table dependencies, which would make
+the process much more complex. Whenever a field that would introduce a dependency cycle is detected in the XSD, it is
+discarded with a warning, which means that the corresponding data in XML files will not be imported. The rest of the
+data should be processed correctly.
### Mixed content elements
diff --git a/src/xml2db/document.py b/src/xml2db/document.py
index 1cfc160..d0a4f97 100644
--- a/src/xml2db/document.py
+++ b/src/xml2db/document.py
@@ -171,17 +171,24 @@ def _extract_node(
record["xml2db_row_number"] = row_number
# build record from fields for columns and n-1 relations
- for field_type, key, _ in model_table.fields:
+ for field_type, key, field in model_table.fields:
if field_type == "col":
- if key in content:
+ content_key = (
+ (f"{key[:-5]}__attr" if field.has_suffix else f"{key}__attr")
+ if field.is_attr
+ else key
+ )
+ if content_key in content:
if model_table.columns[key].data_type in ["decimal", "float"]:
- val = [float(v) for v in content[key]]
+ val = [float(v) for v in content[content_key]]
elif model_table.columns[key].data_type == "integer":
- val = [int(v) for v in content[key]]
+ val = [int(v) for v in content[content_key]]
elif model_table.columns[key].data_type == "boolean":
- val = [v == "true" or v == "1" for v in content[key]]
+ val = [
+ v == "true" or v == "1" for v in content[content_key]
+ ]
else:
- val = content[key]
+ val = content[content_key]
if len(val) == 1:
record[key] = val[0]
@@ -320,20 +327,29 @@ def _build_node(node_type: str, node_pk: int) -> tuple:
record = data_index[node_type]["records"][node_pk]
for field_type, rel_name, rel in tb.fields:
if field_type == "col" and record[rel_name] is not None:
+ content_key = (
+ (
+ f"{rel_name[:-5]}__attr"
+ if rel.has_suffix
+ else f"{rel_name}__attr"
+ )
+ if rel.is_attr
+ else rel_name
+ )
if rel.data_type in [
"decimal",
"float",
]: # remove trailing ".0" for decimal and float
- content[rel_name] = [
+ content[content_key] = [
value.rstrip("0").rstrip(".") if "." in value else value
for value in str(record[rel_name]).split(",")
]
elif isinstance(record[rel_name], datetime.datetime):
- content[rel_name] = [
+ content[content_key] = [
record[rel_name].isoformat(timespec="milliseconds")
]
else:
- content[rel_name] = (
+ content[content_key] = (
list(csv.reader([str(record[rel_name])], escapechar="\\"))[
0
]
diff --git a/src/xml2db/model.py b/src/xml2db/model.py
index 31836f0..3038189 100644
--- a/src/xml2db/model.py
+++ b/src/xml2db/model.py
@@ -70,7 +70,7 @@ class DataModel:
def __init__(
self,
xsd_file: str,
- short_name: str = None,
+ short_name: str = "DocumentRoot",
long_name: str = None,
base_url: str = None,
model_config: dict = None,
@@ -226,8 +226,7 @@ def _build_model(self):
"""
# parse the XML schema recursively and hold a reference to the head table
root_table = self._parse_tree(
- self.xml_schema[0] if len(self.xml_schema) == 1 else self.xml_schema,
- is_root_table=True,
+ self.xml_schema[0] if len(self.xml_schema) == 1 else self.xml_schema
)
self.root_table = root_table.type_name
# compute a text representation of the original data model and store it
@@ -273,9 +272,7 @@ def _build_model(self):
for tb in self.fk_ordered_tables:
tb.build_sqlalchemy_tables()
- def _parse_tree(
- self, parent_node: xmlschema.XsdElement, is_root_table: bool = False
- ):
+ def _parse_tree(self, parent_node: xmlschema.XsdElement, nodes_path: list = None):
"""Parse a node of an XML schema recursively and create a target data model without any simplification
We parse the XSD tree recursively to create for each node (basically a complex type in the XSD) an equivalent \
@@ -289,7 +286,7 @@ def _parse_tree(
Args:
parent_node: the current XSD node being parsed
- is_root_table: True if this is the root table
+            nodes_path: a list of node types from the root node
"""
# find current node type and name and returns corresponding table if it already exists
@@ -301,12 +298,16 @@ def _parse_tree(
if parent_type is None:
parent_type = parent_node.local_name
+ nodes_path = (nodes_path if nodes_path else []) + [parent_type]
+
# if this type has already been encountered, stop here and return existing table
if parent_type in self.tables:
parent_table = self.tables[parent_type]
return parent_table
- # elements names and types should be bijective. If an element name is used for different types,
+    # For database tables we use element names rather than XSD types, under the assumption that they are often
+    # more meaningful, given that they are the ones that appear in XML documents. However, the same name can be used
+    # for different XSD types, so if an element name is used for different types,
# we add a suffix to the name to make it unique again (using a dict to keep the name/type association)
parent_name = (
parent_node.local_name
@@ -324,7 +325,7 @@ def _parse_tree(
parent_table = self._create_table_model(
parent_name,
parent_type,
- is_root_table,
+ len(nodes_path) == 1,
isinstance(parent_node, xmlschema.XMLSchema),
)
self.tables[parent_type] = parent_table
@@ -363,6 +364,13 @@ def recurse_parse_simple_type(elem_type):
if elem_type.base_type
else recurse_parse_simple_type(elem_type.member_types)
)
+ if elem_type.is_list():
+ return (
+ "string",
+ 0,
+ None,
+ elem_type.allow_empty,
+ )
if elem_type.is_restriction():
dt = elem_type.base_type.local_name
mil = elem_type.min_length
@@ -384,7 +392,12 @@ def recurse_parse_simple_type(elem_type):
else None
)
ae = ae and bt_ae if ae is not None and bt_ae is not None else None
- if elem_type.enumeration is not None and dt in ["string", "NMTOKEN", "duration", "token"]:
+ if elem_type.enumeration is not None and dt in [
+ "string",
+ "NMTOKEN",
+ "duration",
+ "token",
+ ]:
mil = min([len(val) for val in elem_type.enumeration])
mal = max([len(val) for val in elem_type.enumeration])
return dt, mil, mal, ae
@@ -410,25 +423,31 @@ def get_occurs(particle):
),
]
- # go through item attributes and add them as columns
+ # go through item attributes and add them as columns, adding a suffix if an element with the same name exists
+ children_names = None
for attrib_name, attrib in parent_node.attributes.items():
+ if children_names is None:
+ children_names = [child.local_name for child in parent_node]
(
data_type,
min_length,
max_length,
allow_empty,
) = recurse_parse_simple_type([attrib.type])
+ suffix = attrib_name in children_names
parent_table.add_column(
- f"{attrib_name}",
+ f"{attrib_name}{'_attr' if suffix else ''}",
data_type,
[0, 1],
min_length,
max_length,
True,
+ suffix,
False,
allow_empty,
None,
)
+
nested_containers = []
# go through the children to add either arguments either relations to the current element
for child in parent_node:
@@ -454,6 +473,7 @@ def get_occurs(particle):
if child.parent
and child.parent.max_occurs != 1
and child.parent.model != "choice"
+ and child.max_occurs == 1
else None
),
)
@@ -482,32 +502,39 @@ def get_occurs(particle):
max_length,
False,
False,
+ False,
allow_empty,
nested_containers[-1][1],
)
elif ct.is_complex():
- child_table = self._parse_tree(child)
- child_table.model_group = (
- "choice"
- if ct.model_group and ct.model_group.model == "choice"
- else "sequence"
- )
- occurs = get_occurs(child)
- if child.is_single():
- parent_table.add_relation_1(
- child.local_name,
- child_table,
- occurs,
- nested_containers[-1][1],
+ # ignoring recursive definitions by skipping these fields
+ if child.type.local_name in nodes_path:
+ logger.warning(
+ f"type '{child.type.local_name}' contains a recursive definition"
)
else:
- parent_table.add_relation_n(
- child.local_name,
- child_table,
- occurs,
- nested_containers[-1][1],
+ child_table = self._parse_tree(child, nodes_path)
+ child_table.model_group = (
+ "choice"
+ if ct.model_group and ct.model_group.model == "choice"
+ else "sequence"
)
+ occurs = get_occurs(child)
+ if occurs[1] == 1:
+ parent_table.add_relation_1(
+ child.local_name,
+ child_table,
+ occurs,
+ nested_containers[-1][1],
+ )
+ else:
+ parent_table.add_relation_n(
+ child.local_name,
+ child_table,
+ occurs,
+ nested_containers[-1][1],
+ )
else:
raise ValueError("unknown case; please check")
else:
@@ -534,6 +561,7 @@ def get_occurs(particle):
min_length,
max_length,
False,
+ False,
True,
allow_empty,
None,
@@ -544,31 +572,19 @@ def get_occurs(particle):
def _repr_tree(
self,
parent_table: Union[DataModelTableReused, DataModelTableDuplicated],
- visited_nodes: Union[set, None] = None,
):
"""Build a text representation of the data model tree
Args:
parent_table: the current data model table object
"""
- if visited_nodes is None:
- visited_nodes = set()
- else:
- visited_nodes = {item for item in visited_nodes}
- visited_nodes.add(parent_table.name)
for field_type, name, field in parent_table.fields:
if field_type == "col":
yield f"{field.name}{field.occurs}: {field.data_type}"
- elif field_type == "rel1":
- mg = " (choice)" if field.other_table.model_group == "choice" else ""
- yield f"{field.name}{field.occurs}{mg}:{' ...' if field_type in visited_nodes else ''}"
- if field.other_table.name not in visited_nodes:
- for line in self._repr_tree(field.other_table, visited_nodes):
- yield f" {line}"
- elif field_type == "reln":
+ else:
mg = " (choice)" if field.other_table.model_group == "choice" else ""
- yield f"{field.name}{field.occurs}{mg}:{' ...' if field_type in visited_nodes else ''}"
- for line in self._repr_tree(field.other_table, visited_nodes):
+ yield f"{field.name}{field.occurs}{mg}:"
+ for line in self._repr_tree(field.other_table):
yield f" {line}"
def get_entity_rel_diagram(self, text_context: bool = True) -> str:
diff --git a/src/xml2db/table/column.py b/src/xml2db/table/column.py
index bf450cb..17daba6 100644
--- a/src/xml2db/table/column.py
+++ b/src/xml2db/table/column.py
@@ -36,11 +36,18 @@ def types_mapping_default(temp: bool, col: "DataModelColumn") -> Any:
return Double
if col.data_type == "dateTime":
return DateTime(timezone=True)
- if col.data_type == "integer" or col.data_type == "int":
+ if col.data_type in [
+ "integer",
+ "int",
+ "nonPositiveInteger",
+ "nonNegativeInteger",
+ "positiveInteger",
+ "negativeInteger",
+ ]:
return Integer
if col.data_type == "boolean":
return Boolean
- if col.data_type == "byte":
+ if col.data_type in ["short", "byte"]:
return SmallInteger
if col.data_type == "long":
return BigInteger
@@ -77,20 +84,10 @@ def types_mapping_mssql(temp: bool, col: "DataModelColumn") -> Any:
"""
if col.occurs[1] != 1:
return mssql.VARCHAR(8000)
- if col.data_type in ["decimal", "float", "double"]:
- return Double
if col.data_type == "dateTime":
# using the DATETIMEOFFSET directly in the temporary table caused issues when inserting data in the target
# table with INSERT INTO SELECT converts datetime VARCHAR to DATETIMEOFFSET without errors
return mssql.VARCHAR(100) if temp else mssql.DATETIMEOFFSET
- if col.data_type == "integer" or col.data_type == "int":
- return Integer
- if col.data_type == "boolean":
- return Boolean
- if col.data_type == "byte":
- return SmallInteger
- if col.data_type == "long":
- return BigInteger
if col.data_type == "date":
return mssql.VARCHAR(16)
if col.data_type == "time":
@@ -106,12 +103,7 @@ def types_mapping_mssql(temp: bool, col: "DataModelColumn") -> Any:
if col.max_length == col.min_length:
return mssql.BINARY(col.max_length)
return mssql.VARBINARY(col.max_length)
- else:
- logger.warning(
- f"unknown type '{col.data_type}' for column '{col.name}', defaulting to VARCHAR(1000) "
- f"(this can be overridden by providing a field type in the configuration)"
- )
- return mssql.VARCHAR(1000)
+ return types_mapping_default(temp, col)
def types_mapping_mysql(temp: bool, col: "DataModelColumn") -> Any:
@@ -167,6 +159,7 @@ def __init__(
min_length: int,
max_length: Union[int, None],
is_attr: bool,
+ has_suffix: bool,
is_content: bool,
allow_empty: bool,
ngroup: Union[int, None],
@@ -181,6 +174,7 @@ def __init__(
self.min_length = min_length
self.max_length = max_length
self.is_attr = is_attr
+ self.has_suffix = has_suffix
self.is_content = is_content
self.allow_empty = allow_empty
self.ngroup = ngroup
diff --git a/src/xml2db/table/reused_table.py b/src/xml2db/table/reused_table.py
index 5fb954a..ce3d62f 100644
--- a/src/xml2db/table/reused_table.py
+++ b/src/xml2db/table/reused_table.py
@@ -71,6 +71,7 @@ def get_col(temp=False):
False,
False,
False,
+ False,
None,
self.config,
self.data_model,
diff --git a/src/xml2db/table/table.py b/src/xml2db/table/table.py
index cb9d65b..65d831d 100644
--- a/src/xml2db/table/table.py
+++ b/src/xml2db/table/table.py
@@ -130,6 +130,7 @@ def add_column(
min_length: int,
max_length: Union[int, None],
is_attr: bool,
+ has_suffix: bool,
is_content: bool,
allow_empty: bool,
ngroup: Union[str, None],
@@ -143,6 +144,7 @@ def add_column(
min_length: minimum length
max_length: maximum length
is_attr: is XML attribute or element?
+ has_suffix: for an attribute, do we need the '_attr' suffix?
is_content: is content of a mixed type element?
allow_empty: is nullable?
ngroup: a string id signaling that the column belongs to a nested sequence
@@ -155,6 +157,7 @@ def add_column(
min_length,
max_length,
is_attr,
+ has_suffix,
is_content,
allow_empty,
ngroup,
diff --git a/src/xml2db/table/transformed_table.py b/src/xml2db/table/transformed_table.py
index 71e7575..031d858 100644
--- a/src/xml2db/table/transformed_table.py
+++ b/src/xml2db/table/transformed_table.py
@@ -76,6 +76,7 @@ def _transform_to_choice(self) -> None:
False,
False,
False,
+ False,
None,
self.config,
self.data_model,
@@ -89,6 +90,7 @@ def _transform_to_choice(self) -> None:
max(max_lengths) if all(e is not None for e in max_lengths) else None,
False,
False,
+ False,
any(allow_empty),
None,
self.config,
@@ -193,6 +195,7 @@ def _elevate_relation_1(
child_field.min_length,
child_field.max_length,
child_field.is_attr,
+ child_field.has_suffix,
child_field.is_content,
child_field.allow_empty,
child_field.ngroup,
@@ -276,9 +279,12 @@ def simplify_table(self) -> Tuple[dict, dict]:
# if the table can be transformed, stop here
if self._is_table_choice_transform_applicable():
+ fields_transform = {}
+ for col in self.columns.values():
+ fields_transform[(self.type_name, col.name)] = (None, "join")
self._transform_to_choice()
self.is_simplified = True
- return {self.type_name: "choice"}, {}
+ return {self.type_name: "choice"}, fields_transform
# loop through field to transform them if need be
out_fields = []
diff --git a/src/xml2db/xml_converter.py b/src/xml2db/xml_converter.py
index 71a5574..ab54a7c 100644
--- a/src/xml2db/xml_converter.py
+++ b/src/xml2db/xml_converter.py
@@ -128,31 +128,36 @@ def _parse_xml_node(
key
!= "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation"
):
- content[key] = [val]
+ content[f"{key}__attr"] = [val.strip() if val.strip() else val]
if node.text and node.text.strip():
content["value"] = [node.text.strip()]
for element in node.iterchildren():
- key = element.tag.split("}")[1] if "}" in element.tag else element.tag
- node_type_key = (node_type, key)
- value = None
- if element.text and element.text.strip():
- value = element.text
- transform = self.model.fields_transforms.get(node_type_key, (None, "join"))[
- 1
- ]
- if transform != "join":
- value = self._parse_xml_node(
- self.model.fields_transforms[node_type_key][0],
- element,
- transform not in ["elevate", "elevate_wo_prefix"],
- hash_maps,
- )
- if key in content:
- content[key].append(value)
- else:
- content[key] = [value]
+ if isinstance(element.tag, str):
+ key = element.tag.split("}")[1] if "}" in element.tag else element.tag
+ node_type_key = (node_type, key)
+ value = None
+ if element.text:
+ value = (
+ element.text.strip() if element.text.strip() else element.text
+ )
+ if node_type_key not in self.model.fields_transforms:
+ # skip the node if it is not in the data model
+ continue
+ transform = self.model.fields_transforms[node_type_key][1]
+ if transform != "join":
+ value = self._parse_xml_node(
+ self.model.fields_transforms[node_type_key][0],
+ element,
+ transform not in ["elevate", "elevate_wo_prefix"],
+ hash_maps,
+ )
+ if value is not None:
+ if key in content:
+ content[key].append(value)
+ else:
+ content[key] = [value]
node = self._transform_node(node_type, content)
@@ -189,6 +194,7 @@ def _parse_iterative(
hash_maps = {}
joined_values = False
+ skipped_nodes = 0
for event, element in etree.iterparse(
xml_file,
recover=recover,
@@ -196,12 +202,17 @@ def _parse_iterative(
remove_blank_text=True,
):
key = element.tag.split("}")[1] if "}" in element.tag else element.tag
- if event == "start":
+
+ if event == "start" and skipped_nodes > 0:
+ skipped_nodes += 1
+
+ elif event == "start":
if nodes_stack[-1][0]:
node_type_key = (nodes_stack[-1][0], key)
- node_type, transform = self.model.fields_transforms.get(
- node_type_key, (None, "join")
- )
+ if node_type_key not in self.model.fields_transforms:
+ skipped_nodes += 1
+ continue
+ node_type, transform = self.model.fields_transforms[node_type_key]
else:
node_type, transform = self.model.root_table, None
joined_values = transform == "join"
@@ -212,28 +223,41 @@ def _parse_iterative(
attrib_key
!= "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation"
):
- content[attrib_key] = [attrib_val]
+ content[f"{attrib_key}__attr"] = [
+ attrib_val.strip() if attrib_val.strip() else attrib_val
+ ]
nodes_stack.append((node_type, content))
+ elif event == "end" and skipped_nodes > 0:
+ skipped_nodes -= 1
+
elif event == "end":
- # joined_values was set with the previous "start" event just before
+ # joined_values was set with the previous "start" event just before and corresponds to lists of simple
+ # type elements
if joined_values:
+ value = None
if element.text:
- if key in nodes_stack[-1][1]:
- nodes_stack[-1][1][key].append(element.text)
+ if element.text.strip():
+ value = element.text.strip()
else:
- nodes_stack[-1][1][key] = [element.text]
+ value = element.text
+ if key in nodes_stack[-1][1]:
+ nodes_stack[-1][1][key].append(value)
+ else:
+ nodes_stack[-1][1][key] = [value]
+
+ # else, we have completed a complex type node
else:
node = nodes_stack.pop()
if nodes_stack[-1][0]:
node_type_key = (nodes_stack[-1][0], key)
- node_type, transform = self.model.fields_transforms.get(
- node_type_key, (None, "join")
- )
+ node_type, transform = self.model.fields_transforms[
+ node_type_key
+ ]
else:
node_type, transform = self.model.root_table, None
- if element.text:
- node[1]["value"] = [element.text]
+ if element.text and element.text.strip():
+ node[1]["value"] = [element.text.strip()]
node = self._transform_node(*node)
if transform not in ["elevate", "elevate_wo_prefix"]:
node = self._compute_hash_deduplicate(node, hash_maps)
@@ -292,12 +316,28 @@ def _compute_hash_deduplicate(self, node: tuple, hash_maps: dict) -> tuple:
A tuple of (node_type, content, hash) representing a node after deduplication
"""
node_type, content = node
+ if node_type not in self.model.tables:
+ return "", None, b""
table = self.model.tables[node_type]
h = self.model.model_config["record_hash_constructor"]()
- for field_type, name, _ in table.fields:
+ for field_type, name, field in table.fields:
if field_type == "col":
- h.update(str(content.get(name, None)).encode("utf-8"))
+ if field.is_attr:
+ h.update(
+ str(
+ content.get(
+ (
+ f"{name[:-5]}__attr"
+ if field.has_suffix
+ else f"{name}__attr"
+ ),
+ None,
+ )
+ ).encode("utf-8")
+ )
+ else:
+ h.update(str(content.get(name, None)).encode("utf-8"))
elif field_type == "rel1":
h.update(content[name][0][2] if name in content else b"")
elif field_type == "reln":
@@ -419,10 +459,17 @@ def check_transformed_node(node_type, element):
attributes = {}
text_content = None
if field_type == "col":
- if rel_name in content:
- if rel.is_attr:
- attributes[rel.name_chain[-1][0]] = content[rel_name][0]
- elif rel.is_content:
+ if rel.is_attr:
+ if rel.has_suffix and f"{rel_name[:-5]}__attr" in content:
+ attributes[rel.name_chain[-1][0][:-5]] = content[
+ f"{rel_name[:-5]}__attr"
+ ][0]
+ elif not rel.has_suffix and f"{rel_name}__attr" in content:
+ attributes[rel.name_chain[-1][0]] = content[
+ f"{rel_name}__attr"
+ ][0]
+ elif rel_name in content:
+ if rel.is_content:
text_content = content[rel_name][0]
else:
for field_value in content[rel_name]:
@@ -446,7 +493,8 @@ def check_transformed_node(node_type, element):
if prev_ngroup and rel.ngroup != prev_ngroup:
for ngroup_children in zip_longest(*ngroup_stack):
for child in ngroup_children:
- nodes_stack[-1][1].append(child)
+ if child is not None:
+ nodes_stack[-1][1].append(child)
ngroup_stack = []
prev_ngroup = rel.ngroup
if len(children) > 0:
diff --git a/tests/fixtures.py b/tests/conftest.py
similarity index 61%
rename from tests/fixtures.py
rename to tests/conftest.py
index 55dbee9..343fecb 100644
--- a/tests/fixtures.py
+++ b/tests/conftest.py
@@ -4,6 +4,15 @@
from xml2db import DataModel
+models_path = "tests/sample_models"
+
+
+def list_xml_path(test_config, key):
+ path = os.path.join(models_path, test_config["id"], key)
+ if os.path.isdir(path):
+ return [os.path.join(path, f) for f in os.listdir(path)]
+ return []
+
@pytest.fixture
def conn_string():
@@ -14,7 +23,9 @@ def conn_string():
def setup_db_model(conn_string, model_config):
db_schema = f"test_xml2db"
model = DataModel(
- xsd_file=model_config.get("xsd_path"),
+ xsd_file=str(
+ os.path.join(models_path, model_config["id"], model_config["xsd"])
+ ),
short_name=model_config.get("id"),
connection_string=conn_string,
db_schema=db_schema,
diff --git a/tests/sample_models/models.py b/tests/sample_models/models.py
index 666fe58..bda579c 100644
--- a/tests/sample_models/models.py
+++ b/tests/sample_models/models.py
@@ -18,8 +18,7 @@ def wrapped():
"id": "orders",
"long_name": "A simple model for shipment orders",
"description": "This model was made up to be a simple case which could represent real business cases.",
- "xsd_path": "tests/sample_models/orders/orders.xsd",
- "xml_path": "tests/sample_models/orders/xml",
+ "xsd": "orders.xsd",
"versions": [
{
"config": {
@@ -80,8 +79,7 @@ def wrapped():
"long_name": "Data model for reporting standard contracts in the European energy markets",
"description": "This model is one of the official models published by the Agency for the cooperation of energy"
"regulators to report energy markets transaction data.",
- "xsd_path": "tests/sample_models/table1/Table1_V2.xsd",
- "xml_path": "tests/sample_models/table1/xml",
+ "xsd": "Table1_V2.xsd",
"versions": [
{
"config": {
@@ -137,8 +135,7 @@ def wrapped():
"Xunit XSD was taken from https://github.com/jenkinsci/xunit-plugin/blob/master/src/main/resources/org/jenkinsci/plugins/xunit/types/model/xsd/junit-10.xsd"
" and amended to remove its recursive nature."
),
- "xsd_path": "tests/sample_models/junit10/junit-10.xsd",
- "xml_path": "tests/sample_models/junit10/xml",
+ "xsd": "junit-10.xsd",
"versions": [
{
"config": {
@@ -185,7 +182,7 @@ def _generate_models_output():
for model_config in models:
for i in range(len(model_config["versions"])):
- xsd_path = os.path.join("../../", model_config["xsd_path"])
+ xsd_path = os.path.join(model_config["id"], model_config["xsd"])
for dialect in [d.dialect() for d in [postgresql, mssql, mysql]]:
model = DataModel(
xsd_path,
diff --git a/tests/sample_models/orders/base_types.xsd b/tests/sample_models/orders/base_types.xsd
index 20d4740..cadefa7 100644
--- a/tests/sample_models/orders/base_types.xsd
+++ b/tests/sample_models/orders/base_types.xsd
@@ -1,5 +1,6 @@
@@ -32,4 +33,10 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/sample_models/orders/equivalent_xml/order1.xml b/tests/sample_models/orders/equivalent_xml/order1.xml
new file mode 100644
index 0000000..4ed059c
--- /dev/null
+++ b/tests/sample_models/orders/equivalent_xml/order1.xml
@@ -0,0 +1,46 @@
+
+
+ 2
+
+
+ Bob
+ string
+ string
+ 102832
+ FR
+ +1732897354
+ +1732323984
+
+ JIDAZIO786DAZH
+
+
+ -
+
+ product 1
+ regular
+
+ 13
+ 340.23
+ EUR
+
+ -
+
+ product 2
+ regular
+
+ string
+ 8
+ 56.2
+ EUR
+
+ -
+
+ product 1
+ premium
+
+ 25
+ 21.7
+ EUR
+
+
+
diff --git a/tests/sample_models/orders/equivalent_xml/order1a.xml b/tests/sample_models/orders/equivalent_xml/order1a.xml
new file mode 100644
index 0000000..481996e
--- /dev/null
+++ b/tests/sample_models/orders/equivalent_xml/order1a.xml
@@ -0,0 +1,62 @@
+
+
+ 2
+
+
+ Bob
+ string
+ string
+ 102832
+ FR
+ +1732897354
+ +1732323984
+
+ JIDAZIO786DAZH
+
+
+ -
+
+
+ product 1
+
+ regular
+
+
+ product 1
+
+ premium
+
+
+
+ product 1
+
+ basic
+
+
+ 13
+
+ 340.23
+
+ EUR
+
+ -
+
+ product 2
+ regular
+
+ string
+ 8
+ 56.2
+ EUR
+
+ -
+
+ product 1
+ premium
+
+ 25
+ 21.7
+ EUR
+
+
+
diff --git a/tests/sample_models/orders/invalid_xml/invalid.xml b/tests/sample_models/orders/invalid_xml/invalid.xml
index 850c092..e77a8d6 100644
--- a/tests/sample_models/orders/invalid_xml/invalid.xml
+++ b/tests/sample_models/orders/invalid_xml/invalid.xml
@@ -1,14 +1,14 @@
-
+
Alice
string
string
21093
US
-
+
Bob
string
string
diff --git a/tests/sample_models/orders/invalid_xml/malformed_recover.xml b/tests/sample_models/orders/invalid_xml/malformed_recover.xml
index 8fd23af..302205c 100644
--- a/tests/sample_models/orders/invalid_xml/malformed_recover.xml
+++ b/tests/sample_models/orders/invalid_xml/malformed_recover.xml
@@ -2,7 +2,7 @@
3
-
+
Bob
string
string
diff --git a/tests/sample_models/orders/orders.xsd b/tests/sample_models/orders/orders.xsd
index 20b5d65..c54fcd3 100644
--- a/tests/sample_models/orders/orders.xsd
+++ b/tests/sample_models/orders/orders.xsd
@@ -34,6 +34,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -41,6 +64,8 @@
+
+
diff --git a/tests/sample_models/orders/orders_ddl_mssql_version0.sql b/tests/sample_models/orders/orders_ddl_mssql_version0.sql
index 859ac89..3371f25 100644
--- a/tests/sample_models/orders/orders_ddl_mssql_version0.sql
+++ b/tests/sample_models/orders/orders_ddl_mssql_version0.sql
@@ -1,6 +1,7 @@
CREATE TABLE orderperson (
pk_orderperson INTEGER NOT NULL IDENTITY,
+ name_attr VARCHAR(1000) NULL,
name VARCHAR(1000) NULL,
address VARCHAR(1000) NULL,
city VARCHAR(1000) NULL,
@@ -10,12 +11,33 @@ CREATE TABLE orderperson (
[phoneNumber] VARCHAR(8000) NULL,
[companyId_type] CHAR(3) NULL,
[companyId_value] VARCHAR(1000) NULL,
+ coordinates VARCHAR(1000) NULL,
record_hash BINARY(20) NULL,
CONSTRAINT cx_pk_orderperson PRIMARY KEY CLUSTERED (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature INTEGER NOT NULL IDENTITY,
+ id VARCHAR(1000) NULL,
+ value INTEGER NULL,
+ record_hash BINARY(20) NULL,
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY CLUSTERED (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature INTEGER NOT NULL IDENTITY,
+ id VARCHAR(1000) NULL,
+ value VARCHAR(1000) NULL,
+ record_hash BINARY(20) NULL,
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY CLUSTERED (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
CREATE TABLE item (
pk_item INTEGER NOT NULL IDENTITY,
product_name VARCHAR(1000) NULL,
@@ -30,6 +52,22 @@ CREATE TABLE item (
)
+CREATE TABLE item_product_features_intfeature (
+ fk_item INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE item_product_features_stringfeature (
+ fk_item INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
+
CREATE TABLE shiporder (
pk_shiporder INTEGER NOT NULL IDENTITY,
orderid VARCHAR(1000) NULL,
@@ -70,6 +108,14 @@ CREATE TABLE orders_shiporder (
FOREIGN KEY(fk_shiporder) REFERENCES shiporder (pk_shiporder)
)
+CREATE CLUSTERED INDEX ix_fk_item_product_features_intfeature ON item_product_features_intfeature (fk_item, fk_intfeature)
+
+CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature)
+
+CREATE CLUSTERED INDEX ix_fk_item_product_features_stringfeature ON item_product_features_stringfeature (fk_item, fk_stringfeature)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature)
+
CREATE CLUSTERED INDEX ix_fk_shiporder_item ON shiporder_item (fk_shiporder, fk_item)
CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item)
diff --git a/tests/sample_models/orders/orders_ddl_mssql_version1.sql b/tests/sample_models/orders/orders_ddl_mssql_version1.sql
index a6b1946..21f3854 100644
--- a/tests/sample_models/orders/orders_ddl_mssql_version1.sql
+++ b/tests/sample_models/orders/orders_ddl_mssql_version1.sql
@@ -1,6 +1,7 @@
CREATE TABLE orderperson (
pk_orderperson INTEGER NOT NULL IDENTITY,
+ name_attr VARCHAR(1000) NULL,
name VARCHAR(1000) NULL,
address VARCHAR(1000) NULL,
city VARCHAR(1000) NULL,
@@ -11,12 +12,33 @@ CREATE TABLE orderperson (
[companyId_ace] VARCHAR(1000) NULL,
[companyId_bic] VARCHAR(1000) NULL,
[companyId_lei] VARCHAR(1000) NULL,
+ coordinates VARCHAR(1000) NULL,
record_hash BINARY(16) NULL,
CONSTRAINT cx_pk_orderperson PRIMARY KEY CLUSTERED (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature INTEGER NOT NULL IDENTITY,
+ id VARCHAR(1000) NULL,
+ value INTEGER NULL,
+ record_hash BINARY(16) NULL,
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY CLUSTERED (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature INTEGER NOT NULL IDENTITY,
+ id VARCHAR(1000) NULL,
+ value VARCHAR(1000) NULL,
+ record_hash BINARY(16) NULL,
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY CLUSTERED (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
CREATE TABLE shiporder (
pk_shiporder INTEGER NOT NULL IDENTITY,
orderid VARCHAR(1000) NULL,
@@ -54,6 +76,7 @@ CREATE TABLE orders_shiporder (
CREATE TABLE item (
pk_item INTEGER NOT NULL IDENTITY,
+ temp_pk_item INTEGER NULL,
fk_parent_shiporder INTEGER NULL,
xml2db_row_number INTEGER NOT NULL,
product_name VARCHAR(1000) NULL,
@@ -66,7 +89,33 @@ CREATE TABLE item (
FOREIGN KEY(fk_parent_shiporder) REFERENCES shiporder (pk_shiporder)
)
+
+CREATE TABLE item_product_features_intfeature (
+ fk_item INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ xml2db_row_number INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE item_product_features_stringfeature (
+ fk_item INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ xml2db_row_number INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
CREATE CLUSTERED INDEX ix_fk_orders_shiporder ON orders_shiporder (fk_orders, fk_shiporder)
CREATE INDEX ix_orders_shiporder_fk_shiporder ON orders_shiporder (fk_shiporder)
+CREATE CLUSTERED INDEX ix_fk_item_product_features_intfeature ON item_product_features_intfeature (fk_item, fk_intfeature)
+
+CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature)
+
+CREATE CLUSTERED INDEX ix_fk_item_product_features_stringfeature ON item_product_features_stringfeature (fk_item, fk_stringfeature)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature)
+
diff --git a/tests/sample_models/orders/orders_ddl_mssql_version2.sql b/tests/sample_models/orders/orders_ddl_mssql_version2.sql
index 8d6fa31..a533b08 100644
--- a/tests/sample_models/orders/orders_ddl_mssql_version2.sql
+++ b/tests/sample_models/orders/orders_ddl_mssql_version2.sql
@@ -12,6 +12,7 @@ CREATE TABLE orders (
CREATE TABLE orderperson (
pk_orderperson INTEGER NOT NULL IDENTITY,
+ name_attr VARCHAR(1000) NULL,
name VARCHAR(1000) NULL,
address VARCHAR(1000) NULL,
city VARCHAR(1000) NULL,
@@ -21,12 +22,33 @@ CREATE TABLE orderperson (
[phoneNumber] VARCHAR(8000) NULL,
[companyId_type] CHAR(3) NULL,
[companyId_value] VARCHAR(1000) NULL,
+ coordinates VARCHAR(1000) NULL,
xml2db_record_hash BINARY(20) NULL,
CONSTRAINT cx_pk_orderperson PRIMARY KEY CLUSTERED (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (xml2db_record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature INTEGER NOT NULL IDENTITY,
+ id VARCHAR(1000) NULL,
+ value INTEGER NULL,
+ xml2db_record_hash BINARY(20) NULL,
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY CLUSTERED (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (xml2db_record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature INTEGER NOT NULL IDENTITY,
+ id VARCHAR(1000) NULL,
+ value VARCHAR(1000) NULL,
+ xml2db_record_hash BINARY(20) NULL,
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY CLUSTERED (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (xml2db_record_hash)
+)
+
+
CREATE TABLE product (
pk_product INTEGER NOT NULL IDENTITY,
name VARCHAR(1000) NULL,
@@ -37,6 +59,22 @@ CREATE TABLE product (
)
+CREATE TABLE product_features_intfeature (
+ fk_product INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_product) REFERENCES product (pk_product),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE product_features_stringfeature (
+ fk_product INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_product) REFERENCES product (pk_product),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
+
CREATE TABLE item (
pk_item INTEGER NOT NULL IDENTITY,
fk_product INTEGER NULL,
@@ -57,6 +95,7 @@ CREATE TABLE shiporder (
fk_parent_orders INTEGER NULL,
orderid VARCHAR(1000) NULL,
processed_at DATETIMEOFFSET NULL,
+ orderperson_name_attr VARCHAR(1000) NULL,
orderperson_name VARCHAR(1000) NULL,
orderperson_address VARCHAR(1000) NULL,
orderperson_city VARCHAR(1000) NULL,
@@ -66,6 +105,7 @@ CREATE TABLE shiporder (
[orderperson_phoneNumber] VARCHAR(8000) NULL,
[orderperson_companyId_type] CHAR(3) NULL,
[orderperson_companyId_value] VARCHAR(1000) NULL,
+ orderperson_coordinates VARCHAR(1000) NULL,
shipto_fk_orderperson INTEGER NULL,
CONSTRAINT cx_pk_shiporder PRIMARY KEY CLUSTERED (pk_shiporder),
FOREIGN KEY(fk_parent_orders) REFERENCES orders (pk_orders),
@@ -80,6 +120,14 @@ CREATE TABLE shiporder_item (
FOREIGN KEY(fk_item) REFERENCES item (pk_item)
)
+CREATE CLUSTERED INDEX ix_fk_product_features_intfeature ON product_features_intfeature (fk_product, fk_intfeature)
+
+CREATE INDEX ix_product_features_intfeature_fk_intfeature ON product_features_intfeature (fk_intfeature)
+
+CREATE CLUSTERED INDEX ix_fk_product_features_stringfeature ON product_features_stringfeature (fk_product, fk_stringfeature)
+
+CREATE INDEX ix_product_features_stringfeature_fk_stringfeature ON product_features_stringfeature (fk_stringfeature)
+
CREATE CLUSTERED INDEX ix_fk_shiporder_item ON shiporder_item (fk_shiporder, fk_item)
CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item)
diff --git a/tests/sample_models/orders/orders_ddl_mysql_version0.sql b/tests/sample_models/orders/orders_ddl_mysql_version0.sql
index 17ea3a3..70929f2 100644
--- a/tests/sample_models/orders/orders_ddl_mysql_version0.sql
+++ b/tests/sample_models/orders/orders_ddl_mysql_version0.sql
@@ -1,6 +1,7 @@
CREATE TABLE orderperson (
pk_orderperson INTEGER NOT NULL AUTO_INCREMENT,
+ name_attr VARCHAR(255),
name VARCHAR(255),
address VARCHAR(255),
city VARCHAR(255),
@@ -10,12 +11,33 @@ CREATE TABLE orderperson (
`phoneNumber` VARCHAR(4000),
`companyId_type` VARCHAR(3),
`companyId_value` VARCHAR(255),
+ coordinates VARCHAR(255),
record_hash BINARY(20),
CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature INTEGER NOT NULL AUTO_INCREMENT,
+ id VARCHAR(255),
+ value INTEGER,
+ record_hash BINARY(20),
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature INTEGER NOT NULL AUTO_INCREMENT,
+ id VARCHAR(255),
+ value VARCHAR(255),
+ record_hash BINARY(20),
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
CREATE TABLE item (
pk_item INTEGER NOT NULL AUTO_INCREMENT,
product_name VARCHAR(255),
@@ -30,6 +52,22 @@ CREATE TABLE item (
)
+CREATE TABLE item_product_features_intfeature (
+ fk_item INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE item_product_features_stringfeature (
+ fk_item INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
+
CREATE TABLE shiporder (
pk_shiporder INTEGER NOT NULL AUTO_INCREMENT,
orderid VARCHAR(255),
@@ -70,6 +108,14 @@ CREATE TABLE orders_shiporder (
FOREIGN KEY(fk_shiporder) REFERENCES shiporder (pk_shiporder)
)
+CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature)
+
+CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature)
+
CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item)
CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder)
diff --git a/tests/sample_models/orders/orders_ddl_mysql_version1.sql b/tests/sample_models/orders/orders_ddl_mysql_version1.sql
index 38f3e2c..89d8d35 100644
--- a/tests/sample_models/orders/orders_ddl_mysql_version1.sql
+++ b/tests/sample_models/orders/orders_ddl_mysql_version1.sql
@@ -1,6 +1,7 @@
CREATE TABLE orderperson (
pk_orderperson INTEGER NOT NULL AUTO_INCREMENT,
+ name_attr VARCHAR(255),
name VARCHAR(255),
address VARCHAR(255),
city VARCHAR(255),
@@ -11,12 +12,33 @@ CREATE TABLE orderperson (
`companyId_ace` VARCHAR(255),
`companyId_bic` VARCHAR(255),
`companyId_lei` VARCHAR(255),
+ coordinates VARCHAR(255),
record_hash BINARY(16),
CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature INTEGER NOT NULL AUTO_INCREMENT,
+ id VARCHAR(255),
+ value INTEGER,
+ record_hash BINARY(16),
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature INTEGER NOT NULL AUTO_INCREMENT,
+ id VARCHAR(255),
+ value VARCHAR(255),
+ record_hash BINARY(16),
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
CREATE TABLE shiporder (
pk_shiporder INTEGER NOT NULL AUTO_INCREMENT,
orderid VARCHAR(255),
@@ -54,6 +76,7 @@ CREATE TABLE orders_shiporder (
CREATE TABLE item (
pk_item INTEGER NOT NULL AUTO_INCREMENT,
+ temp_pk_item INTEGER,
fk_parent_shiporder INTEGER,
xml2db_row_number INTEGER NOT NULL,
product_name VARCHAR(255),
@@ -66,7 +89,33 @@ CREATE TABLE item (
FOREIGN KEY(fk_parent_shiporder) REFERENCES shiporder (pk_shiporder)
)
+
+CREATE TABLE item_product_features_intfeature (
+ fk_item INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ xml2db_row_number INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE item_product_features_stringfeature (
+ fk_item INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ xml2db_row_number INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
CREATE INDEX ix_orders_shiporder_fk_orders ON orders_shiporder (fk_orders)
CREATE INDEX ix_orders_shiporder_fk_shiporder ON orders_shiporder (fk_shiporder)
+CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature)
+
+CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature)
+
diff --git a/tests/sample_models/orders/orders_ddl_mysql_version2.sql b/tests/sample_models/orders/orders_ddl_mysql_version2.sql
index 6ff42b8..f9901cf 100644
--- a/tests/sample_models/orders/orders_ddl_mysql_version2.sql
+++ b/tests/sample_models/orders/orders_ddl_mysql_version2.sql
@@ -12,6 +12,7 @@ CREATE TABLE orders (
CREATE TABLE orderperson (
pk_orderperson INTEGER NOT NULL AUTO_INCREMENT,
+ name_attr VARCHAR(255),
name VARCHAR(255),
address VARCHAR(255),
city VARCHAR(255),
@@ -21,12 +22,33 @@ CREATE TABLE orderperson (
`phoneNumber` VARCHAR(4000),
`companyId_type` VARCHAR(3),
`companyId_value` VARCHAR(255),
+ coordinates VARCHAR(255),
xml2db_record_hash BINARY(20),
CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (xml2db_record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature INTEGER NOT NULL AUTO_INCREMENT,
+ id VARCHAR(255),
+ value INTEGER,
+ xml2db_record_hash BINARY(20),
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (xml2db_record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature INTEGER NOT NULL AUTO_INCREMENT,
+ id VARCHAR(255),
+ value VARCHAR(255),
+ xml2db_record_hash BINARY(20),
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (xml2db_record_hash)
+)
+
+
CREATE TABLE product (
pk_product INTEGER NOT NULL AUTO_INCREMENT,
name VARCHAR(255),
@@ -37,6 +59,22 @@ CREATE TABLE product (
)
+CREATE TABLE product_features_intfeature (
+ fk_product INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_product) REFERENCES product (pk_product),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE product_features_stringfeature (
+ fk_product INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_product) REFERENCES product (pk_product),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
+
CREATE TABLE item (
pk_item INTEGER NOT NULL AUTO_INCREMENT,
fk_product INTEGER,
@@ -57,6 +95,7 @@ CREATE TABLE shiporder (
fk_parent_orders INTEGER,
orderid VARCHAR(255),
processed_at DATETIME,
+ orderperson_name_attr VARCHAR(255),
orderperson_name VARCHAR(255),
orderperson_address VARCHAR(255),
orderperson_city VARCHAR(255),
@@ -66,6 +105,7 @@ CREATE TABLE shiporder (
`orderperson_phoneNumber` VARCHAR(4000),
`orderperson_companyId_type` VARCHAR(3),
`orderperson_companyId_value` VARCHAR(255),
+ orderperson_coordinates VARCHAR(255),
shipto_fk_orderperson INTEGER,
CONSTRAINT cx_pk_shiporder PRIMARY KEY (pk_shiporder),
FOREIGN KEY(fk_parent_orders) REFERENCES orders (pk_orders),
@@ -80,6 +120,14 @@ CREATE TABLE shiporder_item (
FOREIGN KEY(fk_item) REFERENCES item (pk_item)
)
+CREATE INDEX ix_product_features_intfeature_fk_intfeature ON product_features_intfeature (fk_intfeature)
+
+CREATE INDEX ix_product_features_intfeature_fk_product ON product_features_intfeature (fk_product)
+
+CREATE INDEX ix_product_features_stringfeature_fk_product ON product_features_stringfeature (fk_product)
+
+CREATE INDEX ix_product_features_stringfeature_fk_stringfeature ON product_features_stringfeature (fk_stringfeature)
+
CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item)
CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder)
diff --git a/tests/sample_models/orders/orders_ddl_postgresql_version0.sql b/tests/sample_models/orders/orders_ddl_postgresql_version0.sql
index 1fa502c..540e8de 100644
--- a/tests/sample_models/orders/orders_ddl_postgresql_version0.sql
+++ b/tests/sample_models/orders/orders_ddl_postgresql_version0.sql
@@ -1,6 +1,7 @@
CREATE TABLE orderperson (
pk_orderperson SERIAL NOT NULL,
+ name_attr VARCHAR(1000),
name VARCHAR(1000),
address VARCHAR(1000),
city VARCHAR(1000),
@@ -10,12 +11,33 @@ CREATE TABLE orderperson (
"phoneNumber" VARCHAR(8000),
"companyId_type" VARCHAR(3),
"companyId_value" VARCHAR(1000),
+ coordinates VARCHAR(1000),
record_hash BYTEA,
CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature SERIAL NOT NULL,
+ id VARCHAR(1000),
+ value INTEGER,
+ record_hash BYTEA,
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature SERIAL NOT NULL,
+ id VARCHAR(1000),
+ value VARCHAR(1000),
+ record_hash BYTEA,
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
CREATE TABLE item (
pk_item SERIAL NOT NULL,
product_name VARCHAR(1000),
@@ -30,6 +52,22 @@ CREATE TABLE item (
)
+CREATE TABLE item_product_features_intfeature (
+ fk_item INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE item_product_features_stringfeature (
+ fk_item INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
+
CREATE TABLE shiporder (
pk_shiporder SERIAL NOT NULL,
orderid VARCHAR(1000),
@@ -70,6 +108,14 @@ CREATE TABLE orders_shiporder (
FOREIGN KEY(fk_shiporder) REFERENCES shiporder (pk_shiporder)
)
+CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature)
+
+CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature)
+
CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item)
CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder)
diff --git a/tests/sample_models/orders/orders_ddl_postgresql_version1.sql b/tests/sample_models/orders/orders_ddl_postgresql_version1.sql
index e1f7224..7d1bf33 100644
--- a/tests/sample_models/orders/orders_ddl_postgresql_version1.sql
+++ b/tests/sample_models/orders/orders_ddl_postgresql_version1.sql
@@ -1,6 +1,7 @@
CREATE TABLE orderperson (
pk_orderperson SERIAL NOT NULL,
+ name_attr VARCHAR(1000),
name VARCHAR(1000),
address VARCHAR(1000),
city VARCHAR(1000),
@@ -11,12 +12,33 @@ CREATE TABLE orderperson (
"companyId_ace" VARCHAR(1000),
"companyId_bic" VARCHAR(1000),
"companyId_lei" VARCHAR(1000),
+ coordinates VARCHAR(1000),
record_hash BYTEA,
CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature SERIAL NOT NULL,
+ id VARCHAR(1000),
+ value INTEGER,
+ record_hash BYTEA,
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature SERIAL NOT NULL,
+ id VARCHAR(1000),
+ value VARCHAR(1000),
+ record_hash BYTEA,
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash)
+)
+
+
CREATE TABLE shiporder (
pk_shiporder SERIAL NOT NULL,
orderid VARCHAR(1000),
@@ -54,6 +76,7 @@ CREATE TABLE orders_shiporder (
CREATE TABLE item (
pk_item SERIAL NOT NULL,
+ temp_pk_item INTEGER,
fk_parent_shiporder INTEGER,
xml2db_row_number INTEGER NOT NULL,
product_name VARCHAR(1000),
@@ -66,7 +89,33 @@ CREATE TABLE item (
FOREIGN KEY(fk_parent_shiporder) REFERENCES shiporder (pk_shiporder)
)
+
+CREATE TABLE item_product_features_intfeature (
+ fk_item INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ xml2db_row_number INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE item_product_features_stringfeature (
+ fk_item INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ xml2db_row_number INTEGER NOT NULL,
+ FOREIGN KEY(fk_item) REFERENCES item (pk_item),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
CREATE INDEX ix_orders_shiporder_fk_orders ON orders_shiporder (fk_orders)
CREATE INDEX ix_orders_shiporder_fk_shiporder ON orders_shiporder (fk_shiporder)
+CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature)
+
+CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item)
+
+CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature)
+
diff --git a/tests/sample_models/orders/orders_ddl_postgresql_version2.sql b/tests/sample_models/orders/orders_ddl_postgresql_version2.sql
index a1d4fa8..17f6cd2 100644
--- a/tests/sample_models/orders/orders_ddl_postgresql_version2.sql
+++ b/tests/sample_models/orders/orders_ddl_postgresql_version2.sql
@@ -12,6 +12,7 @@ CREATE TABLE orders (
CREATE TABLE orderperson (
pk_orderperson SERIAL NOT NULL,
+ name_attr VARCHAR(1000),
name VARCHAR(1000),
address VARCHAR(1000),
city VARCHAR(1000),
@@ -21,12 +22,33 @@ CREATE TABLE orderperson (
"phoneNumber" VARCHAR(8000),
"companyId_type" VARCHAR(3),
"companyId_value" VARCHAR(1000),
+ coordinates VARCHAR(1000),
xml2db_record_hash BYTEA,
CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson),
CONSTRAINT orderperson_xml2db_record_hash UNIQUE (xml2db_record_hash)
)
+CREATE TABLE intfeature (
+ pk_intfeature SERIAL NOT NULL,
+ id VARCHAR(1000),
+ value INTEGER,
+ xml2db_record_hash BYTEA,
+ CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature),
+ CONSTRAINT intfeature_xml2db_record_hash UNIQUE (xml2db_record_hash)
+)
+
+
+CREATE TABLE stringfeature (
+ pk_stringfeature SERIAL NOT NULL,
+ id VARCHAR(1000),
+ value VARCHAR(1000),
+ xml2db_record_hash BYTEA,
+ CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature),
+ CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (xml2db_record_hash)
+)
+
+
CREATE TABLE product (
pk_product SERIAL NOT NULL,
name VARCHAR(1000),
@@ -37,6 +59,22 @@ CREATE TABLE product (
)
+CREATE TABLE product_features_intfeature (
+ fk_product INTEGER NOT NULL,
+ fk_intfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_product) REFERENCES product (pk_product),
+ FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature)
+)
+
+
+CREATE TABLE product_features_stringfeature (
+ fk_product INTEGER NOT NULL,
+ fk_stringfeature INTEGER NOT NULL,
+ FOREIGN KEY(fk_product) REFERENCES product (pk_product),
+ FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature)
+)
+
+
CREATE TABLE item (
pk_item SERIAL NOT NULL,
fk_product INTEGER,
@@ -57,6 +95,7 @@ CREATE TABLE shiporder (
fk_parent_orders INTEGER,
orderid VARCHAR(1000),
processed_at TIMESTAMP WITH TIME ZONE,
+ orderperson_name_attr VARCHAR(1000),
orderperson_name VARCHAR(1000),
orderperson_address VARCHAR(1000),
orderperson_city VARCHAR(1000),
@@ -66,6 +105,7 @@ CREATE TABLE shiporder (
"orderperson_phoneNumber" VARCHAR(8000),
"orderperson_companyId_type" VARCHAR(3),
"orderperson_companyId_value" VARCHAR(1000),
+ orderperson_coordinates VARCHAR(1000),
shipto_fk_orderperson INTEGER,
CONSTRAINT cx_pk_shiporder PRIMARY KEY (pk_shiporder),
FOREIGN KEY(fk_parent_orders) REFERENCES orders (pk_orders),
@@ -80,6 +120,14 @@ CREATE TABLE shiporder_item (
FOREIGN KEY(fk_item) REFERENCES item (pk_item)
)
+CREATE INDEX ix_product_features_intfeature_fk_intfeature ON product_features_intfeature (fk_intfeature)
+
+CREATE INDEX ix_product_features_intfeature_fk_product ON product_features_intfeature (fk_product)
+
+CREATE INDEX ix_product_features_stringfeature_fk_product ON product_features_stringfeature (fk_product)
+
+CREATE INDEX ix_product_features_stringfeature_fk_stringfeature ON product_features_stringfeature (fk_stringfeature)
+
CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item)
CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder)
diff --git a/tests/sample_models/orders/orders_erd_version0.md b/tests/sample_models/orders/orders_erd_version0.md
index cb0a282..443b28c 100644
--- a/tests/sample_models/orders/orders_erd_version0.md
+++ b/tests/sample_models/orders/orders_erd_version0.md
@@ -12,6 +12,8 @@ erDiagram
string orderid
dateTime processed_at
}
+ item ||--o{ intfeature : "product_features_intfeature*"
+ item ||--o{ stringfeature : "product_features_stringfeature*"
item {
string product_name
string product_version
@@ -20,7 +22,16 @@ erDiagram
decimal price
string currency
}
+ stringfeature {
+ string id
+ string value
+ }
+ intfeature {
+ string id
+ integer value
+ }
orderperson {
+ string name_attr
string name
string address
string city
@@ -30,5 +41,6 @@ erDiagram
string-N phoneNumber
string companyId_type
string companyId_value
+ string coordinates
}
```
\ No newline at end of file
diff --git a/tests/sample_models/orders/orders_erd_version1.md b/tests/sample_models/orders/orders_erd_version1.md
index 9a28175..4b07efa 100644
--- a/tests/sample_models/orders/orders_erd_version1.md
+++ b/tests/sample_models/orders/orders_erd_version1.md
@@ -1,5 +1,7 @@
```mermaid
erDiagram
+ item ||--o{ intfeature : "product_features_intfeature*"
+ item ||--o{ stringfeature : "product_features_stringfeature*"
item {
string product_name
string product_version
@@ -20,7 +22,16 @@ erDiagram
string orderid
dateTime processed_at
}
+ stringfeature {
+ string id
+ string value
+ }
+ intfeature {
+ string id
+ integer value
+ }
orderperson {
+ string name_attr
string name
string address
string city
@@ -31,5 +42,6 @@ erDiagram
string companyId_ace
string companyId_bic
string companyId_lei
+ string coordinates
}
```
\ No newline at end of file
diff --git a/tests/sample_models/orders/orders_erd_version2.md b/tests/sample_models/orders/orders_erd_version2.md
index a1d5b75..26e96d9 100644
--- a/tests/sample_models/orders/orders_erd_version2.md
+++ b/tests/sample_models/orders/orders_erd_version2.md
@@ -5,6 +5,7 @@ erDiagram
shiporder {
string orderid
dateTime processed_at
+ string orderperson_name_attr
string orderperson_name
string orderperson_address
string orderperson_city
@@ -14,6 +15,7 @@ erDiagram
string-N orderperson_phoneNumber
string orderperson_companyId_type
string orderperson_companyId_value
+ string orderperson_coordinates
}
item ||--|| product : "product"
item {
@@ -22,11 +24,22 @@ erDiagram
decimal price
string currency
}
+ product ||--o{ intfeature : "features_intfeature*"
+ product ||--o{ stringfeature : "features_stringfeature*"
product {
string name
string version
}
+ stringfeature {
+ string id
+ string value
+ }
+ intfeature {
+ string id
+ integer value
+ }
orderperson {
+ string name_attr
string name
string address
string city
@@ -36,6 +49,7 @@ erDiagram
string-N phoneNumber
string companyId_type
string companyId_value
+ string coordinates
}
orders ||--o{ shiporder : "shiporder"
orders {
diff --git a/tests/sample_models/orders/xml/order1.xml b/tests/sample_models/orders/xml/order1.xml
index 194a67f..6c04192 100644
--- a/tests/sample_models/orders/xml/order1.xml
+++ b/tests/sample_models/orders/xml/order1.xml
@@ -2,7 +2,7 @@
2
-
+
Bob
string
string
@@ -18,6 +18,24 @@
product 1
regular
+
+
+ length
+ 60
+
+
+ width
+ 40
+
+
+ weight
+ 10
+
+
+ color
+ red
+
+
13
340.23
diff --git a/tests/sample_models/orders/xml/order2.xml b/tests/sample_models/orders/xml/order2.xml
index 7040dd3..5926472 100644
--- a/tests/sample_models/orders/xml/order2.xml
+++ b/tests/sample_models/orders/xml/order2.xml
@@ -2,7 +2,7 @@
2
-
+
Alice
string
string
diff --git a/tests/sample_models/orders/xml/order3.xml b/tests/sample_models/orders/xml/order3.xml
index 2e4a2fc..7cde833 100644
--- a/tests/sample_models/orders/xml/order3.xml
+++ b/tests/sample_models/orders/xml/order3.xml
@@ -2,14 +2,14 @@
2
-
+
Alice
string
string
21093
US
-
+
Bob
string
string
@@ -20,6 +20,7 @@
JIDAZIO786DAZH
+ 48.87271337163929 2.323433844198471
-
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 1f9f582..b279cfa 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,10 +1,12 @@
import os
+import pprint
import pytest
from lxml import etree
from xml2db import DataModel
from xml2db.xml_converter import XMLConverter, remove_record_hash
+from .conftest import list_xml_path, models_path
from .sample_models import models
@@ -13,19 +15,20 @@
[
{**model, **version, "xml_file": xml_file}
for model in models
- for xml_file in os.listdir(model["xml_path"])
+ for xml_file in list_xml_path(model, "xml")
+ + list_xml_path(model, "equivalent_xml")
for version in model["versions"]
],
)
-def test_document_tree_parsing(test_config):
+def test_iterative_recursive_parsing(test_config):
"""Test whether iterative and recursive parsing give same results"""
model = DataModel(
- test_config["xsd_path"],
+ str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
short_name=test_config["id"],
model_config=test_config["config"],
)
converter = XMLConverter(model)
- file_path = os.path.join(test_config["xml_path"], test_config["xml_file"])
+ file_path = test_config["xml_file"]
parsed_recursive = converter.parse_xml(
file_path, file_path, skip_validation=True, iterparse=False
@@ -42,7 +45,7 @@ def test_document_tree_parsing(test_config):
[
{**model, **version, "xml_file": xml_file}
for model in models
- for xml_file in os.listdir(model["xml_path"])
+ for xml_file in list_xml_path(model, "xml")
for version in model["versions"]
],
)
@@ -50,22 +53,22 @@ def test_document_tree_to_flat_data(test_config):
"""A test for document tree to flat data conversion and back"""
model = DataModel(
- test_config["xsd_path"],
+ str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
short_name=test_config["id"],
model_config=test_config["config"],
)
converter = XMLConverter(model)
- file_path = os.path.join(test_config["xml_path"], test_config["xml_file"])
+ file_path = test_config["xml_file"]
# parse XML to document tree
converter.parse_xml(file_path, file_path)
- exp_doc_tree = remove_record_hash(converter.document_tree)
+ exp_doc_tree = pprint.pformat(remove_record_hash(converter.document_tree))
# parse XML to document tree and then flat data model
doc = model.parse_xml(file_path)
# and convert it back to document tree
- act_doc_tree = doc.flat_data_to_doc_tree()
+ act_doc_tree = pprint.pformat(doc.flat_data_to_doc_tree())
assert act_doc_tree == exp_doc_tree
@@ -75,7 +78,7 @@ def test_document_tree_to_flat_data(test_config):
[
{**model, **version, "xml_file": xml_file}
for model in models
- for xml_file in os.listdir(model["xml_path"])
+ for xml_file in list_xml_path(model, "xml")
for version in model["versions"]
],
)
@@ -83,13 +86,13 @@ def test_document_tree_to_xml(test_config):
"""A test for document tree to xml conversion and back"""
model = DataModel(
- test_config["xsd_path"],
+ str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
short_name=test_config["id"],
model_config=test_config["config"],
)
converter = XMLConverter(model)
- file_path = os.path.join(test_config["xml_path"], test_config["xml_file"])
+ file_path = test_config["xml_file"]
# parse XML to document tree
converter.parse_xml(file_path, file_path)
@@ -112,3 +115,29 @@ def test_document_tree_to_xml(test_config):
ref_xml = f.read()
assert xml == ref_xml
+
+
+@pytest.mark.parametrize(
+ "test_config",
+ [
+ {**model, **version}
+ for model in models
+ for version in model["versions"]
+ if os.path.isdir(os.path.join(models_path, model["id"], "equivalent_xml"))
+ ],
+)
+def test_equivalent_xml(test_config):
+ """A test for xml documents which should result in the same extracted data"""
+
+ xml_files = list_xml_path(test_config, "equivalent_xml")
+
+ if len(xml_files) > 1:
+ model = DataModel(
+ str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
+ short_name=test_config["id"],
+ model_config=test_config["config"],
+ )
+ ref_data = model.parse_xml(xml_files[0])
+ for xml_file in xml_files[1:]:
+ equ_data = model.parse_xml(xml_file)
+ assert ref_data.data == equ_data.data
diff --git a/tests/test_models_output.py b/tests/test_models_output.py
index 0539080..b5245a0 100644
--- a/tests/test_models_output.py
+++ b/tests/test_models_output.py
@@ -5,6 +5,7 @@
from xml2db import DataModel
from .sample_models import models
+from .conftest import models_path
@pytest.mark.parametrize(
@@ -19,14 +20,15 @@ def test_model_erd(test_config):
"""A test to check if generated ERD matches saved output"""
model = DataModel(
- test_config["xsd_path"],
+ str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
short_name=test_config["id"],
model_config=test_config["config"],
)
expected = open(
os.path.join(
- os.path.dirname(test_config["xsd_path"]),
+ models_path,
+ test_config["id"],
f"{test_config['id']}_erd_version{test_config['version_id']}.md",
),
"r",
@@ -49,7 +51,7 @@ def test_model_ddl(test_config):
"""A test to check if generated SQL DDL matches saved output"""
model = DataModel(
- test_config["xsd_path"],
+ str(os.path.join(models_path, test_config["id"], test_config["xsd"])),
short_name=test_config["id"],
model_config=test_config["config"],
db_type=test_config["dialect"].name,
@@ -57,7 +59,8 @@ def test_model_ddl(test_config):
expected = open(
os.path.join(
- os.path.dirname(test_config["xsd_path"]),
+ models_path,
+ test_config["id"],
f"{test_config['id']}_ddl_{test_config['dialect'].name}_version{test_config['version_id']}.sql",
),
"r",
diff --git a/tests/test_roundtrip.py b/tests/test_roundtrip.py
index f0aaf43..8f63496 100644
--- a/tests/test_roundtrip.py
+++ b/tests/test_roundtrip.py
@@ -4,7 +4,7 @@
from lxml import etree
from xml2db.xml_converter import XMLConverter, remove_record_hash
-from .fixtures import setup_db_model, conn_string
+from .conftest import list_xml_path
from .sample_models import models
@@ -17,10 +17,7 @@ def test_database_xml_roundtrip(setup_db_model, model_config):
"""A test for roundtrip insert to the database from and to XML"""
model = setup_db_model
- xml_files = [
- os.path.join(model_config["xml_path"], file)
- for file in os.listdir(model_config["xml_path"])
- ]
+ xml_files = list_xml_path(model_config, "xml")
for file in xml_files:
# do parse and insert into the database
@@ -59,10 +56,7 @@ def test_database_document_tree_roundtrip(setup_db_model, model_config):
"""A test for roundtrip insert to the database from and to document tree"""
model = setup_db_model
- xml_files = [
- os.path.join(model_config["xml_path"], file)
- for file in os.listdir(model_config["xml_path"])
- ]
+ xml_files = list_xml_path(model_config, "xml")
for file in xml_files:
# do parse and insert into the database
@@ -92,10 +86,7 @@ def test_database_document_tree_roundtrip_single_load(setup_db_model, model_conf
"""A test for roundtrip insert to the database from and to document tree"""
model = setup_db_model
- xml_files = [
- os.path.join(model_config["xml_path"], file)
- for file in os.listdir(model_config["xml_path"])
- ]
+ xml_files = list_xml_path(model_config, "xml")
flat_data = None
doc = None
@@ -129,7 +120,7 @@ def test_database_document_tree_roundtrip_single_load(setup_db_model, model_conf
[
{**model, **version, "xml_file": xml_file}
for model in models
- for xml_file in os.listdir(model["xml_path"])
+ for xml_file in list_xml_path(model, "xml")
for version in model["versions"]
],
)
diff --git a/tests/test_validation.py b/tests/test_validation.py
index 4a3ce4d..d42e40c 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -1,10 +1,10 @@
-import xml.etree.ElementTree
-
import lxml.etree
import pytest
+import os
from xml2db import DataModel
from .sample_models import models
+from .conftest import models_path
@pytest.mark.parametrize(
@@ -27,7 +27,9 @@
def test_invalid_xml(args: tuple):
file_name, iterparse, recover, exception = args
- data_model = DataModel(models[0]["xsd_path"])
+ data_model = DataModel(
+ str(os.path.join(models_path, models[0]["id"], models[0]["xsd"]))
+ )
if exception is None:
data_model.parse_xml(
@@ -49,8 +51,8 @@ def test_invalid_xml(args: tuple):
@pytest.mark.parametrize(
"args",
[
- ("invalid", True, False, IndexError),
- ("invalid", True, True, IndexError),
+ ("invalid", True, False, None),
+ ("invalid", True, True, None),
("invalid", False, False, None),
("invalid", False, True, None),
("malformed_recover", True, False, lxml.etree.XMLSyntaxError),
@@ -58,7 +60,7 @@ def test_invalid_xml(args: tuple):
("malformed_recover", False, False, lxml.etree.XMLSyntaxError),
("malformed_recover", False, True, None),
("malformed_no_recover", True, False, lxml.etree.XMLSyntaxError),
- ("malformed_no_recover", True, True, IndexError),
+ ("malformed_no_recover", True, True, None),
("malformed_no_recover", False, False, lxml.etree.XMLSyntaxError),
("malformed_no_recover", False, True, None),
],
@@ -66,7 +68,9 @@ def test_invalid_xml(args: tuple):
def test_invalid_xml_skip_verify(args: tuple):
file_name, iterparse, recover, exception = args
- data_model = DataModel(models[0]["xsd_path"])
+ data_model = DataModel(
+ str(os.path.join(models_path, models[0]["id"], models[0]["xsd"]))
+ )
if exception is None:
data_model.parse_xml(