diff --git a/.github/workflows/integration-tests-mssql.yml b/.github/workflows/integration-tests-mssql.yml index ba2ea6b..bd7bb87 100644 --- a/.github/workflows/integration-tests-mssql.yml +++ b/.github/workflows/integration-tests-mssql.yml @@ -9,7 +9,7 @@ on: jobs: integration-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 services: mssql: image: mcr.microsoft.com/mssql/server:2019-latest diff --git a/.github/workflows/integration-tests-mysql.yml b/.github/workflows/integration-tests-mysql.yml index f03ef24..2adef2d 100644 --- a/.github/workflows/integration-tests-mysql.yml +++ b/.github/workflows/integration-tests-mysql.yml @@ -9,7 +9,7 @@ on: jobs: integration-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 services: mysql: image: mysql:8.0 diff --git a/.github/workflows/integration-tests-postgres.yml b/.github/workflows/integration-tests-postgres.yml index de1f12f..b74b2ca 100644 --- a/.github/workflows/integration-tests-postgres.yml +++ b/.github/workflows/integration-tests-postgres.yml @@ -9,7 +9,7 @@ on: jobs: integration-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 container: python:3.12-bookworm services: postgres: diff --git a/docs/how_it_works.md b/docs/how_it_works.md index 8704e82..c21dfc7 100644 --- a/docs/how_it_works.md +++ b/docs/how_it_works.md @@ -123,8 +123,10 @@ original one. ### Recursive XSD -Recursive XML schemas are not supported, because most of the time they will result in cycles in foreign key constraints -dependencies, which we cannot handle easily. +Recursive XML schemas are not fully supported, because they result in cycles in table dependencies, which would make +the process much more complex. Whenever a field which would introduce a dependency cycle is detected in the XSD, it is +discarded with a warning, which means that the corresponding data in XML files will not be imported. The rest of the +data should be processed correctly.
### Mixed content elements diff --git a/src/xml2db/document.py b/src/xml2db/document.py index 1cfc160..d0a4f97 100644 --- a/src/xml2db/document.py +++ b/src/xml2db/document.py @@ -171,17 +171,24 @@ def _extract_node( record["xml2db_row_number"] = row_number # build record from fields for columns and n-1 relations - for field_type, key, _ in model_table.fields: + for field_type, key, field in model_table.fields: if field_type == "col": - if key in content: + content_key = ( + (f"{key[:-5]}__attr" if field.has_suffix else f"{key}__attr") + if field.is_attr + else key + ) + if content_key in content: if model_table.columns[key].data_type in ["decimal", "float"]: - val = [float(v) for v in content[key]] + val = [float(v) for v in content[content_key]] elif model_table.columns[key].data_type == "integer": - val = [int(v) for v in content[key]] + val = [int(v) for v in content[content_key]] elif model_table.columns[key].data_type == "boolean": - val = [v == "true" or v == "1" for v in content[key]] + val = [ + v == "true" or v == "1" for v in content[content_key] + ] else: - val = content[key] + val = content[content_key] if len(val) == 1: record[key] = val[0] @@ -320,20 +327,29 @@ def _build_node(node_type: str, node_pk: int) -> tuple: record = data_index[node_type]["records"][node_pk] for field_type, rel_name, rel in tb.fields: if field_type == "col" and record[rel_name] is not None: + content_key = ( + ( + f"{rel_name[:-5]}__attr" + if rel.has_suffix + else f"{rel_name}__attr" + ) + if rel.is_attr + else rel_name + ) if rel.data_type in [ "decimal", "float", ]: # remove trailing ".0" for decimal and float - content[rel_name] = [ + content[content_key] = [ value.rstrip("0").rstrip(".") if "." 
in value else value for value in str(record[rel_name]).split(",") ] elif isinstance(record[rel_name], datetime.datetime): - content[rel_name] = [ + content[content_key] = [ record[rel_name].isoformat(timespec="milliseconds") ] else: - content[rel_name] = ( + content[content_key] = ( list(csv.reader([str(record[rel_name])], escapechar="\\"))[ 0 ] diff --git a/src/xml2db/model.py b/src/xml2db/model.py index 31836f0..3038189 100644 --- a/src/xml2db/model.py +++ b/src/xml2db/model.py @@ -70,7 +70,7 @@ class DataModel: def __init__( self, xsd_file: str, - short_name: str = None, + short_name: str = "DocumentRoot", long_name: str = None, base_url: str = None, model_config: dict = None, @@ -226,8 +226,7 @@ def _build_model(self): """ # parse the XML schema recursively and hold a reference to the head table root_table = self._parse_tree( - self.xml_schema[0] if len(self.xml_schema) == 1 else self.xml_schema, - is_root_table=True, + self.xml_schema[0] if len(self.xml_schema) == 1 else self.xml_schema ) self.root_table = root_table.type_name # compute a text representation of the original data model and store it @@ -273,9 +272,7 @@ def _build_model(self): for tb in self.fk_ordered_tables: tb.build_sqlalchemy_tables() - def _parse_tree( - self, parent_node: xmlschema.XsdElement, is_root_table: bool = False - ): + def _parse_tree(self, parent_node: xmlschema.XsdElement, nodes_path: list = None): """Parse a node of an XML schema recursively and create a target data model without any simplification We parse the XSD tree recursively to create for each node (basically a complex type in the XSD) an equivalent \ @@ -289,7 +286,7 @@ def _parse_tree( Args: parent_node: the current XSD node being parsed - is_root_table: True if this is the root table + nodes_path: a list of nodes types from the root node """ # find current node type and name and returns corresponding table if it already exists @@ -301,12 +298,16 @@ def _parse_tree( if parent_type is None: parent_type = 
parent_node.local_name + nodes_path = (nodes_path if nodes_path else []) + [parent_type] + # if this type has already been encountered, stop here and return existing table if parent_type in self.tables: parent_table = self.tables[parent_type] return parent_table - # elements names and types should be bijective. If an element name is used for different types, + # For database tables we use element names rather than XSD types, under the assumption that they are often + # more meaningful given that they are the one which appear in XML documents. However, same names can be used + # for different XSD types, so if an element name is used for different types, # we add a suffix to the name to make it unique again (using a dict to keep the name/type association) parent_name = ( parent_node.local_name @@ -324,7 +325,7 @@ def _parse_tree( parent_table = self._create_table_model( parent_name, parent_type, - is_root_table, + len(nodes_path) == 1, isinstance(parent_node, xmlschema.XMLSchema), ) self.tables[parent_type] = parent_table @@ -363,6 +364,13 @@ def recurse_parse_simple_type(elem_type): if elem_type.base_type else recurse_parse_simple_type(elem_type.member_types) ) + if elem_type.is_list(): + return ( + "string", + 0, + None, + elem_type.allow_empty, + ) if elem_type.is_restriction(): dt = elem_type.base_type.local_name mil = elem_type.min_length @@ -384,7 +392,12 @@ def recurse_parse_simple_type(elem_type): else None ) ae = ae and bt_ae if ae is not None and bt_ae is not None else None - if elem_type.enumeration is not None and dt in ["string", "NMTOKEN", "duration", "token"]: + if elem_type.enumeration is not None and dt in [ + "string", + "NMTOKEN", + "duration", + "token", + ]: mil = min([len(val) for val in elem_type.enumeration]) mal = max([len(val) for val in elem_type.enumeration]) return dt, mil, mal, ae @@ -410,25 +423,31 @@ def get_occurs(particle): ), ] - # go through item attributes and add them as columns + # go through item attributes and add them as 
columns, adding a suffix if an element with the same name exists + children_names = None for attrib_name, attrib in parent_node.attributes.items(): + if children_names is None: + children_names = [child.local_name for child in parent_node] ( data_type, min_length, max_length, allow_empty, ) = recurse_parse_simple_type([attrib.type]) + suffix = attrib_name in children_names parent_table.add_column( - f"{attrib_name}", + f"{attrib_name}{'_attr' if suffix else ''}", data_type, [0, 1], min_length, max_length, True, + suffix, False, allow_empty, None, ) + nested_containers = [] # go through the children to add either arguments either relations to the current element for child in parent_node: @@ -454,6 +473,7 @@ def get_occurs(particle): if child.parent and child.parent.max_occurs != 1 and child.parent.model != "choice" + and child.max_occurs == 1 else None ), ) @@ -482,32 +502,39 @@ def get_occurs(particle): max_length, False, False, + False, allow_empty, nested_containers[-1][1], ) elif ct.is_complex(): - child_table = self._parse_tree(child) - child_table.model_group = ( - "choice" - if ct.model_group and ct.model_group.model == "choice" - else "sequence" - ) - occurs = get_occurs(child) - if child.is_single(): - parent_table.add_relation_1( - child.local_name, - child_table, - occurs, - nested_containers[-1][1], + # ignoring recursive definitions by skipping these fields + if child.type.local_name in nodes_path: + logger.warning( + f"type '{child.type.local_name}' contains a recursive definition" ) else: - parent_table.add_relation_n( - child.local_name, - child_table, - occurs, - nested_containers[-1][1], + child_table = self._parse_tree(child, nodes_path) + child_table.model_group = ( + "choice" + if ct.model_group and ct.model_group.model == "choice" + else "sequence" ) + occurs = get_occurs(child) + if occurs[1] == 1: + parent_table.add_relation_1( + child.local_name, + child_table, + occurs, + nested_containers[-1][1], + ) + else: + parent_table.add_relation_n( 
+ child.local_name, + child_table, + occurs, + nested_containers[-1][1], + ) else: raise ValueError("unknown case; please check") else: @@ -534,6 +561,7 @@ def get_occurs(particle): min_length, max_length, False, + False, True, allow_empty, None, @@ -544,31 +572,19 @@ def get_occurs(particle): def _repr_tree( self, parent_table: Union[DataModelTableReused, DataModelTableDuplicated], - visited_nodes: Union[set, None] = None, ): """Build a text representation of the data model tree Args: parent_table: the current data model table object """ - if visited_nodes is None: - visited_nodes = set() - else: - visited_nodes = {item for item in visited_nodes} - visited_nodes.add(parent_table.name) for field_type, name, field in parent_table.fields: if field_type == "col": yield f"{field.name}{field.occurs}: {field.data_type}" - elif field_type == "rel1": - mg = " (choice)" if field.other_table.model_group == "choice" else "" - yield f"{field.name}{field.occurs}{mg}:{' ...' if field_type in visited_nodes else ''}" - if field.other_table.name not in visited_nodes: - for line in self._repr_tree(field.other_table, visited_nodes): - yield f" {line}" - elif field_type == "reln": + else: mg = " (choice)" if field.other_table.model_group == "choice" else "" - yield f"{field.name}{field.occurs}{mg}:{' ...' 
if field_type in visited_nodes else ''}" - for line in self._repr_tree(field.other_table, visited_nodes): + yield f"{field.name}{field.occurs}{mg}:" + for line in self._repr_tree(field.other_table): yield f" {line}" def get_entity_rel_diagram(self, text_context: bool = True) -> str: diff --git a/src/xml2db/table/column.py b/src/xml2db/table/column.py index bf450cb..17daba6 100644 --- a/src/xml2db/table/column.py +++ b/src/xml2db/table/column.py @@ -36,11 +36,18 @@ def types_mapping_default(temp: bool, col: "DataModelColumn") -> Any: return Double if col.data_type == "dateTime": return DateTime(timezone=True) - if col.data_type == "integer" or col.data_type == "int": + if col.data_type in [ + "integer", + "int", + "nonPositiveInteger", + "nonNegativeInteger", + "positiveInteger", + "negativeInteger", + ]: return Integer if col.data_type == "boolean": return Boolean - if col.data_type == "byte": + if col.data_type in ["short", "byte"]: return SmallInteger if col.data_type == "long": return BigInteger @@ -77,20 +84,10 @@ def types_mapping_mssql(temp: bool, col: "DataModelColumn") -> Any: """ if col.occurs[1] != 1: return mssql.VARCHAR(8000) - if col.data_type in ["decimal", "float", "double"]: - return Double if col.data_type == "dateTime": # using the DATETIMEOFFSET directly in the temporary table caused issues when inserting data in the target # table with INSERT INTO SELECT converts datetime VARCHAR to DATETIMEOFFSET without errors return mssql.VARCHAR(100) if temp else mssql.DATETIMEOFFSET - if col.data_type == "integer" or col.data_type == "int": - return Integer - if col.data_type == "boolean": - return Boolean - if col.data_type == "byte": - return SmallInteger - if col.data_type == "long": - return BigInteger if col.data_type == "date": return mssql.VARCHAR(16) if col.data_type == "time": @@ -106,12 +103,7 @@ def types_mapping_mssql(temp: bool, col: "DataModelColumn") -> Any: if col.max_length == col.min_length: return mssql.BINARY(col.max_length) return 
mssql.VARBINARY(col.max_length) - else: - logger.warning( - f"unknown type '{col.data_type}' for column '{col.name}', defaulting to VARCHAR(1000) " - f"(this can be overridden by providing a field type in the configuration)" - ) - return mssql.VARCHAR(1000) + return types_mapping_default(temp, col) def types_mapping_mysql(temp: bool, col: "DataModelColumn") -> Any: @@ -167,6 +159,7 @@ def __init__( min_length: int, max_length: Union[int, None], is_attr: bool, + has_suffix: bool, is_content: bool, allow_empty: bool, ngroup: Union[int, None], @@ -181,6 +174,7 @@ def __init__( self.min_length = min_length self.max_length = max_length self.is_attr = is_attr + self.has_suffix = has_suffix self.is_content = is_content self.allow_empty = allow_empty self.ngroup = ngroup diff --git a/src/xml2db/table/reused_table.py b/src/xml2db/table/reused_table.py index 5fb954a..ce3d62f 100644 --- a/src/xml2db/table/reused_table.py +++ b/src/xml2db/table/reused_table.py @@ -71,6 +71,7 @@ def get_col(temp=False): False, False, False, + False, None, self.config, self.data_model, diff --git a/src/xml2db/table/table.py b/src/xml2db/table/table.py index cb9d65b..65d831d 100644 --- a/src/xml2db/table/table.py +++ b/src/xml2db/table/table.py @@ -130,6 +130,7 @@ def add_column( min_length: int, max_length: Union[int, None], is_attr: bool, + has_suffix: bool, is_content: bool, allow_empty: bool, ngroup: Union[str, None], @@ -143,6 +144,7 @@ def add_column( min_length: minimum length max_length: maximum length is_attr: is XML attribute or element? + has_suffix: for an attribute, do we need the '_attr' suffix? is_content: is content of a mixed type element? allow_empty: is nullable? 
ngroup: a string id signaling that the column belongs to a nested sequence @@ -155,6 +157,7 @@ def add_column( min_length, max_length, is_attr, + has_suffix, is_content, allow_empty, ngroup, diff --git a/src/xml2db/table/transformed_table.py b/src/xml2db/table/transformed_table.py index 71e7575..031d858 100644 --- a/src/xml2db/table/transformed_table.py +++ b/src/xml2db/table/transformed_table.py @@ -76,6 +76,7 @@ def _transform_to_choice(self) -> None: False, False, False, + False, None, self.config, self.data_model, @@ -89,6 +90,7 @@ def _transform_to_choice(self) -> None: max(max_lengths) if all(e is not None for e in max_lengths) else None, False, False, + False, any(allow_empty), None, self.config, @@ -193,6 +195,7 @@ def _elevate_relation_1( child_field.min_length, child_field.max_length, child_field.is_attr, + child_field.has_suffix, child_field.is_content, child_field.allow_empty, child_field.ngroup, @@ -276,9 +279,12 @@ def simplify_table(self) -> Tuple[dict, dict]: # if the table can be transformed, stop here if self._is_table_choice_transform_applicable(): + fields_transform = {} + for col in self.columns.values(): + fields_transform[(self.type_name, col.name)] = (None, "join") self._transform_to_choice() self.is_simplified = True - return {self.type_name: "choice"}, {} + return {self.type_name: "choice"}, fields_transform # loop through field to transform them if need be out_fields = [] diff --git a/src/xml2db/xml_converter.py b/src/xml2db/xml_converter.py index 71a5574..ab54a7c 100644 --- a/src/xml2db/xml_converter.py +++ b/src/xml2db/xml_converter.py @@ -128,31 +128,36 @@ def _parse_xml_node( key != "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation" ): - content[key] = [val] + content[f"{key}__attr"] = [val.strip() if val.strip() else val] if node.text and node.text.strip(): content["value"] = [node.text.strip()] for element in node.iterchildren(): - key = element.tag.split("}")[1] if "}" in element.tag else element.tag - 
node_type_key = (node_type, key) - value = None - if element.text and element.text.strip(): - value = element.text - transform = self.model.fields_transforms.get(node_type_key, (None, "join"))[ - 1 - ] - if transform != "join": - value = self._parse_xml_node( - self.model.fields_transforms[node_type_key][0], - element, - transform not in ["elevate", "elevate_wo_prefix"], - hash_maps, - ) - if key in content: - content[key].append(value) - else: - content[key] = [value] + if isinstance(element.tag, str): + key = element.tag.split("}")[1] if "}" in element.tag else element.tag + node_type_key = (node_type, key) + value = None + if element.text: + value = ( + element.text.strip() if element.text.strip() else element.text + ) + if node_type_key not in self.model.fields_transforms: + # skip the node if it is not in the data model + continue + transform = self.model.fields_transforms[node_type_key][1] + if transform != "join": + value = self._parse_xml_node( + self.model.fields_transforms[node_type_key][0], + element, + transform not in ["elevate", "elevate_wo_prefix"], + hash_maps, + ) + if value is not None: + if key in content: + content[key].append(value) + else: + content[key] = [value] node = self._transform_node(node_type, content) @@ -189,6 +194,7 @@ def _parse_iterative( hash_maps = {} joined_values = False + skipped_nodes = 0 for event, element in etree.iterparse( xml_file, recover=recover, @@ -196,12 +202,17 @@ def _parse_iterative( remove_blank_text=True, ): key = element.tag.split("}")[1] if "}" in element.tag else element.tag - if event == "start": + + if event == "start" and skipped_nodes > 0: + skipped_nodes += 1 + + elif event == "start": if nodes_stack[-1][0]: node_type_key = (nodes_stack[-1][0], key) - node_type, transform = self.model.fields_transforms.get( - node_type_key, (None, "join") - ) + if node_type_key not in self.model.fields_transforms: + skipped_nodes += 1 + continue + node_type, transform = self.model.fields_transforms[node_type_key] 
else: node_type, transform = self.model.root_table, None joined_values = transform == "join" @@ -212,28 +223,41 @@ def _parse_iterative( attrib_key != "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation" ): - content[attrib_key] = [attrib_val] + content[f"{attrib_key}__attr"] = [ + attrib_val.strip() if attrib_val.strip() else attrib_val + ] nodes_stack.append((node_type, content)) + elif event == "end" and skipped_nodes > 0: + skipped_nodes -= 1 + elif event == "end": - # joined_values was set with the previous "start" event just before + # joined_values was set with the previous "start" event just before and corresponds to lists of simple + # type elements if joined_values: + value = None if element.text: - if key in nodes_stack[-1][1]: - nodes_stack[-1][1][key].append(element.text) + if element.text.strip(): + value = element.text.strip() else: - nodes_stack[-1][1][key] = [element.text] + value = element.text + if key in nodes_stack[-1][1]: + nodes_stack[-1][1][key].append(value) + else: + nodes_stack[-1][1][key] = [value] + + # else, we have completed a complex type node else: node = nodes_stack.pop() if nodes_stack[-1][0]: node_type_key = (nodes_stack[-1][0], key) - node_type, transform = self.model.fields_transforms.get( - node_type_key, (None, "join") - ) + node_type, transform = self.model.fields_transforms[ + node_type_key + ] else: node_type, transform = self.model.root_table, None - if element.text: - node[1]["value"] = [element.text] + if element.text and element.text.strip(): + node[1]["value"] = [element.text.strip()] node = self._transform_node(*node) if transform not in ["elevate", "elevate_wo_prefix"]: node = self._compute_hash_deduplicate(node, hash_maps) @@ -292,12 +316,28 @@ def _compute_hash_deduplicate(self, node: tuple, hash_maps: dict) -> tuple: A tuple of (node_type, content, hash) representing a node after deduplication """ node_type, content = node + if node_type not in self.model.tables: + return "", None, b"" table = 
self.model.tables[node_type] h = self.model.model_config["record_hash_constructor"]() - for field_type, name, _ in table.fields: + for field_type, name, field in table.fields: if field_type == "col": - h.update(str(content.get(name, None)).encode("utf-8")) + if field.is_attr: + h.update( + str( + content.get( + ( + f"{name[:-5]}__attr" + if field.has_suffix + else f"{name}__attr" + ), + None, + ) + ).encode("utf-8") + ) + else: + h.update(str(content.get(name, None)).encode("utf-8")) elif field_type == "rel1": h.update(content[name][0][2] if name in content else b"") elif field_type == "reln": @@ -419,10 +459,17 @@ def check_transformed_node(node_type, element): attributes = {} text_content = None if field_type == "col": - if rel_name in content: - if rel.is_attr: - attributes[rel.name_chain[-1][0]] = content[rel_name][0] - elif rel.is_content: + if rel.is_attr: + if rel.has_suffix and f"{rel_name[:-5]}__attr" in content: + attributes[rel.name_chain[-1][0][:-5]] = content[ + f"{rel_name[:-5]}__attr" + ][0] + elif not rel.has_suffix and f"{rel_name}__attr" in content: + attributes[rel.name_chain[-1][0]] = content[ + f"{rel_name}__attr" + ][0] + elif rel_name in content: + if rel.is_content: text_content = content[rel_name][0] else: for field_value in content[rel_name]: @@ -446,7 +493,8 @@ def check_transformed_node(node_type, element): if prev_ngroup and rel.ngroup != prev_ngroup: for ngroup_children in zip_longest(*ngroup_stack): for child in ngroup_children: - nodes_stack[-1][1].append(child) + if child is not None: + nodes_stack[-1][1].append(child) ngroup_stack = [] prev_ngroup = rel.ngroup if len(children) > 0: diff --git a/tests/fixtures.py b/tests/conftest.py similarity index 61% rename from tests/fixtures.py rename to tests/conftest.py index 55dbee9..343fecb 100644 --- a/tests/fixtures.py +++ b/tests/conftest.py @@ -4,6 +4,15 @@ from xml2db import DataModel +models_path = "tests/sample_models" + + +def list_xml_path(test_config, key): + path = 
os.path.join(models_path, test_config["id"], key) + if os.path.isdir(path): + return [os.path.join(path, f) for f in os.listdir(path)] + return [] + @pytest.fixture def conn_string(): @@ -14,7 +23,9 @@ def conn_string(): def setup_db_model(conn_string, model_config): db_schema = f"test_xml2db" model = DataModel( - xsd_file=model_config.get("xsd_path"), + xsd_file=str( + os.path.join(models_path, model_config["id"], model_config["xsd"]) + ), short_name=model_config.get("id"), connection_string=conn_string, db_schema=db_schema, diff --git a/tests/sample_models/models.py b/tests/sample_models/models.py index 666fe58..bda579c 100644 --- a/tests/sample_models/models.py +++ b/tests/sample_models/models.py @@ -18,8 +18,7 @@ def wrapped(): "id": "orders", "long_name": "A simple model for shipment orders", "description": "This model was made up to be a simple case which could represent real business cases.", - "xsd_path": "tests/sample_models/orders/orders.xsd", - "xml_path": "tests/sample_models/orders/xml", + "xsd": "orders.xsd", "versions": [ { "config": { @@ -80,8 +79,7 @@ def wrapped(): "long_name": "Data model for reporting standard contracts in the European energy markets", "description": "This model is one of the official models published by the Agency for the cooperation of energy" "regulators to report energy markets transaction data.", - "xsd_path": "tests/sample_models/table1/Table1_V2.xsd", - "xml_path": "tests/sample_models/table1/xml", + "xsd": "Table1_V2.xsd", "versions": [ { "config": { @@ -137,8 +135,7 @@ def wrapped(): "Xunit XSD was taken from https://github.com/jenkinsci/xunit-plugin/blob/master/src/main/resources/org/jenkinsci/plugins/xunit/types/model/xsd/junit-10.xsd" " and amended to remove its recursive nature." 
), - "xsd_path": "tests/sample_models/junit10/junit-10.xsd", - "xml_path": "tests/sample_models/junit10/xml", + "xsd": "junit-10.xsd", "versions": [ { "config": { @@ -185,7 +182,7 @@ def _generate_models_output(): for model_config in models: for i in range(len(model_config["versions"])): - xsd_path = os.path.join("../../", model_config["xsd_path"]) + xsd_path = os.path.join(model_config["id"], model_config["xsd"]) for dialect in [d.dialect() for d in [postgresql, mssql, mysql]]: model = DataModel( xsd_path, diff --git a/tests/sample_models/orders/base_types.xsd b/tests/sample_models/orders/base_types.xsd index 20d4740..cadefa7 100644 --- a/tests/sample_models/orders/base_types.xsd +++ b/tests/sample_models/orders/base_types.xsd @@ -1,5 +1,6 @@ @@ -32,4 +33,10 @@ + + + + + + \ No newline at end of file diff --git a/tests/sample_models/orders/equivalent_xml/order1.xml b/tests/sample_models/orders/equivalent_xml/order1.xml new file mode 100644 index 0000000..4ed059c --- /dev/null +++ b/tests/sample_models/orders/equivalent_xml/order1.xml @@ -0,0 +1,46 @@ + + + 2 + + + Bob +
string
+ string + 102832 + FR + +1732897354 + +1732323984 + + JIDAZIO786DAZH + +
+ + + product 1 + regular + + 13 + 340.23 + EUR + + + + product 2 + regular + + string + 8 + 56.2 + EUR + + + + product 1 + premium + + 25 + 21.7 + EUR + +
+
diff --git a/tests/sample_models/orders/equivalent_xml/order1a.xml b/tests/sample_models/orders/equivalent_xml/order1a.xml new file mode 100644 index 0000000..481996e --- /dev/null +++ b/tests/sample_models/orders/equivalent_xml/order1a.xml @@ -0,0 +1,62 @@ + + + 2 + + + Bob +
string
+ string + 102832 + FR + +1732897354 + +1732323984 + + JIDAZIO786DAZH + +
+ + + + product 1 + + regular + + + product 1 + + premium + + + + product 1 + + basic + + + 13 + + 340.23 + + EUR + + + + product 2 + regular + + string + 8 + 56.2 + EUR + + + + product 1 + premium + + 25 + 21.7 + EUR + +
+
diff --git a/tests/sample_models/orders/invalid_xml/invalid.xml b/tests/sample_models/orders/invalid_xml/invalid.xml index 850c092..e77a8d6 100644 --- a/tests/sample_models/orders/invalid_xml/invalid.xml +++ b/tests/sample_models/orders/invalid_xml/invalid.xml @@ -1,14 +1,14 @@ - + Alice
string
string 21093 US
- + Bob
string
string diff --git a/tests/sample_models/orders/invalid_xml/malformed_recover.xml b/tests/sample_models/orders/invalid_xml/malformed_recover.xml index 8fd23af..302205c 100644 --- a/tests/sample_models/orders/invalid_xml/malformed_recover.xml +++ b/tests/sample_models/orders/invalid_xml/malformed_recover.xml @@ -2,7 +2,7 @@ 3 - + Bob
string
string diff --git a/tests/sample_models/orders/orders.xsd b/tests/sample_models/orders/orders.xsd index 20b5d65..c54fcd3 100644 --- a/tests/sample_models/orders/orders.xsd +++ b/tests/sample_models/orders/orders.xsd @@ -34,6 +34,29 @@ + + + + + + + + + + + + + + + + + + + + + + + @@ -41,6 +64,8 @@ + + diff --git a/tests/sample_models/orders/orders_ddl_mssql_version0.sql b/tests/sample_models/orders/orders_ddl_mssql_version0.sql index 859ac89..3371f25 100644 --- a/tests/sample_models/orders/orders_ddl_mssql_version0.sql +++ b/tests/sample_models/orders/orders_ddl_mssql_version0.sql @@ -1,6 +1,7 @@ CREATE TABLE orderperson ( pk_orderperson INTEGER NOT NULL IDENTITY, + name_attr VARCHAR(1000) NULL, name VARCHAR(1000) NULL, address VARCHAR(1000) NULL, city VARCHAR(1000) NULL, @@ -10,12 +11,33 @@ CREATE TABLE orderperson ( [phoneNumber] VARCHAR(8000) NULL, [companyId_type] CHAR(3) NULL, [companyId_value] VARCHAR(1000) NULL, + coordinates VARCHAR(1000) NULL, record_hash BINARY(20) NULL, CONSTRAINT cx_pk_orderperson PRIMARY KEY CLUSTERED (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature INTEGER NOT NULL IDENTITY, + id VARCHAR(1000) NULL, + value INTEGER NULL, + record_hash BINARY(20) NULL, + CONSTRAINT cx_pk_intfeature PRIMARY KEY CLUSTERED (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature INTEGER NOT NULL IDENTITY, + id VARCHAR(1000) NULL, + value VARCHAR(1000) NULL, + record_hash BINARY(20) NULL, + CONSTRAINT cx_pk_stringfeature PRIMARY KEY CLUSTERED (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash) +) + + CREATE TABLE item ( pk_item INTEGER NOT NULL IDENTITY, product_name VARCHAR(1000) NULL, @@ -30,6 +52,22 @@ CREATE TABLE item ( ) +CREATE TABLE item_product_features_intfeature ( + fk_item INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + FOREIGN KEY(fk_item) 
REFERENCES item (pk_item), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE item_product_features_stringfeature ( + fk_item INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + + CREATE TABLE shiporder ( pk_shiporder INTEGER NOT NULL IDENTITY, orderid VARCHAR(1000) NULL, @@ -70,6 +108,14 @@ CREATE TABLE orders_shiporder ( FOREIGN KEY(fk_shiporder) REFERENCES shiporder (pk_shiporder) ) +CREATE CLUSTERED INDEX ix_fk_item_product_features_intfeature ON item_product_features_intfeature (fk_item, fk_intfeature) + +CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature) + +CREATE CLUSTERED INDEX ix_fk_item_product_features_stringfeature ON item_product_features_stringfeature (fk_item, fk_stringfeature) + +CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature) + CREATE CLUSTERED INDEX ix_fk_shiporder_item ON shiporder_item (fk_shiporder, fk_item) CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item) diff --git a/tests/sample_models/orders/orders_ddl_mssql_version1.sql b/tests/sample_models/orders/orders_ddl_mssql_version1.sql index a6b1946..21f3854 100644 --- a/tests/sample_models/orders/orders_ddl_mssql_version1.sql +++ b/tests/sample_models/orders/orders_ddl_mssql_version1.sql @@ -1,6 +1,7 @@ CREATE TABLE orderperson ( pk_orderperson INTEGER NOT NULL IDENTITY, + name_attr VARCHAR(1000) NULL, name VARCHAR(1000) NULL, address VARCHAR(1000) NULL, city VARCHAR(1000) NULL, @@ -11,12 +12,33 @@ CREATE TABLE orderperson ( [companyId_ace] VARCHAR(1000) NULL, [companyId_bic] VARCHAR(1000) NULL, [companyId_lei] VARCHAR(1000) NULL, + coordinates VARCHAR(1000) NULL, record_hash BINARY(16) NULL, CONSTRAINT cx_pk_orderperson PRIMARY KEY CLUSTERED (pk_orderperson), CONSTRAINT 
orderperson_xml2db_record_hash UNIQUE (record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature INTEGER NOT NULL IDENTITY, + id VARCHAR(1000) NULL, + value INTEGER NULL, + record_hash BINARY(16) NULL, + CONSTRAINT cx_pk_intfeature PRIMARY KEY CLUSTERED (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature INTEGER NOT NULL IDENTITY, + id VARCHAR(1000) NULL, + value VARCHAR(1000) NULL, + record_hash BINARY(16) NULL, + CONSTRAINT cx_pk_stringfeature PRIMARY KEY CLUSTERED (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash) +) + + CREATE TABLE shiporder ( pk_shiporder INTEGER NOT NULL IDENTITY, orderid VARCHAR(1000) NULL, @@ -54,6 +76,7 @@ CREATE TABLE orders_shiporder ( CREATE TABLE item ( pk_item INTEGER NOT NULL IDENTITY, + temp_pk_item INTEGER NULL, fk_parent_shiporder INTEGER NULL, xml2db_row_number INTEGER NOT NULL, product_name VARCHAR(1000) NULL, @@ -66,7 +89,33 @@ CREATE TABLE item ( FOREIGN KEY(fk_parent_shiporder) REFERENCES shiporder (pk_shiporder) ) + +CREATE TABLE item_product_features_intfeature ( + fk_item INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + xml2db_row_number INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE item_product_features_stringfeature ( + fk_item INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + xml2db_row_number INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + CREATE CLUSTERED INDEX ix_fk_orders_shiporder ON orders_shiporder (fk_orders, fk_shiporder) CREATE INDEX ix_orders_shiporder_fk_shiporder ON orders_shiporder (fk_shiporder) +CREATE CLUSTERED INDEX ix_fk_item_product_features_intfeature ON item_product_features_intfeature (fk_item, fk_intfeature) + +CREATE INDEX 
ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature) + +CREATE CLUSTERED INDEX ix_fk_item_product_features_stringfeature ON item_product_features_stringfeature (fk_item, fk_stringfeature) + +CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature) + diff --git a/tests/sample_models/orders/orders_ddl_mssql_version2.sql b/tests/sample_models/orders/orders_ddl_mssql_version2.sql index 8d6fa31..a533b08 100644 --- a/tests/sample_models/orders/orders_ddl_mssql_version2.sql +++ b/tests/sample_models/orders/orders_ddl_mssql_version2.sql @@ -12,6 +12,7 @@ CREATE TABLE orders ( CREATE TABLE orderperson ( pk_orderperson INTEGER NOT NULL IDENTITY, + name_attr VARCHAR(1000) NULL, name VARCHAR(1000) NULL, address VARCHAR(1000) NULL, city VARCHAR(1000) NULL, @@ -21,12 +22,33 @@ CREATE TABLE orderperson ( [phoneNumber] VARCHAR(8000) NULL, [companyId_type] CHAR(3) NULL, [companyId_value] VARCHAR(1000) NULL, + coordinates VARCHAR(1000) NULL, xml2db_record_hash BINARY(20) NULL, CONSTRAINT cx_pk_orderperson PRIMARY KEY CLUSTERED (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (xml2db_record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature INTEGER NOT NULL IDENTITY, + id VARCHAR(1000) NULL, + value INTEGER NULL, + xml2db_record_hash BINARY(20) NULL, + CONSTRAINT cx_pk_intfeature PRIMARY KEY CLUSTERED (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (xml2db_record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature INTEGER NOT NULL IDENTITY, + id VARCHAR(1000) NULL, + value VARCHAR(1000) NULL, + xml2db_record_hash BINARY(20) NULL, + CONSTRAINT cx_pk_stringfeature PRIMARY KEY CLUSTERED (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (xml2db_record_hash) +) + + CREATE TABLE product ( pk_product INTEGER NOT NULL IDENTITY, name VARCHAR(1000) NULL, @@ -37,6 +59,22 @@ CREATE TABLE product ( ) +CREATE 
TABLE product_features_intfeature ( + fk_product INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + FOREIGN KEY(fk_product) REFERENCES product (pk_product), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE product_features_stringfeature ( + fk_product INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + FOREIGN KEY(fk_product) REFERENCES product (pk_product), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + + CREATE TABLE item ( pk_item INTEGER NOT NULL IDENTITY, fk_product INTEGER NULL, @@ -57,6 +95,7 @@ CREATE TABLE shiporder ( fk_parent_orders INTEGER NULL, orderid VARCHAR(1000) NULL, processed_at DATETIMEOFFSET NULL, + orderperson_name_attr VARCHAR(1000) NULL, orderperson_name VARCHAR(1000) NULL, orderperson_address VARCHAR(1000) NULL, orderperson_city VARCHAR(1000) NULL, @@ -66,6 +105,7 @@ CREATE TABLE shiporder ( [orderperson_phoneNumber] VARCHAR(8000) NULL, [orderperson_companyId_type] CHAR(3) NULL, [orderperson_companyId_value] VARCHAR(1000) NULL, + orderperson_coordinates VARCHAR(1000) NULL, shipto_fk_orderperson INTEGER NULL, CONSTRAINT cx_pk_shiporder PRIMARY KEY CLUSTERED (pk_shiporder), FOREIGN KEY(fk_parent_orders) REFERENCES orders (pk_orders), @@ -80,6 +120,14 @@ CREATE TABLE shiporder_item ( FOREIGN KEY(fk_item) REFERENCES item (pk_item) ) +CREATE CLUSTERED INDEX ix_fk_product_features_intfeature ON product_features_intfeature (fk_product, fk_intfeature) + +CREATE INDEX ix_product_features_intfeature_fk_intfeature ON product_features_intfeature (fk_intfeature) + +CREATE CLUSTERED INDEX ix_fk_product_features_stringfeature ON product_features_stringfeature (fk_product, fk_stringfeature) + +CREATE INDEX ix_product_features_stringfeature_fk_stringfeature ON product_features_stringfeature (fk_stringfeature) + CREATE CLUSTERED INDEX ix_fk_shiporder_item ON shiporder_item (fk_shiporder, fk_item) CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item) diff --git 
a/tests/sample_models/orders/orders_ddl_mysql_version0.sql b/tests/sample_models/orders/orders_ddl_mysql_version0.sql index 17ea3a3..70929f2 100644 --- a/tests/sample_models/orders/orders_ddl_mysql_version0.sql +++ b/tests/sample_models/orders/orders_ddl_mysql_version0.sql @@ -1,6 +1,7 @@ CREATE TABLE orderperson ( pk_orderperson INTEGER NOT NULL AUTO_INCREMENT, + name_attr VARCHAR(255), name VARCHAR(255), address VARCHAR(255), city VARCHAR(255), @@ -10,12 +11,33 @@ CREATE TABLE orderperson ( `phoneNumber` VARCHAR(4000), `companyId_type` VARCHAR(3), `companyId_value` VARCHAR(255), + coordinates VARCHAR(255), record_hash BINARY(20), CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature INTEGER NOT NULL AUTO_INCREMENT, + id VARCHAR(255), + value INTEGER, + record_hash BINARY(20), + CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature INTEGER NOT NULL AUTO_INCREMENT, + id VARCHAR(255), + value VARCHAR(255), + record_hash BINARY(20), + CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash) +) + + CREATE TABLE item ( pk_item INTEGER NOT NULL AUTO_INCREMENT, product_name VARCHAR(255), @@ -30,6 +52,22 @@ CREATE TABLE item ( ) +CREATE TABLE item_product_features_intfeature ( + fk_item INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE item_product_features_stringfeature ( + fk_item INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + + CREATE TABLE shiporder ( pk_shiporder INTEGER NOT NULL 
AUTO_INCREMENT, orderid VARCHAR(255), @@ -70,6 +108,14 @@ CREATE TABLE orders_shiporder ( FOREIGN KEY(fk_shiporder) REFERENCES shiporder (pk_shiporder) ) +CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature) + +CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature) + CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item) CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder) diff --git a/tests/sample_models/orders/orders_ddl_mysql_version1.sql b/tests/sample_models/orders/orders_ddl_mysql_version1.sql index 38f3e2c..89d8d35 100644 --- a/tests/sample_models/orders/orders_ddl_mysql_version1.sql +++ b/tests/sample_models/orders/orders_ddl_mysql_version1.sql @@ -1,6 +1,7 @@ CREATE TABLE orderperson ( pk_orderperson INTEGER NOT NULL AUTO_INCREMENT, + name_attr VARCHAR(255), name VARCHAR(255), address VARCHAR(255), city VARCHAR(255), @@ -11,12 +12,33 @@ CREATE TABLE orderperson ( `companyId_ace` VARCHAR(255), `companyId_bic` VARCHAR(255), `companyId_lei` VARCHAR(255), + coordinates VARCHAR(255), record_hash BINARY(16), CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature INTEGER NOT NULL AUTO_INCREMENT, + id VARCHAR(255), + value INTEGER, + record_hash BINARY(16), + CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature INTEGER NOT NULL AUTO_INCREMENT, + id VARCHAR(255), + value VARCHAR(255), + record_hash BINARY(16), + CONSTRAINT cx_pk_stringfeature PRIMARY 
KEY (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash) +) + + CREATE TABLE shiporder ( pk_shiporder INTEGER NOT NULL AUTO_INCREMENT, orderid VARCHAR(255), @@ -54,6 +76,7 @@ CREATE TABLE orders_shiporder ( CREATE TABLE item ( pk_item INTEGER NOT NULL AUTO_INCREMENT, + temp_pk_item INTEGER, fk_parent_shiporder INTEGER, xml2db_row_number INTEGER NOT NULL, product_name VARCHAR(255), @@ -66,7 +89,33 @@ CREATE TABLE item ( FOREIGN KEY(fk_parent_shiporder) REFERENCES shiporder (pk_shiporder) ) + +CREATE TABLE item_product_features_intfeature ( + fk_item INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + xml2db_row_number INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE item_product_features_stringfeature ( + fk_item INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + xml2db_row_number INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + CREATE INDEX ix_orders_shiporder_fk_orders ON orders_shiporder (fk_orders) CREATE INDEX ix_orders_shiporder_fk_shiporder ON orders_shiporder (fk_shiporder) +CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature) + +CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature) + diff --git a/tests/sample_models/orders/orders_ddl_mysql_version2.sql b/tests/sample_models/orders/orders_ddl_mysql_version2.sql index 6ff42b8..f9901cf 100644 --- a/tests/sample_models/orders/orders_ddl_mysql_version2.sql +++ b/tests/sample_models/orders/orders_ddl_mysql_version2.sql @@ 
-12,6 +12,7 @@ CREATE TABLE orders ( CREATE TABLE orderperson ( pk_orderperson INTEGER NOT NULL AUTO_INCREMENT, + name_attr VARCHAR(255), name VARCHAR(255), address VARCHAR(255), city VARCHAR(255), @@ -21,12 +22,33 @@ CREATE TABLE orderperson ( `phoneNumber` VARCHAR(4000), `companyId_type` VARCHAR(3), `companyId_value` VARCHAR(255), + coordinates VARCHAR(255), xml2db_record_hash BINARY(20), CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (xml2db_record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature INTEGER NOT NULL AUTO_INCREMENT, + id VARCHAR(255), + value INTEGER, + xml2db_record_hash BINARY(20), + CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (xml2db_record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature INTEGER NOT NULL AUTO_INCREMENT, + id VARCHAR(255), + value VARCHAR(255), + xml2db_record_hash BINARY(20), + CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (xml2db_record_hash) +) + + CREATE TABLE product ( pk_product INTEGER NOT NULL AUTO_INCREMENT, name VARCHAR(255), @@ -37,6 +59,22 @@ CREATE TABLE product ( ) +CREATE TABLE product_features_intfeature ( + fk_product INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + FOREIGN KEY(fk_product) REFERENCES product (pk_product), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE product_features_stringfeature ( + fk_product INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + FOREIGN KEY(fk_product) REFERENCES product (pk_product), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + + CREATE TABLE item ( pk_item INTEGER NOT NULL AUTO_INCREMENT, fk_product INTEGER, @@ -57,6 +95,7 @@ CREATE TABLE shiporder ( fk_parent_orders INTEGER, orderid VARCHAR(255), processed_at DATETIME, + orderperson_name_attr VARCHAR(255), orderperson_name 
VARCHAR(255), orderperson_address VARCHAR(255), orderperson_city VARCHAR(255), @@ -66,6 +105,7 @@ CREATE TABLE shiporder ( `orderperson_phoneNumber` VARCHAR(4000), `orderperson_companyId_type` VARCHAR(3), `orderperson_companyId_value` VARCHAR(255), + orderperson_coordinates VARCHAR(255), shipto_fk_orderperson INTEGER, CONSTRAINT cx_pk_shiporder PRIMARY KEY (pk_shiporder), FOREIGN KEY(fk_parent_orders) REFERENCES orders (pk_orders), @@ -80,6 +120,14 @@ CREATE TABLE shiporder_item ( FOREIGN KEY(fk_item) REFERENCES item (pk_item) ) +CREATE INDEX ix_product_features_intfeature_fk_intfeature ON product_features_intfeature (fk_intfeature) + +CREATE INDEX ix_product_features_intfeature_fk_product ON product_features_intfeature (fk_product) + +CREATE INDEX ix_product_features_stringfeature_fk_product ON product_features_stringfeature (fk_product) + +CREATE INDEX ix_product_features_stringfeature_fk_stringfeature ON product_features_stringfeature (fk_stringfeature) + CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item) CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder) diff --git a/tests/sample_models/orders/orders_ddl_postgresql_version0.sql b/tests/sample_models/orders/orders_ddl_postgresql_version0.sql index 1fa502c..540e8de 100644 --- a/tests/sample_models/orders/orders_ddl_postgresql_version0.sql +++ b/tests/sample_models/orders/orders_ddl_postgresql_version0.sql @@ -1,6 +1,7 @@ CREATE TABLE orderperson ( pk_orderperson SERIAL NOT NULL, + name_attr VARCHAR(1000), name VARCHAR(1000), address VARCHAR(1000), city VARCHAR(1000), @@ -10,12 +11,33 @@ CREATE TABLE orderperson ( "phoneNumber" VARCHAR(8000), "companyId_type" VARCHAR(3), "companyId_value" VARCHAR(1000), + coordinates VARCHAR(1000), record_hash BYTEA, CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature SERIAL NOT NULL, + id VARCHAR(1000), + value INTEGER, + 
record_hash BYTEA, + CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature SERIAL NOT NULL, + id VARCHAR(1000), + value VARCHAR(1000), + record_hash BYTEA, + CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash) +) + + CREATE TABLE item ( pk_item SERIAL NOT NULL, product_name VARCHAR(1000), @@ -30,6 +52,22 @@ CREATE TABLE item ( ) +CREATE TABLE item_product_features_intfeature ( + fk_item INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE item_product_features_stringfeature ( + fk_item INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + + CREATE TABLE shiporder ( pk_shiporder SERIAL NOT NULL, orderid VARCHAR(1000), @@ -70,6 +108,14 @@ CREATE TABLE orders_shiporder ( FOREIGN KEY(fk_shiporder) REFERENCES shiporder (pk_shiporder) ) +CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature) + +CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature) + CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item) CREATE INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder) diff --git a/tests/sample_models/orders/orders_ddl_postgresql_version1.sql b/tests/sample_models/orders/orders_ddl_postgresql_version1.sql index e1f7224..7d1bf33 100644 --- 
a/tests/sample_models/orders/orders_ddl_postgresql_version1.sql +++ b/tests/sample_models/orders/orders_ddl_postgresql_version1.sql @@ -1,6 +1,7 @@ CREATE TABLE orderperson ( pk_orderperson SERIAL NOT NULL, + name_attr VARCHAR(1000), name VARCHAR(1000), address VARCHAR(1000), city VARCHAR(1000), @@ -11,12 +12,33 @@ CREATE TABLE orderperson ( "companyId_ace" VARCHAR(1000), "companyId_bic" VARCHAR(1000), "companyId_lei" VARCHAR(1000), + coordinates VARCHAR(1000), record_hash BYTEA, CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature SERIAL NOT NULL, + id VARCHAR(1000), + value INTEGER, + record_hash BYTEA, + CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature SERIAL NOT NULL, + id VARCHAR(1000), + value VARCHAR(1000), + record_hash BYTEA, + CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature), + CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (record_hash) +) + + CREATE TABLE shiporder ( pk_shiporder SERIAL NOT NULL, orderid VARCHAR(1000), @@ -54,6 +76,7 @@ CREATE TABLE orders_shiporder ( CREATE TABLE item ( pk_item SERIAL NOT NULL, + temp_pk_item INTEGER, fk_parent_shiporder INTEGER, xml2db_row_number INTEGER NOT NULL, product_name VARCHAR(1000), @@ -66,7 +89,33 @@ CREATE TABLE item ( FOREIGN KEY(fk_parent_shiporder) REFERENCES shiporder (pk_shiporder) ) + +CREATE TABLE item_product_features_intfeature ( + fk_item INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + xml2db_row_number INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item (pk_item), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE item_product_features_stringfeature ( + fk_item INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + xml2db_row_number INTEGER NOT NULL, + FOREIGN KEY(fk_item) REFERENCES item 
(pk_item), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + CREATE INDEX ix_orders_shiporder_fk_orders ON orders_shiporder (fk_orders) CREATE INDEX ix_orders_shiporder_fk_shiporder ON orders_shiporder (fk_shiporder) +CREATE INDEX ix_item_product_features_intfeature_fk_intfeature ON item_product_features_intfeature (fk_intfeature) + +CREATE INDEX ix_item_product_features_intfeature_fk_item ON item_product_features_intfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_item ON item_product_features_stringfeature (fk_item) + +CREATE INDEX ix_item_product_features_stringfeature_fk_stringfeature ON item_product_features_stringfeature (fk_stringfeature) + diff --git a/tests/sample_models/orders/orders_ddl_postgresql_version2.sql b/tests/sample_models/orders/orders_ddl_postgresql_version2.sql index a1d4fa8..17f6cd2 100644 --- a/tests/sample_models/orders/orders_ddl_postgresql_version2.sql +++ b/tests/sample_models/orders/orders_ddl_postgresql_version2.sql @@ -12,6 +12,7 @@ CREATE TABLE orders ( CREATE TABLE orderperson ( pk_orderperson SERIAL NOT NULL, + name_attr VARCHAR(1000), name VARCHAR(1000), address VARCHAR(1000), city VARCHAR(1000), @@ -21,12 +22,33 @@ CREATE TABLE orderperson ( "phoneNumber" VARCHAR(8000), "companyId_type" VARCHAR(3), "companyId_value" VARCHAR(1000), + coordinates VARCHAR(1000), xml2db_record_hash BYTEA, CONSTRAINT cx_pk_orderperson PRIMARY KEY (pk_orderperson), CONSTRAINT orderperson_xml2db_record_hash UNIQUE (xml2db_record_hash) ) +CREATE TABLE intfeature ( + pk_intfeature SERIAL NOT NULL, + id VARCHAR(1000), + value INTEGER, + xml2db_record_hash BYTEA, + CONSTRAINT cx_pk_intfeature PRIMARY KEY (pk_intfeature), + CONSTRAINT intfeature_xml2db_record_hash UNIQUE (xml2db_record_hash) +) + + +CREATE TABLE stringfeature ( + pk_stringfeature SERIAL NOT NULL, + id VARCHAR(1000), + value VARCHAR(1000), + xml2db_record_hash BYTEA, + CONSTRAINT cx_pk_stringfeature PRIMARY KEY (pk_stringfeature), + 
CONSTRAINT stringfeature_xml2db_record_hash UNIQUE (xml2db_record_hash) +) + + CREATE TABLE product ( pk_product SERIAL NOT NULL, name VARCHAR(1000), @@ -37,6 +59,22 @@ CREATE TABLE product ( ) +CREATE TABLE product_features_intfeature ( + fk_product INTEGER NOT NULL, + fk_intfeature INTEGER NOT NULL, + FOREIGN KEY(fk_product) REFERENCES product (pk_product), + FOREIGN KEY(fk_intfeature) REFERENCES intfeature (pk_intfeature) +) + + +CREATE TABLE product_features_stringfeature ( + fk_product INTEGER NOT NULL, + fk_stringfeature INTEGER NOT NULL, + FOREIGN KEY(fk_product) REFERENCES product (pk_product), + FOREIGN KEY(fk_stringfeature) REFERENCES stringfeature (pk_stringfeature) +) + + CREATE TABLE item ( pk_item SERIAL NOT NULL, fk_product INTEGER, @@ -57,6 +95,7 @@ CREATE TABLE shiporder ( fk_parent_orders INTEGER, orderid VARCHAR(1000), processed_at TIMESTAMP WITH TIME ZONE, + orderperson_name_attr VARCHAR(1000), orderperson_name VARCHAR(1000), orderperson_address VARCHAR(1000), orderperson_city VARCHAR(1000), @@ -66,6 +105,7 @@ CREATE TABLE shiporder ( "orderperson_phoneNumber" VARCHAR(8000), "orderperson_companyId_type" VARCHAR(3), "orderperson_companyId_value" VARCHAR(1000), + orderperson_coordinates VARCHAR(1000), shipto_fk_orderperson INTEGER, CONSTRAINT cx_pk_shiporder PRIMARY KEY (pk_shiporder), FOREIGN KEY(fk_parent_orders) REFERENCES orders (pk_orders), @@ -80,6 +120,14 @@ CREATE TABLE shiporder_item ( FOREIGN KEY(fk_item) REFERENCES item (pk_item) ) +CREATE INDEX ix_product_features_intfeature_fk_intfeature ON product_features_intfeature (fk_intfeature) + +CREATE INDEX ix_product_features_intfeature_fk_product ON product_features_intfeature (fk_product) + +CREATE INDEX ix_product_features_stringfeature_fk_product ON product_features_stringfeature (fk_product) + +CREATE INDEX ix_product_features_stringfeature_fk_stringfeature ON product_features_stringfeature (fk_stringfeature) + CREATE INDEX ix_shiporder_item_fk_item ON shiporder_item (fk_item) CREATE 
INDEX ix_shiporder_item_fk_shiporder ON shiporder_item (fk_shiporder) diff --git a/tests/sample_models/orders/orders_erd_version0.md b/tests/sample_models/orders/orders_erd_version0.md index cb0a282..443b28c 100644 --- a/tests/sample_models/orders/orders_erd_version0.md +++ b/tests/sample_models/orders/orders_erd_version0.md @@ -12,6 +12,8 @@ erDiagram string orderid dateTime processed_at } + item ||--o{ intfeature : "product_features_intfeature*" + item ||--o{ stringfeature : "product_features_stringfeature*" item { string product_name string product_version @@ -20,7 +22,16 @@ erDiagram decimal price string currency } + stringfeature { + string id + string value + } + intfeature { + string id + integer value + } orderperson { + string name_attr string name string address string city @@ -30,5 +41,6 @@ erDiagram string-N phoneNumber string companyId_type string companyId_value + string coordinates } ``` \ No newline at end of file diff --git a/tests/sample_models/orders/orders_erd_version1.md b/tests/sample_models/orders/orders_erd_version1.md index 9a28175..4b07efa 100644 --- a/tests/sample_models/orders/orders_erd_version1.md +++ b/tests/sample_models/orders/orders_erd_version1.md @@ -1,5 +1,7 @@ ```mermaid erDiagram + item ||--o{ intfeature : "product_features_intfeature*" + item ||--o{ stringfeature : "product_features_stringfeature*" item { string product_name string product_version @@ -20,7 +22,16 @@ erDiagram string orderid dateTime processed_at } + stringfeature { + string id + string value + } + intfeature { + string id + integer value + } orderperson { + string name_attr string name string address string city @@ -31,5 +42,6 @@ erDiagram string companyId_ace string companyId_bic string companyId_lei + string coordinates } ``` \ No newline at end of file diff --git a/tests/sample_models/orders/orders_erd_version2.md b/tests/sample_models/orders/orders_erd_version2.md index a1d5b75..26e96d9 100644 --- a/tests/sample_models/orders/orders_erd_version2.md +++ 
b/tests/sample_models/orders/orders_erd_version2.md @@ -5,6 +5,7 @@ erDiagram shiporder { string orderid dateTime processed_at + string orderperson_name_attr string orderperson_name string orderperson_address string orderperson_city @@ -14,6 +15,7 @@ erDiagram string-N orderperson_phoneNumber string orderperson_companyId_type string orderperson_companyId_value + string orderperson_coordinates } item ||--|| product : "product" item { @@ -22,11 +24,22 @@ erDiagram decimal price string currency } + product ||--o{ intfeature : "features_intfeature*" + product ||--o{ stringfeature : "features_stringfeature*" product { string name string version } + stringfeature { + string id + string value + } + intfeature { + string id + integer value + } orderperson { + string name_attr string name string address string city @@ -36,6 +49,7 @@ erDiagram string-N phoneNumber string companyId_type string companyId_value + string coordinates } orders ||--o{ shiporder : "shiporder" orders { diff --git a/tests/sample_models/orders/xml/order1.xml b/tests/sample_models/orders/xml/order1.xml index 194a67f..6c04192 100644 --- a/tests/sample_models/orders/xml/order1.xml +++ b/tests/sample_models/orders/xml/order1.xml @@ -2,7 +2,7 @@ 2 - + Bob
string
string @@ -18,6 +18,24 @@ product 1 regular + + + length + 60 + + + width + 40 + + + weight + 10 + + + color + red + + 13 340.23 diff --git a/tests/sample_models/orders/xml/order2.xml b/tests/sample_models/orders/xml/order2.xml index 7040dd3..5926472 100644 --- a/tests/sample_models/orders/xml/order2.xml +++ b/tests/sample_models/orders/xml/order2.xml @@ -2,7 +2,7 @@ 2 - + Alice
string
string diff --git a/tests/sample_models/orders/xml/order3.xml b/tests/sample_models/orders/xml/order3.xml index 2e4a2fc..7cde833 100644 --- a/tests/sample_models/orders/xml/order3.xml +++ b/tests/sample_models/orders/xml/order3.xml @@ -2,14 +2,14 @@ 2 - + Alice
string
string 21093 US
- + Bob
string
string @@ -20,6 +20,7 @@ JIDAZIO786DAZH + 48.87271337163929 2.323433844198471
diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 1f9f582..b279cfa 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,10 +1,12 @@ import os +import pprint import pytest from lxml import etree from xml2db import DataModel from xml2db.xml_converter import XMLConverter, remove_record_hash +from .conftest import list_xml_path, models_path from .sample_models import models @@ -13,19 +15,20 @@ [ {**model, **version, "xml_file": xml_file} for model in models - for xml_file in os.listdir(model["xml_path"]) + for xml_file in list_xml_path(model, "xml") + + list_xml_path(model, "equivalent_xml") for version in model["versions"] ], ) -def test_document_tree_parsing(test_config): +def test_iterative_recursive_parsing(test_config): """Test whether iterative and recursive parsing give same results""" model = DataModel( - test_config["xsd_path"], + str(os.path.join(models_path, test_config["id"], test_config["xsd"])), short_name=test_config["id"], model_config=test_config["config"], ) converter = XMLConverter(model) - file_path = os.path.join(test_config["xml_path"], test_config["xml_file"]) + file_path = test_config["xml_file"] parsed_recursive = converter.parse_xml( file_path, file_path, skip_validation=True, iterparse=False @@ -42,7 +45,7 @@ def test_document_tree_parsing(test_config): [ {**model, **version, "xml_file": xml_file} for model in models - for xml_file in os.listdir(model["xml_path"]) + for xml_file in list_xml_path(model, "xml") for version in model["versions"] ], ) @@ -50,22 +53,22 @@ def test_document_tree_to_flat_data(test_config): """A test for document tree to flat data conversion and back""" model = DataModel( - test_config["xsd_path"], + str(os.path.join(models_path, test_config["id"], test_config["xsd"])), short_name=test_config["id"], model_config=test_config["config"], ) converter = XMLConverter(model) - file_path = os.path.join(test_config["xml_path"], test_config["xml_file"]) + file_path = 
test_config["xml_file"] # parse XML to document tree converter.parse_xml(file_path, file_path) - exp_doc_tree = remove_record_hash(converter.document_tree) + exp_doc_tree = pprint.pformat(remove_record_hash(converter.document_tree)) # parse XML to document tree and then flat data model doc = model.parse_xml(file_path) # and convert it back to document tree - act_doc_tree = doc.flat_data_to_doc_tree() + act_doc_tree = pprint.pformat(doc.flat_data_to_doc_tree()) assert act_doc_tree == exp_doc_tree @@ -75,7 +78,7 @@ def test_document_tree_to_flat_data(test_config): [ {**model, **version, "xml_file": xml_file} for model in models - for xml_file in os.listdir(model["xml_path"]) + for xml_file in list_xml_path(model, "xml") for version in model["versions"] ], ) @@ -83,13 +86,13 @@ def test_document_tree_to_xml(test_config): """A test for document tree to xml conversion and back""" model = DataModel( - test_config["xsd_path"], + str(os.path.join(models_path, test_config["id"], test_config["xsd"])), short_name=test_config["id"], model_config=test_config["config"], ) converter = XMLConverter(model) - file_path = os.path.join(test_config["xml_path"], test_config["xml_file"]) + file_path = test_config["xml_file"] # parse XML to document tree converter.parse_xml(file_path, file_path) @@ -112,3 +115,29 @@ def test_document_tree_to_xml(test_config): ref_xml = f.read() assert xml == ref_xml + + +@pytest.mark.parametrize( + "test_config", + [ + {**model, **version} + for model in models + for version in model["versions"] + if os.path.isdir(os.path.join(models_path, model["id"], "equivalent_xml")) + ], +) +def test_equivalent_xml(test_config): + """A test for xml documents which should result in the same extracted data""" + + xml_files = list_xml_path(test_config, "equivalent_xml") + + if len(xml_files) > 1: + model = DataModel( + str(os.path.join(models_path, test_config["id"], test_config["xsd"])), + short_name=test_config["id"], + model_config=test_config["config"], + ) + 
ref_data = model.parse_xml(xml_files[0]) + for xml_file in xml_files[1:]: + equ_data = model.parse_xml(xml_file) + assert ref_data.data == equ_data.data diff --git a/tests/test_models_output.py b/tests/test_models_output.py index 0539080..b5245a0 100644 --- a/tests/test_models_output.py +++ b/tests/test_models_output.py @@ -5,6 +5,7 @@ from xml2db import DataModel from .sample_models import models +from .conftest import models_path @pytest.mark.parametrize( @@ -19,14 +20,15 @@ def test_model_erd(test_config): """A test to check if generated ERD matches saved output""" model = DataModel( - test_config["xsd_path"], + str(os.path.join(models_path, test_config["id"], test_config["xsd"])), short_name=test_config["id"], model_config=test_config["config"], ) expected = open( os.path.join( - os.path.dirname(test_config["xsd_path"]), + models_path, + test_config["id"], f"{test_config['id']}_erd_version{test_config['version_id']}.md", ), "r", @@ -49,7 +51,7 @@ def test_model_ddl(test_config): """A test to check if generated SQL DDL matches saved output""" model = DataModel( - test_config["xsd_path"], + str(os.path.join(models_path, test_config["id"], test_config["xsd"])), short_name=test_config["id"], model_config=test_config["config"], db_type=test_config["dialect"].name, @@ -57,7 +59,8 @@ def test_model_ddl(test_config): expected = open( os.path.join( - os.path.dirname(test_config["xsd_path"]), + models_path, + test_config["id"], f"{test_config['id']}_ddl_{test_config['dialect'].name}_version{test_config['version_id']}.sql", ), "r", diff --git a/tests/test_roundtrip.py b/tests/test_roundtrip.py index f0aaf43..8f63496 100644 --- a/tests/test_roundtrip.py +++ b/tests/test_roundtrip.py @@ -4,7 +4,7 @@ from lxml import etree from xml2db.xml_converter import XMLConverter, remove_record_hash -from .fixtures import setup_db_model, conn_string +from .conftest import list_xml_path from .sample_models import models @@ -17,10 +17,7 @@ def test_database_xml_roundtrip(setup_db_model, 
model_config): """A test for roundtrip insert to the database from and to XML""" model = setup_db_model - xml_files = [ - os.path.join(model_config["xml_path"], file) - for file in os.listdir(model_config["xml_path"]) - ] + xml_files = list_xml_path(model_config, "xml") for file in xml_files: # do parse and insert into the database @@ -59,10 +56,7 @@ def test_database_document_tree_roundtrip(setup_db_model, model_config): """A test for roundtrip insert to the database from and to document tree""" model = setup_db_model - xml_files = [ - os.path.join(model_config["xml_path"], file) - for file in os.listdir(model_config["xml_path"]) - ] + xml_files = list_xml_path(model_config, "xml") for file in xml_files: # do parse and insert into the database @@ -92,10 +86,7 @@ def test_database_document_tree_roundtrip_single_load(setup_db_model, model_conf """A test for roundtrip insert to the database from and to document tree""" model = setup_db_model - xml_files = [ - os.path.join(model_config["xml_path"], file) - for file in os.listdir(model_config["xml_path"]) - ] + xml_files = list_xml_path(model_config, "xml") flat_data = None doc = None @@ -129,7 +120,7 @@ def test_database_document_tree_roundtrip_single_load(setup_db_model, model_conf [ {**model, **version, "xml_file": xml_file} for model in models - for xml_file in os.listdir(model["xml_path"]) + for xml_file in list_xml_path(model, "xml") for version in model["versions"] ], ) diff --git a/tests/test_validation.py b/tests/test_validation.py index 4a3ce4d..d42e40c 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1,10 +1,10 @@ -import xml.etree.ElementTree - import lxml.etree import pytest +import os from xml2db import DataModel from .sample_models import models +from .conftest import models_path @pytest.mark.parametrize( @@ -27,7 +27,9 @@ def test_invalid_xml(args: tuple): file_name, iterparse, recover, exception = args - data_model = DataModel(models[0]["xsd_path"]) + data_model = DataModel( + 
str(os.path.join(models_path, models[0]["id"], models[0]["xsd"])) + ) if exception is None: data_model.parse_xml( @@ -49,8 +51,8 @@ def test_invalid_xml(args: tuple): @pytest.mark.parametrize( "args", [ - ("invalid", True, False, IndexError), - ("invalid", True, True, IndexError), + ("invalid", True, False, None), + ("invalid", True, True, None), ("invalid", False, False, None), ("invalid", False, True, None), ("malformed_recover", True, False, lxml.etree.XMLSyntaxError), @@ -58,7 +60,7 @@ def test_invalid_xml(args: tuple): ("malformed_recover", False, False, lxml.etree.XMLSyntaxError), ("malformed_recover", False, True, None), ("malformed_no_recover", True, False, lxml.etree.XMLSyntaxError), - ("malformed_no_recover", True, True, IndexError), + ("malformed_no_recover", True, True, None), ("malformed_no_recover", False, False, lxml.etree.XMLSyntaxError), ("malformed_no_recover", False, True, None), ], @@ -66,7 +68,9 @@ def test_invalid_xml(args: tuple): def test_invalid_xml_skip_verify(args: tuple): file_name, iterparse, recover, exception = args - data_model = DataModel(models[0]["xsd_path"]) + data_model = DataModel( + str(os.path.join(models_path, models[0]["id"], models[0]["xsd"])) + ) if exception is None: data_model.parse_xml(