diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 1693712e..8c57a573 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -2,7 +2,6 @@ """ Converts a Python dictionary or other native data type into a valid XML string. - Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and dict-like objects) data types, with arbitrary nesting for the collections. @@ -65,7 +64,6 @@ def get_xml_type(val): def escape_xml(s: str) -> str: - if isinstance(s, str): s = str(s) # avoid UnicodeDecodeError s = s.replace("&", "&") @@ -108,13 +106,17 @@ def make_valid_xml_name(key, attr: Dict[str, Any]): return key, attr # prepend a lowercase n if the key is numeric - if key.isdigit(): + if isinstance(key, int) or key.isdigit(): return f"n{key}", attr # replace spaces with underscores if that fixes the problem if key_is_valid_xml(key.replace(" ", "_")): return key.replace(" ", "_"), attr + # allow namespace prefixes + ignore @flat in key + if key_is_valid_xml(key.replace(":", "").replace("@flat", "")): + return key, attr + # key is still invalid - move it into a name attribute attr["name"] = key key = "key" @@ -134,8 +136,10 @@ def default_item_func(parent): def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): """Routes the elements of an object to the right function to convert them based on their data type""" - - LOG.info(f'Inside convert(). obj type is: "{type(obj).__name__}", obj="{str(obj)}"') + LOG.info(f'Inside convert(). type(obj)="{type(obj).__name__}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' obj="{str(obj)}"') item_name = item_func(parent) # since bool is also a subtype of number.Number and int, the check for bool @@ -171,18 +175,62 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})") +def is_primitive_type(val): + t = get_xml_type(val) + return t in {'str', 'int', 'float', 'bool', 'number', 'null'} + + +def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, parentIsList): + keys_str = ', '.join(str(key) for key in item) + LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' item="{str(item)}"') + + if attr_type: + attr["type"] = get_xml_type(item) + attr = item.pop("@attrs", attr) # update attr with custom @attr if exists + rawitem = item["@val"] if "@val" in item else item + if is_primitive_type(rawitem): + subtree = rawitem + else: + # we can not use convert_dict, because rawitem could be non-dict + subtree = convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) + if item.get("@flat", False) or (parentIsList and not item_wrap): + return subtree + attrstring = make_attrstring(attr) + return f"<{item_name}{attrstring}>{subtree}" + + +def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): + if attr_type: + attr["type"] = get_xml_type(item) + flat = False + if item_name.endswith('@flat'): + item_name = item_name[0:-5] + flat = True + subtree = convert_list(item, ids, item_name, attr_type, item_func, cdata, item_wrap) + if flat or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap): + return subtree + attrstring = make_attrstring(attr) + return f"<{item_name}{attrstring}>{subtree}" + + def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a dict into an XML string.""" - LOG.info( - f'Inside convert_dict(): obj type is: "{type(obj).__name__}", obj="{str(obj)}"' - ) + keys_str = ', '.join(str(key) for key in obj) + LOG.info(f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' obj="{str(obj)}"') + output = [] addline = output.append for key, val in obj.items(): - LOG.info( - f'Looping inside convert_dict(): key="{str(key)}", val="{str(val)}", type(val)="{type(val).__name__}"' - ) + LOG.info(f'Looping inside convert_dict(): key="{str(key)}", type(val)="{type(val).__name__}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' val="{str(val)}"') attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} @@ -214,31 +262,11 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): ) elif isinstance(val, dict): - if attr_type: - attr["type"] = get_xml_type(val) - dict_str = convert_dict( - val, ids, key, attr_type, item_func, cdata, item_wrap - ) - attrstring = make_attrstring(attr) - addline(f"<{key}{attrstring}>{dict_str}") - - elif isinstance(val, collections.abc.Iterable) and val: - if attr_type: - attr["type"] = get_xml_type(val) - if ( - isinstance(val[0], numbers.Number) - or isinstance(val[0], str) - and not item_wrap - ): - addline( - convert_list(val, ids, key, attr_type, item_func, cdata, item_wrap) - ) - else: - attrstring = make_attrstring(attr) - list_str = convert_list( - val, ids, key, attr_type, item_func, cdata, item_wrap - ) - addline(f"<{key}{attrstring}>{list_str}") + addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap, False)) + + elif isinstance(val, collections.abc.Iterable): + addline(list2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) + elif not val: addline(convert_none(key, val, attr_type, attr, cdata)) @@ -250,19 +278,27 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a list into an XML string.""" - LOG.info("Inside convert_list()") + LOG.info(f'Inside convert_list(): type(items)="{type(items).__name__}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' items="{str(items)}"') + output = [] addline = output.append item_name = item_func(parent) + if item_name.endswith('@flat'): + item_name = item_name[:-5] this_id = None if ids: this_id = get_unique_id(parent) for i, item in enumerate(items): - LOG.info( - f'Looping inside convert_list(): item="{str(item)}", item_name="{item_name}", type="{type(item).__name__}"' - ) + LOG.info(f'Looping inside convert_list(): index="{str(i)}", type="{type(item).__name__}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' item="{str(item)}"') + attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} if isinstance(item, bool): @@ -302,37 +338,10 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): ) elif isinstance(item, dict): - item_dict_str = convert_dict( - item, - ids, - parent, - attr_type, - item_func, - cdata, - item_wrap, - ) - if not attr_type: - if item_wrap: - addline(f"<{item_name}>{item_dict_str}") - else: - addline(f"{item_dict_str}") - else: - if item_wrap: - addline(f'<{item_name} type="dict">{item_dict_str}') - else: - addline(f"{item_dict_str}") + addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, True)) elif isinstance(item, collections.abc.Iterable): - attrstring = make_attrstring(attr) - convert_list_str = convert_list( - item, ids, item_name, attr_type, item_func, cdata, item_wrap - ) - if not attr_type: - addline(f"<{item_name} {attrstring}>{convert_list_str}") - else: - addline( - f'<{item_name} type="list"{attrstring}>{convert_list_str}' - ) + addline(list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap)) elif item is None: addline(convert_none(item_name, None, attr_type, attr, cdata)) @@ -391,6 +400,7 @@ def dicttoxml( item_wrap=True, item_func=default_item_func, cdata=False, + xml_namespaces={} ): """Converts a python object into XML. Arguments: @@ -409,17 +419,35 @@ def dicttoxml( Default is True - cdata specifies whether string values should be wrapped in CDATA sections. Default is False + - xml_namespaces is a dictionary where key is xmlns prefix and value the urn, + e.g. { 'flex': 'http://www.w3.org/flex/flexBase', 'xsl': "http://www.w3.org/1999/XSL/Transform"} + results in + Default is {} + + Dictionaries-keys with special char '@' has special meaning: + @attrs: This allows custom xml attributes: {'@attr':{'a':'b'}, 'x':'y'} results in y + @flat: If a key ends with @flat (or dict contains key '@flat'), encapsulating node is omitted. Similar to item_wrap. + @val: @attrs requires complex dict type. If primitive type should be used, then @val is used as key. + To add custom xml-attributes on a list {'list': [4, 5, 6]}, you do this: + {'list': {'@attrs': {'a':'b','c':'d'}, '@val': [4, 5, 6]} + which results in 456 """ - LOG.info( - f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}", obj="{str(obj)}"' - ) + LOG.info(f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}"') + # avoid cpu consuming object serialization (problem for large objects) => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' obj="{str(obj)}"') + output = [] + namespacestr = '' + for prefix in xml_namespaces: + ns = xml_namespaces[prefix] + namespacestr += f' xmlns:{prefix}="{ns}"' if root: output.append('') output_elem = convert( obj, ids, attr_type, item_func, cdata, item_wrap, parent=custom_root ) - output.append(f"<{custom_root}>{output_elem}") + output.append(f"<{custom_root}{namespacestr}>{output_elem}") else: output.append( convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="") diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index 2fa1547f..b85f1d13 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -181,14 +181,59 @@ def test_read_boolean_data_from_json(self): """Test correct return for boolean types.""" data = readfromjson("examples/booleanjson.json") result = json2xml.Json2xml(data).to_xml() + print(result) dict_from_xml = xmltodict.parse(result) + print(dict_from_xml) assert dict_from_xml["all"]["boolean"]["#text"] != 'True' assert dict_from_xml["all"]["boolean"]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_dict_list"]["item"][0]["boolean_dict"]["boolean"]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_dict_list"]["item"][1]["boolean_dict"]["boolean"]["#text"] == 'false' + assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_list"]["item"][1]["#text"] == 'false' def test_read_boolean_data_from_json2(self): """Test correct return for boolean types.""" data = readfromjson("examples/booleanjson2.json") result = json2xml.Json2xml(data).to_xml() + print(result) dict_from_xml = xmltodict.parse(result) - assert dict_from_xml["all"]["item"][0]["#text"] != 'True' - assert dict_from_xml["all"]["item"][0]["#text"] == 'true' + print(dict_from_xml) + assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] != 'True' + assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_list"]["item"][1]["#text"] == 'false' + assert dict_from_xml["all"]["number_array"]["item"][0]["#text"] == '1' + assert dict_from_xml["all"]["number_array"]["item"][1]["#text"] == '2' + assert dict_from_xml["all"]["number_array"]["item"][2]["#text"] == '3' + assert dict_from_xml["all"]["string_array"]["item"][0]["#text"] == 'a' + assert dict_from_xml["all"]["string_array"]["item"][1]["#text"] == 'b' + assert dict_from_xml["all"]["string_array"]["item"][2]["#text"] == 'c' + + def test_dict2xml_with_namespaces(self): + data = {'ns1:node1': 'data in namespace 1', 'ns2:node2': 'data in namespace 2'} + namespaces = {'ns1': 'http://www.google.de/ns1', 'ns2': 'http://www.google.de/ns2'} + result = dicttoxml(data, attr_type=False, xml_namespaces=namespaces) + print(result) + assert b'' \ + b'' \ + b'data in namespace 1' \ + b'data in namespace 2' \ + b'' == result + + def test_dict2xml_with_flat(self): + data = {'flat_list@flat': [1, 2, 3], 'non_flat_list': [4, 5, 6]} + result = dicttoxml(data, attr_type=False) + print(result) + assert b'' \ + b'123' \ + b'456' \ + b'' == result + + def test_dict2xml_with_val_and_custom_attr(self): + # in order to use @attr in non-dict objects, we need to lift into a dict and combine with @val as key + data = {'list1': [1, 2, 3], 'list2': {'@attrs': {'myattr1': 'myval1', 'myattr2': 'myval2'}, '@val': [4, 5, 6]}} + result = dicttoxml(data, attr_type=False) + print(result) + assert b'' \ + b'123' \ + b'456' \ + b'' == result