Skip to content

Commit a3db3b2

Browse files
committed
fix: tests
1 parent b802a7d commit a3db3b2

File tree

5 files changed

+199
-38
lines changed

5 files changed

+199
-38
lines changed

benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def benchmark_conversion(data: dict, parallel: bool, workers: int = 4, chunk_siz
4949
for _ in range(iterations):
5050
converter = Json2xml(data, parallel=parallel, workers=workers, chunk_size=chunk_size)
5151
start = time.perf_counter()
52-
result = converter.to_xml()
52+
converter.to_xml()
5353
end = time.perf_counter()
5454
times.append(end - start)
5555

json2xml/dicttoxml.py

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -262,12 +262,17 @@ def dict2xml_str(
262262
parse dict2xml
263263
"""
264264
ids: list[str] = [] # initialize list of unique ids
265+
item = dict(item) # copy to avoid modifying the original dict
265266
", ".join(str(key) for key in item)
266267
subtree = "" # Initialize subtree with default empty string
267268

268269
if attr_type:
269270
attr["type"] = get_xml_type(item)
270271
val_attr: dict[str, str] = item.pop("@attrs", attr) # update attr with custom @attr if exists
272+
# Handle other @ keys as attributes
273+
for key in list(item.keys()):
274+
if key.startswith('@') and key not in ('@val', '@flat', '@attrs'):
275+
val_attr[key[1:]] = item.pop(key)
271276
rawitem = item["@val"] if "@val" in item else item
272277
if is_primitive_type(rawitem):
273278
if isinstance(rawitem, dict):
@@ -522,7 +527,15 @@ def convert_kv(
522527
if attr_type:
523528
attr["type"] = get_xml_type(val)
524529
attr_string = make_attrstring(attr)
525-
return f"<{key}{attr_string}>{wrap_cdata(val) if cdata else escape_xml(val)}</{key}>"
530+
val_str = str(val)
531+
if cdata:
532+
if '<![CDATA[' in val_str:
533+
content = val_str
534+
else:
535+
content = wrap_cdata(val)
536+
else:
537+
content = escape_xml(val)
538+
return f"<{key}{attr_string}>{content}</{key}>"
526539

527540

528541
def convert_bool(
@@ -566,7 +579,8 @@ def dicttoxml(
566579
list_headers: bool = False,
567580
parallel: bool = False,
568581
workers: int | None = None,
569-
chunk_size: int = 100
582+
chunk_size: int = 100,
583+
min_items_for_parallel: int = 10
570584
) -> bytes:
571585
"""
572586
Converts a python object into XML.
@@ -668,6 +682,10 @@ def dicttoxml(
668682
Default is 100
669683
Number of list items to process per chunk in parallel mode.
670684
685+
:param int min_items_for_parallel:
686+
Default is 10
687+
Minimum number of items in a dictionary to enable parallel processing.
688+
671689
Dictionaries-keys with special char '@' has special meaning:
672690
@attrs: This allows custom xml attributes:
673691
@@ -718,17 +736,61 @@ def dicttoxml(
718736
ns = xml_namespaces[prefix]
719737
namespace_str += f' xmlns:{prefix}="{ns}"'
720738

739+
def _dispatch_convert(
740+
obj, ids, parent,
741+
attr_type, item_func, cdata, item_wrap, list_headers,
742+
parallel, workers, chunk_size, min_items_for_parallel, xml_namespaces
743+
):
744+
should_use_parallel = parallel
745+
if parallel:
746+
if cdata:
747+
should_use_parallel = False
748+
if isinstance(obj, dict) and any(isinstance(k, str) and k.startswith('@') for k in obj.keys()):
749+
should_use_parallel = False
750+
if xml_namespaces:
751+
should_use_parallel = False
752+
if should_use_parallel:
753+
if isinstance(obj, dict):
754+
return convert_dict_parallel(
755+
obj, ids, parent,
756+
attr_type=attr_type, item_func=item_func, cdata=cdata,
757+
item_wrap=item_wrap, list_headers=list_headers,
758+
workers=workers, min_items_for_parallel=min_items_for_parallel
759+
)
760+
if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
761+
return convert_list_parallel(
762+
obj, ids, parent,
763+
attr_type=attr_type, item_func=item_func, cdata=cdata,
764+
item_wrap=item_wrap, list_headers=list_headers,
765+
workers=workers, chunk_size=chunk_size
766+
)
767+
# fallback to serial
768+
return convert(
769+
obj, ids,
770+
attr_type, item_func, cdata, item_wrap,
771+
parent=parent, list_headers=list_headers
772+
)
773+
774+
should_use_parallel = parallel
721775
if parallel:
776+
if cdata:
777+
should_use_parallel = False
778+
if isinstance(obj, dict) and any(isinstance(k, str) and k.startswith('@') for k in obj.keys()):
779+
should_use_parallel = False
780+
if xml_namespaces:
781+
should_use_parallel = False
782+
783+
if should_use_parallel:
722784
from json2xml.parallel import convert_dict_parallel, convert_list_parallel
723785

724786
if root:
725787
output.append('<?xml version="1.0" encoding="UTF-8" ?>')
726788
if isinstance(obj, dict):
727789
output_elem = convert_dict_parallel(
728790
obj, ids, custom_root, attr_type, item_func, cdata, item_wrap,
729-
list_headers=list_headers, workers=workers, min_items_for_parallel=10
791+
list_headers=list_headers, workers=workers, min_items_for_parallel=min_items_for_parallel
730792
)
731-
elif isinstance(obj, Sequence):
793+
elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
732794
output_elem = convert_list_parallel(
733795
obj, ids, custom_root, attr_type, item_func, cdata, item_wrap,
734796
list_headers=list_headers, workers=workers, chunk_size=chunk_size
@@ -742,11 +804,11 @@ def dicttoxml(
742804
if isinstance(obj, dict):
743805
output.append(
744806
convert_dict_parallel(
745-
obj, ids, "", attr_type, item_func, cdata, item_wrap,
746-
list_headers=list_headers, workers=workers, min_items_for_parallel=10
807+
obj, ids, "", attr_type, item_func, cdata, item_wrap,
808+
list_headers=list_headers, workers=workers, min_items_for_parallel=min_items_for_parallel
747809
)
748810
)
749-
elif isinstance(obj, Sequence):
811+
elif isinstance(obj, Sequence) and not isinstance(obj, (str, bytes)):
750812
output.append(
751813
convert_list_parallel(
752814
obj, ids, "", attr_type, item_func, cdata, item_wrap,

json2xml/json2xml.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def __init__(
2323
parallel: bool = False,
2424
workers: int | None = None,
2525
chunk_size: int = 100,
26+
min_items_for_parallel: int = 10,
2627
):
2728
self.data = data
2829
self.pretty = pretty
@@ -33,6 +34,7 @@ def __init__(
3334
self.parallel = parallel
3435
self.workers = workers
3536
self.chunk_size = chunk_size
37+
self.min_items_for_parallel = min_items_for_parallel
3638

3739
def to_xml(self) -> Any | None:
3840
"""
@@ -48,6 +50,7 @@ def to_xml(self) -> Any | None:
4850
parallel=self.parallel,
4951
workers=self.workers,
5052
chunk_size=self.chunk_size,
53+
min_items_for_parallel=self.min_items_for_parallel,
5154
)
5255
if self.pretty:
5356
try:

json2xml/parallel.py

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,46 +6,61 @@
66
import threading
77
from collections.abc import Callable, Sequence
88
from concurrent.futures import ThreadPoolExecutor, as_completed
9+
from functools import lru_cache
910
from typing import Any
1011

11-
from json2xml import dicttoxml
12-
1312

1413
def is_free_threaded() -> bool:
    """Detect whether this interpreter is a free-threaded (no-GIL) build.

    Note:
        This probes the private hook ``sys._is_gil_enabled``, which may change
        or disappear in future Python versions. Whenever the hook is missing
        or raises, the interpreter is assumed to have the GIL enabled.

    Returns:
        bool: True on a free-threaded build, False otherwise.
    """
    probe = getattr(sys, '_is_gil_enabled', None)
    if probe is None:
        # No hook at all: treat as a regular GIL build.
        return False
    try:
        return not probe()
    except Exception:
        # Hook present but unusable: fall back to assuming the GIL is on.
        return False
2232

2333

24-
def get_optimal_workers(workers: int | None = None) -> int:
34+
def get_optimal_workers(
35+
workers: int | None = None,
36+
max_workers_limit: int | None = None
37+
) -> int:
2538
"""
2639
Get the optimal number of worker threads.
2740
2841
Args:
2942
workers: Explicitly specified worker count. If None, auto-detect.
43+
max_workers_limit: Optional cap for worker count on non-free-threaded Python.
3044
3145
Returns:
3246
int: Number of worker threads to use.
3347
"""
3448
if workers is not None:
3549
return max(1, workers)
3650

37-
cpu_count = os.cpu_count() or 4
51+
cpu_count = os.cpu_count() or 1
3852

3953
if is_free_threaded():
40-
return cpu_count
54+
optimal = cpu_count
4155
else:
42-
return min(4, cpu_count)
43-
56+
# Use configurable limit or default to 4
57+
limit = max_workers_limit if max_workers_limit is not None else 4
58+
optimal = min(limit, cpu_count)
4459

45-
_validation_cache: dict[str, bool] = {}
46-
_validation_cache_lock = threading.Lock()
60+
return max(1, optimal)
4761

4862

63+
@lru_cache(maxsize=None)
4964
def key_is_valid_xml_cached(key: str) -> bool:
5065
"""
5166
Thread-safe cached version of key_is_valid_xml.
@@ -56,16 +71,8 @@ def key_is_valid_xml_cached(key: str) -> bool:
5671
Returns:
5772
bool: True if the key is valid XML, False otherwise.
5873
"""
59-
with _validation_cache_lock:
60-
if key in _validation_cache:
61-
return _validation_cache[key]
62-
63-
result = dicttoxml.key_is_valid_xml(key)
64-
65-
with _validation_cache_lock:
66-
_validation_cache[key] = result
67-
68-
return result
74+
from json2xml import dicttoxml
75+
return dicttoxml.key_is_valid_xml(key)
6976

7077

7178
def make_valid_xml_name_cached(key: str, attr: dict[str, Any]) -> tuple[str, dict[str, Any]]:
@@ -79,6 +86,7 @@ def make_valid_xml_name_cached(key: str, attr: dict[str, Any]) -> tuple[str, dic
7986
Returns:
8087
tuple: Valid XML key and updated attributes.
8188
"""
89+
from json2xml import dicttoxml
8290
key = dicttoxml.escape_xml(key)
8391

8492
if key_is_valid_xml_cached(key):
@@ -129,7 +137,9 @@ def _convert_dict_item(
129137
import datetime
130138
import numbers
131139

132-
attr = {} if not ids else {"id": f"{dicttoxml.get_unique_id(parent)}"}
140+
from json2xml import dicttoxml
141+
142+
attr = {"id": f"{dicttoxml.get_unique_id(parent)}"} if ids else {}
133143
key, attr = make_valid_xml_name_cached(key, attr)
134144

135145
if isinstance(val, bool):
@@ -203,8 +213,11 @@ def convert_dict_parallel(
203213
min_items_for_parallel: Minimum items to enable parallelization.
204214
205215
Returns:
206-
str: XML string.
216+
str: XML string.
207217
"""
218+
if not isinstance(obj, dict):
219+
raise TypeError("obj must be a dict")
220+
from json2xml import dicttoxml
208221
if len(obj) < min_items_for_parallel:
209222
return dicttoxml.convert_dict(
210223
obj, ids, parent, attr_type, item_func, cdata, item_wrap, list_headers
@@ -225,7 +238,14 @@ def convert_dict_parallel(
225238

226239
for future in as_completed(future_to_idx):
227240
idx = future_to_idx[future]
228-
results[idx] = future.result()
241+
try:
242+
results[idx] = future.result()
243+
except Exception as e:
244+
# Cancel remaining futures
245+
for f in future_to_idx:
246+
if not f.done():
247+
f.cancel()
248+
raise e
229249

230250
return "".join(results[idx] for idx in range(len(items)))
231251

@@ -256,8 +276,9 @@ def _convert_list_chunk(
256276
start_offset: Starting index for this chunk.
257277
258278
Returns:
259-
str: XML string for this chunk.
279+
str: XML string for this chunk.
260280
"""
281+
from json2xml import dicttoxml
261282
return dicttoxml.convert_list(
262283
items, ids, parent, attr_type, item_func, cdata, item_wrap, list_headers
263284
)
@@ -291,8 +312,11 @@ def convert_list_parallel(
291312
chunk_size: Number of items per chunk.
292313
293314
Returns:
294-
str: XML string.
315+
str: XML string.
295316
"""
317+
if not isinstance(items, Sequence) or isinstance(items, (str, bytes)):
318+
raise TypeError("items must be a sequence (not str or bytes)")
319+
from json2xml import dicttoxml
296320
if len(items) < chunk_size:
297321
return dicttoxml.convert_list(
298322
items, ids, parent, attr_type, item_func, cdata, item_wrap, list_headers
@@ -313,6 +337,13 @@ def convert_list_parallel(
313337

314338
for future in as_completed(future_to_idx):
315339
idx = future_to_idx[future]
316-
results[idx] = future.result()
340+
try:
341+
results[idx] = future.result()
342+
except Exception as e:
343+
# Cancel remaining futures
344+
for f in future_to_idx:
345+
if not f.done():
346+
f.cancel()
347+
raise e
317348

318349
return "".join(results[idx] for idx in range(len(chunks)))

0 commit comments

Comments
 (0)