@@ -70,7 +70,7 @@ class DataModel:
70
70
def __init__ (
71
71
self ,
72
72
xsd_file : str ,
73
- short_name : str = None ,
73
+ short_name : str = "DocumentRoot" ,
74
74
long_name : str = None ,
75
75
base_url : str = None ,
76
76
model_config : dict = None ,
@@ -226,8 +226,7 @@ def _build_model(self):
226
226
"""
227
227
# parse the XML schema recursively and hold a reference to the head table
228
228
root_table = self ._parse_tree (
229
- self .xml_schema [0 ] if len (self .xml_schema ) == 1 else self .xml_schema ,
230
- is_root_table = True ,
229
+ self .xml_schema [0 ] if len (self .xml_schema ) == 1 else self .xml_schema
231
230
)
232
231
self .root_table = root_table .type_name
233
232
# compute a text representation of the original data model and store it
@@ -273,9 +272,7 @@ def _build_model(self):
273
272
for tb in self .fk_ordered_tables :
274
273
tb .build_sqlalchemy_tables ()
275
274
276
- def _parse_tree (
277
- self , parent_node : xmlschema .XsdElement , is_root_table : bool = False
278
- ):
275
+ def _parse_tree (self , parent_node : xmlschema .XsdElement , nodes_path : list = None ):
279
276
"""Parse a node of an XML schema recursively and create a target data model without any simplification
280
277
281
278
We parse the XSD tree recursively to create for each node (basically a complex type in the XSD) an equivalent \
@@ -289,7 +286,7 @@ def _parse_tree(
289
286
290
287
Args:
291
288
parent_node: the current XSD node being parsed
292
- is_root_table: True if this is the root table
289
+ nodes_path: the list of node types on the path from the root node down to the current node
293
290
"""
294
291
295
292
# find the current node type and name, and return the corresponding table if it already exists
@@ -301,12 +298,16 @@ def _parse_tree(
301
298
if parent_type is None :
302
299
parent_type = parent_node .local_name
303
300
301
+ nodes_path = (nodes_path if nodes_path else []) + [parent_type ]
302
+
304
303
# if this type has already been encountered, stop here and return existing table
305
304
if parent_type in self .tables :
306
305
parent_table = self .tables [parent_type ]
307
306
return parent_table
308
307
309
- # elements names and types should be bijective. If an element name is used for different types,
308
+ # For database tables we use element names rather than XSD types, under the assumption that they are often
309
+ # more meaningful given that they are the ones which appear in XML documents. However, the same name can be used
310
+ # for different XSD types, so if an element name is used for different types,
310
311
# we add a suffix to the name to make it unique again (using a dict to keep the name/type association)
311
312
parent_name = (
312
313
parent_node .local_name
@@ -324,7 +325,7 @@ def _parse_tree(
324
325
parent_table = self ._create_table_model (
325
326
parent_name ,
326
327
parent_type ,
327
- is_root_table ,
328
+ len ( nodes_path ) == 1 ,
328
329
isinstance (parent_node , xmlschema .XMLSchema ),
329
330
)
330
331
self .tables [parent_type ] = parent_table
@@ -363,6 +364,13 @@ def recurse_parse_simple_type(elem_type):
363
364
if elem_type .base_type
364
365
else recurse_parse_simple_type (elem_type .member_types )
365
366
)
367
+ if elem_type .is_list ():
368
+ return (
369
+ "string" ,
370
+ 0 ,
371
+ None ,
372
+ elem_type .allow_empty ,
373
+ )
366
374
if elem_type .is_restriction ():
367
375
dt = elem_type .base_type .local_name
368
376
mil = elem_type .min_length
@@ -384,7 +392,12 @@ def recurse_parse_simple_type(elem_type):
384
392
else None
385
393
)
386
394
ae = ae and bt_ae if ae is not None and bt_ae is not None else None
387
- if elem_type .enumeration is not None and dt in ["string" , "NMTOKEN" , "duration" , "token" ]:
395
+ if elem_type .enumeration is not None and dt in [
396
+ "string" ,
397
+ "NMTOKEN" ,
398
+ "duration" ,
399
+ "token" ,
400
+ ]:
388
401
mil = min ([len (val ) for val in elem_type .enumeration ])
389
402
mal = max ([len (val ) for val in elem_type .enumeration ])
390
403
return dt , mil , mal , ae
@@ -410,25 +423,31 @@ def get_occurs(particle):
410
423
),
411
424
]
412
425
413
- # go through item attributes and add them as columns
426
+ # go through item attributes and add them as columns, adding a suffix if an element with the same name exists
427
+ children_names = None
414
428
for attrib_name , attrib in parent_node .attributes .items ():
429
+ if children_names is None :
430
+ children_names = [child .local_name for child in parent_node ]
415
431
(
416
432
data_type ,
417
433
min_length ,
418
434
max_length ,
419
435
allow_empty ,
420
436
) = recurse_parse_simple_type ([attrib .type ])
437
+ suffix = attrib_name in children_names
421
438
parent_table .add_column (
422
- f"{ attrib_name } " ,
439
+ f"{ attrib_name } { '_attr' if suffix else '' } " ,
423
440
data_type ,
424
441
[0 , 1 ],
425
442
min_length ,
426
443
max_length ,
427
444
True ,
445
+ suffix ,
428
446
False ,
429
447
allow_empty ,
430
448
None ,
431
449
)
450
+
432
451
nested_containers = []
433
452
# go through the children to add either arguments or relations to the current element
434
453
for child in parent_node :
@@ -454,6 +473,7 @@ def get_occurs(particle):
454
473
if child .parent
455
474
and child .parent .max_occurs != 1
456
475
and child .parent .model != "choice"
476
+ and child .max_occurs == 1
457
477
else None
458
478
),
459
479
)
@@ -482,32 +502,39 @@ def get_occurs(particle):
482
502
max_length ,
483
503
False ,
484
504
False ,
505
+ False ,
485
506
allow_empty ,
486
507
nested_containers [- 1 ][1 ],
487
508
)
488
509
489
510
elif ct .is_complex ():
490
- child_table = self ._parse_tree (child )
491
- child_table .model_group = (
492
- "choice"
493
- if ct .model_group and ct .model_group .model == "choice"
494
- else "sequence"
495
- )
496
- occurs = get_occurs (child )
497
- if child .is_single ():
498
- parent_table .add_relation_1 (
499
- child .local_name ,
500
- child_table ,
501
- occurs ,
502
- nested_containers [- 1 ][1 ],
511
+ # ignore recursive definitions: skip any child whose type is already on the current path of node types
512
+ if child .type .local_name in nodes_path :
513
+ logger .warning (
514
+ f"type '{ child .type .local_name } ' contains a recursive definition"
503
515
)
504
516
else :
505
- parent_table . add_relation_n (
506
- child . local_name ,
507
- child_table ,
508
- occurs ,
509
- nested_containers [ - 1 ][ 1 ],
517
+ child_table = self . _parse_tree ( child , nodes_path )
518
+ child_table . model_group = (
519
+ "choice"
520
+ if ct . model_group and ct . model_group . model == "choice"
521
+ else "sequence"
510
522
)
523
+ occurs = get_occurs (child )
524
+ if occurs [1 ] == 1 :
525
+ parent_table .add_relation_1 (
526
+ child .local_name ,
527
+ child_table ,
528
+ occurs ,
529
+ nested_containers [- 1 ][1 ],
530
+ )
531
+ else :
532
+ parent_table .add_relation_n (
533
+ child .local_name ,
534
+ child_table ,
535
+ occurs ,
536
+ nested_containers [- 1 ][1 ],
537
+ )
511
538
else :
512
539
raise ValueError ("unknown case; please check" )
513
540
else :
@@ -534,6 +561,7 @@ def get_occurs(particle):
534
561
min_length ,
535
562
max_length ,
536
563
False ,
564
+ False ,
537
565
True ,
538
566
allow_empty ,
539
567
None ,
@@ -544,31 +572,19 @@ def get_occurs(particle):
544
572
def _repr_tree (
545
573
self ,
546
574
parent_table : Union [DataModelTableReused , DataModelTableDuplicated ],
547
- visited_nodes : Union [set , None ] = None ,
548
575
):
549
576
"""Build a text representation of the data model tree
550
577
551
578
Args:
552
579
parent_table: the current data model table object
553
580
"""
554
- if visited_nodes is None :
555
- visited_nodes = set ()
556
- else :
557
- visited_nodes = {item for item in visited_nodes }
558
- visited_nodes .add (parent_table .name )
559
581
for field_type , name , field in parent_table .fields :
560
582
if field_type == "col" :
561
583
yield f"{ field .name } { field .occurs } : { field .data_type } "
562
- elif field_type == "rel1" :
563
- mg = " (choice)" if field .other_table .model_group == "choice" else ""
564
- yield f"{ field .name } { field .occurs } { mg } :{ ' ...' if field_type in visited_nodes else '' } "
565
- if field .other_table .name not in visited_nodes :
566
- for line in self ._repr_tree (field .other_table , visited_nodes ):
567
- yield f" { line } "
568
- elif field_type == "reln" :
584
+ else :
569
585
mg = " (choice)" if field .other_table .model_group == "choice" else ""
570
- yield f"{ field .name } { field .occurs } { mg } :{ ' ...' if field_type in visited_nodes else '' } "
571
- for line in self ._repr_tree (field .other_table , visited_nodes ):
586
+ yield f"{ field .name } { field .occurs } { mg } :"
587
+ for line in self ._repr_tree (field .other_table ):
572
588
yield f" { line } "
573
589
574
590
def get_entity_rel_diagram (self , text_context : bool = True ) -> str :
0 commit comments