Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 73fe5ed

Browse files
authored
Merge pull request #300 from eregs/dont-expand-long-reserved-section-spans
[WIP] Merge spans longer than 3 reserved sections
2 parents b0304ea + 767aa18 commit 73fe5ed

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

regparser/tree/xml_parser/reg_text.py

+14 -5
Original file line number | Diff line number | Diff line change
@@ -267,20 +267,29 @@ def build_from_section(reg_part, section_xml):
267267
secnum_candidate = int(secnum_candidate)
268268
section_nums.append(secnum_candidate)
269269

270-
# Span of section numbers
270+
# Merge spans longer than 3 sections
271+
section_span_end = None
271272
if u'§§' == section_no[:2] and '-' in section_no:
272273
first, last = section_nums
273-
section_nums = []
274-
for i in range(first, last + 1):
275-
section_nums.append(i)
274+
if last - first + 1 > 3:
275+
section_span_end = str(last)
276+
section_nums = [first]
277+
else:
278+
section_nums = []
279+
for i in range(first, last + 1):
280+
section_nums.append(i)
276281

277282
section_nodes = []
278283
for section_number in section_nums:
279284
section_number = str(section_number)
280285
section_text = (section_xml.text or '').strip()
281286
tagged_section_text = section_xml.text
282287

283-
section_title = u"§ " + reg_part + "." + section_number
288+
if section_span_end:
289+
section_title = u"§§ {}.{}-{}".format(
290+
reg_part, section_number, section_span_end)
291+
else:
292+
section_title = u"§ {}.{}".format(reg_part, section_number)
284293
if subject_text:
285294
section_title += " " + subject_text
286295

tests/tree_xml_parser_reg_text_tests.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ def test_build_from_section_reserved(self):
8282
self.assertEqual(u'§ 8675.309 [Reserved]', node.title)
8383
self.assertEqual([], node.children)
8484

85-
def test_build_from_section_reserved_range(self):
85+
def test_build_from_3_section_reserved_range(self):
8686
with XMLBuilder("SECTION") as ctx:
8787
ctx.SECTNO(u"§§ 8675.309-8675.311")
8888
ctx.RESERVED("[Reserved]")
@@ -94,6 +94,14 @@ def test_build_from_section_reserved_range(self):
9494
self.assertEqual(u'§ 8675.310 [Reserved]', n310.title)
9595
self.assertEqual(u'§ 8675.311 [Reserved]', n311.title)
9696

97+
def test_build_from_4_section_reserved_range(self):
98+
with XMLBuilder("SECTION") as ctx:
99+
ctx.SECTNO(u"§§ 8675.309-8675.312")
100+
ctx.RESERVED("[Reserved]")
101+
n309 = reg_text.build_from_section('8675', ctx.xml)[0]
102+
self.assertEqual(n309.label, ['8675', '309'])
103+
self.assertEqual(u'§§ 8675.309-312 [Reserved]', n309.title)
104+
97105
def _setup_for_ambiguous(self, final_par):
98106
with self.section() as ctx:
99107
ctx.P("(g) Some Content")

0 commit comments

Comments (0)