Skip to content

Commit 37e2707

Browse files
zflat and William Wedler authored
Support for matching license header within multiline comment block (#361)
Co-authored-by: William Wedler <[email protected]>
1 parent 8b85df7 commit 37e2707

File tree

5 files changed

+236
-6
lines changed

5 files changed

+236
-6
lines changed

ament_copyright/ament_copyright/parser.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,14 @@ def parse(self):
115115

116116
# get first comment block without leading comment tokens
117117
block, _ = get_comment_block(self.content, index)
118-
if not block:
119-
return
120118
copyrights, remaining_block = search_copyright_information(block)
121-
if not copyrights:
122-
return None
119+
120+
if len(copyrights) == 0:
121+
block = get_multiline_comment_block(self.content, index)
122+
copyrights, remaining_block = search_copyright_information(block)
123+
124+
if len(copyrights) == 0:
125+
return
123126

124127
self.copyrights = copyrights
125128

@@ -178,6 +181,8 @@ def determine_filetype(path):
178181

179182

180183
def search_copyright_information(content):
184+
if content is None:
185+
return [], content
181186
# regex for matching years or year ranges (yyyy-yyyy) separated by colons
182187
year = r'\d{4}'
183188
year_range = '%s-%s' % (year, year)
@@ -279,6 +284,49 @@ def get_comment_block(content, index):
279284
return '\n'.join(lines), start_index + len(comment_token) + 1
280285

281286

287+
def get_multiline_comment_block(content, index):
    """
    Extract the text of the first multiline comment block at or after index.

    C-style (``/* ... */``) and XML-style (``<!-- ... -->``) blocks are tried,
    in that order. The opening token must start a line and the closing token
    must end a line.

    For a block spanning several lines, the line holding the opening token and
    the partial line in front of the closing token are dropped, and the prefix
    shared by all remaining lines (e.g. `` * ``) is stripped. A block which
    fits on a single line is returned as is, including its opening token.

    :param content: the text to search
    :param index: the offset in content at which the search starts
    :returns: the extracted lines joined by newlines, or None if no complete
      block of either style was found
    """
    patterns = [('^(/[*])', '([*]/)$'),
                ('^(<!--)', '(-->)$')]
    # slice instead of indexing so that empty content does not raise
    has_bom = index == 0 and content[:1] == '\ufeff'
    for start_pattern, end_pattern in patterns:
        # find the first match of the comment start token
        # also accept BOM if present
        if has_bom:
            start_pattern = start_pattern[0] + '\ufeff' + start_pattern[1:]
        start_match = re.compile(start_pattern, re.MULTILINE).search(content, index)
        if not start_match:
            continue
        start_index = start_match.start(1)

        # find the first comment end token located after the start token,
        # searching from index could pick an end token preceding the block
        end_match = re.compile(end_pattern, re.MULTILINE).search(
            content, start_match.end(1))
        if not end_match:
            continue
        end_index = end_match.start(1)

        # collect all lines between start and end (open interval) and strip out any common prefix
        block = content[start_index:end_index]
        block_lines = block.splitlines()
        if len(block_lines) == 1:
            prefixed_lines = block_lines
        elif len(block_lines) == 2 or block.endswith(('\n', '\r')):
            # the end token starts its own line, so the last entry is a
            # complete content line and only the start token line is dropped
            prefixed_lines = block_lines[1:]
        else:
            # the last entry is the partial line in front of the end token
            prefixed_lines = block_lines[1:-1]

        if len(prefixed_lines) > 1:
            line_prefix = os.path.commonprefix(prefixed_lines)
            lines = [line[len(line_prefix):] for line in prefixed_lines]
        else:
            # Single-line header does not have a common prefix to strip out
            lines = prefixed_lines

        return '\n'.join(lines)
    return None
328+
329+
282330
def scan_past_empty_lines(content, index):
283331
while is_empty_line(content, index):
284332
index = get_index_of_next_line(content, index)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/*
2+
* Copyright 2018 Open Source Robotics Foundation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
*/
17+
18+
#include <memory>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<!--
2+
Copyright (C) 2018 Open Source Robotics Foundation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
-->

ament_copyright/test/test_copyright.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ def test_apache2_standard():
2828
assert rc == 0, 'Found errors'
2929

3030

31+
def test_apache2_cpp_multiline():
    """Run main() on the multiline comment C++ case file and expect exit code 0."""
    case_file = os.path.join(cases_path, 'apache2_license_multiline_comment/case.cpp')
    exit_code = main(argv=[case_file])
    assert exit_code == 0, 'Found errors'
34+
35+
36+
def test_apache2_xml_multiline():
    """Run main() on the multiline comment XML case file and expect exit code 0."""
    case_file = os.path.join(cases_path, 'apache2_license_multiline_comment/case.xml')
    exit_code = main(argv=[case_file])
    assert exit_code == 0, 'Found errors'
39+
40+
3141
def test_boost1_cpp():
3242
rc = main(argv=[os.path.join(cases_path, 'boost1/case2.cpp')])
3343
assert rc == 0, 'Found errors'

ament_copyright/test/test_parser.py

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@
1313
# limitations under the License.
1414

1515
from ament_copyright import UNKNOWN_IDENTIFIER
16-
from ament_copyright.parser import FileDescriptor, search_copyright_information, split_template
16+
from ament_copyright.parser import FileDescriptor
17+
from ament_copyright.parser import get_comment_block
18+
from ament_copyright.parser import get_multiline_comment_block
19+
from ament_copyright.parser import scan_past_empty_lines
20+
from ament_copyright.parser import search_copyright_information
21+
from ament_copyright.parser import split_template
1722

1823

1924
def test_search_copyright_information_incorrect_typo():
@@ -50,7 +55,6 @@ def test_search_copyright_information_capitalization1():
5055
"""
5156
copyrights, remaining_block = search_copyright_information(
5257
' Copyright 2020 Open Source Robotics Foundation, Inc.')
53-
print(copyrights[0].name)
5458
assert copyrights[0].name == 'Open Source Robotics Foundation, Inc.'
5559
assert len(copyrights) == 1
5660

@@ -235,3 +239,138 @@ class TempLicense(object):
235239
dut = FileDescriptor(0, '/')
236240
dut.identify_license(content, 'file_headers', {'temp': temp_license})
237241
assert dut.license_identifier == 'temp'
242+
243+
244+
def test_get_comment_block_slashes():
245+
"""Test parsing comment block with c-style comment forward slashes."""
246+
commented_content = """
247+
// aaa
248+
// bbb
249+
// ccc
250+
251+
// Comment not part of the header
252+
"""
253+
index = 0
254+
index = scan_past_empty_lines(commented_content, index)
255+
block, _ = get_comment_block(commented_content, index)
256+
assert block is not None
257+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])
258+
259+
260+
def test_get_comment_block_slashes2():
261+
"""Test parsing comment multiline block that is not at the start of the content."""
262+
commented_content = """
263+
// aaa
264+
// bbb
265+
// ccc
266+
267+
///
268+
/**
269+
ddd
270+
*/
271+
"""
272+
index = 0
273+
index = scan_past_empty_lines(commented_content, index)
274+
block = get_multiline_comment_block(commented_content, index)
275+
assert block is not None
276+
assert block == 'ddd'
277+
278+
279+
def test_get_comment_block_doxygen():
280+
"""Test parsing comment block with doxygen-style comment forward slashes."""
281+
commented_content = """
282+
/// aaa
283+
/// bbb
284+
/// ccc
285+
"""
286+
index = 0
287+
index = scan_past_empty_lines(commented_content, index)
288+
block, _ = get_comment_block(commented_content, index)
289+
assert block is not None
290+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])
291+
292+
293+
def test_get_comment_block_pound():
294+
"""Test parsing comment block with python-style comment pound signs."""
295+
commented_content = """
296+
# aaa
297+
# bbb
298+
# ccc
299+
"""
300+
index = 0
301+
index = scan_past_empty_lines(commented_content, index)
302+
block, _ = get_comment_block(commented_content, index)
303+
assert block is not None
304+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])
305+
306+
307+
def test_get_multiline_comment_block_cstyle():
308+
"""Test parsing comment block with multiline c-style comment block."""
309+
commented_content = """
310+
/**
311+
* aaa
312+
* bbb
313+
* ccc
314+
*/
315+
316+
317+
/**
318+
* Comment not part of the header
319+
*/
320+
"""
321+
index = 0
322+
index = scan_past_empty_lines(commented_content, index)
323+
block = get_multiline_comment_block(commented_content, index)
324+
assert block is not None
325+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])
326+
327+
328+
def test_get_multiline_comment_block_cstyle2():
329+
"""Test parsing comment block with multiline c-style comment block."""
330+
commented_content = """
331+
/**
332+
* aaa
333+
* bbb
334+
* ccc
335+
*/
336+
337+
// Comment not part of
338+
// the header
339+
"""
340+
index = 0
341+
index = scan_past_empty_lines(commented_content, index)
342+
block = get_multiline_comment_block(commented_content, index)
343+
assert block is not None
344+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])
345+
346+
347+
def test_get_multiline_comment_block_xmlstyle():
348+
"""Test parsing comment block with multiline xml-style comment block."""
349+
commented_content = """
350+
<!--
351+
aaa
352+
bbb
353+
ccc
354+
-->
355+
"""
356+
index = 0
357+
index = scan_past_empty_lines(commented_content, index)
358+
block = get_multiline_comment_block(commented_content, index)
359+
assert block is not None
360+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])
361+
362+
363+
def test_get_multiline_comment_block_xmlstyle_prefixed():
364+
"""Test parsing comment block with multiline xml-style comment block containing a prefix."""
365+
commented_content = """
366+
<!--
367+
# aaa
368+
# bbb
369+
# ccc
370+
-->
371+
"""
372+
index = 0
373+
index = scan_past_empty_lines(commented_content, index)
374+
block = get_multiline_comment_block(commented_content, index)
375+
assert block is not None
376+
assert block == '\n'.join(['aaa', 'bbb', 'ccc'])

0 commit comments

Comments
 (0)