Skip to content

Commit ae43889

Browse files
authored
Merge pull request #50 from ajinabraham/mobsf_queue
Split file read + regex scan
2 parents dddf52a + 96ec743 commit ae43889

File tree

5 files changed: 52 additions (+52) and 32 deletions (-32)

libsast/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
__title__ = 'libsast'
1313
__authors__ = 'Ajin Abraham'
1414
__copyright__ = f'Copyright {year} Ajin Abraham, opensecurity.in'
15-
__version__ = '3.1.0'
15+
__version__ = '3.1.1'
1616
__version_info__ = tuple(int(i) for i in __version__.split('.'))
1717
__all__ = [
1818
'Scanner',

libsast/core_matcher/choice_matcher.py

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -31,40 +31,48 @@ def __init__(self, options: dict) -> None:
3131

3232
def scan(self, paths: list) -> dict:
3333
"""Scan file(s) or directory per rule."""
34-
if not (self.scan_rules and paths):
35-
return
36-
self.validate_rules()
37-
3834
if self.show_progress:
3935
pbar = common.ProgressBar('Choice Match', len(self.scan_rules))
4036
self.scan_rules = pbar.progress_loop(self.scan_rules)
4137

38+
file_contents = self.read_file_contents(paths)
39+
return self.regex_scan(file_contents)
40+
41+
def read_file_contents(self, paths: list) -> list:
42+
"""Load file(s) content."""
43+
if not (self.scan_rules and paths):
44+
return
45+
self.validate_rules()
4246
choice_args = []
4347
for rule in self.scan_rules:
4448
scan_paths = paths
4549
if rule['type'] != 'code' and self.alternative_path:
4650
# Scan only alternative path
4751
scan_paths = [Path(self.alternative_path)]
4852
choice_args.append((scan_paths, rule))
53+
if not choice_args:
54+
return []
4955

50-
# Use ThreadPoolExecutor for reading file contents and
51-
# ProcessPoolExecutor for processing regex
52-
with ThreadPoolExecutor() as io_executor, ProcessPoolExecutor(
53-
max_workers=self.cpu) as cpu_executor:
56+
# Use ThreadPoolExecutor for file reading
57+
with ThreadPoolExecutor() as io_executor:
58+
# Submit file reading tasks and wait for results
5459
futures = []
5560
for args_tuple in choice_args:
56-
# Submit each read task and store the future along with the args
5761
future = io_executor.submit(
5862
self._read_file_contents, args_tuple)
59-
futures.append((future, args_tuple))
63+
futures.append(future)
64+
return [future.result() for future in futures]
65+
66+
def regex_scan(self, file_contents) -> list:
67+
"""Process regex matches on the file contents."""
68+
# Use ProcessPoolExecutor for regex processing
69+
with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:
6070

6171
results = []
62-
for future, _ in futures:
63-
file_contents = future.result()
64-
# This will block until the file reading is done
65-
# Process the file contents with ProcessPoolExecutor
72+
for content in file_contents:
73+
# Process Choice Matcher on the file contents
6674
process_future = cpu_executor.submit(
67-
self.choice_matcher, file_contents)
75+
self.choice_matcher, content)
6876
results.append(process_future.result())
6977

7078
self.add_finding(results)

libsast/core_matcher/pattern_matcher.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -31,27 +31,39 @@ def __init__(self, options: dict) -> None:
3131

3232
def scan(self, paths: list) -> dict:
3333
"""Scan file(s) or directory."""
34-
if not (self.scan_rules and paths):
35-
return
36-
self.validate_rules()
37-
3834
if self.show_progress:
3935
pbar = common.ProgressBar('Pattern Match', len(paths))
4036
paths = pbar.progress_loop(paths)
4137

38+
file_contents = self.read_file_contents(paths)
39+
return self.regex_scan(file_contents)
40+
41+
def read_file_contents(self, paths: list) -> list:
42+
"""Load file(s) content."""
43+
if not (self.scan_rules and paths):
44+
return
45+
self.validate_rules()
46+
4247
# Filter files by extension and size, prepare list for processing
4348
files_to_scan = {
4449
sfile for sfile in paths
4550
if is_file_valid(sfile, self.exts, 5)
4651
}
52+
if not files_to_scan:
53+
return []
4754

48-
# Use a ThreadPool for file reading, and ProcessPool for CPU-bound regex
49-
with ThreadPoolExecutor() as io_executor, ProcessPoolExecutor(
50-
max_workers=self.cpu) as cpu_executor:
55+
# Use a ThreadPool for file reading
56+
with ThreadPoolExecutor() as io_executor:
5157

5258
# Read all files
5359
file_contents = list(io_executor.map(
5460
self._read_file_content, files_to_scan))
61+
return file_contents
62+
63+
def regex_scan(self, file_contents: list) -> dict:
64+
"""Scan file(s) content."""
65+
# Use a ProcessPool for CPU-bound regex
66+
with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:
5567

5668
# Run regex on file data
5769
results = cpu_executor.map(

poetry.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "libsast"
3-
version = "3.1.0"
3+
version = "3.1.1"
44
description = "A generic SAST library built on top of semgrep and regex"
55
keywords = ["libsast", "SAST", "Python SAST", "SAST API", "Regex SAST", "Pattern Matcher"]
66
authors = ["Ajin Abraham <[email protected]>"]

0 commit comments

Comments
 (0)