Add scan tests (#10)

* recon.targets tests added
* restructured tests logically
* fixed yaml error
* fixed job names
* recon.__init__ tests added
* recon.config tests added
* recon.amass.ParseAmassScan tests added
* fixed test destined to fail on CI pipeline
* testing amass partially complete

This commit closes #6 and #8.

Updated existing tests to utilize new paths.
epi052 · Jan 30, 2020 · 1d5155f
1 parent 2793284
commit 1d5155f
Show file tree

Hide file tree

Showing 270 changed files with 21,695 additions and 100 deletions.
diff --git a/.flake8 b/.flake8
@@ -1,5 +1,5 @@
 [flake8]
-max-line-length = 100
+max-line-length = 88
 select = C,E,F,W,B,B950
 ignore = E203, E501, W503
 max-complexity = 13
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -28,7 +28,7 @@ jobs:
       with:
         args: ". --check"
 
-  test:
+  test-install:
 
     runs-on: ubuntu-latest
 
@@ -48,4 +48,24 @@ jobs:
     - name: Test with pytest
       run: |
         pipenv install pytest
-        pipenv run python -m pytest tests/
+        pipenv run python -m pytest tests/test_install
+
+  test-recon:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Python 3.7
+      uses: actions/setup-python@v1
+      with:
+        python-version: 3.7
+    - name: Set up pipenv
+      run: |
+        python -m pip install --upgrade pip
+        pip install pipenv
+        pipenv install -d
+    - name: Test with pytest
+      run: |
+        pipenv install pytest
+        pipenv run python -m pytest tests/test_recon
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,4 +1,4 @@
 [tool.black]
-line-length = 100
+line-length = 88
 include = '\.pyi?$'
 exclude = '.*config.*py$|\.git'
diff --git a/recon-pipeline.py b/recon-pipeline.py
@@ -12,7 +12,9 @@
 __version__ = "0.7.3"
 
 # fix up the PYTHONPATH so we can simply execute the shell from wherever in the filesystem
-os.environ["PYTHONPATH"] = f"{os.environ.get('PYTHONPATH')}:{str(Path(__file__).parent.resolve())}"
+os.environ[
+    "PYTHONPATH"
+] = f"{os.environ.get('PYTHONPATH')}:{str(Path(__file__).parent.resolve())}"
 
 # suppress "You should consider upgrading via the 'pip install --upgrade pip' command." warning
 os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1"
@@ -113,7 +115,9 @@ def _luigi_pretty_printer(self, stderr):
 
             words = output.split()
 
-            self.async_alert(style(f"[-] {words[5].split('_')[0]} queued", fg="bright_white"))
+            self.async_alert(
+                style(f"[-] {words[5].split('_')[0]} queued", fg="bright_white")
+            )
         elif output.startswith("INFO: ") and "running" in output:
             # luigi Task is currently running
 
@@ -130,7 +134,9 @@ def _luigi_pretty_printer(self, stderr):
 
             words = output.split()
 
-            self.async_alert(style(f"[+] {words[5].split('_')[0]} complete!", fg="bright_green"))
+            self.async_alert(
+                style(f"[+] {words[5].split('_')[0]} complete!", fg="bright_green")
+            )
 
     @cmd2.with_argparser(scan_parser)
     def do_scan(self, args):
@@ -166,10 +172,14 @@ def do_scan(self, args):
             subprocess.run(command)
         else:
             # suppress luigi messages in favor of less verbose/cleaner output
-            proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+            proc = subprocess.Popen(
+                command, stderr=subprocess.PIPE, stdout=subprocess.PIPE
+            )
 
             # add stderr to the selector loop for processing when there's something to read from the fd
-            selector.register(proc.stderr, selectors.EVENT_READ, self._luigi_pretty_printer)
+            selector.register(
+                proc.stderr, selectors.EVENT_READ, self._luigi_pretty_printer
+            )
 
     @cmd2.with_argparser(install_parser)
     def do_install(self, args):
@@ -220,14 +230,18 @@ def do_install(self, args):
                 self.do_install(dependency)
 
         if tools.get(args.tool).get("installed"):
-            return self.async_alert(style(f"[!] {args.tool} is already installed.", fg="yellow"))
+            return self.async_alert(
+                style(f"[!] {args.tool} is already installed.", fg="yellow")
+            )
         else:
 
             # list of return values from commands run during each tool installation
             # used to determine whether the tool installed correctly or not
             retvals = list()
 
-            self.async_alert(style(f"[*] Installing {args.tool}...", fg="bright_yellow"))
+            self.async_alert(
+                style(f"[*] Installing {args.tool}...", fg="bright_yellow")
+            )
 
             for command in tools.get(args.tool).get("commands"):
                 # run all commands required to install the tool
@@ -239,13 +253,18 @@ def do_install(self, args):
 
                     # go tools use subshells (cmd1 && cmd2 && cmd3 ...) during install, so need shell=True
                     proc = subprocess.Popen(
-                        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                        command,
+                        shell=True,
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE,
                     )
                 else:
 
                     # "normal" command, split up the string as usual and run it
                     proc = subprocess.Popen(
-                        shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                        shlex.split(command),
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE,
                     )
 
                 out, err = proc.communicate()

diff --git a/recon/__init__.py b/recon/__init__.py
@@ -141,7 +141,9 @@ def get_scans():
     # recursively walk packages; import each module in each package
     # walk_packages yields ModuleInfo objects for all modules recursively on path
     # prefix is a string to output on the front of every module name on output.
-    for loader, module_name, is_pkg in pkgutil.walk_packages(path=recon.__path__, prefix="recon."):
+    for loader, module_name, is_pkg in pkgutil.walk_packages(
+        path=recon.__path__, prefix="recon."
+    ):
         importlib.import_module(module_name)
 
     # walk all modules, grabbing classes that we've written and add them to the classlist defaultdict
@@ -184,7 +186,9 @@ def get_scans():
     help="directory in which to save scan results",
 )
 scan_parser.add_argument(
-    "--wordlist", completer_method=cmd2.Cmd.path_complete, help="path to wordlist used by gobuster",
+    "--wordlist",
+    completer_method=cmd2.Cmd.path_complete,
+    help="path to wordlist used by gobuster",
 )
 scan_parser.add_argument(
     "--interface",
@@ -200,14 +204,19 @@ def get_scans():
     help="ports to scan as specified by nmap's list of top-ports (only meaningful to around 5000)",
 )
 scan_parser.add_argument(
-    "--ports", help="port specification for masscan (all ports example: 1-65535,U:1-65535)",
+    "--ports",
+    help="port specification for masscan (all ports example: 1-65535,U:1-65535)",
 )
 scan_parser.add_argument(
     "--threads", help="number of threads for all of the threaded applications to use"
 )
 scan_parser.add_argument("--scan-timeout", help="scan timeout for aquatone")
-scan_parser.add_argument("--proxy", help="proxy for gobuster if desired (ex. 127.0.0.1:8080)")
-scan_parser.add_argument("--extensions", help="list of extensions for gobuster (ex. asp,html,aspx)")
+scan_parser.add_argument(
+    "--proxy", help="proxy for gobuster if desired (ex. 127.0.0.1:8080)"
+)
+scan_parser.add_argument(
+    "--extensions", help="list of extensions for gobuster (ex. asp,html,aspx)"
+)
 scan_parser.add_argument(
     "--local-scheduler",
     action="store_true",

diff --git a/recon/amass.py b/recon/amass.py
@@ -1,5 +1,6 @@
 import json
 import ipaddress
+from pathlib import Path
 
 import luigi
 from luigi.util import inherits
@@ -52,20 +53,26 @@ def requires(self):
     def output(self):
         """ Returns the target output for this task.
 
-        Naming convention for the output file is amass.TARGET_FILE.json.
+        Naming convention for the output file is amass.json.
 
         Returns:
             luigi.local_target.LocalTarget
         """
-        return luigi.LocalTarget(f"{self.results_dir}/amass.{self.target_file}.json")
+        results_subfolder = Path(self.results_dir) / "amass-results"
+
+        new_path = results_subfolder / "amass.json"
+
+        return luigi.LocalTarget(new_path.resolve())
 
     def program_args(self):
         """ Defines the options/arguments sent to amass after processing.
 
         Returns:
             list: list of options/arguments, beginning with the name of the executable to run
         """
-        print(f"debug-epi: amass {self.results_dir}")
+
+        Path(self.output().path).parent.mkdir(parents=True, exist_ok=True)
+
         if not self.input().path.endswith("domains"):
             return f"touch {self.output().path}".split()
 
@@ -128,12 +135,16 @@ def output(self):
         Returns:
             dict(str: luigi.local_target.LocalTarget)
         """
+        results_subfolder = Path(self.results_dir) / "target-results"
+
+        ips = (results_subfolder / "ipv4_addresses").resolve()
+        ip6s = ips.with_name("ipv6_addresses").resolve()
+        subdomains = ips.with_name("subdomains").resolve()
+
         return {
-            "target-ips": luigi.LocalTarget(f"{self.results_dir}/{self.target_file}.ips"),
-            "target-ip6s": luigi.LocalTarget(f"{self.results_dir}/{self.target_file}.ip6s"),
-            "target-subdomains": luigi.LocalTarget(
-                f"{self.results_dir}/{self.target_file}.subdomains"
-            ),
+            "target-ips": luigi.LocalTarget(ips),
+            "target-ip6s": luigi.LocalTarget(ip6s),
+            "target-subdomains": luigi.LocalTarget(subdomains),
         }
 
     def run(self):
@@ -160,6 +171,10 @@ def run(self):
         unique_ip6s = set()
         unique_subs = set()
 
+        Path(self.output().get("target-ips").path).parent.mkdir(
+            parents=True, exist_ok=True
+        )
+
         amass_json = self.input().open()
         ip_file = self.output().get("target-ips").open("w")
         ip6_file = self.output().get("target-ip6s").open("w")
@@ -172,9 +187,13 @@ def run(self):
 
                 for address in entry.get("addresses"):
                     ipaddr = address.get("ip")
-                    if isinstance(ipaddress.ip_address(ipaddr), ipaddress.IPv4Address):  # ipv4 addr
+                    if isinstance(
+                        ipaddress.ip_address(ipaddr), ipaddress.IPv4Address
+                    ):  # ipv4 addr
                         unique_ips.add(ipaddr)
-                    elif isinstance(ipaddress.ip_address(ipaddr), ipaddress.IPv6Address):  # ipv6
+                    elif isinstance(
+                        ipaddress.ip_address(ipaddr), ipaddress.IPv6Address
+                    ):  # ipv6
                         unique_ip6s.add(ipaddr)
 
             # send gathered results to their appropriate destination

diff --git a/recon/masscan.py b/recon/masscan.py
@@ -2,6 +2,7 @@
 import pickle
 import logging
 import subprocess
+from pathlib import Path
 from collections import defaultdict
 
 import luigi
@@ -49,7 +50,9 @@ class MasscanScan(luigi.Task):
 
     rate = luigi.Parameter(default=defaults.get("masscan-rate", ""))
     interface = luigi.Parameter(default=defaults.get("masscan-iface", ""))
-    top_ports = luigi.IntParameter(default=0)  # IntParameter -> top_ports expected as int
+    top_ports = luigi.IntParameter(
+        default=0
+    )  # IntParameter -> top_ports expected as int
     ports = luigi.Parameter(default="")
 
     def output(self):
@@ -60,15 +63,19 @@ def output(self):
         Returns:
             luigi.local_target.LocalTarget
         """
-        return luigi.LocalTarget(f"{self.results_dir}/masscan.{self.target_file}.json")
+        results_subfolder = Path(self.results_dir) / "masscan-results"
+
+        new_path = results_subfolder / "masscan.json"
+
+        return luigi.LocalTarget(new_path.resolve())
 
     def run(self):
         """ Defines the options/arguments sent to masscan after processing.
 
         Returns:
             list: list of options/arguments, beginning with the name of the executable to run
         """
-        print(f"debug-epi: masscan {self.results_dir}")
+
         if self.ports and self.top_ports:
             # can't have both
             logging.error("Only --ports or --top-ports is permitted, not both.")
@@ -86,13 +93,21 @@ def run(self):
 
         if self.top_ports:
             # if --top-ports used, format the top_*_ports lists as strings and then into a proper masscan --ports option
-            top_tcp_ports_str = ",".join(str(x) for x in top_tcp_ports[: self.top_ports])
-            top_udp_ports_str = ",".join(str(x) for x in top_udp_ports[: self.top_ports])
+            top_tcp_ports_str = ",".join(
+                str(x) for x in top_tcp_ports[: self.top_ports]
+            )
+            top_udp_ports_str = ",".join(
+                str(x) for x in top_udp_ports[: self.top_ports]
+            )
 
             self.ports = f"{top_tcp_ports_str},U:{top_udp_ports_str}"
             self.top_ports = 0
 
-        target_list = yield TargetList(target_file=self.target_file, results_dir=self.results_dir)
+        target_list = yield TargetList(
+            target_file=self.target_file, results_dir=self.results_dir
+        )
+
+        Path(self.output().path).parent.mkdir(parents=True, exist_ok=True)
 
         if target_list.path.endswith("domains"):
             yield ParseAmassOutput(
@@ -115,9 +130,13 @@ def run(self):
             "--ports",
             self.ports,
             "-iL",
-            target_list.path.replace("domains", "ips"),
         ]
 
+        if target_list.path.endswith("domains"):
+            command.append(target_list.path.replace("domains", "ipv4_addresses"))
+        else:
+            command.append(target_list.path.replace("domains", "ip_addresses"))
+
         subprocess.run(command)
 
 
@@ -160,19 +179,26 @@ def output(self):
         Returns:
             luigi.local_target.LocalTarget
         """
-        return luigi.LocalTarget(f"{self.results_dir}/masscan.{self.target_file}.parsed.pickle")
+        results_subfolder = Path(self.results_dir) / "masscan-results"
+
+        new_path = results_subfolder / "masscan.parsed.pickle"
+
+        return luigi.LocalTarget(new_path.resolve())
 
     def run(self):
         """ Reads masscan JSON results and creates a pickled dictionary of pertinent information for processing. """
         ip_dict = defaultdict(lambda: defaultdict(set))  # nested defaultdict
 
         try:
-            entries = json.load(self.input().open())  # load masscan results from Masscan Task
+            # load masscan results from Masscan Task
+            entries = json.load(self.input().open())
         except json.decoder.JSONDecodeError as e:
             # return on exception; no output file created; pipeline should start again from
             # this task if restarted because we never hit pickle.dump
             return print(e)
 
+        Path(self.output().path).parent.mkdir(parents=True, exist_ok=True)
+
         """
         build out ip_dictionary from the loaded JSON