diff --git a/CHANGELOG.md b/CHANGELOG.md index 1eb8ae94d..82f3c8e9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +- 1.0.11 (February 2024) +- Improve the logging of evidence in alerts.json and alerts.log. +- Optimize the storing of evidence in the Redis database. +- Fix problem of missing evidence; all evidence is now logged correctly. +- Fix problem of adding flows to incorrect time windows. +- Fix problem setting the 'SSH version changing' evidence. +- Fix problem closing Redis ports using -k. +- Fix problem closing the progress bar. +- Fix problem releasing the terminal when Slips is done. + - 1.0.10 (January 2024) - Faster ensembling of evidence. - Log accumulated threat levels of each evidence in alerts.json. diff --git a/README.md b/README.md index d1882bae9..c1b0f5448 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

-Slips v1.0.10 +Slips v1.0.11

@@ -125,7 +125,7 @@ or our command-line based interface Kalipso ##### Web interface - ./webinteface.sh + ./webinterface.sh Then navigate to ```http://localhost:55000/``` from your browser. diff --git a/VERSION b/VERSION index 437d26b11..86844988e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.10 \ No newline at end of file +1.0.11 \ No newline at end of file diff --git a/config/slips.conf b/config/slips.conf index 77584ca3d..24514146d 100644 --- a/config/slips.conf +++ b/config/slips.conf @@ -345,24 +345,25 @@ receive_delay = 86400 # All the following detections are turned on by default # Turn them off by adding any of the following detections to the disabled_detections list -# ARPScan, ARP-outside-localnet, UnsolicitedARP, MITM-ARP-attack, SSHSuccessful, -# LongConnection, MultipleReconnectionAttempts, -# ConnectionToMultiplePorts, InvalidCertificate, UnknownPort, Port0Connection, -# ConnectionWithoutDNS, DNSWithoutConnection, -# MaliciousJA3, DataExfiltration, SelfSignedCertificate, VerticalPortscan, -# HorizontalPortscan, Password_Guessing, MaliciousFlow, -# SuspiciousUserAgent, multiple_google_connections, NETWORK_gps_location_leaked, -# Command-and-Control-channels-detection, InvalidCertificate -# ThreatIntelligenceBlacklistDomain, ThreatIntelligenceBlacklistIP, -# ThreatIntelligenceBlacklistedASN, MaliciousDownloadedFile, - # DGA, MaliciousSSLCert, YoungDomain, MultipleSSHVersions -# DNS-ARPA-Scan, SMTPLoginBruteforce, BadSMTPLogin, -# IncompatibleUserAgent, ICMP-Timestamp-Scan, ICMP-AddressScan, ICMP-AddressMaskScan -# EmptyConnections, IncompatibleCN, PastebinDownload, ExecutableMIMEType -# MultipleUserAgent, DifferentLocalnet, ConnectionToPrivateIP, HTTPtraffic -# InvalidDNSResolution - -# disabled_detections = [ConnectionToMultiplePorts, PortScanType1] +# ARP_SCAN, ARP_OUTSIDE_LOCALNET, UNSOLICITED_ARP, MITM_ARP_ATTACK, +# YOUNG_DOMAIN, MULTIPLE_SSH_VERSIONS, DIFFERENT_LOCALNET, +# DEVICE_CHANGING_IP, NON_HTTP_PORT_80_CONNECTION, NON_SSL_PORT_443_CONNECTION, +# WEIRD_HTTP_METHOD, INCOMPATIBLE_CN, DGA_NXDOMAINS, DNS_WITHOUT_CONNECTION, +# PASTEBIN_DOWNLOAD, CONNECTION_WITHOUT_DNS, DNS_ARPA_SCAN, UNKNOWN_PORT, +# PASSWORD_GUESSING, HORIZONTAL_PORT_SCAN, CONNECTION_TO_PRIVATE_IP, GRE_TUNNEL, +# VERTICAL_PORT_SCAN, SSH_SUCCESSFUL, LONG_CONNECTION, SELF_SIGNED_CERTIFICATE, +# MULTIPLE_RECONNECTION_ATTEMPTS, CONNECTION_TO_MULTIPLE_PORTS, HIGH_ENTROPY_DNS_ANSWER, +# INVALID_DNS_RESOLUTION, PORT_0_CONNECTION, MALICIOUS_JA3, MALICIOUS_JA3S, +# DATA_UPLOAD, BAD_SMTP_LOGIN, SMTP_LOGIN_BRUTEFORCE, MALICIOUS_SSL_CERT, +# MALICIOUS_FLOW, SUSPICIOUS_USER_AGENT, EMPTY_CONNECTIONS, INCOMPATIBLE_USER_AGENT, +# EXECUTABLE_MIME_TYPE, MULTIPLE_USER_AGENT, HTTP_TRAFFIC, MALICIOUS_JARM, +# NETWORK_GPS_LOCATION_LEAKED, ICMP_TIMESTAMP_SCAN, ICMP_ADDRESS_SCAN, +# ICMP_ADDRESS_MASK_SCAN, DHCP_SCAN, MALICIOUS_IP_FROM_P2P_NETWORK, P2P_REPORT, +# COMMAND_AND_CONTROL_CHANNEL, THREAT_INTELLIGENCE_BLACKLISTED_ASN, +# THREAT_INTELLIGENCE_BLACKLISTED_IP, THREAT_INTELLIGENCE_BLACKLISTED_DOMAIN, +# MALICIOUS_DOWNLOADED_FILE, MALICIOUS_URL + +# disabled_detections = [THREAT_INTELLIGENCE_BLACKLISTED_IP, CONNECTION_TO_PRIVATE_IP] disabled_detections = [] #################### diff --git a/conftest.py b/conftest.py index d260aacea..7356512e4 100644 --- a/conftest.py +++ b/conftest.py @@ -21,18 +21,20 @@ @pytest.fixture -def mock_rdb(): +def mock_db(): # Create a mock version of the database object with patch('slips_files.core.database.database_manager.DBManager') as mock: yield mock.return_value def 
do_nothing(*arg): - """Used to override the print function because using the self.print causes broken pipes""" + """Used to override the print function because using the self.print causes + broken pipes""" pass @pytest.fixture def input_queue(): - """This input_queue will be passed to all module constructors that need it""" + """This input_queue will be passed to all module constructors that need + it""" input_queue = Queue() input_queue.put = do_nothing return input_queue @@ -40,7 +42,8 @@ def input_queue(): @pytest.fixture def profiler_queue(): - """This profiler_queue will be passed to all module constructors that need it""" + """This profiler_queue will be passed to all module constructors that need + it""" profiler_queue = Queue() profiler_queue.put = do_nothing return profiler_queue diff --git a/dataset/test14-malicious-zeek-dir/http.log b/dataset/test14-malicious-zeek-dir/http.log index 0cddb36e9..52c6e987c 100644 --- a/dataset/test14-malicious-zeek-dir/http.log +++ b/dataset/test14-malicious-zeek-dir/http.log @@ -515,3 +515,7 @@ {"ts":256.462234,"uid":"C6pigI2xhEcRM11ul5","id.orig_h":"10.0.2.15","id.orig_p":49422,"id.resp_h":"147.32.80.7","id.resp_p":80,"trans_depth":7,"method":"GET","host":"147.32.80.7","uri":"/wpad.dat","version":"1.1","request_body_len":0,"response_body_len":593,"status_code":200,"status_msg":"OK","tags":[],"resp_fuids":["F9fIvwQnkVDaTbP3b"],"resp_mime_types":["text/plain"]} {"ts":256.472445,"uid":"ClMSbZVMEHMJrIUFg","id.orig_h":"10.0.2.15","id.orig_p":49421,"id.resp_h":"147.32.80.7","id.resp_p":80,"trans_depth":7,"method":"GET","host":"147.32.80.7","uri":"/wpad.dat","version":"1.1","request_body_len":0,"response_body_len":593,"status_code":200,"status_msg":"OK","tags":[],"resp_fuids":["FdmOWS7AXNlR2iaya"],"resp_mime_types":["text/plain"]} {"ts":256.550217,"uid":"C6ISUE3ZtRgrPYiTd","id.orig_h":"10.0.2.15","id.orig_p":49465,"id.resp_h":"54.239.168.175","id.resp_p":80,"trans_depth":1,"method":"GET","host":"x.ss2.us","uri":"/x.cer","version":"1.1","user_agent":"Microsoft-CryptoAPI/6.1","request_body_len":0,"response_body_len":1302,"status_code":200,"status_msg":"OK","tags":[],"resp_fuids":["FzV4Kl11UtKFOqcLf4"]} +{"ts":256.550218,"uid":"CMbXf021RgZlvixHhd","id.orig_h":"10.0.2.15","id.orig_p":45760,"id.resp_h":"142.250.200.238","id.resp_p":80,"trans_depth":1,"method":"GET","host":"google.com","uri":"/","version":"1.1","user_agent":"Wget/1.21.2","request_body_len":0,"response_body_len":219,"status_code":301,"status_msg":"Moved Permanently","tags":[],"resp_fuids":["Faa9ZB2vezZSLlpr9l"],"resp_mime_types":["text/html"]} +{"ts":256.550290,"uid":"CE2oyw2vkIR1JETHOb","id.orig_h":"10.0.2.15","id.orig_p":44476,"id.resp_h":"142.250.200.228","id.resp_p":80,"trans_depth":1,"method":"GET","host":"www.google.com","uri":"/","version":"1.1","user_agent":"Wget/1.21.2","request_body_len":0,"response_body_len":20626,"status_code":200,"status_msg":"OK","tags":[],"resp_fuids":["FjQHUY3rnpA1s8PsEa"],"resp_mime_types":["text/html"]} +{"ts":256.550291,"uid":"CE2oyw2vkIR1JETHOb","id.orig_h":"10.0.2.15","id.orig_p":44476,"id.resp_h":"142.250.200.228","id.resp_p":80,"trans_depth":1,"method":"GET","host":"google.com","uri":"/","version":"1.1","user_agent":"Wget/1.21.2","request_body_len":0,"response_body_len":20626,"status_code":200,"status_msg":"OK","tags":[],"resp_fuids":["FjQHUY3rnpA1s8PsEa"],"resp_mime_types":["text/html"]} 
+{"ts":256.550295,"uid":"C9wOvkjAJynTX6uVi","id.orig_h":"10.0.2.15","id.orig_p":45770,"id.resp_h":"142.250.200.238","id.resp_p":80,"trans_depth":1,"method":"GET","host":"google.com","uri":"/","version":"1.1","user_agent":"Wget/1.21.2","request_body_len":0,"response_body_len":219,"status_code":301,"status_msg":"Moved Permanently","tags":[],"resp_fuids":["FG8Tka4NxZUyOunZuc"],"resp_mime_types":["text/html"]} \ No newline at end of file diff --git a/docs/create_new_module.md b/docs/create_new_module.md index 43142811d..b1d5322d1 100644 --- a/docs/create_new_module.md +++ b/docs/create_new_module.md @@ -160,6 +160,7 @@ uid = next(iter(flow)) flow = json.loads(flow[uid]) saddr = flow['saddr'] daddr = flow['daddr'] +timestamp = flow['ts'] ``` Now we need to check if both of them are private. @@ -176,36 +177,105 @@ if srcip_obj.is_private and dstip_obj.is_private: Now that we're sure both IPs are private, we need to generate an alert. -Slips requires certain info about the evidence to be able to sort them and properly display them using Kalipso. +Slips requires certain info about the evidence to be able to deal with them. -Each parameter is described below +first, since we are creating a new evidence, other than the ones defined in the EvidenceType Enum in +```StratosphereLinuxIPS/slips_files/core/evidence_structure/evidence.py``` +then, we need to add it + +so the EvidenceType Enum in ```slips_files/core/evidence_structure/evidence.py``` would look something like this + +```python +class EvidenceType(Enum): + """ + These are the types of evidence slips can detect + """ + ... + CONNECTION_TO_LOCAL_DEVICE = auto() +``` + +now we have our evidence type supported. it's time to set the evidence! + +Now we need to use the Evidence structure of slips, to do that, +first import the necessary dataclasses + +```python +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + ) +``` + +now use them, ```python # on a scale of 0 to 1, how confident you are of this evidence confidence = 0.8 # how dangerous is this evidence? info, low, medium, high, critical? -threat_level = 'high' +threat_level = ThreatLevel.HIGH # the name of your evidence, you can put any descriptive string here -evidence_type = 'ConnectionToLocalDevice' +# this is the type we just created +evidence_type = EvidenceType.CONNECTION_TO_LOCAL_DEVICE # what is this evidence category according to IDEA categories -category = 'Anomaly.Connection' -# which ip is the attacker here? the src or the dst? -attacker_direction = 'srcip' -# what is the ip of the attacker? -attacker = saddr +category = IDEACategory.ANOMALY_CONNECTION +# which ip is the attacker here? +attacker = Attacker( + direction=Direction.SRC, # who's the attacker the src or the dst? + attacker_type=IoCType.IP, # is it an IP? is it a domain? etc. 
+ value=saddr # the actual ip/domain/url of the attacker, in our case, this is the IP + ) +victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr, + ) # describe the evidence -description = f'Detected a connection to a local device {daddr}' -timestamp = datetime.datetime.now().strftime('%Y/%m/%d-%H:%M:%S') -# the crrent profile is the source ip, this comes in -# the msg received in the channel -profileid = msg['profileid'] -# Profiles are split into timewindows, each timewindow is 1h, +description = f'A connection to a local device {daddr}' +# the current profile is the source ip, # this comes in the msg received in the channel -twid = msg['twid'] - -self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, profileid=profileid, twid=twid) +# the profile this evidence should be in, should be the profile of the attacker +# because this is evidence that this profile is attacking others +profile = ProfileID(ip=saddr) +# Profiles are split into timewindows, each timewindow is 1h, +# the id of this timewindow comes in the msg received in the channel +twid_number = int( + msg['twid'].replace("timewindow",'') + ) +timewindow = TimeWindow(number=twid_number) +# how many flows formed this evidence? +# in the case of scans, it can be way more than 1 +conn_count = 1 +# list of uids of the flows that are part of this evidence +uid_list = [uid] +# now use the above info to create the evidence obj +evidence = Evidence( + evidence_type=evidence_type, + attacker=attacker, + threat_level=threat_level, + category=category, + description=description, + victim=victim, + profile=profile, + timewindow=timewindow, + uid=uid_list, + # when did this evidence happen? use the + # flow's ts detected by zeek + # this comes in the msg received in the channel + timestamp=timestamp, + conn_count=conn_count, + confidence=confidence + ) +self.db.set_evidence(evidence) ``` @@ -224,7 +294,7 @@ First we start Slips by using the following command: -o is to store the output in the ```local_conn_detector/``` dir. -Then we make a connnection to a local ip +Then we make a connection to a local ip (change it to a host you know is up in your network) ``` ping 192.168.1.18 ``` @@ -240,8 +310,8 @@ cat local_conn_detector/alerts.log ``` Using develop - 9f5f9412a3c941b3146d92c8cb2f1f12aab3699e - 2022-06-02 16:51:43.989778 -2022/06/02-16:51:57: Src IP 192.168.1.18 . Detected Detected a connection to a local device 192.168.1.12 -2022/06/02-16:51:57: Src IP 192.168.1.12 . Detected Detected a connection to a local device 192.168.1.18 +2022/06/02-16:51:57: Src IP 192.168.1.18 . Detected a connection to a local device 192.168.1.12 +2022/06/02-16:51:57: Src IP 192.168.1.12 . Detected a connection to a local device 192.168.1.18 ``` @@ -274,12 +344,23 @@ Detailed explanation of [Slips profiles and timewindows here](https://idea.cesne Here is the whole local_connection_detector.py code for copy/paste. ```python -from slips_files.common.abstracts.module import IModule -from slips_files.common.imports import * -import datetime import ipaddress import json +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + ) +from slips_files.common.imports import * class Module(IModule, multiprocessing.Process): # Name: short name of the module.
Do not use spaces @@ -287,7 +368,7 @@ class Module(IModule, multiprocessing.Process): description = 'detects connections to other devices in your local network' authors = ['Template Author'] - def init(self) + def init(self): # To which channels do you wnat to subscribe? When a message # arrives on the channel the module will wakeup # The options change, so the last list is on the @@ -299,19 +380,25 @@ class Module(IModule, multiprocessing.Process): self.c1 = self.db.subscribe('new_flow') self.channels = { 'new_flow': self.c1, - } + } - def shutdown_gracefully(self): + def shutdown_gracefully( + self + ): # Confirm that the module is done processing self.db.publish('finished_modules', self.name) - def pre_main(self): + def pre_main( + self + ): """ Initializations that run only once before the main() function runs in a loop """ utils.drop_root_privs() - def main(self): + def main( + self + ): """Main loop function""" if msg := self.get_msg('new_flow'): msg = msg['data'] @@ -321,35 +408,68 @@ class Module(IModule, multiprocessing.Process): flow = json.loads(flow[uid]) saddr = flow['saddr'] daddr = flow['daddr'] + timestamp = flow['ts'] srcip_obj = ipaddress.ip_address(saddr) dstip_obj = ipaddress.ip_address(daddr) if srcip_obj.is_private and dstip_obj.is_private: # on a scale of 0 to 1, how confident you are of this evidence confidence = 0.8 # how dangerous is this evidence? info, low, medium, high, critical? - threat_level = 'high' + threat_level = ThreatLevel.HIGH + # the name of your evidence, you can put any descriptive string here - evidence_type = 'ConnectionToLocalDevice' - # what is this evidence category according to IDEA categories - category = 'Anomaly.Connection' - # which ip is the attacker here? the src or the dst? - attacker_direction = 'srcip' - # what is the ip of the attacker? - attacker = saddr + # this is the type we just created + evidence_type = EvidenceType.CONNECTION_TO_LOCAL_DEVICE + # what is this evidence category according to IDEA categories + category = IDEACategory.ANOMALY_CONNECTION + # which ip is the attacker here? + attacker = Attacker( + direction=Direction.SRC, # is the attacker the src or the dst? + attacker_type=IoCType.IP, # is it an IP? is it a domain? etc. + value=saddr # the actual ip/domain/url of the attacker, in our case, this is the IP + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr, + ) # describe the evidence - description = f'Detected a connection to a local device {daddr}' - timestamp = datetime.datetime.now().strftime('%Y/%m/%d-%H:%M:%S') - # the crrent profile is the source ip, this comes in the msg received in the channel - profileid = msg['profileid'] - # Profiles are split into timewindows, each timewindow is 1h, this comes in the msg received in the channel - twid = msg['twid'] - - self.db.setEvidence( - evidence_type, attacker_direction, attacker, threat_level, - confidence, description, timestamp, category, profileid=profileid, - twid=twid - ) - + description = f'A connection to a local device {daddr}' + # the current profile is the source ip, + # this comes in the msg received in the channel + # the profile this evidence should be in, should be the profile of the attacker + # because this is evidence that this profile is attacking others
+ profile = ProfileID(ip=saddr) + # Profiles are split into timewindows, each timewindow is 1h, + # the id of this timewindow comes in the msg received in the channel + twid_number = int( + msg['twid'].replace("timewindow",'') + ) + timewindow = TimeWindow(number=twid_number) + # how many flows formed this evidence? + # in the case of scans, it can be way more than 1 + conn_count = 1 + # list of uids of the flows that are part of this evidence + uid_list = [uid] + # now use the above info to create the evidence obj + evidence = Evidence( + evidence_type=evidence_type, + attacker=attacker, + threat_level=threat_level, + category=category, + description=description, + victim=victim, + profile=profile, + timewindow=timewindow, + uid=uid_list, + # when did this evidence happen? use the + # flow's ts detected by zeek + # this comes in the msg received in the channel + timestamp=timestamp, + conn_count=conn_count, + confidence=confidence + ) + self.db.set_evidence(evidence) ``` diff --git a/docs/images/slips.gif b/docs/images/slips.gif index 956782564..a8cc42813 100644 Binary files a/docs/images/slips.gif and b/docs/images/slips.gif differ diff --git a/docs/installation.md b/docs/installation.md index e317b921b..8fe6a4875 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -226,7 +226,7 @@ If you cloned Slips in '~/code/StratosphereLinuxIPS', then you can build the Doc cd ~/code/StratosphereLinuxIPS/docker/ubunutu-image docker build --no-cache -t slips -f Dockerfile . docker run -it --rm --net=host -v ~/code/StratosphereLinuxIPS/dataset:/StratosphereLinuxIPS/dataset slips - ./slips.py -c config/slips.conf -f dataset/test3.binetflow + ./slips.py -c config/slips.conf -f dataset/test3-mixed.binetflow If you don't have Internet connection from inside your Docker image while building, you may have another set of networks defined in your Docker.
For that try: diff --git a/managers/metadata_manager.py b/managers/metadata_manager.py index 3085e8f07..291fda15f 100644 --- a/managers/metadata_manager.py +++ b/managers/metadata_manager.py @@ -8,6 +8,7 @@ import shutil import json from datetime import datetime +from typing import Tuple, List, Set class MetadataManager: def __init__(self, main): @@ -147,20 +148,25 @@ def set_input_metadata(self): self.main.db.set_input_metadata(info) - - def update_slips_running_stats(self): + def update_slips_running_stats(self) -> Tuple[int, Set[str]]: """ - updates the number of processed ips, slips internal time, and modified tws so far in the db + updates the number of processed ips, slips internal time, + and modified tws so far in the db """ slips_internal_time = float(self.main.db.getSlipsInternalTime()) + 1 # Get the amount of modified profiles since we last checked - modified_profiles, last_modified_tw_time = self.main.db.getModifiedProfilesSince( - slips_internal_time + # this is the modification time of the last timewindow + last_modified_tw_time: float + modified_profiles, last_modified_tw_time = ( + self.main.db.getModifiedProfilesSince(slips_internal_time) ) modified_ips_in_the_last_tw = len(modified_profiles) - self.main.db.set_input_metadata({'modified_ips_in_the_last_tw': modified_ips_in_the_last_tw}) - # Get the time of last modified timewindow and set it as a new + self.main.db.set_input_metadata( + {'modified_ips_in_the_last_tw': modified_ips_in_the_last_tw} + ) + # last_modified_tw_time is 0 the moment we start slips + # or if we don't have modified tw since the last slips_internal_time if last_modified_tw_time != 0: self.main.db.set_slips_internal_time( last_modified_tw_time diff --git a/managers/process_manager.py b/managers/process_manager.py index a0e633e92..b36b0b6f0 100644 --- a/managers/process_manager.py +++ b/managers/process_manager.py @@ -1,62 +1,129 @@ -from slips_files.common.imports import * -from slips_files.core.output import Output -from slips_files.core.profiler import Profiler -from slips_files.core.evidence import Evidence -from slips_files.core.input import Input -from multiprocessing import Queue, Event, Process, Semaphore -from modules.update_manager.update_manager import UpdateManager -from exclusiveprocess import Lock, CannotAcquireLock -from collections import OrderedDict -from typing import List, Tuple -from slips_files.common.style import green import asyncio -import signal -import time -import pkgutil -import inspect -import modules import importlib +import inspect import os +import pkgutil +import signal import sys +import time import traceback +from collections import OrderedDict +from multiprocessing import ( + Queue, + Event, + Process, + Semaphore, + Pipe, + ) +from typing import ( + List, + Tuple, + ) + +from exclusiveprocess import ( + Lock, + CannotAcquireLock, + ) + +import modules +from modules.update_manager.update_manager import UpdateManager +from slips_files.common.imports import * +from slips_files.common.style import green +from slips_files.core.evidencehandler import EvidenceHandler +from slips_files.core.input import Input +from slips_files.core.output import Output +from slips_files.core.profiler import Profiler + class ProcessManager: def __init__(self, main): self.main = main self.module_objects = {} - # this is the queue that will be used by the input proces to pass flows - # to the profiler + # this is the queue that will be used by the input process + # to pass flows to the profiler self.profiler_queue = Queue()
self.termination_event: Event = Event() self.stopped_modules = [] # used to stop slips when these 2 are done - # since the semaphore count is zero, slips.py will wait until another thread (input and profiler) - # release the semaphore. Once having the semaphore, then slips.py can terminate slips. + # since the semaphore count is zero, slips.py will wait until another + # thread (input and profiler) + # release the semaphore. Once having the semaphore, then slips.py can + # terminate slips. self.is_input_done = Semaphore(0) self.is_profiler_done = Semaphore(0) - # is set by the profiler process to indicat ethat it's done so that input can shutdown no issue - # now without this event, input process doesn't know that profiler is still waiting for the queue to stop - # and inout stops and renders the profiler queue useless and profiler cant get more lines anymore! + # is set by the profiler process to indicate that it's done so + # input can shutdown no issue + # now without this event, input process doesn't know that profiler + # is still waiting for the queue to stop + # and input stops and renders the profiler queue useless and profiler + # can't get more lines anymore! self.is_profiler_done_event = Event() + # for the communication between output.py and the progress bar + # Pipe(False) means the pipe is unidirectional. + # aka only msgs can go from output -> pbar and not vice versa + # recv_pipe used only for receiving, + # send_pipe used only for sending + self.pbar_recv_pipe, self.output_send_pipe = Pipe(False) + self.pbar_finished: Event = Event() + + def is_pbar_supported(self) -> bool: + """ + When running on a pcap, interface, or taking flows from an + external module, the total amount of flows is unknown + so the pbar is not supported + """ + # input type can be false when using -S or in unit tests + if ( + not self.main.input_type + or self.main.input_type in ('interface', 'pcap', 'stdin') + or self.main.mode == 'daemonized' + ): + return False + if self.main.stdout != '': + # this means that stdout was redirected to a file, + # no need to print the progress bar + return False + + if ( + self.main.args.growing + or self.main.args.input_module + or self.main.args.testing + ): + return False + + return True + def start_output_process(self, current_stdout, stderr, slips_logfile): - # only in this instance we'll have to specify the verbose, - # debug, std files and input type - # since the output is a singleton, the same params will - # be set everywhere, no need to pass them everytime output_process = Output( stdout=current_stdout, stderr=stderr, slips_logfile=slips_logfile, verbose=self.main.args.verbose or 0, debug=self.main.args.debug, - slips_mode=self.main.mode, input_type=self.main.input_type, + sender_pipe=self.output_send_pipe, + has_pbar=self.is_pbar_supported(), + pbar_finished=self.pbar_finished, + stop_daemon=self.main.args.stopdaemon ) self.slips_logfile = output_process.slips_logfile return output_process - + + def start_progress_bar(self, cls): + pbar = cls( + self.main.logger, + self.main.args.output, + self.main.redis_port, + self.termination_event, + stdout=self.main.stdout, + pipe=self.pbar_recv_pipe, + slips_mode=self.main.mode, + pbar_finished=self.pbar_finished, + ) + return pbar + def start_profiler_process(self): profiler_process = Profiler( self.main.logger, @@ -65,20 +132,19 @@ def start_profiler_process(self): self.termination_event, is_profiler_done=self.is_profiler_done, profiler_queue=self.profiler_queue, - is_profiler_done_event=
self.is_profiler_done_event + is_profiler_done_event=self.is_profiler_done_event, + has_pbar=self.is_pbar_supported(), ) profiler_process.start() self.main.print( f'Started {green("Profiler Process")} ' - f"[PID {green(profiler_process.pid)}]", - 1, - 0, + f"[PID {green(profiler_process.pid)}]", 1, 0, ) self.main.db.store_process_PID("Profiler", int(profiler_process.pid)) return profiler_process def start_evidence_process(self): - evidence_process = Evidence( + evidence_process = EvidenceHandler( self.main.logger, self.main.args.output, self.main.redis_port, @@ -112,7 +178,8 @@ def start_input_process(self): ) input_process.start() self.main.print( - f'Started {green("Input Process")} ' f"[PID {green(input_process.pid)}]", + f'Started {green("Input Process")} ' + f'[PID {green(input_process.pid)}]', 1, 0, ) @@ -129,7 +196,9 @@ def kill_process_tree(self, pid: int): # Get the child processes of the current process try: - process_list = os.popen('pgrep -P {}'.format(pid)).read().splitlines() + process_list = (os.popen(f'pgrep -P {pid}') + .read() + .splitlines()) except: process_list = [] @@ -142,7 +211,8 @@ def kill_all_children(self): module_name: str = self.main.db.get_name_of_module_at(process.pid) if not module_name: # if it's a thread started by one of the modules or - # by slips.py, we don't have it stored in the db so just skip it + # by slips.py, we don't have it stored in + # the db so just skip it continue if module_name in self.stopped_modules: # already stopped @@ -157,10 +227,18 @@ def is_ignored_module( )-> bool: for ignored_module in to_ignore: - ignored_module = ignored_module.replace(' ','').replace('_','').replace('-','').lower() - # this version of the module name wont contain _ or spaces so we can + ignored_module = (ignored_module + .replace(' ','') + .replace('_','') + .replace('-','') + .lower()) + # this version of the module name won't contain + # _ or spaces so we can # easily match it with the ignored module name - curr_module_name = module_name.replace('_','').replace('-','').lower() + curr_module_name = (module_name + .replace('_','') + .replace('-','') + .lower()) if curr_module_name.__contains__(ignored_module): return True return False @@ -169,9 +247,8 @@ def get_modules(self, to_ignore: list): """ Get modules from the 'modules' folder. """ - # This plugins import will automatically load the modules and put them in - # the __modules__ variable - + # This plugins import will automatically load the modules + # and put them in the __modules__ variable plugins = {} failed_to_load_modules = 0 @@ -179,7 +256,8 @@ def get_modules(self, to_ignore: list): # __path__ is the current path of this python program look_for_modules_in = modules.__path__ prefix = f"{modules.__name__}." - # Walk recursively through all modules and packages found on the . folder. + # Walk recursively through all modules and packages found on the . + # folder. for loader, module_name, ispkg in pkgutil.walk_packages( look_for_modules_in, prefix ): @@ -203,7 +281,8 @@ def get_modules(self, to_ignore: list): try: # "level specifies whether to use absolute or relative imports. # The default is -1 which - # indicates both absolute and relative imports will be attempted. + # indicates both absolute and relative imports will + # be attempted. # 0 means only perform absolute imports.
# Positive values for level indicate the number of parent # directories to search relative to the directory of the @@ -212,7 +291,7 @@ def get_modules(self, to_ignore: list): except ImportError as e: print(f"Something wrong happened while " f"importing the module {module_name}: {e}") - self.print(traceback.print_exc()) + traceback.print_stack() failed_to_load_modules += 1 continue @@ -239,7 +318,8 @@ # last=False to move to the beginning of the dict plugins.move_to_end("Blocking", last=False) - # when cyst starts first, as soon as slips connects to cyst, cyst sends slips the flows, + # when cyst starts first, as soon as slips connects to cyst, + # cyst sends slips the flows, # but the inputprocess didn't even start yet so the flows are lost # to fix this, change the order of the CYST module(load it last) if "cyst" in plugins: @@ -252,6 +332,7 @@ def load_modules(self): to_ignore: list = self.main.conf.get_disabled_modules( self.main.input_type) + # Import all the modules modules_to_call = self.get_modules(to_ignore)[0] loaded_modules = [] @@ -260,12 +341,15 @@ continue module_class = modules_to_call[module_name]["obj"] - module = module_class( - self.main.logger, - self.main.args.output, - self.main.redis_port, - self.termination_event, - ) + if module_name == "Progress Bar": + module = self.start_progress_bar(module_class) + else: + module = module_class( + self.main.logger, + self.main.args.output, + self.main.redis_port, + self.termination_event, + ) module.start() self.main.db.store_process_PID(module_name, int(module.pid)) self.module_objects[module_name] = module # maps name -> object @@ -290,7 +374,8 @@ def print_stopped_module(self, module): # to vertically align them when printing module += " " * (20 - len(module)) - self.main.print(f"\t{green(module)} \tStopped. " f"{green(modules_left)} left.") + self.main.print(f"\t{green(module)} \tStopped. " + f"{green(modules_left)} left.") def start_update_manager(self, local_files=False, TI_feeds=False): @@ -298,14 +383,18 @@ starts the update manager process PS; this function is blocking, slips.py will not start the rest of the module unless this functionis done :kwarg local_files: if true, updates the local ports and + org files from disk :kwarg TI_feeds: if true, updates the remote TI feeds, this takes time """ try: - # only one instance of slips should be able to update ports and orgs at a time - # so this function will only be allowed to run from 1 slips instance. + # only one instance of slips should be able to update ports + # and orgs at a time + # so this function will only be allowed to run from 1 slips + # instance. with Lock(name="slips_ports_and_orgs"): - # pass a dummy termination event for update manager to update orgs and ports info + # pass a dummy termination event for update manager to + # update orgs and ports info update_manager = UpdateManager( self.main.logger, self.main.args.output, @@ -328,7 +417,8 @@ def start_update_manager(self, local_files=False, TI_feeds=False): def warn_about_pending_modules(self, pending_modules: List[Process]): """ Prints the names of the modules that are not finished yet.
- :param pending_modules: List of active/pending process that aren't killed or stopped yet + :param pending_modules: List of active/pending process that aren't + killed or stopped yet """ if self.warning_printed_once: return @@ -354,13 +444,16 @@ def get_hitlist_in_order(self) -> Tuple[List[Process], List[Process]]: """ - returns a list of PIDs that slips should terminate first, and pids that should be killed last + returns a list of PIDs that slips should terminate first, + and pids that should be killed last """ - # all modules that deal with evidence, blocking and alerts should be killed last + # all modules that deal with evidence, blocking and alerts should + # be killed last # so we don't miss exporting or blocking any malicious IoC # input and profiler are not in this list because they # indicate that they're done processing using a semaphore - # slips won't reach this function unless they are done already. so no need to kill them last + # slips won't reach this function unless they are done already. + # so no need to kill them last pids_to_kill_last = [ self.main.db.get_pid_of("Evidence"), ] @@ -379,11 +472,13 @@ def get_hitlist_in_order(self) -> Tuple[List[Process], List[Process]]: to_kill_first: List[Process] = [] to_kill_last: List[Process] = [] for process in self.processes: - # if it's not to kill be killed last, then we need to kill it first :'D + # if it's not to be killed last, then we need to kill + # it first :'D if process.pid in pids_to_kill_last: to_kill_last.append(process) else: - # skips the context manager of output.py, will close it manually later + # skips the context manager of output.py, will close + # it manually later # once all processes are closed if type(process) == multiprocessing.context.ForkProcess: continue @@ -428,7 +523,6 @@ def get_analysis_time(self): def should_stop(self): """ returns true if the channel received the stop msg - :param msg: msgs receive in the control chanel """ message = self.main.c1.get_message(timeout=0.01) if ( @@ -471,11 +565,13 @@ def shutdown_interactive(self, to_kill_first, to_kill_last): # maximum time to wait is timeout_seconds alive_processes = self.wait_for_processes_to_finish(to_kill_first) if alive_processes: - # update the list of processes to kill first with only the ones that are still alive + # update the list of processes to kill first with only the ones + # that are still alive to_kill_first: List[Process] = alive_processes # the 2 lists combined are all the children that are still alive - # here to_kill_last are considered alive because we haven't tried to join() em yet + # here to_kill_last are considered alive because we haven't tried + # to join() em yet self.warn_about_pending_modules(alive_processes + to_kill_last) return to_kill_first, to_kill_last else: @@ -484,7 +580,8 @@ def shutdown_interactive(self, to_kill_first, to_kill_last): alive_processes = self.wait_for_processes_to_finish(to_kill_last) if alive_processes: - # update the list of processes to kill last with only the ones that are still alive + # update the list of processes to kill last with only the ones + # that are still alive to_kill_last: List[Process] = alive_processes # the 2 lists combined are all the children that are still alive @@ -496,20 +593,23 @@ def slips_is_done_receiving_new_flows(self) -> bool: """ - Slips won't be receiving new flows when - the input and profiler release the semaphores
signaling that they're done - that's when this method will return True. + this method will return True when the input and profiler release + the semaphores signaling that they're done If they're still processing it will return False """ # try to acquire the semaphore without blocking - input_done_processing: bool = self.is_input_done.acquire(block=False) - profiler_done_processing: bool = self.is_profiler_done.acquire(block=False) + input_done_processing: bool = self.is_input_done.acquire( + block=False + ) + profiler_done_processing: bool = self.is_profiler_done.acquire( + block=False + ) if input_done_processing and profiler_done_processing: return True - else: - # can't acquire the semaphore, processes are still running - return False + + # can't acquire the semaphore, processes are still running + return False def shutdown_daemon(self): @@ -517,7 +617,8 @@ def shutdown_daemon(self): Shutdown slips modules in daemon mode using the daemon's -s """ - # this method doesn't deal with self.processes bc they aren't the daemon's children, + # this method doesn't deal with self.processes bc they + # aren't the daemon's children, # they are the children of the slips.py that ran using -D # (so they started on a previous run) # and we only have access to the PIDs @@ -535,6 +636,7 @@ def shutdown_gracefully(self): print("\n" + "-" * 27) self.main.print("Stopping Slips") + # by default, 15 mins from this time, all modules should be killed method_start_time = time.time() @@ -553,8 +655,8 @@ def shutdown_gracefully(self): self.processes: dict = self.main.db.get_pids() self.shutdown_daemon() - profilesLen = self.main.db.get_profiles_len() - self.main.daemon.print(f"Total analyzed IPs: {profilesLen}.") + profiles_len: int = self.main.db.get_profiles_len() + self.main.daemon.print(f"Total analyzed IPs: {profiles_len}.") # if slips finished normally without stopping the daemon with -S # then we need to delete the pidfile here @@ -562,29 +664,37 @@ def shutdown_gracefully(self): else: flows_count: int = self.main.db.get_flows_count() - self.main.print(f"Total flows read (without altflows): {flows_count}", log_to_logfiles_only=True) + self.main.print(f"Total flows read (without altflows): " + f"{flows_count}", log_to_logfiles_only=True) - hitlist: Tuple[List[Process], List[Process]] = self.get_hitlist_in_order() + hitlist: Tuple[List[Process], List[Process]] + hitlist = self.get_hitlist_in_order() to_kill_first: List[Process] = hitlist[0] to_kill_last: List[Process] = hitlist[1] self.termination_event.set() - # to make sure we only warn the user once about hte pending modules + # to make sure we only warn the user once about the pending + # modules self.warning_printed_once = False try: # Wait timeout_seconds for all the processes to finish while time.time() - method_start_time < timeout_seconds: - to_kill_first, to_kill_last = self.shutdown_interactive(to_kill_first, to_kill_last) + to_kill_first, to_kill_last = self.shutdown_interactive( + to_kill_first, + to_kill_last + ) if not to_kill_first and not to_kill_last: # all modules are done - # now close the communication between output.py and the pbar - self.main.logger.shutdown_gracefully() + # now close the communication between output.py + # and the pbar break except KeyboardInterrupt: - # either the user wants to kill the remaining modules (pressed ctrl +c again) - # or slips was stuck looping for too long that the OS sent an automatic sigint to kill slips + # either the user wants to kill the remaining modules + # (pressed ctrl +c again) + # or slips 
was stuck looping for too long that the OS + # sent an automatic sigint to kill slips # pass to kill the remaining modules reason = "User pressed ctr+c or slips was killed by the OS" graceful_shutdown = False @@ -594,7 +704,8 @@ def shutdown_gracefully(self): # getting here means we're killing them bc of the timeout # not getting here means we're killing them bc of double # ctr+c OR they terminated successfully - reason = f"Killing modules that took more than {timeout} mins to finish." + reason = (f"Killing modules that took more than {timeout}" + f" mins to finish.") self.main.print(reason) graceful_shutdown = False @@ -608,6 +719,9 @@ def shutdown_gracefully(self): format_ = self.main.conf.export_labeled_flows_to().lower() self.main.db.export_labeled_flows(format_) + self.output_send_pipe.close() + self.pbar_recv_pipe.close() + # if store_a_copy_of_zeek_files is set to yes in slips.conf, # copy the whole zeek_files dir to the output dir self.main.store_zeek_dir_copy() @@ -621,7 +735,8 @@ def shutdown_gracefully(self): self.main.print("[Process Manager] Slips shutdown gracefully\n", log_to_logfiles_only=True) else: - self.main.print(f"[Process Manager] Slips didn't shutdown gracefully - {reason}\n", + self.main.print(f"[Process Manager] Slips didn't " + f"shutdown gracefully - {reason}\n", log_to_logfiles_only=True) except KeyboardInterrupt: diff --git a/managers/redis_manager.py b/managers/redis_manager.py index b01473bde..1a105af9a 100644 --- a/managers/redis_manager.py +++ b/managers/redis_manager.py @@ -6,7 +6,6 @@ import socket import subprocess from typing import Dict, Union - from slips_files.core.output import Output from slips_files.common.slips_utils import utils from slips_files.core.database.database_manager import DBManager @@ -27,7 +26,7 @@ def __init__(self, main): def get_start_port(self): return self.start_port - def log_redis_server_PID(self, redis_port: int, redis_pid: int): + def log_redis_server_pid(self, redis_port: int, redis_pid: int): now = utils.convert_format(datetime.now(), utils.alerts_format) try: # used in case we need to remove the line using 6379 from running logfile @@ -51,7 +50,7 @@ def log_redis_server_PID(self, redis_port: int, redis_pid: int): # last run was by root, change the file ownership to non-root os.remove(self.running_logfile) open(self.running_logfile, 'w').close() - self.log_redis_server_PID(redis_port, redis_pid) + self.log_redis_server_pid(redis_port, redis_pid) if redis_port == 6379: # remove the old logline using this port @@ -64,7 +63,7 @@ def load_redis_db(self, redis_port): self.main.input_information = os.path.basename(self.main.args.db) redis_pid: int = self.get_pid_of_redis_server(redis_port) self.zeek_folder = '""' - self.log_redis_server_PID(redis_port, redis_pid) + self.log_redis_server_pid(redis_port, redis_pid) self.remove_old_logline(redis_port) print( @@ -118,12 +117,14 @@ def check_redis_database( except Exception as ex: # only try to open redis-server twice. if tries == 2: - print(f'[Main] Problem starting redis cache database. \n{ex}\nStopping') + print(f'[Main] Problem starting redis cache database.' 
+ f' \n{ex}\nStopping') self.main.terminate_slips() return False print('[Main] Starting redis cache database..') - os.system('redis-server config/redis.conf --daemonize yes > /dev/null 2>&1') + os.system('redis-server config/redis.conf --daemonize yes ' + ' > /dev/null 2>&1') # give the server time to start time.sleep(1) tries += 1 @@ -131,7 +132,8 @@ def get_random_redis_port(self) -> int: """ - Keeps trying to connect to random generated ports until we found an available port. + Keeps trying to connect to randomly generated ports until + we find an available port. returns the port number """ for port in range(self.start_port, self.end_port+1): @@ -172,7 +174,8 @@ def clear_redis_cache_database( def close_all_ports(self): """ - Closes all the redis ports in running_slips_info.txt and in slips supported range of ports + Closes all the redis ports in running_slips_info.txt and + in slips supported range of ports """ if not hasattr(self, 'open_servers_PIDs'): self.get_open_redis_servers() @@ -193,7 +196,8 @@ def close_all_ports(self): self.kill_redis_server(pid) - # print(f"Successfully closed all redis servers on ports {self.start_port} to {self.end_port}") + # print(f"Successfully closed all redis servers on ports + # {self.start_port} to {self.end_port}") print("Successfully closed all open redis servers") with contextlib.suppress(FileNotFoundError): @@ -202,21 +206,28 @@ return + def print_port_in_use(self, port: int): + print(f"[Main] Port {port} is already in use by another process" + f"\nChoose another port using -P " + f"\nOr kill your open redis ports using: ./slips.py -k ") + + def check_if_port_is_in_use(self, port: int) -> bool: if port == 6379: # even if it's already in use, slips should override it return False - try: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.bind(("localhost", port)) + # is it used by another app? + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if sock.connect_ex(("localhost", port)) != 0: + # not used + sock.close() return False - except OSError as e: - print(f"[Main] Port {port} is already in use by another process." - f" Choose another port using -P \n" - f"Or kill your open redis ports using: ./slips.py -k ") - self.main.terminate_slips() - return True + + sock.close() + self.print_port_in_use(port) + self.main.terminate_slips() + return True def get_pid_of_redis_server(self, port: int) -> int: @@ -334,8 +345,10 @@ def get_port_of_redis_server(self, pid: int) -> Union[int, bool]: def flush_redis_server(self, pid: int=None, port: int=None): """ - Flush the redis server on this pid, only 1 param should be given, pid or port - :param pid: can be False if port is given + Flush the redis server on this pid, only 1 param should be + given, pid or port + :kwarg pid: can be False if port is given + :kwarg port: redis server port to flush Gets the pid of the port if not given """ if not port and not pid: @@ -362,10 +375,14 @@ def flush_redis_server(self, pid: int=None, port: int=None): start_sqlite=False, start_redis_server=False ) - db.rdb.r.flushall() - db.rdb.r.flushdb() - db.rdb.r.script_flush() - return True + # if the redis server opened by slips is closed manually by the + # user, not by slips, slips won't be able to connect to it + # that's why we check for db.rdb + if db.rdb: + db.rdb.r.flushall() + db.rdb.r.flushdb() + db.rdb.r.script_flush() + return True except redis.exceptions.ConnectionError: # server already killed!
return False diff --git a/modules/arp/arp.py b/modules/arp/arp.py index ad538c2e6..b620439b9 100644 --- a/modules/arp/arp.py +++ b/modules/arp/arp.py @@ -1,16 +1,34 @@ from slips_files.common.abstracts._module import IModule -from slips_files.common.imports import * import json import ipaddress import time import threading from multiprocessing import Queue +from typing import List -class ARP(IModule, multiprocessing.Process): +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + +class ARP(IModule): # Name: short name of the module. Do not use spaces name = 'ARP' - description = 'Detect arp attacks' + description = 'Detect ARP attacks' authors = ['Alya Gomaa'] + def init(self): self.c1 = self.db.subscribe('new_arp') self.c2 = self.db.subscribe('tw_closed') @@ -60,7 +78,9 @@ def wait_for_arp_scans(self): scans_ctr = 0 while True: try: - evidence: dict = self.pending_arp_scan_evidence.get(timeout=0.5) + evidence: dict = self.pending_arp_scan_evidence.get( + timeout=0.5 + ) except Exception: # nothing in queue time.sleep(5) @@ -73,7 +93,9 @@ def wait_for_arp_scans(self): while True: try: - new_evidence = self.pending_arp_scan_evidence.get(timeout=0.5) + new_evidence = self.pending_arp_scan_evidence.get( + timeout=0.5 + ) except Exception: # queue is empty break @@ -88,9 +110,10 @@ def wait_for_arp_scans(self): uids += uids2 conn_count = conn_count2 else: - # this is an ip performing arp scan in a diff profile or a diff twid, - # we shouldn't accumulate its evidence - # store it back in the queue until we're done with the current one + # this is an ip performing arp scan in a diff + # profile or a diff twid, we shouldn't accumulate its + # evidence; store it back in the queue until we're done + # with the current one scans_ctr += 1 self.pending_arp_scan_evidence.put(new_evidence) if scans_ctr == 3: @@ -108,13 +131,23 @@ def wait_for_arp_scans(self): def check_arp_scan( - self, profileid, twid, daddr, uid, ts, dst_mac, src_mac, operation, dst_hw, src_hw + self, + profileid, + twid, + daddr, + uid, + ts, + operation, + dst_hw ): """ Check if the profile is doing an arp scan - If IP X sends arp requests to 3 or more different IPs within 30 seconds, then this IP X is doing arp scan - The key profileid_twid is used to group requests from the same saddr - arp flows don't have uids, the uids received are randomly generated by slips + If IP X sends arp requests to 3 or more different + IPs within 30 seconds, then this IP X is doing arp scan + The key profileid_twid is used to group requests + from the same saddr + arp flows don't have uids, the uids received are + randomly generated by slips """ # ARP scans are always requests always? mostly? from 00:00:00:00:00:00 @@ -131,8 +164,9 @@ def get_uids(): res.append(uid) return res - # The Gratuitous arp is sent as a broadcast, as a way for a node to announce or update its IP to MAC mapping to the entire network. - # It shouldn't be marked as an arp scan + # The Gratuitous arp is sent as a broadcast, as a way for a + # node to announce or update its IP to MAC mapping + # to the entire network.
It shouldn't be marked as an arp scan saddr = profileid.split('_')[1] # Don't detect arp scan from the GW router @@ -167,10 +201,12 @@ def get_uids(): return True - # the list of daddrs that are scanned by the current proffileid in the curr tw + # the list of daddrs that are scanned by the current + # profileid in the curr tw daddrs = list(cached_requests.keys()) - # The minimum amount of arp packets to send to be considered as scan is 5 + # The minimum amount of arp packets to send to be + # considered a scan is 5 if len(daddrs) >= self.arp_scan_threshold: # check if these requests happened within 30 secs # get the first and the last request of the 10 @@ -188,50 +224,86 @@ def get_uids(): # we are sure this is an arp scan if not self.alerted_once_arp_scan: self.alerted_once_arp_scan = True - self.set_evidence_arp_scan(ts, profileid, twid, uids, conn_count) + self.set_evidence_arp_scan( + ts, profileid, twid, uids, conn_count + ) else: - # after alerting once, wait 10s to see if more evidence are coming - self.pending_arp_scan_evidence.put((ts, profileid, twid, uids, conn_count)) + # after alerting once, wait 10s to see + # if more evidence is coming + self.pending_arp_scan_evidence.put( + (ts, profileid, twid, uids, conn_count) + ) return True return False - def set_evidence_arp_scan(self, ts, profileid, twid, uids, conn_count): - confidence = 0.8 - threat_level = 'low' - description = ( - f'performing an arp scan. Confidence {confidence}.' + def set_evidence_arp_scan( + self, + ts, + profileid, + twid, + uids: List[str], + conn_count + ): + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.LOW + saddr: str = profileid.split('_')[1] + + description: str = f'performing an arp scan. Confidence {confidence}.' + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr ) - evidence_type = 'ARPScan' - # category of this evidence according to idea categories - category = 'Recon.Scanning' - attacker_direction = 'srcip' - source_target_tag = 'Recon' # srcip description - attacker = profileid.split('_')[1] - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - ts, category, source_target_tag=source_target_tag, conn_count=conn_count, - profileid=profileid, twid=twid, uid=uids) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.ARP_SCAN, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=uids, + timestamp=ts, + category=IDEACategory.RECON_SCANNING, + source_target_tag=Tag.RECON, + conn_count=conn_count + ) + + self.db.set_evidence(evidence) # after we set evidence, clear the dict so we can detect if it does another scan try: self.cache_arp_requests.pop(f'{profileid}_{twid}') except KeyError: - # when a tw is closed, we clear all its' entries from the cache_arp_requests dict - # having keyerr is a result of closing a timewindow before setting an evidence + # when a tw is closed, we clear all its entries from the + # cache_arp_requests dict + # getting a KeyError is a result of closing a timewindow before + # setting an evidence # ignore it pass + def check_dstip_outside_localnet( - self, profileid, twid, daddr, uid, saddr, ts - ): + self, + profileid, + twid, + daddr, + uid: str, + saddr, + ts + ): """Function to setEvidence when daddr is outside the local network""" if '0.0.0.0' in saddr or '0.0.0.0' in daddr: - # this is
the case of arp probe, not an arp outside of local network, don't alert + # this is the case of an arp probe, not an + # arp outside of local network, don't alert return False daddr_as_obj = ipaddress.IPv4Address(daddr) if daddr_as_obj.is_multicast or daddr_as_obj.is_link_local: - # The arp to ‘outside’ the network should not detect multicast or link-local addresses. + # The arp to ‘outside’ the network should + # not detect multicast or link-local addresses. return False for network in self.home_network: @@ -243,22 +315,61 @@ def check_dstip_outside_localnet( local_net = saddr.split('.')[0] if not daddr.startswith(local_net): # comes here if the IP isn't in any of the local networks - confidence = 0.6 - threat_level = 'low' - ip_identification = self.db.get_ip_identification(daddr) - description = f'{saddr} sending ARP packet to a destination address outside of local network: {daddr}. {ip_identification}' - evidence_type = 'arp-outside-localnet' - category = 'Anomaly.Behaviour' - attacker_direction = 'srcip' - attacker = profileid.split('_')[1] - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, - description, ts, category, profileid=profileid, twid=twid, uid=uid, victim=daddr) + confidence: float = 0.6 + threat_level: ThreatLevel = ThreatLevel.LOW + ip_identification: str = self.db.get_ip_identification(daddr) + saddr: str = profileid.split('_')[1] + + description: str = f'{saddr} sending ARP packet to a destination ' \ + f'address outside of local network: {daddr}. ' \ + f'{ip_identification}' + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.ARP_OUTSIDE_LOCALNET, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=ts, + category=IDEACategory.ANOMALY_BEHAVIOUR, + victim=victim + ) + + self.db.set_evidence(evidence) return True + return False + + def detect_unsolicited_arp( - self, profileid, twid, uid, ts, dst_mac, src_mac, dst_hw, src_hw + self, + profileid: str, + twid: str, + uid: str, + ts: str, + dst_mac: str, + src_mac: str, + dst_hw: str, + src_hw: str ): - """Unsolicited arp is used to update the neighbours' arp caches but can also be used in arp spoofing""" + """ + Unsolicited arp is used to update the neighbours' + arp caches but can also be used in arp spoofing + """ if ( dst_mac == 'ff:ff:ff:ff:ff:ff' and dst_hw == 'ff:ff:ff:ff:ff:ff' @@ -266,62 +377,94 @@ and src_hw != '00:00:00:00:00:00' ): # We're sure this is unsolicited arp - confidence = 0.8 - threat_level = 'info' - description = 'broadcasting unsolicited ARP' - evidence_type = 'UnsolicitedARP' - # This may be arp spoofing - category = 'Information' - attacker_direction = 'srcip' - source_target_tag = 'Recon' # srcip description - attacker = profileid.split('_')[1] - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, - description, ts, category, source_target_tag=source_target_tag, - profileid=profileid, twid=twid, uid=uid) + # it may be arp spoofing + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.INFO + description: str = 'broadcasting unsolicited ARP' + + saddr: str = profileid.split('_')[-1] + attacker =
Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.UNSOLICITED_ARP, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=ts, + source_target_tag=Tag.RECON, + category=IDEACategory.INFO + ) + + self.db.set_evidence(evidence) return True - def detect_MITM_ARP_attack(self, profileid, twid, uid, saddr, ts, src_mac): - """Detects when a MAC with IP A, is trying to tell others that now that MAC is also for IP B (arp cache attack)""" - # Todo in rare cases, the vendor and IP of this mac is known AFTER returning from this function so detection is missed + def detect_MITM_ARP_attack( + self, + twid: str, + uid: str, + saddr: str, + ts: str, + src_mac: str, + ): + """ + Detects when a MAC with IP A, is trying to tell others that + now that MAC is also for IP B (arp cache attack) + """ + # Todo in rare cases, the vendor and IP of this mac is known AFTER + # returning from this function so detection is missed # to test this add these 2 flows to arp.log - # {"ts":1636305825.755132,"operation":"reply","src_mac":"2e:a4:18:f8:3d:02","dst_mac":"ff:ff:ff:ff:ff:ff", - # "orig_h":"172.20.7.40","resp_h":"172.20.7.40","orig_hw":"2e:a4:18:f8:3d:02","resp_hw":"00:00:00:00:00:00"} - # {"ts":1636305825.755132,"operation":"reply","src_mac":"2e:a4:18:f8:3d:02","dst_mac":"ff:ff:ff:ff:ff:ff", - # "orig_h":"172.20.7.41","resp_h":"172.20.7.41","orig_hw":"2e:a4:18:f8:3d:02","resp_hw":"00:00:00:00:00:00"} + # {"ts":1636305825.755132,"operation":"reply", + # "src_mac":"2e:a4:18:f8:3d:02", "dst_mac":"ff:ff:ff:ff:ff:ff", + # "orig_h":"172.20.7.40","resp_h":"172.20.7.40", + # "orig_hw":"2e:a4:18:f8:3d:02", "resp_hw":"00:00:00:00:00:00"} + # {"ts":1636305825.755132,"operation":"reply", + # "src_mac":"2e:a4:18:f8:3d:02", "dst_mac":"ff:ff:ff:ff:ff:ff", + # "orig_h":"172.20.7.41","resp_h":"172.20.7.41", + # "orig_hw":"2e:a4:18:f8:3d:02", "resp_hw":"00:00:00:00:00:00"} # todo will we get FPs when an ip changes? - # todo what if the ip of the attacker came to us first and we stored it in the db? + # todo what if the ip of the attacker came to us + # first and we stored it in the db? # the original IP of this src mac is now the IP of the attacker? # get the original IP of the src mac from the database - original_IP = self.db.get_ip_of_mac(src_mac) + original_IP: str = self.db.get_ip_of_mac(src_mac) if original_IP is None: return # original_IP is a serialized list - original_IP = json.loads(original_IP)[0] + original_IP: str = json.loads(original_IP)[0] + original_IP = original_IP.replace("profile_","") - # is this saddr trying to tell everyone that this it owns this src_mac - # even though we know this src_mac is associated with another IP (original_IP)? + # is this saddr trying to tell everyone that this + # it owns this src_mac even though we know this src_mac is associated + # with another IP (original_IP)? if saddr != original_IP: # From our db we know that: # original_IP has src_MAC # now saddr has src_MAC and saddr isn't the same as original_IP - # so this is either a MITM arp attack or the IP address of this src_mac simply changed + # so this is either a MITM arp attack or the IP + # address of this src_mac simply changed # todo how to find out which one is it?? 
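# ---------------------------------------------------------------------------
# Editor's illustration (not part of the patch): the MITM check above boils
# down to "the DB remembers the first IP seen for a MAC; a gratuitous reply
# whose sender IP differs from that remembered IP is suspicious". A minimal,
# self-contained sketch of that rule, with a plain dict standing in for the
# Redis-backed db; the function name and the dict are hypothetical, not
# Slips API:

def looks_like_mitm_arp(mac_to_ip: dict, src_mac: str, saddr: str) -> bool:
    """Return True when src_mac was previously bound to a different IP."""
    original_ip = mac_to_ip.get(src_mac)
    if original_ip is None:
        # first sighting of this MAC: remember the binding, nothing to flag
        mac_to_ip[src_mac] = saddr
        return False
    # same MAC, different sender IP: ARP spoofing, or a benign IP change
    return saddr != original_ip

cache = {}
assert looks_like_mitm_arp(cache, '2e:a4:18:f8:3d:02', '172.20.7.40') is False
assert looks_like_mitm_arp(cache, '2e:a4:18:f8:3d:02', '172.20.7.41') is True
# ---------------------------------------------------------------------------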
- confidence = 0.2 # low confidence for now - threat_level = 'critical' - evidence_type = 'MITM-arp-attack' - # This may be arp spoofing - category = 'Recon' - attacker_direction = 'srcip' - source_target_tag = 'MITM' - attacker = profileid.split('_')[1] + # Assuming that 'threat_level' and 'category' + # are from predefined enums or constants + confidence: float = 0.2 # low confidence for now + threat_level: ThreatLevel = ThreatLevel.CRITICAL + + attackers_ip = saddr + victims_ip = original_IP gateway_ip = self.db.get_gateway_ip() gateway_MAC = self.db.get_gateway_mac() - if saddr == gateway_ip: saddr = f'The gateway {saddr}' @@ -332,35 +475,65 @@ def detect_MITM_ARP_attack(self, profileid, twid, uid, saddr, ts, src_mac): if original_IP == gateway_ip: original_IP = f'the gateway IP {original_IP}' - description = f'{saddr} performing a MITM ARP attack. The MAC {src_mac}, ' \ - f'now belonging to {saddr}, was seen before for {original_IP}.' - # self.print(f'{saddr} is claiming to have {src_mac}') - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, - description, ts, category, source_target_tag=source_target_tag, - profileid=profileid, twid=twid, uid=uid, victim=original_IP) + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=attackers_ip + ) + + victim = Victim( + direction=Direction.DST, # TODO not really dst + victim_type=IoCType.IP, + value=victims_ip + ) + + description = f'{saddr} performing a MITM ARP attack. ' \ + f'The MAC {src_mac}, now belonging to ' \ + f'{saddr}, was seen before for {original_IP}.' + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.MITM_ARP_ATTACK, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=ts, + category=IDEACategory.RECON, + source_target_tag=Tag.MITM, + victim=victim + ) + + self.db.set_evidence(evidence) return True def check_if_gratutitous_ARP( - self, saddr, daddr, src_mac, dst_mac, src_hw, dst_hw, operation + self, dst_hw: str, operation: str ): """ Check if an ARP packet is gratuitous - # The Gratuitous arp is sent as a broadcast, as a way for a node to announce or update - # its IP to MAC mapping to the entire network. - # Gratuitous ARP shouldn't be marked as an arp scan - # Check https://www.practicalnetworking.net/series/arp/gratuitous-arp/ - # dst_mac is the real MAC used to deliver the packet - # src_mac is the real MAC used to deliver the packet - # dst_hw is the MAC in the headers of the ARP packet - # src_hw is the MAC in the headers of the ARP packet - # saddr is the IP in the headers of the ARP packet - # daddr is the IP in the headers of the ARP packet - - # Gratuitous ARP can be used for (1) Updating ARP Mapping, (2) Announcing a Node’s Existence, - (3) Redundancy, (4) MITM. Which is similar to an 'unrequested' load balancing - # The saddr and daddr are the ones being avertised. The supposed purpose of the Gratuitous ARP + The Gratuitous arp is sent as a broadcast, as a way for a + node to announce or update + its IP to MAC mapping to the entire network. 
+        Gratuitous ARP shouldn't be marked as an arp scan
+        Check https://www.practicalnetworking.net/series/arp/gratuitous-arp/
+        dst_mac is the real MAC used to deliver the packet
+        src_mac is the real MAC used to deliver the packet
+        dst_hw is the MAC in the headers of the ARP packet
+        src_hw is the MAC in the headers of the ARP packet
+        saddr is the IP in the headers of the ARP packet
+        daddr is the IP in the headers of the ARP packet
+
+        Gratuitous ARP can be used for (1) Updating ARP Mapping,
+        (2) Announcing a Node’s Existence,
+        (3) Redundancy, (4) MITM, which is similar to an
+        'unrequested' load balancing.
+        The saddr and daddr are the ones being advertised.
+        The supposed purpose of the Gratuitous ARP is one of the uses above.
        """
        # It should be a reply
        # The dst_mac should be ff:ff:ff:ff:ff:ff or 00:00:00:00:00:00
@@ -401,22 +574,22 @@ def main(self):
             dst_hw = flow['dst_hw']
             src_hw = flow['src_hw']
             operation = flow['operation']
-            # arp flows don't have uids, the uids received are randomly generated by slips
+            # arp flows don't have uids, the uids received
+            # are randomly generated by slips
             uid = flow['uid']

-            if self.check_if_gratutitous_ARP(
-                saddr, daddr, src_mac, dst_mac, src_hw, dst_hw, operation
-            ):
+            if self.check_if_gratutitous_ARP(dst_hw, operation):
                 # for MITM arp attack, the arp has to be gratuitous
                 # and it has to be a reply operation, not a request.
-                # A gratuitous ARP is always a reply. A MITM attack happens when there is a reply without a request
+                # A gratuitous ARP is always a reply. A MITM attack
+                # happens when there is a reply without a request
                 self.detect_MITM_ARP_attack(
-                    profileid, twid, uid, saddr, ts, src_mac
+                    twid, uid, saddr, ts, src_mac
                 )
             else:
                 # not gratuitous and request, may be an arp scan
                 self.check_arp_scan(
-                    profileid, twid, daddr, uid, ts, dst_mac, src_mac, operation, dst_hw, src_hw
+                    profileid, twid, daddr, uid, ts, operation, dst_hw
                 )

             if 'request' in operation:
@@ -439,11 +612,14 @@ def main(self):
         # if the tw is closed, remove all its entries from the cache dict
         if msg := self.get_msg('tw_closed'):
             profileid_tw = msg['data']
-            # when a tw is closed, this means that it's too old so we don't check for arp scan in this time
+            # when a tw is closed, this means that it's too
+            # old so we don't check for arp scan in this time
             # range anymore
-            # this copy is made to avoid dictionary changed size during iteration err
+            # this copy is made to avoid dictionary
+            # changed size during iteration err
             cache_copy = self.cache_arp_requests.copy()
             for key in cache_copy:
                 if profileid_tw in key:
                     self.cache_arp_requests.pop(key)
-                    # don't break, keep looking for more keys that belong to the same tw
+                    # don't break, keep looking for more
+                    # keys that belong to the same tw
diff --git a/modules/blocking/blocking.py b/modules/blocking/blocking.py
index a5ecb14f7..f4a7bdade 100644
--- a/modules/blocking/blocking.py
+++ b/modules/blocking/blocking.py
@@ -8,7 +8,7 @@ import subprocess
 import time

-class Blocking(IModule, multiprocessing.Process):
+class Blocking(IModule):
     """Data should be passed to this module as a json encoded python dict,
     by default this module flushes all slipsBlocking chains before it starts"""

diff --git a/modules/cesnet/cesnet.py b/modules/cesnet/cesnet.py
index 7f64f6bca..f23b785e8 100644
--- a/modules/cesnet/cesnet.py
+++ b/modules/cesnet/cesnet.py
@@ -11,7 +11,7 @@ from slips_files.common.slips_utils import utils

-class CESNET(IModule, multiprocessing.Process):
+class CESNET(IModule):
     name = 'CESNET'
     description = 'Send and receive alerts from warden servers.'
authors = ['Alya Gomaa'] diff --git a/modules/cyst/cyst.py b/modules/cyst/cyst.py index 4d0d512e2..17901a16a 100644 --- a/modules/cyst/cyst.py +++ b/modules/cyst/cyst.py @@ -8,7 +8,7 @@ from pprint import pp import contextlib -class Module(IModule, multiprocessing.Process): +class Module(IModule): # Name: short name of the module. Do not use spaces name = 'CYST' description = 'Communicates with CYST simulation framework' diff --git a/modules/ensembling/ensembling.py b/modules/ensembling/ensembling.py index 9fcc7488a..487a4b404 100644 --- a/modules/ensembling/ensembling.py +++ b/modules/ensembling/ensembling.py @@ -1,7 +1,7 @@ from slips_files.common.abstracts._module import IModule from slips_files.common.imports import * -class Ensembling(IModule, multiprocessing.Process): +class Ensembling(IModule): # Name: short name of the module. Do not use spaces name = 'Ensembling' description = 'The module to assign ' diff --git a/modules/exporting_alerts/exporting_alerts.py b/modules/exporting_alerts/exporting_alerts.py index 07761ecea..a2e2ec985 100644 --- a/modules/exporting_alerts/exporting_alerts.py +++ b/modules/exporting_alerts/exporting_alerts.py @@ -1,4 +1,3 @@ -from slips_files.common.abstracts._module import IModule from slips_files.common.imports import * from slack import WebClient from slack.errors import SlackApiError @@ -11,7 +10,7 @@ import sys import datetime -class ExportingAlerts(IModule, multiprocessing.Process): +class ExportingAlerts(IModule): """ Module to export alerts to slack and/or STIX You need to have the token in your environment variables to use this module @@ -70,18 +69,18 @@ def get_slack_token(self): if not hasattr(self, 'slack_token_filepath'): return False - # slack_bot_token_secret should contain your slack token only + # slack_bot_token_secret should contain the slack token only try: with open(self.slack_token_filepath, 'r') as f: self.BOT_TOKEN = f.read() if len(self.BOT_TOKEN) < 5: + del self.BOT_TOKEN raise NameError except (FileNotFoundError, NameError): self.print( f'Please add slack bot token to ' f'{self.slack_token_filepath}. Stopping.' ) - # Stop the module self.shutdown_gracefully() @@ -96,7 +95,8 @@ def send_to_slack(self, msg_to_send: str) -> bool: if self.BOT_TOKEN == '': # The file is empty self.print( - f"Can't find SLACK_BOT_TOKEN in {self.slack_token_filepath}.",0,2, + f"Can't find SLACK_BOT_TOKEN " + f"in {self.slack_token_filepath}.",0,2, ) return False @@ -174,7 +174,7 @@ def push_to_TAXII_server(self): self.print(f'Successfully exported to TAXII server: {self.TAXII_server}.', 1, 0) return True - def export_to_STIX(self, msg_to_send: tuple) -> bool: + def export_to_stix(self, msg_to_send: tuple) -> bool: """ Function to export evidence to a STIX_data.json file in the cwd. 
It keeps appending the given indicator to STIX_data.json until they're sent to the @@ -276,17 +276,14 @@ def send_to_server(self): self.is_bundle_created = False else: self.print( - f'{self.push_delay} seconds passed, no new alerts in STIX_data.json.', 2, 0 + f'{self.push_delay} seconds passed, ' + f'no new alerts in STIX_data.json.', 2, 0 ) def shutdown_gracefully(self): # We need to publish to taxii server before stopping if 'stix' in self.export_to: self.push_to_TAXII_server() - - if hasattr(self, 'json_file_handle'): - self.json_file_handle.close() - if 'slack' in self.export_to and hasattr(self, 'BOT_TOKEN'): date_time = datetime.datetime.now() date_time = utils.convert_format(date_time, utils.alerts_format) @@ -309,21 +306,22 @@ def pre_main(self): self.send_to_slack(f'{date_time}: Slips started on sensor: {self.sensor_name}.') def main(self): - if msg:= self.get_msg('export_evidence'): + if msg := self.get_msg('export_evidence'): evidence = json.loads(msg['data']) - description = evidence['description'] + description: str = evidence['description'] + if 'slack' in self.export_to and hasattr(self, 'BOT_TOKEN'): - srcip = evidence['profileid'].split("_")[-1] + srcip = evidence['profile']['ip'] msg_to_send = f'Src IP {srcip} Detected {description}' self.send_to_slack(msg_to_send) if 'stix' in self.export_to: msg_to_send = ( evidence['evidence_type'], - evidence['attacker_direction'], - evidence['attacker'], + evidence['attacker']['direction'], + evidence['attacker']['value'], description, ) - exported_to_stix = self.export_to_STIX(msg_to_send) + exported_to_stix = self.export_to_stix(msg_to_send) if not exported_to_stix: self.print('Problem in export_to_STIX()', 0, 3) diff --git a/modules/flowalerts/flowalerts.py b/modules/flowalerts/flowalerts.py index c669a9208..2582a1ca1 100644 --- a/modules/flowalerts/flowalerts.py +++ b/modules/flowalerts/flowalerts.py @@ -3,7 +3,7 @@ from slips_files.common.abstracts._module import IModule from slips_files.common.imports import * from .timer_thread import TimerThread -from .set_evidence import Helper +from .set_evidence import SetEvidnceHelper from slips_files.core.helpers.whitelist import Whitelist import multiprocessing import json @@ -18,7 +18,7 @@ from slips_files.common.slips_utils import utils -class FlowAlerts(IModule, multiprocessing.Process): +class FlowAlerts(IModule): name = 'Flow Alerts' description = ( 'Alerts about flows: long connection, successful ssh, ' @@ -27,14 +27,12 @@ class FlowAlerts(IModule, multiprocessing.Process): authors = ['Kamila Babayeva', 'Sebastian Garcia', 'Alya Gomaa'] def init(self): - # Read the configuration self.read_configuration() - # Retrieve the labels self.subscribe_to_channels() self.whitelist = Whitelist(self.logger, self.db) self.conn_counter = 0 - # helper contains all functions used to set evidence - self.helper = Helper(self.db) + # this helper contains all functions used to set evidence + self.set_evidence = SetEvidnceHelper(self.db) self.p2p_daddrs = {} # get the default gateway self.gateway = self.db.get_gateway_ip() @@ -118,17 +116,17 @@ def check_connection_to_local_ip( dport, proto, saddr, - profileid, twid, uid, timestamp, ): """ - Alerts when there's a connection from a private IP to another private IP - except for DNS connections to the gateway + Alerts when there's a connection from a private IP to + another private IP except for DNS connections to the gateway """ def is_dns_conn(): - return dport == 53 and proto.lower() == 'udp' and daddr == self.db.get_gateway_ip() + return dport == 53 
and proto.lower() == 'udp' \ + and daddr == self.db.get_gateway_ip() with contextlib.suppress(ValueError): dport = int(dport) @@ -144,12 +142,11 @@ def is_dns_conn(): ): return - self.helper.set_evidence_conn_to_private_ip( + self.set_evidence.conn_to_private_ip( proto, daddr, dport, saddr, - profileid, twid, uid, timestamp, @@ -178,8 +175,8 @@ def check_long_connection( # If duration is above threshold, we should set an evidence if dur > self.long_connection_threshold: - self.helper.set_evidence_long_connection( - daddr, dur, profileid, twid, uid, timestamp, attacker_direction='srcip' + self.set_evidence.long_connection( + daddr, dur, profileid, twid, uid, timestamp ) return True return False @@ -289,7 +286,8 @@ def is_ignored_ip_data_upload(self, ip): ): return True - def check_data_upload(self, sbytes, daddr, uid, profileid, twid): + def check_data_upload(self, sbytes, daddr, uid: str, profileid, twid, + timestamp): """ Set evidence when 1 flow is sending >= the flow_upload_threshold bytes """ @@ -302,12 +300,13 @@ def check_data_upload(self, sbytes, daddr, uid, profileid, twid): src_mbs = utils.convert_to_mb(int(sbytes)) if src_mbs >= self.flow_upload_threshold: - self.helper.set_evidence_data_exfiltration( + self.set_evidence.data_exfiltration( daddr, src_mbs, profileid, twid, - uid, + [uid], + timestamp, ) return True @@ -376,7 +375,7 @@ def check_pastebin_download( # orig_bytes is number of payload bytes downloaded downloaded_bytes = flow.get('resp_bytes', 0) if downloaded_bytes >= self.pastebin_downloads_threshold: - self.helper.set_evidence_pastebin_download(daddr, downloaded_bytes, ts, profileid, twid, uid) + self.set_evidence.pastebin_download(downloaded_bytes, ts, profileid, twid, uid) return True else: @@ -427,7 +426,7 @@ def get_sent_bytes(all_flows: dict): if mbs_uploaded < self.data_exfiltration_threshold: continue - self.helper.set_evidence_data_exfiltration( + self.set_evidence.data_exfiltration( ip, mbs_uploaded, profileid, @@ -467,7 +466,7 @@ def check_unknown_port( and not self.db.is_ftp_port(dport) ): # we don't have info about this port - self.helper.set_evidence_unknown_port( + self.set_evidence.unknown_port( daddr, dport, proto, timestamp, profileid, twid, uid ) return True @@ -553,10 +552,11 @@ def check_dns_arpa_scan(self, domain, stime, profileid, twid, uid): # happened within more than 2 seconds return False - self.helper.set_evidence_dns_arpa_scan( + self.set_evidence.dns_arpa_scan( self.arpa_scan_threshold, stime, profileid, twid, uids ) - # empty the list of arpa queries for this profile, we don't need them anymore + # empty the list of arpa queries for this profile, + # we don't need them anymore self.dns_arpa_queries.pop(profileid) return True @@ -568,7 +568,8 @@ def is_well_known_org(self, ip): try: SNI = ip_data['SNI'] if type(SNI) == list: - # SNI is a list of dicts, each dict contains the 'server_name' and 'port' + # SNI is a list of dicts, each dict contains the + # 'server_name' and 'port' SNI = SNI[0] if SNI in (None, ''): SNI = False @@ -623,12 +624,14 @@ def check_connection_without_dns_resolution( return False # Ignore some IP - ## - All dhcp servers. Since is ok to connect to them without a DNS request. + ## - All dhcp servers. Since is ok to connect to + # them without a DNS request. 
# We dont have yet the dhcp in the redis, when is there check it # if self.db.get_dhcp_servers(daddr): # continue - # To avoid false positives in case of an interface don't alert ConnectionWithoutDNS + # To avoid false positives in case of an interface + # don't alert ConnectionWithoutDNS # until 30 minutes has passed # after starting slips because the dns may have happened before starting slips if '-i' in sys.argv or self.db.is_growing_zeek_dir(): @@ -656,10 +659,12 @@ def check_connection_without_dns_resolution( # don't alert a Connection Without DNS until 5 seconds has passed # in real time from the time of this checking. - # Create a timer thread that will wait 15 seconds for the dns to arrive and then check again + # Create a timer thread that will wait 15 seconds + # for the dns to arrive and then check again # self.print(f'Cache of conns not to check: {self.conn_checked_dns}') if uid not in self.connections_checked_in_conn_dns_timer_thread: - # comes here if we haven't started the timer thread for this connection before + # comes here if we haven't started the timer + # thread for this connection before # mark this connection as checked self.connections_checked_in_conn_dns_timer_thread.append(uid) params = [flow_type, appproto, daddr, twid, profileid, timestamp, uid] @@ -672,18 +677,21 @@ def check_connection_without_dns_resolution( timer.start() else: # It means we already checked this conn with the Timer process - # (we waited 15 seconds for the dns to arrive after the connection was made) + # (we waited 15 seconds for the dns to arrive after + # the connection was made) # but still no dns resolution for it. - # Sometimes the same computer makes requests using its ipv4 and ipv6 address, check if this is the case + # Sometimes the same computer makes requests using + # its ipv4 and ipv6 address, check if this is the case if self.check_if_resolution_was_made_by_different_version( profileid, daddr ): return False if self.is_well_known_org(daddr): - # if the SNI or rDNS of the IP matches a well-known org, then this is a FP + # if the SNI or rDNS of the IP matches a + # well-known org, then this is a FP return False # self.print(f'Alerting after timer conn without dns on {daddr}, - self.helper.set_evidence_conn_without_dns( + self.set_evidence.conn_without_dns( daddr, timestamp, profileid, twid, uid ) # This UID will never appear again, so we can remove it and @@ -708,7 +716,8 @@ def is_CNAME_contacted(self, answers, contacted_ips) -> bool: return False def check_dns_without_connection( - self, domain, answers: list, rcode_name: str, timestamp: str, profileid, twid, uid + self, domain, answers: list, rcode_name: str, + timestamp: str, profileid, twid, uid ): """ Makes sure all cached DNS answers are used in contacted_ips @@ -717,8 +726,10 @@ def check_dns_without_connection( ## - All reverse dns resolutions ## - All .local domains ## - The wildcard domain * - ## - Subdomains of cymru.com, since it is used by the ipwhois library in Slips to get the ASN - # of an IP and its range. This DNS is meant not to have a connection later + ## - Subdomains of cymru.com, since it is used by + # the ipwhois library in Slips to get the ASN + # of an IP and its range. 
This DNS is meant not + # to have a connection later ## - Domains check from Chrome, like xrvwsrklpqrw ## - The WPAD domain of windows # - When there is an NXDOMAIN as answer, it means @@ -735,26 +746,34 @@ def check_dns_without_connection( ): return False - # One DNS query may not be answered exactly by UID, but the computer can re-ask the domain, + # One DNS query may not be answered exactly by UID, + # but the computer can re-ask the domain, # and the next DNS resolution can be # answered. So dont check the UID, check if the domain has an IP # self.print(f'The DNS query to {domain} had as answers {answers} ') - # It can happen that this domain was already resolved previously, but with other IPs - # So we get from the DB all the IPs for this domain first and append them to the answers - # This happens, for example, when there is 1 DNS resolution with A, then 1 DNS resolution - # with AAAA, and the computer chooses the A address. Therefore, the 2nd DNS resolution + # It can happen that this domain was already resolved + # previously, but with other IPs + # So we get from the DB all the IPs for this domain + # first and append them to the answers + # This happens, for example, when there is 1 DNS + # resolution with A, then 1 DNS resolution + # with AAAA, and the computer chooses the A address. + # Therefore, the 2nd DNS resolution # would be treated as 'without connection', but this is false. if prev_domain_resolutions := self.db.getDomainData(domain): prev_domain_resolutions = prev_domain_resolutions.get('IPs',[]) - # if there's a domain in the cache (prev_domain_resolutions) that is not in the - # current answers given to this function, append it to the answers list + # if there's a domain in the cache + # (prev_domain_resolutions) that is not in the + # current answers given to this function, + # append it to the answers list answers.extend([ans for ans in prev_domain_resolutions if ans not in answers]) if answers == ['-']: - # If no IPs are in the answer, we can not expect the computer to connect to anything + # If no IPs are in the answer, we can not expect + # the computer to connect to anything # self.print(f'No ips in the answer, so ignoring') return False # self.print(f'The extended DNS query to {domain} had as answers {answers} ') @@ -762,8 +781,10 @@ def check_dns_without_connection( contacted_ips = self.db.get_all_contacted_ips_in_profileid_twid( profileid, twid ) - # If contacted_ips is empty it can be because we didnt read yet all the flows. - # This is automatically captured later in the for loop and we start a Timer + # If contacted_ips is empty it can be because + # we didnt read yet all the flows. + # This is automatically captured later in the + # for loop and we start a Timer # every dns answer is a list of ips that correspond to 1 query, # one of these ips should be present in the contacted ips @@ -787,11 +808,13 @@ def check_dns_without_connection( # self.print(f'It seems that none of the IPs were contacted') # Found a DNS query which none of its IPs was contacted - # It can be that Slips is still reading it from the files. Lets check back in some time - # Create a timer thread that will wait some seconds for the connection to arrive and then check again + # It can be that Slips is still reading it from the files. 
+ # Lets check back in some time + # Create a timer thread that will wait some seconds for the + # connection to arrive and then check again if uid not in self.connections_checked_in_dns_conn_timer_thread: - # comes here if we haven't started the timer thread for this dns before - # mark this dns as checked + # comes here if we haven't started the timer + # thread for this dns before mark this dns as checked self.connections_checked_in_dns_conn_timer_thread.append(uid) params = [domain, answers, rcode_name, timestamp, profileid, twid, uid] # self.print(f'Starting the timer to check on {domain}, uid {uid}. @@ -801,10 +824,9 @@ def check_dns_without_connection( ) timer.start() else: - # self.print(f'Alerting on {domain}, uid {uid}. time {datetime.datetime.now()}') # It means we already checked this dns with the Timer process # but still no connection for it. - self.helper.set_evidence_DNS_without_conn( + self.set_evidence.DNS_without_conn( domain, timestamp, profileid, twid, uid ) # This UID will never appear again, so we can remove it and @@ -825,8 +847,7 @@ def detect_successful_ssh_by_zeek(self, uid, timestamp, profileid, twid): daddr = ssh_flow_dict['daddr'] saddr = ssh_flow_dict['saddr'] size = ssh_flow_dict['allbytes'] - self.helper.set_evidence_ssh_successful( - profileid, + self.set_evidence.ssh_successful( twid, saddr, daddr, @@ -840,11 +861,14 @@ def detect_successful_ssh_by_zeek(self, uid, timestamp, profileid, twid): uid ) return True + elif uid not in self.connections_checked_in_ssh_timer_thread: # It can happen that the original SSH flow is not in the DB yet - # comes here if we haven't started the timer thread for this connection before + # comes here if we haven't started the timer thread + # for this connection before # mark this connection as checked - # self.print(f'Starting the timer to check on {flow_dict}, uid {uid}. time {datetime.datetime.now()}') + # self.print(f'Starting the timer to check on {flow_dict}, + # uid {uid}. time {datetime.datetime.now()}') self.connections_checked_in_ssh_timer_thread.append( uid ) @@ -873,8 +897,7 @@ def detect_successful_ssh_by_slips(self, uid, timestamp, profileid, twid, auth_s # Set the evidence because there is no # easier way to show how Slips detected # the successful ssh and not Zeek - self.helper.set_evidence_ssh_successful( - profileid, + self.set_evidence.ssh_successful( twid, saddr, daddr, @@ -891,7 +914,8 @@ def detect_successful_ssh_by_slips(self, uid, timestamp, profileid, twid, auth_s elif uid not in self.connections_checked_in_ssh_timer_thread: # It can happen that the original SSH flow is not in the DB yet - # comes here if we haven't started the timer thread for this connection before + # comes here if we haven't started the timer + # thread for this connection before # mark this connection as checked # self.print(f'Starting the timer to check on {flow_dict}, uid {uid}. 
# time {datetime.datetime.now()}') @@ -904,7 +928,9 @@ def detect_successful_ssh_by_slips(self, uid, timestamp, profileid, twid, auth_s ) timer.start() - def check_successful_ssh(self, uid, timestamp, profileid, twid, auth_success): + def check_successful_ssh( + self, uid, timestamp, profileid, twid, auth_success + ): """ Function to check if an SSH connection logged in successfully """ @@ -938,7 +964,8 @@ def detect_incompatible_CN( if org not in issuer.lower(): continue - # save the org this domain/ip is claiming to belong to, to use it to set evidence later + # save the org this domain/ip is claiming to belong to, + # to use it to set evidence later found_org_in_cn = org # check that the domain belongs to that same org @@ -952,9 +979,10 @@ def detect_incompatible_CN( if not found_org_in_cn: return False - # found one of our supported orgs in the cn but it doesn't belong to any of this org's + # found one of our supported orgs in the cn but + # it doesn't belong to any of this org's # domains or ips - self.helper.set_evidence_incompatible_CN( + self.set_evidence.incompatible_CN( found_org_in_cn, timestamp, daddr, @@ -973,7 +1001,8 @@ def check_multiple_ssh_versions( """ checks if this srcip was detected using a different ssh client or server versions before - :param role: can be 'SSH::CLIENT' or 'SSH::SERVER' as seen in zeek software.log flows + :param role: can be 'SSH::CLIENT' or 'SSH::SERVER' + as seen in zeek software.log flows """ if role not in flow['software']: return @@ -1000,11 +1029,17 @@ def check_multiple_ssh_versions( # they're using the same ssh client version return False - # get the uid of the cached versions, and the uid of the current used versions + # get the uid of the cached versions, and the uid + # of the current used versions uids = [cached_ssh_versions['uid'], flow['uid']] - self.helper.set_evidence_multiple_ssh_versions( - flow['saddr'], cached_versions, current_versions, - flow['starttime'], twid, uids, flow['daddr'], role=role + self.set_evidence.multiple_ssh_versions( + flow['saddr'], + cached_versions, + current_versions, + flow['starttime'], + twid, + uids, + role=role ) return True @@ -1022,9 +1057,12 @@ def estimate_shannon_entropy(self, string): return shannon_entropy_value * -1 - def check_suspicious_dns_answers(self, domain, answers, daddr, profileid, twid, stime, uid): + def check_suspicious_dns_answers( + self, domain, answers, daddr, profileid, twid, stime, uid + ): """ - Uses shannon entropy to detect DNS TXT answers with encoded/encrypted strings + Uses shannon entropy to detect DNS TXT answers + with encoded/encrypted strings """ if not answers: return @@ -1034,7 +1072,7 @@ def check_suspicious_dns_answers(self, domain, answers, daddr, profileid, twid, # TXT record entropy = self.estimate_shannon_entropy(answer) if entropy >= self.shannon_entropy_threshold: - self.helper.set_evidence_suspicious_dns_answer( + self.set_evidence.suspicious_dns_answer( domain, answer, entropy, @@ -1045,22 +1083,31 @@ def check_suspicious_dns_answers(self, domain, answers, daddr, profileid, twid, uid ) - def check_invalid_dns_answers(self, domain, answers, daddr, profileid, twid, stime, uid): - # this function is used to check for certain IP answers to DNS queries being blocked + def check_invalid_dns_answers( + self, domain, answers, daddr, profileid, twid, stime, uid + ): + # this function is used to check for certain IP + # answers to DNS queries being blocked # (perhaps by ad blockers) and set to the following IP values - invalid_answers = {"127.0.0.1" , 
"0.0.0.0"} # currently hardcoding blocked ips + # currently hardcoding blocked ips + invalid_answers = {"127.0.0.1" , "0.0.0.0"} if not answers: return for answer in answers: if answer in invalid_answers and domain != "localhost": #blocked answer found - self.helper.set_evidence_invalid_dns_answer(domain, answer, daddr, profileid, twid, stime, uid) - # delete answer from redis cache to prevent associating this dns answer with this domain/query and + self.set_evidence.invalid_dns_answer( + domain, answer, daddr, profileid, twid, stime, uid + ) + # delete answer from redis cache to prevent + # associating this dns answer with this domain/query and # avoid FP "DNS without connection" evidence self.db.delete_dns_resolution(answer) - def detect_DGA(self, rcode_name, query, stime, daddr, profileid, twid, uid): + def detect_DGA( + self, rcode_name, query, stime, daddr, profileid, twid, uid + ): """ Detect DGA based on the amount of NXDOMAINs seen in dns.log @@ -1072,14 +1119,17 @@ def detect_DGA(self, rcode_name, query, stime, daddr, profileid, twid, uid): saddr = profileid.split('_')[-1] # check whitelisted queries because we - # don't want to count nxdomains to cymru.com or spamhaus as DGA as they're made + # don't want to count nxdomains to cymru.com or + # spamhaus as DGA as they're made # by slips if ( 'NXDOMAIN' not in rcode_name or not query or query.endswith('.arpa') or query.endswith('.local') - or self.whitelist.is_whitelisted_domain(query, saddr, daddr, 'alerts') + or self.whitelist.is_whitelisted_domain( + query, saddr, daddr, 'alerts' + ) ): return False @@ -1105,7 +1155,7 @@ def detect_DGA(self, rcode_name, query, stime, daddr, profileid, twid, uid): number_of_nxdomains % 5 == 0 and number_of_nxdomains >= self.nxdomains_threshold ): - self.helper.set_evidence_DGA( + self.set_evidence.DGA( number_of_nxdomains, stime, profileid, twid, uids ) # clear the list of alerted queries and uids @@ -1134,17 +1184,19 @@ def check_conn_to_port_0( if sport != 0 and dport != 0: return - direction = 'source' if sport == 0 else 'destination' - self.helper.set_evidence_for_port_0_connection( + attacker = saddr if sport == 0 else daddr + victim = saddr if attacker == daddr else daddr + self.set_evidence.for_port_0_connection( saddr, daddr, sport, dport, - direction, profileid, twid, uid, timestamp, + victim, + attacker ) def check_multiple_reconnection_attempts( @@ -1168,7 +1220,9 @@ def check_multiple_reconnection_attempts( key = f'{saddr}-{daddr}-{dport}' # add this conn to the stored number of reconnections - current_reconnections = self.db.get_reconnections_for_tw(profileid, twid) + current_reconnections = self.db.get_reconnections_for_tw( + profileid, twid + ) try: reconnections, uids = current_reconnections[key] @@ -1182,21 +1236,14 @@ def check_multiple_reconnection_attempts( if reconnections < 5: return - ip_identification = ( - self.db.get_ip_identification(daddr) - ) - description = ( - f'Multiple reconnection attempts to Destination IP:' - f' {daddr} {ip_identification} ' - f'from IP: {saddr} reconnections: {reconnections}' - ) - self.helper.set_evidence_for_multiple_reconnection_attempts( + + self.set_evidence.multiple_reconnection_attempts( profileid, twid, daddr, - description, uids, timestamp, + reconnections, ) # reset the reconnection attempts of this src->dst current_reconnections[key] = (0, []) @@ -1232,7 +1279,7 @@ def detect_young_domains(self, domain, stime, profileid, twid, uid): if age >= age_threshold: return False - self.helper.set_evidence_young_domain( + 
self.set_evidence.young_domain( domain, age, stime, profileid, twid, uid ) return True @@ -1265,8 +1312,8 @@ def check_smtp_bruteforce( } ) - self.helper.set_evidence_bad_smtp_login( - saddr, daddr, stime, profileid, twid, uid + self.set_evidence.bad_smtp_login( + saddr, daddr, stime, twid, uid ) timestamps = self.smtp_bruteforce_cache[profileid][0] @@ -1284,14 +1331,14 @@ def check_smtp_bruteforce( if diff > 10: # didnt happen within 10s! - # remove the first login from cache so we can check the next 3 logins + # remove the first login from cache so we + # can check the next 3 logins self.smtp_bruteforce_cache[profileid][0].pop(0) self.smtp_bruteforce_cache[profileid][1].pop(0) return - self.helper.set_evidence_smtp_bruteforce( + self.set_evidence.smtp_bruteforce( flow, - profileid, twid, uids, self.smtp_bruteforce_threshold, @@ -1356,23 +1403,24 @@ def detect_connection_to_multiple_ports( if len(dstports) <= 1: return - ip_identification = self.db.get_ip_identification(daddr) - description = ( - f'Connection to multiple ports {dstports} of ' - f'Destination IP: {daddr}. {ip_identification}' - ) uids = daddrs[daddr]['uid'] - self.helper.set_evidence_for_connection_to_multiple_ports( + + victim: str = daddr + attacker: str = profileid.split("_")[-1] + + self.set_evidence.connection_to_multiple_ports( profileid, twid, - daddr, - description, uids, timestamp, + dstports, + victim, + attacker, ) - # Connection to multiple port to the Source IP. Happens in the mode 'all' - elif profileid.split('_')[1] == daddr: + # Connection to multiple port to the Source IP. + # Happens in the mode 'all' + elif profileid.split('_')[-1] == daddr: direction = 'Src' state = 'Established' protocol = 'TCP' @@ -1398,16 +1446,17 @@ def detect_connection_to_multiple_ports( return uids = saddrs[saddr]['uid'] - description = f'Connection to multiple ports {dstports} ' \ - f'of Source IP: {saddr}' + attacker: str = daddr + victim: str = profileid.split("_")[-1] - self.helper.set_evidence_for_connection_to_multiple_ports( + self.set_evidence.connection_to_multiple_ports( profileid, twid, - daddr, - description, uids, timestamp, + dstports, + victim, + attacker ) def detect_malicious_ja3( @@ -1429,29 +1478,27 @@ def detect_malicious_ja3( malicious_ja3_dict = self.db.get_ja3_in_IoC() if ja3 in malicious_ja3_dict: - self.helper.set_evidence_malicious_JA3( + self.set_evidence.malicious_ja3( malicious_ja3_dict, - saddr, - profileid, twid, uid, timestamp, daddr, + saddr, type_='ja3', - ioc=ja3, + ja3=ja3, ) if ja3s in malicious_ja3_dict: - self.helper.set_evidence_malicious_JA3( + self.set_evidence.malicious_ja3( malicious_ja3_dict, - daddr, - profileid, twid, uid, timestamp, saddr, + daddr, type_='ja3s', - ioc=ja3s, + ja3=ja3s, ) def check_self_signed_certs( @@ -1465,33 +1512,26 @@ def check_self_signed_certs( uid ): """ - checks the validation status of every azeek ssl flow for self signed certs + checks the validation status of every a zeek ssl flow for self + signed certs """ if 'self signed' not in validation_status: return - - ip_identification = ( - self.db.get_ip_identification(daddr) - ) - description = f'Self-signed certificate. Destination IP: {daddr}.' \ - f' {ip_identification}' - - if server_name: - description += f' SNI: {server_name}.' 
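# ---------------------------------------------------------------------------
# Editor's illustration (not part of the patch): a few hunks above,
# check_suspicious_dns_answers flags DNS TXT answers whose Shannon entropy
# exceeds shannon_entropy_threshold, using estimate_shannon_entropy. For
# reference, a self-contained version of the same textbook formula (the
# module's own helper may differ in implementation details):

import math
from collections import Counter

def shannon_entropy(s: str) -> float:
    """Shannon entropy of a string, in bits per character."""
    if not s:
        return 0.0
    n = len(s)
    return -sum((c / n) * math.log2(c / n) for c in Counter(s).values())

# encoded/encrypted blobs (e.g. base64 payloads smuggled in TXT answers)
# score noticeably higher than natural language:
print(shannon_entropy('hello world'))                 # ~2.85
print(shannon_entropy('aGVsbG8gd29ybGQhISEhIQ=='))    # ~3.97
# ---------------------------------------------------------------------------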
- - self.helper.set_evidence_self_signed_certificates( + self.set_evidence.self_signed_certificates( profileid, twid, daddr, - description, uid, timestamp, + server_name ) - def check_ssh_password_guessing(self, daddr, uid, timestamp, profileid, twid, auth_success): + def check_ssh_password_guessing( + self, daddr, uid, timestamp, profileid, twid, auth_success + ): """ This detection is only done when there's a failed ssh attempt alerts ssh pw bruteforce when there's more than @@ -1512,8 +1552,8 @@ def check_ssh_password_guessing(self, daddr, uid, timestamp, profileid, twid, au if conn_count >= self.pw_guessing_threshold: description = f'SSH password guessing to IP {daddr}' uids = self.password_guessing_cache[cache_key] - self.helper.set_evidence_pw_guessing( - description, timestamp, profileid, twid, uids, by='Slips' + self.set_evidence.pw_guessing( + description, timestamp, twid, uids, by='Slips' ) #reset the counter @@ -1541,7 +1581,7 @@ def check_malicious_ssl(self, ssl_info): if not ssl_info_from_db: return False - self.helper.set_evidence_malicious_ssl( + self.set_evidence.malicious_ssl( ssl_info, ssl_info_from_db ) @@ -1559,7 +1599,7 @@ def check_weird_http_method(self, msg): if 'unknown_HTTP_method' not in name: return False - self.helper.set_evidence_weird_http_method( + self.set_evidence.weird_http_method( profileid, twid, flow @@ -1588,7 +1628,7 @@ def check_non_http_port_80_conns( and appproto.lower() != 'http' and state == 'Established' ): - self.helper.set_evidence_non_http_port_80_conn( + self.set_evidence.non_http_port_80_conn( daddr, profileid, timestamp, @@ -1607,7 +1647,7 @@ def check_GRE_tunnel(self, tunnel_info: dict): if tunnel_type != 'Tunnel::GRE': return - self.helper.set_evidence_GRE_tunnel( + self.set_evidence.GRE_tunnel( tunnel_info ) @@ -1634,7 +1674,7 @@ def check_non_ssl_port_443_conns( and appproto.lower() != 'ssl' and state == 'Established' ): - self.helper.set_evidence_non_ssl_port_443_conn( + self.set_evidence.non_ssl_port_443_conn( daddr, profileid, timestamp, @@ -1655,9 +1695,11 @@ def check_different_localnet_usage( what_to_check='' ): """ - alerts when a connection to a private ip that doesn't belong to our local network is found + alerts when a connection to a private ip that + doesn't belong to our local network is found for example: - If we are on 192.168.1.0/24 then detect anything coming from/to 10.0.0.0/8 + If we are on 192.168.1.0/24 then detect anything + coming from/to 10.0.0.0/8 :param what_to_check: can be 'srcip' or 'dstip' """ ip_to_check = saddr if what_to_check == 'srcip' else daddr @@ -1677,7 +1719,7 @@ def check_different_localnet_usage( if ip_obj in ipaddress.IPv4Network(own_local_network): return - self.helper.set_evidence_different_localnet_usage( + self.set_evidence.different_localnet_usage( daddr, f'{dport}/{proto}', profileid, @@ -1697,7 +1739,8 @@ def check_device_changing_ips( timestamp ): """ - Every time we have a flow for a new ip (an ip that we're seeing for the first time) + Every time we have a flow for a new ip + (an ip that we're seeing for the first time) we check if the MAC of this srcip was associated with another ip this check is only done once for each source ip slips sees """ @@ -1715,14 +1758,16 @@ def check_device_changing_ips( return if self.db.was_ip_seen_in_connlog_before(saddr): - # we should only check once for the first time we're seeing this flow + # we should only check once for the first + # time we're seeing this flow return self.db.mark_srcip_as_seen_in_connlog(saddr) if old_ip_list := 
self.db.get_ip_of_mac(smac): # old_ip is a list that may contain the ipv6 of this MAC - # this ipv6 may be of the same device that has the given saddr and MAC + # this ipv6 may be of the same device that + # has the given saddr and MAC # so this would be fp. so, make sure we're dealing with ipv4 only for ip in json.loads(old_ip_list): if validators.ipv4(ip): @@ -1730,14 +1775,16 @@ def check_device_changing_ips( break else: # all the IPs associated with the given macs are ipv6, - # 1 computer might have several ipv6, AND/OR a combination of ipv6 and 4 + # 1 computer might have several ipv6, + # AND/OR a combination of ipv6 and 4 # so this detection will only work if both the # old ip and the given saddr are ipv4 private ips return if old_ip != saddr: - # we found this smac associated with an ip other than this saddr - self.helper.set_evidence_device_changing_ips( + # we found this smac associated with an + # ip other than this saddr + self.set_evidence.device_changing_ips( smac, old_ip, profileid, @@ -1750,7 +1797,6 @@ def pre_main(self): self.ssl_waiting_thread.start() def main(self): - # if timewindows are not updated for a long time, Slips is stopped automatically. if msg:= self.get_msg('new_flow'): new_flow = json.loads(msg['data']) profileid = new_flow['profileid'] @@ -1775,11 +1821,6 @@ def main(self): smac = flow_dict.get('smac', '') if not appproto or appproto == '-': appproto = flow_dict.get('type', '') - # dmac = flow_dict.get('dmac', '') - # stime = flow_dict['ts'] - # timestamp = new_flow['stime'] - # pkts = flow_dict['pkts'] - # allbytes = flow_dict['allbytes'] self.check_long_connection( dur, daddr, saddr, profileid, twid, uid, timestamp @@ -1858,7 +1899,8 @@ def main(self): daddr, uid, profileid, - twid + twid, + timestamp ) self.check_non_http_port_80_conns( @@ -1883,12 +1925,12 @@ def main(self): uid, timestamp ) + self.check_connection_to_local_ip( daddr, dport, proto, saddr, - profileid, twid, uid, timestamp, @@ -1900,7 +1942,7 @@ def main(self): self.conn_counter += 1 # --- Detect successful SSH connections --- - if msg:= self.get_msg('new_ssh'): + if msg := self.get_msg('new_ssh'): data = msg['data'] data = json.loads(data) profileid = data['profileid'] @@ -1931,7 +1973,8 @@ def main(self): auth_success ) # --- Detect alerts from Zeek: Self-signed certs, - # invalid certs, port-scans and address scans, and password guessing --- + # invalid certs, port-scans and address scans, + # and password guessing --- if msg:= self.get_msg('new_notice'): data = msg['data'] # Convert from json to dict @@ -1952,11 +1995,10 @@ def main(self): if 'Port_Scan' in note: # Vertical port scan scanning_ip = flow.get('scanning_ip', '') - self.helper.set_evidence_vertical_portscan( + self.set_evidence.vertical_portscan( msg, scanning_ip, timestamp, - profileid, twid, uid, ) @@ -1965,7 +2007,7 @@ def main(self): if 'Address_Scan' in note: # Horizontal port scan # scanned_port = flow.get('scanned_port', '') - self.helper.set_evidence_horizontal_portscan( + self.set_evidence.horizontal_portscan( msg, timestamp, profileid, @@ -1974,10 +2016,9 @@ def main(self): ) # --- Detect password guessing by zeek --- if 'Password_Guessing' in note: - self.helper.set_evidence_pw_guessing( + self.set_evidence.pw_guessing( msg, timestamp, - profileid, twid, uid, by='Zeek' @@ -2085,7 +2126,8 @@ def main(self): rcode_name, domain, stime, daddr, profileid, twid, uid ) - # TODO: not sure how to make sure IP_info is done adding domain age to the db or not + # TODO: not sure how to make sure IP_info is + # done adding 
domain age to the db or not
             self.detect_young_domains(
                 domain, stime, profileid, twid, uid
             )
diff --git a/modules/flowalerts/set_evidence.py b/modules/flowalerts/set_evidence.py
index b9c0b785a..8d69a2ed4 100644
--- a/modules/flowalerts/set_evidence.py
+++ b/modules/flowalerts/set_evidence.py
@@ -1,592 +1,770 @@
-# Must imports
 import datetime
-# Your imports
 import json
 import sys
 import time
+from typing import List

 from slips_files.common.slips_utils import utils
-
-
-class Helper:
+from slips_files.core.evidence_structure.evidence import \
+    (
+        Evidence,
+        ProfileID,
+        TimeWindow,
+        Victim,
+        Attacker,
+        ThreatLevel,
+        EvidenceType,
+        IoCType,
+        Direction,
+        IDEACategory,
+        Tag
+    )
+
+class SetEvidnceHelper:
     def __init__(
         self, db
     ):
         self.db = db

-    def set_evidence_young_domain(
-        self,
-        domain,
-        age,
-        stime,
-        profileid,
-        twid,
-        uid
-    ):
-        confidence = 1
-        threat_level = 'low'
-        category = 'Anomaly.Traffic'
-        evidence_type = 'YoungDomain'
-        attacker_direction = 'dstdomain'
-        attacker = domain
-        description = f'connection to a young domain: {domain} registered {age} days ago.'
-        self.db.setEvidence(
-            evidence_type,
-            attacker_direction,
-            attacker,
-            threat_level,
-            confidence, description,
-            stime,
-            category,
-            profileid=profileid,
-            twid=twid,
-            uid=uid
+    def young_domain(
+            self,
+            domain: str,
+            age: int,
+            stime: str,
+            profileid: str,
+            twid: str,
+            uid: str
+    ):
+        saddr: str = profileid.split("_")[-1]
+        victim = Victim(
+            direction=Direction.SRC,
+            victim_type=IoCType.IP,
+            value=saddr,
+        )
+        attacker = Attacker(
+            direction=Direction.DST,
+            attacker_type=IoCType.DOMAIN,
+            value=domain,
+        )
+        twid_number: int = int(twid.replace("timewindow", ""))
+        description = f'connection to a young domain: {domain} ' \
+                      f'registered {age} days ago.'
+        evidence = Evidence(
+            evidence_type=EvidenceType.YOUNG_DOMAIN,
+            attacker=attacker,
+            threat_level=ThreatLevel.LOW,
+            category=IDEACategory.ANOMALY_TRAFFIC,
+            description=description,
+            victim=victim,
+            profile=ProfileID(ip=saddr),
+            timewindow=TimeWindow(number=twid_number),
+            uid=[uid],
+            timestamp=stime,
+            conn_count=1,
+            confidence=1.0
         )
-
-    def set_evidence_multiple_ssh_versions(
-        self,
-        srcip,
-        cached_versions,
-        current_versions,
-        timestamp,
-        twid,
-        uid,
-        daddr,
-        role=''
-    ):
+        self.db.set_evidence(evidence)
+
+    def multiple_ssh_versions(
+            self,
+            srcip: str,
+            cached_versions: str,
+            current_versions: str,
+            timestamp: str,
+            twid: str,
+            uid: List[str],
+            role: str = ''
+    ):
         """
         :param cached_versions: major.minor
         :param current_versions: major.minor
-        :param role: can be 'SSH::CLIENT' or 'SSH::SERVER' as seen in zeek software.log flows
+        :param role: can be 'SSH::CLIENT' or
+        'SSH::SERVER' as seen in zeek software.log flows
         """
-        profileid = f'profile_{srcip}'
-        confidence = 0.9
-        threat_level = 'medium'
-        category = 'Anomaly.Traffic'
-        attacker_direction = 'srcip'
-        evidence_type = 'MultipleSSHVersions'
-        attacker = srcip
-        role = 'client' if 'CLIENT' in role else 'server'
-        description = f'SSH {role} version changing from {cached_versions} to {current_versions}'
-        self.db.setEvidence(
-            evidence_type,
-            attacker_direction,
-            attacker,
-            threat_level,
-            confidence, description,
-            timestamp, category, profileid=profileid, twid=twid, uid=uid,
-            victim=daddr
+        attacker = Attacker(
+            direction=Direction.SRC,
+            attacker_type=IoCType.IP,
+            value=srcip
         )
+        role = 'client' if 'CLIENT' in role.upper() else 'server'
+        description = f'SSH {role} version changing from ' \
+                      f'{cached_versions} to {current_versions}'
+
+        
evidence = Evidence( + evidence_type=EvidenceType.MULTIPLE_SSH_VERSIONS, + attacker=attacker, + threat_level=ThreatLevel.MEDIUM, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + profile=ProfileID(ip=attacker.value), + timewindow=TimeWindow(int(twid.replace("timewindow", ''))), + uid=uid, + timestamp=timestamp, + conn_count=1, + confidence=0.9, + source_target_tag=Tag.RECON + ) + self.db.set_evidence(evidence) - def set_evidence_different_localnet_usage( + def different_localnet_usage( self, - daddr, - portproto, - profileid, - timestamp, - twid, - uid, + daddr: str, + portproto: str, + profileid: ProfileID, + timestamp: str, + twid: str, + uid: str, ip_outside_localnet: str = '' - ): + ): """ - :param ip_outside_localnet: was the 'srcip' outside the localnet or the 'dstip'? + :param ip_outside_localnet: was the + 'srcip' outside the localnet or the 'dstip'? """ srcip = profileid.split('_')[-1] - # the attacker here is the IP found to be private and outside th localnet + # the attacker here is the IP found to be private and outside the localnet if ip_outside_localnet == 'srcip': - attacker = srcip - victim = daddr - direction = 'from' - rev_direction = 'to' + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + description = f'A connection from a private IP ({srcip}) ' \ + f'outside of the used local network ' \ + f'{self.db.get_local_network()}. To IP: {daddr} ' else: - attacker = daddr - victim = srcip - direction = 'to' - rev_direction = 'from' - - confidence = 1 - threat_level = 'high' - category = 'Anomaly.Traffic' - attacker_direction = ip_outside_localnet - evidence_type = 'DifferentLocalnet' - localnet = self.db.get_local_network() - description = f'A connection {direction} a private IP ({attacker}) ' \ - f'outside of the used local network {localnet}.' \ - f' {rev_direction} IP: {victim} ' - - if attacker_direction == 'dstip': - description += 'using ARP' if 'arp' in portproto else f'on port: {portproto}' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid, - victim=victim + attacker = Attacker( + direction=Direction.DST, + attacker_type=IoCType.IP, + value=daddr + ) + victim = Victim( + direction=Direction.SRC, + victim_type=IoCType.IP, + value=srcip ) + description = f'A connection to a private IP ({daddr}) ' \ + f'outside of the used local network ' \ + f'{self.db.get_local_network()}. 
' \ + f'From IP: {srcip} ' + description += 'using ARP' if 'arp' in portproto \ + else f'on port: {portproto}' + + + confidence = 1.0 + threat_level = ThreatLevel.HIGH + + evidence = Evidence( + evidence_type=EvidenceType.DIFFERENT_LOCALNET, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + victim=victim, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + self.db.set_evidence(evidence) - def set_evidence_device_changing_ips( + def device_changing_ips( self, - smac, - old_ip, - profileid, - twid, - uid, - timestamp - ): + smac: str, + old_ip: str, + profileid: str, + twid: str, + uid: str, + timestamp: str + ): confidence = 0.8 - threat_level = 'medium' - category = 'Anomaly.Traffic' - attacker_direction = 'srcip' - evidence_type = 'DeviceChangingIP' - saddr = profileid.split("_")[-1] - attacker = saddr + threat_level = ThreatLevel.MEDIUM + saddr: str = profileid.split("_")[-1] + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + description = f'A device changing IPs. IP {saddr} was found ' \ f'with MAC address {smac} but the MAC belongs ' \ f'originally to IP: {old_ip}. ' - self.db.setEvidence( - evidence_type - , attacker_direction, attacker, threat_level, confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid, - victim=old_ip - ) - - def set_evidence_non_http_port_80_conn( - self, - daddr, - profileid, - timestamp, - twid, - uid - ): - confidence = 0.8 - threat_level = 'medium' - category = 'Anomaly.Traffic' - attacker_direction = 'dstip' - evidence_type = 'Non-HTTP-Port-80-Connection' - attacker = daddr - ip_identification = self.db.get_ip_identification(daddr) + twid_number = int(twid.replace("timewindow", "")) + + evidence = Evidence( + evidence_type=EvidenceType.DEVICE_CHANGING_IP, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + victim=None, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) - description = f'non-HTTP established connection to port 80.' \ - f' destination IP: {daddr} {ip_identification}' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid) + self.db.set_evidence(evidence) - def set_evidence_non_ssl_port_443_conn( + def non_http_port_80_conn( self, - daddr, - profileid, - timestamp, - twid, - uid - ): + daddr: str, + profileid: str, + timestamp: str, + twid: str, + uid: str + ) -> None: confidence = 0.8 - threat_level = 'medium' - category = 'Anomaly.Traffic' - attacker_direction = 'dstip' - evidence_type = 'Non-SSL-Port-443-Connection' - attacker = daddr - ip_identification = self.db.get_ip_identification(daddr) - description = f'non-SSL established connection to port 443.' 
\ - f' destination IP: {daddr} {ip_identification}' + threat_level = ThreatLevel.MEDIUM + saddr: str = profileid.split("_")[-1] - self.db.setEvidence( - evidence_type, attacker_direction, - attacker, - threat_level, - confidence, - description, + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) - timestamp, category, profileid=profileid, twid=twid, uid=uid + ip_identification: str = self.db.get_ip_identification(daddr) + + description: str = f'non-HTTP established connection to port 80. ' \ + f'destination IP: {daddr} {ip_identification}' + + twid_number: int = int(twid.replace("timewindow", "")) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.NON_HTTP_PORT_80_CONNECTION, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) + + def non_ssl_port_443_conn( + self, + daddr: str, + profileid: str, + timestamp: str, + twid: str, + uid: str + ) -> None: + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.MEDIUM + saddr: str = profileid.split("_")[-1] + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr ) - def set_evidence_weird_http_method( + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'non-SSL established connection to port 443. ' \ + f'destination IP: {daddr} {ip_identification}' + + twid_number: int = int(twid.replace("timewindow", "")) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.NON_SSL_PORT_443_CONNECTION, + attacker=attacker, + victim=victim, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) + + def weird_http_method( self, - profileid, - twid, + profileid: str, + twid: str, flow: dict - ): - daddr = flow['daddr'] - weird_method = flow['addl'] - uid = flow['uid'] - timestamp = flow['starttime'] + ) -> None: + daddr: str = flow['daddr'] + weird_method: str = flow['addl'] + uid: str = flow['uid'] + timestamp: str = flow['starttime'] confidence = 0.9 - threat_level = 'medium' - category = 'Anomaly.Traffic' - attacker_direction = 'srcip' - evidence_type = 'WeirdHTTPMethod' - attacker = profileid.split("_")[-1] - ip_identification = self.db.get_ip_identification(daddr) - description = f'Weird HTTP method "{weird_method}" to IP: {daddr} {ip_identification}. by Zeek.' 
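# ---------------------------------------------------------------------------
# Editor's note (not part of the patch): every converted method in this file
# repeats the same steps: derive saddr from the profileid, turn 'timewindowN'
# into N, build Attacker/Victim objects, assemble an Evidence, and hand it to
# db.set_evidence(). A condensed sketch of that shared pattern, using the
# field values of the weird_http_method conversion that follows; the helper
# name is the editor's (the real methods inline these steps):

from slips_files.core.evidence_structure.evidence import (
    Evidence, ProfileID, TimeWindow, Victim, Attacker,
    ThreatLevel, EvidenceType, IoCType, Direction, IDEACategory,
)

def build_evidence_sketch(profileid: str, twid: str, daddr: str,
                          uid: str, timestamp: str,
                          description: str) -> Evidence:
    saddr = profileid.split('_')[-1]                   # 'profile_1.2.3.4' -> '1.2.3.4'
    twid_number = int(twid.replace('timewindow', ''))  # 'timewindow3' -> 3
    return Evidence(
        evidence_type=EvidenceType.WEIRD_HTTP_METHOD,
        attacker=Attacker(direction=Direction.SRC,
                          attacker_type=IoCType.IP, value=saddr),
        victim=Victim(direction=Direction.DST,
                      victim_type=IoCType.IP, value=daddr),
        threat_level=ThreatLevel.MEDIUM,
        category=IDEACategory.ANOMALY_TRAFFIC,
        description=description,
        profile=ProfileID(ip=saddr),
        timewindow=TimeWindow(number=twid_number),
        uid=[uid],
        timestamp=timestamp,
        conn_count=1,
        confidence=0.9,
    )
# ---------------------------------------------------------------------------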
- self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid, - victim=daddr, - ) + threat_level: ThreatLevel = ThreatLevel.MEDIUM + saddr: str = profileid.split("_")[-1] - def set_evidence_incompatible_CN( + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'Weird HTTP method "{weird_method}" to IP: ' \ + f'{daddr} {ip_identification}, detected by Zeek.' + + twid_number: int = int(twid.replace("timewindow", "")) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.WEIRD_HTTP_METHOD, + attacker=attacker, + victim=victim, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) + + def incompatible_CN( self, - org, - timestamp, - daddr, - profileid, - twid, - uid - ): - """ - :param prg: the org this ip/domain claims it belongs to - """ - confidence = 0.9 - threat_level = 'medium' - category = 'Anomaly.Traffic' - attacker_direction = 'dstip' - evidence_type = 'IncompatibleCN' - attacker = daddr - ip_identification = self.db.get_ip_identification(daddr) - description = f'Incompatible certificate CN to IP: {daddr} ' \ - f'{ip_identification} claiming to ' \ - f'belong {org.capitalize()}.' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid - ) + org: str, + timestamp: str, + daddr: str, + profileid: str, + twid: str, + uid: str + ) -> None: + confidence: float = 0.9 + threat_level: ThreatLevel = ThreatLevel.MEDIUM + saddr: str = profileid.split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) - def set_evidence_DGA( + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'Incompatible certificate CN to IP: {daddr} ' \ + f'{ip_identification} claiming to ' \ + f'belong to {org.capitalize()}.' + + twid_number: int = int(twid.replace("timewindow", "")) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.INCOMPATIBLE_CN, + attacker=attacker, + victim=victim, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) + + def DGA( self, nxdomains: int, - stime, - profileid, - twid, - uid - ): - confidence = (1 / 100) * (nxdomains - 100) + 1 + stime: str, + profileid: str, + twid: str, + uid: List[str] + ) -> None: + # the confidence score increases linearly with each non-existent + # domain; the +1 anchors the score at exactly 1 when the + # threshold of 100 NXDOMAINs is reached.
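+ # e.g. 50 NXDOMAINs -> confidence 0.5, 100 -> 1.0, 150 -> 1.5 + # (the formula simplifies to nxdomains / 100; the max() below is + # just a defensive floor at 0).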
+ confidence: float = max(0, (1 / 100) * (nxdomains - 100) + 1) confidence = round(confidence, 2) # for readability - threat_level = 'high' - category = 'Intrusion.Botnet' - # the srcip doing all the dns queries - attacker_direction = 'srcip' - source_target_tag = 'OriginMalware' - evidence_type = f'DGA-{nxdomains}-NXDOMAINs' - attacker = profileid.split('_')[1] - description = f'possible DGA or domain scanning. {attacker} ' \ + threat_level = ThreatLevel.HIGH + saddr = profileid.split("_")[-1] + description = f'Possible DGA or domain scanning. {saddr} ' \ f'failed to resolve {nxdomains} domains' - conn_count = nxdomains - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - stime, category, - source_target_tag=source_target_tag, - conn_count=conn_count, - profileid=profileid, - twid=twid, - uid=uid - ) - def set_evidence_DNS_without_conn( - self, - domain, - timestamp, - profileid, - twid, - uid - ): - confidence = 0.8 - threat_level = 'low' - category = 'Anomaly.Traffic' - attacker_direction = 'dstdomain' - evidence_type = 'DNSWithoutConnection' - attacker = domain - description = f'domain {domain} resolved with no connection' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, category, profileid=profileid, twid=twid, uid=uid + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr ) - def set_evidence_pastebin_download( + evidence: Evidence = Evidence( + evidence_type=EvidenceType.DGA_NXDOMAINS, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ANOMALY_BEHAVIOUR, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=uid, + timestamp=stime, + conn_count=nxdomains, + confidence=confidence, + source_target_tag=Tag.ORIGIN_MALWARE + ) + + self.db.set_evidence(evidence) + + def DNS_without_conn( + self, + domain: str, + timestamp: str, + profileid: str, + twid: str, + uid: str + ) -> None: + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.LOW + saddr: str = profileid.split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + description: str = f'domain {domain} resolved with no connection' + + twid_number: int = int(twid.replace("timewindow", "")) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.DNS_WITHOUT_CONNECTION, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ANOMALY_TRAFFIC, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) + + def pastebin_download( self, - daddr, - bytes_downloaded, - timestamp, - profileid, - twid, - uid - ): - attacker_direction = 'dstip' - source_target_tag = 'Malware' - attacker = daddr - evidence_type = 'PastebinDownload' - threat_level = 'info' - category = 'Anomaly.Behaviour' - confidence = 1 - response_body_len = utils.convert_to_mb(bytes_downloaded) - description = ( - f'A downloaded file from pastebin.com. 
size: {response_body_len} MBs' - ) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid - ) + bytes_downloaded: int, + timestamp: str, + profileid: str, + twid: str, + uid: str + ) -> bool: + + threat_level: ThreatLevel = ThreatLevel.INFO + confidence: float = 1.0 + saddr: str = profileid.split("_")[-1] + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + response_body_len: float = utils.convert_to_mb(bytes_downloaded) + description: str = f'A downloaded file from pastebin.com. ' \ + f'size: {response_body_len} MBs' + + twid_number: int = int(twid.replace("timewindow", "")) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.PASTEBIN_DOWNLOAD, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ANOMALY_BEHAVIOUR, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + source_target_tag=Tag.MALWARE, + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) return True - def set_evidence_conn_without_dns( + def conn_without_dns( self, - daddr, - timestamp, - profileid, - twid, - uid - ): - # uid {uid}. time {datetime.datetime.now()}') - threat_level = 'high' - category = 'Anomaly.Connection' - attacker_direction = 'dstip' - source_target_tag = 'Malware' - evidence_type = 'ConnectionWithoutDNS' - attacker = daddr - # the first 5 hours the confidence of connection w/o dns - # is 0.1 in case of interface only, until slips learns all the dns - start_time = self.db.get_slips_start_time() - now = time.time() - confidence = 0.8 - if ( - '-i' in sys.argv - or self.db.is_growing_zeek_dir() - ): - diff = utils.get_time_diff(start_time, now, return_type='hours') + daddr: str, + timestamp: str, + profileid: str, + twid: str, + uid: str + ) -> None: + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.HIGH + saddr: str = profileid.split("_")[-1] + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + # For the first 5 hours of an interface capture, the confidence of + # a connection without DNS is lowered to 0.1, until Slips has seen + # enough of the network's DNS traffic + start_time: float = self.db.get_slips_start_time() + now: float = time.time() + if '-i' in sys.argv or self.db.is_growing_zeek_dir(): + diff: float = utils.get_time_diff( + start_time, now, return_type='hours' + ) if diff < 5: confidence = 0.1 - ip_identification = self.db.get_ip_identification(daddr) - description = f'a connection without DNS resolution to IP: ' \ - f'{daddr} {ip_identification}' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid - ) + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'A connection without DNS resolution to IP: ' \ + f'{daddr} {ip_identification}' + + twid_number: int = int(twid.replace("timewindow", "")) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.CONNECTION_WITHOUT_DNS, + attacker=attacker, + threat_level=threat_level, + source_target_tag=Tag.MALWARE, + category=IDEACategory.ANOMALY_CONNECTION, + description=description, + profile=ProfileID(ip=saddr), +
timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) - def set_evidence_dns_arpa_scan( - self, - arpa_scan_threshold, - stime, - profileid, - twid, - uid - ): + self.db.set_evidence(evidence) + + def dns_arpa_scan( + self, + arpa_scan_threshold: int, + stime: str, + profileid: str, + twid: str, + uid: List[str] + ) -> bool: + + threat_level = ThreatLevel.MEDIUM confidence = 0.7 - threat_level = 'medium' - category = 'Recon.Scanning' - attacker_direction = 'srcip' - evidence_type = 'DNS-ARPA-Scan' - description = f'doing DNS ARPA scan. Scanned ' \ - f'{arpa_scan_threshold} hosts within 2 seconds.' - attacker = profileid.split('_')[1] - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - stime, - category, + saddr = profileid.split("_")[-1] + + description = f"Doing DNS ARPA scan. Scanned {arpa_scan_threshold}" \ + f" hosts within 2 seconds." + # Store attacker details in a local variable + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + # Create Evidence object using local variables + evidence = Evidence( + evidence_type=EvidenceType.DNS_ARPA_SCAN, + description=description, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.RECON_SCANNING, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=uid, + timestamp=stime, conn_count=arpa_scan_threshold, - profileid=profileid, - twid=twid, - uid=uid - ) + confidence=confidence, + ) + + # Store evidence in the database + self.db.set_evidence(evidence) + + return True + - def set_evidence_unknown_port( + def unknown_port( self, - daddr, - dport, - proto, - timestamp, - profileid, - twid, - uid - ): - confidence = 1 - threat_level = 'high' - category = 'Anomaly.Connection' - attacker_direction = 'srcip' - evidence_type = 'UnknownPort' - attacker = profileid.split('_')[-1] - ip_identification = self.db.get_ip_identification(daddr) - description = ( + daddr: str, + dport: int, + proto: str, + timestamp: str, + profileid: str, + twid: str, + uid: str + ) -> None: + confidence: float = 1.0 + twid_number: int = int(twid.replace("timewindow", "")) + saddr = profileid.split('_')[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = ( f'Connection to unknown destination port {dport}/{proto.upper()} ' f'destination IP {daddr}. 
{ip_identification}' ) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - port=dport, - proto=proto, - profileid=profileid, - twid=twid, - uid=uid, - victim=daddr - ) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.UNKNOWN_PORT, + attacker=attacker, + victim=victim, + threat_level=ThreatLevel.HIGH, + category=IDEACategory.ANOMALY_CONNECTION, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence + ) - def set_evidence_pw_guessing( + self.db.set_evidence(evidence) + + def pw_guessing( self, - msg, - timestamp, - profileid, - twid, - uid, - by='' - ): + msg: str, + timestamp: str, + twid: str, + uid: str, + by: str = '' + ) -> None: # 222.186.30.112 appears to be guessing SSH passwords # (seen in 30 connections) # confidence = 1 because this detection is comming # from a zeek file so we're sure it's accurate - confidence = 1 - threat_level = 'high' - category = 'Attempt.Login' - evidence_type = 'Password_Guessing' - attacker_direction = 'srcip' - source_target_tag = 'Malware' - description = f'password guessing. {msg}. by {by}.' - scanning_ip = msg.split(' appears')[0] - conn_count = int(msg.split('in ')[1].split('connections')[0]) - - self.db.setEvidence( - evidence_type, attacker_direction, scanning_ip, threat_level, - confidence, description, - timestamp, - category, - source_target_tag=source_target_tag, + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.HIGH + twid_number: int = int(twid.replace("timewindow", "")) + scanning_ip: str = msg.split(' appears')[0] + + description: str = f'password guessing. {msg}. by {by}.' + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=scanning_ip + ) + + conn_count: int = int(msg.split('in ')[1].split('connections')[0]) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.PASSWORD_GUESSING, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.ATTEMPT_LOGIN, + description=description, + profile=ProfileID(ip=scanning_ip), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, conn_count=conn_count, - profileid=profileid, - twid=twid, - uid=uid - ) + confidence=confidence, + source_target_tag=Tag.MALWARE + ) + + self.db.set_evidence(evidence) - def set_evidence_horizontal_portscan( + + def horizontal_portscan( self, - msg, - timestamp, - profileid, - twid, - uid - ): - # 10.0.2.15 scanned at least 25 unique hosts on port 80/tcp in 0m33s - confidence = 1 - threat_level = 'high' - description = f'horizontal port scan by Zeek engine. {msg}' - evidence_type = 'HorizontalPortscan' - attacker_direction = 'srcip' - source_target_tag = 'Recon' - attacker = profileid.split('_')[-1] - category = 'Recon.Scanning' + msg: str, + timestamp: str, + profileid: str, + twid: str, + uid: str + ) -> None: + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.HIGH + twid_number: int = int(twid.replace("timewindow", "")) + saddr = profileid.split('_')[-1] + + description: str = f'horizontal port scan by Zeek engine.
{msg}' # get the number of unique hosts scanned on a specific port - conn_count = int(msg.split('least')[1].split('unique')[0]) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - source_target_tag=source_target_tag, + conn_count: int = int(msg.split('least')[1].split('unique')[0]) + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.HORIZONTAL_PORT_SCAN, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.RECON_SCANNING, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, conn_count=conn_count, - profileid=profileid, - twid=twid, - uid=uid - ) + confidence=confidence, + source_target_tag=Tag.RECON + ) + + self.db.set_evidence(evidence) + - def set_evidence_conn_to_private_ip( + def conn_to_private_ip( self, - proto, - daddr, - dport, - saddr, - profileid, - twid, - uid, - timestamp - ): + proto: str, + daddr: str, + dport: str, + saddr: str, + twid: str, + uid: str, + timestamp: str + ) -> None: + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.INFO + twid_number: int = int(twid.replace("timewindow", "")) + description: str = f'Connecting to private IP: {daddr} ' - confidence = 1 - threat_level = 'info' - description = f'Connecting to private IP: {daddr} ' if proto.lower() == 'arp' or dport == '': pass elif proto.lower() == 'icmp': @@ -594,112 +772,147 @@ def set_evidence_conn_to_private_ip( else: description += f'on destination port: {dport}' - evidence_type = 'ConnectionToPrivateIP' - category = 'Recon' - attacker_direction = 'srcip' - attacker = saddr - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, category, - profileid=profileid, - twid=twid, - uid=uid, - victim=daddr - ) + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.CONNECTION_TO_PRIVATE_IP, + attacker=attacker, + threat_level=threat_level, + category=IDEACategory.RECON, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=timestamp, + conn_count=1, + confidence=confidence, + victim=victim + ) + + self.db.set_evidence(evidence) + - def set_evidence_GRE_tunnel( + def GRE_tunnel( self, tunnel_info: dict - ): - tunnel_flow = tunnel_info['flow'] - profileid = tunnel_info['profileid'] - twid = tunnel_info['twid'] + ) -> None: + profileid: str = tunnel_info['profileid'] + twid: str = tunnel_info['twid'] + tunnel_flow: dict = tunnel_info['flow'] action = tunnel_flow['action'] daddr = tunnel_flow['daddr'] ts = tunnel_flow['starttime'] - uid = tunnel_flow['uid'] + uid: str = tunnel_flow['uid'] - ip_identification = self.db.get_ip_identification(daddr) - saddr = profileid.split('_')[-1] - description = f'GRE tunnel from {saddr} ' \ - f'to {daddr} {ip_identification} ' \ - f'tunnel action: {action}' - confidence = 1 - threat_level = 'info' - evidence_type = 'GRETunnel' - category = 'Info' - self.db.setEvidence( - evidence_type, - 'dstip', - daddr, - threat_level, - confidence, - description, - ts, - category, - profileid=profileid, -
twid=twid, - uid=uid, - ) + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.INFO + twid_number: int = int(twid.replace("timewindow", "")) - def set_evidence_vertical_portscan( - self, - msg, - scanning_ip, - timestamp, - profileid, - twid, - uid - ): - """ - @rtype: object - """ - # confidence = 1 because this detection is comming - # from a zeek file so we're sure it's accurate - confidence = 1 - threat_level = 'high' + ip_identification: str = self.db.get_ip_identification(daddr) + saddr: str = profileid.split('_')[-1] + description: str = f'GRE tunnel from {saddr} ' \ + f'to {daddr} {ip_identification} ' \ + f'tunnel action: {action}' + + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.GRE_TUNNEL, + attacker=attacker, + victim=victim, + threat_level=threat_level, + category=IDEACategory.INFO, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid_number), + uid=[uid], + timestamp=ts, + conn_count=1, + confidence=confidence + ) + + self.db.set_evidence(evidence) + + + def vertical_portscan( + self, + msg: str, + scanning_ip: str, + timestamp: str, + twid: str, + uid: str + ) -> None: + # confidence = 1 because this detection is coming + # from a Zeek file so we're sure it's accurate + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.HIGH + twid: int = int(twid.replace("timewindow", "")) # msg example: 192.168.1.200 has scanned 60 ports of 192.168.1.102 - description = f'vertical port scan by Zeek engine. {msg}' - evidence_type = 'VerticalPortscan' - category = 'Recon.Scanning' - attacker_direction = 'dstip' - source_target_tag = 'Recon' - conn_count = int(msg.split('least ')[1].split(' unique')[0]) - attacker = scanning_ip - victim = msg.split('ports of ')[-1] - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - source_target_tag=source_target_tag, + description: str = f'vertical port scan by Zeek engine. {msg}' + conn_count: int = int(msg.split('least ')[1].split(' unique')[0]) + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=scanning_ip + ) + + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=msg.split('ports of host ')[-1].split(" in")[0] + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.VERTICAL_PORT_SCAN, + attacker=attacker, + victim=victim, + threat_level=threat_level, + category=IDEACategory.RECON_SCANNING, + description=description, + profile=ProfileID(ip=scanning_ip), + timewindow=TimeWindow(number=twid), + uid=[uid], + timestamp=timestamp, conn_count=conn_count, - profileid=profileid, twid=twid, - uid=uid, - victim=victim - ) + confidence=confidence, + source_target_tag=Tag.RECON - def set_evidence_ssh_successful( + ) + + self.db.set_evidence(evidence) + + def ssh_successful( self, - profileid, - twid, - saddr, - daddr, + twid: str, + saddr: str, + daddr: str, size, - uid, - timestamp, + uid: str, + timestamp: str, by='', - ip_state='ip', - ): + ) -> None: """ Set an evidence for a successful SSH login. 
This is not strictly a detection, but we don't have @@ -707,494 +920,655 @@ def set_evidence_ssh_successful( The threat_level is 0.01 to show that this is not a detection """ - attacker_direction = 'srcip' - attacker = saddr - evidence_type = f'SSHSuccessful-by-{saddr}' - threat_level = 'info' - confidence = 0.8 - category = 'Infomation' - ip_identification = self.db.get_ip_identification(daddr) - description = ( + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.INFO + twid: int = int(twid.replace("timewindow", "")) + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = ( f'SSH successful to IP {daddr}. {ip_identification}. ' f'From IP {saddr}. Size: {str(size)}. Detection model {by}.' f' Confidence {confidence}' ) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid, - victim=daddr - ) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.SSH_SUCCESSFUL, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.INFO, + ) - def set_evidence_long_connection( - self, - attacker, - duration, - profileid, - twid, - uid, - timestamp, - attacker_direction='' - ): + self.db.set_evidence(evidence) + + def long_connection( + self, + daddr: str, + duration, + profileid: str, + twid: str, + uid: str, + timestamp, + ) -> None: """ Set an evidence for a long connection. """ - - evidence_type = 'LongConnection' - threat_level = 'low' - category = 'Anomaly.Connection' - # confidence depends on how long the connection - # scale the confidence from 0 to 1, 1 means 24 hours long - confidence = 1 / (3600 * 24) * (duration - 3600 * 24) + 1 + threat_level: ThreatLevel = ThreatLevel.LOW + twid: int = int(twid.replace("timewindow", "")) + # Confidence depends on how long the connection lasts. + # Scale the confidence from 0 to 1; 1 means 24 hours long. + confidence: float = 1 / (3600 * 24) * (duration - 3600 * 24) + 1 confidence = round(confidence, 2) - ip_identification = self.db.get_ip_identification(attacker) - # get the duration in minutes - duration = int(duration / 60) - srcip = profileid.split('_')[1] - description = f'Long Connection. Connection from {srcip} ' \ - f'to destination address: {attacker} ' \ - f'{ip_identification} took {duration} mins' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid, - victim=srcip - ) + # Get the duration in minutes. + duration_minutes: int = int(duration / 60) + srcip: str = profileid.split('_')[1] + + attacker_obj: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + + victim_obj: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) - def set_evidence_self_signed_certificates( + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = ( + f'Long Connection.
Connection from {srcip} ' + f'to destination address: {daddr} ' + f'{ip_identification} took {duration_minutes} mins' + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.LONG_CONNECTION, + attacker=attacker_obj, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=twid), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_CONNECTION, + victim=victim_obj + ) + + self.db.set_evidence(evidence) + + def self_signed_certificates( self, profileid, twid, - attacker, - description, - uid, - timestamp - ): + daddr, + uid: str, + timestamp, + server_name + ) -> None: """ - Set evidence for self signed certificates. + Set evidence for self-signed certificates. """ - confidence = 0.5 - threat_level = 'low' - category = 'Anomaly.Behaviour' - attacker_direction = 'dstip' - evidence_type = 'SelfSignedCertificate' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, category, profileid=profileid, twid=twid, uid=uid - ) + confidence: float = 0.5 + threat_level: ThreatLevel = ThreatLevel.LOW + saddr: str = profileid.split("_")[-1] + twid: int = int(twid.replace("timewindow", "")) + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + ip_identification: str = self.db.get_ip_identification(daddr) + description = f'Self-signed certificate. Destination IP: {daddr}.' \ + f' {ip_identification}' + + if server_name: + description += f' SNI: {server_name}.' + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.SELF_SIGNED_CERTIFICATE, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_BEHAVIOUR + ) + + self.db.set_evidence(evidence) - def set_evidence_for_multiple_reconnection_attempts( + def multiple_reconnection_attempts( self, profileid, twid, - attacker, - description, - uid, - timestamp - ): + daddr, + uid: List[str], + timestamp, + reconnections + ) -> None: """ Set evidence for Reconnection Attempts. 
""" - confidence = 0.5 - threat_level = 'medium' - category = 'Anomaly.Traffic' - attacker_direction = 'dstip' - evidence_type = 'MultipleReconnectionAttempts' - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, category, profileid=profileid, twid=twid, uid=uid - ) + confidence: float = 0.5 + threat_level: ThreatLevel = ThreatLevel.MEDIUM + saddr: str = profileid.split("_")[-1] + twid: int = int(twid.replace("timewindow", "")) + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) - def set_evidence_for_connection_to_multiple_ports( + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + + ip_identification = self.db.get_ip_identification(daddr) + description = ( + f'Multiple reconnection attempts to Destination IP:' + f' {daddr} {ip_identification} ' + f'from IP: {saddr} reconnections: {reconnections}' + ) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.MULTIPLE_RECONNECTION_ATTEMPTS, + attacker=attacker, + victim = victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid), + uid=uid, + timestamp=timestamp, + category=IDEACategory.ANOMALY_TRAFFIC + ) + + self.db.set_evidence(evidence) + + def connection_to_multiple_ports( self, - profileid, - twid, - ip, - description, - uid, - timestamp - ): + profileid: str, + twid: str, + uid: List[str], + timestamp: str, + dstports: list, + victim: str, + attacker: str, + ) -> None: """ Set evidence for connection to multiple ports. """ - confidence = 0.5 - threat_level = 'medium' - category = 'Anomaly.Connection' - attacker_direction = 'dstip' - evidence_type = 'ConnectionToMultiplePorts' - attacker = ip - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - profileid=profileid, twid=twid, uid=uid + confidence: float = 0.5 + threat_level: ThreatLevel = ThreatLevel.INFO + twid: int = int(twid.replace("timewindow", "")) + ip_identification = self.db.get_ip_identification(attacker) + description = f'Connection to multiple ports {dstports} of ' \ + f'IP: {attacker}. {ip_identification}' + + if attacker in profileid: + attacker_direction = Direction.SRC + victim_direction = Direction.DST + profile_ip = attacker + else: + attacker_direction = Direction.DST + victim_direction = Direction.SRC + profile_ip = victim + + victim: Victim = Victim( + direction=victim_direction, + victim_type=IoCType.IP, + value=victim ) + attacker: Attacker = Attacker( + direction=attacker_direction, + attacker_type=IoCType.IP, + value=attacker + ) + + evidence = Evidence( + evidence_type=EvidenceType.CONNECTION_TO_MULTIPLE_PORTS, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=profile_ip), + timewindow=TimeWindow(number=twid), + uid=uid, + timestamp=timestamp, + category=IDEACategory.ANOMALY_CONNECTION + ) - def set_evidence_suspicious_dns_answer( + self.db.set_evidence(evidence) + + def suspicious_dns_answer( self, - query, - answer, - entropy, - daddr, - profileid, - twid, - stime, - uid - ): - confidence = 0.6 - threat_level = 'medium' - category = 'Anomaly.Traffic' - evidence_type = 'HighEntropyDNSanswer' - attacker_direction = 'dstip' - attacker = daddr - description = f'A DNS TXT answer with high entropy. 
' \ - f'query: {query} answer: "{answer}" entropy: {round(entropy, 2)} ' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - stime, - category, - profileid=profileid, - twid=twid, - uid=uid + query: str, + answer: str, + entropy: float, + daddr: str, + profileid: str, + twid: str, + stime: str, + uid: str + ) -> None: + confidence: float = 0.6 + threat_level: ThreatLevel = ThreatLevel.MEDIUM + twid: int = int(twid.replace("timewindow", "")) + saddr: str = profileid.split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.DST, + attacker_type=IoCType.IP, + value=daddr + ) + victim: Victim = Victim( + direction=Direction.SRC, + victim_type=IoCType.IP, + value=saddr ) - def set_evidence_invalid_dns_answer( + description: str = f'A DNS TXT answer with high entropy. ' \ + f'query: {query} answer: "{answer}" ' \ + f'entropy: {round(entropy, 2)} ' + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.HIGH_ENTROPY_DNS_ANSWER, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=daddr), + timewindow=TimeWindow(number=twid), + uid=[uid], + timestamp=stime, + category=IDEACategory.ANOMALY_TRAFFIC + ) + + self.db.set_evidence(evidence) + + def invalid_dns_answer( self, - query, - answer, - daddr, - profileid, - twid, - stime, - uid - ): - evidence_type = "InvalidDNSResolution" - attacker_direction = "dst_domain" - attacker = query - threat_level = "info" - confidence = 0.7 - description = f"The DNS query {query} was resolved to {answer}" - timestamp = stime - category = "Anamoly.Behaviour" - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uid + query: str, + answer: str, + daddr: str, + profileid: str, + twid: str, + stime: str, + uid: str + ) -> None: + threat_level: ThreatLevel = ThreatLevel.INFO + confidence: float = 0.7 + twid: int = int(twid.replace("timewindow", "")) + saddr: str = profileid.split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr ) - def set_evidence_for_port_0_connection( - self, - saddr, - daddr, - sport, - dport, - direction, - profileid, - twid, - uid, - timestamp - ): - """:param direction: 'source' or 'destination'""" - confidence = 0.8 - threat_level = 'high' - category = 'Anomaly.Connection' - source_target_tag = 'Recon' - evidence_type = 'Port0Connection' - - if direction == 'source': - attacker = saddr - attacker_direction = 'srcip' - victim = daddr + description: str = f"The DNS query {query} was resolved to {answer}" + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.INVALID_DNS_RESOLUTION, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid), + uid=[uid], + timestamp=stime, + category=IDEACategory.ANOMALY_BEHAVIOUR + ) + + self.db.set_evidence(evidence) + + + def for_port_0_connection( + self, + saddr: str, + daddr: str, + sport: int, + dport: int, + profileid: str, + twid: str, + uid: str, + timestamp: str, + victim: str, + attacker: str + ) -> None: + confidence: float = 0.8 + threat_level: ThreatLevel = ThreatLevel.HIGH + + if attacker in profileid: 
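+ # profileid has the form 'profile_<ip>'; when the attacker IP is + # the profile owner, the evidence is filed under the attacker's + # profile, otherwise under the victim's (see profile_ip below).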
+ attacker_direction = Direction.SRC + victim_direction = Direction.DST + profile_ip = attacker else: - attacker = daddr - attacker_direction = 'dstip' - victim = saddr + attacker_direction = Direction.DST + victim_direction = Direction.SRC + profile_ip = victim + + victim: Victim = Victim( + direction=victim_direction, + victim_type=IoCType.IP, + value=victim + ) + attacker: Attacker = Attacker( + direction=attacker_direction, + attacker_type=IoCType.IP, + value=attacker + ) - ip_identification = self.db.get_ip_identification(daddr) - description = f'Connection on port 0 from {saddr}:{sport} ' \ - f'to {daddr}:{dport}. {ip_identification}.' + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'Connection on port 0 from {saddr}:{sport} ' \ + f'to {daddr}:{dport}. {ip_identification}.' + + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.PORT_0_CONNECTION, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=profile_ip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_CONNECTION, + source_target_tag=Tag.RECON, + conn_count=1 + ) - conn_count = 1 + self.db.set_evidence(evidence) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, description, - timestamp, - category, - source_target_tag=source_target_tag, - conn_count=conn_count, - profileid=profileid, - twid=twid, - uid=uid, - victim=victim - ) - def set_evidence_malicious_JA3( - self, - malicious_ja3_dict, - ip, - profileid, - twid, - uid, - timestamp, - victim, - type_='', - ioc='', - ): - malicious_ja3_dict = json.loads(malicious_ja3_dict[ioc]) - tags = malicious_ja3_dict.get('tags', '') - ja3_description = malicious_ja3_dict['description'] - threat_level = malicious_ja3_dict['threat_level'] + def malicious_ja3( + self, + malicious_ja3_dict: dict, + twid: str, + uid: str, + timestamp: str, + victim: str, + attacker: str, + type_: str = '', + ja3: str = '', + ) -> None: + """ + Set evidence for a malicious JA3 (client) or JA3S (server) + TLS fingerprint. + """ + ja3_info: dict = json.loads(malicious_ja3_dict[ja3]) + + threat_level: str = ja3_info['threat_level'].upper() + threat_level: ThreatLevel = ThreatLevel[threat_level] + + tags: str = ja3_info.get('tags', '') + ja3_description: str = ja3_info['description'] if type_ == 'ja3': - description = f'Malicious JA3: {ioc} from source address {ip} ' - evidence_type = 'MaliciousJA3' - category = 'Intrusion.Botnet' - source_target_tag = 'Botnet' - attacker_direction = 'srcip' + description = f'Malicious JA3: {ja3} from source address ' \ + f'{attacker} ' + evidence_type: EvidenceType = EvidenceType.MALICIOUS_JA3 + source_target_tag: Tag = Tag.BOTNET + attacker_direction: Direction = Direction.SRC + victim_direction: Direction = Direction.DST elif type_ == 'ja3s': description = ( - f'Malicious JA3s: (possible C&C server): {ioc} to server {ip} ' + f'Malicious JA3s: (possible C&C server): {ja3} to server ' + f'{attacker} ' ) - evidence_type = 'MaliciousJA3s' - category = 'Intrusion.Botnet' - source_target_tag = 'CC' - attacker_direction = 'dstip' - # append daddr identification to the description - ip_identification = self.db.get_ip_identification(ip) - description += f'{ip_identification} ' + evidence_type: EvidenceType = EvidenceType.MALICIOUS_JA3S + source_target_tag: Tag = Tag.CC + attacker_direction: Direction = Direction.DST + victim_direction: Direction = Direction.SRC + else: + return + # append
daddr identification to the description + ip_identification: str = self.db.get_ip_identification(attacker) + description += f'{ip_identification} ' if ja3_description != 'None': description += f'description: {ja3_description} ' - description += f'tags: {tags}' - attacker = ip - confidence = 1 - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid, - victim=victim - ) - def set_evidence_data_exfiltration( - self, - daddr, - src_mbs, - profileid, - twid, - uid, - ): - confidence = 0.6 - threat_level = 'high' - attacker_direction = 'dstip' - source_target_tag = 'OriginMalware' - evidence_type = 'DataUpload' - category = 'Malware' - attacker = daddr - ip_identification = self.db.get_ip_identification( - daddr - ) - description = f'Large data upload. {src_mbs} MBs sent to {daddr} ' - description += f'{ip_identification}' - timestamp = utils.convert_format( - datetime.datetime.now(), utils.alerts_format - ) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid - ) + attacker: Attacker = Attacker( + direction=attacker_direction, + attacker_type=IoCType.IP, + value=attacker + ) + victim: Victim = Victim( + direction=victim_direction, + victim_type=IoCType.IP, + value=victim + ) + confidence: float = 1 + evidence: Evidence = Evidence( + evidence_type=evidence_type, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=attacker.value), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.INTRUSION_BOTNET, + source_target_tag=source_target_tag + ) - def set_evidence_bad_smtp_login( - self, - saddr, - daddr, - stime, - profileid, - twid, - uid - ): - confidence = 1 - threat_level = 'high' - category = 'Attempt.Login' - evidence_type = 'BadSMTPLogin' - attacker_direction = 'srcip' - attacker = saddr - ip_identification = self.db.get_ip_identification(daddr) - description = ( - f'doing bad SMTP login to {daddr} {ip_identification}' - ) - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - stime, - category, - profileid=profileid, - twid=twid, + self.db.set_evidence(evidence) + + def data_exfiltration( + self, + daddr: str, + src_mbs: float, + profileid: str, + twid: str, + uid: List[str], + timestamp + ) -> None: + confidence: float = 0.6 + threat_level: ThreatLevel = ThreatLevel.HIGH + saddr: str = profileid.split("_")[-1] + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'Large data upload. 
{src_mbs} MBs ' \ + f'sent to {daddr} {ip_identification}' + timestamp: str = utils.convert_format(timestamp, utils.alerts_format) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.DATA_UPLOAD, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), uid=uid, - victim=daddr - ) + timestamp=timestamp, + category=IDEACategory.MALWARE, + source_target_tag=Tag.ORIGIN_MALWARE + ) - def set_evidence_smtp_bruteforce( + self.db.set_evidence(evidence) + + def bad_smtp_login( self, - flow: dict, - profileid, - twid, - uid, - smtp_bruteforce_threshold, - ): - saddr = flow['saddr'] - daddr = flow['daddr'] - stime = flow['starttime'] - - confidence = 1 - threat_level = 'high' - category = 'Attempt.Login' - attacker_direction = 'srcip' - evidence_type = 'SMTPLoginBruteforce' - ip_identification = self.db.get_ip_identification(daddr) - description = f'doing SMTP login bruteforce to {daddr}. ' \ - f'{smtp_bruteforce_threshold} logins in 10 seconds. ' \ - f'{ip_identification}' - attacker = saddr - conn_count = smtp_bruteforce_threshold - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - stime, - category, - conn_count=conn_count, - profileid=profileid, - twid=twid, - uid=uid, - victim=daddr + saddr: str, + daddr: str, + stime: str, + twid: str, + uid: str + ) -> None: + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.HIGH + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr ) + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'doing bad SMTP login to {daddr} ' \ + f'{ip_identification}' + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.BAD_SMTP_LOGIN, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=stime, + category=IDEACategory.ATTEMPT_LOGIN + ) - def set_evidence_malicious_ssl( + self.db.set_evidence(evidence) + + def smtp_bruteforce( + self, + flow: dict, + twid: str, + uid: List[str], + smtp_bruteforce_threshold: int, + ) -> None: + saddr: str = flow['saddr'] + daddr: str = flow['daddr'] + stime: str = flow['starttime'] + + confidence: float = 1.0 + threat_level: ThreatLevel = ThreatLevel.HIGH + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = ( + f'doing SMTP login bruteforce to {daddr}. ' + f'{smtp_bruteforce_threshold} logins in 10 seconds. 
' + f'{ip_identification}' + ) + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) + conn_count: int = smtp_bruteforce_threshold + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.SMTP_LOGIN_BRUTEFORCE, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=uid, + timestamp=stime, + category=IDEACategory.ATTEMPT_LOGIN, + conn_count=conn_count + ) + + self.db.set_evidence(evidence) + + def malicious_ssl( self, ssl_info: dict, ssl_info_from_db: dict - ): - """ - This function only works on zeek files.log flows - :param ssl_info: info about this ssl cert as found in zeek - :param ssl_info_from_db: ti feed, tags, description of this malicious cert - """ + ) -> None: flow: dict = ssl_info['flow'] - ts = flow.get('starttime', '') - daddr = flow.get('daddr', '') - uid = flow.get('uid', '') - - profileid = ssl_info.get('profileid', '') - twid = ssl_info.get('twid', '') + ts: str = flow.get('starttime', '') + daddr: str = flow.get('daddr', '') + uid: str = flow.get('uid', '') + twid: str = ssl_info.get('twid', '') + + ssl_info_from_db: dict = json.loads(ssl_info_from_db) + tags: str = ssl_info_from_db['tags'] + cert_description: str = ssl_info_from_db['description'] + + confidence: float = 1.0 + threat_level: float = utils.threat_levels[ + ssl_info_from_db['threat_level'] + ] + threat_level: ThreatLevel = ThreatLevel(threat_level) + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = f'Malicious SSL certificate to server {daddr}.' \ + f'{ip_identification} description: ' \ + f'{cert_description} {tags} ' + + + attacker: Attacker = Attacker( + direction=Direction.DST, + attacker_type=IoCType.IP, + value=daddr + ) - ssl_info_from_db = json.loads(ssl_info_from_db) - tags = ssl_info_from_db['tags'] - cert_description = ssl_info_from_db['description'] - threat_level = ssl_info_from_db['threat_level'] + evidence: Evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_SSL_CERT, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=daddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=ts, + category=IDEACategory.INTRUSION_BOTNET, + source_target_tag=Tag.CC + ) - description = f'Malicious SSL certificate to server {daddr}.' 
- # append daddr identification to the description - ip_identification = self.db.get_ip_identification(daddr) - description += ( - f'{ip_identification} description: {cert_description} {tags} ' - ) - - evidence_type = 'MaliciousSSLCert' - category = 'Intrusion.Botnet' - source_target_tag = 'CC' - attacker_direction = 'dstip' - - attacker = daddr - confidence = 1 - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - ts, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid - ) + self.db.set_evidence(evidence) diff --git a/modules/flowmldetection/flowmldetection.py b/modules/flowmldetection/flowmldetection.py index 48827e9d5..f1d2c1af4 100644 --- a/modules/flowmldetection/flowmldetection.py +++ b/modules/flowmldetection/flowmldetection.py @@ -1,5 +1,3 @@ -from slips_files.common.abstracts._module import IModule -from slips_files.common.imports import * from sklearn.linear_model import SGDClassifier from sklearn.preprocessing import StandardScaler import pickle @@ -7,6 +5,21 @@ import json import datetime import traceback + +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + ) + # Only for debbuging # from matplotlib import pyplot as plt @@ -20,7 +33,7 @@ def warn(*args, **kwargs): warnings.warn = warn -class FlowMLDetection(IModule, multiprocessing.Process): +class FlowMLDetection(IModule): # Name: short name of the module. Do not use spaces name = 'Flow ML Detection' description = ( @@ -82,7 +95,7 @@ def train(self): ) except Exception: self.print('Error while calling clf.train()') - self.print(traceback.print_exc()) + self.print(traceback.format_exc()) # See score so far in training score = self.clf.score(X_flow, y_flow) @@ -102,7 +115,7 @@ def train(self): except Exception: self.print('Error in train()', 0 , 1) - self.print(traceback.print_exc(), 0, 1) + self.print(traceback.format_exc(), 0, 1) def process_features(self, dataset): @@ -203,7 +216,7 @@ def process_features(self, dataset): except Exception: # Stop the timer self.print('Error in process_features()') - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(),0,1) def process_flows(self): """ @@ -282,7 +295,7 @@ def process_flows(self): except Exception: # Stop the timer self.print('Error in process_flows()') - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(),0,1) def process_flow(self): """ @@ -299,7 +312,7 @@ def process_flow(self): except Exception: # Stop the timer self.print('Error in process_flow()') - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(),0,1) def detect(self): """ @@ -320,7 +333,7 @@ def detect(self): # Stop the timer self.print('Error in detect() X_flow:') self.print(X_flow) - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(),0,1) def store_model(self): """ @@ -362,22 +375,47 @@ def read_model(self): ) def set_evidence_malicious_flow( - self, saddr, sport, daddr, dport, profileid, twid, uid - ): - """ - Set the evidence that a flow was detected as malicious - """ - confidence = 0.1 - threat_level = 'low' - attacker_direction = 'flow' - category = 'Anomaly.Traffic' - attacker = f'{str(saddr)}:{str(sport)}-{str(daddr)}:{str(dport)}' - evidence_type = 'MaliciousFlow' + self, + saddr: str, + sport: str, + daddr: str, + dport: str, +
twid: str, + uid: str + ): + confidence: float = 0.1 + threat_level: ThreatLevel = ThreatLevel.LOW + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + ip_identification = self.db.get_ip_identification(daddr) - description = f'Malicious flow by ML. Src IP {saddr}:{sport} to {daddr}:{dport} {ip_identification}' - timestamp = utils.convert_format(datetime.datetime.now(), utils.alerts_format) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, profileid=profileid, twid=twid) + description = f'Malicious flow by ML. Src IP {saddr}:{sport} to ' \ + f'{daddr}:{dport} {ip_identification}' + + timestamp = utils.convert_format( + datetime.datetime.now(), + utils.alerts_format + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_FLOW, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_TRAFFIC + ) + + self.db.set_evidence(evidence) + def shutdown_gracefully(self): # Confirm that the module is done processing @@ -461,7 +499,6 @@ def main(self): self.flow_dict['sport'], self.flow_dict['daddr'], self.flow_dict['dport'], - profileid, twid, uid, ) diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index d3b47a75b..f502387ed 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -1,12 +1,27 @@ from slips_files.common.abstracts._module import IModule -from slips_files.common.imports import * import json import urllib import requests from typing import Union - -class HTTPAnalyzer(IModule, multiprocessing.Process): +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + +class HTTPAnalyzer(IModule): # Name: short name of the module. 
Do not use spaces name = 'HTTP Analyzer' description = 'Analyze HTTP flows' @@ -21,7 +36,12 @@ def init(self): self.empty_connections_threshold = 4 # this is a list of hosts known to be resolved by malware # to check your internet connection - self.hosts = ['bing.com', 'google.com', 'yandex.com', 'yahoo.com', 'duckduckgo.com', 'gmail.com'] + self.hosts = ['bing.com', + 'google.com', + 'yandex.com', + 'yahoo.com', + 'duckduckgo.com', + 'gmail.com'] self.read_configuration() self.executable_mime_types = [ 'application/x-msdownload', @@ -38,7 +58,9 @@ def init(self): def read_configuration(self): conf = ConfigParser() - self.pastebin_downloads_threshold = conf.get_pastebin_download_threshold() + self.pastebin_downloads_threshold = ( + conf.get_pastebin_download_threshold() + ) def detect_executable_mime_types(self, resp_mime_types: list) -> bool: """ @@ -54,7 +76,7 @@ def detect_executable_mime_types(self, resp_mime_types: list) -> bool: return False def check_suspicious_user_agents( - self, uid, host, uri, timestamp, user_agent, profileid, twid + self, uid: str, host, uri, timestamp, user_agent, profileid, twid ): """Check unusual user agents and set evidence""" @@ -67,33 +89,61 @@ def check_suspicious_user_agents( ) for suspicious_ua in suspicious_user_agents: if suspicious_ua.lower() in user_agent.lower(): - attacker_direction = 'srcip' - source_target_tag = 'SuspiciousUserAgent' - attacker = profileid.split('_')[1] - evidence_type = 'SuspiciousUserAgent' - threat_level = 'high' - category = 'Anomaly.Behaviour' - confidence = 1 - victim = f'{host}{uri}' - description = f'suspicious user-agent: {user_agent} while connecting to {victim}' - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, - description, timestamp, category, source_target_tag=source_target_tag, - profileid=profileid, twid=twid, uid=uid, victim=victim) + threat_level: ThreatLevel = ThreatLevel.HIGH + confidence: float = 1 + saddr = profileid.split('_')[1] + description: str = (f'Suspicious user-agent: ' + f'{user_agent} while ' + f'connecting to {host}{uri}') + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.SUSPICIOUS_USER_AGENT, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_TRAFFIC, + source_target_tag=Tag.SUSPICIOUS_USER_AGENT, + ) + + self.db.set_evidence(evidence) return True return False def check_multiple_empty_connections( - self, uid, contacted_host, timestamp, request_body_len, profileid, twid + self, + uid: str, + contacted_host: str, + uri: str, + timestamp: str, + request_body_len: int, + profileid: str, + twid: str ): """ - Detects more than 4 empty connections to google, bing, yandex and yahoo on port 80 + Detects more than 4 empty connections to + google, bing, yandex and yahoo on port 80 + and evidence is generated only when the 4 connections have an empty URI """ # to test this wget google.com:80 twice # wget makes multiple connections per command, # 1 to google.com and another one to www.google.com + if uri != '/': + # empty connection detections are only done when we go to bing.com, + # bing.com/something seems benign + return False for host in self.hosts: - if contacted_host in [host, f'www.{host}'] and request_body_len == 0: + if (contacted_host in [host,
f'www.{host}'] + and request_body_len == 0): try: # this host has past connections, add to counter uids, connections = self.connections_counter[host] @@ -112,77 +162,134 @@ def check_multiple_empty_connections( uids, connections = self.connections_counter[host] if connections == self.empty_connections_threshold: - evidence_type = 'EmptyConnections' - attacker_direction = 'srcip' - attacker = profileid.split('_')[0] - threat_level = 'medium' - category = 'Anomaly.Connection' - confidence = 1 - description = f'multiple empty HTTP connections to {host}' - self.db.setEvidence(evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid, - twid=twid, - uid=uids, - victim=host) + threat_level: ThreatLevel = ThreatLevel.MEDIUM + confidence: float = 1 + saddr: str = profileid.split('_')[-1] + description: str = f'Multiple empty HTTP connections to {host}' + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.EMPTY_CONNECTIONS, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=uids, + timestamp=timestamp, + category=IDEACategory.ANOMALY_CONNECTION, + ) + + self.db.set_evidence(evidence) # reset the counter self.connections_counter[host] = ([], 0) return True return False def set_evidence_incompatible_user_agent( - self, host, uri, vendor, user_agent, timestamp, profileid, twid, uid + self, + host, + uri, + vendor, + user_agent, + timestamp, + profileid, + twid, + uid: str ): - attacker_direction = 'srcip' - source_target_tag = 'IncompatibleUserAgent' - attacker = profileid.split('_')[1] - evidence_type = 'IncompatibleUserAgent' - threat_level = 'high' - category = 'Anomaly.Behaviour' - confidence = 1 - os_type = user_agent.get('os_type', '').lower() - os_name = user_agent.get('os_name', '').lower() - browser = user_agent.get('browser', '').lower() - user_agent = user_agent.get('user_agent', '') - victim = f'{host}{uri}' - description = ( - f'using incompatible user-agent ({user_agent}) that belongs to OS: {os_name} ' + threat_level: ThreatLevel = ThreatLevel.HIGH + saddr = profileid.split('_')[1] + confidence: float = 1 + + os_type: str = user_agent.get('os_type', '').lower() + os_name: str = user_agent.get('os_name', '').lower() + browser: str = user_agent.get('browser', '').lower() + user_agent: str = user_agent.get('user_agent', '') + description: str = ( + f'using incompatible user-agent ({user_agent}) ' + f'that belongs to OS: {os_name} ' f'type: {os_type} browser: {browser}. ' - f'while connecting to {victim}. ' + f'while connecting to {host}{uri}. 
' f'IP has MAC vendor: {vendor.capitalize()}' ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, profileid=profileid, - twid=twid, uid=uid, victim=victim) - - def report_executable_mime_type(self, mime_type, attacker, profileid, twid, uid, timestamp): - confidence = 1 - threat_level = 'low' - source_target_tag = 'ExecutableMIMEType' - category = 'Anomaly.File' - evidence_type = 'ExecutableMIMEType' - attacker_direction = 'dstip' - srcip = profileid.split('_')[1] - ip_identification = self.db.get_ip_identification(attacker) - description = f'download of an executable with mime type: {mime_type} ' \ - f'by {srcip} from {attacker} {ip_identification}.' - - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, profileid=profileid, - twid=twid, uid=uid) + + attacker: Attacker = Attacker( + direction= Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.INCOMPATIBLE_USER_AGENT, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_BEHAVIOUR, + ) + + self.db.set_evidence(evidence) + + + def set_evidence_executable_mime_type( + self, + mime_type: str, + profileid: str, + twid: str, + uid: str, + timestamp: str, + daddr: str + ): + confidence: float = 1 + threat_level: ThreatLevel = ThreatLevel.LOW + saddr: str = profileid.split('_')[1] + + attacker_obj: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = ( + f'Download of an executable with MIME type: {mime_type} ' + f'by {saddr} from {daddr} {ip_identification}.' 
+ ) + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.EXECUTABLE_MIME_TYPE, + attacker=attacker_obj, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_FILE, + source_target_tag=Tag.EXECUTABLE_MIME_TYPE + ) + + self.db.set_evidence(evidence) + def check_incompatible_user_agent( - self, host, uri, timestamp, profileid, twid, uid + self, host, uri, timestamp, profileid, twid, uid: str ): """ - Compare the user agent of this profile to the MAC vendor and check incompatibility + Compare the user agent of this profile to the MAC vendor + and check incompatibility """ # get the mac vendor vendor: Union[str, None] = self.db.get_mac_vendor_from_profile( @@ -220,8 +327,10 @@ def check_incompatible_user_agent( for keyword in tuple_: if keyword in vendor: # this means this computer belongs to this org - # create a copy of the os_keywords list without the correct org - # FOR EXAMPLE if the mac vendor is apple, the os_keyword should be + # create a copy of the os_keywords list + # without the correct org + # FOR EXAMPLE if the mac vendor is apple, + # the os_keyword should be # [('microsoft', 'windows', 'NT'), ('android'), ('linux')] os_keywords.pop(os_keywords.index(tuple_)) found_vendor_tuple = True @@ -239,7 +348,8 @@ def check_incompatible_user_agent( for keyword in tuple_: if keyword in f'{os_name} {os_type}': # from the same example, - # this means that one of these keywords [('microsoft', 'windows', 'NT'), ('android'), ('linux')] + # this means that one of these keywords + # [('microsoft', 'windows', 'NT'), ('android'), ('linux')] # is found in the UA that belongs to an apple device self.set_evidence_incompatible_user_agent( host, @@ -256,7 +366,8 @@ def check_incompatible_user_agent( def get_ua_info_online(self, user_agent): """ - Get OS and browser info about a use agent from an online database http://useragentstring.com + Get OS and browser info about a user agent from an online database + http://useragentstring.com """ url = 'http://useragentstring.com/' params = { @@ -269,15 +380,19 @@ def get_ua_info_online(self, user_agent): response = requests.get(url, params=params, timeout=5) if response.status_code != 200 or not response.text: raise requests.exceptions.ConnectionError - except requests.exceptions.ConnectionError: + except (requests.exceptions.ConnectionError, + requests.exceptions.ReadTimeout): return False # returns the following - # {"agent_type":"Browser","agent_name":"Internet Explorer","agent_version":"8.0", - # "os_type":"Windows","os_name":"Windows 7","os_versionName":"","os_versionNumber":"", - # "os_producer":"","os_producerURL":"","linux_distibution":"Null","agent_language":"","agent_languageTag":""} + # {"agent_type":"Browser","agent_name":"Internet Explorer", + # "agent_version":"8.0", "os_type":"Windows","os_name":"Windows 7", + # "os_versionName":"","os_versionNumber":"", + # "os_producer":"","os_producerURL":"","linux_distibution" + # :"Null","agent_language":"","agent_languageTag":""} try: - # responses from this domain are broken for now.
so this + # is a temp fix until they fix it from their side json_response = json.loads(response.text) except json.decoder.JSONDecodeError: # unexpected server response @@ -295,7 +410,8 @@ def get_user_agent_info(self, user_agent: str, profileid: str): # keep a history of the past user agents self.db.add_all_user_agent_to_profile(profileid, user_agent) - # don't make a request again if we already have a user agent associated with this profile + # don't make a request again if we already have a + # user agent associated with this profile if self.db.get_user_agent_from_profile(profileid) is not None: # this profile already has a user agent return False @@ -307,8 +423,9 @@ def get_user_agent_info(self, user_agent: str, profileid: str): } if ua_info := self.get_ua_info_online(user_agent): - # the above website returns unknown if it has no info about this UA, - # remove the 'unknown' from the string before storing in the db + # the above website returns unknown if it has + # no info about this UA, remove the 'unknown' from the string + # before storing in the db os_type = ( ua_info.get('os_type', '') .replace('unknown', '') @@ -338,8 +455,8 @@ def get_user_agent_info(self, user_agent: str, profileid: str): def extract_info_from_UA(self, user_agent, profileid): """ - Zeek sometimes collects info about a specific UA, in this case the UA starts with - 'server-bag' + Zeek sometimes collects info about a specific UA, + in this case the UA starts with 'server-bag' """ if self.db.get_user_agent_from_profile(profileid) is not None: # this profile already has a user agent @@ -367,14 +484,13 @@ def extract_info_from_UA(self, user_agent, profileid): return UA_info def check_multiple_UAs( - self, - cached_ua: dict, - user_agent: dict, - timestamp, - profileid, - twid, - uid, - ): + self, + cached_ua: dict, + user_agent: dict, + timestamp, + profileid, + twid, + uid: str): """ Detect if the user is using an Apple UA, then android, then linux etc. Doesn't check multiple ssh clients @@ -383,61 +499,93 @@ def check_multiple_UAs( """ if not cached_ua or not user_agent: return False + os_type = cached_ua['os_type'] os_name = cached_ua['os_name'] - # todo now the first UA seen is considered the only valid one and slips - # will setevidence everytime another one is used, is that correct? 
+ for keyword in (os_type, os_name): # loop through each word in UA if keyword in user_agent: - # for example if the os of the cached UA is Linux and the current UA - # is Mozilla/5.0 (X11; Fedora;Linux x86; rv:60.0) - # we will find the keyword 'Linux' in both UAs, so we shouldn't alert + # for example if the os of the cached UA is + # Linux and the current UA is Mozilla/5.0 (X11; + # Fedora;Linux x86; rv:60.0) we will find the keyword + # 'Linux' in both UAs, so we shouldn't alert return False - attacker_direction = 'srcip' - source_target_tag = 'MultipleUserAgent' - attacker = profileid.split('_')[1] - evidence_type = 'MultipleUserAgent' - threat_level = 'info' - category = 'Anomaly.Behaviour' - confidence = 1 - ua = cached_ua.get('user_agent', '') - description = ( - f'using multiple user-agents: "{ua}" then "{user_agent}"' + threat_level: ThreatLevel = ThreatLevel.INFO + confidence: float = 1 + saddr: str = profileid.split('_')[1] + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + ua: str = cached_ua.get('user_agent', '') + description: str = (f'Using multiple user-agents:' + f' "{ua}" then "{user_agent}"') + + evidence: Evidence = Evidence( + evidence_type=EvidenceType.MULTIPLE_USER_AGENT, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_BEHAVIOUR, + source_target_tag=Tag.MULTIPLE_USER_AGENT ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, profileid=profileid, - twid=twid, uid=uid) + + self.db.set_evidence(evidence) + return True - def set_evidence_http_traffic(self, daddr, profileid, twid, uid, timestamp): - """ - Detect when a new HTTP flow is found stating that the traffic is unencrypted - """ - confidence = 1 - threat_level = 'low' - source_target_tag = 'SendingUnencryptedData' - category = 'Anomaly.Traffic' - evidence_type = 'HTTPtraffic' - attacker_direction = 'srcip' - attacker = daddr + def set_evidence_http_traffic( + self, + daddr: str, + profileid: str, + twid: str, + uid: str, + timestamp: str + ): + confidence: float = 1 + threat_level: ThreatLevel = ThreatLevel.LOW saddr = profileid.split('_')[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=daddr + ) description = f'Unencrypted HTTP traffic from {saddr} to {daddr}.' 
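+        # in the new evidence structure the flow's source IP is modelled
+        # as the Attacker and the contacted IP as the Victim; the evidence
+        # is filed under the source IP's profile and its time window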
- self.db.setEvidence(evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.HTTP_TRAFFIC, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_TRAFFIC, + source_target_tag=Tag.SENDING_UNENCRYPTED_DATA, + victim=victim + ) + + self.db.set_evidence(evidence) + return True @@ -460,20 +608,34 @@ def check_pastebin_downloads( if ('pastebin' in ip_identification and response_body_len > self.pastebin_downloads_threshold and method == 'GET'): - attacker_direction = 'dstip' - source_target_tag = 'Malware' - attacker = daddr - evidence_type = 'PastebinDownload' - threat_level = 'info' - category = 'Anomaly.Behaviour' - confidence = 1 + confidence: float = 1 + threat_level: ThreatLevel = ThreatLevel.INFO + saddr = profileid.split('_')[1] + response_body_len = utils.convert_to_mb(response_body_len) - description = ( - f'A downloaded file from pastebin.com. size: {response_body_len} MBs' + description: str = f'A downloaded file from pastebin.com. ' \ + f'Size: {response_body_len} MBs' + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + evidence: Evidence = Evidence( + evidence_type= EvidenceType.PASTEBIN_DOWNLOAD, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.ANOMALY_BEHAVIOUR, + source_target_tag=Tag.MALWARE ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, - description, timestamp, category, source_target_tag=source_target_tag, - profileid=profileid, twid=twid, uid=uid) + + self.db.set_evidence(evidence) return True @@ -501,7 +663,7 @@ def main(self): uid, host, uri, timestamp, user_agent, profileid, twid ) self.check_multiple_empty_connections( - uid, host, timestamp, request_body_len, profileid, twid + uid, host, uri, timestamp, request_body_len, profileid, twid ) # find the UA of this profileid if we don't have it # get the last used ua of this profile @@ -524,7 +686,8 @@ def main(self): and cached_ua.get('user_agent', '') != user_agent and 'server-bag' not in user_agent) ): - # only UAs of type dict are browser UAs, skips str UAs as they are SSH clients + # only UAs of type dict are browser UAs, + # skips str UAs as they are SSH clients self.get_user_agent_info( user_agent, profileid @@ -537,13 +700,13 @@ def main(self): ) if self.detect_executable_mime_types(resp_mime_types): - self.report_executable_mime_type( + self.set_evidence_executable_mime_type( resp_mime_types, - daddr, profileid, twid, uid, - timestamp + timestamp, + daddr ) self.check_incompatible_user_agent( diff --git a/modules/ip_info/ip_info.py b/modules/ip_info/ip_info.py index ca77a5b8a..adefe6390 100644 --- a/modules/ip_info/ip_info.py +++ b/modules/ip_info/ip_info.py @@ -1,6 +1,4 @@ -from slips_files.common.imports import * from modules.ip_info.jarm import JARM -from .asn_info import ASN import platform import sys from typing import Union @@ -16,10 +14,27 @@ import re import time import asyncio -from 
slips_files.common.slips_utils import utils - -class IPInfo(IModule, multiprocessing.Process): +from slips_files.common.imports import * +from .asn_info import ASN +from slips_files.common.slips_utils import utils +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Attacker, + Proto, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + +class IPInfo(IModule): # Name: short name of the module. Do not use spaces name = 'IP Info' description = 'Get different info about an IP/MAC address' @@ -490,36 +505,51 @@ def wait_for_dbs(self): def set_evidence_malicious_jarm_hash( self, - flow, - uid, - profileid, - twid, + flow: dict, + twid: str, ): - dport = flow['dport'] - dstip = flow['daddr'] - timestamp = flow['starttime'] - protocol = flow['proto'] - - evidence_type = 'MaliciousJARM' - attacker_direction = 'dstip' - source_target_tag = 'Malware' - attacker = dstip - threat_level = 'medium' + dport: int = flow['dport'] + dstip: str = flow['daddr'] + saddr: str = flow['saddr'] + timestamp: float = flow['starttime'] + protocol: str = flow['proto'] + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + threat_level = ThreatLevel.MEDIUM confidence = 0.7 - category = 'Anomaly.Traffic' + portproto = f'{dport}/{protocol}' - port_info = self.db.get_port_info(portproto) - port_info = port_info or "" + port_info = self.db.get_port_info(portproto) or "" port_info = f'({port_info.upper()})' if port_info else "" + dstip_id = self.db.get_ip_identification(dstip) description = ( - f"Malicious JARM hash detected for destination IP: {dstip}" - f" on port: {portproto} {port_info}. {dstip_id}" + f"Malicious JARM hash detected for destination IP: {dstip}" + f" on port: {portproto} {port_info}. 
{dstip_id}" ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, - port=dport, proto=protocol, profileid=profileid, twid=twid, uid=uid) + evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_JARM, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[flow['uid']], + timestamp=timestamp, + category=IDEACategory.ANOMALY_TRAFFIC, + proto=Proto(protocol.lower()), + port=dport, + source_target_tag=Tag.MALWARE + ) + + self.db.set_evidence(evidence) + def pre_main(self): utils.drop_root_privs() @@ -598,3 +628,29 @@ def main(self): ip = msg['data'] self.handle_new_ip(ip) + if msg:= self.get_msg('check_jarm_hash'): + # example of a msg + # {'attacker_type': 'ip', + # 'profileid': 'profile_192.168.1.9', 'twid': 'timewindow1', + # 'flow': {'starttime': 1700828217.923668, + # 'uid': 'CuTCcR1Bbp9Je7LVqa', 'saddr': '192.168.1.9', + # 'daddr': '45.33.32.156', 'dur': 0.20363497734069824, + # 'proto': 'tcp', 'appproto': '', 'sport': 50824, 'dport': 443, + # 'spkts': 1, 'dpkts': 1, 'sbytes': 0, 'dbytes': 0, + # 'smac': 'c4:23:60:3d:fd:d3', 'dmac': '50:78:b3:b0:08:ec', + # 'state': 'REJ', 'history': 'Sr', 'type_': 'conn', 'dir_': '->'}, + # 'uid': 'CuTCcR1Bbp9Je7LVqa'} + + msg: dict = json.loads(msg['data']) + flow: dict = msg['flow'] + if msg['attacker_type'] == 'ip': + jarm_hash: str = self.JARM.JARM_hash( + flow['daddr'], + flow['dport'] + ) + + if self.db.is_malicious_jarm(jarm_hash): + self.set_evidence_malicious_jarm_hash( + flow, + msg['twid'] + ) diff --git a/modules/leak_detector/leak_detector.py b/modules/leak_detector/leak_detector.py index 53a23b50f..3471dd6bd 100644 --- a/modules/leak_detector/leak_detector.py +++ b/modules/leak_detector/leak_detector.py @@ -1,4 +1,3 @@ -from slips_files.common.imports import * import sys import base64 import time @@ -8,7 +7,25 @@ import json import shutil -class LeakDetector(IModule, multiprocessing.Process): +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + Proto, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + +class LeakDetector(IModule): # Name: short name of the module. Do not use spaces name = 'Leak Detector' description = 'Detect leaks of data in the traffic' @@ -171,47 +188,53 @@ def set_evidence_yara_match(self, info: dict): uid = base64.b64encode(binascii.b2a_hex(os.urandom(9))).decode( 'utf-8' ) - src_profileid = f'profile_{srcip}' - dst_profileid = f'profile_{dstip}' + profileid = f'profile_{srcip}' # sometimes this module tries to find the profile before it's created. so # wait a while before alerting. time.sleep(4) - # make sure we have a profile for any of the above IPs - if self.db.has_profile(src_profileid): - attacker_direction = 'dstip' - victim = srcip - profileid = src_profileid - attacker = dstip - ip_identification = self.db.get_ip_identification(dstip) - description = f"{rule} to destination address: {dstip} {ip_identification} port: {portproto} {port_info or ''}. 
Leaked location: {strings_matched}" - - elif self.db.has_profile(dst_profileid): - attacker_direction = 'srcip' - victim = dstip - profileid = dst_profileid - attacker = srcip - ip_identification = self.db.get_ip_identification(srcip) - description = f"{rule} to destination address: {srcip} {ip_identification} port: {portproto} {port_info or ''}. Leaked location: {strings_matched}" - - else: - # no profiles in slips for either IPs - return + + ip_identification = self.db.get_ip_identification(dstip) + description = f"{rule} to destination address: {dstip} " \ + f"{ip_identification} port: {portproto} " \ + f"{port_info or ''}. " \ + f"Leaked location: {strings_matched}" # in which tw is this ts? - twid = self.db.getTWofTime(profileid, ts) + twid = self.db.get_tw_of_ts(profileid, ts) # convert ts to a readable format ts = utils.convert_format(ts, utils.alerts_format) + if twid: twid = twid[0] - source_target_tag = 'CC' - # TODO: this needs to be changed if add more rules to the rules/dir - evidence_type = 'NETWORK_gps_location_leaked' - category = 'Malware' + source_target_tag = Tag.CC confidence = 0.9 - threat_level = 'high' - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, - description, ts, category, source_target_tag=source_target_tag, port=dport, - proto=proto, profileid=profileid, twid=twid, uid=uid, victim=victim) + threat_level = ThreatLevel.HIGH + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + + + evidence = Evidence( + evidence_type=EvidenceType.NETWORK_GPS_LOCATION_LEAKED, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=ts, + proto=Proto(proto.lower()), + port=dport, + source_target_tag=source_target_tag, + category=IDEACategory.MALWARE + ) + + self.db.set_evidence(evidence) + def compile_and_save_rules(self): """ diff --git a/modules/network_discovery/horizontal_portscan.py b/modules/network_discovery/horizontal_portscan.py index 402870585..6b73ae772 100644 --- a/modules/network_discovery/horizontal_portscan.py +++ b/modules/network_discovery/horizontal_portscan.py @@ -1,6 +1,22 @@ -from slips_files.common.imports import * import ipaddress +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + Proto, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + class HorizontalPortscan(): def __init__(self, db): @@ -272,43 +288,41 @@ def decide_if_time_to_set_evidence_or_combine( return False - def set_evidence_horizontal_portscan( - self, - evidence: dict - ): - evidence_type = 'HorizontalPortscan' - attacker_direction = 'srcip' - source_target_tag = 'Recon' + def set_evidence_horizontal_portscan(self, evidence: dict): + threat_level = ThreatLevel.HIGH + confidence = utils.calculate_confidence(evidence["pkts_sent"]) srcip = evidence["profileid"].split('_')[-1] - attacker = srcip - threat_level = 'high' - category = 'Recon.Scanning' + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) portproto = f'{evidence["dport"]}/{evidence["protocol"]}' - port_info = self.db.get_port_info(portproto) - port_info = port_info or "" - confidence = utils.calculate_confidence(evidence["pkts_sent"]) + port_info = self.db.get_port_info(portproto) or "" 
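+        # shape of the `evidence` dict as consumed below (an illustrative
+        # sketch; only the keys come from this code, the values are examples):
+        #   {'profileid': 'profile_10.0.0.1', 'twid': 'timewindow2',
+        #    'uids': [...], 'timestamp': ..., 'dport': 23,
+        #    'protocol': 'TCP', 'pkts_sent': 30, 'amount_of_dips': 10}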
description = ( - f'horizontal port scan to port {port_info} {portproto}. ' - f'From {srcip} to {evidence["amount_of_dips"]} unique dst IPs. ' + f'Horizontal port scan to port {port_info} {portproto}. ' + f'From {srcip} to {evidence["amount_of_dips"]} unique destination IPs. ' f'Total packets sent: {evidence["pkts_sent"]}. ' f'Threat Level: {threat_level}. ' f'Confidence: {confidence}. by Slips' ) - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - evidence["timestamp"], - category, - source_target_tag=source_target_tag, + evidence = Evidence( + evidence_type=EvidenceType.HORIZONTAL_PORT_SCAN, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(evidence["twid"].replace("timewindow", ""))), + uid=evidence["uids"], + timestamp=evidence["timestamp"], + category=IDEACategory.RECON_SCANNING, conn_count=evidence["pkts_sent"], - port=evidence["dport"], - proto=evidence["protocol"], - profileid=evidence["profileid"], - twid=evidence["twid"], - uid=evidence["uids"] - ) + proto=Proto(evidence["protocol"].lower()), + source_target_tag=Tag.RECON, + port=evidence["dport"] + ) + + self.db.set_evidence(evidence) diff --git a/modules/network_discovery/network_discovery.py b/modules/network_discovery/network_discovery.py index d5f76f9e0..bcdebf0da 100644 --- a/modules/network_discovery/network_discovery.py +++ b/modules/network_discovery/network_discovery.py @@ -1,11 +1,27 @@ -from slips_files.common.imports import * import json +from typing import List + +from slips_files.common.imports import * from modules.network_discovery.horizontal_portscan import HorizontalPortscan from modules.network_discovery.vertical_portscan import VerticalPortscan - - - -class NetworkDiscovery(IModule, multiprocessing.Process): +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + Proto, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + +class NetworkDiscovery(IModule): """ A class process to find port scans This should be converted into a module that wakesup alone when a new alert arrives @@ -17,9 +33,6 @@ class NetworkDiscovery(IModule, multiprocessing.Process): def init(self): self.horizontal_ps = HorizontalPortscan(self.db) self.vertical_ps = VerticalPortscan(self.db) - # Get from the database the separator used to separate the IP and the word profile - self.fieldseparator = self.db.get_field_separator() - # To which channels do you wnat to subscribe? When a message arrives on the channel the module will wakeup self.c1 = self.db.subscribe('tw_modified') self.c2 = self.db.subscribe('new_notice') self.c3 = self.db.subscribe('new_dhcp') @@ -48,18 +61,26 @@ def shutdown_gracefully(self): self.horizontal_ps.combine_evidence() self.vertical_ps.combine_evidence() - def check_icmp_sweep(self, msg, note, profileid, uid, twid, timestamp): + def check_icmp_sweep( + self, + msg: str, + note: str, + profileid: str, + uid: str, + twid: str, + timestamp: str + ): """ - Use our own Zeek scripts to detect ICMP scans. - Threshold is on the scrips and it is 25 icmp flows + Use our own Zeek scripts to detect ICMP scans. 
+ Threshold is on the scripts and it is 25 ICMP flows """ if 'TimestampScan' in note: - evidence_type = 'ICMP-Timestamp-Scan' + evidence_type = EvidenceType.ICMP_TIMESTAMP_SCAN elif 'ICMPAddressScan' in note: - evidence_type = 'ICMP-AddressScan' + evidence_type = EvidenceType.ICMP_ADDRESS_SCAN elif 'AddressMaskScan' in note: - evidence_type = 'ICMP-AddressMaskScan' + evidence_type = EvidenceType.ICMP_ADDRESS_MASK_SCAN else: # unsupported notice type return False @@ -67,32 +88,52 @@ def check_icmp_sweep(self, msg, note, profileid, uid, twid, timestamp): hosts_scanned = int(msg.split('on ')[1].split(' hosts')[0]) # get the confidence from 0 to 1 based on the number of hosts scanned confidence = 1 / (255 - 5) * (hosts_scanned - 255) + 1 - threat_level = 'medium' - category = 'Recon.Scanning' - attacker_direction = 'srcip' - # this is the last dip scanned - attacker = profileid.split('_')[1] - source_target_tag = 'Recon' - description = msg - # this one is detected by zeek so we can't track the uids causing it - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, conn_count=hosts_scanned, - profileid=profileid, twid=twid, uid=uid) + threat_level = ThreatLevel.MEDIUM + saddr = profileid.split('_')[1] + + # this is the last IP scanned + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + + # this one is detected by Zeek, so we can't track the UIDs causing it + evidence = Evidence( + evidence_type=evidence_type, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=msg, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.RECON_SCANNING, + conn_count=hosts_scanned, + source_target_tag=Tag.RECON + ) + + self.db.set_evidence(evidence) + def check_portscan_type3(self): """ ### # PortScan Type 3. Direction OUT - # Considering all the flows in this TW, for all the Dst IP, get the sum of all the pkts send to each dst port TCP No tEstablished + # Considering all the flows in this TW, for all the Dst IP, get the + sum of all the pkts send to each dst port TCP No tEstablished totalpkts = int(data[dport]['totalpkt']) - # If for each port, more than X amount of packets were sent, report an evidence + # If for each port, more than X amount of packets were sent, + report an evidence if totalpkts > 3: # Type of evidence evidence_type = 'PortScanType3' # Key key = 'dport' + ':' + dport + ':' + evidence_type # Description - description = 'Too Many Not Estab TCP to same port {} from IP: {}. Amount: {}'.format(dport, profileid.split('_')[1], totalpkts) + description = 'Too Many Not Estab TCP to same port {} from IP: {}. + Amount: {}'.format(dport, profileid.split('_')[1], totalpkts) # Threat level threat_level = 50 # Confidence. By counting how much we are over the threshold. @@ -102,18 +143,20 @@ def check_portscan_type3(self): else: # Between 3 and 10 pkts compute a kind of linear grow confidence = totalpkts / 10.0 - self.db.setEvidence(profileid, twid, evidence_type, threat_level, confidence) - self.print('Too Many Not Estab TCP to same port {} from IP: {}. Amount: {}'.format(dport, profileid.split('_')[1], totalpkts),6,0) + self.db.setEvidence(profileid, twid, evidence_type, + threat_level, confidence) + self.print('Too Many Not Estab TCP to same port {} from IP: {}. 
+ Amount: {}'.format(dport, profileid.split('_')[1], totalpkts),6,0) """ def check_icmp_scan(self, profileid, twid): # Map the ICMP port scanned to it's attack port_map = { - '0x0008': 'AddressScan', - '0x0013': 'TimestampScan', - '0x0014': 'TimestampScan', - '0x0017': 'AddressMaskScan', - '0x0018': 'AddressMaskScan', + '0x0008': EvidenceType.ICMP_ADDRESS_SCAN, + '0x0013': EvidenceType.ICMP_TIMESTAMP_SCAN, + '0x0014': EvidenceType.ICMP_TIMESTAMP_SCAN, + '0x0017': EvidenceType.ICMP_ADDRESS_MASK_SCAN, + '0x0018': EvidenceType.ICMP_ADDRESS_MASK_SCAN, } direction = 'Src' @@ -126,7 +169,7 @@ def check_icmp_scan(self, profileid, twid): ) for sport, sport_info in sports.items(): # get the name of this attack - attack = port_map.get(sport) + attack: EvidenceType = port_map.get(sport) if not attack: return @@ -142,7 +185,8 @@ def check_icmp_scan(self, profileid, twid): number_of_flows = len(icmp_flows_uids) # how many flows are responsible for this attack # (from this srcip to this dstip on the same port) - cache_key = f'{profileid}:{twid}:dstip:{scanned_ip}:{sport}:{attack}' + cache_key = f'{profileid}:{twid}:dstip:' \ + f'{scanned_ip}:{sport}:{attack}' prev_flows = self.cache_det_thresholds.get(cache_key, 0) # We detect a scan every Threshold. So we detect when there @@ -176,8 +220,8 @@ def check_icmp_scan(self, profileid, twid): prev_scanned_ips = self.cache_det_thresholds.get(cache_key, 0) # detect every 5, 10, 15 scanned IPs if ( - amount_of_scanned_ips % self.pingscan_minimum_scanned_ips == 0 - and prev_scanned_ips < amount_of_scanned_ips + amount_of_scanned_ips % self.pingscan_minimum_scanned_ips == 0 + and prev_scanned_ips < amount_of_scanned_ips ): pkts_sent = 0 @@ -204,45 +248,70 @@ def check_icmp_scan(self, profileid, twid): def set_evidence_icmpscan( self, - number_of_scanned_ips, - timestamp, - pkts_sent, - protocol, - profileid, - twid, - icmp_flows_uids, - attack, - scanned_ip=False + number_of_scanned_ips: int, + timestamp: str, + pkts_sent: int, + protocol: str, + profileid: str, + twid: str, + icmp_flows_uids: List[str], + attack: EvidenceType, + scanned_ip: str=False ): confidence = utils.calculate_confidence(pkts_sent) - attacker_direction = 'srcip' - evidence_type = attack - source_target_tag = 'Recon' - threat_level = 'medium' - category = 'Recon.Scanning' + + threat_level = ThreatLevel.MEDIUM srcip = profileid.split('_')[-1] - attacker = srcip + victim = None if number_of_scanned_ips == 1: description = ( f'ICMP scanning {scanned_ip} ICMP scan type: {attack}. ' - f'Total packets sent: {pkts_sent} over {len(icmp_flows_uids)} flows. ' + f'Total packets sent: {pkts_sent} over ' + f'{len(icmp_flows_uids)} flows. ' f'Confidence: {confidence}. by Slips' ) - victim = scanned_ip + if scanned_ip: + victim = Victim( + value=scanned_ip, + direction=Direction.DST, # TODO is it? + victim_type=IoCType.IP, + ) else: + # not a single victim, there are many description = ( - f'ICMP scanning {number_of_scanned_ips} different IPs. ICMP scan type: {attack}. ' - f'Total packets sent: {pkts_sent} over {len(icmp_flows_uids)} flows. ' + f'ICMP scanning {number_of_scanned_ips} different IPs.' + f' ICMP scan type: {attack}. ' + f'Total packets sent: {pkts_sent} over ' + f'{len(icmp_flows_uids)} flows. ' f'Confidence: {confidence}. 
by Slips' ) - # not a single victim, there are many - victim = '' - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, conn_count=pkts_sent, - proto=protocol, profileid=profileid, twid=twid, uid=icmp_flows_uids, victim=victim) + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + + evidence = Evidence( + evidence_type=attack, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=icmp_flows_uids, + timestamp=timestamp, + category=IDEACategory.RECON_SCANNING, + conn_count=pkts_sent, + proto=Proto(protocol.lower()), + source_target_tag=Tag.RECON, + victim=victim + ) + + self.db.set_evidence(evidence) def set_evidence_dhcp_scan( self, @@ -252,23 +321,38 @@ def set_evidence_dhcp_scan( uids, number_of_requested_addrs ): - evidence_type = 'DHCPScan' - attacker_direction = 'srcip' - source_target_tag = 'Recon' - srcip = profileid.split('_')[-1] - attacker = srcip - threat_level = 'medium' - category = 'Recon.Scanning' + threat_level = ThreatLevel.MEDIUM confidence = 0.8 + srcip = profileid.split('_')[-1] + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) description = ( - f'Performing a DHCP scan by requesting {number_of_requested_addrs} different IP addresses. ' + f'Performing a DHCP scan by requesting ' + f'{number_of_requested_addrs} different IP addresses. ' f'Threat Level: {threat_level}. ' f'Confidence: {confidence}. by Slips' ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, - conn_count=number_of_requested_addrs, profileid=profileid, twid=twid, uid=uids) + evidence = Evidence( + evidence_type=EvidenceType.DHCP_SCAN, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=uids, + timestamp=timestamp, + category=IDEACategory.RECON_SCANNING, + conn_count=number_of_requested_addrs, + source_target_tag=Tag.RECON + ) + + self.db.set_evidence(evidence) + def check_dhcp_scan(self, flow_info): diff --git a/modules/network_discovery/vertical_portscan.py b/modules/network_discovery/vertical_portscan.py index 70240df84..1ac50b9e5 100644 --- a/modules/network_discovery/vertical_portscan.py +++ b/modules/network_discovery/vertical_portscan.py @@ -1,5 +1,19 @@ from slips_files.common.slips_utils import utils - +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Victim, + Proto, + Tag + ) class VerticalPortscan: """ @@ -90,38 +104,52 @@ def combine_evidence(self): # reset the dict since we already combined self.pending_vertical_ps_evidence = {} + def set_evidence_vertical_portscan(self, evidence: dict): """Sets the vertical portscan evidence in the db""" - attacker_direction = 'srcip' - threat_level = 'high' - category = 'Recon.Scanning' - srcip = evidence['profileid'].split('_')[-1] + threat_level = ThreatLevel.HIGH + saddr = evidence['profileid'].split('_')[-1] confidence = utils.calculate_confidence(evidence['pkts_sent']) description = ( - f'new vertical 
port scan to IP {evidence["dstip"]} from {srcip}. ' + f'new vertical port scan to IP {evidence["dstip"]} from {saddr}. ' f'Total {evidence["amount_of_dports"]} ' f'dst {evidence["protocol"]} ports ' f'were scanned. ' f'Total packets sent to all ports: {evidence["pkts_sent"]}. ' f'Confidence: {confidence}. by Slips' ) - self.db.setEvidence( - 'VerticalPortscan', - attacker_direction, - srcip, - threat_level, - confidence, - description, - evidence['timestamp'], - category, - source_target_tag='Recon', - conn_count=evidence['pkts_sent'], - proto=evidence['protocol'], - profileid=evidence['profileid'], - twid=evidence['twid'], - uid=evidence['uid'], - victim=evidence['dstip'] + + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value=evidence['dstip'] ) + twid = int(evidence['twid'].replace("timewindow", "")) + evidence = Evidence( + evidence_type=EvidenceType.VERTICAL_PORT_SCAN, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=twid), + uid=evidence['uid'], + timestamp=evidence['timestamp'], + category=IDEACategory.RECON_SCANNING, + conn_count=evidence['pkts_sent'], + proto=Proto(evidence['protocol'].lower()), + source_target_tag=Tag.RECON, + victim=victim + ) + + self.db.set_evidence(evidence) + + def decide_if_time_to_set_evidence_or_combine( self, diff --git a/modules/p2ptrust/p2ptrust.py b/modules/p2ptrust/p2ptrust.py index 3e44f4639..4c0ea7e38 100644 --- a/modules/p2ptrust/p2ptrust.py +++ b/modules/p2ptrust/p2ptrust.py @@ -1,9 +1,4 @@ -import modules.p2ptrust.trust.base_model as reputation_model -import modules.p2ptrust.trust.trustdb as trustdb -import modules.p2ptrust.utils.utils as p2p_utils -from modules.p2ptrust.utils.go_director import GoDirector -from slips_files.common.imports import * import threading import os import shutil @@ -16,6 +11,29 @@ import sys import socket + +from slips_files.common.imports import * +import modules.p2ptrust.trust.base_model as reputation_model +import modules.p2ptrust.trust.trustdb as trustdb +import modules.p2ptrust.utils.utils as p2p_utils +from modules.p2ptrust.utils.go_director import GoDirector +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + Proto, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + def validate_slips_data(message_data: str) -> (str, int): """ Check that message received from p2p_data_request channel has correct @@ -49,7 +67,7 @@ def validate_slips_data(message_data: str) -> (str, int): return None -class Trust(IModule, multiprocessing.Process): +class Trust(IModule): name = 'P2P Trust' description = 'Enables sharing detection data with other Slips instances' authors = ['Dita', 'Alya Gomaa'] @@ -417,41 +435,53 @@ def set_evidence_malicious_ip(self, :param confidence: how confident the network opinion is about this opinion """ - attacker = ip_info.get('ip') + attacker_ip: str = ip_info.get('ip') ip_state = ip_info.get('ip_state') - # proto = ip_info.get('proto', '').upper() uid = ip_info.get('uid') profileid = ip_info.get('profileid') twid = ip_info.get('twid') timestamp = str(ip_info.get('stime')) + saddr = profileid.split("_")[-1] - attacker_direction = ip_state - evidence_type = 'Malicious-IP-from-P2P-network' - - category = 'Anomaly.Traffic' - # dns_resolution = 
self.db.get_dns_resolution(ip) - # dns_resolution = dns_resolution.get('domains', []) - # dns_resolution = f' ({dns_resolution[0:3]}), ' if dns_resolution else '' - victim = profileid.split("_")[-1] + category = IDEACategory.ANOMALY_TRAFFIC + ip_identification = self.db.get_ip_identification(attacker_ip) if 'src' in ip_state: - direction = 'from' - # we'll be using this to make the description more clear - other_direction = 'to' + description = ( + f'Connection from blacklisted IP {attacker_ip} ' + f'({ip_identification}) to {saddr} Source: Slips P2P network.' + ) + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=attacker_ip + ) else: - direction = 'to' - other_direction = 'from' - - ip_identification = self.db.get_ip_identification(attacker) - description = ( - f'connection {direction} blacklisted IP {attacker} ({ip_identification}) ' - f'{other_direction} {profileid.split("_")[-1]}' - f' Source: Slips P2P network.' - ) + description = ( + f'Connection to blacklisted IP {attacker_ip} ' + f'({ip_identification}) ' + f'from {saddr} Source: Slips P2P network.' + ) + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, profileid=profileid, twid=twid, uid=uid, victim=victim) + evidence = Evidence( + evidence_type= EvidenceType.MALICIOUS_IP_FROM_P2P_NETWORK, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=attacker.value), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=category, + ) + self.db.set_evidence(evidence) # add this ip to our MaliciousIPs hash in the database self.db.set_malicious_ip(attacker, profileid, twid) diff --git a/modules/p2ptrust/utils/go_director.py b/modules/p2ptrust/utils/go_director.py index fcab41b67..f15a8703e 100644 --- a/modules/p2ptrust/utils/go_director.py +++ b/modules/p2ptrust/utils/go_director.py @@ -2,9 +2,11 @@ import binascii import json from typing import Dict +import time + + from slips_files.common.abstracts.observer import IObservable from slips_files.core.output import Output - from modules.p2ptrust.utils.utils import ( validate_ip_address, validate_timestamp, @@ -14,7 +16,20 @@ ) from modules.p2ptrust.trust.trustdb import TrustDB from slips_files.common.imports import * -import time +from slips_files.core.evidence_structure.evidence import ( + Evidence, + ProfileID, + TimeWindow, + Attacker, + ThreatLevel, + Victim, + EvidenceType, + IoCType, + Direction, + IDEACategory, + ) + + class GoDirector(IObservable): @@ -434,11 +449,13 @@ def process_evaluation_score_confidence( reporter, key_type, key, score, confidence, report_time ) result = ( - f'Data processing ok: reporter {reporter}, report time {report_time}, key {key} ({key_type}), ' + f'Data processing ok: reporter {reporter}, report time ' + f'{report_time}, key {key} ({key_type}), ' f'score {score}, confidence {confidence}' ) self.print(result, 2, 0) - # print(f"*** [debugging p2p] *** stored a report about about {key} from {reporter} in p2p_reports key in the db ") + # print(f"*** [debugging p2p] *** stored a report about about + # {key} from {reporter} in p2p_reports key in the db ") # save all report info in the db # convert ts to human readable format report_info = { @@ -452,55 +469,71 @@ def process_evaluation_score_confidence( # with the width from 
slips.conf and the starttime as the report time if key_type == 'ip': profileid_of_attacker = f'profile_{key}' - self.db.addProfile(profileid_of_attacker, report_time, self.width) - self.set_evidence_p2p_report(key, reporter, score, confidence, report_time, profileid_of_attacker) - - def set_evidence_p2p_report(self, ip, reporter, score, confidence, timestamp, profileid_of_attacker): + self.db.add_profile(profileid_of_attacker, report_time, self.width) + self.set_evidence_p2p_report(key, reporter, score, + confidence, + report_time, + profileid_of_attacker) + + def set_evidence_p2p_report( + self, + ip: str, + reporter: str, + score: float, + confidence: float, + timestamp: str, + profileid_of_attacker: str + ): """ - set evidence for the newly created attacker profile stating that it attacked another peer + Set evidence for the newly created attacker + profile stating that it attacked another peer """ - attacker_direction = 'srcip' - attacker = ip - evidence_type = 'P2PReport' + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=ip + ) threat_level = utils.threat_level_to_string(score) - category = 'Anomaly.Connection' # confidence depends on how long the connection # scale the confidence from 0 to 1, 1 means 24 hours long ip_identification = self.db.get_ip_identification(ip, get_ti_data=False) last_update_time, reporter_ip = self.trustdb.get_ip_of_peer(reporter) - # this should never happen. if we have a report, we will have a reporter - # and will have the ip of the reporter + # this should never happen. if we have a report, + # we will have a reporter and will have the ip of the reporter # but just in case if not reporter_ip: reporter_ip = '' - description = f'attacking another peer: {reporter_ip} ({reporter}). threat level: {threat_level} ' \ + description = f'attacking another peer: {reporter_ip} ' \ + f'({reporter}). threat level: {threat_level} ' \ f'confidence: {confidence} {ip_identification}' + # get the tw of this report time - if twid := self.db.getTWofTime(profileid_of_attacker, timestamp): + if twid := self.db.get_tw_of_ts(profileid_of_attacker, timestamp): twid = twid[0] else: # create a new twid for the attacker profile that has the # report time to add this evidence to twid = self.db.get_timewindow(timestamp, profileid_of_attacker) - uid = '' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - profileid=profileid_of_attacker, - twid=twid, - uid=uid, - victim=reporter_ip - ) + timestamp = utils.convert_format(timestamp, utils.alerts_format) + evidence = Evidence( + evidence_type=EvidenceType.P2P_REPORT, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=ip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[''], + timestamp=timestamp, + category=IDEACategory.ANOMALY_CONNECTION, + ) + + self.db.set_evidence(evidence) + def process_go_update(self, data: dict) -> None: diff --git a/modules/progress_bar/__init__.py b/modules/progress_bar/__init__.py new file mode 100644 index 000000000..a0162f7b2 --- /dev/null +++ b/modules/progress_bar/__init__.py @@ -0,0 +1,2 @@ +# This file is part of Viper - https://github.com/botherder/viper +# See the file 'LICENSE' for copying permission.
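Every module touched by this patch reports detections through the same new pattern: build an Evidence object and hand it to db.set_evidence(), instead of passing a dozen positional arguments to the old setEvidence(). A minimal sketch of that pattern, using only names that appear in the hunks of this diff (the helper function, its parameters and the field values are illustrative, not code from the repo):

from slips_files.core.evidence_structure.evidence import (
    Evidence,
    ProfileID,
    TimeWindow,
    Attacker,
    ThreatLevel,
    EvidenceType,
    IoCType,
    Direction,
    IDEACategory,
)

def report_example(db, saddr: str, twid: str, uid: str, timestamp: str):
    # who did what: the detecting module describes the attacker...
    attacker = Attacker(
        direction=Direction.SRC,
        attacker_type=IoCType.IP,
        value=saddr,
    )
    evidence = Evidence(
        evidence_type=EvidenceType.HTTP_TRAFFIC,  # any EvidenceType member
        attacker=attacker,
        threat_level=ThreatLevel.LOW,
        confidence=1.0,
        description='example evidence description',
        # ...and where the evidence belongs: the profile of the source IP
        # and the time window of the flow, e.g. 'timewindow3' -> 3
        profile=ProfileID(ip=saddr),
        timewindow=TimeWindow(number=int(twid.replace('timewindow', ''))),
        uid=[uid],  # the new API takes a list of flow uids
        timestamp=timestamp,
        category=IDEACategory.ANOMALY_TRAFFIC,
    )
    db.set_evidence(evidence)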
diff --git a/modules/progress_bar/progress_bar.py b/modules/progress_bar/progress_bar.py new file mode 100644 index 000000000..405c6c102 --- /dev/null +++ b/modules/progress_bar/progress_bar.py @@ -0,0 +1,152 @@ +from multiprocessing.connection import Connection +from multiprocessing import Event +from tqdm.auto import tqdm +import sys + +from slips_files.common.abstracts._module import IModule + + +class PBar(IModule): + """ + Why this class runs in a separate process: + all modules need access to the pbar, but the profiler is the one + that always initializes it. If this class weren't its own process, + the profiler would be the only process that "knows" about the pbar, + and any text sent to the output proc by anyone other than the + profiler would be printed on top of the pbar, giving us duplicate + bars! + + The solution is to make the pbar a separate process. Whenever it's + supported, output.py forwards all text to be printed to this class, + and this class handles the printing so that nothing overlaps with + the pbar. Once the pbar is done, this proc sets the has_pbar shared + var to False and output.py knows to print text normally again. + """ + name = 'Progress Bar' + description = 'Shows a pbar of processed flows' + authors = ['Alya Gomaa'] + + def init( + self, + stdout: str = None, + pipe: Connection = None, + slips_mode: str = None, + pbar_finished: Event = None + ): + self.stdout: str = stdout + self.slips_mode: str = slips_mode + self.pipe = pipe + self.done_reading_flows = False + self.pbar_finished: Event = pbar_finished + + + + def remove_stats(self): + # remove the stats from the progress bar + self.progress_bar.set_postfix_str( + '', + refresh=True + ) + + + def initialize_pbar(self, msg: dict): + """ + initializes the progress bar when slips is running on a file or + a zeek dir + ignores pcaps, interfaces and dirs given to slips if -g is enabled + """ + self.total_flows = int(msg['total_flows']) + # the bar_format arg is to disable ETA and unit display + # don't use ncols so tqdm will adjust the bar size according to the + # terminal size + self.progress_bar = tqdm( + total=self.total_flows, + leave=True, + colour="green", + desc="Flows read", + mininterval=0, # defines how long to wait between each refresh + unit=' flow', + smoothing=1, + bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} {postfix}", + position=0, + initial=0, # initial value of the flows processed + file=sys.stdout, + ) + + + def update_bar(self): + """ + wrapper for tqdm.update() + adds 1 to the number of flows processed + """ + + if not hasattr(self, 'progress_bar'): + # this module won't have the progress_bar set if it's running + # on a pcap or an interface, + # or if the output is redirected to a file! + return + + if self.slips_mode == 'daemonized': + return + + self.progress_bar.update(1) + if self.progress_bar.n == self.total_flows: + self.terminate() + + def terminate(self): + # remove the stats from the bar because we'll be + # printing them in a new line + self.remove_stats() + tqdm.write("Profiler is done reading all flows.
" + "Slips is now processing them.") + self.pbar_finished.set() + + def print_to_cli(self, msg: dict): + """ + prints using tqdm in order to avoid conflict with the pbar + """ + tqdm.write(msg['txt']) + + + def update_stats(self, msg: dict): + """writes the stats sent in the msg as a pbar postfix""" + self.progress_bar.set_postfix_str( + msg['stats'], + refresh=True + ) + + + def shutdown_gracefully(self): + # to tell output.py to no longer send prints here + self.pbar_finished.set() + + + + def main(self): + """ + keeps receiving events until pbar reaches 100% + """ + has_new_msg = self.pipe.poll(timeout=0.1) + + if has_new_msg: + msg: dict = self.pipe.recv() + + event: str = msg['event'] + if event == "init": + self.initialize_pbar(msg) + + if event == "update_bar": + self.update_bar() + + if event == "update_stats": + self.update_stats(msg) + + if event == "print": + # let tqdm do the printing to avoid conflicts with the pbar + self.print_to_cli(msg) + + diff --git a/modules/riskiq/riskiq.py b/modules/riskiq/riskiq.py index c31a258ef..659d2fca8 100644 --- a/modules/riskiq/riskiq.py +++ b/modules/riskiq/riskiq.py @@ -6,7 +6,7 @@ import requests from requests.auth import HTTPBasicAuth -class RiskIQ(IModule, multiprocessing.Process): +class RiskIQ(IModule): # Name: short name of the module. Do not use spaces name = 'Risk IQ' description = 'Module to get passive DNS info about IPs from RiskIQ' diff --git a/modules/rnn_cc_detection/rnn_cc_detection.py b/modules/rnn_cc_detection/rnn_cc_detection.py index 7c7aff1ea..bebaa6e8f 100644 --- a/modules/rnn_cc_detection/rnn_cc_detection.py +++ b/modules/rnn_cc_detection/rnn_cc_detection.py @@ -1,20 +1,33 @@ # Must imports -from slips_files.common.imports import * import warnings import json -import traceback - -# Your imports import numpy as np -import sys +from typing import Optional from tensorflow.python.keras.models import load_model +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Attacker, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Victim, + Proto, + Tag + ) + warnings.filterwarnings('ignore', category=FutureWarning) warnings.filterwarnings('ignore', category=DeprecationWarning) -class CCDetection(IModule, multiprocessing.Process): +class CCDetection(IModule): # Name: short name of the module. 
Do not use spaces name = 'RNN C&C Detection' description = 'Detect C&C channels based on behavioral letters' @@ -26,53 +39,58 @@ def init(self): 'new_letters': self.c1, } - def set_evidence( + + def set_evidence_cc_channel( self, - score, - confidence, - uid, - timestamp, - tupleid='', - profileid='', - twid='', + score: float, + confidence: float, + uid: str, + timestamp: str, + tupleid: str = '', + profileid: str = '', + twid: str = '', ): """ Set an evidence for malicious Tuple """ - tupleid = tupleid.split('-') dstip, port, proto = tupleid[0], tupleid[1], tupleid[2] - attacker_direction = 'srcip' - attacker = dstip - source_target_tag = 'Botnet' - evidence_type = 'Command-and-Control-channels-detection' - threat_level = 'high' - categroy = 'Intrusion.Botnet' - portproto = f'{port}/{proto}' - port_info = self.db.get_port_info(portproto) - ip_identification = self.db.get_ip_identification(dstip) - description = ( + srcip = profileid.split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + + threat_level: ThreatLevel = ThreatLevel.HIGH + portproto: str = f'{port}/{proto}' + port_info: str = self.db.get_port_info(portproto) + ip_identification: str = self.db.get_ip_identification(dstip) + description: str = ( f'C&C channel, destination IP: {dstip} ' f'port: {port_info.upper() if port_info else ""} {portproto} ' f'score: {format(score, ".4f")}. {ip_identification}' ) - victim = profileid.split('_')[-1] - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - categroy, - source_target_tag=source_target_tag, - port=port, - proto=proto, - profileid=profileid, - twid=twid, - uid=uid, - victim= victim) + + timestamp: str = utils.convert_format(timestamp, utils.alerts_format) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.COMMAND_AND_CONTROL_CHANNEL, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=timestamp, + category=IDEACategory.INTRUSION_BOTNET, + source_target_tag=Tag.BOTNET, + port=int(port), + proto=Proto(proto.lower()) if proto else None, + ) + + self.db.set_evidence(evidence) def convert_input_for_module(self, pre_behavioral_model): @@ -170,7 +188,7 @@ def main(self): ) uid = msg['uid'] stime = flow['starttime'] - self.set_evidence( + self.set_evidence_cc_channel( score, confidence, uid, @@ -179,16 +197,14 @@ def main(self): profileid, twid, ) - attacker = tupleid.split('-')[0] - # port = int(tupleid.split('-')[1]) to_send = { - 'attacker': attacker, - 'attacker_type': utils.detect_data_type(attacker), + 'attacker_type': utils.detect_data_type(flow['daddr']), 'profileid' : profileid, 'twid' : twid, 'flow': flow, - 'uid': uid, } + # we only check malicious jarm hashes when there's a CC + # detection self.db.publish('check_jarm_hash', json.dumps(to_send)) """ diff --git a/modules/template/template.py b/modules/template/template.py index 1ecc973f7..e7368bff6 100644 --- a/modules/template/template.py +++ b/modules/template/template.py @@ -14,7 +14,7 @@ from slips_files.common.imports import * -class Template(IModule, multiprocessing.Process): +class Template(IModule): # Name: short name of the module. 
Do not use spaces name = 'Template' description = 'Template module' diff --git a/modules/threat_intelligence/threat_intelligence.py b/modules/threat_intelligence/threat_intelligence.py index 4060c5c29..b3eeeea92 100644 --- a/modules/threat_intelligence/threat_intelligence.py +++ b/modules/threat_intelligence/threat_intelligence.py @@ -1,8 +1,3 @@ -# Must imports -from slips_files.common.imports import * -from modules.threat_intelligence.urlhaus import URLhaus - -# Your imports import ipaddress import os import json @@ -11,12 +6,32 @@ import requests import threading import time -from slips_files.common.slips_utils import utils +from typing import Dict - -class ThreatIntel(IModule, multiprocessing.Process, URLhaus): +from slips_files.common.slips_utils import utils +from slips_files.common.imports import * +from modules.threat_intelligence.urlhaus import URLhaus +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + ProfileID, + TimeWindow, + Victim, + Attacker, + Proto, + ThreatLevel, + EvidenceType, + IoCType, + Direction, + IDEACategory, + Tag + ) + + +class ThreatIntel(IModule, URLhaus): name = 'Threat Intelligence' - description = 'Check if the source IP or destination IP are in a malicious list of IPs' + description = 'Check if the source IP or destination IP' \ + ' are in a malicious list of IPs' authors = ['Frantisek Strasak, Sebastian Garcia, Alya Gomaa'] def init(self): @@ -92,55 +107,73 @@ def __read_configuration(self): if not os.path.exists(self.path_to_local_ti_files): os.mkdir(self.path_to_local_ti_files) + def set_evidence_malicious_asn( - self, - attacker, - uid, - timestamp, - ip_info, - profileid, - twid, - asn, - asn_info, - ): + self, + attacker: str, + uid: str, + timestamp: str, + profileid: str, + twid: str, + asn: str, + asn_info: dict, + ): """ - :param asn_info: the malicious asn info taken from own_malicious_iocs.csv + :param asn_info: the malicious ASN info taken from own_malicious_iocs.csv """ - attacker_direction = 'srcip' - category = 'Anomaly.Traffic' - evidence_type = 'ThreatIntelligenceBlacklistedASN' - confidence = 0.8 - # when we comment ti_files and run slips, we get the error of not being able to get feed threat_level - threat_level = asn_info.get('threat_level', 'medium') + confidence: float = 0.8 + saddr = profileid.split("_")[-1] + + # when we comment ti_files and run slips, we get the + # error of not being able to get feed threat_level + threat_level: float = utils.threat_levels[ + asn_info.get('threat_level', 'medium') + ] + threat_level: ThreatLevel = ThreatLevel(threat_level) - tags = asn_info.get('tags', False) - source_target_tag = tags.capitalize() if tags else 'BlacklistedASN' - identification = self.db.get_ip_identification(attacker) + tags = asn_info.get('tags', '') + identification: str = self.db.get_ip_identification(attacker) - description = f'Connection to IP: {attacker} with blacklisted ASN: {asn} ' \ - f'Description: {asn_info["description"]}, ' \ - f'Found in feed: {asn_info["source"]}, ' \ - f'Confidence: {confidence}.'\ - f'Tags: {tags} ' \ - f'{identification}' + description: str = ( + f'Connection to IP: {attacker} with blacklisted ASN: {asn} ' + f'Description: {asn_info["description"]}, ' + f'Found in feed: {asn_info["source"]}, ' + f'Confidence: {confidence}. 
Tags: {tags} {identification}' + ) + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + evidence = Evidence( + evidence_type=EvidenceType.THREAT_INTELLIGENCE_BLACKLISTED_ASN, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=utils.convert_format(timestamp, utils.alerts_format), + category=IDEACategory.ANOMALY_TRAFFIC, + source_target_tag=Tag.BLACKLISTED_ASN, + ) - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, source_target_tag=source_target_tag, profileid=profileid, - twid=twid, uid=uid) + self.db.set_evidence(evidence) def set_evidence_malicious_ip( - self, - ip, - uid, - daddr, - timestamp, - ip_info: dict, - profileid='', - twid='', - ip_state='', - ): + self, + ip: str, + uid: str, + dstip: str, + timestamp: str, + ip_info: dict, + profileid: str = '', + twid: str = '', + ip_state: str = '', + ): """ Set an evidence for a malicious IP met in the timewindow :param ip: the ip source file @@ -152,79 +185,62 @@ def set_evidence_malicious_ip( :param ip_state: is basically the answer to "which one is the blacklisted IP"? can be 'srcip' or 'dstip' """ + threat_level: float = utils.threat_levels[ + ip_info.get('threat_level', 'medium') + ] + threat_level: ThreatLevel = ThreatLevel(threat_level) + confidence: float = 1.0 + srcip = profileid.split("_")[-1] - attacker_direction = ip_state - attacker = ip - evidence_type = 'ThreatIntelligenceBlacklistIP' - - threat_level = ip_info.get('threat_level', 'medium') - - confidence = 1 - category = 'Anomaly.Traffic' if 'src' in ip_state: - direction = 'from' - opposite_dir = 'to' - victim = daddr - attacker_direction = 'srcip' + description: str = f'connection from blacklisted ' \ + f'IP: {ip} to {dstip}. ' + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=ip + ) elif 'dst' in ip_state: - direction = 'to' - opposite_dir = 'from' - victim = profileid.split("_")[-1] - attacker_direction = 'srcip' + if self.is_dns_response: + description: str = f'DNS answer with a blacklisted ' \ + f'IP: {ip} for query: {self.dns_query}' + else: + description: str = f'connection to blacklisted ' \ + f'IP: {ip} from {srcip}. ' + + attacker = Attacker( + direction=Direction.DST, + attacker_type=IoCType.IP, + value=ip + ) else: # ip_state is not specified? return - - # getting the ip identification adds ti description and tags to the returned str - # in this alert, we only want the description and tags of the TI feed that has - # this ip (the one that triggered this alert only), we don't want other descriptions from other TI sources! - # setting it to true results in the following alert - # blacklisted ip description: source: ipsum - ip_identification = self.db.get_ip_identification( + ip_identification: str = self.db.get_ip_identification( ip, get_ti_data=False ).strip() + description += (f'{ip_identification} Description: ' + f'{ip_info["description"]}. 
' + f'Source: {ip_info["source"]}.') + + + evidence = Evidence( + evidence_type=EvidenceType.THREAT_INTELLIGENCE_BLACKLISTED_IP, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=attacker.value), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=utils.convert_format(timestamp, utils.alerts_format), + category=IDEACategory.ANOMALY_TRAFFIC, + source_target_tag=Tag.BLACKLISTED_IP, + ) - if self.is_dns_response: - description = ( - f'DNS answer with a blacklisted ip: {ip} ' - f'for query: {self.dns_query} ' - ) - else: - - # this will be 'blacklisted conn from x to y' - # or 'blacklisted conn to x from y' - description = f'connection {direction} blacklisted IP {ip} ' \ - f'{opposite_dir} {victim}. ' - - - description += f'{ip_identification} ' \ - f'Description: {ip_info["description"]}. ' \ - f'Source: {ip_info["source"]}.' + self.db.set_evidence(evidence) - if tags := ip_info.get('tags', False): - if type(tags) == list: - source_target_tag = tags[0].capitalize() - else: - source_target_tag = tags.capitalize() - else: - source_target_tag = 'BlacklistedIP' - - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid, - victim=victim - ) # mark this ip as malicious in our database ip_info = {'threatintelligence': ip_info} @@ -235,63 +251,74 @@ def set_evidence_malicious_ip( def set_evidence_malicious_domain( self, - domain, - uid, - timestamp, + domain: str, + uid: str, + timestamp: str, domain_info: dict, - is_subdomain, - profileid='', - twid='', + is_subdomain: bool, + profileid: str = '', + twid: str = '', ): """ - Set an evidence for malicious domain met in the timewindow + Set an evidence for a malicious domain met in the timewindow :param source_file: is the domain source file - :param domain_info: is all the info we have about this domain in the db source, confidence , description etc... + :param domain_info: is all the info we have about + this domain in the db source, confidence , description etc... 
""" if not domain_info: return - attacker_direction = 'dstdomain' - attacker = domain - category = 'Anomaly.Traffic' - evidence_type = 'ThreatIntelligenceBlacklistDomain' + srcip = profileid.split("_")[-1] # in case of finding a subdomain in our blacklists - # print that in the description of the alert and change the confidence accordingly - # in case of a domain, confidence=1 - confidence = 0.7 if is_subdomain else 1 + # print that in the description of the alert and change the + # confidence accordingly in case of a domain, confidence=1 + confidence: float = 0.7 if is_subdomain else 1 - # when we comment ti_files and run slips, we get the error of not being able to get feed threat_level - threat_level = domain_info.get('threat_level', 'high') + # when we comment ti_files and run slips, we + # get the error of not being able to get feed threat_level + threat_level: float = utils.threat_levels[ + domain_info.get('threat_level', 'high') + ] + threat_level: ThreatLevel = ThreatLevel(threat_level) - tags = domain_info.get('tags', False) - source_target_tag = tags[0].capitalize() if tags else 'BlacklistedDomain' if self.is_dns_response: - description = f'DNS answer with a blacklisted CNAME: {domain} ' \ - f'for query: {self.dns_query} ' + description: str = (f'DNS answer with a blacklisted ' + f'CNAME: {domain} ' + f'for query: {self.dns_query} ') else: - description = f'connection to a blacklisted domain {domain}. ' + description: str = f'connection to a blacklisted domain {domain}. ' - description += f'Description: {domain_info.get("description", "")}, '\ - f'Found in feed: {domain_info["source"]}, '\ + description += f'Description: {domain_info.get("description", "")},' \ + f' Found in feed: {domain_info["source"]}, ' \ f'Confidence: {confidence}. ' + + tags = domain_info.get('tags', None) if tags: description += f'with tags: {tags}. ' - self.db.setEvidence( - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - source_target_tag=source_target_tag, - profileid=profileid, - twid=twid, - uid=uid) + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + + evidence = Evidence( + evidence_type=EvidenceType.THREAT_INTELLIGENCE_BLACKLISTED_DOMAIN, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid], + timestamp=utils.convert_format(timestamp, utils.alerts_format), + category=IDEACategory.ANOMALY_TRAFFIC, + source_target_tag=Tag.BLACKLISTED_DOMAIN, + ) + + self.db.set_evidence(evidence) def is_valid_threat_level(self, threat_level): return threat_level in utils.threat_levels @@ -597,22 +624,31 @@ def spamhaus(self, ip): 0: False } - list_description = {'127.0.0.2' :'IP under the control of, used by, or made available for use' - ' by spammers and abusers in unsolicited bulk ' - 'email or other types of Internet-based abuse that ' - 'threatens networks or users', - '127.0.0.3' :'IP involved in sending low-reputation email, ' - 'may display a risk to users or a compromised host', - '127.0.0.4' :'IP address of exploited systems.' 
- 'This includes machines operating open proxies, systems infected ' - 'with trojans, and other malware vectors.', - '127.0.0.9' :'IP is part of a netblock that is ‘hijacked’ or leased by professional spam ' - 'or cyber-crime operations and therefore used for dissemination of malware, ' - 'trojan downloaders, botnet controllers, etc.', - '127.0.0.10':'IP address should not -according to the ISP controlling it- ' - 'be delivering unauthenticated SMTP email to any Internet mail server', - '127.0.0.11': 'IP is not expected be delivering unauthenticated SMTP email to any Internet mail server,' - ' such as dynamic and residential IP space'} + list_description = { + '127.0.0.2': 'IP under the control of, used by, or made ' + 'available for use' + ' by spammers and abusers in unsolicited bulk ' + 'email or other types of Internet-based abuse that ' + 'threatens networks or users', + '127.0.0.3': 'IP involved in sending low-reputation email, ' + 'may display a risk to users or a compromised host', + '127.0.0.4': 'IP address of exploited systems. ' + 'This includes machines operating open proxies, ' + 'systems infected with trojans, and other ' + 'malware vectors.', + '127.0.0.9': 'IP is part of a netblock that is ‘hijacked’ ' + 'or leased by professional spam ' + 'or cyber-crime operations and therefore used ' + 'for dissemination of malware, ' + 'trojan downloaders, botnet controllers, etc.', + '127.0.0.10': 'IP address should not -according to the ISP ' + 'controlling it- ' + 'be delivering unauthenticated SMTP email to ' + 'any Internet mail server', + '127.0.0.11': 'IP is not expected to be delivering unauthenticated' + ' SMTP email to any Internet mail server,' + ' such as dynamic and residential IP space' + } spamhaus_dns_hostname = ".".join(ip.split(".")[::-1]) + ".zen.spamhaus.org" @@ -662,38 +698,52 @@ def is_ignored_domain(self, domain): - def set_evidence_malicious_hash(self,file_info: dict): + def set_evidence_malicious_hash(self, file_info: Dict[str, any]): """ - :param file_info: dict with flow, profileid, twid, and confidence of file + :param file_info: dict with flow, profileid, + twid, and confidence of file """ - attacker_direction = 'md5' - category = 'Malware' - evidence_type = 'MaliciousDownloadedFile' - attacker = file_info['flow']["md5"] - threat_level = file_info["threat_level"] - daddr = file_info['flow']["daddr"] - ip_identification = self.db.get_ip_identification(daddr) - confidence = file_info["confidence"] - threat_level = utils.threat_level_to_string(threat_level) - - description = ( - f'Malicious downloaded file {attacker}. ' + srcip = file_info['flow']['saddr'] + threat_level: str = utils.threat_level_to_string( + file_info["threat_level"] + ) + threat_level: ThreatLevel = ThreatLevel[threat_level.upper()] + confidence: float = file_info["confidence"] + daddr = file_info["flow"]["daddr"] + + ip_identification: str = self.db.get_ip_identification(daddr) + description: str = ( + f'Malicious downloaded file {file_info["flow"]["md5"]}. ' + f'size: {file_info["flow"]["size"]} ' - f'from IP: {daddr}. Detected by: {file_info["blacklist"]}. ' + f'from IP: {daddr}. ' + f'Detected by: {file_info["blacklist"]}. ' + f'Score: {confidence}. 
{ip_identification}' ) - - self.db.setEvidence(evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - file_info['flow']["starttime"], - category, - profileid=file_info["profileid"], - twid=file_info["twid"], - uid=file_info['flow']["uid"]) + attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=srcip + ) + ts = utils.convert_format( + file_info['flow']["starttime"], utils.alerts_format + ) + twid = TimeWindow(number=int( + file_info["twid"].replace("timewindow", "") + )) + evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_DOWNLOADED_FILE, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=srcip), + timewindow=twid, + uid=[file_info['flow']["uid"]], + timestamp=ts, + category=IDEACategory.MALWARE + ) + + self.db.set_evidence(evidence) def circl_lu(self, flow_info: dict): """ @@ -799,7 +849,6 @@ def ip_has_blacklisted_ASN( ip, uid, timestamp, - ip_info, profileid, twid, asn, @@ -1042,9 +1091,15 @@ def main(self): utils.is_ignored_ip(ip) or self.is_outgoing_icmp_packet(protocol, ip_state) ): - self.is_malicious_ip(ip, uid, daddr, timestamp, profileid, twid, ip_state) - self.ip_belongs_to_blacklisted_range(ip, uid, daddr, timestamp, profileid, twid, ip_state) - self.ip_has_blacklisted_ASN(ip, uid, timestamp, profileid, twid, ip_state) + self.is_malicious_ip( + ip, uid, daddr, timestamp, profileid, twid, ip_state + ) + self.ip_belongs_to_blacklisted_range( + ip, uid, daddr, timestamp, profileid, twid, ip_state + ) + self.ip_has_blacklisted_ASN( + ip, uid, timestamp, profileid, twid, ip_state + ) elif type_ == 'domain': self.is_malicious_domain( to_lookup, @@ -1064,5 +1119,13 @@ def main(self): if msg:= self.get_msg('new_downloaded_file'): file_info: dict = json.loads(msg['data']) + # the format of file_info is as follows + # { + # 'flow': asdict(self.flow), + # 'type': 'suricata' or 'zeek', + # 'profileid': str, + # 'twid': str, + # } + if file_info['type'] == 'zeek': self.is_malicious_hash(file_info) diff --git a/modules/threat_intelligence/urlhaus.py b/modules/threat_intelligence/urlhaus.py index c06fd1e30..58f81b1ab 100644 --- a/modules/threat_intelligence/urlhaus.py +++ b/modules/threat_intelligence/urlhaus.py @@ -1,10 +1,21 @@ -# Must imports -from slips_files.common.imports import * - -# Your imports +from typing import Dict, Any import json import requests +from slips_files.common.imports import * +from slips_files.core.evidence_structure.evidence import ( + Evidence, + ProfileID, + TimeWindow, + Attacker, + ThreatLevel, + Victim, + EvidenceType, + IoCType, + Direction, + IDEACategory, + ) + URLHAUS_BASE_URL = 'https://urlhaus-api.abuse.ch/v1' class URLhaus: @@ -133,87 +144,123 @@ def urlhaus_lookup(self, ioc, type_of_ioc: str): elif type_of_ioc == 'url': return self.parse_urlhaus_url_response(response, ioc) - def set_evidence_malicious_hash(self, file_info: dict): - attacker_direction = 'md5' - category = 'Malware' - evidence_type = 'MaliciousDownloadedFile' + def set_evidence_malicious_hash(self, file_info: Dict[str, Any]) -> None: - threat_level = file_info["threat_level"] - flow = file_info['flow'] - attacker = flow["md5"] - daddr = flow["daddr"] + flow: Dict[str, Any] = file_info['flow'] - ip_identification = self.db.get_ip_identification(daddr) + daddr: str = flow["daddr"] + ip_identification: str = self.db.get_ip_identification(daddr) - # add the following fields in the evidence description but only if we're sure they exist - 
size = f" size: {flow['size']}." if flow.get('size', False) else '' - file_name = f" file name: {flow['file_name']}." if flow.get('file_name', False) else '' - file_type = f" file type: {flow['file_type']}." if flow.get('file_type', False) else '' - tags = f" tags: {flow['tags']}." if flow.get('tags', False) else '' + # Add the following fields in the evidence + # description but only if we're sure they exist + size: str = f" size: {flow['size']}." if flow.get('size', False) \ + else '' + file_name: str = f" file name: {flow['file_name']}." \ + if flow.get('file_name', False) else '' + file_type: str = f" file type: {flow['file_type']}." \ + if flow.get('file_type', False) else '' + tags: str = f" tags: {flow['tags']}." if flow.get('tags', False) \ + else '' - # we have more info about the downloaded file + # We have more info about the downloaded file # so we need a more detailed description - description = f"Malicious downloaded file: {flow['md5']}." \ - f"{size}" \ - f" from IP: {flow['daddr']} {ip_identification}." \ - f"{file_name}" \ - f"{file_type}" \ - f"{tags}" \ - f" by URLhaus." \ - + description: str = ( + f"Malicious downloaded file: {flow['md5']}." + f"{size}" + f" from IP: {daddr} {ip_identification}." + f"{file_name}" + f"{file_type}" + f"{tags}" + f" by URLhaus." + ) + + threat_level: float = file_info.get("threat_level", 0) if threat_level: - # threat level here is the vt percentage from urlhaus - description += f" virustotal score: {threat_level}% malicious" - threat_level = float(threat_level)/100 + # Threat level here is the VT percentage from URLhaus + description += f" Virustotal score: {threat_level}% malicious" + threat_level: str = utils.threat_level_to_string(float( + threat_level) / 100) else: - threat_level = 0.8 - - confidence = 0.7 - - self.db.setEvidence(evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - flow["starttime"], - category, - profileid=file_info["profileid"], - twid=file_info["twid"], - uid=flow["uid"]) + threat_level = 'high' + + threat_level: ThreatLevel= ThreatLevel[threat_level.upper()] + + confidence: float = 0.7 + saddr: str = file_info['profileid'].split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + timestamp: str = flow["starttime"] + twid: str = file_info["twid"] + + # Assuming you have an instance of the Evidence class in your class + evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_DOWNLOADED_FILE, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + timestamp=timestamp, + category=IDEACategory.MALWARE, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[flow["uid"]] + ) + + self.db.set_evidence(evidence) + def set_evidence_malicious_url( self, - url_info, - uid, - timestamp, - profileid, - twid - ): - """ - :param url_info: dict with source, description, therat_level, and tags of url - """ - threat_level = url_info['threat_level'] - attacker = url_info['url'] - description = url_info['description'] - - confidence = 0.7 - - if not threat_level: - threat_level = 'medium' - else: - # convert percentage reported by urlhaus (virustotal) to - # a valid slips confidence - try: - threat_level = int(threat_level)/100 - threat_level = utils.threat_level_to_string(threat_level) - except ValueError: + url_info: Dict[str, Any], + uid: str, + timestamp: str, + profileid: str, + twid: str + ) -> None: + """ + Set 
evidence for a malicious URL based on the provided URL info + """ + threat_level: str = url_info.get('threat_level', '') + description: str = url_info.get('description', '') + + confidence: float = 0.7 + + if not threat_level: threat_level = 'medium' + else: + # Convert percentage reported by URLhaus (VirusTotal) to + # a valid SLIPS confidence + try: + threat_level = int(threat_level) / 100 + threat_level = utils.threat_level_to_string(threat_level) + except ValueError: + threat_level = 'medium' + + threat_level: ThreatLevel = ThreatLevel[threat_level.upper()] + saddr: str = profileid.split("_")[-1] + + attacker: Attacker = Attacker( + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=saddr + ) + # build the evidence entry and hand it to the db + evidence = Evidence( + evidence_type=EvidenceType.MALICIOUS_URL, + attacker=attacker, + threat_level=threat_level, + confidence=confidence, + description=description, + timestamp=timestamp, + category=IDEACategory.MALWARE, + profile=ProfileID(ip=saddr), + timewindow=TimeWindow(number=int(twid.replace("timewindow", ""))), + uid=[uid] + ) - attacker_direction = 'url' - category = 'Malware' - evidence_type = 'MaliciousURL' - - self.db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description, - timestamp, category, profileid=profileid, twid=twid, uid=uid) \ No newline at end of file + self.db.set_evidence(evidence) \ No newline at end of file diff --git a/modules/timeline/timeline.py b/modules/timeline/timeline.py index 407e5d43a..591cac9f9 100644 --- a/modules/timeline/timeline.py +++ b/modules/timeline/timeline.py @@ -8,7 +8,7 @@ import json -class Timeline(IModule, multiprocessing.Process): +class Timeline(IModule): # Name: short name of the module. Do not use spaces name = 'Timeline' description = 'Creates kalipso timeline of what happened in the network based on flows and available data' @@ -358,7 +358,7 @@ def process_flow(self, profileid, twid, flow, timestamp: float): self.print( f'Problem on process_flow() line {exception_line}', 0, 1 ) - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(),0,1) return True def pre_main(self): diff --git a/modules/update_manager/update_manager.py b/modules/update_manager/update_manager.py index b06f2127d..52dcbc2e3 100644 --- a/modules/update_manager/update_manager.py +++ b/modules/update_manager/update_manager.py @@ -1,34 +1,37 @@ -from exclusiveprocess import Lock, CannotAcquireLock -from modules.update_manager.timer_manager import InfiniteTimer -# from modules.update_manager.update_file_manager import UpdateFileManager -from slips_files.common.imports import * -from slips_files.core.helpers.whitelist import Whitelist -import time -import os -import json import ipaddress -import validators +import json +import os +import sys +import time import traceback + import requests -import sys -import asyncio -import datetime +import validators +from exclusiveprocess import ( + Lock, + CannotAcquireLock, + ) + +from modules.update_manager.timer_manager import InfiniteTimer +# from modules.update_manager.update_file_manager import UpdateFileManager +from slips_files.common.imports import * from slips_files.common.slips_utils import utils +from slips_files.core.helpers.whitelist import Whitelist -class UpdateManager(IModule, multiprocessing.Process): +class UpdateManager(IModule): # Name: short name of the module. 
Do not use spaces - name = 'Update Manager' - description = 'Update Threat Intelligence files' - authors = ['Kamila Babayeva', 'Alya Gomaa'] + name = "Update Manager" + description = "Update Threat Intelligence files" + authors = ["Kamila Babayeva", "Alya Gomaa"] def init(self): self.read_configuration() # Update file manager # Timer to update the ThreatIntelligence files - self.timer_manager = InfiniteTimer( - self.update_period, self.update_ti_files - ) + self.timer_manager = InfiniteTimer(self.update_period, self.update_ti_files) # Timer to update the MAC db # when update_ti_files is called, it decides what exactly to update, the mac db, # online whitelist OT online ti files. @@ -46,25 +49,25 @@ def init(self): self.interval = 7 self.whitelist = Whitelist(self.logger, self.db) self.slips_logfile = self.db.get_stdfile("stdout") - self.org_info_path = 'slips_files/organizations_info/' + self.org_info_path = "slips_files/organizations_info/" # if any keyword of the following is present in a line # then this line should be ignored by slips # either a not supported ioc type or a header line etc. # make sure the header keywords are lowercase because # we convert lines to lowercase when comparing self.header_keywords = ( - 'type', - 'first_seen_utc', - 'ip_v4', + "type", + "first_seen_utc", + "ip_v4", '"domain"', '#"type"', - '#fields', - 'number', - 'atom_type', - 'attacker', - 'score' + "#fields", + "number", + "atom_type", + "attacker", + "score", ) - self.ignored_IoCs = ('email', 'url', 'file_hash', 'file') + self.ignored_IoCs = ("email", "url", "file_hash", "file") # to track how many times an ip is present in different blacklists self.ips_ctr = {} self.first_time_reading_files = False @@ -79,14 +82,13 @@ def read_riskiq_creds(RiskIQ_credentials_path): if not RiskIQ_credentials_path: return - RiskIQ_credentials_path = os.path.join(os.getcwd(), - RiskIQ_credentials_path) + RiskIQ_credentials_path = os.path.join(os.getcwd(), RiskIQ_credentials_path) if not os.path.exists(RiskIQ_credentials_path): return - with open(RiskIQ_credentials_path, 'r') as f: - self.riskiq_email = f.readline().replace('\n', '') - self.riskiq_key = f.readline().replace('\n', '') + with open(RiskIQ_credentials_path, "r") as f: + self.riskiq_email = f.readline().replace("\n", "") + self.riskiq_key = f.readline().replace("\n", "") conf = ConfigParser() @@ -114,13 +116,12 @@ def read_riskiq_creds(RiskIQ_credentials_path): self.online_whitelist_update_period = conf.online_whitelist_update_period() self.online_whitelist = conf.online_whitelist() - def get_feed_details(self, feeds_path): """ Parse links, threat level and tags from the feeds_path file and return a dict with feed info """ try: - with open(feeds_path, 'r') as feeds_file: + with open(feeds_path, "r") as feeds_file: feeds = feeds_file.read() except FileNotFoundError: self.print(f"Error finding {feeds_path}. 
Feed won't be added to slips.") @@ -133,18 +134,24 @@ def get_feed_details(self, feeds_path): if line.startswith("#"): continue # remove all spaces - line = line.strip().replace(" ",'') + line = line.strip().replace(" ", "") # each line is https://abc.d/e,medium,['tag1','tag2'] - line = line.split(',') + line = line.split(",") url, threat_level = line[0], line[1] tags: str = " ".join(line[2:]) - tags = tags.replace('[','').replace(']','').replace('\'',"").replace('\"',"").split(',') + tags = ( + tags.replace("[", "") + .replace("]", "") + .replace("'", "") + .replace('"', "") + .split(",") + ) url = utils.sanitize(url.strip()) threat_level = threat_level.lower() # remove commented lines from the cache db - if url.startswith(';'): - feed = url.split('/')[-1] + if url.startswith(";"): + feed = url.split("/")[-1] if self.db.get_TI_file_info(feed): self.db.delete_feed(feed) # to avoid calling delete_feed again with the same feed @@ -155,15 +162,14 @@ def get_feed_details(self, feeds_path): if not utils.is_valid_threat_level(threat_level): # not a valid threat_level self.print( - f'Invalid threat level found in slips.conf: {threat_level} ' - f"for TI feed: {url}. Using 'low' instead.", 0, 1 + f"Invalid threat level found in slips.conf: {threat_level} " + f"for TI feed: {url}. Using 'low' instead.", + 0, + 1, ) - threat_level = 'low' + threat_level = "low" - parsed_feeds[url] = { - 'threat_level': threat_level, - 'tags': tags - } + parsed_feeds[url] = {"threat_level": threat_level, "tags": tags} return parsed_feeds def log(self, text): @@ -172,12 +178,12 @@ def log(self, text): """ self.notify_observers( { - 'from': self.name, - 'log_to_logfiles_only': True, - 'txt': text, - 'verbose': 0, - 'debug': 1 - } + "from": self.name, + "log_to_logfiles_only": True, + "txt": text, + "verbose": 0, + "debug": 1, + } ) def read_ports_info(self, ports_info_filepath) -> int: @@ -189,7 +195,7 @@ def read_ports_info(self, ports_info_filepath) -> int: # there are ports that are by default considered unknown to slips, # but if it's known to be used by a specific organization, slips won't consider it 'unknown'. 
# in ports_info_filepath we have a list of organizations range/ip and the port it's known to use - with open(ports_info_filepath, 'r') as f: + with open(ports_info_filepath, "r") as f: line_number = 0 while True: line = f.readline() @@ -198,37 +204,37 @@ def read_ports_info(self, ports_info_filepath) -> int: if not line: break # skip the header and the comments at the beginning - if line.startswith('#') or line.startswith('"Organization"'): + if line.startswith("#") or line.startswith('"Organization"'): continue - line = line.split(',') + line = line.split(",") try: organization, ip = line[0], line[1] ports_range = line[2] proto = line[3].lower().strip() # is it a range of ports or a single port - if '-' in ports_range: + if "-" in ports_range: # it's a range of ports - first_port, last_port = ports_range.split('-') + first_port, last_port = ports_range.split("-") first_port = int(first_port) last_port = int(last_port) - for port in range(first_port, last_port+1): - portproto = f'{port}/{proto}' + for port in range(first_port, last_port + 1): + portproto = f"{port}/{proto}" self.db.set_organization_of_port( organization, ip, portproto ) else: # it's a single port - portproto = f'{ports_range}/{proto}' - self.db.set_organization_of_port( - organization, ip, portproto - ) + portproto = f"{ports_range}/{proto}" + self.db.set_organization_of_port(organization, ip, portproto) except IndexError: self.print( - f'Invalid line: {line} line number: {line_number} in {ports_info_filepath}. Skipping.', 0, 1, + f"Invalid line: {line} line number: {line_number} in {ports_info_filepath}. Skipping.", + 0, + 1, ) continue return line_number @@ -239,22 +245,20 @@ def update_local_file(self, file_path) -> bool: """ try: # each file is updated differently - if 'ports_used_by_specific_orgs.csv' in file_path: + if "ports_used_by_specific_orgs.csv" in file_path: self.read_ports_info(file_path) - elif 'services.csv' in file_path: - with open(file_path, 'r') as f: + elif "services.csv" in file_path: + with open(file_path, "r") as f: for line in f: - name = line.split(',')[0] - port = line.split(',')[1] - proto = line.split(',')[2] + name = line.split(",")[0] + port = line.split(",")[1] + proto = line.split(",")[2] # descr = line.split(',')[3] - self.db.set_port_info( - f'{str(port)}/{proto}', name - ) + self.db.set_port_info(f"{str(port)}/{proto}", name) # Store the new hash of file in the database - file_info = {'hash': self.new_hash} + file_info = {"hash": self.new_hash} self.db.set_TI_file_info(file_path, file_info) return True @@ -273,7 +277,7 @@ def check_if_update_local_file(self, file_path: str) -> bool: # Get last hash of the file stored in the database file_info = self.db.get_TI_file_info(file_path) - old_hash = file_info.get('hash', False) + old_hash = file_info.get("hash", False) if not old_hash or old_hash != new_hash: # first time seeing the file, OR we should update it @@ -290,8 +294,8 @@ def check_if_update_online_whitelist(self) -> bool: Used for online whitelist specified in slips.conf """ # Get the last time this file was updated - ti_file_info = self.db.get_TI_file_info('tranco_whitelist') - last_update = ti_file_info.get('time', float('-inf')) + ti_file_info = self.db.get_TI_file_info("tranco_whitelist") + last_update = ti_file_info.get("time", float("-inf")) now = time.time() if last_update + self.online_whitelist_update_period > now: @@ -306,29 +310,30 @@ def check_if_update_online_whitelist(self) -> bool: return False # update the timestamp in the db - self.db.set_TI_file_info( 
'tranco_whitelist', - {'time': time.time()} - ) - self.responses['tranco_whitelist'] = response + self.db.set_TI_file_info("tranco_whitelist", {"time": time.time()}) + self.responses["tranco_whitelist"] = response return True - def download_file(self, file_to_download): # Retry up to 5 times to get the TI file if an error occurred for _try in range(5): try: response = requests.get(file_to_download, timeout=5) if response.status_code != 200: - error = f'An error occurred while downloading the file {file_to_download}.' \ - f'status code: {response.status_code}. Aborting' + error = ( + f"An error occurred while downloading the file {file_to_download}." + f"status code: {response.status_code}. Aborting" + ) else: return response except requests.exceptions.ReadTimeout: - error = f'Timeout reached while downloading the file {file_to_download}. Aborting.' + error = f"Timeout reached while downloading the file {file_to_download}. Aborting." - except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError): - error = f'Connection error while downloading the file {file_to_download}. Aborting.' + except ( + requests.exceptions.ConnectionError, + requests.exceptions.ChunkedEncodingError, + ): + error = f"Connection error while downloading the file {file_to_download}. Aborting." if error: self.print(error, 0, 1) @@ -340,7 +345,7 @@ def get_last_modified(self, response) -> str: Called when the file doesn't have an e-tag :param response: the output of a request done with requests library """ - return response.headers.get('Last-Modified', False) + return response.headers.get("Last-Modified", False) def check_if_update(self, file_to_download: str, update_period) -> bool: """ @@ -351,14 +356,14 @@ def check_if_update(self, file_to_download: str, update_period) -> bool: # the response will be stored in self.responses if the file is old and needs to be updated # Get the last time this file was updated ti_file_info: dict = self.db.get_TI_file_info(file_to_download) - last_update = ti_file_info.get('time', float('-inf')) + last_update = ti_file_info.get("time", float("-inf")) if last_update + update_period > time.time(): # Update period hasn't passed yet, but the file is in our db self.loaded_ti_files += 1 return False # update period passed - if 'risk' in file_to_download: + if "risk" in file_to_download: # updating riskiq TI data does not depend on an e-tag return True @@ -370,25 +375,27 @@ def check_if_update(self, file_to_download: str, update_period) -> bool: if not response: return False - if 'maclookup' in file_to_download: + if "maclookup" in file_to_download: # no need to check the e-tag # we always need to download this file for slips to get info about MACs - self.responses['mac_db'] = response + self.responses["mac_db"] = response return True # Get the E-TAG of this file to compare with current files ti_file_info: dict = self.db.get_TI_file_info(file_to_download) - old_e_tag = ti_file_info.get('e-tag', '') + old_e_tag = ti_file_info.get("e-tag", "") # Check now if E-TAG of file in github is same as downloaded # file here. new_e_tag = self.get_e_tag(response) if not new_e_tag: # use last modified instead - cached_last_modified = ti_file_info.get('Last-Modified', '') + cached_last_modified = ti_file_info.get("Last-Modified", "") new_last_modified = self.get_last_modified(response) if not new_last_modified: - self.log(f"Error updating {file_to_download}. Doesn't have an e-tag or Last-Modified field.") + self.log( + f"Error updating {file_to_download}. 
Doesn't have an e-tag or Last-Modified field." + ) return False # use last modified date instead of e-tag @@ -418,22 +425,19 @@ def check_if_update(self, file_to_download: str, update_period) -> bool: except Exception: exception_line = sys.exc_info()[2].tb_lineno - self.print( - f'Problem on update_TI_file() line {exception_line}', 0, 1 - ) - self.print(traceback.format_exc(), 0, 1) + self.print(f"Problem on update_TI_file() line {exception_line}", 0, 1) + self.print(traceback.format_exc(), 0, 1) return False def get_e_tag(self, response): """ :param response: the output of a request done with requests library """ - return response.headers.get('ETag', False) - + return response.headers.get("ETag", False) def write_file_to_disk(self, response, full_path): - with open(full_path, 'w') as f: + with open(full_path, "w") as f: f.write(response.text) def parse_ssl_feed(self, url, full_path): @@ -450,13 +454,13 @@ def parse_ssl_feed(self, url, full_path): description_column = None while True: line = ssl_feed.readline() - if line.startswith('# Listingdate'): # looks like the line that contains column names, search where is the description column - for column in line.split(','): # Listingreason is the description column in abuse.ch Suricata SSL Fingerprint Blacklist - if 'Listingreason' in column.lower(): - description_column = line.split(',').index(column) - if not line.startswith('#'): + if line.startswith("# Listingdate"): # looks like the line that contains column names, search where is the description column + for column in line.split(","): # Listingreason is the description column in abuse.ch Suricata SSL Fingerprint Blacklist + if "Listingreason" in column.lower(): + description_column = line.split(",").index(column) + if not line.startswith("#"): # break while statement if it is not a comment (i.e. does not start with #) or a header line break @@ -464,9 +468,9 @@ def parse_ssl_feed(self, url, full_path): # Store the current position of the TI file current_file_position = ssl_feed.tell() - if ',' in line: - data = line.replace('\n', '').replace('"', '').split(',') - amount_of_columns = len(line.split(',')) + if "," in line: + data = line.replace("\n", "").replace('"', "").split(",") + amount_of_columns = len(line.split(",")) if description_column is None: # assume it's the last column @@ -488,7 +492,9 @@ def parse_ssl_feed(self, url, full_path): if sha1_column is None: # can't find a column that contains an ioc self.print( - f'Error while reading the ssl file {full_path}. Could not find a column with sha1 info', 0, 1, + f"Error while reading the ssl file {full_path}. Could not find a column with sha1 info", + 0, + 1, ) return False @@ -500,7 +506,7 @@ def parse_ssl_feed(self, url, full_path): # 2022-02-06 07:58:29,6cec09bcb575352785d313c7e978f26bfbd528ab,AsyncRAT C&C # skip comment lines - if line.startswith('#'): + if line.startswith("#"): continue # Separate the lines like CSV, either by commas or tabs # And surrounded by " # get the hash to store in our db - if ',' in line: + if "," in line: sha1 = ( - line.replace('\n', '') - .replace('"', '') - .split(',')[sha1_column] + line.replace("\n", "") + .replace('"', "") + .split(",")[sha1_column] .strip() ) # get the description of this ssl to store in our db try: - separator = ',' if ',' in line else '\t' + separator = "," if "," in line else "\t" description = ( - line.replace('\n', '') - .replace('"', '') + line.replace("\n", "") + .replace('"', "") .split(separator)[description_column] .strip() ) except IndexError: self.print( - f'IndexError Description column: 
Line: {line}' + f"IndexError Description column: {description_column}. Line: {line}" ) # self.print('\tRead Data {}: {}'.format(sha1, description)) - filename = full_path.split('/')[-1] + filename = full_path.split("/")[-1] if len(sha1) == 40: # Store the sha1 in our local dict malicious_ssl_certs[sha1] = json.dumps( { - 'description': description, - 'source': filename, - 'threat_level': self.ssl_feeds[url][ - 'threat_level' - ], - 'tags': self.ssl_feeds[url]['tags'], + "description": description, + "source": filename, + "threat_level": self.ssl_feeds[url]["threat_level"], + "tags": self.ssl_feeds[url]["tags"], } ) else: self.log( - f'The data {data} is not valid. It was found in {filename}.' + f"The data {data} is not valid. It was found in {filename}." ) continue # Add all loaded malicious sha1 to the database @@ -560,12 +564,14 @@ async def update_TI_file(self, link_to_download: str) -> bool: Update remote TI files, JA3 feeds and SSL feeds by writing them to disk and parsing them """ try: - self.log(f'Updating the remote file {link_to_download}') + self.log(f"Updating the remote file {link_to_download}") response = self.responses[link_to_download] - file_name_to_download = link_to_download.split('/')[-1] + file_name_to_download = link_to_download.split("/")[-1] # first download the file and save it locally - full_path = os.path.join(self.path_to_remote_ti_files, file_name_to_download) + full_path = os.path.join( + self.path_to_remote_ti_files, file_name_to_download + ) self.write_file_to_disk(response, full_path) # File is updated in the server and was in our database. @@ -578,8 +584,10 @@ async def update_TI_file(self, link_to_download: str) -> bool: link_to_download, full_path ): self.print( - f'Error parsing JA3 feed {link_to_download}. ' - f'Updating was aborted.', 0, 1, + f"Error parsing JA3 feed {link_to_download}. " + f"Updating was aborted.", + 0, + 1, ) return False @@ -588,29 +596,30 @@ async def update_TI_file(self, link_to_download: str) -> bool: link_to_download, full_path ): self.print( - f'Error parsing feed {link_to_download}. ' - f'Updating was aborted.', 0, 1, + f"Error parsing feed {link_to_download}. " f"Updating was aborted.", + 0, + 1, ) return False - elif ( - link_to_download in self.ssl_feeds - and not self.parse_ssl_feed(link_to_download, full_path) + elif link_to_download in self.ssl_feeds and not self.parse_ssl_feed( + link_to_download, full_path ): self.print( - f'Error parsing feed {link_to_download}. ' - f'Updating was aborted.', 0, 1, + f"Error parsing feed {link_to_download}. 
" f"Updating was aborted.", + 0, + 1, ) return False # Store the new etag and time of file in the database file_info = { - 'e-tag': self.get_e_tag(response), - 'time': time.time(), - 'Last-Modified': self.get_last_modified(response) + "e-tag": self.get_e_tag(response), + "time": time.time(), + "Last-Modified": self.get_last_modified(response), } self.db.set_TI_file_info(link_to_download, file_info) - self.log(f'Successfully updated in DB the remote file {link_to_download}') + self.log(f"Successfully updated in DB the remote file {link_to_download}") self.loaded_ti_files += 1 # done parsing the file, delete it from disk @@ -625,65 +634,62 @@ async def update_TI_file(self, link_to_download: str) -> bool: except Exception: exception_line = sys.exc_info()[2].tb_lineno - self.print( - f'Problem on update_TI_file() line {exception_line}', 0, 1 - ) - self.print(traceback.print_exc(),0,1) + self.print(f"Problem on update_TI_file() line {exception_line}", 0, 1) + self.print(traceback.print_stack(), 0, 1) return False def update_riskiq_feed(self): """Get and parse RiskIQ feed""" - if not ( - self.riskiq_email - and self.riskiq_key - ): + if not (self.riskiq_email and self.riskiq_key): return False try: - self.log('Updating RiskIQ domains') - url = 'https://api.riskiq.net/pt/v2/articles/indicators' + self.log("Updating RiskIQ domains") + url = "https://api.riskiq.net/pt/v2/articles/indicators" auth = (self.riskiq_email, self.riskiq_key) today = datetime.date.today() days_ago = datetime.timedelta(7) a_week_ago = today - days_ago data = { - 'startDateInclusive': a_week_ago.strftime('%Y-%m-%d'), - 'endDateExclusive': today.strftime('%Y-%m-%d'), + "startDateInclusive": a_week_ago.strftime("%Y-%m-%d"), + "endDateExclusive": today.strftime("%Y-%m-%d"), } # Specifying json= here instead of data= ensures that the # Content-Type header is application/json, which is necessary. response = requests.get(url, timeout=5, auth=auth, json=data).json() # extract domains only from the response try: - response = response['indicators'] + response = response["indicators"] for indicator in response: # each indicator is a dict malicious_domains_dict = {} - if indicator.get('type', '') == 'domain': - domain = indicator['value'] + if indicator.get("type", "") == "domain": + domain = indicator["value"] malicious_domains_dict[domain] = json.dumps( { - 'description': 'malicious domain detected by RiskIQ', - 'source': url, + "description": "malicious domain detected by RiskIQ", + "source": url, } ) self.db.add_domains_to_IoC(malicious_domains_dict) except KeyError: self.print( - f'RiskIQ returned: {response["message"]}. Update Cancelled.', 0, 1, + f'RiskIQ returned: {response["message"]}. Update Cancelled.', + 0, + 1, ) return False # update the timestamp in the db - malicious_file_info = {'time': time.time()} - self.db.set_TI_file_info( - 'riskiq_domains', malicious_file_info - ) - self.log('Successfully updated RiskIQ domains.') + malicious_file_info = {"time": time.time()} + self.db.set_TI_file_info("riskiq_domains", malicious_file_info) + self.log("Successfully updated RiskIQ domains.") return True except Exception as e: - self.log('An error occurred while updating RiskIQ domains. Updating was aborted.') - self.print('An error occurred while updating RiskIQ feed.', 0, 1) - self.print(f'Error: {e}', 0, 1) + self.log( + "An error occurred while updating RiskIQ domains. Updating was aborted." 
+ ) + self.print("An error occurred while updating RiskIQ feed.", 0, 1) + self.print(f"Error: {e}", 0, 1) return False def delete_old_source_IPs(self, file): @@ -695,7 +701,7 @@ def delete_old_source_IPs(self, file): for ip_data in all_data.items(): ip = ip_data[0] data = json.loads(ip_data[1]) - if data['source'] == file: + if data["source"] == file: old_data.append(ip) if old_data: self.db.delete_ips_from_IoC_ips(old_data) @@ -709,7 +715,7 @@ def delete_old_source_Domains(self, file): for domain_data in all_data.items(): domain = domain_data[0] data = json.loads(domain_data[1]) - if data['source'] == file: + if data["source"] == file: old_data.append(domain) if old_data: self.db.delete_domains_from_IoC_domains(old_data) @@ -738,15 +744,13 @@ def parse_ja3_feed(self, url, ja3_feed_path: str) -> bool: description_column = None while True: line = ja3_feed.readline() - if line.startswith('# ja3_md5'): + if line.startswith("# ja3_md5"): # looks like the line that contains column names, search where is the description column - for column in line.split(','): + for column in line.split(","): # Listingreason is the description column in abuse.ch Suricata JA3 Fingerprint Blacklist - if 'Listingreason' in column.lower(): - description_column = line.split(',').index( - column - ) - if not line.startswith('#'): + if "Listingreason" in column.lower(): + description_column = line.split(",").index(column) + if not line.startswith("#"): # break while statement if it is not a comment (i.e. does not start with #) or a header line break @@ -754,9 +758,9 @@ def parse_ja3_feed(self, url, ja3_feed_path: str) -> bool: # Store the current position of the TI file current_file_position = ja3_feed.tell() - if ',' in line: - data = line.replace('\n', '').replace('"', '').split(',') - amount_of_columns = len(line.split(',')) + if "," in line: + data = line.replace("\n", "").replace('"', "").split(",") + amount_of_columns = len(line.split(",")) if description_column is None: # assume it's the last column @@ -778,8 +782,10 @@ def parse_ja3_feed(self, url, ja3_feed_path: str) -> bool: if ja3_column is None: # can't find a column that contains an ioc self.print( - f'Error while reading the ja3 file {ja3_feed_path}. ' - f'Could not find a column with JA3 info', 1, 1 + f"Error while reading the ja3 file {ja3_feed_path}. 
" + f"Could not find a column with JA3 info", + 1, + 1, ) return False @@ -791,7 +797,7 @@ def parse_ja3_feed(self, url, ja3_feed_path: str) -> bool: # 8f52d1ce303fb4a6515836aec3cc16b1,2017-07-15 19:05:11,2019-07-27 20:00:57,TrickBot # skip comment lines - if line.startswith('#'): + if line.startswith("#"): continue # Separate the lines like CSV, either by commas or tabs @@ -799,57 +805,57 @@ def parse_ja3_feed(self, url, ja3_feed_path: str) -> bool: # And surronded by " # get the ja3 to store in our db - if ',' in line: + if "," in line: ja3 = ( - line.replace('\n', '') - .replace('"', '') - .split(',')[ja3_column] + line.replace("\n", "") + .replace('"', "") + .split(",")[ja3_column] .strip() ) # get the description of this ja3 to store in our db try: - if ',' in line: + if "," in line: description = ( - line.replace('\n', '') - .replace('"', '') - .split(',')[description_column] + line.replace("\n", "") + .replace('"', "") + .split(",")[description_column] .strip() ) else: description = ( - line.replace('\n', '') - .replace('"', '') - .split('\t')[description_column] + line.replace("\n", "") + .replace('"', "") + .split("\t")[description_column] .strip() ) except IndexError: self.print( - f'IndexError Description column: {description_column}. Line: {line}', + f"IndexError Description column: {description_column}. Line: {line}", 0, 1, ) # self.print('\tRead Data {}: {}'.format(ja3, description)) - filename = ja3_feed_path.split('/')[-1] + filename = ja3_feed_path.split("/")[-1] # Check if the data is a valid IPv4, IPv6 or domain if len(ja3) == 32: # Store the ja3 in our local dict malicious_ja3_dict[ja3] = json.dumps( { - 'description': description, - 'source': filename, - 'threat_level': self.ja3_feeds[url][ - 'threat_level' - ], - 'tags': self.ja3_feeds[url]['tags'], + "description": description, + "source": filename, + "threat_level": self.ja3_feeds[url]["threat_level"], + "tags": self.ja3_feeds[url]["tags"], } ) else: self.print( - f'The data {data} is not valid. It was found in {filename}.', 3, 3, + f"The data {data} is not valid. It was found in {filename}.", + 3, + 3, ) continue @@ -858,8 +864,8 @@ def parse_ja3_feed(self, url, ja3_feed_path: str) -> bool: return True except Exception: - self.print('Problem in parse_ja3_feed()', 0, 1) - print(traceback.format_exc()) + self.print("Problem in parse_ja3_feed()", 0, 1) + self.print(traceback.print_stack(), 0, 1) return False def parse_json_ti_feed(self, link_to_download, ti_file_path: str) -> bool: @@ -867,16 +873,16 @@ def parse_json_ti_feed(self, link_to_download, ti_file_path: str) -> bool: Slips has 2 json TI feeds that are parsed differently. 
hole.cert.pl and rstcloud """ # to support https://hole.cert.pl/domains/domains.json - tags = self.url_feeds[link_to_download]['tags'] + tags = self.url_feeds[link_to_download]["tags"] # the new threat_level is the max of the 2 - threat_level = self.url_feeds[link_to_download]['threat_level'] - filename = ti_file_path.split('/')[-1] + threat_level = self.url_feeds[link_to_download]["threat_level"] + filename = ti_file_path.split("/")[-1] - if 'rstcloud' in link_to_download: + if "rstcloud" in link_to_download: malicious_ips_dict = {} with open(ti_file_path) as feed: self.print( - f'Reading next lines in the file {ti_file_path} for IoC', 3, 0 + f"Reading next lines in the file {ti_file_path} for IoC", 3, 0 ) for line in feed.read().splitlines(): try: @@ -886,24 +892,23 @@ def parse_json_ti_feed(self, link_to_download, ti_file_path: str) -> bool: continue # each ip in this file has its own source and tag src = line["src"]["name"][0] - malicious_ips_dict[line['ip']['v4']] = json.dumps( + malicious_ips_dict[line["ip"]["v4"]] = json.dumps( { - 'description': '', - 'source': f'{filename}, {src}', - 'threat_level': threat_level, - 'tags': f'{line["tags"]["str"]}, {tags}', + "description": "", + "source": f"{filename}, {src}", + "threat_level": threat_level, + "tags": f'{line["tags"]["str"]}, {tags}', } ) self.db.add_ips_to_IoC(malicious_ips_dict) return True - - if 'hole.cert.pl' in link_to_download: + if "hole.cert.pl" in link_to_download: malicious_domains_dict = {} with open(ti_file_path) as feed: self.print( - f'Reading next lines in the file {ti_file_path} for IoC', 3, 0 + f"Reading next lines in the file {ti_file_path} for IoC", 3, 0 ) try: file = json.loads(feed.read()) @@ -912,24 +917,20 @@ def parse_json_ti_feed(self, link_to_download, ti_file_path: str) -> bool: return False for ioc in file: - date = ioc['InsertDate'] - diff = utils.get_time_diff( - date, - time.time(), - return_type='days' - ) + date = ioc["InsertDate"] + diff = utils.get_time_diff(date, time.time(), return_type="days") if diff > self.interval: continue - domain = ioc['DomainAddress'] + domain = ioc["DomainAddress"] if not validators.domain(domain): continue malicious_domains_dict[domain] = json.dumps( { - 'description': '', - 'source': filename, - 'threat_level': threat_level, - 'tags': tags, + "description": "", + "source": filename, + "threat_level": threat_level, + "tags": tags, } ) self.db.add_domains_to_IoC(malicious_domains_dict) @@ -939,19 +940,19 @@ def get_description_column(self, header): """ Given the first line of a TI file (header line), try to get the index of the description column """ - description_keywords = ('desc', 'collect', 'malware', 'tags_str', 'source' ) - for column in header.split(','): + description_keywords = ("desc", "collect", "malware", "tags_str", "source") + for column in header.split(","): for keyword in description_keywords: if keyword in column: - return header.split(',').index(column) + return header.split(",").index(column) def is_ignored_line(self, line) -> bool: """ Returns True if a comment, a blank line, or an unsupported IoC """ if ( - line.startswith('#') - or line.startswith(';') + line.startswith("#") + or line.startswith(";") or line.isspace() or len(line) < 3 ): @@ -969,19 +970,17 @@ def parse_line(self, line, file_path) -> tuple: a list of the line fields, and the separator it's using """ # Separate the lines like CSV, either by commas or tabs - separators = ('#', ',', ';', '\t') + separators = ("#", ",", ";", "\t") for separator in separators: if separator in line: # 
lines and descriptions in this feed are separated with ',' , so we get # an invalid number of columns - if 'OCD-Datalak' in file_path: + if "OCD-Datalak" in file_path: # the valid line - new_line = line.split('Z,')[0] + new_line = line.split("Z,")[0] # replace every ',' from the description - description = line.split('Z,', 1)[1].replace( - ', ', '' - ) - line = f'{new_line},{description}' + description = line.split("Z,", 1)[1].replace(", ", "") + line = f"{new_line},{description}" # get a list of every field in the line e.g [ioc, description, date] line_fields = line.split(separator) @@ -990,22 +989,21 @@ def parse_line(self, line, file_path) -> tuple: break else: # no separator of the above was found - if '0.0.0.0 ' in line: - sep = ' ' + if "0.0.0.0 " in line: + sep = " " # anudeepND/blacklist file - line_fields = [ - line[line.index(' ') + 1 :].replace('\n', '') - ] + line_fields = [line[line.index(" ") + 1 :].replace("\n", "")] amount_of_columns = 1 else: - sep = '\t' + sep = "\t" line_fields = line.split(sep) amount_of_columns = len(line_fields) return amount_of_columns, line_fields, sep - - def get_data_column(self, amount_of_columns: int, line_fields: list, file_path: str): + def get_data_column( + self, amount_of_columns: int, line_fields: list, file_path: str + ): """ Get the first column that is an IPv4, IPv6 or domain :param file_path: path of the ti file that contains the given fields @@ -1016,19 +1014,22 @@ def get_data_column(self, amount_of_columns: int, line_fields: list, file_path: # Some unknown string and we cant detect the type of it # can't find a column that contains an ioc self.print( - f'Error while reading the TI file {file_path}.' - f' Could not find a column with an IP or domain', - 0, 1, + f"Error while reading the TI file {file_path}." + f" Could not find a column with an IP or domain", + 0, + 1, ) - return 'Error' + return "Error" - def extract_ioc_from_line(self, line, line_fields, separator, data_column, description_column, file_path) -> tuple: + def extract_ioc_from_line( + self, line, line_fields, separator, data_column, description_column, file_path + ) -> tuple: """ Returns the ip/ip range/domain and it's description from the given line """ - if '0.0.0.0 ' in line: + if "0.0.0.0 " in line: # anudeepND/blacklist file - data = line[line.index(' ') + 1 :].replace('\n', '') + data = line[line.index(" ") + 1 :].replace("\n", "") else: line_fields = line.split(separator) # get the ioc @@ -1039,13 +1040,15 @@ def extract_ioc_from_line(self, line, line_fields, separator, data_column, descr description = line_fields[description_column].strip() except (IndexError, UnboundLocalError): self.print( - f'IndexError Description column: ' - f'{description_column}. Line: {line} in ' - f'{file_path}', 0, 1, + f"IndexError Description column: " + f"{description_column}. 
Line: {line} in " + f"{file_path}", + 0, + 1, ) return False, False - self.print(f'\tRead Data {data}: {description}', 3, 0) + self.print(f"\tRead Data {data}: {description}", 3, 0) return data, description def add_to_ip_ctr(self, ip, blacklist): @@ -1053,24 +1056,14 @@ keep track of how many times an ip appeared in all blacklists :param blacklist: to make sure we don't count the ip twice in the same blacklist """ - blacklist = os.path.basename(blacklist) - if ( - ip in self.ips_ctr - and - blacklist not in self.ips_ctr['blacklists'] - ): - self.ips_ctr[ip]['times_found'] += 1 - self.ips_ctr[ip]['blacklists'].append(blacklist) + blacklist = os.path.basename(blacklist) + if ip in self.ips_ctr and blacklist not in self.ips_ctr[ip]["blacklists"]: + self.ips_ctr[ip]["times_found"] += 1 + self.ips_ctr[ip]["blacklists"].append(blacklist) else: - self.ips_ctr[ip] = { - 'times_found': 1, - 'blacklists': [blacklist] - } + self.ips_ctr[ip] = {"times_found": 1, "blacklists": [blacklist]} - - def parse_ti_feed( - self, link_to_download, ti_file_path: str - ) -> bool: + def parse_ti_feed(self, link_to_download, ti_file_path: str) -> bool: """ Read all the files holding IP addresses and a description and put the info in a large dict. @@ -1092,16 +1085,14 @@ def parse_ti_feed( malicious_ips_dict = {} malicious_domains_dict = {} malicious_ip_ranges = {} - if 'json' in ti_file_path: - return self.parse_json_ti_feed( - link_to_download, ti_file_path - ) - + if "json" in ti_file_path: + return self.parse_json_ti_feed(link_to_download, ti_file_path) with open(ti_file_path) as feed: self.print( - f'Reading next lines in the file {ti_file_path} ' - f'for IoC', 3, 0, + f"Reading next lines in the file {ti_file_path} " f"for IoC", + 3, + 0, ) # Remove comments and find the description column if possible @@ -1120,15 +1111,19 @@ def parse_ti_feed( # Store the current position of the TI file current_file_position = feed.tell() - line = line.replace('\n', '').replace('"', '') + line = line.replace("\n", "").replace('"', "") - amount_of_columns, line_fields, separator = self.parse_line(line, ti_file_path) + amount_of_columns, line_fields, separator = self.parse_line( + line, ti_file_path + ) if description_column is None: # assume it's the last column description_column = amount_of_columns - 1 - data_column = self.get_data_column(amount_of_columns, line_fields, ti_file_path) - if data_column == 'Error': # don't use 'if not' because it may be 0 + data_column = self.get_data_column( + amount_of_columns, line_fields, ti_file_path + ) + if data_column == "Error": # don't use 'if not' because it may be 0 return False # Now that we read the first line, go back so we can process it @@ -1146,20 +1141,22 @@ def parse_ti_feed( if self.is_ignored_line(line): continue - if 'OCD-Datalak' in ti_file_path: + if "OCD-Datalak" in ti_file_path: # the valid line - new_line = line.split('Z,')[0] + new_line = line.split("Z,")[0] # replace every ',' from the description - description = line.split('Z,', 1)[1].replace(', ', '') - line = f'{new_line},{description}' - - line = line.replace('\n', '').replace('"', '') - data, description = self.extract_ioc_from_line(line, - line_fields, - separator, - data_column, - description_column, - ti_file_path) + description = line.split("Z,", 1)[1].replace(", ", "") + line = f"{new_line},{description}" + + line = line.replace("\n", "").replace('"', "") + data, description = self.extract_ioc_from_line( + line, + line_fields, + separator, + data_column, +
description_column, + ti_file_path, + ) if not data and not description: return False @@ -1167,18 +1164,18 @@ def parse_ti_feed( if len(data) < 3: continue - data_file_name = ti_file_path.split('/')[-1] + data_file_name = ti_file_path.split("/")[-1] data_type = utils.detect_data_type(data) if data_type is None: self.print( - f'The data {data} is not valid. It was found in {ti_file_path}.', + f"The data {data} is not valid. It was found in {ti_file_path}.", 0, 1, ) continue - if data_type == 'domain': + if data_type == "domain": # if we have info about the ioc, append to it, if we don't add a new entry in the correct dict try: # we already have info about this domain? @@ -1186,7 +1183,7 @@ def parse_ti_feed( malicious_domains_dict[str(data)] ) # if the domain appeared twice in the same blacklist, skip it - if data_file_name in old_domain_info['source']: + if data_file_name in old_domain_info["source"]: continue # append the new blacklist name to the current one source = f'{old_domain_info["source"]}, {data_file_name}' @@ -1195,40 +1192,34 @@ def parse_ti_feed( # the new threat_level is the maximum threat_level threat_level = str( max( - float(old_domain_info['threat_level']), + float(old_domain_info["threat_level"]), float( - self.url_feeds[link_to_download][ - 'threat_level' - ] + self.url_feeds[link_to_download]["threat_level"] ), ) ) # Store the ip in our local dict malicious_domains_dict[str(data)] = json.dumps( { - 'description': old_domain_info[ - 'description' - ], - 'source': source, - 'threat_level': threat_level, - 'tags': tags, + "description": old_domain_info["description"], + "source": source, + "threat_level": threat_level, + "tags": tags, } ) except KeyError: # We don't have info about this domain, Store the ip in our local dict malicious_domains_dict[str(data)] = json.dumps( { - 'description': description, - 'source': data_file_name, - 'threat_level': self.url_feeds[ - link_to_download - ]['threat_level'], - 'tags': self.url_feeds[link_to_download][ - 'tags' + "description": description, + "source": data_file_name, + "threat_level": self.url_feeds[link_to_download][ + "threat_level" ], + "tags": self.url_feeds[link_to_download]["tags"], } ) - elif data_type == 'ip': + elif data_type == "ip": # make sure we're not blacklisting a private ip ip_obj = ipaddress.ip_address(data) if ( @@ -1241,64 +1232,56 @@ def parse_ti_feed( try: self.add_to_ip_ctr(data, ti_file_path) # we already have info about this ip? 
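
The domain branch above folds a re-seen IoC into its existing record: the new blacklist name is appended to the stored source and the stored threat level becomes the maximum of the old and new values (the ip branch that follows also concatenates tags the same way). A minimal standalone sketch of that merge rule; merge_ioc and the record layout are illustrative, not the module's actual helpers:

    import json

    def merge_ioc(old: dict, source: str, tags: str, threat_level: float) -> str:
        """Fold a re-seen IoC into its existing record, keeping the max threat level."""
        return json.dumps({
            "description": old["description"],
            "source": f'{old["source"]}, {source}',
            "tags": f'{old["tags"]}, {tags}',
            # a feed can only raise the stored threat level, never lower it
            "threat_level": str(max(float(old["threat_level"]), threat_level)),
        })

    old = {"description": "", "source": "feed_a.csv", "tags": "phishing", "threat_level": "0.5"}
    print(merge_ioc(old, "feed_b.csv", "malware", 0.8))  # threat_level becomes "0.8"
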
- old_ip_info = json.loads( - malicious_ips_dict[str(data)] - ) + old_ip_info = json.loads(malicious_ips_dict[str(data)]) # if the IP appeared twice in the same blacklist, don't add the blacklist name twice # or calculate the max threat_level - if data_file_name in old_ip_info['source']: + if data_file_name in old_ip_info["source"]: continue # append the new blacklist name to the current one - source = ( - f'{old_ip_info["source"]}, {data_file_name}' - ) + source = f'{old_ip_info["source"]}, {data_file_name}' # append the new tag to the old tag tags = f'{old_ip_info["tags"]}, {self.url_feeds[link_to_download]["tags"]}' # the new threat_level is the max of the 2 threat_level = str( max( - int(old_ip_info['threat_level']), + int(old_ip_info["threat_level"]), int( - self.url_feeds[link_to_download][ - 'threat_level' - ] + self.url_feeds[link_to_download]["threat_level"] ), ) ) malicious_ips_dict[str(data)] = json.dumps( { - 'description': old_ip_info['description'], - 'source': source, - 'threat_level': threat_level, - 'tags': tags, + "description": old_ip_info["description"], + "source": source, + "threat_level": threat_level, + "tags": tags, } ) # print(f'Dulicate ip {data} found in sources: {source} old threat_level: {ip_info["threat_level"]} except KeyError: threat_level = self.url_feeds[link_to_download][ - 'threat_level' + "threat_level" ] # We don't have info about this IP, Store the ip in our local dict malicious_ips_dict[str(data)] = json.dumps( { - 'description': description, - 'source': data_file_name, - 'threat_level': threat_level, - 'tags': self.url_feeds[link_to_download][ - 'tags' - ], + "description": description, + "source": data_file_name, + "threat_level": threat_level, + "tags": self.url_feeds[link_to_download]["tags"], } ) # set the score and confidence of this ip in ipsinfo # and the profile of this ip to the same as the ones given in slips.conf # todo for now the confidence is 1 self.db.update_threat_level( - f'profile_{data}', threat_level, 1 + f"profile_{data}", threat_level, 1 ) - elif data_type == 'ip_range': + elif data_type == "ip_range": # make sure we're not blacklisting a private or multicast ip range # get network address from range - net_addr = data[: data.index('/')] + net_addr = data[: data.index("/")] ip_obj = ipaddress.ip_address(net_addr) if ( ip_obj.is_multicast @@ -1310,38 +1293,30 @@ def parse_ti_feed( try: # we already have info about this range? 
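
Both the ip and ip_range branches refuse to blacklist special-purpose addresses before storing them; the exact condition is elided by the hunk boundaries above (only is_multicast is visible), so the set of checks in this sketch is an assumption based on the surrounding comments about private and multicast addresses:

    import ipaddress

    def is_blacklistable(ip: str) -> bool:
        """Reject addresses that should never end up in a public blacklist (assumed check set)."""
        ip_obj = ipaddress.ip_address(ip)
        return not (
            ip_obj.is_multicast
            or ip_obj.is_private      # e.g. 192.168.0.0/16
            or ip_obj.is_loopback     # 127.0.0.0/8 and ::1
            or ip_obj.is_link_local
            or ip_obj.is_reserved
        )

    print(is_blacklistable("8.8.8.8"))      # True
    print(is_blacklistable("192.168.1.5"))  # False
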
- old_range_info = json.loads( - malicious_ip_ranges[data] - ) + old_range_info = json.loads(malicious_ip_ranges[data]) # if the Range appeared twice in the same blacklist, don't add the blacklist name twice # or calculate the max threat_level - if data_file_name in old_range_info['source']: + if data_file_name in old_range_info["source"]: continue # append the new blacklist name to the current one - source = ( - f'{old_range_info["source"]}, {data_file_name}' - ) + source = f'{old_range_info["source"]}, {data_file_name}' # append the new tag to the old tag tags = f'{old_range_info["tags"]}, {self.url_feeds[link_to_download]["tags"]}' # the new threat_level is the max of the 2 threat_level = str( max( - int(old_range_info['threat_level']), + int(old_range_info["threat_level"]), int( - self.url_feeds[link_to_download][ - 'threat_level' - ] + self.url_feeds[link_to_download]["threat_level"] ), ) ) malicious_ip_ranges[str(data)] = json.dumps( { - 'description': old_range_info[ - 'description' - ], - 'source': source, - 'threat_level': threat_level, - 'tags': tags, + "description": old_range_info["description"], + "source": source, + "threat_level": threat_level, + "tags": tags, } ) # print(f'Duplicate ip range {data} found in @@ -1351,14 +1326,12 @@ def parse_ti_feed( # We don't have info about this range, Store the ip in our local dict malicious_ip_ranges[data] = json.dumps( { - 'description': description, - 'source': data_file_name, - 'threat_level': self.url_feeds[ - link_to_download - ]['threat_level'], - 'tags': self.url_feeds[link_to_download][ - 'tags' + "description": description, + "source": data_file_name, + "threat_level": self.url_feeds[link_to_download][ + "threat_level" ], + "tags": self.url_feeds[link_to_download]["tags"], } ) @@ -1370,38 +1343,39 @@ def parse_ti_feed( except Exception: exception_line = sys.exc_info()[2].tb_lineno self.print( - f'Problem while updating {link_to_download} line ' - f'{exception_line}', 0, 1, + f"Problem while updating {link_to_download} line " f"{exception_line}", + 0, + 1, ) self.print(traceback.format_exc(), 0, 1) return False def check_if_update_org(self, file): - cached_hash = self.db.get_TI_file_info(file).get('hash','') + cached_hash = self.db.get_TI_file_info(file).get("hash", "") if utils.get_hash_from_file(file) != cached_hash: return True - def get_whitelisted_orgs(self) -> list: self.whitelist.read_whitelist() - whitelisted_orgs: dict = self.db.get_whitelist('organizations') + whitelisted_orgs: dict = self.db.get_whitelist("organizations") whitelisted_orgs: list = list(whitelisted_orgs.keys()) return whitelisted_orgs - def update_org_files(self): # update whitelisted orgs in whitelist.conf, we may not have info about all of them whitelisted_orgs: list = self.get_whitelisted_orgs() # remove the ones we have info about - not_supported_orgs = [org for org in whitelisted_orgs if org not in utils.supported_orgs] + not_supported_orgs = [ + org for org in whitelisted_orgs if org not in utils.supported_orgs + ] for org in not_supported_orgs: self.whitelist.load_org_IPs(org) # update orgs we have local info about for org in utils.supported_orgs: org_ips = os.path.join(self.org_info_path, org) - org_asn = os.path.join(self.org_info_path, f'{org}_asn') - org_domains = os.path.join(self.org_info_path, f'{org}_domains') + org_asn = os.path.join(self.org_info_path, f"{org}_asn") + org_domains = os.path.join(self.org_info_path, f"{org}_domains") if self.check_if_update_org(org_ips):
self.whitelist.load_org_IPs(org) @@ -1413,20 +1387,22 @@ def update_org_files(self): for file in (org_ips, org_domains, org_asn): info = { - 'hash': utils.get_hash_from_file(file), + "hash": utils.get_hash_from_file(file), } self.db.set_TI_file_info(file, info) def update_ports_info(self): - for file in os.listdir('slips_files/ports_info'): - file = os.path.join('slips_files/ports_info', file) - if self.check_if_update_local_file( + for file in os.listdir("slips_files/ports_info"): + file = os.path.join("slips_files/ports_info", file) + if self.check_if_update_local_file(file) and not self.update_local_file( file - ) and not self.update_local_file(file): + ): # update failed self.print( - f'An error occurred while updating {file}. Updating ' - f'was aborted.', 0, 1, + f"An error occurred while updating {file}. Updating " + f"was aborted.", + 0, + 1, ) def print_duplicate_ip_summary(self): @@ -1439,53 +1415,52 @@ def print_duplicate_ip_summary(self): ips_in_2_bl = 0 ips_in_3_bl = 0 for ip, ip_info in self.ips_ctr.items(): - blacklists_ip_appeard_in = ip_info['times_found'] + blacklists_ip_appeard_in = ip_info["times_found"] if blacklists_ip_appeard_in == 1: ips_in_1_bl += 1 elif blacklists_ip_appeard_in == 2: ips_in_2_bl += 1 elif blacklists_ip_appeard_in == 3: ips_in_3_bl += 1 - self.print(f'Number of repeated IPs in 1 blacklist: {ips_in_1_bl}', 2, 0) - self.print(f'Number of repeated IPs in 2 blacklists: {ips_in_2_bl}', 2, 0) - self.print(f'Number of repeated IPs in 3 blacklists: {ips_in_3_bl}', 2, 0) + self.print(f"Number of repeated IPs in 1 blacklist: {ips_in_1_bl}", 2, 0) + self.print(f"Number of repeated IPs in 2 blacklists: {ips_in_2_bl}", 2, 0) + self.print(f"Number of repeated IPs in 3 blacklists: {ips_in_3_bl}", 2, 0) def update_mac_db(self): """ Updates the mac db using the response stored in self.response """ - response = self.responses['mac_db'] + response = self.responses["mac_db"] if response.status_code != 200: return False - self.log('Updating the MAC database.') - path_to_mac_db = 'databases/macaddress-db.json' + self.log("Updating the MAC database.") + path_to_mac_db = "databases/macaddress-db.json" # write to file the info as 1 json per line - mac_info = response.text.replace(']','').replace('[','').replace(',{','\n{') - with open(path_to_mac_db, 'w') as mac_db: + mac_info = response.text.replace("]", "").replace("[", "").replace(",{", "\n{") + with open(path_to_mac_db, "w") as mac_db: mac_db.write(mac_info) - self.db.set_TI_file_info( - self.mac_db_link, - {'time': time.time()} - ) + self.db.set_TI_file_info(self.mac_db_link, {"time": time.time()}) return True def update_online_whitelist(self): """ Updates online tranco whitelist defined in slips.conf online_whitelist key """ - response = self.responses['tranco_whitelist'] + response = self.responses["tranco_whitelist"] # write to the file so we don't store the 10k domains in memory - online_whitelist_download_path = os.path.join(self.path_to_remote_ti_files, 'tranco-top-10000-whitelist') - with open(online_whitelist_download_path, 'w') as f: + online_whitelist_download_path = os.path.join( + self.path_to_remote_ti_files, "tranco-top-10000-whitelist" + ) + with open(online_whitelist_download_path, "w") as f: f.write(response.text) # parse the downloaded file and store it in the db - with open(online_whitelist_download_path, 'r') as f: + with open(online_whitelist_download_path, "r") as f: while line := f.readline(): - domain = line.split(',')[1] + domain = line.split(",")[1] 
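
The tranco whitelist parser that continues below splits each downloaded line on the first comma and keeps the second field, which assumes the usual Tranco CSV layout of "rank,domain". A toy line (made-up values) shows what the split yields; note that the trailing newline survives the split unless it is stripped:

    line = "42,example.com\n"     # assumed format: "<rank>,<domain>"
    domain = line.split(",")[1]
    print(repr(domain))           # 'example.com\n'
    print(domain.strip())         # example.com
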
self.db.store_tranco_whitelisted_domain(domain) os.remove(online_whitelist_download_path) @@ -1498,13 +1473,15 @@ async def update(self) -> bool: if self.update_period <= 0: # User does not want to update the malicious IP list. self.print( - 'Not Updating the remote file of malicious IPs and domains. ' - 'update period is <= 0.', 0, 1, + "Not Updating the remote file of malicious IPs and domains. " + "update period is <= 0.", + 0, + 1, ) return False try: - self.log('Checking if we need to download TI files.') + self.log("Checking if we need to download TI files.") if self.check_if_update(self.mac_db_link, self.mac_db_update_period): self.update_mac_db() @@ -1532,13 +1509,11 @@ async def update(self) -> bool: # every function call to update_TI_file is now running concurrently instead of serially # so when a server's taking a while to give us the TI feed, we proceed # to download the next file instead of being idle - task = asyncio.create_task( - self.update_TI_file(file_to_download) - ) + task = asyncio.create_task(self.update_TI_file(file_to_download)) ####################################################### # in case of riskiq files, we don't have a link for them in ti_files, We update these files using their API # check if we have a username and api key and a week has passed since we last updated - if self.check_if_update('riskiq_domains', self.riskiq_update_period): + if self.check_if_update("riskiq_domains", self.riskiq_update_period): self.update_riskiq_feed() # wait for all TI files to update @@ -1554,7 +1529,6 @@ async def update(self) -> bool: except KeyboardInterrupt: return False - async def update_ti_files(self): """ Update TI files and store them in database before slips starts @@ -1562,7 +1536,7 @@ async def update_ti_files(self): # create_task is used to run update() function concurrently instead of serially self.update_finished = asyncio.create_task(self.update()) await self.update_finished - self.print(f'{self.db.get_loaded_ti_files()} TI files successfully loaded.') + self.print(f"{self.db.get_loaded_ti_files()} TI files successfully loaded.") def shutdown_gracefully(self): # terminating the timer for the process to be killed @@ -1594,4 +1568,3 @@ def main(self): nothing should run in a loop in this module """ pass - diff --git a/modules/virustotal/virustotal.py b/modules/virustotal/virustotal.py index 3ba03723d..466989e58 100644 --- a/modules/virustotal/virustotal.py +++ b/modules/virustotal/virustotal.py @@ -1,5 +1,3 @@ -# Must imports -from slips_files.common.imports import * import sys import traceback import json @@ -9,22 +7,22 @@ import ipaddress import threading import validators + +from slips_files.common.imports import * from slips_files.common.slips_utils import utils -class VT(IModule, multiprocessing.Process): +class VT(IModule): name = 'Virustotal' description = 'IP, domain and file hash lookup on Virustotal' authors = [ - 'Dita Hollmannova, Kamila Babayeva', + 'Dita Hollmannova', + 'Kamila Babayeva', 'Alya Gomaa', 'Sebastian Garcia', ] def init(self): - # This line might not be needed when running SLIPS, - # but when VT module is run standalone, it still uses the - # database and this line is necessary. Do not delete it, instead move it to line 21. 
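
The update() coroutine above schedules each update_TI_file() call with asyncio.create_task instead of awaiting it inline, so one slow feed server no longer stalls the remaining downloads. A self-contained sketch of that pattern, with feed names and delays invented for illustration:

    import asyncio

    async def update_ti_file(feed: str, delay: float) -> str:
        await asyncio.sleep(delay)  # stands in for the actual HTTP download
        return f"{feed} updated"

    async def update_all() -> None:
        feeds = [("slow-feed", 2.0), ("fast-feed", 0.1)]
        # create_task starts every download immediately ...
        tasks = [asyncio.create_task(update_ti_file(name, d)) for name, d in feeds]
        # ... and gather waits for all of them: ~2s total instead of ~2.1s serially
        for result in await asyncio.gather(*tasks):
            print(result)

    asyncio.run(update_all())
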
self.c1 = self.db.subscribe('new_flow') self.c2 = self.db.subscribe('new_dns') self.c3 = self.db.subscribe('new_url') @@ -41,7 +39,8 @@ def init(self): # Queue of API calls self.api_call_queue = [] # Pool manager to make HTTP requests with urllib3 - # The certificate provides a bundle of trusted CAs, the certificates are located in certifi.where() + # The certificate provides a bundle of trusted CAs, + # the certificates are located in certifi.where() self.http = urllib3.PoolManager( cert_reqs='CERT_REQUIRED', ca_certs=certifi.where() ) @@ -49,7 +48,8 @@ def init(self): self.api_calls_thread = threading.Thread( target=self.API_calls_thread, daemon=True ) - # this will be true when there's a problem with the API key, then the module will exit + # this will be true when there's a problem with the + # API key, then the module will exit self.incorrect_API_key = False def read_api_key(self): @@ -75,13 +75,18 @@ def count_positives( self, response: dict, response_key: str, positive_key, total_key ): """ - Count positive checks and total checks in the response, for the given category. To compute ratio of downloaded - samples, sum results for both detected and undetected dicts: "undetected_downloaded_samples" and + Count positive checks and total checks in the response, + for the given category. To compute ratio of downloaded + samples, sum results for both detected and undetected + dicts: "undetected_downloaded_samples" and "detected_downloaded_samples". :param response: json dictionary with response data - :param response_key: category to count, eg "undetected_downloaded_samples" - :param positive_key: key to use inside of the category for successful detections (usually its "positives") - :param total_key: key to use inside of the category to sum all checks (usually its "total") + :param response_key: category to count, eg + "undetected_downloaded_samples" + :param positive_key: key to use inside of the + category for successful detections (usually its "positives") + :param total_key: key to use inside of the category to sum all checks + (usually its "total") :return: number of positive tests, number of total tests run """ detections = 0 @@ -124,7 +129,8 @@ def set_vt_data_in_IPInfo(self, ip, cached_data): def get_url_vt_data(self, url): """ - Function to perform API call to VirusTotal and return the score for the URL. + Function to perform API call to VirusTotal and return the + score for the URL. Response is cached in a dictionary. :param url: url to check :return: URL ratio @@ -176,7 +182,8 @@ def set_domain_data_in_DomainInfo(self, domain, cached_data): } data = {'VirusTotal': vtdata} - # Add asn (autonomous system number) if it is unknown or not in the Domain info + # Add asn (autonomous system number) if it is unknown + # or not in the Domain info if cached_data and 'asn' not in cached_data: data['asn'] = { 'number': f'AS{as_owner}' @@ -191,7 +198,8 @@ def API_calls_thread(self): """ while True: - # do not attempt to make more api calls if we already know that the api key is incorrect + # do not attempt to make more api calls if we already + # know that the api key is incorrect if self.incorrect_API_key: return False # wait until the queue is populated @@ -205,10 +213,13 @@ def API_calls_thread(self): ioc_type = self.get_ioc_type(ioc) if ioc_type == 'ip': cached_data = self.db.get_ip_info(ioc) - # return an IPv4Address or IPv6Address object depending on the IP address passed as argument. + # return an IPv4Address or IPv6Address object + # depending on the IP address passed as argument. 
ip_addr = ipaddress.ip_address(ioc) - # if VT data of this IP (not multicast) is not in the IPInfo, ask VT. - # if the IP is not a multicast and 'VirusTotal' key is not in the IPInfo, proceed. + # if VT data of this IP (not multicast) is not + # in the IPInfo, ask VT. + # if the IP is not a multicast and 'VirusTotal' + # key is not in the IPInfo, proceed. if ( not cached_data or 'VirusTotal' not in cached_data ) and not ip_addr.is_multicast: @@ -221,7 +232,8 @@ def API_calls_thread(self): elif ioc_type == 'url': cached_data = self.db.getURLData(ioc) - # If VT data of this domain is not in the DomainInfo, ask VT + # If VT data of this domain is not in the + # DomainInfo, ask VT # If 'Virustotal' key is not in the DomainInfo if not cached_data or 'VirusTotal' not in cached_data: # cached data is either False or {} @@ -246,10 +258,13 @@ def get_passive_dns(self, response): def get_ip_vt_data(self, ip: str): """ - Function to perform API call to VirusTotal and return scores for each of - the four processed categories. Response is cached in a dictionary. Private IPs always return (0, 0, 0, 0). + Function to perform API call to VirusTotal and return + scores for each of + the four processed categories. Response is cached in + a dictionary. Private IPs always return (0, 0, 0, 0). :param ip: IP address to check - :return: 4-tuple of floats: URL ratio, downloaded file ratio, referrer file ratio, communicating file ratio + :return: 4-tuple of floats: URL ratio, downloaded + file ratio, referrer file ratio, communicating file ratio """ try: @@ -271,17 +286,19 @@ def get_ip_vt_data(self, ip: str): self.print( f'Problem in the get_ip_vt_data() line {exception_line}', 0, 1 ) - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(), 0, 1) def get_domain_vt_data(self, domain: str): """ Function to perform API call to VirusTotal and return scores for each of the four processed categories. Response is cached in a dictionary. :param domain: Domain address to check - :return: 4-tuple of floats: URL ratio, downloaded file ratio, referrer file ratio, communicating file ratio + :return: 4-tuple of floats: URL ratio, downloaded file ratio, + referrer file ratio, communicating file ratio """ if 'arpa' in domain or '.local' in domain: - # 'local' is a special-use domain name reserved by the Internet Engineering Task Force (IETF) + # 'local' is a special-use domain name reserved by + # the Internet Engineering Task Force (IETF) return (0, 0, 0, 0), '' try: # for unknown address, do the query @@ -293,11 +310,10 @@ def get_domain_vt_data(self, domain: str): except Exception: exception_line = sys.exc_info()[2].tb_lineno self.print( - f'Problem in the get_domain_vt_data() line {exception_line}', - 0, - 1, + f'Problem in the get_domain_vt_data() ' f'line {exception_line}', 0, 1, ) - self.print(traceback.print_exc(),0,1) + self.print(traceback.format_exc(), 0, 1) return False def get_ioc_type(self, ioc): @@ -309,7 +325,8 @@ def api_query_(self, ioc, save_data=False): """ Create request and perform API call :param ioc: IP address, domain, or URL to check - :param save_data: False by default. Set to True to save each request json in a file named ip.txt + :param save_data: False by default. Set to True to save each + request json in a file named ip.txt :return: Response object """ if self.incorrect_API_key: @@ -340,33 +357,42 @@ def api_query_(self, ioc, save_data=False): time.sleep(10) if response.status != 200: - # 204 means Request rate limit exceeded. You are making more requests - # than allowed. 
You have exceeded one of your quotas (minute, daily or monthly). + # 204 means Request rate limit exceeded. + # You are making more requests + # than allowed. You have exceeded one of your quotas + # (minute, daily or monthly). if response.status == 204: # Add to the queue of api calls in case of api limit reached. self.api_call_queue.append(ioc) - # 403 means you don't have enough privileges to make the request or wrong API key + # 403 means you don't have enough privileges to make + # the request or wrong API key elif response.status == 403: - # don't add to the api call queue because the user will have to restart slips anyway + # don't add to the api call queue because the user + # will have to restart slips anyway # to add a correct API key and the queue wil be erased self.print('Please check that your API key is correct.', 0, 1) self.incorrect_API_key = True else: - # if the query was unsuccessful but it is not caused by API limit, abort (this is some unknown error) - # X-Api-Message is a comprehensive error description, but it is not always present + # if the query was unsuccessful but it is not caused + # by API limit, abort (this is some unknown error) + # X-Api-Message is a comprehensive error description, + # but it is not always present if 'X-Api-Message' in response.headers: message = response.headers['X-Api-Message'] - # Reason is a much shorter description ("Forbidden"), but it is always there + # Reason is a much shorter description ("Forbidden"), + # but it is always there else: message = response.reason self.print( - f'VT API returned unexpected code: {response.status} - {message}', 0, 2 + f'VT API returned unexpected code:' + f' {response.status} - {message}', 0, 2 ) # report that API limit is reached, wait one minute and try again self.print( - f'Status code is {response.status} at {time.asctime()}, query id: {self.counter}', + f'Status code is {response.status} at ' + f'{time.asctime()}, query id: {self.counter}', 0,2 ) # return empty dict because api call isn't successful @@ -433,19 +459,24 @@ def interpret_response(self, response: dict): ref_file_ratio, com_file_ratio """ - # compute how many tests were run on the undetected samples. This will return tuple (0, total) - # the numbers 2 and 3 are keys to the dictionary, which is in this only case (probably by mistake) a list + # compute how many tests were run on the undetected samples. + # This will return tuple (0, total) + # the numbers 2 and 3 are keys to the dictionary, which is + # in this only case (probably by mistake) a list undetected_url_score = self.count_positives( response, 'undetected_urls', 2, 3 ) - # compute how many tests were run on the detected samples. This will return tuple (detections, total) - # URLs that have been detected as malicious by one or more antivirus engines + # compute how many tests were run on the detected samples. 
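
interpret_response, continuing below, derives each ratio by summing the (positives, total) pairs that count_positives returns for the detected and undetected variants of a category, then dividing. With toy numbers (not real VirusTotal output):

    # (positives, total) pairs as returned by count_positives
    undetected_url_score = (0, 40)   # from "undetected_urls"
    detected_url_score = (12, 60)    # from "detected_urls"

    url_detections = undetected_url_score[0] + detected_url_score[0]  # 12
    url_total = undetected_url_score[1] + detected_url_score[1]       # 100
    url_ratio = url_detections / url_total if url_total else 0        # 0.12
    print(url_ratio)
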
+ # This will return tuple (detections, total) + # URLs that have been detected as malicious by one or more + # antivirus engines detected_url_score = self.count_positives( response, 'detected_urls', 'positives', 'total' ) - # sum the previous results, to get the sum of detections and sum of total tests + # sum the previous results, to get the sum of detections + # and sum of total tests url_detections = undetected_url_score[0] + detected_url_score[0] if url_total := undetected_url_score[1] + detected_url_score[1]: url_ratio = url_detections / url_total @@ -468,13 +499,13 @@ def interpret_response(self, response: dict): down_file_ratio = down_file_detections / down_file_total else: down_file_ratio = 0 - # samples that were obtained from the same referrer as the file or URL being analyzed, - # but have not been detected as malicious + # samples that were obtained from the same referrer as the + # file or URL being analyzed, but have not been detected as malicious undetected_ref_score = self.count_positives( response, 'undetected_referrer_samples', 'positives', 'total' ) - # that were obtained from the same referrer as the file or URL being analyzed, - # that have been detected as malicious + # that were obtained from the same referrer as the file or + # URL being analyzed, that have been detected as malicious detected_ref_score = self.count_positives( response, 'detected_referrer_samples', 'positives', 'total' ) @@ -535,10 +566,13 @@ def main(self): if not cached_data: cached_data = {} - # return an IPv4Address or IPv6Address object depending on the IP address passed as argument. + # return an IPv4Address or IPv6Address object depending on the + # IP address passed as argument. ip_addr = ipaddress.ip_address(ip) - # if VT data of this IP (not multicast) is not in the IPInfo, ask VT. - # if the IP is not a multicast and 'VirusTotal' key is not in the IPInfo, proceed. + # if VT data of this IP (not multicast) is not in the IPInfo, + # ask VT. if the IP is not a multicast and 'VirusTotal' key is + # not in + # the IPInfo, proceed. if ( 'VirusTotal' not in cached_data and not ip_addr.is_multicast @@ -548,7 +582,8 @@ def main(self): # if VT data of this IP is in the IPInfo, check the timestamp. elif 'VirusTotal' in cached_data: - # If VT is in data, check timestamp. Take time difference, if not valid, update vt scores. + # If VT is in data, check timestamp. Take time difference, + # if not valid, update vt scores. if ( time.time() - cached_data['VirusTotal']['timestamp'] @@ -576,7 +611,8 @@ def main(self): elif ( domain and cached_data and 'VirusTotal' in cached_data ): - # If VT is in data, check timestamp. Take time difference, if not valid, update vt scores. + # If VT is in data, check timestamp. Take time difference, + # if not valid, update vt scores. if ( time.time() - cached_data['VirusTotal']['timestamp'] @@ -599,7 +635,8 @@ def main(self): # cached data is either False or {} self.set_url_data_in_URLInfo(url, cached_data) elif cached_data and 'VirusTotal' in cached_data: - # If VT is in data, check timestamp. Take time difference, if not valid, update vt scores. + # If VT is in data, check timestamp. Take time difference, + # if not valid, update vt scores. 
if ( time.time() - cached_data['VirusTotal']['timestamp'] diff --git a/slips.py b/slips.py index 4bcb6bb7d..955eabefb 100755 --- a/slips.py +++ b/slips.py @@ -18,8 +18,9 @@ # Contact: eldraco@gmail.com, sebastian.garcia@agents.fel.cvut.cz, stratosphere@aic.fel.cvut.cz from __future__ import print_function -import sys + import os +import sys import time import warnings @@ -41,8 +42,6 @@ from slips.daemon import Daemon slips = Main() - # checker = Checker(slips) - # checker.check_python_version() if slips.args.stopdaemon: # -S is provided diff --git a/slips/daemon.py b/slips/daemon.py index f6030ded5..05b450508 100644 --- a/slips/daemon.py +++ b/slips/daemon.py @@ -1,36 +1,37 @@ -from slips_files.common.imports import * -from slips_files.core.database.database_manager import DBManager -import sys import os +import sys from signal import SIGTERM +from slips_files.common.imports import * +from slips_files.core.database.database_manager import DBManager + + class Daemon: - description = 'This module runs when slips is in daemonized mode' + description = "This module runs when slips is in daemonized mode" def __init__(self, slips): # to use read_configurations defined in Main self.slips = slips # tell Main class that we're running in daemonized mode - self.slips.set_mode('daemonized', daemon=self) + self.slips.set_mode("daemonized", daemon=self) # this is a conf file used to store the pid of the daemon and is deleted when the daemon stops - self.pidfile_dir = '/var/lock' - self.pidfile = os.path.join(self.pidfile_dir, 'slips_daemon.lock') + self.pidfile_dir = "/var/lock" + self.pidfile = os.path.join(self.pidfile_dir, "slips_daemon.lock") self.read_configuration() if not self.slips.args.stopdaemon: self.prepare_output_dir() # Get the pid from pidfile try: - with open(self.pidfile, 'r') as pidfile: + with open(self.pidfile, "r") as pidfile: self.pid = int(pidfile.read().strip()) except (IOError, FileNotFoundError): self.pid = None - def print(self, text): """Prints output to logsfile specified in slips.conf""" - with open(self.logsfile, 'a') as f: - f.write(f'{text}\n') + with open(self.logsfile, "a") as f: + f.write(f"{text}\n") def create_std_streams(self): """Create standard steam files and dirs and clear them""" @@ -38,20 +39,19 @@ def create_std_streams(self): std_streams = [self.stderr, self.stdout, self.logsfile] for file in std_streams: # we don't want to clear the stdout or the logsfile when we stop the daemon using -S - if '-S' in sys.argv and file != self.stderr: + if "-S" in sys.argv and file != self.stderr: continue # create the file if it doesn't exist or clear it if it exists try: - open(file, 'w').close() + open(file, "w").close() except (FileNotFoundError, NotADirectoryError): os.mkdir(os.path.dirname(file)) - open(file, 'w').close() + open(file, "w").close() def prepare_std_streams(self, output_dir): """ prepare the path of stderr, stdout, logsfile """ - self.stderr = os.path.join(output_dir, self.stderr) self.stdout = os.path.join(output_dir, self.stdout) self.logsfile = os.path.join(output_dir, self.logsfile) @@ -62,23 +62,22 @@ def read_configuration(self): self.stdout = conf.stdout() self.stderr = conf.stderr() # we don't use it anyway - self.stdin = '/dev/null' - + self.stdin = "/dev/null" def prepare_output_dir(self): - if '-o' in sys.argv: + if "-o" in sys.argv: self.prepare_std_streams(self.slips.args.output) else: # if we have acess to '/var/log/slips/' store the logfiles there, if not , store it in the output/ dir try: - output_dir = '/var/log/slips/' + output_dir = 
"/var/log/slips/" try: os.mkdir(output_dir) except FileExistsError: pass # see if we have write permission to that dir or not - tmpfile = os.path.join(output_dir, 'tmp') - open(tmpfile, 'w').close() + tmpfile = os.path.join(output_dir, "tmp") + open(tmpfile, "w").close() os.remove(tmpfile) # we have permission, append the path to each logfile @@ -91,16 +90,16 @@ def prepare_output_dir(self): self.create_std_streams() # when stopping the daemon don't log this info again - if '-S' not in sys.argv: + if "-S" not in sys.argv: self.print( - f'Logsfile: {self.logsfile}\n' - f'pidfile: {self.pidfile}\n' - f'stdin : {self.stdin}\n' - f'stdout: {self.stdout}\n' - f'stderr: {self.stderr}\n' + f"Logsfile: {self.logsfile}\n" + f"pidfile: {self.pidfile}\n" + f"stdin : {self.stdin}\n" + f"stdout: {self.stdout}\n" + f"stderr: {self.stderr}\n" ) - self.print('Done reading configuration and setting up files.\n') + self.print("Done reading configuration and setting up files.\n") def delete_pidfile(self): """Deletes the pidfile to mark the daemon as closed""" @@ -110,11 +109,11 @@ def delete_pidfile(self): # because we don't know the output dir if os.path.exists(self.pidfile): os.remove(self.pidfile) - self.print('pidfile deleted.') + self.print("pidfile deleted.") else: self.print(f"Can't delete pidfile, {self.pidfile} doesn't exist.") # if an error happened it will be written in logsfile - self.print('Either Daemon stopped normally or an error occurred.') + self.print("Either Daemon stopped normally or an error occurred.") def daemonize(self): """ @@ -129,8 +128,8 @@ def daemonize(self): # exit first parent sys.exit(0) except OSError as e: - sys.stderr.write(f'Fork #1 failed: {e.errno} {e.strerror}\n') - self.print(f'Fork #1 failed: {e.errno} {e.strerror}\n') + sys.stderr.write(f"Fork #1 failed: {e.errno} {e.strerror}\n") + self.print(f"Fork #1 failed: {e.errno} {e.strerror}\n") sys.exit(1) # os.chdir("/") @@ -147,8 +146,8 @@ def daemonize(self): # exit from second parent (aka first child) sys.exit(0) except OSError as e: - sys.stderr.write(f'Fork #2 failed: {e.errno} {e.strerror}\n') - self.print(f'Fork #2 failed: {e.errno} {e.strerror}\n') + sys.stderr.write(f"Fork #2 failed: {e.errno} {e.strerror}\n") + self.print(f"Fork #2 failed: {e.errno} {e.strerror}\n") sys.exit(1) # Now this code is run from the daemon @@ -158,9 +157,9 @@ def daemonize(self): sys.stderr.flush() # redirect standard file descriptors - with open(self.stdin, 'r') as stdin, open( - self.stdout, 'a+' - ) as stdout, open(self.stderr, 'a+') as stderr: + with open(self.stdin, "r") as stdin, open(self.stdout, "a+") as stdout, open( + self.stderr, "a+" + ) as stderr: os.dup2(stdin.fileno(), sys.stdin.fileno()) os.dup2(stdout.fileno(), sys.stdout.fileno()) os.dup2(stderr.fileno(), sys.stderr.fileno()) @@ -172,7 +171,7 @@ def daemonize(self): # remember that we are writing this pid to a file # because the parent and the first fork, already exited, so this pid is the daemon pid which is slips.py pid self.pid = str(os.getpid()) - with open(self.pidfile, 'w+') as pidfile: + with open(self.pidfile, "w+") as pidfile: pidfile.write(self.pid) # Register a function to be executed if sys.exit() is called or the main module’s execution completes @@ -180,13 +179,13 @@ def daemonize(self): def start(self): """Main function, Starts the daemon and starts slips normally.""" - self.print('Daemon starting...') + self.print("Daemon starting...") # Start the daemon self.daemonize() # any code run after daemonizing will be run inside the daemon and have the same 
PID as slips.py - self.print(f'Slips Daemon is running. [PID {self.pid}]\n') + self.print(f"Slips Daemon is running. [PID {self.pid}]\n") # start slips normally self.slips.start() @@ -196,30 +195,28 @@ def get_last_opened_daemon_info(self): get information about the last opened slips daemon from running_slips_info.txt """ try: - with open(self.slips.redis_man.running_logfile, 'r') as f: + with open(self.slips.redis_man.running_logfile, "r") as f: # read the lines in reverse order to get the last opened daemon for line in f.read().splitlines()[::-1]: # skip comments - if ( - line.startswith('#') - or line.startswith('Date') - or len(line) < 3 - ): + if line.startswith("#") or line.startswith("Date") or len(line) < 3: continue - line = line.split(',') + line = line.split(",") is_daemon = bool(line[7]) if not is_daemon: continue port, output_dir, slips_pid = line[2], line[5], line[6] - return (port, output_dir, slips_pid) + return port, output_dir, slips_pid except FileNotFoundError: # file removed after daemon started - self.print(f"Warning: {self.slips.redis_man.running_logfile} is not found. Can't get daemon info." f" Slips won't be completely killed.") + self.print( + f"Warning: {self.slips.redis_man.running_logfile} is not found. Can't get daemon info." + f" Slips won't be completely killed." + ) return False def killdaemon(self): - """ Kill the damon process only (aka slips.py) """ + """Kill the daemon process only (aka slips.py)""" # sending SIGINT to self.pid will only kill slips.py and the rest of its children will be zombies # sending SIGKILL to self.pid will only kill slips.py and the rest of # its children will stay open in memory (not even zombies) @@ -238,19 +235,23 @@ def stop(self): if not info: return port, output_dir, self.pid = info - self.stderr = 'errors.log' - self.stdout = 'slips.log' - self.logsfile = 'slips.log' + self.stderr = "errors.log" + self.stdout = "slips.log" + self.logsfile = "slips.log" self.prepare_std_streams(output_dir) - self.logger = self.slips.proc_man.start_output_process(self.stdout, self.stderr, self.logsfile) + self.logger = self.slips.proc_man.start_output_process( + self.stdout, self.stderr, self.logsfile + ) self.slips.add_observer(self.logger) - db = DBManager(self.logger, - output_dir, - port, - start_sqlite=False, - flush_db=False) - db.set_slips_mode('daemonized') - self.slips.set_mode('daemonized', daemon=self) + db = DBManager( + self.logger, + output_dir, + port, + start_sqlite=False, + flush_db=False + ) + db.set_slips_mode("daemonized") + self.slips.set_mode("daemonized", daemon=self) # used in shutdown gracefully to print the name of the stopped file in slips.log self.slips.input_information = db.get_input_file() self.slips.db = db diff --git a/slips/main.py b/slips/main.py index 6b187710e..570141a1a 100644 --- a/slips/main.py +++ b/slips/main.py @@ -10,6 +10,7 @@ import time from datetime import datetime from distutils.dir_util import copy_tree +from typing import Set from managers.metadata_manager import MetadataManager from managers.process_manager import ProcessManager @@ -18,8 +19,7 @@ from slips_files.common.abstracts.observer import IObservable from slips_files.common.parsers.config_parser import ConfigParser from slips_files.common.performance_profilers.cpu_profiler import CPUProfiler -from slips_files.common.performance_profilers.memory_profiler import \ MemoryProfiler +from slips_files.common.performance_profilers.memory_profiler import MemoryProfiler from slips_files.common.slips_utils import utils from
slips_files.common.style import green from slips_files.core.database.database_manager import DBManager @@ -29,9 +29,9 @@ class Main(IObservable): def __init__(self, testing=False): IObservable.__init__(self) - self.name = 'Main' - self.alerts_default_path = 'output/' - self.mode = 'interactive' + self.name = "Main" + self.alerts_default_path = "output/" + self.mode = "interactive" # objects to manage various functionality self.checker = Checker(self) self.redis_man = RedisManager(self) @@ -41,20 +41,21 @@ def __init__(self, testing=False): self.conf = ConfigParser() self.version = self.get_slips_version() # will be filled later - self.commit = 'None' - self.branch = 'None' + self.commit = "None" + self.branch = "None" self.last_updated_stats_time = datetime.now() self.input_type = False # in testing mode we manually set the following params if not testing: - self.args = self.conf.get_args() self.pid = os.getpid() self.checker.check_given_flags() if not self.args.stopdaemon: # Check the type of input - self.input_type, self.input_information, self.line_type = self.checker.check_input_type() + self.input_type, self.input_information, self.line_type = ( + self.checker.check_input_type() + ) # If we need zeek (bro), test if we can run it. self.check_zeek_or_bro() self.prepare_output_dir() @@ -62,23 +63,33 @@ def __init__(self, testing=False): self.prepare_zeek_output_dir() self.twid_width = self.conf.get_tw_width() - def cpu_profiler_init(self): - self.cpuProfilerEnabled = self.conf.get_cpu_profiler_enable() == 'yes' + self.cpuProfilerEnabled = self.conf.get_cpu_profiler_enable() == "yes" self.cpuProfilerMode = self.conf.get_cpu_profiler_mode() - self.cpuProfilerMultiprocess = \ - self.conf.get_cpu_profiler_multiprocess() == 'yes' + self.cpuProfilerMultiprocess = ( + self.conf.get_cpu_profiler_multiprocess() == "yes" + ) if self.cpuProfilerEnabled: try: - if (self.cpuProfilerMultiprocess and self.cpuProfilerMode == "dev"): + if self.cpuProfilerMultiprocess and self.cpuProfilerMode == "dev": args = sys.argv - if (args[-1] != "--no-recurse"): + if args[-1] != "--no-recurse": tracer_entries = str( - self.conf.get_cpu_profiler_dev_mode_entries()) - viz_args = ['viztracer', '--tracer_entries', tracer_entries, - '--max_stack_depth', '10', '-o', - str(os.path.join(self.args.output, - 'cpu_profiling_result.json'))] + self.conf.get_cpu_profiler_dev_mode_entries() + ) + viz_args = [ + "viztracer", + "--tracer_entries", + tracer_entries, + "--max_stack_depth", + "10", + "-o", + str( + os.path.join( + self.args.output, "cpu_profiling_result.json" + ) + ), + ] viz_args.extend(args) viz_args.append("--no-recurse") print("Starting multiprocess profiling recursive subprocess") @@ -90,15 +101,15 @@ def cpu_profiler_init(self): output=self.args.output, mode=self.conf.get_cpu_profiler_mode(), limit=self.conf.get_cpu_profiler_output_limit(), - interval=self.conf.get_cpu_profiler_sampling_interval() - ) + interval=self.conf.get_cpu_profiler_sampling_interval(), + ) self.cpuProfiler.start() except Exception as e: print(e) self.cpuProfilerEnabled = False def cpu_profiler_release(self): - if hasattr(self, 'cpuProfilerEnabled' ): + if hasattr(self, "cpuProfilerEnabled"): if self.cpuProfilerEnabled and not self.cpuProfilerMultiprocess: self.cpuProfiler.stop() self.cpuProfiler.print() @@ -106,22 +117,24 @@ def cpu_profiler_release(self): def memory_profiler_init(self): self.memoryProfilerEnabled = self.conf.get_memory_profiler_enable() == "yes" memoryProfilerMode = self.conf.get_memory_profiler_mode() - 
memoryProfilerMultiprocess = self.conf.get_memory_profiler_multiprocess() == "yes" + memoryProfilerMultiprocess = ( + self.conf.get_memory_profiler_multiprocess() == "yes" + ) if self.memoryProfilerEnabled: - output_dir = os.path.join(self.args.output, 'memoryprofile/') + output_dir = os.path.join(self.args.output, "memoryprofile/") if not os.path.exists(output_dir): os.makedirs(output_dir) - output_file = os.path.join(output_dir, 'memory_profile.bin') + output_file = os.path.join(output_dir, "memory_profile.bin") self.memoryProfiler = MemoryProfiler( - output_file, - db=self.db, - mode=memoryProfilerMode, - multiprocess=memoryProfilerMultiprocess - ) + output_file, + db=self.db, + mode=memoryProfilerMode, + multiprocess=memoryProfilerMultiprocess, + ) self.memoryProfiler.start() def memory_profiler_release(self): - if hasattr(self, 'memoryProfilerEnabled') and self.memoryProfilerEnabled: + if hasattr(self, "memoryProfilerEnabled") and self.memoryProfilerEnabled: self.memoryProfiler.stop() def memory_profiler_multiproc_test(self): @@ -136,26 +149,31 @@ def mem_function(): array = [] for i in range(1000000): array.append(i) + processes = [] num_processes = 3 for _ in range(num_processes): - process = multiprocessing.Process(target=target_function if _%2 else mem_function) + process = multiprocessing.Process( + target=target_function if _ % 2 else mem_function + ) process.start() processes.append(process) # Message passing - self.db.publish("memory_profile", processes[1].pid) # successful - # subprocess.Popen(["memray", "live", "1234"]) - time.sleep(5) # target_function will timeout and tracker will be cleared - self.db.publish("memory_profile", processes[0].pid) # end but maybe don't start - time.sleep(5) # mem_function will get tracker started - self.db.publish("memory_profile", processes[0].pid) # start successfully + self.db.publish("memory_profile", processes[1].pid) # successful + # target_function will timeout and tracker will be cleared + time.sleep(5) + # end but maybe don't start + self.db.publish("memory_profile", processes[0].pid) + time.sleep(5) # mem_function will get tracker started + # start successfully + self.db.publish("memory_profile", processes[0].pid) input() def get_slips_version(self): - version_file = 'VERSION' - with open(version_file, 'r') as f: + version_file = "VERSION" + with open(version_file, "r") as f: version = f.read() return version @@ -164,15 +182,15 @@ def check_zeek_or_bro(self): Check if we have zeek or bro """ self.zeek_bro = None - if self.input_type not in ('pcap', 'interface'): + if self.input_type not in ("pcap", "interface"): return False - if shutil.which('zeek'): - self.zeek_bro = 'zeek' - elif shutil.which('bro'): - self.zeek_bro = 'bro' + if shutil.which("zeek"): + self.zeek_bro = "zeek" + elif shutil.which("bro"): + self.zeek_bro = "bro" else: - print('Error. No zeek or bro binary found.') + print("Error. No zeek or bro binary found.") self.terminate_slips() return False @@ -180,18 +198,19 @@ def check_zeek_or_bro(self): def prepare_zeek_output_dir(self): from pathlib import Path + without_ext = Path(self.input_information).stem if self.conf.store_zeek_files_in_the_output_dir(): - self.zeek_dir = os.path.join(self.args.output, 'zeek_files') + self.zeek_dir = os.path.join(self.args.output, "zeek_files") else: - self.zeek_dir = f'zeek_files_{without_ext}/' + self.zeek_dir = f"zeek_files_{without_ext}/" def terminate_slips(self): """ Shutdown slips, is called when stopping slips before starting all modules. 
for example using -cb """ - if self.mode == 'daemonized': + if self.mode == "daemonized": self.daemon.stop() if self.conf.get_cpu_profiler_enable() != "yes": sys.exit(0) @@ -206,14 +225,14 @@ def save_the_db(self): backups_dir = self.args.output # The name of the interface/pcap/nfdump/binetflow used is in self.input_information # if the input is a zeek dir, remove the / at the end - if self.input_information.endswith('/'): + if self.input_information.endswith("/"): self.input_information = self.input_information[:-1] # We need to separate it from the path self.input_information = os.path.basename(self.input_information) # Remove the extension from the filename with contextlib.suppress(ValueError): self.input_information = self.input_information[ - : self.input_information.index('.') + : self.input_information.index(".") ] # Give the exact path to save(), this is where our saved .rdb backup will be rdb_filepath = os.path.join(backups_dir, self.input_information) @@ -221,24 +240,25 @@ def save_the_db(self): # info will be lost only if you're out of space and redis # can't write to dump.self.rdb, otherwise you're fine print( - '[Main] [Warning] stop-writes-on-bgsave-error is set to no, ' - 'information may be lost in the redis backup file.' + "[Main] [Warning] stop-writes-on-bgsave-error is set to no, " + "information may be lost in the redis backup file." ) def was_running_zeek(self) -> bool: - """returns true if zeek wa sused in this run """ - return self.db.get_input_type() in ('pcap', 'interface') or self.db.is_growing_zeek_dir() + """returns true if zeek was used in this run""" + return ( + self.db.get_input_type() in ("pcap", "interface") + or self.db.is_growing_zeek_dir() + ) def store_zeek_dir_copy(self): store_a_copy_of_zeek_files = self.conf.store_a_copy_of_zeek_files() was_running_zeek = self.was_running_zeek() if store_a_copy_of_zeek_files and was_running_zeek: # this is where the copy will be stored - dest_zeek_dir = os.path.join(self.args.output, 'zeek_files') + dest_zeek_dir = os.path.join(self.args.output, "zeek_files") copy_tree(self.zeek_dir, dest_zeek_dir) - print( - f'[Main] Stored a copy of zeek files to {dest_zeek_dir}' - ) + print(f"[Main] Stored a copy of zeek files to {dest_zeek_dir}") def delete_zeek_files(self): if self.conf.delete_zeek_files(): @@ -252,7 +272,7 @@ def prepare_output_dir(self): @return: None """ # default output/ - if '-o' in sys.argv: + if "-o" in sys.argv: # -o is given # delete all old files in the output dir if os.path.exists(self.args.output): @@ -260,7 +280,7 @@ def prepare_output_dir(self): # in integration tests, slips redirects its # output to slips_output.txt, # don't delete that file - if self.args.testing and 'slips_output.txt' in file: + if self.args.testing and "slips_output.txt" in file: continue file_path = os.path.join(self.args.output, file) @@ -281,16 +301,15 @@ def prepare_output_dir(self): # it should be output/wlp3s0 self.args.output = os.path.join( self.alerts_default_path, - os.path.basename(self.input_information) # get pcap name from path + os.path.basename(self.input_information), # get pcap name from path ) # add timestamp to avoid conflicts wlp3s0_2022-03-1_03:55 - ts = utils.convert_format(datetime.now(), '%Y-%m-%d_%H:%M:%S') - self.args.output += f'_{ts}/' + ts = utils.convert_format(datetime.now(), "%Y-%m-%d_%H:%M:%S") + self.args.output += f"_{ts}/" os.makedirs(self.args.output) - - def set_mode(self, mode, daemon=''): + def set_mode(self, mode, daemon=""): """ Slips has 2 modes, daemonized and interactive, this
function sets up the mode so that slips knows in which mode it's operating @@ -304,8 +323,8 @@ def log(self, txt): """ Is used instead of print for daemon debugging """ - with open(self.daemon.stdout, 'a') as f: - f.write(f'{txt}\n') + with open(self.daemon.stdout, "a") as f: + f.write(f"{txt}\n") def print(self, text, verbose=1, debug=0, log_to_logfiles_only=False): """ @@ -326,12 +345,12 @@ def print(self, text, verbose=1, debug=0, log_to_logfiles_only=False): """ self.notify_observers( { - 'from': self.name, - 'txt': text, - 'verbose': verbose, - 'debug': debug, - 'log_to_logfiles_only': log_to_logfiles_only - } + "from": self.name, + "txt": text, + "verbose": verbose, + "debug": debug, + "log_to_logfiles_only": log_to_logfiles_only, + } ) def handle_flows_from_stdin(self, input_information): @@ -339,25 +358,18 @@ def handle_flows_from_stdin(self, input_information): Make sure the stdin line type is valid (argus, suricata, or zeek) """ if input_information.lower() not in ( - 'argus', - 'suricata', - 'zeek', + "argus", + "suricata", + "zeek", ): - print( - f'[Main] Invalid file path {input_information}. Stopping.' - ) + print(f"[Main] Invalid file path {input_information}. Stopping.") sys.exit(-1) - return False - if self.mode == 'daemonized': - print( - "Can't read input from stdin in daemonized mode. " - "Stopping" - ) + if self.mode == "daemonized": + print("Can't read input from stdin in daemonized mode. " "Stopping") sys.exit(-1) - return False line_type = input_information - input_type = 'stdin' + input_type = "stdin" return input_type, line_type.lower() def get_input_file_type(self, given_path): @@ -366,89 +378,75 @@ def get_input_file_type(self, given_path): returns binetflow, pcap, nfdump, zeek_folder, suricata, etc. """ # default value - input_type = 'file' + input_type = "file" # Get the type of file - cmd_result = subprocess.run( - ['file', given_path], stdout=subprocess.PIPE - ) + cmd_result = subprocess.run(["file", given_path], stdout=subprocess.PIPE) # Get command output - cmd_result = cmd_result.stdout.decode('utf-8') + cmd_result = cmd_result.stdout.decode("utf-8") if ( - ('pcap capture file' in cmd_result - or 'pcapng capture file' in cmd_result) - and os.path.isfile(given_path) - ): - input_type = 'pcap' + "pcap capture file" in cmd_result or "pcapng capture file" in cmd_result + ) and os.path.isfile(given_path): + input_type = "pcap" elif ( - ('dBase' in cmd_result - or 'nfcap' in given_path - or 'nfdump' in given_path - ) - and os.path.isfile(given_path) - ): - input_type = 'nfdump' - if shutil.which('nfdump') is None: + "dBase" in cmd_result or "nfcap" in given_path or "nfdump" in given_path + ) and os.path.isfile(given_path): + input_type = "nfdump" + if shutil.which("nfdump") is None: # If we do not have nfdump, terminate Slips. - print( - 'nfdump is not installed. terminating slips.' - ) + print("nfdump is not installed. 
terminating slips.") self.terminate_slips() - elif 'CSV' in cmd_result and os.path.isfile(given_path): - input_type = 'binetflow' - elif 'directory' in cmd_result and os.path.isdir(given_path): + elif "CSV" in cmd_result and os.path.isfile(given_path): + input_type = "binetflow" + elif "directory" in cmd_result and os.path.isdir(given_path): from slips_files.core.input import SUPPORTED_LOGFILES + for log_file in os.listdir(given_path): # if there is at least 1 supported log file inside the # given directory, start slips normally # otherwise, stop slips - if log_file.replace('.log', '') in SUPPORTED_LOGFILES: - input_type = 'zeek_folder' + if log_file.replace(".log", "") in SUPPORTED_LOGFILES: + input_type = "zeek_folder" break else: # zeek dir filled with unsupported logs # or .labeled logs that slips can't read. - print(f"Log files in {given_path} are not supported \n" - f"Make sure all log files inside the given " - f"directory end with .log .. Stopping.") + print( + f"Log files in {given_path} are not supported \n" + f"Make sure all log files inside the given " + f"directory end with .log .. Stopping." + ) sys.exit(-1) else: # is it a zeek log file or suricata, binetflow tabs, # or binetflow comma separated file? # use first line to determine - with open(given_path, 'r') as f: + with open(given_path, "r") as f: while True: # get the first line that isn't a comment - first_line = f.readline().replace('\n', '') - if not first_line.startswith('#'): + first_line = f.readline().replace("\n", "") + if not first_line.startswith("#"): break - if 'flow_id' in first_line and os.path.isfile(given_path): - input_type = 'suricata' + if "flow_id" in first_line and os.path.isfile(given_path): + input_type = "suricata" elif os.path.isfile(given_path): # this is a text file, it can be binetflow or zeek_log_file try: # is it a json log file json.loads(first_line) - input_type = 'zeek_log_file' + input_type = "zeek_log_file" except json.decoder.JSONDecodeError: # this is a tab separated file # is it zeek log file or binetflow file? # zeek tab files are separated by several spaces or tabs - sequential_spaces_found = re.search( - '\s{1,}-\s{1,}', first_line - ) - tabs_found = re.search( - '\t{1,}', first_line - ) + sequential_spaces_found = re.search("\s{1,}-\s{1,}", first_line) + tabs_found = re.search("\t{1,}", first_line) - if ( - '->' in first_line - or 'StartTime' in first_line - ): + if "->" in first_line or "StartTime" in first_line: # tab separated files are usually binetflow tab files - input_type = 'binetflow-tabs' + input_type = "binetflow-tabs" elif sequential_spaces_found or tabs_found: - input_type = 'zeek_log_file' + input_type = "zeek_log_file" return input_type @@ -472,15 +470,14 @@ def setup_print_levels(self): self.args.debug = max(self.args.debug, 0) def print_version(self): - slips_version = f'Slips. Version {green(self.version)}' + slips_version = f"Slips. 
Version {green(self.version)}"
        branch_info = utils.get_branch_info()
        if branch_info is not False:
            # it's false when we're in docker because there's no .git/ there
            self.commit, self.branch = branch_info
-            slips_version += f' ({self.commit[:8]})'
+            slips_version += f" ({self.commit[:8]})"
        print(slips_version)

-
    def update_stats(self):
        """
        updates the statistics shown next to the progress bar
@@ -490,31 +487,34 @@ def update_stats(self):
        # zeek directories, we print the stats using slips.py
        # for other files, we print a progress bar +
        # the stats using outputprocess
-        if not self.mode == 'interactive':
+        if not self.mode == "interactive":
            return

        # only update the stats every 5s
        now = datetime.now()
-        if utils.get_time_diff(self.last_updated_stats_time, now, 'seconds') < 5:
+        if utils.get_time_diff(self.last_updated_stats_time, now, "seconds") < 5:
            return

        self.last_updated_stats_time = now
-        now = utils.convert_format(now, '%Y/%m/%d %H:%M:%S')
+        now = utils.convert_format(now, "%Y/%m/%d %H:%M:%S")
        modified_ips_in_the_last_tw = self.db.get_modified_ips_in_the_last_tw()
        profilesLen = self.db.get_profiles_len()
        evidence_number = self.db.get_evidence_number() or 0
-        msg = f'Total analyzed IPs so far: ' \
-              f'{green(profilesLen)}. ' \
-              f'Evidence Added: {green(evidence_number)}. ' \
-              f'IPs sending traffic in the last ' \
-              f'{self.twid_width}: {green(modified_ips_in_the_last_tw)}. ' \
-              f'({now})'
+        msg = (
+            f"Total analyzed IPs so far: "
+            f"{green(profilesLen)}. "
+            f"Evidence Added: {green(evidence_number)}. "
+            f"IPs sending traffic in the last "
+            f"{self.twid_width}: {green(modified_ips_in_the_last_tw)}. "
+            f"({now})"
+        )
        self.print(msg)

-    def update_host_ip(self, hostIP, modified_profiles) -> str:
+    def update_host_ip(self, hostIP: str, modified_profiles: Set[str]) -> str:
        """
-        when running on an interface we keep track of the host IP. If there was no
-        # modified TWs in the host IP, we check if the network was changed.
+        when running on an interface we keep track of the host IP.
+        If there were no modified TWs in the host IP, we check if the
+        network was changed.
        """
        if self.is_interface and hostIP not in modified_profiles:
            if hostIP := self.metadata_man.get_host_ip():
@@ -531,20 +531,30 @@ def is_total_flows_unknown(self) -> bool:
        return (
            self.args.input_module
            or self.args.growing
-            or self.input_type in ('stdin', 'pcap', 'interface')
+            or self.input_type in ("stdin", "pcap", "interface")
        )

-
    def start(self):
        """Main Slips Function"""
        try:
            self.print_version()
-            print('https://stratosphereips.org')
-            print('-' * 27)
+            print("https://stratosphereips.org")
+            print("-" * 27)
            self.setup_print_levels()

+            # if stdout is redirected to a file,
+            # tell output.py to redirect its output as well
+            current_stdout, stderr, slips_logfile = (
+                self.checker.check_output_redirection()
+            )
+            self.stdout = current_stdout
+            self.logger = self.proc_man.start_output_process(
+                current_stdout, stderr, slips_logfile
+            )
+            self.add_observer(self.logger)
+
            # get the port that is going to be used for this instance of slips
            if self.args.port:
                self.redis_port = int(self.args.port)
@@ -555,73 +565,68 @@ def start(self):
                if not self.redis_port:
                    # all ports are unavailable
                    inp = input("Press Enter to close all ports.\n")
-                    if inp == '':
+                    if inp == "":
                        self.redis_man.close_all_ports()
                    self.terminate_slips()
            else:
                # even if this port is in use, it will be overwritten by slips
                self.redis_port = 6379

-            # if stdout is redirected to a file,
-            # tell output.py to redirect it's output as well
-            current_stdout, stderr, slips_logfile = self.checker.check_output_redirection()
-            self.logger = self.proc_man.start_output_process(
-                current_stdout,
-                stderr,
-                slips_logfile)
-            self.add_observer(self.logger)
-
            self.db = DBManager(self.logger, self.args.output, self.redis_port)
-            self.db.set_input_metadata({
-                'output_dir': self.args.output,
-                'commit': self.commit,
-                'branch': self.branch,
-            })
+            self.db.set_input_metadata(
+                {
+                    "output_dir": self.args.output,
+                    "commit": self.commit,
+                    "branch": self.branch,
+                }
+            )

            self.cpu_profiler_init()
            self.memory_profiler_init()

-            # uncomment line to see that memory profiler works correctly
-            # Should print out red text if working properly
-            # self.memory_profiler_multiproc_test()
-
            if self.args.growing:
-                if self.input_type != 'zeek_folder':
-                    self.print(f"Parameter -g should be using with "
-                               f"-f not a {self.input_type}. "
-                               f"Ignoring -g")
+                if self.input_type != "zeek_folder":
+                    self.print(
+                        f"Parameter -g should be used with "
+                        f"-f not a {self.input_type}. "
+                        f"Ignoring -g"
+                    )
                else:
-                    self.print(f"Running on a growing zeek dir:"
-                               f" {self.input_information}")
+                    self.print(
+                        f"Running on a growing zeek dir:" f" {self.input_information}"
+                    )
                    self.db.set_growing_zeek_dir()

            # log the PID of the started redis-server
            # should be here after we're sure that the server was started
            redis_pid = self.redis_man.get_pid_of_redis_server(self.redis_port)
-            self.redis_man.log_redis_server_PID(self.redis_port, redis_pid)
+            self.redis_man.log_redis_server_pid(self.redis_port, redis_pid)

            self.db.set_slips_mode(self.mode)

-            if self.mode == 'daemonized':
+            if self.mode == "daemonized":
                std_files = {
-                    'stderr': self.daemon.stderr,
-                    'stdout': self.daemon.stdout,
-                    'stdin': self.daemon.stdin,
-                    'pidfile': self.daemon.pidfile,
-                    'logsfile': self.daemon.logsfile
+                    "stderr": self.daemon.stderr,
+                    "stdout": self.daemon.stdout,
+                    "stdin": self.daemon.stdin,
+                    "pidfile": self.daemon.pidfile,
+                    "logsfile": self.daemon.logsfile,
                }
            else:
                std_files = {
-                    'stderr': stderr,
-                    'stdout': slips_logfile,
+                    "stderr": stderr,
+                    "stdout": slips_logfile,
                }

            self.db.store_std_file(**std_files)

-            self.print(f'Using redis server on port: {green(self.redis_port)}', 1, 0)
-            self.print(f'Started {green("Main")} process [PID {green(self.pid)}]', 1, 0)
-            self.print('Starting modules', 1, 0)
+            self.print(
+                f"Using redis server on " f"port: {green(self.redis_port)}", 1, 0
+            )
+            self.print(
+                f'Started {green("Main")} process ' f"[PID {green(self.pid)}]", 1, 0
+            )
+            self.print("Starting modules", 1, 0)

            # if slips is given a .rdb file, don't load the
            # modules as we don't need them
@@ -631,8 +636,7 @@ def start(self):
                # slips will wait until all TI files are updated before
                # starting the rest of the modules
                self.proc_man.start_update_manager(
-                    local_files=True,
-                    TI_feeds=self.conf.wait_for_TI_to_finish()
+                    local_files=True, TI_feeds=self.conf.wait_for_TI_to_finish()
                )

                self.proc_man.load_modules()
@@ -642,6 +646,7 @@ def start(self):
            # call shutdown_gracefully on sigterm
            def sig_handler(sig, frame):
                self.proc_man.shutdown_gracefully()
+
            # The signals SIGKILL and SIGSTOP cannot be caught,
            # blocked, or ignored.
            signal.signal(signal.SIGTERM, sig_handler)
@@ -649,7 +654,7 @@ def sig_handler(sig, frame):
            self.proc_man.start_evidence_process()
            self.proc_man.start_profiler_process()

-            self.c1 = self.db.subscribe('control_channel')
+            self.c1 = self.db.subscribe("control_channel")

            self.metadata_man.enable_metadata()
@@ -658,70 +663,73 @@ def sig_handler(sig, frame):
            # obtain the list of active processes
            self.proc_man.processes = multiprocessing.active_children()

-            self.db.store_process_PID(
-                'slips.py',
-                int(self.pid)
-            )
+            self.db.store_process_PID("slips.py", int(self.pid))

            self.metadata_man.set_input_metadata()

            if self.conf.use_p2p() and not self.args.interface:
-                self.print('Warning: P2P is only supported using '
-                           'an interface. Disabled P2P.')
+                self.print(
+                    "Warning: P2P is only supported using "
+                    "an interface. Disabled P2P."
+                )

            # warn about unused open redis servers
            open_servers = len(self.redis_man.get_open_redis_servers())
            if open_servers > 1:
                self.print(
-                    f'Warning: You have {open_servers} '
-                    f'redis servers running. '
-                    f'Run Slips with --killall to stop them.'
+                    f"Warning: You have {open_servers} "
+                    f"redis servers running. "
+                    f"Run Slips with --killall to stop them."
                )

-            self.print("Warning: Slips may generate a large amount"
-                       "of traffic by querying TI sites.")
-
+            self.print(
+                "Warning: Slips may generate a large amount "
+                "of traffic by querying TI sites."
+            )

            hostIP = self.metadata_man.store_host_ip()

            # Don't try to stop slips if it's capturing from
            # an interface or a growing zeek dir
-            self.is_interface: bool = self.args.interface or self.db.is_growing_zeek_dir()
-
-            while True:
-                # check for the stop msg
-                if self.proc_man.should_stop():
-                    self.proc_man.shutdown_gracefully()
-                    break
+            self.is_interface: bool = (
+                self.args.interface or self.db.is_growing_zeek_dir()
+            )

-                # Sleep some time to do routine checks and give time for more traffic to come
+            while (
+                not self.proc_man.should_stop()
+                and not self.proc_man.slips_is_done_receiving_new_flows()
+            ):
+                # Sleep some time to do routine checks and give time for
+                # more traffic to come
                time.sleep(5)

-                # if you remove the below logic anywhere before the above sleep() statement
-                # it will try to get the return value very quickly before
+                # if you remove the below logic anywhere before the
+                # above sleep() statement, it will try to get the return
+                # value very quickly before
                # the webinterface thread sets it. so don't
                self.ui_man.check_if_webinterface_started()

+                # update the text we show in the cli
                self.update_stats()

                # Check if we need to close any TWs
                self.db.check_TW_to_close()

-                modified_ips_in_the_last_tw, modified_profiles = self.metadata_man.update_slips_running_stats()
+                modified_profiles: Set[str] = (
+                    self.metadata_man.update_slips_running_stats()[1]
+                )
                hostIP: str = self.update_host_ip(hostIP, modified_profiles)

-                # don't move this up because we still need to print the stats and check tws anyway
+                # don't move this line up because we still need to print the
+                # stats and check tws anyway
                if self.proc_man.should_run_non_stop():
                    continue

-                if self.proc_man.slips_is_done_receiving_new_flows():
-                    self.proc_man.shutdown_gracefully()
-                    break
-
                self.db.check_health()

        except KeyboardInterrupt:
-            # the EINTR error code happens if a signal occurred while the system call was in progress
+            # the EINTR error code happens if a signal occurred while
+            # the system call was in progress
            # comes here if zeek terminates while slips is still working
-            self.proc_man.shutdown_gracefully()
+            pass
+
+        self.proc_man.shutdown_gracefully()
diff --git a/slips_files/common/abstracts/_module.py b/slips_files/common/abstracts/_module.py
index 558fb6a5d..1b0ba2d0c 100644
--- a/slips_files/common/abstracts/_module.py
+++ b/slips_files/common/abstracts/_module.py
@@ -8,7 +8,7 @@
 from slips_files.core.database.database_manager import DBManager
 from slips_files.common.abstracts.observer import IObservable

-class IModule(IObservable, ABC):
+class IModule(IObservable, ABC, Process):
    """
    An interface for all slips modules
    """
@@ -33,31 +33,37 @@ def __init__(self,
        self.add_observer(self.logger)
        self.init(**kwargs)

+    @abstractmethod
    def init(self, **kwargs):
        """
-        all the code that was in the __init__ of all modules, is now in this method
-        the goal of this is to have one common __init__() for all modules, which is the one
-        in this file
-        this init will have access to all keyword args passes when initializing the module
+        all the code that was in the __init__ of all modules is
+        now in this method
+        the goal of this is to have one common __init__() for all
+        modules, which is the one in this file
+        this init will have access to all keyword args passed when
+        initializing the module
        """

    def should_stop(self) -> bool:
        """
        The module should stop on the following 2 conditions
        1.
no new msgs are received in any of the channels the + module is subscribed to 2. the termination event is set by the process_manager.py """ if self.msg_received or not self.termination_event.is_set(): # this module is still receiving msgs, # don't stop return False + return True def print(self, text, verbose=1, debug=0, log_to_logfiles_only=False): """ Function to use to print text using the outputqueue of slips. - Slips then decides how, when and where to print this text by taking all the processes into account + Slips then decides how, when and where to print this text + by taking all the processes into account :param verbose: 0 - don't print 1 - basic operation/proof of work @@ -68,13 +74,15 @@ def print(self, text, verbose=1, debug=0, log_to_logfiles_only=False): 1 - print exceptions 2 - unsupported and unhandled types (cases that may cause errors) 3 - red warnings that needs examination - developer warnings - :param text: text to print. Can include format like 'Test {}'.format('here') + :param text: text to print. Can include format + like 'Test {}'.format('here') + :param log_to_logfiles_only: logs to slips.log only, not to cli """ self.notify_observers( { 'from': self.name, - 'txt': text, + 'txt': str(text), 'verbose': verbose, 'debug': debug, 'log_to_logfiles_only': log_to_logfiles_only @@ -96,7 +104,8 @@ def main(self): def pre_main(self): """ - This function is for initializations that are executed once before the main loop + This function is for initializations that are + executed once before the main loop """ pass @@ -110,7 +119,10 @@ def get_msg(self, channel_name): return False def run(self): - """ This is the loop function, it runs non-stop as long as the module is online """ + """ + This is the loop function, it runs non-stop as long as + the module is running + """ try: error: bool = self.pre_main() if error or self.should_stop(): @@ -122,14 +134,15 @@ def run(self): except Exception: exception_line = sys.exc_info()[2].tb_lineno self.print(f'Problem in pre_main() line {exception_line}', 0, 1) - self.print(traceback.format_exc(), 0, 1) + self.print(traceback.print_stack(), 0, 1) return True - error = False try: while not self.should_stop(): - # keep running main() in a loop as long as the module is online - # if a module's main() returns 1, it means there's an error and it needs to stop immediately + # keep running main() in a loop as long as the module is + # online + # if a module's main() returns 1, it means there's an + # error and it needs to stop immediately error: bool = self.main() if error: self.shutdown_gracefully() @@ -138,7 +151,9 @@ def run(self): self.shutdown_gracefully() except Exception: exception_line = sys.exc_info()[2].tb_lineno - self.print(f'Problem in main() line {exception_line}', 0, 1) - self.print(traceback.format_exc(), 0, 1) + self.print(f'Problem in {self.name}\'s main() ' + f'line {exception_line}', + 0, 1) + traceback.print_stack() return True diff --git a/slips_files/common/abstracts/core.py b/slips_files/common/abstracts/core.py index fb22f7fca..2d36f2418 100644 --- a/slips_files/common/abstracts/core.py +++ b/slips_files/common/abstracts/core.py @@ -24,9 +24,10 @@ def __init__( **kwargs ): """ - contains common initializations in all core files in slips_files/core/ - the goal of this is to have one common __init__() for all modules, which is the one - in this file + contains common initializations in all core files in + slips_files/core/ + the goal of this is to have one common __init__() + for all modules, which is the one in this file """ 
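The _module.py and core.py hunks above converge on one lifecycle contract: run pre_main() once, then loop on main() until should_stop() returns True or main() reports an error, then shut down. A minimal, self-contained sketch of that contract, with toy names (the real IModule also inherits from multiprocessing.Process and treats pending msgs on subscribed Redis channels as a reason not to stop):

```python
# Toy sketch of the module lifecycle; ToyModule/Demo are hypothetical names.
from abc import ABC, abstractmethod
from multiprocessing import Event


class ToyModule(ABC):
    def __init__(self):
        self.termination_event = Event()
        self.msg_received = False

    def should_stop(self) -> bool:
        # stop only when no msg is pending AND termination was requested
        return not self.msg_received and self.termination_event.is_set()

    def pre_main(self) -> bool:
        """one-time initialization; returning True aborts the module"""
        return False

    @abstractmethod
    def main(self) -> bool:
        """one unit of work; returning True means a fatal error"""

    def shutdown_gracefully(self):
        print("shutting down")

    def run(self):
        if self.pre_main() or self.should_stop():
            self.shutdown_gracefully()
            return
        while not self.should_stop():
            if self.main():  # an error stops the loop immediately
                break
        self.shutdown_gracefully()


class Demo(ToyModule):
    def main(self) -> bool:
        print("one unit of work")
        self.termination_event.set()  # pretend process_manager asked us to stop
        return False


Demo().run()
```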
Process.__init__(self)
        self.output_dir = output_dir
@@ -57,7 +58,7 @@ def run(self):
        except Exception:
            exception_line = sys.exc_info()[2].tb_lineno
            self.print(f'Problem in main() line {exception_line}', 0, 1)
-            self.print(traceback.format_exc(), 0, 1)
+            self.print(traceback.print_stack(), 0, 1)
            return True
diff --git a/slips_files/common/abstracts/observer.py b/slips_files/common/abstracts/observer.py
index b93771c2a..2aa497d66 100644
--- a/slips_files/common/abstracts/observer.py
+++ b/slips_files/common/abstracts/observer.py
@@ -1,14 +1,17 @@
 from abc import ABC, abstractmethod

+
 class IObserver(ABC):
    """
    gets notified whenever an observable has a new msg for it
    """
+
    @abstractmethod
    def update(self, msg):
-        """gets called whenever there's a new msg"""
+        """is called whenever there's a new msg"""
        pass

+
 class IObservable(ABC):
    def __init__(self):
        self.observers = []
@@ -21,4 +24,4 @@ def remove_observer(self, observer):

    def notify_observers(self, msg):
        for observer in self.observers:
-            observer.update(msg)
\ No newline at end of file
+            observer.update(msg)
diff --git a/slips_files/common/idea_format.py b/slips_files/common/idea_format.py
new file mode 100644
index 000000000..c1d796af8
--- /dev/null
+++ b/slips_files/common/idea_format.py
@@ -0,0 +1,166 @@
+import traceback
+from datetime import datetime
+from typing import Tuple
+
+import validators
+
+from slips_files.common.slips_utils import utils
+from slips_files.core.evidence_structure.evidence import (
+    Evidence,
+    IoCType,
+    EvidenceType,
+    )
+
+
+def get_ip_version(ip: str) -> str:
+    """returns 'IP6' or 'IP4'"""
+    if validators.ipv4(ip):
+        ip_version = "IP4"
+    elif validators.ipv6(ip):
+        ip_version = "IP6"
+    return ip_version
+
+
+def extract_cc_server_ip(evidence: Evidence) -> Tuple[str, str]:
+    """
+    extracts the CC server's IP from CC evidence
+    and returns the following in a tuple
+    ip_version: 'IP6' or 'IP4'
+    and the IP
+    """
+    # get the destination IP
+    cc_server = evidence.description.split("destination IP: ")[1].split(" ")[0]
+    return cc_server, get_ip_version(cc_server)
+
+
+def extract_cc_botnet_ip(evidence: Evidence) -> Tuple[str, str]:
+    """
+    extracts the botnet's IP aka the client's IP from the CC evidence
+    and returns the following in a tuple
+    ip_version: 'IP6' or 'IP4'
+    and the IP
+    """
+    # this evidence belongs to the botnet's profile, not the server
+    srcip = evidence.attacker.value
+    return srcip, get_ip_version(srcip)
+
+
+def extract_victim(evidence: Evidence) -> Tuple[str, str]:
+    ip = evidence.victim.value
+    # map of slips victim types to IDEA supported types
+    cases = {
+        IoCType.IP.name: get_ip_version(ip),
+        IoCType.DOMAIN.name: "Hostname",
+        IoCType.URL.name: "URL",
+    }
+    return ip, cases[evidence.victim.victim_type]
+
+
+def extract_attacker(evidence: Evidence) -> Tuple[str, str]:
+    ip = evidence.attacker.value
+    # map of slips attacker types to IDEA supported types
+    cases = {
+        IoCType.IP.name: get_ip_version(ip),
+        IoCType.DOMAIN.name: "Hostname",
+        IoCType.URL.name: "URL",
+    }
+    return ip, cases[evidence.attacker.attacker_type]
+
+
+def idea_format(evidence: Evidence):
+    """
+    Function to format our evidence according to
+    Intrusion Detection Extensible Alert (IDEA format).
+    Detailed explanation of IDEA categories:
+    https://idea.cesnet.cz/en/classifications
+    """
+    try:
+        idea_dict = {
+            "Format": "IDEA0",
+            "ID": evidence.id,
+            # both times represent the time of the detection, we probably
+            # don't need flow_datetime
+            "DetectTime": datetime.now(utils.local_tz).isoformat(),
+            "EventTime": datetime.now(utils.local_tz).isoformat(),
+            "Category": [evidence.category.value],
+            "Confidence": evidence.confidence,
+            "Source": [{}],
+        }
+
+        attacker, attacker_type = extract_attacker(evidence)
+        idea_dict["Source"][0].update({attacker_type: [attacker]})
+
+        # according to the IDEA format
+        # When someone communicates with C&C, both sides of communication are
+        # sources, differentiated by the Type attribute, 'C&C' or 'Botnet'
+        # https://idea.cesnet.cz/en/design#:~:text=to%20string%20%E2%80%9CIDEA1
+        # %E2%80%9D.-,Sources%20and%20targets,-As%20source%20of
+        if evidence.evidence_type == EvidenceType.COMMAND_AND_CONTROL_CHANNEL:
+            # botnet, ip_version = extract_cc_botnet_ip(evidence)
+            idea_dict["Source"][0].update({"Type": ["Botnet"]})
+
+            cc_server, ip_version = extract_cc_server_ip(evidence)
+            server_info: dict = {ip_version: [cc_server], "Type": ["CC"]}
+
+            idea_dict["Source"].append(server_info)
+
+        # the idx of the daddr, in CC detections, it's the second one
+        idx = (
+            1
+            if (evidence.evidence_type == EvidenceType.COMMAND_AND_CONTROL_CHANNEL)
+            else 0
+        )
+        if evidence.port:
+            idea_dict["Source"][idx].update({"Port": [evidence.port]})
+        if evidence.proto:
+            idea_dict["Source"][idx].update({"Proto": [evidence.proto.name]})
+
+        if hasattr(evidence, "victim") and evidence.victim:
+            # is the dstip ipv4/ipv6 or mac?
+            victims_ip: str
+            victim_type: str
+            victims_ip, victim_type = extract_victim(evidence)
+            idea_dict["Target"] = [{victim_type: [victims_ip]}]

+        # update the dstip description if specified in the evidence
+        if hasattr(evidence, "source_target_tag") and evidence.source_target_tag:
+            # https://idea.cesnet.cz/en/classifications#sourcetargettagsourcetarget_classification
+            idea_dict["Source"][0].update({"Type": [evidence.source_target_tag.value]})
+
+        # add the description
+        attachment = {
+            "Attach": [
+                {
+                    "Content": evidence.description,
+                    "ContentType": "text/plain",
+                }
+            ]
+        }
+        idea_dict.update(attachment)
+
+        # only evidence of type scanning has conn_count
+        if evidence.conn_count:
+            idea_dict["ConnCount"] = evidence.conn_count
+
+        if evidence.evidence_type == EvidenceType.MALICIOUS_DOWNLOADED_FILE:
+            idea_dict["Attach"] = [
+                {
+                    "Type": ["Malware"],
+                    "Hash": [f"md5:{evidence.attacker.value}"],
+                }
+            ]
+            if "size" in evidence.description:
+                idea_dict.update(
+                    {
+                        "Size": int(
+                            evidence.description.replace(".", "")
+                            .split("size:")[1]
+                            .split("from")[0]
+                        )
+                    }
+                )
+
+        return idea_dict
+    except Exception as e:
+        print(f"Error in idea_format(): {e}")
+        print(traceback.print_stack())
diff --git a/slips_files/common/slips_utils.py b/slips_files/common/slips_utils.py
index bc5c40fe2..4c9bf8c65 100644
--- a/slips_files/common/slips_utils.py
+++ b/slips_files/common/slips_utils.py
@@ -11,7 +11,9 @@
 import sys
 import ipaddress
 import aid_hash
-
+from typing import Any, Union
+from dataclasses import is_dataclass, asdict, fields
+from enum import Enum, auto

 IS_IN_A_DOCKER_CONTAINER = os.environ.get('IS_IN_A_DOCKER_CONTAINER', False)
@@ -100,7 +102,8 @@ def sanitize(self, string):

    def detect_data_type(self, data):
        """
-        Detects the type of incoming data: ipv4, ipv6, domain, ip range, asn, md5, etc
+        Detects the type of incoming data:
+        ipv4, ipv6,
domain, ip range, asn, md5, etc """ data = data.strip() try: @@ -507,156 +510,18 @@ def get_aid(self, flow): # proto doesn't have an aid.FlowTuple method return '' + def to_json_serializable(self, obj: Any) -> Any: + if is_dataclass(obj): + return {k: self.to_json_serializable(v) for k, v in asdict( + obj).items()} + elif isinstance(obj, Enum): + return obj.value + elif isinstance(obj, list): + return [self.to_json_serializable(item) for item in obj] + elif isinstance(obj, dict): + return {k: self.to_json_serializable(v) for k, v in obj.items()} + else: + return obj - def IDEA_format( - self, - srcip, - evidence_type, - attacker_direction, - attacker, - description, - confidence, - category, - conn_count, - source_target_tag, - port, - proto, - evidence_id - ): - """ - Function to format our evidence according to Intrusion Detection Extensible Alert (IDEA format). - Detailed explanation of IDEA categories: https://idea.cesnet.cz/en/classifications - """ - IDEA_dict = { - 'Format': 'IDEA0', - 'ID': evidence_id, - # both times represet the time of the detection, we probably don't need flow_datetime - 'DetectTime': datetime.now(self.local_tz).isoformat(), - 'EventTime': datetime.now(self.local_tz).isoformat(), - 'Category': [category], - 'Confidence': confidence, - 'Source': [{}], - } - - # is the srcip ipv4/ipv6 or mac? - if validators.ipv4(srcip): - IDEA_dict['Source'][0].update({'IP4': [srcip]}) - elif validators.ipv6(srcip): - IDEA_dict['Source'][0].update({'IP6': [srcip]}) - elif validators.mac_address(srcip): - IDEA_dict['Source'][0].update({'MAC': [srcip]}) - elif validators.url(srcip): - IDEA_dict['Source'][0].update({'URL': [srcip]}) - - - # When someone communicates with C&C, both sides of communication are - # sources, differentiated by the Type attribute, 'C&C' or 'Botnet' - if evidence_type == 'Command-and-Control-channels-detection': - # get the destination IP - dstip = description.split('destination IP: ')[1].split(' ')[0] - - if validators.ipv4(dstip): - ip_version = 'IP4' - elif validators.ipv6(dstip): - ip_version = 'IP6' - - IDEA_dict['Source'].append({ip_version: [dstip], 'Type': ['CC']}) - - # some evidence have a dst ip - if 'dstip' in attacker_direction or 'dip' in attacker_direction: - # is the dstip ipv4/ipv6 or mac? - if validators.ipv4(attacker): - IDEA_dict['Target'] = [{'IP4': [attacker]}] - elif validators.ipv6(attacker): - IDEA_dict['Target'] = [{'IP6': [attacker]}] - elif validators.mac_address(attacker): - IDEA_dict['Target'] = [{'MAC': [attacker]}] - elif validators.url(attacker): - IDEA_dict['Target'][0].update({'URL': [srcip]}) - - # try to extract the hostname/SNI/rDNS of the dstip form the description if available - hostname = False - try: - hostname = description.split('rDNS: ')[1] - except IndexError: - ... 
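The new idea_format module above replaces the removed utils.IDEA_format() (its deletion continues below) with a mapping driven by the Evidence dataclass. For a quick feel of the IDEA0 skeleton it produces, here is a stripped-down sketch using a hypothetical ToyEvidence in place of the real Evidence class; the category string and field values are illustrative:

```python
# Stripped-down sketch of the IDEA0 mapping; ToyEvidence is hypothetical.
import json
from dataclasses import dataclass
from datetime import datetime, timezone

import validators


@dataclass
class ToyEvidence:
    id: str
    attacker_ip: str
    category: str
    confidence: float
    description: str
    is_cc: bool = False
    cc_server_ip: str = ""


def ip_key(ip: str) -> str:
    # 'IP4'/'IP6' per the IDEA spec; default to IP4 in this sketch
    return "IP6" if validators.ipv6(ip) else "IP4"


def toy_idea_format(ev: ToyEvidence) -> dict:
    now = datetime.now(timezone.utc).isoformat()
    idea = {
        "Format": "IDEA0",
        "ID": ev.id,
        "DetectTime": now,
        "EventTime": now,
        "Category": [ev.category],
        "Confidence": ev.confidence,
        "Source": [{ip_key(ev.attacker_ip): [ev.attacker_ip]}],
        "Attach": [{"Content": ev.description, "ContentType": "text/plain"}],
    }
    if ev.is_cc:
        # per IDEA, both C&C sides are Sources, typed 'Botnet' and 'CC'
        idea["Source"][0]["Type"] = ["Botnet"]
        idea["Source"].append(
            {ip_key(ev.cc_server_ip): [ev.cc_server_ip], "Type": ["CC"]}
        )
    return idea


print(json.dumps(toy_idea_format(
    ToyEvidence("ev-1", "10.0.0.5", "Intrusion.Botnet", 0.8,
                "connection to a C&C server", True, "1.2.3.4")), indent=2))
```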
-        try:
-            hostname = description.split('SNI: ')[1]
-        except IndexError:
-            pass
-
-        if hostname:
-            IDEA_dict['Target'][0].update({'Hostname': [hostname]})
-
-        # update the dstip description if specified in the evidence
-        if source_target_tag:   # https://idea.cesnet.cz/en/classifications#sourcetargettagsourcetarget_classification
-            IDEA_dict['Target'][0].update({'Type': [source_target_tag]})
-
-        elif 'domain' in attacker_direction:
-            # the ioc is a domain
-            attacker_type = 'Hostname' if validators.domain(attacker) else 'URL'
-            target_info = {attacker_type: [attacker]}
-            IDEA_dict['Target'] = [target_info]
-
-            # update the dstdomain description if specified in the evidence
-            if source_target_tag:
-                IDEA_dict['Target'][0].update({'Type': [source_target_tag]})
-        elif source_target_tag:
-            # the ioc is the srcip, therefore the tag is desscribing the source
-            IDEA_dict['Source'][0].update({'Type': [source_target_tag]})
-
-
-
-        # add the port/proto
-        # for all alerts, the srcip is in IDEA_dict['Source'][0] and the dstip is in IDEA_dict['Target'][0]
-        # for alert that only have a source, this is the port/proto of the source ip
-        key = 'Source'
-        idx = 0   # this idx is used for selecting the right dict to add port/proto
-
-        if 'Target' in IDEA_dict:
-            # if the alert has a target, add the port/proto to the target(dstip)
-            key = 'Target'
-            idx = 0
-
-        # for C&C alerts IDEA_dict['Source'][0] is the Botnet aka srcip and IDEA_dict['Source'][1] is the C&C aka dstip
-        if evidence_type == 'Command-and-Control-channels-detection':
-            # idx of the dict containing the dstip, we'll use this to add the port and proto to this dict
-            key = 'Source'
-            idx = 1
-
-        if port:
-            IDEA_dict[key][idx].update({'Port': [int(port)]})
-        if proto:
-            IDEA_dict[key][idx].update({'Proto': [proto.lower()]})
-
-        # add the description
-        attachment = {
-            'Attach': [
-                {
-                    'Content': description,
-                    'ContentType': 'text/plain',
-                }
-            ]
-        }
-        IDEA_dict.update(attachment)
-
-        # only evidence of type scanning have conn_count
-        if conn_count:
-            IDEA_dict['ConnCount'] = conn_count
-
-        if 'MaliciousDownloadedFile' in evidence_type:
-            IDEA_dict['Attach'] = [
-                {
-                    'Type': ['Malware'],
-                    'Hash': [f'md5:{attacker}'],
-                }
-
-            ]
-            if 'size' in description:
-                IDEA_dict.update(
-                    {'Size': int(description.replace(".",'').split('size:')[1].split('from')[0])}
-                )
-
-        return IDEA_dict

 utils = Utils()
diff --git a/slips_files/common/style.py b/slips_files/common/style.py
index 81cbb3018..f526fea50 100644
--- a/slips_files/common/style.py
+++ b/slips_files/common/style.py
@@ -6,10 +6,15 @@ def green(txt):
    """
    return colored(txt, "green")

-
 def red(txt):
    """
    returns the text in red
    """
    return colored(txt, "red")

+def cyan(txt):
+    """
+    returns the text in cyan
+    """
+    return colored(txt, "cyan")
+
diff --git a/slips_files/core/database/database_manager.py b/slips_files/core/database/database_manager.py
index 0eb2133fb..9689c2cfa 100644
--- a/slips_files/core/database/database_manager.py
+++ b/slips_files/core/database/database_manager.py
@@ -27,7 +27,11 @@ def __init__(
        self.logger = logger
        IObservable.__init__(self)
        self.add_observer(self.logger)
-        self.rdb = RedisDB(self.logger, redis_port, **kwargs)
+        self.rdb = RedisDB(
+            self.logger,
+            redis_port,
+            start_redis_server,
+            **kwargs)
        # in some rare cases we don't wanna start sqlite,
        # like when using -S
        # we just want to connect to redis to get the PIDs
@@ -97,6 +101,9 @@ def update_ip_info(self, *args, **kwargs):

    def getSlipsInternalTime(self, *args, **kwargs):
        return self.rdb.getSlipsInternalTime(*args,
**kwargs) + def mark_profile_as_malicious(self, *args, **kwargs): + return self.rdb.mark_profile_as_malicious(*args, **kwargs) + def get_equivalent_tws(self, *args, **kwargs): return self.rdb.get_equivalent_tws(*args, **kwargs) @@ -376,8 +383,8 @@ def get_flows_causing_evidence(self, *args, **kwargs): """returns the list of uids of the flows causing evidence""" return self.rdb.get_flows_causing_evidence(*args, **kwargs) - def setEvidence(self, *args, **kwargs): - return self.rdb.setEvidence(*args, **kwargs) + def set_evidence(self, *args, **kwargs): + return self.rdb.set_evidence(*args, **kwargs) def get_user_agents_count(self, *args, **kwargs): return self.rdb.get_user_agents_count(*args, **kwargs) @@ -394,11 +401,9 @@ def mark_evidence_as_processed(self, *args, **kwargs): def is_evidence_processed(self, *args, **kwargs): return self.rdb.is_evidence_processed(*args, **kwargs) - def set_evidence_for_profileid(self, *args, **kwargs): - return self.rdb.set_evidence_for_profileid(*args, **kwargs) - def deleteEvidence(self, *args, **kwargs): - return self.rdb.deleteEvidence(*args, **kwargs) + def delete_evidence(self, *args, **kwargs): + return self.rdb.delete_evidence(*args, **kwargs) def cache_whitelisted_evidence_ID(self, *args, **kwargs): return self.rdb.cache_whitelisted_evidence_ID(*args, **kwargs) @@ -412,8 +417,8 @@ def remove_whitelisted_evidence(self, *args, **kwargs): def get_profileid_twid_alerts(self, *args, **kwargs): return self.rdb.get_profileid_twid_alerts(*args, **kwargs) - def getEvidenceForTW(self, *args, **kwargs): - return self.rdb.getEvidenceForTW(*args, **kwargs) + def get_twid_evidence(self, *args, **kwargs): + return self.rdb.get_twid_evidence(*args, **kwargs) def update_threat_level(self, *args, **kwargs): return self.rdb.update_threat_level(*args, **kwargs) @@ -622,8 +627,11 @@ def add_out_notice(self, *args, **kwargs): def add_out_ssl(self, *args, **kwargs): return self.rdb.add_out_ssl(*args, **kwargs) - def getProfileIdFromIP(self, *args, **kwargs): - return self.rdb.getProfileIdFromIP(*args, **kwargs) + def get_profileid_from_ip(self, *args, **kwargs): + return self.rdb.get_profileid_from_ip(*args, **kwargs) + + def get_first_flow_time(self, *args, **kwargs): + return self.rdb.get_first_flow_time(*args, **kwargs) def getProfiles(self, *args, **kwargs): return self.rdb.getProfiles(*args, **kwargs) @@ -652,20 +660,20 @@ def get_profiles_len(self, *args, **kwargs): def get_last_twid_of_profile(self, *args, **kwargs): return self.rdb.get_last_twid_of_profile(*args, **kwargs) - def getFirstTWforProfile(self, *args, **kwargs): - return self.rdb.getFirstTWforProfile(*args, **kwargs) + def get_first_twid_for_profile(self, *args, **kwargs): + return self.rdb.get_first_twid_for_profile(*args, **kwargs) - def getTWofTime(self, *args, **kwargs): - return self.rdb.getTWofTime(*args, **kwargs) + def get_tw_of_ts(self, *args, **kwargs): + return self.rdb.get_tw_of_ts(*args, **kwargs) - def addNewOlderTW(self, *args, **kwargs): - return self.rdb.addNewOlderTW(*args, **kwargs) + def add_new_older_tw(self, *args, **kwargs): + return self.rdb.add_new_older_tw(*args, **kwargs) - def addNewTW(self, *args, **kwargs): - return self.rdb.addNewTW(*args, **kwargs) + def add_new_tw(self, *args, **kwargs): + return self.rdb.add_new_tw(*args, **kwargs) - def getTimeTW(self, *args, **kwargs): - return self.rdb.getTimeTW(*args, **kwargs) + def get_tw_start_time(self, *args, **kwargs): + return self.rdb.get_tw_start_time(*args, **kwargs) def getAmountTW(self, *args, **kwargs): return 
self.rdb.getAmountTW(*args, **kwargs) @@ -700,8 +708,8 @@ def get_user_agent_from_profile(self, *args, **kwargs): def mark_profile_as_dhcp(self, *args, **kwargs): return self.rdb.mark_profile_as_dhcp(*args, **kwargs) - def addProfile(self, *args, **kwargs): - return self.rdb.addProfile(*args, **kwargs) + def add_profile(self, *args, **kwargs): + return self.rdb.add_profile(*args, **kwargs) def set_profile_module_label(self, *args, **kwargs): return self.rdb.set_profile_module_label(*args, **kwargs) @@ -892,7 +900,10 @@ def get_branch(self, *args, **kwargs): return self.rdb.get_branch(*args, **kwargs) def add_alert(self, alert: dict): - twid_starttime: float = self.rdb.getTimeTW(alert['profileid'], alert['twid']) + twid_starttime: float = self.rdb.get_tw_start_time( + alert['profileid'], + alert['twid'] + ) twid_endtime: float = twid_starttime + RedisDB.width alert.update({'tw_start': twid_starttime, 'tw_end': twid_endtime}) return self.sqlite.add_alert(alert) diff --git a/slips_files/core/database/redis_db/alert_handler.py b/slips_files/core/database/redis_db/alert_handler.py index 774336408..74aaae601 100644 --- a/slips_files/core/database/redis_db/alert_handler.py +++ b/slips_files/core/database/redis_db/alert_handler.py @@ -1,9 +1,16 @@ import time import json -from uuid import uuid4 -from typing import List, Tuple +from typing import List, Tuple, Optional, Dict from slips_files.common.slips_utils import utils +from slips_files.core.evidence_structure.evidence import \ + ( + Evidence, + EvidenceType, + Direction, + Victim, + evidence_to_dict, + ) class AlertHandler: """ @@ -15,7 +22,7 @@ class AlertHandler: def increment_attack_counter( self, attacker: str, - victim: str, + victim: Optional[Victim], evidence_type: str ): """ @@ -25,20 +32,29 @@ def increment_attack_counter( :param victim: IP of a victim :param evidence_type: e.g. MaliciousJA3, DataExfiltration, etc. 
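The long run of one-line wrappers above is a facade: DBManager exposes a single public API and forwards each call to the Redis (rdb) or SQLite backend. A hypothetical sketch of the same effect via `__getattr__` delegation follows; the explicit wrappers Slips uses trade this brevity for greppable names and per-method renames like set_evidence:

```python
# Hypothetical facade sketch: forward unknown attributes to a backend.
class ToyRedisBackend:
    def get_profiles_len(self) -> int:
        return 42


class ToyDBManager:
    def __init__(self):
        self.rdb = ToyRedisBackend()

    def __getattr__(self, name):
        # called only when normal attribute lookup fails;
        # delegate the call to the backend object
        return getattr(self.rdb, name)


db = ToyDBManager()
print(db.get_profiles_len())  # 42, served by ToyRedisBackend
```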
""" + victim = '' if not victim else victim self.r.hincrby( f'{attacker}_evidence_sumamry', f"{victim}_{evidence_type}", 1) + def mark_profile_as_malicious(self, profileid: str): + """keeps track of profiles that generated an alert""" + self.r.sadd("malicious_profiles", profileid) + def set_evidence_causing_alert(self, profileid, twid, alert_ID, evidence_IDs: list): """ When we have a bunch of evidence causing an alert, we associate all evidence IDs with the alert ID in our database - this function stores evidence in 'alerts' key only - :param alert ID: the profileid_twid_ID of the last evidence causing this alert + this function stores evidence in 'alerts_profile_twid' key only + :param alert ID: the profileid_twid_ID of the last evidence + causing this alert :param evidence_IDs: all IDs of the evidence causing this alert """ - old_profileid_twid_alerts: dict = self.get_profileid_twid_alerts(profileid, twid) + old_profileid_twid_alerts: Dict[str, List[str]] + old_profileid_twid_alerts = self.get_profileid_twid_alerts( + profileid, twid + ) alert = { alert_ID: json.dumps(evidence_IDs) @@ -53,91 +69,40 @@ def set_evidence_causing_alert(self, profileid, twid, alert_ID, evidence_IDs: li # no previous alerts for this profileid twid profileid_twid_alerts = json.dumps(alert) - - self.r.hset(f'{profileid}{self.separator}{twid}', + self.r.hset(f'{profileid}_{twid}', 'alerts', profileid_twid_alerts) + self.r.incr('number_of_alerts', 1) - # the structure of alerts key is - # alerts { - # profile_: { - # twid1: { - # alert_ID1: [evidence_IDs], - # alert_ID2: [evidence_IDs] - # } - # } - # } - - profile_alerts = self.r.hget('alerts', profileid) - # alert ids look like this - # profile_192.168.131.2_timewindow1_92a3b9c2-330b-47ab-b73e-c5380af90439 - alert_hash = alert_ID.split('_')[-1] - alert = { - twid: { - alert_hash: evidence_IDs - } - } - if not profile_alerts: - # first alert in this profile - alert = json.dumps(alert) - self.r.hset('alerts', profileid, alert) - return - - # the format of this dict is {twid1: {alert_hash: [evidence_IDs]}, - # twid2: {alert_hash: [evidence_IDs]}} - profile_alerts:dict = json.loads(profile_alerts) - - if twid not in profile_alerts: - # first time having an alert for this twid - profile_alerts.update(alert) - else: - # we already have a twid with alerts in this profile, update it - # the format of twid_alerts is {alert_hash: evidence_IDs} - twid_alerts: dict = profile_alerts[twid] - twid_alerts[alert_hash] = evidence_IDs - profile_alerts[twid] = twid_alerts - - profile_alerts = json.dumps(profile_alerts) - self.r.hset('alerts', profileid, profile_alerts) - - def get_evidence_causing_alert(self, profileid, twid, alert_ID) -> list: + def get_evidence_causing_alert(self, profileid, twid, alert_id: str) -> \ + list: """ Returns all the IDs of evidence causing this alert :param alert_ID: ID of alert to export to warden server for example profile_10.0.2.15_timewindow1_4e4e4774-cdd7-4e10-93a3-e764f73af621 """ - if alerts := self.r.hget(f'{profileid}{self.separator}{twid}', 'alerts'): + if alerts := self.r.hget(f'{profileid}_{twid}', 'alerts'): alerts = json.loads(alerts) - return alerts.get(alert_ID, False) + return alerts.get(alert_id, False) return False - def get_evidence_by_ID(self, profileid, twid, ID): - - evidence = self.getEvidenceForTW(profileid, twid) + def get_evidence_by_ID(self, profileid: str, twid: str, evidence_id: str): + evidence: Dict[str, dict] = self.get_twid_evidence(profileid, twid) if not evidence: return False - evidence: dict = 
json.loads(evidence) # loop through each evidence in this tw for evidence_details in evidence.values(): evidence_details = json.loads(evidence_details) - if evidence_details.get('ID') == ID: + if evidence_details.get('ID') == evidence_id: # found an evidence that has a matching ID return evidence_details - def is_detection_disabled(self, evidence_type: str): + def is_detection_disabled(self, evidence_type: EvidenceType): """ Function to check if detection is disabled in slips.conf """ - for disabled_detection in self.disabled_detections: - # when we disable a detection , we add 'SSHSuccessful' in slips.conf, - # however our evidence can depend on an addr, for example 'SSHSuccessful-by-addr'. - # check if any disabled detection is a part of our evidence. - # for example 'SSHSuccessful' is a part of 'SSHSuccessful-by-addr' so if 'SSHSuccessful' - # is disabled, 'SSHSuccessful-by-addr' should also be disabled - if disabled_detection in evidence_type: - return True - return False + return str(evidence_type) in self.disabled_detections def set_flow_causing_evidence(self, uids: list, evidence_ID): self.r.hset("flows_causing_evidence", evidence_ID, json.dumps(uids)) @@ -150,147 +115,73 @@ def get_victim(self, profileid, attacker): saddr = profileid.split("_")[-1] if saddr not in attacker: return saddr - # if the saddr is the attacker, then the victim should be passed as a param to this function - # there's no 1 victim in this case. for example in ARP scans, the victim is the whole network + # if the saddr is the attacker, then the victim should be + # passed as a param to this function + # there's no 1 victim in this case. for example in ARP scans, + # the victim is the whole network return '' - def setEvidence( - self, - evidence_type, - attacker_direction, - attacker, - threat_level, - confidence, - description, - timestamp, - category, - source_target_tag=False, - conn_count=False, - port=False, - proto=False, - profileid='', - twid='', - uid='', - victim='' - ): + def set_evidence(self, evidence: Evidence): """ Set the evidence for this Profile and Timewindow. - - evidence_type: determine the type of this evidence. e.g. PortScan, ThreatIntelligence - attacker_direction: the type of value causing the detection e.g. dstip, srcip, dstdomain, md5, url - attacker: the actual srcip or dstdomain. e.g. 1.1.1.1 or abc.com - threat_level: determine the importance of the evidence, available options are: - info, low, medium, high, critical - confidence: determine the confidence of the detection on a scale from 0 to 1. - (How sure you are that this is what you say it is.) - uid: can be a single uid as a str, or a list of uids causing the evidence. - needed to get the flow from the database. - category: what is this evidence category according to IDEA categories - conn_count: the number of packets/flows/nxdomains that formed this scan/sweep/DGA. - victim: the ip/domain that was attacked by the attacker param. if not given slips can deduce it. - this param is usually passed if the saddr is the attacker and slips can't deduce the victim - - source_target_tag: - this is the IDEA category of the source and dst ip used in the evidence - if the attacker_direction is srcip this describes the source ip, - if the attacker_direction is dstip this describes the dst ip. 
- supported source and dst types are in the SourceTargetTag section https://idea.cesnet.cz/en/classifications - this is a keyword/optional argument because it shouldn't be used with dports and sports attacker_direction + :param evidence: an Evidence obj (defined in + slips_files/core/evidence_structure/evidence.py) with all the + evidence details, """ + # create the profile if it doesn't exist + self.add_profile( + str(evidence.profile), + evidence.timestamp, + self.width + ) # Ignore evidence if it's disabled in the configuration file - if self.is_detection_disabled(evidence_type): + if self.is_detection_disabled(evidence.evidence_type): return False - if not twid: - twid = '' - - # every evidence should have an ID according to the IDEA format - evidence_ID = str(uuid4()) - - if isinstance(uid, list): - # some evidence are caused by several uids, use the last one only - # todo check why we have duplicates in the first place - # remove duplicate uids - uids = list(set(uid)) - else: - uids = [uid] - - self.set_flow_causing_evidence(uids, evidence_ID) - - if not isinstance(threat_level, str): - # make sure we always store str threat levels in the db - threat_level: str = utils.threat_level_to_string(threat_level) - - if timestamp: - timestamp = utils.convert_format(timestamp, utils.alerts_format) - - if not victim: - victim = self.get_victim(profileid, attacker) - - evidence_to_send = { - 'profileid': str(profileid), - 'twid': str(twid), - 'attacker_direction': attacker_direction, - 'attacker': attacker, - 'evidence_type': evidence_type, - 'description': description, - 'stime': timestamp, - 'uid': uids, - 'confidence': confidence, - 'threat_level': threat_level, - 'category': category, - 'ID': evidence_ID, - 'victim': victim - } - # not all evidence requires a conn_coun, scans only - if conn_count: - evidence_to_send['conn_count'] = conn_count - - # source_target_tag is defined only if attacker_direction is srcip or dstip - if source_target_tag: - evidence_to_send['source_target_tag'] = source_target_tag - - if port: - evidence_to_send['port'] = port - if proto: - evidence_to_send['proto'] = proto - - evidence_to_send = json.dumps(evidence_to_send) + self.set_flow_causing_evidence(evidence.uid, evidence.id) + evidence_to_send: dict = evidence_to_dict(evidence) + evidence_to_send: str = json.dumps(evidence_to_send) # Check if we have the current evidence stored in the DB for # this profileid in this twid - current_evidence = self.getEvidenceForTW(profileid, twid) - current_evidence = json.loads(current_evidence) if current_evidence else {} - should_publish = evidence_ID not in current_evidence.keys() - # update our current evidence for this profileid and twid. - # now the evidence_ID is used as the key - current_evidence.update({evidence_ID: evidence_to_send}) - - # Set evidence in the database. - current_evidence = json.dumps(current_evidence) - self.r.hset( - f'{profileid}_{twid}', 'Evidence', current_evidence - ) - - self.r.hset(f'evidence{profileid}', twid, current_evidence) # This is done to ignore repetition of the same evidence sent. 
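The new set_evidence() (continued below) keys each piece of evidence by its ID inside a per-profile, per-timewindow Redis hash, so storing the same evidence twice becomes a no-op and 'evidence_added' is published at most once. A minimal sketch of that check-then-store pattern with redis-py, assuming a local Redis server; the key names here are illustrative:

```python
# Sketch of the publish-once pattern; assumes Redis on localhost:6379.
import json

import redis

r = redis.Redis(decode_responses=True)


def store_evidence_once(profile: str, twid: str, evidence_id: str,
                        evidence: dict) -> bool:
    key = f"{profile}_{twid}_evidence"
    if r.hget(key, evidence_id) is not None:
        return False  # already stored: don't publish a duplicate
    r.hset(key, evidence_id, json.dumps(evidence))
    r.incr("number_of_evidence")
    r.publish("evidence_added", json.dumps(evidence))
    return True
```

If several writers could race on the same ID, hsetnx() would do the existence check and the write in one atomic step.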
- # note that publishing HAS TO be done after updating the 'Evidence' keys - if should_publish: + evidence_exists: Optional[dict] = self.r.hget( + f'{evidence.profile}_{evidence.timewindow}_evidence', + evidence.id + ) + + # note that publishing HAS TO be done after adding the evidence + # to the db + if not evidence_exists: + self.r.hset( + f'{evidence.profile}_{evidence.timewindow}_evidence', + evidence.id, + evidence_to_send + ) self.r.incr('number_of_evidence', 1) self.publish('evidence_added', evidence_to_send) # an evidence is generated for this profile # update the threat level of this profile - if attacker_direction in ('sip', 'srcip'): + if evidence.attacker.direction == Direction.SRC: # the srcip is the malicious one - self.update_threat_level(profileid, threat_level, confidence) - elif attacker_direction in ('dip', 'dstip'): + self.update_threat_level( + str(evidence.profile), + str(evidence.threat_level), + evidence.confidence + ) + elif evidence.attacker.direction == Direction.DST: # the dstip is the malicious one - self.update_threat_level(f'profile_{attacker}', threat_level, confidence) + self.update_threat_level( + str(evidence.attacker.profile), + str(evidence.threat_level), + evidence.confidence + ) + return True @@ -301,75 +192,27 @@ def init_evidence_number(self): def get_evidence_number(self): return self.r.get('number_of_evidence') - def mark_evidence_as_processed(self, evidence_ID: str): + def mark_evidence_as_processed(self, evidence_id: str): """ If an evidence was processed by the evidenceprocess, mark it in the db """ - self.r.sadd('processed_evidence', evidence_ID) + self.r.sadd('processed_evidence', evidence_id) def is_evidence_processed(self, evidence_ID: str) -> bool: return self.r.sismember('processed_evidence', evidence_ID) - def set_evidence_for_profileid(self, evidence): - """ - Set evidence for the profile in the same format as json in alerts.json - """ - evidence = json.dumps(evidence) - self.r.sadd('Evidence', evidence) - def deleteEvidence(self, profileid, twid, evidence_ID: str): + + def delete_evidence(self, profileid, twid, evidence_id: str): """ - Delete evidence from the database + Deletes an evidence from the database """ - # 1. delete evidence from 'evidence' key - current_evidence = self.getEvidenceForTW(profileid, twid) - current_evidence = json.loads(current_evidence) if current_evidence else {} - # Delete the key regardless of whether it is in the dictionary - current_evidence.pop(evidence_ID, None) - current_evidence_json = json.dumps(current_evidence) - self.r.hset( - profileid + self.separator + twid, - 'Evidence', - current_evidence_json, - ) - self.r.hset(f'evidence{profileid}', twid, current_evidence_json) - # 2. 
delete evidence from 'alerts' key - profile_alerts = self.r.hget('alerts', profileid) - if not profile_alerts: - # this means that this evidence wasn't a part of an alert - # give redis time to the save the changes before calling this function again - # removing this sleep will cause this function to be called again before - # deleting the evidence ID from the evidence keys - time.sleep(0.5) - return - - profile_alerts:dict = json.loads(profile_alerts) - try: - # we already have a twid with alerts in this profile, update it - # the format of twid_alerts is {alert_hash: evidence_IDs} - twid_alerts: dict = profile_alerts[twid] - IDs = False - hash = False - for alert_hash, evidence_IDs in twid_alerts.items(): - if evidence_ID in evidence_IDs: - IDs = evidence_IDs - hash = alert_hash - break - else: - return + # this is only called by evidencehandler, + # which means that any evidence passed to this function + # can never be a part of a past alert + self.r.hdel(f'{profileid}_{twid}_evidence', evidence_id) - if IDs and hash: - evidence_IDs = IDs.remove(evidence_ID) - alert_ID = f'{profileid}_{twid}_{hash}' - if evidence_IDs: - self.set_evidence_causing_alert( - profileid, twid, alert_ID, evidence_IDs - ) - except KeyError: - # alert not added to the 'alerts' key yet! - # this means that this evidence wasn't a part of an alert - return def cache_whitelisted_evidence_ID(self, evidence_ID:str): """ @@ -385,21 +228,22 @@ def is_whitelisted_evidence(self, evidence_ID): """ return self.r.sismember('whitelisted_evidence', evidence_ID) - def remove_whitelisted_evidence(self, all_evidence:str) -> str: + def remove_whitelisted_evidence(self, all_evidence: dict) -> dict: """ param all_evidence serialized json dict - returns a serialized json dict + returns a dict """ # remove whitelisted evidence from the given evidence - all_evidence = json.loads(all_evidence) + # all_evidence = json.loads(all_evidence) tw_evidence = {} - for ID,evidence in all_evidence.items(): - if self.is_whitelisted_evidence(ID): + for evidence_id, evidence in all_evidence.items(): + if self.is_whitelisted_evidence(evidence_id): continue - tw_evidence[ID] = evidence - return json.dumps(tw_evidence) + tw_evidence[evidence_id] = evidence + return tw_evidence - def get_profileid_twid_alerts(self, profileid, twid) -> dict: + def get_profileid_twid_alerts(self, profileid, twid) \ + -> Dict[str, List[str]]: """ The format for the returned dict is {profile123_twid1_: [ev_uuid1, ev_uuid2, ev_uuid3]} @@ -410,12 +254,18 @@ def get_profileid_twid_alerts(self, profileid, twid) -> dict: alerts: dict = json.loads(alerts) return alerts - def getEvidenceForTW(self, profileid: str, twid: str) -> str: + def get_twid_evidence(self, profileid: str, twid: str) -> Dict[str, dict]: """Get the evidence for this TW for this Profile""" - evidence = self.r.hget(profileid + self.separator + twid, 'Evidence') + evidence: Dict[str, dict] = self.r.hgetall( + f'{profileid}_{twid}_evidence' + ) if evidence: - evidence: str = self.remove_whitelisted_evidence(evidence) - return evidence + evidence: Dict[str, dict] = self.remove_whitelisted_evidence( + evidence + ) + return evidence + + return {} def set_max_threat_level(self, profileid: str, threat_level: str): self.r.hset(profileid, 'max_threat_level', threat_level) @@ -472,7 +322,7 @@ def update_max_threat_level( the given :returns: the numerical val of the max threat level """ - threat_level_float = utils.threat_levels[threat_level] + threat_level_float = utils.threat_levels[threat_level] old_max_threat_level: str 
= self.r.hget( profileid, @@ -494,7 +344,7 @@ def update_max_threat_level( def update_threat_level( - self, profileid: str, threat_level: str, confidence: int + self, profileid: str, threat_level: str, confidence: float ): """ Update the threat level of a certain profile diff --git a/slips_files/core/database/redis_db/database.py b/slips_files/core/database/redis_db/database.py index fa86685a1..e8d951cf7 100644 --- a/slips_files/core/database/redis_db/database.py +++ b/slips_files/core/database/redis_db/database.py @@ -15,6 +15,7 @@ import ipaddress import sys import validators +from typing import List RUNNING_IN_DOCKER = os.environ.get('IS_IN_A_DOCKER_CONTAINER', False) @@ -116,19 +117,23 @@ def __new__( cls.start_server = start_redis_server if cls.redis_port not in cls._instances: - cls._instances[cls.redis_port] = super().__new__(cls) cls._set_redis_options() cls._read_configuration() - cls.start() - # By default the slips internal time is 0 until we receive something - cls.set_slips_internal_time(0) - if not cls.get_slips_start_time(): - cls._set_slips_start_time() - # useful for debugging using 'CLIENT LIST' redis cmd - cls.r.client_setname(f"Slips-DB") + if cls.start(): + cls._instances[cls.redis_port] = super().__new__(cls) + # By default the slips internal time is + # 0 until we receive something + cls.set_slips_internal_time(0) + if not cls.get_slips_start_time(): + cls._set_slips_start_time() + # useful for debugging using 'CLIENT LIST' redis cmd + cls.r.client_setname(f"Slips-DB") + else: + return False return cls._instances[cls.redis_port] - def __init__(self, logger, redis_port, flush_db=True): + def __init__(self, logger, redis_port, start_redis_server=True, + flush_db=True): # the main purpose of this init is to call the parent's __init__ IObservable.__init__(self) self.add_observer(logger) @@ -169,12 +174,20 @@ def _set_redis_options(cls): @classmethod def _read_configuration(cls): conf = ConfigParser() - cls.deletePrevdb = conf.deletePrevdb() - cls.disabled_detections = conf.disabled_detections() + cls.deletePrevdb: bool = conf.deletePrevdb() + cls.disabled_detections: List[str] = conf.disabled_detections() cls.width = conf.get_tw_width_as_float() @classmethod def set_slips_internal_time(cls, timestamp): + """ + slips internal time is the timestamp of the last tw update done in + slips + it is updated each time slips detects a new modification in any + timewindow + metadata_manager.py checks for new tw modifications every 5s and + updates this value accordingly + """ cls.r.set('slips_internal_time', timestamp) @classmethod @@ -185,10 +198,12 @@ def get_slips_start_time(cls): return start_time @classmethod - def start(cls): + def start(cls) -> bool: """Flushes and Starts the DB and """ try: - cls.connect_to_redis_server() + if not cls.connect_to_redis_server(): + return False + # Set the memory limits of the output buffer, For normal clients: no limits # for pub-sub 4GB maximum buffer size # and 2GB for soft limit @@ -197,21 +212,25 @@ def start(cls): # don't flush the db when starting or stopping the daemon, or when testing if ( cls.deletePrevdb - and not ('-S' in sys.argv or '-cb' in sys.argv or '-d' in sys.argv ) + and not ('-S' in sys.argv + or '-cb' in sys.argv + or '-d' in sys.argv ) and cls.flush_db ): - # when stopping the daemon, don't flush bc we need to get the pids - # to close slips files + # when stopping the daemon, don't flush bc we need to get + # the PIDS to close slips files cls.r.flushdb() cls.change_redis_limits(cls.r) 
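The `__new__` change in the database.py hunk above also makes the per-port instance cache conditional: a RedisDB object is only memoized for a port after start(), and therefore the connection, succeeds, so a failed connection no longer poisons the cache. The caching pattern in isolation, with a toy class and no Redis:

```python
# Toy per-port singleton: cache one instance per port, only on success.
class ToyDB:
    _instances = {}

    def __new__(cls, port: int, healthy: bool = True):
        if port not in cls._instances:
            if not healthy:  # stand-in for a failed start()/connect
                return None  # callers must check for a falsy result
            cls._instances[port] = super().__new__(cls)
        return cls._instances[port]


a = ToyDB(6379)
b = ToyDB(6379)
print(a is b)                       # True: same object for the same port
print(ToyDB(6380, healthy=False))   # None: nothing cached for port 6380
```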
cls.change_redis_limits(cls.rcache) - # to fix redis.exceptions.ResponseError MISCONF Redis is configured to save RDB snapshots - # configure redis to stop writing to dump.rdb when an error occurs without throwing errors in slips + # to fix redis.exceptions.ResponseError MISCONF Redis is + # configured to save RDB snapshots + # configure redis to stop writing to dump.rdb when an error + # occurs without throwing errors in slips # Even if the DB is not deleted. We need to delete some temp data cls.r.delete('zeekfiles') - + return True except redis.exceptions.ConnectionError as ex: print(f"[DB] Can't connect to redis on port {cls.redis_port}: {ex}") return False @@ -238,7 +257,7 @@ def start_redis_instance(port: int, db: int) -> redis.StrictRedis: ) @classmethod - def connect_to_redis_server(cls): + def connect_to_redis_server(cls) -> bool: """ Connects to the given port and Sets r and rcache """ @@ -263,15 +282,6 @@ def connect_to_redis_server(cls): cls.r.client_list() return True except redis.exceptions.ConnectionError: - # unable to connect to this port - # sometimes we open the server but we have trouble connecting, - # so we need to close it - # if the port is used for another instance, slips.py is going to detect it - if cls.redis_port != 32850: - # 32850 is where we have the loaded rdb file when loading a saved db - # we shouldn't close it because this is what kalipso will - # use to view the loaded the db - cls.close_redis_server(cls.redis_port) return False @classmethod @@ -502,7 +512,7 @@ def get_disabled_modules(self) -> list: else: return {} - def set_input_metadata(self, info:dict): + def set_input_metadata(self, info: dict): """ sets name, size, analysis dates, and zeek_dir in the db """ diff --git a/slips_files/core/database/redis_db/ioc_handler.py b/slips_files/core/database/redis_db/ioc_handler.py index 210affe21..7c17b3480 100644 --- a/slips_files/core/database/redis_db/ioc_handler.py +++ b/slips_files/core/database/redis_db/ioc_handler.py @@ -235,6 +235,7 @@ def get_malicious_domain(self, domain): def get_ssl_info(self, sha1): info = self.rcache.hmget('IoC_SSL', sha1)[0] return False if info is None else info + def is_domain_malicious(self, domain: str) -> tuple: """ Search in the dB of malicious domains and return a diff --git a/slips_files/core/database/redis_db/profile_handler.py b/slips_files/core/database/redis_db/profile_handler.py index 44d239950..ffc1a8f7c 100644 --- a/slips_files/core/database/redis_db/profile_handler.py +++ b/slips_files/core/database/redis_db/profile_handler.py @@ -1,27 +1,38 @@ -from dataclasses import asdict -import redis -import time import json -from typing import Tuple, Union, Dict -import traceback -import ipaddress import sys +import time +import traceback +from dataclasses import asdict +from math import floor +from typing import ( + Tuple, + Union, + Dict, + Optional, + List, + Set, + ) + +import redis import validators + from slips_files.common.abstracts.observer import IObservable from slips_files.core.output import Output + class ProfileHandler(IObservable): """ Helper class for the Redis class in database.py Contains all the logic related to flows, profiles and timewindows """ - name = 'DB' - + + name = "DB" + def __init__(self, logger: Output): IObservable.__init__(self) self.logger = logger self.add_observer(self.logger) - + def print(self, text, verbose=1, debug=0): """ Function to use to print text using the outputqueue of slips. 
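Both the logger wiring in ProfileHandler here and the IObserver/IObservable cleanup earlier are the classic observer pattern: anything with an update(msg) method can subscribe to an observable's notifications. In miniature, with toy classes mirroring those interfaces:

```python
# Toy observer wiring mirroring IObservable/IObserver.
class ToyLogger:
    def update(self, msg: dict):
        # called by the observable for every notification
        print(f"[{msg['from']}] {msg['txt']}")


class ToyObservable:
    def __init__(self):
        self.observers = []

    def add_observer(self, observer):
        self.observers.append(observer)

    def notify_observers(self, msg: dict):
        for observer in self.observers:
            observer.update(msg)


handler = ToyObservable()
handler.add_observer(ToyLogger())
handler.notify_observers({"from": "DB", "txt": "hello", "verbose": 1, "debug": 0})
```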
@@ -40,27 +51,22 @@ def print(self, text, verbose=1, debug=0): """ self.notify_observers( - { - 'from': self.name, - 'txt': text, - 'verbose': verbose, - 'debug': debug - } + {"from": self.name, "txt": text, "verbose": verbose, "debug": debug} ) - def getOutTuplesfromProfileTW(self, profileid, twid): """Get the out tuples""" - return self.r.hget(profileid + self.separator + twid, 'OutTuples') + return self.r.hget(profileid + self.separator + twid, "OutTuples") def getInTuplesfromProfileTW(self, profileid, twid): """Get the in tuples""" - return self.r.hget(profileid + self.separator + twid, 'InTuples') + return self.r.hget(profileid + self.separator + twid, "InTuples") + def get_dhcp_flows(self, profileid, twid) -> list: """ returns a dict of dhcp flows that happened in this profileid and twid """ - if flows := self.r.hget('DHCP_flows', f'{profileid}_{twid}'): + if flows := self.r.hget("DHCP_flows", f"{profileid}_{twid}"): return json.loads(flows) def set_dhcp_flow(self, profileid, twid, requested_addr, uid): @@ -71,124 +77,53 @@ def set_dhcp_flow(self, profileid, twid, requested_addr, uid): if cached_flows := self.get_dhcp_flows(profileid, twid): # we already have flows in this twid, update them cached_flows.update(flow) - self.r.hset('DHCP_flows', f'{profileid}_{twid}', json.dumps(cached_flows)) + self.r.hset("DHCP_flows", f"{profileid}_{twid}", json.dumps(cached_flows)) else: - self.r.hset('DHCP_flows', f'{profileid}_{twid}', json.dumps(flow)) - + self.r.hset("DHCP_flows", f"{profileid}_{twid}", json.dumps(flow)) def get_timewindow(self, flowtime, profileid): """ - This function should get the id of the TW in the database where the flow belong. - If the TW is not there, we create as many tw as necessary in the future or past until we get the correct TW for this flow. - - We use this function to avoid retrieving all the data from the DB for the complete profile. We use a separate table for the TW per profile. + This function returns the TW in the database where the flow belongs. + If the TW is not there, we create as many tw as necessary in the future + or past until we get the correct TW for this flow. + - We use this function to avoid retrieving all the data from the DB + for the complete profile. + We use a separate table for the TW per profile. -- Returns the time window id THIS IS NOT WORKING: - - The empty profiles in the middle are not being created!!! - - The Dtp ips are stored in the first time win - """ - try: - # First check if we are not in the last TW. Since this will be the majority of cases - try: - [(lasttwid, lasttw_start_time)] = self.get_last_twid_of_profile(profileid) - lasttw_start_time = float(lasttw_start_time) - lasttw_end_time = lasttw_start_time + self.width + - The empty tws in the middle are not being created!!! + - The Dtp ips are stored in the first tw + """ + # If the option for only-one-tw was selected, we should + # create the TW at least 100 years before the flowtime, + # to cover for 'flows in the past'. Which means we should + # cover for any flow that is coming later with time before the + # first flow + if self.width == 9999999999: + # Seconds in 1 year = 31536000 + tw_start = float(flowtime - (31536000 * 100)) + tw_number: int = 1 + else: + starttime_of_first_tw: str = self.r.hget("analysis", "file_start") + + if starttime_of_first_tw: + starttime_of_first_tw = float(starttime_of_first_tw) flowtime = float(flowtime) - self.print( - f'The last TW id for profile {profileid} was {lasttwid}. Start:{lasttw_start_time}. 
End: {lasttw_end_time}', - 3, - 0, + tw_number: int = ( + floor((flowtime - starttime_of_first_tw) / self.width) + 1 ) - # There was a last TW, so check if the current flow belongs here. - if ( - lasttw_end_time > flowtime - and lasttw_start_time <= flowtime - ): - self.print( - f'The flow ({flowtime}) is on the last time window ({lasttw_end_time})', - 3, - 0, - ) - twid = lasttwid - elif lasttw_end_time <= flowtime: - # The flow was not in the last TW, its NEWER than it - self.print( - f'The flow ({flowtime}) is NOT on the last time window ({lasttw_end_time}). Its newer', - 3, - 0, - ) - amount_of_new_tw = int( - (flowtime - lasttw_end_time) / self.width - ) - self.print( - f'We have to create {amount_of_new_tw} empty TWs in the middle.', - 3, - 0, - ) - temp_end = lasttw_end_time - for _ in range(amount_of_new_tw + 1): - new_start = temp_end - twid = self.addNewTW(profileid, new_start) - self.print(f'Creating the TW id {twid}. Start: {new_start}.', 3, 0) - temp_end = new_start + self.width - else: - # The flow was not in the last TW, its OLDER that it - self.print( - f'The flow ({flowtime}) is NOT on the last time window ({lasttw_end_time}). Its older', - 3, - 0, - ) - if data := self.getTWofTime(profileid, flowtime): - # We found a TW where this flow belongs to - (twid, tw_start_time) = data - return twid - else: - # There was no TW that included the time of this flow, so create them in the past - # How many new TW we need in the past? - # amount_of_new_tw is the total amount of tw we should have under the new situation - amount_of_new_tw = int( - (lasttw_end_time - flowtime) / self.width - ) - # amount_of_current_tw is the real amount of tw we have now - amount_of_current_tw = ( - self.get_number_of_tws_in_profile(profileid) - ) - # diff is the new ones we should add in the past. (Yes, we could have computed this differently) - diff = amount_of_new_tw - amount_of_current_tw - self.print(f'We need to create {diff + 1} TW before the first', 3, 0) - # Get the first TW - [ - (firsttwid, firsttw_start_time) - ] = self.getFirstTWforProfile(profileid) - firsttw_start_time = float(firsttw_start_time) - # The start of the new older TW should be the first - the width - temp_start = firsttw_start_time - self.width - for _ in range(diff + 1): - new_start = temp_start - # The method to add an older TW is the same as - # to add a new one, just the starttime changes - twid = self.addNewOlderTW( - profileid, new_start - ) - self.print(f'Creating the new older TW id {twid}. Start: {new_start}.', 3, 0) - temp_start = new_start - self.width - except ValueError: - # There is no last tw. So create the first TW - # If the option for only-one-tw was selected, we should create the TW at least 100 years before the flowtime, to cover for - # 'flows in the past'. 
Which means we should cover for any flow that is coming later with time before the first flow - if self.width == 9999999999: - # Seconds in 1 year = 31536000 - startoftw = float(flowtime - (31536000 * 100)) - else: - startoftw = flowtime + tw_start: float = starttime_of_first_tw + (self.width * (tw_number - 1)) + else: + # this is the first timewindow + tw_number: int = 1 + tw_start: float = flowtime - # Add this TW, of this profile, to the DB - twid = self.addNewTW(profileid, startoftw) - # self.print("First TW ({}) created for profile {}.".format(twid, profileid), 0, 1) - return twid - except Exception as e: - self.print('Error in get_timewindow().', 0, 1) - self.print(e, 0, 1) + tw_id: str = f"timewindow{tw_number}" + + # Add this TW, of this profile, to the DB + self.add_new_tw(profileid, tw_id, tw_start) + return tw_id def add_out_http( self, @@ -202,89 +137,92 @@ def add_out_http( The idea is that from the uid of a netflow, you can access which other type of info is related to that uid """ http_flow_dict = { - 'uid': flow.uid, - 'type': flow.type_, - 'method': flow.method, - 'host': flow.host, - 'uri': flow.uri, - 'version': flow.version, - 'user_agent': flow.user_agent, - 'request_body_len': flow.request_body_len, - 'response_body_len': flow.response_body_len, - 'status_code': flow.status_code, - 'status_msg': flow.status_msg, - 'resp_mime_types': flow.resp_mime_types, - 'resp_fuids': flow.resp_fuids, - 'stime': flow.starttime, - 'daddr': flow.daddr, + "uid": flow.uid, + "type": flow.type_, + "method": flow.method, + "host": flow.host, + "uri": flow.uri, + "version": flow.version, + "user_agent": flow.user_agent, + "request_body_len": flow.request_body_len, + "response_body_len": flow.response_body_len, + "status_code": flow.status_code, + "status_msg": flow.status_msg, + "resp_mime_types": flow.resp_mime_types, + "resp_fuids": flow.resp_fuids, + "stime": flow.starttime, + "daddr": flow.daddr, } # Convert to json string http_flow_dict = json.dumps(http_flow_dict) http_flow = { - 'profileid': profileid, - 'twid': twid, - 'flow': http_flow_dict, - 'stime': flow.starttime, + "profileid": profileid, + "twid": twid, + "flow": http_flow_dict, + "stime": flow.starttime, } to_send = json.dumps(http_flow) - self.publish('new_http', to_send) - self.publish('new_url', to_send) + self.publish("new_http", to_send) + self.publish("new_url", to_send) - self.print(f'Adding HTTP flow to DB: {http_flow_dict}', 3, 0) + self.print(f"Adding HTTP flow to DB: {http_flow_dict}", 3, 0) - http_flow.pop('flow', None) - http_flow['uid'] = flow.uid + http_flow.pop("flow", None) + http_flow["uid"] = flow.uid - # Check if the host domain AND the url is detected by the threat intelligence. - # not all flows have a host value so don't send empty hosts to ti module. + # Check if the host domain AND the url is detected by the threat + # intelligence. + # not all flows have a host value so don't send empty hosts to ti + # module. 
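The rewritten `get_timewindow()` above replaces the old loop that created empty timewindows one by one with direct arithmetic: the timewindow number is derived from the distance between the flow time and the start time of the first timewindow. A standalone sketch of that arithmetic (the width and the first-flow timestamp are assumed inputs here):

    from math import floor

    def timewindow_for(flowtime: float, first_flow_ts: float, width: float) -> str:
        """Return the timewindow id a flow belongs to, e.g. 'timewindow2'.

        Flows older than the first flow yield zero or negative numbers,
        matching the negative tw ids (e.g. timewindow-1) mentioned in the diff.
        """
        tw_number = floor((flowtime - first_flow_ts) / width) + 1
        return f"timewindow{tw_number}"

    # e.g. with 1-hour windows, a flow 90 minutes after the first flow:
    assert timewindow_for(5400, 0, 3600) == "timewindow2"
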
if len(flow.host) > 2: - self.give_threat_intelligence(profileid, - twid, - 'dst', - flow.starttime, - flow.uid, - flow.daddr, - lookup=flow.host) - self.give_threat_intelligence(profileid, - twid, - 'dst', - flow.starttime, - flow.uid, - flow.daddr, - lookup=f'http://{flow.host}{flow.uri}') + self.give_threat_intelligence( + profileid, + twid, + "dst", + flow.starttime, + flow.uid, + flow.daddr, + lookup=flow.host, + ) + self.give_threat_intelligence( + profileid, + twid, + "dst", + flow.starttime, + flow.uid, + flow.daddr, + lookup=f"http://{flow.host}{flow.uri}", + ) else: # use the daddr since there's no host - self.give_threat_intelligence(profileid, - twid, - 'dstip', - flow.starttime, - flow.uid, - flow.daddr, - lookup=f'http://{flow.daddr}{flow.uri}') - - + self.give_threat_intelligence( + profileid, + twid, + "dstip", + flow.starttime, + flow.uid, + flow.daddr, + lookup=f"http://{flow.daddr}{flow.uri}", + ) - def add_out_dns( - self, - profileid, - twid, - flow - ): + def add_out_dns(self, profileid, twid, flow): """ Store in the DB a DNS request - All the type of flows that are not netflows are stored in a separate hash ordered by flow.uid. - The idea is that from the flow.uid of a netflow, you can access which other type of info is related to that flow.uid + All the type of flows that are not netflows are stored in a separate + hash ordered by flow.uid. + The idea is that from the flow.uid of a netflow, you can access which + other type of info is related to that flow.uid """ dns_flow = { - 'flow.uid': flow.uid, - 'type': flow.type_, - 'query': flow.query, - 'qclass_name': flow.qclass_name, - 'flow.qtype_name': flow.qtype_name, - 'rcode_name': flow.rcode_name, - 'answers': flow.answers, - 'ttls': flow.TTLs, - 'stime': flow.starttime, + "flow.uid": flow.uid, + "type": flow.type_, + "query": flow.query, + "qclass_name": flow.qclass_name, + "flow.qtype_name": flow.qtype_name, + "rcode_name": flow.rcode_name, + "answers": flow.answers, + "ttls": flow.TTLs, + "stime": flow.starttime, } # Convert to json string @@ -292,61 +230,65 @@ def add_out_dns( # Publish the new dns received # TODO we should just send the DNS obj! to_send = { - 'profileid': profileid, - 'twid': twid, - 'flow': dns_flow, - 'stime': flow.starttime, - 'uid': flow.uid, - 'rcode_name': flow.rcode_name, - 'daddr': flow.daddr, - 'answers': flow.answers + "profileid": profileid, + "twid": twid, + "flow": dns_flow, + "stime": flow.starttime, + "uid": flow.uid, + "rcode_name": flow.rcode_name, + "daddr": flow.daddr, + "answers": flow.answers, } to_send = json.dumps(to_send) # publish a dns with its flow - self.publish('new_dns', to_send) + self.publish("new_dns", to_send) # Check if the dns query is detected by the threat intelligence. 
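The `add_out_*` helpers above all follow one pattern: serialize the flow to JSON together with its profileid/twid context, then publish it on a named Redis channel for whichever module consumes it (`new_http`, `new_url`, `new_dns`, ...). A minimal self-contained sketch of that pattern, assuming a local Redis connection; the channel name and payload fields follow the diff:

    import json
    import redis

    r = redis.StrictRedis(host="localhost", port=6379, decode_responses=True)

    def publish_flow(channel: str, profileid: str, twid: str, flow: dict):
        # modules subscribed to `channel` receive the flow plus its context
        to_send = {
            "profileid": profileid,
            "twid": twid,
            "flow": json.dumps(flow),  # the flow itself is nested JSON
            "stime": flow.get("stime"),
        }
        r.publish(channel, json.dumps(to_send))

    # publish_flow("new_http", "profile_10.0.0.1", "timewindow1",
    #              {"uid": "C1", "stime": 1.0})
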
self.give_threat_intelligence( profileid, twid, - 'dstip', + "dstip", flow.starttime, flow.uid, flow.daddr, - lookup=flow.query + lookup=flow.query, ) - # Add DNS resolution to the db if there are answers for the query - if flow.answers and flow.answers != ['-'] : - srcip = profileid.split('_')[1] + if flow.answers and flow.answers != ["-"]: + srcip = profileid.split("_")[1] self.set_dns_resolution( - flow.query, flow.answers, flow.starttime, flow.uid, flow.qtype_name, srcip, twid + flow.query, + flow.answers, + flow.starttime, + flow.uid, + flow.qtype_name, + srcip, + twid, ) # send each dns answer to TI module for answer in flow.answers: - if 'TXT' in answer: + if "TXT" in answer: continue extra_info = { - 'is_dns_response': True, - 'dns_query': flow.query, - 'domain': answer, + "is_dns_response": True, + "dns_query": flow.query, + "domain": answer, } self.give_threat_intelligence( profileid, twid, - 'dstip', + "dstip", flow.starttime, flow.uid, flow.daddr, lookup=answer, - extra_info=extra_info + extra_info=extra_info, ) - def add_port( - self, profileid: str, twid: str, flow: dict, role: str, port_type: str + self, profileid: str, twid: str, flow: dict, role: str, port_type: str ): """ Store info learned from ports for this flow @@ -366,13 +308,13 @@ def add_port( uid = flow.uid ip = str(flow.daddr) spkts = flow.spkts - state_hist = flow.state_hist if hasattr(flow, 'state_hist') else '' + state_hist = flow.state_hist if hasattr(flow, "state_hist") else "" # dpkts = columns['dpkts'] # daddr = columns['daddr'] # saddr = columns['saddr'] # sbytes = columns['sbytes'] - if '^' in state_hist: + if "^" in state_hist: # The majority of the FP with horizontal port scan detection happen because a # benign computer changes wifi, and many not established conns are redone, # which look like a port scan to 10 webpages. To avoid this, we IGNORE all @@ -383,75 +325,64 @@ def add_port( # of slips return False - # Choose which port to use based if we were asked Dst or Src - port = str(sport) if port_type == 'Src' else str(dport) + port = str(sport) if port_type == "Src" else str(dport) # If we are the Client, we want to store the dstips only # If we are the Server, we want to store the srcips only - ip_key = 'srcips' if role == 'Server' else 'dstips' + ip_key = "srcips" if role == "Server" else "dstips" # Get the state. 
Established, NotEstablished summaryState = self.getFinalStateFromFlags(state, pkts) old_profileid_twid_data = self.get_data_from_profile_tw( - profileid, - twid, - port_type, - summaryState, - proto, - role, - 'Ports' + profileid, twid, port_type, summaryState, proto, role, "Ports" ) try: # we already have info about this dport, update it port_data = old_profileid_twid_data[port] - port_data['totalflows'] += 1 - port_data['totalpkt'] += pkts - port_data['totalbytes'] += totbytes + port_data["totalflows"] += 1 + port_data["totalpkt"] += pkts + port_data["totalbytes"] += totbytes # if there's a conn from this ip on this port, update the pkts of this conn if ip in port_data[ip_key]: - port_data[ip_key][ip]['pkts'] += pkts - port_data[ip_key][ip]['spkts'] += spkts - port_data[ip_key][ip]['uid'].append(uid) + port_data[ip_key][ip]["pkts"] += pkts + port_data[ip_key][ip]["spkts"] += spkts + port_data[ip_key][ip]["uid"].append(uid) else: port_data[ip_key][ip] = { - 'pkts': pkts, - 'spkts': spkts, - 'stime': starttime, - 'uid': [uid] + "pkts": pkts, + "spkts": spkts, + "stime": starttime, + "uid": [uid], } except KeyError: # First time for this dport port_data = { - 'totalflows': 1, - 'totalpkt': pkts, - 'totalbytes': totbytes, + "totalflows": 1, + "totalpkt": pkts, + "totalbytes": totbytes, ip_key: { - ip: { - 'pkts': pkts, - 'spkts': spkts, - 'stime': starttime, - 'uid': [uid] - } - } + ip: {"pkts": pkts, "spkts": spkts, "stime": starttime, "uid": [uid]} + }, } old_profileid_twid_data[port] = port_data data = json.dumps(old_profileid_twid_data) - hash_key = f'{profileid}{self.separator}{twid}' - key_name = f'{port_type}Ports{role}{proto}{summaryState}' + hash_key = f"{profileid}{self.separator}{twid}" + key_name = f"{port_type}Ports{role}{proto}{summaryState}" self.r.hset(hash_key, key_name, str(data)) self.markProfileTWAsModified(profileid, twid, starttime) + def getFinalStateFromFlags(self, state, pkts): """ Analyze the flags given and return a summary of the state. Should work with Argus and Bro flags We receive the pakets to distinguish some Reset connections """ try: - pre = state.split('_')[0] + pre = state.split("_")[0] try: # Try suricata states """ @@ -460,22 +391,21 @@ def getFinalStateFromFlags(self, state, pkts): these are: New, Established and Closed,for UDP only new and established. For each of these states Suricata can employ different timeouts. """ - if 'new' in state or 'established' in state: - return 'Established' - elif 'closed' in state: - return 'Not Established' + if "new" in state or "established" in state: + return "Established" + elif "closed" in state: + return "Not Established" # We have varius type of states depending on the type of flow. 
# For Zeek - if state in ('S0', 'REJ', 'RSTOS0', 'RSTRH', 'SH', 'SHR'): - return 'Not Established' - elif state in ('S1', 'SF', 'S2', 'S3', 'RSTO', 'RSTP', 'OTH'): - return 'Established' - + if state in ("S0", "REJ", "RSTOS0", "RSTRH", "SH", "SHR"): + return "Not Established" + elif state in ("S1", "SF", "S2", "S3", "RSTO", "RSTP", "OTH"): + return "Established" # For Argus - suf = state.split('_')[1] - if 'S' in pre and 'A' in pre and 'S' in suf and 'A' in suf: + suf = state.split("_")[1] + if "S" in pre and "A" in pre and "S" in suf and "A" in suf: """ Examples: SA_SA @@ -499,23 +429,23 @@ def getFinalStateFromFlags(self, state, pkts): FSPAEC_FSPA SRPAEC_FSRPA """ - return 'Established' - elif 'PA' in pre and 'PA' in suf: + return "Established" + elif "PA" in pre and "PA" in suf: # Tipical flow that was reported in the middle """ Examples: PA_PA FPA_FPA """ - return 'Established' - elif 'ECO' in pre: - return 'ICMP Echo' - elif 'ECR' in pre: - return 'ICMP Reply' - elif 'URH' in pre: - return 'ICMP Host Unreachable' - elif 'URP' in pre: - return 'ICMP Port Unreachable' + return "Established" + elif "ECO" in pre: + return "ICMP Echo" + elif "ECR" in pre: + return "ICMP Reply" + elif "URH" in pre: + return "ICMP Host Unreachable" + elif "URP" in pre: + return "ICMP Port Unreachable" else: """ Examples: @@ -528,35 +458,35 @@ def getFinalStateFromFlags(self, state, pkts): SR_RA SEC_RA """ - return 'Not Established' + return "Not Established" except IndexError: # suf does not exist, which means that this is some ICMP or no response was sent for UDP or TCP - if 'ECO' in pre: + if "ECO" in pre: # ICMP - return 'Established' - elif 'UNK' in pre: + return "Established" + elif "UNK" in pre: # ICMP6 unknown upper layer - return 'Established' - elif 'CON' in pre: + return "Established" + elif "CON" in pre: # UDP - return 'Established' - elif 'INT' in pre: + return "Established" + elif "INT" in pre: # UDP trying to connect, NOT preciselly not established but also NOT 'Established'. So we considered not established because there # is no confirmation of what happened. - return 'Not Established' - elif 'EST' in pre: + return "Not Established" + elif "EST" in pre: # TCP - return 'Established' - elif 'RST' in pre: + return "Established" + elif "RST" in pre: # TCP. When -z B is not used in argus, states are single words. Most connections are reseted when finished and therefore are established # It can happen that is reseted being not established, but we can't tell without -z b. # So we use as heuristic the amount of packets. If <=3, then is not established because the OS retries 3 times. - return 'Not Established' if int(pkts) <= 3 else 'Established' - elif 'FIN' in pre: + return "Not Established" if int(pkts) <= 3 else "Established" + elif "FIN" in pre: # TCP. When -z B is not used in argus, states are single words. Most connections are finished with FIN when finished and therefore are established # It can happen that is finished being not established, but we can't tell without -z b. # So we use as heuristic the amount of packets. If <=3, then is not established because the OS retries 3 times. 
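`getFinalStateFromFlags()` (continued below) condenses Zeek, Suricata, and Argus connection states into two summary values. The Zeek mapping and the packet-count heuristic for single-word Argus states can be sketched on their own; the state strings come from the diff, everything else is an assumption:

    NOT_ESTABLISHED_ZEEK = {"S0", "REJ", "RSTOS0", "RSTRH", "SH", "SHR"}
    ESTABLISHED_ZEEK = {"S1", "SF", "S2", "S3", "RSTO", "RSTP", "OTH"}

    def summarize_state(state: str, pkts: int) -> str:
        if state in NOT_ESTABLISHED_ZEEK:
            return "Not Established"
        if state in ESTABLISHED_ZEEK:
            return "Established"
        if state in ("RST", "FIN"):
            # without argus -z B we can't tell for sure; the OS retries 3
            # times, so <= 3 packets is treated as a failed attempt
            return "Not Established" if pkts <= 3 else "Established"
        return "Not Established"

    # summarize_state("SF", 12)  -> 'Established'
    # summarize_state("RST", 2)  -> 'Not Established'
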
- return 'Not Established' if int(pkts) <= 3 else 'Established' + return "Not Established" if int(pkts) <= 3 else "Established" else: """ Examples: @@ -567,13 +497,15 @@ def getFinalStateFromFlags(self, state, pkts): SEC_ SRPA_ """ - return 'Not Established' + return "Not Established" except Exception: exception_line = sys.exc_info()[2].tb_lineno self.print( - f'Error in getFinalStateFromFlags() in database.py line {exception_line}' - ,0,1) - self.print(traceback.print_exc(), 0, 1) + f"Error in getFinalStateFromFlags() in database.py line {exception_line}", + 0, + 1, + ) + self.print(traceback.print_stack(), 0, 1) def get_data_from_profile_tw( self, @@ -610,13 +542,13 @@ def get_data_from_profile_tw( # Not Establihed] # Example: key_name = 'SrcPortClientTCPEstablished' key = direction + type_data + role + protocol.upper() + state - data = self.r.hget(f'{profileid}{self.separator}{twid}', key) + data = self.r.hget(f"{profileid}{self.separator}{twid}", key) if data: return json.loads(data) self.print( - f'There is no data for Key: {key}. Profile {profileid} TW {twid}', + f"There is no data for Key: {key}. Profile {profileid} TW {twid}", 3, 0, ) @@ -624,10 +556,9 @@ def get_data_from_profile_tw( except Exception as e: exception_line = sys.exc_info()[2].tb_lineno self.print( - f'Error in getDataFromProfileTW database.py line {exception_line}' - ,0,1) - self.print(traceback.print_exc(), 0, 1) - + f"Error in getDataFromProfileTW database.py line {exception_line}", 0, 1 + ) + self.print(traceback.print_stack(), 0, 1) def update_ip_info( self, @@ -638,7 +569,7 @@ def update_ip_info( totbytes, ip, starttime, - uid + uid, ) -> dict: """ # Updates how many times each individual DstPort was contacted, @@ -653,29 +584,26 @@ def update_ip_info( if ip in old_profileid_twid_data: # update info about an existing ip ip_data = old_profileid_twid_data[ip] - ip_data['totalflows'] += 1 - ip_data['totalpkt'] += pkts - ip_data['totalbytes'] += totbytes - ip_data['uid'].append(uid) + ip_data["totalflows"] += 1 + ip_data["totalpkt"] += pkts + ip_data["totalbytes"] += totbytes + ip_data["uid"].append(uid) - ip_data['dstports']: dict + ip_data["dstports"]: dict - if dport in ip_data['dstports']: - ip_data['dstports'][dport] += spkts + if dport in ip_data["dstports"]: + ip_data["dstports"][dport] += spkts else: - ip_data['dstports'].update({ - dport: spkts - }) + ip_data["dstports"].update({dport: spkts}) else: # First time seeing this ip ip_data = { - 'totalflows': 1, - 'totalpkt': pkts, - 'totalbytes': totbytes, - 'stime': starttime, - 'uid': [uid], - 'dstports': {dport: spkts} - + "totalflows": 1, + "totalpkt": pkts, + "totalbytes": totbytes, + "stime": starttime, + "uid": [uid], + "dstports": {dport: spkts}, } old_profileid_twid_data.update({ip: ip_data}) @@ -687,11 +615,11 @@ def update_times_contacted(self, ip, direction, profileid, twid): """ # Get the hash of the timewindow - profileid_twid = f'{profileid}{self.separator}{twid}' + profileid_twid = f"{profileid}{self.separator}{twid}" # Get the DstIPs data for this tw in this profile # The format is {'1.1.1.1' : 3} - ips_contacted = self.r.hget(profileid_twid, f'{direction}IPs') + ips_contacted = self.r.hget(profileid_twid, f"{direction}IPs") if not ips_contacted: ips_contacted = {} @@ -704,7 +632,7 @@ def update_times_contacted(self, ip, direction, profileid, twid): ips_contacted[ip] = 1 ips_contacted = json.dumps(ips_contacted) - self.r.hset(profileid_twid, f'{direction}IPs', str(ips_contacted)) + self.r.hset(profileid_twid, f"{direction}IPs", 
str(ips_contacted)) def add_ips(self, profileid, twid, flow, role): """ @@ -726,7 +654,7 @@ def add_ips(self, profileid, twid, flow, role): uid = flow.uid starttime = str(flow.starttime) - ip = flow.daddr if role=='Client' else flow.saddr + ip = flow.daddr if role == "Client" else flow.saddr """ Depending if the traffic is going out or not, we are Client or Server @@ -739,7 +667,7 @@ def add_ips(self, profileid, twid, flow, role): The srcip is here the one sending data to your profile So check the src ip """ - direction = 'Dst' if role == 'Client' else 'Src' + direction = "Dst" if role == "Client" else "Src" ############# # Store the Dst as IP address and notify in the channel @@ -750,31 +678,32 @@ def add_ips(self, profileid, twid, flow, role): ############# # OTH means that we didnt see the true src ip and dst ip - if flow.state != 'OTH': - self.ask_for_ip_info(flow.saddr, - profileid, - twid, - flow.proto.upper(), - flow.starttime, - flow.uid, - 'srcip', - daddr=flow.daddr) - self.ask_for_ip_info(flow.daddr, - profileid, - twid, - flow.proto.upper(), - flow.starttime, - flow.uid, - 'dstip') - + if flow.state != "OTH": + self.ask_for_ip_info( + flow.saddr, + profileid, + twid, + flow.proto.upper(), + flow.starttime, + flow.uid, + "srcip", + daddr=flow.daddr, + ) + self.ask_for_ip_info( + flow.daddr, + profileid, + twid, + flow.proto.upper(), + flow.starttime, + flow.uid, + "dstip", + ) self.update_times_contacted(ip, direction, profileid, twid) # Get the state. Established, NotEstablished summaryState = self.getFinalStateFromFlags(flow.state, flow.pkts) - key_name = ( - f'{direction}IPs{role}{flow.proto.upper()}{summaryState}' - ) + key_name = f"{direction}IPs{role}{flow.proto.upper()}{summaryState}" # Get the previous data about this key old_profileid_twid_data = self.get_data_from_profile_tw( profileid, @@ -783,7 +712,7 @@ def add_ips(self, profileid, twid, flow, role): summaryState, flow.proto, role, - 'IPs', + "IPs", ) profileid_twid_data: dict = self.update_ip_info( old_profileid_twid_data, @@ -793,14 +722,14 @@ def add_ips(self, profileid, twid, flow, role): flow.bytes, ip, starttime, - uid + uid, ) # Store this data in the profile hash self.r.hset( - f'{profileid}{self.separator}{twid}', + f"{profileid}{self.separator}{twid}", key_name, - json.dumps(profileid_twid_data) + json.dumps(profileid_twid_data), ) return True @@ -814,25 +743,25 @@ def get_all_contacted_ips_in_profileid_twid(self, profileid, twid) -> dict: contacted_ips = {} for uid, flow in all_flows.items(): # get the daddr of this flow - daddr = flow['daddr'] + daddr = flow["daddr"] contacted_ips[daddr] = uid return contacted_ips - def markProfileTWAsBlocked(self, profileid, twid): - """Add this profile and tw to the list of blocked""" + """Add this profile and tw to the list of blocked + a profile is only blocked if it was blocked using the user's + firewall, not if it just generated an alert + """ tws = self.getBlockedProfTW(profileid) tws.append(twid) - self.r.hset('BlockedProfTW', profileid, json.dumps(tws)) - + self.r.hset("BlockedProfTW", profileid, json.dumps(tws)) def getBlockedProfTW(self, profileid): """Return all the list of blocked tws""" - if tws := self.r.hget('BlockedProfTW', profileid): + if tws := self.r.hget("BlockedProfTW", profileid): return json.loads(tws) return [] - def checkBlockedProfTW(self, profileid, twid): """ Check if profile and timewindow is blocked @@ -840,17 +769,17 @@ def checkBlockedProfTW(self, profileid, twid): profile_tws = self.getBlockedProfTW(profileid) return twid in 
profile_tws - def wasProfileTWModified(self, profileid, twid): """Retrieve from the db if this TW of this profile was modified""" - data = self.r.zrank('ModifiedTW', profileid + self.separator + twid) + data = self.r.zrank("ModifiedTW", profileid + self.separator + twid) return bool(data) + def add_flow( self, flow, - profileid='', - twid='', - label='', + profileid="", + twid="", + label="", ): """ Function to add a flow by interpreting the data. The flow is added to the correct TW for this profile. @@ -859,25 +788,25 @@ def add_flow( """ summaryState = self.getFinalStateFromFlags(flow.state, flow.pkts) flow_dict = { - 'ts': flow.starttime, - 'dur': flow.dur, - 'saddr': flow.saddr, - 'sport': flow.sport, - 'daddr': flow.daddr, - 'dport': flow.dport, - 'proto': flow.proto, - 'origstate': flow.state, - 'state': summaryState, - 'pkts': flow.pkts, - 'allbytes': flow.bytes, - 'spkts': flow.spkts, - 'sbytes': flow.sbytes, - 'appproto': flow.appproto, - 'smac': flow.smac, - 'dmac': flow.dmac, - 'label': label, - 'flow_type': flow.type_, - 'module_labels': {}, + "ts": flow.starttime, + "dur": flow.dur, + "saddr": flow.saddr, + "sport": flow.sport, + "daddr": flow.daddr, + "dport": flow.dport, + "proto": flow.proto, + "origstate": flow.state, + "state": summaryState, + "pkts": flow.pkts, + "allbytes": flow.bytes, + "spkts": flow.spkts, + "sbytes": flow.sbytes, + "appproto": flow.appproto, + "smac": flow.smac, + "dmac": flow.dmac, + "label": label, + "flow_type": flow.type_, + "module_labels": {}, } # Convert to json string @@ -885,7 +814,7 @@ def add_flow( # The key was not there before. So this flow is not repeated # Store the label in our uniq set, and increment it by 1 if label: - self.r.zincrby('labels', 1, label) + self.r.zincrby("labels", 1, label) flow_dict = {flow.uid: flow_dict} @@ -893,39 +822,38 @@ def add_flow( flow_dict = json.dumps(flow_dict) # Prepare the data to publish. to_send = { - 'profileid': profileid, - 'twid': twid, - 'flow': flow_dict, - 'stime': flow.starttime, + "profileid": profileid, + "twid": twid, + "flow": flow_dict, + "stime": flow.starttime, } to_send = json.dumps(to_send) # set the pcap/file stime in the analysis key if self.first_flow: - self.set_input_metadata({'file_start': flow.starttime}) + self.set_input_metadata({"file_start": flow.starttime}) self.first_flow = False self.set_local_network(flow.saddr) # dont send arp flows in this channel, they have their own new_arp channel - if flow.type_ != 'arp': - self.publish('new_flow', to_send) + if flow.type_ != "arp": + self.publish("new_flow", to_send) return True - def add_software_to_profile( - self, profileid, flow - ): + def add_software_to_profile(self, profileid, flow): """ Used to associate this profile with it's used software and version """ sw_dict = { flow.software: { - 'version-major': flow.version_major, - 'version-minor': flow.version_minor, - 'uid': flow.uid - } + "version-major": flow.version_major, + "version-minor": flow.version_minor, + "uid": flow.uid, + } } - # cached_sw is {software: {'version-major':x, 'version-minor':y, 'uid':...}} + # cached_sw is {software: {'version-major':x, + # 'version-minor':y, 'uid':...}} if cached_sw := self.get_software_from_profile(profileid): if flow.software in cached_sw: # we already have this same software for this proileid. 
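`add_flow()` above stores each netflow keyed by its uid, counts labels in the `labels` sorted set, uses the very first flow to set the capture's `file_start`, and publishes everything except ARP on `new_flow`. A condensed sketch of that bookkeeping, assuming a local Redis connection and a plain-dict flow (`record_flow` is a hypothetical name):

    import json
    import redis

    r = redis.StrictRedis(decode_responses=True)
    first_flow = True

    def record_flow(profileid: str, twid: str, uid: str, flow: dict, label: str = ""):
        global first_flow
        if label:
            # keep a per-label counter in the 'labels' sorted set
            r.zincrby("labels", 1, label)
        if first_flow:
            # the first flow seen defines the start time of the capture
            r.hset("analysis", "file_start", flow["ts"])
            first_flow = False
        to_send = json.dumps({
            "profileid": profileid,
            "twid": twid,
            "flow": json.dumps({uid: flow}),
            "stime": flow["ts"],
        })
        if flow.get("flow_type") != "arp":  # arp flows use their own channel
            r.publish("new_flow", to_send)
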
@@ -933,16 +861,16 @@ def add_software_to_profile( return # add this new sw to the list of softwares this profile is using cached_sw.update(sw_dict) - self.r.hset(profileid, 'used_software', json.dumps(cached_sw)) + self.r.hset(profileid, "used_software", json.dumps(cached_sw)) else: # first time for this profile to use a software - self.r.hset(profileid, 'used_software', json.dumps(sw_dict)) + self.r.hset(profileid, "used_software", json.dumps(sw_dict)) def get_total_flows(self): """ gets total flows to process from the db """ - return self.r.hget('analysis', 'total_flows') + return self.r.hget("analysis", "total_flows") def add_out_ssh( self, @@ -958,42 +886,47 @@ def add_out_ssh( other type of info is related to that uid """ ssh_flow_dict = { - 'uid': flow.uid, - 'type': flow.type_, - 'version': flow.version, - 'auth_attempts': flow.auth_attempts, - 'auth_success': flow.auth_success, - 'client': flow.client, - 'server': flow.server, - 'cipher_alg': flow.cipher_alg, - 'mac_alg': flow.mac_alg, - 'compression_alg': flow.compression_alg, - 'kex_alg': flow.kex_alg, - 'host_key_alg': flow.host_key_alg, - 'host_key': flow.host_key, - 'stime': flow.starttime, - 'daddr': flow.daddr + "uid": flow.uid, + "type": flow.type_, + "version": flow.version, + "auth_attempts": flow.auth_attempts, + "auth_success": flow.auth_success, + "client": flow.client, + "server": flow.server, + "cipher_alg": flow.cipher_alg, + "mac_alg": flow.mac_alg, + "compression_alg": flow.compression_alg, + "kex_alg": flow.kex_alg, + "host_key_alg": flow.host_key_alg, + "host_key": flow.host_key, + "stime": flow.starttime, + "daddr": flow.daddr, } # Convert to json string ssh_flow_dict = json.dumps(ssh_flow_dict) # Publish the new dns received to_send = { - 'profileid': profileid, - 'twid': twid, - 'flow': ssh_flow_dict, - 'stime': flow.starttime, - 'uid': flow.uid, + "profileid": profileid, + "twid": twid, + "flow": ssh_flow_dict, + "stime": flow.starttime, + "uid": flow.uid, } to_send = json.dumps(to_send) # publish a dns with its flow - self.publish('new_ssh', to_send) - self.print(f'Adding SSH flow to DB: {ssh_flow_dict}', 3, 0) + self.publish("new_ssh", to_send) + self.print(f"Adding SSH flow to DB: {ssh_flow_dict}", 3, 0) # Check if the dns is detected by the threat intelligence. Empty field in the end, cause we have extrafield for the IP. 
- self.give_threat_intelligence(profileid, twid, 'dstip', flow.starttime, - flow.uid, - flow.daddr, lookup=flow.daddr) - + self.give_threat_intelligence( + profileid, + twid, + "dstip", + flow.starttime, + flow.uid, + flow.daddr, + lookup=flow.daddr, + ) def add_out_notice( self, @@ -1003,83 +936,80 @@ def add_out_notice( ): """ " Send notice.log data to new_notice channel to look for self-signed certificates""" notice_flow = { - 'type': 'notice', - 'daddr': flow.daddr, - 'sport': flow.sport, - 'dport': flow.dport, - 'note': flow.note, - 'msg': flow.msg, - 'scanned_port': flow.scanned_port, - 'scanning_ip': flow.scanning_ip, - 'stime': flow.starttime, + "type": "notice", + "daddr": flow.daddr, + "sport": flow.sport, + "dport": flow.dport, + "note": flow.note, + "msg": flow.msg, + "scanned_port": flow.scanned_port, + "scanning_ip": flow.scanning_ip, + "stime": flow.starttime, } notice_flow = json.dumps( notice_flow - ) # this is going to be sent insidethe to_send dict + ) # this is going to be sent insidethe to_send dict to_send = { - 'profileid': profileid, - 'twid': twid, - 'flow': notice_flow, - 'stime': flow.starttime, - 'uid': flow.uid, + "profileid": profileid, + "twid": twid, + "flow": notice_flow, + "stime": flow.starttime, + "uid": flow.uid, } to_send = json.dumps(to_send) - self.publish('new_notice', to_send) - self.print(f'Adding notice flow to DB: {notice_flow}', 3, 0) + self.publish("new_notice", to_send) + self.print(f"Adding notice flow to DB: {notice_flow}", 3, 0) self.give_threat_intelligence( profileid, twid, - 'dstip', + "dstip", flow.starttime, flow.uid, flow.daddr, - lookup=flow.daddr) - + lookup=flow.daddr, + ) - def add_out_ssl( - self, - profileid, - twid, - flow - ): + def add_out_ssl(self, profileid, twid, flow): """ Store in the DB an ssl request - All the type of flows that are not netflows are stored in a separate hash ordered by uid. - The idea is that from the uid of a netflow, you can access which other type of info is related to that uid + All the type of flows that are not netflows are stored in a separate + hash ordered by uid. 
+ The idea is that from the uid of a netflow, you can access which other + type of info is related to that uid """ ssl_flow = { - 'uid': flow.uid, - 'type': flow.type_, - 'version': flow.version, - 'cipher': flow.cipher, - 'resumed': flow.resumed, - 'established': flow.established, - 'cert_chain_fuids': flow.cert_chain_fuids, - 'client_cert_chain_fuids': flow.client_cert_chain_fuids, - 'subject': flow.subject, - 'issuer': flow.issuer, - 'validation_status': flow.validation_status, - 'curve': flow.curve, - 'server_name': flow.server_name, - 'daddr': flow.daddr, - 'dport': flow.dport, - 'stime': flow.starttime, - 'ja3': flow.ja3, - 'ja3s': flow.ja3s, - 'is_DoH': flow.is_DoH, + "uid": flow.uid, + "type": flow.type_, + "version": flow.version, + "cipher": flow.cipher, + "resumed": flow.resumed, + "established": flow.established, + "cert_chain_fuids": flow.cert_chain_fuids, + "client_cert_chain_fuids": flow.client_cert_chain_fuids, + "subject": flow.subject, + "issuer": flow.issuer, + "validation_status": flow.validation_status, + "curve": flow.curve, + "server_name": flow.server_name, + "daddr": flow.daddr, + "dport": flow.dport, + "stime": flow.starttime, + "ja3": flow.ja3, + "ja3s": flow.ja3s, + "is_DoH": flow.is_DoH, } # TODO do something with is_doh # Convert to json string ssl_flow = json.dumps(ssl_flow) to_send = { - 'profileid': profileid, - 'twid': twid, - 'flow': ssl_flow, - 'stime': flow.starttime, + "profileid": profileid, + "twid": twid, + "flow": ssl_flow, + "stime": flow.starttime, } to_send = json.dumps(to_send) - self.publish('new_ssl', to_send) - self.print(f'Adding SSL flow to DB: {ssl_flow}', 3, 0) + self.publish("new_ssl", to_send) + self.print(f"Adding SSL flow to DB: {ssl_flow}", 3, 0) # Check if the server_name (SNI) is detected by the threat intelligence. # Empty field in the end, cause we have extrafield for the IP. # If server_name is not empty, set in the IPsInfo and send to TI @@ -1087,51 +1017,58 @@ def add_out_ssl( return False # We are giving only new server_name to the threat_intelligence module. - self.give_threat_intelligence(profileid, twid, 'dstip', flow.starttime, - flow.uid, flow.daddr, lookup=flow.server_name) + self.give_threat_intelligence( + profileid, + twid, + "dstip", + flow.starttime, + flow.uid, + flow.daddr, + lookup=flow.server_name, + ) - # Save new server name in the IPInfo. There might be several server_name per IP. + # Save new server name in the IPInfo. There might be several + # server_name per IP. if ipdata := self.get_ip_info(flow.daddr): - sni_ipdata = ipdata.get('SNI', []) + sni_ipdata = ipdata.get("SNI", []) else: sni_ipdata = [] - SNI_port = { - 'server_name': flow.server_name, - 'dport': flow.dport - } + SNI_port = {"server_name": flow.server_name, "dport": flow.dport} # We do not want any duplicates. 
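As the following hunk shows, `add_out_ssl()` only caches a TLS SNI for an IP when that name also appears among the cached DNS resolutions, filtering out spoofed or unresolvable server names. A sketch of that check against the `DNSresolution` hash; the entry shape follows the diff's comments, the connection is assumed local:

    import json
    import redis

    r = redis.StrictRedis(decode_responses=True)

    def sni_has_dns_resolution(server_name: str) -> bool:
        # DNSresolution maps ip -> {'ts': ..., 'domains': [...], 'uid': ...}
        for ip, resolution in r.hgetall("DNSresolution").items():
            if server_name in json.loads(resolution).get("domains", []):
                return True
        return False
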
if SNI_port not in sni_ipdata: - # Verify that the SNI is equal to any of the domains in the DNS resolution + # Verify that the SNI is equal to any of the domains in the DNS + # resolution # only add this SNI to our db if it has a DNS resolution - if dns_resolutions := self.r.hgetall('DNSresolution'): - # dns_resolutions is a dict with {ip:{'ts'..,'domains':..., 'uid':..}} + if dns_resolutions := self.r.hgetall("DNSresolution"): + # dns_resolutions is a dict with {ip:{'ts'..,'domains':..., + # 'uid':..}} for ip, resolution in dns_resolutions.items(): resolution = json.loads(resolution) - if SNI_port['server_name'] in resolution['domains']: + if SNI_port["server_name"] in resolution["domains"]: # add SNI to our db as it has a DNS resolution sni_ipdata.append(SNI_port) - self.setInfoForIPs( - flow.daddr, {'SNI': sni_ipdata} - ) + self.setInfoForIPs(flow.daddr, {"SNI": sni_ipdata}) break - - def getProfileIdFromIP(self, daddr_as_obj): - """Receive an IP and we want the profileid""" + def get_profileid_from_ip(self, ip: str) -> Optional[str]: + """ + returns the profile of the given IP only if it was registered in + slips before + """ try: - profileid = f'profile{self.separator}{str(daddr_as_obj)}' - if self.r.sismember('profiles', profileid): + profileid = f"profile_{ip}" + if self.r.sismember("profiles", profileid): return profileid return False except redis.exceptions.ResponseError as inst: - self.print('error in addprofileidfromip in database.py', 0, 1) + self.print("error in get_profileid_from_ip in database.py", 0, 1) self.print(type(inst), 0, 1) self.print(inst, 0, 1) def getProfiles(self): """Get a list of all the profiles""" - profiles = self.r.smembers('profiles') + profiles = self.r.smembers("profiles") return profiles if profiles != set() else {} def getTWsfromProfile(self, profileid): @@ -1140,14 +1077,15 @@ def getTWsfromProfile(self, profileid): Returns a list of tuples (twid, ts) or an empty list """ return ( - self.r.zrange(f'tws{profileid}', 0, -1, withscores=True) + self.r.zrange(f"tws{profileid}", 0, -1, withscores=True) if profileid else False ) def get_number_of_tws_in_profile(self, profileid) -> int: """ - Receives a profile id and returns the number of all the TWs in that profile + Receives a profile id and returns the number of all the + TWs in that profile """ return len(self.getTWsfromProfile(profileid)) if profileid else 0 @@ -1155,13 +1093,13 @@ def getSrcIPsfromProfileTW(self, profileid, twid): """ Get the src ip for a specific TW for a specific profileid """ - return self.r.hget(profileid + self.separator + twid, 'SrcIPs') + return self.r.hget(profileid + self.separator + twid, "SrcIPs") def getDstIPsfromProfileTW(self, profileid, twid): """ Get the dst ip for a specific TW for a specific profileid """ - return self.r.hget(profileid + self.separator + twid, 'DstIPs') + return self.r.hget(profileid + self.separator + twid, "DstIPs") def getT2ForProfileTW(self, profileid, twid, tupleid, tuple_key: str): """ @@ -1180,37 +1118,52 @@ def getT2ForProfileTW(self, profileid, twid, tupleid, tuple_key: str): return False, False except Exception as e: exception_line = sys.exc_info()[2].tb_lineno - self.print(f'Error in getT2ForProfileTW in database.py line {exception_line}',0,1) + self.print( + f"Error in getT2ForProfileTW in database.py line " f"{exception_line}", + 0, + 1, + ) self.print(type(e), 0, 1) self.print(e, 0, 1) - self.print(traceback.format_exc(), 0, 1) + self.print(traceback.print_stack(), 0, 1) def has_profile(self, profileid): """Check if we have the given 
profile""" - return self.r.sismember('profiles', profileid) if profileid else False + return self.r.sismember("profiles", profileid) if profileid else False def get_profiles_len(self) -> int: - """Return the amount of profiles. Redis should be faster than python to do this count""" - profiles_n = self.r.scard('profiles') + """Return the amount of profiles. Redis should be faster than python + to do this count""" + profiles_n = self.r.scard("profiles") return 0 if not profiles_n else int(profiles_n) - def get_last_twid_of_profile(self, profileid): - """Return the last TW id and the starttime of the given profile id""" - return ( - self.r.zrange(f'tws{profileid}', -1, -1, withscores=True) - if profileid - else False - ) + def get_last_twid_of_profile(self, profileid: str) -> Tuple[str, float]: + """ + Returns the last TW id (aka tw with the greatest ts seen so far) and + the starttime of the given profile id + """ + if profileid: + res = self.r.zrange(f"tws{profileid}", -1, -1, withscores=True) + if res: + twid, starttime = res[0] + return twid, starttime - def getFirstTWforProfile(self, profileid): - """Return the first TW id and the time for the given profile id""" - return ( - self.r.zrange(f'tws{profileid}', 0, 0, withscores=True) - if profileid - else False - ) + def get_first_twid_for_profile(self, profileid: str) -> Optional[Tuple[str, float]]: + """ + Return the first TW id and the time for the given profile id + the returned twid may be a negative tw for example tw-1, depends on + what tw was last registered + """ + if profileid: + res: List[Tuple[str, float]] + res = self.r.zrange(f"tws{profileid}", 0, 0, withscores=True) + if res: + tw: str + starttime_of_tw: float + tw, starttime_of_tw = res[0] + return tw, starttime_of_tw - def getTWofTime(self, profileid, time): + def get_tw_of_ts(self, profileid, time) -> Optional[Tuple[str, float]]: """ Return the TW id and the time for the TW that includes the given time. The score in the DB is the start of the timewindow, so we should search @@ -1220,132 +1173,121 @@ def getTWofTime(self, profileid, time): # [-1] so we bring the last TW that matched this time. try: data = self.r.zrangebyscore( - f'tws{profileid}', - float('-inf'), + f"tws{profileid}", + float("-inf"), float(time), withscores=True, start=0, - num=-1 + num=-1, )[-1] except IndexError: # We dont have any last tw? data = self.r.zrangebyscore( - f'tws{profileid}', - 0, - float(time), - withscores=True, - start=0, - num=-1 + f"tws{profileid}", 0, float(time), withscores=True, start=0, num=-1 ) return data - def addNewOlderTW(self, profileid, startoftw): + def add_new_older_tw(self, profileid: str, tw_start_time: float, tw_number: int): + """ + Creates or adds a new timewindow that is OLDER than the + first we have + :param tw_start_time: start time of timewindow to add + :param tw_number: number of timewindow to add + Returns the id of the timewindow just created + """ try: - """ - Creates or adds a new timewindow that is OLDER than the first we have - Return the id of the timewindow just created - """ - # Get the first twid and obtain the new tw id - try: - (firstid, firstid_time) = self.getFirstTWforProfile(profileid)[ - 0 - ] - # We have a first id - # Decrement it!! - twid = 'timewindow' + str( - int(firstid.split('timewindow')[1]) - 1 - ) - except IndexError: - # Very weird error, since the first TW MUST exist. What are we doing here? 
- pass - # Add the new TW to the index of TW - data = {str(twid): float(startoftw)} - self.r.zadd(f'tws{profileid}', data) - self.print(f'Created and added to DB the new older ' - f'TW with id {twid}. Time: {startoftw} ' - ,0,4) + twid: str = f"timewindow{tw_number}" + timewindows: Dict[str, float] = {twid: tw_start_time} + self.r.zadd(f"tws{profileid}", timewindows) + + self.print( + f"Created and added to DB the new older " + f"TW with id {twid}. Time: {tw_start_time} ", + 0, + 4, + ) - # The creation of a TW now does not imply that it was modified. You need to put data to mark is at modified + # The creation of a TW now does not imply that it was modified. + # You need to put data to mark is at modified return twid except redis.exceptions.ResponseError as e: - self.print('error in addNewOlderTW in database.py', 0, 1) + self.print("error in addNewOlderTW in database.py", 0, 1) self.print(type(e), 0, 1) self.print(e, 0, 1) + self.print(traceback.print_stack(), 0, 1) - def addNewTW(self, profileid, startoftw): + def add_new_tw(self, profileid, timewindow: str, startoftw: float): + """ + Creates or adds a new timewindow to the list of tw for the + given profile + Add the twid to the ordered set of a given profile + :param timewindow: str id of the twid, e.g timewindow7, timewindow-9 + Returns the id of the timewindow just created + """ try: - """ - Creates or adds a new timewindow to the list of tw for the given profile - Add the twid to the ordered set of a given profile - Return the id of the timewindow just created - We should not mark the TW as modified here, since there is still no data on it, and it may remain without data. - """ - # Get the last twid and obtain the new tw id - try: - (lastid, lastid_time) = self.get_last_twid_of_profile(profileid)[0] - # We have a last id - # Increment it - twid = 'timewindow' + str( - int(lastid.split('timewindow')[1]) + 1 - ) - except IndexError: - # There is no first TW, create it - twid = 'timewindow1' # Add the new TW to the index of TW - data = {twid: float(startoftw)} - self.r.zadd(f'tws{profileid}', data) - self.print(f'Created and added to DB for profile ' - f'{profileid} on TW with id {twid}. Time: {startoftw} ', 0, 4) + self.r.zadd(f"tws{profileid}", {timewindow: float(startoftw)}) + self.print( + f"Created and added to DB for " + f"{profileid}: a new tw: {timewindow}. " + f" with starttime : {startoftw} ", + 0, + 4, + ) - # The creation of a TW now does not imply that it was modified. You need to put data to mark is at modified + # The creation of a TW now does not imply that it was modified. + # You need to put data to mark is at modified. 
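Both `add_new_older_tw()` and `add_new_tw()` now reduce to a single `zadd` on the per-profile `tws{profileid}` sorted set: the member is the timewindow id (possibly negative, e.g. `timewindow-1`) and the score is its start time, so range queries by time come for free. A minimal sketch under the same assumptions as the sketches above:

    import redis

    r = redis.StrictRedis(decode_responses=True)

    def add_tw(profileid: str, tw_number: int, tw_start: float) -> str:
        twid = f"timewindow{tw_number}"
        # score = start time, so ZRANGE/ZRANGEBYSCORE can find the
        # first, last, or time-matching tw of a profile directly
        r.zadd(f"tws{profileid}", {twid: float(tw_start)})
        return twid

    # first and last registered tw of a profile:
    # r.zrange(f"tws{profileid}", 0, 0, withscores=True)
    # r.zrange(f"tws{profileid}", -1, -1, withscores=True)
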
# When a new TW is created for this profile, - # change the threat level of the profile to 0(info) and confidence to 0.05 - self.update_threat_level(profileid, 'info', 0.5) - return twid + # change the threat level of the profile to 0(info) + # and confidence to 0.05 + self.update_threat_level(profileid, "info", 0.5) except redis.exceptions.ResponseError as e: - self.print('Error in addNewTW', 0, 1) + self.print("Error in addNewTW", 0, 1) + self.print(traceback.print_stack(), 0, 1) self.print(e, 0, 1) - def getTimeTW(self, profileid, twid): + def get_tw_start_time(self, profileid, twid): """Return the time when this TW in this profile was created""" # Get all the TW for this profile - # We need to encode it to 'search' because the data in the sorted set is encoded - return self.r.zscore(f'tws{profileid}', twid.encode('utf-8')) + # We need to encode it to 'search' because the data in the + # sorted set is encoded + return self.r.zscore(f"tws{profileid}", twid.encode("utf-8")) def getAmountTW(self, profileid): """Return the number of tws for this profile id""" - return self.r.zcard(f'tws{profileid}') if profileid else False + return self.r.zcard(f"tws{profileid}") if profileid else False - def getModifiedTWSinceTime(self, time): - """Return the list of modified timewindows since a certain time""" - data = self.r.zrangebyscore( - 'ModifiedTW', time, float('+inf'), withscores=True - ) + def getModifiedTWSinceTime(self, time: float) -> List[Tuple[str, float]]: + """ + Return the list of modified timewindows since a certain time + """ + # this ModifiedTW set has all timewindows of all profiles + # the score of each tw is the ts it was last updated + # this ts is not network time, it is local time + data = self.r.zrangebyscore("ModifiedTW", time, float("+inf"), withscores=True) return data or [] - def getModifiedProfilesSince(self, time): - """Returns a set of modified profiles since a certain time and the time of the last modified profile""" - modified_tws = self.getModifiedTWSinceTime(time) + def getModifiedProfilesSince(self, time: float) -> Tuple[Set[str], float]: + """Returns a set of modified profiles since a certain time and + the time of the last modified profile""" + modified_tws: List[Tuple[str, float]] = self.getModifiedTWSinceTime(time) if not modified_tws: # no modified tws, and no time_of_last_modified_tw return [], 0 + # get the time of last modified tw - time_of_last_modified_tw = modified_tws[-1][-1] + time_of_last_modified_tw: float = modified_tws[-1][-1] + # this list will store modified profiles without tws profiles = [] - profiles.extend( - modified_tw[0].split('_')[1] for modified_tw in modified_tws - ) + profiles.extend(modified_tw[0].split("_")[1] for modified_tw in modified_tws) # return a set of unique profiles return set(profiles), time_of_last_modified_tw - - def add_to_the_list_of_ipv6( - self, ipv6_to_add: str, cached_ipv6: str - ) -> list : + def add_to_the_list_of_ipv6(self, ipv6_to_add: str, cached_ipv6: str) -> list: """ adds the given IPv6 to the list of given cached_ipv6 """ @@ -1359,10 +1301,7 @@ def add_to_the_list_of_ipv6( return cached_ipv6 def set_mac_vendor_to_profile( - self, - profileid: str, - mac_addr: str, - mac_vendor: str + self, profileid: str, mac_addr: str, mac_vendor: str ) -> bool: """ sets the given mac add and vendor to the given profile key @@ -1386,15 +1325,14 @@ def set_mac_vendor_to_profile( if cached_mac_addr == mac_addr: # now we're sure that the vendor of the given mac addr, # is the vendor of this profileid - self.r.hset(profileid, 
'MAC_vendor', mac_vendor) + self.r.hset(profileid, "MAC_vendor", mac_vendor) return True return False - def update_mac_of_profile(self, profileid: str, mac: str): - """ Add the MAC addr to the given profileid key""" - self.r.hset(profileid, 'MAC', mac) + """Add the MAC addr to the given profileid key""" + self.r.hset(profileid, "MAC", mac) def add_mac_addr_to_profile(self, profileid: str, mac_addr: str): """ @@ -1407,14 +1345,10 @@ def add_mac_addr_to_profile(self, profileid: str, mac_addr: str): dhcp.log, conn.log, arp.log etc. PS: it doesn't deal with the MAC vendor """ - if ( - not mac_addr - or '0.0.0.0' in profileid - ): + if not mac_addr or "0.0.0.0" in profileid: return False - - incoming_ip: str = profileid.split('_')[1] + incoming_ip: str = profileid.split("_")[1] # sometimes we create profiles with the mac address. # don't save that in MAC hash @@ -1432,11 +1366,11 @@ def add_mac_addr_to_profile(self, profileid: str, mac_addr: str): return False # get the ips that belong to this mac - cached_ip = self.r.hmget('MAC', mac_addr)[0] + cached_ip = self.r.hmget("MAC", mac_addr)[0] if not cached_ip: # no mac info stored for profileid ip = json.dumps([incoming_ip]) - self.r.hset('MAC', mac_addr, ip) + self.r.hset("MAC", mac_addr, ip) # now that it's decided that this mac belongs to this profileid # stoe the mac in the profileid's key in the db @@ -1454,24 +1388,24 @@ def add_mac_addr_to_profile(self, profileid: str, mac_addr: str): # seen with the given mac. nothing to do here. return False - # make sure 1 profile is ipv4 and the other is ipv6 # (so we don't mess with MITM ARP detections) if validators.ipv6(incoming_ip) and validators.ipv4(found_ip): # associate the ipv4 we found with the incoming ipv6 # and vice versa self.set_ipv4_of_profile(profileid, found_ip) - self.set_ipv6_of_profile(f'profile_{found_ip}', [incoming_ip]) + self.set_ipv6_of_profile(f"profile_{found_ip}", [incoming_ip]) elif validators.ipv6(found_ip) and validators.ipv4(incoming_ip): # associate the ipv6 we found with the incoming ipv4 # and vice versa self.set_ipv6_of_profile(profileid, [found_ip]) - self.set_ipv4_of_profile(f'profile_{found_ip}', incoming_ip) + self.set_ipv4_of_profile(f"profile_{found_ip}", incoming_ip) elif validators.ipv6(found_ip) and validators.ipv6(incoming_ip): # If 2 IPv6 are claiming to have the same MAC it's fine # a computer is allowed to have many ipv6 - # add this found ipv6 to the list of ipv6 of the incoming ip(profileid) + # add this found ipv6 to the list of ipv6 of the incoming + # ip(profileid) # get the list of cached ipv6 ipv6: str = self.get_ipv6_from_profile(profileid) @@ -1482,10 +1416,10 @@ def add_mac_addr_to_profile(self, profileid: str, mac_addr: str): # add this incoming ipv6(profileid) to the list of # ipv6 of the found ip # get the list of cached ipv6 - ipv6: str = self.get_ipv6_from_profile(f'profile_{found_ip}') + ipv6: str = self.get_ipv6_from_profile(f"profile_{found_ip}") # get the list of cached ipv6+the new one ipv6: list = self.add_to_the_list_of_ipv6(incoming_ip, ipv6) - self.set_ipv6_of_profile(f'profile_{found_ip}', ipv6) + self.set_ipv6_of_profile(f"profile_{found_ip}", ipv6) else: # both are ipv4 and are claiming to have the same mac address @@ -1496,58 +1430,55 @@ def add_mac_addr_to_profile(self, profileid: str, mac_addr: str): # add the incoming ip to the list of ips that belong to this mac cached_ips.add(incoming_ip) cached_ips = json.dumps(list(cached_ips)) - self.r.hset('MAC', mac_addr, cached_ips) + self.r.hset("MAC", mac_addr, cached_ips) 
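`add_mac_addr_to_profile()` decides how to link two IPs that claim the same MAC: an IPv4/IPv6 pair is cross-associated, two IPv6 addresses may coexist, and two distinct IPv4 addresses are rejected so MITM ARP detection stays intact. A condensed sketch of that branching using the `validators` package, as in the diff (`classify_mac_claim` is a hypothetical helper):

    import validators

    def classify_mac_claim(incoming_ip: str, found_ip: str) -> str:
        """What to do when incoming_ip claims a MAC already seen on found_ip."""
        if validators.ipv4(incoming_ip) and validators.ipv6(found_ip):
            return "associate ipv4 with ipv6"
        if validators.ipv6(incoming_ip) and validators.ipv4(found_ip):
            return "associate ipv6 with ipv4"
        if validators.ipv6(incoming_ip) and validators.ipv6(found_ip):
            return "append to ipv6 list"  # one host may have many ipv6
        return "reject"                   # two ipv4s on one MAC: suspicious

    # classify_mac_claim("10.0.0.2", "10.0.0.3") -> 'reject'
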
self.update_mac_of_profile(profileid, mac_addr) - self.update_mac_of_profile(f'profile_{found_ip}', mac_addr) + self.update_mac_of_profile(f"profile_{found_ip}", mac_addr) return True - def get_mac_addr_from_profile(self, profileid: dict) \ - -> Union[str, None]: + def get_mac_addr_from_profile(self, profileid: dict) -> Union[str, None]: """ Returns MAC address of the given profile as a str, or None returns the info from the profileid key. """ - return self.r.hget(profileid, 'MAC') - + return self.r.hget(profileid, "MAC") def add_user_agent_to_profile(self, profileid, user_agent: dict): """ Used to associate this profile with it's used user_agent - :param user_agent: dict containing user_agent, os_type , os_name and agent_name + :param user_agent: dict containing user_agent, os_type , + os_name and agent_name """ - self.r.hset(profileid, 'first user-agent', user_agent) + self.r.hset(profileid, "first user-agent", user_agent) def get_user_agents_count(self, profileid) -> int: """ returns the number of unique UAs seen for the given profileid """ - return int(self.r.hget(profileid, 'user_agents_count')) - + return int(self.r.hget(profileid, "user_agents_count")) def add_all_user_agent_to_profile(self, profileid, user_agent: str): """ Used to keep history of past user agents of profile :param user_agent: str of user_agent """ - if not self.r.hexists(profileid, 'past_user_agents'): + if not self.r.hexists(profileid, "past_user_agents"): # add the first user agent seen to the db - self.r.hset(profileid, 'past_user_agents', json.dumps([user_agent])) - self.r.hset(profileid, 'user_agents_count', 1) + self.r.hset(profileid, "past_user_agents", json.dumps([user_agent])) + self.r.hset(profileid, "user_agents_count", 1) else: # we have previous UAs - user_agents = json.loads(self.r.hget(profileid, 'past_user_agents')) + user_agents = json.loads(self.r.hget(profileid, "past_user_agents")) if user_agent not in user_agents: # the given ua is not cached. 
cache it as a str user_agents.append(user_agent) - self.r.hset(profileid, 'past_user_agents', json.dumps(user_agents)) + self.r.hset(profileid, "past_user_agents", json.dumps(user_agents)) # incr the number of user agents seen for this profile user_agents_count: int = self.get_user_agents_count(profileid) - self.r.hset(profileid, 'user_agents_count', user_agents_count+1 ) - + self.r.hset(profileid, "user_agents_count", user_agents_count + 1) def get_software_from_profile(self, profileid): """ @@ -1556,14 +1487,13 @@ def get_software_from_profile(self, profileid): if not profileid: return False - if used_software := self.r.hmget(profileid, 'used_software')[0]: + if used_software := self.r.hmget(profileid, "used_software")[0]: used_software = json.loads(used_software) return used_software - def get_first_user_agent(self, profileid) -> str: """returns the first user agent used by the given profile""" - return self.r.hmget(profileid, 'first user-agent')[0] + return self.r.hmget(profileid, "first user-agent")[0] def get_user_agent_from_profile(self, profileid) -> str: """ @@ -1573,7 +1503,7 @@ def get_user_agent_from_profile(self, profileid) -> str: if user_agent := self.get_first_user_agent(profileid): # user agents may be OpenSSH_8.6 , no need to deserialize them - if '{' in user_agent: + if "{" in user_agent: user_agent = json.loads(user_agent) return user_agent @@ -1583,46 +1513,55 @@ def mark_profile_as_dhcp(self, profileid): """ # returns a list of dhcp if the profile is in the db - profile_in_db = self.r.hmget(profileid, 'dhcp') + profile_in_db = self.r.hmget(profileid, "dhcp") if not profile_in_db: return False is_dhcp_set = profile_in_db[0] # check if it's already marked as dhcp if not is_dhcp_set: - self.r.hset(profileid, 'dhcp', 'true') + self.r.hset(profileid, "dhcp", "true") + + def get_first_flow_time(self) -> Optional[str]: + return self.r.hget("analysis", "file_start") - def addProfile(self, profileid, starttime, duration): + def add_profile(self, profileid, starttime, duration): """ - Add a new profile to the DB. Both the list of profiles and the hashmap of profile data - Profiles are stored in two structures. A list of profiles (index) and individual hashmaps for each profile (like a table) - Duration is only needed for registration purposes in the profile. Nothing operational + Add a new profile to the DB. Both the list of profiles and the + hashmap of profile data + Profiles are stored in two structures. A list of profiles (index) + and individual hashmaps for each profile (like a table) + Duration is only needed for registration purposes in the profile. + Nothing operational """ try: - if self.r.sismember('profiles', str(profileid)): + if self.r.sismember("profiles", profileid): # we already have this profile return False # Add the profile to the index. The index is called 'profiles' - self.r.sadd('profiles', str(profileid)) - # Create the hashmap with the profileid. The hasmap of each profile is named with the profileid + self.r.sadd("profiles", str(profileid)) + # Create the hashmap with the profileid. 
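The renamed `add_profile()` below keeps profiles in two structures: the `profiles` set as an index, plus one hash per profileid for its metadata; each new profile starts at threat level "info" with confidence 0.05 and is announced on `new_profile`. A sketch of the registration step, assuming `profile_<ip>` ids as seen throughout the diff:

    import redis

    r = redis.StrictRedis(decode_responses=True)

    def add_profile(profileid: str, starttime: str, duration: str) -> bool:
        if r.sismember("profiles", profileid):
            return False                   # already registered
        r.sadd("profiles", profileid)      # index of all profiles
        r.hset(profileid, mapping={        # per-profile metadata hash
            "starttime": starttime,
            "duration": duration,
            "confidence": 0.05,
        })
        r.publish("new_profile", profileid.split("_")[1])  # announce the IP
        return True
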
+ # The hashmap of each profile is named with the profileid # Add the start time of profile - self.r.hset(profileid, 'starttime', starttime) + self.r.hset(profileid, "starttime", starttime) # For now duration of the TW is fixed - self.r.hset(profileid, 'duration', duration) - # When a new profiled is created assign threat level = 0 and confidence = 0.05 + self.r.hset(profileid, "duration", duration) + # When a new profile is created assign threat level = 0 + # and confidence = 0.05 # self.r.hset(profileid, 'threat_level', 0) confidence = 0.05 - self.update_threat_level(profileid, 'info', confidence) - self.r.hset(profileid, 'confidence', confidence) - # The IP of the profile should also be added as a new IP we know about. + self.update_threat_level(profileid, "info", confidence) + self.r.hset(profileid, "confidence", confidence) + # The IP of the profile should also be added as a new IP + # we know about. ip = profileid.split(self.separator)[1] # If the ip is new add it to the list of ips self.set_new_ip(ip) # Publish that we have a new profile - self.publish('new_profile', ip) + self.publish("new_profile", ip) return True except redis.exceptions.ResponseError as inst: - self.print('Error in addProfile in database.py', 0, 1) + self.print("Error in add_profile in database.py", 0, 1) self.print(type(inst), 0, 1) self.print(inst, 0, 1) @@ -1635,7 +1574,7 @@ def set_profile_module_label(self, profileid, module, label): data = self.get_profile_modules_labels(profileid) data[module] = label data = json.dumps(data) - self.r.hset(profileid, 'modules_labels', data) + self.r.hset(profileid, "modules_labels", data) def check_TW_to_close(self, close_all=False): """ @@ -1650,21 +1589,22 @@ def check_TW_to_close(self, close_all=False): modification_time = float(sit) - self.width if close_all: # close all tws no matter when they were last modified - modification_time = float('inf') + modification_time = float("inf") profiles_tws_to_close = self.r.zrangebyscore( - 'ModifiedTW', 0, modification_time, withscores=True + "ModifiedTW", 0, modification_time, withscores=True ) for profile_tw_to_close in profiles_tws_to_close: profile_tw_to_close_id = profile_tw_to_close[0] profile_tw_to_close_time = profile_tw_to_close[1] self.print( - f'The profile id {profile_tw_to_close_id} has to be closed because it was' - f' last modifed on {profile_tw_to_close_time} and we are closing everything older ' - f'than {modification_time}.' - f' Current time {sit}. ' - f'Difference: {modification_time - profile_tw_to_close_time}', + f"The profile id {profile_tw_to_close_id} has to be closed" + f" because it was" + f" last modified on {profile_tw_to_close_time} and we are " + f"closing everything older than {modification_time}." + f" Current time {sit}. 
" f"Difference: {modification_time - profile_tw_to_close_time}", 3, 0, ) @@ -1674,9 +1614,9 @@ def markProfileTWAsClosed(self, profileid_tw): """ Mark the TW as closed so tools can work on its data """ - self.r.sadd('ClosedTW', profileid_tw) - self.r.zrem('ModifiedTW', profileid_tw) - self.publish('tw_closed', profileid_tw) + self.r.sadd("ClosedTW", profileid_tw) + self.r.zrem("ModifiedTW", profileid_tw) + self.publish("tw_closed", profileid_tw) def markProfileTWAsModified(self, profileid, twid, timestamp): """ @@ -1689,18 +1629,15 @@ def markProfileTWAsModified(self, profileid, twid, timestamp): 4- To check if we should 'close' some TW """ timestamp = time.time() - data = { - f'{profileid}{self.separator}{twid}': float(timestamp) - } - self.r.zadd('ModifiedTW', data) - self.publish( - 'tw_modified', - f'{profileid}:{twid}' - ) + data = {f"{profileid}{self.separator}{twid}": float(timestamp)} + self.r.zadd("ModifiedTW", data) + self.publish("tw_modified", f"{profileid}:{twid}") # Check if we should close some TW self.check_TW_to_close() - def publish_new_letter(self, new_symbol:str, profileid:str, twid:str, tupleid:str, flow): + def publish_new_letter( + self, new_symbol: str, profileid: str, twid: str, tupleid: str, flow + ): """ analyze behavioral model with lstm model if the length is divided by 3 - @@ -1710,18 +1647,19 @@ def publish_new_letter(self, new_symbol:str, profileid:str, twid:str, tupleid:st return to_send = { - 'new_symbol': new_symbol, - 'profileid': profileid, - 'twid': twid, - 'tupleid': str(tupleid), - 'uid': flow.uid, - 'flow': asdict(flow) + "new_symbol": new_symbol, + "profileid": profileid, + "twid": twid, + "tupleid": str(tupleid), + "uid": flow.uid, + "flow": asdict(flow), } to_send = json.dumps(to_send) - self.publish('new_letters', to_send) + self.publish("new_letters", to_send) # - # def get_previous_symbols(self, profileid: str, twid: str, direction: str, tupleid: str): + # def get_previous_symbols(self, profileid: str, twid: str, direction: + # str, tupleid: str): # """ # returns all the InTuples or OutTuples for this profileid in this TW # """ @@ -1735,15 +1673,17 @@ def publish_new_letter(self, new_symbol:str, profileid:str, twid:str, tupleid:st # return prev_symbols # - def add_tuple(self, - profileid: str, twid: str, tupleid: str, symbol: Tuple, - role: str, flow): + def add_tuple( + self, profileid: str, twid: str, tupleid: str, symbol: Tuple, role: str, flow + ): """ Add the tuple going in or out for this profile - and if there was previous symbols for this profile, append the new symbol to it + and if there were previous symbols for this profile, append the new + symbol to it before adding the tuple to the db - :param tupleid: a dash separated str with the following format daddr-dport-proto + :param tupleid: a dash-separated str with the following format + daddr-dport-proto :param symbol: (symbol, (symbol_to_add, previous_two_timestamps)) T1: is the time diff between the past flow and the past-past flow.
last_ts: the timestamp of the last flow @@ -1751,16 +1691,17 @@ def add_tuple(self, """ # If the traffic is going out it is part of our outtuples, # if not, part of our intuples - if role == 'Client': - direction = 'OutTuples' - elif role == 'Server': - direction = 'InTuples' + if role == "Client": + direction = "OutTuples" + elif role == "Server": + direction = "InTuples" try: - profileid_twid = f'{profileid}{self.separator}{twid}' + profileid_twid = f"{profileid}{self.separator}{twid}" - # prev_symbols is a dict with {tulpeid: ['symbols_so_far', [timestamps]]} - prev_symbols: str = self.r.hget(profileid_twid, direction) or '{}' + # prev_symbols is a dict with {tupleid: ['symbols_so_far', + # [timestamps]]} + prev_symbols: str = self.r.hget(profileid_twid, direction) or "{}" prev_symbols: dict = json.loads(prev_symbols) try: @@ -1770,33 +1711,34 @@ def add_tuple(self, # Separate the symbol to add and the previous data (symbol_to_add, previous_two_timestamps) = symbol self.print( - f'Not the first time for tuple {tupleid} as an {direction} for ' - f'{profileid} in TW {twid}. Add the symbol: {symbol_to_add}. ' - f'Store previous_times: {previous_two_timestamps}. Prev Data: {prev_symbols}', - 3, 0, + f"Not the first time for tuple {tupleid} as an " + f"{direction} for " + f"{profileid} in TW {twid}. Add the symbol: {symbol_to_add}. " + f"Store previous_times: {previous_two_timestamps}. " + f"Prev Data: {prev_symbols}", + 3, + 0, ) # Add it to form the string of letters - new_symbol = f'{prev_symbol}{symbol_to_add}' + new_symbol = f"{prev_symbol}{symbol_to_add}" - self.publish_new_letter( - new_symbol, - profileid, - twid, - tupleid, - flow - ) + self.publish_new_letter(new_symbol, profileid, twid, tupleid, flow) prev_symbols[tupleid] = (new_symbol, previous_two_timestamps) - self.print(f'\tLetters so far for tuple {tupleid}: {new_symbol}', 3, 0) + self.print( + f"\tLetters so far for tuple {tupleid}:" f" {new_symbol}", 3, 0 ) except (TypeError, KeyError): # TODO check that this condition is triggered correctly # only for the first case and not the rest after... - # There was no previous data stored in the DB to append the given symbol to. + # There was no previous data stored in the DB to append + # the given symbol to. self.print( - f'First time for tuple {tupleid} as an' - f' {direction} for {profileid} in TW {twid}', - 3, 0, + f"First time for tuple {tupleid} as an" + f" {direction} for {profileid} in TW {twid}", + 3, + 0, ) prev_symbols[tupleid] = symbol @@ -1806,111 +1748,101 @@ def add_tuple(self, except Exception: exception_line = sys.exc_info()[2].tb_lineno - self.print(f'Error in add_tuple in database.py line {exception_line}', 0,1) - self.print(traceback.format_exc(), 0, 1) + self.print(f"Error in add_tuple in database.py line {exception_line}", 0, 1) + self.print(traceback.print_stack(), 0, 1) def get_tws_to_search(self, go_back): - tws_to_search = float('inf') + tws_to_search = float("inf") if go_back: hrs_to_search = float(go_back) tws_to_search = self.get_equivalent_tws(hrs_to_search) return tws_to_search - def get_profile_modules_labels(self, profileid): """ Get labels set by modules in the profile.
""" - data = self.r.hget(profileid, 'modules_labels') + data = self.r.hget(profileid, "modules_labels") data = json.loads(data) if data else {} return data def add_timeline_line(self, profileid, twid, data, timestamp): """Add a line to the timeline of this profileid and twid""" - self.print(f'Adding timeline for {profileid}, {twid}: {data}', 3, 0) - key = str( - profileid + self.separator + twid + self.separator + 'timeline' - ) + self.print(f"Adding timeline for {profileid}, {twid}: {data}", 3, 0) + key = str(profileid + self.separator + twid + self.separator + "timeline") data = json.dumps(data) mapping = {data: timestamp} self.r.zadd(key, mapping) # Mark the tw as modified since the timeline line is new data in the TW - self.markProfileTWAsModified(profileid, twid, timestamp='') + self.markProfileTWAsModified(profileid, twid, timestamp="") def get_timeline_last_lines( self, profileid, twid, first_index: int ) -> Tuple[str, int]: """Get only the new items in the timeline.""" - key = str( - profileid + self.separator + twid + self.separator + 'timeline' - ) + key = str(profileid + self.separator + twid + self.separator + "timeline") # The amount of lines in this list last_index = self.r.zcard(key) # Get the data in the list from the index asked (first_index) until the last data = self.r.zrange(key, first_index, last_index - 1) return data, last_index - def mark_profile_as_gateway(self, profileid): """ Used to mark this profile as the gateway """ - self.r.hset(profileid, 'gateway', 'true') - + self.r.hset(profileid, "gateway", "true") def set_ipv6_of_profile(self, profileid, ip: list): - self.r.hset(profileid, 'IPv6', json.dumps(ip)) + self.r.hset(profileid, "IPv6", json.dumps(ip)) def set_ipv4_of_profile(self, profileid, ip): - self.r.hset(profileid, 'IPv4', json.dumps([ip])) + self.r.hset(profileid, "IPv4", json.dumps([ip])) - def get_mac_vendor_from_profile( - self, - profileid: str - ) -> Union[str, None]: + def get_mac_vendor_from_profile(self, profileid: str) -> Union[str, None]: """ Returns a str MAC vendor of the given profile or None """ - return self.r.hget(profileid, 'MAC_vendor') + return self.r.hget(profileid, "MAC_vendor") - def get_hostname_from_profile(self, profileid: str) -> str: + def get_hostname_from_profile(self, profileid: str) -> Optional[str]: """ Returns hostname about a certain profile or None """ - return self.r.hget(profileid, 'host_name') + return self.r.hget(profileid, "host_name") def add_host_name_to_profile(self, hostname, profileid): """ Adds the given hostname to the given profile """ if not self.get_hostname_from_profile(profileid): - self.r.hset(profileid, 'host_name', hostname) + self.r.hset(profileid, "host_name", hostname) def get_ipv4_from_profile(self, profileid) -> str: """ Returns ipv4 about a certain profile or None """ - return self.r.hmget(profileid, 'IPv4')[0] if profileid else False + return self.r.hmget(profileid, "IPv4")[0] if profileid else False def get_ipv6_from_profile(self, profileid) -> str: """ Returns ipv6 about a certain profile or None """ - return self.r.hmget(profileid, 'IPv6')[0] if profileid else False + return self.r.hmget(profileid, "IPv6")[0] if profileid else False def get_the_other_ip_version(self, profileid): """ Given an ipv4, returns the ipv6 of the same computer Given an ipv6, returns the ipv4 of the same computer """ - srcip = profileid.split('_')[-1] + srcip = profileid.split("_")[-1] ip = False if validators.ipv4(srcip): ip = self.get_ipv6_from_profile(profileid) elif validators.ipv6(srcip): ip =
self.get_ipv4_from_profile(profileid) - return ip \ No newline at end of file + return ip diff --git a/slips_files/core/evidence_structure/evidence.py b/slips_files/core/evidence_structure/evidence.py new file mode 100644 index 000000000..8e8c23628 --- /dev/null +++ b/slips_files/core/evidence_structure/evidence.py @@ -0,0 +1,344 @@ +""" +Contains evidence dataclass that is used in slips +""" +import ipaddress +from dataclasses import dataclass, field, asdict, is_dataclass +from enum import Enum, auto +from uuid import uuid4 +from typing import List, Optional + +from slips_files.common.slips_utils import utils + + +def validate_ip(ip): + ipaddress.ip_address(ip) + +def validate_timestamp(ts) -> str: + """ + the ts of all evidence should be in + the alerts time format, if not, raise an exception + """ + if utils.define_time_format(ts) == utils.alerts_format: + return ts + else: + raise ValueError(f"Invalid timestamp format: {ts}. " + f"Expected format: '%Y/%m/%d %H:%M:%S.%f%z'.") + +class EvidenceType(Enum): + """ + These are the types of evidence slips can detect + """ + ARP_SCAN = auto() + ARP_OUTSIDE_LOCALNET = auto() + UNSOLICITED_ARP = auto() + MITM_ARP_ATTACK = auto() + YOUNG_DOMAIN = auto() + MULTIPLE_SSH_VERSIONS = auto() + DIFFERENT_LOCALNET = auto() + DEVICE_CHANGING_IP = auto() + NON_HTTP_PORT_80_CONNECTION = auto() + NON_SSL_PORT_443_CONNECTION = auto() + WEIRD_HTTP_METHOD = auto() + INCOMPATIBLE_CN = auto() + DGA_NXDOMAINS = auto() + DNS_WITHOUT_CONNECTION = auto() + PASTEBIN_DOWNLOAD = auto() + CONNECTION_WITHOUT_DNS = auto() + DNS_ARPA_SCAN = auto() + UNKNOWN_PORT = auto() + PASSWORD_GUESSING = auto() + HORIZONTAL_PORT_SCAN = auto() + CONNECTION_TO_PRIVATE_IP = auto() + GRE_TUNNEL = auto() + VERTICAL_PORT_SCAN = auto() + SSH_SUCCESSFUL = auto() + LONG_CONNECTION = auto() + SELF_SIGNED_CERTIFICATE = auto() + MULTIPLE_RECONNECTION_ATTEMPTS = auto() + CONNECTION_TO_MULTIPLE_PORTS = auto() + HIGH_ENTROPY_DNS_ANSWER = auto() + INVALID_DNS_RESOLUTION = auto() + PORT_0_CONNECTION = auto() + MALICIOUS_JA3 = auto() + MALICIOUS_JA3S = auto() + DATA_UPLOAD = auto() + BAD_SMTP_LOGIN = auto() + SMTP_LOGIN_BRUTEFORCE = auto() + MALICIOUS_SSL_CERT = auto() + MALICIOUS_FLOW = auto() + SUSPICIOUS_USER_AGENT = auto() + EMPTY_CONNECTIONS = auto() + INCOMPATIBLE_USER_AGENT = auto() + EXECUTABLE_MIME_TYPE = auto() + MULTIPLE_USER_AGENT = auto() + HTTP_TRAFFIC = auto() + MALICIOUS_JARM = auto() + NETWORK_GPS_LOCATION_LEAKED = auto() + ICMP_TIMESTAMP_SCAN = auto() + ICMP_ADDRESS_SCAN = auto() + ICMP_ADDRESS_MASK_SCAN = auto() + DHCP_SCAN = auto() + MALICIOUS_IP_FROM_P2P_NETWORK = auto() + P2P_REPORT = auto() + COMMAND_AND_CONTROL_CHANNEL = auto() + THREAT_INTELLIGENCE_BLACKLISTED_ASN = auto() + THREAT_INTELLIGENCE_BLACKLISTED_IP = auto() + THREAT_INTELLIGENCE_BLACKLISTED_DOMAIN = auto() + MALICIOUS_DOWNLOADED_FILE = auto() + MALICIOUS_URL = auto() + + def __str__(self): + return self.name + + +class Direction(Enum): + DST = auto() + SRC = auto() + + + +class IoCType(Enum): + IP = auto() + URL = auto() + DOMAIN = auto() + MD5 = auto() + + +class ThreatLevel(Enum): + """determines the importance of the evidence""" + INFO = 0 + LOW = 0.2 + MEDIUM = 0.5 + HIGH = 0.8 + CRITICAL = 1 + + def __str__(self): + return self.name.lower() + + +class Anomaly(Enum): + """ + https://idea.cesnet.cz/en/classifications + """ + TRAFFIC = "Anomaly.Traffic" + FILE = "Anomaly.File" + CONNECTION = "Anomaly.Connection" + BEHAVIOUR = "Anomaly.Behaviour" + + +class Recon(Enum): + RECON = "Recon" + SCANNING = "Recon.Scanning" 
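The ThreatLevel enum defined in this new file is more than a label: its numeric values are what the evidence handler later multiplies by each detection's confidence (see get_threat_level() in evidencehandler.py further down in this diff). A small standalone sketch of that weighting, re-declaring a minimal copy of the enum so it runs on its own:

```python
from enum import Enum


class ThreatLevel(Enum):
    # minimal copy of the enum defined in evidence.py above
    INFO = 0
    LOW = 0.2
    MEDIUM = 0.5
    HIGH = 0.8
    CRITICAL = 1


def weighted_threat_level(level: ThreatLevel, confidence: float) -> float:
    # mirrors get_threat_level(): the level weighted by the confidence
    return level.value * confidence


# a HIGH detection we are 60% sure about contributes roughly 0.48
# to the profile's accumulated threat level in its timewindow
print(round(weighted_threat_level(ThreatLevel.HIGH, 0.6), 2))  # 0.48
```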
+ + + + +class Attempt(Enum): + LOGIN = "Attempt.Login" + + +class Tag(Enum): + """ + this is the IDEA category of the source and dst ip used in the evidence + if the Attacker.Direction is srcip this describes the source ip, + if the Attacker.Direction is dstip this describes the dst ip. + supported source and dst types are in the SourceTargetTag + section https://idea.cesnet.cz/en/classifications + this is optional in an evidence because it shouldn't + be used with dports and sports Attacker.Direction + """ + SUSPICIOUS_USER_AGENT = 'SuspiciousUserAgent' + INCOMPATIBLE_USER_AGENT = 'IncompatibleUserAgent' + EXECUTABLE_MIME_TYPE = 'ExecutableMIMEType' + MULTIPLE_USER_AGENT = 'MultipleUserAgent' + SENDING_UNENCRYPTED_DATA = 'SendingUnencryptedData' + MALWARE = 'Malware' + RECON = 'Recon' + MITM = 'MITM' + ORIGIN_MALWARE = 'OriginMalware' + CC = 'CC' + BOTNET = 'Botnet' + BLACKLISTED_ASN = 'BlacklistedASN' + BLACKLISTED_IP = 'BlacklistedIP' + BLACKLISTED_DOMAIN = 'BlacklistedDomain' + + +class Proto(Enum): + TCP = 'tcp' + UDP = 'udp' + ICMP = 'icmp' + +@dataclass +class Victim: + direction: Direction + victim_type: IoCType + value: str # like the actual ip/domain/url check if value is reserved + + def __post_init__(self): + if self.victim_type == IoCType.IP: + validate_ip(self.value) + + +class IDEACategory(Enum): + """ + The evidence category according to IDEA categories + https://idea.cesnet.cz/en/classifications + """ + ANOMALY_TRAFFIC = "Anomaly.Traffic" + ANOMALY_FILE = "Anomaly.File" + ANOMALY_CONNECTION = "Anomaly.Connection" + ANOMALY_BEHAVIOUR = "Anomaly.Behaviour" + INFO = "Information" + MALWARE = "Malware" + RECON_SCANNING = "Recon.Scanning" + ATTEMPT_LOGIN = "Attempt.Login" + RECON = "Recon" + INTRUSION_BOTNET = "Intrusion.Botnet" + + + +@dataclass +class ProfileID: + ip: str + + def __setattr__(self, name, value): + if name == 'ip': + assert ipaddress.ip_address(value) + self.__dict__[name] = value + + def __repr__(self): + return f"profile_{self.ip}" + + +@dataclass +class Attacker: + direction: Direction + attacker_type: IoCType + value: str # like the actual ip/domain/url check if value is reserved + profile: ProfileID = '' + + def __post_init__(self): + if self.attacker_type == IoCType.IP: + validate_ip(self.value) + # each attacker should have a profile if it's an IP + if self.attacker_type == IoCType.IP: + self.profile = ProfileID(ip=self.value) + + +@dataclass +class TimeWindow: + number: int + + def __post_init__(self): + if not isinstance(self.number, int): + raise ValueError(f"timewindow number must be an int. " + f"{self.number} is invalid!") + + def __repr__(self): + return f"timewindow{self.number}" + + +@dataclass +class Evidence: + evidence_type: EvidenceType + description: str + attacker: Attacker + threat_level: ThreatLevel + category: IDEACategory + # profile of the srcip detected this evidence + profile: ProfileID + timewindow: TimeWindow + # the uids of the flows causing this evidence + uid: List[str] + timestamp: str = field( + metadata={ + 'validate': lambda x: validate_timestamp(x) + } + ) + victim: Optional[Victim] = field(default=False) + proto: Optional[Proto] = field(default=False) + port: int = field(default=None) + source_target_tag: Tag = field(default=False) + # every evidence should have an ID according to the IDEA format + id: str = field(default_factory=lambda: str(uuid4())) + # the number of packets/flows/nxdomains that formed this scan/sweep/DGA. 
+ conn_count: int = field( + default=1, + metadata={ + 'validate': lambda x: isinstance(x, int) + } + ) + # the confidence of this evidence on a scale from 0 to 1. + # How sure you are that this evidence is what you say it is? + confidence: float = field( + default=0.0, + metadata={ + 'validate': lambda x: 0 <= x <= 1 + } + ) + + + def __post_init__(self): + if ( + not isinstance(self.uid, list) + or + not all(isinstance(uid, str) for uid in self.uid) + ): + raise ValueError(f"uid must be a list of strings .. {self}") + else: + # remove duplicate uids + self.uid = list(set(self.uid)) + + + +def evidence_to_dict(obj): + """ + Converts an Evidence object to a dictionary (aka json serializable) + :param obj: object of any type. + """ + if is_dataclass(obj): + # run this function on each value of the given dataclass + return {k: evidence_to_dict(v) for k, v in asdict(obj).items()} + + if isinstance(obj, Enum): + return obj.name + + if isinstance(obj, list): + return [evidence_to_dict(item) for item in obj] + + if isinstance(obj, dict): + return {k: evidence_to_dict(v) for k, v in obj.items()} + + return obj + +def dict_to_evidence(evidence: dict): + """ + Convert a dictionary to an Evidence object. + :param evidence (dict): Dictionary with evidence details. + returns an instance of the Evidence class. + """ + evidence_attributes = { + 'evidence_type': EvidenceType[evidence["evidence_type"]], + 'description': evidence['description'], + 'attacker': Attacker(**evidence['attacker']), + 'threat_level': ThreatLevel[evidence['threat_level']], + 'category': IDEACategory[evidence['category']], + 'victim': Victim(**evidence['victim']) if 'victim' in evidence + and evidence['victim'] else None, + 'profile': ProfileID(evidence['profile']['ip']) + if 'profile' in evidence else None, + 'timewindow': TimeWindow(evidence['timewindow']['number']), + 'uid': evidence['uid'], + 'timestamp': evidence['timestamp'], + 'proto': Proto[evidence['proto'].upper()] if 'proto' in evidence and + evidence['proto'] else None, + 'port': evidence['port'], + 'source_target_tag': Tag[evidence['source_target_tag']] if \ + 'source_target_tag' in evidence and evidence['source_target_tag'] + else None, + 'id': evidence['id'], + 'conn_count': evidence['conn_count'], + 'confidence': evidence['confidence'] + } + + return Evidence(**evidence_attributes) \ No newline at end of file diff --git a/slips_files/core/evidence.py b/slips_files/core/evidencehandler.py similarity index 73% rename from slips_files/core/evidence.py rename to slips_files/core/evidencehandler.py index 641918c17..7d50e7178 100644 --- a/slips_files/core/evidence.py +++ b/slips_files/core/evidencehandler.py @@ -15,12 +15,9 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# Contact: eldraco@gmail.com, sebastian.garcia@agents.fel.cvut.cz, stratosphere@aic.fel.cvut.cz -from slips_files.common.imports import * -from slips_files.core.helpers.whitelist import Whitelist -from slips_files.core.helpers.notify import Notify -from slips_files.common.abstracts.core import ICore + import json -from typing import Union, List, Tuple, Dict +from typing import Union, List, Tuple, Dict, Optional from datetime import datetime from os import path from colorama import Fore, Style @@ -29,11 +26,31 @@ import time import platform import traceback +from slips_files.common.idea_format import idea_format +from slips_files.common.style import red, green, cyan +from slips_files.common.imports import * +from slips_files.core.helpers.whitelist import Whitelist +from slips_files.core.helpers.notify import Notify +from slips_files.common.abstracts.core import ICore +from slips_files.core.evidence_structure.evidence import ( + dict_to_evidence, + evidence_to_dict, + ProfileID, + Evidence, + Direction, + Victim, + IoCType, + EvidenceType, + IDEACategory, + TimeWindow, + Proto, + Tag + ) IS_IN_A_DOCKER_CONTAINER = os.environ.get('IS_IN_A_DOCKER_CONTAINER', False) # Evidence Process -class Evidence(ICore): +class EvidenceHandler(ICore): """ A class to process the evidence from the alerts and update the threat level It only works on evidence for IPs that were profiled @@ -81,7 +98,7 @@ def init(self): def read_configuration(self): conf = ConfigParser() - self.width = conf.get_tw_width_as_float() + self.width: float = conf.get_tw_width_as_float() self.detection_threshold = conf.evidence_detection_threshold() self.print( f'Detection Threshold: {self.detection_threshold} ' @@ -96,8 +113,8 @@ def read_configuration(self): if IS_IN_A_DOCKER_CONTAINER: self.popup_alerts = False - def format_evidence_string(self, ip, detection_module, attacker, - description) -> str: + def format_evidence_string( + self, ip, detection_module, attacker, description) -> str: """ Function to add the dns resolution of the src and dst ips of each evidence @@ -118,14 +135,15 @@ def format_evidence_string(self, ip, detection_module, attacker, elif len(dns_resolution_ip) == 0: dns_resolution_ip = '' - # dns_resolution_ip_final = f' DNS: {dns_resolution_ip[:3]}. ' if dns_resolution_attacker and len( + # dns_resolution_ip_final = f' DNS: {dns_resolution_ip[:3]}. ' + # if dns_resolution_attacker and len( # dns_resolution_ip[:3] # ) > 0 else '. ' return f'{evidence_string}' - - + + def line_wrap(self, txt): """ is called for evidence that are going to be printed in the terminal @@ -145,8 +163,8 @@ def line_wrap(self, txt): wrapped_txt = wrapped_txt[:-1] return wrapped_txt - - + + def clean_file(self, output_dir, file_to_clean): """ Clear the file if it exists and return an open handle to it @@ -158,34 +176,35 @@ def clean_file(self, output_dir, file_to_clean): def add_to_json_log_file( self, - IDEA_dict: dict, + idea_dict: dict, all_uids: list, timewindow: str, accumulated_threat_level: float =0 ): """ Add a new evidence line to our alerts.json file in json format.
- :param IDEA_dict: dict containing 1 alert + :param idea_dict: dict containing 1 alert :param all_uids: the uids of the flows causing this evidence """ try: # we add extra fields to alerts.json that are not in the IDEA format - IDEA_dict.update({ + idea_dict.update({ 'uids': all_uids, 'accumulated_threat_level': accumulated_threat_level, 'timewindow': int(timewindow.replace('timewindow', '')), }) - json.dump(IDEA_dict, self.jsonfile) + json.dump(idea_dict, self.jsonfile) self.jsonfile.write('\n') except KeyboardInterrupt: return True except Exception: - self.print('Error in addDataToJSONFile()') - self.print(traceback.print_exc(), 0, 1) + self.print('Error in add_to_json_log_file()') + self.print(traceback.print_stack(), 0, 1) def add_to_log_file(self, data): """ - Add a new evidence line to the alerts.log and other log files if logging is enabled. + Add a new evidence line to the alerts.log and other log files if + logging is enabled. """ try: # write to alerts.log @@ -195,16 +214,21 @@ def add_to_log_file(self, data): except KeyboardInterrupt: return True except Exception: - self.print('Error in addDataToLogFile()') - self.print(traceback.print_exc(),0,1) + self.print('Error in add_to_log_file()') + self.print(traceback.print_stack(),0,1) def get_domains_of_flow(self, flow: dict): - """Returns the domains of each ip (src and dst) that appeared in this flow""" - # These separate lists, hold the domains that we should only check if they are SRC or DST. Not both + """ + Returns the domains of each ip (src and dst) that + appeared in this flow + """ + # These separate lists, hold the domains that we should only + # check if they are SRC or DST. Not both try: flow = json.loads(list(flow.values())[0]) except TypeError: - # sometimes this function is called before the flow is add to our database + # sometimes this function is called before the flow is + # added to our database return [], [] domains_to_check_src = [] domains_to_check_dst = [] @@ -241,7 +265,8 @@ def show_popup(self, alert_to_log: str): Function to display a popup with the alert depending on the OS """ if platform.system() == 'Linux': - # is notify_cmd is set in setup_notifications function depending on the user + # the notify_cmd is set in setup_notifications function + # depending on the user os.system(f'{self.notify_cmd} "Slips" "{alert_to_log}"') elif platform.system() == 'Darwin': os.system( @@ -252,14 +277,14 @@ def show_popup(self, alert_to_log: str): def format_evidence_causing_this_alert( self, - all_evidence: Dict[str, dict], - profileid: str, - twid: str, + all_evidence: Dict[str, Evidence], + profileid: ProfileID, + twid: TimeWindow, flow_datetime: str ) -> str: """ Function to format the string with all evidence causing an alert - flow_datetime: time of the last evidence received + :param flow_datetime: time of the last evidence received """ # alerts in slips consist of several evidence, # each evidence has a threat_level @@ -267,66 +292,47 @@ def format_evidence_causing_this_alert( # threat_levels, we produce an alert # Now instead of printing the last evidence only, # we print all of them - try: - twid_num = twid.split('timewindow')[1] - srcip = profileid.split(self.separator)[1] - # Get the start time of this TW - twid_start_time = None - while twid_start_time is None: - # give the database time to retreive the time - twid_start_time = self.db.getTimeTW(profileid, twid) - - tw_start_time_str = utils.convert_format(twid_start_time, - '%Y/%m/%d %H:%M:%S') - # datetime obj - tw_start_time_datetime =
utils.convert_to_datetime(tw_start_time_str) - - # Convert the tw width to deltatime - # tw_width_in_seconds_delta = timedelta(seconds=int(self.width)) - delta_width = utils.to_delta(self.width) - - # Get the stop time of the TW - tw_stop_time_datetime = (tw_start_time_datetime + delta_width) - - tw_stop_time_str = utils.convert_format( - tw_stop_time_datetime, - '%Y/%m/%d %H:%M:%S' - ) - - hostname = self.db.get_hostname_from_profile(profileid) - # if there's no hostname, set it as ' ' - hostname = hostname or '' - if hostname: - hostname = f'({hostname})' + # Get the start and end time of this TW + twid_start_time: Optional[float] = \ + self.db.get_tw_start_time(str(profileid), str(twid)) + tw_stop_time: float = twid_start_time + self.width + + # format them both for printing + time_format = '%Y/%m/%d %H:%M:%S' + twid_start_time: str = utils.convert_format( + twid_start_time, time_format + ) + tw_stop_time: str = utils.convert_format( + tw_stop_time, time_format + ) - alert_to_print = ( - f'{Fore.RED}IP {srcip} {hostname} detected as malicious in timewindow {twid_num} ' - f'(start {tw_start_time_str}, stop {tw_stop_time_str}) \n' - f'given the following evidence:{Style.RESET_ALL}\n' - ) - except Exception: - exception_line = sys.exc_info()[2].tb_lineno - self.print( - f'Problem on format_evidence_causing_this_alert() ' - f'line {exception_line}',0,1, + alert_to_print = f'IP {profileid.ip} ' + hostname: Optional[str] = self.db.get_hostname_from_profile( + str(profileid) ) - self.print(traceback.print_exc(),0,1) - return True - - for evidence in all_evidence.values(): - evidence: dict - description: str = evidence.get('description') + if hostname: + alert_to_print += f'({hostname}) ' - evidence_string = f'Detected {description}' - evidence_string = self.line_wrap(evidence_string) + alert_to_print += ( + f"detected as malicious in timewindow {twid.number} " + f"(start {twid_start_time}, stop {tw_stop_time}) \n" + f"given the following evidence:\n" + ) + alert_to_print: str = red(alert_to_print) - alert_to_print += ( - f'\t{Fore.CYAN}- {evidence_string}{Style.RESET_ALL}\n' + for evidence in all_evidence.values(): + evidence: Evidence + description: str = evidence.description + evidence_string = self.line_wrap(f'Detected {description}') + alert_to_print += cyan(f'\t- {evidence_string}\n') + + # Add the timestamp to the alert. + # The datetime printed will be of the last evidence only + readable_datetime: str = utils.convert_format( + flow_datetime, + utils.alerts_format ) - - # Add the timestamp to the alert. 
The datetime printed will be of the last evidence only - readable_datetime = utils.convert_format(flow_datetime, utils.alerts_format) - alert_to_print = f'{Fore.RED}{readable_datetime}{Style.RESET_ALL} {alert_to_print}' + alert_to_print: str = red(f'{readable_datetime} ') + alert_to_print return alert_to_print def is_running_on_interface(self): @@ -342,7 +348,8 @@ def decide_blocking(self, profileid) -> bool: # now since this source ip(profileid) caused an alert, # it means it caused so many evidence(attacked others a lot) # that we decided to alert and block it - #todo if by default we don't block everything from/to this ip anymore, remember to update the CYST module + #todo if by default we don't block everything from/to this ip anymore, + # remember to update the CYST module ip_to_block = profileid.split('_')[-1] @@ -371,13 +378,18 @@ def mark_as_blocked( """ Marks the profileid and twid as blocked and logs it to alerts.log we don't block when running slips on files, we log it in alerts.log only - :param blocked: bool. if the ip was blocked by the blocking module, we should say so + :param blocked: bool. if the ip was blocked by the blocking module, + we should say so in alerts.log, if not, we should say that we generated an alert - :param IDEA_dict: the last evidence of this alert, used for logging the blocking + :param IDEA_dict: the last evidence of this alert, + used for logging the blocking """ + self.db.mark_profile_as_malicious(profileid) + now = datetime.now() now = utils.convert_format(now, utils.alerts_format) ip = profileid.split('_')[-1].strip() + msg = f'{flow_datetime}: Src IP {ip:26}. ' if blocked: self.db.markProfileTWAsBlocked(profileid, twid) @@ -386,7 +398,8 @@ def mark_as_blocked( else: msg += 'Generated an alert ' - msg += f'given enough evidence on timewindow {twid.split("timewindow")[1]}. (real time {now})' + msg += (f'given enough evidence on timewindow ' + f'{twid.split("timewindow")[1]}. (real time {now})') # log in alerts.log self.add_to_log_file(msg) @@ -425,17 +438,12 @@ def get_evidence_that_were_part_of_a_past_alert( past_evidence_ids = [] return past_evidence_ids - def is_evidence_done_by_others(self, evidence: dict) -> bool: + def is_evidence_done_by_others(self, evidence: Evidence) -> bool: # given all the tw evidence, we should only # consider evidence that makes this given - # profile malicious, aka evidence of this profile attacking others. - attacker_direction: str = evidence.get('attacker_direction', '') - # the following type detections are the ones - # expected to be seen when we are attacking others - # marking this profileid (srcip) as malicious - if attacker_direction in ('srcip', 'sport', 'srcport'): - return False - return True + # profile malicious, aka evidence of this profile(srcip) attacking + # others. 
+ return evidence.attacker.direction != 'SRC' def get_evidence_for_tw(self, profileid: str, twid: str) \ -> Dict[str, dict]: @@ -443,13 +451,12 @@ def get_evidence_for_tw(self, profileid: str, twid: str) \ filters and returns all the evidence for this profile in this TW returns the dict with filtered evidence """ - tw_evidence: str = self.db.getEvidenceForTW(profileid, twid) + tw_evidence: Dict[str, dict] = self.db.get_twid_evidence( + profileid, twid + ) if not tw_evidence: return False - # format of this is {ev_id, json_serialized(ev_details)} - tw_evidence: dict = json.loads(tw_evidence) - past_evidence_ids: List[str] = \ self.get_evidence_that_were_part_of_a_past_alert(profileid, twid) @@ -460,8 +467,8 @@ def get_evidence_for_tw(self, profileid: str, twid: str) \ for id, evidence in tw_evidence.items(): id: str evidence: str - evidence: dict = json.loads(evidence) + evidence: Evidence = dict_to_evidence(evidence) if self.is_filtered_evidence( evidence, @@ -483,21 +490,21 @@ def get_evidence_for_tw(self, profileid: str, twid: str) \ if not processed: continue - id: str = evidence.get('ID') - # we keep track of these IDs to be able to label the flows of these - # evidence later if this was detected as an alert + evidence_id: str = evidence.id + # we keep track of these IDs to be able to label the flows + # of these evidence later if this was detected as an alert # now this should be done in its' own function but this is more # optimal so we don't loop through all evidence again. i'll # just leave it like that:D - self.IDs_causing_an_alert.append(id) + self.IDs_causing_an_alert.append(evidence_id) - filtered_evidence[id] = evidence + filtered_evidence[evidence_id] = evidence return filtered_evidence def is_filtered_evidence(self, - evidence: dict, + evidence: Evidence, past_evidence_ids: List[str]): """ filters the following @@ -515,7 +522,7 @@ def is_filtered_evidence(self, # when we get all the tw evidence from the db, we get the once we # alerted, and the new once we need to alert # this method removes the already alerted evidence to avoid duplicates - if id in past_evidence_ids: + if evidence.id in past_evidence_ids: return True if self.is_evidence_done_by_others(evidence): @@ -526,49 +533,47 @@ def is_filtered_evidence(self, def get_threat_level( self, - evidence: dict, + evidence: Evidence, ) -> float: """ return the threat level of the given evidence * confidence """ - evidence_type: str = evidence.get('evidence_type') - confidence: float = float(evidence.get('confidence')) - threat_level: float = evidence.get('threat_level') - description: str = evidence.get('description') - - # each threat level is a string, get the numerical value of it - try: - threat_level: float = \ - utils.threat_levels[threat_level.lower()] - except KeyError: - self.print( - f'Error: Evidence of type {evidence_type} has ' - f'an invalid threat level {threat_level}', 0, 1 - ) - self.print(f'Description: {description}', 0, 1) - threat_level = 0 + confidence: float = evidence.confidence + threat_level: float = evidence.threat_level.value # Compute the moving average of evidence evidence_threat_level: float = threat_level * confidence - self.print(f'\t\tWeighted Threat Level: {evidence_threat_level}', 3, 0) + self.print(f'\t\tWeighted Threat Level: {evidence_threat_level}', + 3, 0) return evidence_threat_level def get_last_evidence_ID(self, tw_evidence: dict) -> str: return list(tw_evidence.keys())[-1] - def send_to_exporting_module(self, tw_evidence: Dict[str, str]): + def send_to_exporting_module(self, 
tw_evidence: Dict[str, Evidence]): + """ + sends all given evidence to export_evidence channel + :param tw_evidence: all evidence that happened in a certain + timewindow + format is {evidence_id (str) : Evidence obj} + """ for evidence in tw_evidence.values(): + evidence: Evidence + evidence: dict = evidence_to_dict(evidence) self.db.publish('export_evidence', json.dumps(evidence)) def is_blocking_module_enabled(self) -> bool: """ - returns true if slips is running in an interface or growing zeek dir with -p - or if slips is using custom flows. meaning slips is reading the flows by a custom module not by + returns true if slips is running on an interface or growing + zeek dir with -p + or if slips is using custom flows. meaning slips is reading the + flows by a custom module not by inputprocess. there's no need for -p to enable the blocking """ custom_flows = '-im' in sys.argv or '--input-module' in sys.argv - return (self.is_running_on_interface() and '-p' not in sys.argv) or custom_flows + return ((self.is_running_on_interface() and '-p' not in sys.argv) + or custom_flows) def handle_new_alert(self, alert_ID: str, tw_evidence: dict): """ @@ -606,40 +611,38 @@ def handle_new_alert(self, alert_ID: str, tw_evidence: dict): self.db.set_accumulated_threat_level(profileid, twid, 0) def get_evidence_to_log( - self, - srcip: str, - description: str, - twid: str, - flow_datetime: str, - profileid: str, + self, evidence: Evidence, flow_datetime ) -> str: - timewindow_number: int = twid.replace("timewindow", '') + timewindow_number: int = evidence.timewindow.number # to keep the alignment of alerts.json ip + hostname # combined should take no more than 26 chars - evidence = f'{flow_datetime} (TW {timewindow_number}): Src IP' \ - f' {srcip:26}. Detected {description}' + evidence_str = f'{flow_datetime} (TW {timewindow_number}): Src ' \ + f'IP {evidence.profile.ip:26}. Detected ' \ + f' {evidence.description}' # sometimes slips tries to get the hostname of a # profile before ip_info stores it in the db # there's nothing we can do about it - hostname: str = self.db.get_hostname_from_profile(profileid) + hostname: Optional[str] = self.db.get_hostname_from_profile( + str(evidence.profile) + ) if not hostname: - return evidence + return evidence_str - padding_len = 26 - len(srcip) - len(hostname) - 3 + padding_len = 26 - len(evidence.profile.ip) - len(hostname) - 3 # fill the rest of the 26 characters with spaces to keep the alignment - evidence = f'{flow_datetime} (TW {timewindow_number}): Src IP' \ - f' {srcip} ({hostname}) {" "*padding_len}. ' \ - f'Detected {description}' + evidence_str = f'{flow_datetime} (TW {timewindow_number}): Src IP' \ + f' {evidence.profile.ip} ({hostname}) {" "*padding_len}. 
' \ + f'Detected {evidence.description}' - return evidence + return evidence_str def increment_attack_counter( self, attacker: str, - victim: str, - evidence_type: str + victim: Optional[Victim], + evidence_type: EvidenceType ): """ increments the number of attacks of this type from the given @@ -652,17 +655,17 @@ def increment_attack_counter( self.db.increment_attack_counter( attacker, victim, - evidence_type + evidence_type.name ) - def update_accumulated_threat_level(self, evidence: dict) -> float: + def update_accumulated_threat_level(self, evidence: Evidence) -> float: """ update the accumulated threat level of the profileid and twid of the given evidence and return the updated value """ - profileid: str = evidence['profileid'] - twid: str = evidence['twid'] + profileid: str = str(evidence.profile) + twid: str = str(evidence.timewindow) evidence_threat_level: float = self.get_threat_level(evidence) self.db.update_accumulated_threat_level( @@ -686,70 +689,47 @@ def show_popup(self, alert: str): def main(self): while not self.should_stop(): if msg := self.get_msg('evidence_added'): - # Data sent in the channel as a json dict, it needs to be deserialized first + msg['data']: str evidence: dict = json.loads(msg['data']) - profileid = evidence.get('profileid') - srcip = profileid.split(self.separator)[1] - twid = evidence.get('twid') - attacker_direction = evidence.get( - 'attacker_direction' - ) # example: dstip srcip dport sport dstdomain - attacker = evidence.get( - 'attacker' - ) # example: ip, port, inTuple, outTuple, domain - evidence_type: str = evidence.get( - 'evidence_type' - ) # example: PortScan, ThreatIntelligence, etc.. - description = evidence.get('description') - timestamp = evidence.get('stime') + evidence: Evidence = dict_to_evidence(evidence) + profileid: str = str(evidence.profile) + twid: str = str(evidence.timewindow) + evidence_type: EvidenceType = evidence.evidence_type + timestamp: str = evidence.timestamp # this is all the uids of the flows that cause this evidence - all_uids = evidence.get('uid') - confidence = evidence.get('confidence', False) - category = evidence.get('category', False) - conn_count = evidence.get('conn_count', False) - port = evidence.get('port', False) - proto = evidence.get('proto', False) - source_target_tag = evidence.get('source_target_tag', False) - evidence_ID = evidence.get('ID', False) - victim: str = evidence.get('victim', '') - + all_uids: list = evidence.uid # FP whitelisted alerts happen when the db returns an evidence # that isn't processed in this channel, in the tw_evidence # below. 
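Evidence travels between Slips processes as JSON: modules publish the output of evidence_to_dict() on a channel, and main() here rebuilds the dataclass with dict_to_evidence(). A toy round-trip showing the convention those helpers rely on (enums serialized by name); both stand-in classes are deliberately reduced to two fields and are not the full definitions from evidence_structure/evidence.py:

```python
import json
from dataclasses import dataclass
from enum import Enum, auto


class EvidenceType(Enum):
    # stand-in for the full enum in evidence_structure/evidence.py
    HORIZONTAL_PORT_SCAN = auto()


@dataclass
class Evidence:
    # stand-in for the full Evidence dataclass
    evidence_type: EvidenceType
    description: str


def evidence_to_dict(ev: Evidence) -> dict:
    # enums are serialized by name, like the helper in evidence.py
    return {"evidence_type": ev.evidence_type.name,
            "description": ev.description}


def dict_to_evidence(d: dict) -> Evidence:
    # the inverse: look the enum member up by its name
    return Evidence(EvidenceType[d["evidence_type"]], d["description"])


wire = json.dumps(evidence_to_dict(
    Evidence(EvidenceType.HORIZONTAL_PORT_SCAN, "scanning 25 dst ports")))
evidence = dict_to_evidence(json.loads(wire))
assert evidence.evidence_type == EvidenceType.HORIZONTAL_PORT_SCAN
```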
# to avoid this, we only alert about processed evidence - self.db.mark_evidence_as_processed(evidence_ID) - + self.db.mark_evidence_as_processed(evidence.id) # Ignore alert if IP is whitelisted - if self.whitelist.is_whitelisted_evidence( - srcip, attacker, attacker_direction, description, victim - ): - self.db.cache_whitelisted_evidence_ID(evidence_ID) + if self.whitelist.is_whitelisted_evidence(evidence): + self.db.cache_whitelisted_evidence_ID(evidence.id) # Modules add evidence to the db before # reaching this point, now remove evidence from db so # it could be completely ignored - self.db.deleteEvidence( - profileid, twid, evidence_ID + self.db.delete_evidence( + profileid, twid, evidence.id ) continue - # convert time to local timezone if self.running_non_stop: - timestamp: datetime = utils.convert_to_local_timezone(timestamp) + timestamp: datetime = utils.convert_to_local_timezone( + timestamp + ) flow_datetime = utils.convert_format(timestamp, 'iso') evidence_to_log: str = self.get_evidence_to_log( - srcip, - description, - twid, + evidence, flow_datetime, - profileid ) # Add the evidence to alerts.log self.add_to_log_file(evidence_to_log) self.increment_attack_counter( - profileid, - victim, + evidence.profile.ip, + evidence.victim, evidence_type ) @@ -768,30 +748,17 @@ def main(self): twid ) # prepare evidence for json log file - IDEA_dict: dict = utils.IDEA_format( - srcip, - evidence_type, - attacker_direction, - attacker, - description, - confidence, - category, - conn_count, - source_target_tag, - port, - proto, - evidence_ID - ) + idea_dict: dict = idea_format(evidence) # add to alerts.json self.add_to_json_log_file( - IDEA_dict, + idea_dict, all_uids, twid, accumulated_threat_level, ) - self.db.set_evidence_for_profileid(IDEA_dict) - self.db.publish('report_to_peers', json.dumps(evidence)) + evidence_dict: dict = evidence_to_dict(evidence) + self.db.publish('report_to_peers', json.dumps(evidence_dict)) # if the profile was already blocked in @@ -815,11 +782,11 @@ def main(self): profileid, twid ) if tw_evidence: - id: str = self.get_last_evidence_ID(tw_evidence) # store the alert in our database + last_id: str = self.get_last_evidence_ID(tw_evidence) # the alert ID is profileid_twid + the ID of # the last evidence causing this alert - alert_id: str = f'{profileid}_{twid}_{id}' + alert_id: str = f'{profileid}_{twid}_{last_id}' self.handle_new_alert(alert_id, tw_evidence) @@ -827,8 +794,8 @@ def main(self): alert_to_print: str = \ self.format_evidence_causing_this_alert( tw_evidence, - profileid, - twid, + evidence.profile, + evidence.timewindow, flow_datetime, ) @@ -851,12 +818,11 @@ def main(self): twid, flow_datetime, accumulated_threat_level, - IDEA_dict, + idea_dict, blocked=blocked ) if msg := self.get_msg('new_blame'): - self.msg_received = True data = msg['data'] try: data = json.loads(data) @@ -865,16 +831,17 @@ def main(self): 'Error in the report received from p2ptrust module' ) return - # The available values for the following variables are defined in go_director + # The available values for the following variables are + # defined in go_director # available key types: "ip" - key_type = data['key_type'] + # key_type = data['key_type'] # if the key type is ip, the ip is validated key = data['key'] # available evaluation types: 'score_confidence' - evaluation_type = data['evaluation_type'] + # evaluation_type = data['evaluation_type'] # this is the score_confidence received from the peer evaluation = data['evaluation'] diff --git a/slips_files/core/flows/suricata.py 
b/slips_files/core/flows/suricata.py index f9784380c..3a6942d7b 100644 --- a/slips_files/core/flows/suricata.py +++ b/slips_files/core/flows/suricata.py @@ -60,6 +60,7 @@ def __post_init__(self): ).total_seconds() or 0 self.pkts = self.dpkts + self.spkts self.bytes = self.dbytes + self.sbytes + self.uid = str(self.uid) @dataclass class SuricataHTTP: @@ -91,6 +92,8 @@ class SuricataHTTP: resp_mime_types: str = '' resp_fuids: str = '' type_:str = 'http' + def __post_init__(self): + self.uid = str(self.uid) @dataclass class SuricataDNS: @@ -115,6 +118,8 @@ class SuricataDNS: qclass_name: str = '' rcode_name: str = '' type_: str = 'dns' + def __post_init__(self): + self.uid = str(self.uid) @dataclass @@ -141,6 +146,8 @@ class SuricataTLS: notafter: str type_: str = 'ssl' + def __post_init__(self): + self.uid = str(self.uid) @dataclass @@ -166,6 +173,8 @@ class SuricataFile: analyzers: str ='' tx_hosts: str = '' rx_hosts: str = '' + def __post_init__(self): + self.uid = str(self.uid) @dataclass class SuricataSSH: @@ -196,4 +205,6 @@ class SuricataSSH: host_key: str = '' type_: str = 'ssh' + def __post_init__(self): + self.uid = str(self.uid) diff --git a/slips_files/core/helpers/checker.py b/slips_files/core/helpers/checker.py index 8797e7ab4..b8134474a 100644 --- a/slips_files/core/helpers/checker.py +++ b/slips_files/core/helpers/checker.py @@ -1,7 +1,9 @@ -import psutil -import sys import os import subprocess +import sys + +import psutil + class Checker: def __init__(self, main): @@ -20,7 +22,7 @@ def check_input_type(self) -> tuple: # -I if self.main.args.interface: input_information = self.main.args.interface - input_type = 'interface' + input_type = "interface" # return input_type, self.main.input_information return input_type, input_information, line_type @@ -28,16 +30,15 @@ def check_input_type(self) -> tuple: self.main.redis_man.load_db() return - if self.main.args.input_module: - input_information = 'input_module' + input_information = "input_module" input_type = self.main.args.input_module # this is the default value of the type of flows slips reads from a module - line_type = 'zeek' + line_type = "zeek" return input_type, input_information, line_type if not self.main.args.filepath: - print('[Main] You need to define an input source.') + print("[Main] You need to define an input source.") sys.exit(-1) # -f file/dir/stdin-type @@ -45,13 +46,10 @@ def check_input_type(self) -> tuple: if os.path.isfile(input_information) or os.path.isdir(input_information): input_type = self.main.get_input_file_type(input_information) else: - input_type, line_type = self.main.handle_flows_from_stdin( - input_information - ) + input_type, line_type = self.main.handle_flows_from_stdin(input_information) return input_type, input_information, line_type - def check_given_flags(self): """ check the flags that don't require starting slips @@ -66,27 +64,31 @@ def check_given_flags(self): self.main.terminate_slips() if self.main.args.interface and self.main.args.filepath: - print('Only -i or -f is allowed. Stopping slips.') + print("Only -i or -f is allowed. Stopping slips.") self.main.terminate_slips() - - if (self.main.args.interface or self.main.args.filepath) and self.main.args.input_module: - print('You can\'t use --input-module with -f or -i. Stopping slips.') + if ( + self.main.args.interface or self.main.args.filepath + ) and self.main.args.input_module: + print("You can't use --input-module with -f or -i. 
Stopping slips.") self.main.terminate_slips() if (self.main.args.save or self.main.args.db) and os.getuid() != 0: - print('Saving and loading the database requires root privileges.') + print("Saving and loading the database requires root privileges.") self.main.terminate_slips() if (self.main.args.verbose and int(self.main.args.verbose) > 3) or ( self.main.args.debug and int(self.main.args.debug) > 3 ): - print('Debug and verbose values range from 0 to 3.') + print("Debug and verbose values range from 0 to 3.") self.main.terminate_slips() # Check if redis server running - if not self.main.args.killall and self.main.redis_man.check_redis_database() is False: - print('Redis database is not running. Stopping Slips') + if ( + not self.main.args.killall + and self.main.redis_man.check_redis_database() is False + ): + print("Redis database is not running. Stopping Slips") self.main.terminate_slips() if self.main.args.config and not os.path.exists(self.main.args.config): @@ -96,11 +98,15 @@ def check_given_flags(self): if self.main.args.interface: interfaces = psutil.net_if_addrs().keys() if self.main.args.interface not in interfaces: - print(f"{self.main.args.interface} is not a valid interface. Stopping Slips") + print( + f"{self.main.args.interface} is not a valid interface. Stopping Slips" + ) self.main.terminate_slips() # if we're reading flows from some module other than the input process, make sure it exists - if self.main.args.input_module and not self.input_module_exists(self.main.args.input_module): + if self.main.args.input_module and not self.input_module_exists( + self.main.args.input_module + ): self.main.terminate_slips() # Clear cache if the parameter was included @@ -108,7 +114,7 @@ def check_given_flags(self): self.clear_redis_cache() # Clear cache if the parameter was included if self.main.args.blocking and not self.main.args.interface: - print('Blocking is only allowed when running slips using an interface.') + print("Blocking is only allowed when running slips using an interface.") self.main.terminate_slips() # kill all open unused redis servers if the parameter was included @@ -120,21 +126,15 @@ def check_given_flags(self): self.main.print_version() self.main.terminate_slips() - if ( - self.main.args.interface - and self.main.args.blocking - and os.geteuid() != 0 - ): + if self.main.args.interface and self.main.args.blocking and os.geteuid() != 0: # If the user wants to blocks, we need permission to modify iptables - print( - 'Run Slips with sudo to enable the blocking module.' - ) + print("Run Slips with sudo to enable the blocking module.") self.main.terminate_slips() if self.main.args.clearblocking: if os.geteuid() != 0: print( - 'Slips needs to be run as root to clear the slipsBlocking chain. Stopping.' + "Slips needs to be run as root to clear the slipsBlocking chain. Stopping." 
) else: self.delete_blocking_chain() @@ -158,11 +158,13 @@ def delete_blocking_chain(self): child.kill() def clear_redis_cache(self): - print('Deleting Cache DB in Redis.') + print("Deleting Cache DB in Redis.") self.main.redis_man.clear_redis_cache_database() - self.main.input_information = '' - self.main.zeek_dir = '' - self.main.redis_man.log_redis_server_PID(6379, self.main.redis_man.get_pid_of_redis_server(6379)) + self.main.input_information = "" + self.main.zeek_dir = "" + self.main.redis_man.log_redis_server_pid( + 6379, self.main.redis_man.get_pid_of_redis_server(6379) + ) self.main.terminate_slips() def input_module_exists(self, module): @@ -170,15 +172,17 @@ def input_module_exists(self, module): :param module: this is the one given to slips via --input-module check if the module was created in modules/ dir """ - available_modules = os.listdir('modules') + available_modules = os.listdir("modules") if module not in available_modules: print(f"{module} module is not available. Stopping slips") return False # this function assumes that the module is created in module/name/name.py - if f"{module}.py" not in os.listdir(f'modules/{module}/'): - print(f"{module} is not available in modules/{module}/{module}.py. Stopping slips") + if f"{module}.py" not in os.listdir(f"modules/{module}/"): + print( + f"{module} is not available in modules/{module}/{module}.py. Stopping slips" + ) return False return True @@ -191,24 +195,24 @@ def check_output_redirection(self) -> tuple: current_stdout will be '' if it's not redirected to a file """ # lsof will provide a list of all open fds belonging to slips - command = f'lsof -p {self.main.pid}' + command = f"lsof -p {self.main.pid}" result = subprocess.run(command.split(), capture_output=True) # Get command output - output = result.stdout.decode('utf-8') + output = result.stdout.decode("utf-8") # if stdout is being redirected we'll find '1w' in one of the lines # 1 means stdout, w means write mode # by default, stdout is not redirected - current_stdout = '' + current_stdout = "" for line in output.splitlines(): - if '1w' in line: + if "1w" in line: # stdout is redirected, get the file - current_stdout = line.split(' ')[-1] + current_stdout = line.split(" ")[-1] break - if self.main.mode == 'daemonized': + if self.main.mode == "daemonized": stderr = self.main.daemon.stderr slips_logfile = self.main.daemon.stdout else: - stderr = os.path.join(self.main.args.output, 'errors.log') - slips_logfile = os.path.join(self.main.args.output, 'slips.log') - return (current_stdout, stderr, slips_logfile) + stderr = os.path.join(self.main.args.output, "errors.log") + slips_logfile = os.path.join(self.main.args.output, "slips.log") + return current_stdout, stderr, slips_logfile diff --git a/slips_files/core/helpers/progress_bar.py b/slips_files/core/helpers/progress_bar.py deleted file mode 100644 index 1160d8705..000000000 --- a/slips_files/core/helpers/progress_bar.py +++ /dev/null @@ -1,193 +0,0 @@ -from multiprocessing import Process, Pipe -from tqdm.auto import tqdm -import sys - -class PBar(Process): - """ - Here's why this class is run in a separate process - we need all modules to have access to the pbar. - so for example, profile is the one always initializing the pbar, when this class - isn't run as a proc, profiler would be the only proc that "knows" about the pbar - because it initialized it right? - now when any txt is sent to be print by the output proc by anyone other than the profiler - the output.py would print it on top of the pbar! 
and we'd get duplicate bars! - - the solution to this is to make the pbar a separate proc - whenever it's supported, the output.py will forward all txt to be printed - to this class, and this class would handle the printing nicely - so that nothing will overlap with the pbar - once the pbar is done, this proc sets teh has_pbar shared var to Flase - and output.py would know about it and print txt normally - """ - def __init__( - self, - pipe: Pipe, - has_bar, - slips_mode: str, - input_type: str, - stdout: str, - ): - - Process.__init__(self) - self.pipe: Pipe = pipe - self.stdout = stdout - self.slips_mode: str = slips_mode - - # this is a shared obj using mp Manager - # using mp manager to be able to change this value - # here and and have it changed in the Output.py - self.has_pbar = has_bar - self.supported: bool = self.is_pbar_supported(input_type) - if self.supported: - self.has_pbar.value = True - - self.done_reading_flows = False - - def is_pbar_supported(self, input_type: str) -> bool: - """ - When running on a pcap, interface, or taking flows from an - external module, the total amount of flows is unknown - so the pbar is not supported - """ - # input type can be false -S or in unit tests - if ( - not input_type - or input_type in ('interface', 'pcap', 'stdin') - or self.slips_mode == 'daemonized' - ): - return False - - - if self.stdout != '': - # this means that stdout was redirected to a file, - # no need to print the progress bar - return False - - params = ('-g', '--growing', - '-im', '--input_module', - '-t' , '--testing') - for param in params: - if param in sys.argv: - return False - - return True - - def remove_stats(self): - # remove the stats from the progress bar - self.progress_bar.set_postfix_str( - '', - refresh=True - ) - - - def init(self, msg: dict): - """ - initializes the progress bar when slips is runnning on a file or a zeek dir - ignores pcaps, interface and dirs given to slips if -g is enabled - :param bar: dict with input type, total_flows, etc. - """ - - - self.total_flows = int(msg['total_flows']) - # the bar_format arg is to disable ETA and unit display - # dont use ncols so tqdm will adjust the bar size according to the terminal size - self.progress_bar = tqdm( - total=self.total_flows, - leave=True, - colour="green", - desc="Flows read", - mininterval=0, # defines how long to wait between each refresh. - unit=' flow', - smoothing=1, - bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} {postfix}", - position=0, - initial=0, #initial value of the flows processed - file=sys.stdout, - ) - - - def update_bar(self): - """ - wrapper for tqdm.update() - adds 1 to the number of flows processed - """ - - if not hasattr(self, 'progress_bar') : - # this module wont have the progress_bar set if it's running on pcap or interface - # or if the output is redirected to a file! - return - - if self.slips_mode == 'daemonized': - return - - self.progress_bar.update(1) - if self.progress_bar.n == self.total_flows: - self.terminate() - - def terminate(self): - # remove it from the bar because we'll be - # prining it in a new line - self.remove_stats() - tqdm.write("Profiler is done reading all flows. 
Slips is now processing them.") - self.done_reading_flows = True - self.has_pbar.value = False - - def print(self, msg: dict): - """ - prints using tqdm in order to avoid conflict with the pbar - """ - tqdm.write(msg['txt']) - - - def update_stats(self, msg: dict): - """writes the stats sent in the msg as a pbar postfix""" - self.progress_bar.set_postfix_str( - msg['stats'], - refresh=True - ) - - def pbar_supported(self) -> bool: - """ - this proc should stop listening - to events if the pbar reached 100% or if it's not supported - """ - if ( - self.done_reading_flows - or not self.supported - ): - return False - return True - - def run(self): - """keeps receiving events until pbar reaches 100%""" - try: - while self.pbar_supported(): - try: - msg: dict = self.pipe.recv() - except KeyboardInterrupt: - # to tell output.py to no longer send prints here - self.has_pbar.value = False - return - - event: str = msg['event'] - if event == "init": - self.init(msg) - - if event == "update_bar": - self.update_bar() - - if event == "update_stats": - self.update_stats(msg) - - - if event == "terminate": - self.terminate() - return - - if event == "print": - # let tqdm do th eprinting to avoid conflicts with the pbar - self.print(msg) - - except Exception as e: - tqdm.write(f"PBar Error: {e}") - diff --git a/slips_files/core/helpers/symbols_handler.py b/slips_files/core/helpers/symbols_handler.py index 607894964..0b5fadafb 100644 --- a/slips_files/core/helpers/symbols_handler.py +++ b/slips_files/core/helpers/symbols_handler.py @@ -288,5 +288,6 @@ def compute_timechar(): return symbol, (last_ts, now_ts) except Exception: # For some reason we can not use the output queue here.. check - self.print('Error in compute_symbol in Profiler Process.', 0, 1) - self.print('{}'.format(traceback.format_exc()), 0, 1) \ No newline at end of file + self.print('Error in compute_symbol in Profiler Process.', + 0, 1) + self.print(traceback.print_stack(), 0, 1) \ No newline at end of file diff --git a/slips_files/core/helpers/whitelist.py b/slips_files/core/helpers/whitelist.py index 2eace9f8a..41f950451 100644 --- a/slips_files/core/helpers/whitelist.py +++ b/slips_files/core/helpers/whitelist.py @@ -6,6 +6,18 @@ from slips_files.core.output import Output import tld import os +from slips_files.core.evidence_structure.evidence import ( + dict_to_evidence, + Evidence, + Direction, + IoCType, + EvidenceType, + IDEACategory, + Proto, + Tag, + Attacker, + Victim + ) class Whitelist(IObservable): @@ -78,9 +90,12 @@ def is_ignored_flow_type(self, flow_type) -> bool: if flow_type in self.ignored_flow_types: return True - def is_whitelisted_domain_in_flow( - self, whitelisted_domain, direction, domains_of_flow, ignore_type + self, + whitelisted_domain, + direction: Direction, + domains_of_flow, + ignore_type ): """ Given the domain of a flow, and a whitelisted domain, @@ -89,7 +104,7 @@ def is_whitelisted_domain_in_flow( :param whitelisted_domain: the domain we want to check if it exists in the domains_of_flow :param ignore_type: alerts or flows or both - :param direction: src or dst or both + :param direction: Direction obj :param domains_of_flow: src domains of the src IP of the flow, or dst domains of the dst IP of the flow """ @@ -99,8 +114,9 @@ def is_whitelisted_domain_in_flow( # do we wanna whitelist flows coming from or going to this domain or both? 
from_ = whitelisted_domains[whitelisted_domain]['from'] + from_ = Direction.SRC if 'src' in from_ else Direction.DST # Now check the domains of the src IP - if direction in from_ or 'both' in from_: + if direction == from_ or 'both' in whitelisted_domains[whitelisted_domain]['from']: what_to_ignore = whitelisted_domains[whitelisted_domain]['what_to_ignore'] for domain_to_check in domains_of_flow: @@ -134,7 +150,8 @@ def is_whitelisted_domain(self, domain_to_check, saddr, daddr, ignore_type): src_domains_of_flow, ) = self.get_domains_of_flow(saddr, daddr) - # self.print(f'Domains to check from flow: {domains_to_check}, {domains_to_check_dst} {domains_to_check_src}') + # self.print(f'Domains to check from flow: {domains_to_check}, + # {domains_to_check_dst} {domains_to_check_src}') # Go through each whitelisted domain and check if what arrived is there for whitelisted_domain in list(whitelisted_domains.keys()): what_to_ignore = whitelisted_domains[whitelisted_domain]['what_to_ignore'] @@ -148,25 +165,39 @@ def is_whitelisted_domain(self, domain_to_check, saddr, daddr, ignore_type): ignore_type in what_to_ignore or 'both' in what_to_ignore ): - # self.print(f'Whitelisting the domain {domain_to_check} due to whitelist of {domain_to_check}') + # self.print(f'Whitelisting the domain + # {domain_to_check} due to whitelist of {domain_to_check}') return True - if self.is_whitelisted_domain_in_flow(whitelisted_domain, 'src', src_domains_of_flow, ignore_type): - # self.print(f"Whitelisting the domain {domain_to_check} because is related" - # f" to domain {domain_to_check} of dst IP {daddr}") + if self.is_whitelisted_domain_in_flow( + whitelisted_domain, + Direction.SRC, + src_domains_of_flow, + ignore_type): + # self.print(f"Whitelisting the domain + # {domain_to_check} because is related" + # f" to domain {domain_to_check} + # of dst IP {daddr}") return True - if self.is_whitelisted_domain_in_flow(whitelisted_domain, 'dst', dst_domains_of_flow, ignore_type): - # self.print(f"Whitelisting the domain {domain_to_check} because is" - # f"related to domain {domain_to_check} of src IP {saddr}") + if self.is_whitelisted_domain_in_flow( + whitelisted_domain, + Direction.DST, + dst_domains_of_flow, + ignore_type): + # self.print(f"Whitelisting the domain + # {domain_to_check} because is" + # f"related to domain {domain_to_check} + # of src IP {saddr}") return True return False def is_whitelisted_flow(self, flow) -> bool: """ - Checks if the src IP or dst IP or domain or organization of this flow is whitelisted. + Checks if the src IP or dst IP or domain or organization + of this flow is whitelisted. 
""" saddr = flow.saddr daddr = flow.daddr @@ -222,7 +253,8 @@ def is_whitelisted_flow(self, flow) -> bool: if ('dst' in from_ or 'both' in from_) and ( self.should_ignore_flows(what_to_ignore) ): - # self.print(f"Whitelisting the dst IP {column_values['daddr']}") + # self.print(f"Whitelisting the dst IP + # {column_values['daddr']}") return True if flow_type == 'dns': @@ -233,7 +265,8 @@ def is_whitelisted_flow(self, flow) -> bool: # direction = whitelisted_IPs[daddr]['from'] what_to_ignore = whitelisted_IPs[answer]['what_to_ignore'] if self.should_ignore_flows(what_to_ignore): - # self.print(f"Whitelisting the IP {answer} due to its presence in a dns answer") + # self.print(f"Whitelisting the IP {answer} + # due to its presence in a dns answer") return True @@ -257,7 +290,8 @@ def is_whitelisted_flow(self, flow) -> bool: and self.should_ignore_flows(what_to_ignore) ): - # self.print(f"The source MAC of this flow {src_mac} is whitelisted") + # self.print(f"The source MAC of this flow + # {src_mac} is whitelisted") return True dst_mac = flow.dmac if hasattr(flow, 'smac') else False @@ -271,7 +305,8 @@ def is_whitelisted_flow(self, flow) -> bool: and self.should_ignore_flows(what_to_ignore) ): - # self.print(f"The dst MAC of this flow {dst_mac} is whitelisted") + # self.print(f"The dst MAC of this flow {dst_mac} + # is whitelisted") return True if self.is_ignored_flow_type(flow_type): @@ -290,7 +325,8 @@ def is_whitelisted_flow(self, flow) -> bool: # self.print(f'Checking {org}, from:{from_} type {what_to_ignore}') if self.should_ignore_flows(what_to_ignore): - # We want to block flows from this org. get the domains of this flow based on the direction. + # We want to block flows from this org. get the domains + # of this flow based on the direction. if 'both' in from_: domains_to_check = ( domains_to_check_src + domains_to_check_dst @@ -301,39 +337,52 @@ def is_whitelisted_flow(self, flow) -> bool: domains_to_check = domains_to_check_dst if 'src' in from_ or 'both' in from_: - # Method 1 Check if src IP belongs to a whitelisted organization range + # Method 1 Check if src IP belongs to a whitelisted + # organization range try: if self.is_ip_in_org(saddr, org): - # self.print(f"The src IP {saddr} is in the ranges of org {org}. Whitelisted.") + # self.print(f"The src IP {saddr} is in the + # ranges of org {org}. Whitelisted.") return True except ValueError: - # Some flows don't have IPs, but mac address or just - in some cases + # Some flows don't have IPs, but mac address or + # just - in some cases return False - # Method 2 Check if the ASN of this src IP is any of these organizations + # Method 2 Check if the ASN of this src IP is any of + # these organizations if self.is_whitelisted_asn(saddr, org): - # this ip belongs to a whitelisted org, ignore flow - # self.print(f"The src IP {saddr} belong to {org}. Whitelisted because of ASN.") + # this ip belongs to a whitelisted org, ignore + # flow + # self.print(f"The src IP {saddr} belong to {org}. + # Whitelisted because of ASN.") return True if 'dst' in from_ or 'both' in from_: - # Method 1 Check if dst IP belongs to a whitelisted organization range + # Method 1 Check if dst IP belongs to a whitelisted + # organization range try: if self.is_ip_in_org(flow.daddr, org): - # self.print(f"The dst IP {column_values['daddr']} " - # f"is in the network range of org {org}. Whitelisted.") + # self.print(f"The dst IP + # {column_values['daddr']} " + # f"is in the network range of org + # {org}. 
Whitelisted.") return True except ValueError: - # Some flows don't have IPs, but mac address or just - in some cases + # Some flows don't have IPs, but mac address or + # just - in some cases return False - # Method 2 Check if the ASN of this dst IP is any of these organizations + # Method 2 Check if the ASN of this dst IP is any of + # these organizations if self.is_whitelisted_asn(daddr, org): # this ip belongs to a whitelisted org, ignore flow return True - # either we're blocking src, dst, or both check the domain of this flow - # Method 3 Check if the domains of this flow belong to this org + # either we're blocking src, dst, or both check the + # domain of this flow + # Method 3 Check if the domains of this flow belong + # to this org # domains to check are usually 1 or 2 domains for flow_domain in domains_to_check: if self.is_domain_in_org(flow_domain, org): @@ -350,7 +399,8 @@ def is_domain_in_org(self, domain, org): self.db.get_org_info(org, 'domains') ) if org in domain: - # self.print(f"The domain of this flow ({domain}) belongs to the domains of {org}") + # self.print(f"The domain of this flow ({domain}) belongs to + # the domains of {org}") return True try: @@ -369,24 +419,30 @@ def is_domain_in_org(self, domain, org): continue # match subdomains too - # if org has org.com, and the flow_domain is xyz.org.com whitelist it + # if org has org.com, and the flow_domain is xyz.org.com + # whitelist it if org_domain in domain: - # self.print(f"The src domain of this flow ({domain}) is " + # self.print(f"The src domain of this flow ({domain}) + # is " # f"a subdomain of {org} domain: {org_domain}") return True - # if org has xyz.org.com, and the flow_domain is org.com whitelist it + # if org has xyz.org.com, and the flow_domain is org.com + # whitelist it if domain in org_domain: - # self.print(f"The domain of {org} ({org_domain}) is a subdomain of " + # self.print(f"The domain of {org} ({org_domain}) is + # a subdomain of " # f"this flow domain ({domain})") return True except (KeyError, TypeError): - # comes here if the whitelisted org doesn't have domains in slips/organizations_info (not a famous org) + # comes here if the whitelisted org doesn't have domains in + # slips/organizations_info (not a famous org) # and ip doesn't have asn info. # so we don't know how to link this ip to the whitelisted org! pass def read_whitelist(self): - """Reads the content of whitelist.conf and stores information about each ip/org/domain in the database""" + """Reads the content of whitelist.conf and stores information about + each ip/org/domain in the database""" # since this function can be run when the user modifies whitelist.conf # we need to check if the dicts are already there @@ -404,11 +460,13 @@ def read_whitelist(self): if line.startswith('"IoCType"'): continue - # check if the user commented an org, ip or domain that was whitelisted + # check if the user commented an org, ip or domain that + # was whitelisted if line.startswith('#'): if whitelisted_IPs: for ip in list(whitelisted_IPs): - # make sure the user commented the line we have in cache exactly + # make sure the user commented the line we + # have in cache exactly if ( ip in line and whitelisted_IPs[ip]['from'] in line @@ -449,7 +507,8 @@ def read_whitelist(self): # todo if the user closes slips, changes the whitelist, and reopens slips , # slips will still have the old whitelist in the cache! 
continue
-        # line should be: ["type","domain/ip/organization","from","what_to_ignore"]
+        # line should be: ["type","domain/ip/organization",
+        # "from","what_to_ignore"]
         line = line.replace('\n', '').replace(' ', '').split(',')
         try:
             type_, data, from_, what_to_ignore = (
@@ -487,7 +546,8 @@ def read_whitelist(self):
                             }
                     elif 'org' in type_:
                         if data not in utils.supported_orgs:
-                            self.print(f"Whitelisted org {data} is not supported in slips")
+                            self.print(f"Whitelisted org {data} is not"
+                                       f" supported in slips")
                             continue
                         # organizations dicts look something like this:
                         #  {'google': {'from':'dst',
@@ -507,26 +567,31 @@ def read_whitelist(self):
                             }
                     else:
-                        self.print(f'{data} is not a valid {type_}.', 1, 0)
+                        self.print(f'{data} is not a valid {type_}.', 1,
+                                   0)
                 except Exception:
                     self.print(
-                        f'Line {line_number} in whitelist.conf is invalid. Skipping. '
+                        f'Line {line_number} in whitelist.conf is invalid.'
+                        f' Skipping. '
                     )
         except FileNotFoundError:
             self.print(
-                f"Can't find {self.whitelist_path}, using slips default whitelist.conf instead"
+                f"Can't find {self.whitelist_path}, using slips default "
+                f"whitelist.conf instead"
             )
             if self.whitelist_path != 'config/whitelist.conf':
                 self.whitelist_path = 'config/whitelist.conf'
                 self.read_whitelist()

-        # store everything in the cache db because we'll be needing this info in the evidenceProcess
+        # store everything in the cache db because we'll be needing this
+        # info in the evidenceProcess
         self.db.set_whitelist('IPs', whitelisted_IPs)
         self.db.set_whitelist('domains', whitelisted_domains)
         self.db.set_whitelist('organizations', whitelisted_orgs)
         self.db.set_whitelist('mac', whitelisted_mac)

-        return whitelisted_IPs, whitelisted_domains, whitelisted_orgs, whitelisted_mac
+        return whitelisted_IPs, whitelisted_domains, whitelisted_orgs, \
+            whitelisted_mac

     def get_domains_of_flow(self, saddr, daddr):
         """
@@ -543,7 +608,8 @@ def get_domains_of_flow(self, saddr, daddr):
         except (KeyError, TypeError):
             pass
         try:
-            # self.print(f"DNS of src IP {column_values['saddr']}: {self.db.get_dns_resolution(column_values['saddr'])}")
+            # self.print(f"DNS of src IP {column_values['saddr']}:
+            # {self.db.get_dns_resolution(column_values['saddr'])}")
             src_dns_domains = self.db.get_dns_resolution(saddr)
             src_dns_domains = src_dns_domains.get('domains', [])
             domains_to_check_src.extend(iter(src_dns_domains))
@@ -557,7 +623,8 @@ def get_domains_of_flow(self, saddr, daddr):
             pass

         try:
-            # self.print(f"DNS of dst IP {column_values['daddr']}: {self.db.get_dns_resolution(column_values['daddr'])}")
+            # self.print(f"DNS of dst IP {column_values['daddr']}:
+            # {self.db.get_dns_resolution(column_values['daddr'])}")
             dst_dns_domains = self.db.get_dns_resolution(daddr)
             dst_dns_domains = dst_dns_domains.get('domains', [])
             domains_to_check_dst.extend(iter(dst_dns_domains))
@@ -589,7 +656,7 @@ def is_ip_in_org(self, ip:str, org):
         return False

     def profile_has_whitelisted_mac(
-        self, profile_ip, whitelisted_macs, is_srcip, is_dstip
+        self, profile_ip, whitelisted_macs, direction: Direction
     ) -> bool:
         """
         Checks for alerts whitelist
@@ -612,18 +679,19 @@ def profile_has_whitelisted_mac(
                 'alerts' in what_to_ignore
                 or 'both' in what_to_ignore
             ):
-                if is_srcip and (
+                if direction == Direction.SRC and (
                     'src' in from_
                     or 'both' in from_
                 ):
                     return True
-                if is_dstip and (
+                if direction == Direction.DST and (
                     'dst' in from_
                     or 'both' in from_
                 ):
                     return True

-    def is_ip_asn_in_org_asn(self, ip, org):
+    def is_ip_asn_in_org_asn(self, ip: str, org):
         """
-        returns true if the ASN of the given IP is listed in the ASNs of the given org ASNs
+        returns true if the ASN of the given IP is listed in the ASNs of
+        the given org ASNs
         """
         # Check if the IP in the content of the alert has ASN info in the db
         ip_data = self.db.get_ip_info(ip)
@@ -642,25 +710,22 @@ def is_ip_asn_in_org_asn(self, ip, org):
                 or ip_asn in org_asn
             ):
                 # this ip belongs to a whitelisted org, ignore alert
-                # self.print(f'Whitelisting evidence sent by {srcip} about {ip} due to ASN of {ip}
+                # self.print(f'Whitelisting evidence sent by {srcip} about
+                # {ip} due to ASN of {ip}
                 # related to {org}. {data} in {description}')
                 return True

-    def is_srcip(self, attacker_direction):
-        return attacker_direction in ('sip', 'srcip', 'sport', 'inTuple')
-
-    def is_dstip(self, attacker_direction):
-        return attacker_direction in ('dip', 'dstip', 'dport', 'outTuple')
-
     def should_ignore_from(self, direction) -> bool:
         """
-        Returns true if the user wants to whitelist alerts/flows from this source(ip, org, mac, etc)
+        Returns true if the user wants to whitelist alerts/flows from
+        this source(ip, org, mac, etc)
         """
         return ('src' in direction or 'both' in direction)

     def should_ignore_to(self, direction) -> bool:
         """
-        Returns true if the user wants to whitelist alerts/flows to this source(ip, org, mac, etc)
+        Returns true if the user wants to whitelist alerts/flows to
+        this source(ip, org, mac, etc)
         """
         return ('dst' in direction or 'both' in direction)

@@ -697,107 +762,95 @@ def parse_whitelist(self, whitelist):
             whitelisted_macs = json.loads(whitelist['mac'])
         except (IndexError, KeyError):
             whitelisted_macs = {}
-        return whitelisted_IPs, whitelisted_domains, whitelisted_orgs, whitelisted_macs
+        return whitelisted_IPs, whitelisted_domains, whitelisted_orgs, \
+            whitelisted_macs

     def is_whitelisted_evidence(
-            self, srcip, attacker, attacker_direction, description, victim
+            self, evidence: Evidence
     ) -> bool:
         """
-        Checks if IP is whitelisted
-        :param srcip: Src IP that generated the evidence
-        :param attacker: This is what was detected in the evidence. (attacker) can be ip, domain, tuple(ip:port:proto).
-        :param attacker_direction: this is the type of the attacker param. 'sip', 'dip', 'sport', 'dport', 'inTuple',
-        'outTuple', 'dstdomain'
-        :param description: may contain IPs if the evidence is coming from portscan module
-        :param victim: ip of the victim (will either be the saddr, the daddr, or '' in case of scans)
+        Checks if an evidence is whitelisted
         """
-        # self.print(f'Checking the whitelist of {srcip}: {data} {attacker_direction} {description} ')
+        # self.print(f'Checking the whitelist of {srcip}: {data}
+        # {attacker_direction} {description} ')
         whitelist = self.db.get_all_whitelist()
         max_tries = 10
-        # if this module is loaded before profilerProcess or before we're done processing the whitelist in general
+        # if this module is loaded before profilerProcess or before we're
+        # done processing the whitelist in general
         # the database won't return the whitelist
-        # so we need to try several times until the db returns the populated whitelist
+        # so we need to try several times until the db returns the
+        # populated whitelist
         # empty dicts evaluate to False
         while not bool(whitelist) and max_tries != 0:
-            # try max 10 times to get the whitelist, if it's still empty then it's not empty by mistake
+            # try max 10 times to get the whitelist, if it's still empty
+            # then it's not empty by mistake
             max_tries -= 1
             whitelist = self.db.get_all_whitelist()
         if max_tries == 0:
             # we tried 10 times to get the whitelist, it's probably empty.
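# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original patch): the bounded retry
# loop above exists because another process may still be writing the
# whitelist to the db when this check runs. The same pattern in isolation
# (`fetch` stands in for self.db.get_all_whitelist):
def get_whitelist_with_retries(fetch, max_tries: int = 10) -> dict:
    whitelist = fetch()
    while not whitelist and max_tries > 0:
        max_tries -= 1
        whitelist = fetch()
    # still empty after all the tries: genuinely empty, not a race
    return whitelist
# ----------------------------------------------------------------------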
return False - if self.check_whitelisted_attacker(attacker, attacker_direction): + if self.check_whitelisted_attacker(evidence.attacker): return True - if self.check_whitelisted_victim(victim, srcip): - return True + if ( + hasattr(evidence, 'victim') + and self.check_whitelisted_victim(evidence.victim) + ): + return True - def check_whitelisted_victim(self, victim, srcip): + def check_whitelisted_victim(self, victim: Victim): if not victim: return False whitelist = self.db.get_all_whitelist() - whitelisted_IPs, whitelisted_domains, whitelisted_orgs, whitelisted_macs = self.parse_whitelist(whitelist) - - victim = victim.strip() - victim_type = utils.detect_data_type(victim) + whitelisted_orgs = self.parse_whitelist(whitelist)[2] - if victim_type == 'ip': - ip = victim - is_srcip = True if srcip in victim else False - if self.is_ip_whitelisted(ip, is_srcip): + if ( + victim.victim_type == IoCType.IP.name + and self.is_ip_whitelisted(victim.value, victim.direction) + ): return True - elif victim_type == 'domain': - # the domain can never be a source here - if self.is_domain_whitelisted(victim, 'dstdomain'): + elif ( + victim.victim_type == IoCType.DOMAIN.name + and self.is_domain_whitelisted( + victim.value, victim.direction) + ): return True - direction = 'src' if srcip in victim else 'dst' - if ( + + if( whitelisted_orgs - and self.is_part_of_a_whitelisted_org(victim, victim_type, direction) + and self.is_part_of_a_whitelisted_org(victim) ): return True - def check_whitelisted_attacker(self, attacker, attacker_direction): + def check_whitelisted_attacker(self, attacker: Attacker): whitelist = self.db.get_all_whitelist() - whitelisted_IPs, whitelisted_domains, whitelisted_orgs, whitelisted_macs = self.parse_whitelist(whitelist) - - # Set attacker type - if 'domain' in attacker_direction: - attacker_type = 'domain' - elif 'outTuple' in attacker_direction: - # for example: ip:port:proto - attacker = attacker.split('-')[0] - attacker_type = 'ip' - else: - # it's probably one of the following: 'sip', 'dip', 'sport' - attacker_type = 'ip' - - # Check IPs - if attacker_type == 'domain': - if self.is_domain_whitelisted(attacker, attacker_direction): + whitelisted_orgs = self.parse_whitelist(whitelist)[2] + + if ( + attacker.attacker_type == IoCType.DOMAIN.name + and + self.is_domain_whitelisted(attacker.value, attacker.direction) + ): return True - elif attacker_type == 'ip': + elif attacker.attacker_type == IoCType.IP.name: # Check that the IP in the content of the alert is whitelisted - # Was the evidence coming as a src or dst? - ip = attacker - is_srcip = self.is_srcip(attacker_direction) - # is_dstip = self.is_dstip(attacker_direction) - if self.is_ip_whitelisted(ip, is_srcip): + if self.is_ip_whitelisted(attacker.value, attacker.direction): return True # Check orgs if ( whitelisted_orgs - and self.is_part_of_a_whitelisted_org(attacker, attacker_type, attacker_direction) + and self.is_part_of_a_whitelisted_org(attacker) ): return True @@ -805,7 +858,8 @@ def check_whitelisted_attacker(self, attacker, attacker_direction): def load_org_asn(self, org) -> list: """ - Reads the specified org's asn from slips_files/organizations_info and stores the info in the database + Reads the specified org's asn from slips_files/organizations_info + and stores the info in the database org: 'google', 'facebook', 'twitter', etc... 
returns a list containing the org's asn """ @@ -839,7 +893,8 @@ def load_org_asn(self, org) -> list: def load_org_domains(self, org): """ - Reads the specified org's domains from slips_files/organizations_info and stores the info in the database + Reads the specified org's domains from slips_files/organizations_info + and stores the info in the database org: 'google', 'facebook', 'twitter', etc... returns a list containing the org's domains """ @@ -861,7 +916,8 @@ def load_org_domains(self, org): def load_org_IPs(self, org): """ - Reads the specified org's info from slips_files/organizations_info and stores the info in the database + Reads the specified org's info from slips_files/organizations_info + and stores the info in the database if there's no file for this org, it get the IP ranges from asnlookup.com org: 'google', 'facebook', 'twitter', etc... returns a list of this organization's subnets @@ -903,31 +959,30 @@ def load_org_IPs(self, org): self.db.set_org_info(org, json.dumps(org_subnets), 'IPs') return org_subnets - def is_ip_whitelisted(self, ip: str, is_srcip: bool): + def is_ip_whitelisted(self, ip: str, direction: Direction): """ checks the given IP in the whitelisted IPs read from whitelist.conf """ whitelist = self.db.get_all_whitelist() - whitelisted_IPs, whitelisted_domains, whitelisted_orgs, whitelisted_macs = self.parse_whitelist(whitelist) + whitelisted_ips, _, _, whitelisted_macs = self.parse_whitelist(whitelist) - is_dstip = not is_srcip - if ip in whitelisted_IPs: + if ip in whitelisted_ips: # Check if we should ignore src or dst alerts from this ip # from_ can be: src, dst, both # what_to_ignore can be: alerts or flows or both - direction = whitelisted_IPs[ip]['from'] - what_to_ignore = whitelisted_IPs[ip]['what_to_ignore'] + whitelist_direction: str = whitelisted_ips[ip]['from'] + what_to_ignore = whitelisted_ips[ip]['what_to_ignore'] ignore_alerts = self.should_ignore_alerts(what_to_ignore) ignore_alerts_from_ip = ( ignore_alerts - and is_srcip - and self.should_ignore_from(direction) + and direction == Direction.SRC + and self.should_ignore_from(whitelist_direction) ) ignore_alerts_to_ip = ( ignore_alerts - and is_dstip - and self.should_ignore_to(direction) + and direction == Direction.DST + and self.should_ignore_to(whitelist_direction) ) if ignore_alerts_from_ip or ignore_alerts_to_ip: # self.print(f'Whitelisting src IP {srcip} for evidence' @@ -938,17 +993,12 @@ def is_ip_whitelisted(self, ip: str, is_srcip: bool): # Now we know this ipv4 or ipv6 isn't whitelisted # is the mac address of this ip whitelisted? if whitelisted_macs and self.profile_has_whitelisted_mac( - ip, whitelisted_macs, is_srcip, is_dstip + ip, whitelisted_macs, direction ): return True - def is_domain_whitelisted(self, domain: str, direction: str): - """ - :param direction: can be either srcdomain or dstdomain - """ + def is_domain_whitelisted(self, domain: str, direction: Direction): # todo differentiate between this and is_whitelisted_Domain() - is_srcdomain = direction in ('srcdomain') - is_dstdomain = direction in ('dstdomain') # extract the top level domain try: @@ -966,7 +1016,8 @@ def is_domain_whitelisted(self, domain: str, direction: str): sub_domain = domain[-len(domain_in_whitelist) :] if domain_in_whitelist in sub_domain: # Ignore src or dst - direction = whitelisted_domains[sub_domain]['from'] + whitelist_direction: str = whitelisted_domains[sub_domain][ + 'from'] # Ignore flows or alerts? 
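# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original patch): the suffix match
# above whitelists subdomains too. Only the tail of the flow's domain is
# compared against the whitelisted domain, so "mail.example.com" matches a
# whitelisted "example.com" while "example.com.evil.io" does not:
def matches_whitelisted_domain(domain: str, whitelisted: str) -> bool:
    sub_domain = domain[-len(whitelisted):]
    return whitelisted in sub_domain

assert matches_whitelisted_domain('mail.example.com', 'example.com')
assert not matches_whitelisted_domain('example.com.evil.io', 'example.com')
# ----------------------------------------------------------------------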
what_to_ignore = whitelisted_domains[sub_domain][ 'what_to_ignore' @@ -974,13 +1025,13 @@ def is_domain_whitelisted(self, domain: str, direction: str): ignore_alerts = self.should_ignore_alerts(what_to_ignore) ignore_alerts_from_domain = ( ignore_alerts - and is_srcdomain - and self.should_ignore_from(direction) + and direction == Direction.SRC + and self.should_ignore_from(whitelist_direction) ) ignore_alerts_to_domain = ( ignore_alerts - and is_dstdomain - and self.should_ignore_to(direction) + and direction == Direction.DST + and self.should_ignore_to(whitelist_direction) ) if ignore_alerts_from_domain or ignore_alerts_to_domain: # self.print(f'Whitelisting evidence about ' @@ -993,14 +1044,10 @@ def is_domain_whitelisted(self, domain: str, direction: str): # https://tranco-list.eu/list/X5QNN/1000000 return True - def is_part_of_a_whitelisted_org(self, ioc, ioc_type, direction): + def is_part_of_a_whitelisted_org(self, ioc): """ - :param ioc: can be ip or domain - :param direction: can src or dst ip or domain - :param ioc: can be ip or domain + :param ioc: can be an Attacker or a Victim object """ - is_src = self.is_srcip(direction) or direction in 'srcdomain' - is_dst = self.is_dstip(direction) or direction in 'dstdomain' whitelist = self.db.get_all_whitelist() whitelisted_orgs = self.parse_whitelist(whitelist)[2] @@ -1011,29 +1058,35 @@ def is_part_of_a_whitelisted_org(self, ioc, ioc_type, direction): ignore_alerts = self.should_ignore_alerts(what_to_ignore) ignore_alerts_from_org = ( ignore_alerts - and is_src + and ioc.direction == Direction.SRC and self.should_ignore_from(from_) ) ignore_alerts_to_org = ( ignore_alerts - and is_dst + and ioc.direction == Direction.DST and self.should_ignore_to(from_) ) + + ioc_type: IoCType = ioc.attacker_type if isinstance(ioc, Attacker) \ + else \ + ioc.victim_type # Check if the IP in the alert belongs to a whitelisted organization - if ioc_type == 'domain': + if ioc_type == IoCType.DOMAIN.name: # Method 3 Check if the domains of this flow belong to this org domains - if self.is_domain_in_org(ioc, org): + if self.is_domain_in_org(ioc.value, org): return True - elif ioc_type == 'ip': + elif ioc_type == IoCType.IP.name: if ignore_alerts_from_org or ignore_alerts_to_org: # Method 1: using asn - self.is_ip_asn_in_org_asn(ioc, org) + self.is_ip_asn_in_org_asn(ioc.value, org) # Method 2 using the organization's list of ips - # ip doesn't have asn info, search in the list of organization IPs - if self.is_ip_in_org(ioc, org): - # self.print(f'Whitelisting evidence sent by {srcip} about {ip},' - # f'due to {ip} being in the range of {org}. {data} in {description}') + # ip doesn't have asn info, search in the list of + # organization IPs + if self.is_ip_in_org(ioc.value, org): + # self.print(f'Whitelisting evidence sent by {srcip} + # about {ip}. due to {ip} being in the range of { + # org}. {data} in {description}') return True diff --git a/slips_files/core/input.py b/slips_files/core/input.py index a7e4709ae..3aa2a2c70 100644 --- a/slips_files/core/input.py +++ b/slips_files/core/input.py @@ -11,62 +11,62 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. +import datetime +import json +import os +import signal +import subprocess +import sys +import threading +import time # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
# Contact: eldraco@gmail.com, sebastian.garcia@agents.fel.cvut.cz, stratosphere@aic.fel.cvut.cz from pathlib import Path from re import split -import signal -import sys -import os -from slips_files.common.abstracts.core import ICore -import datetime + from watchdog.observers import Observer -from slips_files.core.helpers.filemonitor import FileEventHandler -from slips_files.common.imports import * -import time -import queue -import json -import threading -import subprocess +from slips_files.common.abstracts.core import ICore +from slips_files.common.imports import * +from slips_files.core.helpers.filemonitor import FileEventHandler SUPPORTED_LOGFILES = ( - 'conn', - 'dns', - 'http', - 'ssl', - 'ssh', - 'dhcp', - 'ftp', - 'smtp', - 'tunnel', - 'notice', - 'files', - 'arp', - 'software', - 'weird' + "conn", + "dns", + "http", + "ssl", + "ssh", + "dhcp", + "ftp", + "smtp", + "tunnel", + "notice", + "files", + "arp", + "software", + "weird", ) - # Input Process class Input(ICore): - """ A class process to run the process of the flows """ + """A class process to run the process of the flows""" + + name = "Input" - name = 'Input' def init( - self, - is_input_done: multiprocessing.Semaphore = None, - profiler_queue=None, - input_type=None, - input_information=None, - cli_packet_filter= None, - zeek_or_bro=None, - zeek_dir=None, - line_type=None, - is_profiler_done_event : multiprocessing.Event =None + self, + is_input_done: multiprocessing.Semaphore = None, + profiler_queue=None, + input_type=None, + input_information=None, + cli_packet_filter=None, + zeek_or_bro=None, + zeek_dir=None, + line_type=None, + is_profiler_done_event: multiprocessing.Event = None, ): self.input_type = input_type self.profiler_queue = profiler_queue @@ -97,16 +97,14 @@ def init( target=self.remove_old_zeek_files, daemon=True ) self.open_file_handlers = {} - self.c1 = self.db.subscribe('remove_old_files') - self.channels = {'remove_old_files': self.c1} + self.c1 = self.db.subscribe("remove_old_files") + self.channels = {"remove_old_files": self.c1} self.timeout = None # zeek rotated files to be deleted after a period of time self.to_be_deleted = [] - self.zeek_thread = threading.Thread( - target=self.run_zeek, - daemon=True - ) - # used to give the profiler the total amount of flows to read with the first flow only + self.zeek_thread = threading.Thread(target=self.run_zeek, daemon=True) + # used to give the profiler the total amount of flows to + # read with the first flow only self.is_first_flow = True # is set by the profiler to tell this proc that we it is done processing # the input process and shut down and close the profiler queue no issue @@ -120,10 +118,11 @@ def is_done_processing(self): # signal slips.py that this process is done # tell profiler that this process is # done and no more flows are arriving - self.print(f"Telling Profiler to stop because " - f"no more input is arriving.", - log_to_logfiles_only=True) - self.profiler_queue.put('stop') + self.print( + f"Telling Profiler to stop because " f"no more input is arriving.", + log_to_logfiles_only=True, + ) + self.profiler_queue.put("stop") self.print(f"Waiting for Profiler to stop.", log_to_logfiles_only=True) self.is_profiler_done_event.wait() self.print(f"Input is done processing.", log_to_logfiles_only=True) @@ -141,8 +140,10 @@ def read_configuration(self): def stop_queues(self): """Stops the profiler queue""" - # By default if a process is not the creator of the queue then on exit it - # will attempt to join the queue’s background thread. 
The process can call cancel_join_thread() to make join_thread() do nothing. + # By default if a process is not the creator of the queue then on + # exit it will attempt to join the queue’s background thread. The + # process can call cancel_join_thread() to make join_thread() + # do nothing. self.profiler_queue.cancel_join_thread() def read_nfdump_output(self) -> int: @@ -152,20 +153,17 @@ def read_nfdump_output(self) -> int: """ if not self.nfdump_output: # The nfdump command returned nothing - self.print('Error reading nfdump output ', 1, 3) + self.print("Error reading nfdump output ", 1, 3) else: self.total_flows = len(self.nfdump_output.splitlines()) - self.db.set_input_metadata({'total_flows': self.total_flows}) + self.db.set_input_metadata({"total_flows": self.total_flows}) for nfdump_line in self.nfdump_output.splitlines(): # this line is taken from stdout we need to remove whitespaces - nfdump_line.replace(' ', '') - ts = nfdump_line.split(',')[0] + nfdump_line.replace(" ", "") + ts = nfdump_line.split(",")[0] - line = { - 'type': 'nfdump', - 'data': nfdump_line - } + line = {"type": "nfdump", "data": nfdump_line} self.give_profiler(line) if self.testing: break @@ -177,10 +175,10 @@ def check_if_time_to_del_rotated_files(self): After a specific period (keep_rotated_files_for), slips deletes all rotated files Check if it's time to do so """ - if not hasattr(self, 'time_rotated'): + if not hasattr(self, "time_rotated"): return False - now = float(utils.convert_format(datetime.datetime.now(), 'unixtimestamp')) + now = float(utils.convert_format(datetime.datetime.now(), "unixtimestamp")) time_to_delete = now >= self.time_rotated + self.keep_rotated_files_for if time_to_delete: # getting here means that the rotated @@ -210,7 +208,7 @@ def get_file_handle(self, filename): except KeyError: # First time opening this file. try: - file_handler = open(filename, 'r') + file_handler = open(filename, "r") lock = threading.Lock() lock.acquire() self.open_file_handlers[filename] = file_handler @@ -227,7 +225,6 @@ def get_file_handle(self, filename): return False return file_handler - def get_ts_from_line(self, zeek_line: str): """ used only by zeek log files @@ -236,7 +233,7 @@ def get_ts_from_line(self, zeek_line: str): if self.is_zeek_tabs: # It is not JSON format. It is tab format line. nline = zeek_line - nline_list = nline.split('\t') if '\t' in nline else split(r'\s{2,}', nline) + nline_list = nline.split("\t") if "\t" in nline else split(r"\s{2,}", nline) timestamp = nline_list[0] else: try: @@ -245,7 +242,7 @@ def get_ts_from_line(self, zeek_line: str): return False, False # In some Zeek files there may not be a ts field # Like in some weird smb files - timestamp = nline.get('ts', 0) + timestamp = nline.get("ts", 0) try: timestamp = float(timestamp) except ValueError: @@ -278,7 +275,7 @@ def cache_nxt_line_in_file(self, filename: str): return False # Did the file end? - if not zeek_line or zeek_line.startswith('#'): + if not zeek_line or zeek_line.startswith("#"): # We reached the end of one of the files that we were reading. 
# Wait for more lines to come from another file return False @@ -287,13 +284,9 @@ def cache_nxt_line_in_file(self, filename: str): if not timestamp: return False - self.file_time[filename] = timestamp # Store the line in the cache - self.cache_lines[filename] = { - 'type': filename, - 'data': nline - } + self.cache_lines[filename] = {"type": filename, "data": nline} return True def reached_timeout(self) -> bool: @@ -301,9 +294,13 @@ def reached_timeout(self) -> bool: # it may mean that new lines are not arriving. Check if not self.cache_lines: # Verify that we didn't have any new lines in the - # last 10 seconds. Seems enough for any network to have ANY traffic - # Since we actually read something form any file, update the last time of read - diff = utils.get_time_diff(self.last_updated_file_time, datetime.datetime.now()) + # last 10 seconds. Seems enough for any network to have + # ANY traffic + # Since we actually read something form any file, update + # the last time of read + diff = utils.get_time_diff( + self.last_updated_file_time, datetime.datetime.now() + ) if diff >= self.bro_timeout: # It has been seconds without any file # being updated. So stop Zeek @@ -311,15 +308,17 @@ def reached_timeout(self) -> bool: return False def close_all_handles(self): - # We reach here after the break produced if no zeek files are being updated. + # We reach here after the break produced + # if no zeek files are being updated. # No more files to read. Close the files for file, handle in self.open_file_handlers.items(): - self.print(f'Closing file {file}', 2, 0) + self.print(f"Closing file {file}", 2, 0) handle.close() def get_earliest_line(self): """ - loops through all the caches lines and returns the line with the earliest ts + loops through all the caches lines and returns the line with the + earliest ts """ # Now read lines in order. 
The line with the earliest timestamp first files_sorted_by_ts = sorted(self.file_time, key=self.file_time.get) @@ -394,7 +393,7 @@ def read_zeek_files(self) -> int: def _make_gen(self, reader): """yeilds (64 kilobytes) at a time from the file""" while True: - b = reader(2 ** 16) + b = reader(2**16) if not b: break yield b @@ -408,12 +407,11 @@ def get_flows_number(self, file: str) -> int: # this method is the most efficient and accurate i found online # https://stackoverflow.com/a/68385697/11604069 - with open(file, "rb") as f: # counts the occurances of \n in a file count = sum(buf.count(b"\n") for buf in self._make_gen(f.raw.read)) - if hasattr(self, 'is_zeek_tabs') and self.is_zeek_tabs: + if hasattr(self, "is_zeek_tabs") and self.is_zeek_tabs: # subtract comment lines in zeek tab files, # they shouldn't be considered flows @@ -433,15 +431,13 @@ def read_zeek_folder(self): if growing_zeek_dir: # slips is given a dir that is growing i.e zeek dir running on an interface # don't stop zeek or slips - self.bro_timeout = float('inf') - + self.bro_timeout = float("inf") self.zeek_dir = self.given_path self.start_observer() - # if 1 file is zeek tabs the rest should be the same - if not hasattr(self, 'is_zeek_tabs'): + if not hasattr(self, "is_zeek_tabs"): full_path = os.path.join(self.given_path, os.listdir(self.given_path)[0]) self.is_zeek_tabs = self.is_zeek_tabs_file(full_path) @@ -472,7 +468,7 @@ def read_zeek_folder(self): return True self.total_flows = total_flows - self.db.set_input_metadata({'total_flows': total_flows}) + self.db.set_input_metadata({"total_flows": total_flows}) self.lines = self.read_zeek_files() self.print_lines_read() self.is_done_processing() @@ -480,87 +476,77 @@ def read_zeek_folder(self): def print_lines_read(self): self.print( - f'We read everything. No more input. ' - f'Stopping input process. Sent {self.lines} lines' + f"We read everything. No more input. " + f"Stopping input process. 
Sent {self.lines} lines" ) def stdin(self): """opens the stdin in read mode""" sys.stdin.close() - sys.stdin = os.fdopen(0, 'r') + sys.stdin = os.fdopen(0, "r") return sys.stdin - def read_from_stdin(self) -> bool: - self.print('Receiving flows from stdin.') + self.print("Receiving flows from stdin.") for line in self.stdin(): - if line == '\n': + if line == "\n": continue - if line == 'done': + if line == "done": break # slips supports reading zeek json conn.log only using stdin, # tabs aren't supported - if self.line_type == 'zeek': + if self.line_type == "zeek": try: line = json.loads(line) except json.decoder.JSONDecodeError: - self.print('Invalid json line') + self.print("Invalid json line") continue - line_info = { - 'type': 'stdin', - 'line_type': self.line_type, - 'data': line - } - self.print(f' > Sent Line: {line_info}', 0, 3) + line_info = {"type": "stdin", "line_type": self.line_type, "data": line} + self.print(f" > Sent Line: {line_info}", 0, 3) self.give_profiler(line_info) self.lines += 1 - self.print('Done reading 1 flow.\n ', 0, 3) + self.print("Done reading 1 flow.\n ", 0, 3) return True def handle_binetflow(self): # the number of flows returned by get_flows_number contains the header, so subtract that - self.total_flows = self.get_flows_number(self.given_path) -1 - self.db.set_input_metadata({'total_flows': self.total_flows}) + self.total_flows = self.get_flows_number(self.given_path) - 1 + self.db.set_input_metadata({"total_flows": self.total_flows}) self.lines = 0 with open(self.given_path) as file_stream: # read first line to determine the type of line, tab or comma separated t_line = file_stream.readline() - type_ = 'argus-tabs' if '\t' in t_line else 'argus' - line = { - 'type': type_, - 'data': t_line - } + type_ = "argus-tabs" if "\t" in t_line else "argus" + line = {"type": type_, "data": t_line} self.give_profiler(line) self.lines += 1 # go through the rest of the file for t_line in file_stream: - line = { - 'type': type_, - 'data': t_line - } + line = {"type": type_, "data": t_line} # argus files are either tab separated orr comma separated if len(t_line.strip()) != 0: self.give_profiler(line) self.lines += 1 - if self.testing: break + if self.testing: + break self.is_done_processing() return True def handle_suricata(self): self.total_flows = self.get_flows_number(self.given_path) - self.db.set_input_metadata({'total_flows': self.total_flows}) + self.db.set_input_metadata({"total_flows": self.total_flows}) with open(self.given_path) as file_stream: for t_line in file_stream: line = { - 'type': 'suricata', - 'data': t_line, + "type": "suricata", + "data": t_line, } - self.print(f' > Sent Line: {line}', 0, 3) + self.print(f" > Sent Line: {line}", 0, 3) if len(t_line.strip()) != 0: self.give_profiler(line) self.lines += 1 @@ -574,10 +560,10 @@ def is_zeek_tabs_file(self, filepath: str) -> bool: returns true if the given path is a zeek tab separated file :param filepath: full log file path with the .log extension """ - with open(filepath,'r') as f: + with open(filepath, "r") as f: line = f.readline() - if '\t' in line: + if "\t" in line: return True if line.startswith("#separator"): @@ -594,10 +580,9 @@ def handle_zeek_log_file(self): and conn.log flows given to slips through CYST unix socket. 
""" if ( - (not self.given_path.endswith(".log") - or self.is_ignored_file(self.given_path)) - and 'cyst' not in self.given_path.lower() - ): + not self.given_path.endswith(".log") + or self.is_ignored_file(self.given_path) + ) and "cyst" not in self.given_path.lower(): # unsupported file return False @@ -605,7 +590,7 @@ def handle_zeek_log_file(self): # in case of CYST flows, the given path is 'cyst' and there's no way to get the total flows self.is_zeek_tabs = self.is_zeek_tabs_file(self.given_path) total_flows = self.get_flows_number(self.given_path) - self.db.set_input_metadata({'total_flows': total_flows}) + self.db.set_input_metadata({"total_flows": total_flows}) self.total_flows = total_flows # Add log file to database @@ -621,38 +606,29 @@ def handle_zeek_log_file(self): return True def handle_nfdump(self): - command = f'nfdump -b -N -o csv -q -r {self.given_path}' + command = f"nfdump -b -N -o csv -q -r {self.given_path}" # Execute command result = subprocess.run(command.split(), stdout=subprocess.PIPE) # Get command output - self.nfdump_output = result.stdout.decode('utf-8') + self.nfdump_output = result.stdout.decode("utf-8") self.lines = self.read_nfdump_output() self.print_lines_read() self.is_done_processing() return True - def start_observer(self): # Now start the observer of new files. We need the observer because Zeek does not create all the files # at once, but when the traffic appears. That means that we need # some process to tell us which files to read in real time when they appear # Get the file eventhandler # We have to set event_handler and event_observer before running zeek. - event_handler = FileEventHandler( - self.zeek_dir, - self.input_type, - self.db - ) + event_handler = FileEventHandler(self.zeek_dir, self.input_type, self.db) # Create an observer self.event_observer = Observer() # Schedule the observer with the callback on the file handler - self.event_observer.schedule( - event_handler, self.zeek_dir, recursive=True - ) + self.event_observer.schedule(event_handler, self.zeek_dir, recursive=True) # monitor changes to whitelist - self.event_observer.schedule( - event_handler, 'config/', recursive=True - ) + self.event_observer.schedule(event_handler, "config/", recursive=True) # Start the observer self.event_observer.start() @@ -662,13 +638,13 @@ def handle_pcap_and_interface(self) -> int: # Create zeek_folder if does not exist. if not os.path.exists(self.zeek_dir): os.makedirs(self.zeek_dir) - self.print(f'Storing zeek log files in {self.zeek_dir}') + self.print(f"Storing zeek log files in {self.zeek_dir}") self.start_observer() - if self.input_type == 'interface': + if self.input_type == "interface": # We don't want to stop bro if we read from an interface - self.bro_timeout = float('inf') - elif self.input_type == 'pcap': + self.bro_timeout = float("inf") + elif self.input_type == "pcap": # This is for stopping the inputprocess # if bro does not receive any new line while reading a pcap self.bro_timeout = 30 @@ -684,16 +660,20 @@ def handle_pcap_and_interface(self) -> int: # Give Zeek some time to generate at least 1 file. 
time.sleep(3) - self.db.store_process_PID('Zeek', self.zeek_pid) - if not hasattr(self, 'is_zeek_tabs'): + self.db.store_process_PID("Zeek", self.zeek_pid) + if not hasattr(self, "is_zeek_tabs"): self.is_zeek_tabs = False self.lines = self.read_zeek_files() self.print_lines_read() self.is_done_processing() - connlog_path = os.path.join(self.zeek_dir, 'conn.log') + connlog_path = os.path.join(self.zeek_dir, "conn.log") - self.print(f"Number of zeek generated flows in conn.log: {self.get_flows_number(connlog_path)}", 2, 0) + self.print( + f"Number of zeek generated flows in conn.log: {self.get_flows_number(connlog_path)}", + 2, + 0, + ) self.stop_observer() return True @@ -714,15 +694,15 @@ def remove_old_zeek_files(self): """ while not self.should_stop(): # keep the rotated files for the period specified in slips.conf - if msg := self.get_msg('remove_old_files'): + if msg := self.get_msg("remove_old_files"): # this channel receives renamed zeek log files, we can safely delete them and close their handle - changed_files = json.loads(msg['data']) + changed_files = json.loads(msg["data"]) # for example the old log file should be ./zeek_files/dns.2022-05-11-14-43-20.log # new log file should be dns.log without the ts - old_log_file = changed_files['old_file'] - new_log_file = changed_files['new_file'] - new_logfile_without_path = new_log_file.split('/')[-1].split('.')[0] + old_log_file = changed_files["old_file"] + new_log_file = changed_files["new_file"] + new_logfile_without_path = new_log_file.split("/")[-1].split(".")[0] # ignored files have no open handle, so we should only delete them from disk if new_logfile_without_path not in SUPPORTED_LOGFILES: # just delete the old file @@ -745,7 +725,9 @@ def remove_old_zeek_files(self): pass # delete the old log file (the one with the ts) self.to_be_deleted.append(old_log_file) - self.time_rotated = float(utils.convert_format(datetime.datetime.now(), 'unixtimestamp')) + self.time_rotated = float( + utils.convert_format(datetime.datetime.now(), "unixtimestamp") + ) # os.remove(old_log_file) lock.release() @@ -762,10 +744,10 @@ def shutdown_gracefully(self): except: pass - if hasattr(self, 'open_file_handlers'): + if hasattr(self, "open_file_handlers"): self.close_all_handles() - if hasattr(self, 'zeek_pid'): + if hasattr(self, "zeek_pid"): # kill zeek manually if it started bc it's detached from this process and will never recv the sigint # also withoutt this, inputproc will never shutdown and will always remain in memory causing 1000 bugs in # proc_man:shutdown_gracefully() @@ -780,6 +762,7 @@ def run_zeek(self): """ This thread sets the correct zeek parameters and starts zeek """ + def detach_child(): """ Detach zeek from the parent process group(inputprocess), the child(zeek) @@ -791,13 +774,16 @@ def detach_child(): # rotation is disabled unless it's an interface rotation = [] - if self.input_type == 'interface': + if self.input_type == "interface": if self.enable_rotation: # how often to rotate zeek files? 
taken from slips.conf - rotation = ['-e', f"redef Log::default_rotation_interval = {self.rotation_period} ;"] - bro_parameter = ['-i', self.given_path] + rotation = [ + "-e", + f"redef Log::default_rotation_interval = {self.rotation_period} ;", + ] + bro_parameter = ["-i", self.given_path] - elif self.input_type == 'pcap': + elif self.input_type == "pcap": # Find if the pcap file name was absolute or relative given_path = self.given_path if not os.path.isabs(self.given_path): @@ -811,29 +797,27 @@ def detach_child(): # using a list of params instead of a str for storing the cmd # becaus ethe given path may contain spaces - bro_parameter = ['-r', given_path] - + bro_parameter = ["-r", given_path] # Run zeek on the pcap or interface. The redef is to have json files - zeek_scripts_dir = os.path.join(os.getcwd(), 'zeek-scripts') - packet_filter = ['-f ', self.packet_filter] if self.packet_filter else [] + zeek_scripts_dir = os.path.join(os.getcwd(), "zeek-scripts") + packet_filter = ["-f ", self.packet_filter] if self.packet_filter else [] # 'local' is removed from the command because it # loads policy/protocols/ssl/expiring-certs and # and policy/protocols/ssl/validate-certs and they have conflicts with our own # zeek-scripts/expiring-certs and validate-certs # we have our own copy pf local.zeek in __load__.zeek - command = [self.zeek_or_bro, '-C'] + command = [self.zeek_or_bro, "-C"] command += bro_parameter command += [ - f'tcp_inactivity_timeout={self.tcp_inactivity_timeout}mins', - 'tcp_attempt_delay=1min', - zeek_scripts_dir + f"tcp_inactivity_timeout={self.tcp_inactivity_timeout}mins", + "tcp_attempt_delay=1min", + zeek_scripts_dir, ] command += rotation command += packet_filter - self.print(f'Zeek command: {" ".join(command)}', 3, 0) zeek = subprocess.Popen( @@ -842,7 +826,7 @@ def detach_child(): stderr=subprocess.PIPE, stdin=subprocess.PIPE, cwd=self.zeek_dir, - start_new_session=True + start_new_session=True, ) # you have to get the pid before communicate() self.zeek_pid = zeek.pid @@ -851,7 +835,9 @@ def detach_child(): if out: print(f"Zeek: {out}") if error: - self.print(f"Zeek error. return code: {zeek.returncode} error:{error.strip()}") + self.print( + f"Zeek error. return code: {zeek.returncode} error:{error.strip()}" + ) def handle_cyst(self): """ @@ -861,35 +847,35 @@ def handle_cyst(self): # slips supports reading zeek json conn.log only using CYST # this type is passed here by slips.py, so in the future # to support more types, modify slips.py - if self.line_type != 'zeek': + if self.line_type != "zeek": return - channel = self.db.subscribe('new_module_flow') - self.channels.update({'new_module_flow': channel}) + channel = self.db.subscribe("new_module_flow") + self.channels.update({"new_module_flow": channel}) while not self.should_stop(): # the CYST module will send msgs to this channel when it read s a new flow from the CYST UDS # todo when to break? cyst should send something like stop? 
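# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original patch; the values are made
# up). Shape of the envelope this loop builds just below and forwards to
# the profiler queue for every CYST flow:
line_info = {
    "type": "external_module",  # where the flow came from
    "module": "cyst",           # the module that produced it
    "line_type": "zeek",        # only zeek json conn.log is supported here
    "data": {"ts": 0.0, "uid": "C1", "id.orig_h": "10.0.0.1"},  # the flow
}
# ----------------------------------------------------------------------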
- msg = self.get_msg('new_module_flow') - if msg and msg['data'] == 'stop_process': + msg = self.get_msg("new_module_flow") + if msg and msg["data"] == "stop_process": self.shutdown_gracefully() return True - if msg := self.get_msg('new_module_flow'): + if msg := self.get_msg("new_module_flow"): msg: str = msg["data"] msg = json.loads(msg) - flow = msg['flow'] - src_module = msg['module'] + flow = msg["flow"] + src_module = msg["module"] line_info = { - 'type': 'external_module', - 'module': src_module, - 'line_type': self.line_type, - 'data': flow + "type": "external_module", + "module": src_module, + "line_type": self.line_type, + "data": flow, } - self.print(f' > Sent Line: {line_info}', 0, 3) + self.print(f" > Sent Line: {line_info}", 0, 3) self.give_profiler(line_info) self.lines += 1 - self.print('Done reading 1 CYST flow.\n ', 0, 3) + self.print("Done reading 1 CYST flow.\n ", 0, 3) time.sleep(2) self.is_done_processing() @@ -899,28 +885,26 @@ def give_profiler(self, line): sends the given txt/dict to the profilerqueue for process sends the total amount of flows to process with the first flow only """ - to_send = { - 'line': line, - 'input_type': self.input_type - } + to_send = {"line": line, "input_type": self.input_type} # send the total flows slips is going to read to the profiler - # the profiler will give it to output() for initialising the progress bar - # in case of interface and pcaps, we don't know the total_flows beforehand + # the profiler will give it to output() for initialising + # the progress bar in case of interface and pcaps, we don't know + # the total_flows beforehand # and we don't print a pbar - if self.is_first_flow and hasattr(self, 'total_flows'): + if self.is_first_flow and hasattr(self, "total_flows"): self.is_first_flow = False - to_send.update({ - 'total_flows': self.total_flows, - }) - # when the queue is full, the default behaviour is to block if necessary until a free slot is available + to_send.update( + { + "total_flows": self.total_flows, + } + ) + # when the queue is full, the default behaviour is to block + # if necessary until a free slot is available self.profiler_queue.put(to_send) def main(self): utils.drop_root_privs() - if ( - '-i' in sys.argv - or self.db.is_growing_zeek_dir() - ): + if "-i" in sys.argv or self.db.is_growing_zeek_dir(): # this thread should be started from run() to get the PID of inputprocess and have shared variables # if it started from __init__() it will have the PID of slips.py therefore, # any changes made to the shared variables in inputprocess will not appear in the thread @@ -928,29 +912,26 @@ def main(self): self.remover_thread.start() input_handlers = { - 'stdin': self.read_from_stdin, - 'zeek_folder': self.read_zeek_folder, - 'zeek_log_file': self.handle_zeek_log_file, - 'nfdump': self.handle_nfdump, - 'binetflow': self.handle_binetflow, - 'binetflow-tabs': self.handle_binetflow, - 'pcap': self.handle_pcap_and_interface, - 'interface': self.handle_pcap_and_interface, - 'suricata': self.handle_suricata, - 'CYST': self.handle_cyst, + "stdin": self.read_from_stdin, + "zeek_folder": self.read_zeek_folder, + "zeek_log_file": self.handle_zeek_log_file, + "nfdump": self.handle_nfdump, + "binetflow": self.handle_binetflow, + "binetflow-tabs": self.handle_binetflow, + "pcap": self.handle_pcap_and_interface, + "interface": self.handle_pcap_and_interface, + "suricata": self.handle_suricata, + "CYST": self.handle_cyst, } try: # Process the file that was given input_handlers[self.input_type]() except KeyError: - self.print( 
- f'Unrecognized file type "{self.input_type}". Stopping.' - ) + self.print(f'Unrecognized file type "{self.input_type}". Stopping.') return False # no logic should be put here # because some of the above handlers never return # e.g. interface, stdin, cyst etc. return 1 - diff --git a/slips_files/core/input_profilers/argus.py b/slips_files/core/input_profilers/argus.py index a08e950a8..9a2cff195 100644 --- a/slips_files/core/input_profilers/argus.py +++ b/slips_files/core/input_profilers/argus.py @@ -133,5 +133,5 @@ def define_columns(self, new_line: dict) -> dict: self.print( f'\tProblem in define_columns() line {exception_line}', 0, 1 ) - self.print(traceback.print_exc(),0,1) + self.print(traceback.print_stack(),0,1) sys.exit(1) diff --git a/slips_files/core/output.py b/slips_files/core/output.py index 3500ae92a..9af1003c2 100644 --- a/slips_files/core/output.py +++ b/slips_files/core/output.py @@ -1,6 +1,5 @@ # Stratosphere Linux IPS. A machine-learning Intrusion Detection System # Copyright (C) 2021 Sebastian Garcia - # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 @@ -15,107 +14,93 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Contact: eldraco@gmail.com, sebastian.garcia@agents.fel.cvut.cz, stratosphere@aic.fel.cvut.cz -from slips_files.common.abstracts.observer import IObserver -from slips_files.common.parsers.config_parser import ConfigParser -from slips_files.core.helpers.progress_bar import PBar -from slips_files.common.slips_utils import utils -from slips_files.common.style import red +import traceback from threading import Lock +from multiprocessing.connection import Connection +from multiprocessing import Event import sys import io -import time from pathlib import Path from datetime import datetime import os -from multiprocessing import Pipe, Manager + +from slips_files.common.abstracts.observer import IObserver +from slips_files.common.parsers.config_parser import ConfigParser +from slips_files.common.slips_utils import utils +from slips_files.common.style import red class Output(IObserver): """ - A class to process the output of everything Slips need. Manages all the output - If any Slips module or process needs to output anything to screen, or logs, - it should use always the output queue. Then this output class will handle how to deal with it + A class to process the output of everything Slips need. + Manages all the output + If any Slips module or process needs to output anything to screen, + or logs, it should use always this process. 
+ Then this output class will handle how to deal with it """ - + name = 'Output' - _obj = None slips_logfile_lock = Lock() errors_logfile_lock = Lock() cli_lock = Lock() - has_pbar = False - - def __new__( - cls, - verbose=1, - debug=0, - stdout='', - stderr='output/errors.log', - slips_logfile='output/slips.log', - slips_mode='interactive', - input_type=False + + def __init__(self, + verbose = 1, + debug = 0, + stdout = '', + stderr = 'output/errors.log', + slips_logfile = 'output/slips.log', + input_type = False, + sender_pipe: Connection = None, + has_pbar: bool = False, + pbar_finished: Event = None, + stop_daemon: bool = None, ): - if not cls._obj: - cls._obj = super().__new__(cls) - # when running slips using -e , this var is set and we only - # print all msgs with debug lvl less than it - cls.verbose = verbose - cls.debug = debug - cls.input_type = input_type - ####### create the log files - cls._read_configuration() - cls.errors_logfile = stderr - cls.slips_logfile = slips_logfile - cls.create_logfile(cls.errors_logfile) - cls.create_logfile(cls.slips_logfile) - utils.change_logfiles_ownership(cls.errors_logfile, cls.UID, cls.GID) - utils.change_logfiles_ownership(cls.slips_logfile, cls.UID, cls.GID) - cls.stdout = stdout - if stdout != '': - cls.change_stdout() - # Pipe(False) means the pipe is unidirectional. - # aka only msgs can go from output -> pbar and not vice versa - # recv_pipe used only for receiving, - # send_pipe use donly for sending - cls.recv_pipe, cls.send_pipe = Pipe(False) - # using mp manager to be able to change this value - # from the PBar class and have it changed here - cls.slips_mode = slips_mode - - cls.manager = Manager() - cls.has_pbar = cls.manager.Value("has_pbar", False) - - cls.pbar = PBar( - cls.recv_pipe, - cls.has_pbar, - cls.slips_mode, - cls.input_type, - cls.stdout + super().__init__() + # when running slips using -e , this var is set and we only + # print all msgs with debug lvl less than it + self.verbose = verbose + self.debug = debug + self.input_type = input_type + self.has_pbar = has_pbar + self.pbar_finished: Event = pbar_finished + self.sender_pipe = sender_pipe + self.stop_daemon = stop_daemon + self.errors_logfile = stderr + self.slips_logfile = slips_logfile + # if we're using -S, no need to init all the logfiles + # we just need an instance of this class to be able + # to start the db from the daemon class + if not stop_daemon: + self._read_configuration() + + self.create_logfile(self.errors_logfile) + self.log_branch_info(self.errors_logfile) + self.create_logfile(self.slips_logfile) + self.log_branch_info(self.slips_logfile) + + utils.change_logfiles_ownership( + self.errors_logfile, self.UID, self.GID ) - cls.pbar.start() - - if cls.verbose > 2: - print(f'Verbosity: {cls.verbose}. Debugging: {cls.debug}') - - - cls.done_reading_flows = False - # we update the stats printed by slips every 5seconds - # this is the last time the stats was printed - cls.last_updated_stats_time = float("-inf") - - - return cls._obj + utils.change_logfiles_ownership( + self.slips_logfile, self.UID, self.GID + ) + self.stdout = stdout + if stdout != '': + self.change_stdout() + + if self.verbose > 2: + print(f'Verbosity: {self.verbose}. 
Debugging: {self.debug}') - @classmethod - def _read_configuration(cls): + def _read_configuration(self): conf = ConfigParser() - cls.printable_twid_width = conf.get_tw_width() - cls.GID = conf.get_GID() - cls.UID = conf.get_UID() + self.printable_twid_width = conf.get_tw_width() + self.GID = conf.get_GID() + self.UID = conf.get_UID() - @classmethod - def log_branch_info(cls, logfile: str): + def log_branch_info(self, logfile: str): """ logs the branch and commit to the given logfile """ @@ -136,8 +121,7 @@ def log_branch_info(cls, logfile: str): with open(logfile, 'a') as f: f.write(f'Using {git_info} - {now}\n\n') - @classmethod - def create_logfile(cls, path): + def create_logfile(self, path): """ creates slips.log and errors.log if they don't exist """ @@ -147,7 +131,7 @@ def create_logfile(cls, path): p = Path(os.path.dirname(path)) p.mkdir(parents=True, exist_ok=True) open(path, 'w').close() - cls.log_branch_info(path) + def log_line(self, msg: dict): @@ -171,20 +155,23 @@ def log_line(self, msg: dict): self.slips_logfile_lock.release() - @classmethod - def change_stdout(cls): + def change_stdout(self): """ to be able to print the stats to the output file """ # io.TextIOWrapper creates a file object of this file - # Pass 0 to open() to switch output buffering off (only allowed in binary mode) - # write_through= True, to flush the buffer to disk, from there the file can read it. - # without it, the file writer keeps the information in a local buffer that's not accessible to the file. - sys.stdout = io.TextIOWrapper( - open(cls.stdout, 'wb', 0), + # Pass 0 to open() to switch output buffering off + # (only allowed in binary mode) + # write_through= True, to flush the buffer to disk, from there the + # file can read it. + # without it, the file writer keeps the information in a local buffer + # that's not accessible to the file. 
+ stdout = io.TextIOWrapper( + open(self.stdout, 'wb', 0), write_through=True ) - return + sys.stdout = stdout + return stdout def print(self, sender: str, txt: str, end='\n'): """ @@ -199,13 +186,14 @@ def print(self, sender: str, txt: str, end='\n'): else: to_print = txt - if self.has_pbar.value: + if self.has_pbar and not self.is_pbar_finished(): self.tell_pbar({ 'event': 'print', 'txt': to_print }) else: print(to_print, end=end) + self.cli_lock.release() @@ -226,11 +214,13 @@ def handle_printing_stats(self, stats: str): """ slips prints the stats as a pbar postfix, or in a separate line if pbar isn't supported - this method handles the 2 cases depending on the availability of the pbar + this method handles the 2 cases depending on the availability + of the pbar """ - # if we're done reading flows, aka pbar reached 100% or we dont have a pbar + # if we're done reading flows, aka pbar reached 100% or we dont + # have a pbar # we print the stats in a new line, instead of next to the pbar - if self.has_pbar.value: + if self.has_pbar and not self.is_pbar_finished(): self.tell_pbar({ 'event': 'update_stats', 'stats': stats @@ -255,9 +245,11 @@ def enough_debug(self, debug: int): def output_line(self, msg: dict): """ - Prints to terminal and logfiles depending on the debug and verbose levels + Prints to terminal and logfiles depending on the debug and verbose + levels """ - verbose, debug = msg.get('verbose', self.verbose), msg.get('debug', self.debug) + verbose = msg.get('verbose', self.verbose) + debug = msg.get('debug', self.debug) sender, txt = msg['from'], msg['txt'] # if debug level is 3 make it red @@ -274,33 +266,26 @@ def output_line(self, msg: dict): # when printing started processes, don't print a sender if 'Start' in txt: sender = '' - self.print(sender, txt) self.log_line(msg) # if the line is an error and we're running slips without -e 1 , # we should log the error to output/errors.log - # make sure the msg is an error. debug_level==1 is the one printing errors + # make sure the msg is an error. debug_level==1 is the one printing + # errors if debug == 1: self.log_error(msg) - def shutdown_gracefully(self): - """closes all communications with the pbar process""" - self.manager.shutdown() - self.send_pipe.close() - self.recv_pipe.close() - if hasattr(self, 'pbar'): - self.pbar.join(3) - - def tell_pbar(self, msg: dict): """ writes to the pbar pipe. anything sent by this method will be received by the pbar class """ - self.send_pipe.send(msg) - - + self.sender_pipe.send(msg) + + def is_pbar_finished(self )-> bool: + return self.pbar_finished.is_set() + def update(self, msg: dict): """ gets called whenever any module need to print something @@ -312,7 +297,8 @@ def update(self, msg: dict): txt: text to log to the logfiles and/or the cli bar_info: { input_type: only given when we send bar:'init', - specifies the type of the input file given to slips + specifies the type of the input file + given to slips eg zeek, argus, etc total_flows: int, } @@ -324,12 +310,14 @@ def update(self, msg: dict): 'total_flows': msg['bar_info']['total_flows'], }) - elif 'update' in msg.get('bar', ''): + elif ( + 'update' in msg.get('bar', '') + and not self.is_pbar_finished() + ): # if pbar wasn't supported, inputproc won't send update msgs self.tell_pbar({ 'event': 'update_bar', }) - else: # output to terminal and logs or logs only? 
                if msg.get('log_to_logfiles_only', False):
@@ -339,3 +327,4 @@
                 self.output_line(msg)
         except Exception as e:
             print(f"Error in output.py: {e}")
+            print(traceback.format_exc())
diff --git a/slips_files/core/profiler.py b/slips_files/core/profiler.py
index 105b8e92f..2cfac61e1 100644
--- a/slips_files/core/profiler.py
+++ b/slips_files/core/profiler.py
@@ -61,7 +61,8 @@ class Profiler(ICore):
     def init(self,
              is_profiler_done: multiprocessing.Semaphore = None,
              profiler_queue=None,
-             is_profiler_done_event : multiprocessing.Event =None
+             is_profiler_done_event: multiprocessing.Event = None,
+             has_pbar: bool = False,
              ):
         # when profiler is done processing, it releases this semaphore,
         # that's how the process_manager knows it's done
@@ -74,17 +75,20 @@ def init(self,
         self.input_type = False
         self.whitelisted_flows_ctr = 0
         self.rec_lines = 0
+        self.has_pbar = has_pbar
         self.whitelist = Whitelist(self.logger, self.db)
         # Read the configuration
         self.read_configuration()
         self.symbol = SymbolHandler(self.logger, self.db)
-        # there has to be a timeout or it will wait forever and never receive a new line
+        # there has to be a timeout or it will wait forever and never
+        # receive a new line
         self.timeout = 0.0000001
         self.c1 = self.db.subscribe('reload_whitelist')
         self.channels = {
             'reload_whitelist': self.c1,
         }
-        # is set by this proc to tell input proc that we are dne processing and it can exit no issue
+        # is set by this proc to tell input proc that we are done
+        # processing and that it can exit safely
         self.is_profiler_done_event = is_profiler_done_event
@@ -98,10 +102,13 @@ def read_configuration(self):
     def convert_starttime_to_epoch(self):
         try:
-            self.flow.starttime = utils.convert_format(self.flow.starttime, 'unixtimestamp')
+            self.flow.starttime = utils.convert_format(
+                self.flow.starttime,
+                'unixtimestamp')
         except ValueError:
             self.print(f'We can not recognize time format of '
-                       f'self.flow.starttime: {self.flow.starttime}', 0, 1)
+                       f'self.flow.starttime: {self.flow.starttime}',
+                       0, 1)

     def get_rev_profile(self):
         """
@@ -112,22 +119,25 @@ def get_rev_profile(self):
             # some flows don't have a daddr like software.log flows
             return False, False

-        rev_profileid = self.db.getProfileIdFromIP(self.daddr_as_obj)
+        rev_profileid: str = self.db.get_profileid_from_ip(self.flow.daddr)
         if not rev_profileid:
             # the profileid is not present in the db, create it
             rev_profileid = f'profile_{self.flow.daddr}'
-            self.db.addProfile(rev_profileid, self.flow.starttime, self.width)
+            self.db.add_profile(rev_profileid, self.flow.starttime, self.width)

-        # in the database, Find the id of the tw where the flow belongs.
-        rev_twid = self.db.get_timewindow(self.flow.starttime, rev_profileid)
+        # in the database, Find and register the id of the tw where the flow
+        # belongs.
+        rev_twid: str = self.db.get_timewindow(
+            self.flow.starttime, rev_profileid)
         return rev_profileid, rev_twid

     def add_flow_to_profile(self):
         """
-        This is the main function that takes the columns of a flow and does all the magic to
-        convert it into a working data in our system.
-        It includes checking if the profile exists and how to put the flow correctly.
-        It interprets each column
+        This is the main function that takes the columns of a flow
+        and does all the magic to convert it into working data in our
+        system.
+        It includes checking if the profile exists and how to put
+        the flow correctly. It interprets each column
         """
         # try:
         if not hasattr(self, 'flow'):
@@ -159,15 +169,18 @@ def add_flow_to_profile(self):
             return True

         # 5th. Store the data according to the parameters
-        # Now that we have the profileid and twid, add the data from the flow in this tw for this profile
-        self.print(f'Storing data in the profile: {self.profileid}', 3, 0)
+        # Now that we have the profileid and twid, add the data from the flow
+        # in this tw for this profile
+        self.print(f'Storing data in the profile: {self.profileid}',
+                   3, 0)
         self.convert_starttime_to_epoch()
-        # For this 'forward' profile, find the id in the database of the tw where the flow belongs.
+        # For this 'forward' profile, find the id in the
+        # database of the tw where the flow belongs.
         self.twid = self.db.get_timewindow(self.flow.starttime, self.profileid)
         self.flow_parser.twid = self.twid
         # Create profiles for all ips we see
-        self.db.addProfile(self.profileid, self.flow.starttime, self.width)
+        self.db.add_profile(self.profileid, self.flow.starttime, self.width)
         self.store_features_going_out()
         if self.analysis_direction == 'all':
             self.handle_in_flows()
@@ -217,11 +230,15 @@ def store_features_going_out(self):

     def store_features_going_in(self, profileid: str, twid: str):
         """
-        If we have the all direction set , slips creates profiles for each IP, the src and dst
-        store features going our adds the conn in the profileA from IP A -> IP B in the db
-        this function stores the reverse of this connection. adds the conn in the profileB from IP B <- IP A
+        If we have the all direction set, slips creates profiles
+        for each IP, the src and dst
+        store_features_going_out adds the conn in the profileA from
+        IP A -> IP B in the db
+        this function stores the reverse of this connection. It adds
+        the conn in the profileB from IP B <- IP A
         """
-        # self.print(f'Storing features going in for profile {profileid} and tw {twid}')
+        # self.print(f'Storing features going in for profile
+        # {profileid} and tw {twid}')
         if (
             'flow' not in self.flow.type_
             and 'conn' not in self.flow.type_
@@ -295,21 +312,29 @@ def define_separator(self, line: dict, input_type: str):

     def shutdown_gracefully(self):
-        self.print(f"Stopping. Total lines read: {self.rec_lines}", log_to_logfiles_only=True)
-        # By default if a process(profiler) is not the creator of the queue(profiler_queue) then on
+        self.print(f"Stopping. Total lines read: {self.rec_lines}",
+                   log_to_logfiles_only=True)
+        # By default if a process (profiler) is not the creator of
+        # the queue (profiler_queue) then on
         # exit it will attempt to join the queue’s background thread.
# this causes a deadlock # to avoid this behaviour we should call cancel_join_thread # self.profiler_queue.cancel_join_thread() def is_done_processing(self): - """is called to mark this process as done processing so slips.py would know when to terminate""" + """ + is called to mark this process as done processing so + slips.py would know when to terminate + """ # signal slips.py that this process is done - self.print(f"Marking Profiler as done processing.", log_to_logfiles_only=True) + self.print(f"Marking Profiler as done processing.", + log_to_logfiles_only=True) self.done_processing.release() - self.print(f"Profiler is done processing.", log_to_logfiles_only=True) + self.print(f"Profiler is done processing.", + log_to_logfiles_only=True) self.is_profiler_done_event.set() - self.print(f"Profiler is done telling input.py that it's done processing.", log_to_logfiles_only=True) + self.print(f"Profiler is done telling input.py " + f"that it's done processing.", log_to_logfiles_only=True) def check_for_stop_msg(self, msg: str)-> bool: @@ -321,8 +346,8 @@ def check_for_stop_msg(self, msg: str)-> bool: if msg != 'stop': return False - - self.print(f"Stopping profiler process. Number of whitelisted conn flows: " + self.print(f"Stopping profiler process. Number of whitelisted " + f"conn flows: " f"{self.whitelisted_flows_ctr}", 2, 0) self.shutdown_gracefully() @@ -333,22 +358,10 @@ def check_for_stop_msg(self, msg: str)-> bool: self.is_done_processing() return True - def init_pbar(self, input_type: str, total_flows:int): + def init_pbar(self, total_flows:int): """ sends the output.py a msg with the pbar details for initialization """ - # don't init the pbar when given the following - # input types because we don't - # know the total flows beforehand - if ( - input_type in ('pcap', 'interface', 'stdin') - or '-t' in sys.argv - or '--testing' in sys.argv - ): - # pbar not supported - self.supported_pbar = False - return - # Find the number of flows we're going to receive of input received self.notify_observers({ 'bar': 'init', @@ -395,7 +408,8 @@ def main(self): if not self.input_type: # Find the type of input received self.input_type = self.define_separator(line, input_type) - self.init_pbar(input_type, total_flows) + if self.has_pbar: + self.init_pbar(total_flows) # What type of input do we have? 
if not self.input_type: @@ -403,7 +417,6 @@ def main(self): self.print("Can't determine input type.") return False - # only create the input obj once, # the rest of the flows will use the same input handler if not hasattr(self, 'input'): @@ -414,9 +427,9 @@ def main(self): if self.flow: self.add_flow_to_profile() - - # now that one flow is processed tell output.py to update the bar - if self.supported_pbar: + # now that one flow is processed tell output.py + # to update the bar + if self.has_pbar: self.notify_observers({'bar': 'update'}) # listen on this channel in case whitelist.conf is changed, diff --git a/tests/integration_tests/test_dataset.py b/tests/integration_tests/test_dataset.py index c78411384..7e0f657f4 100644 --- a/tests/integration_tests/test_dataset.py +++ b/tests/integration_tests/test_dataset.py @@ -66,21 +66,21 @@ def test_pcap( ( 'dataset/test4-malicious.binetflow', 2, - 'horizontal port scan to port 81', + 'Horizontal port scan to port 81', 'test4/', 6662, ), ( 'dataset/test3-mixed.binetflow', 20, - 'horizontal port scan to port 3389', + 'Horizontal port scan to port 3389/TCP', 'test3/', 6663, ), ( 'dataset/test2-malicious.binetflow', 1, - 'Detected Long Connection.', + 'Detected Long Connection.', 'test2/', 6664, ), @@ -157,9 +157,9 @@ def test_binetflow( [ 'bad SMTP login to 80.75.42.226', 'SMTP login bruteforce to 80.75.42.226. 3 logins in 10 seconds', - 'multiple empty HTTP connections to bing.com', - 'suspicious user-agent', - 'download of an executable', + 'Multiple empty HTTP connections to google.com', + 'Suspicious user-agent:', + 'Download of an executable', 'GRE tunnel' ], 'test14-malicious-zeek-dir/', diff --git a/tests/integration_tests/test_portscans.py b/tests/integration_tests/test_portscans.py index 159c30f63..7fe4850e8 100644 --- a/tests/integration_tests/test_portscans.py +++ b/tests/integration_tests/test_portscans.py @@ -32,7 +32,7 @@ def test_horizontal(path, output_dir, redis_port): """ output_dir = create_output_dir(output_dir) - expected_evidence = 'horizontal port scan to port 80/TCP. From 10.0.2.112' + expected_evidence = 'Horizontal port scan to port 80/TCP. 
From 10.0.2.112' output_file = os.path.join(output_dir, 'slips_output.txt') command = f'./slips.py -e 1 -t -f {path} -o {output_dir} -P {redis_port} > {output_file} 2>&1' diff --git a/tests/module_factory.py b/tests/module_factory.py index 75c72ba51..dcbb5e0a7 100644 --- a/tests/module_factory.py +++ b/tests/module_factory.py @@ -79,25 +79,25 @@ def create_main_obj(self, input_information): return main - def create_http_analyzer_obj(self, mock_rdb): + def create_http_analyzer_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): http_analyzer = HTTPAnalyzer(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event) - http_analyzer.db.rdb = mock_rdb + http_analyzer.db.rdb = mock_db # override the self.print function to avoid broken pipes http_analyzer.print = do_nothing return http_analyzer - def create_virustotal_obj(self, mock_rdb): + def create_virustotal_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): virustotal = VT(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event) - virustotal.db.rdb = mock_rdb + virustotal.db.rdb = mock_db # override the self.print function to avoid broken pipes virustotal.print = do_nothing @@ -107,44 +107,44 @@ def create_virustotal_obj(self, mock_rdb): ) return virustotal - def create_arp_obj(self, mock_rdb): + def create_arp_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): arp = ARP(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event ) - arp.db.rdb = mock_rdb + arp.db.rdb = mock_db # override the self.print function to avoid broken pipes arp.print = do_nothing return arp - def create_blocking_obj(self, mock_rdb): + def create_blocking_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): blocking = Blocking(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event) - blocking.db.rdb = mock_rdb + blocking.db.rdb = mock_db # override the print function to avoid broken pipes blocking.print = do_nothing return blocking - def create_flowalerts_obj(self, mock_rdb): + def create_flowalerts_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): flowalerts = FlowAlerts(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event) - flowalerts.db.rdb = mock_rdb + flowalerts.db.rdb = mock_db # override the self.print function to avoid broken pipes flowalerts.print = do_nothing return flowalerts def create_inputProcess_obj( - self, input_information, input_type, mock_rdb, line_type=False + self, input_information, input_type, mock_db, line_type=False ): zeek_tmp_dir = os.path.join(os.getcwd(), 'zeek_dir_for_testing' ) @@ -165,7 +165,7 @@ def create_inputProcess_obj( line_type=line_type, is_profiler_done_event=self.dummy_termination_event, ) - inputProcess.db.rdb = mock_rdb + inputProcess.db.rdb = mock_db inputProcess.is_done_processing = do_nothing inputProcess.bro_timeout = 1 # override the print function to avoid broken pipes @@ -176,22 +176,22 @@ def create_inputProcess_obj( return inputProcess - def create_ip_info_obj(self, mock_rdb): + def create_ip_info_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): ip_info = IPInfo(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event ) - ip_info.db.rdb = mock_rdb + ip_info.db.rdb = mock_db # override the self.print function to avoid broken pipes ip_info.print = do_nothing return ip_info - def create_asn_obj(self, db): - 
return ASN(db) + def create_asn_obj(self, mock_db): + return ASN(mock_db) - def create_leak_detector_obj(self, mock_rdb): + def create_leak_detector_obj(self, mock_db): # this file will be used for storing the module output # and deleted when the tests are done test_pcap = 'dataset/test7-malicious.pcap' @@ -202,7 +202,7 @@ def create_leak_detector_obj(self, mock_rdb): 'dummy_output_dir', 6379, self.dummy_termination_event) - leak_detector.db.rdb = mock_rdb + leak_detector.db.rdb = mock_db # override the self.print function to avoid broken pipes leak_detector.print = do_nothing # this is the path containing 1 yara rule for testing, it matches every pcap @@ -212,9 +212,9 @@ def create_leak_detector_obj(self, mock_rdb): return leak_detector - def create_profiler_obj(self): + def create_profiler_obj(self, mock_db): dummy_semaphore = Semaphore(0) - profilerProcess = Profiler( + profiler = Profiler( self.logger, 'output/', 6379, @@ -225,9 +225,10 @@ def create_profiler_obj(self): ) # override the self.print function to avoid broken pipes - profilerProcess.print = do_nothing - profilerProcess.whitelist_path = 'tests/test_whitelist.conf' - return profilerProcess + profiler.print = do_nothing + profiler.whitelist_path = 'tests/test_whitelist.conf' + profiler.db = mock_db + return profiler def create_redis_manager_obj(self, main): return RedisManager(main) @@ -238,35 +239,35 @@ def create_process_manager_obj(self): def create_utils_obj(self): return utils - def create_threatintel_obj(self, mock_rdb): + def create_threatintel_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): threatintel = ThreatIntel(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event) - threatintel.db.rdb = mock_rdb + threatintel.db = mock_db # override the self.print function to avoid broken pipes threatintel.print = do_nothing return threatintel - def create_update_manager_obj(self, mock_rdb): + def create_update_manager_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): update_manager = UpdateManager(self.logger, 'dummy_output_dir', 6379, self.dummy_termination_event) - update_manager.db.rdb = mock_rdb + update_manager.db.rdb = mock_db # override the self.print function to avoid broken pipes update_manager.print = do_nothing return update_manager - def create_whitelist_obj(self, mock_rdb): + def create_whitelist_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): - whitelist = Whitelist(self.logger, mock_rdb) - whitelist.db.rdb = mock_rdb + whitelist = Whitelist(self.logger, mock_db) + whitelist.db.rdb = mock_db # override the self.print function to avoid broken pipes whitelist.print = do_nothing @@ -274,18 +275,18 @@ def create_whitelist_obj(self, mock_rdb): return whitelist - def create_flow_handler_obj(self, flow ,mock_rdb): + def create_flow_handler_obj(self, flow ,mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): - symbol = SymbolHandler(self.logger, mock_rdb) - flow_handler = FlowHandler(mock_rdb, symbol, flow) + symbol = SymbolHandler(self.logger, mock_db) + flow_handler = FlowHandler(mock_db, symbol, flow) return flow_handler - def create_horizontal_portscan_obj(self, mock_rdb): + def create_horizontal_portscan_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): - horizontal_ps = HorizontalPortscan(mock_rdb) + horizontal_ps = HorizontalPortscan(mock_db) return horizontal_ps - def create_vertical_portscan_obj(self, 
mock_rdb): + def create_vertical_portscan_obj(self, mock_db): with patch.object(DBManager, 'create_sqlite_db', return_value=Mock()): - vertical_ps = VerticalPortscan(mock_rdb) + vertical_ps = VerticalPortscan(mock_db) return vertical_ps \ No newline at end of file diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh index e926393db..43f713852 100755 --- a/tests/run_all_tests.sh +++ b/tests/run_all_tests.sh @@ -6,14 +6,11 @@ printf "0" | ./slips.py -k # run all unit tests, -n *5 means distribute tests on 5 different process # -s to see print statements as they are executed -python3 -m pytest tests/ --ignore="tests/test_daemon.py" --ignore="tests/test_database.py" --ignore="tests/integration_tests" -n 7 -p no:warnings -vvvv -s --full-trace +python3 -m pytest tests/ --ignore="tests/test_daemon.py" --ignore="tests/test_database.py" --ignore="tests/integration_tests" -n 7 -p no:warnings -vvvv -s -## run db and daemon tests serially/using 1 worker +## run db tests serially/using 1 worker python3 -m pytest tests/test_database.py -p no:warnings -vvvv -s -# running serially because slips only supports running 1 daemon at a time -python3 -m pytest tests/test_daemon.py -p no:warnings -vvvv -s - # Close all redis-servers opened by the unit tests python3 tests/destrctor.py @@ -22,7 +19,7 @@ python3 tests/destrctor.py # close all open redis servers printf "0" | ./slips.py -k -# + # the command to run dataset tests is separated from the rest because it takes so much time, # so it's better to know and fix the failing unit tests from the above # command before running the dataset tests diff --git a/tests/test_arp.py b/tests/test_arp.py index eb85c66ec..fd7a52d03 100644 --- a/tests/test_arp.py +++ b/tests/test_arp.py @@ -9,8 +9,10 @@ # check_arp_scan is tested in test_dataset.py, check arp-only unit test -def test_check_dstip_outside_localnet(mock_rdb): - ARP = ModuleFactory().create_arp_obj(mock_rdb) +def test_check_dstip_outside_localnet( + mock_db + ): + ARP = ModuleFactory().create_arp_obj(mock_db) daddr = '1.1.1.1' uid = '1234' saddr = '192.168.1.1' @@ -20,8 +22,10 @@ def test_check_dstip_outside_localnet(mock_rdb): ) -def test_detect_unsolicited_arp(mock_rdb): - ARP = ModuleFactory().create_arp_obj(mock_rdb) +def test_detect_unsolicited_arp( + mock_db + ): + ARP = ModuleFactory().create_arp_obj(mock_db) uid = '1234' ts = '1632214645.783595' dst_mac = 'ff:ff:ff:ff:ff:ff' @@ -33,8 +37,10 @@ def test_detect_unsolicited_arp(mock_rdb): ) -def test_detect_MITM_ARP_attack(mock_rdb): - ARP = ModuleFactory().create_arp_obj(mock_rdb) +def test_detect_MITM_ARP_attack( + mock_db + ): + ARP = ModuleFactory().create_arp_obj(mock_db) # add a mac addr to this profile src_mac = '2e:a4:18:f8:3d:02' @@ -42,10 +48,9 @@ def test_detect_MITM_ARP_attack(mock_rdb): uid = '1234' ts = '1636305825.755132' saddr = '192.168.1.3' - mock_rdb.get_ip_of_mac.return_value = json.dumps([profileid]) + mock_db.get_ip_of_mac.return_value = json.dumps([profileid]) assert ( ARP.detect_MITM_ARP_attack( - profileid, twid, uid, saddr, diff --git a/tests/test_database.py b/tests/test_database.py index 908a783c1..8cce74288 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -1,13 +1,30 @@ -from slips_files.common.slips_utils import utils -from slips_files.core.flows.zeek import Conn -from slips_files.common.slips_utils import utils -from tests.module_factory import ModuleFactory import redis import os import json import time import pytest +from slips_files.common.slips_utils import utils +from slips_files.core.flows.zeek 
import Conn
+from slips_files.common.slips_utils import utils
+from tests.module_factory import ModuleFactory
+from slips_files.core.evidence_structure.evidence import (
+    dict_to_evidence,
+    Evidence,
+    Direction,
+    IoCType,
+    EvidenceType,
+    IDEACategory,
+    Proto,
+    Tag,
+    Attacker,
+    Victim,
+    ThreatLevel,
+    ProfileID,
+    TimeWindow
+    )
+
+
 # random values for testing
 profileid = 'profile_192.168.1.1'
 twid = 'timewindow1'
@@ -38,28 +55,29 @@ def add_flow():

 def test_getProfileIdFromIP():
-    """unit test for addProfile and getProfileIdFromIP"""
+    """unit test for add_profile and get_profileid_from_ip"""
     # clear the database before running this test
     os.system('./slips.py -c slips.conf -cc')
     # add a profile
-    db.addProfile('profile_192.168.1.1', '00:00', '1')
+    db.add_profile('profile_192.168.1.1', '00:00', '1')
     # try to retrieve it
-    assert db.getProfileIdFromIP(test_ip) is not False
+    assert db.get_profileid_from_ip(test_ip) is not False


 def test_timewindows():
-    """unit tests for addNewTW ,getLastTWforProfile and getFirstTWforProfile"""
+    """unit tests for add_new_tw, get_first_twid_for_profile and
+    get_last_twid_of_profile"""
     profileid = 'profile_192.168.1.1'
     # add a profile
-    db.addProfile(profileid, '00:00', '1')
+    db.add_profile(profileid, '00:00', '1')
     # add a tw to that profile (first tw)
-    db.addNewTW(profileid, 0.0)
+    db.add_new_tw(profileid, 'timewindow1', 0.0)
     # add a new tw (last tw)
-    db.addNewTW(profileid, 5.0)
-    assert db.getFirstTWforProfile(profileid) == [('timewindow1', 0.0)]
-    assert db.get_last_twid_of_profile(profileid) == [('timewindow2', 5.0)]
+    db.add_new_tw(profileid, 'timewindow2', 3700)
+    assert db.get_first_twid_for_profile(profileid) == ('timewindow1', 0.0)
+    assert db.get_last_twid_of_profile(profileid) == ('timewindow2', 3700.0)


 def getSlipsInternalTime():
@@ -69,24 +87,9 @@ def getSlipsInternalTime():

 def test_add_ips():
     # add a profile
-    db.addProfile(profileid, '00:00', '1')
+    db.add_profile(profileid, '00:00', '1')
     # add a tw to that profile
-    db.addNewTW(profileid, 0.0)
-    columns = {
-        'dport': 80,
-        'sport': 80,
-        'totbytes': 80,
-        'pkts': 20,
-        'sbytes': 30,
-        'bytes': 30,
-        'spkts': 70,
-        'state': 'Not Established',
-        'uid': '1234',
-        'proto': 'TCP',
-        'saddr': '8.8.8.8',
-        'daddr': test_ip,
-        'starttime': '20.0',
-    }
+    db.add_new_tw(profileid, 'timewindow1', 0.0)
     # make sure ip is added
     assert (
         db.add_ips(profileid, twid, flow, 'Server') is True
@@ -106,41 +109,39 @@ def test_add_port():
     assert flow.daddr in added_ports['DstPortsServerTCPNot Established']


-def test_setEvidence():
-    attacker_direction = 'ip'
-    attacker = test_ip
-    evidence_type = f'SSHSuccessful-by-{attacker}'
-    threat_level = 0.01
-    confidence = 0.6
-    description = 'SSH Successful to IP :' + '8.8.8.8' + '. From IP ' + test_ip
+def test_set_evidence():
+    attacker: Attacker = Attacker(
+        direction=Direction.SRC,
+        attacker_type=IoCType.IP,
+        value=test_ip
+    )
+    threat_level: ThreatLevel = ThreatLevel.INFO
+    confidence = 0.8
+    description = f'SSH Successful to IP : 8.8.8.8 . From IP {test_ip}'
     timestamp = time.time()
-    category = 'Infomation'
-    uid = '123'
-    db.setEvidence(evidence_type, attacker_direction, attacker, threat_level, confidence, description,
-                   timestamp, category, profileid=profileid, twid=twid, uid=uid)
-
-    added_evidence = db.r.hget(f'evidence{profileid}', twid)
-    added_evidence2 = db.r.hget(f'{profileid}_{twid}', 'Evidence')
-    assert added_evidence2 == added_evidence
-
-    added_evidence = json.loads(added_evidence)
-    description = 'SSH Successful to IP :8.8.8.8.
From IP 192.168.1.1' - # note that added_evidence may have evidence from other unit tests - evidence_uid = next(iter(added_evidence)) - evidence_details = json.loads(added_evidence[evidence_uid]) - assert 'description' in evidence_details - assert evidence_details['description'] == description - - -def test_deleteEvidence(): - description = 'SSH Successful to IP :8.8.8.8. From IP 192.168.1.1' - db.deleteEvidence(profileid, twid, description) - added_evidence = json.loads(db.r.hget(f'evidence{profileid}', twid)) - added_evidence2 = json.loads( - db.r.hget(f'{profileid}_{twid}', 'Evidence') - ) - assert 'SSHSuccessful-by-192.168.1.1' not in added_evidence - assert 'SSHSuccessful-by-192.168.1.1' not in added_evidence2 + uid = ['123'] + victim: Victim = Victim( + direction=Direction.DST, + victim_type=IoCType.IP, + value='8.8.8.8' + ) + evidence: Evidence = Evidence( + evidence_type=EvidenceType.SSH_SUCCESSFUL, + attacker=attacker, + victim=victim, + threat_level=threat_level, + confidence=confidence, + description=description, + profile=ProfileID(ip=test_ip), + timewindow=TimeWindow(number=1), + uid=uid, + timestamp=timestamp, + category=IDEACategory.INFO, + ) + + db.set_evidence(evidence) + added = db.r.hget(f'{profileid}_{twid}_evidence', evidence.id) + assert added diff --git a/tests/test_flow_handler.py b/tests/test_flow_handler.py index fd760f6ca..d3db769e5 100644 --- a/tests/test_flow_handler.py +++ b/tests/test_flow_handler.py @@ -2,9 +2,11 @@ import pytest -def test_is_supported_flow_not_ts(flow, mock_rdb): +def test_is_supported_flow_not_ts(flow, + mock_db + ): flow.starttime = None - flow_handler = ModuleFactory().create_flow_handler_obj(flow, mock_rdb) + flow_handler = ModuleFactory().create_flow_handler_obj(flow, mock_db) assert flow_handler.is_supported_flow() == False @@ -17,9 +19,11 @@ def test_is_supported_flow_not_ts(flow, mock_rdb): ] ) def test_is_supported_flow_without_ts( - flow_type: str, expected_val: bool, flow, mock_rdb): + flow_type: str, expected_val: bool, flow, + mock_db + ): # just change the flow_type flow.type_ = flow_type - flow_handler = ModuleFactory().create_flow_handler_obj(flow, mock_rdb) + flow_handler = ModuleFactory().create_flow_handler_obj(flow, mock_db) assert flow_handler.is_supported_flow() == expected_val diff --git a/tests/test_flowalerts.py b/tests/test_flowalerts.py index ace67e857..977fcd870 100644 --- a/tests/test_flowalerts.py +++ b/tests/test_flowalerts.py @@ -14,8 +14,10 @@ dst_profileid = f'profile_{daddr}' -def test_port_belongs_to_an_org(mock_rdb): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) +def test_port_belongs_to_an_org( + mock_db + ): + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) # belongs to apple portproto = '65509/tcp' @@ -23,24 +25,26 @@ def test_port_belongs_to_an_org(mock_rdb): # mock the db response to say that the org of this port # is apple and the mac vendor of the # given profile is also apple - mock_rdb.get_organization_of_port.return_value = json.dumps( + mock_db.get_organization_of_port.return_value = json.dumps( {'ip':[], 'org_name':'apple'} ) - mock_rdb.get_mac_vendor_from_profile.return_value = 'apple' + mock_db.get_mac_vendor_from_profile.return_value = 'apple' assert flowalerts.port_belongs_to_an_org(daddr, portproto, profileid) is True # doesn't belong to any org portproto = '78965/tcp' # expectations - mock_rdb.get_organization_of_port.return_value = None + mock_db.get_organization_of_port.return_value = None assert flowalerts.port_belongs_to_an_org(daddr, portproto, 
profileid) is False -def test_check_unknown_port(mocker, mock_rdb): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) +def test_check_unknown_port(mocker, + mock_db + ): + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) # database.set_port_info('23/udp', 'telnet') - mock_rdb.get_port_info.return_value = 'telnet' + mock_db.get_port_info.return_value = 'telnet' # now we have info 23 udp assert flowalerts.check_unknown_port( '23', @@ -54,8 +58,8 @@ def test_check_unknown_port(mocker, mock_rdb): ) is False # test when the port is unknown - mock_rdb.get_port_info.return_value = None - mock_rdb.is_ftp_port.return_value = False + mock_db.get_port_info.return_value = None + mock_db.is_ftp_port.return_value = False # mock the flowalerts call to port_belongs_to_an_org flowalerts_mock = mocker.patch("modules.flowalerts.flowalerts.FlowAlerts.port_belongs_to_an_org") flowalerts_mock.return_value = False @@ -74,18 +78,18 @@ def test_check_unknown_port(mocker, mock_rdb): def test_check_if_resolution_was_made_by_different_version( - mock_rdb + mock_db ): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) # now this ipv6 belongs to the same profileid, is supposed to be # the other version of the ipv4 of the used profileid - mock_rdb.get_the_other_ip_version.return_value = json.dumps( + mock_db.get_the_other_ip_version.return_value = json.dumps( '2001:0db8:85a3:0000:0000:8a2e:0370:7334' ) # now the daddr given to check_if_resolution_was_made_by_different_version() # is supposed to be resolved by the ipv6 of the profile, not th eipv4 - mock_rdb.get_dns_resolution.return_value = { + mock_db.get_dns_resolution.return_value = { 'resolved-by': '2001:0db8:85a3:0000:0000:8a2e:0370:7334' } @@ -96,10 +100,10 @@ def test_check_if_resolution_was_made_by_different_version( ) is True # check the case when the resolution wasn't done by another IP - mock_rdb.get_the_other_ip_version.return_value = json.dumps( + mock_db.get_the_other_ip_version.return_value = json.dumps( '2001:0db8:85a3:0000:0000:8a2e:0370:7334' ) - mock_rdb.get_dns_resolution.return_value = {'resolved-by': []} + mock_db.get_dns_resolution.return_value = {'resolved-by': []} assert flowalerts.check_if_resolution_was_made_by_different_version( profileid, '2.3.4.5' @@ -107,8 +111,10 @@ def test_check_if_resolution_was_made_by_different_version( -def test_check_dns_arpa_scan(mock_rdb): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) +def test_check_dns_arpa_scan( + mock_db + ): + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) # make 10 different arpa scans for ts in arange(0, 1, 1 / 10): is_arpa_scan = flowalerts.check_dns_arpa_scan( @@ -118,18 +124,28 @@ def test_check_dns_arpa_scan(mock_rdb): assert is_arpa_scan is True -def test_check_multiple_ssh_versions(mock_rdb): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) +def test_check_multiple_ssh_versions( + mock_db + ): + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) # in the first flow, we only have 1 use ssh client so no version incompatibility - mock_rdb.get_software_from_profile.return_value = {'SSH::CLIENT': {'version-major': 8, 'version-minor': 1, 'uid': 'YTYwNjBiMjIxZDkzOWYyYTc4'}} + mock_db.get_software_from_profile.return_value = { + 'SSH::CLIENT': {'version-major': 8, 'version-minor': 1, + 'uid': 'YTYwNjBiMjIxZDkzOWYyYTc4'}} - flow2 = {'starttime': 1632302619.444328, 'uid': 'M2VhNTA3ZmZiYjU3OGMxMzJk', 'saddr': '192.168.1.247', 'daddr': '', 
'software': 'SSH::CLIENT', 'unparsed_version': 'OpenSSH_9.1', 'version_major': 9, 'version_minor': 1, 'type_': 'software'} + flow2 = {'starttime': 1632302619.444328, 'uid': 'M2VhNTA3ZmZiYjU3OGMxMzJk', + 'saddr': '192.168.1.247', 'daddr': '192.168.1.50', 'software': + 'SSH::CLIENT', + 'unparsed_version': 'OpenSSH_9.1', 'version_major': 9, + 'version_minor': 1, 'type_': 'software'} # in flow 2 slips should detect a client version change assert flowalerts.check_multiple_ssh_versions(flow2, 'timewindow1') is True -def test_detect_DGA(mock_rdb): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) +def test_detect_DGA( + mock_db + ): + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) rcode_name = 'NXDOMAIN' # arbitrary ip to be able to call detect_DGA daddr = '10.0.0.1' @@ -140,18 +156,20 @@ def test_detect_DGA(mock_rdb): assert dga_detected is True -def test_detect_young_domains(mock_rdb): - flowalerts = ModuleFactory().create_flowalerts_obj(mock_rdb) +def test_detect_young_domains( + mock_db + ): + flowalerts = ModuleFactory().create_flowalerts_obj(mock_db) domain = 'example.com' # age in days - mock_rdb.getDomainData.return_value = {'Age': 50} + mock_db.getDomainData.return_value = {'Age': 50} assert ( flowalerts.detect_young_domains(domain, timestamp, profileid, twid, uid) is True ) # more than the age threshold - mock_rdb.getDomainData.return_value = {'Age': 1000} + mock_db.getDomainData.return_value = {'Age': 1000} assert ( flowalerts.detect_young_domains(domain, timestamp, profileid, twid, uid) is False ) diff --git a/tests/test_horizontal_portscans.py b/tests/test_horizontal_portscans.py index 0ef0ae217..1aaffd714 100644 --- a/tests/test_horizontal_portscans.py +++ b/tests/test_horizontal_portscans.py @@ -12,12 +12,12 @@ def generate_random_ip(): return ".".join(str(random.randint(0, 255)) for _ in range(4)) -def enough_dstips_to_reach_the_threshold(mock_rdb): +def enough_dstips_to_reach_the_threshold(mock_db): """ returns conns to dport that are not enough to reach the minimum dports to trigger the first scan """ - module = ModuleFactory().create_horizontal_portscan_obj(mock_rdb) + module = ModuleFactory().create_horizontal_portscan_obj(mock_db) # get a random list of ints(ports) that are below the threshold # Generate a random number between 0 and threshold amount_of_dstips: int = random.randint( @@ -42,12 +42,12 @@ def enough_dstips_to_reach_the_threshold(mock_rdb): -def not_enough_dstips_to_reach_the_threshold(mock_rdb): +def not_enough_dstips_to_reach_the_threshold(mock_db): """ returns conns to dport that are not enough to reach the minimum dports to trigger the first scan """ - module = ModuleFactory().create_horizontal_portscan_obj(mock_rdb) + module = ModuleFactory().create_horizontal_portscan_obj(mock_db) # get a random list of ints(ports) that are below the threshold # Generate a random number between 0 and threshold amount_of_dstips: int = random.randint( @@ -80,16 +80,16 @@ def not_enough_dstips_to_reach_the_threshold(mock_rdb): def test_min_dstips_threshold( get_test_conns, expected_return_val: bool, - mock_rdb + mock_db ): - horizontal_ps = ModuleFactory().create_horizontal_portscan_obj(mock_rdb) + horizontal_ps = ModuleFactory().create_horizontal_portscan_obj(mock_db) profileid = 'profile_1.1.1.1' timewindow = 'timewindow0' dport = 5555 - dports: dict = get_test_conns(mock_rdb) - mock_rdb.get_data_from_profile_tw.return_value = dports + dports: dict = get_test_conns(mock_db) + mock_db.get_data_from_profile_tw.return_value = dports cache_key = 
horizontal_ps.get_cache_key(profileid, timewindow, dport)
     amount_of_dips = len(dports[dport]['dstips'])
@@ -114,7 +114,7 @@ def test_min_dstips_threshold(

 def test_combine_evidence(
         number_of_pending_evidence,
         expected_return_val: bool,
-        mock_rdb
+        mock_db
     ):
     """
     first evidence will be alerted, the rest will be combined
     """
     profileid = 'profile_1.1.1.1'
     timewindow = 'timewindow0'
     dstip = '8.8.8.8'
     dport = 5555

-    horizontal_ps = ModuleFactory().create_horizontal_portscan_obj(mock_rdb)
+    horizontal_ps = ModuleFactory().create_horizontal_portscan_obj(mock_db)
     key: str = horizontal_ps.get_cache_key(profileid, timewindow, dstip)
     for evidence_ctr in range(number_of_pending_evidence+1):
@@ -167,10 +167,11 @@ def test_combine_evidence(
         (15, 20, True),
     ]
 )
-def test_check_if_enough_dstips_to_trigger_an_evidence(mock_rdb,
-                                                       prev_amount_of_dstips,
-                                                       cur_amount_of_dstips,
-                                                       expected_return_val):
+def test_check_if_enough_dstips_to_trigger_an_evidence(
+        mock_db,
+        prev_amount_of_dstips,
+        cur_amount_of_dstips,
+        expected_return_val):
     """
     slips detects scans based on the number of current dports scanned
     compared to the number of ports scanned before
@@ -181,7 +182,7 @@ def test_check_if_enough_dstips_to_trigger_an_evidence(mock_rdb,
     timewindow = 'timewindow0'
     dport = 5555

-    horizontal_ps = ModuleFactory().create_horizontal_portscan_obj(mock_rdb)
+    horizontal_ps = ModuleFactory().create_horizontal_portscan_obj(mock_db)
     key: str = horizontal_ps.get_cache_key(profileid, timewindow, dport)
     horizontal_ps.cached_tw_thresholds[key] = prev_amount_of_dstips
diff --git a/tests/test_http_analyzer.py b/tests/test_http_analyzer.py
index 4296867b0..a013cecc1 100644
--- a/tests/test_http_analyzer.py
+++ b/tests/test_http_analyzer.py
@@ -20,44 +20,55 @@ def get_random_MAC():


-def test_check_suspicious_user_agents(mock_rdb):
-    http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb)
+def test_check_suspicious_user_agents(
+        mock_db
+    ):
+    http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db)
     # create a flow with suspicious user agent
-    host = '147.32.80.7'
-    uri = '/wpad.dat'
-    user_agent = 'CHM_MSDN'
     assert (
-        http_analyzer.check_suspicious_user_agents(uid, host, uri, timestamp, user_agent, profileid, twid) is True
+        http_analyzer.check_suspicious_user_agents(
+            uid,
+            '147.32.80.7',
+            '/wpad.dat',
+            timestamp,
+            'CHM_MSDN',
+            profileid,
+            twid
+        ) is True
     )


-def test_check_multiple_google_connections(mock_rdb):
-    http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb)
+def test_check_multiple_google_connections(
+        mock_db
+    ):
+    http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db)
     # {"ts":1635765765.435485,"uid":"C7mv0u4M1zqJBHydgj",
     # "id.orig_h":"192.168.1.28","id.orig_p":52102,"id.resp_h":"216.58.198.78",
     # "id.resp_p":80,"trans_depth":1,"method":"GET","host":"google.com","uri":"/",
-    # "version":"1.1","user_agent":"Wget/1.20.3 (linux-gnu)","request_body_len":0,"response_body_len":219,
-    # "status_code":301,"status_msg":"Moved Permanently","tags":[],"resp_fuids":["FGhwTU1OdvlfLrzBKc"],
+    # "version":"1.1","user_agent":"Wget/1.20.3 (linux-gnu)",
+    # "request_body_len":0,"response_body_len":219,
+    # "status_code":301,"status_msg":"Moved Permanently","tags":[],
+    # "resp_fuids":["FGhwTU1OdvlfLrzBKc"],
     # "resp_mime_types":["text/html"]}
     host = 'google.com'
-    # uri = '/'
+    uri = '/'
     request_body_len = 0
    for _ in range(4):
         found_detection = http_analyzer.check_multiple_empty_connections(
-            uid, host, timestamp, request_body_len, profileid, twid
+            uid, host, uri, timestamp, request_body_len,
profileid, twid ) assert found_detection is True -def test_parsing_online_ua_info(mock_rdb, mocker): +def test_parsing_online_ua_info(mock_db, mocker): """ tests the parsing and processing the ua found by the online query """ - http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb) - # use a different profile for this unit test to make sure we don't already have info about - # it in the db + http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db) + # use a different profile for this unit test to make + # sure we don't already have info about it in the db profileid = 'profile_192.168.99.99' - mock_rdb.get_user_agent_from_profile.return_value = None + mock_db.get_user_agent_from_profile.return_value = None # mock the function that gets info about the given ua from an online db mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 @@ -73,8 +84,9 @@ def test_parsing_online_ua_info(mock_rdb, mocker): assert ua_info['browser'] == 'Safari' -def test_get_user_agent_info(mock_rdb, mocker): - http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb) +def test_get_user_agent_info( + mock_db, mocker): + http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db) # mock the function that gets info about the # given ua from an online db: get_ua_info_online() mock_requests = mocker.patch("requests.get") @@ -85,8 +97,8 @@ def test_get_user_agent_info(mock_rdb, mocker): "os_name":"OS X" }""" - mock_rdb.add_all_user_agent_to_profile.return_value = True - mock_rdb.get_user_agent_from_profile.return_value = None + mock_db.add_all_user_agent_to_profile.return_value = True + mock_db.get_user_agent_from_profile.return_value = None expected_ret_value = {'browser': 'Safari', 'os_name': 'OS X', @@ -98,27 +110,31 @@ def test_get_user_agent_info(mock_rdb, mocker): # assert ua_added_to_db is not None, 'Error getting UA info online' # assert ua_added_to_db is not False, 'We already have UA info about this profile in the db' -def test_check_incompatible_user_agent(mock_rdb): +def test_check_incompatible_user_agent( + mock_db + ): - http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb) + http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db) # use a different profile for this unit test to make sure we don't already have info about # it in the db. 
it has to be a private IP for its' MAC to not be marked as the gw MAC profileid = 'profile_192.168.77.254' # Mimic an intel mac vendor using safari - mock_rdb.get_mac_vendor_from_profile.return_value = 'Intel Corp' - mock_rdb.get_user_agent_from_profile.return_value = {'browser': 'safari'} + mock_db.get_mac_vendor_from_profile.return_value = 'Intel Corp' + mock_db.get_user_agent_from_profile.return_value = {'browser': 'safari'} assert ( http_analyzer.check_incompatible_user_agent('google.com', '/images', timestamp, profileid, twid, uid) is True ) -def test_extract_info_from_UA(mock_rdb): - http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb) +def test_extract_info_from_UA( + mock_db + ): + http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db) # use another profile, because the default # one already has a ua in the db - mock_rdb.get_user_agent_from_profile.return_value = None + mock_db.get_user_agent_from_profile.return_value = None profileid = 'profile_192.168.1.2' server_bag_ua = 'server-bag[macOS,11.5.1,20G80,MacBookAir10,1]' assert ( @@ -127,19 +143,29 @@ def test_extract_info_from_UA(mock_rdb): ) -def test_check_multiple_UAs(mock_rdb): - http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_rdb) - mozilla_ua = 'Mozilla/5.0 (X11; Fedora;Linux x86; rv:60.0) Gecko/20100101 Firefox/60.0' +def test_check_multiple_UAs(mock_db): + http_analyzer = ModuleFactory().create_http_analyzer_obj(mock_db) + mozilla_ua = ('Mozilla/5.0 (X11; Fedora;Linux x86; rv:60.0) ' + 'Gecko/20100101 Firefox/60.0') # old ua cached_ua = {'os_type': 'Fedora', 'os_name': 'Linux'} - # current ua - user_agent = mozilla_ua # should set evidence assert ( - http_analyzer.check_multiple_UAs(cached_ua, user_agent, timestamp, profileid, twid, uid) is False + http_analyzer.check_multiple_UAs( + cached_ua, + mozilla_ua, + timestamp, + profileid, + twid, + uid) is False ) # in this case we should alert - user_agent = SAFARI_UA assert ( - http_analyzer.check_multiple_UAs(cached_ua, user_agent, timestamp, profileid, twid, uid) is True + http_analyzer.check_multiple_UAs( + cached_ua, + SAFARI_UA, + timestamp, + profileid, + twid, + uid) is True ) diff --git a/tests/test_inputProc.py b/tests/test_inputProc.py index 24dc09c63..e1d9d0fe7 100644 --- a/tests/test_inputProc.py +++ b/tests/test_inputProc.py @@ -12,10 +12,11 @@ [('pcap', 'dataset/test12-icmp-portscan.pcap')], ) def test_handle_pcap_and_interface( - input_type, input_information, mock_rdb + input_type, input_information, + mock_db ): # no need to test interfaces because in that case read_zeek_files runs in a loop and never returns - input = ModuleFactory().create_inputProcess_obj(input_information, input_type, mock_rdb) + input = ModuleFactory().create_inputProcess_obj(input_information, input_type, mock_db) input.zeek_pid = 'False' input.is_zeek_tabs = False assert input.handle_pcap_and_interface() is True @@ -31,10 +32,11 @@ def test_handle_pcap_and_interface( ], ) def test_is_growing_zeek_dir( - zeek_dir: str, is_tabs: bool, mock_rdb + zeek_dir: str, is_tabs: bool, + mock_db ): - input = ModuleFactory().create_inputProcess_obj(zeek_dir, 'zeek_folder', mock_rdb) - mock_rdb.get_all_zeek_files.return_value = [os.path.join(zeek_dir, 'conn.log')] + input = ModuleFactory().create_inputProcess_obj(zeek_dir, 'zeek_folder', mock_db) + mock_db.get_all_zeek_files.return_value = [os.path.join(zeek_dir, 'conn.log')] assert input.read_zeek_folder() is True @@ -47,8 +49,10 @@ def test_is_growing_zeek_dir( ('dataset/test9-mixed-zeek-dir/conn.log', False), 
# json ], ) -def test_is_zeek_tabs_file(path: str, expected_val: bool, mock_rdb): - input = ModuleFactory().create_inputProcess_obj(path, 'zeek_folder', mock_rdb) +def test_is_zeek_tabs_file(path: str, expected_val: bool, + mock_db + ): + input = ModuleFactory().create_inputProcess_obj(path, 'zeek_folder', mock_db) assert input.is_zeek_tabs_file(path) == expected_val @@ -62,9 +66,10 @@ def test_is_zeek_tabs_file(path: str, expected_val: bool, mock_rdb): ], ) def test_handle_zeek_log_file( - input_information, mock_rdb, expected_output + input_information, + mock_db, expected_output ): - input = ModuleFactory().create_inputProcess_obj(input_information, 'zeek_log_file', mock_rdb) + input = ModuleFactory().create_inputProcess_obj(input_information, 'zeek_log_file', mock_db) assert input.handle_zeek_log_file() == expected_output @@ -78,11 +83,13 @@ def test_handle_zeek_log_file( ) def test_cache_nxt_line_in_file( - path: str, is_tabs: str, line_cached: bool , mock_rdb): + path: str, is_tabs: str, line_cached: bool , + mock_db + ): """ :param line_cached: should slips cache the first line of this file or not """ - input = ModuleFactory().create_inputProcess_obj(path, 'zeek_log_file', mock_rdb) + input = ModuleFactory().create_inputProcess_obj(path, 'zeek_log_file', mock_db) input.cache_lines = {} input.file_time = {} input.is_zeek_tabs = is_tabs @@ -122,8 +129,10 @@ def test_cache_nxt_line_in_file( ], ) def test_get_ts_from_line( - path: str, is_tabs: str,zeek_line: str, expected_val:float, mock_rdb): - input = ModuleFactory().create_inputProcess_obj(path, 'zeek_log_file', mock_rdb) + path: str, is_tabs: str,zeek_line: str, expected_val:float, + mock_db + ): + input = ModuleFactory().create_inputProcess_obj(path, 'zeek_log_file', mock_db) input.is_zeek_tabs = is_tabs input.get_ts_from_line(zeek_line) @@ -138,10 +147,11 @@ def test_get_ts_from_line( ] ) def test_reached_timeout( - last_updated_file_time, now, bro_timeout, expected_val, mock_rdb + last_updated_file_time, now, bro_timeout, expected_val, + mock_db ): input = ModuleFactory().create_inputProcess_obj( - '', 'zeek_log_file', mock_rdb + '', 'zeek_log_file', mock_db ) input.last_updated_file_time = last_updated_file_time input.bro_timeout = bro_timeout @@ -161,15 +171,18 @@ def test_reached_timeout( 'path', [('dataset/test1-normal.nfdump')] ) def test_handle_nfdump( - path, mock_rdb + path, + mock_db ): - input = ModuleFactory().create_inputProcess_obj(path, 'nfdump', mock_rdb) + input = ModuleFactory().create_inputProcess_obj(path, 'nfdump', mock_db) assert input.handle_nfdump() is True -def test_get_earliest_line(mock_rdb): +def test_get_earliest_line( + mock_db + ): input = ModuleFactory().create_inputProcess_obj( - '', 'zeek_log_file', mock_rdb + '', 'zeek_log_file', mock_db ) input.file_time = { 'software.log': 3, @@ -202,8 +215,10 @@ def test_get_earliest_line(mock_rdb): ] ) def test_get_flows_number( - path: str, is_tabs: bool, expected_val: int, mock_rdb): - input = ModuleFactory().create_inputProcess_obj(path, 'nfdump', mock_rdb) + path: str, is_tabs: bool, expected_val: int, + mock_db + ): + input = ModuleFactory().create_inputProcess_obj(path, 'nfdump', mock_db) input.is_zeek_tabs = is_tabs assert input.get_flows_number(path) == expected_val @@ -219,9 +234,10 @@ def test_get_flows_number( # ('binetflow','dataset/test3-mixed.binetflow'), # ('binetflow','dataset/test4-malicious.binetflow'), def test_handle_binetflow( - input_type, input_information, mock_rdb + input_type, input_information, + mock_db ): - input = 
ModuleFactory().create_inputProcess_obj(input_information, input_type, mock_rdb) + input = ModuleFactory().create_inputProcess_obj(input_information, input_type, mock_db) with patch.object(input, 'get_flows_number', return_value=5): assert input.handle_binetflow() is True @@ -231,9 +247,10 @@ def test_handle_binetflow( [('dataset/test6-malicious.suricata.json')], ) def test_handle_suricata( - input_information, mock_rdb + input_information, + mock_db ): - inputProcess = ModuleFactory().create_inputProcess_obj(input_information, 'suricata', mock_rdb) + inputProcess = ModuleFactory().create_inputProcess_obj(input_information, 'suricata', mock_db) assert inputProcess.handle_suricata() is True @pytest.mark.parametrize( @@ -250,11 +267,13 @@ def test_handle_suricata( ], ) -def test_read_from_stdin(line_type: str, line: str, mock_rdb): +def test_read_from_stdin(line_type: str, line: str, + mock_db + ): # slips supports reading zeek json conn.log only using stdin, # tabs aren't supported input = ModuleFactory().create_inputProcess_obj( - line_type, 'stdin', mock_rdb, line_type=line_type, + line_type, 'stdin', mock_db, line_type=line_type, ) with patch.object(input, 'stdin', return_value=[line, 'done\n']): # this function will give the line to profiler @@ -286,11 +305,13 @@ def test_read_from_stdin(line_type: str, line: str, mock_rdb): ], ) -def test_read_from_stdin(line_type: str, line: str, mock_rdb): +def test_read_from_stdin(line_type: str, line: str, + mock_db + ): # slips supports reading zeek json conn.log only using stdin, # tabs aren't supported input = ModuleFactory().create_inputProcess_obj( - line_type, 'stdin', mock_rdb, line_type=line_type, + line_type, 'stdin', mock_db, line_type=line_type, ) with patch.object(input, 'stdin', return_value=[line, 'done\n']): # this function will give the line to profiler diff --git a/tests/test_ip_info.py b/tests/test_ip_info.py index b96ae058c..95aa53eb8 100644 --- a/tests/test_ip_info.py +++ b/tests/test_ip_info.py @@ -7,25 +7,31 @@ # ASN unit tests -def test_get_asn_info_from_geolite(mock_rdb): +def test_get_asn_info_from_geolite( + mock_db + ): """ geolite is an offline db """ - ASN_info = ModuleFactory().create_asn_obj(mock_rdb) + ASN_info = ModuleFactory().create_asn_obj(mock_db) # check an ip that we know is in the db expected_asn_info = {'asn': {'number': 'AS7018', 'org': 'ATT-INTERNET4'}} assert ASN_info.get_asn_info_from_geolite('108.200.116.255') == expected_asn_info # test asn info not found in geolite assert ASN_info.get_asn_info_from_geolite('0.0.0.0') == {} -def test_cache_ip_range(mock_rdb): +def test_cache_ip_range( + mock_db + ): # Patch the database object creation before it is instantiated - ASN_info = ModuleFactory().create_asn_obj(mock_rdb) + ASN_info = ModuleFactory().create_asn_obj(mock_db) assert ASN_info.cache_ip_range('8.8.8.8') == {'asn': {'number': 'AS15169', 'org': 'GOOGLE, US'}} # GEOIP unit tests -def test_get_geocountry(mock_rdb): - ip_info = ModuleFactory().create_ip_info_obj(mock_rdb) +def test_get_geocountry( + mock_db + ): + ip_info = ModuleFactory().create_ip_info_obj(mock_db) #open the db we'll be using for this test # ip_info.wait_for_dbs() @@ -40,9 +46,11 @@ def test_get_geocountry(mock_rdb): 'geocountry': 'Unknown' } -def test_get_vendor(mocker, mock_rdb): +def test_get_vendor(mocker, + mock_db + ): # make sure the mac db is download so that wai_for_dbs doesn't wait forever :'D - ip_info = ModuleFactory().create_ip_info_obj(mock_rdb) + ip_info = ModuleFactory().create_ip_info_obj(mock_db) profileid = 
'profile_10.0.2.15' mac_addr = '08:00:27:7f:09:e1' @@ -51,7 +59,7 @@ def test_get_vendor(mocker, mock_rdb): mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 mock_requests.return_value.text = 'PCS Systemtechnik GmbH' - mock_rdb.get_mac_vendor_from_profile.return_value = False + mock_db.get_mac_vendor_from_profile.return_value = False # tries to get vendor either online or from our offline db mac_info = ip_info.get_vendor(mac_addr, profileid) diff --git a/tests/test_leak_detector.py b/tests/test_leak_detector.py index 35dff18e8..d82597077 100644 --- a/tests/test_leak_detector.py +++ b/tests/test_leak_detector.py @@ -3,8 +3,10 @@ import os -def test_compile_and_save_rules(mock_rdb): - leak_detector = ModuleFactory().create_leak_detector_obj(mock_rdb) +def test_compile_and_save_rules( + mock_db + ): + leak_detector = ModuleFactory().create_leak_detector_obj(mock_db) leak_detector.compile_and_save_rules() compiled_rules = os.listdir(leak_detector.compiled_yara_rules_path) assert 'test_rule.yara_compiled' in compiled_rules diff --git a/tests/test_profiler.py b/tests/test_profiler.py index d768ea385..92f121a82 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -15,8 +15,10 @@ 'file,input_type,expected_value', [('dataset/test6-malicious.suricata.json', 'suricata', 'suricata')] ) -def test_define_separator_suricata(file, input_type, expected_value, mock_rdb): - profilerProcess = ModuleFactory().create_profiler_obj() +def test_define_separator_suricata(file, input_type, expected_value, + mock_db + ): + profilerProcess = ModuleFactory().create_profiler_obj(mock_db) with open(file) as f: while True: sample_flow = f.readline().replace('\n', '') @@ -35,8 +37,10 @@ def test_define_separator_suricata(file, input_type, expected_value, mock_rdb): 'file,input_type,expected_value', [('dataset/test10-mixed-zeek-dir/conn.log', 'zeek_log_file', 'zeek-tabs')], ) -def test_define_separator_zeek_tab(file, input_type, expected_value, mock_rdb): - profilerProcess = ModuleFactory().create_profiler_obj() +def test_define_separator_zeek_tab(file, input_type, expected_value, + mock_db + ): + profilerProcess = ModuleFactory().create_profiler_obj(mock_db) with open(file) as f: while True: sample_flow = f.readline().replace('\n', '') @@ -55,12 +59,14 @@ def test_define_separator_zeek_tab(file, input_type, expected_value, mock_rdb): 'file, input_type,expected_value', [('dataset/test9-mixed-zeek-dir/conn.log', 'zeek_log_file', 'zeek')] ) -def test_define_separator_zeek_dict(file, input_type, expected_value, mock_rdb): +def test_define_separator_zeek_dict(file, input_type, expected_value, + mock_db + ): """ :param input_type: as determined by slips.py """ - profilerProcess = ModuleFactory().create_profiler_obj() + profilerProcess = ModuleFactory().create_profiler_obj(mock_db) with open(file) as f: sample_flow = f.readline().replace('\n', '') @@ -73,7 +79,9 @@ def test_define_separator_zeek_dict(file, input_type, expected_value, mock_rdb): @pytest.mark.parametrize('nfdump_file', [('dataset/test1-normal.nfdump')]) -def test_define_separator_nfdump(nfdump_file, mock_rdb): +def test_define_separator_nfdump(nfdump_file, + mock_db + ): # nfdump files aren't text files so we need to process them first command = f'nfdump -b -N -o csv -q -r {nfdump_file}' # Execute command @@ -90,7 +98,7 @@ def test_define_separator_nfdump(nfdump_file, mock_rdb): else: break - profilerProcess = ModuleFactory().create_profiler_obj() + profilerProcess = ModuleFactory().create_profiler_obj(mock_db) 
sample_flow = { 'data': nfdump_line, } @@ -110,7 +118,7 @@ def test_define_separator_nfdump(nfdump_file, mock_rdb): # ], # ) # def test_define_columns( -# file, separator, expected_value, mock_rdb +# file, separator, expected_value, mock_db # ): # # define_columns is called on header lines # # line = '#fields ts uid id.orig_h id.orig_p @@ -124,7 +132,7 @@ def test_define_separator_nfdump(nfdump_file, mock_rdb): # line = f.readline() # if line.startswith('#fields'): # break -# profilerProcess = ModuleFactory().create_profiler_obj() +# profilerProcess = ModuleFactory().create_profiler_obj(mock_db) # line = {'data': line} # profilerProcess.separator = separator # assert profilerProcess.define_columns(line) == expected_value @@ -144,8 +152,8 @@ def test_define_separator_nfdump(nfdump_file, mock_rdb): # ('dataset/test9-mixed-zeek-dir/files.log', 'files.log'), ], ) -def test_process_line(file, flow_type): - profiler = ModuleFactory().create_profiler_obj() +def test_process_line(file, flow_type, mock_db): + profiler = ModuleFactory().create_profiler_obj(mock_db) # we're testing another functionality here profiler.whitelist.is_whitelisted_flow = do_nothing profiler.input_type = 'zeek' @@ -185,10 +193,8 @@ def test_process_line(file, flow_type): ) assert added_flow is not None - - -def test_get_rev_profile(mock_rdb): - profiler = ModuleFactory().create_profiler_obj() +def test_get_rev_profile(mock_db): + profiler = ModuleFactory().create_profiler_obj(mock_db) profiler.flow = Conn( '1.0', '1234', @@ -203,12 +209,12 @@ def test_get_rev_profile(mock_rdb): '','', 'Established','' ) - profiler.daddr_as_obj = ipaddress.ip_address(profiler.flow.daddr) - mock_rdb.getProfileIdFromIP.return_value = None + mock_db.get_profileid_from_ip.return_value = None + mock_db.get_timewindow.return_value = 'timewindow1' assert profiler.get_rev_profile() == ('profile_8.8.8.8', 'timewindow1') -def test_get_rev_profile_no_daddr(flow): - profiler = ModuleFactory().create_profiler_obj() +def test_get_rev_profile_no_daddr(flow, mock_db): + profiler = ModuleFactory().create_profiler_obj(mock_db) profiler.flow = flow profiler.flow.daddr = None profiler.daddr_as_obj = None diff --git a/tests/test_threat_intelligence.py b/tests/test_threat_intelligence.py index 3bb9ce6bc..55346f732 100644 --- a/tests/test_threat_intelligence.py +++ b/tests/test_threat_intelligence.py @@ -5,8 +5,10 @@ -def test_parse_local_ti_file(mock_rdb): - threatintel = ModuleFactory().create_threatintel_obj(mock_rdb) +def test_parse_local_ti_file( + mock_db + ): + threatintel = ModuleFactory().create_threatintel_obj(mock_db) local_ti_files_dir = threatintel.path_to_local_ti_files local_ti_file = os.path.join(local_ti_files_dir, 'own_malicious_iocs.csv') # this is an ip we know we have in own_malicious_iocs.csv @@ -22,7 +24,8 @@ def test_parse_local_ti_file(mock_rdb): ], ) def test_check_local_ti_files_for_update( - current_hash, old_hash, expected_return, mocker, mock_rdb + current_hash, old_hash, expected_return, mocker, + mock_db ): """ first case: the cur hash differs from the old hash, so slips should update second case: the cur hash is the same as the old hash, so slips shouldn't update third case: the cur hash is false, meaning we can't get the file hash """ # since this is a clear db, then we should update the local ti file - threatintel = ModuleFactory().create_threatintel_obj(mock_rdb) + threatintel = ModuleFactory().create_threatintel_obj(mock_db) own_malicious_iocs = os.path.join(threatintel.path_to_local_ti_files, 'own_malicious_iocs.csv') mock_hash = 
mocker.patch("slips_files.common.slips_utils.Utils.get_hash_from_file") mock_hash.return_value = current_hash - mock_rdb.get_TI_file_info.return_value = {'hash': old_hash} + mock_db.get_TI_file_info.return_value = {'hash': old_hash} assert threatintel.should_update_local_ti_file(own_malicious_iocs) == expected_return diff --git a/tests/test_update_file_manager.py b/tests/test_update_file_manager.py index d36868208..93a3c3c18 100644 --- a/tests/test_update_file_manager.py +++ b/tests/test_update_file_manager.py @@ -2,8 +2,10 @@ from tests.module_factory import ModuleFactory import json -def test_getting_header_fields(mocker, mock_rdb): - update_manager = ModuleFactory().create_update_manager_obj(mock_rdb) +def test_getting_header_fields(mocker, + mock_db + ): + update_manager = ModuleFactory().create_update_manager_obj(mock_db) url = 'google.com/play' mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 @@ -13,20 +15,24 @@ def test_getting_header_fields(mocker, mock_rdb): assert update_manager.get_e_tag(response) == '1234' -def test_check_if_update_based_on_update_period(mock_rdb): - mock_rdb.get_TI_file_info.return_value = {'time': float('inf')} - update_manager = ModuleFactory().create_update_manager_obj(mock_rdb) +def test_check_if_update_based_on_update_period( + mock_db + ): + mock_db.get_TI_file_info.return_value = {'time': float('inf')} + update_manager = ModuleFactory().create_update_manager_obj(mock_db) url = 'abc.com/x' # update period hasn't passed assert update_manager.check_if_update(url, float('inf')) is False -def test_check_if_update_based_on_e_tag(mocker, mock_rdb): - update_manager = ModuleFactory().create_update_manager_obj(mock_rdb) +def test_check_if_update_based_on_e_tag(mocker, + mock_db + ): + update_manager = ModuleFactory().create_update_manager_obj(mock_db) # period passed, etag same etag = '1234' url = 'google.com/images' - mock_rdb.get_TI_file_info.return_value = {'e-tag': etag} + mock_db.get_TI_file_info.return_value = {'e-tag': etag} mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 @@ -38,20 +44,22 @@ def test_check_if_update_based_on_e_tag(mocker, mock_rdb): # period passed, etag different etag = '1111' url = 'google.com/images' - mock_rdb.get_TI_file_info.return_value = {'e-tag': etag} + mock_db.get_TI_file_info.return_value = {'e-tag': etag} mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 mock_requests.return_value.headers = {'ETag': '2222'} mock_requests.return_value.text = "" assert update_manager.check_if_update(url, float('-inf')) is True -def test_check_if_update_based_on_last_modified(database, mocker, mock_rdb): - update_manager = ModuleFactory().create_update_manager_obj(mock_rdb) +def test_check_if_update_based_on_last_modified(database, mocker, + mock_db + ): + update_manager = ModuleFactory().create_update_manager_obj(mock_db) # period passed, no etag, last modified the same url = 'google.com/photos' - mock_rdb.get_TI_file_info.return_value = {'Last-Modified': 10.0} + mock_db.get_TI_file_info.return_value = {'Last-Modified': 10.0} mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 mock_requests.return_value.headers = {'Last-Modified': 10.0} @@ -62,7 +70,7 @@ def test_check_if_update_based_on_last_modified(database, mocker, mock_rdb): # period passed, no etag, last modified changed url = 'google.com/photos' - mock_rdb.get_TI_file_info.return_value = {'Last-Modified': 10} + 
mock_db.get_TI_file_info.return_value = {'Last-Modified': 10} mock_requests = mocker.patch("requests.get") mock_requests.return_value.status_code = 200 mock_requests.return_value.headers = {'Last-Modified': 11} diff --git a/tests/test_vertical_portscans.py b/tests/test_vertical_portscans.py index 2ebec7b4c..5e8992bd9 100644 --- a/tests/test_vertical_portscans.py +++ b/tests/test_vertical_portscans.py @@ -9,12 +9,12 @@ def get_random_uid(): return base64.b64encode(binascii.b2a_hex(os.urandom(9))).decode('utf-8') -def not_enough_dports_to_reach_the_threshold(mock_rdb): +def not_enough_dports_to_reach_the_threshold(mock_db): """ returns a dict with conns to dport that are not enough to reach the minimum dports to trigger the first scan """ - module = ModuleFactory().create_vertical_portscan_obj(mock_rdb) + module = ModuleFactory().create_vertical_portscan_obj(mock_db) # get a random list of ints(ports) that are below the threshold # Generate a random number between 0 and threshold @@ -37,12 +37,12 @@ def not_enough_dports_to_reach_the_threshold(mock_rdb): # Return the list of random integers return res -def enough_dports_to_reach_the_threshold(mock_rdb): +def enough_dports_to_reach_the_threshold(mock_db): """ returns conns to dport that are enough to reach the minimum dports to trigger the first scan """ - module = ModuleFactory().create_vertical_portscan_obj(mock_rdb) + module = ModuleFactory().create_vertical_portscan_obj(mock_db) # get a random list of ints(ports) that are below the threshold # Generate a random number between 0 and threshold @@ -67,14 +67,14 @@ def enough_dports_to_reach_the_threshold(mock_rdb): return res -def not_enough_dports_to_combine_1_evidence(mock_rdb): +def not_enough_dports_to_combine_1_evidence(mock_db): """ returns dports that are not enough to combine an evidence: any number of dports within the range threshold -> threshold + 15 is ok here, aka won't be enough """ - module = ModuleFactory().create_vertical_portscan_obj(mock_rdb) + module = ModuleFactory().create_vertical_portscan_obj(mock_db) # get a random list of ints(ports) that are below the threshold # Generate a random number between 0 and threshold @@ -110,16 +110,16 @@ def not_enough_dports_to_combine_1_evidence(mock_rdb): def test_min_dports_threshold( get_test_conns, expected_return_val: bool, - mock_rdb + mock_db ): - vertical_ps = ModuleFactory().create_vertical_portscan_obj(mock_rdb) + vertical_ps = ModuleFactory().create_vertical_portscan_obj(mock_db) profileid = 'profile_1.1.1.1' timewindow = 'timewindow0' dstip = '8.8.8.8' - conns: dict = get_test_conns(mock_rdb) - mock_rdb.get_data_from_profile_tw.return_value = conns + conns: dict = get_test_conns(mock_db) + mock_db.get_data_from_profile_tw.return_value = conns cache_key = vertical_ps.get_cache_key(profileid, timewindow, dstip) amount_of_dports = len(conns[dstip]['dstports']) @@ -143,7 +143,7 @@ def test_min_dports_threshold( def test_combining_evidence( number_of_pending_evidence, expected_return_val: bool, - mock_rdb + mock_db ): """ first evidence will be alerted, the rest will be combined """ profileid = 'profile_1.1.1.1' timewindow = 'timewindow0' dstip = '8.8.8.8' - vertical_ps = ModuleFactory().create_vertical_portscan_obj(mock_rdb) + vertical_ps = ModuleFactory().create_vertical_portscan_obj(mock_db) key: str = vertical_ps.get_cache_key(profileid, timewindow, dstip) # get a random bunch of dstips, this dict is not important - dstips:dict = enough_dports_to_reach_the_threshold(mock_rdb) + dstips:dict = 
enough_dports_to_reach_the_threshold(mock_db) amount_of_dports = len(dstips[dstip]['dstports']) pkts_sent = sum(dstips[dstip]['dstports'].values()) @@ -200,10 +200,11 @@ def test_combining_evidence( (15, 20, True), ] ) -def test_check_if_enough_dports_to_trigger_an_evidence(mock_rdb, - prev_amount_of_dports, - cur_amount_of_dports, - expected_return_val): +def test_check_if_enough_dports_to_trigger_an_evidence( + mock_db, + prev_amount_of_dports, + cur_amount_of_dports, + expected_return_val): """ slips detects scans by comparing the number of dports scanned in the current timewindow to the number of dports scanned before """ profileid = 'profile_1.1.1.1' timewindow = 'timewindow0' dstip = '8.8.8.8' - vertical_ps = ModuleFactory().create_vertical_portscan_obj(mock_rdb) + vertical_ps = ModuleFactory().create_vertical_portscan_obj(mock_db) key: str = vertical_ps.get_cache_key(profileid, timewindow, dstip) vertical_ps.cached_tw_thresholds[key] = prev_amount_of_dports diff --git a/tests/test_virustotal.py b/tests/test_virustotal.py index 4f86c04ae..53525f43c 100644 --- a/tests/test_virustotal.py +++ b/tests/test_virustotal.py @@ -81,16 +81,20 @@ def get_allowed(quota): @pytest.mark.dependency(name='sufficient_quota') @pytest.mark.parametrize('ip', ['8.8.8.8']) @valid_api_key -def test_interpret_rsponse(ip, mock_rdb): - virustotal = ModuleFactory().create_virustotal_obj(mock_rdb) +def test_interpret_rsponse(ip, + mock_db + ): + virustotal = ModuleFactory().create_virustotal_obj(mock_db) response = virustotal.api_query_(ip) for ratio in virustotal.interpret_response(response): assert type(ratio) == float @pytest.mark.dependency(depends=["sufficient_quota"]) @valid_api_key -def test_get_domain_vt_data(mock_rdb): - virustotal = ModuleFactory().create_virustotal_obj(mock_rdb) +def test_get_domain_vt_data( + mock_db + ): + virustotal = ModuleFactory().create_virustotal_obj(mock_db) assert virustotal.get_domain_vt_data('google.com') is not False diff --git a/tests/test_whitelist.py b/tests/test_whitelist.py index 43a8d83ba..af259bd36 100644 --- a/tests/test_whitelist.py +++ b/tests/test_whitelist.py @@ -3,13 +3,15 @@ -def test_read_whitelist(mock_rdb): +def test_read_whitelist( + mock_db + ): """ make sure the content of whitelists is read and stored properly uses tests/test_whitelist.conf for testing """ - whitelist = ModuleFactory().create_whitelist_obj(mock_rdb) - mock_rdb.get_whitelist.return_value = {} + whitelist = ModuleFactory().create_whitelist_obj(mock_db) + mock_db.get_whitelist.return_value = {} whitelisted_IPs, whitelisted_domains, whitelisted_orgs, whitelisted_mac = whitelist.read_whitelist() assert '91.121.83.118' in whitelisted_IPs assert 'apple.com' in whitelisted_domains @@ -17,15 +19,19 @@ def test_read_whitelist(mock_rdb): @pytest.mark.parametrize('org,asn', [('google', 'AS6432')]) -def test_load_org_asn(org, asn, mock_rdb): - whitelist = ModuleFactory().create_whitelist_obj(mock_rdb) +def test_load_org_asn(org, asn, + mock_db + ): + whitelist = ModuleFactory().create_whitelist_obj(mock_db) assert whitelist.load_org_asn(org) is not False assert asn in whitelist.load_org_asn(org) @pytest.mark.parametrize('org,subnet', [('google', '216.73.80.0/20')]) -def test_load_org_IPs(org, subnet, mock_rdb): - whitelist = ModuleFactory().create_whitelist_obj(mock_rdb) +def test_load_org_IPs(org, subnet, + mock_db + ): + whitelist = ModuleFactory().create_whitelist_obj(mock_db) assert whitelist.load_org_IPs(org) is not False # we now store subnets in a dict sorted 
by the first octet first_octet = subnet.split('.')[0] diff --git a/webinterface.sh b/webinterface.sh index c1187ee3a..07ab9bab0 100755 --- a/webinterface.sh +++ b/webinterface.sh @@ -1,4 +1,4 @@ #!/bin/bash # run webinterface -cd webinterface -python3 app.py +#cd webinterface +python3 webinterface/app.py diff --git a/webinterface/analysis/analysis.py b/webinterface/analysis/analysis.py index 35e1b29ac..2d09f0f61 100644 --- a/webinterface/analysis/analysis.py +++ b/webinterface/analysis/analysis.py @@ -3,7 +3,9 @@ import json from collections import defaultdict from datetime import datetime +from typing import Dict, List from database.database import __database__ +from slips_files.common.slips_utils import utils analysis = Blueprint('analysis', __name__, static_folder='static', static_url_path='/analysis/static', template_folder='templates') @@ -14,8 +16,8 @@ # ---------------------------------------- def ts_to_date(ts, seconds=False): if seconds: - return datetime.fromtimestamp(ts).strftime('%Y/%m/%d %H:%M:%S.%f') - return datetime.fromtimestamp(ts).strftime('%Y/%m/%d %H:%M:%S') + return utils.convert_format(ts, '%Y/%m/%d %H:%M:%S.%f') + return utils.convert_format(ts, '%Y/%m/%d %H:%M:%S') def get_all_tw_with_ts(profileid): @@ -93,7 +95,8 @@ def get_ip_info(ip): @analysis.route("/profiles_tws") def set_profile_tws(): ''' - Set profiles and their timewindows into the tree. Blocked are highligted in red. + Set profiles and their timewindows into the tree. + Blocked are highlighted in red. :return: (profile, [tw, blocked], blocked) ''' @@ -104,9 +107,9 @@ def set_profile_tws(): profile_word, profile_ip = profileid.split("_") profiles_dict[profile_ip] = False - if blockedProfileTWs := __database__.db.hgetall('alerts'): - for blocked in blockedProfileTWs.keys(): - profile_word, blocked_ip = blocked.split("_") + if blocked_profiles := __database__.db.smembers('malicious_profiles'): + for profile in blocked_profiles: + blocked_ip = profile.split("_")[-1] profiles_dict[blocked_ip] = True data = [ @@ -138,17 +141,24 @@ def set_ip_info(ip): def set_tws(profileid): ''' Set timewindows for selected profile + :param profileid: ip of the profile :return: ''' # Fetch all profile TWs - tws = get_all_tw_with_ts(f"profile_{profileid}") + tws: Dict[str, dict] = get_all_tw_with_ts(f"profile_{profileid}") - if blockedTWs := __database__.db.hget('alerts', f"profile_{profileid}"): - blockedTWs = json.loads(blockedTWs) + blocked_tws: List[str] = [] + for tw_id, twid_details in tws.items(): + is_blocked: bool = __database__.db.hget( + f'profile_{profileid}_{tw_id}', + 'alerts' + ) + if is_blocked: + blocked_tws.append(tw_id) - for tw in blockedTWs.keys(): - tws[tw]['blocked'] = True + for tw in blocked_tws: + tws[tw]['blocked'] = True data = [ { @@ -292,22 +302,32 @@ def set_alerts(profile, timewindow): alerts = json.loads(alerts) alerts_tw = alerts.get(timewindow, {}) tws = get_all_tw_with_ts(profile) - evidences = __database__.db.hget(f"evidence{profile}", timewindow) - if evidences: - evidences = json.loads(evidences) - - for alert_ID, evidence_ID_list in alerts_tw.items(): - evidence_count = len(evidence_ID_list) - alert_description = json.loads(evidences[alert_ID]) - alert_timestamp = alert_description["stime"] - if not isinstance(alert_timestamp, str): # add check if the timestamp is a string - alert_timestamp = ts_to_date(alert_description["stime"], seconds=True) - profile_ip = profile.split("_")[1] - tw_name = tws[timewindow]["name"] + + evidence: Dict[str, str] = __database__.db.hgetall( 
f'{profile}_{timewindow}_evidence' + ) + + for alert_id, evidence_id_list in alerts_tw.items(): + evidence_count = len(evidence_id_list) + evidence_details: dict = json.loads(evidence[alert_id]) + + timestamp: str = ts_to_date( + evidence_details["timestamp"], + seconds=True + ) + + profile_ip: str = profile.split("_")[1] + twid: str = tws[timewindow]["name"] data.append( - {"alert": alert_timestamp, "alert_id": alert_ID, "profileid": profile_ip, "timewindow": tw_name, - "evidence_count": evidence_count}) + { + "alert": timestamp, + "alert_id": alert_id, + "profileid": profile_ip, + "timewindow": twid, + "evidence_count": evidence_count + } + ) return {"data": data} @@ -321,12 +341,16 @@ def set_evidence(profile, timewindow, alert_id): if alerts := __database__.db.hget("alerts", f"profile_{profile}"): alerts = json.loads(alerts) alerts_tw = alerts[timewindow] - evidence_ID_list = alerts_tw[alert_id] - evidences = __database__.db.hget("evidence" + "profile_" + profile, timewindow) - evidences = json.loads(evidences) + # get the list of evidence that were part of this alert + evidence_ids: List[str] = alerts_tw[alert_id] - for evidence_ID in evidence_ID_list: - temp_evidence = json.loads(evidences[evidence_ID]) + profileid = f"profile_{profile}" + evidence: Dict[str, str] = __database__.db.hgetall( + f'{profileid}_{timewindow}_evidence' + ) + + for evidence_id in evidence_ids: + temp_evidence = json.loads(evidence[evidence_id]) if "source_target_tag" not in temp_evidence: temp_evidence["source_target_tag"] = "-" data.append(temp_evidence) @@ -334,23 +358,25 @@ def set_evidence(profile, timewindow, alert_id): @analysis.route("/evidence///") -def set_evidence_general(profile, timewindow): +def set_evidence_general(profile: str, timewindow: str): """ Set an analysis tag with general evidence - :param profile: - :param timewindow: + :param profile: the ip + :param timewindow: timewindowx :return: {"data": data} where data is a list of evidences """ data = [] - if evidence := __database__.db.hget( - "evidence" + "profile_" + profile, timewindow - ): - evidence = json.loads(evidence) - for id, content in evidence.items(): - content = json.loads(content) - if "source_target_tag" not in content: - content["source_target_tag"] = "-" - data.append(content) + profile = f"profile_{profile}" + + evidence: Dict[str, str] = __database__.db.hgetall( + f'{profile}_{timewindow}_evidence' + ) + if evidence : + for evidence_details in evidence.values(): + evidence_details: dict = json.loads(evidence_details) + if "source_target_tag" not in evidence_details: + evidence_details["source_target_tag"] = "-" + data.append(evidence_details) return {"data": data} diff --git a/webinterface/analysis/static/js/tableDefs.js b/webinterface/analysis/static/js/tableDefs.js index f3b653d65..e059df5ae 100644 --- a/webinterface/analysis/static/js/tableDefs.js +++ b/webinterface/analysis/static/js/tableDefs.js @@ -30,7 +30,7 @@ let analysisSubTableDefs = { searching: false, scrollY: "25vh", // hardcoded length of opened datatable columns: [ - { data: 'stime'}, + { data: 'timestamp'}, { data: 'confidence'}, { data: 'threat_level'}, { data: 'category'}, @@ -210,7 +210,7 @@ let analysisTableDefs = { scrollX: false, searching: true, columns: [ - { data: 'stime'}, + { data: 'timestamp'}, { data: 'confidence'}, { data: 'threat_level'}, { data: 'category'}, diff --git a/webinterface/app.py b/webinterface/app.py index 50a92b7e1..ade5fd994 100644 --- a/webinterface/app.py +++ b/webinterface/app.py @@ -46,10 +46,8 @@ def set_pcap_info(): 
profiles = __database__.db.smembers('profiles') info["num_profiles"] = len(profiles) if profiles else 0 - alerts = __database__.db.hgetall('alerts') - info["num_alerts"] = len(alerts) if alerts else 0 - - + alerts_number = __database__.db.get('number_of_alerts') + info["num_alerts"] = int(alerts_number) if alerts_number else 0 return info diff --git a/webinterface/documentation/documentation.py b/webinterface/documentation/documentation.py index b9faee9f1..cde233ff5 100644 --- a/webinterface/documentation/documentation.py +++ b/webinterface/documentation/documentation.py @@ -1,8 +1,11 @@ from flask import Blueprint from flask import render_template -documentation = Blueprint('documentation', __name__, static_folder='static', static_url_path='/documentation/static', - template_folder='templates') +documentation = Blueprint('documentation', + __name__, + static_folder='static', + static_url_path='/documentation/static', + template_folder='templates') @documentation.route("/") def index(): diff --git a/webinterface/documentation/templates/documentation.html b/webinterface/documentation/templates/documentation.html index 9262e4343..1139a631a 100644 --- a/webinterface/documentation/templates/documentation.html +++ b/webinterface/documentation/templates/documentation.html @@ -4,7 +4,7 @@ - <title>Document</title> + <title>Slips</title> diff --git a/webinterface/templates/app.html b/webinterface/templates/app.html index 71037e18a..01be82de9 100644 --- a/webinterface/templates/app.html +++ b/webinterface/templates/app.html @@ -10,6 +10,7 @@ + <title>Slips</title>