Skip to content

Commit

Permalink
Ianhelle/url-summary-fixes-2024-09-23 (#47)
Browse files Browse the repository at this point in the history
* Updates for msticpy compatibility

Fixing data_providers to check for required parameter before creating class
ip_summary - handle either dataclass or tuple representation of whois result
ti.py - TI call using ioc_col instead of deprecated obs_col
minor updates to unit tests.

* Updating version to 1.2.2

* Fixing call to data providers and pandas compat

* Various fixes for url summary and others.
Added unit tests

* Adding packages for tests

* Failing test fixes

* unit test fixes

* ip_summary test fix

* Linting test fixes

* More test case and warning fixes
  • Loading branch information
ianhelle authored Sep 24, 2024
1 parent e5a4f61 commit e3aa5be
Show file tree
Hide file tree
Showing 22 changed files with 412 additions and 94 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
python -m pip install pytest pytest-cov pytest-xdist pytest-check aiohttp nbconvert jupyter_contrib_nbextensions
python -m pip install Pygments respx pytest-xdist markdown beautifulsoup4 Pillow async-cache lxml
fi
python -m pip install "pandas>=1.3.0" "pygeohash>=1.2.0"
python -m pip install "pandas>=1.3.0" "pygeohash>=1.2.0" scikit-learn matplotlib
- name: Pytest
env:
MAXMIND_AUTH: DUMMY_KEY
Expand Down
2 changes: 1 addition & 1 deletion msticnb/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version file."""

VERSION = "1.2.2"
VERSION = "1.2.3"
6 changes: 3 additions & 3 deletions msticnb/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from markdown import markdown
from msticpy import VERSION as MP_VERSION
from msticpy.common import utility as mp_utils
from pkg_resources import parse_version
from packaging.version import parse

from ._version import VERSION
from .options import get_opt
Expand Down Expand Up @@ -284,12 +284,12 @@ class MsticnbDataProviderError(MsticnbError):

def mp_version():
    """
    Return the currently-loaded msticpy version.

    Returns
    -------
    Version
        The msticpy ``VERSION`` string parsed by ``packaging.version.parse``,
        suitable for ordered comparison with other parsed versions.

    """
    # Only the packaging-based parser is used: pkg_resources.parse_version
    # is deprecated, and a second return after it would never execute.
    return parse(MP_VERSION)


def check_mp_version(required_version: str) -> bool:
    """
    Return True if the installed msticpy version is >= `required_version`.

    Parameters
    ----------
    required_version : str
        Minimum msticpy version needed, e.g. "2.13.0".

    Returns
    -------
    bool
        True if the loaded msticpy version is at least `required_version`.

    """
    # Compare the complete parsed versions. Comparing only the major
    # component would accept, for example, 2.1.0 when 2.13.0 is required.
    # Both sides are parsed with the same parser so they are comparable.
    return parse(MP_VERSION) >= parse(required_version)


def check_current_result(
Expand Down
10 changes: 10 additions & 0 deletions msticnb/nb/azsent/alert/ti_enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ def __init__(
self.enriched_results: Optional[pd.DataFrame] = None
self.picker: Optional[SelectAlert] = None

@property
def alert_selector(self):
    """Expose the object held in ``self.picker`` (same value as ``alert_picker``)."""
    selector = self.picker
    return selector

@property
def alert_picker(self):
    """Give read-only access to the object stored in ``self.picker``."""
    return getattr(self, "picker")


# pylint: enable=too-few-public-methods
# pylint: disable=too-many-branches
Expand Down
55 changes: 42 additions & 13 deletions msticnb/nb/azsent/host/host_network_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from functools import lru_cache
from typing import Any, Dict, Iterable, Optional

import msticpy
import pandas as pd
from bokeh.models import LayoutDOM
from IPython.display import display
Expand All @@ -21,6 +22,7 @@

from msticpy.common.timespan import TimeSpan
from msticpy.common.utility import md
from msticpy.datamodel.entities import Host

from ...._version import VERSION
from ....common import (
Expand All @@ -38,6 +40,7 @@
__author__ = "Pete Bryan"


_MSTICPY_VER = msticpy.__version__
_CLS_METADATA: NBMetadata
_CELL_DOCS: Dict[str, Any]
_CLS_METADATA, _CELL_DOCS = read_mod_metadata(__file__, __name__)
Expand Down Expand Up @@ -84,7 +87,7 @@ def __init__(self, *args, **kwargs):
"""Initialize the Host Network Summary notebooklet."""
super().__init__(*args, **kwargs)

# pylint: disable=too-many-branches
# pylint: disable=too-many-branches, too-many-locals
@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001
def run( # noqa:MC0001, C901
self,
Expand Down Expand Up @@ -149,8 +152,17 @@ def run( # noqa:MC0001, C901
notebooklet=self, description=self.metadata.description, timespan=timespan
)

host_name = value.HostName
ip_addr = value.IpAddress.Address if "IpAddress" in value else None
if isinstance(value, Host):
host_name = value.HostName
ip_addr = value.IpAddress.Address if "IpAddress" in value else None
elif isinstance(value, tuple):
host_name, ip_addr = value
else:
raise ValueError(
"Could not determine host name or IP address from value parameter."
"Please supply host and ip address in the form of a Host entity",
"or tuple of (host_name, ip_address).",
)

if not host_name and not ip_addr:
md(f"Could not obtain unique host name from {value}. Aborting.")
Expand All @@ -170,34 +182,41 @@ def run( # noqa:MC0001, C901

remote_ip_col = "RemoteIP"
local_ip_col = "LocalIP"
if "SrcIP" in result.flows.columns:
remote_ip_col = "DestIP"
local_ip_col = "SrcIP"
if "SourceIP" in result.flows.columns:
remote_ip_col = "DestinationIP"
local_ip_col = "SourceIP"
if not result.flows.empty:
result.flow_matrix = result.flows.mp_plot.matrix(
x=remote_ip_col, y=local_ip_col, title="IP Interaction", sort="asc"
)

flows_remote_ips = result.flows[[remote_ip_col]].drop_duplicates()
flows_source_ips = (
result.flows[[local_ip_col]]
.drop_duplicates()
.rename(columns={local_ip_col: remote_ip_col})
)
flows_all = pd.concat([flows_remote_ips, flows_source_ips]).drop_duplicates()
if "ti" in self.options:
if "tilookup" in self.data_providers.providers:
ti_prov = self.data_providers.providers["tilookup"]
else:
raise MsticnbDataProviderError("No TI providers avaliable")
raise MsticnbDataProviderError("No TI providers available")
ti_results, ti_results_merged = get_ti_results(
ti_prov, result.flows, remote_ip_col
ti_prov, flows_all, remote_ip_col
)
if isinstance(ti_results, pd.DataFrame) and not ti_results.empty:
result.flow_ti = ti_results_merged

if (
"map" in self.options
and isinstance(result.flows, pd.DataFrame)
and not result.flows.empty
and isinstance(flows_all, pd.DataFrame)
and not flows_all.empty
):
result.flow_map = result.flows.mp_plot.folium_map(ip_column=remote_ip_col)
result.flow_map = flows_all.mp_plot.folium_map(ip_column=remote_ip_col)

if "whois" in self.options:
result.flow_whois = _get_whois_data(result.flows, col=remote_ip_col)
result.flow_whois = _get_whois_data(flows_all, col=remote_ip_col)

self._last_result = result

Expand Down Expand Up @@ -247,6 +266,8 @@ def _get_host_flows(host_name, ip_addr, qry_prov, timespan) -> Optional[pd.DataF
if host_name:
nb_data_wait("Host flow events")
host_flows = qry_prov.MDE.host_connections(timespan, host_name=host_name)
host_flows["SourceIP"] = host_flows["LocalIP"]
host_flows["DestinationIP"] = host_flows["RemoteIP"]
host_flows_csl = qry_prov.Network.host_network_connections_csl(
timespan, host_name=host_name
)
Expand All @@ -265,5 +286,13 @@ def _get_host_flows(host_name, ip_addr, qry_prov, timespan) -> Optional[pd.DataF

def _release_tuple(version) -> tuple:
    """Return the leading numeric release of `version` as a tuple of 3 ints."""
    import re  # local import keeps this helper self-contained

    match = re.match(r"\s*v?(\d+(?:\.\d+)*)", str(version))
    numbers = [int(part) for part in match.group(1).split(".")] if match else []
    # Pad short versions so that "2.13" and "2.13.0" compare equal.
    numbers += [0] * (3 - len(numbers))
    return tuple(numbers[:3])


def _get_whois_data(data, col) -> pd.DataFrame:
    """
    Add whois ASN information for the IP addresses in column `col`.

    Parameters
    ----------
    data : pd.DataFrame
        Data containing the IP addresses. The new columns are added
        to this DataFrame in place.
    col : str
        Name of the column holding the IP addresses to look up.

    Returns
    -------
    pd.DataFrame
        `data` with an "ASN" column added and, for msticpy 2.13.0 or
        later, an "ASNProperties" column as well. Empty input is
        returned unchanged.

    """
    if data.empty:
        return data

    # Compare numeric release tuples: a plain string comparison orders
    # "2.9.0" after "2.13.0" and would select the wrong branch.
    if _release_tuple(_MSTICPY_VER) < (2, 13, 0):
        data["ASN"] = data.apply(lambda row: get_whois_info(row[col]), axis=1)
        return data

    # One lookup per row - both output columns come from the same result.
    whois_results = [get_whois_info(ip_addr, True) for ip_addr in data[col]]
    data["ASN"] = [whois.name for whois in whois_results]
    data["ASNProperties"] = [whois.properties for whois in whois_results]
    return data
27 changes: 15 additions & 12 deletions msticnb/nb/azsent/host/win_host_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,19 +346,22 @@ def _expand_event_properties(input_df):
# the whole data set but it will result
# in a lot of sparse columns in the output data frame.
exp_df = input_df.apply(lambda x: pd.Series(x.EventProperties), axis=1)
return (
exp_df.drop(set(input_df.columns).intersection(exp_df.columns), axis=1)
.merge(
input_df.drop("EventProperties", axis=1),
how="inner",
left_index=True,
right_index=True,
with pd.option_context("future.no_silent_downcasting", True):
return (
exp_df.drop(set(input_df.columns).intersection(exp_df.columns), axis=1)
.merge(
input_df.drop("EventProperties", axis=1),
how="inner",
left_index=True,
right_index=True,
)
.replace(
r"^\s*$", np.nan, regex=True
) # these 3 lines get rid of blank columns
.dropna(axis=1, how="all")
.infer_objects(copy=False)
.fillna("")
)
.replace("", np.nan) # these 3 lines get rid of blank columns
.infer_objects(copy=False)
.dropna(axis=1, how="all")
.fillna("")
)


@set_text(docs=_CELL_DOCS, key="parse_eventdata")
Expand Down
7 changes: 4 additions & 3 deletions msticnb/nb/azsent/network/ip_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,15 +421,16 @@ def _get_public_ip_data(self, src_ip, result):
if geo_lookup:
_get_geoip_data(geo_lookup, src_ip, result)

ti_provider = self.get_provider(provider_name="tilookup")
# TI Lookup
if result.ip_origin == "External" or "ti" in self.options:
_get_ti_data(self.get_provider("tilookup"), src_ip, result)
_get_ti_data(ti_provider, src_ip, result)

# Passive DNS
if (
result.ip_origin == "External" or "passive_dns" in self.options
) and isinstance(result.ip_address, IPv4Address):
_get_passv_dns(self.get_provider("tilookup"), src_ip, result)
_get_passv_dns(ti_provider, src_ip, result)

@set_text(docs=_CELL_DOCS, key="get_az_netflow")
def _get_azure_netflow(self, src_ip, result, timespan):
Expand Down Expand Up @@ -998,7 +999,7 @@ def _get_ti_data(ti_lookup, src_ip, result):
return
ti_results = ti_lookup.lookup_ioc(src_ip)
result.ti_results = ti_lookup.result_to_df(ti_results)
warn_ti_res = len(result.ti_results.query("Severity != 'information'"))
warn_ti_res = len(result.ti_results.query("Severity > 0"))
if warn_ti_res:
nb_markdown(f"{warn_ti_res} TI result(s) of severity 'warning' or above found.")
nb_display(result.ti_results)
Expand Down
12 changes: 7 additions & 5 deletions msticnb/nb/azsent/network/network_flow_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def lookup_ti_for_asn_ips(self):
self._last_result.ti_results = _lookup_ip_ti(
flows_df=self._last_result.flow_index_data,
selected_ips=selected_ips,
ti_lookup=self.data_providers["tilookup"],
ti_lookup=self.get_provider("tilookup"),
)

def show_selected_asn_map(self) -> foliummap.FoliumMap:
Expand Down Expand Up @@ -586,10 +586,12 @@ def _get_source_host_asns(host_entity):
host_asns = []
for ip_entity in host_ips:
if get_ip_type(ip_entity.Address) == "Public":
ip_entity.ASNDescription, ip_entity.ASNDetails = get_whois_info(
ip_entity.Address
)
host_asns.append(ip_entity.ASNDescription)
whois_result = get_whois_info(ip_entity)
if hasattr(whois_result, "properties"):
asn_name = whois_result.name
else:
asn_name = whois_result[0]
host_asns.append(asn_name)
return host_asns


Expand Down
Loading

0 comments on commit e3aa5be

Please sign in to comment.