Skip to content

Commit

Permalink
Misc build and linter fixes (#44)
Browse files Browse the repository at this point in the history
* Replaced reference to pywhois with msticpy whois in url_summary.py

Fixed creation of process tree - ensuring Rarity column remains numeric in logon_session_rarity.py
Fixes in ti.py for potentially uninitialized variables.
Fixed missing respx mocked URLs in test_ip_summary.py

* Updating requirements.txt to align with msticpy

* Mypy and pylint fixes
  • Loading branch information
ianhelle authored May 29, 2024
1 parent a1bb25a commit c2f2903
Show file tree
Hide file tree
Showing 17 changed files with 99 additions and 67 deletions.
27 changes: 9 additions & 18 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,24 @@ repos:
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- repo: https://github.com/ambv/black
rev: 22.1.0
rev: 24.4.2
hooks:
- id: black
language: python
args:
- -t
- py36
- repo: https://github.com/PyCQA/pylint
rev: v2.12.2
hooks:
- id: pylint
args:
- --disable=E0401,W0511,duplicate-code
- --ignore-patterns=test_
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
args:
- --extend-ignore=E0401,E501,W503
- --max-line-length=90
- --exclude=tests,test*.py
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
args:
- --profile
- black
- black
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.4.5
hooks:
# Run the linter.
- id: ruff
10 changes: 5 additions & 5 deletions msticnb/nb/azsent/account/account_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,17 @@ def __init__(
self.description: str = "Account Activity Summary"
self.account_entity: entities.Account = None
self.account_activity: Optional[pd.DataFrame] = None
self.account_selector: nbwidgets.SelectItem = None
self.account_selector: Optional[nbwidgets.SelectItem] = None
self.related_alerts: Optional[pd.DataFrame] = None
self.alert_timeline: LayoutDOM = None
self.alert_timeline: Optional[LayoutDOM] = None
self.related_bookmarks: Optional[pd.DataFrame] = None
self.host_logons: Optional[pd.DataFrame] = None
self.host_logon_summary: Optional[pd.DataFrame] = None
self.azure_activity: Optional[pd.DataFrame] = None
self.azure_activity_summary: Optional[pd.DataFrame] = None
self.azure_timeline_by_provider: LayoutDOM = None
self.account_timeline_by_ip: LayoutDOM = None
self.azure_timeline_by_operation: LayoutDOM = None
self.azure_timeline_by_provider: Optional[LayoutDOM] = None
self.account_timeline_by_ip: Optional[LayoutDOM] = None
self.azure_timeline_by_operation: Optional[LayoutDOM] = None
self.ip_summary: Optional[pd.DataFrame] = None
self.ip_all_data: Optional[pd.DataFrame] = None

Expand Down
8 changes: 4 additions & 4 deletions msticnb/nb/azsent/host/host_logons_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ class HostLogonsSummary(Notebooklet): # pylint: disable=too-few-public-methods

metadata = _CLS_METADATA

@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001
@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001, C901
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
def run( # noqa:MC0001
def run( # noqa:MC0001, C901
self,
value: Any = None,
data: Optional[pd.DataFrame] = None,
Expand Down Expand Up @@ -380,13 +380,13 @@ def _process_stack_bar(data: pd.DataFrame, silent: bool) -> figure:
legend_label=results,
)

viz.y_range.start = 0
viz.y_range.start = 0 # type: ignore[attr-defined]
viz.x_range.range_padding = 0.1 # type: ignore[attr-defined]
viz.xgrid.grid_line_color = None # type: ignore[attr-defined]
viz.axis.minor_tick_line_color = None
viz.yaxis.axis_label = "% of logons"
viz.xaxis.axis_label = "Process name" # type: ignore[assignment]
viz.outline_line_color = None
viz.outline_line_color = None # type: ignore[assignment]
viz.legend.location = "top_left"
viz.legend.orientation = "horizontal"

Expand Down
10 changes: 8 additions & 2 deletions msticnb/nb/azsent/host/host_network_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(self, *args, **kwargs):

# pylint: disable=too-many-branches
@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001
def run( # noqa:MC0001
def run( # noqa:MC0001, C901
self,
value: Any = None,
data: Optional[pd.DataFrame] = None,
Expand Down Expand Up @@ -163,6 +163,10 @@ def run( # noqa:MC0001
qry_prov=self.query_provider,
timespan=self.timespan,
)
if result.flows is None:
nb_markdown("No network flow data found.")
self._last_result = result
return self._last_result

remote_ip_col = "RemoteIP"
local_ip_col = "LocalIP"
Expand Down Expand Up @@ -239,7 +243,7 @@ def _display_results(self):


@lru_cache()
def _get_host_flows(host_name, ip_addr, qry_prov, timespan) -> pd.DataFrame:
def _get_host_flows(host_name, ip_addr, qry_prov, timespan) -> Optional[pd.DataFrame]:
if host_name:
nb_data_wait("Host flow events")
host_flows = qry_prov.MDE.host_connections(timespan, host_name=host_name)
Expand All @@ -254,6 +258,8 @@ def _get_host_flows(host_name, ip_addr, qry_prov, timespan) -> pd.DataFrame:
host_flows_csl = qry_prov.Network.ip_network_connections_csl(
timespan, ip=ip_addr
)
else:
return None
return pd.concat([host_flows, host_flows_csl], sort=False)


Expand Down
2 changes: 1 addition & 1 deletion msticnb/nb/azsent/host/host_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

# pylint: disable=too-many-branches, too-many-statements
def run( # noqa:MC0001
def run( # noqa:MC0001, C901
self,
value: Any = None,
data: Optional[pd.DataFrame] = None,
Expand Down
15 changes: 13 additions & 2 deletions msticnb/nb/azsent/host/logon_session_rarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from msticpy.analysis.eventcluster import char_ord_score, dbcluster_events, delim_count
from msticpy.common.timespan import TimeSpan

# pylint: disable=unused-import
from msticpy.init import mp_pandas_accessors # noqa: F401

try:
from msticpy import nbwidgets

Expand Down Expand Up @@ -252,13 +255,21 @@ def process_tree(
acct_col = self.column_map.get(COL_ACCT)
data = self._last_result.processes_with_cluster
data = data[data[acct_col] == account]
data.mp_plot.process_tree(legend_col="Rarity")
proc_tree_data = data.mp.build_process_tree()
proc_tree_data["Rarity"] = pd.to_numeric(
proc_tree_data["Rarity"], errors="coerce"
).fillna(0)
proc_tree_data.mp_plot.process_tree(legend_col="Rarity")
return
session = session or self._event_browser.value
sess_col = self.column_map.get(COL_SESS)
data = self._last_result.processes_with_cluster
data = data[data[sess_col] == session]
data.mp_plot.process_tree(legend_col="Rarity")
proc_tree_data = data.mp.build_process_tree()
proc_tree_data["Rarity"] = pd.to_numeric(
proc_tree_data["Rarity"], errors="coerce"
).fillna(0)
proc_tree_data.mp_plot.process_tree(legend_col="Rarity")

def browse_events(self):
"""Browse the events by logon session."""
Expand Down
2 changes: 1 addition & 1 deletion msticnb/nb/azsent/network/ip_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class IpAddressSummary(Notebooklet):

# pylint: disable=too-many-branches, too-many-statements
@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001
def run( # noqa: MC0001
def run( # noqa: MC0001,C901
self,
value: Any = None,
data: Optional[pd.DataFrame] = None,
Expand Down
2 changes: 1 addition & 1 deletion msticnb/nb/azsent/network/network_flow_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def __init__(self, data_providers: Optional[DataProviders] = None, **kwargs):

# pylint: disable=too-many-branches
@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001
def run( # noqa: MC0001
def run( # noqa: MC0001, C901
self,
value: Any = None,
data: Optional[pd.DataFrame] = None,
Expand Down
6 changes: 4 additions & 2 deletions msticnb/nb/azsent/url/url_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@
import pandas as pd
import tldextract
from IPython.display import Image, display
from whois import whois # type: ignore

# pylint: disable=ungrouped-imports
try:
from msticpy import nbwidgets
from msticpy.context.domain_utils import DomainValidator, screenshot
from msticpy.context.ip_utils import ip_whois as whois
from msticpy.vis.timeline import display_timeline, display_timeline_values
except ImportError:
# Fall back to msticpy locations prior to v2.0.0
from whois import whois # type: ignore
from msticpy.sectools.domain_utils import DomainValidator, screenshot
from msticpy.nbtools import nbwidgets
from msticpy.nbtools.nbdisplay import display_timeline, display_timeline_values
Expand Down Expand Up @@ -95,7 +96,7 @@ class URLSummary(Notebooklet):

# pylint: disable=too-many-branches, too-many-locals, too-many-statements
@set_text(docs=_CELL_DOCS, key="run") # noqa: MC0001
def run( # noqa:MC0001
def run( # noqa:MC0001, C901
self,
value: Any = None,
data: Optional[pd.DataFrame] = None,
Expand Down Expand Up @@ -161,6 +162,7 @@ def run( # noqa:MC0001
self._last_result = result

self.url = value.strip().lower()

_, domain, tld = cast(Tuple[Any, str, str], tldextract.extract(self.url)) # type: ignore
domain = f"{domain.lower()}.{tld.lower()}"
domain_validator = DomainValidator()
Expand Down
2 changes: 1 addition & 1 deletion msticnb/nblib/azsent/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_aznet_topology(


@lru_cache() # noqa:MC0001
def verify_host_name( # noqa: MC0001
def verify_host_name( # noqa: MC0001, C901
qry_prov: QueryProvider, host_name: str, timespan: TimeSpan = None, **kwargs
) -> HostNameVerif:
"""
Expand Down
42 changes: 21 additions & 21 deletions msticnb/nblib/ti.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# license information.
# --------------------------------------------------------------------------
"""Threat Intelligence notebooklet feature support."""
from typing import Any, Tuple, Optional
from typing import Any, Optional, Tuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -81,36 +81,36 @@ def extract_iocs(
)
b64_iocs = b64_extracted.mp_ioc.extract(columns=["decoded_string"])
b64_iocs["SourceIndex"] = pd.to_numeric(b64_iocs["SourceIndex"])
data_b64_iocs = pd.merge(
data = pd.merge(
left=data,
right=b64_iocs,
how="outer",
left_index=True,
right_on="SourceIndex",
)
else:
data_b64_iocs = data
other_iocs = data_b64_iocs.mp_ioc.extract(columns=[col])
all_data_w_iocs = pd.merge(
left=data_b64_iocs,
right=other_iocs,

iocs = data.mp_ioc.extract(columns=[col])
data = pd.merge(
left=data,
right=iocs,
how="outer",
left_index=True,
right_on="SourceIndex",
)
if "Observable_x" in all_data_w_iocs.columns:
all_data_w_iocs["IoC"] = np.where(
all_data_w_iocs["Observable_x"].isna(),
all_data_w_iocs["Observable_y"],
all_data_w_iocs["Observable_x"],

if "Observable_x" in data.columns:
data["IoC"] = np.where(
data["Observable_x"].isna(),
data["Observable_y"],
data["Observable_x"],
)
all_data_w_iocs["IoCType"] = np.where(
all_data_w_iocs["IoCType_x"].isna(),
all_data_w_iocs["IoCType_y"],
all_data_w_iocs["IoCType_x"],
data["IoCType"] = np.where(
data["IoCType_x"].isna(),
data["IoCType_y"],
data["IoCType_x"],
)
all_data_w_iocs["IoC"] = all_data_w_iocs["IoC"].astype("str")
data["IoC"] = data["IoC"].astype("str")
else:
all_data_w_iocs["IoC"] = all_data_w_iocs["Observable"].astype("str")
all_data_w_iocs["IoCType"] = all_data_w_iocs["IoCType"].astype("str")
return all_data_w_iocs
data["IoC"] = data["Observable"].astype("str")
data["IoCType"] = data["IoCType"].astype("str")
return data
5 changes: 3 additions & 2 deletions msticnb/notebooklet.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import warnings
from abc import ABC, abstractmethod
from functools import wraps
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

import pandas as pd
from IPython.core.getipython import get_ipython
Expand All @@ -35,7 +36,7 @@ class Notebooklet(ABC):
metadata: NBMetadata = NBMetadata(
name="Notebooklet", description="Base class", default_options=[]
)
module_path = ""
module_path: Union[str, Path] = ""

def __init__(self, data_providers: Optional[DataProviders] = None, **kwargs):
"""
Expand Down
7 changes: 3 additions & 4 deletions msticnb/read_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from functools import partial
from operator import itemgetter
from pathlib import Path
from typing import Dict, Iterable, List, Tuple, Union
from typing import Dict, Iterable, List, Tuple, Type, Union
from warnings import warn

from . import nb
Expand All @@ -24,8 +24,7 @@
__author__ = "Ian Hellen"

nblts: NBContainer = NBContainer()
# index of notebooklets classes by full path
nb_index: Dict[str, type] = {}
nb_index: Dict[str, Type[Notebooklet]] = {}


def discover_modules(nb_path: Union[str, Iterable[str], None] = None) -> NBContainer:
Expand Down Expand Up @@ -97,7 +96,7 @@ def _import_from_folder(nb_folder: Path, pkg_folder: Path):
nb_index[cls_index] = nb_class


def _find_cls_modules(folder: Path, pkg_folder: Path) -> Dict[str, type]:
def _find_cls_modules(folder: Path, pkg_folder: Path) -> Dict[str, Type[Notebooklet]]:
"""
Import .py files in `folder` and return any Notebooklet classes found.
Expand Down
20 changes: 20 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[build-system]
requires = [
"setuptools>=42",
"wheel"
]
build-backend = "setuptools.build_meta"

[tool.isort]
profile = "black"
src_paths = ["msticnb", "tests"]

[tool.pydocstyle]
convention = "numpy"

[tool.ruff.lint]
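# Ruff's defaults are Pyflakes (`F`) and a subset of the pycodestyle
# error codes (`E4`, `E7`, `E9`). Unlike Flake8, Ruff doesn't enable
# pycodestyle warnings (`W`) or McCabe complexity (`C901`) by default, so
# `W`, `C` (which includes `C901`) and pydocstyle (`D`) are selected
# explicitly below.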
select = ["E4", "E7", "E9", "F", "W", "D", "C"]
ignore = ["D212", "D417", "D203"]
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
bokeh<3.0.0
bokeh>=1.4.0, <3.4.0
defusedxml>=0.6.0
ipython>=7.23.1
ipywidgets>=7.5.1
Expand All @@ -9,5 +9,4 @@ numpy>=1.17.3
pandas>=0.25.3
python-dateutil>=2.8.1
tqdm>=4.41.1
python-whois>=0.7.3
tldextract>=3.3.0
2 changes: 1 addition & 1 deletion tests/nb/azsent/host/test_hostlogonsummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from msticpy.vis.foliummap import FoliumMap
except ImportError:
# Fall back to msticpy locations prior to v2.0.0
from msticpy.nbtools.foliummap import FoliumMap
from msticpy.nbtools.foliummap import FoliumMap # noqa: F401

from msticnb import data_providers, discover_modules, nblts

Expand Down
Loading

0 comments on commit c2f2903

Please sign in to comment.