Skip to content

Commit

Permalink
Merge branch 'master' into dynamic-feature-extraction
Browse files: browse the repository at this point in the history
  • Loading branch information
williballenthin committed Aug 10, 2023
2 parents 3cf748a + e5efc15 commit c1fbb27
Show file tree
Hide file tree
Showing 21 changed files with 341 additions and 136 deletions.
19 changes: 9 additions & 10 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,21 @@

### New Features
- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04
- Add a dynamic feature extractor for the CAPE sandbox @yelhamer [#1535](https://github.com/mandiant/capa/issues/1535)
- Add unit tests for the new CAPE extractor #1563 @yelhamer
- Add a CAPE file format and CAPE-based dynamic feature extraction to scripts/show-features.py #1566 @yelhamer
- Add a new process scope for the dynamic analysis flavor #1517 @yelhamer
- Add a new thread scope for the dynamic analysis flavor #1517 @yelhamer
- Add support for flavor-based rule scopes @yelhamer
- Add ProcessesAddress and ThreadAddress #1612 @yelhamer
- Add dynamic capability extraction @yelhamer
- Add support for mixed-scopes rules @yelhamer
- Add a call scope @yelhamer
- bump pydantic from 1.10.9 to 2.1.1 #1582 @Aayush-Goel-04
- develop script to highlight the features that are not used during matching #331 @Aayush-Goel-04
- implement dynamic analysis via CAPE sandbox #48 #1535 @yelhamer
- add call scope #771 @yelhamer
- add process scope for the dynamic analysis flavor #1517 @yelhamer
- add thread scope for the dynamic analysis flavor #1517 @yelhamer

### Breaking Changes

### New Rules (4)

- executable/pe/export/forwarded-export [email protected]
- host-interaction/bootloader/get-uefi-variable [email protected]
- host-interaction/bootloader/set-uefi-variable [email protected]
- nursery/enumerate-device-drivers-on-linux @mr-tz
-

### Bug Fixes
Expand All @@ -29,6 +27,7 @@
- linter: skip native API check for NtProtectVirtualMemory #1675 @williballenthin

### capa explorer IDA Pro plugin
- fix unhandled exception when resolving rule path #1693 @mike-hunhoff

### Development

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-826-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-828-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
Expand Down
4 changes: 2 additions & 2 deletions capa/features/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ def __lt__(self, other):
import capa.features.freeze.features

return (
capa.features.freeze.features.feature_from_capa(self).json()
< capa.features.freeze.features.feature_from_capa(other).json()
capa.features.freeze.features.feature_from_capa(self).model_dump_json()
< capa.features.freeze.features.feature_from_capa(other).model_dump_json()
)

def get_name_str(self) -> str:
Expand Down
1 change: 1 addition & 0 deletions capa/features/extractors/base_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from dataclasses import dataclass

# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
# https://github.com/mandiant/capa/issues/1699
from typing_extensions import TypeAlias

import capa.features.address
Expand Down
29 changes: 9 additions & 20 deletions capa/features/freeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from enum import Enum
from typing import List, Tuple, Union

from pydantic import Field, BaseModel
from pydantic import Field, BaseModel, ConfigDict
from typing_extensions import TypeAlias

import capa.helpers
Expand All @@ -38,8 +38,7 @@


class HashableModel(BaseModel):
class Config:
frozen = True
model_config = ConfigDict(frozen=True)


class AddressType(str, Enum):
Expand All @@ -57,7 +56,7 @@ class AddressType(str, Enum):

class Address(HashableModel):
type: AddressType
value: Union[int, Tuple[int, ...], None]
value: Union[int, Tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS

@classmethod
def from_capa(cls, a: capa.features.address.Address) -> "Address":
Expand Down Expand Up @@ -271,9 +270,7 @@ class BasicBlockFeature(HashableModel):
basic_block: Address = Field(alias="basic block")
address: Address
feature: Feature

class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)


class InstructionFeature(HashableModel):
Expand Down Expand Up @@ -306,9 +303,7 @@ class FunctionFeatures(BaseModel):
address: Address
features: Tuple[FunctionFeature, ...]
basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks")

class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)


class CallFeatures(BaseModel):
Expand All @@ -332,9 +327,7 @@ class StaticFeatures(BaseModel):
global_: Tuple[GlobalFeature, ...] = Field(alias="global")
file: Tuple[FileFeature, ...]
functions: Tuple[FunctionFeatures, ...]

class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)


class DynamicFeatures(BaseModel):
Expand All @@ -352,9 +345,7 @@ class Config:
class Extractor(BaseModel):
name: str
version: str = capa.version.__version__

class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)


class Freeze(BaseModel):
Expand All @@ -363,9 +354,7 @@ class Freeze(BaseModel):
sample_hashes: SampleHashes
extractor: Extractor
features: Features

class Config:
allow_population_by_field_name = True
model_config = ConfigDict(populate_by_name=True)


def dumps_static(extractor: StaticFeatureExtractor) -> str:
Expand Down Expand Up @@ -467,7 +456,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
) # type: ignore
# Mypy is unable to recognise `base_address` as an argument due to alias

return freeze.json()
return freeze.model_dump_json()


def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
Expand Down
54 changes: 26 additions & 28 deletions capa/features/freeze/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import binascii
from typing import Union, Optional

from pydantic import Field, BaseModel
from pydantic import Field, BaseModel, ConfigDict

import capa.features.file
import capa.features.insn
Expand All @@ -17,9 +17,7 @@


class FeatureModel(BaseModel):
class Config:
frozen = True
allow_population_by_field_name = True
model_config = ConfigDict(frozen=True, populate_by_name=True)

def to_capa(self) -> capa.features.common.Feature:
if isinstance(self, OSFeature):
Expand Down Expand Up @@ -213,141 +211,141 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
class OSFeature(FeatureModel):
type: str = "os"
os: str
description: Optional[str]
description: Optional[str] = None


class ArchFeature(FeatureModel):
type: str = "arch"
arch: str
description: Optional[str]
description: Optional[str] = None


class FormatFeature(FeatureModel):
type: str = "format"
format: str
description: Optional[str]
description: Optional[str] = None


class MatchFeature(FeatureModel):
type: str = "match"
match: str
description: Optional[str]
description: Optional[str] = None


class CharacteristicFeature(FeatureModel):
type: str = "characteristic"
characteristic: str
description: Optional[str]
description: Optional[str] = None


class ExportFeature(FeatureModel):
type: str = "export"
export: str
description: Optional[str]
description: Optional[str] = None


class ImportFeature(FeatureModel):
type: str = "import"
import_: str = Field(alias="import")
description: Optional[str]
description: Optional[str] = None


class SectionFeature(FeatureModel):
type: str = "section"
section: str
description: Optional[str]
description: Optional[str] = None


class FunctionNameFeature(FeatureModel):
type: str = "function name"
function_name: str = Field(alias="function name")
description: Optional[str]
description: Optional[str] = None


class SubstringFeature(FeatureModel):
type: str = "substring"
substring: str
description: Optional[str]
description: Optional[str] = None


class RegexFeature(FeatureModel):
type: str = "regex"
regex: str
description: Optional[str]
description: Optional[str] = None


class StringFeature(FeatureModel):
type: str = "string"
string: str
description: Optional[str]
description: Optional[str] = None


class ClassFeature(FeatureModel):
type: str = "class"
class_: str = Field(alias="class")
description: Optional[str]
description: Optional[str] = None


class NamespaceFeature(FeatureModel):
type: str = "namespace"
namespace: str
description: Optional[str]
description: Optional[str] = None


class BasicBlockFeature(FeatureModel):
type: str = "basic block"
description: Optional[str]
description: Optional[str] = None


class APIFeature(FeatureModel):
type: str = "api"
api: str
description: Optional[str]
description: Optional[str] = None


class PropertyFeature(FeatureModel):
type: str = "property"
access: Optional[str]
access: Optional[str] = None
property: str
description: Optional[str]
description: Optional[str] = None


class NumberFeature(FeatureModel):
type: str = "number"
number: Union[int, float]
description: Optional[str]
description: Optional[str] = None


class BytesFeature(FeatureModel):
type: str = "bytes"
bytes: str
description: Optional[str]
description: Optional[str] = None


class OffsetFeature(FeatureModel):
type: str = "offset"
offset: int
description: Optional[str]
description: Optional[str] = None


class MnemonicFeature(FeatureModel):
type: str = "mnemonic"
mnemonic: str
description: Optional[str]
description: Optional[str] = None


class OperandNumberFeature(FeatureModel):
type: str = "operand number"
index: int
operand_number: int = Field(alias="operand number")
description: Optional[str]
description: Optional[str] = None


class OperandOffsetFeature(FeatureModel):
type: str = "operand offset"
index: int
operand_offset: int = Field(alias="operand offset")
description: Optional[str]
description: Optional[str] = None


Feature = Union[
Expand Down
13 changes: 7 additions & 6 deletions capa/ida/plugin/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,10 +573,11 @@ def ida_hook_rebase(self, meta, post=False):

def ensure_capa_settings_rule_path(self):
try:
path: Path = Path(settings.user.get(CAPA_SETTINGS_RULE_PATH, ""))
path: str = settings.user.get(CAPA_SETTINGS_RULE_PATH, "")

# resolve rules directory - check self and settings first, then ask user
if not path.exists():
# pathlib.Path considers "" equivalent to "." so we first check if rule path is an empty string
if not path or not Path(path).exists():
# configure rules selection messagebox
rules_message = QtWidgets.QMessageBox()
rules_message.setIcon(QtWidgets.QMessageBox.Information)
Expand All @@ -594,15 +595,15 @@ def ensure_capa_settings_rule_path(self):
if pressed == QtWidgets.QMessageBox.Cancel:
raise UserCancelledError()

path = Path(self.ask_user_directory())
path = self.ask_user_directory()
if not path:
raise UserCancelledError()

if not path.exists():
if not Path(path).exists():
logger.error("rule path %s does not exist or cannot be accessed", path)
return False

settings.user[CAPA_SETTINGS_RULE_PATH] = str(path)
settings.user[CAPA_SETTINGS_RULE_PATH] = path
except UserCancelledError:
capa.ida.helpers.inform_user_ida_ui("Analysis requires capa rules")
logger.warning(
Expand Down Expand Up @@ -1307,7 +1308,7 @@ def save_program_analysis(self):
idaapi.info("No program analysis to save.")
return

s = self.resdoc_cache.json().encode("utf-8")
s = self.resdoc_cache.model_dump_json().encode("utf-8")

path = Path(self.ask_user_capa_json_file())
if not path.exists():
Expand Down
2 changes: 1 addition & 1 deletion capa/render/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@


def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
return rd.ResultDocument.from_capa(meta, rules, capabilities).json(exclude_none=True)
return rd.ResultDocument.from_capa(meta, rules, capabilities).model_dump_json(exclude_none=True)
Loading

0 comments on commit c1fbb27

Please sign in to comment.