From 5eee4ecded36f1ae730a77999cd7d16f6d57239a Mon Sep 17 00:00:00 2001 From: doomedraven Date: Mon, 29 Dec 2025 07:36:01 +0100 Subject: [PATCH 1/2] Refactor BSON handling in compressor and netlog modules (#2810) * Refactor BSON handling in compressor and netlog modules Simplifies BSON import logic and encoding/decoding functions in compressor.py and netlog.py. Unifies BSON encode/decode usage and streamlines CRC32 calculation in CuckooBsonCompressor. * Update compressor.py * clean it --- lib/cuckoo/common/compressor.py | 29 ++++++----------------------- lib/cuckoo/common/netlog.py | 18 ++---------------- 2 files changed, 8 insertions(+), 39 deletions(-) diff --git a/lib/cuckoo/common/compressor.py b/lib/cuckoo/common/compressor.py index 1097cb4f04f..baa9a078998 100644 --- a/lib/cuckoo/common/compressor.py +++ b/lib/cuckoo/common/compressor.py @@ -6,28 +6,13 @@ log = logging.getLogger(__name__) +# bson from pymongo is C so is faster try: import bson HAVE_BSON = True except ImportError: HAVE_BSON = False -else: - # The BSON module provided by pymongo works through its "BSON" class. - if hasattr(bson, "BSON"): - - def bson_decode(d): - return bson.BSON(d).decode() - - # The BSON module provided by "pip3 install bson" works through the - # "loads" function (just like pickle etc.) - elif hasattr(bson, "loads"): - - def bson_decode(d): - return bson.loads(d) - - else: - HAVE_BSON = False class NGram: @@ -99,7 +84,7 @@ def __next_message(self): _size = struct.unpack("I", data)[0] data += self.fd_in.read(_size - 4) self.raw_data = data - return (data, bson_decode(data)) + return (data, bson.decode(data)) def run(self, file_path): if not os.path.isfile(file_path) and os.stat(file_path).st_size: @@ -167,7 +152,7 @@ def flush(self, file_path): if final and os.path.isfile(compressed_path): for d in final: d.pop("order") - edata = bson.BSON.encode(d) + edata = bson.encode(d) fd.write(edata) os.rename(file_path, f"{file_path}.raw") @@ -182,9 +167,7 @@ def checksum(self, msg): # this value is used for identifying a call setup. index = msg.get("I", -1) - args = "".join([str(c) for c in msg["args"]]) - content = [str(index), str(msg["T"]), str(msg["R"]), args, str(self.category), str(msg["P"])] - - content = "".join(content) + args = "".join(map(str, msg["args"])) + content = f"{index}{msg['T']}{msg['R']}{args}{self.category}{msg['P']}" - return binascii.crc32(bytes(content, "utf8")) + return binascii.crc32(content.encode("utf8")) diff --git a/lib/cuckoo/common/netlog.py b/lib/cuckoo/common/netlog.py index 215b624b9da..80750ce4012 100644 --- a/lib/cuckoo/common/netlog.py +++ b/lib/cuckoo/common/netlog.py @@ -6,27 +6,13 @@ import logging import struct +# bson from pymongo is C so is faster try: import bson HAVE_BSON = True except ImportError: HAVE_BSON = False -else: - # The BSON module provided by pymongo works through its "BSON" class. - if hasattr(bson, "BSON"): - - def bson_decode(d): - return bson.decode(d) - - # The BSON module provided by "pip3 install bson" works through the "loads" function (just like pickle etc.) - elif hasattr(bson, "loads"): - - def bson_decode(d): - return bson.loads(d) - - else: - HAVE_BSON = False from lib.cuckoo.common.logtbl import table as LOGTBL from lib.cuckoo.common.path_utils import path_get_filename @@ -208,7 +194,7 @@ def read_next_message(self): return try: - dec = bson_decode(data) + dec = bson.decode(data) except Exception as e: log.warning("BsonParser decoding problem %s on data[:50] %s", e, data[:50]) return False From 49f79a200a0e71659f4f81fed5ad570bde6ba382 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Mon, 29 Dec 2025 08:42:13 +0100 Subject: [PATCH 2/2] Improve performance and error handling in bson part 2 (#2814) * Improve performance and error handling in core modules Refactored compressor and behavior processing to use mmap for faster file access, added standard file reading fallback, and improved error handling and logging in guest and sniffer modules. Refactored behavior summary logic to use dispatch tables for API handling, enhancing maintainability and extensibility. Enhanced remote sniffer operations with timeouts and error logging for robustness. * sync * Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Update modules/auxiliary/sniffer.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Update compressor.py * Update compressor.py * Update compressor.py * Update compressor.py * fix --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- conf/default/processing.conf.default | 3 + lib/cuckoo/common/compressor.py | 162 +++++++-- lib/cuckoo/core/guest.py | 21 +- modules/auxiliary/sniffer.py | 88 +++-- modules/processing/behavior.py | 488 ++++++++++++++++----------- 5 files changed, 491 insertions(+), 271 deletions(-) diff --git a/conf/default/processing.conf.default b/conf/default/processing.conf.default index c1c392ba732..332f8d51e80 100644 --- a/conf/default/processing.conf.default +++ b/conf/default/processing.conf.default @@ -54,6 +54,9 @@ ram_boost = no replace_patterns = no file_activities = no +# process behavior files in ram to speedup processing a little bit? +ram_mmap = no + [tracee] enabled = no diff --git a/lib/cuckoo/common/compressor.py b/lib/cuckoo/common/compressor.py index baa9a078998..6e7f8c02f81 100644 --- a/lib/cuckoo/common/compressor.py +++ b/lib/cuckoo/common/compressor.py @@ -1,5 +1,6 @@ import binascii import logging +import mmap import os import struct from pathlib import Path @@ -76,50 +77,115 @@ def __init__(self): self.callmap = {} self.head = [] self.ccounter = 0 + self.category = None - def __next_message(self): - data = self.fd_in.read(4) - if not data: - return (False, False) - _size = struct.unpack("I", data)[0] - data += self.fd_in.read(_size - 4) - self.raw_data = data - return (data, bson.decode(data)) - - def run(self, file_path): - if not os.path.isfile(file_path) and os.stat(file_path).st_size: + def _process_message(self, msg, data): + mtype = msg.get("type") # message type [debug, new_process, info] + if mtype in {"debug", "new_process", "info"}: + self.category = msg.get("category", "None") + self.head.append(data.tobytes() if isinstance(data, memoryview) else data) + + elif self.category and self.category.startswith("__"): + self.head.append(data.tobytes() if isinstance(data, memoryview) else data) + else: + tid = msg.get("T", -1) + time = msg.get("t", 0) + + if tid not in self.threads: + self.threads[tid] = Compressor(100) + + csum = self.checksum(msg) + self.ccounter += 1 + v = (csum, self.ccounter, time) + self.threads[tid].add(v) + + if csum not in self.callmap: + self.callmap[csum] = msg + + def _process_mmap_content(self, mm): + with memoryview(mm) as mv: + offset = 0 + size_mm = len(mm) + + while offset < size_mm: + # Read size (4 bytes) + if offset + 4 > size_mm: + break + + # Slicing memoryview returns memoryview + size_bytes = mv[offset : offset + 4] + _size = struct.unpack("I", size_bytes)[0] + + if offset + _size > size_mm: + break + + data = mv[offset : offset + _size] + offset += _size + + try: + msg = bson.decode(data) + except Exception: + break + + if msg: + self._process_message(msg, data) + + def run(self, file_path, use_mmap=False): + if use_mmap: + return self._run_mmap(file_path) + return self._run_standard(file_path) + + def _run_mmap(self, file_path): + if not os.path.isfile(file_path) or not os.stat(file_path).st_size: log.warning("File %s does not exists or it is invalid", file_path) return False - self.fd_in = open(file_path, "rb") + with open(file_path, "rb") as f: + try: + mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + except ValueError: + return False - msg = "---" - while msg: - data, msg = self.__next_message() + try: + self._process_mmap_content(mm) + finally: + mm.close() - if msg: - mtype = msg.get("type") # message type [debug, new_process, info] - if mtype in {"debug", "new_process", "info"}: - self.category = msg.get("category", "None") - self.head.append(data) + return self.flush(file_path) - elif self.category.startswith("__"): - self.head.append(data) - else: - tid = msg.get("T", -1) - time = msg.get("t", 0) + def _run_standard(self, file_path): + if not os.path.isfile(file_path) or not os.stat(file_path).st_size: + log.warning("File %s does not exists or it is invalid", file_path) + return False + + with open(file_path, "rb") as f: + while True: + size_bytes = f.read(4) + if len(size_bytes) < 4: + break + + try: + _size = struct.unpack("I", size_bytes)[0] + except struct.error: + break - if tid not in self.threads: - self.threads[tid] = Compressor(100) + remaining = _size - 4 + if remaining < 0: + break - csum = self.checksum(msg) - self.ccounter += 1 - v = (csum, self.ccounter, time) - self.threads[tid].add(v) + data_body = f.read(remaining) + if len(data_body) < remaining: + break - if csum not in self.callmap: - self.callmap[csum] = msg - self.fd_in.close() + data = size_bytes + data_body + + try: + msg = bson.decode(data) + except Exception: + break + + if msg: + self._process_message(msg, data) return self.flush(file_path) @@ -171,3 +237,31 @@ def checksum(self, msg): content = f"{index}{msg['T']}{msg['R']}{args}{self.category}{msg['P']}" return binascii.crc32(content.encode("utf8")) + + +if __name__ == "__main__": + import argparse + import time + import sys + + parser = argparse.ArgumentParser() + parser.add_argument("file", help="Path to BSON file to compress") + parser.add_argument("--mmap", action="store_true", help="Use mmap for compression") + args = parser.parse_args() + + if not os.path.exists(args.file): + print(f"File {args.file} not found.") + sys.exit(1) + + print(f"Compressing {args.file}...") + start = time.time() + + compressor = CuckooBsonCompressor() + result = compressor.run(args.file, use_mmap=args.mmap) + + end = time.time() + + if result: + print(f"Compression successful. Took {end - start:.4f} seconds.") + else: + print("Compression failed.") diff --git a/lib/cuckoo/core/guest.py b/lib/cuckoo/core/guest.py index 4614d658681..effb77a1710 100644 --- a/lib/cuckoo/core/guest.py +++ b/lib/cuckoo/core/guest.py @@ -107,11 +107,11 @@ def get(self, method, *args, **kwargs): try: r = session.get(url, *args, **kwargs) - except requests.ConnectionError: + except requests.ConnectionError as e: raise CuckooGuestError( - "CAPE Agent failed without error status, please try " - "upgrading to the latest version of agent.py (>= 0.10) and " - "notify us if the issue persists" + f"CAPE Agent failed without error status, please try " + f"upgrading to the latest version of agent.py (>= 0.10) and " + f"notify us if the issue persists. Error: {e}" ) do_raise and r.raise_for_status() @@ -134,11 +134,11 @@ def post(self, method, *args, **kwargs): try: r = session.post(url, *args, **kwargs) - except requests.ConnectionError: + except requests.ConnectionError as e: raise CuckooGuestError( - "CAPE Agent failed without error status, please try " - "upgrading to the latest version of agent.py (>= 0.10) and " - "notify us if the issue persists" + f"CAPE Agent failed without error status, please try " + f"upgrading to the latest version of agent.py (>= 0.10) and " + f"notify us if the issue persists. Error: {e}" ) r.raise_for_status() @@ -377,14 +377,15 @@ def wait_for_completion(self): try: status = self.get("/status", timeout=5).json() - except (CuckooGuestError, requests.exceptions.ReadTimeout): + except (CuckooGuestError, requests.exceptions.ReadTimeout) as e: # this might fail due to timeouts or just temporary network # issues thus we don't want to abort the analysis just yet and # wait for things to recover log.warning( - "Task #%s: Virtual Machine %s /status failed. This can indicate the guest losing network connectivity", + "Task #%s: Virtual Machine %s /status failed. This can indicate the guest losing network connectivity. Error: %s", self.task_id, self.vmid, + e, ) continue except Exception as e: diff --git a/modules/auxiliary/sniffer.py b/modules/auxiliary/sniffer.py index c1ce22051bd..41ca52eb8d7 100644 --- a/modules/auxiliary/sniffer.py +++ b/modules/auxiliary/sniffer.py @@ -191,27 +191,43 @@ def start(self): f.write(f"echo $PID > /tmp/{self.task.id}.pid") f.write("\n") - subprocess.check_output( - ["scp", "-q", f"/tmp/{self.task.id}.sh", remote_host + f":/tmp/{self.task.id}.sh"], - ) - subprocess.check_output( - ["ssh", remote_host, "nohup", "/bin/bash", f"/tmp/{self.task.id}.sh", ">", "/tmp/log", "2>", "/tmp/err"], - ) - - self.pid = subprocess.check_output( - ["ssh", remote_host, "cat", f"/tmp/{self.task.id}.pid"], stderr=subprocess.DEVNULL - ).strip() - log.info( - "Started remote sniffer @ %s with (interface=%s, host=%s, dump path=%s, pid=%s)", - remote_host, - interface, - host, - file_path, - self.pid, - ) - subprocess.check_output( - ["ssh", remote_host, "rm", "-f", f"/tmp/{self.task.id}.pid", f"/tmp/{self.task.id}.sh"], - ) + try: + subprocess.check_output( + ["scp", "-q", f"/tmp/{self.task.id}.sh", remote_host + f":/tmp/{self.task.id}.sh"], timeout=30 + ) + subprocess.check_output( + [ + "ssh", + remote_host, + "nohup", + "/bin/bash", + f"/tmp/{self.task.id}.sh", + ">", + "/tmp/log", + "2>", + "/tmp/err", + ], + timeout=30, + ) + + self.pid = subprocess.check_output( + ["ssh", remote_host, "cat", f"/tmp/{self.task.id}.pid"], stderr=subprocess.DEVNULL, timeout=30 + ).strip() + log.info( + "Started remote sniffer @ %s with (interface=%s, host=%s, dump path=%s, pid=%s)", + remote_host, + interface, + host, + file_path, + self.pid, + ) + subprocess.check_output( + ["ssh", remote_host, "rm", "-f", f"/tmp/{self.task.id}.pid", f"/tmp/{self.task.id}.sh"], timeout=30 + ) + except subprocess.TimeoutExpired: + log.error("Timeout connecting to remote host %s", remote_host) + except subprocess.CalledProcessError as e: + log.error("Error connecting to remote host %s: %s", remote_host, e) else: try: @@ -220,7 +236,13 @@ def start(self): log.exception("Failed to start sniffer (interface=%s, host=%s, dump path=%s)", interface, host, file_path) return - log.info("Started sniffer with PID %d (interface=%s, host=%s, dump path=%s)", self.proc.pid, interface, host, file_path) + log.info( + "Started sniffer with PID %d (interface=%s, host=%s, dump path=%s)", + self.proc.pid, + interface, + host, + file_path, + ) def stop(self): """Stop sniffing. @@ -235,13 +257,16 @@ def stop(self): remote_host = self.options.get("host", "") remote_args = ["ssh", remote_host, "kill", "-2", self.pid] - subprocess.check_output(remote_args) + try: + subprocess.check_output(remote_args, timeout=30) - file_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(self.task.id), "dump.pcap") - file_path2 = f"/tmp/tcp.dump.{self.task.id}" + file_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(self.task.id), "dump.pcap") + file_path2 = f"/tmp/tcp.dump.{self.task.id}" - subprocess.check_output(["scp", "-q", f"{remote_host}:{file_path2}", file_path]) - subprocess.check_output(["ssh", remote_host, "rm", "-f", file_path2]) + subprocess.check_output(["scp", "-q", f"{remote_host}:{file_path2}", file_path], timeout=300) + subprocess.check_output(["ssh", remote_host, "rm", "-f", file_path2], timeout=30) + except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e: + log.error("Error stopping remote sniffer: %s", e) return if self.proc and not self.proc.poll(): @@ -249,8 +274,9 @@ def stop(self): # We must kill the child process that sudo spawned. We won't # have permission to kill the parent process because it's owned by root. try: - pid = int(subprocess.check_output(["ps", "--ppid", str(self.proc.pid), "-o", "pid="]).decode()) - except (subprocess.CalledProcessError, TypeError, ValueError): + output = subprocess.check_output(["ps", "--ppid", str(self.proc.pid), "-o", "pid="]).decode().strip() + pid = int(output.split()[0]) + except (subprocess.CalledProcessError, TypeError, ValueError, IndexError): log.exception("Failed to get child pid of sudo process to stop the sniffer.") return term_func = functools.partial(os.kill, pid, signal.SIGTERM) @@ -261,14 +287,14 @@ def stop(self): pid = self.proc.pid try: term_func() - _, _ = self.proc.communicate() + _, _ = self.proc.communicate(timeout=5) except Exception as e: log.error("Unable to stop the sniffer (first try) with pid %d: %s", pid, e) try: if not self.proc.poll(): log.debug("Killing sniffer") kill_func() - _, _ = self.proc.communicate() + _, _ = self.proc.communicate(timeout=5) except OSError as e: log.debug("Error killing sniffer: %s, continuing", e) except Exception as e: diff --git a/modules/processing/behavior.py b/modules/processing/behavior.py index 3594eeb3fcc..b94cc480ff6 100644 --- a/modules/processing/behavior.py +++ b/modules/processing/behavior.py @@ -5,6 +5,7 @@ import datetime import json import logging +import mmap import os import struct from contextlib import suppress @@ -61,6 +62,7 @@ def __init__(self, log_path, options): self.call_id = 0 self.conversion_cache = {} self.options = options + self.options.ram_mmap = self.options.ram_mmap if self.options.ram_mmap else False # Limit of API calls per process self.api_limit = self.options.analysis_call_limit @@ -77,12 +79,40 @@ def __init__(self, log_path, options): self.api_call_cache.append(i) self.api_call_cache.append(None) + # Close mmap and file descriptor after reading all data into cache + self.close() + + def close(self): + if hasattr(self, "mv") and self.mv: + self.mv.release() + self.mv = None + if hasattr(self, "mm") and self.mm: + self.mm.close() + self.mm = None + if self.fd: + self.fd.close() + self.fd = None + def parse_first_and_reset(self): """Open file and init Bson Parser. Read till first process""" if not self._log_path.endswith(".bson"): return self.fd = open(self._log_path, "rb") + + self.use_mmap = False + self.mm = None + self.mv = None + self.mm_pos = 0 + + if self.options.ram_mmap: + try: + self.mm = mmap.mmap(self.fd.fileno(), 0, access=mmap.ACCESS_READ) + self.mv = memoryview(self.mm) + self.use_mmap = True + except (ValueError, OSError) as e: + log.debug("mmap failed, falling back to standard file reading: %s", e) + self.parser = BsonParser(self) # Get the process information from file @@ -95,7 +125,10 @@ def parse_first_and_reset(self): while not self.process_id: self.parser.read_next_message() - self.fd.seek(0) + if self.use_mmap: + self.mm_pos = 0 + else: + self.fd.seek(0) def read(self, length): """Read data from log file @@ -104,6 +137,14 @@ def read(self, length): """ if not length or length < 0: return b"" + + if self.use_mmap: + if self.mm_pos + length > len(self.mm): + raise EOFError() + buf = self.mv[self.mm_pos : self.mm_pos + length] + self.mm_pos += length + return buf + buf = self.fd.read(length) if not buf or len(buf) != length: raise EOFError() @@ -123,7 +164,10 @@ def __nonzero__(self): def reset(self): """Reset fd""" - self.fd.seek(0) + if self.use_mmap: + self.mm_pos = 0 + else: + self.fd.seek(0) self.api_count = 0 self.lastcall = None self.call_id = 0 @@ -440,7 +484,7 @@ def run(self): def compress_log_file(self, file_path): if file_path.endswith(".bson") and os.stat(file_path).st_size: try: - if not CuckooBsonCompressor().run(file_path): + if not CuckooBsonCompressor().run(file_path, use_mmap=self.options.ram_mmap): log.debug("Could not execute loop detection analysis") else: log.debug("BSON was compressed successfully") @@ -474,6 +518,26 @@ def __init__(self, options): self.resolved_apis = [] self.options = options + self.dispatch = { + "NtCreateKey": self._handle_NtCreateKey, + "NtDeleteValueKey": self._handle_NtDeleteKey, + "NtDeleteKey": self._handle_NtDeleteKey, + "NtQueryValueKey": self._handle_NtQueryValueKey, + "NtQueryMultipleValueKey": self._handle_NtQueryValueKey, + "SHGetFileInfoW": self._handle_SHGetFileInfoW, + "ShellExecuteExW": self._handle_ShellExecuteExW, + "NtSetInformationFile": self._handle_NtSetInformationFile, + "NtDeleteFile": self._handle_DeleteFile, + "LdrGetProcedureAddress": self._handle_LdrGetProcedureAddress, + "MoveFileWithProgressW": self._handle_MoveFile, + "MoveFileWithProgressTransactedW": self._handle_MoveFile, + "CreateProcessInternalW": self._handle_CreateProcess, + "NtCreateUserProcess": self._handle_CreateProcess, + "CreateProcessWithTokenW": self._handle_CreateProcess, + "CreateProcessWithLogonW": self._handle_CreateProcess, + "NtSetValueKey": self._handle_RegSetValue, + } + def get_argument(self, call, argname, strip=False): return next( (arg["value"].strip() if strip else arg["value"] for arg in call["arguments"] if arg["name"] == argname), @@ -500,107 +564,124 @@ def _add_file_activity(self, process, key, filename): if self.options.file_activities: process["file_activities"][key].append(filename) - def event_apicall(self, call, process): - """Generate processes list from streamed calls/processes. - @return: None. - """ - if call["api"].startswith("RegOpenKeyEx"): - name = self.get_argument(call, "FullName") - if name and name not in self.keys: - self._filtering_helper(self.keys, name) - elif call["api"].startswith("RegSetValue") or call["api"] == "NtSetValueKey": - name = self.get_argument(call, "FullName") - if name and name not in self.keys: - self._filtering_helper(self.keys, name) - if name and name not in self.write_keys: - self._filtering_helper(self.write_keys, name) - elif call["api"] == "NtCreateKey" or call["api"].startswith("RegCreateKeyEx"): - name = self.get_argument(call, "ObjectAttributes" if call["api"] == "NtCreateKey" else "FullName") - disposition = int(self.get_argument(call, "Disposition")) - if name and name not in self.keys: - self._filtering_helper(self.keys, name) - # if disposition == 1 then we created a new key - if name and disposition == 1 and name not in self.write_keys: - self.write_keys.append(name) - - elif call["api"] in ("NtDeleteValueKey", "NtDeleteKey") or call["api"].startswith("RegDeleteValue"): - name = self.get_argument(call, "FullName") - if name and name not in self.keys: - self._filtering_helper(self.keys, name) - if name and name not in self.delete_keys: - self.delete_keys.append(name) - elif call["api"].startswith("NtOpenKey"): - name = self.get_argument(call, "ObjectAttributes") - if name and name not in self.keys: - self._filtering_helper(self.keys, name) - elif call["api"] in ("NtQueryValueKey", "NtQueryMultipleValueKey") or call["api"].startswith("RegQueryValue"): - name = self.get_argument(call, "FullName") - if name and name not in self.keys: - self._filtering_helper(self.keys, name) - if name and name not in self.read_keys: - self._filtering_helper(self.read_keys, name) - elif call["api"] == "SHGetFileInfoW": - filename = self.get_argument(call, "Path") - if filename and (len(filename) < 2 or filename[1] != ":"): - filename = None - if filename and filename not in self.files: - self._filtering_helper(self.files, filename) - elif call["api"] == "ShellExecuteExW": - filename = self.get_argument(call, "FilePath") - if len(filename) < 2 or filename[1] != ":": - filename = None - if filename and filename not in self.files: + def _handle_RegOpenKey(self, call, process): + name = self.get_argument(call, "FullName") + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + + def _handle_RegSetValue(self, call, process): + name = self.get_argument(call, "FullName") + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + if name and name not in self.write_keys: + self._filtering_helper(self.write_keys, name) + + def _handle_NtCreateKey(self, call, process): + name = self.get_argument(call, "ObjectAttributes") + disposition = int(self.get_argument(call, "Disposition")) + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + # if disposition == 1 then we created a new key + if name and disposition == 1 and name not in self.write_keys: + self.write_keys.append(name) + + def _handle_RegCreateKey(self, call, process): + name = self.get_argument(call, "FullName") + disposition = int(self.get_argument(call, "Disposition")) + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + # if disposition == 1 then we created a new key + if name and disposition == 1 and name not in self.write_keys: + self.write_keys.append(name) + + def _handle_NtDeleteKey(self, call, process): + name = self.get_argument(call, "FullName") + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + if name and name not in self.delete_keys: + self.delete_keys.append(name) + + def _handle_NtOpenKey(self, call, process): + name = self.get_argument(call, "ObjectAttributes") + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + + def _handle_NtQueryValueKey(self, call, process): + name = self.get_argument(call, "FullName") + if name and name not in self.keys: + self._filtering_helper(self.keys, name) + if name and name not in self.read_keys: + self._filtering_helper(self.read_keys, name) + + def _handle_SHGetFileInfoW(self, call, process): + filename = self.get_argument(call, "Path") + if filename and (len(filename) < 2 or filename[1] != ":"): + filename = None + if filename and filename not in self.files: + self._filtering_helper(self.files, filename) + + def _handle_ShellExecuteExW(self, call, process): + filename = self.get_argument(call, "FilePath") + if len(filename) < 2 or filename[1] != ":": + filename = None + if filename and filename not in self.files: + self._filtering_helper(self.files, filename) + path = self.get_argument(call, "FilePath", strip=True) + params = self.get_argument(call, "Parameters", strip=True) + cmdline = f"{path} {params}" if path else None + if cmdline and cmdline not in self.executed_commands: + self._filtering_helper(self.executed_commands, cmdline) + + def _handle_NtSetInformationFile(self, call, process): + filename = self.get_argument(call, "HandleName") + infoclass = int(self.get_argument(call, "FileInformationClass")) + fileinfo = self.get_raw_argument(call, "FileInformation") + if filename and infoclass and infoclass == 13 and fileinfo and len(fileinfo) > 0: + if not isinstance(fileinfo, bytes): + fileinfo = fileinfo.encode() + disp = struct.unpack_from("B", fileinfo)[0] + if disp and filename not in self.delete_files: + self._filtering_helper(self.delete_files, filename) + self._add_file_activity(process, "delete_files", filename) + + def _handle_DeleteFile(self, call, process): + filename = self.get_argument(call, "FileName") + if not filename: + filename = self.get_argument(call, "DirectoryName") + if filename: + if filename not in self.files: self._filtering_helper(self.files, filename) - path = self.get_argument(call, "FilePath", strip=True) - params = self.get_argument(call, "Parameters", strip=True) - cmdline = f"{path} {params}" if path else None - if cmdline and cmdline not in self.executed_commands: - self._filtering_helper(self.executed_commands, cmdline) - elif call["api"] == "NtSetInformationFile": - filename = self.get_argument(call, "HandleName") - infoclass = int(self.get_argument(call, "FileInformationClass")) - fileinfo = self.get_raw_argument(call, "FileInformation") - if filename and infoclass and infoclass == 13 and fileinfo and len(fileinfo) > 0: - if not isinstance(fileinfo, bytes): - fileinfo = fileinfo.encode() - disp = struct.unpack_from("B", fileinfo)[0] - if disp and filename not in self.delete_files: - self._filtering_helper(self.delete_files, filename) - self._add_file_activity(process, "delete_files", filename) - elif call["api"].startswith("DeleteFile") or call["api"] == "NtDeleteFile" or call["api"].startswith("RemoveDirectory"): - filename = self.get_argument(call, "FileName") - if not filename: - filename = self.get_argument(call, "DirectoryName") - if filename: - if filename not in self.files: - self._filtering_helper(self.files, filename) - if filename not in self.delete_files: - self._filtering_helper(self.delete_files, filename) - self._add_file_activity(process, "delete_files", filename) - elif call["api"].startswith("StartService"): - servicename = self.get_argument(call, "ServiceName", strip=True) - if servicename and servicename not in self.started_services: - self._filtering_helper(self.started_services, servicename) - elif call["api"].startswith("CreateService"): - servicename = self.get_argument(call, "ServiceName", strip=True) - if servicename and servicename not in self.created_services: - self._filtering_helper(self.created_services, servicename) - elif call["api"] in ("CreateProcessInternalW", "NtCreateUserProcess", "CreateProcessWithTokenW", "CreateProcessWithLogonW"): - cmdline = self.get_argument(call, "CommandLine", strip=True) - appname = self.get_argument(call, "ApplicationName", strip=True) - if appname and cmdline: - base = appname.rsplit("\\", 1)[-1].rsplit(".", 1)[0] - firstarg = "" - if cmdline[0] == '"': - firstarg = cmdline[1:].split('"', 1)[0] - else: - firstarg = cmdline.split(" ", 1)[0] - if base not in firstarg: - cmdline = f"{appname} {cmdline}" - if cmdline and cmdline not in self.executed_commands: - self._filtering_helper(self.executed_commands, cmdline) - - elif call["api"] == "LdrGetProcedureAddress" and call["status"]: + if filename not in self.delete_files: + self._filtering_helper(self.delete_files, filename) + self._add_file_activity(process, "delete_files", filename) + + def _handle_StartService(self, call, process): + servicename = self.get_argument(call, "ServiceName", strip=True) + if servicename and servicename not in self.started_services: + self._filtering_helper(self.started_services, servicename) + + def _handle_CreateService(self, call, process): + servicename = self.get_argument(call, "ServiceName", strip=True) + if servicename and servicename not in self.created_services: + self._filtering_helper(self.created_services, servicename) + + def _handle_CreateProcess(self, call, process): + cmdline = self.get_argument(call, "CommandLine", strip=True) + appname = self.get_argument(call, "ApplicationName", strip=True) + if appname and cmdline: + base = appname.rsplit("\\", 1)[-1].rsplit(".", 1)[0] + firstarg = "" + if cmdline[0] == '"': + firstarg = cmdline[1:].split('"', 1)[0] + else: + firstarg = cmdline.split(" ", 1)[0] + if base not in firstarg: + cmdline = f"{appname} {cmdline}" + if cmdline and cmdline not in self.executed_commands: + self._filtering_helper(self.executed_commands, cmdline) + + def _handle_LdrGetProcedureAddress(self, call, process): + if call["status"]: dllname = self.get_argument(call, "ModuleName").lower() funcname = self.get_argument(call, "FunctionName") if not funcname: @@ -609,27 +690,57 @@ def event_apicall(self, call, process): if combined not in self.resolved_apis: self.resolved_apis.append(combined) - elif call["api"].startswith("NtCreateProcess"): - cmdline = self.get_argument(call, "FileName") - if cmdline and cmdline not in self.executed_commands: - self._filtering_helper(self.executed_commands, cmdline) - - elif call["api"] in ("MoveFileWithProgressW", "MoveFileWithProgressTransactedW"): - origname = self.get_argument(call, "ExistingFileName") - newname = self.get_argument(call, "NewFileName") - if origname: - if origname not in self.files: - self._filtering_helper(self.files, origname) - if origname not in self.delete_files: - self._filtering_helper(self.delete_files, origname) - self._add_file_activity(process, "delete_files", origname) - if newname: - if newname not in self.files: - self._filtering_helper(self.files, newname) - if newname not in self.write_files: - self._filtering_helper(self.write_files, newname) - self._add_file_activity(process, "write_files", newname) + def _handle_NtCreateProcess(self, call, process): + cmdline = self.get_argument(call, "FileName") + if cmdline and cmdline not in self.executed_commands: + self._filtering_helper(self.executed_commands, cmdline) + + def _handle_MoveFile(self, call, process): + origname = self.get_argument(call, "ExistingFileName") + newname = self.get_argument(call, "NewFileName") + if origname: + if origname not in self.files: + self._filtering_helper(self.files, origname) + if origname not in self.delete_files: + self._filtering_helper(self.delete_files, origname) + self._add_file_activity(process, "delete_files", origname) + if newname: + if newname not in self.files: + self._filtering_helper(self.files, newname) + if newname not in self.write_files: + self._filtering_helper(self.write_files, newname) + self._add_file_activity(process, "write_files", newname) + + def event_apicall(self, call, process): + """Generate processes list from streamed calls/processes. + @return: None. + """ + api = call["api"] + handler = self.dispatch.get(api) + if handler: + handler(call, process) + return + if api.startswith("RegOpenKeyEx"): + self._handle_RegOpenKey(call, process) + elif api.startswith("RegSetValue"): + self._handle_RegSetValue(call, process) + elif api.startswith("RegCreateKeyEx"): + self._handle_RegCreateKey(call, process) + elif api.startswith("RegDeleteValue"): + self._handle_NtDeleteKey(call, process) + elif api.startswith("NtOpenKey"): + self._handle_NtOpenKey(call, process) + elif api.startswith("RegQueryValue"): + self._handle_NtQueryValueKey(call, process) + elif api.startswith("DeleteFile") or api.startswith("RemoveDirectory"): + self._handle_DeleteFile(call, process) + elif api.startswith("StartService"): + self._handle_StartService(call, process) + elif api.startswith("CreateService"): + self._handle_CreateService(call, process) + elif api.startswith("NtCreateProcess"): + self._handle_NtCreateProcess(call, process) elif call["category"] == "filesystem": filename = self.get_argument(call, "FileName") if not filename: @@ -709,78 +820,7 @@ def __init__(self, details=False): self.modules = {} self.procedures = {} self.events = [] - - def _add_procedure(self, mbase, name, base): - """ - Add a procedure address - """ - self.procedures[base] = f"{self._get_loaded_module(mbase)}:{name}" - - def _add_loaded_module(self, name, base): - """ - Add a loaded module to the internal database - """ - self.modules[base] = name - - def _get_loaded_module(self, base): - """ - Get the name of a loaded module from the internal db - """ - return self.modules.get(base, "") - - def _process_call(self, call): - """Gets files calls - @return: information list - """ - - def _load_args(call): - """ - Load arguments from call - """ - return {argument["name"]: argument["value"] for argument in call["arguments"]} - - def _generic_handle_details(self, call, item): - """ - Generic handling of api calls - @call: the call dict - @item: Generic item to process - """ - event = None - if call["api"] in item["apis"]: - args = _load_args(call) - self.eid += 1 - - event = { - "event": item["event"], - "object": item["object"], - "timestamp": call["timestamp"], - "eid": self.eid, - "data": {}, - } - - for logname, dataname in item["args"]: - event["data"][logname] = args.get(dataname) - return event - - def _generic_handle(self, data, call): - """Generic handling of api calls.""" - for item in data: - event = _generic_handle_details(self, call, item) - if event: - return event - - return None - - def _get_service_action(control_code): - """@see: http://msdn.microsoft.com/en-us/library/windows/desktop/ms682108%28v=vs.85%29.aspx""" - codes = {1: "stop", 2: "pause", 3: "continue", 4: "info"} - - default = "user" if int(control_code) >= 128 else "notify" - return codes.get(control_code, default) - - event = None - - gendat = [ + self.gendat = [ { "event": "move", "object": "file", @@ -933,6 +973,63 @@ def _get_service_action(control_code): }, {"event": "delete", "object": "service", "apis": ["DeleteService"], "args": [("service", "ServiceName")]}, ] + self.api_map = {} + for item in self.gendat: + for api in item["apis"]: + self.api_map[api] = item + + def _add_procedure(self, mbase, name, base): + """ + Add a procedure address + """ + self.procedures[base] = f"{self._get_loaded_module(mbase)}:{name}" + + def _add_loaded_module(self, name, base): + """ + Add a loaded module to the internal database + """ + self.modules[base] = name + + def _get_loaded_module(self, base): + """ + Get the name of a loaded module from the internal db + """ + return self.modules.get(base, "") + + def _process_call(self, call): + """Gets files calls + @return: information list + """ + + def _load_args(call): + """ + Load arguments from call + """ + return {argument["name"]: argument["value"] for argument in call["arguments"]} + + def _get_service_action(control_code): + """@see: http://msdn.microsoft.com/en-us/library/windows/desktop/ms682108%28v=vs.85%29.aspx""" + codes = {1: "stop", 2: "pause", 3: "continue", 4: "info"} + + default = "user" if int(control_code) >= 128 else "notify" + return codes.get(control_code, default) + + event = None + item = self.api_map.get(call["api"]) + if item: + args = _load_args(call) + self.eid += 1 + + event = { + "event": item["event"], + "object": item["object"], + "timestamp": call["timestamp"], + "eid": self.eid, + "data": {}, + } + + for logname, dataname in item["args"]: + event["data"][logname] = args.get(dataname) # Not sure I really want this, way too noisy anyway and doesn't bring much value. # if self.details: @@ -942,7 +1039,6 @@ def _get_service_action(control_code): # "args": [("name", "FunctionName"), ("ordinal", "Ordinal")] # },] - event = _generic_handle(self, gendat, call) args = _load_args(call) if event: