Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions conf/default/processing.conf.default
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ ram_boost = no
replace_patterns = no
file_activities = no

# process behavior files in ram to speedup processing a little bit?
ram_mmap = no

[tracee]
enabled = no

Expand Down
187 changes: 132 additions & 55 deletions lib/cuckoo/common/compressor.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,19 @@
import binascii
import logging
import mmap
import os
import struct
from pathlib import Path

log = logging.getLogger(__name__)

# bson from pymongo is C so is faster
try:
import bson

HAVE_BSON = True
except ImportError:
HAVE_BSON = False
else:
# The BSON module provided by pymongo works through its "BSON" class.
if hasattr(bson, "BSON"):

def bson_decode(d):
return bson.BSON(d).decode()

# The BSON module provided by "pip3 install bson" works through the
# "loads" function (just like pickle etc.)
elif hasattr(bson, "loads"):

def bson_decode(d):
return bson.loads(d)

else:
HAVE_BSON = False


class NGram:
Expand Down Expand Up @@ -91,50 +77,115 @@ def __init__(self):
self.callmap = {}
self.head = []
self.ccounter = 0
self.category = None

def _process_message(self, msg, data):
mtype = msg.get("type") # message type [debug, new_process, info]
if mtype in {"debug", "new_process", "info"}:
self.category = msg.get("category", "None")
self.head.append(data.tobytes() if isinstance(data, memoryview) else data)

elif self.category and self.category.startswith("__"):
self.head.append(data.tobytes() if isinstance(data, memoryview) else data)
else:
tid = msg.get("T", -1)
time = msg.get("t", 0)

if tid not in self.threads:
self.threads[tid] = Compressor(100)

csum = self.checksum(msg)
self.ccounter += 1
v = (csum, self.ccounter, time)
self.threads[tid].add(v)

if csum not in self.callmap:
self.callmap[csum] = msg

def _process_mmap_content(self, mm):
with memoryview(mm) as mv:
offset = 0
size_mm = len(mm)

while offset < size_mm:
# Read size (4 bytes)
if offset + 4 > size_mm:
break

# Slicing memoryview returns memoryview
size_bytes = mv[offset : offset + 4]
_size = struct.unpack("I", size_bytes)[0]

if offset + _size > size_mm:
break

data = mv[offset : offset + _size]
offset += _size

try:
msg = bson.decode(data)
except Exception:
break

def __next_message(self):
data = self.fd_in.read(4)
if not data:
return (False, False)
_size = struct.unpack("I", data)[0]
data += self.fd_in.read(_size - 4)
self.raw_data = data
return (data, bson_decode(data))

def run(self, file_path):
if not os.path.isfile(file_path) and os.stat(file_path).st_size:
if msg:
self._process_message(msg, data)

def run(self, file_path, use_mmap=False):
if use_mmap:
return self._run_mmap(file_path)
return self._run_standard(file_path)

def _run_mmap(self, file_path):
if not os.path.isfile(file_path) or not os.stat(file_path).st_size:
log.warning("File %s does not exists or it is invalid", file_path)
return False

with open(file_path, "rb") as f:
try:
mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
except ValueError:
return False

try:
self._process_mmap_content(mm)
finally:
mm.close()

return self.flush(file_path)

def _run_standard(self, file_path):
if not os.path.isfile(file_path) or not os.stat(file_path).st_size:
log.warning("File %s does not exists or it is invalid", file_path)
return False

self.fd_in = open(file_path, "rb")
with open(file_path, "rb") as f:
while True:
size_bytes = f.read(4)
if len(size_bytes) < 4:
break

msg = "---"
while msg:
data, msg = self.__next_message()
try:
_size = struct.unpack("I", size_bytes)[0]
except struct.error:
break

if msg:
mtype = msg.get("type") # message type [debug, new_process, info]
if mtype in {"debug", "new_process", "info"}:
self.category = msg.get("category", "None")
self.head.append(data)
remaining = _size - 4
if remaining < 0:
break

elif self.category.startswith("__"):
self.head.append(data)
else:
tid = msg.get("T", -1)
time = msg.get("t", 0)
data_body = f.read(remaining)
if len(data_body) < remaining:
break

if tid not in self.threads:
self.threads[tid] = Compressor(100)
data = size_bytes + data_body

csum = self.checksum(msg)
self.ccounter += 1
v = (csum, self.ccounter, time)
self.threads[tid].add(v)
try:
msg = bson.decode(data)
except Exception:
break

if csum not in self.callmap:
self.callmap[csum] = msg
self.fd_in.close()
if msg:
self._process_message(msg, data)

return self.flush(file_path)

Expand Down Expand Up @@ -167,7 +218,7 @@ def flush(self, file_path):
if final and os.path.isfile(compressed_path):
for d in final:
d.pop("order")
edata = bson.BSON.encode(d)
edata = bson.encode(d)
fd.write(edata)

os.rename(file_path, f"{file_path}.raw")
Expand All @@ -182,9 +233,35 @@ def checksum(self, msg):
# this value is used for identifying a call setup.

index = msg.get("I", -1)
args = "".join([str(c) for c in msg["args"]])
content = [str(index), str(msg["T"]), str(msg["R"]), args, str(self.category), str(msg["P"])]
args = "".join(map(str, msg["args"]))
content = f"{index}{msg['T']}{msg['R']}{args}{self.category}{msg['P']}"

content = "".join(content)
return binascii.crc32(content.encode("utf8"))

return binascii.crc32(bytes(content, "utf8"))

if __name__ == "__main__":
import argparse
import time
import sys

parser = argparse.ArgumentParser()
parser.add_argument("file", help="Path to BSON file to compress")
parser.add_argument("--mmap", action="store_true", help="Use mmap for compression")
args = parser.parse_args()

if not os.path.exists(args.file):
print(f"File {args.file} not found.")
sys.exit(1)

print(f"Compressing {args.file}...")
start = time.time()

compressor = CuckooBsonCompressor()
result = compressor.run(args.file, use_mmap=args.mmap)

end = time.time()

if result:
print(f"Compression successful. Took {end - start:.4f} seconds.")
else:
print("Compression failed.")
18 changes: 2 additions & 16 deletions lib/cuckoo/common/netlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,13 @@
import logging
import struct

# bson from pymongo is C so is faster
try:
import bson

HAVE_BSON = True
except ImportError:
HAVE_BSON = False
else:
# The BSON module provided by pymongo works through its "BSON" class.
if hasattr(bson, "BSON"):

def bson_decode(d):
return bson.decode(d)

# The BSON module provided by "pip3 install bson" works through the "loads" function (just like pickle etc.)
elif hasattr(bson, "loads"):

def bson_decode(d):
return bson.loads(d)

else:
HAVE_BSON = False

from lib.cuckoo.common.logtbl import table as LOGTBL
from lib.cuckoo.common.path_utils import path_get_filename
Expand Down Expand Up @@ -208,7 +194,7 @@ def read_next_message(self):
return

try:
dec = bson_decode(data)
dec = bson.decode(data)
except Exception as e:
log.warning("BsonParser decoding problem %s on data[:50] %s", e, data[:50])
return False
Expand Down
21 changes: 11 additions & 10 deletions lib/cuckoo/core/guest.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,11 @@ def get(self, method, *args, **kwargs):

try:
r = session.get(url, *args, **kwargs)
except requests.ConnectionError:
except requests.ConnectionError as e:
raise CuckooGuestError(
"CAPE Agent failed without error status, please try "
"upgrading to the latest version of agent.py (>= 0.10) and "
"notify us if the issue persists"
f"CAPE Agent failed without error status, please try "
f"upgrading to the latest version of agent.py (>= 0.10) and "
f"notify us if the issue persists. Error: {e}"
)

do_raise and r.raise_for_status()
Expand All @@ -134,11 +134,11 @@ def post(self, method, *args, **kwargs):

try:
r = session.post(url, *args, **kwargs)
except requests.ConnectionError:
except requests.ConnectionError as e:
raise CuckooGuestError(
"CAPE Agent failed without error status, please try "
"upgrading to the latest version of agent.py (>= 0.10) and "
"notify us if the issue persists"
f"CAPE Agent failed without error status, please try "
f"upgrading to the latest version of agent.py (>= 0.10) and "
f"notify us if the issue persists. Error: {e}"
)

r.raise_for_status()
Expand Down Expand Up @@ -377,14 +377,15 @@ def wait_for_completion(self):

try:
status = self.get("/status", timeout=5).json()
except (CuckooGuestError, requests.exceptions.ReadTimeout):
except (CuckooGuestError, requests.exceptions.ReadTimeout) as e:
# this might fail due to timeouts or just temporary network
# issues thus we don't want to abort the analysis just yet and
# wait for things to recover
log.warning(
"Task #%s: Virtual Machine %s /status failed. This can indicate the guest losing network connectivity",
"Task #%s: Virtual Machine %s /status failed. This can indicate the guest losing network connectivity. Error: %s",
self.task_id,
self.vmid,
e,
)
continue
except Exception as e:
Expand Down
Loading
Loading