From df66030a11d4f7cdc0b89c712853da249722ff76 Mon Sep 17 00:00:00 2001
From: doomedraven
Date: Fri, 23 Jan 2026 16:45:04 +0100
Subject: [PATCH 1/3] handle strings larger than 16MB (#2635)

---
 dev_utils/mongo_hooks.py | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/dev_utils/mongo_hooks.py b/dev_utils/mongo_hooks.py
index 6270a86763a..08427a7ec31 100644
--- a/dev_utils/mongo_hooks.py
+++ b/dev_utils/mongo_hooks.py
@@ -1,7 +1,10 @@
 import itertools
 import logging
+from contextlib import suppress
 
 from pymongo import UpdateOne, errors
+from pymongo.errors import InvalidDocument, BulkWriteError
+import bson
 
 from dev_utils.mongodb import (
     mongo_bulk_write,
@@ -61,13 +64,12 @@ def normalize_file(file_dict, task_id):
     )
     new_dict = {}
     for fld in static_fields:
-        try:
+        with suppress(KeyError):
             new_dict[fld] = file_dict.pop(fld)
-        except KeyError:
-            pass
     new_dict["_id"] = key
 
     file_dict[FILE_REF_KEY] = key
+
     return UpdateOne({"_id": key}, {"$set": new_dict, "$addToSet": {TASK_IDS_KEY: task_id}}, upsert=True, hint=[("_id", 1)])
 
 
@@ -87,8 +89,32 @@ def normalize_files(report):
     try:
         if requests:
             mongo_bulk_write(FILES_COLL, requests, ordered=False)
-    except errors.OperationFailure as exc:
-        log.error("Mongo hook 'normalize_files' failed with code %d: %s", exc.code, exc)
+    except (errors.OperationFailure, InvalidDocument, BulkWriteError) as exc:
+        log.warning("Mongo hook 'normalize_files' failed: %s. Attempting to sanitize strings and retry.", exc)
+        for req in requests:
+            # req._doc is the update document: {"$set": new_dict, ...}
+            # Accessing the private attribute _doc to modify it in place for the retry
+            try:
+                if hasattr(req, "_doc") and "$set" in req._doc and "strings" in req._doc["$set"]:
+                    strings_val = req._doc["$set"]["strings"]
+                    # Check whether the strings field alone is too large (15MB keeps a safety buffer below the 16MB BSON limit)
+                    if strings_val and len(bson.encode({"strings": strings_val})) > 15 * 1024 * 1024:
+                        log.warning("Truncating oversized strings field for retry.")
+                        if isinstance(strings_val, list):
+                            req._doc["$set"]["strings"] = strings_val[:1000]
+                        else:
+                            req._doc["$set"]["strings"] = []
+                        # If still too large, clear it
+                        if len(bson.encode({"strings": req._doc["$set"]["strings"]})) > 15 * 1024 * 1024:
+                            req._doc["$set"]["strings"] = []
+            except Exception as e:
+                log.error("Failed to sanitize request during retry: %s", e)
+
+        # Retry the bulk write
+        try:
+            mongo_bulk_write(FILES_COLL, requests, ordered=False)
+        except Exception as retry_exc:
+            log.error("Retry of 'normalize_files' failed: %s", retry_exc)
 
     return report
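A note on the 15MB threshold in this hook: MongoDB rejects BSON documents larger than 16MB, and checking against 15MB leaves headroom for the rest of the document and the update overhead. A minimal sketch of that pre-flight size check with PyMongo's bson module (the sample document below is illustrative, not taken from the patch):

    import bson

    MAX_SAFE_BSON = 15 * 1024 * 1024  # 16MB hard limit minus a ~1MB safety buffer

    def fits_in_mongo(doc: dict, limit: int = MAX_SAFE_BSON) -> bool:
        """Return True if the encoded BSON size of `doc` stays under `limit`."""
        return len(bson.encode(doc)) <= limit

    # Trim an oversized list field before writing, mirroring the retry logic above
    doc = {"strings": ["A" * 64] * 500_000}
    if not fits_in_mongo(doc):
        doc["strings"] = doc["strings"][:1000]  # keep only the first 1000 entries
    if not fits_in_mongo(doc):
        doc["strings"] = []  # last resort: drop the field content entirely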
From 64b2e06bdc31fe5ec38c3ea5f44045cb05d0ae43 Mon Sep 17 00:00:00 2001
From: doomedraven
Date: Fri, 23 Jan 2026 18:44:14 +0100
Subject: [PATCH 2/3] QR data extraction (#2480)

* Update screenshots.py

* Add QR code URL extraction to screenshots module

Introduces optional QR code URL extraction in Windows screenshots. Adds
configuration option 'screenshots_qr' and corresponding UI checkbox,
enabling automatic detection and opening of URLs from QR codes found in
screenshots if OpenCV is available.

* Add QR code URL extraction from screenshots

Introduces QR code detection using OpenCV in the deduplication process.
Extracted URLs from QR codes in screenshots are collected and stored in
results. Adds a new signature module to report these URLs.

* Update screenshots.py

* Update deduplication.py

* Update test_analysis_manager.py

* Update test_analysis_manager.py

* Update test_analysis_manager.py

* Update test_analysis_manager.py
---
 .../windows/modules/auxiliary/screenshots.py | 48 +++++++++-
 conf/default/auxiliary.conf.default          |  1 +
 modules/processing/deduplication.py          | 37 ++++++++
 modules/signatures/qr_urls.py                | 19 ++++
 tests/test_analysis_manager.py               | 89 ++++++-------------
 web/submission/views.py                      |  3 +
 web/templates/submission/index.html          | 10 ++-
 7 files changed, 142 insertions(+), 65 deletions(-)
 create mode 100644 modules/signatures/qr_urls.py

diff --git a/analyzer/windows/modules/auxiliary/screenshots.py b/analyzer/windows/modules/auxiliary/screenshots.py
index e254e7a5cec..f93c93986cc 100644
--- a/analyzer/windows/modules/auxiliary/screenshots.py
+++ b/analyzer/windows/modules/auxiliary/screenshots.py
@@ -3,14 +3,29 @@
 # See the file 'docs/LICENSE' for copying permission.
 
 import logging
+import os
 import time
+from contextlib import suppress
 from io import BytesIO
 from threading import Thread
 
+try:
+    from PIL import Image
+except ImportError:
+    pass
+
 from lib.api.screenshot import Screenshot
 from lib.common.abstracts import Auxiliary
 from lib.common.results import NetlogFile
 
+HAVE_CV2 = False
+with suppress(ImportError):
+    import cv2
+    import numpy as np
+
+    HAVE_CV2 = True
+
+
 log = logging.getLogger(__name__)
 
 SHOT_DELAY = 1
@@ -20,6 +35,26 @@
 SKIP_AREA = None
 
 
+def handle_qr_codes(image_data):
+    """Extract URL from QR code if present."""
+    if not HAVE_CV2:
+        return None
+
+    try:
+        image = Image.open(image_data)
+        # Convert PIL image to BGR numpy array for OpenCV
+        img = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
+        detector = cv2.QRCodeDetector()
+        extracted, _, _ = detector.detectAndDecode(img)
+        # Simple URL detection
+        if extracted and "://" in extracted[:10]:
+            return extracted
+    except Exception as e:
+        log.debug("Error in handle_qr_codes: %s", e)
+
+    return None
+
+
 class Screenshots(Auxiliary, Thread):
     """Take screenshots."""
 
@@ -27,6 +62,7 @@ def __init__(self, options, config):
         Auxiliary.__init__(self, options, config)
         Thread.__init__(self)
         self.enabled = config.screenshots_windows
+        self.screenshots_qr = getattr(config, "screenshots_qr", False)
         self.do_run = self.enabled
 
     def stop(self):
@@ -62,7 +98,17 @@ def run(self):
                 img_current.save(tmpio, format="JPEG")
                 tmpio.seek(0)
 
-                # now upload to host from the StringIO
+                if self.screenshots_qr and HAVE_CV2:
+                    url = handle_qr_codes(tmpio)
+                    if url:
+                        log.info("QR code detected with URL: %s", url)
+                        try:
+                            # os.startfile is Windows-only and usually works for URLs
+                            os.startfile(url)
+                        except Exception as e:
+                            log.error("Failed to open QR URL: %s", e)
+                    tmpio.seek(0)
+
                 nf = NetlogFile()
                 nf.init(f"shots/{str(img_counter).rjust(4, '0')}.jpg")
                 for chunk in tmpio:
diff --git a/conf/default/auxiliary.conf.default b/conf/default/auxiliary.conf.default
index 94a5707242b..bc2c09d6362 100644
--- a/conf/default/auxiliary.conf.default
+++ b/conf/default/auxiliary.conf.default
@@ -32,6 +32,7 @@ procmon = no
 recentfiles = no
 screenshots_windows = yes
 screenshots_linux = yes
+screenshots_qr = no
 sysmon_windows = no
 sysmon_linux = no
 tlsdump = yes
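For reference, cv2.QRCodeDetector().detectAndDecode() returns a (text, points, straight_qrcode) tuple and yields an empty string when no code is found, which is why handle_qr_codes() only needs a truthiness check plus the cheap "://" prefix test. A self-contained round-trip to sanity-check the decode path (the qrcode package used to generate the test image is an assumption of this sketch, not a CAPE dependency):

    import io

    import cv2
    import numpy as np
    from PIL import Image

    try:
        import qrcode  # assumed test-only helper, not part of CAPE's dependencies
    except ImportError:
        qrcode = None

    if qrcode:
        # Render a QR code for a known URL, then feed it through the detector
        buf = io.BytesIO()
        qrcode.make("https://example.com/test").save(buf)  # writes a PNG by default
        buf.seek(0)
        img = cv2.cvtColor(np.array(Image.open(buf).convert("RGB")), cv2.COLOR_RGB2BGR)
        decoded, points, _ = cv2.QRCodeDetector().detectAndDecode(img)
        print(decoded)  # -> https://example.com/test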
diff --git a/modules/processing/deduplication.py b/modules/processing/deduplication.py
index d76c46ab046..52c0358076c 100644
--- a/modules/processing/deduplication.py
+++ b/modules/processing/deduplication.py
@@ -17,6 +17,14 @@
 except ImportError:
     log.error("Missed dependency: poetry run pip install ImageHash")
 
+HAVE_CV2 = False
+try:
+    import cv2
+
+    HAVE_CV2 = True
+except ImportError:
+    log.error("Missed dependency: poetry run pip install opencv-python")
+
 try:
     from PIL import Image
 
@@ -38,6 +46,23 @@ def reindex_screenshots(shots_path):
             os.rename(old_path, new_path)
 
 
+def handle_qr_codes(image_path):
+    if not HAVE_CV2:
+        return None
+    try:
+        # cv2.imread takes a file path directly
+        img = cv2.imread(image_path)
+        if img is None:
+            return None
+        detector = cv2.QRCodeDetector()
+        extracted, points, straight_qrcode = detector.detectAndDecode(img)
+        if extracted and "://" in extracted[:10]:
+            return extracted
+    except Exception as e:
+        log.error("Error detecting QR in %s: %s", image_path, e)
+    return None
+
+
 class Deduplicate(Processing):
     """Deduplicate screenshots."""
 
@@ -116,6 +141,18 @@ def hashfunc(img):
             screenshots = sorted(self.deduplicate_images(userpath=shots_path, hashfunc=hashfunc))
             shots = [re.sub(r"\.(png|jpg)$", "", screenshot) for screenshot in screenshots]
 
+            if HAVE_CV2:
+                qr_urls = set()
+                for img_name in os.listdir(shots_path):
+                    if not img_name.lower().endswith((".jpg", ".png")):
+                        continue
+                    url = handle_qr_codes(os.path.join(shots_path, img_name))
+                    if url:
+                        qr_urls.add(url)
+
+                if qr_urls:
+                    self.results["qr_urls"] = list(qr_urls)
+
         except Exception as e:
             log.error(e)
 
diff --git a/modules/signatures/qr_urls.py b/modules/signatures/qr_urls.py
new file mode 100644
index 00000000000..a6027e3edcb
--- /dev/null
+++ b/modules/signatures/qr_urls.py
@@ -0,0 +1,19 @@
+from lib.cuckoo.common.abstracts import Signature
+
+
+class QRUrls(Signature):
+    name = "qr_urls"
+    description = "URLs extracted from QR codes in screenshots"
+    severity = 1
+    categories = ["info"]
+    authors = ["DoomedRaven"]
+    minimum = "1.3"
+    evented = False
+
+    def run(self):
+        qr_urls = self.results.get("qr_urls")
+        if qr_urls:
+            for url in qr_urls:
+                self.data.append({"url": url})
+            return True
+        return False
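Taken together, the two new pieces form a small pipeline: the Deduplicate processing module stores any decoded URLs under results["qr_urls"], and the QRUrls signature republishes them as structured entries in the report. In miniature (synthetic results dict, not a real report):

    # What deduplication.py leaves behind after a hit
    results = {"qr_urls": ["https://example.com/a", "https://example.com/b"]}

    # What the QRUrls signature appends to its data
    sig_data = [{"url": url} for url in results.get("qr_urls", [])]
    # -> [{"url": "https://example.com/a"}, {"url": "https://example.com/b"}]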
diff --git a/tests/test_analysis_manager.py b/tests/test_analysis_manager.py
index e0d94709294..b37cfd6658e 100644
--- a/tests/test_analysis_manager.py
+++ b/tests/test_analysis_manager.py
@@ -10,7 +10,7 @@
 from sqlalchemy import select
 
 from lib.cuckoo.common.abstracts import Machinery
-from lib.cuckoo.common.config import ConfigMeta
+from lib.cuckoo.common.config import Config, ConfigMeta
 from lib.cuckoo.core.analysis_manager import AnalysisManager
 from lib.cuckoo.core.database import TASK_RUNNING, Guest, Machine, Task, _Database
 from lib.cuckoo.core.machinery_manager import MachineryManager
@@ -303,7 +303,12 @@ def screenshot(self2, label, path):
         assert "no machine is used" in caplog.text
 
     def test_build_options(
-        self, db: _Database, tmp_path: pathlib.Path, task: Task, machine: Machine, machinery_manager: MachineryManager
+        self,
+        db: _Database,
+        tmp_path: pathlib.Path,
+        task: Task,
+        machine: Machine,
+        machinery_manager: MachineryManager,
     ):
         with db.session.begin():
             task = db.session.merge(task)
@@ -315,57 +320,38 @@ def test_build_options(
             analysis_man = AnalysisManager(task=task, machine=machine, machinery_manager=machinery_manager)
             opts = analysis_man.build_options()
-        assert opts == {
-            "amsi": False,
-            "browser": True,
-            "browsermonitor": False,
+
+        expected_opts = {
             "category": "file",
             "clock": datetime.datetime(2099, 1, 1, 9, 1, 1),
-            "curtain": False,
-            "digisig": True,
-            "disguise": True,
             "do_upload_max_size": 0,
-            "during_script": False,
             "enable_trim": 0,
             "enforce_timeout": 1,
-            "evtx": False,
             "exports": "",
-            "filecollector": True,
             "file_name": "sample.py",
-            "file_pickup": False,
             "file_type": "Python script, ASCII text executable",
-            "human_linux": False,
-            "human_windows": True,
             "id": task.id,
             "ip": "5.6.7.8",
             "options": "foo=bar",
             "package": "foo",
-            "permissions": False,
             "port": "2043",
-            "pre_script": False,
-            "procmon": False,
-            "recentfiles": False,
-            "screenshots_linux": True,
-            "screenshots_windows": True,
-            "sslkeylogfile": False,
-            "sysmon_linux": False,
-            "sysmon_windows": False,
             "target": str(tmp_path / "sample.py"),
             "terminate_processes": False,
             "timeout": 10,
-            "tlsdump": True,
-            "tracee_linux": False,
             "upload_max_size": 100000000,
-            "usage": False,
-            "windows_static_route": False,
-            "windows_static_route_gateway": "192.168.1.1",
-            "dns_etw": False,
-            "wmi_etw": False,
-            "watchdownloads": False,
         }
+        # Dynamically load auxiliary modules from Config to ensure test stays in sync with configuration changes
+        expected_opts.update(Config("auxiliary").auxiliary_modules)
+
+        assert opts == expected_opts
 
     def test_build_options_pe(
-        self, db: _Database, tmp_path: pathlib.Path, task: Task, machine: Machine, machinery_manager: MachineryManager
+        self,
+        db: _Database,
+        tmp_path: pathlib.Path,
+        task: Task,
+        machine: Machine,
+        machinery_manager: MachineryManager,
     ):
         sample_location = get_test_object_path(
             pathlib.Path("data/core/5dd87d3d6b9d8b4016e3c36b189234772661e690c21371f1eb8e018f0f0dec2b")
         )
@@ -380,54 +366,31 @@ def test_build_options_pe(
             analysis_man = AnalysisManager(task=task, machine=machine, machinery_manager=machinery_manager)
             opts = analysis_man.build_options()
-        assert opts == {
-            "amsi": False,
-            "browser": True,
-            "browsermonitor": False,
+
+        expected_opts = {
             "category": "file",
             "clock": datetime.datetime(2099, 1, 1, 9, 1, 1),
-            "curtain": False,
-            "digisig": True,
-            "disguise": True,
             "do_upload_max_size": 0,
-            "during_script": False,
             "enable_trim": 0,
             "enforce_timeout": 1,
-            "evtx": False,
             "exports": "",
-            "filecollector": True,
             "file_name": sample_location.name,
-            "file_pickup": False,
             "file_type": "PE32 executable (console) Intel 80386, for MS Windows",
-            "human_linux": False,
-            "human_windows": True,
             "id": task.id,
             "ip": "5.6.7.8",
             "options": "",
             "package": "file",
-            "permissions": False,
             "port": "2043",
-            "pre_script": False,
-            "procmon": False,
-            "recentfiles": False,
-            "screenshots_linux": True,
-            "screenshots_windows": True,
-            "sslkeylogfile": False,
-            "sysmon_linux": False,
-            "sysmon_windows": False,
             "target": str(sample_location),
             "terminate_processes": False,
             "timeout": 10,
-            "tlsdump": True,
-            "tracee_linux": False,
             "upload_max_size": 100000000,
-            "usage": False,
-            "windows_static_route": False,
-            "windows_static_route_gateway": "192.168.1.1",
-            "dns_etw": False,
-            "wmi_etw": False,
-            "watchdownloads": False,
         }
+        # Dynamically load auxiliary modules from Config to ensure test stays in sync with configuration changes
+        expected_opts.update(Config("auxiliary").auxiliary_modules)
+
+        assert opts == expected_opts
+
     def test_category_checks(
         self, db: _Database, task: Task, machine: Machine, machinery_manager: MachineryManager, mocker: MockerFixture
diff --git a/web/submission/views.py b/web/submission/views.py
index 32d36f8e4f0..5e0f26d1cff 100644
--- a/web/submission/views.py
+++ b/web/submission/views.py
@@ -352,6 +352,9 @@ def index(request, task_id=None, resubmit_hash=None):
         if request.POST.get("unpack"):
             options += "unpack=yes,"
 
+        if request.POST.get("screenshots_qr"):
+            options += "screenshots_qr=yes,"
+
         job_category = False
         if request.POST.get("job_category"):
             job_category = request.POST.get("job_category")
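The test refactor above works because the per-module flags that build_options() emits come straight from conf/auxiliary.conf, so merging Config("auxiliary").auxiliary_modules into the expected dict keeps the assertion in sync with whatever flags the conf file gains or loses. A sketch of the idea (assuming, as the test does, that the section behaves like a mapping of flag name to boolean):

    from lib.cuckoo.common.config import Config

    # e.g. {"procmon": False, "screenshots_windows": True, "screenshots_qr": False, ...}
    aux_flags = dict(Config("auxiliary").auxiliary_modules)

    expected_opts = {"category": "file", "timeout": 10}  # static expectations
    expected_opts.update(aux_flags)  # dynamic part, mirrors the conf file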
diff --git a/web/templates/submission/index.html b/web/templates/submission/index.html
index 2d762af4652..f786652bb3e 100644
--- a/web/templates/submission/index.html
+++ b/web/templates/submission/index.html
@@ -598,7 +598,15 @@
 [hunk body lost to HTML stripping: in the "Advance" options block of the submission form, after the existing {% if %}-guarded checkbox (id="duringScript", name="during_script"), the patch adds an eight-line checkbox block for the new screenshots_qr option (name="screenshots_qr", as read by web/submission/views.py)]
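End to end, the new checkbox only appends screenshots_qr=yes to the task's free-form options string; the analyzer later exposes each key as an attribute on its config object, which is why screenshots.py reads it with getattr(config, "screenshots_qr", False). In miniature (a hypothetical parser standing in for the analyzer's real options plumbing):

    options = "foo=bar,screenshots_qr=yes,"

    # Hypothetical stand-in for the analyzer's options parsing
    parsed = dict(kv.split("=", 1) for kv in options.strip(",").split(",") if "=" in kv)
    screenshots_qr = parsed.get("screenshots_qr") == "yes"  # -> True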
From: doomedraven
Date: Fri, 23 Jan 2026 18:54:41 +0100
Subject: [PATCH 3/3] add docker-compose dev stack, Dockerfile, scripts (#2732)

* add docker-compose dev stack, Dockerfile, scripts

* feat: resolve comments

* Enhance Docker setup with config and DB initialization

Update docker-compose.yml to add environment variables for PostgreSQL
and mount additional volumes for configuration and storage. Modify
run.sh to initialize configuration files and create a Docker-specific
database config if not present, improving container startup and
environment consistency.

---------

Co-authored-by: doomedraven
---
 .dockerignore             | 51 +++++++++++++++++++++++++++++
 docker/.env.example       |  8 +++++
 docker/Dockerfile         | 36 +++++++++++++++++++++
 docker/docker-compose.yml | 67 +++++++++++++++++++++++++++++++++++++++
 docker/pcap.sh            |  4 +++
 docker/readme.md          |  4 +++
 docker/run.sh             | 34 ++++++++++++++++++++
 7 files changed, 204 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 docker/.env.example
 create mode 100644 docker/Dockerfile
 create mode 100644 docker/docker-compose.yml
 create mode 100644 docker/pcap.sh
 create mode 100644 docker/readme.md
 create mode 100644 docker/run.sh

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000000..41a281e7f49
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,51 @@
+# Git
+.git
+.gitignore
+.github
+
+# Docker
+docker-compose.yml
+Dockerfile
+.dockerignore
+
+# DB
+mongodata
+pgdata
+*.db
+*.sqlite3
+
+# Python
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg
+*.egg-info
+dist
+build
+.eggs
+.venv
+venv
+env
+
+# IDE
+.vscode
+.idea
+*.swp
+*.swo
+*~
+
+# Logs
+*.log
+logs
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Others
+.env.local
+.cache
+tmp
+temp
\ No newline at end of file
diff --git a/docker/.env.example b/docker/.env.example
new file mode 100644
index 00000000000..ac45eb4a119
--- /dev/null
+++ b/docker/.env.example
@@ -0,0 +1,8 @@
+WEB_PORT=8000
+RESULT_PORT=2042
+PG_PORT=5432
+MONGO_PORT=27017
+
+POSTGRES_USER=cape
+POSTGRES_PASSWORD=cape
+POSTGRES_DB=cape
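With the files from this patch in place, bringing the dev stack up is the usual compose flow (a sketch; adjust ports and credentials in .env as needed):

    cd docker
    cp .env.example .env
    docker compose up -d --build   # builds the CAPE image, starts postgres and mongo
    docker compose logs -f cape-server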
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 00000000000..5e8975ddc89
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,36 @@
+FROM python:3.11-bookworm
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends git libgraphviz-dev tcpdump libcap2-bin iproute2 libjansson-dev libmagic-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -ms /bin/bash cape
+
+RUN pip install --no-cache-dir poetry
+
+RUN poetry config virtualenvs.create false
+
+RUN mkdir -p /etc/poetry/bin && ln -s $(which poetry) /etc/poetry/bin/poetry
+RUN mkdir -p /opt && ln -s /cape /opt/CAPEv2
+
+WORKDIR /cape
+
+COPY pyproject.toml poetry.lock* ./
+
+RUN poetry install --no-interaction --no-ansi --no-root
+
+COPY . .
+
+RUN poetry install --no-interaction --no-ansi
+
+RUN pip install --no-cache-dir -U flare-floss
+RUN bash extra/yara_installer.sh
+
+RUN bash docker/pcap.sh
+
+RUN bash conf/copy_configs.sh
+RUN chown -R cape:cape /cape
+
+USER cape
+
+CMD ["bash", "docker/run.sh"]
\ No newline at end of file
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 00000000000..f94336ced5f
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,67 @@
+services:
+  cape-db:
+    image: postgres:bookworm
+    hostname: cape-db
+    restart: unless-stopped
+    ports:
+      - "127.0.0.1:${PG_PORT:-5432}:5432"
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-cape}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-cape}
+      POSTGRES_DB: ${POSTGRES_DB:-cape}
+      PGDATA: /var/lib/postgresql/data/pgdata
+    volumes:
+      - cape-db-data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-cape} -d ${POSTGRES_DB:-cape}"]
+      interval: 5s
+      timeout: 5s
+      retries: 10
+      start_period: 30s
+
+  mongodb:
+    image: mongo:6
+    command: ["--bind_ip_all"]
+    volumes:
+      - cape-mongo-data:/data/db
+    ports:
+      - "127.0.0.1:${MONGO_PORT:-27017}:27017"
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "mongosh", "--eval", "db.runCommand({ ping: 1 })"]
+      interval: 10s
+      timeout: 5s
+      retries: 12
+      start_period: 20s
+
+  cape-server:
+    build:
+      context: ../
+      dockerfile: docker/Dockerfile
+    hostname: cape-server
+    restart: unless-stopped
+    depends_on:
+      cape-db:
+        condition: service_healthy
+      mongodb:
+        condition: service_healthy
+    environment:
+      - WEB_PORT=${WEB_PORT:-8000}
+      - POSTGRES_USER=${POSTGRES_USER:-cape}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-cape}
+      - POSTGRES_DB=${POSTGRES_DB:-cape}
+    ports:
+      - "127.0.0.1:${RESULT_PORT:-2042}:2042" # result server
+      - "127.0.0.1:${WEB_PORT:-8000}:8000" # web ui
+    volumes:
+      - ../conf:/cape/conf
+      - ../custom/conf:/cape/custom/conf
+      - ../custom:/cape/custom
+      - ../storage:/cape/storage
+    cap_add:
+      - NET_ADMIN
+      - NET_RAW
+
+volumes:
+  cape-db-data:
+  cape-mongo-data:
diff --git a/docker/pcap.sh b/docker/pcap.sh
new file mode 100644
index 00000000000..a83b34c0315
--- /dev/null
+++ b/docker/pcap.sh
@@ -0,0 +1,4 @@
+groupadd pcap
+usermod -a -G pcap cape
+chgrp pcap /usr/bin/tcpdump
+setcap cap_net_raw,cap_net_admin=eip /usr/bin/tcpdump
\ No newline at end of file
diff --git a/docker/readme.md b/docker/readme.md
new file mode 100644
index 00000000000..7edb30667cf
--- /dev/null
+++ b/docker/readme.md
@@ -0,0 +1,4 @@
+This is not an official Docker solution!
+It is a community-based contribution, so use it at your own risk!
+
+No support from the core devs here!
diff --git a/docker/run.sh b/docker/run.sh
new file mode 100644
index 00000000000..dcb91965326
--- /dev/null
+++ b/docker/run.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+set -e
+
+cd /cape
+
+# Initialize configs if mounted volume is empty
+if [ ! -f "conf/cuckoo.conf" ]; then
+    echo "Initializing configuration files..."
+    bash conf/copy_configs.sh
+fi
+
+# Configure Database connection for Docker environment
+mkdir -p conf/cuckoo.conf.d
+DB_CONF="conf/cuckoo.conf.d/00_docker_db.conf"
+if [ ! -f "$DB_CONF" ]; then
+    echo "Creating Docker DB configuration..."
+    cat > "$DB_CONF" <