From 73cc62a746a1b51f244f5e7de9cdd7d82c0e43d5 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Mon, 5 Jan 2026 12:27:22 +0100 Subject: [PATCH 1/2] Add stuck VM monitoring and killing to scheduler (#2809) * Add stuck VM monitoring and killing to scheduler The scheduler now monitors running analysis tasks and kills virtual machines if a task exceeds its allowed runtime (timeout + critical + 100s). Added tests to verify that stuck VMs are killed and healthy VMs are not affected. * Update test_scheduler_monitoring.py * fix * Update test_scheduler_monitoring.py * Update test_scheduler_monitoring.py * sync * Update scheduler.py * Update test_scheduler_monitoring.py * Update test_scheduler_monitoring.py * Update test_scheduler_monitoring.py * Update test_scheduler_monitoring.py * Improve error handling and timeout management Refactor error handling and timeout logic in guest.py. * Add guide for debugging stuck virtual machines This guide provides a systematic approach to diagnosing and resolving issues where the analysis process hangs indefinitely, detailing problem triage, immediate diagnostics, debugging steps for Python and QEMU, root cause analysis, and resolution strategies. * Add issues documentation and remove scheduler monitoring test Introduced a new 'Issues' section in the documentation with an initial page for debugging VM issues. Updated the main documentation index to include this section. Removed the test_scheduler_monitoring.py test file. * Update guest.py * Update config references for timeouts in guest handling Replaced references to cfg.cuckoo.timeouts with cfg.timeouts in both documentation and GuestManager code to reflect updated configuration structure. 
--- conf/default/cuckoo.conf.default | 3 + docs/book/src/Issues/Debugging_VM_issues.rst | 142 +++++++++++++++++++ docs/book/src/Issues/index.rst | 10 ++ docs/book/src/index.rst | 1 + lib/cuckoo/core/guest.py | 86 ++++++++--- lib/cuckoo/core/scheduler.py | 37 +++++ 6 files changed, 258 insertions(+), 21 deletions(-) create mode 100644 docs/book/src/Issues/Debugging_VM_issues.rst create mode 100644 docs/book/src/Issues/index.rst diff --git a/conf/default/cuckoo.conf.default b/conf/default/cuckoo.conf.default index a0da4fb527b..bf00fe0c2b7 100644 --- a/conf/default/cuckoo.conf.default +++ b/conf/default/cuckoo.conf.default @@ -204,6 +204,9 @@ critical = 60 # shutting down a vm. Default is 300 seconds. vm_state = 300 +# Will kill VM if: Task time = timeout + critical + stuck_seconds +stuck_seconds = 100 + [tmpfs] # only if you using volatility to speedup IO # mkdir -p /mnt/tmpfs diff --git a/docs/book/src/Issues/Debugging_VM_issues.rst b/docs/book/src/Issues/Debugging_VM_issues.rst new file mode 100644 index 00000000000..e439c9cb620 --- /dev/null +++ b/docs/book/src/Issues/Debugging_VM_issues.rst @@ -0,0 +1,142 @@ +=============================================== +Debugging Stuck Virtual Machines in Cuckoo/CAPE +=============================================== + +This guide outlines a systematic approach to diagnosing and resolving issues where the analysis process gets stuck (hangs indefinitely). + +Problem Triage +============== + +When a task appears stuck (e.g., Python process running, VM running, but no activity), determine which component is frozen: + +1. **Guest OS:** Kernel panic or BSOD inside the VM. +2. **QEMU Process:** Deadlocked hypervisor. +3. **Python Controller:** Waiting on a socket/pipe that will never return data. + +Phase 1: Immediate Diagnostics +============================== + +Check the CPU state of the processes to identify the bottleneck. + +**1. Find Process IDs (PIDs)** + +.. code-block:: bash + + ps aux | grep qemu + ps aux | grep python + +**2.
Check CPU Usage** + +.. code-block:: bash + + top -p <PID> + +* **100% CPU:** The process is in a tight loop (livelock). +* **0% CPU (State S - Sleeping):** Waiting for network/socket data. (Most common for Python). +* **0% CPU (State D - Disk Sleep):** Waiting for Hardware I/O. + +Phase 2: Debugging the Python Controller +======================================== + +If Python is sleeping (0% CPU), it is likely blocked on a synchronous call waiting for the Guest. + +**Using py-spy (Recommended)** + +Dumps the Python stack trace without pausing execution. + +.. code-block:: bash + + pip install py-spy + sudo py-spy dump --pid <PID> + +**What to look for:** +* ``wait_for_completion`` (Looping/Waiting for Agent) +* ``select``, ``poll``, ``recv`` (Waiting for Network I/O) + +Phase 3: Debugging the QEMU Engine +================================== + +If Python is waiting on QEMU, inspect the VM state. + +**1. Check System Calls** + +See what the process is asking the Linux Kernel to do. + +.. code-block:: bash + + sudo strace -p <PID> + +* **futex:** Threading deadlock. +* **ppoll/select:** Normal idle state (waiting for guest interrupt). + +**2. QEMU Monitor (QMP)** + +Query the internal state of the hypervisor. + +.. code-block:: bash + + echo "info status" | socat - UNIX-CONNECT:/tmp/qemu-monitor.sock + +**3. Visual Inspection (Screenshot)** + +If using Libvirt, capture a screenshot of the Guest without stopping it to check for BSODs or Popups. + +.. code-block:: bash + + virsh list + virsh screenshot <domain> /tmp/debug_screenshot.ppm + +Phase 4: Root Cause Analysis +============================ + +Linking the Python trace to the VM state. + +Scenario A: "The Zombie Success" +-------------------------------- +* **Symptom:** Logs say "Analysis completed successfully", but Python process is still running hours later. +* **Trace:** Python is stuck in ``wait_for_completion`` loop (sleeping).
+* **Cause:** The Agent inside the VM died or network was cut *after* sending success, but *before* the Python loop could confirm the shutdown. +* **Fix:** Abort instead of retrying forever; the error handler must not simply ``continue`` when the Agent is gone. + +Scenario B: "The Blind Wait" +---------------------------- +* **Symptom:** Python blocked on ``recv`` or ``read``. +* **Cause:** The Agent crashed without closing the TCP socket. Python waits for EOF that never comes. +* **Fix:** Enforce socket timeouts in code. + +Phase 5: Resolution & Code Fixes +================================ + +To prevent future hangs, apply these fixes to ``lib/cuckoo/core/guest.py``. + +**1. Fix the Infinite Loop** +Modify ``wait_for_completion`` to calculate a hard deadline based on configuration. + +.. code-block:: python + + # Calculate Hard Limit + effective_timeout = self.timeout if self.timeout else cfg.timeouts.default + hard_limit = effective_timeout + cfg.timeouts.critical + + while self.do_run: + # Check Hard Limit + if timeit.default_timer() - start > hard_limit: + log.error("Hard Timeout reached! Killing analysis.") + return + +**2. Fix Database Staleness** +Ensure the loop checks the *actual* database state, not cached RAM values (essential for tasks deleted by user). + +.. code-block:: python + + db.session.expire_all() + if db.guest_get_status(self.task_id) is None: + return # Task deleted + +**3. Emergency Cleanup** +If a process is already stuck, kill the zombie pair manually. + +.. code-block:: bash + + kill -9 <python_pid> + kill -9 <qemu_pid> diff --git a/docs/book/src/Issues/index.rst b/docs/book/src/Issues/index.rst new file mode 100644 index 00000000000..dbf9080f9cf --- /dev/null +++ b/docs/book/src/Issues/index.rst @@ -0,0 +1,10 @@ +.. Introduction chapter frontpage + +Issues +============ + +This chapter provides some examples of how to debug common issues. + +..
toctree:: + + Debugging_VM_issues diff --git a/docs/book/src/index.rst b/docs/book/src/index.rst index 41dd3fcc10d..60b6fd8b734 100644 --- a/docs/book/src/index.rst +++ b/docs/book/src/index.rst @@ -36,3 +36,4 @@ Contents integrations/index development/index finalremarks/index + Issues/index diff --git a/lib/cuckoo/core/guest.py b/lib/cuckoo/core/guest.py index effb77a1710..2a3de974af1 100644 --- a/lib/cuckoo/core/guest.py +++ b/lib/cuckoo/core/guest.py @@ -109,17 +109,25 @@ def get(self, method, *args, **kwargs): r = session.get(url, *args, **kwargs) except requests.ConnectionError as e: raise CuckooGuestError( - f"CAPE Agent failed without error status, please try " - f"upgrading to the latest version of agent.py (>= 0.10) and " - f"notify us if the issue persists. Error: {e}" + "CAPE Agent failed without error status, please try " + "upgrading to the latest version of agent.py (>= 0.10) and " + "notify us if the issue persists. Error: %s", str(e) ) do_raise and r.raise_for_status() return r def get_status_from_db(self) -> str: + # Force SQLAlchemy to dump its cache and look at the real DB with db.session.begin(): - return db.guest_get_status(self.task_id) + db.session.expire_all() + status = db.guest_get_status(self.task_id) + + # Handle the case where the task was already deleted by race condition + if status is None: + return "deleted" + + return status def set_status_in_db(self, status: str): with db.session.begin(): @@ -136,9 +144,9 @@ def post(self, method, *args, **kwargs): r = session.post(url, *args, **kwargs) except requests.ConnectionError as e: raise CuckooGuestError( - f"CAPE Agent failed without error status, please try " - f"upgrading to the latest version of agent.py (>= 0.10) and " - f"notify us if the issue persists. Error: {e}" + "CAPE Agent failed without error status, please try " + "upgrading to the latest version of agent.py (>= 0.10) and " + "notify us if the issue persists. 
Error: %s", str(e) ) r.raise_for_status() @@ -356,8 +364,35 @@ def start_analysis(self, options): def wait_for_completion(self): count = 0 start = timeit.default_timer() + current_status = "" + consecutive_failures = 0 + # --- REWORKED: TIMEOUT CALCULATION --- + # 1. Detect None/0: If self.timeout is missing/infinite, use system default. + # This prevents "infinite patience" if the task submission was malformed. + effective_timeout = self.timeout + if not effective_timeout: + effective_timeout = cfg.timeouts.default + + # 2. Add Critical Buffer: This is the "Grace Period" for shutdown/reporting. + # e.g., 200s (analysis) + 60s (critical) = 260s Hard Limit. + hard_limit = effective_timeout + cfg.timeouts.critical + + while self.do_run: + # FORCE REFRESH: Tell SQLAlchemy to expire the cache and fetch fresh data + # Note: Depending on your exact Cuckoo version, the session access might vary. + # This is the generic fix: + try: + # Re-fetch the task status directly from DB logic + current_status = self.get_status_from_db() + except Exception: + # If the task was deleted, this query might fail. + # If it fails, we should ABORT. + log.info("Task #%s: Task deleted or DB error. Aborting.", self.task_id) + return + + if current_status != "running": + break - while self.do_run and self.get_status_from_db() == "running": time.sleep(1) if cfg.cuckoo.machinery_screenshots: @@ -369,24 +404,33 @@ def wait_for_completion(self): if count % 5 == 0: log.debug("Task #%s: Analysis is still running (id=%s, ip=%s)", self.task_id, self.vmid, self.ipaddr) - # If the analysis hits the critical timeout, just return straight - # away and try to recover the analysis results from the guest. - if timeit.default_timer() - start > self.timeout: - log.info("Task #%s: End of analysis reached! (id=%s, ip=%s)", self.task_id, self.vmid, self.ipaddr) + # --- REWORKED: HARD STOP ENFORCEMENT --- + # If we exceed the (Timeout + Critical) limit, we kill the loop. 
+ # This handles the case where the Agent is dead (network timeout) + # and the 'except' block below keeps "continuing" forever. + if timeit.default_timer() - start > hard_limit: + log.error( + "Task #%s: Hard Timeout reached! (Running for %ds, Limit %ds). " + "Agent is likely unresponsive.", + self.task_id, + timeit.default_timer() - start, + hard_limit + ) + self.set_status_in_db("failed") return try: status = self.get("/status", timeout=5).json() except (CuckooGuestError, requests.exceptions.ReadTimeout) as e: - # this might fail due to timeouts or just temporary network - # issues thus we don't want to abort the analysis just yet and - # wait for things to recover - log.warning( - "Task #%s: Virtual Machine %s /status failed. This can indicate the guest losing network connectivity. Error: %s", - self.task_id, - self.vmid, - e, - ) + # Add a counter for consecutive failures + consecutive_failures += 1 # You need to init this to 0 outside loop + + log.warning("Task #%s: Agent unreachable (%s/10). Error: %s", self.task_id, consecutive_failures, e) + + # If we fail 10 times in a row (approx 10-15 seconds), give up. + if consecutive_failures > 10: + log.error("Task #%s: Agent is dead. Virtual Machine %s /status failed. This can indicate the guest losing network connectivity. Killing analysis.", self.task_id, self.vmid) + return # Or raise exception continue except Exception as e: log.exception("Task #%s: Virtual machine %s /status failed. 
%s", self.task_id, self.vmid, e) diff --git a/lib/cuckoo/core/scheduler.py b/lib/cuckoo/core/scheduler.py index f7251bc8c55..3afcd1d0310 100644 --- a/lib/cuckoo/core/scheduler.py +++ b/lib/cuckoo/core/scheduler.py @@ -11,7 +11,9 @@ import sys import threading import time +import traceback from collections import defaultdict +from datetime import datetime from typing import DefaultDict, List, Optional, Tuple from lib.cuckoo.common.cleaners_utils import free_space_monitor @@ -83,6 +85,41 @@ def analysis_finished(self, analysis_manager: AnalysisManager): def do_main_loop_work(self, error_queue: queue.Queue) -> SchedulerCycleDelay: """Return the number of seconds to sleep after returning.""" + # Monitor and kill stuck VMs + with self.analysis_threads_lock: + for analysis in self.analysis_threads: + if not analysis.machine or not analysis.task or not analysis.task.started_on: + continue + + stuck_seconds = self.cfg.timeouts.get("stuck_seconds", 100) + vm_state = self.cfg.timeouts.get("vm_state", 100) + timeout = analysis.task.timeout or self.cfg.timeouts.default + max_runtime = timeout + self.cfg.timeouts.critical + stuck_seconds + vm_state + duration = (datetime.now() - analysis.task.started_on).total_seconds() + if duration > max_runtime: + log.warning( + "Task #%s has been running for %s seconds, which is longer than the configured timeout + critical timeout + 100s. 
Killing VM.", + analysis.task.id, + duration, + ) + + # Log stack trace of the stuck thread + try: + frame = sys._current_frames().get(analysis.ident) + if frame: + stack_trace = "".join(traceback.format_stack(frame)) + log.warning("Stack trace of stuck thread (Task #%s):\n%s", analysis.task.id, stack_trace) + else: + log.warning("Could not retrieve stack trace for thread %s (Task #%s)", analysis.ident, analysis.task.id) + except Exception: + log.exception("Failed to log stack trace for stuck thread (Task #%s)", analysis.task.id) + + try: + if analysis.machinery_manager and analysis.machine: + analysis.machinery_manager.stop_machine(analysis.machine) + except Exception as e: + log.error("Failed to kill stuck VM for task #%s: %s", analysis.task.id, e) + if self.loop_state == LoopState.STOPPING: # This blocks the main loop until the analyses are finished. self.wait_for_running_analyses_to_finish() From ca58aaa37a3e82e959e3d1c3c11c36671df4d00b Mon Sep 17 00:00:00 2001 From: Josh Feather <142008135+josh-feather@users.noreply.github.com> Date: Mon, 5 Jan 2026 18:45:22 +0000 Subject: [PATCH 2/2] Bugfix: Always initialize Package.args in Linux Analyzer Package (#2824) * Bugfix: Always initialize Package.args in Linux Analyzer Package Fixes a bug introduced in d9be043 that prevents Package.args from being set if a string was not passed into the object during initialization. Package.args will now always be initialized with an empty list if no arguments are provided. This behavior aligns with the Windows analyzer. 
* Adds some basic Linux analyzer unit tests * Use poetry to run Linux analyzer unit tests in CI * Make analyzer.linux.lib.core.packages.Package.__init__._args more concise * Remove placeholder test * Increase pytest verbosity for Linux analyzer tests * Add additional test cases for Linux Analyzer package arguments * Remove unused import in Linux analyzer * Fix Linux analyzer test case * Fix Linux analyzer skip logic * Add exception handling to Process.get_parent_pid in Linux analyzer * Fix test_proc_dead_states test case --- .github/workflows/python-package.yml | 3 + analyzer/linux/lib/api/process.py | 5 +- analyzer/linux/lib/core/packages.py | 48 +++++---- analyzer/linux/pytest.ini | 2 + analyzer/linux/tests/lib/api/test_process.py | 97 +++++++++++++++++++ .../linux/tests/lib/core/test_packages.py | 19 ++++ analyzer/linux/tests/lib/core/test_startup.py | 53 ++++++++++ .../tests/modules/packages/test_package.py | 35 +++++++ analyzer/linux/tests/test_analyzer.py | 34 +++++++ 9 files changed, 276 insertions(+), 20 deletions(-) create mode 100644 analyzer/linux/pytest.ini create mode 100644 analyzer/linux/tests/lib/api/test_process.py create mode 100644 analyzer/linux/tests/lib/core/test_packages.py create mode 100644 analyzer/linux/tests/lib/core/test_startup.py create mode 100644 analyzer/linux/tests/modules/packages/test_package.py create mode 100644 analyzer/linux/tests/test_analyzer.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1d7cfd310fc..652db6263cc 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -48,6 +48,9 @@ jobs: - name: Run unit tests run: poetry run python -m pytest --import-mode=append + - name: Run Linux analyzer unit tests + run: poetry --project . 
--directory "analyzer/linux" run python -m pytest -v + # see the mypy configuration in pyproject.toml - name: Run mypy run: poetry run mypy diff --git a/analyzer/linux/lib/api/process.py b/analyzer/linux/lib/api/process.py index 5a9fea3c3c7..6fc922bc504 100644 --- a/analyzer/linux/lib/api/process.py +++ b/analyzer/linux/lib/api/process.py @@ -30,7 +30,10 @@ def is_alive(self): return True def get_parent_pid(self): - return int(self.get_proc_status().get("PPid")) + try: + return int(self.get_proc_status().get("PPid")) + except (TypeError, ValueError): + return None def get_proc_status(self): try: diff --git a/analyzer/linux/lib/core/packages.py b/analyzer/linux/lib/core/packages.py index 33d7d0b2e26..695719491a4 100644 --- a/analyzer/linux/lib/core/packages.py +++ b/analyzer/linux/lib/core/packages.py @@ -56,22 +56,25 @@ def _found_target_class(module, name): def _guess_package_name(file_type, file_name): - if "Bourne-Again" in file_type or "bash" in file_type: - return "bash" - elif "Zip archive" in file_type: - return "zip" - elif "gzip compressed data" in file_type: - return "zip" - elif "PDF document" in file_type or file_name.endswith(".pdf"): - return "pdf" - elif "Composite Document File V2 Document" in file_type or file_name.endswith(".doc"): - return "doc" - elif "Microsoft Word" in file_type or file_name.endswith(".docx"): - return "doc" - elif "ELF" in file_type: - return "generic" - elif "Unicode text" in file_type or file_name.endswith(".js"): - return "js" + try: + if "Bourne-Again" in file_type or "bash" in file_type: + return "bash" + elif "Zip archive" in file_type: + return "zip" + elif "gzip compressed data" in file_type: + return "zip" + elif "PDF document" in file_type or file_name.endswith(".pdf"): + return "pdf" + elif "Composite Document File V2 Document" in file_type or file_name.endswith(".doc"): + return "doc" + elif "Microsoft Word" in file_type or file_name.endswith(".docx"): + return "doc" + elif "ELF" in file_type: + return "generic" + 
elif "Unicode text" in file_type or file_name.endswith(".js"): + return "js" + except (TypeError, AttributeError): + pass return None @@ -101,9 +104,16 @@ def __init__(self, target, **kwargs): self.timeout = kwargs.get("timeout") # Command-line arguments for the target. - _args = self.options.get("arguments", []) - if isinstance(_args, str): - self.args = _args.split() + def _args(): + args = self.options.get("arguments") + if isinstance(args, list): + return args + if isinstance(args, str): + return args.split() + return [] + + self.args = _args() + # Choose an analysis method (or fallback to apicalls) self.method = self.options.get("method", "apicalls") # Should our target be launched as root or not diff --git a/analyzer/linux/pytest.ini b/analyzer/linux/pytest.ini new file mode 100644 index 00000000000..a635c5c0318 --- /dev/null +++ b/analyzer/linux/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = . diff --git a/analyzer/linux/tests/lib/api/test_process.py b/analyzer/linux/tests/lib/api/test_process.py new file mode 100644 index 00000000000..4b8d73fc56f --- /dev/null +++ b/analyzer/linux/tests/lib/api/test_process.py @@ -0,0 +1,97 @@ +from unittest.mock import Mock, patch, mock_open, PropertyMock +from lib.api.process import Process +import base64 + +import pytest + +proc_status = """Name: test-process +Umask: 0002 +State: R (running) +Tgid: 42 +Ngid: 0 +Pid: 42 +PPid: 24""" + +@pytest.fixture +def os_path_exists(monkeypatch): + monkeypatch.setattr("os.path.exists", Mock(return_value=True)) + yield + +@pytest.fixture +def os_path_not_exists(monkeypatch): + monkeypatch.setattr("os.path.exists", Mock(return_value=False)) + yield + +@pytest.fixture +def fake_proc_status_file(monkeypatch): + monkeypatch.setattr("builtins.open", mock_open(read_data=proc_status)) + yield + +ARGS = { + "pid": 42 +} + +def test_init(): + """Initialize Process instances using both args and kwargs""" + kw_args_instance = Process(**ARGS) + assert kw_args_instance.pid == ARGS["pid"] + 
args_instance = Process(*ARGS.values()) + assert args_instance.pid == ARGS["pid"] + +@pytest.mark.usefixtures("os_path_exists") +def test_proc_alive_states(): + for state in [ + "State: R (running)", + "State: S (sleeping)", + "State: D (waiting)", + "State: T (stopped)", + "State: t (trace stopped)", + "State: W (paging)", + "State: W (waking)", + "State: P (parked)", + ]: + state_file_content = proc_status.replace("State: R (running)", state) + with patch("builtins.open", mock_open(read_data=state_file_content)): + process = Process(**ARGS) + assert process.is_alive() + +@pytest.mark.usefixtures("os_path_exists") +def test_proc_dead_states(): + for state in [ + "State: Z (zombie)", + ]: + state_file_content = proc_status.replace("State: R (running)", state) + with patch("builtins.open", mock_open(read_data=state_file_content)): + process = Process(**ARGS) + alive = process.is_alive() + assert not alive + +@pytest.mark.usefixtures("os_path_not_exists") +def test_proc_file_not_exists(): + process = Process(**ARGS) + assert not process.is_alive() + +@pytest.mark.usefixtures("os_path_exists") +def test_proc_file_corrupt(): + corrupt_status = base64.b64encode(proc_status.encode("utf-8")).decode("utf-8") + with patch("builtins.open", mock_open(read_data=corrupt_status)): + process = Process(**ARGS) + assert not process.is_alive() + +@pytest.mark.usefixtures("os_path_exists", "fake_proc_status_file") +def test_get_ppid(): + process = Process(**ARGS) + assert 24 == process.get_parent_pid() + +@patch("builtins.open", side_effect=FileNotFoundError) +def test_get_ppid_file_not_exists(bopen): + process = Process(**ARGS) + assert process.get_parent_pid() is None + +@patch("subprocess.Popen") +def test_execute(popen): + process = Process(**ARGS) + type(popen.return_value).pid = PropertyMock(return_value=ARGS["pid"]) + assert process.execute(["echo", "this is a test message"]) + assert ARGS["pid"] == process.pid + assert popen.called diff --git 
a/analyzer/linux/tests/lib/core/test_packages.py b/analyzer/linux/tests/lib/core/test_packages.py new file mode 100644 index 00000000000..1e95b6f08b0 --- /dev/null +++ b/analyzer/linux/tests/lib/core/test_packages.py @@ -0,0 +1,19 @@ +from lib.core.packages import _guess_package_name +import pytest + +@pytest.mark.parametrize("file_type, file_name, expected_package_name", [ + ("", "", None), + ("Bourne-Again", None, "bash"), + ("Zip archive", None, "zip"), + ("gzip compressed data", None, "zip"), + ("PDF document", "test.pdf", "pdf"), + ("Composite Document File V2 Document", "test.docx", "doc"), + ("Microsoft Word", "test.docx", "doc"), + ("ELF", None, "generic"), + ("Unicode text", "malware.js", "js") +]) +def test__guess_package_name(file_type, file_name, expected_package_name): + assert _guess_package_name(file_type, "") == expected_package_name, f"Expected {expected_package_name} for {file_type}, {file_name}" + if file_name: + assert _guess_package_name("", file_name) == expected_package_name, f"Expected {expected_package_name} for {file_type}, {file_name}" + assert _guess_package_name(file_type, file_name) == expected_package_name, f"Expected {expected_package_name} for {file_type}, {file_name}" diff --git a/analyzer/linux/tests/lib/core/test_startup.py b/analyzer/linux/tests/lib/core/test_startup.py new file mode 100644 index 00000000000..7e7cadf4be1 --- /dev/null +++ b/analyzer/linux/tests/lib/core/test_startup.py @@ -0,0 +1,53 @@ +from unittest.mock import patch, Mock +import logging +from logging import StreamHandler +import pytest + +from lib.common.results import NetlogHandler +import lib.core.startup +import lib.core.config + +import lib + +@pytest.fixture +def patch_netloghandler(monkeypatch): + monkeypatch.setattr(NetlogHandler, "__init__", Mock(return_value=None)) + monkeypatch.setattr(NetlogHandler, "connect", Mock()) + yield + +@patch('os.makedirs') +@patch('os.path.exists') +def test_create_folders_path_not_exists(os_path_exists, os_mkdirs): + 
"""Test initial folder creation with paths that do not exist""" + # Fake path not existing + os_path_exists.return_value = False + lib.core.startup.create_folders() + assert os_path_exists.called + # Ensure there is an attempt to create a folder + assert os_mkdirs.called + +@patch('os.makedirs') +@patch('os.path.exists') +def test_create_folders_path_exists(os_path_exists, os_mkdirs): + """Test initial folder creation with paths that already exist""" + # Fake path not existing + os_path_exists.return_value = True + lib.core.startup.create_folders() + assert os_path_exists.called + # Ensure there are no attempts to create a folder + assert not os_mkdirs.called + +@pytest.mark.usefixtures("patch_netloghandler") +@patch("logging.Logger.addHandler") +def test_init_logging(addhandler): + """Ensure init_logging adds the right log handlers""" + lib.core.startup.init_logging() + handlers = [] + # Get a list of all the types of handlers that are being added + for name, args, kwargs in addhandler.mock_calls: + handlers = [*handlers, *[type(arg) for arg in args]] + # Ensure there is a StreamHandler and a NetlogHandler + assert StreamHandler in handlers + assert NetlogHandler in handlers + # Ensure log level is set to DEBUG + assert lib.core.startup.log.level == logging.DEBUG diff --git a/analyzer/linux/tests/modules/packages/test_package.py b/analyzer/linux/tests/modules/packages/test_package.py new file mode 100644 index 00000000000..afff20fb737 --- /dev/null +++ b/analyzer/linux/tests/modules/packages/test_package.py @@ -0,0 +1,35 @@ +import sys +import unittest +import pytest + +import lib + +from lib.core.packages import Package + +@pytest.fixture +def patch_netlogfile(monkeypatch): + class MockNetlogFile: + def init(self, *args): + return + def close(self): + return + monkeypatch.setattr(lib.core.packages, "NetlogFile", MockNetlogFile) + monkeypatch.setattr(lib.core.packages, "append_buffer_to_host", lambda *args: None) + yield + +class TestPackage(unittest.TestCase): + 
+ @pytest.mark.usefixtures("patch_netlogfile") + def test_package_init_args(self): + pkg = Package(sys.executable, options={}) + self.assertEqual(pkg.args, []) + + @pytest.mark.usefixtures("patch_netlogfile") + def test_package_init_args_list(self): + pkg = Package(sys.executable, options={"arguments": ["foo", "bar"]}) + self.assertEqual(pkg.args, ["foo", "bar"]) + + @pytest.mark.usefixtures("patch_netlogfile") + def test_package_init_args_str(self): + pkg = Package(sys.executable, options={"arguments": "foo bar"}) + self.assertEqual(pkg.args, ["foo", "bar"]) diff --git a/analyzer/linux/tests/test_analyzer.py b/analyzer/linux/tests/test_analyzer.py new file mode 100644 index 00000000000..47998705b40 --- /dev/null +++ b/analyzer/linux/tests/test_analyzer.py @@ -0,0 +1,34 @@ +import sys +import unittest + +import pytest + +if sys.platform == "linux": + import analyzer + from analyzer import PROCESS_LIST, SEEN_LIST + +@pytest.mark.skipif(sys.platform != "linux", reason="Requires Linux") +class TestAnalyzer(unittest.TestCase): + + def test_add_pids(self): + """Test add_pids with a variety of valid types""" + # Check that both sets are empty + self.assertEqual(PROCESS_LIST, set()) + self.assertEqual(SEEN_LIST, set()) + + pids = [123, 456, 789] + # Add a list of PIDs + analyzer.add_pids([str(pids[0]), pids[1]]) + # Add a set of PIDs + analyzer.add_pids(set([pids[0], pids[2]])) + # Add a tuple of PIDs + analyzer.add_pids((pids[1], pids[2])) + + self.assertEqual(PROCESS_LIST, set(pids)) + self.assertEqual(SEEN_LIST, set(pids)) + + + def test_add_pids_invalid_var(self): + """Test add_pids with an invalid type""" + with self.assertRaises(TypeError): + analyzer.add_pids(analyzer.add_pids)