diff --git a/run_tests.py b/run_tests.py
new file mode 100644
index 00000000..1dcd373f
--- /dev/null
+++ b/run_tests.py
@@ -0,0 +1,16 @@
+r"""Helper to run pytest with the `src` layout on systems where the package isn't installed.
+
+Usage:
+ .venv\Scripts\python.exe run_tests.py -q
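+    .venv/bin/python run_tests.py -q   (POSIX equivalent)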
+"""
+import sys
+
+# Ensure the src directory is first on sys.path so `import torbot` works
+sys.path.insert(0, "src")
+
+import pytest
+
+
+if __name__ == "__main__":
+ args = sys.argv[1:] or ["-q"]
+ raise SystemExit(pytest.main(args))
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..328483d7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,35 @@
+"""pytest configuration and test-time shims for TorBot tests.
+
+This module provides a lightweight stub for the NLP classifier so tests can
+run without installing the full scientific stack (numpy, scikit-learn, etc.).
+The stub is only active during test runs and does not affect production code.
+"""
+import sys
+import types
+
+
+def _install_nlp_stub():
+ """Install a minimal stub for torbot.modules.nlp.main during tests."""
+ mod_name = "torbot.modules.nlp.main"
+ if mod_name in sys.modules:
+ return
+
+ # Create a minimal module with classify function
+ stub = types.ModuleType(mod_name)
+
+ def classify(data):
+ """Lightweight test-only classifier.
+
+ Returns a deterministic classification without requiring ML libraries.
+ Real implementation uses sklearn pipeline with training data.
+ """
+ _ = data # unused in stub
+ return ["unknown", 0.0]
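+    # The [label, accuracy] pair returned above mirrors the shape the tests
+    # expect from the real classifier (cf. LinkNode.classification/.accuracy).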
+
+ # Use setattr to avoid linter complaints about dynamic attributes
+ setattr(stub, "classify", classify)
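+    # Once registered under its fully-qualified name, any subsequent
+    # `from torbot.modules.nlp.main import classify` resolves to this stub
+    # instead of importing the real module and its heavy dependencies.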
+ sys.modules[mod_name] = stub
+
+
+# Install stub before any test imports
+_install_nlp_stub()
diff --git a/tests/test_linktree_extra.py b/tests/test_linktree_extra.py
new file mode 100644
index 00000000..3252b55a
--- /dev/null
+++ b/tests/test_linktree_extra.py
@@ -0,0 +1,158 @@
+"""Additional edge-case tests for linktree parsing functions.
+
+These tests cover corner cases and error conditions for the parsing helpers.
+"""
+from bs4 import BeautifulSoup
+import pytest
+
+from torbot.modules.linktree import (
+ parse_hostname,
+ parse_links,
+ parse_emails,
+ parse_phone_numbers,
+)
+
+
+def test_parse_hostname_raises_on_invalid_url() -> None:
+    """Ensure parse_hostname raises an exception for URLs without a hostname."""
+ with pytest.raises(Exception, match="unable to parse hostname"):
+ parse_hostname("not-a-valid-url")
+
+
+def test_parse_hostname_handles_various_schemes() -> None:
+ """Verify parse_hostname works with http, https, and onion domains."""
+ assert parse_hostname("https://www.example.com/path") == "www.example.com"
+ assert parse_hostname("http://test.onion") == "test.onion"
+ assert parse_hostname("https://sub.domain.co.uk:8080/") == "sub.domain.co.uk"
+
+
+def test_parse_links_filters_only_valid_full_urls() -> None:
+ """Ensure parse_links returns only absolute http(s) URLs."""
+    html = """
+    <html><body>
+        <a href="/relative">relative</a>
+        <a href="https://valid.example/path">valid</a>
+        <a href="http://also.valid.test/">valid2</a>
+        <a href="javascript:void(0)">js</a>
+        <a href="https://valid.example/path">valid-duplicate</a>
+    </body></html>
+    """
+
+ links = parse_links(html)
+ # only absolute http(s) URLs should be returned, duplicates preserved
+ assert links == [
+ "https://valid.example/path",
+ "http://also.valid.test/",
+ "https://valid.example/path",
+ ]
+
+
+def test_parse_links_empty_html() -> None:
+ """Test parse_links with HTML containing no anchor tags."""
+    html = "<p>No links here</p>"
+ links = parse_links(html)
+ assert links == []
+
+
+def test_parse_links_anchor_without_href() -> None:
+ """Ensure parse_links handles anchor tags without href attribute."""
+    html = """
+    <html><body>
+        <a>No href</a>
+        <a name="anchor">Named anchor</a>
+        <a href="https://valid.com">Valid</a>
+    </body></html>
+    """
+ links = parse_links(html)
+ assert links == ["https://valid.com"]
+
+
+def test_parse_emails_ignores_invalid_and_returns_unique() -> None:
+ """Verify parse_emails filters invalid emails and removes duplicates."""
+    doc = BeautifulSoup(
+        """
+        <html><body>
+            <a href="mailto:good@example.com">good</a>
+            <a href="mailto:good@example.com">good-dup</a>
+            <a href="mailto:bad">bad</a>
+            <a href="mailto:params@example.com?subject=hello">withparams</a>
+            <a href="https://example.com/">not-mailto</a>
+        </body></html>
+        """,
+ "html.parser",
+ )
+
+ emails = parse_emails(doc)
+ # duplicates removed, invalid emails rejected
+ # Note: current impl splits on 'mailto:' so params might be included
+ # We test actual behavior here
+ assert "good@example.com" in emails
+ assert len([e for e in emails if e == "good@example.com"]) == 1 # no duplicates
+
+
+def test_parse_emails_empty_page() -> None:
+ """Test parse_emails with no mailto links."""
+    doc = BeautifulSoup("<p>No emails</p>", "html.parser")
+ emails = parse_emails(doc)
+ assert emails == []
+
+
+def test_parse_emails_malformed_mailto() -> None:
+ """Ensure malformed mailto links are filtered out."""
+    doc = BeautifulSoup(
+        """
+        <html><body>
+            <a href="mailto:">empty</a>
+            <a href="mailto:not-an-email">invalid</a>
+            <a href="mailto:valid@test.com">valid</a>
+        </body></html>
+        """,
+ "html.parser",
+ )
+ emails = parse_emails(doc)
+ # Only valid email should be extracted
+ assert emails == ["valid@test.com"]
+
+
+def test_parse_phone_numbers_only_accepts_possible_international_numbers() -> None:
+ """Verify parse_phone_numbers validates international format."""
+    doc = BeautifulSoup(
+        """
+        <html><body>
+            <a href="tel:+14155552671">us</a>
+            <a href="tel:14155552671">no-plus</a>
+            <a href="tel:+442071838750">uk</a>
+            <a href="tel:not-a-number">invalid</a>
+        </body></html>
+        """,
+ "html.parser",
+ )
+
+ numbers = parse_phone_numbers(doc)
+ # only the properly formatted international numbers (with +) are considered possible
+ assert sorted(numbers) == ["+14155552671", "+442071838750"]
+
+
+def test_parse_phone_numbers_empty_page() -> None:
+ """Test parse_phone_numbers with no tel: links."""
+    doc = BeautifulSoup("<p>No phones</p>", "html.parser")
+ numbers = parse_phone_numbers(doc)
+ assert numbers == []
+
+
+def test_parse_phone_numbers_removes_duplicates() -> None:
+ """Ensure duplicate phone numbers are deduplicated."""
+    doc = BeautifulSoup(
+        """
+        <html><body>
+            <a href="tel:+14155551234">call</a>
+            <a href="tel:+14155551234">call again</a>
+            <a href="tel:+14155559999">other</a>
+        </body></html>
+        """,
+ "html.parser",
+ )
+ numbers = parse_phone_numbers(doc)
+ assert len(numbers) == 2
+ assert "+14155551234" in numbers
+ assert "+14155559999" in numbers
diff --git a/tests/test_linktree_tree.py b/tests/test_linktree_tree.py
new file mode 100644
index 00000000..cd64a8ea
--- /dev/null
+++ b/tests/test_linktree_tree.py
@@ -0,0 +1,362 @@
+"""Comprehensive tests for LinkTree and LinkNode classes.
+
+These tests verify:
+- Creating nodes from HTTP responses (with/without title)
+- Building trees with depth limits
+- Duplicate node handling
+- Saving to JSON and text formats
+- Classification and contact extraction integration
+"""
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from torbot.modules.linktree import LinkTree, LinkNode, parse_hostname
+
+
+class FakeResponse:
+ """Mock HTTP response for testing."""
+
+ def __init__(self, url: str, text: str, status_code: int = 200):
+ self.url = url
+ self.text = text
+ self.status_code = status_code
+
+
+class FakeClient:
+ """Mock httpx.Client that returns pre-configured responses."""
+
+ def __init__(self, responses: dict[str, FakeResponse]):
+ self.responses = responses
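+        # call_count tracks how many times each URL was requested; useful if a
+        # test wants to assert on caching or duplicate-fetch behavior.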
+ self.call_count = {}
+
+ def get(self, url: str) -> FakeResponse:
+ self.call_count[url] = self.call_count.get(url, 0) + 1
+ if url in self.responses:
+ return self.responses[url]
+ # Return a minimal response for unknown URLs
+ return FakeResponse(url, "404", 404)
+
+
+def test_parse_hostname_extracts_domain():
+ """Verify parse_hostname extracts hostname from various URL formats."""
+ assert parse_hostname("https://example.com/path") == "example.com"
+ assert parse_hostname("http://sub.domain.com:8080/") == "sub.domain.com"
+ assert parse_hostname("https://test.onion") == "test.onion"
+
+
+def test_parse_hostname_raises_on_invalid():
+    """Ensure parse_hostname raises when the URL has no hostname."""
+ with pytest.raises(Exception, match="unable to parse hostname"):
+ parse_hostname("not-a-url")
+
+ with pytest.raises(Exception, match="unable to parse hostname"):
+ parse_hostname("file:///local/path")
+
+
+def test_linknode_initialization():
+ """Verify LinkNode stores all required fields correctly."""
+ node = LinkNode(
+ title="Test Page",
+ url="https://example.com",
+ status=200,
+ classification="blog",
+ accuracy=0.85,
+ numbers=["+14155551234"],
+ emails=["test@example.com"],
+ )
+
+ assert node.tag == "Test Page"
+ assert node.identifier == "https://example.com"
+ assert node.status == 200
+ assert node.classification == "blog"
+ assert node.accuracy == 0.85
+ assert node.numbers == ["+14155551234"]
+ assert node.emails == ["test@example.com"]
+
+
+def test_linktree_creates_root_node_with_title():
+ """Test that LinkTree creates a root node using the page title."""
+    html = """
+    <html>
+    <head><title>Example Site</title></head>
+    <body>
+        <a href="https://example.com/page1">Page 1</a>
+    </body>
+    </html>
+    """
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", html, 200),
+ })
+
+ tree = LinkTree("https://example.com", depth=0, client=client)
+ tree.load()
+
+ root = tree.get_node("https://example.com")
+ assert root is not None
+ assert root.tag == "Example Site"
+ assert root.data.status == 200
+
+
+def test_linktree_creates_root_node_without_title():
+    """Test that LinkTree falls back to the hostname when the page has no title."""
+    html = "<p>No title here</p>"
+
+ client = FakeClient({
+ "https://test.onion": FakeResponse("https://test.onion", html, 200),
+ })
+
+ tree = LinkTree("https://test.onion", depth=0, client=client)
+ tree.load()
+
+ root = tree.get_node("https://test.onion")
+ assert root is not None
+ assert root.tag == "test.onion"
+
+
+def test_linktree_extracts_contacts_from_page():
+    """Verify LinkTree extracts emails and phone numbers from a page."""
+    html = """
+    <html>
+    <head><title>Contact Page</title></head>
+    <body>
+        <a href="mailto:info@example.com">Email us</a>
+        <a href="tel:+14155551234">Call us</a>
+        <a href="mailto:support@example.com">Support</a>
+    </body>
+    </html>
+    """
+
+ client = FakeClient({
+ "https://example.com/contact": FakeResponse(
+ "https://example.com/contact", html, 200
+ ),
+ })
+
+ tree = LinkTree("https://example.com/contact", depth=0, client=client)
+ tree.load()
+
+ root = tree.get_node("https://example.com/contact")
+ assert root is not None
+ assert len(root.data.emails) == 2
+ assert "info@example.com" in root.data.emails
+ assert "support@example.com" in root.data.emails
+ assert "+14155551234" in root.data.numbers
+
+
+def test_linktree_builds_tree_to_depth_1():
+ """Test that LinkTree builds children up to specified depth."""
+    root_html = """
+    <html>
+    <head><title>Root</title></head>
+    <body>
+        <a href="https://example.com/child1">Child 1</a>
+        <a href="https://example.com/child2">Child 2</a>
+    </body>
+    </html>
+    """
+
+    child1_html = "<title>Child 1</title>"
+    child2_html = "<title>Child 2</title>"
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", root_html, 200),
+ "https://example.com/child1": FakeResponse(
+ "https://example.com/child1", child1_html, 200
+ ),
+ "https://example.com/child2": FakeResponse(
+ "https://example.com/child2", child2_html, 200
+ ),
+ })
+
+ tree = LinkTree("https://example.com", depth=1, client=client)
+ tree.load()
+
+ # Verify root exists
+ root = tree.get_node("https://example.com")
+ assert root is not None
+
+ # Verify children exist
+ child1 = tree.get_node("https://example.com/child1")
+ child2 = tree.get_node("https://example.com/child2")
+ assert child1 is not None
+ assert child2 is not None
+ assert child1.tag == "Child 1"
+ assert child2.tag == "Child 2"
+
+ # Verify tree structure
+ assert tree.parent("https://example.com/child1").identifier == "https://example.com"
+ assert tree.parent("https://example.com/child2").identifier == "https://example.com"
+
+
+def test_linktree_respects_depth_limit():
+ """Ensure LinkTree stops recursion at the specified depth."""
+    root_html = '<title>Root</title><a href="https://example.com/level1">L1</a>'
+    level1_html = '<title>Level 1</title><a href="https://example.com/level2">L2</a>'
+    level2_html = '<title>Level 2</title><a href="https://example.com/level3">L3</a>'
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", root_html, 200),
+ "https://example.com/level1": FakeResponse(
+ "https://example.com/level1", level1_html, 200
+ ),
+ "https://example.com/level2": FakeResponse(
+ "https://example.com/level2", level2_html, 200
+ ),
+ "https://example.com/level3": FakeResponse(
+            "https://example.com/level3", '<title>Level 3</title>', 200
+ ),
+ })
+
+ # Build tree with depth=2 (root + 2 levels)
+ tree = LinkTree("https://example.com", depth=2, client=client)
+ tree.load()
+
+ # Root and level1 and level2 should exist
+ assert tree.get_node("https://example.com") is not None
+ assert tree.get_node("https://example.com/level1") is not None
+ assert tree.get_node("https://example.com/level2") is not None
+
+ # Level3 should NOT exist (depth limit)
+ assert tree.get_node("https://example.com/level3") is None
+
+
+def test_linktree_handles_duplicate_links():
+ """Verify duplicate URLs don't cause errors or duplicate nodes."""
+    html_with_dup = """
+    <html>
+    <head><title>Page with duplicates</title></head>
+    <body>
+        <a href="https://example.com/page">Link 1</a>
+        <a href="https://example.com/page">Link 2 (same URL)</a>
+        <a href="https://example.com/other">Other</a>
+    </body>
+    </html>
+    """
+
+    page_html = "<title>Target Page</title>"
+    other_html = "<title>Other Page</title>"
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", html_with_dup, 200),
+ "https://example.com/page": FakeResponse(
+ "https://example.com/page", page_html, 200
+ ),
+ "https://example.com/other": FakeResponse(
+ "https://example.com/other", other_html, 200
+ ),
+ })
+
+ tree = LinkTree("https://example.com", depth=1, client=client)
+ tree.load()
+
+ # Should have 3 nodes total: root + 2 unique children
+ all_nodes = tree.all_nodes()
+ assert len(all_nodes) == 3
+
+    # The duplicate URL produced a single node; its second occurrence was skipped
+ assert tree.get_node("https://example.com/page") is not None
+ assert tree.get_node("https://example.com/other") is not None
+
+
+def test_linktree_save_json_creates_file():
+ """Test saveJSON writes a valid JSON file with tree structure."""
+    html = '<title>Test JSON Save</title>'
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", html, 200),
+ })
+
+ tree = LinkTree("https://example.com", depth=0, client=client)
+ tree.load()
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ # Patch project_root_directory to use temp dir
+ with patch("torbot.modules.linktree.project_root_directory", tmpdir):
+ tree.saveJSON()
+
+ # Check that JSON file was created
+ json_files = list(Path(tmpdir).glob("*.json"))
+ assert len(json_files) == 1
+
+ # Verify JSON file is not empty and is valid JSON
+ with open(json_files[0]) as f:
+ content = f.read()
+ assert len(content) > 0
+            # The file should parse as valid JSON (treelib may emit a bare string)
+            data = json.loads(content)
+            assert data is not None
+
+
+def test_linktree_save_text_creates_file():
+ """Test save creates a text file representation of the tree."""
+    html = '<title>Test Text Save</title>'
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", html, 200),
+ })
+
+ tree = LinkTree("https://example.com", depth=0, client=client)
+ tree.load()
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ with patch("torbot.modules.linktree.project_root_directory", tmpdir):
+ tree.save()
+
+ # Check that text file was created
+ txt_files = list(Path(tmpdir).glob("*.txt"))
+ assert len(txt_files) == 1
+
+ # Verify file is not empty
+ assert txt_files[0].stat().st_size > 0
+
+
+def test_linktree_handles_non_200_status():
+ """Verify LinkTree records non-200 status codes correctly."""
+    html = '<title>Not Found</title>'
+
+ client = FakeClient({
+ "https://example.com/missing": FakeResponse(
+ "https://example.com/missing", html, 404
+ ),
+ })
+
+ tree = LinkTree("https://example.com/missing", depth=0, client=client)
+ tree.load()
+
+ root = tree.get_node("https://example.com/missing")
+ assert root is not None
+ assert root.data.status == 404
+
+
+def test_linktree_filters_invalid_links():
+ """Ensure only valid absolute URLs are added as children."""
+    html = """
+    <html>
+    <head><title>Root</title></head>
+    <body>
+        <a href="https://valid.com">Valid</a>
+        <a href="/relative">Relative</a>
+        <a href="javascript:void(0)">JS</a>
+        <a href="mailto:someone@example.com">Email</a>
+        <a href="#fragment">Fragment</a>
+    </body>
+    </html>
+    """
+
+    valid_html = '<title>Valid</title>'
+
+ client = FakeClient({
+ "https://example.com": FakeResponse("https://example.com", html, 200),
+ "https://valid.com": FakeResponse("https://valid.com", valid_html, 200),
+ })
+
+ tree = LinkTree("https://example.com", depth=1, client=client)
+ tree.load()
+
+ # Should have 2 nodes: root + 1 valid child
+ all_nodes = tree.all_nodes()
+ assert len(all_nodes) == 2
+ assert tree.get_node("https://valid.com") is not None