From ff19a475be8e096f1b47914d97c2722a14ce680d Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Tue, 3 Feb 2026 12:59:03 -0800 Subject: [PATCH 1/9] X-Smart-Branch-Parent: main From a32e3fad39d983ef6a210f6b017e8c229446dfba Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Thu, 5 Feb 2026 11:34:13 -0800 Subject: [PATCH 2/9] Added unit tests and parameterized integration tests --- fact/src/event/mod.rs | 151 ++++++++++++++++++++++++++ fact/src/event/process.rs | 219 ++++++++++++++++++++++++++++++++++++++ tests/test_file_open.py | 24 ++++- 3 files changed, 389 insertions(+), 5 deletions(-) diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index e5695653..46283470 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -358,3 +358,154 @@ impl From for fact_api::FileOwnershipChange { } } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper function to convert a Rust string to a c_char array for testing + fn string_to_c_char_array(s: &str) -> [c_char; N] { + let mut array = [0 as c_char; N]; + let bytes = s.as_bytes(); + let len = bytes.len().min(N - 1); + for (i, &byte) in bytes.iter().take(len).enumerate() { + array[i] = byte as c_char; + } + array + } + + /// Helper function to convert raw bytes to a c_char array for testing invalid UTF-8 + fn bytes_to_c_char_array(bytes: &[u8]) -> [c_char; N] { + let mut array = [0 as c_char; N]; + let len = bytes.len().min(N - 1); + for (i, &byte) in bytes.iter().take(len).enumerate() { + array[i] = byte as c_char; + } + array + } + + #[test] + fn slice_to_string_valid_utf8() { + let tests = [ + ("hello", "ASCII"), + ("café", "Latin-1 supplement"), + ("файл", "Cyrillic"), + ("测试文件", "Chinese"), + ("test🚀file", "emoji"), + ("test-файл-测试-🐛.txt", "mixed characters"), + ("ملف", "Arabic"), + ("קובץ", "Hebrew"), + ("ファイル", "Japanese"), + ]; + + for (input, description) in tests { + let arr = string_to_c_char_array::<256>(input); + assert_eq!(slice_to_string(&arr).unwrap(), input, "Failed for {}", description); + } + } + + #[test] + fn slice_to_string_invalid_utf8() { + let tests: &[(&[u8], &str)] = &[ + (&[0xFF, 0xFE, 0xFD], "invalid continuation bytes"), + (&[b't', b'e', b's', b't', 0xE2], "truncated multi-byte sequence"), + (&[0xC0, 0x80], "overlong encoding"), + (&[b'h', b'e', b'l', b'l', b'o', 0x80, b'w', b'o', b'r', b'l', b'd'], "invalid start byte"), + (&[0x80], "lone continuation byte"), + (&[b't', b'e', b's', b't', 0xFF, 0xFE], "mixed valid and invalid bytes"), + ]; + + for (bytes, description) in tests { + let arr = bytes_to_c_char_array::<256>(bytes); + assert!(slice_to_string(&arr).is_err(), "Should fail for {}", description); + } + } + + #[test] + fn sanitize_d_path_valid_utf8() { + let tests = [ + ("/etc/test", "/etc/test", "ASCII"), + ("/tmp/файл.txt", "/tmp/файл.txt", "Cyrillic"), + ("/home/user/测试文件.log", "/home/user/测试文件.log", "Chinese"), + ("/data/🚀rocket.dat", "/data/🚀rocket.dat", "emoji"), + ("/var/log/app-данные-数据-🐛.log", "/var/log/app-данные-数据-🐛.log", "mixed Unicode"), + ("/home/ملف.txt", "/home/ملف.txt", "Arabic"), + ("/opt/ファイル.conf", "/opt/ファイル.conf", "Japanese"), + ]; + + for (input, expected, description) in tests { + let arr = string_to_c_char_array::<4096>(input); + assert_eq!(sanitize_d_path(&arr), PathBuf::from(expected), "Failed for {}", description); + } + } + + #[test] + fn sanitize_d_path_deleted_suffix() { + let tests = [ + ("/tmp/test.txt (deleted)", "/tmp/test.txt", "ASCII with deleted suffix"), + ("/tmp/файл.txt (deleted)", "/tmp/файл.txt", "Unicode with deleted suffix"), + ("/etc/config.yaml", "/etc/config.yaml", "no deleted suffix"), + ("/var/log/app/debug.log (deleted)", "/var/log/app/debug.log", "nested path with deleted suffix"), + ]; + + for (input, expected, description) in tests { + let arr = string_to_c_char_array::<4096>(input); + assert_eq!(sanitize_d_path(&arr), PathBuf::from(expected), "Failed for {}", description); + } + } + + #[test] + fn sanitize_d_path_invalid_utf8() { + let tests: &[(&[u8], &str, &str, &str)] = &[ + ( + &[b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b'.', b't', b'x', b't'], + "/tmp/", + ".txt", + "invalid continuation bytes", + ), + ( + &[b'/', b'v', b'a', b'r', b'/', b't', b'e', b's', b't', 0xE2, 0x80], + "/var/", + "", + "truncated multi-byte sequence", + ), + ( + &[b'/', b'h', b'o', b'm', b'e', b'/', b'f', b'i', b'l', b'e', 0x80, b'.', b'l', b'o', b'g'], + "/home/", + ".log", + "invalid start byte", + ), + ( + &[b'/', b't', b'm', b'p', b'/', 0xD1, 0x84, 0xFF, 0xD0, 0xBB, b'.', b't', b'x', b't'], + "/tmp/", + "", + "mixed valid and invalid UTF-8", + ), + ]; + + for (bytes, must_contain1, must_contain2, description) in tests { + let arr = bytes_to_c_char_array::<4096>(bytes); + let result = sanitize_d_path(&arr); + let result_str = result.to_string_lossy(); + + assert!(result_str.contains(must_contain1), "Failed for {} - should contain '{}'", description, must_contain1); + if !must_contain2.is_empty() { + assert!(result_str.contains(must_contain2), "Failed for {} - should contain '{}'", description, must_contain2); + } + assert!(result_str.contains('\u{FFFD}'), "Failed for {} - should contain replacement character", description); + } + } + + #[test] + fn sanitize_d_path_invalid_utf8_with_deleted_suffix() { + let invalid_with_deleted = bytes_to_c_char_array::<4096>(&[ + b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b' ', b'(', b'd', b'e', b'l', b'e', b't', b'e', b'd', b')', + ]); + let result = sanitize_d_path(&invalid_with_deleted); + let result_str = result.to_string_lossy(); + + assert!(result_str.contains("/tmp/")); + assert!(!result_str.ends_with(" (deleted)")); + assert!(result_str.contains('\u{FFFD}')); + } +} diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index d7d1d139..080239ab 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -222,6 +222,49 @@ impl From for fact_api::ProcessSignal { #[cfg(test)] mod tests { use super::*; + use std::os::raw::c_char; + + /// Helper function to convert a Rust string to a c_char array for testing + fn string_to_c_char_array(s: &str) -> [c_char; N] { + let mut array = [0 as c_char; N]; + let bytes = s.as_bytes(); + let len = bytes.len().min(N - 1); + for (i, &byte) in bytes.iter().take(len).enumerate() { + array[i] = byte as c_char; + } + array + } + + /// Helper function to convert raw bytes to a c_char array for testing invalid UTF-8 + fn bytes_to_c_char_array(bytes: &[u8]) -> [c_char; N] { + let mut array = [0 as c_char; N]; + let len = bytes.len().min(N - 1); + for (i, &byte) in bytes.iter().take(len).enumerate() { + array[i] = byte as c_char; + } + array + } + + /// Helper to create a default process_t for testing + fn default_process_t() -> process_t { + process_t { + comm: string_to_c_char_array::<16>("test"), + args: string_to_c_char_array::<4096>("arg1\0arg2\0"), + args_len: 10, + exe_path: string_to_c_char_array::<4096>("/usr/bin/test"), + memory_cgroup: string_to_c_char_array::<4096>("init.scope"), + uid: 1000, + gid: 1000, + login_uid: 1000, + pid: 12345, + lineage: [lineage_t { + uid: 1000, + exe_path: string_to_c_char_array::<4096>("/bin/bash"), + }; 2], + lineage_len: 0, + in_root_mount_ns: 1, + } + } #[test] fn extract_container_id() { @@ -259,4 +302,180 @@ mod tests { assert_eq!(id, expected); } } + + #[test] + fn process_conversion_valid_utf8_comm() { + let tests = [ + ("test", "ASCII"), + ("тест", "Cyrillic"), + ("测试", "Chinese"), + ("app🚀", "emoji"), + ]; + + for (comm, description) in tests { + let mut proc = default_process_t(); + proc.comm = string_to_c_char_array::<16>(comm); + let result = Process::try_from(proc); + assert!(result.is_ok(), "Failed for {}", description); + assert_eq!(result.unwrap().comm, comm, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_comm() { + let tests: &[(&[u8], &str)] = &[ + (&[b't', b'e', b's', b't', 0xFF, 0xFE], "invalid bytes"), + (&[b'a', b'p', b'p', 0xE2, 0x80], "truncated multi-byte sequence"), + ]; + + for (bytes, description) in tests { + let mut proc = default_process_t(); + proc.comm = bytes_to_c_char_array::<16>(bytes); + let result = Process::try_from(proc); + assert!(result.is_err(), "Should fail for {}", description); + } + } + + #[test] + fn process_conversion_valid_utf8_exe_path() { + let tests = [ + ("/usr/bin/test", "ASCII"), + ("/usr/bin/тест", "Cyrillic"), + ("/opt/应用/测试", "Chinese"), + ("/home/user/🚀app", "emoji"), + ("/var/app-данные-数据/bin", "mixed UTF-8"), + ]; + + for (path, description) in tests { + let mut proc = default_process_t(); + proc.exe_path = string_to_c_char_array::<4096>(path); + let result = Process::try_from(proc); + assert!(result.is_ok(), "Failed for {}", description); + assert_eq!(result.unwrap().exe_path, PathBuf::from(path), "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_exe_path() { + let mut proc = default_process_t(); + proc.exe_path = bytes_to_c_char_array::<4096>(&[ + b'/', b'u', b's', b'r', b'/', b'b', b'i', b'n', b'/', 0xFF, 0xFE, + ]); + let result = Process::try_from(proc); + assert!(result.is_ok()); + let exe_path = result.unwrap().exe_path; + assert!(exe_path.to_string_lossy().contains("/usr/bin/")); + assert!(exe_path.to_string_lossy().contains('\u{FFFD}')); + } + + #[test] + fn process_conversion_valid_utf8_args() { + let tests: &[(&str, Vec<&str>, &str)] = &[ + ("arg1\0arg2\0arg3\0", vec!["arg1", "arg2", "arg3"], "ASCII"), + ("файл\0данные\0", vec!["файл", "данные"], "Cyrillic"), + ("测试\0文件\0数据\0", vec!["测试", "文件", "数据"], "Chinese"), + ("app\0🚀file\0📁data\0", vec!["app", "🚀file", "📁data"], "emoji"), + ("test\0файл\0测试\0🚀\0", vec!["test", "файл", "测试", "🚀"], "mixed UTF-8"), + ]; + + for (args_str, expected, description) in tests { + let mut proc = default_process_t(); + proc.args = string_to_c_char_array::<4096>(args_str); + proc.args_len = args_str.len() as u32; + let result = Process::try_from(proc); + assert!(result.is_ok(), "Failed for {}", description); + assert_eq!(result.unwrap().args, *expected, "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_args() { + let tests: &[(&[u8], u32, &str)] = &[ + (&[b'a', b'r', b'g', b'1', 0, 0xFF, 0xFE, b'a', b'r', b'g', 0], 11, "invalid bytes"), + (&[b't', b'e', b's', b't', 0, 0xE2, 0x80, 0], 8, "truncated multi-byte sequence"), + ]; + + for (bytes, args_len, description) in tests { + let mut proc = default_process_t(); + proc.args = bytes_to_c_char_array::<4096>(bytes); + proc.args_len = *args_len; + let result = Process::try_from(proc); + assert!(result.is_err(), "Should fail for {}", description); + } + } + + #[test] + fn process_conversion_valid_utf8_memory_cgroup() { + let tests = [ + ("init.scope", None, "ASCII init.scope"), + ( + "/docker/951e643e3c241b225b6284ef2b79a37c13fc64cbf65b5d46bda95fcb98fe63a4", + Some("951e643e3c24"), + "container ID", + ), + ]; + + for (cgroup, expected_id, description) in tests { + let mut proc = default_process_t(); + proc.memory_cgroup = string_to_c_char_array::<4096>(cgroup); + let result = Process::try_from(proc); + assert!(result.is_ok(), "Failed for {}", description); + assert_eq!( + result.unwrap().container_id, + expected_id.map(|s| s.to_string()), + "Failed for {}", + description + ); + } + } + + #[test] + fn process_conversion_invalid_utf8_memory_cgroup() { + let mut proc = default_process_t(); + proc.memory_cgroup = bytes_to_c_char_array::<4096>(&[ + b'/', b'd', b'o', b'c', b'k', b'e', b'r', b'/', 0xFF, 0xFE, + ]); + let result = Process::try_from(proc); + assert!(result.is_err()); + } + + #[test] + fn process_conversion_valid_utf8_lineage() { + let tests = [ + ("/bin/bash", "ASCII"), + ("/usr/bin/тест", "Cyrillic"), + ("/opt/应用", "Chinese"), + ]; + + for (path, description) in tests { + let mut proc = default_process_t(); + proc.lineage[0] = lineage_t { + uid: 1000, + exe_path: string_to_c_char_array::<4096>(path), + }; + proc.lineage_len = 1; + let result = Process::try_from(proc); + assert!(result.is_ok(), "Failed for {}", description); + let lineage = result.unwrap().lineage; + assert_eq!(lineage.len(), 1); + assert_eq!(lineage[0].exe_path, PathBuf::from(path), "Failed for {}", description); + } + } + + #[test] + fn process_conversion_invalid_utf8_lineage() { + let mut proc = default_process_t(); + proc.lineage[0] = lineage_t { + uid: 1000, + exe_path: bytes_to_c_char_array::<4096>(&[ + b'/', b'b', b'i', b'n', b'/', 0xFF, 0xFE, + ]), + }; + proc.lineage_len = 1; + let result = Process::try_from(proc); + assert!(result.is_ok()); + let lineage = result.unwrap().lineage; + assert!(lineage[0].exe_path.to_string_lossy().contains("/bin/")); + assert!(lineage[0].exe_path.to_string_lossy().contains('\u{FFFD}')); + } } diff --git a/tests/test_file_open.py b/tests/test_file_open.py index c47272c7..20ff4f4b 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -2,11 +2,19 @@ import os import docker +import pytest from event import Event, EventType, Process -def test_open(fact, monitored_dir, server): +@pytest.mark.parametrize("filename", [ + 'create.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '🚀rocket.txt', +]) +def test_open(fact, monitored_dir, server, filename): """ Tests the opening of a file and verifies that the corresponding event is captured by the server. @@ -15,9 +23,10 @@ def test_open(fact, monitored_dir, server): fact: Fixture for file activity (only required to be running). monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filename: Name of the file to create (includes UTF-8 test cases). """ # File Under Test - fut = os.path.join(monitored_dir, 'create.txt') + fut = os.path.join(monitored_dir, filename) with open(fut, 'w') as f: f.write('This is a test') @@ -28,7 +37,11 @@ def test_open(fact, monitored_dir, server): server.wait_events([e]) -def test_multiple(fact, monitored_dir, server): +@pytest.mark.parametrize("filenames", [ + ['0.txt', '1.txt', '2.txt'], + ['café.txt', 'файл.txt', '测试.txt'], +]) +def test_multiple(fact, monitored_dir, server, filenames): """ Tests the opening of multiple files and verifies that the corresponding events are captured by the server. @@ -37,12 +50,13 @@ def test_multiple(fact, monitored_dir, server): fact: Fixture for file activity (only required to be running). monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filenames: List of filenames to create (includes UTF-8 test cases). """ events = [] process = Process.from_proc() # File Under Test - for i in range(3): - fut = os.path.join(monitored_dir, f'{i}.txt') + for filename in filenames: + fut = os.path.join(monitored_dir, filename) with open(fut, 'w') as f: f.write('This is a test') From 703057fc60538130149bf11363ee3645cdd1c693 Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Thu, 5 Feb 2026 21:44:34 -0800 Subject: [PATCH 3/9] Reduced duplication of helper functions --- fact/src/event/mod.rs | 26 +++++++++++++------------- fact/src/event/process.rs | 22 +--------------------- 2 files changed, 14 insertions(+), 34 deletions(-) diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 46283470..d34b395b 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -360,13 +360,12 @@ impl From for fact_api::FileOwnershipChange { } #[cfg(test)] -mod tests { - use super::*; +mod test_utils { + use std::os::raw::c_char; - /// Helper function to convert a Rust string to a c_char array for testing - fn string_to_c_char_array(s: &str) -> [c_char; N] { + /// Helper function to convert raw bytes to a c_char array for testing + pub fn bytes_to_c_char_array(bytes: &[u8]) -> [c_char; N] { let mut array = [0 as c_char; N]; - let bytes = s.as_bytes(); let len = bytes.len().min(N - 1); for (i, &byte) in bytes.iter().take(len).enumerate() { array[i] = byte as c_char; @@ -374,15 +373,16 @@ mod tests { array } - /// Helper function to convert raw bytes to a c_char array for testing invalid UTF-8 - fn bytes_to_c_char_array(bytes: &[u8]) -> [c_char; N] { - let mut array = [0 as c_char; N]; - let len = bytes.len().min(N - 1); - for (i, &byte) in bytes.iter().take(len).enumerate() { - array[i] = byte as c_char; - } - array + /// Helper function to convert a Rust string to a c_char array for testing + pub fn string_to_c_char_array(s: &str) -> [c_char; N] { + bytes_to_c_char_array(s.as_bytes()) } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::test_utils::*; #[test] fn slice_to_string_valid_utf8() { diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index 080239ab..58dcd100 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -222,29 +222,9 @@ impl From for fact_api::ProcessSignal { #[cfg(test)] mod tests { use super::*; + use crate::event::test_utils::*; use std::os::raw::c_char; - /// Helper function to convert a Rust string to a c_char array for testing - fn string_to_c_char_array(s: &str) -> [c_char; N] { - let mut array = [0 as c_char; N]; - let bytes = s.as_bytes(); - let len = bytes.len().min(N - 1); - for (i, &byte) in bytes.iter().take(len).enumerate() { - array[i] = byte as c_char; - } - array - } - - /// Helper function to convert raw bytes to a c_char array for testing invalid UTF-8 - fn bytes_to_c_char_array(bytes: &[u8]) -> [c_char; N] { - let mut array = [0 as c_char; N]; - let len = bytes.len().min(N - 1); - for (i, &byte) in bytes.iter().take(len).enumerate() { - array[i] = byte as c_char; - } - array - } - /// Helper to create a default process_t for testing fn default_process_t() -> process_t { process_t { From 631aea5e0132d33cf8ec84673cd07b35f1a4a67d Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Thu, 5 Feb 2026 22:13:51 -0800 Subject: [PATCH 4/9] cargo fmt --all --- fact/src/event/mod.rs | 117 +++++++++++++++++++++++++++++++------- fact/src/event/process.rs | 60 +++++++++++++++---- 2 files changed, 144 insertions(+), 33 deletions(-) diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index d34b395b..082f7e0f 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -381,8 +381,8 @@ mod test_utils { #[cfg(test)] mod tests { - use super::*; use super::test_utils::*; + use super::*; #[test] fn slice_to_string_valid_utf8() { @@ -400,7 +400,12 @@ mod tests { for (input, description) in tests { let arr = string_to_c_char_array::<256>(input); - assert_eq!(slice_to_string(&arr).unwrap(), input, "Failed for {}", description); + assert_eq!( + slice_to_string(&arr).unwrap(), + input, + "Failed for {}", + description + ); } } @@ -408,16 +413,31 @@ mod tests { fn slice_to_string_invalid_utf8() { let tests: &[(&[u8], &str)] = &[ (&[0xFF, 0xFE, 0xFD], "invalid continuation bytes"), - (&[b't', b'e', b's', b't', 0xE2], "truncated multi-byte sequence"), + ( + &[b't', b'e', b's', b't', 0xE2], + "truncated multi-byte sequence", + ), (&[0xC0, 0x80], "overlong encoding"), - (&[b'h', b'e', b'l', b'l', b'o', 0x80, b'w', b'o', b'r', b'l', b'd'], "invalid start byte"), + ( + &[ + b'h', b'e', b'l', b'l', b'o', 0x80, b'w', b'o', b'r', b'l', b'd', + ], + "invalid start byte", + ), (&[0x80], "lone continuation byte"), - (&[b't', b'e', b's', b't', 0xFF, 0xFE], "mixed valid and invalid bytes"), + ( + &[b't', b'e', b's', b't', 0xFF, 0xFE], + "mixed valid and invalid bytes", + ), ]; for (bytes, description) in tests { let arr = bytes_to_c_char_array::<256>(bytes); - assert!(slice_to_string(&arr).is_err(), "Should fail for {}", description); + assert!( + slice_to_string(&arr).is_err(), + "Should fail for {}", + description + ); } } @@ -426,31 +446,61 @@ mod tests { let tests = [ ("/etc/test", "/etc/test", "ASCII"), ("/tmp/файл.txt", "/tmp/файл.txt", "Cyrillic"), - ("/home/user/测试文件.log", "/home/user/测试文件.log", "Chinese"), + ( + "/home/user/测试文件.log", + "/home/user/测试文件.log", + "Chinese", + ), ("/data/🚀rocket.dat", "/data/🚀rocket.dat", "emoji"), - ("/var/log/app-данные-数据-🐛.log", "/var/log/app-данные-数据-🐛.log", "mixed Unicode"), + ( + "/var/log/app-данные-数据-🐛.log", + "/var/log/app-данные-数据-🐛.log", + "mixed Unicode", + ), ("/home/ملف.txt", "/home/ملف.txt", "Arabic"), ("/opt/ファイル.conf", "/opt/ファイル.conf", "Japanese"), ]; for (input, expected, description) in tests { let arr = string_to_c_char_array::<4096>(input); - assert_eq!(sanitize_d_path(&arr), PathBuf::from(expected), "Failed for {}", description); + assert_eq!( + sanitize_d_path(&arr), + PathBuf::from(expected), + "Failed for {}", + description + ); } } #[test] fn sanitize_d_path_deleted_suffix() { let tests = [ - ("/tmp/test.txt (deleted)", "/tmp/test.txt", "ASCII with deleted suffix"), - ("/tmp/файл.txt (deleted)", "/tmp/файл.txt", "Unicode with deleted suffix"), + ( + "/tmp/test.txt (deleted)", + "/tmp/test.txt", + "ASCII with deleted suffix", + ), + ( + "/tmp/файл.txt (deleted)", + "/tmp/файл.txt", + "Unicode with deleted suffix", + ), ("/etc/config.yaml", "/etc/config.yaml", "no deleted suffix"), - ("/var/log/app/debug.log (deleted)", "/var/log/app/debug.log", "nested path with deleted suffix"), + ( + "/var/log/app/debug.log (deleted)", + "/var/log/app/debug.log", + "nested path with deleted suffix", + ), ]; for (input, expected, description) in tests { let arr = string_to_c_char_array::<4096>(input); - assert_eq!(sanitize_d_path(&arr), PathBuf::from(expected), "Failed for {}", description); + assert_eq!( + sanitize_d_path(&arr), + PathBuf::from(expected), + "Failed for {}", + description + ); } } @@ -458,25 +508,35 @@ mod tests { fn sanitize_d_path_invalid_utf8() { let tests: &[(&[u8], &str, &str, &str)] = &[ ( - &[b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b'.', b't', b'x', b't'], + &[ + b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b'.', b't', b'x', b't', + ], "/tmp/", ".txt", "invalid continuation bytes", ), ( - &[b'/', b'v', b'a', b'r', b'/', b't', b'e', b's', b't', 0xE2, 0x80], + &[ + b'/', b'v', b'a', b'r', b'/', b't', b'e', b's', b't', 0xE2, 0x80, + ], "/var/", "", "truncated multi-byte sequence", ), ( - &[b'/', b'h', b'o', b'm', b'e', b'/', b'f', b'i', b'l', b'e', 0x80, b'.', b'l', b'o', b'g'], + &[ + b'/', b'h', b'o', b'm', b'e', b'/', b'f', b'i', b'l', b'e', 0x80, b'.', b'l', + b'o', b'g', + ], "/home/", ".log", "invalid start byte", ), ( - &[b'/', b't', b'm', b'p', b'/', 0xD1, 0x84, 0xFF, 0xD0, 0xBB, b'.', b't', b'x', b't'], + &[ + b'/', b't', b'm', b'p', b'/', 0xD1, 0x84, 0xFF, 0xD0, 0xBB, b'.', b't', b'x', + b't', + ], "/tmp/", "", "mixed valid and invalid UTF-8", @@ -488,18 +548,33 @@ mod tests { let result = sanitize_d_path(&arr); let result_str = result.to_string_lossy(); - assert!(result_str.contains(must_contain1), "Failed for {} - should contain '{}'", description, must_contain1); + assert!( + result_str.contains(must_contain1), + "Failed for {} - should contain '{}'", + description, + must_contain1 + ); if !must_contain2.is_empty() { - assert!(result_str.contains(must_contain2), "Failed for {} - should contain '{}'", description, must_contain2); + assert!( + result_str.contains(must_contain2), + "Failed for {} - should contain '{}'", + description, + must_contain2 + ); } - assert!(result_str.contains('\u{FFFD}'), "Failed for {} - should contain replacement character", description); + assert!( + result_str.contains('\u{FFFD}'), + "Failed for {} - should contain replacement character", + description + ); } } #[test] fn sanitize_d_path_invalid_utf8_with_deleted_suffix() { let invalid_with_deleted = bytes_to_c_char_array::<4096>(&[ - b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b' ', b'(', b'd', b'e', b'l', b'e', b't', b'e', b'd', b')', + b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b' ', b'(', b'd', b'e', b'l', b'e', b't', + b'e', b'd', b')', ]); let result = sanitize_d_path(&invalid_with_deleted); let result_str = result.to_string_lossy(); diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index 58dcd100..89dbe4a6 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -305,7 +305,10 @@ mod tests { fn process_conversion_invalid_utf8_comm() { let tests: &[(&[u8], &str)] = &[ (&[b't', b'e', b's', b't', 0xFF, 0xFE], "invalid bytes"), - (&[b'a', b'p', b'p', 0xE2, 0x80], "truncated multi-byte sequence"), + ( + &[b'a', b'p', b'p', 0xE2, 0x80], + "truncated multi-byte sequence", + ), ]; for (bytes, description) in tests { @@ -331,7 +334,12 @@ mod tests { proc.exe_path = string_to_c_char_array::<4096>(path); let result = Process::try_from(proc); assert!(result.is_ok(), "Failed for {}", description); - assert_eq!(result.unwrap().exe_path, PathBuf::from(path), "Failed for {}", description); + assert_eq!( + result.unwrap().exe_path, + PathBuf::from(path), + "Failed for {}", + description + ); } } @@ -353,9 +361,21 @@ mod tests { let tests: &[(&str, Vec<&str>, &str)] = &[ ("arg1\0arg2\0arg3\0", vec!["arg1", "arg2", "arg3"], "ASCII"), ("файл\0данные\0", vec!["файл", "данные"], "Cyrillic"), - ("测试\0文件\0数据\0", vec!["测试", "文件", "数据"], "Chinese"), - ("app\0🚀file\0📁data\0", vec!["app", "🚀file", "📁data"], "emoji"), - ("test\0файл\0测试\0🚀\0", vec!["test", "файл", "测试", "🚀"], "mixed UTF-8"), + ( + "测试\0文件\0数据\0", + vec!["测试", "文件", "数据"], + "Chinese", + ), + ( + "app\0🚀file\0📁data\0", + vec!["app", "🚀file", "📁data"], + "emoji", + ), + ( + "test\0файл\0测试\0🚀\0", + vec!["test", "файл", "测试", "🚀"], + "mixed UTF-8", + ), ]; for (args_str, expected, description) in tests { @@ -364,15 +384,28 @@ mod tests { proc.args_len = args_str.len() as u32; let result = Process::try_from(proc); assert!(result.is_ok(), "Failed for {}", description); - assert_eq!(result.unwrap().args, *expected, "Failed for {}", description); + assert_eq!( + result.unwrap().args, + *expected, + "Failed for {}", + description + ); } } #[test] fn process_conversion_invalid_utf8_args() { let tests: &[(&[u8], u32, &str)] = &[ - (&[b'a', b'r', b'g', b'1', 0, 0xFF, 0xFE, b'a', b'r', b'g', 0], 11, "invalid bytes"), - (&[b't', b'e', b's', b't', 0, 0xE2, 0x80, 0], 8, "truncated multi-byte sequence"), + ( + &[b'a', b'r', b'g', b'1', 0, 0xFF, 0xFE, b'a', b'r', b'g', 0], + 11, + "invalid bytes", + ), + ( + &[b't', b'e', b's', b't', 0, 0xE2, 0x80, 0], + 8, + "truncated multi-byte sequence", + ), ]; for (bytes, args_len, description) in tests { @@ -438,7 +471,12 @@ mod tests { assert!(result.is_ok(), "Failed for {}", description); let lineage = result.unwrap().lineage; assert_eq!(lineage.len(), 1); - assert_eq!(lineage[0].exe_path, PathBuf::from(path), "Failed for {}", description); + assert_eq!( + lineage[0].exe_path, + PathBuf::from(path), + "Failed for {}", + description + ); } } @@ -447,9 +485,7 @@ mod tests { let mut proc = default_process_t(); proc.lineage[0] = lineage_t { uid: 1000, - exe_path: bytes_to_c_char_array::<4096>(&[ - b'/', b'b', b'i', b'n', b'/', 0xFF, 0xFE, - ]), + exe_path: bytes_to_c_char_array::<4096>(&[b'/', b'b', b'i', b'n', b'/', 0xFF, 0xFE]), }; proc.lineage_len = 1; let result = Process::try_from(proc); From 5ea7b29b105a1d531d1baff85503718823076114 Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Fri, 6 Feb 2026 11:11:18 -0800 Subject: [PATCH 5/9] Reverted changes to test_multiple --- tests/test_file_open.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/test_file_open.py b/tests/test_file_open.py index 20ff4f4b..a10276fb 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -37,11 +37,7 @@ def test_open(fact, monitored_dir, server, filename): server.wait_events([e]) -@pytest.mark.parametrize("filenames", [ - ['0.txt', '1.txt', '2.txt'], - ['café.txt', 'файл.txt', '测试.txt'], -]) -def test_multiple(fact, monitored_dir, server, filenames): +def test_multiple(fact, monitored_dir, server): """ Tests the opening of multiple files and verifies that the corresponding events are captured by the server. @@ -55,8 +51,8 @@ def test_multiple(fact, monitored_dir, server, filenames): events = [] process = Process.from_proc() # File Under Test - for filename in filenames: - fut = os.path.join(monitored_dir, filename) + for i in range(3): + fut = os.path.join(monitored_dir, f'{i}.txt') with open(fut, 'w') as f: f.write('This is a test') From c435b9daa41561f16004d75e0f8cc44478ff650e Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Fri, 6 Feb 2026 11:30:57 -0800 Subject: [PATCH 6/9] Added integration test case with invalid utf-8 --- tests/test_file_open.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_file_open.py b/tests/test_file_open.py index a10276fb..92af3195 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -13,6 +13,7 @@ 'файл.txt', '测试.txt', '🚀rocket.txt', + b'test\xff\xfe.txt', ]) def test_open(fact, monitored_dir, server, filename): """ From 77ea056d288f1c3d5a4290d74045fc5952a4907f Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Sat, 7 Feb 2026 19:01:06 -0800 Subject: [PATCH 7/9] Fixed how file paths are joined when there are bytes. Invalid utf-8 is replaced when comparing to the result --- tests/test_file_open.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/test_file_open.py b/tests/test_file_open.py index 92af3195..f272b8eb 100644 --- a/tests/test_file_open.py +++ b/tests/test_file_open.py @@ -27,12 +27,25 @@ def test_open(fact, monitored_dir, server, filename): filename: Name of the file to create (includes UTF-8 test cases). """ # File Under Test - fut = os.path.join(monitored_dir, filename) + # Handle bytes filenames by converting monitored_dir to bytes + if isinstance(filename, bytes): + fut = os.path.join(os.fsencode(monitored_dir), filename) + else: + fut = os.path.join(monitored_dir, filename) + with open(fut, 'w') as f: f.write('This is a test') + # Convert fut back to string for the Event + # For bytes paths with invalid UTF-8, Rust will use the replacement character U+FFFD + if isinstance(fut, bytes): + # Manually convert to match Rust's behavior: replace invalid UTF-8 with U+FFFD + fut_str = fut.decode('utf-8', errors='replace') + else: + fut_str = fut + e = Event(process=Process.from_proc(), event_type=EventType.CREATION, - file=fut, host_path='') + file=fut_str, host_path='') print(f'Waiting for event: {e}') server.wait_events([e]) From b2f317170da78df156a51b5c2df62f3c67c464ca Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Sun, 8 Feb 2026 14:39:23 -0800 Subject: [PATCH 8/9] Added tests for test_path_chown and test_path_unlink --- tests/test_path_chmod.py | 47 +++++++++++++++++++++++++++++++++------ tests/test_path_chown.py | 25 +++++++++++++++++---- tests/test_path_unlink.py | 37 +++++++++++++++++++++++++++--- 3 files changed, 95 insertions(+), 14 deletions(-) diff --git a/tests/test_path_chmod.py b/tests/test_path_chmod.py index 4b62e2c2..dd5e1f36 100644 --- a/tests/test_path_chmod.py +++ b/tests/test_path_chmod.py @@ -1,10 +1,20 @@ import multiprocessing as mp import os +import pytest + from event import Event, EventType, Process -def test_chmod(fact, monitored_dir, server): +@pytest.mark.parametrize("filename", [ + 'chmod.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '🔒secure.txt', + b'perm\xff\xfe.txt', +]) +def test_chmod(fact, monitored_dir, server, filename): """ Tests changing permissions on a file and verifies the corresponding event is captured by the server @@ -13,18 +23,41 @@ def test_chmod(fact, monitored_dir, server): fact: Fixture for file activity (only required to be runing). monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filename: Name of the file to create (includes UTF-8 test cases). """ - # File Under Test - fut = os.path.join(monitored_dir, 'test.txt') + # Handle bytes filenames by converting monitored_dir to bytes + if isinstance(filename, bytes): + fut = os.path.join(os.fsencode(monitored_dir), filename) + else: + fut = os.path.join(monitored_dir, filename) + + # Create the file first + with open(fut, 'w') as f: + f.write('This is a test') + mode = 0o666 os.chmod(fut, mode) - e = Event(process=Process.from_proc(), event_type=EventType.PERMISSION, - file=fut, host_path=fut, mode=mode) + # Convert fut back to string for the Event + # For bytes paths with invalid UTF-8, Rust will use the replacement character U+FFFD + if isinstance(fut, bytes): + fut_str = fut.decode('utf-8', errors='replace') + else: + fut_str = fut - print(f'Waiting for event: {e}') + process = Process.from_proc() + # We expect both CREATION (from file creation) and PERMISSION (from chmod) + events = [ + Event(process=process, event_type=EventType.CREATION, + file=fut_str, host_path=''), + Event(process=process, event_type=EventType.PERMISSION, + file=fut_str, host_path='', mode=mode), + ] - server.wait_events([e]) + for e in events: + print(f'Waiting for event: {e}') + + server.wait_events(events) def test_multiple(fact, monitored_dir, server): diff --git a/tests/test_path_chown.py b/tests/test_path_chown.py index d318f4eb..fc9a28ca 100644 --- a/tests/test_path_chown.py +++ b/tests/test_path_chown.py @@ -1,4 +1,7 @@ import os +import shlex + +import pytest from event import Event, EventType, Process @@ -10,7 +13,14 @@ TEST_GID = 2345 -def test_chown(fact, test_container, server): +@pytest.mark.parametrize("filename", [ + 'chown.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '👤owner.txt', +]) +def test_chown(fact, test_container, server, filename): """ Execute a chown operation on a file and verifies the corresponding event is captured by the server. @@ -19,15 +29,22 @@ def test_chown(fact, test_container, server): fact: Fixture for file activity (only required to be running). test_container: A container for running commands in. server: The server instance to communicate with. + filename: Name of the file to create (includes UTF-8 test cases). """ # File Under Test - fut = '/container-dir/test.txt' + fut = f'/container-dir/{filename}' # Create the file and chown it + # Use shlex.quote to properly escape special characters for shell + fut_quoted = shlex.quote(fut) + touch_cmd_shell = f'touch {fut_quoted}' + chown_cmd_shell = f'chown {TEST_UID}:{TEST_GID} {fut_quoted}' + test_container.exec_run(touch_cmd_shell) + test_container.exec_run(chown_cmd_shell) + + # The args in the event won't have quotes (shell removes them) touch_cmd = f'touch {fut}' chown_cmd = f'chown {TEST_UID}:{TEST_GID} {fut}' - test_container.exec_run(touch_cmd) - test_container.exec_run(chown_cmd) loginuid = pow(2, 32) - 1 touch = Process(pid=None, diff --git a/tests/test_path_unlink.py b/tests/test_path_unlink.py index 3a7cde5b..283897ea 100644 --- a/tests/test_path_unlink.py +++ b/tests/test_path_unlink.py @@ -2,26 +2,57 @@ import os import docker +import pytest from event import Event, EventType, Process -def test_remove(fact, test_file, server): +@pytest.mark.parametrize("filename", [ + 'remove.txt', + 'café.txt', + 'файл.txt', + '测试.txt', + '🗑️delete.txt', + b'rm\xff\xfe.txt', +]) +def test_remove(fact, monitored_dir, server, filename): """ Tests the removal of a file and verifies the corresponding event is captured by the server. Args: fact: Fixture for file activity (only required to be running). - test_file: Temporary file for testing. + monitored_dir: Temporary directory path for creating the test file. server: The server instance to communicate with. + filename: Name of the file to create and remove (includes UTF-8 test cases). """ + # Handle bytes filenames by converting monitored_dir to bytes + if isinstance(filename, bytes): + test_file = os.path.join(os.fsencode(monitored_dir), filename) + else: + test_file = os.path.join(monitored_dir, filename) + + # Create the file first + with open(test_file, 'w') as f: + f.write('This is a test') + + # Remove the file os.remove(test_file) + # Convert test_file back to string for the Event + # For bytes paths with invalid UTF-8, Rust will use the replacement character U+FFFD + if isinstance(test_file, bytes): + test_file_str = test_file.decode('utf-8', errors='replace') + else: + test_file_str = test_file + process = Process.from_proc() + # We expect both CREATION (from file creation) and UNLINK (from removal) events = [ + Event(process=process, event_type=EventType.CREATION, + file=test_file_str, host_path=''), Event(process=process, event_type=EventType.UNLINK, - file=test_file, host_path=test_file), + file=test_file_str, host_path=''), ] server.wait_events(events) From 144457b7d8f689bf408b8313a57af59a21044fa2 Mon Sep 17 00:00:00 2001 From: JoukoVirtanen Date: Sun, 8 Feb 2026 15:26:02 -0800 Subject: [PATCH 9/9] Added invalid utf-8 test case to test_path_chown --- tests/test_path_chown.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_path_chown.py b/tests/test_path_chown.py index fc9a28ca..95cb9ed0 100644 --- a/tests/test_path_chown.py +++ b/tests/test_path_chown.py @@ -19,6 +19,7 @@ 'файл.txt', '测试.txt', '👤owner.txt', + b'own\xff\xfe.txt', ]) def test_chown(fact, test_container, server, filename): """ @@ -31,8 +32,15 @@ def test_chown(fact, test_container, server, filename): server: The server instance to communicate with. filename: Name of the file to create (includes UTF-8 test cases). """ + # Handle bytes filenames - convert to string with replacement characters + # Rust will use the same replacement, so the strings will match + if isinstance(filename, bytes): + filename_str = filename.decode('utf-8', errors='replace') + else: + filename_str = filename + # File Under Test - fut = f'/container-dir/{filename}' + fut = f'/container-dir/{filename_str}' # Create the file and chown it # Use shlex.quote to properly escape special characters for shell