Skip to content
226 changes: 226 additions & 0 deletions fact/src/event/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,3 +358,229 @@ impl From<ChownFileData> for fact_api::FileOwnershipChange {
}
}
}

#[cfg(test)]
mod test_utils {
    use std::os::raw::c_char;

    /// Builds a zero-filled `[c_char; N]` and copies `bytes` into its front.
    ///
    /// At most `N - 1` bytes are copied, so for any `N > 0` the last element
    /// stays `0` and the result is always NUL-terminated; longer inputs are
    /// silently truncated. For `N == 0` the (empty) array is returned as-is
    /// instead of underflowing on `N - 1`.
    pub fn bytes_to_c_char_array<const N: usize>(bytes: &[u8]) -> [c_char; N] {
        let mut array = [0 as c_char; N];
        // Keep one slot free for the terminator; saturate so N == 0 is safe.
        let len = bytes.len().min(N.saturating_sub(1));
        for (slot, &byte) in array.iter_mut().zip(&bytes[..len]) {
            // Bit-for-bit reinterpretation: bytes >= 0x80 must survive the
            // cast on targets where `c_char` is signed.
            *slot = byte as c_char;
        }
        array
    }

    /// Builds a `[c_char; N]` from the UTF-8 bytes of `s`.
    ///
    /// Same truncation and NUL-termination rules as
    /// [`bytes_to_c_char_array`]; truncation is byte-based and may therefore
    /// cut a multi-byte character in half.
    pub fn string_to_c_char_array<const N: usize>(s: &str) -> [c_char; N] {
        bytes_to_c_char_array(s.as_bytes())
    }
}

#[cfg(test)]
mod tests {
use super::test_utils::*;
use super::*;

/// Verifies that `slice_to_string` decodes zero-padded buffers holding valid
/// UTF-8 back into exactly the text they were built from.
#[test]
fn slice_to_string_valid_utf8() {
    // Each case pairs the text to encode with a label shown on failure.
    let cases = [
        ("hello", "ASCII"),
        ("café", "Latin-1 supplement"),
        ("файл", "Cyrillic"),
        ("测试文件", "Chinese"),
        ("test🚀file", "emoji"),
        ("test-файл-测试-🐛.txt", "mixed characters"),
        ("ملف", "Arabic"),
        ("קובץ", "Hebrew"),
        ("ファイル", "Japanese"),
    ];

    cases.iter().for_each(|&(text, label)| {
        let buffer = string_to_c_char_array::<256>(text);
        let decoded = slice_to_string(&buffer).unwrap();
        assert_eq!(decoded, text, "Failed for {}", label);
    });
}

#[test]
fn slice_to_string_invalid_utf8() {
let tests: &[(&[u8], &str)] = &[
(&[0xFF, 0xFE, 0xFD], "invalid continuation bytes"),
(
&[b't', b'e', b's', b't', 0xE2],
"truncated multi-byte sequence",
),
(&[0xC0, 0x80], "overlong encoding"),
(
&[
b'h', b'e', b'l', b'l', b'o', 0x80, b'w', b'o', b'r', b'l', b'd',
],
"invalid start byte",
),
(&[0x80], "lone continuation byte"),
(
&[b't', b'e', b's', b't', 0xFF, 0xFE],
"mixed valid and invalid bytes",
),
Comment on lines +415 to +431
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is easier to read

Suggested change
(&[0xFF, 0xFE, 0xFD], "invalid continuation bytes"),
(
&[b't', b'e', b's', b't', 0xE2],
"truncated multi-byte sequence",
),
(&[0xC0, 0x80], "overlong encoding"),
(
&[
b'h', b'e', b'l', b'l', b'o', 0x80, b'w', b'o', b'r', b'l', b'd',
],
"invalid start byte",
),
(&[0x80], "lone continuation byte"),
(
&[b't', b'e', b's', b't', 0xFF, 0xFE],
"mixed valid and invalid bytes",
),
(&[0xFF, 0xFE, 0xFD], "invalid continuation bytes"),
(b"test\xE2", "truncated multi-byte sequence"),
(&[0xC0, 0x80], "overlong encoding"),
(b"hello\x80world", "invalid start byte"),
(&[0x80], "lone continuation byte"),
(b"test\xFF\xFE", "mixed valid and invalid bytes"),

];

for (bytes, description) in tests {
let arr = bytes_to_c_char_array::<256>(bytes);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are some of these templates 256 bytes but others 4096? The 4K one makes more sense since we are dealing mostly with paths, so I don't really get why we even need a template.

assert!(
slice_to_string(&arr).is_err(),
"Should fail for {}",
description
);
}
}

/// Verifies that `sanitize_d_path` returns valid UTF-8 paths unchanged.
#[test]
fn sanitize_d_path_valid_utf8() {
    // (raw path, expected path, label shown on failure)
    let cases = [
        ("/etc/test", "/etc/test", "ASCII"),
        ("/tmp/файл.txt", "/tmp/файл.txt", "Cyrillic"),
        (
            "/home/user/测试文件.log",
            "/home/user/测试文件.log",
            "Chinese",
        ),
        ("/data/🚀rocket.dat", "/data/🚀rocket.dat", "emoji"),
        (
            "/var/log/app-данные-数据-🐛.log",
            "/var/log/app-данные-数据-🐛.log",
            "mixed Unicode",
        ),
        ("/home/ملف.txt", "/home/ملف.txt", "Arabic"),
        ("/opt/ファイル.conf", "/opt/ファイル.conf", "Japanese"),
    ];

    for (raw, want, label) in cases {
        let buffer = string_to_c_char_array::<4096>(raw);
        let actual = sanitize_d_path(&buffer);
        let wanted = PathBuf::from(want);
        assert_eq!(actual, wanted, "Failed for {}", label);
    }
}

/// Verifies that `sanitize_d_path` drops a trailing ` (deleted)` marker and
/// leaves a path without that marker untouched.
#[test]
fn sanitize_d_path_deleted_suffix() {
    // Shared check: encode `raw`, sanitize it, and compare against `want`.
    let check = |raw: &str, want: &str, label: &str| {
        let buffer = string_to_c_char_array::<4096>(raw);
        assert_eq!(
            sanitize_d_path(&buffer),
            PathBuf::from(want),
            "Failed for {}",
            label
        );
    };

    // (raw path, expected path, label shown on failure)
    let cases = [
        (
            "/tmp/test.txt (deleted)",
            "/tmp/test.txt",
            "ASCII with deleted suffix",
        ),
        (
            "/tmp/файл.txt (deleted)",
            "/tmp/файл.txt",
            "Unicode with deleted suffix",
        ),
        ("/etc/config.yaml", "/etc/config.yaml", "no deleted suffix"),
        (
            "/var/log/app/debug.log (deleted)",
            "/var/log/app/debug.log",
            "nested path with deleted suffix",
        ),
    ];

    for (raw, want, label) in cases {
        check(raw, want, label);
    }
}

#[test]
fn sanitize_d_path_invalid_utf8() {
let tests: &[(&[u8], &str, &str, &str)] = &[
(
&[
b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b'.', b't', b'x', b't',
],
"/tmp/",
".txt",
"invalid continuation bytes",
),
(
&[
b'/', b'v', b'a', b'r', b'/', b't', b'e', b's', b't', 0xE2, 0x80,
],
"/var/",
"",
"truncated multi-byte sequence",
),
(
&[
b'/', b'h', b'o', b'm', b'e', b'/', b'f', b'i', b'l', b'e', 0x80, b'.', b'l',
b'o', b'g',
],
"/home/",
".log",
"invalid start byte",
),
(
&[
b'/', b't', b'm', b'p', b'/', 0xD1, 0x84, 0xFF, 0xD0, 0xBB, b'.', b't', b'x',
b't',
],
"/tmp/",
"",
"mixed valid and invalid UTF-8",
),
Comment on lines +510 to +543
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as my previous comment

Suggested change
(
&[
b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b'.', b't', b'x', b't',
],
"/tmp/",
".txt",
"invalid continuation bytes",
),
(
&[
b'/', b'v', b'a', b'r', b'/', b't', b'e', b's', b't', 0xE2, 0x80,
],
"/var/",
"",
"truncated multi-byte sequence",
),
(
&[
b'/', b'h', b'o', b'm', b'e', b'/', b'f', b'i', b'l', b'e', 0x80, b'.', b'l',
b'o', b'g',
],
"/home/",
".log",
"invalid start byte",
),
(
&[
b'/', b't', b'm', b'p', b'/', 0xD1, 0x84, 0xFF, 0xD0, 0xBB, b'.', b't', b'x',
b't',
],
"/tmp/",
"",
"mixed valid and invalid UTF-8",
),
(
b"/tmp/\xFF\xFE.txt",
"/tmp/",
".txt",
"invalid continuation bytes",
),
(
b"/var/test\xE2\x80",
"/var/",
"",
"truncated multi-byte sequence",
),
(
b"/home/file\x80.log",
"/home/",
".log",
"invalid start byte",
),
(
b"/tmp/\xD1\x84\xFF\xD0\xBB.txt",
"/tmp/",
"",
"mixed valid and invalid UTF-8",
),

];

for (bytes, must_contain1, must_contain2, description) in tests {
let arr = bytes_to_c_char_array::<4096>(bytes);
let result = sanitize_d_path(&arr);
let result_str = result.to_string_lossy();

assert!(
result_str.contains(must_contain1),
"Failed for {} - should contain '{}'",
description,
must_contain1
);
if !must_contain2.is_empty() {
assert!(
result_str.contains(must_contain2),
"Failed for {} - should contain '{}'",
description,
must_contain2
);
}
assert!(
result_str.contains('\u{FFFD}'),
"Failed for {} - should contain replacement character",
description
);
Comment on lines +551 to +569
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonder if we could change this test to use a regex for validation instead of this must_contain logic 🤔

We might need to pull in a regex crate as a dev-dependency, but I think it might make the validation a bit more robust.

}
}

#[test]
fn sanitize_d_path_invalid_utf8_with_deleted_suffix() {
let invalid_with_deleted = bytes_to_c_char_array::<4096>(&[
b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b' ', b'(', b'd', b'e', b'l', b'e', b't',
b'e', b'd', b')',
]);
Comment on lines +575 to +578
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let invalid_with_deleted = bytes_to_c_char_array::<4096>(&[
b'/', b't', b'm', b'p', b'/', 0xFF, 0xFE, b' ', b'(', b'd', b'e', b'l', b'e', b't',
b'e', b'd', b')',
]);
let invalid_with_deleted = bytes_to_c_char_array::<4096>(b"/tmp/\xFF\xFE (deleted)");

let result = sanitize_d_path(&invalid_with_deleted);
let result_str = result.to_string_lossy();

assert!(result_str.contains("/tmp/"));
assert!(!result_str.ends_with(" (deleted)"));
assert!(result_str.contains('\u{FFFD}'));
}
}
Loading
Loading