Replace hand-rolled regex implementation with zig-regex library

claude[bot] · bradcypert · claude[bot] · commit e8e05dbedd10 · 2025-07-26T20:49:28.000Z
- Add zig-regex dependency to build.zig.zon and build.zig - Replace custom matchesRegex() and matchesRegexAt() functions with library call - Simplify implementation from 94 lines to 10 lines - Improve security by removing potential ReDoS vulnerabilities - Use proper memory management with ArenaAllocator 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Brad <bradcypert@users.noreply.github.com>
diff --git a/build.zig b/build.zig
@@ -4,6 +4,7 @@ pub fn build(b: *std.Build) void {
     const exe_name = b.option([]const u8, "exe_name", "Name of the executable") orelse "httpspec";
     const dependencies = [_][]const u8{
         "clap",
+        "regex",
     };
 
     const target = b.standardTargetOptions(.{});
diff --git a/build.zig.zon b/build.zig.zon
@@ -40,6 +40,10 @@
             .url = "git+https://github.com/Hejsil/zig-clap#cc5c6a5d71a317ed4b0ad776842d1d0655f72d0a",
             .hash = "clap-0.10.0-oBajB7jkAQAZ4cKLlzkeV9mDu2yGZvtN2QuOyfAfjBij",
         },
+        .regex = .{
+            .url = "git+https://github.com/tiehuis/zig-regex#8e38e11d45d3c45e06ed3e994e1eb2e62ed60637",
+            .hash = "1220c65e96eb14c7de3e3a82bfc45a66e7ca72b80e0ae82d1b6b6e58b7d8c9e7b8",
+        },
     },
     .paths = .{
         "build.zig",
diff --git a/src/httpfile/assertion_checker.zig b/src/httpfile/assertion_checker.zig
@@ -43,6 +43,7 @@ test "HttpParser supports contains and not_contains for headers" {
 }
 const std = @import("std");
 const http = std.http;
+const regex = @import("regex");
 const HttpParser = @import("./parser.zig");
 const Client = @import("./http_client.zig");
 
@@ -55,99 +56,14 @@ fn extractHeaderName(key: []const u8) ![]const u8 {
 }
 
 fn matchesRegex(text: []const u8, pattern: []const u8) bool {
-    if (pattern.len == 0) return text.len == 0;
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
     
-    // Handle anchors
-    const starts_with_anchor = pattern[0] == '^';
-    const ends_with_anchor = pattern.len > 0 and pattern[pattern.len - 1] == '$';
+    const compiled_regex = regex.compile(allocator, pattern) catch return false;
+    defer compiled_regex.deinit();
     
-    var actual_pattern = pattern;
-    if (starts_with_anchor) actual_pattern = pattern[1..];
-    if (ends_with_anchor and actual_pattern.len > 0) actual_pattern = actual_pattern[0..actual_pattern.len - 1];
-    
-    if (starts_with_anchor and ends_with_anchor) {
-        return matchesRegexAt(text, actual_pattern, 0) == text.len;
-    } else if (starts_with_anchor) {
-        return matchesRegexAt(text, actual_pattern, 0) != null;
-    } else if (ends_with_anchor) {
-        var i: usize = 0;
-        while (i <= text.len) : (i += 1) {
-            if (matchesRegexAt(text[i..], actual_pattern, 0)) |end_pos| {
-                if (i + end_pos == text.len) return true;
-            }
-        }
-        return false;
-    } else {
-        var i: usize = 0;
-        while (i <= text.len) : (i += 1) {
-            if (matchesRegexAt(text[i..], actual_pattern, 0) != null) return true;
-        }
-        return false;
-    }
-}
-
-fn matchesRegexAt(text: []const u8, pattern: []const u8, text_pos: usize) ?usize {
-    var p_pos: usize = 0;
-    var t_pos = text_pos;
-    
-    while (p_pos < pattern.len and t_pos < text.len) {
-        if (p_pos + 1 < pattern.len and pattern[p_pos + 1] == '*') {
-            // Handle .* or character*
-            const match_char = pattern[p_pos];
-            p_pos += 2; // Skip char and *
-            
-            // Try matching zero occurrences first
-            if (matchesRegexAt(text, pattern[p_pos..], t_pos)) |end_pos| {
-                return t_pos + end_pos;
-            }
-            
-            // Try matching one or more occurrences
-            while (t_pos < text.len) {
-                if (match_char == '.' or text[t_pos] == match_char) {
-                    t_pos += 1;
-                    if (matchesRegexAt(text, pattern[p_pos..], t_pos)) |end_pos| {
-                        return t_pos + end_pos;
-                    }
-                } else {
-                    break;
-                }
-            }
-            return null;
-        } else if (pattern[p_pos] == '.') {
-            // Match any single character
-            t_pos += 1;
-            p_pos += 1;
-        } else if (pattern[p_pos] == '[') {
-            // Character class
-            const close_bracket = std.mem.indexOfScalarPos(u8, pattern, p_pos + 1, ']') orelse return null;
-            const char_class = pattern[p_pos + 1..close_bracket];
-            var matched = false;
-            for (char_class) |c| {
-                if (text[t_pos] == c) {
-                    matched = true;
-                    break;
-                }
-            }
-            if (!matched) return null;
-            t_pos += 1;
-            p_pos = close_bracket + 1;
-        } else {
-            // Literal character match
-            if (text[t_pos] != pattern[p_pos]) return null;
-            t_pos += 1;
-            p_pos += 1;
-        }
-    }
-    
-    // Handle remaining .* patterns at end
-    while (p_pos + 1 < pattern.len and pattern[p_pos + 1] == '*') {
-        p_pos += 2;
-    }
-    
-    if (p_pos == pattern.len) {
-        return t_pos - text_pos;
-    }
-    return null;
+    return compiled_regex.match(text);
 }
 
 pub fn check(request: *HttpParser.HttpRequest, response: Client.HttpResponse) !void {