From 5d231190be9698d7659516e276a22607370f9dfb Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Thu, 19 Feb 2026 10:38:40 -0800 Subject: [PATCH] add string search example and benchmark --- benchmarks/run.sh | 26 +++++ benchmarks/stringsearch/bench.c | 78 +++++++++++++++ benchmarks/stringsearch/bun.mjs | 40 ++++++++ benchmarks/stringsearch/chadscript.ts | 47 ++++++++++ benchmarks/stringsearch/grep.sh | 8 ++ benchmarks/stringsearch/node.mjs | 40 ++++++++ benchmarks/stringsearch/rg.sh | 8 ++ benchmarks/stringsearch/stringsearch.go | 53 +++++++++++ examples/string-search.ts | 120 ++++++++++++++++++++++++ 9 files changed, 420 insertions(+) create mode 100644 benchmarks/stringsearch/bench.c create mode 100644 benchmarks/stringsearch/bun.mjs create mode 100644 benchmarks/stringsearch/chadscript.ts create mode 100755 benchmarks/stringsearch/grep.sh create mode 100644 benchmarks/stringsearch/node.mjs create mode 100755 benchmarks/stringsearch/rg.sh create mode 100644 benchmarks/stringsearch/stringsearch.go create mode 100644 examples/string-search.ts diff --git a/benchmarks/run.sh b/benchmarks/run.sh index e49a629f..ce3f0bb4 100755 --- a/benchmarks/run.sh +++ b/benchmarks/run.sh @@ -158,6 +158,7 @@ assemble_json() { bench_names[fileio]="File I/O" bench_names[binarytrees]="Binary Trees" bench_names[json]="JSON Parse/Stringify" + bench_names[stringsearch]="String Search" declare -A bench_descs bench_descs[startup]="Time to print 'Hello, World!' and exit. Average of ${STARTUP_RUNS} runs." @@ -175,6 +176,7 @@ assemble_json() { bench_descs[fileio]="Write and read ~100MB to /tmp." bench_descs[binarytrees]="Build/check/discard binary trees of depth 18." bench_descs[json]="Parse 10K JSON objects, stringify back." + bench_descs[stringsearch]="Recursive directory search for 'console.log' in src/. Small corpus (~30 files); grep/ripgrep advantages (mmap, SIMD, parallelism) shine on larger codebases." 
declare -A bench_metrics bench_metrics[startup]="ms" @@ -192,6 +194,7 @@ assemble_json() { bench_metrics[fileio]="s" bench_metrics[binarytrees]="s" bench_metrics[json]="s" + bench_metrics[stringsearch]="s" declare -A bench_lower bench_lower[startup]="true" @@ -209,6 +212,7 @@ assemble_json() { bench_lower[fileio]="true" bench_lower[binarytrees]="true" bench_lower[json]="true" + bench_lower[stringsearch]="true" for benchfile in "$JSON_DIR"/*.json; do [ -f "$benchfile" ] || continue @@ -335,6 +339,9 @@ echo " ChadScript Binary Trees built" $CHAD "$DIR/json/chadscript.ts" -o /tmp/bench-json-chad echo " ChadScript JSON built" +$CHAD "$DIR/stringsearch/chadscript.ts" -o /tmp/bench-stringsearch-chad +echo " ChadScript String Search built" + clang -O2 -march=native -o /tmp/bench-startup-c "$DIR/startup/hello.c" echo " C startup built" @@ -371,6 +378,9 @@ echo " C Binary Trees built" clang -O2 -march=native -I "$DIR/../vendor/yyjson" -o /tmp/bench-json-c "$DIR/json/bench.c" "$DIR/../vendor/yyjson/libyyjson.a" echo " C JSON built" +clang -O2 -march=native -o /tmp/bench-stringsearch-c "$DIR/stringsearch/bench.c" +echo " C String Search built" + go build -o /tmp/bench-startup-go "$DIR/startup/hello.go" echo " Go startup built" @@ -404,6 +414,9 @@ echo " Go Binary Trees built" go build -o /tmp/bench-json-go "$DIR/json/json_bench.go" echo " Go JSON built" +go build -o /tmp/bench-stringsearch-go "$DIR/stringsearch/stringsearch.go" +echo " Go String Search built" + echo "" echo "═══════════════════════════════════════════════════" @@ -538,6 +551,19 @@ bench_compute "json" "go" "Go" "Time:" /tmp/bench-json-go bench_compute "json" "node" "Node.js $(node --version)" "Time:" node "$DIR/json/node.mjs" bench_compute "json" "bun" "Bun $(bun --version)" "Time:" bun "$DIR/json/bun.mjs" +echo "═══════════════════════════════════════════════════" +echo " String Search (recursive, 'console.log' in src/)" +echo "═══════════════════════════════════════════════════" +echo "" + +bench_compute 
"stringsearch" "c" "C (clang -O2 -march=native)" "Time:" /tmp/bench-stringsearch-c
+bench_compute "stringsearch" "chadscript" "ChadScript (native)" "Time:" /tmp/bench-stringsearch-chad
+bench_compute "stringsearch" "go" "Go" "Time:" /tmp/bench-stringsearch-go
+bench_compute "stringsearch" "node" "Node.js $(node --version)" "Time:" node "$DIR/stringsearch/node.mjs"
+bench_compute "stringsearch" "bun" "Bun $(bun --version)" "Time:" bun "$DIR/stringsearch/bun.mjs"
+bench_compute "stringsearch" "grep" "grep -r (GNU)" "Time:" bash "$DIR/stringsearch/grep.sh"
+bench_compute "stringsearch" "ripgrep" "ripgrep (rg)" "Time:" bash "$DIR/stringsearch/rg.sh"
+
 assemble_json "$JSON_OUT"
 echo ""
 echo "═══════════════════════════════════════════════════"
diff --git a/benchmarks/stringsearch/bench.c b/benchmarks/stringsearch/bench.c
new file mode 100644
index 00000000..68f3c36e
--- /dev/null
+++ b/benchmarks/stringsearch/bench.c
@@ -0,0 +1,93 @@
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#define NEEDLE "console.log"
+#define SEARCH_DIR "src"
+
+/* Lines containing NEEDLE, summed over every file searched so far. */
+static int total_matches = 0;
+
+/*
+ * Read the file at `path` into memory and add the number of its lines that
+ * contain NEEDLE to total_matches. A file that cannot be opened, measured or
+ * buffered is skipped.
+ */
+static void search_file(const char *path) {
+    FILE *f = fopen(path, "r");
+    if (!f) return;
+
+    fseek(f, 0, SEEK_END);
+    long size = ftell(f);
+    fseek(f, 0, SEEK_SET);
+    /* ftell() reports failure as -1, which must not reach malloc()/fread(). */
+    if (size <= 0) { fclose(f); return; }
+
+    char *buf = (char *)malloc((size_t)size + 1);
+    if (!buf) { fclose(f); return; }
+    /* Terminate after the bytes actually read; a short read leaves the rest unset. */
+    size_t got = fread(buf, 1, (size_t)size, f);
+    buf[got] = '\0';
+    fclose(f);
+
+    /* Split in place: each '\n' is overwritten so strstr() sees a single line. */
+    char *line = buf;
+    while (line && *line) {
+        char *nl = strchr(line, '\n');
+        if (nl) *nl = '\0';
+        if (strstr(line, NEEDLE)) {
+            total_matches++;
+        }
+        if (nl) {
+            line = nl + 1;
+        } else {
+            break;
+        }
+    }
+    free(buf);
+}
+
+/*
+ * Walk the directory at `path`: regular files are passed to search_file() and
+ * subdirectories are walked recursively. Entries that fail stat() are skipped.
+ */
+static void search_dir(const char *path) {
+    DIR *d = opendir(path);
+    if (!d) return;
+
+    struct dirent *ent;
+    char fullpath[4096];
+    struct stat st;
+
+    while ((ent = readdir(d)) != NULL) {
+        if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
+            continue;
+        snprintf(fullpath, sizeof(fullpath), "%s/%s", path, ent->d_name);
+        if (stat(fullpath, &st) != 0) continue;
+        if (S_ISREG(st.st_mode)) {
+            search_file(fullpath);
+        } else if (S_ISDIR(st.st_mode)) {
+            search_dir(fullpath);
+        }
+    }
+    closedir(d);
+}
+
+/* Time one full search of SEARCH_DIR, then print the match count and seconds. */
+int main(void) {
+    struct timespec t0, t1;
+    clock_gettime(CLOCK_MONOTONIC, &t0);
+
+    search_dir(SEARCH_DIR);
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    double elapsed = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
+
+    printf("Matches: %d\n", total_matches);
+    printf("Time: %.3fs\n", elapsed);
+
+    return 0;
+}
diff --git a/benchmarks/stringsearch/bun.mjs b/benchmarks/stringsearch/bun.mjs
new file mode 100644
index 00000000..a5f58663
--- /dev/null
+++ b/benchmarks/stringsearch/bun.mjs
@@ -0,0 +1,42 @@
+import { readdirSync, readFileSync, statSync } from "node:fs";
+import { join } from "node:path";
+
+const NEEDLE = "console.log";
+const SEARCH_DIR = "src";
+
+let totalMatches = 0;
+
+// Add the number of lines in `filePath` that contain NEEDLE to totalMatches.
+function searchFile(filePath) {
+  const content = readFileSync(filePath, "utf8");
+  if (content.length === 0) return;
+  const lines = content.split("\n");
+  for (const line of lines) {
+    if (line.indexOf(NEEDLE) !== -1) {
+      totalMatches++;
+    }
+  }
+}
+
+// Search every file under `dirPath`, descending into subdirectories.
+function searchDir(dirPath) {
+  const entries = readdirSync(dirPath);
+  for (const entry of entries) {
+    const fullPath = join(dirPath, entry);
+    const st = statSync(fullPath);
+    if (st.isFile()) {
+      searchFile(fullPath);
+    } else if (st.isDirectory()) {
+      searchDir(fullPath);
+    }
+  }
+}
+
+const start = performance.now();
+
+searchDir(SEARCH_DIR);
+
+const elapsed = (performance.now() - start) / 1000;
+
+console.log(`Matches: ${totalMatches}`);
+console.log(`Time: ${elapsed.toFixed(3)}s`);
diff --git a/benchmarks/stringsearch/chadscript.ts b/benchmarks/stringsearch/chadscript.ts
new file mode 100644
index 00000000..ddcf76f0
--- /dev/null
+++ b/benchmarks/stringsearch/chadscript.ts
@@ -0,0 +1,50 @@
+const NEEDLE = "console.log";
+const SEARCH_DIR = "src";
+
+let totalMatches = 0;
+
+// Add the number of lines in `filePath` that contain NEEDLE to totalMatches.
+function searchFile(filePath: string): void {
+  const content = fs.readFileSync(filePath);
+  if (content.length === 0) {
+    return;
+  }
+  const lines = content.split("\n");
+  let i = 0;
+  while (i < lines.length) {
+    if (lines[i].indexOf(NEEDLE) !== -1) {
+      totalMatches = totalMatches + 1;
+    }
+    i = i + 1;
+  }
+}
+
+// Search every file under `dirPath`, descending into subdirectories.
+function searchDir(dirPath: string): void {
+  const entries = fs.readdirSync(dirPath);
+  let i = 0;
+  while (i < entries.length) {
+    const entryPath = dirPath + "/" + entries[i];
+    const info = fs.statSync(entryPath);
+    if (info.isFile()) {
+      searchFile(entryPath);
+    } else if (info.isDirectory()) {
+      searchDir(entryPath);
+    }
+    i = i + 1;
+  }
+}
+
+// Time one search of SEARCH_DIR and print the match count and elapsed seconds.
+function run(): void {
+  const start = Date.now();
+
+  searchDir(SEARCH_DIR);
+
+  const elapsed = (Date.now() - start) / 1000;
+
+  console.log("Matches: " + totalMatches);
+  console.log("Time: " + elapsed + "s");
+}
+
+run();
diff --git a/benchmarks/stringsearch/grep.sh b/benchmarks/stringsearch/grep.sh
new file mode 100755
index 00000000..be8be95f
--- /dev/null
+++ b/benchmarks/stringsearch/grep.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Count lines under src/ containing the literal string "console.log", then
+# print the count and elapsed time. Timing relies on GNU date's %N (nanoseconds).
+start_ns=$(date +%s%N)
+# -F matches the needle as a fixed string; as a regex its "." is a wildcard.
+matches=$(grep -rF "console.log" src/ | wc -l)
+end_ns=$(date +%s%N)
+elapsed_ns=$((end_ns - start_ns))
+# Integer formatting keeps the leading zero ("0.004"); bc would print ".004".
+printf -v elapsed_s '%d.%03d' $((elapsed_ns / 1000000000)) $(( (elapsed_ns % 1000000000) / 1000000 ))
+echo "Matches: $matches"
+echo "Time: ${elapsed_s}s"
diff --git a/benchmarks/stringsearch/node.mjs b/benchmarks/stringsearch/node.mjs
new file mode 100644
index 00000000..a5f58663
--- /dev/null
+++ b/benchmarks/stringsearch/node.mjs
@@ -0,0 +1,42 @@
+import { readdirSync, readFileSync, statSync } from "node:fs";
+import { join } from "node:path";
+
+const NEEDLE = "console.log";
+const SEARCH_DIR = "src";
+
+let totalMatches = 0;
+
+// Add the number of lines in `filePath` that contain NEEDLE to totalMatches.
+function searchFile(filePath) {
+  const content = readFileSync(filePath, "utf8");
+  if (content.length === 0) return;
+  const lines = content.split("\n");
+  for (const line of lines) {
+    if (line.indexOf(NEEDLE) !== -1) {
+      totalMatches++;
+    }
+  }
+}
+
+// Search every file under `dirPath`, descending into subdirectories.
+function searchDir(dirPath) {
+  const entries = readdirSync(dirPath);
+  for (const entry of entries) {
+    const fullPath = join(dirPath, entry);
+    const st = statSync(fullPath);
+    if (st.isFile()) {
+      searchFile(fullPath);
+    } else if (st.isDirectory()) {
+      searchDir(fullPath);
+    }
+  }
+}
+
+const start = performance.now();
+
+searchDir(SEARCH_DIR);
+
+const elapsed = (performance.now() - start) / 1000;
+
+console.log(`Matches: ${totalMatches}`);
+console.log(`Time: ${elapsed.toFixed(3)}s`);
diff --git a/benchmarks/stringsearch/rg.sh b/benchmarks/stringsearch/rg.sh
new file mode 100755
index 00000000..b357c92c
--- /dev/null
+++ b/benchmarks/stringsearch/rg.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Count lines under src/ containing the literal string "console.log", then
+# print the count and elapsed time. Timing relies on GNU date's %N (nanoseconds).
+start_ns=$(date +%s%N)
+# -F matches the needle as a fixed string; as a regex its "." is a wildcard.
+matches=$(rg -F "console.log" src/ | wc -l)
+end_ns=$(date +%s%N)
+elapsed_ns=$((end_ns - start_ns))
+# Integer formatting keeps the leading zero ("0.004"); bc would print ".004".
+printf -v elapsed_s '%d.%03d' $((elapsed_ns / 1000000000)) $(( (elapsed_ns % 1000000000) / 1000000 ))
+echo "Matches: $matches"
+echo "Time: ${elapsed_s}s"
diff --git a/benchmarks/stringsearch/stringsearch.go b/benchmarks/stringsearch/stringsearch.go
new file mode 100644
index 00000000..876dffda
--- /dev/null
+++ b/benchmarks/stringsearch/stringsearch.go
@@ -0,0 +1,56 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+const needle = "console.log"
+const searchDir = "src"
+
+var totalMatches int
+
+// searchFile adds the number of lines in path that contain needle to totalMatches.
+func searchFile(path string) {
+	data, err := os.ReadFile(path)
+	if err != nil || len(data) == 0 {
+		return
+	}
+	lines := strings.Split(string(data), "\n")
+	for _, line := range lines {
+		if strings.Contains(line, needle) {
+			totalMatches++
+		}
+	}
+}
+
+// walkDir searches every non-directory entry under dirPath, recursing into subdirectories.
+func walkDir(dirPath string) {
+	entries, err := os.ReadDir(dirPath)
+	if err != nil {
+		return
+	}
+	for _, entry := range entries {
+		fullPath := filepath.Join(dirPath, entry.Name())
+		if entry.IsDir() {
+			walkDir(fullPath)
+		} else {
+			searchFile(fullPath)
+		}
+	}
+}
+
+// main times one search of searchDir and prints the match count and elapsed seconds.
+func main() {
+	start := time.Now()
+
+	walkDir(searchDir)
+
+	elapsed := time.Since(start).Seconds()
+
+	fmt.Printf("Matches: %d\n", totalMatches)
+	fmt.Printf("Time: %.3fs\n", elapsed)
+}
diff --git 
a/examples/string-search.ts b/examples/string-search.ts
new file mode 100644
index 00000000..a7115c63
--- /dev/null
+++ b/examples/string-search.ts
@@ -0,0 +1,139 @@
+import { ArgumentParser } from '../lib/argparse.js';
+
+const parser = new ArgumentParser("string-search", "Search for a string pattern in files");
+parser.addFlag("ignore-case", "i", "Case-insensitive search");
+parser.addFlag("line-number", "n", "Show line numbers");
+parser.addFlag("count", "c", "Only print a count of matching lines per file");
+parser.addFlag("recursive", "r", "Recursively search directories");
+parser.addFlag("invert-match", "v", "Select non-matching lines");
+parser.addPositional("pattern", "The string to search for");
+parser.addPositional("file", "File or directory to search");
+parser.parse(process.argv);
+
+const pattern = parser.getPositional(0);
+const target = parser.getPositional(1);
+
+if (pattern.length === 0 || target.length === 0) {
+  console.error("string-search: missing pattern or file argument");
+  console.error("Try 'string-search --help' for more information");
+  process.exit(2);
+}
+
+const ignoreCase = parser.getFlag("ignore-case");
+const showLineNumbers = parser.getFlag("line-number");
+const countOnly = parser.getFlag("count");
+const recursive = parser.getFlag("recursive");
+const invertMatch = parser.getFlag("invert-match");
+
+// Lower-case the pattern once; matchesLine() lower-cases each line to compare.
+let searchPattern = pattern;
+if (ignoreCase) {
+  searchPattern = pattern.toLowerCase();
+}
+
+// Selected lines across all files; decides the exit status in main().
+let totalMatches = 0;
+
+// True when `line` should be selected: it contains the pattern, or, under
+// --invert-match, it does not.
+function matchesLine(line: string): boolean {
+  let haystack = line;
+  if (ignoreCase) {
+    haystack = line.toLowerCase();
+  }
+  const found = haystack.indexOf(searchPattern) !== -1;
+  if (invertMatch) {
+    return !found;
+  }
+  return found;
+}
+
+// Search one file line by line. Unless --count is set, every selected line is
+// printed, prefixed with the file path when `showPrefix` is true and with its
+// 1-based line number under --line-number. With --count only the number of
+// selected lines is printed. That number is added to totalMatches.
+function searchFile(filePath: string, showPrefix: boolean): void {
+  const content = fs.readFileSync(filePath);
+  let matchCount = 0;
+
+  // An empty file has no lines to scan but still reports a count of 0 below.
+  if (content.length > 0) {
+    const lines = content.split("\n");
+
+    // A file ending in "\n" splits into a trailing empty string that is not a
+    // line; scanning it would make --invert-match select a phantom line.
+    let lineCount = lines.length;
+    if (lineCount > 0 && lines[lineCount - 1].length === 0) {
+      lineCount = lineCount - 1;
+    }
+
+    let lineNum = 0;
+    while (lineNum < lineCount) {
+      if (matchesLine(lines[lineNum])) {
+        matchCount = matchCount + 1;
+        if (!countOnly) {
+          let output = "";
+          if (showPrefix) {
+            output = filePath + ":";
+          }
+          if (showLineNumbers) {
+            output = output + (lineNum + 1) + ":";
+          }
+          output = output + lines[lineNum];
+          console.log(output);
+        }
+      }
+      lineNum = lineNum + 1;
+    }
+  }
+
+  if (countOnly) {
+    if (showPrefix) {
+      console.log(filePath + ":" + matchCount);
+    } else {
+      console.log(matchCount);
+    }
+  }
+
+  totalMatches = totalMatches + matchCount;
+}
+
+// Search every file under `dirPath`, descending into subdirectories.
+// Matches are always printed with the file path prefix.
+function searchDir(dirPath: string): void {
+  const entries = fs.readdirSync(dirPath);
+  let i = 0;
+  while (i < entries.length) {
+    const entryPath = dirPath + "/" + entries[i];
+    const info = fs.statSync(entryPath);
+    if (info.isFile()) {
+      searchFile(entryPath, true);
+    } else if (info.isDirectory()) {
+      searchDir(entryPath);
+    }
+    i = i + 1;
+  }
+}
+
+// Search `target` and exit with status 0 if any line was selected, 1 if none
+// was, or 2 when `target` is a directory and --recursive was not given.
+function main(): void {
+  const info = fs.statSync(target);
+
+  if (info.isDirectory()) {
+    if (!recursive) {
+      console.error("string-search: " + target + ": Is a directory (use -r to search recursively)");
+      process.exit(2);
+    }
+    searchDir(target);
+  } else {
+    searchFile(target, false);
+  }
+
+  if (totalMatches === 0) {
+    process.exit(1);
+  }
+  process.exit(0);
+}
+
+main();