From ba356a30e9c245b44d0a4eddc18aff333dd74d16 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 13:48:48 +0800 Subject: [PATCH 01/12] feat: implement trie for longest common prefix --- _xtool/internal/config/trie.go | 187 +++++++++++++ _xtool/internal/config/trie_test.go | 413 ++++++++++++++++++++++++++++ 2 files changed, 600 insertions(+) create mode 100644 _xtool/internal/config/trie.go create mode 100644 _xtool/internal/config/trie_test.go diff --git a/_xtool/internal/config/trie.go b/_xtool/internal/config/trie.go new file mode 100644 index 00000000..1641c2ba --- /dev/null +++ b/_xtool/internal/config/trie.go @@ -0,0 +1,187 @@ +package config + +import ( + "iter" + "os" + "path/filepath" + "slices" + "strings" +) + +type Segmenter func(s string) iter.Seq[string] + +type TrieNode struct { + isLeaf bool // Indicates if this node represents the end of a word + linkCount int // Number of children nodes + children map[string]*TrieNode // Map of child nodes by segment +} + +// Creates a new TrieNode with empty children map +func NewTrieNode() *TrieNode { + return &TrieNode{children: make(map[string]*TrieNode)} +} + +type Trie struct { + root *TrieNode // Root node of the trie + segmenter Segmenter // Function to split strings into segments +} +type Options func(*Trie) // Function type for configuring Trie options + +func skipEmpty(s []string) []string { + for len(s) > 0 && s[0] == "" { + s = s[1:] + } + return s +} + +func splitPathAbsSafe(path string) (paths []string) { + originalPath := filepath.Clean(path) + + sep := string(os.PathSeparator) + + // keep absolute path info + if filepath.IsAbs(originalPath) { + i := strings.Index(originalPath[1:], sep) + if i > 0 { + // bound edge: if i is greater than zero, which means there's second separator + // for example, /usr/, i: 3, with first separator what we just skipped, i: 4 + paths = append(paths, originalPath[0:i+1]) + paths = append(paths, skipEmpty(strings.Split(originalPath[i+1:], sep))...) + } else { + // start with / but no other / is found, like /usr + paths = append(paths, originalPath) + } + } + + if len(paths) == 0 { + paths = skipEmpty(strings.Split(originalPath, sep)) + } + + return +} + +// Returns an option to configure path segmenter +// Splits strings by OS path separator and yields each segment +func WithPathSegmenter() Options { + return func(t *Trie) { + t.segmenter = func(s string) iter.Seq[string] { + return func(yield func(string) bool) { + for _, path := range splitPathAbsSafe(s) { + if path != "" && !yield(path) { + return + } + } + } + } + } +} + +// Returns an option to configure reverse path segmenter +// Splits and reverses strings by OS path separator +func WithReversePathSegmenter() Options { + return func(t *Trie) { + t.segmenter = func(s string) iter.Seq[string] { + return func(yield func(string) bool) { + paths := splitPathAbsSafe(s) + + slices.Reverse(paths) + + for _, path := range paths { + if path != "" && !yield(path) { + return + } + } + } + } + } +} + +// Creates a new Trie with default path segmenter +// Applies all provided options to configure the Trie +func NewTrie(opts ...Options) *Trie { + t := &Trie{root: NewTrieNode()} + + WithPathSegmenter()(t) + + for _, o := range opts { + o(t) + } + + return t +} + +// Inserts a string into the trie +// Creates nodes for each segment in the string +func (t *Trie) Insert(s string) { + if s == "" { + return + } + node := t.root + + for segment := range t.segmenter(s) { + child, ok := node.children[segment] + if !ok { + child = NewTrieNode() + node.children[segment] = child + node.linkCount++ + } + node = child + } + node.isLeaf = true +} + +// Searches for a prefix in the trie +// Returns the node at the end of the prefix or nil if not found +func (t *Trie) searchPrefix(s string) *TrieNode { + if s == "" { + return nil + } + node := t.root + + for segment := range t.segmenter(s) { + child, ok := node.children[segment] + if !ok { + return nil + } + node = child + } + + return node +} + +// Finds the longest common prefix of the given string +// Returns the longest prefix that exists in the trie +// +// Implement Source: https://leetcode.com/problems/longest-common-prefix/solutions/127449/longest-common-prefix +func (t *Trie) LongestPrefix(s string) string { + var prefix []string + + node := t.root + + for segment := range t.segmenter(s) { + child := node.children[segment] + + isLongestPrefix := child != nil && node.linkCount == 1 && !node.isLeaf + + if !isLongestPrefix { + break + } + + prefix = append(prefix, segment) + node = child + } + + return filepath.Join(prefix...) +} + +// Checks if the trie contains the given string as a prefix +func (t *Trie) Contains(s string) bool { + return t.searchPrefix(s) != nil +} + +// Checks if the trie contains the exact string +// Returns true if the string exists in the trie +func (t *Trie) Search(s string) bool { + node := t.searchPrefix(s) + return node != nil && node.isLeaf +} diff --git a/_xtool/internal/config/trie_test.go b/_xtool/internal/config/trie_test.go new file mode 100644 index 00000000..cc2ea964 --- /dev/null +++ b/_xtool/internal/config/trie_test.go @@ -0,0 +1,413 @@ +package config_test + +import ( + "testing" + + "github.com/goplus/llcppg/_xtool/internal/config" +) + +func TestTrieContains(t *testing.T) { + testCases := []struct { + name string + search string + inserted []string + want bool + }{ + { + name: "empty string", + search: "abc", + want: false, + }, + { + name: "input empty string", + search: "", + inserted: []string{""}, + want: false, + }, + { + name: "one string", + search: "/a", + inserted: []string{"/a"}, + want: true, + }, + { + name: "two string", + search: "/a", + inserted: []string{"/a", "/b"}, + want: true, + }, + { + name: "multiple string case 1", + search: "/c", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 2", + search: "", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 3", + search: "/c", + inserted: []string{"/a/c", "/b/c", "/c/d"}, + want: true, + }, + + { + name: "multiple string case 4", + search: "/c/d", + inserted: []string{"/a/c/d", "/b/c/d", "/c/d/a"}, + want: true, + }, + { + name: "substring string case 1", + search: "/a/b", + inserted: []string{"/a"}, + want: false, + }, + { + name: "substring string case 2", + search: "/a", + inserted: []string{"/a/b"}, + want: true, + }, + + { + name: "substring string case 3", + search: "/a/b", + inserted: []string{"/a/b", "/a/b/c"}, + want: true, + }, + + { + name: "absolute path case 1", + search: "a", + inserted: []string{"/a/b"}, + want: false, + }, + { + name: "substring string case 2", + search: "/a", + inserted: []string{"a/b", "a/b/c"}, + want: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trie := config.NewTrie() + + for _, i := range tc.inserted { + trie.Insert(i) + } + if got := trie.Contains(tc.search); got != tc.want { + t.Fatalf("unexpected result: want %v got %v", tc.want, got) + } + }) + } +} + +func TestTrieSearch(t *testing.T) { + testCases := []struct { + name string + search string + inserted []string + want bool + }{ + { + name: "Empty string insertion and search", + search: "", + inserted: []string{""}, + want: false, + }, + { + name: "Single directory exact match", + search: "/usr/local/bin/", + inserted: []string{"/usr/local/bin/"}, + want: true, + }, + { + name: "Single directory partial match", + search: "/usr/local/bin/python", + inserted: []string{"/usr/local/bin/"}, + want: false, + }, + { + name: "Multiple directories exact match", + search: "/usr/local/lib/", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, + want: true, + }, + { + name: "Multiple directories partial match", + search: "/usr/local/lib/python", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, + want: false, + }, + { + name: "Mixed path separators", + search: "/usr/local/bin/", + inserted: []string{"/usr/local/bin/"}, + want: true, + }, + { + name: "Non-existent path", + search: "/non/existent/path", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/"}, + want: false, + }, + { + name: "Empty search string", + search: "", + inserted: []string{"/usr/local/bin/"}, + want: false, + }, + { + name: "Subdirectory search", + search: "/usr/local/bin/", + inserted: []string{"/usr/local/bin/"}, + want: true, + }, + { + name: "Deep directory structure", + search: "/a/b/c/d/e/f/g", + inserted: []string{"/a/b/c/d/e/f/g"}, + want: true, + }, + { + name: "Long path with special characters", + search: "/home/user/!@#$%^&*()", + inserted: []string{"/home/user/!@#$%^&*()"}, + want: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trie := config.NewTrie() + for _, word := range tc.inserted { + trie.Insert(word) + } + if got := trie.Search(tc.search); got != tc.want { + t.Fatalf("Search(%q) = %v, want %v", tc.search, got, tc.want) + } + }) + } +} + +func TestTrieLongestPrefix(t *testing.T) { + tests := []struct { + name string + inserted []string + input string + want string + }{ + { + name: "Empty trie", + inserted: []string{}, + input: "/usr/local/bin", + want: "", + }, + { + name: "Single directory exact match", + inserted: []string{"/usr/local/bin/"}, + input: "/usr/local/bin", + want: "/usr/local/bin", + }, + { + name: "Single directory partial match", + inserted: []string{"/usr/local/bin/"}, + input: "/usr/local/bin/python", + want: "/usr/local/bin", + }, + { + name: "Multiple directories with common prefix", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, + input: "/usr/local/bin/python", + want: "/usr", + }, + { + name: "No common prefix", + inserted: []string{"/home/user/", "/var/log/", "/tmp/"}, + input: "/etc/passwd", + want: "", + }, + { + name: "Reverse path match", + inserted: []string{"bin", "lib", "include"}, + input: "include/lib/bin", + want: "", + }, + { + name: "Longer input than stored", + inserted: []string{"/short/"}, + input: "/shorter/path", + want: "", + }, + { + name: "Empty input", + inserted: []string{"/test/"}, + input: "", + want: "", + }, + { + name: "No match", + inserted: []string{"/apple/", "/banana/"}, + input: "/cherry/", + want: "", + }, + { + name: "Partial reverse match", + inserted: []string{"bin", "lib", "include"}, + input: "lib/bin", + want: "", + }, + { + name: "normal case 1", + inserted: []string{ + "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON.h", + "/opt/homebrew/Cellar/cjson/1.7.18/include/zlib/zlib.h", + }, + input: "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON/cJSON.h", + want: "/opt/homebrew/Cellar/cjson/1.7.18/include", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + trie := config.NewTrie() + for _, word := range tt.inserted { + trie.Insert(word) + } + result := trie.LongestPrefix(tt.input) + if result != tt.want { + t.Errorf("LongestPrefix(%q) = %q, want %q", tt.input, result, tt.want) + } + }) + } +} + +func TestTrieReverse(t *testing.T) { + testCases := []struct { + name string + search string + inserted []string + want bool + }{ + { + name: "empty string", + search: "abc", + want: false, + }, + { + name: "input empty string", + search: "", + inserted: []string{""}, + want: false, + }, + { + name: "one string", + search: "/a", + inserted: []string{"/a"}, + want: true, + }, + { + name: "two string", + search: "/a", + inserted: []string{"/a", "/b"}, + want: true, + }, + { + name: "multiple string case 1", + search: "/c", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 2", + search: "", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 3", + search: "/c", + inserted: []string{"/a/c", "/b/c", "/c/d"}, + want: false, + }, + + { + name: "multiple string case 4", + search: "/c/d", + inserted: []string{"/a/c/d", "/b/c/d", "/c/d/a"}, + want: false, + }, + + { + name: "multiple string case 5", + search: "c", + inserted: []string{"/a/c", "/b/c", "/c/d"}, + want: true, + }, + { + name: "substring string case 1", + search: "/a/b", + inserted: []string{"/a"}, + want: false, + }, + { + name: "substring string case 2", + search: "b", + inserted: []string{"/a/b"}, + want: true, + }, + + { + name: "substring string case 3", + search: "/a/b", + inserted: []string{"/a/b", "/a/b/c"}, + want: true, + }, + + { + name: "normal case 1", + search: "libxslt/variables.h", + inserted: []string{ + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/imports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/xsltexports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/variables.h", + }, + want: true, + }, + + { + name: "normal case 2", + search: "libxslt/c14n.h", + inserted: []string{ + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/imports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/xsltexports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/variables.h", + }, + want: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trie := config.NewTrie(config.WithReversePathSegmenter()) + + for _, i := range tc.inserted { + trie.Insert(i) + } + if got := trie.Contains(tc.search); got != tc.want { + t.Fatalf("unexpected result: want %v got %v", tc.want, got) + } + }) + } +} From 9cac662d83b554d8d22c6f839c8069b9d11f2406 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 13:59:47 +0800 Subject: [PATCH 02/12] test: add more abs tests --- _xtool/internal/config/trie.go | 5 ++++- _xtool/internal/config/trie_test.go | 14 +++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/_xtool/internal/config/trie.go b/_xtool/internal/config/trie.go index 1641c2ba..dff45380 100644 --- a/_xtool/internal/config/trie.go +++ b/_xtool/internal/config/trie.go @@ -1,6 +1,7 @@ package config import ( + "fmt" "iter" "os" "path/filepath" @@ -57,6 +58,8 @@ func splitPathAbsSafe(path string) (paths []string) { paths = skipEmpty(strings.Split(originalPath, sep)) } + fmt.Println(paths) + return } @@ -162,7 +165,7 @@ func (t *Trie) LongestPrefix(s string) string { child := node.children[segment] isLongestPrefix := child != nil && node.linkCount == 1 && !node.isLeaf - + fmt.Println(segment, child, node) if !isLongestPrefix { break } diff --git a/_xtool/internal/config/trie_test.go b/_xtool/internal/config/trie_test.go index cc2ea964..faa87672 100644 --- a/_xtool/internal/config/trie_test.go +++ b/_xtool/internal/config/trie_test.go @@ -90,7 +90,7 @@ func TestTrieContains(t *testing.T) { want: false, }, { - name: "substring string case 2", + name: "absolute path case 2", search: "/a", inserted: []string{"a/b", "a/b/c"}, want: false, @@ -275,6 +275,18 @@ func TestTrieLongestPrefix(t *testing.T) { input: "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON/cJSON.h", want: "/opt/homebrew/Cellar/cjson/1.7.18/include", }, + { + name: "absolute path case 1", + inserted: []string{"/usr", "usr", "/usr/include"}, + input: "/usr", + want: "", + }, + { + name: "absolute path case 2", + inserted: []string{"usr/share", "/usr", "usr/include"}, + input: "usr/include/share", + want: "usr", + }, } for _, tt := range tests { From e25406b219164513d7aff71c2a71d7a560574ffd Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 14:04:20 +0800 Subject: [PATCH 03/12] chore: remove println --- _xtool/internal/config/trie.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/_xtool/internal/config/trie.go b/_xtool/internal/config/trie.go index dff45380..1641c2ba 100644 --- a/_xtool/internal/config/trie.go +++ b/_xtool/internal/config/trie.go @@ -1,7 +1,6 @@ package config import ( - "fmt" "iter" "os" "path/filepath" @@ -58,8 +57,6 @@ func splitPathAbsSafe(path string) (paths []string) { paths = skipEmpty(strings.Split(originalPath, sep)) } - fmt.Println(paths) - return } @@ -165,7 +162,7 @@ func (t *Trie) LongestPrefix(s string) string { child := node.children[segment] isLongestPrefix := child != nil && node.linkCount == 1 && !node.isLeaf - fmt.Println(segment, child, node) + if !isLongestPrefix { break } From 1c1c16f98e93f910a28a37c5d84503cd572da238 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 14:14:00 +0800 Subject: [PATCH 04/12] test: fix test --- _xtool/internal/config/trie_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_xtool/internal/config/trie_test.go b/_xtool/internal/config/trie_test.go index faa87672..3386cb05 100644 --- a/_xtool/internal/config/trie_test.go +++ b/_xtool/internal/config/trie_test.go @@ -285,7 +285,7 @@ func TestTrieLongestPrefix(t *testing.T) { name: "absolute path case 2", inserted: []string{"usr/share", "/usr", "usr/include"}, input: "usr/include/share", - want: "usr", + want: "", }, } From c8484dedaa19abfcf711f040c8842269f8872404 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 15:25:51 +0800 Subject: [PATCH 05/12] chore: fix namespace --- _xtool/internal/header/trie_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index 331fff97..40c0b958 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -3,7 +3,7 @@ package header_test import ( "testing" - "github.com/goplus/llcppg/_xtool/internal/config" + "github.com/goplus/llcppg/_xtool/internal/header" ) func TestTrieContains(t *testing.T) { @@ -99,7 +99,7 @@ func TestTrieContains(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - trie := config.NewTrie() + trie := header.NewTrie() for _, i := range tc.inserted { trie.Insert(i) @@ -188,7 +188,7 @@ func TestTrieSearch(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - trie := config.NewTrie() + trie := header.NewTrie() for _, word := range tc.inserted { trie.Insert(word) } @@ -291,7 +291,7 @@ func TestTrieLongestPrefix(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - trie := config.NewTrie() + trie := header.NewTrie() for _, word := range tt.inserted { trie.Insert(word) } @@ -412,7 +412,7 @@ func TestTrieReverse(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - trie := config.NewTrie(config.WithReversePathSegmenter()) + trie := header.NewTrie(header.WithReversePathSegmenter()) for _, i := range tc.inserted { trie.Insert(i) From e6de029ac758c67298afdbd31eca54a7714bd79e Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 15:59:47 +0800 Subject: [PATCH 06/12] fix: change contains logic --- _xtool/internal/header/trie.go | 18 +++++++++++++++++- _xtool/internal/header/trie_test.go | 4 ++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/_xtool/internal/header/trie.go b/_xtool/internal/header/trie.go index 9ea8a318..7469588e 100644 --- a/_xtool/internal/header/trie.go +++ b/_xtool/internal/header/trie.go @@ -176,7 +176,23 @@ func (t *Trie) LongestPrefix(s string) string { // Checks if the trie contains the given string as a prefix func (t *Trie) Contains(s string) bool { - return t.searchPrefix(s) != nil + if s == "" { + return false + } + node := t.root + + for segment := range t.segmenter(s) { + child, ok := node.children[segment] + if !ok { + if node == t.root { + node = nil + } + break + } + node = child + } + + return node != nil } // Checks if the trie contains the exact string diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index 40c0b958..0c60b43a 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -67,7 +67,7 @@ func TestTrieContains(t *testing.T) { name: "substring string case 1", search: "/a/b", inserted: []string{"/a"}, - want: false, + want: true, }, { name: "substring string case 2", @@ -358,7 +358,7 @@ func TestTrieReverse(t *testing.T) { name: "multiple string case 4", search: "/c/d", inserted: []string{"/a/c/d", "/b/c/d", "/c/d/a"}, - want: false, + want: true, }, { From 3776c37bd2d7a0d0d12088c2a1986e6ed4607ae0 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 16:36:22 +0800 Subject: [PATCH 07/12] fix: contains logic --- _xtool/internal/header/trie.go | 13 +++++++++---- _xtool/internal/header/trie_test.go | 26 +++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/_xtool/internal/header/trie.go b/_xtool/internal/header/trie.go index 7469588e..af64ea6a 100644 --- a/_xtool/internal/header/trie.go +++ b/_xtool/internal/header/trie.go @@ -183,11 +183,16 @@ func (t *Trie) Contains(s string) bool { for segment := range t.segmenter(s) { child, ok := node.children[segment] + // if the current node is end, but there's something unmatched, we still consider it valid. + // for example, + // input: /c/b/a, tree: /c/b, valid + // input: /c/b/a, tree: /c/b/c, invalid + // input: /c/b, tree: /c/b/c, valid + if !ok && node.isLeaf { + return true + } if !ok { - if node == t.root { - node = nil - } - break + return false } node = child } diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index 0c60b43a..f528841e 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -83,6 +83,30 @@ func TestTrieContains(t *testing.T) { want: true, }, + { + name: "substring string case 4", + search: "/c/b", + inserted: []string{"/a/b", "/c/b/a"}, + want: true, + }, + { + name: "substring string case 5", + search: "/c/a", + inserted: []string{"/a/b", "/c/b/a"}, + want: false, + }, + { + name: "substring string case 6", + search: "/c/b/c", + inserted: []string{"/a/b", "/c/b/a"}, + want: false, + }, + { + name: "substring string case 7", + search: "/c/b", + inserted: []string{"/a/b", "/c/b/c/a"}, + want: true, + }, { name: "absolute path case 1", search: "a", @@ -356,7 +380,7 @@ func TestTrieReverse(t *testing.T) { { name: "multiple string case 4", - search: "/c/d", + search: "c/d", inserted: []string{"/a/c/d", "/b/c/d", "/c/d/a"}, want: true, }, From 690b3f8e6690e4a2d1f505761fbe1d313ef7b3cd Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 16:50:14 +0800 Subject: [PATCH 08/12] test: add more tests --- _xtool/internal/header/trie_test.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index f528841e..d995bdf4 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -432,6 +432,19 @@ func TestTrieReverse(t *testing.T) { }, want: false, }, + + { + name: "normal case 3", + search: "libxslt/imports.h", + inserted: []string{ + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/zlib/imports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxml2/imports.h", + + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/xsltexports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/variables.h", + }, + want: false, + }, } for _, tc := range testCases { From 8f81a2b52bd9c0f778760cdbf1e2f6073e535f5c Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 17:04:40 +0800 Subject: [PATCH 09/12] chore: rename Contains --- _xtool/internal/header/trie.go | 19 ++++++++----------- _xtool/internal/header/trie_test.go | 6 +++--- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/_xtool/internal/header/trie.go b/_xtool/internal/header/trie.go index af64ea6a..4d57c5c2 100644 --- a/_xtool/internal/header/trie.go +++ b/_xtool/internal/header/trie.go @@ -174,8 +174,8 @@ func (t *Trie) LongestPrefix(s string) string { return filepath.Join(prefix...) } -// Checks if the trie contains the given string as a prefix -func (t *Trie) Contains(s string) bool { +// IsSubsetOf checks the given s is the subset of trie tree +func (t *Trie) IsSubsetOf(s string) bool { if s == "" { return false } @@ -183,16 +183,13 @@ func (t *Trie) Contains(s string) bool { for segment := range t.segmenter(s) { child, ok := node.children[segment] - // if the current node is end, but there's something unmatched, we still consider it valid. - // for example, - // input: /c/b/a, tree: /c/b, valid - // input: /c/b/a, tree: /c/b/c, invalid - // input: /c/b, tree: /c/b/c, valid - if !ok && node.isLeaf { - return true - } if !ok { - return false + // if the current node is end, but there's something unmatched, we still consider it valid. + // for example, + // input: /c/b/a, tree: /c/b, valid + // input: /c/b/a, tree: /c/b/c, invalid + // input: /c/b, tree: /c/b/c, valid + return node.isLeaf } node = child } diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index d995bdf4..56bf249c 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -6,7 +6,7 @@ import ( "github.com/goplus/llcppg/_xtool/internal/header" ) -func TestTrieContains(t *testing.T) { +func TestTrieSubset(t *testing.T) { testCases := []struct { name string search string @@ -128,7 +128,7 @@ func TestTrieContains(t *testing.T) { for _, i := range tc.inserted { trie.Insert(i) } - if got := trie.Contains(tc.search); got != tc.want { + if got := trie.IsSubsetOf(tc.search); got != tc.want { t.Fatalf("unexpected result: want %v got %v", tc.want, got) } }) @@ -454,7 +454,7 @@ func TestTrieReverse(t *testing.T) { for _, i := range tc.inserted { trie.Insert(i) } - if got := trie.Contains(tc.search); got != tc.want { + if got := trie.IsSubsetOf(tc.search); got != tc.want { t.Fatalf("unexpected result: want %v got %v", tc.want, got) } }) From 1102c985b95e718029a7af0fc98c0df744fda5a6 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 17:06:08 +0800 Subject: [PATCH 10/12] test: remove duplicated test --- _xtool/internal/header/trie_test.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index 56bf249c..d816b00a 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -172,12 +172,6 @@ func TestTrieSearch(t *testing.T) { inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, want: false, }, - { - name: "Mixed path separators", - search: "/usr/local/bin/", - inserted: []string{"/usr/local/bin/"}, - want: true, - }, { name: "Non-existent path", search: "/non/existent/path", From 378033970f038da7a46e9474d650fa2c54225ede Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 18:49:47 +0800 Subject: [PATCH 11/12] feat: use DFS to scan the longest common prefix --- _xtool/internal/header/trie.go | 28 ++++++++++-------- _xtool/internal/header/trie_test.go | 44 +++++++++-------------------- 2 files changed, 29 insertions(+), 43 deletions(-) diff --git a/_xtool/internal/header/trie.go b/_xtool/internal/header/trie.go index 4d57c5c2..b246c3ee 100644 --- a/_xtool/internal/header/trie.go +++ b/_xtool/internal/header/trie.go @@ -153,25 +153,29 @@ func (t *Trie) searchPrefix(s string) *TrieNode { // Returns the longest prefix that exists in the trie // // Implement Source: https://leetcode.com/problems/longest-common-prefix/solutions/127449/longest-common-prefix -func (t *Trie) LongestPrefix(s string) string { +func (t *Trie) LongestPrefix() string { var prefix []string - node := t.root - - for segment := range t.segmenter(s) { - child := node.children[segment] + dfs(&prefix, "", t.root, nil) - isLongestPrefix := child != nil && node.linkCount == 1 && !node.isLeaf + return filepath.Join(prefix...) +} - if !isLongestPrefix { - break - } +func dfs(prefix *[]string, currentPrefix string, node, parent *TrieNode) { + if node == nil { + return + } + if parent != nil && (parent.linkCount != 1 || parent.isLeaf) { + return + } - prefix = append(prefix, segment) - node = child + if currentPrefix != "" { + *prefix = append(*prefix, currentPrefix) } - return filepath.Join(prefix...) + for current, child := range node.children { + dfs(prefix, current, child, node) + } } // IsSubsetOf checks the given s is the subset of trie tree diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index d816b00a..a01fb314 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -221,67 +221,51 @@ func TestTrieLongestPrefix(t *testing.T) { tests := []struct { name string inserted []string - input string want string }{ { name: "Empty trie", inserted: []string{}, - input: "/usr/local/bin", want: "", }, { name: "Single directory exact match", inserted: []string{"/usr/local/bin/"}, - input: "/usr/local/bin", want: "/usr/local/bin", }, { name: "Single directory partial match", - inserted: []string{"/usr/local/bin/"}, - input: "/usr/local/bin/python", + inserted: []string{"/usr/local/bin/", "/usr/local/bin/python"}, want: "/usr/local/bin", }, { name: "Multiple directories with common prefix", - inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, - input: "/usr/local/bin/python", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/", "/usr/local/bin/python"}, want: "/usr", }, { name: "No common prefix", - inserted: []string{"/home/user/", "/var/log/", "/tmp/"}, - input: "/etc/passwd", + inserted: []string{"/home/user/", "/var/log/", "/tmp/", "/etc/passwd"}, want: "", }, { name: "Reverse path match", - inserted: []string{"bin", "lib", "include"}, - input: "include/lib/bin", + inserted: []string{"bin", "lib", "include", "include/lib/bin"}, want: "", }, { name: "Longer input than stored", - inserted: []string{"/short/"}, - input: "/shorter/path", - want: "", - }, - { - name: "Empty input", - inserted: []string{"/test/"}, - input: "", + inserted: []string{"/short/", "/shorter/path"}, want: "", }, { name: "No match", - inserted: []string{"/apple/", "/banana/"}, - input: "/cherry/", + inserted: []string{"/apple/", "/banana/", "/cherry/"}, want: "", }, { name: "Partial reverse match", - inserted: []string{"bin", "lib", "include"}, - input: "lib/bin", + inserted: []string{"bin", "lib", "include", "lib/bin"}, want: "", }, { @@ -289,20 +273,18 @@ func TestTrieLongestPrefix(t *testing.T) { inserted: []string{ "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON.h", "/opt/homebrew/Cellar/cjson/1.7.18/include/zlib/zlib.h", + "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON/cJSON.h", }, - input: "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON/cJSON.h", - want: "/opt/homebrew/Cellar/cjson/1.7.18/include", + want: "/opt/homebrew/Cellar/cjson/1.7.18/include", }, { name: "absolute path case 1", - inserted: []string{"/usr", "usr", "/usr/include"}, - input: "/usr", + inserted: []string{"/usr", "usr", "/usr/include", "/usr"}, want: "", }, { name: "absolute path case 2", - inserted: []string{"usr/share", "/usr", "usr/include"}, - input: "usr/include/share", + inserted: []string{"usr/share", "/usr", "usr/include", "usr/include/share"}, want: "", }, } @@ -313,9 +295,9 @@ func TestTrieLongestPrefix(t *testing.T) { for _, word := range tt.inserted { trie.Insert(word) } - result := trie.LongestPrefix(tt.input) + result := trie.LongestPrefix() if result != tt.want { - t.Errorf("LongestPrefix(%q) = %q, want %q", tt.input, result, tt.want) + t.Errorf("LongestPrefix(%q) = %q, want %q", tt.inserted, result, tt.want) } }) } From c0263ed9726ddffc1da0ea0d88665c2b23a72210 Mon Sep 17 00:00:00 2001 From: Haolan Date: Fri, 6 Jun 2025 18:53:08 +0800 Subject: [PATCH 12/12] chore: rename IsSubset --- _xtool/internal/header/trie.go | 4 ++-- _xtool/internal/header/trie_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/_xtool/internal/header/trie.go b/_xtool/internal/header/trie.go index b246c3ee..bfdd00d7 100644 --- a/_xtool/internal/header/trie.go +++ b/_xtool/internal/header/trie.go @@ -178,8 +178,8 @@ func dfs(prefix *[]string, currentPrefix string, node, parent *TrieNode) { } } -// IsSubsetOf checks the given s is the subset of trie tree -func (t *Trie) IsSubsetOf(s string) bool { +// IsOnSameBranch checks the given s is the subset of trie tree +func (t *Trie) IsOnSameBranch(s string) bool { if s == "" { return false } diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go index a01fb314..d770ab3b 100644 --- a/_xtool/internal/header/trie_test.go +++ b/_xtool/internal/header/trie_test.go @@ -128,7 +128,7 @@ func TestTrieSubset(t *testing.T) { for _, i := range tc.inserted { trie.Insert(i) } - if got := trie.IsSubsetOf(tc.search); got != tc.want { + if got := trie.IsOnSameBranch(tc.search); got != tc.want { t.Fatalf("unexpected result: want %v got %v", tc.want, got) } }) @@ -430,7 +430,7 @@ func TestTrieReverse(t *testing.T) { for _, i := range tc.inserted { trie.Insert(i) } - if got := trie.IsSubsetOf(tc.search); got != tc.want { + if got := trie.IsOnSameBranch(tc.search); got != tc.want { t.Fatalf("unexpected result: want %v got %v", tc.want, got) } })