diff --git a/changes/20251124113919.feature b/changes/20251124113919.feature new file mode 100644 index 0000000000..9772d95fcc --- /dev/null +++ b/changes/20251124113919.feature @@ -0,0 +1 @@ +:sparkles: `[url]` Add a new package containing url helper functions diff --git a/utils/url/url.go b/utils/url/url.go new file mode 100644 index 0000000000..cb931d1659 --- /dev/null +++ b/utils/url/url.go @@ -0,0 +1,167 @@ +package url + +import ( + netUrl "net/url" + "path" + "regexp" + "strings" + + "github.com/ARM-software/golang-utils/utils/collection" + "github.com/ARM-software/golang-utils/utils/commonerrors" + "github.com/ARM-software/golang-utils/utils/reflection" +) + +const ( + defaultPathSeparator = "/" + minimumPathParameterLength = 3 +) + +// Section 3.3 of RFC3986 details valid characters for path segments (see https://datatracker.ietf.org/doc/html/rfc3986#section-3.3) +var validPathRegex = regexp.MustCompile(`^(?:[A-Za-z0-9._~\-!$&'()*+,;=:@{}]|%[0-9A-Fa-f]{2})+$`) + +// PathSegmentMatcherFunc defines the signature for path segment matcher functions. +type PathSegmentMatcherFunc = func(segmentA, segmentB string) (match bool, err error) + +// ValidatePathParameter checks whether a path parameter is valid. An error is returned if it is invalid. 
+// Version 3.1.0 of the OpenAPI spec provides some guidance for path parameter values (see https://spec.openapis.org/oas/v3.1.0.html#path-templating).
+// The parameter must satisfy MatchesPathParameterSyntax both as given and after percent-decoding, so braces hidden behind %7B / %7D are rejected as well.
+// The decoded value may only contain the characters accepted by validPathRegex. Every failure is reported with commonerrors.ErrInvalid.
+func ValidatePathParameter(parameter string) error {
+	if !MatchesPathParameterSyntax(parameter) {
+		return commonerrors.Newf(commonerrors.ErrInvalid, "parameter %q must not be empty, cannot contain only whitespaces, have a length greater than or equal to three, start with an opening brace, and end with a closing brace", parameter)
+	}
+
+	unescapedSegment, err := netUrl.PathUnescape(parameter)
+	if err != nil {
+		return commonerrors.WrapErrorf(commonerrors.ErrInvalid, err, "an error occurred during path unescaping for parameter %q", parameter)
+	}
+
+	// The brace checks above ran on the escaped form; repeat them on the decoded form because validPathRegex accepts braces anywhere.
+	if !MatchesPathParameterSyntax(unescapedSegment) || !validPathRegex.MatchString(unescapedSegment) {
+		return commonerrors.Newf(commonerrors.ErrInvalid, "parameter %q unescaped to %q must be enclosed in a single pair of braces and can otherwise only contain alphanumeric characters, the symbols -._~!$&'()*+,;=:@ and percent-encoded octets", parameter, unescapedSegment)
+	}
+
+	return nil
+}
+
+// MatchesPathParameterSyntax checks whether the parameter string matches the syntax for a path parameter as described by the OpenAPI spec (see https://spec.openapis.org/oas/v3.0.0.html#path-templating).
+// It returns true only for a non-empty string of at least three characters that starts with "{", ends with "}" and contains no other brace. The string is inspected as given, without any unescaping.
+func MatchesPathParameterSyntax(parameter string) bool {
+	if reflection.IsEmpty(parameter) || len(parameter) < minimumPathParameterLength {
+		return false
+	}
+
+	if !strings.HasPrefix(parameter, "{") || !strings.HasSuffix(parameter, "}") {
+		return false
+	}
+
+	return strings.Count(parameter, "{") == 1 && strings.Count(parameter, "}") == 1
+}
+
+// HasMatchingPathSegments checks whether two path strings match based on their segments by doing a simple equality check on each path segment pair.
+func HasMatchingPathSegments(pathA, pathB string) (match bool, err error) {
+	return MatchingPathSegments(pathA, pathB, BasicEqualityPathSegmentMatcher)
+}
+
+// HasMatchingPathSegmentsWithParams compares two paths segment by segment like HasMatchingPathSegments, except that a segment written as a path parameter (e.g. {param}) in either path matches any non-empty segment in the other path.
+//
+//	HasMatchingPathSegmentsWithParams("/some/{param}/path", "/some/{param}/path") // true
+//	HasMatchingPathSegmentsWithParams("/some/abc/path", "/some/{param}/path") // true
+//	HasMatchingPathSegmentsWithParams("/some/abc/path", "/some/def/path") // false
+func HasMatchingPathSegmentsWithParams(pathA, pathB string) (match bool, err error) {
+	return MatchingPathSegments(pathA, pathB, BasicEqualityPathSegmentWithParamMatcher)
+}
+
+// BasicEqualityPathSegmentMatcher is the simplest PathSegmentMatcherFunc: two segments match only when they are identical strings.
+// The returned error is always nil.
+func BasicEqualityPathSegmentMatcher(segmentA, segmentB string) (match bool, err error) {
+	return segmentA == segmentB, nil
+}
+
+// BasicEqualityPathSegmentWithParamMatcher is a PathSegmentMatcherFunc that is similar to BasicEqualityPathSegmentMatcher but accounts for path parameter segments.
+func BasicEqualityPathSegmentWithParamMatcher(segmentA, segmentB string) (match bool, err error) { + if MatchesPathParameterSyntax(segmentA) { + if errValidatePathASeg := ValidatePathParameter(segmentA); errValidatePathASeg != nil { + err = commonerrors.WrapErrorf(commonerrors.ErrInvalid, errValidatePathASeg, "an error occurred while validating path parameter %q", segmentA) + return + } + + match = !reflection.IsEmpty(segmentB) // a valid parameter segment matches any segment that is not empty + return + } + + if MatchesPathParameterSyntax(segmentB) { + if errValidatePathBSeg := ValidatePathParameter(segmentB); errValidatePathBSeg != nil { + err = commonerrors.WrapErrorf(commonerrors.ErrInvalid, errValidatePathBSeg, "an error occurred while validating path parameter %q", segmentB) + return + } + + match = !reflection.IsEmpty(segmentA) // same rule with the roles of the two segments swapped + return + } + + return BasicEqualityPathSegmentMatcher(segmentA, segmentB) +} + +// MatchingPathSegments checks whether two path strings match based on their segments using the provided matcher function. An error is returned if either path is empty, if matcherFn is nil, if a path cannot be unescaped, or if matcherFn itself fails. NOTE(review): each path is unescaped as a whole before it is split, so an escaped separator (%2F) ends up acting as a segment boundary - confirm this is intended. +func MatchingPathSegments(pathA, pathB string, matcherFn PathSegmentMatcherFunc) (match bool, err error) { + if reflection.IsEmpty(pathA) { + err = commonerrors.UndefinedVariable("path A") + return + } + + if reflection.IsEmpty(pathB) { + err = commonerrors.UndefinedVariable("path B") + return + } + + if matcherFn == nil { + err = commonerrors.UndefinedVariable("segment matcher function") + return + } + + unescapedPathA, errPathASeg := netUrl.PathUnescape(pathA) + if errPathASeg != nil { + err = commonerrors.WrapErrorf(commonerrors.ErrUnexpected, errPathASeg, "an error occurred while unescaping path %q", pathA) + return + } + + unescapedPathB, errPathBSeg := netUrl.PathUnescape(pathB) + if errPathBSeg != nil { + err = commonerrors.WrapErrorf(commonerrors.ErrUnexpected, errPathBSeg, "an error occurred while unescaping path %q", pathB) + return + } + + pathASegments := SplitPath(unescapedPathA) + pathBSegments := SplitPath(unescapedPathB) + if len(pathASegments) != len(pathBSegments) { // a different number of segments is reported as "no match", not as an error + return + } 
+ + for i := range pathBSegments { // both slices have the same length here, so indexing pathASegments with i is safe + match, err = matcherFn(pathASegments[i], pathBSegments[i]) + if err != nil { + err = commonerrors.WrapErrorf(commonerrors.ErrUnexpected, err, "an error occurred during execution of the matcher function for path segments %q and %q", pathASegments[i], pathBSegments[i]) + return + } + + if !match { + return + } + } + + match = true + return +} + +// SplitPath returns a slice containing the individual segments that make up the path string p. An empty slice is returned when p is empty according to reflection.IsEmpty. +// It normalises p with path.Clean, strips leading and trailing forward slashes, and then splits on the default forward slash path separator. +func SplitPath(p string) []string { + if reflection.IsEmpty(p) { + return []string{} + } + + p = path.Clean(p) + p = strings.Trim(p, defaultPathSeparator) + return collection.ParseListWithCleanup(p, defaultPathSeparator) +} diff --git a/utils/url/url_test.go b/utils/url/url_test.go new file mode 100644 index 0000000000..c761dbfda6 --- /dev/null +++ b/utils/url/url_test.go @@ -0,0 +1,499 @@ +package url + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/ARM-software/golang-utils/utils/commonerrors" + "github.com/ARM-software/golang-utils/utils/commonerrors/errortest" +) + +func TestUrl_MatchesPathParameterSyntax(t *testing.T) { + tests := []struct { + name string + parameter string + result bool + }{ + { + "valid", + "{abc}", + true, + }, + { + "with encoded underscore", + "{abc%5F1}", // unescaped as '{abc_1}' + true, + }, + { + "only whitespace", + " ", + false, + }, + { + "missing opening brace", + "abc}", + false, + }, + { + "missing closing brace", + "{abc", + false, + }, + { + "missing both braces", + "abc", + false, + }, + { + "contains multiple braces", + "{{abc}}", + false, + }, + { + "with encoded asterisk", + "{abc%2A123}", // unescaped as '{abc*123}' + true, + }, + { + "with encoded space", + "{%20abc%20}", // unescaped as '{ abc }' + true, + }, + { + "with valid special characters", + "{abc$123.zzz~999}", + true, + }, + } + + for i := range tests { + test := tests[i] + 
t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.result, MatchesPathParameterSyntax(test.parameter)) + }) + } +} + +func TestUrl_ValidatePathParameter(t *testing.T) { + tests := []struct { + name string + parameter string + err error + }{ + { + "valid", + "{abc}", + nil, + }, + { + "with valid special characters", + "{abc.-_+$@!123(a)}", + nil, + }, + { + "with encoded underscore", + "{abc%5F1}", // unescaped as '{abc_1}' + nil, + }, + { + "missing opening brace", + "abc}", + commonerrors.ErrInvalid, + }, + { + "missing closing brace", + "{abc", + commonerrors.ErrInvalid, + }, + { + "missing both braces", + "abc", + commonerrors.ErrInvalid, + }, + { + "contains multiple braces", + "{{abc}}", + commonerrors.ErrInvalid, + }, + { + "with encoded asterisk", + "{abc%2A123}", // unescaped as '{abc*123}' + nil, + }, + { + "with encoded hash", + "{abc%23123}", // unescaped as '{abc#123}' + commonerrors.ErrInvalid, + }, + { + "with encoded space", + "{%20abc%20}", // unescaped as '{ abc }' + commonerrors.ErrInvalid, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + errortest.AssertError(t, ValidatePathParameter(test.parameter), test.err) + }) + } +} + +func TestUrl_HasMatchingPathSegments(t *testing.T) { + tests := []struct { + name string + pathA string + pathB string + result bool + err error + }{ + { + "empty pathA", + "", + "abc/123", + false, + commonerrors.ErrUndefined, + }, + { + "empty pathB", + "abc/123", + "", + false, + commonerrors.ErrUndefined, + }, + { + "identical paths", + "abc/123", + "abc/123", + true, + nil, + }, + { + "identical paths with multiple segments", + "abc/123/def/456/zzz", + "abc/123/def/456/zzz", + true, + nil, + }, + { + "root paths", + "/", + "/", + true, + nil, + }, + { + "paths with different segment values", + "abc/123", + "abc/456", + false, + nil, + }, + { + "paths with different lengths", + "abc/123", + "abc/123/456", + false, + nil, + }, + { + "path with trailing slashes", + 
"/abc/123/", + "abc/123", + true, + nil, + }, + { + "paths with repeated slashes", + "//abc///123/", + "abc//123/////", + true, + nil, + }, + { + "path with valid encoding", + "abc/123%5F456", // unescaped as 'abc/123_456' + "abc/123_456", + true, + nil, + }, + { + "path with invalid encoding", + "abc/%$#%*123", + "abc/123", + false, + commonerrors.ErrUnexpected, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + match, err := HasMatchingPathSegments(test.pathA, test.pathB) + errortest.AssertError(t, err, test.err) + assert.Equal(t, test.result, match) + }) + } +} + +func TestUrl_HasMatchingPathSegmentsWithParams(t *testing.T) { + tests := []struct { + name string + pathA string + pathB string + result bool + err error + }{ + { + "empty pathA", + "", + "abc/123", + false, + commonerrors.ErrUndefined, + }, + { + "empty pathB", + "abc/123", + "", + false, + commonerrors.ErrUndefined, + }, + { + "identical paths", + "abc/123", + "abc/123", + true, + nil, + }, + { + "identical paths with repeated slashes", + "abc///123//", + "//abc/123///", + true, + nil, + }, + { + "identical paths with multiple segments", + "abc/123/def/456/zzz", + "abc/123/def/456/zzz", + true, + nil, + }, + { + "path with parameter segment", + "/abc/{id}/123", + "/abc/123/123", + true, + nil, + }, + { + "both paths with matching parameter segments", + "/abc/{param}/123", + "/abc/{param}/123", + true, + nil, + }, + { + "both paths with different parameter segments", + "/abc/{id}/123", + "/abc/{val}/123", + true, + nil, + }, + { + "paths with different segments", + "/abc/123/xyz", + "/def/123/zzz", + false, + nil, + }, + { + "paths with different segments with parameter", + "/abc/{param}/123", + "/def/123/zzz", + false, + nil, + }, + { + "paths with different lengths and params", + "/abc/{param}", + "/abc/{param}/123", + false, + nil, + }, + { + "path with valid encoding in parameter segment", + "abc/{param%2D1}", // unescaped as 'abc/{param-1}' + 
"abc/123", + true, + nil, + }, + { + "path with invalid encoding in parameter segment", + "abc/{%$#%*param}", + "abc/123", + false, + commonerrors.ErrUnexpected, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + match, err := HasMatchingPathSegmentsWithParams(test.pathA, test.pathB) + errortest.AssertError(t, err, test.err) + assert.Equal(t, test.result, match) + }) + } +} + +func TestUrl_MatchingPathSegments(t *testing.T) { + tests := []struct { + name string + pathA string + pathB string + matcherFn PathSegmentMatcherFunc + result bool + err error + }{ + { + "empty pathA", + "", + "abc/123", + BasicEqualityPathSegmentMatcher, + false, + commonerrors.ErrUndefined, + }, + { + "empty pathB", + "abc/123", + "", + BasicEqualityPathSegmentMatcher, + false, + commonerrors.ErrUndefined, + }, + { + "path with valid encoding", + "abc/123%5F456", // unescaped as 'abc/123_456' + "abc/123_456", + BasicEqualityPathSegmentMatcher, + true, + nil, + }, + { + "path with invalid encoding", + "abc/%$#%*123", + "abc/123", + BasicEqualityPathSegmentMatcher, + false, + commonerrors.ErrUnexpected, + }, + { + "paths with different segments with parameter", + "/abc/{param}/123", + "/def/123/zzz", + BasicEqualityPathSegmentWithParamMatcher, + false, + nil, + }, + { + "paths with different lengths and params", + "/abc/{param}", + "/abc/{param}/123", + BasicEqualityPathSegmentWithParamMatcher, + false, + nil, + }, + { + "matching paths when using a custom matcher function", + "/abc/||zzz||/123", + "/abc/||{param}||/123", + func(segmentA string, segmentB string) (match bool, err error) { + segmentA = strings.Trim(segmentA, "|") + segmentB = strings.Trim(segmentB, "|") + return BasicEqualityPathSegmentWithParamMatcher(segmentA, segmentB) + }, + true, + nil, + }, + { + "non-matching paths when using a custom matcher function", + "/abc/##zzz||/123", + "/abc/||{param}##/123", + func(segmentA string, segmentB string) (match bool, err error) { + 
segmentA = strings.Trim(segmentA, "#") + segmentB = strings.Trim(segmentB, "|") + return BasicEqualityPathSegmentWithParamMatcher(segmentA, segmentB) + }, + false, + nil, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + match, err := MatchingPathSegments(test.pathA, test.pathB, test.matcherFn) + errortest.AssertError(t, err, test.err) + assert.Equal(t, test.result, match) + }) + } +} + +func TestUrl_SplitPath(t *testing.T) { + tests := []struct { + name string + path string + result []string + }{ + { + "empty path", + "", + []string{}, + }, + { + "root path", + "/", + []string{"/"}, // NOTE(review): SplitPath trims every "/" from the cleaned path before splitting, so it is unclear that "/" is really returned here - confirm + }, + { + "root path with repeated slashes", + "///", + []string{"/"}, // NOTE(review): same doubt as for the "root path" case above - confirm + }, + { + "path with one segment", + "abc", + []string{"abc"}, + }, + { + "path with two segments", + "abc/123", + []string{"abc", "123"}, + }, + { + "path with multiple segments", + "abc/123/def/456", + []string{"abc", "123", "def", "456"}, + }, + { + "path with multiple segments including param segment", + "abc/123/def/456/zzz/{param1}/999", + []string{"abc", "123", "def", "456", "zzz", "{param1}", "999"}, + }, + } + + for i := range tests { + test := tests[i] + t.Run(test.name, func(t *testing.T) { + segments := SplitPath(test.path) + + for i, s := range segments { // NOTE(review): only the returned segments are visited, so an empty or too-short result passes without any comparison - consider asserting the length as well + assert.Equal(t, test.result[i], s) + } + }) + } +} diff --git a/utils/validation/rules.go b/utils/validation/rules.go index a51331782e..e33ecc36b1 100644 --- a/utils/validation/rules.go +++ b/utils/validation/rules.go @@ -9,6 +9,7 @@ import ( "github.com/ARM-software/golang-utils/utils/commonerrors" "github.com/ARM-software/golang-utils/utils/encoding/base64" + "github.com/ARM-software/golang-utils/utils/url" ) // IsPort validates whether a value is a port using is.Port from github.com/go-ozzo/ozzo-validation/v4. @@ -41,3 +42,11 @@ func isPort(vRaw any) (err error) { // IsBase64 validates whether a value is a base64 encoded string. 
It is similar to is.Base64 but more generic and robust although less performant.
 var IsBase64 = validation.NewStringRuleWithError(base64.IsEncoded, is.ErrBase64)
+
+// IsPathParameter is a validation rule that accepts a string only when it is a valid path parameter of a url.
+var IsPathParameter = validation.NewStringRule(isValidPathParameter, "invalid path parameter")
+
+// isValidPathParameter reports whether value passes url.ValidatePathParameter, turning its error result into the boolean passed to validation.NewStringRule.
+func isValidPathParameter(value string) bool {
+	return url.ValidatePathParameter(value) == nil
+}
diff --git a/utils/validation/rules_test.go b/utils/validation/rules_test.go
index 3987bd2dd2..5499a8a03c 100644
--- a/utils/validation/rules_test.go
+++ b/utils/validation/rules_test.go
@@ -97,3 +97,31 @@ func TestIsBase64Encoded(t *testing.T) {
 		})
 	}
 }
+
+// TestIsPathParameter checks which inputs the IsPathParameter rule accepts and that rejected inputs carry the rule's error description.
+func TestIsPathParameter(t *testing.T) {
+	cases := []struct {
+		input    string
+		expected bool
+	}{
+		{"{abc}", true},
+		{"abc}", false},
+		{"{abc", false},
+		{"abc", false},
+		{"{abc$123.zzz~999}", true},
+		{"{abc%5F1}", true}, // unescaped as '{abc_1}'
+		{"{abc#123}", false},
+		{" ", false},
+	}
+
+	for i := range cases {
+		tc := cases[i]
+		t.Run(tc.input, func(t *testing.T) {
+			if err := validation.Validate(tc.input, IsPathParameter); tc.expected {
+				require.NoError(t, err)
+			} else {
+				errortest.AssertErrorDescription(t, err, "invalid path parameter")
+			}
+		})
+	}
+}