From c4887ad828140256fe90877f1467a0d656c9a9f4 Mon Sep 17 00:00:00 2001 From: MazenSamehR Date: Sun, 30 Nov 2025 14:19:13 +0200 Subject: [PATCH 1/2] Add XSD anyURI validator and tests --- README.md | 1 + src/index.js | 2 + src/lib/isXsdAnyURI.js | 301 ++++++++++++++++++++++++++++++++++++++++ test/validators.test.js | 63 +++++++++ 4 files changed, 367 insertions(+) create mode 100644 src/lib/isXsdAnyURI.js diff --git a/README.md b/README.md index 37fbd5e3d..f783aa622 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,7 @@ Validator | Description **isTime(str [, options])** | check if the string is a valid time e.g. [`23:01:59`, new Date().toLocaleTimeString()].

`options` is an object which can contain the keys `hourFormat` or `mode`.

`hourFormat` is a key and defaults to `'hour24'`.

`mode` is a key and defaults to `'default'`.

`hourFormat` can contain the values `'hour12'` or `'hour24'`, `'hour24'` will validate hours in 24 format and `'hour12'` will validate hours in 12 format.

`mode` can contain the values `'default', 'withSeconds', withOptionalSeconds`, `'default'` will validate `HH:MM` format, `'withSeconds'` will validate the `HH:MM:SS` format, `'withOptionalSeconds'` will validate `'HH:MM'` and `'HH:MM:SS'` formats. **isTaxID(str, locale)** | check if the string is a valid Tax Identification Number. Default locale is `en-US`.

More info about exact TIN support can be found in `src/lib/isTaxID.js`.

Supported locales: `[ 'bg-BG', 'cs-CZ', 'de-AT', 'de-DE', 'dk-DK', 'el-CY', 'el-GR', 'en-CA', 'en-GB', 'en-IE', 'en-US', 'es-AR', 'es-ES', 'et-EE', 'fi-FI', 'fr-BE', 'fr-CA', 'fr-FR', 'fr-LU', 'hr-HR', 'hu-HU', 'it-IT', 'lb-LU', 'lt-LT', 'lv-LV', 'mt-MT', 'nl-BE', 'nl-NL', 'pl-PL', 'pt-BR', 'pt-PT', 'ro-RO', 'sk-SK', 'sl-SI', 'sv-SE', 'uk-UA']`. **isURL(str [, options])** | check if the string is a URL.

`options` is an object which defaults to `{ protocols: ['http','https','ftp'], require_tld: true, require_protocol: false, require_host: true, require_port: false, require_valid_protocol: true, allow_underscores: false, host_whitelist: false, host_blacklist: false, allow_trailing_dot: false, allow_protocol_relative_urls: false, allow_fragments: true, allow_query_components: true, disallow_auth: false, validate_length: true }`.

`protocols` - valid protocols can be modified with this option.
`require_tld` - If set to false isURL will not check if the URL's host includes a top-level domain.
`require_protocol` - **RECOMMENDED** if set to true isURL will return false if protocol is not present in the URL. Without this setting, some malicious URLs cannot be distinguishable from a valid URL with authentication information.
`require_host` - if set to false isURL will not check if host is present in the URL.
`require_port` - if set to true isURL will check if port is present in the URL.
`require_valid_protocol` - isURL will check if the URL's protocol is present in the protocols option.
`allow_underscores` - if set to true, the validator will allow underscores in the URL.
`host_whitelist` - if set to an array of strings or regexp, and the domain matches none of the strings defined in it, the validation fails.
`host_blacklist` - if set to an array of strings or regexp, and the domain matches any of the strings defined in it, the validation fails.
`allow_trailing_dot` - if set to true, the validator will allow the domain to end with a `.` character.
`allow_protocol_relative_urls` - if set to true protocol relative URLs will be allowed.
`allow_fragments` - if set to false isURL will return false if fragments are present.
`allow_query_components` - if set to false isURL will return false if query components are present.
`disallow_auth` - if set to true, the validator will fail if the URL contains an authentication component, e.g. `http://username:password@example.com`.
`validate_length` - if set to false isURL will skip string length validation. `max_allowed_length` will be ignored if this is set as `false`.
`max_allowed_length` - if set, isURL will not allow URLs longer than the specified value (default is 2084 that IE maximum URL length).
+**isXsdAnyURI(str)** | check if the string conforms to the [XML Schema `anyURI` type](https://www.w3.org/TR/xmlschema-2/#anyURI). XML whitespace is collapsed before validation: tabs and line breaks are replaced with spaces, runs of spaces are reduced to a single space, and leading/trailing whitespace is removed. Any remaining spaces and non-ASCII characters are then percent-encoded via `encodeURI` semantics before the RFC 3986 rules are applied. Both absolute and relative references (including query-only or fragment-only references) are supported, and the empty string is considered valid. **isULID(str)** | check if the string is a [ULID](https://github.com/ulid/spec). **isUUID(str [, version])** | check if the string is an RFC9562 UUID.
`version` is one of `'1'`-`'8'`, `'nil'`, `'max'`, `'all'` or `'loose'`. The `'loose'` option checks if the string is a UUID-like string with hexadecimal values, ignoring RFC9565. **isVariableWidth(str)** | check if the string contains a mixture of full and half-width chars. diff --git a/src/index.js b/src/index.js index ccbcc4188..0fb21d77f 100644 --- a/src/index.js +++ b/src/index.js @@ -8,6 +8,7 @@ import matches from './lib/matches'; import isEmail from './lib/isEmail'; import isURL from './lib/isURL'; +import isXsdAnyURI from './lib/isXsdAnyURI'; import isMACAddress from './lib/isMACAddress'; import isIP from './lib/isIP'; import isIPRange from './lib/isIPRange'; @@ -143,6 +144,7 @@ const validator = { matches, isEmail, isURL, + isXsdAnyURI, isMACAddress, isIP, isIPRange, diff --git a/src/lib/isXsdAnyURI.js b/src/lib/isXsdAnyURI.js new file mode 100644 index 000000000..dbde7c9a4 --- /dev/null +++ b/src/lib/isXsdAnyURI.js @@ -0,0 +1,301 @@ +import assertString from './util/assertString'; +import isIP from './isIP'; + +const MULTIPLE_SPACES_REGEX = / {2,}/g; +const INVALID_PERCENT_REGEX = /%(?![0-9A-Fa-f]{2})/; +const SCHEME_REGEX = /^[A-Za-z][A-Za-z0-9+.-]*$/; +const BACKSLASH_REGEX = /\\/; +const DISALLOWED_ASCII_REGEX = /["<>^`{}|]/; +const OPEN_BRACKET_PLACEHOLDER = '__VALIDATOR_OPEN_BRACKET__'; +const CLOSE_BRACKET_PLACEHOLDER = '__VALIDATOR_CLOSE_BRACKET__'; + +const HEX_DIGIT = '[0-9A-Fa-f]'; +const PCT_ENCODED = `%${HEX_DIGIT}{2}`; +const UNRESERVED = 'A-Za-z0-9\\-._~'; +const SUB_DELIMS = "!$&'()*+,;="; +const PCHAR = `(?:[${UNRESERVED}]|${PCT_ENCODED}|[${SUB_DELIMS}:@])`; +const SEGMENT = `(?:${PCHAR})*`; +const SEGMENT_NZ = `(?:${PCHAR})+`; +const SEGMENT_NZ_NC = `(?:${PCT_ENCODED}|[${UNRESERVED}${SUB_DELIMS}@])+`; + +const PATH_ABEMPTY_REGEX = new RegExp(`^(?:/${SEGMENT})*$`); +const PATH_ABSOLUTE_REGEX = new RegExp(`^/(?:${SEGMENT_NZ}(?:/${SEGMENT})*)?$`); +const PATH_ROOTLESS_REGEX = new RegExp(`^${SEGMENT_NZ}(?:/${SEGMENT})*$`); +const 
PATH_NOSCHEME_REGEX = new RegExp(`^(?:${SEGMENT_NZ_NC})(?:/${SEGMENT})*$`); +const QUERY_FRAGMENT_REGEX = new RegExp(`^(?:${PCHAR}|[/?])*$`); +const USERINFO_REGEX = new RegExp(`^(?:${PCT_ENCODED}|[${UNRESERVED}${SUB_DELIMS}:])*$`); +const REG_NAME_REGEX = new RegExp(`^(?:${PCT_ENCODED}|[${UNRESERVED}${SUB_DELIMS}])*$`); +const IPV_FUTURE_REGEX = /^v[0-9A-F]+\.[A-Za-z0-9._~!$&'()*+,;=:-]+$/i; + +function collapseXmlWhitespace(input) { + let normalized = ''; + + for (let i = 0; i < input.length; i += 1) { + const code = input.charCodeAt(i); + + if (code === 0x09 || code === 0x0a || code === 0x0d) { + normalized += ' '; + } else { + normalized += input[i]; + } + } + + return normalized.replace(MULTIPLE_SPACES_REGEX, ' ').trim(); +} + +function containsForbiddenControl(value) { + for (let i = 0; i < value.length; i += 1) { + const code = value.charCodeAt(i); + + if ( + (code >= 0x00 && code <= 0x08) || + code === 0x0b || + code === 0x0c || + (code >= 0x0e && code <= 0x1f) || + code === 0x7f + ) { + return true; + } + } + + return false; +} + +function hasInvalidPercentEncoding(input) { + return INVALID_PERCENT_REGEX.test(input); +} + +function isIPvFuture(address) { + return IPV_FUTURE_REGEX.test(address); +} + +function isValidAuthority(authority, options) { + const allowEmptyAuthority = Boolean(options && options.allowEmptyAuthority); + if (authority === '') { + return !!allowEmptyAuthority; + } + + let hostPort = authority; + let userinfo = ''; + const atIndex = authority.lastIndexOf('@'); + + if (atIndex !== -1) { + userinfo = authority.slice(0, atIndex); + hostPort = authority.slice(atIndex + 1); + + if (!USERINFO_REGEX.test(userinfo)) { + return false; + } + } + + let host = hostPort; + let port = null; + let hasHost = false; + + if (hostPort.startsWith('[')) { + const closingIndex = hostPort.indexOf(']'); + + if (closingIndex === -1) { + return false; + } + + const address = hostPort.slice(1, closingIndex); + + if (!isIP(address, 6) && !isIPvFuture(address)) { 
+ return false; + } + + const remainder = hostPort.slice(closingIndex + 1); + + if (remainder) { + if (!remainder.startsWith(':')) { + return false; + } + + port = remainder.slice(1); + } + + host = ''; + hasHost = true; + } else { + const firstColon = hostPort.indexOf(':'); + const lastColon = hostPort.lastIndexOf(':'); + + if (firstColon !== lastColon) { + return false; + } + + if (lastColon !== -1) { + host = hostPort.slice(0, lastColon); + port = hostPort.slice(lastColon + 1); + } + + if (host) { + hasHost = true; + + if (!isIP(host, 4) && !REG_NAME_REGEX.test(host)) { + return false; + } + } + } + + if (!hasHost) { + return false; + } + + if (port !== null) { + if (port === '' || !/^[0-9]+$/.test(port)) { + return false; + } + + const portNumber = parseInt(port, 10); + + if (Number.isNaN(portNumber) || portNumber > 65535) { + return false; + } + } + + return true; +} + +function isValidPath(path, { hasAuthority, hasScheme }) { + if (hasAuthority) { + return PATH_ABEMPTY_REGEX.test(path); + } + + if (hasScheme) { + if (path === '') { + return true; + } + + if (path.startsWith('/')) { + return PATH_ABSOLUTE_REGEX.test(path); + } + + return PATH_ROOTLESS_REGEX.test(path); + } + + if (path === '') { + return true; + } + + if (path.startsWith('/')) { + return PATH_ABSOLUTE_REGEX.test(path); + } + + return PATH_NOSCHEME_REGEX.test(path); +} + +function isValidQueryOrFragment(value) { + return value === '' || QUERY_FRAGMENT_REGEX.test(value); +} + +function isValidUriReference(value) { + let rest = value; + let scheme = null; + let hadScheme = false; + + const colonIndex = rest.indexOf(':'); + + if (colonIndex > 0) { + const potentialScheme = rest.slice(0, colonIndex); + + if (SCHEME_REGEX.test(potentialScheme)) { + scheme = potentialScheme; + hadScheme = true; + rest = rest.slice(colonIndex + 1); + } + } + + let fragment = ''; + const hashIndex = rest.indexOf('#'); + + if (hashIndex !== -1) { + fragment = rest.slice(hashIndex + 1); + rest = rest.slice(0, hashIndex); 
+ + if (!isValidQueryOrFragment(fragment)) { + return false; + } + } + + let query = ''; + const questionIndex = rest.indexOf('?'); + + if (questionIndex !== -1) { + query = rest.slice(questionIndex + 1); + rest = rest.slice(0, questionIndex); + + if (!isValidQueryOrFragment(query)) { + return false; + } + } + + let hasAuthority = false; + let authority = ''; + let path = rest; + + if (rest.startsWith('//')) { + hasAuthority = true; + rest = rest.slice(2); + const nextSlash = rest.indexOf('/'); + + if (nextSlash === -1) { + authority = rest; + path = ''; + } else { + authority = rest.slice(0, nextSlash); + path = rest.slice(nextSlash); + } + + const allowEmptyAuthority = Boolean(hadScheme && scheme && scheme.toLowerCase() === 'file'); + const authorityOptions = allowEmptyAuthority + ? { allowEmptyAuthority: true } + : undefined; + + if (!isValidAuthority(authority, authorityOptions)) { + return false; + } + } + + return isValidPath(path, { hasAuthority, hasScheme: hadScheme }); +} + +export default function isXsdAnyURI(input) { + assertString(input); + + let value = collapseXmlWhitespace(input); + + if (value === '') { + return true; + } + + if ( + containsForbiddenControl(value) || + hasInvalidPercentEncoding(value) || + BACKSLASH_REGEX.test(value) || + DISALLOWED_ASCII_REGEX.test(value) + ) { + return false; + } + + let encoded; + + try { + const bracketSafeValue = value + .replace(/\[/g, OPEN_BRACKET_PLACEHOLDER) + .replace(/\]/g, CLOSE_BRACKET_PLACEHOLDER); + + const encodedWithPlaceholders = encodeURI(bracketSafeValue); + + encoded = encodedWithPlaceholders + .split(OPEN_BRACKET_PLACEHOLDER) + .join('[') + .split(CLOSE_BRACKET_PLACEHOLDER) + .join(']'); + } catch (err) { + return false; + } + + return isValidUriReference(encoded); +} diff --git a/test/validators.test.js b/test/validators.test.js index 7eef901b8..9a6e76a5f 100644 --- a/test/validators.test.js +++ b/test/validators.test.js @@ -1017,6 +1017,69 @@ describe('Validators', () => { }); }); + 
it('should validate XML Schema AnyURI values', () => { + test({ + validator: 'isXsdAnyURI', + valid: [ + 'http://example.com', + 'https://example.com:8080/path?query=1#frag', + 'mailto:user@example.com', + 'urn:isbn:0451450523', + 'data:text/plain;charset=utf-8,Hello%20World', + '../relative/path', + '/absolute/path', + '//cdn.example.com/libs.js', + '#fragment-only', + '?queryOnly=true', + 'file:///C:/Program%20Files/MyApp/app.exe', + 'http://[2001:db8::1]:443/path', + 'http://[v7.fe80::abcd]/resource', + 'https://user:pa%20ss@example.com:8443/resource', + ' https://example.com/with-space ', + ' \t\nhttps://example.com/resource\r\n', + 'foo%20bar/baz', + 'tel:+123456789', + 'foo:', + 'foo:/bar', + 'file:///var/log', + 'http://[2001:db8::1]:1234', + '', + 'file:///', + '//example.com/path#frag', + ], + invalid: [ + 'http://example.com:99999', + 'http://example.com:port', + 'http://example.com:-1', + 'http://[::1', + 'http://example.com#frag#extra', + 'foo%zz', + 'foo%2', + 'http://user@:8080', + 'http://user[info@example.com', + '\\server\\share', + 'http://example.com/pa|th', + 'http://example.com/path\u0006', + '//:8080/path', + 'http:///path', + 'file://user@', + 'http://example.com/%', + 'foo#frag%2', + 'http://example.com/%ZZ', + 'http://example.com/?q=abc^123', + 'http://example.com?foo[bar', + 'foo://?query', + 'foo%2/bar', + 'http://[::g]/path', + 'http://[::1]foo', + 'http://host:80:123/path', + 'http://exa[mple.com', + 'http://example.com/\ud800', + 'foo { test({ validator: 'isMACAddress', From 2e14b320b3c6561a86b89950cadd3ab387a11a38 Mon Sep 17 00:00:00 2001 From: MazenSamehR <103901861+MazenSamehR@users.noreply.github.com> Date: Thu, 4 Dec 2025 21:56:47 +0200 Subject: [PATCH 2/2] Update src/lib/isXsdAnyURI.js Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/lib/isXsdAnyURI.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/isXsdAnyURI.js b/src/lib/isXsdAnyURI.js index dbde7c9a4..a03e34101 100644 --- 
a/src/lib/isXsdAnyURI.js +++ b/src/lib/isXsdAnyURI.js @@ -115,7 +115,6 @@ function isValidAuthority(authority, options) { port = remainder.slice(1); } - host = ''; hasHost = true; } else { const firstColon = hostPort.indexOf(':');