diff --git a/README.md b/README.md
index 37fbd5e3d..f783aa622 100644
--- a/README.md
+++ b/README.md
@@ -168,6 +168,7 @@ Validator | Description
**isTime(str [, options])** | check if the string is a valid time e.g. [`23:01:59`, new Date().toLocaleTimeString()].
`options` is an object which can contain the keys `hourFormat` or `mode`.
`hourFormat` is a key and defaults to `'hour24'`.
`mode` is a key and defaults to `'default'`.
`hourFormat` can contain the values `'hour12'` or `'hour24'`, `'hour24'` will validate hours in 24 format and `'hour12'` will validate hours in 12 format.
`mode` can contain the values `'default', 'withSeconds', withOptionalSeconds`, `'default'` will validate `HH:MM` format, `'withSeconds'` will validate the `HH:MM:SS` format, `'withOptionalSeconds'` will validate `'HH:MM'` and `'HH:MM:SS'` formats.
**isTaxID(str, locale)** | check if the string is a valid Tax Identification Number. Default locale is `en-US`.
More info about exact TIN support can be found in `src/lib/isTaxID.js`.
Supported locales: `[ 'bg-BG', 'cs-CZ', 'de-AT', 'de-DE', 'dk-DK', 'el-CY', 'el-GR', 'en-CA', 'en-GB', 'en-IE', 'en-US', 'es-AR', 'es-ES', 'et-EE', 'fi-FI', 'fr-BE', 'fr-CA', 'fr-FR', 'fr-LU', 'hr-HR', 'hu-HU', 'it-IT', 'lb-LU', 'lt-LT', 'lv-LV', 'mt-MT', 'nl-BE', 'nl-NL', 'pl-PL', 'pt-BR', 'pt-PT', 'ro-RO', 'sk-SK', 'sl-SI', 'sv-SE', 'uk-UA']`.
**isURL(str [, options])** | check if the string is a URL.
`options` is an object which defaults to `{ protocols: ['http','https','ftp'], require_tld: true, require_protocol: false, require_host: true, require_port: false, require_valid_protocol: true, allow_underscores: false, host_whitelist: false, host_blacklist: false, allow_trailing_dot: false, allow_protocol_relative_urls: false, allow_fragments: true, allow_query_components: true, disallow_auth: false, validate_length: true }`.
`protocols` - valid protocols can be modified with this option.
`require_tld` - If set to false isURL will not check if the URL's host includes a top-level domain.
`require_protocol` - **RECOMMENDED** if set to true isURL will return false if protocol is not present in the URL. Without this setting, some malicious URLs cannot be distinguishable from a valid URL with authentication information.
`require_host` - if set to false isURL will not check if host is present in the URL.
`require_port` - if set to true isURL will check if port is present in the URL.
`require_valid_protocol` - isURL will check if the URL's protocol is present in the protocols option.
`allow_underscores` - if set to true, the validator will allow underscores in the URL.
`host_whitelist` - if set to an array of strings or regexp, and the domain matches none of the strings defined in it, the validation fails.
`host_blacklist` - if set to an array of strings or regexp, and the domain matches any of the strings defined in it, the validation fails.
`allow_trailing_dot` - if set to true, the validator will allow the domain to end with a `.` character.
`allow_protocol_relative_urls` - if set to true protocol relative URLs will be allowed.
`allow_fragments` - if set to false isURL will return false if fragments are present.
`allow_query_components` - if set to false isURL will return false if query components are present.
`disallow_auth` - if set to true, the validator will fail if the URL contains an authentication component, e.g. `http://username:password@example.com`.
`validate_length` - if set to false isURL will skip string length validation. `max_allowed_length` will be ignored if this is set as `false`.
`max_allowed_length` - if set, isURL will not allow URLs longer than the specified value (default is 2084 that IE maximum URL length).
+**isXsdAnyURI(str)** | check if the string conforms to the [XML Schema `anyURI` type](https://www.w3.org/TR/xmlschema-2/#anyURI). XML whitespace is collapsed before validation (tabs and line breaks become spaces, runs of spaces become a single space, and leading/trailing spaces are removed), and characters outside the URI character set, such as spaces and non-ASCII characters, are percent-encoded with `encodeURI` semantics before the RFC 3986 rules are applied. Both absolute and relative references (including query-only and fragment-only references) are supported; the empty string is valid.
**isULID(str)** | check if the string is a [ULID](https://github.com/ulid/spec).
**isUUID(str [, version])** | check if the string is an RFC9562 UUID.
`version` is one of `'1'`-`'8'`, `'nil'`, `'max'`, `'all'` or `'loose'`. The `'loose'` option checks if the string is a UUID-like string with hexadecimal values, ignoring RFC9565.
**isVariableWidth(str)** | check if the string contains a mixture of full and half-width chars.
diff --git a/src/index.js b/src/index.js
index ccbcc4188..0fb21d77f 100644
--- a/src/index.js
+++ b/src/index.js
@@ -8,6 +8,7 @@ import matches from './lib/matches';
import isEmail from './lib/isEmail';
import isURL from './lib/isURL';
+import isXsdAnyURI from './lib/isXsdAnyURI';
import isMACAddress from './lib/isMACAddress';
import isIP from './lib/isIP';
import isIPRange from './lib/isIPRange';
@@ -143,6 +144,7 @@ const validator = {
matches,
isEmail,
isURL,
+ isXsdAnyURI,
isMACAddress,
isIP,
isIPRange,
diff --git a/src/lib/isXsdAnyURI.js b/src/lib/isXsdAnyURI.js
new file mode 100644
index 000000000..a03e34101
--- /dev/null
+++ b/src/lib/isXsdAnyURI.js
@@ -0,0 +1,300 @@
+import assertString from './util/assertString';
+import isIP from './isIP';
+
+const MULTIPLE_SPACES_REGEX = / {2,}/g; // run of two or more spaces
+const INVALID_PERCENT_REGEX = /%(?![0-9A-Fa-f]{2})/; // "%" not followed by two hex digits
+const SCHEME_REGEX = /^[A-Za-z][A-Za-z0-9+.-]*$/; // letter first, then letters/digits/+/./-
+const BACKSLASH_REGEX = /\\/;
+const DISALLOWED_ASCII_REGEX = /["<>^`{}|]/;
+const OPEN_BRACKET_PLACEHOLDER = '__VALIDATOR_OPEN_BRACKET__'; // stands in for "["
+const CLOSE_BRACKET_PLACEHOLDER = '__VALIDATOR_CLOSE_BRACKET__'; // stands in for "]"
+// Pattern fragments, interpolated into the `new RegExp(...)` sources below.
+const HEX_DIGIT = '[0-9A-Fa-f]';
+const PCT_ENCODED = `%${HEX_DIGIT}{2}`;
+const UNRESERVED = 'A-Za-z0-9\\-._~'; // bare class members: users add the [ ]
+const SUB_DELIMS = "!$&'()*+,;="; // bare class members: users add the [ ]
+const PCHAR = `(?:[${UNRESERVED}]|${PCT_ENCODED}|[${SUB_DELIMS}:@])`;
+const SEGMENT = `(?:${PCHAR})*`; // may be empty
+const SEGMENT_NZ = `(?:${PCHAR})+`; // at least one character
+const SEGMENT_NZ_NC = `(?:${PCT_ENCODED}|[${UNRESERVED}${SUB_DELIMS}@])+`; // non-empty, no ":"
+// Anchored patterns: each must match its whole input.
+const PATH_ABEMPTY_REGEX = new RegExp(`^(?:/${SEGMENT})*$`);
+const PATH_ABSOLUTE_REGEX = new RegExp(`^/(?:${SEGMENT_NZ}(?:/${SEGMENT})*)?$`);
+const PATH_ROOTLESS_REGEX = new RegExp(`^${SEGMENT_NZ}(?:/${SEGMENT})*$`);
+const PATH_NOSCHEME_REGEX = new RegExp(`^(?:${SEGMENT_NZ_NC})(?:/${SEGMENT})*$`);
+const QUERY_FRAGMENT_REGEX = new RegExp(`^(?:${PCHAR}|[/?])*$`);
+const USERINFO_REGEX = new RegExp(`^(?:${PCT_ENCODED}|[${UNRESERVED}${SUB_DELIMS}:])*$`);
+const REG_NAME_REGEX = new RegExp(`^(?:${PCT_ENCODED}|[${UNRESERVED}${SUB_DELIMS}])*$`);
+const IPV_FUTURE_REGEX = /^v[0-9A-F]+\.[A-Za-z0-9._~!$&'()*+,;=:-]+$/i;
+
+/**
+ * Apply XML Schema `collapse` whitespace normalization to `input`.
+ *
+ * Tabs, line feeds and carriage returns become spaces, runs of spaces shrink
+ * to one, and a space left at either end is dropped. `String#trim` is avoided
+ * on purpose: it also strips VT, FF, NBSP, U+FEFF and more, none of which is
+ * XML whitespace, so they must stay visible to the checks that run afterwards.
+ *
+ * @param {string} input - raw value
+ * @returns {string} the collapsed value
+ */
+function collapseXmlWhitespace(input) {
+  const spaced = input.replace(/[\t\n\r]/g, ' ');
+  return spaced.replace(MULTIPLE_SPACES_REGEX, ' ').replace(/^ | $/g, '');
+}
+
+/**
+ * Report whether `value` contains a C0 control character or DEL.
+ *
+ * Tab (0x09), LF (0x0a) and CR (0x0d) are exempt: they are XML whitespace.
+ *
+ * @param {string} value - string to scan, one UTF-16 code unit at a time
+ * @returns {boolean} true if any code unit is 0x00-0x1f (minus the three
+ *   exemptions) or 0x7f
+ */
+function containsForbiddenControl(value) {
+  return value.split('').some((unit) => {
+    const code = unit.charCodeAt(0);
+    const isXmlWhitespace = code === 0x09 || code === 0x0a || code === 0x0d;
+
+    return (code <= 0x1f || code === 0x7f) && !isXmlWhitespace;
+  });
+}
+
+function hasInvalidPercentEncoding(input) {
+  return input.search(INVALID_PERCENT_REGEX) !== -1; // some "%" lacks its two hex digits
+}
+
+function isIPvFuture(address) {
+  return address.search(IPV_FUTURE_REGEX) !== -1; // e.g. "v7.fe80::abcd"
+}
+
+/**
+ * Validate the authority component of a URI reference:
+ * `[userinfo "@"] host [":" port]`.
+ *
+ * The host is mandatory and must be a bracketed IP literal (IPv6 or
+ * IPvFuture), an IPv4 address or a registered name. When a ":" introduces a
+ * port, the port must be all digits and no greater than 65535.
+ *
+ * @param {string} authority - text between the leading "//" and the path
+ * @param {{allowEmptyAuthority?: boolean}} [options] - when
+ *   `allowEmptyAuthority` is truthy, an empty authority is accepted
+ * @returns {boolean} true if the authority is well-formed
+ */
+function isValidAuthority(authority, options) {
+  if (authority === '') {
+    return Boolean(options && options.allowEmptyAuthority);
+  }
+
+  // Everything before the last "@" is userinfo.
+  const atIndex = authority.lastIndexOf('@');
+  const hasUserinfo = atIndex !== -1;
+
+  if (hasUserinfo && !USERINFO_REGEX.test(authority.slice(0, atIndex))) {
+    return false;
+  }
+
+  const hostPort = hasUserinfo ? authority.slice(atIndex + 1) : authority;
+  let port = null; // stays null when no ":" port delimiter is present
+
+  if (hostPort.startsWith('[')) {
+    // IP literal: "[" address "]", optionally followed by ":" port.
+    const closingIndex = hostPort.indexOf(']');
+
+    if (closingIndex === -1) {
+      return false;
+    }
+
+    const address = hostPort.slice(1, closingIndex);
+
+    if (!isIP(address, 6) && !isIPvFuture(address)) {
+      return false;
+    }
+
+    const remainder = hostPort.slice(closingIndex + 1);
+
+    if (remainder !== '') {
+      if (!remainder.startsWith(':')) {
+        return false;
+      }
+
+      port = remainder.slice(1);
+    }
+  } else {
+    // Unbracketed host: a single ":" at most, and it introduces the port.
+    const colonIndex = hostPort.indexOf(':');
+
+    if (colonIndex !== hostPort.lastIndexOf(':')) {
+      return false;
+    }
+
+    const hasPort = colonIndex !== -1;
+    const host = hasPort ? hostPort.slice(0, colonIndex) : hostPort;
+
+    if (host === '') {
+      return false;
+    }
+
+    if (!isIP(host, 4) && !REG_NAME_REGEX.test(host)) {
+      return false;
+    }
+
+    if (hasPort) {
+      port = hostPort.slice(colonIndex + 1);
+    }
+  }
+
+  if (port === null) {
+    return true;
+  }
+
+  // All digits (so never empty) and within the 16-bit port range.
+  if (!/^[0-9]+$/.test(port)) {
+    return false;
+  }
+
+  return parseInt(port, 10) <= 65535;
+}
+
+/**
+ * Check `path` against the RFC 3986 path rule that applies in its context.
+ *
+ * - after an authority: empty, or a sequence of "/"-prefixed segments
+ * - otherwise empty: always accepted
+ * - otherwise starting with "/": the first segment must be non-empty, so
+ *   the path cannot begin with "//"
+ * - relative, after a scheme: the first segment must be non-empty
+ * - relative, no scheme: additionally, no ":" in the first segment
+ *
+ * @param {string} path - path component, without query or fragment
+ * @param {{hasAuthority: boolean, hasScheme: boolean}} context - which
+ *   components came before the path
+ * @returns {boolean} true if the path is well-formed for that context
+ */
+function isValidPath(path, { hasAuthority, hasScheme }) {
+  if (hasAuthority) {
+    return PATH_ABEMPTY_REGEX.test(path);
+  }
+  if (path === '') {
+    return true;
+  }
+  if (path.startsWith('/')) {
+    return PATH_ABSOLUTE_REGEX.test(path);
+  }
+  return (hasScheme ? PATH_ROOTLESS_REGEX : PATH_NOSCHEME_REGEX).test(path);
+}
+
+function isValidQueryOrFragment(value) {
+  return value.length === 0 || QUERY_FRAGMENT_REGEX.test(value); // empty is always fine
+}
+
+/**
+ * Check a string against the RFC 3986 URI-reference shape:
+ * `[scheme ":"] ["//" authority] path ["?" query] ["#" fragment]`.
+ *
+ * Components are peeled off in this order: scheme, fragment, query,
+ * authority; whatever is left is the path. The result is false as soon as
+ * one component is malformed.
+ *
+ * No encoding is applied here: `value` is matched as-is, so the caller is
+ * expected to have percent-encoded anything outside the URI character set.
+ *
+ * An authority may only be empty when the scheme is `file`.
+ *
+ * @param {string} value - candidate reference
+ * @returns {boolean} true if every component present is well-formed
+ */
+function isValidUriReference(value) {
+  // The text before the first ":" is a scheme only if it is shaped like one;
+  // otherwise that ":" belongs to a later component.
+  const colonIndex = value.indexOf(':');
+  const scheme = colonIndex > 0 && SCHEME_REGEX.test(value.slice(0, colonIndex))
+    ? value.slice(0, colonIndex)
+    : null;
+  const hasScheme = scheme !== null;
+  let rest = hasScheme ? value.slice(colonIndex + 1) : value;
+
+  // The fragment starts at the first "#"; a further "#" ends up inside it
+  // and fails the fragment check.
+  const hashIndex = rest.indexOf('#');
+
+  if (hashIndex !== -1) {
+    if (!isValidQueryOrFragment(rest.slice(hashIndex + 1))) {
+      return false;
+    }
+
+    rest = rest.slice(0, hashIndex);
+  }
+
+  // Likewise, the query starts at the first "?" of what remains.
+  const questionIndex = rest.indexOf('?');
+
+  if (questionIndex !== -1) {
+    if (!isValidQueryOrFragment(rest.slice(questionIndex + 1))) {
+      return false;
+    }
+
+    rest = rest.slice(0, questionIndex);
+  }
+
+  if (!rest.startsWith('//')) {
+    return isValidPath(rest, { hasAuthority: false, hasScheme });
+  }
+
+  // "//" authority path: the authority runs up to the next "/", if any.
+  const afterSlashes = rest.slice(2);
+  const pathStart = afterSlashes.indexOf('/');
+  const authority = pathStart === -1 ? afterSlashes : afterSlashes.slice(0, pathStart);
+  const path = pathStart === -1 ? '' : afterSlashes.slice(pathStart);
+
+  // e.g. "file:///var/log" has an empty authority
+  const authorityOptions = hasScheme && scheme.toLowerCase() === 'file'
+    ? { allowEmptyAuthority: true }
+    : undefined;
+
+  if (!isValidAuthority(authority, authorityOptions)) {
+    return false;
+  }
+
+  return isValidPath(path, { hasAuthority: true, hasScheme });
+}
+
+/**
+ * Check if the string is a valid XML Schema `anyURI` value.
+ *
+ * The input is whitespace-collapsed first, and an empty result is valid.
+ * Control characters, backslashes, any of " < > ^ ` { } | and a "%" that
+ * does not start a two-hex-digit escape are rejected up front. The rest is
+ * percent-encoded with `encodeURI` and checked as a URI reference.
+ *
+ * @param {string} input - value to validate; passed to `assertString` first
+ * @returns {boolean} true if `input` is an acceptable `anyURI`
+ */
+export default function isXsdAnyURI(input) {
+  assertString(input);
+  const value = collapseXmlWhitespace(input);
+
+  if (value === '') {
+    return true;
+  }
+  if (containsForbiddenControl(value) || hasInvalidPercentEncoding(value)
+    || BACKSLASH_REGEX.test(value) || DISALLOWED_ASCII_REGEX.test(value)) {
+    return false;
+  }
+
+  // encodeURI would escape "[" and "]"; shield them with placeholders.
+  const shielded = value
+    .replace(/\[/g, OPEN_BRACKET_PLACEHOLDER)
+    .replace(/\]/g, CLOSE_BRACKET_PLACEHOLDER);
+  let encoded;
+
+  try {
+    encoded = encodeURI(shielded);
+  } catch (err) {
+    return false; // encodeURI throws on a lone surrogate
+  }
+  // NOTE(review): a literal placeholder string in the input also becomes a bracket here.
+  encoded = encoded.split(OPEN_BRACKET_PLACEHOLDER).join('[');
+  return isValidUriReference(encoded.split(CLOSE_BRACKET_PLACEHOLDER).join(']'));
+}
diff --git a/test/validators.test.js b/test/validators.test.js
index 7eef901b8..9a6e76a5f 100644
--- a/test/validators.test.js
+++ b/test/validators.test.js
@@ -1017,6 +1017,69 @@ describe('Validators', () => {
});
});
+ it('should validate XML Schema AnyURI values', () => {
+ test({
+ validator: 'isXsdAnyURI',
+ valid: [
+ 'http://example.com',
+ 'https://example.com:8080/path?query=1#frag',
+ 'mailto:user@example.com',
+ 'urn:isbn:0451450523',
+ 'data:text/plain;charset=utf-8,Hello%20World',
+ '../relative/path',
+ '/absolute/path',
+ '//cdn.example.com/libs.js',
+ '#fragment-only',
+ '?queryOnly=true',
+ 'file:///C:/Program%20Files/MyApp/app.exe',
+ 'http://[2001:db8::1]:443/path',
+ 'http://[v7.fe80::abcd]/resource',
+ 'https://user:pa%20ss@example.com:8443/resource',
+ ' https://example.com/with-space ',
+ ' \t\nhttps://example.com/resource\r\n',
+ 'foo%20bar/baz',
+ 'tel:+123456789',
+ 'foo:',
+ 'foo:/bar',
+ 'file:///var/log',
+ 'http://[2001:db8::1]:1234',
+ '',
+ 'file:///',
+ '//example.com/path#frag',
+ ],
+ invalid: [
+ 'http://example.com:99999',
+ 'http://example.com:port',
+ 'http://example.com:-1',
+ 'http://[::1',
+ 'http://example.com#frag#extra',
+ 'foo%zz',
+ 'foo%2',
+ 'http://user@:8080',
+ 'http://user[info@example.com',
+ '\\server\\share',
+ 'http://example.com/pa|th',
+ 'http://example.com/path\u0006',
+ '//:8080/path',
+ 'http:///path',
+ 'file://user@',
+ 'http://example.com/%',
+ 'foo#frag%2',
+ 'http://example.com/%ZZ',
+ 'http://example.com/?q=abc^123',
+ 'http://example.com?foo[bar',
+ 'foo://?query',
+ 'foo%2/bar',
+ 'http://[::g]/path',
+ 'http://[::1]foo',
+ 'http://host:80:123/path',
+ 'http://exa[mple.com',
+ 'http://example.com/\ud800',
+ 'foo {
test({
validator: 'isMACAddress',