Skip to content

Commit d0df4ea

Browse files
Add RFC 7639 canonical percent codec for ALPN protocol identifiers. (#596)
Preserve token tchar octets (except '%') and enforce uppercase hex.
1 parent 86d9214 commit d0df4ea

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java

Lines changed: 35 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@
3333
import java.nio.charset.StandardCharsets;
3434
import java.util.BitSet;
3535

36+
import org.apache.hc.core5.annotation.Internal;
37+
3638
/**
3739
* Percent-encoding.
3840
*
@@ -113,6 +115,36 @@ public class PercentCodec {
113115
RFC5987_UNRESERVED.set('~');
114116
}
115117

118+
/**
 * Octets that the HTTP token codec leaves unescaped: every HTTP token
 * character (tchar) except {@code '%'}, which is percent-encoded per the
 * RFC 7639 canonical form.
 */
static final BitSet HTTP_TOKEN_UNRESERVED = new BitSet(256);

static {
    // Letters and digits. BitSet.set(from, to) excludes the upper bound, hence the + 1.
    HTTP_TOKEN_UNRESERVED.set('a', 'z' + 1);
    HTTP_TOKEN_UNRESERVED.set('A', 'Z' + 1);
    HTTP_TOKEN_UNRESERVED.set('0', '9' + 1);

    // Remaining tchar punctuation; '%' is deliberately left out so it is always escaped.
    for (final char symbol : "!#$&'*+-.^_`|~".toCharArray()) {
        HTTP_TOKEN_UNRESERVED.set(symbol);
    }
}
147+
116148
static final BitSet PCHAR = new BitSet(256);
117149
static final BitSet USERINFO = new BitSet(256);
118150
static final BitSet REG_NAME = new BitSet(256);
@@ -217,10 +249,12 @@ public static String decode(final CharSequence content, final Charset charset) {
217249

218250
public static final PercentCodec RFC3986 = new PercentCodec(UNRESERVED);
219251
public static final PercentCodec RFC5987 = new PercentCodec(RFC5987_UNRESERVED);
252+
public static final PercentCodec HTTP_TOKEN = new PercentCodec(HTTP_TOKEN_UNRESERVED);
220253

221254
private final BitSet unreserved;
222255

223-
private PercentCodec(final BitSet unreserved) {
256+
/**
 * Creates a codec that uses the given set as its unreserved (safe) octets.
 * <p>
 * The set is copied on construction, so changes made to the caller's
 * {@link BitSet} afterwards do not affect this codec.
 *
 * @param unreserved the octets this codec treats as unreserved.
 * @throws NullPointerException if {@code unreserved} is {@code null}.
 */
@Internal
public PercentCodec(final BitSet unreserved) {
    // BitSet is mutable and this constructor is public: keep a private snapshot
    // instead of aliasing a set the caller can still modify.
    this.unreserved = (BitSet) unreserved.clone();
}
226260

httpcore5/src/test/java/org/apache/hc/core5/net/TestPercentCodec.java

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,4 +98,30 @@ void verifyRfc5987EncodingandDecoding() {
9898
assertThat(PercentCodec.RFC5987.decode(PercentCodec.RFC5987.encode(s)), CoreMatchers.equalTo(s));
9999
}
100100

101+
@Test
102+
void testRfc7639CanonicalAlpnTokenEncoding() {
103+
// RFC 7639 requires protocol-id to be a token and applies additional canonical constraints:
104+
// - Octets not allowed in tokens MUST be percent-encoded (RFC 3986).
105+
// - '%' MUST be percent-encoded.
106+
// - Octets that are valid token characters MUST NOT be percent-encoded (except '%').
107+
// - Uppercase hex digits MUST be used.
108+
assertEquals("h2", PercentCodec.HTTP_TOKEN.encode("h2"));
109+
assertEquals("http%2F1.1", PercentCodec.HTTP_TOKEN.encode("http/1.1"));
110+
assertEquals("%25", PercentCodec.HTTP_TOKEN.encode("%"));
111+
assertEquals("foo+bar", PercentCodec.HTTP_TOKEN.encode("foo+bar"));
112+
assertEquals("!#$&'*+-.^_`|~", PercentCodec.HTTP_TOKEN.encode("!#$&'*+-.^_`|~"));
113+
assertEquals("foo bar", PercentCodec.HTTP_TOKEN.decode("foo%20bar"));
114+
assertEquals("ws/é", PercentCodec.HTTP_TOKEN.decode("ws%2F%C3%A9"));
115+
}
116+
117+
@Test
118+
void testPercentCodecEncodeIsNotRfc7639Canonical() {
119+
// PercentCodec.encode(..) uses RFC 3986 UNRESERVED as the safe set.
120+
// This percent-encodes valid RFC 7230 tchar like '+', '*', '!', '|', which RFC 7639 forbids.
121+
assertEquals("foo%2Bbar", PercentCodec.encode("foo+bar", StandardCharsets.UTF_8));
122+
assertEquals("%2A", PercentCodec.encode("*", StandardCharsets.UTF_8));
123+
assertEquals("%21", PercentCodec.encode("!", StandardCharsets.UTF_8));
124+
assertEquals("%7C", PercentCodec.encode("|", StandardCharsets.UTF_8));
125+
}
126+
101127
}

0 commit comments

Comments
 (0)