Skip to content

Commit 6965600

Browse files
committed
TRegex: add support for UTF-16BE and UTF-32BE.
1 parent 5dbb9dc commit 6965600

File tree

88 files changed

+772
-1812
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

88 files changed

+772
-1812
lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/parser/JsFlagsTest.java renamed to regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/flavor/js/JsFlagsTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3939
* SOFTWARE.
4040
*/
41-
package com.oracle.truffle.regex.tregex.parser;
41+
package com.oracle.truffle.regex.flavor.js;
4242

4343
import static org.junit.Assert.assertFalse;
4444
import static org.junit.Assert.assertTrue;

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/parser/flavors/PythonFlagsTest.java renamed to regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/flavor/python/PythonFlagsTest.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,12 @@
3838
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3939
* SOFTWARE.
4040
*/
41-
package com.oracle.truffle.regex.tregex.parser.flavors;
41+
package com.oracle.truffle.regex.flavor.python;
4242

4343
import static org.junit.Assert.assertTrue;
4444

4545
import org.junit.Test;
4646

47-
import com.oracle.truffle.regex.flavor.python.PythonFlags;
48-
4947
public class PythonFlagsTest {
5048

5149
private static PythonFlags parse(String flags) {

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/StringTest.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,11 @@
4242

4343
import static org.junit.Assert.assertEquals;
4444

45-
import java.util.PrimitiveIterator;
46-
4745
import org.junit.Test;
4846

47+
import com.oracle.truffle.api.strings.TruffleStringIterator;
48+
import com.oracle.truffle.regex.tregex.string.AbstractStringBuffer;
49+
import com.oracle.truffle.regex.tregex.string.Encoding;
4950
import com.oracle.truffle.regex.tregex.string.StringBufferUTF16;
5051
import com.oracle.truffle.regex.tregex.string.StringBufferUTF32;
5152
import com.oracle.truffle.regex.tregex.string.StringBufferUTF8;
@@ -54,29 +55,30 @@ public class StringTest {
5455

5556
@Test
5657
public void testEncodings() {
57-
testEncodingsRange(Character.MIN_CODE_POINT, Character.MAX_HIGH_SURROGATE);
58-
testEncodingsRange(Character.MIN_LOW_SURROGATE, Character.MAX_CODE_POINT);
58+
testEncodingsRange(Character.MIN_CODE_POINT, Character.MIN_HIGH_SURROGATE - 1);
59+
testEncodingsRange(Character.MAX_LOW_SURROGATE + 1, Character.MAX_CODE_POINT);
5960
}
6061

6162
static void testEncodingsRange(int lo, int hi) {
62-
StringBufferUTF8 sb8 = new StringBufferUTF8();
63-
StringBufferUTF16 sb16 = new StringBufferUTF16();
64-
StringBufferUTF32 sb32 = new StringBufferUTF32();
63+
AbstractStringBuffer[] sbs = {
64+
new StringBufferUTF8(hi - lo),
65+
new StringBufferUTF16(hi - lo, Encoding.UTF_16),
66+
new StringBufferUTF16(hi - lo, Encoding.UTF_16FE),
67+
new StringBufferUTF32(hi - lo, Encoding.UTF_32),
68+
new StringBufferUTF32(hi - lo, Encoding.UTF_32FE)
69+
};
6570

66-
for (int i = lo; i <= hi; i++) {
67-
sb8.append(i);
68-
sb16.append(i);
69-
sb32.append(i);
71+
for (AbstractStringBuffer sb : sbs) {
72+
for (int i = lo; i <= hi; i++) {
73+
sb.append(i);
74+
}
7075
}
7176

72-
PrimitiveIterator.OfInt it8 = sb8.materialize().iterator();
73-
PrimitiveIterator.OfInt it16 = sb16.materialize().iterator();
74-
PrimitiveIterator.OfInt it32 = sb32.materialize().iterator();
75-
76-
for (int i = lo; i <= hi; i++) {
77-
assertEquals(i, it8.nextInt());
78-
assertEquals(i, it16.nextInt());
79-
assertEquals(i, it32.nextInt());
77+
for (AbstractStringBuffer sb : sbs) {
78+
TruffleStringIterator it = sb.asTString().createCodePointIteratorUncached(sb.getEncoding().getTStringEncoding());
79+
for (int i = lo; i <= hi; i++) {
80+
assertEquals(i, it.nextUncached(sb.getEncoding().getTStringEncoding()));
81+
}
8082
}
8183
}
8284
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/matchers/MatcherBuilderTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
import com.oracle.truffle.regex.charset.CodePointSetAccumulator;
5050
import com.oracle.truffle.regex.charset.Range;
5151
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
52-
import com.oracle.truffle.regex.tregex.string.Encodings;
52+
import com.oracle.truffle.regex.tregex.string.Encoding;
5353

5454
public class MatcherBuilderTest {
5555

@@ -111,11 +111,11 @@ private static void checkContains(CodePointSet a, CodePointSet b, boolean expect
111111
}
112112

113113
private static void checkInverse(CodePointSet a, int... values) {
114-
checkMatch("inverse(" + a + ")", a.createInverse(Encodings.UTF_16), values);
114+
checkMatch("inverse(" + a + ")", a.createInverse(Encoding.UTF_16), values);
115115
}
116116

117117
private static void checkIntersection(CodePointSet a, CodePointSet b, int... values) {
118-
CompilationBuffer compilationBuffer = new CompilationBuffer(Encodings.UTF_16);
118+
CompilationBuffer compilationBuffer = new CompilationBuffer(Encoding.UTF_16);
119119
CodePointSet intersection = a.createIntersection(b, compilationBuffer);
120120
checkMatch("intersection(" + a + "," + b + ")", intersection, values);
121121
assertTrue("intersection(" + a + "," + b + ")", a.intersects(b) == intersection.matchesSomething());
@@ -126,11 +126,11 @@ private static void checkIntersection(CodePointSet a, CodePointSet b, int... val
126126
}
127127

128128
private static void checkSubtraction(CodePointSet a, CodePointSet b, int... values) {
129-
checkMatch("subtraction(" + a + "," + b + ")", a.subtract(b, new CompilationBuffer(Encodings.UTF_16)), values);
129+
checkMatch("subtraction(" + a + "," + b + ")", a.subtract(b, new CompilationBuffer(Encoding.UTF_16)), values);
130130
}
131131

132132
private static void checkUnion(CodePointSet a, CodePointSet b, int... values) {
133-
checkMatch("union(" + a + "," + b + ")", a.union(b, new CompilationBuffer(Encodings.UTF_16)), values);
133+
checkMatch("union(" + a + "," + b + ")", a.union(b, new CompilationBuffer(Encoding.UTF_16)), values);
134134
}
135135

136136
@Test

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/InputStringGeneratorTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
import org.junit.Test;
5252

5353
import com.oracle.truffle.api.CompilerDirectives;
54-
import com.oracle.truffle.regex.tregex.string.Encodings;
54+
import com.oracle.truffle.regex.tregex.string.Encoding;
5555

5656
public class InputStringGeneratorTests extends RegexTestBase {
5757

@@ -63,8 +63,8 @@ Map<String, String> getEngineOptions() {
6363
}
6464

6565
@Override
66-
Encodings.Encoding getTRegexEncoding() {
67-
return Encodings.UTF_16_RAW;
66+
Encoding getTRegexEncoding() {
67+
return Encoding.UTF_16_RAW;
6868
}
6969

7070
@Test
@@ -87,7 +87,7 @@ void testInputStringGenerator(String pattern) {
8787
testInputStringGenerator(pattern, "", getEngineOptions(), getTRegexEncoding(), rng.nextLong());
8888
}
8989

90-
private void testInputStringGenerator(String pattern, String flags, Map<String, String> options, Encodings.Encoding encoding, long rngSeed) {
90+
private void testInputStringGenerator(String pattern, String flags, Map<String, String> options, Encoding encoding, long rngSeed) {
9191
Value compiledRegex = compileRegex(pattern, flags);
9292
Value generator = getGenerator(pattern, flags, options, encoding);
9393
for (int i = 0; i < 20; i++) {
@@ -102,7 +102,7 @@ private void testInputStringGenerator(String pattern, String flags, Map<String,
102102
}
103103
}
104104

105-
private Value getGenerator(String pattern, String flags, Map<String, String> options, Encodings.Encoding encoding) {
105+
private Value getGenerator(String pattern, String flags, Map<String, String> options, Encoding encoding) {
106106
Source.Builder builder = sourceBuilder(pattern, flags, options(options), encoding).option("regexDummyLang.GenerateInput", "true");
107107
try {
108108
return context.parse(builder.build());

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JavaUtilPatternTests.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
import com.oracle.truffle.regex.charset.Range;
6161
import com.oracle.truffle.regex.flavor.java.JavaFlags;
6262
import com.oracle.truffle.regex.tregex.parser.CaseFoldData;
63-
import com.oracle.truffle.regex.tregex.string.Encodings;
63+
import com.oracle.truffle.regex.tregex.string.Encoding;
6464
import com.oracle.truffle.regex.tregex.test.generated.JavaGeneratedTests;
6565
import com.oracle.truffle.regex.util.EmptyArrays;
6666

@@ -74,8 +74,8 @@ Map<String, String> getEngineOptions() {
7474
}
7575

7676
@Override
77-
Encodings.Encoding getTRegexEncoding() {
78-
return Encodings.UTF_16;
77+
Encoding getTRegexEncoding() {
78+
return Encoding.UTF_16;
7979
}
8080

8181
@Test

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JsTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949

5050
import com.oracle.truffle.regex.errors.JsErrorMessages;
5151
import com.oracle.truffle.regex.tregex.TRegexOptions;
52-
import com.oracle.truffle.regex.tregex.string.Encodings;
52+
import com.oracle.truffle.regex.tregex.string.Encoding;
5353
import com.oracle.truffle.regex.tregex.test.generated.JsGeneratedTests;
5454

5555
public class JsTests extends RegexTestBase {
@@ -63,8 +63,8 @@ Map<String, String> getEngineOptions() {
6363
}
6464

6565
@Override
66-
Encodings.Encoding getTRegexEncoding() {
67-
return Encodings.UTF_16_RAW;
66+
Encoding getTRegexEncoding() {
67+
return Encoding.UTF_16_RAW;
6868
}
6969

7070
@Test
@@ -443,8 +443,8 @@ public void overlappingBq() {
443443

444444
@Test
445445
public void simpleCGUtf8() {
446-
test("^block($|(?=__|_))", "", Encodings.UTF_8, "block_baz", 0, true, 0, 5, 5, 5);
447-
test("^foo($|(?=__|_))", "", Encodings.UTF_8, "foo", 0, true, 0, 3, 3, 3);
446+
test("^block($|(?=__|_))", "", Encoding.UTF_8, "block_baz", 0, true, 0, 5, 5, 5);
447+
test("^foo($|(?=__|_))", "", Encoding.UTF_8, "foo", 0, true, 0, 3, 3, 3);
448448
}
449449

450450
@Test

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545
import org.junit.Test;
4646

47-
import com.oracle.truffle.regex.tregex.string.Encodings;
47+
import com.oracle.truffle.regex.tregex.string.Encoding;
4848
import com.oracle.truffle.regex.tregex.test.generated.OracleDBGeneratedTests;
4949

5050
public class OracleDBTests extends RegexTestBase {
@@ -57,8 +57,8 @@ Map<String, String> getEngineOptions() {
5757
}
5858

5959
@Override
60-
Encodings.Encoding getTRegexEncoding() {
61-
return Encodings.UTF_8;
60+
Encoding getTRegexEncoding() {
61+
return Encoding.UTF_8;
6262
}
6363

6464
@Test
@@ -90,10 +90,10 @@ public void orcl38190286() {
9090
test("[[:alpha:]]", "", "\uD839", 0, false);
9191
test("[[:alpha:]]", "", "\uDDF2", 0, false);
9292
test("[[:alpha:]]", "", "\uD839\uDDF2", 0, false);
93-
test("[[:alpha:]]", "", Encodings.UTF_16, "\ufffd", 0, true, 0, 1);
94-
test("[[:alpha:]]", "", Encodings.UTF_16, "\uD839", 0, false);
95-
test("[[:alpha:]]", "", Encodings.UTF_16, "\uDDF2", 0, false);
96-
test("[[:alpha:]]", "", Encodings.UTF_16, "\uD839\uDDF2", 0, false);
93+
test("[[:alpha:]]", "", Encoding.UTF_16, "\ufffd", 0, true, 0, 1);
94+
test("[[:alpha:]]", "", Encoding.UTF_16, "\uD839", 0, false);
95+
test("[[:alpha:]]", "", Encoding.UTF_16, "\uDDF2", 0, false);
96+
test("[[:alpha:]]", "", Encoding.UTF_16, "\uD839\uDDF2", 0, false);
9797
}
9898

9999
@Test

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonByteTests.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545
import org.junit.Test;
4646

47-
import com.oracle.truffle.regex.tregex.string.Encodings;
47+
import com.oracle.truffle.regex.tregex.string.Encoding;
4848

4949
public class PythonByteTests extends RegexTestBase {
5050

@@ -59,8 +59,8 @@ Map<String, String> getEngineOptions() {
5959
}
6060

6161
@Override
62-
Encodings.Encoding getTRegexEncoding() {
63-
return Encodings.LATIN_1;
62+
Encoding getTRegexEncoding() {
63+
return Encoding.LATIN_1;
6464
}
6565

6666
@Test

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/PythonTests.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949

5050
import com.oracle.truffle.regex.flavor.python.PyErrorMessages;
5151
import com.oracle.truffle.regex.tregex.TRegexOptions;
52-
import com.oracle.truffle.regex.tregex.string.Encodings;
52+
import com.oracle.truffle.regex.tregex.string.Encoding;
5353
import com.oracle.truffle.regex.tregex.test.generated.PythonGeneratedTests;
5454

5555
public class PythonTests extends RegexTestBase {
@@ -63,8 +63,8 @@ Map<String, String> getEngineOptions() {
6363
}
6464

6565
@Override
66-
Encodings.Encoding getTRegexEncoding() {
67-
return Encodings.UTF_32;
66+
Encoding getTRegexEncoding() {
67+
return Encoding.UTF_32;
6868
}
6969

7070
@Test
@@ -392,7 +392,7 @@ public void testPythonFlagChecks() {
392392
expectSyntaxError("(?a)(?u)", "", "ASCII and UNICODE flags are incompatible");
393393

394394
expectSyntaxError("", "L", "cannot use LOCALE flag with a str pattern");
395-
expectSyntaxError("", "u", Encodings.LATIN_1, "cannot use UNICODE flag with a bytes pattern", Integer.MIN_VALUE);
395+
expectSyntaxError("", "u", Encoding.LATIN_1, "cannot use UNICODE flag with a bytes pattern", Integer.MIN_VALUE);
396396

397397
Assert.assertTrue("expected str pattern to default to UNICODE flag",
398398
compileRegex("", "").getMember("flags").getMember("UNICODE").asBoolean());

0 commit comments

Comments
 (0)