Skip to content

Commit 3f2cc32

Browse files
committed
AVRO-4069: Remove Reader String Cache from Generic Datum Reader
1 parent 8040078 commit 3f2cc32

File tree

3 files changed

+25
-124
lines changed

3 files changed

+25
-124
lines changed

lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ public enum StringType {
9595
private final ClassLoader classLoader;
9696

9797
/**
98-
* Set the Java type to be used when reading this schema. Meaningful only only
98+
* Set the Java type to be used when reading this schema. Meaningful only for
9999
* string schemas and map schemas (for the keys).
100100
*/
101101
public static void setStringType(Schema s, StringType stringType) {

lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java

Lines changed: 23 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
import java.util.Collection;
2424
import java.util.HashMap;
2525
import java.util.Map;
26-
import java.util.concurrent.ConcurrentHashMap;
27-
import java.util.function.Function;
2826

2927
import org.apache.avro.AvroRuntimeException;
3028
import org.apache.avro.Conversion;
@@ -46,8 +44,8 @@ public class GenericDatumReader<D> implements DatumReader<D> {
4644
private Schema actual;
4745
private Schema expected;
4846
private DatumReader<D> fastDatumReader = null;
49-
5047
private ResolvingDecoder creatorResolver = null;
48+
private final Map<Class, Constructor> stringCtorCache;
5149
private final Thread creator;
5250

5351
public GenericDatumReader() {
@@ -73,6 +71,7 @@ public GenericDatumReader(Schema writer, Schema reader, GenericData data) {
7371
protected GenericDatumReader(GenericData data) {
7472
this.data = data;
7573
this.creator = Thread.currentThread();
74+
this.stringCtorCache = new HashMap<>();
7675
}
7776

7877
/** Return the {@link GenericData} implementation. */
@@ -452,13 +451,15 @@ protected Object newMap(Object old, int size) {
452451
* representation. By default, this calls {@link #readString(Object,Decoder)}.
453452
*/
454453
protected Object readString(Object old, Schema expected, Decoder in) throws IOException {
455-
Class stringClass = this.getReaderCache().getStringClass(expected);
456-
if (stringClass == String.class) {
457-
return in.readString();
458-
}
454+
Class stringClass = this.findStringClass(expected);
455+
456+
// Default is CharSequence / UTF8 so check it first
459457
if (stringClass == CharSequence.class) {
460458
return readString(old, in);
461459
}
460+
if (stringClass == String.class) {
461+
return in.readString();
462+
}
462463
return this.newInstanceFromString(stringClass, in.readString());
463464
}
464465

@@ -487,99 +488,34 @@ protected Object createString(String value) {
487488
*/
488489
protected Class findStringClass(Schema schema) {
489490
String name = schema.getProp(GenericData.STRING_PROP);
490-
if (name == null)
491-
return CharSequence.class;
492-
493-
switch (GenericData.StringType.valueOf(name)) {
494-
case String:
495-
return String.class;
496-
default:
491+
if (name == null) {
497492
return CharSequence.class;
498493
}
499-
}
500-
501-
/**
502-
* This class is used to reproduce part of IdentityHashMap in ConcurrentHashMap
503-
* code.
504-
*/
505-
private static final class IdentitySchemaKey {
506-
private final Schema schema;
507-
508-
private final int hashcode;
509-
510-
public IdentitySchemaKey(Schema schema) {
511-
this.schema = schema;
512-
this.hashcode = System.identityHashCode(schema);
513-
}
514-
515-
@Override
516-
public int hashCode() {
517-
return this.hashcode;
518-
}
519-
520-
@Override
521-
public boolean equals(Object obj) {
522-
if (obj == null || !(obj instanceof GenericDatumReader.IdentitySchemaKey)) {
523-
return false;
524-
}
525-
IdentitySchemaKey key = (IdentitySchemaKey) obj;
526-
return this == key || this.schema == key.schema;
494+
if (GenericData.StringType.String.name().equals(name)) {
495+
return String.class;
527496
}
497+
return CharSequence.class;
528498
}
529499

530-
// VisibleForTesting
531-
static class ReaderCache {
532-
private final Map<IdentitySchemaKey, Class> stringClassCache = new ConcurrentHashMap<>();
533-
534-
private final Map<Class, Function<String, Object>> stringCtorCache = new ConcurrentHashMap<>();
535-
536-
private final Function<Schema, Class> findStringClass;
537-
538-
public ReaderCache(Function<Schema, Class> findStringClass) {
539-
this.findStringClass = findStringClass;
540-
}
541-
542-
public Object newInstanceFromString(Class c, String s) {
543-
final Function<String, Object> ctor = stringCtorCache.computeIfAbsent(c, this::buildFunction);
544-
return ctor.apply(s);
545-
}
546-
547-
private Function<String, Object> buildFunction(Class c) {
500+
@SuppressWarnings("unchecked")
501+
protected Object newInstanceFromString(Class c, String s) {
502+
final Constructor cachedCtor = stringCtorCache.computeIfAbsent(c, clazz -> {
548503
final Constructor ctor;
549504
try {
550-
ctor = c.getDeclaredConstructor(String.class);
505+
ctor = clazz.getDeclaredConstructor(String.class);
506+
ctor.setAccessible(true);
551507
} catch (NoSuchMethodException e) {
552508
throw new AvroRuntimeException(e);
553509
}
554-
ctor.setAccessible(true);
555-
556-
return (String s) -> {
557-
try {
558-
return ctor.newInstance(s);
559-
} catch (ReflectiveOperationException e) {
560-
throw new AvroRuntimeException(e);
561-
}
562-
};
563-
}
564-
565-
public Class getStringClass(final Schema s) {
566-
final IdentitySchemaKey key = new IdentitySchemaKey(s);
567-
return this.stringClassCache.computeIfAbsent(key, (IdentitySchemaKey k) -> this.findStringClass.apply(k.schema));
510+
return ctor;
511+
});
512+
try {
513+
return cachedCtor.newInstance(s);
514+
} catch (ReflectiveOperationException e) {
515+
throw new AvroRuntimeException(e);
568516
}
569517
}
570518

571-
private final ReaderCache readerCache = new ReaderCache(this::findStringClass);
572-
573-
// VisibleForTesting
574-
ReaderCache getReaderCache() {
575-
return readerCache;
576-
}
577-
578-
@SuppressWarnings("unchecked")
579-
protected Object newInstanceFromString(Class c, String s) {
580-
return this.getReaderCache().newInstanceFromString(c, s);
581-
}
582-
583519
/**
584520
* Called to read byte arrays. Subclasses may override to use a different byte
585521
* array representation. By default, this calls

lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
import java.util.Arrays;
2424
import java.util.List;
2525
import java.util.Random;
26-
import java.util.stream.Collectors;
27-
import java.util.stream.IntStream;
2826

2927
import org.apache.avro.Schema;
3028
import org.junit.jupiter.api.Test;
@@ -33,27 +31,9 @@ public class TestGenericDatumReader {
3331

3432
private static final Random r = new Random(System.currentTimeMillis());
3533

36-
@Test
37-
void readerCache() {
38-
final GenericDatumReader.ReaderCache cache = new GenericDatumReader.ReaderCache(this::findStringClass);
39-
List<Thread> threads = IntStream.rangeClosed(1, 200).mapToObj((int index) -> {
40-
final Schema schema = TestGenericDatumReader.this.build(index);
41-
final WithSchema s = new WithSchema(schema, cache);
42-
return (Runnable) () -> s.test();
43-
}).map(Thread::new).collect(Collectors.toList());
44-
threads.forEach(Thread::start);
45-
threads.forEach((Thread t) -> {
46-
try {
47-
t.join();
48-
} catch (InterruptedException e) {
49-
throw new RuntimeException(e);
50-
}
51-
});
52-
}
53-
5434
@Test
5535
void newInstanceFromString() {
56-
final GenericDatumReader.ReaderCache cache = new GenericDatumReader.ReaderCache(this::findStringClass);
36+
final GenericDatumReader cache = new GenericDatumReader();
5737

5838
Object object = cache.newInstanceFromString(StringBuilder.class, "Hello");
5939
assertEquals(StringBuilder.class, object.getClass());
@@ -62,21 +42,6 @@ void newInstanceFromString() {
6242

6343
}
6444

65-
static class WithSchema {
66-
private final Schema schema;
67-
68-
private final GenericDatumReader.ReaderCache cache;
69-
70-
public WithSchema(Schema schema, GenericDatumReader.ReaderCache cache) {
71-
this.schema = schema;
72-
this.cache = cache;
73-
}
74-
75-
public void test() {
76-
this.cache.getStringClass(schema);
77-
}
78-
}
79-
8045
private List<Schema> list = new ArrayList<>();
8146

8247
private Schema build(int index) {

0 commit comments

Comments
 (0)