@@ -56,6 +56,53 @@ static status_t checkAndClearExceptionFromCallback(JNIEnv* env, const char* meth
5656 return OK;
5757}
5858
// Adapted from dalvik/vm/checkJni.cpp.
//
// Checks whether the NUL-terminated string `bytes` consists solely of
// structurally valid "modified UTF-8" sequences:
//   - 0xxxxxxx                    : single byte, no continuation bytes
//   - 110xxxxx 10xxxxxx           : one continuation byte
//   - 1110xxxx 10xxxxxx 10xxxxxx  : two continuation bytes
// Lead bytes of the form 10xxxxxx (a stray continuation byte) or 1111xxxx
// (four-byte sequences, legal in standard UTF-8 but not in the modified
// form) are rejected.
//
// Only the bit patterns are checked; overlong encodings and the code point
// values themselves are not examined.
//
// `bytes` must not be null. Returns true if every sequence is well formed
// (an empty string is valid), false at the first malformed sequence.
static bool isValidUtf8(const char* bytes) {
    while (*bytes != '\0') {
        unsigned char utf8 = static_cast<unsigned char>(*(bytes++));
        // Switch on the high four bits of the lead byte.
        switch (utf8 >> 4) {
            case 0x00:
            case 0x01:
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x05:
            case 0x06:
            case 0x07:
                // Bit pattern 0xxx. No need for any extra bytes.
                break;
            case 0x08:
            case 0x09:
            case 0x0a:
            case 0x0b:
            case 0x0f:
                /*
                 * Bit pattern 10xx or 1111, which are illegal start bytes.
                 * Note: 1111 is valid for normal UTF-8, but not the
                 * modified UTF-8 used here.
                 */
                return false;
            case 0x0e:
                // Bit pattern 1110, so there are two additional bytes.
                // A premature NUL fails the 10xxxxxx test below, so we
                // return before ever stepping past the terminator.
                utf8 = static_cast<unsigned char>(*(bytes++));
                if ((utf8 & 0xc0) != 0x80) {
                    return false;
                }
                // Fall through to take care of the final byte.
            case 0x0c:
            case 0x0d:
                // Bit pattern 110x, so there is one additional byte.
                utf8 = static_cast<unsigned char>(*(bytes++));
                if ((utf8 & 0xc0) != 0x80) {
                    return false;
                }
                break;
        }
    }
    return true;
}

105+
59106class MyMediaScannerClient : public MediaScannerClient
60107{
61108public:
@@ -123,7 +170,22 @@ class MyMediaScannerClient : public MediaScannerClient
123170 mEnv ->ExceptionClear ();
124171 return NO_MEMORY;
125172 }
126- if ((valueStr = mEnv ->NewStringUTF (value)) == NULL ) {
173+ char *cleaned = NULL ;
174+ if (!isValidUtf8 (value)) {
175+ cleaned = strdup (value);
176+ char *chp = cleaned;
177+ char ch;
178+ while ((ch = *chp)) {
179+ if (ch & 0x80 ) {
180+ *chp = ' ?' ;
181+ }
182+ chp++;
183+ }
184+ value = cleaned;
185+ }
186+ valueStr = mEnv ->NewStringUTF (value);
187+ free (cleaned);
188+ if (valueStr == NULL ) {
127189 mEnv ->DeleteLocalRef (nameStr);
128190 mEnv ->ExceptionClear ();
129191 return NO_MEMORY;
0 commit comments