/*
 * Negative status codes returned by ucs2_to_utf8 in place of a byte count.
 * The values are parenthesized so that each macro expands to a single
 * primary expression regardless of the surrounding operators.
 */
/* The input lies in the surrogate range 0xD800..0xDFFF. */
#define UNICODE_SURROGATE_PAIR (-1)
/* The input could not be encoded. */
#define UNICODE_BAD_INPUT (-2)
2626
27- void rightShiftFromOffsetSteps (UCS2 * ucs2 , int offset , int steps , int * len )
/* Diagnostic hook used by the shift/splice helpers; forwards to debug_msg. */
#define DEBUG_SPLICE(X) debug_msg(X)

/*
 * Prints the message x, followed by a newline, on stderr.
 * The function only reports; it does not terminate the program.
 */
void debug_msg(char *x)
{
    FILE *sink = stderr;

    fprintf(sink, "%s\n", x);
}
34+
35+ bool rightShiftFromOffsetSteps (UCS2 * ucs2 , int offset , int steps , int * len , int buffer_capacity )
2836{
29- int j = offset + * len - 1 ;
30- for ( ; j >= offset ; j -- )
37+ if (* len + steps > buffer_capacity )
3138 {
39+ DEBUG_SPLICE ("right shift: out of bounds!" );
40+ return false;
41+ }
42+ if (offset >= * len )
43+ {
44+ DEBUG_SPLICE ("right shift: offset out of bounds!" );
45+ return false;
46+ }
47+
48+ for (int j = * len ; j >= offset ; j -- )
49+ {
50+ //printf("j: %d\n", j);
3251 ucs2 [j + steps ] = ucs2 [j ];
52+ ucs2 [j ] = 0 ;
3353 }
3454 * len += steps ;
55+ return true;
3556}
3657
37- //Moves everything over to the left, eating the first letter
38- void leftShiftFromOffsetSteps (UCS2 * ucs2 , int offset , int steps , int * len )
58+ //Moves everything over to the left, eating the char at the offset index
59+ bool leftShiftFromOffsetSteps (UCS2 * ucs2 , int offset , int steps , int * len )
3960{
61+ if (offset < 0 )
62+ {
63+ DEBUG_SPLICE ("offset out of bounds!" );
64+ return false;
65+ }
66+ if (offset + steps > * len )
67+ {
68+ DEBUG_SPLICE ("out of bounds!" );
69+ return false;
70+ }
4071 int j = offset ;
4172 for ( ; j < * len - 1 ; j ++ )
4273 {
4374 ucs2 [j ] = ucs2 [j + steps ];
4475 }
4576 * len -= steps ;
77+ return true;
78+ }
79+
80+ /*
81+ * Almost just like the Javascript function splice
82+ * Except there cannot be any empty gaps, it will fail
83+ * string: the string
84+ * len: the actual len of the string
85+ * bufferCapacity: the length of the array buffer
86+ * offset: the offset to begin inserting/removing elements
87+ * replacing: how many characters to replace or remove if insert_len is 0
88+ * insert: the string to insert
89+ * insert_len: the length of the elements to be inserted. Can be 0 if just want to remove.
90+ *
91+ * returns true, or false if error
92+ */
93+ bool splice (UCS2 * string , int * len , int buffer_capacity , int offset , int replacing , UCS2 * insert , int insert_len )
94+ {
95+ if (* len + insert_len - replacing > buffer_capacity )
96+ {
97+ DEBUG_SPLICE ("out of bounds!" );
98+ return false;
99+ }
100+ if (offset >= * len + 1 )
101+ {
102+ DEBUG_SPLICE ("offset beyond end of string + 1!" ); //would create gap
103+ return false;
104+ }
105+ if (replacing > (* len - offset ))
106+ {
107+ DEBUG_SPLICE ("replacing cannot go past end of string!" );
108+ return false;
109+ }
110+ if (insert_len < 0 )
111+ {
112+ DEBUG_SPLICE ("insert len must be >= 0!" );
113+ return false;
114+ }
115+ if (offset > * len || offset < 0 )
116+ {
117+ DEBUG_SPLICE ("offset out of bounds!" );
118+ return false;
119+ }
120+ //shift right, this function increases len
121+ if (replacing < insert_len )
122+ {
123+ if (offset + replacing < * len ) //only call right shift if offset is before end, else rightShift will fail since nothing to move
124+ {
125+ if (!rightShiftFromOffsetSteps (string , offset + replacing , insert_len - replacing , len , buffer_capacity ))
126+ {
127+ return false;
128+ }
129+ }
130+ else
131+ {
132+ * len += insert_len - replacing ;
133+ }
134+ }
135+ else if (replacing > insert_len )
136+ {
137+ if (!leftShiftFromOffsetSteps (string , offset + insert_len , replacing - insert_len , len ))
138+ {
139+ return false;
140+ }
141+ }
142+ for (int i = 0 ; i < insert_len ; i ++ )
143+ {
144+ string [offset + i ] = insert [i ];
145+ }
146+ return true;
46147}
47148
48149/* Input: a Unicode code point, "ucs2".
@@ -70,7 +171,7 @@ int ucs2_to_utf8 (UCS2 ucs2, unsigned char * utf8)
70171 }
71172 if (ucs2 >= 0x800 && ucs2 < 0xFFFF ) {
72173 if (ucs2 >= 0xD800 && ucs2 <= 0xDFFF ) {
73- /* Ill-formed. */
174+ // Ill-formed.
74175 return UNICODE_SURROGATE_PAIR ;
75176 }
76177 utf8 [0 ] = ((ucs2 >> 12 ) ) | 0xE0 ;
@@ -79,15 +180,18 @@ int ucs2_to_utf8 (UCS2 ucs2, unsigned char * utf8)
79180 utf8 [3 ] = '\0' ;
80181 return 3 ;
81182 }
82- if (ucs2 >= 0x10000 && ucs2 < 0x10FFFF ) {
83- /* http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c#L380 */
84- utf8 [0 ] = 0xF0 | (ucs2 >> 18 );
85- utf8 [1 ] = 0x80 | ((ucs2 >> 12 ) & 0x3F );
86- utf8 [2 ] = 0x80 | ((ucs2 >> 6 ) & 0x3F );
87- utf8 [3 ] = 0x80 | ((ucs2 & 0x3F ));
88- utf8 [4 ] = '\0' ;
89- return 4 ;
90- }
183+ /*
184+ //ucs2 >= UINT16_MAX &&
185+ if (ucs2 >= 0x10000 && ucs2 < 0x10FFFF) {
186+ // http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c#L380
187+ utf8[0] = 0xF0 | (ucs2 >> 18);
188+ utf8[1] = 0x80 | ((ucs2 >> 12) & 0x3F);
189+ utf8[2] = 0x80 | ((ucs2 >> 6) & 0x3F);
190+ utf8[3] = 0x80 | ((ucs2 & 0x3F));
191+ utf8[4] = '\0';
192+ return 4;
193+ }
194+ */
91195 return UNICODE_BAD_INPUT ;
92196}
93197
@@ -170,3 +274,33 @@ void utf8_to_ucs2_string(const unsigned char *utf8, UCS2 *ucs2, int *len)
170274 (* len )++ ;
171275 }
172276}
277+
278+ /*
279+ bool utf8HasSuffix2(char *s, char *suffix, ...)
280+ {
281+ va_list argp;
282+ unsigned long len = strlen(s);
283+
284+ //if (suffixLen > len)
285+ // return false;
286+
287+ va_start( argp, suffix );
288+ for( i = 0; argp != '\0'; ++i )
289+ {
290+
291+ suffix = va_arg( vl, char * );
292+ unsigned long suffixLen = strlen(suffix);
293+
294+ long j = len - 1;
295+ for (long i = suffixLen - 1; i >= 0; i--, j--)
296+ {
297+ if (suffix[i] != s[j])
298+ return false;
299+ }
300+ }
301+ va_end( argp );
302+
303+ return true;
304+ }
305+
306+ */
0 commit comments