Skip to content

Commit be98aec

Browse files
authored
fix(otel): prevent unpaired unicode surrogate pairs from causing insert errors (#2594)
* fix(otel): prevent unpaired unicode surrogate pairs from causing insert errors * only check the parts of the string that are not going to be truncated * remove unnecessary tasks
1 parent 129dc02 commit be98aec

File tree

1 file changed

+90
-31
lines changed

1 file changed

+90
-31
lines changed

apps/webapp/app/v3/otlpExporter.server.ts

Lines changed: 90 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import type {
2929
import { startSpan } from "./tracing.server";
3030
import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server";
3131
import { env } from "~/env.server";
32+
import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
33+
import { singleton } from "~/utils/singleton";
3234

3335
class OTLPExporter {
3436
private _tracer: Tracer;
@@ -221,18 +223,16 @@ function convertLogsToCreateableEvents(
221223
);
222224

223225
const properties =
224-
convertKeyValueItemsToMap(
225-
truncateAttributes(log.attributes ?? [], spanAttributeValueLengthLimit),
226-
[],
227-
undefined,
228-
[
226+
truncateAttributes(
227+
convertKeyValueItemsToMap(log.attributes ?? [], [], undefined, [
229228
SemanticInternalAttributes.USAGE,
230229
SemanticInternalAttributes.SPAN,
231230
SemanticInternalAttributes.METADATA,
232231
SemanticInternalAttributes.STYLE,
233232
SemanticInternalAttributes.METRIC_EVENTS,
234233
SemanticInternalAttributes.TRIGGER,
235-
]
234+
]),
235+
spanAttributeValueLengthLimit
236236
) ?? {};
237237

238238
return {
@@ -304,18 +304,16 @@ function convertSpansToCreateableEvents(
304304
);
305305

306306
const properties =
307-
convertKeyValueItemsToMap(
308-
truncateAttributes(span.attributes ?? [], spanAttributeValueLengthLimit),
309-
[],
310-
undefined,
311-
[
307+
truncateAttributes(
308+
convertKeyValueItemsToMap(span.attributes ?? [], [], undefined, [
312309
SemanticInternalAttributes.USAGE,
313310
SemanticInternalAttributes.SPAN,
314311
SemanticInternalAttributes.METADATA,
315312
SemanticInternalAttributes.STYLE,
316313
SemanticInternalAttributes.METRIC_EVENTS,
317314
SemanticInternalAttributes.TRIGGER,
318-
]
315+
]),
316+
spanAttributeValueLengthLimit
319317
) ?? {};
320318

321319
return {
@@ -774,24 +772,85 @@ function binaryToHex(buffer: Buffer | string | undefined): string | undefined {
774772
return Buffer.from(Array.from(buffer)).toString("hex");
775773
}
776774

777-
function truncateAttributes(attributes: KeyValue[], maximumLength: number = 1024): KeyValue[] {
778-
return attributes.map((attribute) => {
779-
return isStringValue(attribute.value)
780-
? {
781-
key: attribute.key,
782-
value: {
783-
stringValue: attribute.value.stringValue.slice(0, maximumLength),
784-
},
785-
}
786-
: attribute;
787-
});
775+
/**
 * Produces a new attribute map in which every string value has been run
 * through `truncateAndDetectUnpairedSurrogate` with the given limit.
 *
 * Non-string values (numbers, booleans, `undefined`) are copied unchanged,
 * and entries whose key is the empty string are left out of the result.
 * The input object is never mutated.
 *
 * @param attributes - Flat attribute map, or a falsy value when there are none.
 * @param maximumLength - Length limit forwarded for each string value (default 1024).
 * @returns A fresh map, or `undefined` when `attributes` is falsy.
 */
function truncateAttributes(
  attributes: Record<string, string | number | boolean | undefined> | undefined,
  maximumLength: number = 1024
): Record<string, string | number | boolean | undefined> | undefined {
  if (!attributes) {
    return undefined;
  }

  const result: Record<string, string | number | boolean | undefined> = {};

  for (const name of Object.keys(attributes)) {
    // Entries with an empty key are dropped.
    if (name === "") {
      continue;
    }

    const raw = attributes[name];
    result[name] =
      typeof raw === "string" ? truncateAndDetectUnpairedSurrogate(raw, maximumLength) : raw;
  }

  return result;
}
795+
796+
/**
 * Truncates `str` with `smartTruncateString` and then removes every unpaired
 * UTF-16 surrogate left dangling at the end of the result.
 *
 * Only the tail of the string is inspected; unpaired surrogates elsewhere in
 * the string are not touched by this function.
 *
 * @param str - The string to truncate.
 * @param maximumLength - Limit forwarded to `smartTruncateString`.
 * @returns The truncated string, guaranteed not to end in an unpaired surrogate.
 */
function truncateAndDetectUnpairedSurrogate(str: string, maximumLength: number): string {
  let truncated = smartTruncateString(str, maximumLength);

  // An unpaired surrogate is always exactly one UTF-16 code unit, so dropping
  // the final code unit removes it. Loop rather than check once: the input may
  // end in a run of lone surrogates (e.g. "a\uD800\uD800"), and removing only
  // the last one would still leave a dangling surrogate behind.
  while (hasUnpairedSurrogateAtEnd(truncated)) {
    truncated = truncated.slice(0, -1);
  }

  return truncated;
}
805+
806+
/** Matches strings made up exclusively of ASCII characters (including the empty string). */
const ASCII_ONLY_REGEX = /^[\p{ASCII}]*$/u;

/**
 * Truncates `str` to at most `maximumLength` Unicode code points without
 * splitting a surrogate pair.
 *
 * Strings whose UTF-16 length is already within the limit are returned as-is.
 *
 * @param str - The string to truncate; a falsy value yields `""`.
 * @param maximumLength - Maximum number of code points to keep.
 * @returns The (possibly shortened) string.
 */
function smartTruncateString(str: string, maximumLength: number): string {
  if (!str) return "";
  if (str.length <= maximumLength) return str;

  // Fast path: when the part we keep is pure ASCII, code units and code points
  // coincide, so a plain slice is exact. Only the retained prefix is tested —
  // non-ASCII text that is about to be discarded must not force the slow path.
  const prefix = str.slice(0, maximumLength);
  if (ASCII_ONLY_REGEX.test(prefix)) {
    return prefix;
  }

  // Slow path: split into code points. A code point occupies at most two code
  // units, so a window of 2 * maximumLength + 2 units always contains the
  // first `maximumLength` code points intact, without spreading the whole string.
  const checkLength = Math.min(str.length, maximumLength * 2 + 2);

  return [...str.slice(0, checkLength)].slice(0, maximumLength).join("");
}
820+
821+
/**
 * Reports whether the final UTF-16 code unit of `str` is a surrogate that
 * does not belong to a complete high+low pair.
 *
 * That is the case when the string ends in a high surrogate (nothing can
 * follow it), or ends in a low surrogate that is not immediately preceded
 * by a high surrogate.
 *
 * @param str - The string whose tail is inspected.
 * @returns `true` if the string ends in an unpaired surrogate, otherwise `false`.
 */
function hasUnpairedSurrogateAtEnd(str: string): boolean {
  const lastIndex = str.length - 1;
  if (lastIndex < 0) {
    return false;
  }

  const isHighSurrogate = (unit: number): boolean => unit >= 0xd800 && unit <= 0xdbff;
  const isLowSurrogate = (unit: number): boolean => unit >= 0xdc00 && unit <= 0xdfff;

  const finalUnit = str.charCodeAt(lastIndex);

  // A trailing high surrogate has no partner after it.
  if (isHighSurrogate(finalUnit)) {
    return true;
  }

  // Anything that is not a surrogate at all is fine.
  if (!isLowSurrogate(finalUnit)) {
    return false;
  }

  // A trailing low surrogate is only valid directly after a high surrogate.
  return lastIndex === 0 || !isHighSurrogate(str.charCodeAt(lastIndex - 1));
}
789844

790-
export const otlpExporter = new OTLPExporter(
791-
eventRepository,
792-
clickhouseEventRepository,
793-
process.env.OTLP_EXPORTER_VERBOSE === "1",
794-
process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT
795-
? parseInt(process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT, 10)
796-
: 8192
797-
);
845+
// Module-level exporter instance, created through `singleton` under the key
// "otlpExporter" (presumably so the instance is reused rather than rebuilt —
// confirm against `~/utils/singleton`).
export const otlpExporter = singleton("otlpExporter", initializeOTLPExporter);

/**
 * Builds the `OTLPExporter` from the event repositories and environment settings.
 *
 * - `OTLP_EXPORTER_VERBOSE === "1"` is passed as the third constructor argument.
 * - `SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT` is parsed as a base-10
 *   integer and passed as the fourth argument; when the variable is unset,
 *   empty, not a number, or negative, the default of 8192 is used instead.
 */
function initializeOTLPExporter() {
  const defaultAttributeValueLengthLimit = 8192;
  const rawLimit = process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT;
  const parsedLimit = rawLimit ? parseInt(rawLimit, 10) : defaultAttributeValueLengthLimit;

  // parseInt returns NaN for non-numeric input. A NaN (or negative) limit is
  // never a meaningful length, so fall back to the default rather than
  // handing it to the exporter.
  // NOTE(review): this value appears to be what the truncation helpers in this
  // file receive as `maximumLength` — confirm in the OTLPExporter constructor.
  const attributeValueLengthLimit =
    Number.isNaN(parsedLimit) || parsedLimit < 0 ? defaultAttributeValueLengthLimit : parsedLimit;

  return new OTLPExporter(
    eventRepository,
    clickhouseEventRepository,
    process.env.OTLP_EXPORTER_VERBOSE === "1",
    attributeValueLengthLimit
  );
}

0 commit comments

Comments
 (0)