1+ using System . Linq ;
2+
13namespace Semmle . Extraction
24{
35 /// <summary>
@@ -15,6 +17,68 @@ public Tuple(string name, params object[] args)
1517 Args = args ;
1618 }
1719
// Upper bound, in UTF-8 bytes, for a string value before it is truncated: 2^20 bytes (1 MiB).
private const int maxStringBytes = 1 << 20;

// Encoding used for every byte-count calculation in the truncation helpers.
private static readonly System.Text.Encoding encoding = System.Text.Encoding.UTF8;
22+
/// <summary>
/// Determines whether the UTF-8 encoding of <paramref name="s"/> is larger than
/// <c>maxStringBytes</c>.
/// </summary>
/// <param name="s">The string to measure.</param>
/// <returns>True if the encoded string exceeds the limit.</returns>
private static bool NeedsTruncation(string s)
{
    // Cheap worst-case bound first: if even the maximum possible encoding fits,
    // the exact (linear-time) byte count never has to be computed.
    if (encoding.GetMaxByteCount(s.Length) <= maxStringBytes)
    {
        return false;
    }

    return encoding.GetByteCount(s) > maxStringBytes;
}
30+
/// <summary>
/// Determines whether the combined UTF-8 encoding of all strings in
/// <paramref name="array"/> is larger than <c>maxStringBytes</c>.
/// </summary>
/// <param name="array">The strings to measure.</param>
/// <returns>True if the encoded strings together exceed the limit.</returns>
private static bool NeedsTruncation(string[] array)
{
    // Cheap worst-case bound first, based on the total number of UTF-16 code units.
    var totalChars = array.Sum(element => element.Length);
    if (encoding.GetMaxByteCount(totalChars) <= maxStringBytes)
    {
        return false;
    }

    // Only now pay for the exact byte count of every element.
    var totalBytes = array.Sum(element => encoding.GetByteCount(element));
    return totalBytes > maxStringBytes;
}
38+
/// <summary>
/// Appends <paramref name="s"/> to the trap builder after passing it through
/// <c>EncodeString</c>.
/// </summary>
/// <param name="tb">The trapbuilder to append to.</param>
/// <param name="s">The string to encode and output.</param>
private static void WriteString(ITrapBuilder tb, string s)
{
    var encoded = EncodeString(s);
    tb.Append(encoded);
}
40+
/// <summary>
/// Truncates a string such that the output UTF8 does not exceed <paramref name="bytesRemaining"/> bytes.
/// The cut never falls between the two halves of a surrogate pair.
/// </summary>
/// <param name="s">The input string to truncate.</param>
/// <param name="bytesRemaining">
/// The number of bytes available. On return it is reduced by the number of bytes
/// counted for the returned string.
/// </param>
/// <returns>The truncated string, or <paramref name="s"/> itself if it already fits.</returns>
private static string TruncateString(string s, ref int bytesRemaining)
{
    int outputLen = encoding.GetByteCount(s);
    if (outputLen > bytesRemaining)
    {
        outputLen = 0;
        int chars = 0;
        while (chars < s.Length)
        {
            // A surrogate pair is a single code point: measure and keep (or drop) both
            // code units together. Measuring them one at a time would count each half
            // as a separate replacement character and could leave an unpaired high
            // surrogate at the end of the result.
            int unit = char.IsSurrogatePair(s, chars) ? 2 : 1;
            int bytes = encoding.GetByteCount(s, chars, unit);
            if (outputLen + bytes > bytesRemaining)
                break;

            outputLen += bytes;
            chars += unit;
        }
        s = s.Substring(0, chars);
    }
    bytesRemaining -= outputLen;
    return s;
}
67+
/// <summary>
/// Escapes a string for output between double quotes by doubling every
/// double-quote character it contains.
/// </summary>
/// <param name="s">The string to escape.</param>
/// <returns>The escaped string.</returns>
// The search pattern is a single bare quote so that every quote is doubled,
// regardless of which character follows it.
private static string EncodeString(string s) => s.Replace("\"", "\"\"");
69+
/// <summary>
/// Truncates <paramref name="s"/> to at most <paramref name="bytesRemaining"/> UTF8 bytes
/// and outputs the result to the trap file. The byte budget is applied to the string
/// before <c>WriteString</c> encodes it.
/// </summary>
/// <param name="tb">The trapbuilder</param>
/// <param name="s">The string to output.</param>
/// <param name="bytesRemaining">The remaining bytes available to output; reduced by the truncation step.</param>
private static void WriteTruncatedString(ITrapBuilder tb, string s, ref int bytesRemaining)
{
    var truncated = TruncateString(s, ref bytesRemaining);
    WriteString(tb, truncated);
}
81+
1882 /// <summary>
1983 /// Constructs a unique string for this tuple.
2084 /// </summary>
@@ -27,49 +91,60 @@ public void EmitToTrapBuilder(ITrapBuilder tb)
2791 foreach ( var a in Args )
2892 {
2993 if ( column > 0 ) tb . Append ( ", " ) ;
30- if ( a is Label )
31- {
32- ( ( Label ) a ) . AppendTo ( tb ) ;
33- }
34- else if ( a is IEntity )
35- {
36- ( ( IEntity ) a ) . Label . AppendTo ( tb ) ;
37- }
38- else if ( a is string )
39- {
40- tb . Append ( "\" " ) ;
41- tb . Append ( ( ( string ) a ) . Replace ( "\" " , "\" \" " ) ) ;
42- tb . Append ( "\" " ) ;
43- }
44- else if ( a is System . Enum )
94+ switch ( a )
4595 {
46- tb . Append ( ( int ) a ) ;
47- }
48- else if ( a is int )
49- {
50- tb . Append ( ( int ) a ) ;
51- }
52- else if ( a == null )
53- {
54- throw new InternalError ( "Attempt to write a null argument tuple {0} at column {1}" ,
55- Name , column ) ;
56- }
57- else
58- {
59- var array = a as string [ ] ;
60- if ( array != null )
61- {
96+ case Label l :
97+ l . AppendTo ( tb ) ;
98+ break ;
99+ case IEntity e :
100+ e . Label . AppendTo ( tb ) ;
101+ break ;
102+ case string s :
62103 tb . Append ( "\" " ) ;
63- foreach ( var element in array )
64- tb . Append ( element . Replace ( "\" " , "\" \" " ) ) ;
104+ if ( NeedsTruncation ( s ) )
105+ {
106+ // Slow path
107+ int remaining = maxStringBytes ;
108+ WriteTruncatedString ( tb , s , ref remaining ) ;
109+ }
110+ else
111+ {
112+ // Fast path
113+ WriteString ( tb , s ) ;
114+ }
65115 tb . Append ( "\" " ) ;
66- }
67- else
68- {
116+ break ;
117+ case System . Enum _:
118+ tb . Append ( ( int ) a ) ;
119+ break ;
120+ case int i :
121+ tb . Append ( i ) ;
122+ break ;
123+ case string [ ] array :
124+ tb . Append ( "\" " ) ;
125+ if ( NeedsTruncation ( array ) )
126+ {
127+ // Slow path
128+ int remaining = maxStringBytes ;
129+ foreach ( var element in array )
130+ WriteTruncatedString ( tb , element , ref remaining ) ;
131+ }
132+ else
133+ {
134+ // Fast path
135+ foreach ( var element in array )
136+ WriteString ( tb , element ) ;
137+ }
138+ tb . Append ( "\" " ) ;
139+ break ;
140+ case null :
141+ throw new InternalError ( "Attempt to write a null argument tuple {0} at column {1}" ,
142+ Name , column ) ;
143+ default :
69144 throw new InternalError ( "Attempt to write an invalid argument type {0} in tuple {1} at column {2}" ,
70145 a . GetType ( ) , Name , column ) ;
71- }
72146 }
147+
73148 ++ column ;
74149 }
75150 tb . Append ( ")" ) ;
0 commit comments