Skip to content

Commit a17b4e6

Browse files
authored
Fuzzing consume much memory for a method with 10 and more parameters (#335)
1 parent 1ceea28 commit a17b4e6

File tree

3 files changed

+468
-4
lines changed

3 files changed

+468
-4
lines changed

utbot-fuzzers/src/main/kotlin/org/utbot/fuzzer/CartesianProduct.kt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@ class CartesianProduct<T>(
1515
override fun iterator(): Iterator<List<T>> {
1616
val combinations = Combinations(*lists.map { it.size }.toIntArray())
1717
val sequence = if (random != null) {
18-
// todo create lazy random algo for this because this method can cause OOME even if we take only one value
19-
val permutation = IntArray(combinations.size) { it }
20-
permutation.shuffle(random)
21-
permutation.asSequence().map(combinations::get)
18+
val permutation = PseudoShuffledIntProgression(combinations.size, random)
19+
(0 until combinations.size).asSequence().map { combinations[permutation[it]] }
2220
} else {
2321
combinations.asSequence()
2422
}
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
package org.utbot.fuzzer
2+
3+
import kotlin.math.sqrt
4+
import kotlin.random.Random
5+
6+
/**
7+
* Generates pseudo random values from 0 to size exclusive.
8+
*
9+
* In general there are 2 ways to get random order for a given range:
10+
* 1. Create an array of target size and shuffle values.
11+
* 2. Create a set of generated values and generate new values until they're unique.
12+
*
13+
* Both cases cause high memory usage when target number of values is big.
14+
*
15+
* The memory usage can be reduced by using a pseudo-random sequence.
16+
*
17+
* Algorithm to create pseudo-random sequence of length L:
18+
 * 1. Take first K elements to create a matrix of size COLS × ROWS in such a way that K = COLS * ROWS and ROWS >= L - K.
19+
* 2. Move last L - K elements into a new array tail.
20+
* 3. Any index N from [0, K) can be calculated by using 2 numbers: number of column (i) and number of row (j) as follows:
21+
 * `N = i * ROWS + j` where `i = N / ROWS` and `j = N % ROWS`.
22+
* In such case the index N increases from top to bottom and left to right.
23+
* 4. Tail contains all values from [K, L).
24+
* 5. Shuffle matrix columns, matrix rows and the tail.
25+
* 6. Add the tail as a column to the matrix. Since ROWS >= L - K there are missing values for i = COLS and j >= L - K.
26+
* 7. Write down all values except missing from left to right and top to bottom.
27+
*
28+
* Example, input size = 23
29+
* ```
30+
* matrix tail
31+
* -----------------------
32+
* 0 5 10 15 20
33+
* 1 6 11 16 21
34+
* 2 7 12 17 22
35+
* 3 8 13 18
36+
* 4 9 14 19
37+
* ```
38+
* Columns are shuffled:
39+
*
40+
* ```
41+
* matrix tail
42+
* -----------------------
43+
* 10 5 15 0 20
44+
* 11 6 16 1 21
45+
* 12 7 17 2 22
46+
* 13 8 18 3
47+
* 14 9 19 4
48+
* ```
49+
* Rows and tail are shuffled
50+
* ```
51+
* matrix tail
52+
* -----------------------
53+
* 12 7 17 2 22
54+
* 13 8 18 3 20
55+
* 14 9 19 4 21
56+
* 10 5 15 0
57+
* 11 6 16 1
58+
* ```
59+
* Merge matrix and tail:
60+
* ```
61+
* 12 7 17 2 22
62+
* 13 8 18 3 20
63+
* 14 9 19 4 21
64+
* 10 5 15 0 ×
65+
* 11 6 16 1 ×
66+
* ```
67+
*
68+
* Write down the sequence: `[12, 7, 17, 2, 22, 13, 8, 18, 3, 20, 14, 9, 19, 4, 21, 10, 5, 15, 0, 11, 6, 16, 1]`.
69+
*
70+
* Instead of storing matrix itself only column and row numbers can be stored. Therefore, the 5th step of the algorithm
71+
* can be changed into this:
72+
*
73+
* 5. Shuffle column numbers, row numbers and tail.
74+
*
75+
* In this case any value from the matrix can be calculated as follows:
76+
* `N = column[i] * ROWS + rows[j]`, where column and rows are shuffled column and row numbers arrays.
77+
*
78+
* Using number arrays instead of the matrix this algorithm requires only
79+
* `537 552 bytes (~ 550 KB)` compared to `Int.MAX_VALUE * 4 = 8 589 934 588 bytes (~ 8 GB)`
80+
* when using simple array-shuffle algorithm.
81+
*/
82+
class PseudoShuffledIntProgression : Iterable<Int> {
    /** Shuffled column numbers of the rectangular (matrix) part. */
    private val columnNumber: IntArray

    /** Shuffled row numbers of the rectangular (matrix) part. */
    private val rowNumber: IntArray

    /** Shuffled values that do not fit into the rectangle; never longer than [rowNumber]. */
    private val tail: IntArray

    /** Total number of values produced: `columns * rows + tail length`. */
    val size: Int

    /**
     * Creates a pseudo-shuffled progression over `[0, size)` with the number of matrix columns
     * chosen as the integer part of the square root of [size].
     *
     * @param size number of values to produce, must be 0 or greater
     * @param random source of randomness used to shuffle columns, rows and the tail
     * @throws IllegalStateException if [size] is negative
     */
    constructor(size: Int, random: Random = Random) : this(size, random, { sqrt(it.toDouble()).toInt() })

    /**
     * Test only constructor
     *
     * @param columns calculates the requested number of matrix columns for the given size;
     * the result is reduced to `size / columns` when it exceeds the resulting number of rows
     */
    internal constructor(size: Int, random: Random, columns: (Int) -> Int) {
        check(size >= 0) { "Size must be positive or 0 but current value is $size" }
        this.size = size
        var cols = columns(size)
        // keep the matrix at least as tall as it is wide, so the tail always fits into one column
        if (cols > 0 && cols > size / cols) {
            cols = size / cols
        }
        check(cols > 0 || size == 0) { "Side of matrix must be greater than 0 but $cols <= 0" }

        columnNumber = IntArray(size = cols) { it }.apply { shuffle(random) }
        rowNumber = IntArray(size = if (cols == 0) 0 else size / cols) { it }.apply { shuffle(random) }
        check(columnNumber.size <= rowNumber.size) { "Error in internal array state: number of rows shouldn't be less than number of columns" }

        // everything that doesn't fit into the COLS × ROWS rectangle goes to the tail
        val rectangle = columnNumber.size * rowNumber.size
        tail = IntArray(size - rectangle) { it + rectangle }.apply { shuffle(random) }
    }

    /**
     * Test only constructor
     *
     * Uses the passed arrays as is, without copying or shuffling them.
     */
    internal constructor(columns: IntArray, rows: IntArray, tail: IntArray) {
        check(rows.size >= tail.size) { "Tail cannot be placed into 1 column of the target matrix" }
        this.columnNumber = columns
        this.rowNumber = rows
        this.tail = tail
        this.size = columns.size * rows.size + tail.size
    }

    /**
     * Returns a unique pseudo-random value for the given [index].
     *
     * To calculate correct value of the merged matrix as described before
     * let's look at not shuffled merged matrix with size 2 × 6 and the tail with 2 elements
     * (14 values in total, from 0 to 13).
     *
     * ```
     * 0  6  12
     * 1  7  13
     * 2  8  ×
     * 3  9  ×
     * 4  10 ×
     * 5  11 ×
     * ```
     *
     * The index moves from left to right and top to bottom, so the result sequence should be
     * `[0, 6, 12, 1, 7, 13, 2, 8, 3, 9, 4, 10, 5, 11]`
     * but the correct index in this matrix cannot be calculated just as `i * ROWS + j` because of missing values.
     * To correct the index a proper shift should be used. Let's transform the matrix into a matrix of shifts
     * that should be added to the index for jumping over missing values:
     *
     * ```
     * 0 0 0
     * 0 0 0
     * 0 0 1
     * 1 2 2
     * 3 3 ×
     * × × ×
     * ```
     *
     * The matrix can be divided into 3 parts that calculate correct indices:
     *
     * ```
     * 0 0 | 0
     * 0 0 | 0
     * _____|___
     * 0 0 1
     * 1 2 2
     * 3 3 ×
     * × × ×
     * ```
     *
     * 1. Top left doesn't change the index and values are taken from the matrix.
     * 2. Top right doesn't change the index and values are taken from the tail.
     * 3. Bottom uses shifts to change index and values are taken from the matrix.
     *
     * To clarify general rule for calculating a shift let's look at merged matrix
     * where every row has COLS values and TAILS missing values like this:
     * ```
     * 1:   N1 N2 N3 .. N_COLS | M1 M2 .. M_TAILS
     * 2:   N1 N2 N3 .. N_COLS | M1 M2 .. M_TAILS
     * ...
     * ROW: N1 N2 N3 .. N_COLS | M1 M2 .. M_TAILS
     * ```
     * The shift grows by TAILS after every COLS values, therefore the shift can be calculated as follows:
     * ```
     * shift = (index / COLS) * TAILS
     * ```
     *
     * **Examples**
     *
     * COLS = 3, TAILS = 1:
     * ```
     * 0 0 0 1
     * 1 1 2 2
     * 2 3 3 3
     * ...
     * ```
     *
     * COLS = 1, TAILS = 3
     * ```
     * 0  3  6  9
     * 12 15 18 21
     * 24 27 30 33
     * ...
     * ```
     *
     * COLS = 2, TAILS = 2
     * ```
     * 0 0 2  2
     * 4 4 6  6
     * 8 8 10 10
     * ...
     * ```
     *
     * @param index position in the generated sequence, must be in `[0, size)`
     * @return the value placed at [index]
     * @throws IllegalStateException if [index] is out of bounds
     */
    operator fun get(index: Int): Int {
        check(index in 0 until size) { "Index out of bounds: $index >= $size" }
        val cols = columnNumber.size
        val rows = rowNumber.size
        // width of the merged matrix: all matrix columns plus one column for the tail
        val e = cols + 1
        var i = index % e
        var j = index / e
        return if (i == cols && j < tail.size) {
            // top right case
            tail[j]
        } else {
            // first tail.size * e values can be calculated without index shift
            // o <= 0 is the top left case or the first bottom row, no shift is needed there
            // o > 0 is the rest of the bottom case with COLS = cols and TAILS = 1
            // Long is used because index + o can exceed Int.MAX_VALUE
            val o = ((index - tail.size * e) / cols).toLong()
            if (o > 0) {
                i = ((index + o) % e).toInt()
                j = ((index + o) / e).toInt()
            }
            columnNumber[i] * rows + rowNumber[j]
        }
    }

    /**
     * Materializes the whole sequence into a new array of [size] elements.
     */
    fun toArray(): IntArray = IntArray(size, this::get)

    /**
     * Returns an iterator over all values in pseudo-random order.
     *
     * Calling `nextInt()` after the last value throws [NoSuchElementException].
     */
    override fun iterator(): IntIterator = object : IntIterator() {
        private var current = 0

        override fun hasNext() = current < size

        override fun nextInt(): Int {
            // fail without touching the position, instead of leaking the bounds check of get()
            if (!hasNext()) throw NoSuchElementException()
            return get(current++)
        }
    }
}

0 commit comments

Comments
 (0)