Skip to content

Commit d81951c

Browse files
committed
AVX2: Avoid overread in polyz_unpack_17_avx2/polyz_unpack_19_avx2
polyz_unpack_17_avx2/polyz_unpack_197_avx2 unpacks polynnomials with coefficients packed into 18/20 bits each. To do so, it currently loads 32 bytes into a 256-bit register and discards the last 14/12-bytes. This is problematic in the last iteration as this overreads the buffer presenting a potential safety problem. This is a violoation of the API contract which only requires 18*32/20*32-bytes. Hence, this commit eliminates the overread by adding a special handling for the last iteration. In practice this problem is not flagged as the z component in the signature is followed by the hint compoenent, and, hence, this never overreads the actual signature buffer. It did, however, show up in the unit tests that are being implemented in #777. Signed-off-by: Matthias J. Kannwischer <matthias@kannwischer.eu>
1 parent 70513eb commit d81951c

File tree

4 files changed

+44
-4
lines changed

4 files changed

+44
-4
lines changed

dev/x86_64/src/polyz_unpack_17_avx2.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,17 @@ void mld_polyz_unpack_17_avx2(__m256i *r, const uint8_t *a)
3939

4040
for (i = 0; i < MLDSA_N / 8; i++)
4141
{
42-
f = _mm256_loadu_si256((__m256i *)&a[18 * i]);
42+
/* Last iteration: avoid overread by copying to padded buffer */
43+
if (i == MLDSA_N / 8 - 1)
44+
{
45+
MLD_ALIGN uint8_t tmp[32] = {0};
46+
memcpy(tmp, &a[18 * i], 18);
47+
f = _mm256_load_si256((__m256i *)tmp);
48+
}
49+
else
50+
{
51+
f = _mm256_loadu_si256((__m256i *)&a[18 * i]);
52+
}
4353

4454
/* Permute 64-bit lanes
4555
* 0x94 = 10010100b rearranges 64-bit lanes as: [3,2,1,0] -> [2,1,1,0]

dev/x86_64/src/polyz_unpack_19_avx2.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,17 @@ void mld_polyz_unpack_19_avx2(__m256i *r, const uint8_t *a)
4040

4141
for (i = 0; i < MLDSA_N / 8; i++)
4242
{
43-
f = _mm256_loadu_si256((__m256i *)&a[20 * i]);
43+
/* Last iteration: avoid overread by copying to padded buffer */
44+
if (i == MLDSA_N / 8 - 1)
45+
{
46+
MLD_ALIGN uint8_t tmp[32] = {0};
47+
memcpy(tmp, &a[20 * i], 20);
48+
f = _mm256_load_si256((__m256i *)tmp);
49+
}
50+
else
51+
{
52+
f = _mm256_loadu_si256((__m256i *)&a[20 * i]);
53+
}
4454

4555
/* Permute 64-bit lanes
4656
* 0x94 = 10010100b rearranges 64-bit lanes as: [3,2,1,0] -> [2,1,1,0]

mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,17 @@ void mld_polyz_unpack_17_avx2(__m256i *r, const uint8_t *a)
3939

4040
for (i = 0; i < MLDSA_N / 8; i++)
4141
{
42-
f = _mm256_loadu_si256((__m256i *)&a[18 * i]);
42+
/* Last iteration: avoid overread by copying to padded buffer */
43+
if (i == MLDSA_N / 8 - 1)
44+
{
45+
MLD_ALIGN uint8_t tmp[32] = {0};
46+
memcpy(tmp, &a[18 * i], 18);
47+
f = _mm256_load_si256((__m256i *)tmp);
48+
}
49+
else
50+
{
51+
f = _mm256_loadu_si256((__m256i *)&a[18 * i]);
52+
}
4353

4454
/* Permute 64-bit lanes
4555
* 0x94 = 10010100b rearranges 64-bit lanes as: [3,2,1,0] -> [2,1,1,0]

mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,17 @@ void mld_polyz_unpack_19_avx2(__m256i *r, const uint8_t *a)
4040

4141
for (i = 0; i < MLDSA_N / 8; i++)
4242
{
43-
f = _mm256_loadu_si256((__m256i *)&a[20 * i]);
43+
/* Last iteration: avoid overread by copying to padded buffer */
44+
if (i == MLDSA_N / 8 - 1)
45+
{
46+
MLD_ALIGN uint8_t tmp[32] = {0};
47+
memcpy(tmp, &a[20 * i], 20);
48+
f = _mm256_load_si256((__m256i *)tmp);
49+
}
50+
else
51+
{
52+
f = _mm256_loadu_si256((__m256i *)&a[20 * i]);
53+
}
4454

4555
/* Permute 64-bit lanes
4656
* 0x94 = 10010100b rearranges 64-bit lanes as: [3,2,1,0] -> [2,1,1,0]

0 commit comments

Comments
 (0)