Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions src/libc/memcpy.src
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,30 @@
.else

_memcpy:
ld iy, -1
; size > 0 : 25F + 15R + 1 + LDIR
; size >= 65536 : 32F + 16R + 3 + LDIR
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is no longer applicable, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I will fix that

; size == 0 : 26F + 13R + 2
; size >= 65536 + 7F + 1R + 2 (only when the low 16 bits are zero)

ld iy, 0
add iy, sp
ld bc, (iy + 10) ; Load count
sbc hl, hl
add hl, bc
jr nc, .L.zero
ld de, (iy + 4) ; Load destination
ld hl, (iy + 7) ; Load source
ld bc, (iy + 9) ; Load count
ld a, c
or a, b
ld de, (iy + 3) ; Load destination
jr z, .L.maybe_zero
.L.not_zero:
ld hl, (iy + 6) ; Load source
ldir
.L.zero:
ld hl, (iy + 4) ; Return the destination pointer
ld hl, (iy + 3) ; Return the destination pointer
ret

.L.maybe_zero:
; low 16 bits are zero
or a, (iy + 11) ; test upper 8 bits
jr nz, .L.not_zero ; size >= 65536
; size == 0
ex de, hl
ret

.endif
108 changes: 62 additions & 46 deletions src/libc/memmove.src
Original file line number Diff line number Diff line change
Expand Up @@ -15,87 +15,103 @@

; Optimized for when src != dst
_memmove:
; src > dst | LDIR | 32F + 15R + 1
; src < dst | LDDR | 35F + 12R + 2
; src = dst | LDDR | 35F + 12R + 2
; zero size | | 24F + 12R + 2
; src > dst | LDIR | 31F + 15R + 2
; src < dst | LDDR | 33F + 12R + 1
; src = dst | LDIR | 31F + 15R + 2
; zero size | | 26F + 10R + 2
; size >= 65536 + 7F + 1R + 2 (only when the low 16 bits are zero)

ld iy, -1
ld iy, 0
add iy, sp
ld bc, (iy + 10)
sbc hl, hl
add hl, bc
jr nc, .L.zero
ld hl, (iy + 7)
ld de, (iy + 4)
ld bc, (iy + 9)
ld a, c
or a, b
ld de, (iy + 3)
jr z, .L.maybe_zero
.L.not_zero:
ld hl, (iy + 6)
sbc hl, de
; src <= dst
jr c, .L.copy_backwards
; src > dst
; .copy_forwards:
add hl, de
inc hl
ldir
.L.zero:
ld hl, (iy + 4)
ret

.L.copy_backwards:
; src >= dst
jr nc, .L.copy_forwards
; src < dst
; .L.copy_backwards:
; move HL and DE to the end
dec de ; DE = dst - 1
ex de, hl
add hl, bc ; HL = dst + size - 1, DE = src - dst
ex de, hl
add hl, bc
ex de, hl ; HL = src - dst - 1, DE = dst + size
add hl, de ; HL = src + size - 1
dec de ; DE = dst + size - 1
add hl, de ; HL = src + size - 1, DE = dst + size - 1
lddr
ex de, hl
inc hl
ret

.L.copy_forwards:
add hl, de
ldir
ld hl, (iy + 3)
ret

.L.maybe_zero:
; low 16 bits are zero
or a, (iy + 11) ; test upper 8 bits
jr nz, .L.not_zero ; size >= 65536
; size == 0
ex de, hl
ret

.else

; Optimized for when src == dst
_memmove:
; src > dst | LDIR | 33F + 15R + 2
; src < dst | LDDR | 36F + 12R + 2
; src = dst | | 29F + 12R + 2
; zero size | | 24F + 12R + 2
; src > dst | LDIR | 31F + 15R + 2
; src < dst | LDDR | 34F + 12R + 2
; src = dst | | 27F + 12R + 2
; zero size | | 26F + 10R + 2
; size >= 65536 + 7F + 1R + 2 (only when the low 16 bits are zero)

ld iy, -1
ld iy, 0
add iy, sp
ld bc, (iy + 10)
sbc hl, hl
add hl, bc
jr nc, .L.zero
ld de, (iy + 4)
ld hl, (iy + 7)
or a, a
ld bc, (iy + 9)
ld a, c
or a, b
ld de, (iy + 3)
jr z, .L.maybe_zero
.L.not_zero:
ld hl, (iy + 6)
sbc hl, de
; src < dst
jr c, .copy_backwards
jr c, .L.copy_backwards
; src >= dst
; .L.copy_forwards:
add hl, de
; src == dst
ret z ; skips LDIR when src == dst
ret z ; skips LDIR when src == dst
; src > dst
ldir
.L.zero:
ld hl, (iy + 4)
ld hl, (iy + 3)
ret

.L.copy_backwards:
; move HL and DE to the end
dec de ; DE = dst - 1
dec de ; DE = dst - 1
ex de, hl
add hl, bc ; HL = dst + size - 1, DE = src - dst
add hl, bc ; HL = dst + size - 1, DE = src - dst
ex de, hl
add hl, de ; HL = src + size - 1, DE = dst + size - 1
add hl, de ; HL = src + size - 1, DE = dst + size - 1
lddr
ex de, hl
inc hl
ret

.L.maybe_zero:
; low 16 bits are zero
or a, (iy + 11) ; test upper 8 bits
jr nz, .L.not_zero ; size >= 65536
; size == 0
ex de, hl
ret

.endif

.endif
22 changes: 0 additions & 22 deletions src/libc/mempcpy.src
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,6 @@
.global _mempcpy
.type _mempcpy, @function

.if 0

; faster when count is zero
_mempcpy:
ld iy, -1
add iy, sp
ld bc, (iy + 10) ; Load count
sbc hl, hl
add hl, bc
ld hl, (iy + 4) ; Load destination
ret nc ; zero bytes to copy
ld de, (iy + 7) ; Load source
ex de, hl
ldir
ex de, hl
ret

.else

; faster in full execution case by 0F + 1 clock cycles
_mempcpy:
ld iy, -1
add iy, sp
Expand All @@ -38,5 +18,3 @@ _mempcpy:
.L.zero_byte_copy:
ex de, hl
ret

.endif
26 changes: 26 additions & 0 deletions test/standalone/asprintf_fprintf/src/fill_mem32.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
.assume adl = 1

.section .text

.global _fill_mem32

; void fill_mem32(void *dst, size_t bytes, uint32_t pattern)
;
; Fills memory starting at dst with repeated copies of the 4-byte pattern.
; The pattern is first copied once from its stack slot to dst, then an
; overlapping forward LDIR (source = dst, destination = dst + 4) replicates
; those bytes over the remaining (bytes - 4) bytes. A byte count that is not
; a multiple of 4 therefore ends with a partial copy of the pattern.
;
; Stack arguments, addressed through IY = SP:
;   (iy + 3) = dst, (iy + 6) = bytes, (iy + 9) = pattern
;
; Returns without writing anything when bytes <= 4.
; NOTE(review): bytes == 4 stores no copy of the pattern at all; confirm that
; no caller expects a single pattern to be written in that case.
_fill_mem32:
ld iy, 0
add iy, sp ; IY = SP; 0 + SP cannot carry, so the carry flag is clear afterwards
ld de, (iy + 3) ; DE = dst
ld hl, (iy + 6) ; HL = bytes
ld bc, 4 ; BC = pattern size in bytes
sbc hl, bc ; HL = bytes - 4 (relies on the clear carry from add iy, sp)
; return if bytes <= pattern_size
ret c ; bytes < 4
ret z ; bytes == 4; also keeps the second LDIR from running with BC = 0
push hl ; save the remaining byte count (bytes - 4)
; copy pattern once
lea hl, iy + 9 ; HL = address of the pattern argument on the stack
ldir ; copy 4 bytes to dst; DE is left at dst + 4
pop bc ; BC = bytes - 4
; now copy (bytes - pattern_size)
ld hl, (iy + 3) ; HL = dst, four bytes behind DE
ldir ; every byte read was written 4 bytes earlier, so the pattern repeats
ret
55 changes: 55 additions & 0 deletions test/standalone/asprintf_fprintf/src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ void *T_memccpy(void *__restrict dest, const void *__restrict src, int c, size_t
void *T_mempcpy(void *__restrict dest, const void *__restrict src, size_t n)
__attribute__((nonnull(1, 2)));

void *T_memchr(const void *s, int c, size_t n)
__attribute__((nonnull(1)));

void *T_memrchr(const void *s, int c, size_t n)
__attribute__((nonnull(1)));

Expand Down Expand Up @@ -92,6 +95,7 @@ void T_bzero(void* s, size_t n);
#define T_memcmp memcmp
#define T_memccpy memccpy
#define T_mempcpy mempcpy
#define T_memchr memchr
#define T_memrchr memrchr
#define T_memmem memmem
#define T_memrmem memrmem
Expand Down Expand Up @@ -994,6 +998,54 @@ int strchrnul_test(void) {
return 0;
}

/*
 * Checks memcpy and memmove with byte counts whose low 16 bits are zero
 * (65536 and 131072). A 320 * 240 * 2 byte region at 0xD40000 is used as
 * scratch memory and is overwritten.
 *
 * Returns 0 after the last check; what C() does when a check fails is
 * defined outside this function.
 */
int mem65536_test(void) {
    void fill_mem32(void *dst, size_t bytes, uint32_t pattern);

    const size_t size_64k = 65536;
    const size_t size_128k = 131072;
    const size_t region_bytes = 320 * 240 * 2;
    const uint32_t marker_a = 0xA3A0A1A0;
    const uint32_t marker_b = 0xFECDAB89;

    uint8_t * const region = (uint8_t*)0xD40000;
    /* Last 65536 bytes of the region: source of the first large copy. */
    uint8_t * const region_tail = region + region_bytes - size_64k;
    /* Destination of the first large copy, 32 bytes into the region. */
    uint8_t * const copy_target = region + 32;

    memset(region, 0, region_bytes);

    /* Return values only: every call must hand back its destination. */

    C(T_memcpy(SINK, SINK, size_64k) == SINK);
    C(T_memcpy(SINK, SINK, size_128k) == SINK);

    /* memmove with src == dst */
    C(T_memmove(SINK, SINK, size_64k) == SINK);
    C(T_memmove(SINK, SINK, size_128k) == SINK);

    /* memmove with src < dst */
    C(T_memmove(SINK + 16, SINK, size_64k) == SINK + 16);
    C(T_memmove(SINK + 16, SINK, size_128k) == SINK + 16);

    /* memmove with src > dst */
    C(T_memmove(SINK, SINK + 16, size_64k) == SINK);
    C(T_memmove(SINK, SINK + 16, size_128k) == SINK);

    /* Copied contents when the size is a non-zero multiple of 65536. */

    fill_mem32(region_tail, size_64k, 0x78563412);
    C(T_memcpy(copy_target, region_tail, size_64k) == copy_target);
    /* The 32 bytes in front of the copy must still be zero ... */
    C(T_memchr(region, 0x00, 32) == region);
    C(T_memchr(region, 0x12, 32) == NULL_ptr);
    /* ... the copy must begin exactly at offset 32 ... */
    C(T_memchr(region, 0x12, 33) == copy_target);
    /* ... and its final byte must be the last 0x78 in the searched range. */
    C(T_memrchr(region, 0x78, 32 + size_64k + 32) == copy_target + size_64k - 1);

    fill_mem32(region, 32, marker_a);
    fill_mem32(region + 24576, size_64k, marker_b);

    /* Overlapping moves towards higher addresses (src < dst). */
    C(T_memmove(region + 61, region, size_64k) == region + 61);
    C(T_memmem(region, size_128k, &marker_a, sizeof marker_a) == region);
    C(T_memrmem(region, size_128k, &marker_a, sizeof marker_a) == region + 61 - 4 + 32);

    C(T_memmove(region + 24578, region, size_64k) == region + 24578);
    C(T_memmem(region, size_64k, &marker_a, sizeof marker_a) == region);
    C(T_memrmem(region, size_64k, &marker_a, sizeof marker_a) == region + 24578 + 61 + 32 - 4);
    C(T_memmem(region, size_64k, &marker_b, sizeof marker_b) == region + 24576 + 24578 + 61);
    C(T_memrmem(region, size_64k, &marker_b, sizeof marker_b) == region + size_64k - 4u - (((24578u - 24576u) - 61u) % 4u));

    return 0;
}

int run_tests(void) {
int ret = 0;
/* boot_asprintf */
Expand Down Expand Up @@ -1027,6 +1079,9 @@ int run_tests(void) {
TEST(strrstr_test());
TEST(strchrnul_test());

TEST(mem65536_test());
os_ClrHome();

return 0;
}

Expand Down
6 changes: 4 additions & 2 deletions test/standalone/asprintf_fprintf/src/rename.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

.section .text

.global _T_memset, _T_memcpy, _T_memmove, _T_memcmp, _T_memccpy, _T_mempcpy, _T_memrchr, _T_memmem, _T_memrmem
.global _T_memset, _T_memcpy, _T_memmove, _T_memcmp, _T_memccpy, _T_mempcpy, _T_memchr, _T_memrchr, _T_memmem, _T_memrmem
.global _T_strlen, _T_strcmp, _T_strncmp, _T_stpcpy, _T_stpncpy, _T_strlcat, _T_strchrnul, _T_strrstr
.global _T_bzero

Expand All @@ -18,6 +18,8 @@ _T_memccpy:
jp _memccpy
_T_mempcpy:
jp _mempcpy
_T_memchr:
jp _memchr
_T_memrchr:
jp _memrchr
_T_memmem:
Expand Down Expand Up @@ -51,6 +53,6 @@ _T_bzero:
_NULL_ptr:
db $00, $00, $00

.extern _memset, _memcpy, _memmove, _memcmp, _memccpy, _mempcpy, _memrchr, _memmem, _memrmem
.extern _memset, _memcpy, _memmove, _memcmp, _memccpy, _mempcpy, _memchr, _memrchr, _memmem, _memrmem
.extern _strlen, _strcmp, _strncmp, _stpcpy, _stpncpy, _strlcat, _strchrnul, _strrstr
.extern _bzero
Loading