Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions src/crt/i48mulhs.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
.assume adl=1

.section .text

.global __i48mulhs
.type __i48mulhs, @function

; UDE:UHL = ((int96_t)UDE:UHL * (int96_t)UIY:UBC) >> 48
;
; Signed high-half multiply layered on the unsigned __i48mulhu: the unsigned
; result is corrected by subtracting one operand for every *other* operand
; that is negative.
;
; In:   UDE:UHL, UIY:UBC = signed 48-bit operands
; Out:  UDE:UHL = upper 48 bits of the signed 96-bit product
; AF, UBC and UIY are pushed on entry and popped again before returning.
;
; NOTE(review): this relies on __i48mulhu and __i48sub leaving A intact, and
; on __i48sub computing UDE:UHL -= UIY:UBC (as the inline comments below
; state) -- confirm against those routines.
__i48mulhs:
push af
push iy
push bc
; The two pushes below double as a saved copy of the original UDE:UHL; it is
; popped into UIY:UBC after the first correction.
push de
push hl

; Capture the sign of UIY:UBC: doubling UIY moves its top bit into carry,
; and sbc a, a then makes A all-ones (negative) or zero (non-negative).
push hl
lea hl, iy + 0
add hl, hl
sbc a, a

; Capture the sign of UDE:UHL: adding $800000 to UDE carries out exactly when
; the top bit of UDE is set. rla shifts that carry into bit 0 of A, so
; bit 0 = sign of UDE:UHL and bit 1 = sign of UIY:UBC.
ld hl, $800000
add hl, de
; pop does not touch the flags, so the carry survives until rla.
pop hl
rla

call __i48mulhu

; if (UDE:UHL < 0) { result -= UIY:UBC; }
; rrca moves bit 0 of A (sign of the original UDE:UHL) into carry.
rrca
call c, __i48sub

; Reload the original UDE:UHL into UIY:UBC (saved hl -> bc, saved de -> iy)
; so it becomes the value subtracted by the second correction.
pop bc
pop iy

; if (UIY:UBC < 0) { result -= UDE:UHL; }
; The second rrca exposes what was bit 1 of A (sign of the original UIY:UBC).
rrca
call c, __i48sub

; Restore the caller's UBC, UIY and AF.
pop bc
pop iy
pop af
ret

.extern __i48mulhu
.extern __i48sub
76 changes: 76 additions & 0 deletions src/crt/i48mulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
.assume adl=1

.section .text

.global __i48mulhu
.type __i48mulhu, @function

; UDE:UHL = ((uint96_t)UDE:UHL * (uint96_t)UIY:UBC) >> 48
;
; In:    UDE:UHL = x (x_hi:x_lo), UIY:UBC = y (y_hi:y_lo), 24-bit halves
; Out:   UDE:UHL = upper 48 bits of the unsigned 96-bit product x * y
; Keeps: UBC, UIY and UIX are saved here and restored before returning.
;        A is reloaded by the `pop af` below with the value it had at the
;        matching `push af`.
;
; Method: four 24x24 -> 48 bit products through __i48mulu (called with
; UDE = UIY = 0 so that only UHL * UBC contributes):
;
;   mid    = x_hi*y_lo + (x_lo*y_lo >> 24) + x_lo*y_hi      (up to 49 bits)
;   result = x_hi*y_hi + (mid >> 24)
;
; Carry handling: x_hi*y_lo + (x_lo*y_lo >> 24) is at most
; (2^24-1)^2 + (2^24-2) < 2^48, so folding the low carry into that product
; can never overflow 48 bits. Adding x_lo*y_hi afterwards with add/adc then
; leaves bit 48 of `mid` in the carry flag. (Adding the low carry last, with
; a bare `inc de`, silently dropped bit 48 when the upper word was $FFFFFF,
; e.g. for x = $FFFFFF:$800000, y = $000003:$FFFFFF.)
;
; NOTE(review): assumes __i48mulu returns the low 48 bits of
; UDE:UHL * UIY:UBC in UDE:UHL and preserves A, UBC, UIY and UIX -- confirm.
; Size/cycle figures quoted for the previous instruction sequence no longer
; apply exactly (one `ex de, hl` fewer); re-measure before quoting them.
__i48mulhu:
    push    ix
    push    iy
    push    bc
    ld      ix, 0
    lea     iy, ix + 0              ; UIY = 0, also the source of zero for UDE
    add     ix, sp                  ; frame: (ix + 0) = y_lo, (ix + 3) = y_hi
    push    de                      ; (ix - 3) = x_hi
    push    hl                      ; (ix - 6) = x_lo

    ; x_lo * y_lo -- only its upper 24 bits feed into the result
    lea     de, iy + 0
    call    __i48mulu
    push    de                      ; low carry = (x_lo * y_lo) >> 24

    ; x_hi * y_lo + low carry
    lea     de, iy + 0
    ld      hl, (ix - 3)
    call    __i48mulu
    pop     bc                      ; low carry (y_lo in UBC is no longer needed)
    add     hl, bc
    jr      nc, .L.no_low_carry
    inc     de                      ; cannot wrap: the sum is below 2^48
.L.no_low_carry:
    push    de                      ; hi24 of the partial middle sum
    push    hl                      ; lo24 of the partial middle sum

    ; + x_lo * y_hi
    lea     de, iy + 0
    ld      bc, (ix + 3)
    ld      hl, (ix - 6)
    call    __i48mulu
    pop     bc                      ; lo24
    add     hl, bc                  ; low word is discarded, only its carry matters
    ex      de, hl                  ; ex does not alter the flags
    pop     bc                      ; hi24
    adc     hl, bc                  ; UHL = bits 24-47 of mid, carry = bit 48
    push    af                      ; upper carry (bit 48 of mid)
    push    hl                      ; high carry (bits 24-47 of mid)

    ; x_hi * y_hi
    lea     de, iy + 0
    ld      bc, (ix + 3)
    ld      hl, (ix - 3)
    call    __i48mulu
    pop     bc                      ; high carry
    pop     af                      ; upper carry
    jr      nc, .L.no_upper_carry
    inc     de                      ; bit 48 of mid lands on bit 24 of the result
.L.no_upper_carry:
    add     hl, bc
    ld      sp, ix                  ; drop locals; ld/pop leave the carry intact
    pop     bc
    pop     iy
    pop     ix
    ret     nc                      ; no high carry
    inc     de
    ret

.extern __i48mulu
42 changes: 42 additions & 0 deletions src/crt/imulhs.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
.assume adl=1

.section .text

.global __imulhs
.type __imulhs, @function

; UHL = ((int48_t)UHL * (int48_t)UBC) >> 24
;
; Signed high-half multiply. The unsigned product from __imulhu is corrected
; by subtracting each operand once if the *other* operand is negative.
;
; In:   UHL, UBC = signed 24-bit operands
; Out:  UHL      = upper 24 bits of the signed 48-bit product
; AF and UBC are pushed on entry and restored before returning.
; __imulhu must leave A untouched: A carries the two sign bits across the call.
__imulhs:
    push    af
    push    bc
    push    hl                      ; also the saved copy used by the first fix-up

    ; Shift both operand signs into A: first UHL (via doubling), then UBC
    ; (adding $800000 carries out when the top bit is set).
    ; Afterwards bit 1 = sign of UHL, bit 0 = sign of UBC.
    push    hl
    add     hl, hl
    rla
    ld      hl, $800000
    add     hl, bc
    rla
    pop     hl                      ; operand back in UHL; flags are not needed here

    call    __imulhu

    ; UBC negative -> result -= original UHL.
    ; cpl inverts the sign bits so that a set carry after rra means
    ; "non-negative"; on the fall-through path carry is clear, which makes
    ; sbc a plain subtraction.
    pop     bc                      ; UBC = original UHL
    cpl
    rra
    jr      c, .L.bc_nonnegative
    sbc     hl, bc
.L.bc_nonnegative:

    ; UHL negative -> result -= original UBC.
    pop     bc                      ; UBC = original UBC (restored for the caller)
    rra
    jr      c, .L.hl_nonnegative
    sbc     hl, bc
.L.hl_nonnegative:

    pop     af
    ret

.extern __imulhu
146 changes: 146 additions & 0 deletions src/crt/imulhu.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
.assume adl=1

.section .text

.global __imulhu
.type __imulhu, @function

; UHL = ((uint48_t)UHL * (uint48_t)UBC) >> 24
;
; Unsigned high-half multiply: schoolbook byte-by-byte multiplication with
; mlt, accumulating a 6-byte sum in a stack frame addressed through IX.
;
; In:   UHL = operand "b" (bytes b[0..2]), UBC = operand "a" (bytes a[0..2])
; Out:  UHL = upper 24 bits (sum[3..5]) of the 48-bit product
; AF, UBC, UDE, UIX and UIY are saved on entry and restored before returning.
__imulhu:
; TODO: Optimize this routine as this is mostly just a copy paste of __i48mulu with some stuff removed.
;
; CC: 118*r(PC)+39*r(SPL)+38*w(SPL)+37
; CC: 117 bytes | 118F + 39R + 38W + 37
push de
; backup af
push af
push ix
ld ix, 0
add ix, sp

; On stack to get upper byte when needed
push de ; de will also be used to perform the actual multiplication
push hl
push iy
push bc

; bc = a[0], a[1]
ld a, l ; a = b[0]
; 3-byte load from ix-5..ix-3: iyl = b[1], iyh = b[2]; the upper byte of IY
; receives the byte at ix-3 and is never read.
ld iy, (ix - 5) ; iy = b[1], b[2]

; or a, a ; carry is already cleared
; UHL = 0, used both as the zeroed upper sum bytes and as the accumulator.
sbc hl, hl
push hl ; upper bytes of sum at -15
; Stack Use:
; The b[3..5] and a[3..5] labels are inherited from the 48-bit routine this
; was copied from; here those slots simply hold the caller's DE and IY.
; ix-1 : deu b[5]
; ix-2 : d b[4]
; ix-3 : e b[3]
; ix-4 : hlu b[2]
; ix-5 : h b[1]
; ix-6 : l b[0]
; ix-7 : iyu a[5]
; ix-8 : iyh a[4]
; ix-9 : iyl a[3]
; ix-10 : bcu a[2]
; ix-11 : b a[1]
; ix-12 : c a[0]
; ix-13 : sum[5]
; ix-14 : sum[4]
; ix-15 : sum[3]
; ix-16 : sum[2]
; ix-17 : sum[1]
; ix-18 : sum[0]

; ======================================================================
; sum[0-1]

; a[0]*b[0]
ld d, c ; d = a[0]
ld e, a ; e = b[0]
mlt de
push de ; lower bytes of sum at -18

; ======================================================================
; sum[1-2]
; UHL is still zero from the sbc above, so this seeds the accumulator with
; the high byte of a[0]*b[0].
ld l, d ; hl will store current partial sum

; a[1]*b[0]
ld d, b ; d = a[1]
ld e, a ; e = b[0]
mlt de
add hl, de

; a[0]*b[1]
ld d, c ; d = a[0]
ld e, iyl ; e = b[1]
mlt de
add hl, de

; 3-byte store: writes sum[1], sum[2] and sum[3].
ld (ix - 17), hl

; ======================================================================
; sum[2-3]
; 3-byte load: sum[2], sum[3], sum[4].
ld hl, (ix - 16) ; hl will store current partial sum

; a[0]*b[2]
ld d, c ; d = a[0]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; a[1]*b[1]
ld d, b ; d = a[1]
ld e, iyl ; e = b[1]
mlt de
add hl, de

; a[2]*b[0]
ld d, (ix - 10) ; d = a[2]
ld e, a ; e = b[0]
mlt de
add hl, de

ld (ix - 16), hl

; ======================================================================
; sum[3-4]
; 3-byte load: sum[3], sum[4], sum[5].
ld hl, (ix - 15) ; hl will store current partial sum

; a[1]*b[2]
ld d, b ; d = a[1]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; a[2]*b[1]
ld d, (ix - 10) ; d = a[2]
ld e, iyl ; e = b[1]
mlt de
add hl, de

ld (ix - 15), hl

; ======================================================================
; sum[4-5]
; This 3-byte load also pulls the byte at ix-12 (a[0]) into the upper byte of
; HL; that byte is ignored because only L and H are stored back below.
ld hl, (ix - 14) ; hl will store current partial sum

; a[2]*b[2]
ld d, (ix - 10) ; d = a[2]
ld e, iyh ; e = b[2]
mlt de
add hl, de

; Byte-wise stores so that the saved a[0] at ix-12 is not overwritten.
ld (ix - 14), l
ld (ix - 13), h

; clean up stack and restore registers
; sum[0..2] is popped into DE only to step over it (DE is restored below);
; sum[3..5] popped into HL is the return value.
pop de
pop hl
pop bc
pop iy

ld sp, ix
pop ix
pop af
pop de
ret
39 changes: 39 additions & 0 deletions src/crt/llmulhs.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
.assume adl=1

.section .text

.global __llmulhs
.type __llmulhs, @function

; BC:UDE:UHL = ((int128_t)BC:UDE:UHL * (int128_t)(SP64)) >> 64
;
; Signed high-half multiply on top of the unsigned __llmulhu. The register
; operand is pushed so that it becomes the stack operand of the callee, and
; the caller's stack operand is loaded into the registers instead; the swap
; is harmless because multiplication is commutative.
;
; Frame (IY = SP after saving IY):
;   iy + 6 .. iy + 13 : caller's 64-bit stack operand (SP64)
;   iy - 9 .. iy - 2  : pushed copy of the register operand BC:UDE:UHL
;
; NOTE(review): relies on __llmulhu / __llsub taking their second operand
; from the stack just above the return address and on both preserving IY --
; confirm against those routines.
__llmulhs:
    push    iy
    ld      iy, 0
    add     iy, sp

    ; Register operand -> stack (becomes the callee's stack operand).
    push    bc
    push    de
    push    hl

    ; Caller's stack operand -> registers.
    ld      bc, (iy + 12)
    ld      de, (iy + 9)
    ld      hl, (iy + 6)

    call    __llmulhu

    ; Stack operand negative -> subtract the original register operand,
    ; which is still sitting on top of the stack for __llsub.
    bit     7, (iy + 13)
    call    nz, __llsub

    ; Register operand negative -> subtract the stack operand.
    ; The sign is tested before SP is moved, while the pushed copy is still
    ; inside the live stack; ld sp and pop leave the Z flag untouched.
    bit     7, (iy - 2)

    ld      sp, iy

    pop     iy
    ret     z
    ; Tail call: the stack now looks exactly as it did on entry, so __llsub
    ; finds the caller's stack operand and returns straight to our caller.
    jp      __llsub

.extern __llmulhu
.extern __llsub
Loading
Loading