diff --git a/src/crt/dabs.src b/src/crt/dabs.src new file mode 100644 index 000000000..61b14e863 --- /dev/null +++ b/src/crt/dabs.src @@ -0,0 +1,11 @@ + .assume adl=1 + + .section .text + + .global __dabs + .type __dabs, @function + +; Absolute value; assumes BC:UDE:UHL +__dabs: + res 7, b ; clear the sign bit (bit 7 of B) + ret diff --git a/src/crt/dtof.src b/src/crt/dtofp.src similarity index 97% rename from src/crt/dtof.src rename to src/crt/dtofp.src index 6cfdb2138..1ffde7ed2 100644 --- a/src/crt/dtof.src +++ b/src/crt/dtofp.src @@ -4,9 +4,11 @@ .global __dtof .type __dtof, @function + .global __dtofp ; new name; __dtof is kept and labels the same entry point + .type __dtofp, @function - .local __dtof_helper -__dtof_helper: + .local __dtofp_helper +__dtofp_helper: ; Moving this block of code to be behind __dtof ensures that ; .L.ret_copysign can always be reached by jr in all paths. .L.overflow: @@ -54,6 +56,7 @@ __dtof_helper: ; Quiet NaN: Quiet bit preserved. No signals raised. ; NaN Payloads: Copies the most significant payload bits. The LSB of mantissa is set if payload bits were discarded/truncated out.
__dtof: +__dtofp: bit 7, b push af ; preserve A and signbit push bc diff --git a/src/crt/fpabs.src b/src/crt/fpabs.src new file mode 100644 index 000000000..ee467f4fb --- /dev/null +++ b/src/crt/fpabs.src @@ -0,0 +1,12 @@ + .assume adl=1 + + .section .text + + .global __fpabs + .type __fpabs, @function + +; IEEE single precision absolute value +; aubc = |aubc| +__fpabs: ; CHECK: bitcast(uint32_t, pair8_24_t, { out.BC, out.A }) == bitcast(uint32_t, float, fabsf(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + and a, 07Fh ; clear the sign bit (bit 7 of A) + ret diff --git a/src/crt/fpaddsub.src b/src/crt/fpaddsub.src new file mode 100644 index 000000000..4622778d1 --- /dev/null +++ b/src/crt/fpaddsub.src @@ -0,0 +1,230 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global __fpsub + .type __fpsub, @function + +; IEEE single precision subtraction +; aubc = aubc - euhl +__fpsub: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(float, pair8_24_t, { in.BC, in.A }) - bitcast(float, pair8_24_t, { in.HL, in.E })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + rl e ; sign bit of E into carry + ccf ; invert it + rr e ; rotate it back: the sign of euhl is flipped, E is otherwise unchanged +; jq __fpadd.enter + db $16 ; ld d, * (its operand byte is the push de at __fpadd, so execution resumes at __fpadd.enter) + + ; require __fpadd + +;------------------------------------------------------------------------------- + + ; .section .text + + .global __fpadd + .type __fpadd, @function + +; IEEE single precision addition +; aubc = aubc + euhl +__fpadd: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(float, pair8_24_t, { in.BC, in.A }) + bitcast(float, pair8_24_t, { in.HL, in.E })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + .local __fpadd.enter +__fpadd.enter: + push hl + push bc + xor a, e ; bit 7 of A = sign(aubc) XOR sign(euhl) + ld d, a + push de + push bc + xor a, e ; restore A + call __fppop1 + ex (sp), hl + ld d, e + ld e, a + call __fppop2 + inc e ; zero if the exponent of aubc was 0FFh + jr z, .L.nonfinite1 + inc d
+ jr z, .L.return2 + ld a, d + sub a, e ; A = difference of the two unpacked exponents + jr z, .L.rounded + jr nc, .L.sorted + inc c + ex (sp), hl + ld a, e + sub a, d + ld d, e +.L.sorted: + cp a, 26 + jr nc, .L.largest + ; Extend to 32 bits and shift left by ~(amount - 1) & 7 + dec a + ld b, a + xor a, a + srl b + jr c, .L.noshift1 + add hl, hl + rla +.L.noshift1: + srl b + jr c, .L.noshift2 + .rept 2 + add hl, hl + rla + .endr +.L.noshift2: + srl b + jr c, .L.noshift4 + .rept 4 + add hl, hl + rla + .endr +.L.noshift4: + ; Shift right by (amount + 7) / 8 * 8, truncating to 24 bits + ; The last 2 bits shifted out are in A, while any remaining non-zero + ; bits are aggregated into the lower bits of A + push af + inc sp + push hl + inc sp + ld a, l + jr nz, .L.shift16 + ; Shift by 8 for amounts between 1 and 8 + pop hl + inc sp + jr .L.rounded + +.L.shift16: + ; Shift by 16 for amounts between 9 and 16 + ld e, h + inc sp + pop hl + dec b + jr z, .L.flush +.L.shift8more: + ; Shift by 24 for amounts between 17 and 24 + or a, e + ld e, l + ld l, h + ld h, 0 + ; Shift by 32 for amount of 25 + djnz .L.shift8more +.L.flush: + sub a, 1 ; carry only if A was zero + sbc a, a ; A = 0FFh if it was zero, else 0 + inc a ; A = 1 if A was non-zero before the sub, else 0 + or a, e +.L.rounded: + ld b, d + pop de + ex (sp), hl + add.s hl, hl + jr nc, .L.add + ld l, h + add hl, bc + ld c, l + pop hl + djnz .L.subtract ;always taken + +.L.nonfinite1: + inc d + jr z, .L.nonfinite +.L.return1: + pop bc +.L.return1.pop1: + pop de + ld a, d ; D holds the A XOR E byte pushed by the entry sequence +.L.return1.pop2: + xor a, e ; when falling through: (A XOR E) XOR E = caller's A + pop bc +.L.return: + pop hl ; restore caller's HL + pop de ; restore caller's DE + ret + +.L.largest: + ld a, d + cp a, e + jr z, .L.return1 +.L.return2: + pop bc +.L.return2.pop1: + pop de +.L.return2.pop2: + ld a, e + pop bc + pop bc + push bc + jr .L.return + +.L.add: + ld c, h + pop hl + add hl, de + dec b + jr nc, .L.done + ex de, hl + sbc hl, hl + add hl, sp + push de + rr (hl) + pop hl + rr h + rr l + rra + jr nc, .L.flushed2 + or a, 1 +.L.flushed2: + inc b + inc b + jr z, .L.infinite + djnz .L.done ;always taken + +.L.borrow: + inc c + add hl, de +.L.subtract: + ex de, hl + neg + sbc hl, de + jr c, .L.borrow +
jr nz, .L.done + or a, a +.L.done: + ld de, 0800000h + call nz, __fppack + pop bc + ex (sp), hl + pop bc + pop de + ret + +.L.nonfinite: + xor a, a + sbc hl, bc + jr nz, .L.return1 + pop hl + sbc hl, bc + jr nz, .L.return2.pop1 + pop de + bit 7, d ; bit 7 of D = XOR of the operand signs + jr z, .L.return1.pop2 + ld bc, 0C00000h +.L.infinite: + ld a, c + ld c, b ;0, also note BCU=080h from __fppop1 or 0C0h from .L.nonfinite + rrca + or a, 07Fh + pop hl + pop hl + pop de + ret + +;------------------------------------------------------------------------------- + + .extern __fppop1 + .extern __fppop2 + .extern __fppack diff --git a/src/crt/fpcbrt.src b/src/crt/fpcbrt.src new file mode 100644 index 000000000..a97ee06d3 --- /dev/null +++ b/src/crt/fpcbrt.src @@ -0,0 +1,191 @@ + .assume adl=1 + + .section .text + + .global __fpcbrt + .type __fpcbrt, @function + +; IEEE single precision cube root +; aubc = cbrt(aubc) +__fpcbrt: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), (float)cbrt(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + push bc + ex (sp), hl + ld e, a + call __fppop1 + inc e ; zero if the exponent was 0FFh + jp z, .L.nonfinite + dec e + ld d, 0ABh ; multiplicative inverse of 3 modulo 256 + ld b, d + mlt de + ld c, a + ld a, i ; sets P/V from IFF2 (the interrupt enable state); the push af that follows saves it + push af + push hl + ex (sp), ix + or a, a + sbc hl, hl + ld a, e + ld d, 10 ; maximum normalization iterations to determine a zero +.L.modulo.loop: + ; x <<= 1 + add ix, ix + adc hl, hl + sub a, b + jr c, .L.modulo.loop + jr nz, .L.normalized + dec d + jp z, .L.return + cp a, l + inc a + jr nc, .L.modulo.loop +.L.normalized: + add a, d + ld b, a + push bc + xor a, a + ld d, a + ld e, a + push de + ex (sp), iy + ; Optimized first iteration, we know UHL >= 1 + ; r += 1 + inc de + ; x -= r << 24 + dec hl + di ; interrupts stay off from here; the exit code re-enables them if the saved P/V flag was set + exx + ld c, a + sbc hl, hl + ex de, hl + sbc hl, hl + ld b, 24 + ; x in AUHL[UHL']UIX, r in CUDE[UDE'], q in UIY +.L.root.loop: + exx + lea bc, iy + 0 + jr c, .L.root.zerobit + ex af, af' + ; q += 1 + inc
iy + ; r += q * 4 - 1 + ex de, hl + xor a, a + add hl, bc + rla + inc bc + .rept 3 + add hl, bc + adc a, 0 + .endr + ; r <<= 2 + add hl, hl + rla + ; r += q + add hl, bc + adc a, 0 + jr .L.root.nextbit +.L.root.zerobit: + ; x += r << 24 + add hl, de + exx + adc hl, de + adc a, c + exx + ex af, af' + ; r -= q + 1 + ex de, hl + scf + sbc hl, bc + sbc a, a + ; r <<= 1 + add hl, hl + rla + ; r -= q + or a, a + sbc hl, bc + sbc a, 0 +.L.root.nextbit: + ; r <<= 1 + add hl, hl + rla + ex de, hl + exx + push hl + ; Sign extend carry byte to 32 bits + sbc hl, hl + ld l, a + sbc a, a + ; Left shift high 32 bits of r by 2 + ex de, hl + .rept 2 + add hl, hl + rl c + .endr + ; Add carry byte to shifted high bits + add hl, de + ex de, hl + adc a, c + ld c, a + pop hl + exx + ; x <<= 3 + xor a, a + .rept 3 + add ix, ix + adc hl, hl + rla + .endr + exx + ex af, af' + .rept 3 + add hl, hl + rla + .endr + ex af, af' + or a, l + ld l, a + ex af, af' + exx + ; q <<= 1 + add iy, iy ; clears carry + ; r += 1 + inc de ; sets bit 0 so never overflows + ; x -= r << 24 + sbc hl, de + exx + sbc hl, de + sbc a, c + djnz .L.root.loop + ; Apply rounding (never round-to-even because a root with the lowest mantissa bit set must be irrational) + ; q += !carry + exx + sbc hl, hl + inc hl + add hl, bc + pop iy ; restore caller's IY + pop bc + ; Get final exponent after rounding + ld a, b + adc a, 07Fh - (07Fh / 3) - 10 + ; Set low exponent bit + srl b + jr nc, .L.return + ld de, 0800000h + add hl, de +.L.return: + sla c + rra + pop ix ; restore caller's IX + pop bc ; C = flags saved by the push af that follows ld a, i + bit 2, c ; Z set if the saved P/V flag was clear (interrupts were disabled on entry) +.L.nonfinite: + ex (sp), hl ; HL = caller's HL; the value HL held goes onto the stack + pop bc ; and is popped into BC as the result + pop de ; restore caller's DE + ret z ; return without touching the interrupt state + ei ; interrupts were enabled on entry: turn them back on + ret + + .extern __fppop1 diff --git a/src/crt/fpcmp.src b/src/crt/fpcmp.src new file mode 100644 index 000000000..812adbc94 --- /dev/null +++ b/src/crt/fpcmp.src @@ -0,0 +1,205 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global __fpcmpu + .type __fpcmpu, @function + +; IEEE single precision unordered comparison +; z = euhl ==
aubc || isunordered(euhl, aubc) +; c = !isunordered(euhl, aubc) +__fpcmpu: ; CHECK: out.flags.C == !isunordered(bitcast(float, pair8_24_t, { in.HL, in.E }), bitcast(float, pair8_24_t, { in.BC, in.A })) && out.flags.Z == (!out.flags.C || (bitcast(float, pair8_24_t, { in.HL, in.E }) == bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.A == in.A && out.BC == in.BC && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + call __fpcmpo + ret c ; If euhl < aubc: C = 1, Z = 0 + scf ; carry was clear; set it for the remaining ordered case + ret p ; If euhl >= aubc: C = 1, Z = (euhl == aubc) + cp a, a ; If unordered: C = 0, Z = 1 + ret + +;------------------------------------------------------------------------------- + + .section .text + + .global __fpcmpo + .type __fpcmpo, @function + +; IEEE single precision ordered comparison +; z = euhl == aubc +; c = euhl < aubc +; s = !(euhl >= aubc) +__fpcmpo: ; CHECK: out.flags.Z == (bitcast(float, pair8_24_t, { in.HL, in.E }) == bitcast(float, pair8_24_t, { in.BC, in.A })) && out.flags.C == (bitcast(float, pair8_24_t, { in.HL, in.E }) < bitcast(float, pair8_24_t, { in.BC, in.A })) && out.flags.S == !(bitcast(float, pair8_24_t, { in.HL, in.E }) >= bitcast(float, pair8_24_t, { in.BC, in.A })) && out.A == in.A && out.BC == in.BC && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + xor a, e ; A = A XOR E: zero when sign and upper 7 exponent bits match + jr z, .L.maybeEqual + cp a, 080h ; below 080h: the signs match; exactly 080h: only the signs differ + jr c, .L.signsMatch + jr z, .L.maybeBothZero + xor a, e + ; Check if first operand could hold NaN, and if so, the second cannot on this code path + inc e + jr z, .L.checkFirstNan + jp pe, .L.checkFirstNan + dec e +.L.secondLargerAbs: + cp a, 07Fh ; C = S = !sign(aubc), Z = 0 + ret c ; Return if A less than 07Fh + ret pe ; Return if A between 080h and 0FEh +.L.checkSecondNan: + push hl + ld hl, 07FFFFFh + add hl, bc ; carry if BC exceeds 0800000h (low exponent bit set and a non-zero mantissa) + pop hl + jr c, .L.unordered + cp a, 080h ; C = S = !sign(aubc), Z = 0 + ret + +.L.signsMatch: + xor a, e + ; Compare upper 7 exponent bits, which are not equal + cp a, e + jr nc, .L.secondLargerAbs + inc e
+ jr z, .L.checkFirstNan + dec e ; S = sign(euhl), Z = 0 because E > A + rlca + rrca ; C = sign(aubc), same as sign(euhl) on this code path + ret po ; Return if E != 07Fh +.L.checkFirstNanInc: + inc e +.L.checkFirstNan: + ex de, hl + push hl + ld hl, 07FFFFFh + add hl, de ; carry if the euhl word now in DE exceeds 0800000h + pop hl + ex de, hl + dec e ; S = sign(euhl), Z = 0 because (E & 07Fh) == 07Fh + jr c, .L.unordered + ret p ; C = sign(euhl) + scf ; C = sign(euhl) + ret + +.L.maybeBothZero: + ; Upper 7 bits of exponents are equal, but sign differs + xor a, e + ; Check if upper 7 bits of both exponents are zero + add a, a + rra + jr nz, .L.notBothZero + ; Check if low bit of both exponents and entire mantissas are 0 + adc hl, bc + jr nz, .L.notBothZeroFixup + ret nc ; Both inputs are zero, return Z=1, C=0, S=0 +.L.notBothZeroFixup: + or a, a + sbc hl, bc +.L.notBothZero: + ; Check if upper 7 bits of both exponents are one + cp a, 07Fh ; C = S = !sign(aubc), Z = 0 + ret c ; Return if less than 07Fh + ret pe ; Return if between 080h and 0FEh + ; Check the larger exponent/mantissa for NaN + sbc hl, bc + add hl, bc + jr nc, .L.checkFirstNanInc + jr .L.checkSecondNan + +.L.maybeEqual: + ; Sign and upper 7 exponent bits are equal + ld a, e + inc a + add a, a ; zero if the low 7 bits of E are all ones (upper exponent bits 07Fh) + ld a, e ; A equals E on this path, so this is the caller's A again + jr z, .L.checkBothNan + or a, a +.L.checkBothNanDone: + ; Compare mantissas and low exponent bit + sbc hl, bc + add hl, bc + ret z ; Both inputs are equal, return Z=1, C=0, S=0 + ; XOR the carry with the input sign and place into the output sign/carry + sbc a, a + xor a, e + or a, 07Fh ; Affect S flag, Z = 0 + rlca ; Affect C flag + ld a, e ; restore the caller's A (equal to E on this path) + ret + +.L.checkBothNan: + ex de, hl + push hl + ld hl, 07FFFFFh + add hl, de + jr c, .L.gotFirstNan + sbc hl, de + add hl, bc +.L.gotFirstNan: + pop hl + ex de, hl + jr nc, .L.checkBothNanDone +.L.unordered: + ; Carry is always set here + rr a ; Z = 0, S = 1 + rla ; Restore A + ccf ; C = 0 + ret + +;------------------------------------------------------------------------------- + + .section .text + + .global
__fpcmp + .type __fpcmp, @function + +; IEEE single precision comparison (flag outputs for NaN inputs are undefined) +; z = euhl == aubc +; s = euhl < aubc = !(euhl >= aubc) +__fpcmp: ; CHECK: (isunordered(bitcast(float, pair8_24_t, { in.HL, in.E }), bitcast(float, pair8_24_t, { in.BC, in.A })) || (out.flags.Z == (bitcast(float, pair8_24_t, { in.HL, in.E }) == bitcast(float, pair8_24_t, { in.BC, in.A })) && out.flags.S == (bitcast(float, pair8_24_t, { in.HL, in.E }) < bitcast(float, pair8_24_t, { in.BC, in.A })))) && out.A == in.A && out.BC == in.BC && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + sub a, e ; zero if the sign/exponent bytes A and E are identical + jr z, __fpcmp.maybeEqual + cp a, 080h + jr z, __fpcmp.maybeBothZero + ; Compare sign and upper 7 exponent bits, which are not equal + add a, e ; restore A; carry if A < E (unsigned) + inc e + dec e ; S = sign(euhl), Z = 0 because E > A + ret c + .local __fpcmp.notBothZero +__fpcmp.notBothZero: + cp a, 080h ; S = !sign(aubc) + ret nz + cp a, e ; S = !sign(aubc), Z = 0 because A == 080h and A > E and A - E != 080h + ret + + .local __fpcmp.maybeBothZero +__fpcmp.maybeBothZero: + add a, e + add a, a + rra + jr nz, __fpcmp.notBothZero + adc hl, bc + jr c, __fpcmp.notBothZeroFixup + ret z ; Both inputs are zero, return Z=1, S=0 + .local __fpcmp.notBothZeroFixup +__fpcmp.notBothZeroFixup: + or a, a + sbc hl, bc + dec a ; S = !sign(aubc), Z = 0 because (A & 07Fh) == 0 + cpl ; Restore A, preserve S and Z + ret + + .local __fpcmp.maybeEqual +__fpcmp.maybeEqual: + ld a, e ; restore A (it equalled E before the sub) + sbc hl, bc + add hl, bc + ret z ; low exponent bit and mantissa match too: Z = 1 + ; XOR the carry with the input sign and place into the output sign + sbc a, a + xor a, e + or a, 07Fh ; Affect S flag, Z = 0 + ld a, e + ret + +;------------------------------------------------------------------------------- diff --git a/src/crt/fpcopysign.src b/src/crt/fpcopysign.src new file mode 100644 index 000000000..9329457f3 --- /dev/null +++ b/src/crt/fpcopysign.src @@ -0,0 +1,32 @@ + .assume adl=1 +
+;------------------------------------------------------------------------------- + + .section .text + + .global __fpcopysign + .type __fpcopysign, @function + +; IEEE single precision copy sign +; aubc = copysign(aubc, euhl) +__fpcopysign: ; CHECK: bitcast(uint32_t, pair8_24_t, { out.BC, out.A }) == bitcast(uint32_t, float, copysignf(bitcast(float, pair8_24_t, { in.BC, in.A }), bitcast(float, pair8_24_t, { in.HL, in.E }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + xor a, e ; A = A XOR E + and a, 07Fh ; keep only the low 7 difference bits + xor a, e ; A = sign bit of E with the original low 7 bits of A + ; jq __fpcopy + + ; require __fpcopy + +;------------------------------------------------------------------------------- + + ; .section .text + + .global __fpcopy + .type __fpcopy, @function + +; IEEE single precision copy +; aubc = copy(aubc) +__fpcopy: ; CHECK: bitcast(uint32_t, pair8_24_t, { out.BC, out.A }) == bitcast(uint32_t, pair8_24_t, { in.BC, in.A }) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + ret + +;------------------------------------------------------------------------------- diff --git a/src/crt/fpdiv.src b/src/crt/fpdiv.src new file mode 100644 index 000000000..9e5da5123 --- /dev/null +++ b/src/crt/fpdiv.src @@ -0,0 +1,169 @@ + .assume adl=1 + + .section .text + + .global __fpdiv + .type __fpdiv, @function + +; IEEE single precision division +; aubc = aubc / euhl +__fpdiv: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(float, pair8_24_t, { in.BC, in.A }) / bitcast(float, pair8_24_t, { in.HL, in.E })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + push hl + xor a, e ; bit 7 of A = XOR of the operand signs; saved by the push af that follows + push af + xor a, e ; restore A + push bc + call __fppop1 + ex (sp), hl + ld d, e + ld e, a + call __fppop2 + inc e ; zero if the exponent of aubc was 0FFh + jr z, .L.nonfinite.1 + ld a, d + inc a + jr z, .L.nonfinite.2 + add a, 080h + dec d + jr nz, .L.exponent.adjust + ex (sp), hl + add hl, bc + jr c, .L.normalize.divisor.done + sbc hl, bc + jr z, .L.divisor.zero +.L.normalize.divisor.loop: + dec a + add hl,
hl + add hl, bc + jr nc, .L.normalize.divisor.loop +.L.normalize.divisor.done: + add hl, bc + ex (sp), hl +.L.exponent.adjust: + ex de, hl ; ude=UHL, uhl=UDE + ld h, b + rl b + ld c, a + sbc hl, bc + ld b, h + ld c, l + pop hl + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UHL + xor a, a + sbc hl, de ; uhl=-UDE + ex de, hl ; ude=-UDE, uhl=UHL+UDE + add hl, de ; uhl=UHL +.L.normalize.dividend.loop: + add hl, de + jr c, .L.normalize.dividend.done + sbc hl, de + jr z, .L.dividend.zero + dec bc + add hl, hl + jr nc, .L.normalize.dividend.loop + add hl, de +.L.normalize.dividend.done: + cp a, b + jr nc, .L.return.overflow + dec bc + dec bc + ld a, c + inc b + ld bc, 0800000h | (23 << 8) + jr z, .L.divide.entry.normal + adc a, b + ld b, a + ld a, c + jr nc, .L.return.underflow + push iy + ld iy, 0 + jr nz, .L.divide.entry.subnormal + dec hl + add hl, de + jr .L.subsubnormal + +.L.divisor.zero: + pop de + sbc hl, de + jr nz, .L.return.nonfinite +.L.return.nan: + inc b +.L.return.overflow: + ld c, b +.L.return.nonfinite: + pop af ; A = the A XOR E byte pushed at entry + pop hl ; restore caller's HL + pop de ; restore caller's DE + or a, 07Fh ; set the upper 7 exponent bits, keeping the sign in bit 7 + ret + +.L.nonfinite.1: + inc d + pop de + jr z, .L.return.nan + push hl + pop bc + jr .L.return.nonfinite + +.L.nonfinite.2: + pop hl + dec hl + add hl, bc + jr c, .L.return.nan +.L.return.underflow: + sbc hl, hl +.L.dividend.zero: + pop de + jr .L.return + +.L.divide.entry.normal: + push iy + ld iyl, b +.L.divide.loop: + add iy, iy + add hl, hl + jr c, .L.divide.overflow + add hl, de + jr c, .L.divide.setbit + sbc hl, de + djnz .L.divide.loop + add hl, hl + jr .L.divide.finish +.L.divide.overflow: + add hl, de +.L.divide.setbit: +.L.divide.entry.subnormal: + inc iy + djnz .L.divide.loop + add hl, hl + inc hl +.L.divide.finish: + jr c, .L.round + dec de + add hl, de +.L.round: + ccf +.L.subsubnormal: + lea de, iy + 0 + pop iy ; restore caller's IY + sbc hl, hl + inc hl + add hl, de + pop de + ld e, a + adc a, 1 + srl e + jr nc, .L.return + add hl, bc +.L.return: + sla d ; bit 7 of D (XOR of the operand signs) into carry + rra ; and from carry into bit 7 of A + ex (sp), hl ; HL = caller's HL; the value HL held goes onto the stack + pop bc ; and is popped into BC as the result + pop de ; restore caller's DE + ret + + .extern
__fppop1 + .extern __fppop2 diff --git a/src/crt/fpminmax.src b/src/crt/fpminmax.src new file mode 100644 index 000000000..201ebf789 --- /dev/null +++ b/src/crt/fpminmax.src @@ -0,0 +1,50 @@ + .assume adl=1 + + .section .text + + .global __fpmin + .type __fpmin, @function + .global __fpmax + .type __fpmax, @function + +; IEEE single precision minimum +; aubc = fmin(aubc, euhl) +__fpmin: ; CHECK: sameignzerosign(bitcast(float, pair8_24_t, { out.BC, out.A }), fminf(quiet(bitcast(float, pair8_24_t, { in.BC, in.A })), quiet(bitcast(float, pair8_24_t, { in.HL, in.E })))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + call __fpcmpo + ret p ; S clear: euhl >= aubc, so aubc already holds the minimum + jr nc, __fpmax.unordered ; S set with carry clear: the operands are unordered + .local __fpmin.return +__fpmin.return: + push hl ; copy euhl into aubc: BC = HL through the stack + pop bc + ld a, e ; and A = E + ret + +; IEEE single precision maximum +; aubc = fmax(aubc, euhl) +__fpmax: ; CHECK: sameignzerosign(bitcast(float, pair8_24_t, { out.BC, out.A }), fmaxf(quiet(bitcast(float, pair8_24_t, { in.BC, in.A })), quiet(bitcast(float, pair8_24_t, { in.HL, in.E })))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + call __fpcmpo + ret c ; euhl < aubc: aubc already holds the maximum + jp p, __fpmin.return ; euhl >= aubc: return euhl + .local __fpmax.unordered +__fpmax.unordered: + ; Compare the low 31 bits (ignoring sign) and return the smaller one, which is non-NaN if either one is + sbc hl, bc + ccf + sbc a, e + add hl, bc + ccf + adc a, e + ; Carry into bit 31 is equivalent to carry XOR overflow + jr nc, __fpmax.nocarry + ret po +; jq __fpmax.return + db $30 ; jr nc, * (carry is set here, so the jump is not taken and only the one-byte ret pe is skipped) + .local __fpmax.nocarry +__fpmax.nocarry: + ret pe + .local __fpmax.return +__fpmax.return: + jr __fpmin.return + + .extern __fpcmpo diff --git a/src/crt/fpmul.src b/src/crt/fpmul.src new file mode 100644 index 000000000..20a32faff --- /dev/null +++ b/src/crt/fpmul.src @@ -0,0 +1,199 @@ + .assume adl=1 + + .section .text + + .global __fpmul + .type __fpmul, @function + +.L.underflow: + pop af + pop af + pop hl + pop de + and a, 080h ; keep only the sign bit + ld bc, 0 ; the result is a zero with that sign + ret +.L.nonfinite: + sbc hl, bc + jr z,
.L.return.2 + add hl, bc + pop bc + jr .L.return.1 +.L.nonfinite.1: + inc d + jr z, .L.nonfinite + ex de, hl + pop hl + add hl, bc + or a, a + sbc hl, bc + ex de, hl + jr nz, .L.return.1 + ld h, a +.L.return.1: + pop af + ex (sp), hl + pop bc + jr .L.return +.L.nonfinite.2: + add hl, bc + or a, a + sbc hl, bc + jr z, .L.return.nan +.L.return.2: + pop bc + pop af + pop bc + push bc +.L.return.pop: + pop hl +.L.return: + pop de ; restore caller's DE + or a, 07Fh ; set the upper 7 exponent bits, keeping the sign in bit 7 + ret +.L.return.nan: + pop bc + set 7, b + pop af + jr .L.return.pop + +; IEEE single precision multiplication +; aubc = aubc * euhl +__fpmul: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(float, pair8_24_t, { in.BC, in.A }) * bitcast(float, pair8_24_t, { in.HL, in.E })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + push hl + xor a, e ; bit 7 of A = XOR of the operand signs; saved by the push af that follows + push af + xor a, e ; restore A + push bc + call __fppop1 + ex (sp), hl + ld d, e + ld e, a + call __fppop2 + inc e ; zero if the exponent of aubc was 0FFh + jr z, .L.nonfinite.1 + inc d + jr z, .L.nonfinite.2 + ld a, d + ld d, b ; ld d, 0 + rlc e + ccf + rr e + jr nc, .L.subtract + add a, e + jr nc, .L.continue +.L.overflow: + pop af + pop af + pop hl + pop de + or a, 07Fh ; with BC still 0800000h from __fppop1 this encodes an infinity carrying the result sign + ret +.L.subtract: + add a, e + jr z, .L.subnormal + jr c, .L.continue + cp a, -23 + jr c, .L.underflow +.L.subnormal: + dec a + ld d, a +.L.continue: + push hl + push ix + ld ix, -7 + add ix, sp + ld sp, ix ; reserves 7 bytes of stack, addressed through IX + ld c, l + ld h, (ix + 13) + mlt hl + ld (ix + 0), l + ld l, h + ld h, 0 + ld b, (ix + 14) + mlt bc + add hl, bc + ld c, (ix + 11) + ld b, (ix + 13) + mlt bc + add.s hl, bc + ld (ix + 1), l + ld l, h + ld h, 0 + rl h + ld c, (ix + 10) + ld b, (ix + 15) + mlt bc + add hl, bc + ld c, (ix + 11) + ld b, (ix + 14) + mlt bc + add hl, bc + ld bc, (ix + 12) + mlt bc + add hl, bc + + ld (ix + 2), hl + ld b, (ix + 4) + ld c, h + ld l, (ix + 11) + ld h, (ix + 15) + mlt hl ; clears UHL + add hl, bc + + ld c, (ix + 12) + ld b, (ix + 14) + mlt bc + add hl, bc + + ld (ix + 3), hl + ld b, (ix + 5) + ld c, h + ld l, (ix + 12)
+ ld h, (ix + 15) + mlt hl ; clears UHL + add hl, bc + + cp a, d + jr nz, .L.normalized + ld bc, (ix + 2) + pop de +.L.normalize: + srl h + rr l + rr b + rr c + rr d + rr e + jr nc, .L.flushed + ld e, a +.L.flushed: + inc a + jr nz, .L.normalize + push de + ld (ix + 2), bc +.L.normalized: + ld (ix + 4), hl + ld c, (ix + 17) ; C = the A XOR E byte saved by push af at entry + pop ix + pop hl + inc sp + ld b, a + ld de, 0800000h + add a, -1 + call nc, __fppack2.normalized + call c, __fppack2 + pop ix ; restore caller's IX + pop bc + pop bc + pop bc + ex (sp), hl ; HL = caller's HL; the value HL held goes onto the stack + pop bc ; and is popped into BC as the result + pop de ; restore caller's DE + ret + + .extern __fppop1 + .extern __fppop2 + .extern __fppack2.normalized + .extern __fppack2 diff --git a/src/crt/fpneg.src b/src/crt/fpneg.src new file mode 100644 index 000000000..29561bf97 --- /dev/null +++ b/src/crt/fpneg.src @@ -0,0 +1,12 @@ + .assume adl=1 + + .section .text + + .global __fpneg + .type __fpneg, @function + +; IEEE single precision negation +; aubc = -aubc +__fpneg: ; CHECK: bitcast(uint32_t, pair8_24_t, { out.BC, out.A }) == bitcast(uint32_t, float, -bitcast(float, pair8_24_t, { in.BC, in.A })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + xor a, 080h ; toggle the sign bit (bit 7 of A) + ret diff --git a/src/crt/fprem.src b/src/crt/fprem.src new file mode 100644 index 000000000..325940c12 --- /dev/null +++ b/src/crt/fprem.src @@ -0,0 +1,83 @@ + .assume adl=1 + + .section .text + + .global __fprem + .type __fprem, @function + +; IEEE single precision remainder +; aubc = fmod(aubc, euhl) +__fprem: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), fmodf(bitcast(float, pair8_24_t, { in.BC, in.A }), bitcast(float, pair8_24_t, { in.HL, in.E }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + ld d, a ; keep the caller's A in D + set 7, a ; aubc = -abs(AUBC) + set 7, e ; euhl = -abs(EUHL) + call __fpcmpo ; abs(AUBC) <=> abs(EUHL) + jp m, .L.notge ; !(abs(AUBC) >= abs(EUHL)) + push hl + push bc + call __fppop1 + ex (sp), hl + ld a, e + ld e, d + call __fppop2 + ld b, a + cpl + inc e + add a, e ; Always sets carry, unless
dividend non-finite + ld e, d + ld d, b + pop bc + push de + ex de, hl + sbc hl, hl + add hl, bc + jr nc, .L.nan ; Return if dividend non-finite or divisor zero + sbc hl, hl + or a, a + sbc hl, bc + inc hl + ex de, hl + call m, __iremu ; If divisor subnormal, modulo the mantissas + ld b, a + inc b +.L.rem.compare: + add hl, de + jr c, .L.rem.norestore + sbc hl, de +.L.rem.norestore: + dec b + jr z, .L.rem.finish +.L.rem.loop: + add hl, hl + jr nc, .L.rem.compare + add hl, de + djnz .L.rem.loop +.L.rem.finish: + pop bc + rlc c + ld de, 0800000h + xor a, a + call __fppack + ex (sp), hl ; HL = caller's HL; the value HL held goes onto the stack + pop bc ; and is popped into BC as the result + pop de ; restore caller's DE + ret + +.L.nan: + pop hl + pop hl +.L.notge: + ld a, d + pop de ; restore caller's DE + ret c ; abs(AUBC) < abs(EUHL) + ld bc, 0C00000h + ld a, 0FFh ; A:UBC = 0FFC00000h: exponent 0FFh with a non-zero mantissa, i.e. a NaN + ret + + .extern __iremu + + .extern __fppop1 + .extern __fppop2 + .extern __fppack + .extern __fpcmpo diff --git a/src/crt/fpround.src b/src/crt/fpround.src new file mode 100644 index 000000000..f1792ad5f --- /dev/null +++ b/src/crt/fpround.src @@ -0,0 +1,58 @@ + .assume adl=1 + + .section .text + + .global __fpround + .type __fpround, @function + +; IEEE single precision round +; aubc = round(aubc) +__fpround: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), roundf(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push bc + ex (sp), hl ; caller's HL is saved on the stack; HL = BC + add hl, hl + ld h, a ; H keeps the caller's A + rla ; A = 8-bit exponent (low bit from the carry of add hl, hl); sign bit into carry + sub a, 07Fh - 1 + jr c, .L.return.zero + sub a, 24 + jr nc, .L.return + push bc + cpl + ld c, a + ld a, h + sbc hl, hl + call __ishl + ex de, hl + ex (sp), hl + or a, a + sbc hl, de + ex (sp), hl + ex de, hl + jp pe, .L.overflow + jr c, .L.no_overflow + inc a +; jq .L.overflow + db $38 ; jr c, * +.L.no_overflow: + add hl, hl +.L.overflow: + pop bc + call __iand + push hl + pop bc + ld h, a +.L.return: + ld a, h + pop hl ; restore caller's HL + ret + +.L.return.zero: + ld bc, 0 + ld a, h + and a, 080h ; keep only the sign: the result is a zero with the input's sign + pop hl ; restore caller's HL + ret + + .extern __iand + .extern __ishl diff --git a/src/crt/fpsqrt.src b/src/crt/fpsqrt.src new file
mode 100644 index 000000000..48d448304 --- /dev/null +++ b/src/crt/fpsqrt.src @@ -0,0 +1,117 @@ + .assume adl=1 + + .section .text + + .global __fpsqrt + .type __fpsqrt, @function + +; IEEE single precision square root +; aubc = sqrt(aubc) +__fpsqrt: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), (float)sqrt(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + push bc + ex (sp), hl + ld e, a + call __fppop1 + add hl, bc + or a, a + sbc hl, bc ; undoes the add; Z is set only if HL is zero + jr z, .L.zero + rlca ; sign bit of A into carry + jr c, .L.nan + inc e ; zero if the exponent was 0FFh + jr z, .L.nonfinite + push hl + ex (sp), ix + sbc hl, hl + srl e + jr nc, .L.normalize.skip +.L.normalize.loop: + add ix, ix + adc hl, hl +.L.normalize.skip: + dec e + add ix, ix + adc hl, hl + jr z, .L.normalize.loop + push de + xor a, a + ex de, hl + sbc hl, hl + ld c, a + ld b, 25 + ; x in AUDEUIX, r in CUHL +.L.root.loop: + ex de, hl + ; r += 1 + inc de ; sets bit 0 so never overflows + ; x -= r << 24 + sbc hl, de + sbc a, c + jr nc, .L.root.onebit + ; x += r << 24 + add hl, de + adc a, c + ; r -= 2 + dec de + dec de +.L.root.onebit: + ; r += 1 + inc de + ; x <<= 2 + .rept 2 + add ix, ix + adc hl, hl + rla + .endr + ex de, hl + ; r <<= 1 + add hl, hl + rl c ; clears carry + djnz .L.root.loop + ; Shift left by 5 + ld a, c + .rept 5 + add hl, hl + rla + .endr + ; Set the low exponent bit of the result + pop de + pop ix ; restore caller's IX + sra e + rla + rrca + ; Shift right by 8 + push af + inc sp + push hl + inc sp + pop bc + inc sp + ; Calculate the high exponent bits and unset sign bit + ld a, e + add a, 07Fh / 4 + 1 + ; Check whether to round up (never round-to-even because a root with the lowest mantissa bit set must be irrational) + inc l + pop hl + pop de + ret p + inc bc ; This never overflows into the exponent field + ret + +.L.nonfinite: + rrca ; undo the rlca so A is the caller's value again +.L.zero: + ex (sp), hl ; HL = caller's HL; the value HL held goes onto the stack + pop bc ; and is popped into BC as the result + pop de ; restore caller's DE + ret + +.L.nan: + sbc a, a ; carry is set on this path, so A = 0FFh + set 7, b ; leaves a non-zero mantissa, so with exponent 0FFh the result is a NaN + pop hl ; restore caller's HL + pop de ; restore caller's DE + ret + + .extern __fppop1 diff --git
a/src/crt/ftod.src b/src/crt/fptod.src similarity index 97% rename from src/crt/ftod.src rename to src/crt/fptod.src index 2a640d559..ef08ae9d4 100644 --- a/src/crt/ftod.src +++ b/src/crt/fptod.src @@ -4,11 +4,14 @@ .global __ftod .type __ftod, @function + .global __fptod ; new name; __ftod is kept and labels the same entry point + .type __fptod, @function ; input E:UHL (float) ; ouput BC:UDE:UHL (long double) ; NaN payloads are bitshifted __ftod: +__fptod: sla e ; extract signbit push af srl e diff --git a/src/crt/fptol.src b/src/crt/fptol.src new file mode 100644 index 000000000..cf2c70015 --- /dev/null +++ b/src/crt/fptol.src @@ -0,0 +1,58 @@ + .assume adl=1 + + .section .text + + .global __fptol + .type __fptol, @function + .global __fptoul + .type __fptoul, @function + +; IEEE single precision to 32-bit integers +; aubc = long(aubc) +__fptoul: ; PREREQ: bitcast(float, pair8_24_t, { in.BC, in.A }) > -1.0f && bitcast(float, pair8_24_t, { in.BC, in.A }) < (UINT32_MAX/2+1)*2.0f CHECK: bitcast(uint32_t, pair8_24_t, { out.BC, out.A }) == (uint32_t)bitcast(float, pair8_24_t, { in.BC, in.A }) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY +__fptol: ; PREREQ: bitcast(float, pair8_24_t, { in.BC, in.A })-INT32_MIN > -1.0f && bitcast(float, pair8_24_t, { in.BC, in.A }) < (INT32_MAX/2+1)*2.0f CHECK: bitcast(int32_t, pair8_24_t, { out.BC, out.A }) == (int32_t)bitcast(float, pair8_24_t, { in.BC, in.A }) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push de + push bc + ex (sp), hl + ld d, a ; D keeps the sign/exponent byte for the bit 7, d test at .L.finish + ld e, a + call __fppop1 + ld a, e + sub a, 07Fh ; A = exponent minus the bias 07Fh + cp a, 23 + 32 + jr nc, .L.return.zero + sub a, 23 + 1 + jr nc, .L.left + cpl + ld c, a + call __ishru + xor a, a + jr .L.finish +.L.left: + inc a + ld b, a + xor a, a +.L.loop: + add hl, hl + rla + djnz .L.loop +.L.finish: + ld e, a + bit 7, d + call nz, __lneg ; sign bit set: call __lneg (presumably negates E:UHL; defined elsewhere) + ld a, e +.L.return: + ex (sp), hl ; HL = caller's HL; the value HL held goes onto the stack + pop bc ; and is popped into BC as the low 24 bits of the result + pop de ; restore caller's DE + ret + +.L.return.zero: + xor a, a + sbc hl, hl ; carry is clear after the xor, so HL = 0 + jr .L.return + + .extern __ishru + .extern __lneg + + .extern __fppop1
diff --git a/src/crt/fptoll.src b/src/crt/fptoll.src new file mode 100644 index 000000000..154c4db40 --- /dev/null +++ b/src/crt/fptoll.src @@ -0,0 +1,56 @@ + .assume adl=1 + + .section .text + + .global __ftoll + .type __ftoll, @function + .global __ftoull + .type __ftoull, @function + .global __fptoll + .type __fptoll, @function + .global __fptoull + .type __fptoull, @function + +; IEEE single precision to 64-bit integers +; bcudeuhl = longlong(euhl) +__ftoull: +__ftoll: +__fptoull: ; PREREQ: bitcast(float, pair8_24_t, { in.HL, in.E }) > -1.0f && bitcast(float, pair8_24_t, { in.HL, in.E }) < (UINT64_MAX/2+1)*2.0f CHECK: bitcast(uint64_t, tuple16_24_24_t, { out.HL, out.DE, out.BCS }) == (uint64_t)bitcast(float, pair8_24_t, { in.HL, in.E }) && out.A == in.A && out.IX == in.IX && out.IY == in.IY +__fptoll: ; PREREQ: bitcast(float, pair8_24_t, { in.HL, in.E })-INT64_MIN > -1.0f && bitcast(float, pair8_24_t, { in.HL, in.E }) < (INT64_MAX/2+1)*2.0f CHECK: bitcast(int64_t, tuple16_24_24_t, { out.HL, out.DE, out.BCS }) == (int64_t)bitcast(float, pair8_24_t, { in.HL, in.E }) && out.A == in.A && out.IX == in.IX && out.IY == in.IY + ld d, a ; D = caller's A + push de ; saved so that a later pop af restores A and loads the flags from E (sign bit of euhl lands in S) + call __fppop1 + ld a, e + ld de, 0 + sub a, 07Fh ; A = exponent minus the bias 07Fh + cp a, 23 + 64 + jr nc, .L.return.zero + sub a, 23 + 1 + jr nc, .L.left + cpl + ld c, a + call __ishru + ld c, b + jr .L.finish +.L.left: + inc a + push af + inc sp + call __llshl + inc sp + inc sp +.L.finish: + pop af ; A = caller's A; S flag = sign bit of the pushed E + ret p ; sign clear: return the magnitude as is + jp __llneg ; sign set: tail-call __llneg (presumably negates BC:UDE:UHL; defined elsewhere) + +.L.return.zero: + sbc hl, hl ; carry is clear on this path (reached by jr nc), so HL = 0 + pop af ; restore caller's A + ret + + .extern __ishru + .extern __llshl + .extern __llneg + + .extern __fppop1 diff --git a/src/crt/fptrunc.src b/src/crt/fptrunc.src new file mode 100644 index 000000000..1386e838a --- /dev/null +++ b/src/crt/fptrunc.src @@ -0,0 +1,124 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global __fpfloor + .type __fpfloor, @function + +; IEEE single precision floor +; aubc = floor(aubc) +__fpfloor: ; CHECK:
same(bitcast(float, pair8_24_t, { out.BC, out.A }), floorf(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + cp a, 080h + jr c, __fptrunc + + .local __fpfloor.roundup + +__fpfloor.roundup: + push bc + ex (sp), hl + adc hl, hl + ld h, a + rla + jr z, .L.mantissa.zero + sub a, 07Fh + jr c, .L.return.one + sub a, 24 + jr nc, .L.return + push bc + cpl + ld c, a + ld a, h + sbc hl, hl + call __ishl + ex (sp), hl + pop bc + dec hl + call __iand + or a, a + sbc hl, bc + sbc a, -1 + push hl + pop bc + ld h, a +.L.return: + ld a, h + pop hl + ret + +.L.mantissa.zero: + dec a + cp a, 07Fh - 1 + jr nc, .L.return +.L.return.one: + ld bc, 0800000h + ld a, h + or a, 07Fh >> 1 + pop hl + ret + +;------------------------------------------------------------------------------- + + .section .text + + .global __fpceil + .type __fpceil, @function + +; IEEE single precision ceiling +; aubc = ceil(aubc) +__fpceil: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), ceilf(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + rlca + rrca + jr nc, __fpfloor.roundup + + ; require __fptrunc + +;------------------------------------------------------------------------------- + + ; .section .text + + .global __fptrunc + .type __fptrunc, @function + +; IEEE single precision truncation +; aubc = trunc(aubc) +__fptrunc: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), truncf(bitcast(float, pair8_24_t, { in.BC, in.A }))) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push bc + ex (sp), hl + add hl, hl + ld h, a + rla + sub a, 07Fh + jr c, __fptrunc.return.zero + sub a, 24 + jr nc, __fptrunc.return + ld l, c + cpl + ld c, a + ld a, l + push hl + sbc hl, hl + call __ishl + ld c, a + call __iand + ex (sp), hl + pop bc + .local __fptrunc.return +__fptrunc.return: + ld a, h + pop hl + ret + + .local 
__fptrunc.return.zero +__fptrunc.return.zero: + ld bc, 0 + ld a, h + and a, 080h + pop hl + ret + +;------------------------------------------------------------------------------- + + .extern __iand + .extern __ishl diff --git a/src/crt/fputil.src b/src/crt/fputil.src new file mode 100644 index 000000000..e882d7d55 --- /dev/null +++ b/src/crt/fputil.src @@ -0,0 +1,129 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global __fppop1 + .type __fppop1, @function + .global __fppop2 + .type __fppop2, @function + +__fppop1: + ld bc, 0800000h +__fppop2: + sla e + jr z, .L.denormal + add hl, bc + ret nc + add hl, bc +.L.denormal: + inc e + ret + +;------------------------------------------------------------------------------- + + .section .text + + .global __fppack + .type __fppack, @function + .global __fppack.normalize + .type __fppack.normalize, @function + .global __fppack.normalized + .type __fppack.normalized, @function + +__fppack.normalize: + dec b + call nz, __fppack.normalize.entry +__fppack: + add hl, de + jr nc, __fppack.normalize +__fppack.normalized: + rrc l + rlc l + adc a, 07Fh + adc hl, de + ld a, b + adc a, e + srl c + rra + srl b + ret c + add hl, de + ret + + .local __fppack.normalize.entry +__fppack.normalize.entry: + add a, a + adc hl, hl + ret m + jr nz, .L.normalize.continue + or a, a + jr nz, .L.normalize.continue + ld b, 1 +.L.normalize.loop: + add a, a + adc hl, hl + ret m +.L.normalize.continue: + djnz .L.normalize.loop + add hl, de + ret + +;------------------------------------------------------------------------------- + + .section .text + + .global __fppack2 + .type __fppack2, @function + .global __fppack2.normalize + .type __fppack2.normalize, @function + .global __fppack2.normalized + .type __fppack2.normalized, @function + +__fppack2.normalize: + dec b + call nz, __fppack2.normalize.loop +__fppack2: + add hl, de + jr nc, __fppack2.normalize +__fppack2.normalized: 
+ ld a, b + add ix, de + jr nc, .L.rounded + dec ix + add ix, de + jr nc, .L.round + bit 0, l + jr z, .L.rounded +.L.round: + scf + adc hl, de + adc a, e +.L.clear: + add hl, de + jr nc, .L.clear +.L.rounded: + cp a, 0FFh + jr z, .L.infinite + sla c + rra + ret nc + add hl, de + ret +.L.infinite: + sla c + rra + ex de, hl + ret + + .local __fppack2.normalize.loop +__fppack2.normalize.loop: + add ix, ix + adc hl, hl + ret m + djnz __fppack2.normalize.loop + add hl, de + ret + +;------------------------------------------------------------------------------- diff --git a/src/crt/ftoll.c b/src/crt/ftoll.c deleted file mode 100644 index b88ead5e3..000000000 --- a/src/crt/ftoll.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include - -/** - * @brief the exact same routine is used for (long long)float and - * (unsigned long long)float. If the input float is out of range, - * then the conversion is UB anyways. - */ -long long _ftoll_c(float x) -{ - const union { float f; uint32_t u; struct { uint32_t mantissa: FLT_MANT_DIG - 1, exponent: 8, sign: 1; }; } parts = { .f = x }; - const uint8_t exponent = parts.exponent, bias = (1 << 7) - 1; - const uint24_t mantissa = UINT24_C(1) << (FLT_MANT_DIG - 1) | parts.mantissa; - if (exponent < bias) return 0; - if (exponent <= bias + FLT_MANT_DIG - 1) { - const long long result = mantissa >> (bias + FLT_MANT_DIG - 1 - exponent); - return parts.sign ? -result : result; - } - const long long result = (long long)mantissa << (exponent - (bias + FLT_MANT_DIG - 1)); - return parts.sign ? 
-result : result; -} diff --git a/src/crt/ftoll.src b/src/crt/ftoll.src deleted file mode 100644 index c7389a1ed..000000000 --- a/src/crt/ftoll.src +++ /dev/null @@ -1,24 +0,0 @@ - .assume adl=1 - - .section .text - - .global __ftoll - .type __ftoll, @function - .global __ftoull - .type __ftoull, @function - -; __ftoll_c correctly handles all non-UB cases for both -; (long long)float and (unsigned long long)float -__ftoll: -__ftoull: - ld d, a - push iy - push de - push hl - call __ftoll_c - pop af - pop af - pop iy - ret - - .extern __ftoll_c diff --git a/src/crt/lltof.c b/src/crt/lltof.c deleted file mode 100644 index 4d5c9a84a..000000000 --- a/src/crt/lltof.c +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include -#include - -float _lltof_c(long long x) -{ - uint8_t exponent = x ? __builtin_clrsbll(x) : LLONG_WIDTH - 1; - if (exponent >= LLONG_WIDTH - LONG_WIDTH) { - return (float)((long)x); - } - exponent = LLONG_WIDTH - LONG_WIDTH - exponent; - return ldexpf((float)((long)(x >> exponent)), exponent); -} diff --git a/src/crt/lltof.src b/src/crt/lltof.src deleted file mode 100644 index 18dfd4e94..000000000 --- a/src/crt/lltof.src +++ /dev/null @@ -1,23 +0,0 @@ - .assume adl=1 - - .section .text - .global __lltof - .type __lltof, @function - -__lltof: - push af - push iy - push bc - push de - push hl - call __lltof_c - pop af - ld a, e - pop de - ld e, a - pop bc - pop iy - pop af - ret - - .extern __lltof_c diff --git a/src/crt/lltofp.src b/src/crt/lltofp.src new file mode 100644 index 000000000..b53ebfbe8 --- /dev/null +++ b/src/crt/lltofp.src @@ -0,0 +1,80 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global __lltof + .type __lltof, @function + .global __lltofp + .type __lltofp, @function + +; IEEE single precision from 64-bit integers +; euhl = float(bcudeuhl) +__lltof: +__lltofp: ; CHECK: same(bitcast(float, pair8_24_t, { out.HL, out.E }), (float)bitcast(int64_t, tuple16_24_24_t, { 
in.HL, in.DE, in.BCS })) && out.A == in.A && out.BC == in.BC && out.IX == in.IX && out.IY == in.IY + push af + push bc + ld a, b + or a, a + call m, __llneg + jr __ulltofp.enter + +;------------------------------------------------------------------------------- + + .section .text + + .global __ulltof + .type __ulltof, @function + .global __ulltofp + .type __ulltofp, @function + +__ulltof: +__ulltofp: ; CHECK: same(bitcast(float, pair8_24_t, { out.HL, out.E }), (float)bitcast(uint64_t, tuple16_24_24_t, { in.HL, in.DE, in.BCS })) && out.A == in.A && out.BC == in.BC && out.IX == in.IX && out.IY == in.IY + push af + push bc + xor a, a + .local __ulltofp.enter +__ulltofp.enter: + inc b + djnz .L.upper + inc c + dec c + jr z, .L.lower +.L.upper: + push bc + push de + push hl + ld c, a + ld a, l + or a, h + inc sp + inc sp + pop hl + pop de + inc sp + or a, l + ld l, a + ld b, 07Fh + 63 +; jq .L.pack + db $DA ; jp c, * +.L.lower: + ld c, a + ld b, 07Fh + 47 +.L.pack: + ex de, hl + push de + ex (sp), ix + ld de, 0800000h + call __fppack2 + ld e, a + pop ix + pop bc + pop af + ret + +;------------------------------------------------------------------------------- + + .extern __llneg + + .extern __fppack2 diff --git a/src/crt/ltofp.src b/src/crt/ltofp.src new file mode 100644 index 000000000..4af3c1350 --- /dev/null +++ b/src/crt/ltofp.src @@ -0,0 +1,57 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global __ultofp + .type __ultofp, @function + +; IEEE single precision from 32-bit integers +; aubc = float(aubc) +__ultofp: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), (float)bitcast(uint32_t, pair8_24_t, { in.BC, in.A })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push af + xor a, a +; jq __ltofp.enter + db $38 ; jr c, * + + ; require __ltofp + +;------------------------------------------------------------------------------- + + .section .text + + 
.global __ltofp + .type __ltofp, @function + +__ltofp: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), (float)bitcast(int32_t, pair8_24_t, { in.BC, in.A })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY + push af + .local __ltofp.enter +__ltofp.enter: + rlca + inc sp + push bc + inc sp + ex (sp), hl + ld b, c + ld c, a + ld a, b + push de + jr nc, .L.positive + add hl, de + ex de, hl + neg + sbc hl, de +.L.positive: + ld de, 0800000h + ld b, 07Fh + 31 + call __fppack + pop de + ex (sp), hl + pop bc + inc sp + ret + +;------------------------------------------------------------------------------- + + .extern __fppack diff --git a/src/crt/ulltof.c b/src/crt/ulltof.c deleted file mode 100644 index 568ad7ddd..000000000 --- a/src/crt/ulltof.c +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include -#include - -float _ulltof_c(unsigned long long x) -{ - uint8_t exponent = x ? __builtin_clzll(x) : ULLONG_WIDTH; - if (exponent >= ULLONG_WIDTH - ULONG_WIDTH) { - return (float)((unsigned long)x); - } - exponent = ULLONG_WIDTH - ULONG_WIDTH - exponent; - return ldexpf((float)((unsigned long)(x >> exponent)), exponent); -} diff --git a/src/crt/ulltof.src b/src/crt/ulltof.src deleted file mode 100644 index 6ae1d6214..000000000 --- a/src/crt/ulltof.src +++ /dev/null @@ -1,23 +0,0 @@ - .assume adl=1 - - .section .text - .global __ulltof - .type __ulltof, @function - -__ulltof: - push af - push iy - push bc - push de - push hl - call __ulltof_c - pop af - ld a, e - pop de - ld e, a - pop bc - pop iy - pop af - ret - - .extern __ulltof_c diff --git a/src/libc/sqrtf.c b/src/libc/sqrtf.c index 0aa9b45b0..72f437a30 100644 --- a/src/libc/sqrtf.c +++ b/src/libc/sqrtf.c @@ -25,7 +25,7 @@ asm push de, hl, bc ret */ -float _f32_fast_div4(float x); +float _f32_fast_div4(float x) __attribute__((__const__, __leaf__, __nothrow__)); /** * @remarks Minimum ulp of -1 diff --git a/test/floating_point/ez80sf/autotest.json 
b/test/floating_point/ez80sf/autotest.json new file mode 100644 index 000000000..6dfa8370d --- /dev/null +++ b/test/floating_point/ez80sf/autotest.json @@ -0,0 +1,43 @@ +{ + "transfer_files": [ + "bin/DEMO.8xp" + ], + "target": { + "name": "DEMO", + "isASM": true + }, + "sequence": [ + "action|launch", + "delay|4000", + "hashWait|1", + "key|enter", + "delay|400", + "hashWait|2" + ], + "hashes": { + "1": { + "description": "All tests passed or GDB1 error", + "timeout": 6000, + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "38E2AD5A", + "2C812DC2" + ] + }, + "2": { + "description": "Exit or GDB1 error", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "FFAF89BA", + "101734A5", + "9DA19F44", + "A32840C8", + "349F4775", + "271A9FBF", + "82FD0B1E" + ] + } + } +} diff --git a/test/floating_point/ez80sf/makefile b/test/floating_point/ez80sf/makefile new file mode 100644 index 000000000..3665be2f9 --- /dev/null +++ b/test/floating_point/ez80sf/makefile @@ -0,0 +1,20 @@ +# ---------------------------- +# Makefile Options +# ---------------------------- + +NAME = DEMO +ICON = icon.png +DESCRIPTION = "CE C Toolchain Demo" +COMPRESSED = NO + +COMMON_FLAGS = -Wall -Wextra -Wshadow -Wformat=2 -Wconversion -Wimplicit-float-conversion -Wimplicit-int-float-conversion +COMMON_FLAGS += -Oz -ffreestanding + +CFLAGS = ${COMMON_FLAGS} -std=c17 +CXXFLAGS = ${COMMON_FLAGS} -std=c++20 + +PREFER_OS_LIBC = NO + +# ---------------------------- + +include $(shell cedev-config --makefile) diff --git a/test/floating_point/ez80sf/src/link_test.s b/test/floating_point/ez80sf/src/link_test.s new file mode 100644 index 000000000..c675c0a6e --- /dev/null +++ b/test/floating_point/ez80sf/src/link_test.s @@ -0,0 +1,92 @@ + .assume adl=1 + +;------------------------------------------------------------------------------- + + .section .text + + .global _link_test + +_link_test: + ld hl, __fpabs + ld hl, __fpsub + ld hl, __fpadd + ld hl, __fpcbrt + ld hl, 
__fpcmpu + ld hl, __fpcmpo + ld hl, __fpcmp + ld hl, __fpcopysign + ld hl, __fpcopy + ld hl, __fpdiv + ld hl, __fpmin + ld hl, __fpmax + ld hl, __fpmul + ld hl, __fpneg + ld hl, __fprem + ld hl, __fpround + ld hl, __fpsqrt + ld hl, __fptol + ld hl, __fptoul + ld hl, __fptoll + ld hl, __fptoull + ld hl, __fpfloor + ld hl, __fpceil + ld hl, __fptrunc + ld hl, __fppop1 + ld hl, __fppop2 + ld hl, __fppack + ld hl, __fppack.normalize + ld hl, __fppack.normalized + ld hl, __fppack2 + ld hl, __fppack2.normalize + ld hl, __fppack2.normalized + ld hl, __ltofp + ld hl, __ultofp + ld hl, __lltofp + ld hl, __ulltofp + + ld hl, __fptod + ld hl, __dtofp + + or a, a + sbc hl, hl + ret + + .extern __fpabs + .extern __fpsub + .extern __fpadd + .extern __fpcbrt + .extern __fpcmpu + .extern __fpcmpo + .extern __fpcmp + .extern __fpcopysign + .extern __fpcopy + .extern __fpdiv + .extern __fpmin + .extern __fpmax + .extern __fpmul + .extern __fpneg + .extern __fprem + .extern __fpround + .extern __fpsqrt + .extern __fptol + .extern __fptoul + .extern __fptoll + .extern __fptoull + .extern __fpfloor + .extern __fpceil + .extern __fptrunc + .extern __fppop1 + .extern __fppop2 + .extern __fppack + .extern __fppack.normalize + .extern __fppack.normalized + .extern __fppack2 + .extern __fppack2.normalize + .extern __fppack2.normalized + .extern __ltofp + .extern __ultofp + .extern __lltofp + .extern __ulltofp + + .extern __fptod + .extern __dtofp diff --git a/test/floating_point/ez80sf/src/main.c b/test/floating_point/ez80sf/src/main.c new file mode 100644 index 000000000..59c49451b --- /dev/null +++ b/test/floating_point/ez80sf/src/main.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//------------------------------------------------------------------------------ +// Config +//------------------------------------------------------------------------------ + +// define to 0 or 1 +#define DEBUG_DIAGNOSTICS 0 + 
+//------------------------------------------------------------------------------
+// Utility
+//------------------------------------------------------------------------------
+// C(expr): when expr is false, return the current line number from the enclosing function.
+#define C(expr) do { if (!(expr)) { return __LINE__; } } while (0)
+// TEST(test): store the result in the caller's `int ret`; return it from the enclosing function if non-zero.
+#define TEST(test) do { ret = (test); if (ret != 0) { return ret; } } while (0)
+
+#ifndef DEBUG_DIAGNOSTICS
+#error "DEBUG_DIAGNOSTICS needs to be defined to 0 or 1"
+#endif
+
+#if DEBUG_DIAGNOSTICS
+#define test_printf printf
+#else
+#define test_printf(...)
+#endif
+
+//------------------------------------------------------------------------------
+// Tests
+//------------------------------------------------------------------------------
+// Implemented in src/link_test.s: references every CRT float routine symbol, then zeroes HL and returns.
+int link_test(void);
+// Runs each test in order; returns 0 when all pass, otherwise the first non-zero test result.
+int run_tests(void) {
+    int ret = 0;
+
+    TEST(link_test());
+
+    /* passed all */
+    return ret;
+}
+// Reports the outcome of run_tests() on stdout, then blocks until os_GetCSC() returns non-zero.
+int main(void) {
+    os_ClrHome();
+    int failed_test = run_tests();
+    if (failed_test != 0) {
+        char buf[sizeof("Failed test L-8388608\n")]; // NOTE(review): sized for the most negative 24-bit value; confirm int width
+        boot_sprintf(buf, "Failed test L%d\n", failed_test);
+        fputs(buf, stdout);
+    } else {
+        fputs("All tests passed", stdout);
+    }
+
+    while (!os_GetCSC()); // spin until os_GetCSC() yields a non-zero value
+
+    return 0;
+}