From 7210ee219d2ea609221650d2be50a8ec1031964a Mon Sep 17 00:00:00 2001
From: Sean Cross
Date: Thu, 21 Mar 2019 11:01:46 +0800
Subject: [PATCH] sw: add div/mul instructions

For foboot, we're using a CPU without a divide or multiply instruction,
to save gates. Replace these with software implementations.

Signed-off-by: Sean Cross
---
 sw/third_party/div.S | 121 +++++++++++++++++++++++++++++++++++++++++++
 sw/third_party/mul.S |  26 ++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 sw/third_party/div.S
 create mode 100644 sw/third_party/mul.S

diff --git a/sw/third_party/div.S b/sw/third_party/div.S
new file mode 100644
index 0000000..ca0d9f4
--- /dev/null
+++ b/sw/third_party/div.S
@@ -0,0 +1,121 @@
+  .text
+  .align 2
+
+#ifndef __riscv64
+/* Our RV64 64-bit routines are equivalent to our RV32 32-bit routines.  */
+# define __udivdi3 __udivsi3
+# define __umoddi3 __umodsi3
+# define __divdi3 __divsi3
+# define __moddi3 __modsi3
+#else
+  .globl __udivsi3
+__udivsi3:
+  /* Compute __udivdi3(a0 << 32, a1 << 32); cast result to uint32_t.  */
+  sll    a0, a0, 32
+  sll    a1, a1, 32
+  move   t0, ra
+  jal    __udivdi3
+  sext.w a0, a0
+  jr     t0
+
+  .globl __umodsi3
+__umodsi3:
+  /* Compute __udivdi3((uint32_t)a0, (uint32_t)a1); cast a1 to uint32_t.  */
+  sll    a0, a0, 32
+  sll    a1, a1, 32
+  srl    a0, a0, 32
+  srl    a1, a1, 32
+  move   t0, ra
+  jal    __udivdi3
+  sext.w a0, a1
+  jr     t0
+
+  .globl __modsi3
+  __modsi3 = __moddi3
+
+  .globl __divsi3
+__divsi3:
+  /* Check for special case of INT_MIN/-1.  Otherwise, fall into __divdi3.  */
+  li     t0, -1
+  beq    a1, t0, .L20
+#endif
+
+  .globl __divdi3
+__divdi3:
+  bltz   a0, .L10
+  bltz   a1, .L11
+  /* Since the quotient is positive, fall into __udivdi3.  */
+
+  .globl __udivdi3
+__udivdi3:
+  mv     a2, a1
+  mv     a1, a0
+  li     a0, -1
+  beqz   a2, .L5
+  li     a3, 1
+  bgeu   a2, a1, .L2
+.L1:
+  blez   a2, .L2
+  slli   a2, a2, 1
+  slli   a3, a3, 1
+  bgtu   a1, a2, .L1
+.L2:
+  li     a0, 0
+.L3:
+  bltu   a1, a2, .L4
+  sub    a1, a1, a2
+  or     a0, a0, a3
+.L4:
+  srli   a3, a3, 1
+  srli   a2, a2, 1
+  bnez   a3, .L3
+.L5:
+  ret
+
+  .globl __umoddi3
+__umoddi3:
+  /* Call __udivdi3(a0, a1), then return the remainder, which is in a1.  */
+  move   t0, ra
+  jal    __udivdi3
+  move   a0, a1
+  jr     t0
+
+  /* Handle negative arguments to __divdi3.  */
+.L10:
+  neg    a0, a0
+  bgez   a1, .L12    /* Compute __udivdi3(-a0, a1), then negate the result.  */
+  neg    a1, a1
+  j      __udivdi3   /* Compute __udivdi3(-a0, -a1).  */
+.L11:                /* Compute __udivdi3(a0, -a1), then negate the result.  */
+  neg    a1, a1
+.L12:
+  move   t0, ra
+  jal    __udivdi3
+  neg    a0, a0
+  jr     t0
+
+  .globl __moddi3
+__moddi3:
+  move   t0, ra
+  bltz   a1, .L31
+  bltz   a0, .L32
+.L30:
+  jal    __udivdi3   /* The dividend is not negative.  */
+  move   a0, a1
+  jr     t0
+.L31:
+  neg    a1, a1
+  bgez   a0, .L30
+.L32:
+  neg    a0, a0
+  jal    __udivdi3   /* The dividend is negative.  */
+  neg    a0, a1
+  jr     t0
+
+#ifdef __riscv64
+  /* Continuation of __divsi3: INT_MIN/-1 overflows, so return INT_MIN.  */
+.L20:
+  sll    t0, t0, 31
+  bne    a0, t0, __divdi3
+  ret
+#endif
\ No newline at end of file
diff --git a/sw/third_party/mul.S b/sw/third_party/mul.S
new file mode 100644
index 0000000..d44f726
--- /dev/null
+++ b/sw/third_party/mul.S
@@ -0,0 +1,26 @@
+  .text
+  .align 2
+
+#ifdef __riscv64
+#define _RISCV_SZPTR 64
+#define _RISCV_SZINT 64
+#else
+/* Our RV64 64-bit routine is equivalent to our RV32 32-bit routine.  */
+# define __muldi3 __mulsi3
+#define _RISCV_SZPTR 32
+#define _RISCV_SZINT 32
+#endif
+
+  .globl __muldi3
+__muldi3:
+  mv     a2, a0
+  li     a0, 0
+.L1:
+  slli   a3, a1, _RISCV_SZPTR-1
+  bgez   a3, .L2
+  add    a0, a0, a2
+.L2:
+  srli   a1, a1, 1
+  slli   a2, a2, 1
+  bnez   a1, .L1
+  ret
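
A note on the algorithm, since the core loop is terse: __udivdi3 is a classic
shift-subtract (restoring) divider. It scales the divisor (a2) and a quotient
bit mask (a3) up until the divisor reaches the dividend, then shifts both back
down one position at a time, subtracting the divisor and setting the matching
quotient bit wherever it still fits. The quotient ends up in a0 and the
remainder in a1, which is why __umoddi3 and __moddi3 can simply call it and
return a1. Below is a minimal C model of the RV32 case for reference; the
function and parameter names are hypothetical, and none of this is part of
the patch:

    #include <stdint.h>

    /* Hypothetical C model of the __udivdi3 loop above (RV32 case).
       num mirrors a1, den mirrors a2, bit mirrors a3, quot mirrors a0. */
    static uint32_t udiv32_model(uint32_t num, uint32_t den, uint32_t *rem)
    {
        uint32_t quot = 0xFFFFFFFFu;  /* li a0, -1: the divide-by-zero result */
        uint32_t bit  = 1;

        if (den != 0) {
            /* Scale the divisor up toward the dividend, stopping early if
               the next shift would push a set bit off the top (blez a2). */
            while (den < num && (int32_t)den > 0) {
                den <<= 1;
                bit <<= 1;
            }
            quot = 0;
            /* Walk back down, subtracting wherever the divisor fits. */
            while (bit != 0) {
                if (num >= den) {
                    num  -= den;
                    quot |= bit;
                }
                den >>= 1;
                bit >>= 1;
            }
        }
        if (rem)
            *rem = num;  /* the remainder the assembly leaves in a1 */
        return quot;
    }

Note the deliberate divide-by-zero behavior: the quotient comes back as all
ones and the remainder as the original dividend, matching the early
`li a0, -1` / `beqz a2, .L5` exit in the assembly. The signed entry points
(__divdi3, __moddi3) reduce to this unsigned core by negating operands on the
way in and the result on the way out.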
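
__muldi3 is the matching shift-and-add multiplier: one iteration per remaining
multiplier bit, accumulating the progressively doubled multiplicand whenever
the current low bit is set. The `slli a3, a1, _RISCV_SZPTR-1` / `bgez a3, .L2`
pair is simply a test of bit 0, performed by parking it in the sign position.
A minimal C model, again with hypothetical names and not part of the patch:

    #include <stdint.h>

    /* Hypothetical C model of __muldi3 above (RV32 case).
       a mirrors a2, b mirrors a1, acc mirrors a0. */
    static uint32_t mul32_model(uint32_t a, uint32_t b)
    {
        uint32_t acc = 0;

        while (b != 0) {
            if (b & 1)    /* the slli/bgez bit test in the assembly */
                acc += a; /* add the multiplicand scaled by this bit */
            b >>= 1;      /* consume one multiplier bit */
            a <<= 1;      /* double the multiplicand to match */
        }
        return acc;
    }

The `# define __muldi3 __mulsi3` at the top of mul.S renames the single body
so the same instruction sequence provides the native-width multiply on either
XLEN, which is also why the shift amount is written as _RISCV_SZPTR-1 rather
than a literal 31 or 63.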