Initial commit.

crypto-aes
Damien 9 years ago
commit 429d71943d

1
py/.gitignore vendored

@ -0,0 +1 @@
*.o

@ -0,0 +1,50 @@
# Build description for the `py` executable.
#
# Every list below ends without a trailing backslash so that the next
# assignment is not swallowed into it as a continuation line.
CC = gcc
# Note: -std=gnu99 comes last and therefore overrides the dialect chosen by -ansi.
CFLAGS = -Wall -ansi -std=gnu99 -Os #-DNDEBUG
LDFLAGS =

SRC = \
	malloc.c \
	misc.c \
	qstr.c \
	lexer.c \
	lexerfile.c \
	parse.c \
	scope.c \
	compile.c \
	emitcommon.c \
	emitcpy.c \
	emitbc.c \
	asmx64.c \
	emitx64v2.c \
	emitthumb.c \
	asmthumb.c \
	runtime.c \
	bc.c \
	main.c

SRC_ASM = \
	runtime1.s

OBJ = $(SRC:.c=.o) $(SRC_ASM:.s=.o)
LIB =
PROG = py

# clean does not produce a file called "clean"
.PHONY: clean

$(PROG): $(OBJ)
	$(CC) -o $@ $(OBJ) $(LIB) $(LDFLAGS)

# these two files are always built with -O3, overriding the -Os in CFLAGS
runtime.o: runtime.c
	$(CC) $(CFLAGS) -O3 -c -o $@ $<

bc.o: bc.c
	$(CC) $(CFLAGS) -O3 -c -o $@ $<

# header dependencies
parse.o: grammar.h
compile.o: grammar.h
emitcpy.o: emit.h
emitbc.o: emit.h
emitx64.o: emit.h
emitx64v2.o: emit.h
emitthumb.o: emit.h

# $(RM) is "rm -f", so cleaning an already-clean tree is not an error
clean:
	$(RM) $(OBJ)

@ -0,0 +1,421 @@
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "misc.h"
#include "machine.h"
#include "asmthumb.h"
// Range tests for immediates, written for 32-bit arguments.
// UNSIGNED_FITn(x) is true when x fits in n bits without sign;
// SIGNED_FITn(x) is true when x fits in n bits as a two's complement value.
// Every expansion is a single parenthesised expression so the macros can be
// combined safely with !, && and ?: at the point of use.
#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00))
#define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800))
// State of one Thumb assembler instance, carried across the assembly passes.
struct _asm_thumb_t {
int pass; // pass number last given to asm_thumb_start_pass
uint code_offset; // number of bytes emitted so far in the current pass
uint code_size; // total size in bytes, recorded when pass 2 ends
byte *code_base; // output buffer, allocated when pass 2 ends
byte dummy_data[8]; // scratch buffer that absorbs writes made before pass 3
int next_label; // id returned by the next asm_thumb_label_new call
int max_num_labels; // number of entries in label_offsets
int *label_offsets; // code offset per label id; set to -1 at the start of pass 2
int num_locals; // local count stored by asm_thumb_entry
uint push_reglist; // register bitmap pushed by asm_thumb_entry, popped by asm_thumb_exit
uint stack_adjust; // extra SP adjustment emitted by asm_thumb_entry (operand of OP_SUB_SP)
};
asm_thumb_t *asm_thumb_new() {
asm_thumb_t *as;
as = m_new(asm_thumb_t, 1);
as->pass = 0;
as->code_offset = 0;
as->code_size = 0;
as->code_base = NULL;
as->label_offsets = NULL;
as->num_locals = 0;
return as;
}
// Release an assembler created by asm_thumb_new.
//   as        - assembler to destroy
//   free_code - when true the generated code buffer is freed as well; when
//               false the buffer is left allocated for the caller to keep using
void asm_thumb_free(asm_thumb_t *as, bool free_code) {
    if (free_code) {
        m_free(as->code_base);
    }
    // the label table is owned by the assembler and was previously leaked
    if (as->label_offsets != NULL) {
        m_free(as->label_offsets);
    }
    m_free(as);
}
// Begin an assembly pass: rewind the output position and the label counter.
// Pass 1 additionally resets the label capacity; pass 2 marks every label
// offset as unassigned (-1).
void asm_thumb_start_pass(asm_thumb_t *as, int pass) {
    as->pass = pass;
    as->code_offset = 0;
    as->next_label = 1;

    switch (pass) {
        case ASM_THUMB_PASS_1:
            as->max_num_labels = 0;
            break;
        case ASM_THUMB_PASS_2:
            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
            break;
        default:
            break;
    }
}
// Finish the current pass.
//   pass 1: size and allocate the label offset table
//   pass 2: record the code size and allocate the output buffer
// Other passes need no finalisation.
void asm_thumb_end_pass(asm_thumb_t *as) {
    if (as->pass == ASM_THUMB_PASS_1) {
        // calculate number of labels needed
        if (as->next_label > as->max_num_labels) {
            as->max_num_labels = as->next_label;
        }
        // drop the table left over from an earlier use of this assembler,
        // otherwise every new pass 1 would leak it
        if (as->label_offsets != NULL) {
            m_free(as->label_offsets);
        }
        as->label_offsets = m_new(int, as->max_num_labels);
    } else if (as->pass == ASM_THUMB_PASS_2) {
        // calculate size of code in bytes
        as->code_size = as->code_offset;
        // NOTE(review): a code_base from an earlier run is not freed here;
        // presumably the caller may still hold it - confirm ownership
        as->code_base = m_new(byte, as->code_size);
        printf("code_size: %u\n", as->code_size);
    }
}
// Single choke point for emitting bytes: reserves num_bytes_to_write bytes at
// the current position and returns where the caller should store them.
// Before pass 3 the returned pointer is the scratch buffer, so only the
// running offset is affected; from pass 3 on it points into the code buffer.
static byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int num_bytes_to_write) {
    byte *dest;
    if (as->pass >= ASM_THUMB_PASS_3) {
        assert(as->code_offset + num_bytes_to_write <= as->code_size);
        dest = as->code_base + as->code_offset;
    } else {
        dest = as->dummy_data;
    }
    as->code_offset += num_bytes_to_write;
    return dest;
}
// Size in bytes of the generated code, as recorded at the end of pass 2.
uint asm_thumb_get_code_size(asm_thumb_t *as) {
    uint size_in_bytes = as->code_size;
    return size_in_bytes;
}
// Entry address of the generated code, with bit 0 set to mark it as Thumb code.
void *asm_thumb_get_code(asm_thumb_t *as) {
    machine_uint_t entry = (machine_uint_t)as->code_base;
    entry |= 1;
    return (void *)entry;
}
/*
static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) {
byte *c = asm_thumb_get_cur_to_write_bytes(as, 1);
c[0] = b1;
}
*/
// Emit one 16-bit instruction, least significant byte first.
static void asm_thumb_write_op16(asm_thumb_t *as, uint op) {
    byte *out = asm_thumb_get_cur_to_write_bytes(as, 2);
    out[0] = (byte)op;
    out[1] = (byte)(op >> 8);
}
// Emit a 32-bit instruction given as two halfwords: op1 first, then op2,
// each stored least significant byte first.
static void asm_thumb_write_op32(asm_thumb_t *as, uint op1, uint op2) {
    byte *out = asm_thumb_get_cur_to_write_bytes(as, 4);
    const uint halfword[2] = { op1, op2 };
    for (int i = 0; i < 2; i++) {
        out[2 * i] = (byte)halfword[i];
        out[2 * i + 1] = (byte)(halfword[i] >> 8);
    }
}
/*
#define IMM32_L0(x) ((x) & 0xff)
#define IMM32_L1(x) (((x) >> 8) & 0xff)
#define IMM32_L2(x) (((x) >> 16) & 0xff)
#define IMM32_L3(x) (((x) >> 24) & 0xff)
static void asm_thumb_write_word32(asm_thumb_t *as, int w32) {
byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
c[0] = IMM32_L0(w32);
c[1] = IMM32_L1(w32);
c[2] = IMM32_L2(w32);
c[3] = IMM32_L3(w32);
}
*/
// 16-bit opcodes for the function prologue/epilogue.
// rlolist is a bitmap selecting the lo-registers to transfer; the _LR and _PC
// variants additionally set bit 8 (0x0100) of the opcode.
// The arguments are OR-ed in unmasked, so callers must pass values that fit
// the field.
#define OP_PUSH_RLIST(rlolist) (0xb400 | (rlolist))
#define OP_PUSH_RLIST_LR(rlolist) (0xb400 | 0x0100 | (rlolist))
#define OP_POP_RLIST(rlolist) (0xbc00 | (rlolist))
#define OP_POP_RLIST_PC(rlolist) (0xbc00 | 0x0100 | (rlolist))
#define OP_ADD_SP(num_words) (0xb000 | (num_words))
#define OP_SUB_SP(num_words) (0xb080 | (num_words))
// Emit the function prologue and remember the frame layout for asm_thumb_exit.
// The pushed register list and the extra stack space are chosen so that all
// locals have a slot and the stack stays 8-byte aligned. r0 is never in the
// list because it is used for the return value.
//   num_locals - number of local slots required (negative is treated as 0)
void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
    if (num_locals < 0) {
        num_locals = 0;
    }

    uint reglist;
    uint stack_adjust = 0;
    if (num_locals <= 1) {
        // 0 or 1 locals
        reglist = 0xf2;
    } else {
        // 2 or more locals
        reglist = 0xfe;
        if (num_locals > 3) {
            // beyond 3 locals extra space is reserved, rounded up to an even count
            stack_adjust = ((num_locals - 3) + 1) & (~1);
        }
    }

    asm_thumb_write_op16(as, OP_PUSH_RLIST_LR(reglist));
    if (stack_adjust > 0) {
        asm_thumb_write_op16(as, OP_SUB_SP(stack_adjust));
    }

    as->push_reglist = reglist;
    as->stack_adjust = stack_adjust;
    as->num_locals = num_locals;
}
// Emit the function epilogue matching the last asm_thumb_entry: undo the
// extra stack adjustment (if any), then pop the saved registers and PC.
void asm_thumb_exit(asm_thumb_t *as) {
    uint extra = as->stack_adjust;
    if (extra != 0) {
        asm_thumb_write_op16(as, OP_ADD_SP(extra));
    }
    asm_thumb_write_op16(as, OP_POP_RLIST_PC(as->push_reglist));
}
// Hand out the next unused label id.
int asm_thumb_label_new(asm_thumb_t *as) {
    int id = as->next_label;
    as->next_label = id + 1;
    return id;
}
// Bind a label to the current code offset.
// Pass 1 does nothing, pass 2 records the offset (a label may be assigned
// only once), and pass 3 checks that the offset is the same as in pass 2.
void asm_thumb_label_assign(asm_thumb_t *as, int label) {
    if (as->pass <= ASM_THUMB_PASS_1) {
        return;
    }
    assert(label < as->max_num_labels);

    switch (as->pass) {
        case ASM_THUMB_PASS_2:
            // first and only assignment of this label
            assert(as->label_offsets[label] == -1);
            as->label_offsets[label] = as->code_offset;
            break;
        case ASM_THUMB_PASS_3:
            // the layout must not move between pass 2 and pass 3
            assert(as->label_offsets[label] == as->code_offset);
            break;
        default:
            break;
    }
}
// Emit "movs rlo_dest, #i8".
// The i8 value will be zero extended into the r32 register!
//   rlo_dest - destination, must be a lo-register (r0-r7)
//   i8       - immediate; only its low 8 bits are encoded, so an out-of-range
//              value can no longer spill into the register and opcode fields
void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8) {
    assert(rlo_dest < REG_R8);
    // movs rlo_dest, #i8
    asm_thumb_write_op16(as, 0x2000 | (rlo_dest << 8) | (i8 & 0xff));
}
// Emit movw (load_hi_half false) or movt (load_hi_half true) of a 16-bit
// immediate into reg_dest. When loading the low half the value is zero
// extended into the 32-bit register.
void asm_thumb_mov_i16_to_reg(asm_thumb_t *as, int i16, uint reg_dest, bool load_hi_half) {
    assert(reg_dest < REG_R15);

    // movt reg_dest, #i16  or  movw reg_dest, #i16
    uint op = load_hi_half ? 0xf2c0 : 0xf240;

    // the immediate is scattered over four fields of the two halfwords
    uint field_top4 = (i16 >> 12) & 0xf;     // bits 15..12 -> first halfword, bits 3..0
    uint field_bit11 = (i16 >> 1) & 0x0400;  // bit 11      -> first halfword, bit 10
    uint field_mid3 = (i16 << 4) & 0x7000;   // bits 10..8  -> second halfword, bits 14..12
    uint field_low8 = i16 & 0xff;            // bits 7..0   -> second halfword, bits 7..0

    asm_thumb_write_op32(as, op | field_bit11 | field_top4, field_mid3 | (reg_dest << 8) | field_low8);
}
// Load a full 32-bit immediate with a movw/movt pair (8 bytes of code).
// A pc-relative ldr plus a data word would take 6 bytes, but the data word
// might be out of reach at the end of the code.
void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) {
    machine_uint_t upper_half = i32 >> 16;
    asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false);
    asm_thumb_mov_i16_to_reg(as, upper_half, reg_dest, true);
}
// Load an immediate using the shortest of the available encodings:
// a 16-bit movs for 8-bit values into a lo-register, a single movw for
// 16-bit values, otherwise the full movw/movt pair.
void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
    if (reg_dest < 8 && UNSIGNED_FIT8(i32)) {
        asm_thumb_mov_reg_i8(as, reg_dest, i32);
        return;
    }
    if (UNSIGNED_FIT16(i32)) {
        asm_thumb_mov_i16_to_reg(as, i32, reg_dest, false);
        return;
    }
    asm_thumb_mov_reg_i32(as, reg_dest, i32);
}
// Emit a register-to-register move (base opcode 0x4600).
// Registers numbered 8 and above are encoded with an extra flag bit:
// 0x40 for the source and 0x80 for the destination.
void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
    uint src_bits = (reg_src < 8) ? (reg_src << 3) : (0x40 | ((reg_src - 8) << 3));
    uint dest_bits = (reg_dest < 8) ? reg_dest : (0x80 | (reg_dest - 8));
    asm_thumb_write_op16(as, 0x4600 | src_bits | dest_bits);
}
// 16-bit store/load of a lo-register at an SP-relative slot.
// rlo_dest goes into bits 10..8; word_offset is masked to its low 8 bits.
#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
// Store lo-register rlo_src into local variable local_num.
// Local 0 lives in the highest slot, local num_locals-1 in slot 0.
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
    assert(rlo_src < REG_R8);
    int slot = as->num_locals - local_num - 1;
    // the slot is only validated in the final pass
    assert(slot >= 0 || as->pass < ASM_THUMB_PASS_3);
    asm_thumb_write_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, slot));
}
// Load local variable local_num into lo-register rlo_dest.
// Local 0 lives in the highest slot, local num_locals-1 in slot 0.
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
    assert(rlo_dest < REG_R8);
    int slot = as->num_locals - local_num - 1;
    // the slot is only validated in the final pass
    assert(slot >= 0 || as->pass < ASM_THUMB_PASS_3);
    asm_thumb_write_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, slot));
}
// Load the address of local variable local_num into reg_dest.
// Uses the 16-bit "load address" form (format 12): add reg_dest, sp, #imm8*4,
// with the same slot numbering as asm_thumb_mov_reg_local.
//   reg_dest  - destination, must be a lo-register (r0-r7) for this encoding
//   local_num - index of the local whose address is wanted
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num) {
    assert(reg_dest < REG_R8);
    int word_offset = as->num_locals - local_num - 1;
    assert(as->pass < ASM_THUMB_PASS_3 || word_offset >= 0);
    // add reg_dest, sp, #(word_offset * 4)
    asm_thumb_write_op16(as, 0xa800 | (reg_dest << 8) | (word_offset & 0x00ff));
}
// 16-bit three-register add: destination in bits 2..0, first source in
// bits 5..3, second source in bits 8..6.
#define OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b) (0x1800 | ((rlo_src_b) << 6) | ((rlo_src_a) << 3) | (rlo_dest))

// Emit rlo_dest = rlo_src_a + rlo_src_b. The register numbers are not range
// checked here.
void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) {
    uint opcode = OP_ADD_REG_REG_REG(rlo_dest, rlo_src_a, rlo_src_b);
    asm_thumb_write_op16(as, opcode);
}
// 16-bit register compare: rlo_a in bits 2..0, rlo_b in bits 5..3.
#define OP_CMP_REG_REG(rlo_a, rlo_b) (0x4280 | ((rlo_b) << 3) | (rlo_a))

// Emit a compare of rlo_a with rlo_b. The register numbers are not range
// checked here.
void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) {
    uint opcode = OP_CMP_REG_REG(rlo_a, rlo_b);
    asm_thumb_write_op16(as, opcode);
}
// Emit the fixed 16-bit opcode 0xbfac (the "ite ge" instruction, going by the
// function name).
void asm_thumb_ite_ge(asm_thumb_t *as) {
    const uint op_ite_ge = 0xbfac;
    asm_thumb_write_op16(as, op_ite_ge);
}
// Unconditional branches. byte_offset is the byte distance to the target.
// OP_B is the 16-bit form and encodes bits 11..1 of the offset.
#define OP_B(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff))
// this could be wrong, because it should have a range of +/- 16MiB...
// OP_BW_HI/OP_BW_LO are the two halfwords of the 32-bit form: the first takes
// bits 22..12 of the offset, the second takes bits 11..1 on top of the
// constant 0xb800.
// NOTE(review): 0xb800 hard-codes the two J bits of the second halfword to 1;
// per the B.W (T4) encoding this looks correct only for offsets within about
// +/- 4MiB - confirm against the architecture manual.
#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))
// Emit an unconditional branch to a label. Nothing is emitted in pass 1.
// A backward branch whose offset fits the 16-bit form uses it; every other
// branch (forward, or too far back) uses the 32-bit form, because the
// distance of a forward branch is not yet known when sizes are fixed.
void asm_thumb_b_label(asm_thumb_t *as, int label) {
    if (as->pass <= ASM_THUMB_PASS_1) {
        return;
    }

    int target = as->label_offsets[label];
    int rel = target - as->code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction

    // target >= 0 means the label has already been assigned in this pass
    int use_short = target >= 0 && rel <= -4 && (SIGNED_FIT12(rel));
    if (use_short) {
        asm_thumb_write_op16(as, OP_B(rel));
    } else {
        asm_thumb_write_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
    }
}
// 16-bit compare of a lo-register with an 8-bit immediate.
#define OP_CMP_REG_IMM(rlo, i8) (0x2800 | ((rlo) << 8) | (i8))
// all these bit arithmetics need coverage testing!
// Branch-if-equal. byte_offset is the byte distance to the target.
// OP_BEQ is the 16-bit form and encodes bits 8..1 of the offset.
#define OP_BEQ(byte_offset) (0xd000 | (((byte_offset) >> 1) & 0x00ff))
// 32-bit form (encoding T3): the offset is S:J2:J1:imm6:imm11:'0', i.e.
//   offset bit 20      -> S    (first halfword, bit 10)
//   offset bits 17..12 -> imm6 (first halfword, bits 5..0)
//   offset bit 18      -> J1   (second halfword, bit 13)
//   offset bit 19      -> J2   (second halfword, bit 11)
//   offset bits 11..1  -> imm11 (second halfword, bits 10..0)
// The previous version placed offset bits 12 and 13 in J2/J1 and bits 19..14
// in imm6, which encoded the wrong target once |offset| reached 4096.
#define OP_BEQW_HI(byte_offset) (0xf000 | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 12) & 0x003f))
#define OP_BEQW_LO(byte_offset) (0x8000 | (((byte_offset) >> 5) & 0x2000) | (((byte_offset) >> 8) & 0x0800) | (((byte_offset) >> 1) & 0x07ff))
// Emit "compare rlo with 0" followed by a branch-if-equal to a label.
// The compare is emitted in every pass; the branch is skipped in pass 1.
// A backward branch whose offset fits the 16-bit form (9-bit signed byte
// offset) uses it; everything else uses the 32-bit form, because the distance
// of a forward branch is not yet known when sizes are fixed.
void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label) {
    assert(rlo < REG_R8);

    // compare reg with 0
    asm_thumb_write_op16(as, OP_CMP_REG_IMM(rlo, 0));

    if (as->pass <= ASM_THUMB_PASS_1) {
        return;
    }

    // branch if equal; the offset is measured from after the compare
    int target = as->label_offsets[label];
    int rel = target - as->code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction

    // target >= 0 means the label has already been assigned in this pass
    int use_short = target >= 0 && rel <= -4 && (SIGNED_FIT9(rel));
    if (use_short) {
        asm_thumb_write_op16(as, OP_BEQ(rel));
    } else {
        asm_thumb_write_op32(as, OP_BEQW_HI(rel), OP_BEQW_LO(rel));
    }
}
// Opcodes used by asm_thumb_bl_ind.
// OP_BLX: branch-with-link through a register, register number in bits 6..3.
// OP_SVC: opcode 0xdf00 with arg OR-ed into the low bits (unmasked).
// OP_LDR_FROM_BASE_OFFSET: load rlo_dest from [rlo_base + word_offset words];
// word_offset is limited to 5 bits by the mask.
#define OP_BLX(reg) (0x4780 | ((reg) << 3))
#define OP_SVC(arg) (0xdf00 | (arg))
#define OP_LDR_FROM_BASE_OFFSET(rlo_dest, rlo_base, word_offset) (0x6800 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest))
// Emit an indirect call to a runtime function.
//   fun_ptr  - absolute address of the function (used by the immediate strategy only)
//   fun_id   - index of the function (used by the table and SVC strategies)
//   reg_temp - scratch register that receives the call target
// Three strategies are kept side by side; the table lookup is the one that is
// compiled in. TODO make this use less bytes.
void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) {
    enum { CALL_VIA_IMMEDIATE, CALL_VIA_TABLE, CALL_VIA_SVC };
    const int strategy = CALL_VIA_TABLE;

    switch (strategy) {
        case CALL_VIA_IMMEDIATE:
            // load ptr to function into register using immediate, then branch
            // not relocatable
            asm_thumb_mov_reg_i32(as, reg_temp, (machine_uint_t)fun_ptr);
            asm_thumb_write_op16(as, OP_BLX(reg_temp));
            break;
        case CALL_VIA_TABLE:
            // fetch entry fun_id of the table addressed by r7, then branch
            asm_thumb_write_op16(as, OP_LDR_FROM_BASE_OFFSET(reg_temp, REG_R7, fun_id));
            asm_thumb_write_op16(as, OP_BLX(reg_temp));
            break;
        default:
            // use SVC
            asm_thumb_write_op16(as, OP_SVC(fun_id));
            break;
    }
}

@ -0,0 +1,60 @@
// Public interface of the Thumb assembler.
// The types bool, uint and machine_uint_t must be declared before this header
// is included.
#ifndef ASMTHUMB_H
#define ASMTHUMB_H

// pass numbers accepted by asm_thumb_start_pass
#define ASM_THUMB_PASS_1 (1)
#define ASM_THUMB_PASS_2 (2)
#define ASM_THUMB_PASS_3 (3)

// register numbers
#define REG_R0 (0)
#define REG_R1 (1)
#define REG_R2 (2)
#define REG_R3 (3)
#define REG_R4 (4)
#define REG_R5 (5)
#define REG_R6 (6)
#define REG_R7 (7)
#define REG_R8 (8)
#define REG_R9 (9)
#define REG_R10 (10)
#define REG_R11 (11)
#define REG_R12 (12)
#define REG_R13 (13)
#define REG_R14 (14)
#define REG_R15 (15)
#define REG_LR (REG_R14)

// registers used for the return value and the first four arguments
#define REG_RET REG_R0
#define REG_ARG_1 REG_R0
#define REG_ARG_2 REG_R1
#define REG_ARG_3 REG_R2
#define REG_ARG_4 REG_R3

// opaque assembler state
typedef struct _asm_thumb_t asm_thumb_t;

// lifetime
asm_thumb_t *asm_thumb_new(void);
void asm_thumb_free(asm_thumb_t *as, bool free_code);

// pass control and access to the result
void asm_thumb_start_pass(asm_thumb_t *as, int pass);
void asm_thumb_end_pass(asm_thumb_t *as);
uint asm_thumb_get_code_size(asm_thumb_t *as);
void *asm_thumb_get_code(asm_thumb_t *as);

// function prologue and epilogue
void asm_thumb_entry(asm_thumb_t *as, int num_locals);
void asm_thumb_exit(asm_thumb_t *as);

// labels
int asm_thumb_label_new(asm_thumb_t *as);
void asm_thumb_label_assign(asm_thumb_t *as, int label);

// argument order follows ARM, in general dest is first
// parameters named rlo_* must be lo-registers (r0-r7)
void asm_thumb_mov_reg_i8(asm_thumb_t *as, uint rlo_dest, int i8_src);
void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32_src);
void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32_src);
void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src);
void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num_dest, uint rlo_src);
void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num);
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint reg_dest, int local_num);
void asm_thumb_add_reg_reg_reg(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b);
void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b);
void asm_thumb_ite_ge(asm_thumb_t *as);

// branches and calls
void asm_thumb_b_label(asm_thumb_t *as, int label);
void asm_thumb_cmp_reg_bz_label(asm_thumb_t *as, uint rlo, int label);
void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp);

#endif // ASMTHUMB_H

@ -0,0 +1,621 @@
#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <string.h>
#include "misc.h"
#include "asmx64.h"
/* all offsets are measured in multiples of 8 bytes */
#define WORD_SIZE (8)
/* Opcode bytes used by the emitters in this file. A trailing "/n" comment is
   the value the emitters place in the reg field of the ModRM byte for that
   opcode; "/r" marks opcodes whose ModRM reg field names a register. Several
   opcode values are shared between instructions and told apart by that field. */
#define OPCODE_NOP (0x90)
#define OPCODE_PUSH_R64 (0x50)
#define OPCODE_PUSH_I64 (0x68)
#define OPCODE_PUSH_M64 (0xff) /* /6 */
#define OPCODE_POP_R64 (0x58)
#define OPCODE_RET (0xc3)
#define OPCODE_MOV_I8_TO_R8 (0xb0) /* +rb */
#define OPCODE_MOV_I64_TO_R64 (0xb8)
#define OPCODE_MOV_I32_TO_RM32 (0xc7)
#define OPCODE_MOV_R64_TO_RM64 (0x89)
#define OPCODE_MOV_RM64_TO_R64 (0x8b)
#define OPCODE_LEA_MEM_TO_R64 (0x8d) /* /r */
#define OPCODE_XOR_R64_TO_RM64 (0x31) /* /r */
#define OPCODE_ADD_R64_TO_RM64 (0x01)
#define OPCODE_ADD_I32_TO_RM32 (0x81) /* /0 */
#define OPCODE_ADD_I8_TO_RM32 (0x83) /* /0 */
#define OPCODE_SUB_R64_FROM_RM64 (0x29)
#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */
#define OPCODE_SUB_I8_FROM_RM64 (0x83) /* /5 */
#define OPCODE_SHL_RM32_BY_I8 (0xc1) /* /4 */
#define OPCODE_SHR_RM32_BY_I8 (0xc1) /* /5 */
#define OPCODE_SAR_RM32_BY_I8 (0xc1) /* /7 */
#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
#define OPCODE_CMP_I8_WITH_RM32 (0x83) /* /7 */
#define OPCODE_CMP_R64_WITH_RM64 (0x39)
#define OPCODE_CMP_RM32_WITH_R32 (0x3b)
#define OPCODE_TEST_R8_WITH_RM8 (0x84) /* /r */
#define OPCODE_JMP_REL8 (0xeb)
#define OPCODE_JMP_REL32 (0xe9)
#define OPCODE_JCC_REL8 (0x70) /* | jcc type */
#define OPCODE_JCC_REL32_A (0x0f)
#define OPCODE_JCC_REL32_B (0x80) /* | jcc type */
#define OPCODE_SETCC_RM8_A (0x0f)
#define OPCODE_SETCC_RM8_B (0x90) /* | jcc type, /0 */
#define OPCODE_CALL_REL32 (0xe8)
#define OPCODE_CALL_RM32 (0xff) /* /2 */
#define OPCODE_LEAVE (0xc9)
/* ModRM byte pieces, OR-ed together by the emitters: MODRM_R64 places a value
   in bits 5..3, MODRM_RM_R64 in bits 2..0, and one MODRM_RM_* constant selects
   the top two bits (no displacement, 8-bit, 32-bit, or register-direct). */
#define MODRM_R64(x) ((x) << 3)
#define MODRM_RM_DISP0 (0x00)
#define MODRM_RM_DISP8 (0x40)
#define MODRM_RM_DISP32 (0x80)
#define MODRM_RM_REG (0xc0)
#define MODRM_RM_R64(x) (x)
/* REX prefix byte: REX_PREFIX OR-ed with the flag bits below */
#define REX_PREFIX (0x40)
#define REX_W (0x08) // width
#define REX_R (0x04) // register
#define REX_X (0x02) // index
#define REX_B (0x01) // base
/* IMM32_Ln / IMM64_Ln extract byte n of a value, n = 0 being the least
   significant byte */
#define IMM32_L0(x) ((x) & 0xff)
#define IMM32_L1(x) (((x) >> 8) & 0xff)
#define IMM32_L2(x) (((x) >> 16) & 0xff)
#define IMM32_L3(x) (((x) >> 24) & 0xff)
#define IMM64_L4(x) (((x) >> 32) & 0xff)
#define IMM64_L5(x) (((x) >> 40) & 0xff)
#define IMM64_L6(x) (((x) >> 48) & 0xff)
#define IMM64_L7(x) (((x) >> 56) & 0xff)
// Range tests for immediates.
// UNSIGNED_FIT8/UNSIGNED_FIT32 use 64-bit masks and accept 64-bit arguments.
// SIGNED_FIT8 uses 32-bit masks and is only meaningful for 32-bit arguments.
// Every expansion is a single parenthesised expression so the macros can be
// combined safely with !, && and ?: at the point of use.
#define UNSIGNED_FIT8(x) (((x) & 0xffffffffffffff00) == 0)
#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0)
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
// State of one x64 assembler instance, carried across the assembly passes.
struct _asm_x64_t {
int pass; // pass number last given to asm_x64_start_pass
uint code_offset; // number of bytes emitted so far in the current pass
uint code_size; // total size in bytes, recorded when pass 2 ends
byte *code_base; // output buffer, allocated when pass 2 ends
byte dummy_data[8]; // scratch buffer that absorbs writes made before pass 3
int next_label; // id returned by the next asm_x64_label_new call
int max_num_labels; // number of entries in label_offsets
int *label_offsets; // code offset per label id; set to -1 at the start of pass 2
};
// for allocating memory, see src/v8/src/platform-linux.cc
// Map a fresh anonymous, private memory region.
//   req_size   - number of bytes wanted; rounded up to a multiple of 4096
//   alloc_size - receives the size actually mapped (0 on failure)
//   is_exec    - when true the region is executable as well as readable/writable
// Returns the start of the region. On failure the assert fires; if asserts
// are compiled out, NULL is returned instead of passing MAP_FAILED to the
// caller as if it were a usable pointer.
void *alloc_mem(uint req_size, uint *alloc_size, bool is_exec) {
    req_size = (req_size + 0xfff) & (~0xfff);
    int prot = PROT_READ | PROT_WRITE | (is_exec ? PROT_EXEC : 0);
    void *ptr = mmap(NULL, req_size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        assert(0);
        *alloc_size = 0;
        return NULL;
    }
    *alloc_size = req_size;
    return ptr;
}
asm_x64_t* asm_x64_new() {
asm_x64_t* as;
as = m_new(asm_x64_t, 1);
as->pass = 0;
as->code_offset = 0;
as->code_size = 0;
as->code_base = NULL;
as->label_offsets = NULL;
return as;
}
// Release an assembler created by asm_x64_new.
//   as        - assembler to destroy
//   free_code - when true the generated code buffer is freed as well; when
//               false the buffer is left allocated for the caller to keep using
void asm_x64_free(asm_x64_t* as, bool free_code) {
    if (free_code) {
        m_free(as->code_base);
    }
    // the label table is owned by the assembler and was previously leaked
    if (as->label_offsets != NULL) {
        m_free(as->label_offsets);
    }
    m_free(as);
}
// Begin an assembly pass: rewind the output position and the label counter.
// Pass 1 additionally resets the label capacity; pass 2 marks every label
// offset as unassigned (-1).
void asm_x64_start_pass(asm_x64_t *as, int pass) {
    as->pass = pass;
    as->code_offset = 0;
    as->next_label = 1;

    switch (pass) {
        case ASM_X64_PASS_1:
            as->max_num_labels = 0;
            break;
        case ASM_X64_PASS_2:
            memset(as->label_offsets, -1, as->max_num_labels * sizeof(int));
            break;
        default:
            break;
    }
}
// Finish the current pass.
//   pass 1: size and allocate the label offset table
//   pass 2: record the code size and allocate the output buffer
// Other passes need no finalisation.
void asm_x64_end_pass(asm_x64_t *as) {
    if (as->pass == ASM_X64_PASS_1) {
        // calculate number of labels needed
        if (as->next_label > as->max_num_labels) {
            as->max_num_labels = as->next_label;
        }
        // drop the table left over from an earlier use of this assembler,
        // otherwise every new pass 1 would leak it
        if (as->label_offsets != NULL) {
            m_free(as->label_offsets);
        }
        as->label_offsets = m_new(int, as->max_num_labels);
    } else if (as->pass == ASM_X64_PASS_2) {
        // calculate size of code in bytes
        as->code_size = as->code_offset;
        // NOTE(review): a code_base from an earlier run is not freed here;
        // presumably the caller may still hold it - confirm ownership
        as->code_base = m_new(byte, as->code_size);
        printf("code_size: %u\n", as->code_size);
    }
}
// Single choke point for emitting bytes: reserves num_bytes_to_write bytes at
// the current position and returns where the caller should store them.
// Before pass 3 the returned pointer is the scratch buffer, so only the
// running offset is affected; from pass 3 on it points into the code buffer.
static byte* asm_x64_get_cur_to_write_bytes(asm_x64_t* as, int num_bytes_to_write) {
    byte *dest;
    if (as->pass >= ASM_X64_PASS_3) {
        assert(as->code_offset + num_bytes_to_write <= as->code_size);
        dest = as->code_base + as->code_offset;
    } else {
        dest = as->dummy_data;
    }
    as->code_offset += num_bytes_to_write;
    return dest;
}
// Size in bytes of the generated code, as recorded at the end of pass 2.
uint asm_x64_get_code_size(asm_x64_t* as) {
    uint size_in_bytes = as->code_size;
    return size_in_bytes;
}
// Start of the generated code buffer.
void* asm_x64_get_code(asm_x64_t* as) {
    void *code = as->code_base;
    return code;
}
// Emit a single byte.
static void asm_x64_write_byte_1(asm_x64_t* as, byte b1) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 1);
    *out = b1;
}
// Emit two bytes, b1 first.
static void asm_x64_write_byte_2(asm_x64_t* as, byte b1, byte b2) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 2);
    *out++ = b1;
    *out = b2;
}
// Emit three bytes in the order b1, b2, b3.
static void asm_x64_write_byte_3(asm_x64_t* as, byte b1, byte b2, byte b3) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 3);
    *out++ = b1;
    *out++ = b2;
    *out = b3;
}
// Emit a 32-bit value, least significant byte first.
static void asm_x64_write_word32(asm_x64_t* as, int w32) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 4);
    for (int i = 0; i < 4; i++) {
        out[i] = (w32 >> (8 * i)) & 0xff;
    }
}
// Emit a 64-bit value, least significant byte first.
static void asm_x64_write_word64(asm_x64_t* as, int64_t w64) {
    byte *out = asm_x64_get_cur_to_write_bytes(as, 8);
    for (int i = 0; i < 8; i++) {
        out[i] = (w64 >> (8 * i)) & 0xff;
    }
}
/* unused
static void asm_x64_write_word32_to(asm_x64_t* as, int offset, int w32) {
byte* c;
assert(offset + 4 <= as->code_size);
c = as->code_base + offset;
c[0] = IMM32_L0(w32);
c[1] = IMM32_L1(w32);
c[2] = IMM32_L2(w32);
c[3] = IMM32_L3(w32);
}
*/
// Emit the ModRM byte (and displacement, when needed) for a
// register + displacement memory operand.
//   r64         - value for the ModRM reg field (a register or an opcode extension)
//   disp_r64    - base register; RSP is not supported
//   disp_offset - byte displacement from the base register
// The shortest form is chosen: no displacement (not available when the base
// is RBP), 8-bit displacement, or 32-bit displacement.
static void asm_x64_write_r64_disp(asm_x64_t* as, int r64, int disp_r64, int disp_offset) {
    assert(disp_r64 != REG_RSP);

    int reg_and_base = MODRM_R64(r64) | MODRM_RM_R64(disp_r64);

    if (disp_offset == 0 && disp_r64 != REG_RBP) {
        asm_x64_write_byte_1(as, reg_and_base | MODRM_RM_DISP0);
        return;
    }
    if (SIGNED_FIT8(disp_offset)) {
        asm_x64_write_byte_2(as, reg_and_base | MODRM_RM_DISP8, IMM32_L0(disp_offset));
        return;
    }
    asm_x64_write_byte_1(as, reg_and_base | MODRM_RM_DISP32);
    asm_x64_write_word32(as, disp_offset);
}
// Emit a one-byte nop.
void asm_x64_nop(asm_x64_t *as) {
    const byte opcode = OPCODE_NOP;
    asm_x64_write_byte_1(as, opcode);
}
// Emit a push of a register; the register number is merged into the opcode byte.
void asm_x64_push_r64(asm_x64_t *as, int src_r64) {
    int opcode = OPCODE_PUSH_R64 | src_r64;
    asm_x64_write_byte_1(as, opcode);
}
// Emit a push of a 32-bit immediate.
void asm_x64_push_i32(asm_x64_t *as, int src_i32) {
    asm_x64_write_byte_1(as, OPCODE_PUSH_I64);
    // the immediate will be sign extended to 64 bits
    asm_x64_write_word32(as, src_i32);
}
// Emit a push of the memory operand [src_r64 + src_offset].
void asm_x64_push_disp(asm_x64_t *as, int src_r64, int src_offset) {
    const int opcode_ext = 6; // the /6 extension of OPCODE_PUSH_M64
    asm_x64_write_byte_1(as, OPCODE_PUSH_M64);
    asm_x64_write_r64_disp(as, opcode_ext, src_r64, src_offset);
}
// Emit a pop into a register; the register number is merged into the opcode byte.
void asm_x64_pop_r64(asm_x64_t *as, int dest_r64) {
    int opcode = OPCODE_POP_R64 | dest_r64;
    asm_x64_write_byte_1(as, opcode);
}
// Emit a ret.
static void asm_x64_ret(asm_x64_t *as) {
    const byte opcode = OPCODE_RET;
    asm_x64_write_byte_1(as, opcode);
}
// Emit a register-to-register move without a REX prefix, i.e. the default
// 32-bit operand size.
void asm_x64_mov_r32_to_r32(asm_x64_t *as, int src_r32, int dest_r32) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r32) | MODRM_RM_R64(dest_r32);
    asm_x64_write_byte_2(as, OPCODE_MOV_R64_TO_RM64, modrm);
}
// Emit a 64-bit register-to-register move (REX.W prefix selects 64-bit size).
void asm_x64_mov_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r64) | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_MOV_R64_TO_RM64, modrm);
}
// Emit a 64-bit store of src_r64 to memory at [dest_r64 + dest_disp].
void asm_x64_mov_r64_to_disp(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
    const int rex = REX_PREFIX | REX_W; // 64-bit operand size
    asm_x64_write_byte_2(as, rex, OPCODE_MOV_R64_TO_RM64);
    asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
}
// Emit a 64-bit load of memory at [src_r64 + src_disp] into dest_r64.
void asm_x64_mov_disp_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
    const int rex = REX_PREFIX | REX_W; // 64-bit operand size
    asm_x64_write_byte_2(as, rex, OPCODE_MOV_RM64_TO_R64);
    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
}
// Emit a 64-bit lea: dest_r64 = address of [src_r64 + src_disp].
void asm_x64_lea_disp_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
    const int rex = REX_PREFIX | REX_W; // 64-bit operand size
    asm_x64_write_byte_2(as, rex, OPCODE_LEA_MEM_TO_R64);
    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
}
// Emit a move of an 8-bit immediate into a byte register; the register number
// is merged into the opcode byte.
void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) {
    int opcode = OPCODE_MOV_I8_TO_R8 | dest_r64;
    asm_x64_write_byte_2(as, opcode, src_i8);
}
// Emit a move of a 32-bit immediate into a register (5 bytes).
// Without a REX prefix the cpu takes a 32-bit immediate and zero extends it
// into the 64-bit register.
void asm_x64_mov_i32_to_r64(asm_x64_t *as, int src_i32, int dest_r64) {
    int opcode = OPCODE_MOV_I64_TO_R64 | dest_r64;
    asm_x64_write_byte_1(as, opcode);
    asm_x64_write_word32(as, src_i32);
}
// Emit a move of a full 64-bit immediate into a register (10 bytes).
// The REX.W prefix switches the instruction from its default 32-bit immediate
// to a 64-bit one.
void asm_x64_mov_i64_to_r64(asm_x64_t *as, int64_t src_i64, int dest_r64) {
    int opcode = OPCODE_MOV_I64_TO_R64 | dest_r64;
    asm_x64_write_byte_2(as, REX_PREFIX | REX_W, opcode);
    asm_x64_write_word64(as, src_i64);
}
// Load an immediate with the shorter encoding when possible: values whose
// upper 32 bits are zero use the 5-byte form, everything else the 10-byte form.
void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) {
    if (!UNSIGNED_FIT32(src_i64)) {
        // 10 bytes
        asm_x64_mov_i64_to_r64(as, src_i64, dest_r64);
        return;
    }
    // 5 bytes
    asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64);
}
// Unimplemented placeholder: asserts immediately. The ModRM/displacement
// write is commented out, so if asserts are compiled out the bytes emitted
// here (opcode followed directly by the immediate) do not include the
// destination operand.
void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp)
{
assert(0);
asm_x64_write_byte_1(as, OPCODE_MOV_I32_TO_RM32);
//asm_x64_write_r32_disp(as, 0, dest_r32, dest_disp);
asm_x64_write_word32(as, src_i32);
}
// Emit a 64-bit xor with src_r64 in the ModRM reg field and dest_r64 in the
// rm field.
void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r64) | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_XOR_R64_TO_RM64, modrm);
}
// Emit a 64-bit add with src_r64 in the ModRM reg field and dest_r64 in the
// rm field.
void asm_x64_add_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r64) | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_ADD_R64_TO_RM64, modrm);
}
// Emit an add of an immediate to a register without a REX prefix (default
// 32-bit operand size), using the 8-bit immediate form when the value fits.
// RSP is rejected as destination.
void asm_x64_add_i32_to_r32(asm_x64_t *as, int src_i32, int dest_r32) {
    assert(dest_r32 != REG_RSP); // in this case i think src_i32 must be 64 bits

    // opcode extension /0 in the reg field
    int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);

    if (SIGNED_FIT8(src_i32)) {
        asm_x64_write_byte_2(as, OPCODE_ADD_I8_TO_RM32, modrm);
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_byte_2(as, OPCODE_ADD_I32_TO_RM32, modrm);
        asm_x64_write_word32(as, src_i32);
    }
}
// Emit a register subtract without a REX prefix, i.e. the default 32-bit
// operand size; src_r32 goes in the ModRM reg field, dest_r32 in the rm field.
void asm_x64_sub_r32_from_r32(asm_x64_t *as, int src_r32, int dest_r32) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r32) | MODRM_RM_R64(dest_r32);
    asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, modrm);
}
// Emit a 64-bit register subtract (REX.W prefix); src_r64 goes in the ModRM
// reg field, dest_r64 in the rm field.
void asm_x64_sub_r64_from_r64(asm_x64_t *as, int src_r64, int dest_r64) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r64) | MODRM_RM_R64(dest_r64);
    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_R64_FROM_RM64, modrm);
}
// Emit a subtract of an immediate from a register without a REX prefix
// (default 32-bit operand size), using the 8-bit immediate form when the
// value fits.
void asm_x64_sub_i32_from_r32(asm_x64_t *as, int src_i32, int dest_r32) {
    // opcode extension /5 in the reg field
    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32);

    if (SIGNED_FIT8(src_i32)) {
        asm_x64_write_byte_2(as, OPCODE_SUB_I8_FROM_RM64, modrm);
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_byte_2(as, OPCODE_SUB_I32_FROM_RM64, modrm);
        asm_x64_write_word32(as, src_i32);
    }
}
// Emit a 64-bit subtract of an immediate from a register (REX.W prefix),
// using the 8-bit immediate form when the value fits.
void asm_x64_sub_i32_from_r64(asm_x64_t *as, int src_i32, int dest_r64) {
    const int rex = REX_PREFIX | REX_W; // 64-bit operand size
    // opcode extension /5 in the reg field
    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64);

    if (SIGNED_FIT8(src_i32)) {
        asm_x64_write_byte_3(as, rex, OPCODE_SUB_I8_FROM_RM64, modrm);
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_byte_3(as, rex, OPCODE_SUB_I32_FROM_RM64, modrm);
        asm_x64_write_word32(as, src_i32);
    }
}
/* shifts not tested */
// Emit a left shift of r32 by an immediate count (opcode extension /4).
void asm_x64_shl_r32_by_imm(asm_x64_t *as, int r32, int imm) {
    int modrm = MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32);
    asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, modrm);
    asm_x64_write_byte_1(as, imm);
}
// Emit a right shift (shr) of r32 by an immediate count (opcode extension /5).
// Not tested, like the other shifts.
void asm_x64_shr_r32_by_imm(asm_x64_t *as, int r32, int imm) {
    int modrm = MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32);
    asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, modrm);
    asm_x64_write_byte_1(as, imm);
}
// Emit a right shift (sar) of r32 by an immediate count (opcode extension /7).
// Not tested, like the other shifts.
void asm_x64_sar_r32_by_imm(asm_x64_t *as, int r32, int imm) {
    int modrm = MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32);
    asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, modrm);
    asm_x64_write_byte_1(as, imm);
}
// Emit a 64-bit register compare; src_r64_a goes in the ModRM reg field and
// src_r64_b in the rm field.
void asm_x64_cmp_r64_with_r64(asm_x64_t *as, int src_r64_a, int src_r64_b) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r64_a) | MODRM_RM_R64(src_r64_b);
    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_CMP_R64_WITH_RM64, modrm);
}
// Unimplemented placeholder: asserts immediately. The operand encoding is
// commented out, so only the opcode byte is emitted if asserts are compiled out.
void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b) {
assert(0);
asm_x64_write_byte_1(as, OPCODE_CMP_R64_WITH_RM64);
//asm_x64_write_r32_disp(as, src_r32_a, src_r32_b, src_disp_b);
}
// Unimplemented placeholder: asserts immediately. The operand encoding is
// commented out, so only the opcode byte is emitted if asserts are compiled out.
void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b) {
assert(0);
asm_x64_write_byte_1(as, OPCODE_CMP_RM32_WITH_R32);
//asm_x64_write_r32_disp(as, src_r32_b, src_r32_a, src_disp_a);
}
// Emit a compare of a register with an immediate without a REX prefix, using
// the 8-bit immediate form when the value fits.
void asm_x64_cmp_i32_with_r32(asm_x64_t *as, int src_i32, int src_r32) {
    // opcode extension /7 in the reg field
    int modrm = MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32);

    if (SIGNED_FIT8(src_i32)) {
        asm_x64_write_byte_2(as, OPCODE_CMP_I8_WITH_RM32, modrm);
        asm_x64_write_byte_1(as, src_i32 & 0xff);
    } else {
        asm_x64_write_byte_2(as, OPCODE_CMP_I32_WITH_RM32, modrm);
        asm_x64_write_word32(as, src_i32);
    }
}
// Emit a byte-sized test of two registers; src_r64_a goes in the ModRM reg
// field and src_r64_b in the rm field.
void asm_x64_test_r8_with_r8(asm_x64_t *as, int src_r64_a, int src_r64_b) {
    int modrm = MODRM_RM_REG | MODRM_R64(src_r64_a) | MODRM_RM_R64(src_r64_b);
    asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, modrm);
}
// Emit a setcc into a byte register; jcc_type is merged into the second
// opcode byte.
void asm_x64_setcc_r8(asm_x64_t *as, int jcc_type, int dest_r8) {
    int opcode_b = OPCODE_SETCC_RM8_B | jcc_type;
    int modrm = MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8);
    asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, opcode_b, modrm);
}
// Hand out the next unused label id.
int asm_x64_label_new(asm_x64_t *as) {
    int id = as->next_label;
    as->next_label = id + 1;
    return id;
}
// Bind a label to the current code offset.
// Pass 1 does nothing, pass 2 records the offset (a label may be assigned
// only once), and pass 3 checks that the offset is the same as in pass 2.
void asm_x64_label_assign(asm_x64_t *as, int label) {
    if (as->pass <= ASM_X64_PASS_1) {
        return;
    }
    assert(label < as->max_num_labels);

    switch (as->pass) {
        case ASM_X64_PASS_2:
            // first and only assignment of this label
            assert(as->label_offsets[label] == -1);
            as->label_offsets[label] = as->code_offset;
            break;
        case ASM_X64_PASS_3:
            // the layout must not move between pass 2 and pass 3
            assert(as->label_offsets[label] == as->code_offset);
            break;
        default:
            break;
    }
}
// Emit an unconditional jump to a label. Nothing is emitted in pass 1.
// A backward jump that fits uses the 2-byte rel8 form; every other jump
// (forward, or too far back) uses the 5-byte rel32 form, because the distance
// of a forward jump is not yet known when sizes are fixed.
void asm_x64_jmp_label(asm_x64_t *as, int label) {
    if (as->pass <= ASM_X64_PASS_1) {
        return;
    }

    int target = as->label_offsets[label];
    int rel = target - as->code_offset;

    // target >= 0 means the label has already been assigned in this pass
    if (target >= 0 && rel < 0) {
        // displacement is relative to the end of the 2-byte instruction
        int rel8 = rel - 2;
        if (SIGNED_FIT8(rel8)) {
            asm_x64_write_byte_2(as, OPCODE_JMP_REL8, rel8 & 0xff);
            return;
        }
    }

    // displacement is relative to the end of the 5-byte instruction
    asm_x64_write_byte_1(as, OPCODE_JMP_REL32);
    asm_x64_write_word32(as, rel - 5);
}
// Emit a conditional jump to a label; jcc_type is merged into the opcode.
// Nothing is emitted in pass 1. A backward jump that fits uses the 2-byte
// rel8 form; every other jump (forward, or too far back) uses the 6-byte
// rel32 form, because the distance of a forward jump is not yet known when
// sizes are fixed.
void asm_x64_jcc_label(asm_x64_t *as, int jcc_type, int label) {
    if (as->pass <= ASM_X64_PASS_1) {
        return;
    }

    int target = as->label_offsets[label];
    int rel = target - as->code_offset;

    // target >= 0 means the label has already been assigned in this pass
    if (target >= 0 && rel < 0) {
        // displacement is relative to the end of the 2-byte instruction
        int rel8 = rel - 2;
        if (SIGNED_FIT8(rel8)) {
            asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, rel8 & 0xff);
            return;
        }
    }

    // displacement is relative to the end of the 6-byte instruction
    asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type);
    asm_x64_write_word32(as, rel - 6);
}
// Emit the function prologue: save RBP, set up the frame pointer, reserve
// the local slots and save RBX.
//   num_locals - number of local slots required (negative is treated as 0)
void asm_x64_entry(asm_x64_t *as, int num_locals) {
    asm_x64_push_r64(as, REG_RBP);
    asm_x64_mov_r64_to_r64(as, REG_RSP, REG_RBP);

    // make the slot count odd so stack is aligned on 16 byte boundary
    int slots = (num_locals < 0 ? 0 : num_locals) | 1;
    asm_x64_sub_i32_from_r64(as, slots * WORD_SIZE, REG_RSP);

    asm_x64_push_r64(as, REG_RBX);
}
// Emit the function epilogue matching asm_x64_entry: restore RBX, tear down
// the frame with leave, and return.
void asm_x64_exit(asm_x64_t *as) {
    const byte leave_opcode = OPCODE_LEAVE;
    asm_x64_pop_r64(as, REG_RBX);
    asm_x64_write_byte_1(as, leave_opcode);
    asm_x64_ret(as);
}
// Disabled: asserts immediately. If asserts are compiled out it pushes the
// word at RBP + 8 + src_arg_num * WORD_SIZE.
void asm_x64_push_arg(asm_x64_t* as, int src_arg_num) {
assert(0);
asm_x64_push_disp(as, REG_RBP, 8 + src_arg_num * WORD_SIZE);
}
// Unimplemented placeholder: asserts immediately and emits nothing.
void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32) {
assert(0);
//asm_x64_mov_disp_to_r32(as, REG_RBP, 8 + src_arg_num * WORD_SIZE, dest_r32);
}
// Unimplemented placeholder: asserts immediately and emits nothing.
void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num) {
assert(0);
//asm_x64_mov_r32_to_disp(as, src_r32, REG_RBP, 8 + dest_arg_num * WORD_SIZE);
}
// Byte offset of local variable local_num relative to RBP.
// Local 0 is at -WORD_SIZE, local 1 at -2*WORD_SIZE, and so on downwards.
static int asm_x64_local_offset_from_ebp(int local_num) {
    int slots_below_rbp = local_num + 1;
    return -slots_below_rbp * WORD_SIZE;
}
// Emit a load of local slot src_local_num (addressed off RBP) into dest_r64.
void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64) {
    int disp = asm_x64_local_offset_from_ebp(src_local_num);
    asm_x64_mov_disp_to_r64(as, REG_RBP, disp, dest_r64);
}
// Emit a store of src_r64 into local slot dest_local_num (addressed off RBP).
void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num) {
    int disp = asm_x64_local_offset_from_ebp(dest_local_num);
    asm_x64_mov_r64_to_disp(as, src_r64, REG_RBP, disp);
}
// Emit code that places the address of local slot local_num in dest_r64.
void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64) {
    const int disp = asm_x64_local_offset_from_ebp(local_num);
    if (disp != 0) {
        // general case: address is RBP plus a displacement
        asm_x64_lea_disp_to_r64(as, REG_RBP, disp, dest_r64);
        return;
    }
    // zero displacement: the address is just the frame pointer itself
    asm_x64_mov_r64_to_r64(as, REG_RBP, dest_r64);
}
// Emit a push of the value held in local slot local_num (addressed off RBP).
void asm_x64_push_local(asm_x64_t* as, int local_num) {
    int disp = asm_x64_local_offset_from_ebp(local_num);
    asm_x64_push_disp(as, REG_RBP, disp);
}
// Emit code that pushes the address of local slot local_num.
//   temp_r64: scratch register; it is overwritten with the address
// The address is formed with asm_x64_mov_local_addr_to_r64 so that the whole
// computation is done on 64-bit registers. Adjusting a copy of RBP with the
// add helper named for a 32-bit destination (asm_x64_add_i32_to_r32) would
// presumably not preserve the upper half of the frame pointer.
void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64)
{
    asm_x64_mov_local_addr_to_r64(as, local_num, temp_r64);
    asm_x64_push_r64(as, temp_r64);
}
/*
can't use these because code might be relocated when resized
void asm_x64_call(asm_x64_t* as, void* func)
{
asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
}
void asm_x64_call_i1(asm_x64_t* as, void* func, int i1)
{
asm_x64_sub_i32_from_r32(as, 8, REG_RSP);
asm_x64_sub_i32_from_r32(as, 12, REG_RSP);
asm_x64_push_i32(as, i1);
asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
asm_x64_add_i32_to_r32(as, 16, REG_RSP);
asm_x64_mov_r64_to_r64(as, REG_RBP, REG_RSP);
}
*/
// Emit a call to the function at ptr.
//   ptr:      absolute address of the callee
//   temp_r64: unused by the encoding currently in effect
// The call is encoded as a relative call with a 32-bit displacement, computed
// against the current code_base.
// NOTE(review): this assumes ptr is within 32-bit displacement range of the
// code buffer and that the buffer does not move afterwards -- confirm.
void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) {
    // disabled alternative: load ptr into temp_r64 and call through it
    //   asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64);
    //   asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));

    // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all
    asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
    // displacement is measured from the byte following the 4-byte operand
    void *after_operand = (void*)(as->code_base + as->code_offset + 4);
    asm_x64_write_word32(as, ptr - after_operand);
}

@ -0,0 +1,76 @@
// assembler pass numbers, passed to asm_x64_start_pass; the label jump
// emitters only write code on passes later than ASM_X64_PASS_1
#define ASM_X64_PASS_1 (1)
#define ASM_X64_PASS_2 (2)
#define ASM_X64_PASS_3 (3)
// register identifiers accepted by the r32/r64 parameters of the emitters;
// the values follow the x86-64 hardware register numbering
#define REG_RAX (0)
#define REG_RCX (1)
#define REG_RDX (2)
#define REG_RBX (3)
#define REG_RSP (4)
#define REG_RBP (5)
#define REG_RSI (6)
#define REG_RDI (7)
// condition codes, used for jcc and setcc (despite their j-name!)
// these are passed as jcc_type and OR-ed into the instruction opcode
#define JCC_JB (0x2) // below, unsigned
#define JCC_JZ (0x4)
#define JCC_JE (0x4)
#define JCC_JNZ (0x5)
#define JCC_JNE (0x5)
#define JCC_JL (0xc) // less, signed
// register roles for calls: return value and the first three arguments
// (RAX / RDI, RSI, RDX, as in the System V AMD64 calling convention)
#define REG_RET REG_RAX
#define REG_ARG_1 REG_RDI
#define REG_ARG_2 REG_RSI
#define REG_ARG_3 REG_RDX
// assembler state; only forward-declared here, so users of this header
// handle it through pointers
typedef struct _asm_x64_t asm_x64_t;

// lifecycle, pass control and access to the generated code
// (asm_x64_new is declared with an explicit void parameter list so that the
// declaration is a real prototype and calls with arguments are rejected)
asm_x64_t* asm_x64_new(void);
void asm_x64_free(asm_x64_t* as, bool free_code);
void asm_x64_start_pass(asm_x64_t *as, int pass);
void asm_x64_end_pass(asm_x64_t *as);
uint asm_x64_get_code_size(asm_x64_t* as);
void* asm_x64_get_code(asm_x64_t* as);
// instruction emitters; parameter names read source operand(s) first and
// destination last, and the r8/r32/r64/i32/i64/disp parts of each function
// name describe the operand kinds

// no-op, push and pop
void asm_x64_nop(asm_x64_t* as);
void asm_x64_push_r64(asm_x64_t* as, int src_r64);
void asm_x64_push_i32(asm_x64_t* as, int src_i32); // will be sign extended to 64 bits
void asm_x64_push_disp(asm_x64_t* as, int src_r32, int src_offset);
void asm_x64_pop_r64(asm_x64_t* as, int dest_r64);
// moves between registers, immediates and register+displacement operands
void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
void asm_x64_mov_r32_to_disp(asm_x64_t* as, int src_r32, int dest_r32, int dest_disp);
void asm_x64_mov_disp_to_r32(asm_x64_t* as, int src_r32, int src_disp, int dest_r32);
void asm_x64_mov_i32_to_r64(asm_x64_t* as, int src_i32, int dest_r64);
void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64);
void asm_x64_mov_i32_to_disp(asm_x64_t* as, int src_i32, int dest_r32, int dest_disp);
void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64);
// arithmetic, logic and shifts
void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64);
void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
void asm_x64_add_i32_to_r32(asm_x64_t* as, int src_i32, int dest_r32);
void asm_x64_sub_r32_from_r32(asm_x64_t* as, int src_r32, int dest_r32);
void asm_x64_sub_i32_from_r32(asm_x64_t* as, int src_i32, int dest_r32);
void asm_x64_shl_r32_by_imm(asm_x64_t* as, int r32, int imm);
void asm_x64_shr_r32_by_imm(asm_x64_t* as, int r32, int imm);
void asm_x64_sar_r32_by_imm(asm_x64_t* as, int r32, int imm);
// comparisons, tests and condition-code materialisation (jcc_type is a JCC_* value)
void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b);
void asm_x64_cmp_r32_with_disp(asm_x64_t* as, int src_r32_a, int src_r32_b, int src_disp_b);
void asm_x64_cmp_disp_with_r32(asm_x64_t* as, int src_r32_a, int src_disp_a, int src_r32_b);
void asm_x64_cmp_i32_with_r32(asm_x64_t* as, int src_i32, int src_r32);
void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b);
void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8);
// labels and jumps to labels (jcc_type is a JCC_* value)
int asm_x64_label_new(asm_x64_t* as);
void asm_x64_label_assign(asm_x64_t* as, int label);
void asm_x64_jmp_label(asm_x64_t* as, int label);
void asm_x64_jcc_label(asm_x64_t* as, int jcc_type, int label);

// function prologue and epilogue
void asm_x64_entry(asm_x64_t* as, int num_locals);
void asm_x64_exit(asm_x64_t* as);

// argument access
void asm_x64_push_arg(asm_x64_t* as, int src_arg_num);
void asm_x64_mov_arg_to_r32(asm_x64_t* as, int src_arg_num, int dest_r32);
void asm_x64_mov_r32_to_arg(asm_x64_t* as, int src_r32, int dest_arg_num);

// local variable slots, addressed relative to the frame pointer
void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64);
void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num);
void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64);
void asm_x64_push_local(asm_x64_t* as, int local_num);
// temp_r64 is a scratch register; the name matches the function definition
void asm_x64_push_local_addr(asm_x64_t* as, int local_num, int temp_r64);

// call to an absolute address; the temp register name matches the definition
void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r64);

@ -0,0 +1,272 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "misc.h"
#include "machine.h"
#include "runtime.h"
#include "bc.h"
// Helper macros for the byte code interpreter. They operate directly on the
// enclosing function's locals: ip (instruction pointer), sp (stack pointer),
// unum and qstr.
//
// DECODE_UINT / DECODE_QSTR read a 1- or 2-byte operand at ip: a first byte
// of 127 or less is the value itself; otherwise the low 6 bits of the first
// byte form the high part and the following byte the low 8 bits.
#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
#define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0)
// The value stack grows down: PUSH pre-decrements sp before storing, POP
// reads the top entry and then increments sp. Neither checks stack bounds.
#define PUSH(val) *--sp = (val)
#define POP() (*sp++)
py_obj_t py_execute_byte_code(byte *code, uint len, py_obj_t *args, uint n_args) {
byte *ip = code;
py_obj_t stack[10];
py_obj_t *sp = &stack[10]; // stack grows down, sp points to top of stack
machine_uint_t unum;
machine_int_t snum;
qstr qstr;
py_obj_t obj1, obj2;
py_obj_t fast0 = NULL, fast1 = NULL, fast2 = NULL, fastn[4] = {NULL, NULL, NULL, NULL};
// init args
for (int i = 0; i < n_args; i++) {
if (i == 0) {
fast0 = args[0];
} else if (i == 1) {
fast1 = args[1];
} else if (i == 2) {
fast2 = args[2];
} else {
assert(i - 3 < 4);
fastn[i - 3] = args[i];
}
}
// execute byte code
for (;;) {
int op = *ip++;
switch (op) {
case PYBC_LOAD_CONST_FALSE:
PUSH(py_const_false);
break;
case PYBC_LOAD_CONST_NONE:
PUSH(py_const_none);
break;
case PYBC_LOAD_CONST_TRUE:
PUSH(py_const_true);
break;
case PYBC_LOAD_CONST_SMALL_INT:
snum = ip[0] | (ip[1] << 8);
if (snum & 0x8000) {
snum |= ~0xffff;
}
ip += 2;
PUSH((py_obj_t)(snum << 1 | 1));
break;
case PYBC_LOAD_CONST_ID:
DECODE_QSTR;
PUSH(rt_load_const_str(qstr)); // TODO
break;
case PYBC_LOAD_CONST_STRING:
DECODE_QSTR;
PUSH(rt_load_const_str(qstr));
break;
case PYBC_LOAD_FAST_0:
PUSH(fast0);
break;
case PYBC_LOAD_FAST_1:
PUSH(fast1);
break;
case PYBC_LOAD_FAST_2:
PUSH(fast2);
break;
case PYBC_LOAD_FAST_N:
DECODE_UINT;
PUSH(fastn[unum - 3]);
break;
case PYBC_LOAD_NAME:
DECODE_QSTR;
PUSH(rt_load_name(qstr));
break;
case PYBC_LOAD_GLOBAL:
DECODE_QSTR;
PUSH(rt_load_global(qstr));
break;
case PYBC_LOAD_ATTR:
DECODE_QSTR;
*sp = rt_load_attr(*sp, qstr);
break;
case PYBC_LOAD_METHOD:
DECODE_QSTR;
sp -= 1;
rt_load_method(sp[1], qstr, sp);
break;
case PYBC_LOAD_BUILD_CLASS:
PUSH(rt_load_build_class());
break;
case PYBC_STORE_FAST_0:
fast0 = POP();
break;
case PYBC_STORE_FAST_1:
fast1 = POP();
break;
case PYBC_STORE_FAST_2:
fast2 = POP();
break;
case PYBC_STORE_FAST_N:
DECODE_UINT;
fastn[unum - 3] = POP();
break;
case PYBC_STORE_NAME:
DECODE_QSTR;
rt_store_name(qstr, POP());
break;
case PYBC_STORE_SUBSCR:
rt_store_subscr(sp[1], sp[0], sp[2]);
sp += 3;
break;
case PYBC_DUP_TOP:
obj1 = *sp;
PUSH(obj1);
break;
case PYBC_DUP_TOP_TWO:
sp -= 2;
sp[0] = sp[2];
sp[1] = sp[3];
break;
case PYBC_POP_TOP:
++sp;
break;
case PYBC_ROT_THREE:
obj1 = sp[0];
sp[0] = sp[1];
sp[1] = sp[2];
sp[2] = obj1;
break;
case PYBC_JUMP:
DECODE_UINT;
ip = code + unum;
break;
case PYBC_POP_JUMP_IF_FALSE:
DECODE_UINT;
if (!rt_is_true(POP())) {
ip = code + unum;
}
break;
case PYBC_SETUP_LOOP:
DECODE_UINT;
break;
case PYBC_POP_BLOCK:
break;
case PYBC_BINARY_OP:
unum = *ip++;
obj2 = POP();
obj1 = *sp;
*sp = rt_binary_op(unum, obj1, obj2);
break;
case PYBC_COMPARE_OP:
unum = *ip++;
obj2 = POP();
obj1 = *sp;
*sp = rt_compare_op(unum, obj1, obj2);
break;
case PYBC_BUILD_LIST:
DECODE_UINT;
obj1 = rt_build_list(unum, sp);
sp += unum - 1;
*sp = obj1;
break;
case PYBC_BUILD_MAP:
DECODE_UINT;
PUSH(rt_build_map(unum));
break;
case PYBC_STORE_MAP:
sp += 2;
rt_store_map(sp[0], sp[-2], sp[-1]);
break;
case PYBC_BUILD_SET:
DECODE_UINT;
obj1 = rt_build_set(unum, sp);
sp += unum - 1;
*sp = obj1;
break;
case PYBC_MAKE_FUNCTION:
DECODE_UINT;
PUSH(rt_make_function_from_id(unum));
break;
case PYBC_CALL_FUNCTION:
DECODE_UINT;
assert((unum & 0xff00) == 0); // n_keyword
// switch on n_positional
if ((unum & 0xff) == 0) {
*sp = rt_call_function_0(*sp);
} else if ((unum & 0xff) == 1) {
obj1 = *sp++; // the single argument
*sp = rt_call_function_1(*sp, obj1);
} else if ((unum & 0xff) == 2) {
obj2 = *sp++; // the second argument
obj1 = *sp++; // the first argument
*sp = rt_call_function_2(*sp, obj1, obj2);
} else {
assert(0);
}
break;
case PYBC_CALL_METHOD:
DECODE_UINT;
assert((unum & 0xff00) == 0); // n_keyword
// switch on n_positional
if ((unum & 0xff) == 0) {
obj1 = *sp++; // the self object (or NULL)
*sp = rt_call_method_1(*sp, obj1);
} else if ((unum & 0xff) == 1) {
<