From b99610fee30e41049e94687e350a720951a9f0e9 Mon Sep 17 00:00:00 2001 From: Clifford Wolf Date: Tue, 7 Jun 2016 17:09:26 +0200 Subject: [PATCH] Using newlib libc from riscv-tools in dhrystone benchmark --- README.md | 4 +- dhrystone/Makefile | 29 ++-- dhrystone/dhry_1.c | 20 ++- dhrystone/sections.lds | 18 +++ dhrystone/stdlib.c | 296 ++++++++++++++--------------------------- dhrystone/syscalls.c | 95 +++++++++++++ dhrystone/testbench.v | 19 ++- 7 files changed, 258 insertions(+), 223 deletions(-) create mode 100644 dhrystone/sections.lds create mode 100644 dhrystone/syscalls.c diff --git a/README.md b/README.md index a01d330..9260acf 100644 --- a/README.md +++ b/README.md @@ -326,9 +326,9 @@ any other ALU operation. The following dhrystone benchmark results are for a core with enabled `ENABLE_MUL`, `ENABLE_DIV`, and `BARREL_SHIFTER` options. -Dhrystone benchmark results: 0.406 DMIPS/MHz (715 Dhrystones/Second/MHz) +Dhrystone benchmark results: 0.505 DMIPS/MHz (888 Dhrystones/Second/MHz) -For the Dhrystone benchmark the average CPI is 4.072. +For the Dhrystone benchmark the average CPI is 4.208. 
PicoRV32 Native Memory Interface
diff --git a/dhrystone/Makefile b/dhrystone/Makefile
index 255749d..3609a97 100644
--- a/dhrystone/Makefile
+++ b/dhrystone/Makefile
@@ -1,7 +1,14 @@
+USE_MYSTDLIB = 0
+OBJS = dhry_1.o dhry_2.o stdlib.o
+CFLAGS = -MD -O3 -m32 -march=RV32IM -DTIME -DRISCV
+TOOLCHAIN_PREFIX = /opt/riscv32im/bin/riscv32-unknown-elf-
 
-OBJS = start.o dhry_1.o dhry_2.o stdlib.o
-CFLAGS = -MD -O3 -m32 -march=RV32IM -ffreestanding -nostdlib -DTIME -DRISCV
-TOOLCHAIN_PREFIX = riscv32-unknown-elf-
+ifeq ($(USE_MYSTDLIB),1) # 1: freestanding build with the libc subset in stdlib.c and start.o; otherwise newlib plus syscalls.o
+CFLAGS += -DUSE_MYSTDLIB -ffreestanding -nostdlib
+OBJS += start.o
+else
+OBJS += syscalls.o
+endif
 
 test: testbench.vvp dhry.hex
 	vvp -N testbench.vvp
@@ -21,16 +28,18 @@ timing.vvp: testbench.v ../picorv32.v
 	iverilog -o timing.vvp -DTIMING testbench.v ../picorv32.v
 	chmod -x timing.vvp
 
-dhry.hex: dhry.bin ../firmware/makehex.py
-	python3 ../firmware/makehex.py $< 16384 > $@
+dhry.hex: dhry.elf # hex image generated straight from the ELF; the intermediate dhry.bin is gone
+	$(TOOLCHAIN_PREFIX)objcopy -O verilog $< $@
 
-dhry.bin: dhry.elf
-	$(TOOLCHAIN_PREFIX)objcopy -O binary $< $@
+ifeq ($(USE_MYSTDLIB),1) # freestanding link uses the local sections.lds; the newlib link uses the toolchain default layout and adds -lc
+dhry.elf: $(OBJS) sections.lds
+	$(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -Wl,-Bstatic,-T,sections.lds,-Map,dhry.map,--strip-debug -o $@ $(OBJS) -lgcc
 	chmod -x $@
-
-dhry.elf: $(OBJS) ../firmware/sections.lds
-	$(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -Wl,-Bstatic,-T,../firmware/sections.lds,-Map,dhry.map,--strip-debug -o $@ $(OBJS) -lgcc
+else
+dhry.elf: $(OBJS)
+	$(TOOLCHAIN_PREFIX)gcc $(CFLAGS) -Wl,-Bstatic,-Map,dhry.map,--strip-debug -o $@ $(OBJS) -lgcc -lc
 	chmod -x $@
+endif
 
 %.o: %.c
 	$(TOOLCHAIN_PREFIX)gcc -c $(CFLAGS) $<
diff --git a/dhrystone/dhry_1.c b/dhrystone/dhry_1.c
index 884c613..fa0d4d9 100644
--- a/dhrystone/dhry_1.c
+++ b/dhrystone/dhry_1.c
@@ -17,6 +17,13 @@
 
 #include "dhry.h"
 
+#ifdef USE_MYSTDLIB
+extern char *malloc ();
+#else
+# include <stdlib.h>
+# include <string.h>
+#endif
+
 /* Global Variables: */
 
 Rec_Pointer Ptr_Glob,
@@ -28,7 +35,6 @@ char Ch_1_Glob,
 int Arr_1_Glob [50];
 int Arr_2_Glob [50] [50];
 
-extern char *malloc ();
 Enumeration Func_1 ();
   /* forward declaration necessary since Enumeration may not simply be int */
 
@@ -43,7 +49,7 @@ Enumeration Func_1 ();
 
 /* variables for time measurement: */
 
-#ifdef TIMES
+#ifdef IGN_TIMES
 struct tms time_info;
 extern int times ();
                 /* see library function "times" */
@@ -124,9 +130,9 @@ main ()
   }
   printf ("Please give the number of runs through the benchmark: ");
   {
-    int n;
-    scanf ("%d", &n);
-    Number_Of_Runs = n;
+    // int n;
+    // scanf ("%d", &n);
+    Number_Of_Runs = 100; /* fixed run count; no longer read with scanf */
   }
   printf ("\n");
 
@@ -136,7 +142,7 @@ main ()
   /* Start timer */
   /***************/
 
-#ifdef TIMES
+#ifdef IGN_TIMES
   times (&time_info);
   Begin_Time = (long) time_info.tms_utime;
 #endif
@@ -197,7 +203,7 @@ main ()
   /* Stop timer */
   /**************/
 
-#ifdef TIMES
+#ifdef IGN_TIMES
   times (&time_info);
   End_Time = (long) time_info.tms_utime;
 #endif
diff --git a/dhrystone/sections.lds b/dhrystone/sections.lds
new file mode 100644
index 0000000..3efb873
--- /dev/null
+++ b/dhrystone/sections.lds
@@ -0,0 +1,18 @@
+/*
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+*/
+
+SECTIONS {
+	.memory : {
+		. = 0x10000;    /* advance the location counter to 0x10000 before placing anything */
+		start*(.text);  /* .text of input files matching start* goes first */
+		*(.text);       /* then .text of every other input file */
+		*(*);           /* then all remaining input sections */
+		end = .;        /* symbol marking the end of the image */
+	}
+}
diff --git a/dhrystone/stdlib.c b/dhrystone/stdlib.c
index b4d2a2f..55150a7 100644
--- a/dhrystone/stdlib.c
+++ b/dhrystone/stdlib.c
@@ -1,18 +1,27 @@
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+
 #include <stdarg.h>
+#include <stdint.h>
 
 extern long time();
 extern long insn();
+
+#ifdef USE_MYSTDLIB
 extern char *malloc();
 extern int printf(const char *format, ...);
-extern int scanf(const char *format, ...);
 
-// implementations are copy&paste from riscv newlib
 extern void *memcpy(void *dest, const void *src, long n);
 extern char *strcpy(char *dest, const char *src);
 extern int strcmp(const char *s1, const char *s2);
 
 char heap_memory[1024];
 int heap_memory_used = 0;
+#endif
 
 long time()
 {
@@ -30,6 +39,7 @@ long insn()
 	return insns;
 }
 
+#ifdef USE_MYSTDLIB
 char *malloc(int size)
 {
 	char *p = heap_memory + heap_memory_used;
@@ -96,213 +106,105 @@ int printf(const char *format, ...)
 	va_end(ap);
 }
 
-int scanf(const char *format, ...)
+void *memcpy(void *aa, const void *bb, long n)
 {
-	// printf("[scanf(\"%s\")]\n", format);
-	va_list ap;
-	va_start(ap, format);
-	*va_arg(ap,int*) = 100;
-	va_end(ap);
-	return 0;
+	// printf("**MEMCPY**\n");
+	char *a = aa;
+	const char *b = bb;
+	while (n--) *(a++) = *(b++);
+	return aa;
 }
 
-// -------------------------------------------------------
-// Copy&paste from RISC-V newlib:
-
-void* memcpy(void* aa, const void* bb, long n)
+char *strcpy(char* dst, const char* src)
 {
-	#define BODY(a, b, t) { \
-		t tt = *b; \
-		a++, b++; \
-		*(a-1) = tt; \
-	}
+	char *r = dst;
 
-	char* a = (char*)aa;
-	const char* b = (const char*)bb;
-	char* end = a+n;
-	unsigned long msk = sizeof(long)-1;
-	if (__builtin_expect(((unsigned long)a & msk) != ((unsigned long)b & msk) || n < sizeof(long), 0))
-	{
-small:
-		if (__builtin_expect(a < end, 1))
-			while (a < end)
-				BODY(a, b, char);
-		return aa;
-	}
+	while ((((uint32_t)dst | (uint32_t)src) & 3) != 0) // byte-wise while either pointer is not 4-byte aligned
+	{
+		char c = *(src++);
+		*(dst++) = c;
+		if (!c) return r;
+	}
 
-	if (__builtin_expect(((unsigned long)a & msk) != 0, 0))
-		while ((unsigned long)a & msk)
-			BODY(a, b, char);
+	while (1) // both pointers aligned: move one 32-bit word per iteration
+	{
+		uint32_t v = *(uint32_t*)src;
 
-	long* la = (long*)a;
-	const long* lb = (const long*)b;
-	long* lend = (long*)((unsigned long)end & ~msk);
+		if (__builtin_expect((((v) - 0x01010101UL) & ~(v) & 0x80808080UL), 0))
+		{ // v holds a zero byte: store the tail byte by byte, lowest-order byte first
+			dst[0] = v & 0xff;
+			if ((v & 0xff) == 0)
+				return r;
+			v = v >> 8;
 
-	if (__builtin_expect(la < lend-8, 0))
-	{
-		while (la < lend-8)
-		{
-			long b0 = *lb++;
-			long b1 = *lb++;
-			long b2 = *lb++;
-			long b3 = *lb++;
-			long b4 = *lb++;
-			long b5 = *lb++;
-			long b6 = *lb++;
-			long b7 = *lb++;
-			long b8 = *lb++;
-			*la++ = b0;
-			*la++ = b1;
-			*la++ = b2;
-			*la++ = b3;
-			*la++ = b4;
-			*la++ = b5;
-			*la++ = b6;
-			*la++ = b7;
-			*la++ = b8;
-		}
-	}
+			dst[1] = v & 0xff;
+			if ((v & 0xff) == 0)
+				return r;
+			v = v >> 8;
 
-	while (la < lend)
-		BODY(la, lb, long);
+			dst[2] = v & 0xff;
+			if ((v & 0xff) == 0)
+				return r;
+			v = v >> 8;
 
-	a = (char*)la;
-	b = (const char*)lb;
-	if (__builtin_expect(a < end, 0))
-		goto small;
-	return aa;
+			dst[3] = v & 0xff;
+			return r;
+		}
+
+		*(uint32_t*)dst = v;
+		src += 4;
+		dst += 4;
+	}
 }
 
-static inline unsigned long __libc_detect_null(unsigned long w)
+int strcmp(const char *s1, const char *s2)
 {
-	unsigned long mask = 0x7f7f7f7f;
-	if (sizeof(long) == 8)
-		mask = ((mask << 16) << 16) | mask;
-	return ~(((w & mask) + mask) | w | mask);
+	while ((((uint32_t)s1 | (uint32_t)s2) & 3) != 0) // byte-wise while either pointer is not 4-byte aligned
+	{
+		unsigned char c1 = *(s1++); // strcmp orders bytes as unsigned char, independent of char signedness
+		unsigned char c2 = *(s2++);
+
+		if (c1 != c2)
+			return c1 < c2 ? -1 : +1;
+		else if (!c1)
+			return 0;
+	}
+
+	while (1) // both pointers aligned: compare one 32-bit word per iteration
+	{
+		uint32_t v1 = *(uint32_t*)s1;
+		uint32_t v2 = *(uint32_t*)s2;
+
+		if (__builtin_expect(v1 != v2, 0))
+		{ // words differ: locate the first differing byte, lowest-order byte first
+			unsigned char c1, c2;
+
+			c1 = v1 & 0xff, c2 = v2 & 0xff;
+			if (c1 != c2) return c1 < c2 ? -1 : +1;
+			if (!c1) return 0;
+			v1 = v1 >> 8, v2 = v2 >> 8;
+
+			c1 = v1 & 0xff, c2 = v2 & 0xff;
+			if (c1 != c2) return c1 < c2 ? -1 : +1;
+			if (!c1) return 0;
+			v1 = v1 >> 8, v2 = v2 >> 8;
+
+			c1 = v1 & 0xff, c2 = v2 & 0xff;
+			if (c1 != c2) return c1 < c2 ? -1 : +1;
+			if (!c1) return 0;
+			v1 = v1 >> 8, v2 = v2 >> 8;
+
+			c1 = v1 & 0xff, c2 = v2 & 0xff;
+			if (c1 != c2) return c1 < c2 ? -1 : +1;
+			return 0;
+		}
+
+		if (__builtin_expect((((v1) - 0x01010101UL) & ~(v1) & 0x80808080UL), 0))
+			return 0; // equal words containing a zero byte: strings are equal
+
+		s1 += 4;
+		s2 += 4;
+	}
 }
+#endif
 
-char* strcpy(char* dst, const char* src)
-{
-	char* dst0 = dst;
-
-#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
-	int misaligned = ((unsigned long)dst | (unsigned long)src) & (sizeof(long)-1);
-	if (__builtin_expect(!misaligned, 1))
-	{
-		long* ldst = (long*)dst;
-		const long* lsrc = (const long*)src;
-
-		while (!__libc_detect_null(*lsrc))
-			*ldst++ = *lsrc++;
-
-		dst = (char*)ldst;
-		src = (const char*)lsrc;
-
-		char c0 = src[0];
-		char c1 = src[1];
-		char c2 = src[2];
-		if (!(*dst++ = c0)) return dst0;
-		if (!(*dst++ = c1)) return dst0;
-		char c3 = src[3];
-		if (!(*dst++ = c2)) return dst0;
-		if (sizeof(long) == 4) goto out;
-		char c4 = src[4];
-		if (!(*dst++ = c3)) return dst0;
-		char c5 = src[5];
-		if (!(*dst++ = c4)) return dst0;
-		char c6 = src[6];
-		if (!(*dst++ = c5)) return dst0;
-		if (!(*dst++ = c6)) return dst0;
-
-out:
-		*dst++ = 0;
-		return dst0;
-	}
-#endif /* not PREFER_SIZE_OVER_SPEED */
-
-	char ch;
-	do
-	{
-		ch = *src;
-		src++;
-		dst++;
-		*(dst-1) = ch;
-	} while(ch);
-
-	return dst0;
-}
-
-/* copy&paste from disassembled libc */
-// strcmp.S: Artisanally coded in California by A. Shell Waterman
-asm (
-" .global strcmp; "
-" strcmp: or a4,a0,a1; "
-" li t2,-1; "
-" andi a4,a4,3; "
-" bnez a4,.K1; "
-" lui t3,0x7f7f8; "
-" addi t3,t3,-129; "
-" .K6: lw a2,0(a0); "
-" lw a3,0(a1); "
-" and t0,a2,t3; "
-" or t1,a2,t3; "
-" add t0,t0,t3; "
-" or t0,t0,t1; "
-" bne t0,t2,.K2; "
-" bne a2,a3,.K3; "
-" lw a2,4(a0); "
-" lw a3,4(a1); "
-" and t0,a2,t3; "
-" or t1,a2,t3; "
-" add t0,t0,t3; "
-" or t0,t0,t1; "
-" bne t0,t2,.K4; "
-" bne a2,a3,.K3; "
-" lw a2,8(a0); "
-" lw a3,8(a1); "
-" and t0,a2,t3; "
-" or t1,a2,t3; "
-" add t0,t0,t3; "
-" or t0,t0,t1; "
-" bne t0,t2,.K5; "
-" addi a0,a0,12; "
-" addi a1,a1,12; "
-" beq a2,a3,.K6; "
-" .K3: slli a4,a2,0x10; "
-" slli a5,a3,0x10; "
-" bne a4,a5,.K7; "
-" srli a4,a2,0x10; "
-" srli a5,a3,0x10; "
-" sub a0,a4,a5; "
-" andi a1,a0,255; "
-" bnez a1,.K8; "
-" ret; "
-" .K7: srli a4,a4,0x10; "
-" srli a5,a5,0x10; "
-" sub a0,a4,a5; "
-" andi a1,a0,255; "
-" bnez a1,.K8; "
-" ret; "
-" .K8: andi a4,a4,255; "
-" andi a5,a5,255; "
-" sub a0,a4,a5; "
-" ret; "
-" .K1: lbu a2,0(a0); "
-" lbu a3,0(a1); "
-" addi a0,a0,1; "
-" addi a1,a1,1; "
-" bne a2,a3,.K9; "
-" bnez a2,.K1; "
-" .K9: sub a0,a2,a3; "
-" ret; "
-" .K4: addi a0,a0,4; "
-" addi a1,a1,4; "
-" .K2: bne a2,a3,.K1; "
-" li a0,0; "
-" ret; "
-" .K5: addi a0,a0,8; "
-" addi a1,a1,8; "
-" bne a2,a3,.K1; "
-" li a0,0; "
-" ret; "
-);
diff --git a/dhrystone/syscalls.c b/dhrystone/syscalls.c
new file mode 100644
index 0000000..cdf872f
--- /dev/null
+++ b/dhrystone/syscalls.c
@@ -0,0 +1,95 @@
+// An extremely minimalist syscalls.c for newlib
+// Based on riscv newlib libgloss/riscv/machine/syscall.h
+// Written by Clifford Wolf.
+
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+#define UNIMPL_FUNC(_f) ".globl " #_f "\n.type " #_f ", @function\n" #_f ":\n"
+
+asm ( // every label emitted below carries no code of its own and falls through to the single jump at the end
+	".text\n"
+	".align 2\n"
+	UNIMPL_FUNC(open)
+	UNIMPL_FUNC(openat)
+	UNIMPL_FUNC(lseek)
+	UNIMPL_FUNC(stat)
+	UNIMPL_FUNC(lstat)
+	UNIMPL_FUNC(fstatat)
+	UNIMPL_FUNC(isatty)
+	UNIMPL_FUNC(access)
+	UNIMPL_FUNC(faccessat)
+	UNIMPL_FUNC(link)
+	UNIMPL_FUNC(unlink)
+	UNIMPL_FUNC(execve)
+	UNIMPL_FUNC(getpid)
+	UNIMPL_FUNC(fork)
+	UNIMPL_FUNC(kill)
+	UNIMPL_FUNC(wait)
+	UNIMPL_FUNC(times)
+	UNIMPL_FUNC(gettimeofday)
+	UNIMPL_FUNC(ftime)
+	UNIMPL_FUNC(utime)
+	UNIMPL_FUNC(chown)
+	UNIMPL_FUNC(chmod)
+	UNIMPL_FUNC(chdir)
+	UNIMPL_FUNC(getcwd)
+	UNIMPL_FUNC(sysconf)
+	"j unimplemented_syscall\n"
+);
+
+void unimplemented_syscall()
+{
+	const char *p = "Unimplemented system call called!\n";
+	while (*p)
+		*(volatile int*)0x10000000 = *(p++); // presumably the console port: testbench.v special-cases writes to 32'h1000_0000
+	asm volatile ("ebreak");
+	__builtin_unreachable();
+}
+
+ssize_t read(int file, void *ptr, size_t len)
+{
+	// always EOF
+	return 0;
+}
+
+ssize_t write(int file, const void *ptr, size_t len)
+{
+	const char *p = ptr, *eptr = p + len; // char pointers: arithmetic on void* is a GNU extension
+	while (p != eptr)
+		*(volatile int*)0x10000000 = *(p++); // one store per byte, same address as in unimplemented_syscall()
+	return len;
+}
+
+int close(int file)
+{
+	// close is called before _exit()
+	return 0;
+}
+
+int fstat(int file, struct stat *st)
+{
+	// fstat is called during libc startup
+	errno = ENOENT;
+	return -1;
+}
+
+void *sbrk(ptrdiff_t incr)
+{
+	extern unsigned char _end[]; // Defined by linker
+	static unsigned long heap_end;
+
+	if (heap_end == 0)
+		heap_end = (long)_end;
+
+	heap_end += incr; // no upper-bound check on the heap
+	return (void *)(heap_end - incr); // previous break, i.e. start of the newly granted region
+}
+
+void _exit(int exit_status)
+{
+	asm volatile ("ebreak");
+	__builtin_unreachable();
+}
+
diff --git a/dhrystone/testbench.v b/dhrystone/testbench.v
index a00e8f4..2d985d6 100644
--- a/dhrystone/testbench.v
+++ b/dhrystone/testbench.v
@@ -29,7 +29,9 @@ module testbench;
 	picorv32 #(
 		.BARREL_SHIFTER(1),
 		.ENABLE_MUL(1),
-		.ENABLE_DIV(1)
+		.ENABLE_DIV(1),
+		.PROGADDR_RESET('h10000),
+		.STACKADDR('h10000)
 	) uut (
 		.clk (clk ),
 		.resetn (resetn ),
@@ -48,13 +50,16 @@ module testbench;
 		.mem_la_wstrb(mem_la_wstrb)
 	);
 
-	reg [31:0] memory [0:64*1024/4-1];
+	reg [7:0] memory [0:256*1024-1]; // byte-wide memory, indexed directly by byte address
 	initial $readmemh("dhry.hex", memory);
 
 	assign mem_ready = 1;
 
 	always @(posedge clk) begin
-		mem_rdata <= mem_la_read ? memory[mem_la_addr >> 2] : 'bx;
+		mem_rdata[ 7: 0] <= mem_la_read ? memory[mem_la_addr + 0] : 'bx;
+		mem_rdata[15: 8] <= mem_la_read ? memory[mem_la_addr + 1] : 'bx;
+		mem_rdata[23:16] <= mem_la_read ? memory[mem_la_addr + 2] : 'bx;
+		mem_rdata[31:24] <= mem_la_read ? memory[mem_la_addr + 3] : 'bx;
 		if (mem_la_write) begin
 			case (mem_la_addr)
 				32'h1000_0000: begin
@@ -64,10 +69,10 @@ module testbench;
 `endif
 				end
 				default: begin
-					if (mem_la_wstrb[0]) memory[mem_la_addr >> 2][ 7: 0] <= mem_la_wdata[ 7: 0];
-					if (mem_la_wstrb[1]) memory[mem_la_addr >> 2][15: 8] <= mem_la_wdata[15: 8];
-					if (mem_la_wstrb[2]) memory[mem_la_addr >> 2][23:16] <= mem_la_wdata[23:16];
-					if (mem_la_wstrb[3]) memory[mem_la_addr >> 2][31:24] <= mem_la_wdata[31:24];
+					if (mem_la_wstrb[0]) memory[mem_la_addr + 0] <= mem_la_wdata[ 7: 0];
+					if (mem_la_wstrb[1]) memory[mem_la_addr + 1] <= mem_la_wdata[15: 8];
+					if (mem_la_wstrb[2]) memory[mem_la_addr + 2] <= mem_la_wdata[23:16];
+					if (mem_la_wstrb[3]) memory[mem_la_addr + 3] <= mem_la_wdata[31:24];
 				end
 			endcase
 		end