diff --git a/litex/soc/integration/export.py b/litex/soc/integration/export.py index ed3abcb60..23f4809d6 100644 --- a/litex/soc/integration/export.py +++ b/litex/soc/integration/export.py @@ -136,44 +136,34 @@ def get_soc_header(constants, with_access_functions=True): r += "\n#endif\n" return r -def _get_rw_functions_c(reg_name, reg_base, nwords, busword, alignment, read_only, with_access_functions): +def _get_rw_functions_c(reg_name, reg_base, nwords, busword, read_only, with_access_functions): r = "" - r += "#define CSR_"+reg_name.upper()+"_ADDR "+hex(reg_base)+"L\n" - r += "#define CSR_"+reg_name.upper()+"_SIZE "+str(nwords)+"\n" + addr_str = "CSR_{}_ADDR".format(reg_name.upper()) + size_str = "CSR_{}_SIZE".format(reg_name.upper()) + r += "#define {} {}L\n".format(addr_str, hex(reg_base)) + r += "#define {} {}\n".format(size_str, nwords) - size = nwords*busword - if size > 64: + size = nwords*busword//8 + if size > 8: + # FIXME: maybe implement some "memcpy-like" semantics for larger blobs? 
return r - elif size > 32: - ctype = "unsigned long long int" - elif size > 16: - ctype = "unsigned int" - elif size > 8: - ctype = "unsigned short int" + elif size > 4: + ctype = "uint64_t" + elif size > 2: + ctype = "uint32_t" + elif size > 1: + ctype = "uint16_t" else: - ctype = "unsigned char" + ctype = "uint8_t" if with_access_functions: - r += "static inline "+ctype+" "+reg_name+"_read(void) {\n" - if size > 1: - r += "\t"+ctype+" r = csr_readl("+hex(reg_base)+"L);\n" - for byte in range(1, nwords): - r += "\tr <<= "+str(busword)+";\n\tr |= csr_readl("+hex(reg_base+alignment//8*byte)+"L);\n" - r += "\treturn r;\n}\n" - else: - r += "\treturn csr_readl("+hex(reg_base)+"L);\n}\n" + r += "static inline {} {}_read(void) {{\n".format(ctype, reg_name) + r += "\treturn _csr_rd((unsigned long *){}, {});\n}}\n".format(addr_str, size) if not read_only: - r += "static inline void "+reg_name+"_write("+ctype+" value) {\n" - for word in range(nwords): - shift = (nwords-word-1)*busword - if shift: - value_shifted = "value >> "+str(shift) - else: - value_shifted = "value" - r += "\tcsr_writel("+value_shifted+", "+hex(reg_base+alignment//8*word)+"L);\n" - r += "}\n" + r += "static inline void {}_write({} v) {{\n".format(reg_name, ctype) + r += "\t_csr_wr((unsigned long *){}, v, {});\n}}\n".format(addr_str, size) return r @@ -186,12 +176,14 @@ def get_csr_header(regions, constants, with_access_functions=True): if with_access_functions: r += "#include \n" r += "#ifdef CSR_ACCESSORS_DEFINED\n" - r += "extern void csr_writeb(uint8_t value, unsigned long addr);\n" - r += "extern uint8_t csr_readb(unsigned long addr);\n" - r += "extern void csr_writew(uint16_t value, unsigned long addr);\n" - r += "extern uint16_t csr_readw(unsigned long addr);\n" - r += "extern void csr_writel(uint32_t value, unsigned long addr);\n" - r += "extern uint32_t csr_readl(unsigned long addr);\n" + r += "extern void csr_wr_uint8(uint8_t v, unsigned long a);\n" + r += "extern void csr_wr_uint16(uint16_t 
v, unsigned long a);\n" + r += "extern void csr_wr_uint32(uint32_t v, unsigned long a);\n" + r += "extern void csr_wr_uint64(uint64_t v, unsigned long a);\n" + r += "extern uint8_t csr_rd_uint8(unsigned long a);\n" + r += "extern uint16_t csr_rd_uint16(unsigned long a);\n" + r += "extern uint32_t csr_rd_uint32(unsigned long a);\n" + r += "extern uint64_t csr_rd_uint64(unsigned long a);\n" r += "#else /* ! CSR_ACCESSORS_DEFINED */\n" r += "#include \n" r += "#endif /* ! CSR_ACCESSORS_DEFINED */\n" @@ -202,7 +194,7 @@ def get_csr_header(regions, constants, with_access_functions=True): if not isinstance(region.obj, Memory): for csr in region.obj: nr = (csr.size + region.busword - 1)//region.busword - r += _get_rw_functions_c(name + "_" + csr.name, origin, nr, region.busword, alignment, + r += _get_rw_functions_c(name + "_" + csr.name, origin, nr, region.busword, isinstance(csr, CSRStatus), with_access_functions) origin += alignment//8*nr if hasattr(csr, "fields"): diff --git a/litex/soc/software/bios/sdram.c b/litex/soc/software/bios/sdram.c index 6907041a4..f7d8458c8 100644 --- a/litex/soc/software/bios/sdram.c +++ b/litex/soc/software/bios/sdram.c @@ -20,8 +20,6 @@ #include #include -#include // for hton/ntoh (byteswap) functions - #include "sdram.h" // FIXME(hack): If we don't have main ram, just target the sram instead. 
@@ -63,22 +61,6 @@ __attribute__((unused)) static void cdelay(int i) #define DFII_PIX_DATA_BYTES DFII_PIX_DATA_SIZE*CSR_DATA_BYTES -#if CSR_DATA_BYTES == 1 - typedef uint8_t csr_dw_t; - #define csr_dw_hton(x) (x) - #define csr_dw_ntoh(x) (x) -#elif CSR_DATA_BYTES == 2 - typedef uint16_t csr_dw_t; - #define csr_dw_hton(x) htons(x) - #define csr_dw_ntoh(x) ntohs(x) -#elif CSR_DATA_BYTES == 4 - typedef uint32_t csr_dw_t; - #define csr_dw_hton(x) htonl(x) - #define csr_dw_ntoh(x) ntohl(x) -#else -#error Unsupported CSR data width -#endif - void sdrsw(void) { sdram_dfii_control_write(DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET_N); @@ -120,8 +102,7 @@ void sdrrdbuf(int dq) { int i, p; int first_byte, step; - csr_dw_t buf[DFII_PIX_DATA_SIZE]; - unsigned char *buf_bytes = (unsigned char *)&(buf[0]); + unsigned char buf[DFII_PIX_DATA_BYTES]; if(dq < 0) { first_byte = 0; @@ -132,10 +113,10 @@ void sdrrdbuf(int dq) } for(p=0;p\n"); @@ -193,32 +174,35 @@ void sdrrderr(char *count) } for(p=0;p\n"); @@ -246,9 +229,9 @@ void sdrwr(char *startaddr) for(p=0;p /* To overwrite CSR accessors, define extern, non-inlined versions - * of csr_read[bwl]() and csr_write[bwl](), and define - * CSR_ACCESSORS_DEFINED. + * of csr_rd_uint[8|16|32|64]() and csr_wr_uint[8|16|32|64](), and + * define CSR_ACCESSORS_DEFINED. */ #ifndef CSR_ACCESSORS_DEFINED @@ -14,37 +14,232 @@ #ifdef __ASSEMBLER__ #define MMPTR(x) x #else /* ! __ASSEMBLER__ */ -#define MMPTR(x) (*((volatile unsigned long *)(x))) -static inline void csr_writeb(uint8_t value, unsigned long addr) +/* CSRs are stored in subregister slices of CONFIG_CSR_DATA_WIDTH (native + * endianness), with the least significant slice at the lowest aligned + * (base) address. */ + +#include +#if !defined(CONFIG_CSR_ALIGNMENT) || !defined(CONFIG_CSR_DATA_WIDTH) +#error csr alignment and data-width MUST be set before including this file! 
+#endif + +#if CONFIG_CSR_DATA_WIDTH > CONFIG_CSR_ALIGNMENT +#error invalid CONFIG_CSR_DATA_WIDTH (must not exceed CONFIG_CSR_ALIGNMENT)! +#endif + +/* FIXME: preprocessor can't evaluate 'sizeof()' operator, is there a better + * way to implement the following assertion? + * #if sizeof(unsigned long) != CONFIG_CSR_ALIGNMENT/8 + * #error invalid CONFIG_CSR_ALIGNMENT (must match native CPU word size)! + * #endif + */ + +/* CSR data width (subregister width) in bytes, for direct comparison to sizeof() */ +#define CSR_DW_BYTES (CONFIG_CSR_DATA_WIDTH/8) + +/* CSR subregisters are embedded inside native CPU word aligned locations: */ +#define MMPTR(a) (*((volatile unsigned long *)(a))) + +/* Number of subregs required for various total byte sizes, by subreg width: + * NOTE: 1, 2, 4, and 8 bytes represent uint[8|16|32|64]_t C types; However, + * CSRs of intermediate bit widths (24, 40, 48, and 56) are NOT padded + * (with extra unallocated subregisters) to the next valid C type! + * +-----+-----------------+ + * | csr | bytes | + * | _dw | 1 2 3 4 5 6 7 8 | + * | |-----=---=-=-=---| + * | 1 | 1 2 3 4 5 6 7 8 | + * | 2 | 1 1 2 2 3 3 4 4 | + * | 4 | 1 1 1 1 2 2 2 2 | + * | 8 | 1 1 1 1 1 1 1 1 | + * +-----+-----------------+ */ +static inline int num_subregs(int csr_bytes) { - *((volatile uint8_t *)addr) = value; + return (csr_bytes - 1) / CSR_DW_BYTES + 1; } -static inline uint8_t csr_readb(unsigned long addr) +/* Read a CSR of size 'csr_bytes' located at address 'a'. */ +static inline uint64_t _csr_rd(unsigned long *a, int csr_bytes) { - return *(volatile uint8_t *)addr; + uint64_t r = a[0]; + for (int i = 1; i < num_subregs(csr_bytes); i++) { + r <<= CONFIG_CSR_DATA_WIDTH; + r |= a[i]; + } + return r; } -static inline void csr_writew(uint16_t value, unsigned long addr) +/* Write value 'v' to a CSR of size 'csr_bytes' located at address 'a'. 
*/ +static inline void _csr_wr(unsigned long *a, uint64_t v, int csr_bytes) { - *((volatile uint16_t *)addr) = value; + int ns = num_subregs(csr_bytes); + for (int i = 0; i < ns; i++) + a[i] = v >> (CONFIG_CSR_DATA_WIDTH * (ns - 1 - i)); } -static inline uint16_t csr_readw(unsigned long addr) +// FIXME: - should we provide 24, 40, 48, and 56 bit csr_[rd|wr] methods? + +static inline uint8_t csr_rd_uint8(unsigned long a) { - return *(volatile uint16_t *)addr; + return _csr_rd((unsigned long *)a, sizeof(uint8_t)); } -static inline void csr_writel(uint32_t value, unsigned long addr) +static inline void csr_wr_uint8(uint8_t v, unsigned long a) { - *((volatile uint32_t *)addr) = value; + _csr_wr((unsigned long *)a, v, sizeof(uint8_t)); } -static inline uint32_t csr_readl(unsigned long addr) +static inline uint16_t csr_rd_uint16(unsigned long a) { - return *(volatile uint32_t *)addr; + return _csr_rd((unsigned long *)a, sizeof(uint16_t)); } + +static inline void csr_wr_uint16(uint16_t v, unsigned long a) +{ + _csr_wr((unsigned long *)a, v, sizeof(uint16_t)); +} + +static inline uint32_t csr_rd_uint32(unsigned long a) +{ + return _csr_rd((unsigned long *)a, sizeof(uint32_t)); +} + +static inline void csr_wr_uint32(uint32_t v, unsigned long a) +{ + _csr_wr((unsigned long *)a, v, sizeof(uint32_t)); +} + +static inline uint64_t csr_rd_uint64(unsigned long a) +{ + return _csr_rd((unsigned long *)a, sizeof(uint64_t)); +} + +static inline void csr_wr_uint64(uint64_t v, unsigned long a) +{ + _csr_wr((unsigned long *)a, v, sizeof(uint64_t)); +} + +/* Read a CSR located at address 'a' into an array 'buf' of 'cnt' elements. 
+ * + * NOTE: Since CSR_DW_BYTES is a constant here, we might be tempted to further + * optimize things by leaving out one or the other of the if() branches below, + * depending on each unsigned type width; + * However, this code is also meant to serve as a reference for how CSRs are + * to be manipulated by other programs (e.g., an OS kernel), which may benefit + * from dynamically handling multiple possible CSR subregister data widths + * (e.g., by passing a value in through the Device Tree). + * Ultimately, if CSR_DW_BYTES is indeed a constant, the compiler should be + * able to determine on its own whether it can automatically optimize away one + * of the if() branches! */ +#define _csr_rd_buf(a, buf, cnt) \ +{ \ + int i, j, nsubs, n_sub_elem; \ + unsigned long *addr = (unsigned long *)(a); \ + uint64_t r; \ + if (sizeof(buf[0]) >= CSR_DW_BYTES) { \ + /* one or more subregisters per element */ \ + for (i = 0; i < cnt; i++) { \ + buf[i] = _csr_rd(addr, sizeof(buf[0])); \ + addr += num_subregs(sizeof(buf[0])); \ + } \ + } else { \ + /* multiple elements per subregister (2, 4, or 8) */ \ + nsubs = num_subregs(sizeof(buf[0]) * cnt); \ + n_sub_elem = CSR_DW_BYTES / sizeof(buf[0]); \ + for (i = 0; i < nsubs; i++) { \ + r = addr[i]; \ + for (j = n_sub_elem - 1; j >= 0; j--) { \ + if (i * n_sub_elem + j < cnt) \ + buf[i * n_sub_elem + j] = r; \ + r >>= sizeof(buf[0]) * 8; \ + } \ + } \ + } \ +} + +/* Write an array 'buf' of 'cnt' elements to a CSR located at address 'a'. + * + * NOTE: The same optimization considerations apply here as with _csr_rd_buf() + * above. 
+ */ +#define _csr_wr_buf(a, buf, cnt) \ +{ \ + int i, j, nsubs, n_sub_elem; \ + unsigned long *addr = (unsigned long *)(a); \ + uint64_t v; \ + if (sizeof(buf[0]) >= CSR_DW_BYTES) { \ + /* one or more subregisters per element */ \ + for (i = 0; i < cnt; i++) { \ + _csr_wr(addr, buf[i], sizeof(buf[0])); \ + addr += num_subregs(sizeof(buf[0])); \ + } \ + } else { \ + /* multiple elements per subregister (2, 4, or 8) */ \ + nsubs = num_subregs(sizeof(buf[0]) * cnt); \ + n_sub_elem = CSR_DW_BYTES / sizeof(buf[0]); \ + for (i = 0; i < nsubs; i++) { \ + v = buf[i * n_sub_elem + 0]; \ + for (j = 1; j < n_sub_elem; j++) { \ + if (i * n_sub_elem + j == cnt) \ + break; \ + v <<= sizeof(buf[0]) * 8; \ + v |= buf[i * n_sub_elem + j]; \ + } \ + addr[i] = v; \ + } \ + } \ +} + +static inline void csr_rd_buf_uint8(unsigned long a, uint8_t *buf, int cnt) +{ + _csr_rd_buf(a, buf, cnt); +} + +static inline void csr_wr_buf_uint8(unsigned long a, + const uint8_t *buf, int cnt) +{ + _csr_wr_buf(a, buf, cnt); +} + +static inline void csr_rd_buf_uint16(unsigned long a, uint16_t *buf, int cnt) +{ + _csr_rd_buf(a, buf, cnt); +} + +static inline void csr_wr_buf_uint16(unsigned long a, + const uint16_t *buf, int cnt) +{ + _csr_wr_buf(a, buf, cnt); +} + +static inline void csr_rd_buf_uint32(unsigned long a, uint32_t *buf, int cnt) +{ + _csr_rd_buf(a, buf, cnt); +} + +static inline void csr_wr_buf_uint32(unsigned long a, + const uint32_t *buf, int cnt) +{ + _csr_wr_buf(a, buf, cnt); +} + +/* NOTE: the macros' "else" branch is unreachable, no need to be warned + * about a >= 64bit left shift! */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshift-count-overflow" +static inline void csr_rd_buf_uint64(unsigned long a, uint64_t *buf, int cnt) +{ + _csr_rd_buf(a, buf, cnt); +} + +static inline void csr_wr_buf_uint64(unsigned long a, + const uint64_t *buf, int cnt) +{ + _csr_wr_buf(a, buf, cnt); +} +#pragma GCC diagnostic pop + #endif /* ! __ASSEMBLER__ */ #endif /* ! 
CSR_ACCESSORS_DEFINED */