bios/sdram: Use an LFSR to speed up pseudo-random number generation

This speeds up the memory test by an order of magnitude, esp. on
cores without a hardware multiplier by getting rid of the
multiplication in the loop.

The LFSR implementation comes from microwatt's simple_random test
project.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Benjamin Herrenschmidt 2020-05-06 21:54:27 +02:00 committed by Florent Kermarrec
parent 34f268689a
commit 99c5b0fca1
2 changed files with 118 additions and 8 deletions

View File

@ -0,0 +1,109 @@
#include <limits.h>
/*
* Copyright (C) 2020, Anton Blanchard <anton@linux.ibm.com>, IBM
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Galois LFSR
 *
 * Polynomials verified with https://bitbucket.org/gallen/mlpolygen/
 *
 * lfsr() advances the register by one step.
 *
 *   bits: register width; selects the tap mask lfsr_taps[bits].
 *   prev: current register state.
 *
 * Returns prev shifted right by one bit, XORed with the tap mask when the
 * bit that was shifted out was 1.
 *
 * Notes:
 *  - A state of 0 always maps to 0, so a zero seed never leaves zero.
 *  - Table entries 0 and 1 carry no taps, so for those widths the step is a
 *    plain right shift. Widths past the end of the table are handled the
 *    same way instead of indexing outside the array.
 *  - The entries for widths 33..64 are only present when unsigned long is
 *    wider than 32 bits. This is decided from ULONG_MAX (<limits.h>) rather
 *    than __WORDSIZE, which is not defined by every C library.
 */
static inline unsigned long lfsr(unsigned long bits, unsigned long prev)
{
	static const unsigned long lfsr_taps[] = {
		0x0,
		0x0,
		0x3,
		0x6,
		0xc,
		0x14,
		0x30,
		0x60,
		0xb8,
		0x110,
		0x240,
		0x500,
		0x829,
		0x100d,
		0x2015,
		0x6000,
		0xd008,
		0x12000,
		0x20400,
		0x40023,
		0x90000,
		0x140000,
		0x300000,
		0x420000,
		0xe10000,
		0x1200000,
		0x2000023,
		0x4000013,
		0x9000000,
		0x14000000,
		0x20000029,
		0x48000000,
		0x80200003,
#if ULONG_MAX > 0xffffffffUL
		0x100080000,
		0x204000003,
		0x500000000,
		0x801000000,
		0x100000001f,
		0x2000000031,
		0x4400000000,
		0xa000140000,
		0x12000000000,
		0x300000c0000,
		0x63000000000,
		0xc0000030000,
		0x1b0000000000,
		0x300003000000,
		0x420000000000,
		0xc00000180000,
		0x1008000000000,
		0x3000000c00000,
		0x6000c00000000,
		0x9000000000000,
		0x18003000000000,
		0x30000000030000,
		0x40000040000000,
		0xc0000600000000,
		0x102000000000000,
		0x200004000000000,
		0x600003000000000,
		0xc00000000000000,
		0x1800300000000000,
		0x3000000000000030,
		0x6000000000000000,
		0x800000000000000d
#endif
	};
	const unsigned long num_taps = sizeof(lfsr_taps) / sizeof(lfsr_taps[0]);
	/* Unknown widths fall back to "no taps" rather than reading out of bounds. */
	const unsigned long taps = (bits < num_taps) ? lfsr_taps[bits] : 0;
	const unsigned long lsb = prev & 1;

	prev >>= 1;
	/* -lsb is all-ones when the dropped bit was 1 and zero otherwise. */
	prev ^= (-lsb) & taps;
	return prev;
}

View File

@ -21,6 +21,7 @@
#include <system.h> #include <system.h>
#include "sdram.h" #include "sdram.h"
#include "lfsr.h"
// FIXME(hack): If we don't have main ram, just target the sram instead. // FIXME(hack): If we don't have main ram, just target the sram instead.
#ifndef MAIN_RAM_BASE #ifndef MAIN_RAM_BASE
@ -514,7 +515,7 @@ static int read_level_scan(int module, int bitslip)
prv = 42; prv = 42;
for(p=0;p<SDRAM_PHY_PHASES;p++) for(p=0;p<SDRAM_PHY_PHASES;p++)
for(i=0;i<DFII_PIX_DATA_BYTES;i++) { for(i=0;i<DFII_PIX_DATA_BYTES;i++) {
prv = 1664525*prv + 1013904223; prv = lfsr(32, prv);
prs[p][i] = prv; prs[p][i] = prv;
} }
@ -594,7 +595,7 @@ static void read_level(int module)
prv = 42; prv = 42;
for(p=0;p<SDRAM_PHY_PHASES;p++) for(p=0;p<SDRAM_PHY_PHASES;p++)
for(i=0;i<DFII_PIX_DATA_BYTES;i++) { for(i=0;i<DFII_PIX_DATA_BYTES;i++) {
prv = 1664525*prv + 1013904223; prv = lfsr(32, prv);
prs[p][i] = prv; prs[p][i] = prv;
} }
@ -712,7 +713,7 @@ static void read_level(int module)
static unsigned int seed_to_data_32(unsigned int seed, int random) static unsigned int seed_to_data_32(unsigned int seed, int random)
{ {
if (random) if (random)
return 1664525*seed + 1013904223; return lfsr(32, seed);
else else
return seed + 1; return seed + 1;
} }
@ -720,7 +721,7 @@ static unsigned int seed_to_data_32(unsigned int seed, int random)
static unsigned short seed_to_data_16(unsigned short seed, int random) static unsigned short seed_to_data_16(unsigned short seed, int random)
{ {
if (random) if (random)
return 25173*seed + 13849; return lfsr(16, seed);
else else
return seed + 1; return seed + 1;
} }
@ -794,14 +795,14 @@ static int memtest_data(void)
unsigned int rdata; unsigned int rdata;
errors = 0; errors = 0;
seed_32 = 0; seed_32 = 1;
for(i=0;i<MEMTEST_DATA_SIZE/4;i++) { for(i=0;i<MEMTEST_DATA_SIZE/4;i++) {
seed_32 = seed_to_data_32(seed_32, MEMTEST_DATA_RANDOM); seed_32 = seed_to_data_32(seed_32, MEMTEST_DATA_RANDOM);
array[i] = seed_32; array[i] = seed_32;
} }
seed_32 = 0; seed_32 = 1;
flush_cpu_dcache(); flush_cpu_dcache();
#ifdef CONFIG_L2_SIZE #ifdef CONFIG_L2_SIZE
flush_l2_cache(); flush_l2_cache();
@ -834,14 +835,14 @@ static int memtest_addr(void)
unsigned short rdata; unsigned short rdata;
errors = 0; errors = 0;
seed_16 = 0; seed_16 = 1;
for(i=0;i<MEMTEST_ADDR_SIZE/4;i++) { for(i=0;i<MEMTEST_ADDR_SIZE/4;i++) {
seed_16 = seed_to_data_16(seed_16, MEMTEST_ADDR_RANDOM); seed_16 = seed_to_data_16(seed_16, MEMTEST_ADDR_RANDOM);
array[(unsigned int) seed_16] = i; array[(unsigned int) seed_16] = i;
} }
seed_16 = 0; seed_16 = 1;
flush_cpu_dcache(); flush_cpu_dcache();
#ifdef CONFIG_L2_SIZE #ifdef CONFIG_L2_SIZE
flush_l2_cache(); flush_l2_cache();