diff --git a/litex/soc/cores/cpu/rocket/boot-helper.S b/litex/soc/cores/cpu/rocket/boot-helper.S
index 6dd74aaeb..657806060 100644
--- a/litex/soc/cores/cpu/rocket/boot-helper.S
+++ b/litex/soc/cores/cpu/rocket/boot-helper.S
@@ -1,4 +1,18 @@
 .section    .text, "ax", @progbits
-.global     boot_helper
+.global boot_helper
+.global smp_ap_args    // a0..a2 saved below for the ap cores (storage defined in crt0.S)
+.global smp_ap_target  // jump target (a3) saved below for the ap cores
+.global smp_ap_ready   // flag set to 1 below once args and target have been stored
+
 boot_helper:
-	jr x13
+  // boot core (bp) saves a0..a2 and the jump target in a3 for the ap cores:
+  sd a0, smp_ap_args, t1     // t1 is only the scratch register for the symbol address
+  sd a1, smp_ap_args+8, t1
+  sd a2, smp_ap_args+16, t1
+  sd a3, smp_ap_target, t1
+  fence w, w                 // order the stores above before the ready-flag store below
+  // notify application cores to proceed with boot:
+  li t0, 1
+  sd t0, smp_ap_ready, t1
+  // boot core itself now jumps to the same target:
+  jr a3
diff --git a/litex/soc/cores/cpu/rocket/core.py b/litex/soc/cores/cpu/rocket/core.py
index 940414446..59eafa515 100644
--- a/litex/soc/cores/cpu/rocket/core.py
+++ b/litex/soc/cores/cpu/rocket/core.py
@@ -45,9 +45,12 @@ class Open(Signal): pass
 CPU_VARIANTS = {
     "standard": "freechips.rocketchip.system.LitexConfig",
     "linux":    "freechips.rocketchip.system.LitexLinuxConfig",
+    "linux4":   "freechips.rocketchip.system.LitexLinux4Config",  # num_cores=4 in CPU_SIZE_PARAMS
     "linuxd":   "freechips.rocketchip.system.LitexLinuxDConfig",
     "linuxq":   "freechips.rocketchip.system.LitexLinuxQConfig",
     "full":     "freechips.rocketchip.system.LitexFullConfig",
+    "full4d":   "freechips.rocketchip.system.LitexFull4DConfig",  # num_cores=4, mem_dw=128
+    "full4q":   "freechips.rocketchip.system.LitexFull4QConfig",  # num_cores=4, mem_dw=256
 }
 
 # GCC Flags-----------------------------------------------------------------------------------------
@@ -55,20 +58,26 @@ CPU_VARIANTS = {
 GCC_FLAGS = {
     "standard": "-march=rv64imac   -mabi=lp64 ",
     "linux":    "-march=rv64imac   -mabi=lp64 ",
+    "linux4":   "-march=rv64imac   -mabi=lp64 ",  # same flags as "linux"
     "linuxd":   "-march=rv64imac   -mabi=lp64 ",
     "linuxq":   "-march=rv64imac   -mabi=lp64 ",
     "full":     "-march=rv64imafdc -mabi=lp64 ",
+    "full4d":   "-march=rv64imafdc -mabi=lp64 ",  # same flags as "full"
+    "full4q":   "-march=rv64imafdc -mabi=lp64 ",  # same flags as "full"
 }
 
-# AXI Data-Widths ----------------------------------------------------------------------------------
+# CPU Size Params ----------------------------------------------------------------------------------
 
-AXI_DATA_WIDTHS = {
-    # Variant : (mem, mmio)
-    "standard": ( 64,  64),
-    "linux":    ( 64,  64),
-    "linuxd":   (128,  64),
-    "linuxq":   (256,  64),
-    "full":     ( 64,  64),
+CPU_SIZE_PARAMS = {
+    # Variant : (mem_dw, mmio_dw, num_cores)  -- mem/mmio AXI data widths, number of cores
+    "standard": (    64,      64,         1),
+    "linux":    (    64,      64,         1),
+    "linux4":   (    64,      64,         4),
+    "linuxd":   (   128,      64,         1),
+    "linuxq":   (   256,      64,         1),
+    "full":     (    64,      64,         1),
+    "full4d":   (   128,      64,         4),
+    "full4q":   (   256,      64,         4),
 }
 
 # Rocket RV64 --------------------------------------------------------------------------------------
@@ -111,7 +120,7 @@ class RocketRV64(CPU):
         self.reset     = Signal()
         self.interrupt = Signal(4)
 
-        mem_dw, mmio_dw = AXI_DATA_WIDTHS[self.variant]
+        mem_dw, mmio_dw, num_cores = CPU_SIZE_PARAMS[self.variant]  # num_cores: one hartIsInReset port per core is added below
 
         self.mem_axi   =  mem_axi = axi.AXIInterface(data_width=mem_dw,  address_width=32, id_width=4)
         self.mmio_axi  = mmio_axi = axi.AXIInterface(data_width=mmio_dw, address_width=32, id_width=4)
@@ -132,7 +141,6 @@ class RocketRV64(CPU):
             i_reset = ResetSignal("sys") | self.reset,
 
             # Debug (ignored).
-            i_resetctrl_hartIsInReset_0           = Open(),
             i_debug_clock                         = 0,
             i_debug_reset                         = ResetSignal() | self.reset,
             o_debug_clockeddmi_dmi_req_ready      = Open(),
@@ -282,6 +290,8 @@ class RocketRV64(CPU):
             o_l2_frontend_bus_axi4_0_r_bits_resp   = l2fb_axi.r.resp,
             o_l2_frontend_bus_axi4_0_r_bits_last   = l2fb_axi.r.last,
         )
+        # One resetctrl_hartIsInReset_<i> input per core (i = 0..num_cores-1), each given an Open() placeholder:
+        self.cpu_params.update({'i_resetctrl_hartIsInReset_%s'%i : Open() for i in range(num_cores)})
 
         # Adapt AXI interfaces to Wishbone.
         mmio_a2w = ResetInserter()(axi.AXI2Wishbone(mmio_axi, mmio_wb, base_address=0))
diff --git a/litex/soc/cores/cpu/rocket/crt0.S b/litex/soc/cores/cpu/rocket/crt0.S
index 2bb4293ab..d28a5c04f 100644
--- a/litex/soc/cores/cpu/rocket/crt0.S
+++ b/litex/soc/cores/cpu/rocket/crt0.S
@@ -2,6 +2,10 @@
 .global isr
 .global _start
 
+.global smp_ap_args    // three dwords loaded into a0..a2 by the ap cores
+.global smp_ap_target  // address the ap cores jump to once released
+.global smp_ap_ready   // ap cores spin in smp_ap_loop until this is non-zero
+
 _start:
   j crt_init
   nop
@@ -54,37 +58,66 @@ trap_entry:
 
 crt_init:
   la sp, _fstack
-  la a0, trap_entry
-  csrw mtvec, a0
+  sd zero, smp_ap_ready, t0  // clear the ap release flag (runs on every hart)
+  la t0, trap_entry
+  csrw mtvec, t0
+
+smp_select_bp:
+  csrr a0, mhartid
+  beqz a0, data_init  // hart 0 is bp, everyone else is ap
+
+smp_ap_loop:
+  ld t0, smp_ap_ready  // spin until boot_helper stores a non-zero flag
+  beqz t0, smp_ap_loop
+smp_ap_boot:
+  fence r, r  // order the flag read before the loads of args/target below
+  fence.i  // i$ flush
+  ld a0, smp_ap_args     // hart ID (but next-stage loads its own)
+  ld a1, smp_ap_args+8   // DTB pointer (if provided by litex bios)
+  ld a2, smp_ap_args+16
+  ld a3, smp_ap_target
+  jr a3
+smp_ap_done:
 
 data_init:
-  la a0, _fdata
-  la a1, _edata
-  la a2, _fdata_rom
+  la t0, _fdata
+  la t1, _edata
+  la t2, _fdata_rom
 data_loop:
-  beq a0,a1,data_done
-  ld a3,0(a2)
-  sd a3,0(a0)
-  add a0,a0,8
-  add a2,a2,8
+  beq t0,t1,data_done
+  ld t3,0(t2)
+  sd t3,0(t0)
+  add t0,t0,8
+  add t2,t2,8
   j data_loop
 data_done:
 
 bss_init:
-  la a0, _fbss
-  la a1, _ebss
+  la t0, _fbss
+  la t1, _ebss
 bss_loop:
-  beq a0,a1,bss_done
-  sd zero,0(a0)
-  add a0,a0,8
+  beq t0,t1,bss_done
+  sd zero,0(t0)
+  add t0,t0,8
   j bss_loop
 bss_done:
 
   call plic_init // initialize external interrupt controller
-  li a0, 0x800   // external interrupt sources only (using LiteX timer);
+  li t0, 0x800   // external interrupt sources only (using LiteX timer);
                  // NOTE: must still enable mstatus.MIE!
-  csrw mie,a0
+  csrw mie,t0
 
   call main
 inf_loop:
   j inf_loop
+
+.bss
+.balign 8  // keep the dwords below naturally aligned for ld/sd (a misaligned access may trap)
+smp_ap_args:
+  .dword 0
+  .dword 0
+  .dword 0
+smp_ap_target:
+  .dword 0
+smp_ap_ready:
+  .dword 0