From b4e041ecf162fbeee3f894d188037674d20b3205 Mon Sep 17 00:00:00 2001
From: Sebastien Bourdeauducq <sebastien@milkymist.org>
Date: Mon, 20 Feb 2012 23:55:20 +0100
Subject: [PATCH] s6ddrphy: write path OK in simulation

---
 milkymist/m1crg/__init__.py    |   2 +-
 milkymist/s6ddrphy/__init__.py |   2 +-
 tb/s6ddrphy/Makefile           |  23 ++++++
 tb/s6ddrphy/tb_s6ddrphy.v      | 125 +++++++++++++++++++++++++++++++++
 top.py                         |   2 +-
 verilog/m1crg/m1crg.v          |   6 +-
 verilog/s6ddrphy/s6ddrphy.v    | 111 +++++++++++++++--------------
 7 files changed, 209 insertions(+), 62 deletions(-)
 create mode 100644 tb/s6ddrphy/Makefile
 create mode 100644 tb/s6ddrphy/tb_s6ddrphy.v

diff --git a/milkymist/m1crg/__init__.py b/milkymist/m1crg/__init__.py
index a0b49b61a..83d7e28b9 100644
--- a/milkymist/m1crg/__init__.py
+++ b/milkymist/m1crg/__init__.py
@@ -14,7 +14,7 @@ class M1CRG:
 			"ac97_rst_n",
 			"videoin_rst_n",
 			"flash_rst_n",
-			"clk2x_90",
+			"clk2x_270",
 			"clk4x_wr",
 			"clk4x_wr_strb",
 			"clk4x_rd",
diff --git a/milkymist/s6ddrphy/__init__.py b/milkymist/s6ddrphy/__init__.py
index 4129f0893..ea6587172 100644
--- a/milkymist/s6ddrphy/__init__.py
+++ b/milkymist/s6ddrphy/__init__.py
@@ -8,7 +8,7 @@ class S6DDRPHY:
 		inouts = []
 		
 		for name in [
-			"clk2x_90",
+			"clk2x_270",
 			"clk4x_wr",
 			"clk4x_wr_strb",
 			"clk4x_rd",
diff --git a/tb/s6ddrphy/Makefile b/tb/s6ddrphy/Makefile
new file mode 100644
index 000000000..8189f571a
--- /dev/null
+++ b/tb/s6ddrphy/Makefile
@@ -0,0 +1,23 @@
+SOURCES=tb_s6ddrphy.v ../../verilog/s6ddrphy/s6ddrphy.v \
+	$(XILINX)/verilog/src/unisims/ODDR2.v \
+	$(XILINX)/verilog/src/unisims/OSERDES2.v \
+	$(XILINX)/verilog/src/unisims/ISERDES2.v \
+	$(XILINX)/verilog/src/unisims/IOBUF.v \
+	$(XILINX)/verilog/src/unisims/OBUFT.v \
+	$(XILINX)/verilog/src/unisims/BUFPLL.v
+
+all: tb_s6ddrphy # default goal: build the iverilog simulation binary
+
+isim: tb_s6ddrphy # build the binary if needed, then run it
+	./tb_s6ddrphy
+
+cversim: $(SOURCES) # feed the same source list directly to cver
+	cver $(SOURCES)
+
+clean: # remove the simulation binary, the log file and the VCD dump
+	rm -f tb_s6ddrphy verilog.log s6ddrphy.vcd
+
+tb_s6ddrphy: $(SOURCES) # compile testbench, PHY and unisim models ($(XILINX) must be set)
+	iverilog -o tb_s6ddrphy $(SOURCES)
+
+.PHONY: all clean isim cversim
diff --git a/tb/s6ddrphy/tb_s6ddrphy.v b/tb/s6ddrphy/tb_s6ddrphy.v
new file mode 100644
index 000000000..f9341bf9f
--- /dev/null
+++ b/tb/s6ddrphy/tb_s6ddrphy.v
@@ -0,0 +1,125 @@
+`timescale 1ns / 1ps
+
+module tb_s6ddrphy(); // testbench: drives one DFI write into the s6ddrphy DUT and dumps a VCD
+
+reg sys_clk = 1'b0; // 12 ns period
+reg clk2x_270 = 1'b0; // 6 ns period, first rising edge at 4.5 ns
+reg clk4x_wr = 1'b0; // 3 ns period
+wire clk4x_wr_strb; // SERDESSTROBE output of the BUFPLL below
+wire clk4x_rd = clk4x_wr; // the read clock reuses the write clock
+wire clk4x_rd_strb = clk4x_wr_strb; // the read strobe reuses the write strobe
+
+initial begin // sys_clk generator: 6 ns high, 6 ns low, rising at time 0
+	while(1) begin
+		sys_clk <= 1'b1;
+		#6;
+		sys_clk <= 1'b0;
+		#6;
+	end
+end
+
+initial begin // clk2x_270 generator
+	#4.5; // initial offset = 270 degrees of the 6 ns period
+	while(1) begin
+		clk2x_270 <= 1'b1;
+		#3;
+		clk2x_270 <= 1'b0;
+		#3;
+	end
+end
+
+initial begin // clk4x_wr generator: 1.5 ns high, 1.5 ns low, rising at time 0
+	while(1) begin
+		clk4x_wr <= 1'b1;
+		#1.5;
+		clk4x_wr <= 1'b0;
+		#1.5;
+	end
+end
+
+BUFPLL #( // produces the SERDES strobe from clk4x_wr and sys_clk
+	.DIVIDE(4)
+) bufpll (
+	.PLLIN(clk4x_wr),
+	.GCLK(sys_clk),
+	.LOCKED(1'b1), // lock input tied high: there is no PLL in this testbench
+	.IOCLK(), // unused here, clk4x_wr is wired to the DUT directly
+	.LOCK(),
+	.SERDESSTROBE(clk4x_wr_strb)
+);
+
+reg [12:0] dfi_address_p0 = 0; // DFI stimulus registers, all zero at time 0
+reg [12:0] dfi_address_p1 = 0;
+
+reg dfi_wrdata_en_p0 = 0;
+reg [7:0] dfi_wrdata_mask_p0 = 0; // masks are never changed by the stimulus below
+reg [63:0] dfi_wrdata_p0 = 0;
+reg dfi_wrdata_en_p1 = 0;
+reg [7:0] dfi_wrdata_mask_p1 = 0;
+reg [63:0] dfi_wrdata_p1 = 0;
+
+s6ddrphy #( // device under test
+	.NUM_AD(13),
+	.NUM_BA(2),
+	.NUM_D(64)
+) dut (
+	.sys_clk(sys_clk),
+	.clk2x_270(clk2x_270),
+	.clk4x_wr(clk4x_wr),
+	.clk4x_wr_strb(clk4x_wr_strb),
+	.clk4x_rd(clk4x_rd),
+	.clk4x_rd_strb(clk4x_rd_strb),
+	
+	.sd_clk_out_p(), // sd_* ports are left unconnected in this testbench
+	.sd_clk_out_n(),
+	
+	.dfi_address_p0(dfi_address_p0),
+	.dfi_address_p1(dfi_address_p1),
+	.sd_a(),
+	
+	.dfi_wrdata_en_p0(dfi_wrdata_en_p0),
+	.dfi_wrdata_mask_p0(dfi_wrdata_mask_p0),
+	.dfi_wrdata_p0(dfi_wrdata_p0),
+	.dfi_wrdata_en_p1(dfi_wrdata_en_p1),
+	.dfi_wrdata_mask_p1(dfi_wrdata_mask_p1),
+	.dfi_wrdata_p1(dfi_wrdata_p1),
+	.sd_dq(),
+	.sd_dm(),
+	.sd_dqs()
+);
+
+initial begin // stimulus and waveform dump
+	$dumpfile("s6ddrphy.vcd");
+	$dumpvars(3, dut); // dump the DUT hierarchy, three levels deep
+	#13; // 1 ns after the sys_clk rising edge at 12 ns
+	
+	/*dfi_address_p0 <= 13'h1aba;
+	dfi_address_p1 <= 13'h1234;
+	#12;
+	dfi_address_p0 <= 0;
+	dfi_address_p1 <= 0;
+	#60;*/
+	
+	dfi_address_p0 <= 13'h0dea; // present address, write data and write enable on both phases
+	dfi_address_p1 <= 13'h0dbe;
+	dfi_wrdata_p0 <= 64'hcafebabeabadface;
+	dfi_wrdata_p1 <= 64'h0123456789abcdef;
+	dfi_wrdata_en_p0 <= 1'b1;
+	dfi_wrdata_en_p1 <= 1'b1;
+	#12; // hold the stimulus for one sys_clk period
+	dfi_address_p0 <= 0; // then return every driven signal to zero
+	dfi_address_p1 <= 0;
+	dfi_wrdata_p0 <= 64'd0;
+	dfi_wrdata_p1 <= 64'd0;
+	dfi_wrdata_en_p0 <= 1'b0;
+	dfi_wrdata_en_p1 <= 1'b0;
+	#60; // keep simulating for five more sys_clk periods before stopping
+	$finish;
+end
+
+endmodule
+
+module glbl(); // stub with GSR and GTS tied low; presumably what the unisim models reference - TODO confirm
+wire GSR = 1'b0;
+wire GTS = 1'b0;
+endmodule
diff --git a/top.py b/top.py
index a4d898861..366560745 100644
--- a/top.py
+++ b/top.py
@@ -18,7 +18,7 @@ dfi_d = 64
 
 def ddrphy_clocking(crg, phy):
 	names = [
-		"clk2x_90",
+		"clk2x_270",
 		"clk4x_wr",
 		"clk4x_wr_strb",
 		"clk4x_rd",
diff --git a/verilog/m1crg/m1crg.v b/verilog/m1crg/m1crg.v
index 0ab044105..94d00b6f4 100644
--- a/verilog/m1crg/m1crg.v
+++ b/verilog/m1crg/m1crg.v
@@ -33,7 +33,7 @@ module m1crg #(
 	output flash_rst_n,
 	
 	/* DDR PHY clocks */
-	output clk2x_90,
+	output clk2x_270,
 	output clk4x_wr,
 	output clk4x_wr_strb,
 	output clk4x_rd,
@@ -122,7 +122,7 @@ PLL_ADV #(
 	.CLKOUT1_PHASE(0),
 	.CLKOUT2_DIVIDE(2*f_div),
 	.CLKOUT2_DUTY_CYCLE(0.5),
-	.CLKOUT2_PHASE(90.0),
+	.CLKOUT2_PHASE(270.0),
 	.CLKOUT3_DIVIDE(4*f_div),
 	.CLKOUT3_DUTY_CYCLE(0.5),
 	.CLKOUT3_PHASE(0.0),
@@ -192,7 +192,7 @@ BUFPLL #(
 
 BUFG bufg_x2_2(
 	.I(pllout2),
-	.O(clk2x_90)
+	.O(clk2x_270)
 );
 
 BUFG bufg_x1(
diff --git a/verilog/s6ddrphy/s6ddrphy.v b/verilog/s6ddrphy/s6ddrphy.v
index a1716bdf3..54a23e44b 100644
--- a/verilog/s6ddrphy/s6ddrphy.v
+++ b/verilog/s6ddrphy/s6ddrphy.v
@@ -3,9 +3,9 @@
  *
  * Command path:
  *   posedge sys_clk             + 1
- *   negedge clk2x_90            + 0.375
- *   negedge clk2x_90            + 0.5
- * Command latency:              1.875 cycles
+ *   posedge clk2x_270           + 0.375
+ *   negedge clk2x_270           + 0.25
+ * Command latency:              1.625 cycles
  *
  * Data write path (phase 0, word 0):
  *   posedge sys_clk [oserdes]   + 1
@@ -14,9 +14,9 @@
  *
  * DQS OE path:
  *   posedge sys_clk             + 1
- *   negedge clk2x_90            + 0.375
- *   negedge clk2x_90 [oddr]     + 0.5
- * DQS OE latency                1.875 cycles
+ *   posedge clk2x_270           + 0.375
+ *   negedge clk2x_270 [oddr]    + 0.125
+ * DQS OE latency                1.5 cycles
  *
  * Data read path:
  */
@@ -27,7 +27,7 @@ module s6ddrphy #(
 ) (
 	/* Clocks */
 	input sys_clk,
-	input clk2x_90,
+	input clk2x_270,
 	input clk4x_wr,
 	input clk4x_wr_strb,
 	input clk4x_rd,
@@ -87,8 +87,8 @@ ODDR2 #(
 	.SRTYPE("SYNC")
 ) sd_clk_forward_p (
 	.Q(sd_clk_out_p),
-	.C0(clk2x_90),
-	.C1(~clk2x_90),
+	.C0(clk2x_270),
+	.C1(~clk2x_270),
 	.CE(1'b1),
 	.D0(1'b1),
 	.D1(1'b0),
@@ -101,8 +101,8 @@ ODDR2 #(
 	.SRTYPE("SYNC")
 ) sd_clk_forward_n (
 	.Q(sd_clk_out_n),
-	.C0(clk2x_90),
-	.C1(~clk2x_90),
+	.C0(clk2x_270),
+	.C1(~clk2x_270),
 	.CE(1'b1),
 	.D0(1'b0),
 	.D1(1'b1),
@@ -115,7 +115,7 @@ ODDR2 #(
  */
 
 reg phase_sel;
-always @(negedge clk2x_90)
+always @(negedge clk2x_270)
 	phase_sel <= sys_clk;
 
 reg [NUM_AD-1:0] r_dfi_address_p0;
@@ -166,7 +166,7 @@ reg r2_dfi_ras_n_p1;
 reg r2_dfi_cas_n_p1;
 reg r2_dfi_we_n_p1;
 	
-always @(negedge clk2x_90) begin
+always @(posedge clk2x_270) begin
 	r2_dfi_address_p0 <= r_dfi_address_p0;
 	r2_dfi_bank_p0 <= r_dfi_bank_p0;
 	r2_dfi_cs_n_p0 <= r_dfi_cs_n_p0;
@@ -184,16 +184,8 @@ always @(negedge clk2x_90) begin
 	r2_dfi_we_n_p1 <= r_dfi_we_n_p1;
 end
 
-always @(negedge clk2x_90) begin
+always @(negedge clk2x_270) begin
 	if(phase_sel) begin
-		sd_a <= r2_dfi_address_p1;
-		sd_ba <= r2_dfi_bank_p1;
-		sd_cs_n <= r2_dfi_cs_n_p1;
-		sd_cke <= r2_dfi_cke_p1;
-		sd_ras_n <= r2_dfi_ras_n_p1;
-		sd_cas_n <= r2_dfi_cas_n_p1;
-		sd_we_n <= r2_dfi_we_n_p1;
-	end else begin
 		sd_a <= r2_dfi_address_p0;
 		sd_ba <= r2_dfi_bank_p0;
 		sd_cs_n <= r2_dfi_cs_n_p0;
@@ -201,6 +193,14 @@ always @(negedge clk2x_90) begin
 		sd_ras_n <= r2_dfi_ras_n_p0;
 		sd_cas_n <= r2_dfi_cas_n_p0;
 		sd_we_n <= r2_dfi_we_n_p0;
+	end else begin
+		sd_a <= r2_dfi_address_p1;
+		sd_ba <= r2_dfi_bank_p1;
+		sd_cs_n <= r2_dfi_cs_n_p1;
+		sd_cke <= r2_dfi_cke_p1;
+		sd_ras_n <= r2_dfi_ras_n_p1;
+		sd_cas_n <= r2_dfi_cas_n_p1;
+		sd_we_n <= r2_dfi_we_n_p1;
 	end
 end
 
@@ -210,10 +210,10 @@ end
 
 genvar i;
 
-wire drive_dqs_p0;
-wire drive_dqs_p1;
+wire drive_dqs;
 wire [NUM_D/16-1:0] dqs_o;
 wire [NUM_D/16-1:0] dqs_t;
+reg postamble;
 generate
 	for(i=0;i<NUM_D/16;i=i+1)
 	begin: gen_dqs
@@ -223,8 +223,8 @@ generate
 			.SRTYPE("ASYNC")
 		) dqs_o_oddr (
 			.Q(dqs_o[i]),
-			.C0(clk2x_90),
-			.C1(~clk2x_90),
+			.C0(clk2x_270),
+			.C1(~clk2x_270),
 			.CE(1'b1),
 			.D0(1'b0),
 			.D1(1'b1),
@@ -237,11 +237,11 @@ generate
 			.SRTYPE("ASYNC")
 		) dqs_t_oddr (
 			.Q(dqs_t[i]),
-			.C0(clk2x_90),
-			.C1(~clk2x_90),
+			.C0(clk2x_270),
+			.C1(~clk2x_270),
 			.CE(1'b1),
-			.D0(~drive_dqs_p0),
-			.D1(~drive_dqs_p1),
+			.D0(~(drive_dqs | postamble)),
+			.D1(~drive_dqs),
 			.R(1'b0),
 			.S(1'b0)
 		);
@@ -252,9 +252,10 @@ generate
 		);
 	end
 endgenerate
+always @(posedge clk2x_270)
+	postamble <= drive_dqs;
 
-wire drive_dq_p0;
-wire drive_dq_p1;
+wire drive_dq;
 wire [NUM_D/2-1:0] dq_i;
 wire [NUM_D/2-1:0] dq_o;
 wire [NUM_D/2-1:0] dq_t;
@@ -273,17 +274,17 @@ generate
 			.CLK0(clk4x_wr),
 			.CLK1(1'b0),
 			.IOCE(clk4x_wr_strb),
-			.RST(),
+			.RST(1'b0),
 			.CLKDIV(sys_clk),
-			.D1(dfi_wrdata_p0[2*i]),
-			.D2(dfi_wrdata_p0[2*i+1]),
-			.D3(dfi_wrdata_p1[2*i]),
-			.D4(dfi_wrdata_p1[2*i+1]),
+			.D1(dfi_wrdata_p0[i+NUM_D/2]),
+			.D2(dfi_wrdata_p0[i]),
+			.D3(dfi_wrdata_p1[i+NUM_D/2]),
+			.D4(dfi_wrdata_p1[i]),
 			.TQ(dq_t[i]),
-			.T1(~drive_dq_p0),
-			.T2(~drive_dq_p0),
-			.T3(~drive_dq_p1),
-			.T4(~drive_dq_p1),
+			.T1(~drive_dq),
+			.T2(~drive_dq),
+			.T3(~drive_dq),
+			.T4(~drive_dq),
 			.TRAIN(1'b0),
 			.TCE(1'b1),
 			.SHIFTIN1(1'b0),
@@ -307,15 +308,15 @@ generate
 			.CLK0(clk4x_rd),
 			.CLK1(1'b0),
 			.IOCE(clk4x_rd_strb),
-			.RST(),
+			.RST(1'b0),
 			.CLKDIV(clk),
 			.SHIFTIN(),
 			.BITSLIP(1'b0),
 			.FABRICOUT(),
-			.Q1(dfi_rddata_w0[2*i]),
-			.Q2(dfi_rddata_w0[2*i+1]),
-			.Q3(dfi_rddata_w1[2*i]),
-			.Q4(dfi_rddata_w1[2*i+1]),
+			.Q1(dfi_rddata_w0[i+NUM_D/2]),
+			.Q2(dfi_rddata_w0[i]),
+			.Q3(dfi_rddata_w1[i+NUM_D/2]),
+			.Q4(dfi_rddata_w1[i]),
 			.DFB(),
 			.CFB0(),
 			.CFB1(),
@@ -347,12 +348,12 @@ generate
 			.CLK0(clk4x_wr),
 			.CLK1(1'b0),
 			.IOCE(clk4x_wr_strb),
-			.RST(),
+			.RST(1'b0),
 			.CLKDIV(sys_clk),
-			.D1(dfi_wrdata_mask_p0[2*i]),
-			.D2(dfi_wrdata_mask_p0[2*i+1]),
-			.D3(dfi_wrdata_mask_p1[2*i]),
-			.D4(dfi_wrdata_mask_p1[2*i+1]),
+			.D1(dfi_wrdata_mask_p0[i+NUM_D/16]),
+			.D2(dfi_wrdata_mask_p0[i]),
+			.D3(dfi_wrdata_mask_p1[i+NUM_D/16]),
+			.D4(dfi_wrdata_mask_p1[i]),
 			.TQ(),
 			.T1(),
 			.T2(),
@@ -387,15 +388,13 @@ end
 reg r2_dfi_wrdata_en_p0;
 reg r2_dfi_wrdata_en_p1;
 
-always @(negedge clk2x_90) begin
+always @(posedge clk2x_270) begin
 	r2_dfi_wrdata_en_p0 <= r_dfi_wrdata_en_p0;
 	r2_dfi_wrdata_en_p1 <= r_dfi_wrdata_en_p1;
 end
 
-assign drive_dqs_p0 = r2_dfi_wrdata_en_p0;
-assign drive_dqs_p1 = r2_dfi_wrdata_en_p1;
-assign drive_dq_p0 = dfi_wrdata_en_p0;
-assign drive_dq_p1 = dfi_wrdata_en_p1;
+assign drive_dqs = r2_dfi_wrdata_en_p0 | r2_dfi_wrdata_en_p1;
+assign drive_dq = dfi_wrdata_en_p0 | dfi_wrdata_en_p1;
 
 // TODO: dfi_rddata_valid_w0/1?