-------------------------------------------------------------------------------- -- -- Copyright (C) 2014 -- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/> -- JIVE (Joint Institute for VLBI in Europe) <http://www.jive.nl/> -- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands -- -- This program is free software: you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation, either version 3 of the License, or -- (at your option) any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program. If not, see <http://www.gnu.org/licenses/>. -- ------------------------------------------------------------------------------- -- Purpose: Provide streaming interface to DDR memory -- Description: -- -- The write FIFO and read FIFO take care of the clock domain crossing that -- is needed if the ctlr_clk is not used for the data path. Furthermore the -- FIFO takes care of repacking the user write and read data into the -- typically wider data width c_ctlr_data_w = 256 for DDR3. -- -- The DDR access starts after a dvr_en pulse. The access can be write or -- read as defined by dvr_wr_not_rd. The size of the access depends the -- address range given by dvr_start_address and dvr_nof_data. The -- dvr_done goes high when the access has finished and it goes low when a -- new access starts. -- -- The dvr_wr_flush_en pulse is recognized only between DDR accesses so when -- the dvr_done is active, otherwise the dvr_wr_flush_en is ignored. The -- dvr_wr_flush_en pulse can be used to flush the write FIFO to avoid that is -- runs full in case a next write access will not happen soon enough. A next -- DDR write access can start on the next valid, sop or sync dependent on -- g_wr_flush_mode. -- -- The c_rd_fifo_af_margin needs to be large enough to fit a number of read -- bursts that may be pending in the DDR controller command queue depth. -- A new rd access can start when ctlr_rd_src_in.ready indicates that there -- is sufficient space in the read FIFO to store g_tech_ddr.maxburstsize -- words. Due to the DDR controller command queue there can be more rd -- bursts already be pending. Therefore the c_rd_fifo_af_margin needs to be -- large enough to fit a number of read bursts. -- Usage: -- . The dvr interface could be connected to a MM register. The DDR memory -- may then be used to capture (large) blocks of streaming data that can -- offline be read via a DP-MM interface (eg. like in bn_capture). During -- the read access the streaming write data then is flushed. -- -- . The dvr interface could be connected to a DP sequencer that can write -- blocks to DDR and read back from DDR. The DP sequencer uses signals -- from its input DP interface. For write access the dvr_en could relate -- to the sop and the dvr_nof_data then equals the nof data from sop to eop. -- The dvr_done can be treated as xon. The dvr_wr_not_rd selects between -- the write stream to DDR access or the read stream from DDR access. For a -- read access the sequencer needs to generate the dvr signals itself. -- -- . The dvr interface is mapped on the t_mem_ctlr_mosi/miso interface: -- -- dvr_miso.done <= dvr_done -- Requested wr or rd sequence is done -- dvr_en <= dvr_mosi.burstbegin -- dvr_wr_not_rd <= dvr_mosi.wr -- No need to use dvr_mosi.rd -- dvr_start_address <= dvr_mosi.address -- dvr_nof_data <= dvr_mosi.burstsize -- dvr_wr_flush_en <= dvr_mosi.flush -- -- Block diagram: -- -- ctlr_wr_fifo_src ctlr_wr_snk ctlr_tech_mosi -- ________ . ______ . _______ . ______ -- wr_fifo_usedw <---|dp_fifo | . |dp | . | | . | | -- wr_sosi --------->|dc_mixed|-+----->|flush |----->| io | . | tech | -- wr_clk --------->|widths | | | | | ddr | . | ddr | -- |________| | |______|<--\ | driver| . | | -- | | | | . | | -- | ctlr_wr_flush_en| | | . | | -- ctlr_wr_fifo_src_out | ______ | | | . | | -- \----->|io_ddr|---/ | | . | | -- dvr_clk ------------> |driver| | | . | |<--- phy_in -- dvr_wr_flush_en ----*-------------->|flush | | |---->| |---> phy_ou -- /---->|ctrl |<--\ | |<----| |<--> phy_io -- |/--->|______| | | | . | | -- || | | | . | | -- dvr_en --*---------+|---------------|->| | . | | -- dvr_wr_not_rd --*----------+---------------|->| | . | | -- dvr_done <-*--------------------------+--| | . | | -- dvr_start_address --*---------------------------->| | . | | -- dvr_nof_data --*---------------------------->| | . | | -- ________ | | . | | -- rd_clk --------->|dp_fifo | | | . | | -- rd_sosi <---------|dc_mixed|<---------------------| | . | | -- rd_fifo_usedw <---|widths | . |_______| . |______|---\ -- |________| . . | -- ctlr_rd_src ctlr_tech_miso | -- | -- ctlr_clk /------ctlr_clk_in -------> | -- \------ctlr_clk_out-----------------------------------------------/ -- -- * = clock domain crossing between dvr_clk and ctlr_clk clock domains. ---- -- Remarks: -- . If the dvr_clk=ctlr_clk then the clock domain crossing logic defaults -- to wires. However dvr_clk could also be the dp_clk or the mm_clk and then -- the clock domain crossing logic is needed. -- . Externally connect ctlr_clk = ctlr_clk_in = ctlr_clk_out -- . Typically wr_clk = rd_clk = dp_clk. -- . To achieve maximum DDR access rate the g_wr_data_w and g_rd_data_w -- typically already need to be equal to the c_ctlr_data_w, because the -- DP clk for wr_clk and rd_clk can typically not run much faster than the -- ctlr_clk. Therefore in practise the mixed width FIFO will often be used -- as equal width FIFO. -- . The main PHY signals are carried by phy_ou and phy_io. The phy_in signals -- are typically not needed. -- . If ctlr_clk is used as dp_clk and connected to wr_clk, rd_clk and dvr_clk -- then still the io_ddr_driver and a equal width rd FIFO are needed. The rd -- FIFO is needed because the DDR controller does not have flow control -- during the read burst. The wr FIFO is not needed provide that the user -- write source does support flow control. If the user write source does not -- support flow control then the wr FIFO is needed and io_ddr needs to be -- used. -- . The flushing does ensure that the write FIFO does not run full. If the -- write input FIFO is a mixed width FIFO with narrow write data, then it -- may not be possible to read the FIFO empty, because a wide data word -- can only be read when it is complete. Typically this behaviour is fine -- in applications, so no need to try to somehow flush an incomplete last -- wide word from the FIFO. -- . The flush control uses ctlr_wr_fifo_src_out and not the wr_sosi, because -- dp_flush needs to be at the read side of the FIFO. -- . The dvr_wr_flush_en is mapped to the dvr_mosi.flush in the -- t_mem_ctlr_mosi. This is a bit awkward, because flush is not an Avalon -- MM interface signal. However some external control needs to decide on -- the need to flush or not, because that cannot be decided internally? An -- option could be to automatically trigger a flush event when the write -- FIFO runs almost full as indicated by wr_siso.ready. This scheme would -- require that there is never a need to flush as long as the FIFO has not -- run full. library IEEE, technology_lib, tech_ddr_lib, common_lib, dp_lib; use IEEE.std_logic_1164.all; use common_lib.common_pkg.all; use common_lib.common_mem_pkg.all; use technology_lib.technology_select_pkg.all; use technology_lib.technology_pkg.all; use tech_ddr_lib.tech_ddr_pkg.all; use dp_lib.dp_stream_pkg.all; entity io_ddr is generic( g_sim_model : boolean := false; -- when FALSE use IP and external DDR3 model, else when TRUE: use fast behavioural model, requires no external memory (uses memory array). g_technology : natural := c_tech_select_default; g_tech_ddr : t_c_tech_ddr; g_cross_domain_dvr_ctlr : boolean := true; -- use TRUE when MM clock is used for the access control, use FALSE when ctlr_clk_in=ctlr_clk_out is used to avoid extra latency g_wr_data_w : natural := 32; g_wr_fifo_depth : natural := 256; -- defined at DDR side of the FIFO, >=16 and independent of wr burst size, default >= 256 because 32b*256 fits in 1 M9K so c_ctlr_data_w=256b will require 8 M9K g_rd_fifo_depth : natural := 256; -- defined at DDR side of the FIFO, >=16 AND > max number of rd burst sizes (so > c_rd_fifo_af_margin), default >= 256 because 32b*256 fits in 1 M9K so c_ctlr_data_w=256b will require 8 M9K g_rd_data_w : natural := 32; g_wr_flush_mode : string := "VAL"; -- "VAL", "SOP", "SYN" g_wr_flush_use_channel : boolean := false; g_wr_flush_start_channel : natural := 0; g_wr_flush_nof_channels : positive := 1 ); port ( -- DDR reference clock ctlr_ref_clk : in std_logic; ctlr_ref_rst : in std_logic; -- DDR controller clock domain ctlr_clk_out : out std_logic; ctlr_rst_out : out std_logic; ctlr_clk_in : in std_logic; -- connect ctlr_clk_out to ctlr_clk_in at top level to avoid potential delta-cycle differences between the same clock ctlr_rst_in : in std_logic; -- connect ctlr_rst_out to ctlr_rst_in at top level -- MM clock + reset mm_rst : in std_logic := '1'; mm_clk : in std_logic := '0'; -- MM interface reg_io_ddr_mosi : in t_mem_mosi := c_mem_mosi_rst; -- register for DDR controller status info reg_io_ddr_miso : out t_mem_miso; state_vec : out std_logic_vector(1 downto 0); -- Driver clock domain dvr_clk : in std_logic; dvr_rst : in std_logic; dvr_miso : out t_mem_ctlr_miso; dvr_mosi : in t_mem_ctlr_mosi; -- Write FIFO clock domain wr_clk : in std_logic; wr_rst : in std_logic; wr_fifo_usedw : out std_logic_vector(ceil_log2(g_wr_fifo_depth * (func_tech_ddr_ctlr_data_w(g_tech_ddr) / g_wr_data_w) ) - 1 downto 0); -- for monitoring purposes wr_sosi : in t_dp_sosi; wr_siso : out t_dp_siso; ctlr_wr_flush_en_o : out std_logic; -- Read FIFO clock domain rd_clk : in std_logic; rd_rst : in std_logic; rd_fifo_usedw : out std_logic_vector(ceil_log2(g_rd_fifo_depth * (func_tech_ddr_ctlr_data_w(g_tech_ddr) / g_rd_data_w) ) - 1 downto 0); rd_sosi : out t_dp_sosi; rd_siso : in t_dp_siso; term_ctrl_out : out t_tech_ddr3_phy_terminationcontrol; term_ctrl_in : in t_tech_ddr3_phy_terminationcontrol := c_tech_ddr3_phy_terminationcontrol_rst; -- DDR3 PHY external interface phy3_in : in t_tech_ddr3_phy_in := c_tech_ddr3_phy_in_x; phy3_io : inout t_tech_ddr3_phy_io; phy3_ou : out t_tech_ddr3_phy_ou; -- DDR4 PHY external interface phy4_in : in t_tech_ddr4_phy_in := c_tech_ddr4_phy_in_x; phy4_io : inout t_tech_ddr4_phy_io; phy4_ou : out t_tech_ddr4_phy_ou; -- DDR Calibration result ddr_cal_ok : out std_logic := '0' ); end io_ddr; architecture str of io_ddr is constant c_wr_use_sync : boolean := sel_a_b(g_wr_flush_mode = "SYN", true, false); constant c_wr_use_ctrl : boolean := sel_a_b(g_wr_flush_mode = "SOP", true, false); constant c_wr_fifo_use_ctrl : boolean := c_wr_use_sync or c_wr_use_ctrl; constant c_ddr_nofbytes_w : natural := func_tech_ddr_module_nofbytes_w(g_tech_ddr); -- log2(number of bytes) constant c_ddr_gigabytes : integer := func_tech_ddr_module_gigabytes(g_tech_ddr); -- units value GiByte when value > 1 or 2**value GiByte when value < 0 constant c_ctlr_nof_bytes_per_word : natural := func_tech_ddr_ctlr_ip_data_w(g_tech_ddr) / c_byte_w; -- unit byte constant c_ctlr_address_w : natural := func_tech_ddr_ctlr_address_w(g_tech_ddr); constant c_ctlr_data_w : natural := func_tech_ddr_ctlr_data_w(g_tech_ddr); constant c_wr_fifo_depth : natural := g_wr_fifo_depth * (c_ctlr_data_w / g_wr_data_w); -- get FIFO depth at write side constant c_wr_fifo_af_margin : natural := 8 + 1; -- use 8 (>= 4 default) to be safe and use +1 to compensate for latency introduced by registering wr_siso.ready due to RL=0 constant c_nof_rd_bursts_max : natural := sel_a_b(g_tech_ddr.name = "DDR3", 1, 3); -- max number of rd bursts in queue, derived empirically from simulation, seems fixed 1 for DDR3 and seems to match (g_tech_ddr.command_queue_depth-1)/2 for DDR4 constant c_rd_fifo_af_margin : natural := 8 + c_nof_rd_bursts_max * g_tech_ddr.maxburstsize; -- use 8 (>= 4 default) to be safe and use sufficient extra margin to fit one or more rd burst accesses of g_tech_ddr.maxburstsize each constant c_mem_reg_adr_w : natural := 2; constant c_mem_reg_dat_w : natural := 32; constant c_mem_reg_nof_data : natural := 4; constant c_mem_reg_io_ddr : t_c_mem := (c_mem_reg_rd_latency, c_mem_reg_adr_w , c_mem_reg_dat_w , c_mem_reg_nof_data, 'X'); signal ctlr_dvr_miso : t_mem_ctlr_miso; signal ctlr_dvr_mosi : t_mem_ctlr_mosi; signal ctlr_tech_mosi : t_mem_ctlr_mosi := c_mem_ctlr_mosi_rst; signal ctlr_tech_miso : t_mem_ctlr_miso := c_mem_ctlr_miso_rst; signal ctlr_wr_flush_en : std_logic := '0'; signal wr_fifo_snk_in : t_dp_sosi; signal ctlr_wr_fifo_src_in : t_dp_siso; signal ctlr_wr_fifo_src_out : t_dp_sosi := c_dp_sosi_rst; signal ctlr_wr_flush_snk_in : t_dp_sosi := c_dp_sosi_rst; signal ctlr_wr_snk_out : t_dp_siso := c_dp_siso_rdy; -- default xon='1' signal ctlr_wr_snk_in : t_dp_sosi := c_dp_sosi_rst; signal ctlr_rd_src_in : t_dp_siso; signal ctlr_rd_src_out : t_dp_sosi := c_dp_sosi_rst; -- Monitor only signal ctlr_wr_fifo_usedw : std_logic_vector(ceil_log2(g_wr_fifo_depth) - 1 downto 0); -- read side depth of the write FIFO signal ctlr_rd_fifo_usedw : std_logic_vector(ceil_log2(g_rd_fifo_depth) - 1 downto 0); -- write side depth of the read FIFO signal reg_rd_arr : std_logic_vector(c_mem_reg_io_ddr.nof_dat - 1 downto 0); signal wr_fifo_full : std_logic; signal wr_fifo_full_reg : std_logic; signal rd_fifo_full : std_logic; signal rd_fifo_full_reg : std_logic; signal dp_flush_snk_in : t_dp_sosi := c_dp_sosi_rst; signal ctlr_rst_out_i : std_logic; signal mm_reg_io_ddr : std_logic_vector(c_mem_reg_nof_data * c_mem_reg_dat_w - 1 downto 0); begin ctlr_wr_flush_en_o <= ctlr_wr_flush_en; u_io_ddr_cross_domain : entity work.io_ddr_cross_domain generic map ( g_cross_domain => g_cross_domain_dvr_ctlr, g_delay_len => c_meta_delay_len ) port map( -- Driver clock domain dvr_clk => dvr_clk, dvr_rst => dvr_rst, dvr_done => dvr_miso.done, dvr_en => dvr_mosi.burstbegin, dvr_wr_not_rd => dvr_mosi.wr, dvr_start_address => dvr_mosi.address, dvr_nof_data => dvr_mosi.burstsize, dvr_wr_flush_en => dvr_mosi.flush, -- DDR controller clock domain ctlr_clk => ctlr_clk_in, ctlr_rst => ctlr_rst_in, ctlr_dvr_done => ctlr_dvr_miso.done, ctlr_dvr_en => ctlr_dvr_mosi.burstbegin, ctlr_dvr_wr_not_rd => ctlr_dvr_mosi.wr, ctlr_dvr_start_address => ctlr_dvr_mosi.address, ctlr_dvr_nof_data => ctlr_dvr_mosi.burstsize, ctlr_dvr_wr_flush_en => ctlr_dvr_mosi.flush ); p_wr_fifo_snk_in : process (wr_sosi) begin wr_fifo_snk_in <= wr_sosi; if c_wr_use_sync = true then -- Work around : Transport sync via sop through the dp_fifo_dc_mixed_widths wr_fifo_snk_in.sop <= wr_sosi.sync; wr_fifo_snk_in.eop <= '0'; end if; end process; u_wr_fifo : entity dp_lib.dp_fifo_dc_mixed_widths generic map ( g_technology => g_technology, g_wr_data_w => g_wr_data_w, g_rd_data_w => c_ctlr_data_w, g_use_ctrl => c_wr_fifo_use_ctrl, g_wr_fifo_size => c_wr_fifo_depth, g_wr_fifo_af_margin => c_wr_fifo_af_margin, g_rd_fifo_rl => 0 ) port map ( wr_rst => wr_rst, wr_clk => wr_clk, rd_rst => ctlr_rst_in, rd_clk => ctlr_clk_in, snk_out => wr_siso, snk_in => wr_fifo_snk_in, wr_ful => wr_fifo_full, wr_usedw => wr_fifo_usedw, rd_usedw => ctlr_wr_fifo_usedw, rd_emp => OPEN, src_in => ctlr_wr_fifo_src_in, src_out => ctlr_wr_fifo_src_out ); u_dp_flush : entity dp_lib.dp_flush generic map ( g_ready_latency => 0, g_framed_xon => c_wr_fifo_use_ctrl, -- stop flushing when flush_en is low and a sop (or sync via sop) has arrived g_framed_xoff => false -- immediately start flushing when flush_en goes high ) port map ( rst => ctlr_rst_in, clk => ctlr_clk_in, snk_in => ctlr_wr_fifo_src_out, snk_out => ctlr_wr_fifo_src_in, src_out => ctlr_wr_snk_in, src_in => ctlr_wr_snk_out, flush_en => ctlr_wr_flush_en ); p_ctlr_wr_flush_snk_in : process (ctlr_wr_fifo_src_out) begin ctlr_wr_flush_snk_in <= ctlr_wr_fifo_src_out; if c_wr_use_sync = true then -- Work around : Transport sync via sop through the dp_fifo_dc_mixed_widths ctlr_wr_flush_snk_in.sync <= ctlr_wr_fifo_src_out.sop; ctlr_wr_flush_snk_in.sop <= '0'; end if; end process; u_io_ddr_driver_flush_ctrl : entity work.io_ddr_driver_flush_ctrl generic map ( g_mode => g_wr_flush_mode, g_use_channel => g_wr_flush_use_channel, g_start_channel => g_wr_flush_start_channel, g_nof_channels => g_wr_flush_nof_channels ) port map ( rst => ctlr_rst_in, clk => ctlr_clk_in, -- Inputs dvr_en => ctlr_dvr_mosi.burstbegin, dvr_wr_not_rd => ctlr_dvr_mosi.wr, dvr_wr_flush_en => ctlr_dvr_mosi.flush, dvr_done => ctlr_dvr_miso.done, ctlr_wr_sosi => ctlr_wr_flush_snk_in, -- Output ctlr_wr_flush_en => ctlr_wr_flush_en, state_vec => state_vec ); assert g_rd_fifo_depth > c_rd_fifo_af_margin report "io_ddr: rd FIFO depth must be > almost full margin." severity FAILURE; u_rd_fifo : entity dp_lib.dp_fifo_dc_mixed_widths generic map ( g_technology => g_technology, g_wr_data_w => c_ctlr_data_w, g_rd_data_w => g_rd_data_w, g_use_ctrl => false, g_wr_fifo_size => g_rd_fifo_depth, g_wr_fifo_af_margin => c_rd_fifo_af_margin, -- >=4 (required by dp_fifo) g_rd_fifo_rl => 1 ) port map ( wr_rst => ctlr_rst_in, wr_clk => ctlr_clk_in, rd_rst => rd_rst, rd_clk => rd_clk, snk_out => ctlr_rd_src_in, snk_in => ctlr_rd_src_out, wr_ful => rd_fifo_full, wr_usedw => ctlr_rd_fifo_usedw, rd_usedw => rd_fifo_usedw, rd_emp => OPEN, src_in => rd_siso, src_out => rd_sosi ); u_io_ddr_driver : entity work.io_ddr_driver generic map ( g_tech_ddr => g_tech_ddr ) port map ( rst => ctlr_rst_in, clk => ctlr_clk_in, dvr_miso => ctlr_dvr_miso, dvr_mosi => ctlr_dvr_mosi, wr_snk_in => ctlr_wr_snk_in, wr_snk_out => ctlr_wr_snk_out, rd_src_out => ctlr_rd_src_out, rd_src_in => ctlr_rd_src_in, ctlr_miso => ctlr_tech_miso, ctlr_mosi => ctlr_tech_mosi ); u_tech_ddr : entity tech_ddr_lib.tech_ddr generic map ( g_sim_model => g_sim_model, g_technology => g_technology, g_tech_ddr => g_tech_ddr ) port map ( -- PLL reference clock ref_clk => ctlr_ref_clk, ref_rst => ctlr_ref_rst, -- Controller user interface ctlr_gen_clk => ctlr_clk_out, ctlr_gen_rst => ctlr_rst_out_i, ctlr_gen_clk_2x => OPEN, ctlr_gen_rst_2x => OPEN, ctlr_mosi => ctlr_tech_mosi, ctlr_miso => ctlr_tech_miso, term_ctrl_out => term_ctrl_out, term_ctrl_in => term_ctrl_in, -- DDR3 PHY interface phy3_in => phy3_in, phy3_io => phy3_io, phy3_ou => phy3_ou, -- DDR4 PHY interface phy4_in => phy4_in, phy4_io => phy4_io, phy4_ou => phy4_ou ); ctlr_rst_out <= ctlr_rst_out_i; ddr_cal_ok <= ctlr_tech_miso.done; u_wr_fifo_full : entity common_lib.common_switch generic map( g_priority_lo => true ) port map( rst => ctlr_rst_in, clk => ctlr_clk_in, switch_high => wr_fifo_full, switch_low => reg_rd_arr(3), out_level => wr_fifo_full_reg ); u_rd_fifo_full : entity common_lib.common_switch generic map( g_priority_lo => true ) port map( rst => ctlr_rst_in, clk => ctlr_clk_in, switch_high => rd_fifo_full, switch_low => reg_rd_arr(3), out_level => rd_fifo_full_reg ); mm_reg_io_ddr <= RESIZE_UVEC(rd_fifo_full_reg & wr_fifo_full_reg, c_mem_reg_dat_w) & RESIZE_UVEC(ctlr_wr_fifo_usedw, c_mem_reg_dat_w) & RESIZE_UVEC(ctlr_rd_fifo_usedw, c_mem_reg_dat_w) & RESIZE_UVEC(TO_SVEC(c_ddr_gigabytes, 8) & TO_UVEC(c_ctlr_nof_bytes_per_word, 8) & ctlr_tech_mosi.wr & ctlr_tech_miso.rdval & ctlr_tech_miso.cal_fail & ctlr_tech_miso.cal_ok & ctlr_rst_out_i & ctlr_wr_flush_en & ctlr_tech_miso.waitrequest_n & ctlr_tech_miso.done, c_mem_reg_dat_w); u_reg_map : entity common_lib.common_reg_r_w_dc generic map ( g_cross_clock_domain => true, g_in_new_latency => 0, g_readback => false, g_reg => c_mem_reg_io_ddr, g_init_reg => (others => '0') ) port map ( -- Clocks and reset mm_rst => mm_rst, mm_clk => mm_clk, st_rst => ctlr_rst_in, st_clk => ctlr_clk_in, -- Memory Mapped Slave in mm_clk domain sla_in => reg_io_ddr_mosi, sla_out => reg_io_ddr_miso, -- MM registers in st_clk domain reg_wr_arr => OPEN, reg_rd_arr => reg_rd_arr, in_new => '1', in_reg => mm_reg_io_ddr, out_reg => open ); end str;