diff --git a/applications/lofar2/doc/sdp_timing_closure.txt b/applications/lofar2/doc/sdp_timing_closure.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6067d25de915855fc7ab8c401262e8eccbf5f6b --- /dev/null +++ b/applications/lofar2/doc/sdp_timing_closure.txt @@ -0,0 +1,122 @@ +Title: SDP Firmware timing closure investigations and solutions +Author: Eric Kooistra +References: + [1] https://support.astron.nl/confluence/display/SBe/Asynchronous+reset+synchronization + +1) General: +* In L2SDP-1005 (19 aug 2024) the common_areset.vhd was improved with g_tree_len = 1. From Eric to Reinier in + Slack 6 aug 2024: + + Hoi Reinier, kun je deze voor mij reviewen https://git.astron.nl/rtsd/hdl/-/merge_requests/417 . Ik heb de + common_pipeline_sl vervangen door een common_async omdat dat beter past op het niveau van common_areset. Verder heb + ik default g_tree_len = 1 gezet voor minimale extra latency. Ik zag dat de regressie test op SDP ARTS faalde. + Misschien lag dat aan common_areset met te lange g_tree_len. Ik heb gezocht op waar common_areset wordt gebruikt + in onze code, met name in ctrl_unb2c_board.vhd, maar kon niet direct een oorzaak vinden. Missichien moet + default g_tree_len = 0 en dan alleen g_tree_len > 0 waar dat nodig is. Maar eerst wou ik het maar met minimale + g_tree_len = 1 default proberen. + +* ==> c_dp_sosi_rst with 'X' for data, info fields or use RESET_DP_SOSI_CTRL() [RTSD-123, RTSD-129] + Issue is that using 'X' or '-' in c_dp_sosi_rst causes lots of Modelsim warnings about conversion from slv to int + +* The PPS capture sometimes fails to meet timing. It is special because it also uses the falling edge of CLK. 
So far + we can ignore this because FPGA_pps_capture_cnt_R and FPGA_pps_error_cnt_R seem OK on hardware: + + u_revision|u_ctrl|u_mms_ppsh|u_ppsh|u_in|u_ddio_in|gen_ip_arria10_e2sg.u0|gen_w[0].u_ip_arria10_e2sg_ddio_in_1| + gpio_0|core|i_loop[0].altera_gpio_bit_i|input_path.in_path_fr.buffer_data_in_fr_ddio~ddio_in_fr__nff to + u_revision|u_ctrl|u_mms_ppsh|u_ppsh|pps_ext_cap + + +2) RTSD-124c (24 feb 2025) : Fix asynchronous reset timing violations + +The changes marked with DONE are sufficient to achieve timing closure for CLK 5 ns and are applied in RTSD-124c merge. + +CLK timing violations (10000 paths in lofar2_unb2c_sdp_station_full, in disturb2_unb2c_sdp_station_full almost 10000 +fail): +* common_areset_dp_rst/with_pipe/u_pipe/din_meta[0] to gen_cross in some MM reg + - u_revision|u_sdp_station|gen_use_xsub.u_xsub|u_sdp_crosslets_remote|u_mmp_dp_bsn_align_v2| + gen_bsn_mon_input.u_bsn_mon_input|gen_stream[6].u_reg|u_reg|gen_cross.gen_rd.u_in_vector|in_buf_reg[145] + - u_revision|u_sdp_station|gen_use_ring.gen_xst_ring.u_ring_lane_xst|u_ring_tx| + gen_bsn_monitors.u_mms_dp_bsn_monitor_v2|gen_stream[0].u_reg|u_reg|gen_cross.gen_rd.u_in_vector|in_buf_reg[35] + - u_revision|u_sdp_station|gen_use_bf.gen_bf[1].u_bf|u_sdp_bst_udp_offload|u_dp_offload_tx_v3| + gen_dp_field_blk[0].u_dp_field_blk|u_mm_fields_slv|u_common_reg_r_w_dc|gen_cross.u_out_reg|i_out_dat[1291] + - u_revision|u_ctrl|gen_eth.u_eth|u_rx_hdr_info|u_hdr_store|i_hdr_words_arr[3][29] + ==> TRIED: Use g_tree_len = 2 instead of 1 or fix it by adding pipeline to out_rst in common_reg_cross_domain + and by using out_rst as synchronous rst. Same for in_rst, because timing violations also occur with in_rst and + in_buf_reg[]. + Using g_tree_len = 2 does not help for timing closure in u_common_fifo_sc, the timing errors then occur from + din_meta[1]. 
+ ==> TRIED: Do not reset out_en with out_rst in common_reg_cross_domain + ==> DONE: Pipeline in_rst and out_rst in common_reg_cross_domain fixes gen_cross errors + ==> NOT TRIED: Do not use in_rst and out_rst in common_reg_cross_domain + ==> NOT TRIED: Use in_rst and out_rst as synchronous reset in common_reg_cross_domain + +* common_areset_dp_rst/with_pipe/u_pipe/din_meta[0] to r.taps_out_vec in: + - u_revision|u_sdp_station|gen_use_fsub.gen_use_no_oversample.u_fsub|u_wpfb_unit_dev| + gen_pfb.gen_prefilter.u_filter|gen_fil_ppf_singles[0].u_fil_ppf_single|u_fil_ctrl|r.taps_out_vec[481] + ==> DONE: Do not rst r.taps_out_vec, fixes u_fil_ctrl errors + +* common_areset_dp_rst/with_pipe/u_pipe/din_meta[0] to u_dp_fifo_sc in: + - u_revision|u_sdp_station|gen_use_ring.gen_bf_ring.gen_beamset_ring[0].u_ring_lane_bf|u_ring_tx|u_dp_fifo_sc| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|u_fifo|gen_ip_arria10_e2sg.u0|u_scfifo|auto_generated| + dpfifo|FIFOram|q_b[63] + - u_revision|u_sdp_station|gen_use_ring.gen_bf_ring.gen_beamset_ring[1].u_ring_lane_bf|u_ring_tx|u_dp_fifo_sc| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|u_fifo|gen_ip_arria10_e2sg.u0|u_scfifo|auto_generated| + dpfifo|FIFOram|q_b[65] + - u_revision|u_sdp_station|gen_use_ring.gen_xst_ring.u_ring_lane_xst|u_ring_tx|u_dp_fifo_sc| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|u_fifo|gen_ip_arria10_e2sg.u0|u_scfifo|auto_generated| + dpfifo|FIFOram|q_b[60] + - u_revision|u_sdp_station|gen_use_ring.gen_bf_ring.gen_beamset_ring[1].u_ring_lane_bf|u_ring_tx|u_dp_fifo_sc| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|u_fifo|gen_ip_arria10_e2sg.u0|u_scfifo|auto_generated| + dpfifo|usedw_counter|counter_reg_bit[3] + - u_revision|u_sdp_station|gen_use_ring.gen_xst_ring.u_ring_lane_xst|u_ring_tx|u_dp_fifo_sc| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|u_fifo|gen_ip_arria10_e2sg.u0|u_scfifo|auto_generated| + dpfifo|FIFOram|ram_block1a42~reg1 + - 
u_revision|u_sdp_station|gen_use_ring.gen_xst_ring.u_ring_lane_xst|u_ring_tx|u_dp_fifo_sc| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|u_fifo|gen_ip_arria10_e2sg.u0|u_scfifo|auto_generated| + dpfifo|FIFOram|ram_block1a19~reg1 + ==> TRIED: pipeline rst --> in_rst --> fifo_rst in common_fifo_sc.vhd + Still the u_common_fifo_sc do occur, maybe due to path via wr_req input. + ==> TRIED: Pipeline tx_sosi in ring_tx.vhd --> pipeline not needed, only the tx_sosi.channel field has logic, + all other tx_sosi fields are wired. + ==> DONE: Pipeline tx_fifo_siso with dp_pipeline_ready_inp in dp_offload_tx_v3 in ring_tx.vhd fixes FIFOram errors. + For this I (Eric) added dp_pipeline_ready_inp and dp_pipeline_ready_outp to dp_offload_tx_v3.vhd. + +Following din_meta[0] timing paths with least slack were seen, but not further investigated because they achieved +CLK 5 ns timing: + +* common_areset_dp_rst/with_pipe/u_pipe/din_meta[0] to u_dp_fifo_sc in: + - u_revision|u_sdp_station|gen_use_ring.gen_xst_ring.u_ring_lane_xst|u_ring_tx|u_dp_offload_tx_v3| + gen_dp_field_blk[0].u_dp_field_blk|u_dp_repack_data|gen_dp_repack_out.u_dp_repack_out|r.hold_out.eop + - u_revision|u_sdp_station|gen_use_xsub.u_xsub|u_sdp_xst_udp_offload|u_dp_offload_tx_v3| + gen_dp_field_blk[0].u_dp_field_blk|u_dp_repack_data|gen_dp_repack_in.u_dp_repack_in|r.src_out.bsn[29] + - u_revision|u_sdp_station|gen_use_xsub.u_xsub|u_sdp_crosslets_remote|u_dp_repack_data_rx| + gen_dp_repack_out.u_dp_repack_out|r.src_out.bsn[59] + - u_revision|u_sdp_station|gen_use_bf.gen_bf[0].u_bf|u_sdp_beamformer_remote|u_dp_repack_data_sum| + gen_dp_repack_out.u_dp_repack_out|r.src_out.channel[0] + - u_revision|u_sdp_station|gen_use_bf.gen_bf[1].u_bf|u_sdp_beamformer_output|u_dp_offload_tx_v3| + gen_packet[0].u_dp_field_blk|u_dp_repack_data|gen_dp_repack_out.u_dp_repack_out|r.src_out.empty[0] + - u_revision|u_sdp_station|gen_use_bf.gen_bf[1].u_bf|u_sdp_beamformer_remote|u_dp_repack_data_sum| + 
gen_dp_repack_in.u_dp_repack_in|gen_load.u_load|r.dat_bit_cnt[2] + ==> do not reset data fields in u_dp_repack_out + ==> do not reset data fields in u_dp_repack_in + ==> use RESET_DP_SOSI_CTRL() + +Some other timing paths with low slack: + +* u_revision|u_sdp_station|gen_use_xsub.u_xsub|u_sdp_xst_udp_offload|u_dp_offload_tx_v3| + gen_dp_concat[0].u_dp_concat|i_src_out.eop to + - u_revision|u_ctrl|gen_eth.u_eth|u_tx_mux|src_out_hi.eop + - u_revision|u_ctrl|gen_eth.u_eth|u_tx_mux|gen_input[2].u_hold|u_hold_sop|u_hld_ctrl|switch_level + ==> Put dp_pipeline_ready after dp_offload_tx_v3 in sdp_statistics_offload + ==> Put optional dp_pipeline_ready in gen_dp_concat +* u_revision|u_sdp_station|gen_use_bf.gen_bf[0].u_bf|u_sdp_bst_udp_offload|u_dp_offload_tx_v3| + gen_dp_concat[0].u_dp_concat|i_src_out.sop to + - u_revision|u_sdp_station|gen_use_bf.gen_bf[1].u_bf|u_sdp_bst_udp_offload|u_dp_pipeline_ready|gen_out_rl.u_incr| + reg_ready[1] +* u_revision|u_sdp_station|gen_use_bf.gen_bf[0].u_bf|u_sdp_beamformer_output|u_dp_fifo_data|u_dp_fifo_fill_eop| + u_dp_fifo_core|gen_common_fifo_sc.u_common_fifo_sc|rd_val to + - u_revision|u_sdp_station|gen_use_bf.gen_bf[0].u_bf|u_sdp_beamformer_output|u_dp_fifo_data|u_dp_fifo_fill_eop| + i_src_out.channel[2] +* u_revision|u_sdp_station|u_ring_info|u_mm_fields|u_common_reg_r_w_dc|gen_cross.u_out_reg|i_out_dat[69] to + - u_revision|u_sdp_station|gen_use_ring.gen_bf_ring.gen_beamset_ring[0].u_ring_lane_bf|u_ring_tx|hdr_fields_in_reg[65] + diff --git a/libraries/base/common/src/vhdl/common_reg_cross_domain.vhd b/libraries/base/common/src/vhdl/common_reg_cross_domain.vhd index 84e56f935c2a68e0df66662259b81b45ba48ab43..b1510580a9e6bbf1e0b159093cc2d28db9a3243d 100644 --- a/libraries/base/common/src/vhdl/common_reg_cross_domain.vhd +++ b/libraries/base/common/src/vhdl/common_reg_cross_domain.vhd @@ -59,6 +59,9 @@ end common_reg_cross_domain; architecture rtl of common_reg_cross_domain is constant c_dat : std_logic_vector(in_dat'range) := 
g_out_dat_init(in_dat'range); + signal in_rst_p : std_logic := '1'; + signal out_rst_p : std_logic := '1'; + ------------------------------------------------------------------------------ -- in_clk domain ------------------------------------------------------------------------------ @@ -90,6 +93,10 @@ architecture rtl of common_reg_cross_domain is begin out_dat <= i_out_dat; + -- Pipeline synchronous input rst to ease timing closure + in_rst_p <= in_rst when rising_edge(in_clk); + out_rst_p <= out_rst when rising_edge(out_clk); + ------------------------------------------------------------------------------ -- in_clk domain ------------------------------------------------------------------------------ @@ -97,9 +104,9 @@ begin reg_new(0) <= in_new; gen_latency : if g_in_new_latency > 0 generate - p_reg_new : process(in_rst, in_clk) + p_reg_new : process(in_rst_p, in_clk) begin - if in_rst = '1' then + if in_rst_p = '1' then reg_new(1 to g_in_new_latency) <= (others => '0'); elsif rising_edge(in_clk) then reg_new(1 to g_in_new_latency) <= nxt_reg_new(1 to g_in_new_latency); @@ -109,9 +116,9 @@ begin nxt_reg_new(1 to g_in_new_latency) <= reg_new(0 to g_in_new_latency - 1); end generate; - p_in_clk : process(in_rst, in_clk) + p_in_clk : process(in_rst_p, in_clk) begin - if in_rst = '1' then + if in_rst_p = '1' then in_new_hold <= '0'; in_done <= '0'; state <= s_idle; @@ -167,11 +174,11 @@ begin ------------------------------------------------------------------------------ u_cross_req : entity common_lib.common_spulse port map ( - in_rst => in_rst, + in_rst => in_rst_p, in_clk => in_clk, in_pulse => cross_req, in_busy => cross_busy, - out_rst => out_rst, + out_rst => out_rst_p, out_clk => out_clk, out_pulse => out_en ); @@ -179,9 +186,9 @@ begin ------------------------------------------------------------------------------ -- out_clk domain ------------------------------------------------------------------------------ - p_out_clk : process(out_rst, out_clk) + p_out_clk 
: process(out_rst_p, out_clk) begin - if out_rst = '1' then + if out_rst_p = '1' then out_new <= '0'; elsif rising_edge(out_clk) then i_out_dat <= nxt_out_dat; diff --git a/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd b/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd index df7fbd0603ddfeed6c5dae24a7c2ccf32ec77511..9689b5f18b4e9979d8f3eb5e8488f74f59fbbea0 100644 --- a/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd +++ b/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd @@ -50,12 +50,15 @@ use technology_lib.technology_select_pkg.all; entity dp_offload_tx_v3 is generic ( - g_nof_streams : natural; - g_data_w : natural; - g_symbol_w : natural; - g_hdr_field_arr : t_common_field_arr; -- User defined header fields - g_hdr_field_sel : std_logic_vector; -- For each header field, select the source: 0=data path, 1=MM controlled - g_pipeline_ready : boolean := false + g_nof_streams : natural; + g_data_w : natural; + g_symbol_w : natural; + g_hdr_field_arr : t_common_field_arr; -- User defined header fields + g_hdr_field_sel : std_logic_vector; -- For each header field, select the source: 0=data path, 1=MM controlled + g_pipeline_ready : boolean := false; -- dp_pipeline_ready unpack MM header + g_pipeline_ready_inp : boolean := false; -- dp_pipeline_ready snk_in + g_capture_hdr_fields_in : boolean := false; -- capture hdr_fields_in_arr at snk_in_arr().sop + g_pipeline_ready_outp : boolean := false -- dp_pipeline_ready src_out ); port ( mm_rst : in std_logic := '0'; @@ -73,27 +76,43 @@ entity dp_offload_tx_v3 is src_out_arr : out t_dp_sosi_arr(g_nof_streams - 1 downto 0); src_in_arr : in t_dp_siso_arr(g_nof_streams - 1 downto 0) := (others => c_dp_siso_rdy); - hdr_fields_in_arr : in t_slv_1024_arr(g_nof_streams - 1 downto 0); -- hdr_fields_in_arr(i) is considered valid @ snk_in_arr(i).sop + -- hdr_fields_in_arr(i) is considered valid at snk_in_arr(i).sop + hdr_fields_in_arr : in t_slv_1024_arr(g_nof_streams - 1 downto 0); hdr_fields_out_arr : out t_slv_1024_arr(g_nof_streams 
- 1 downto 0) ); end dp_offload_tx_v3; architecture str of dp_offload_tx_v3 is + -- Force c_capture_hdr_fields_in = false when g_pipeline_ready_inp = false, because hdr_fields_in_arr is + -- valid at snk_in_arr().sop. + -- . Use c_capture_hdr_fields_in = true in combination with g_pipeline_ready_inp = true when some fields in + -- hdr_fields_in_arr are only valid at snk_in_arr().sop. E.g. a strobe field like sosi.sync is only valid + -- at sosi.sop, therefore hdr_fields_in_arr then needs to be captured at snk_in_arr().sop. + -- . Use c_capture_hdr_fields_in = false in combination with g_pipeline_ready_inp = true to save capture logic, + -- when all hdr_fields_in_arr are valid until the next snk_in_arr().sop. + constant c_capture_hdr_fields_in : boolean := g_pipeline_ready_inp and g_capture_hdr_fields_in; + constant c_dp_field_blk_snk_data_w : natural := field_slv_out_len(field_arr_set_mode(g_hdr_field_arr, "RW")); constant c_dp_field_blk_src_data_w : natural := g_data_w; signal dbg_c_dp_field_blk_snk_data_w : natural := c_dp_field_blk_snk_data_w; signal dbg_c_dp_field_blk_src_data_w : natural := c_dp_field_blk_src_data_w; + signal pipe_snk_in_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); + signal pipe_snk_out_arr : t_dp_siso_arr(g_nof_streams - 1 downto 0); + signal hold_hdr_fields_in_arr : t_slv_1024_arr(g_nof_streams - 1 downto 0); + signal dp_field_blk_snk_in_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); signal dp_field_blk_snk_out_arr : t_dp_siso_arr(g_nof_streams - 1 downto 0); - signal dp_field_blk_src_out_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); signal dp_field_blk_src_in_arr : t_dp_siso_arr(g_nof_streams - 1 downto 0); signal dp_concat_snk_in_2arr : t_dp_sosi_2arr_2(g_nof_streams - 1 downto 0); signal dp_concat_snk_out_2arr : t_dp_siso_2arr_2(g_nof_streams - 1 downto 0); + signal wire_src_out_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); + signal wire_src_in_arr : t_dp_siso_arr(g_nof_streams - 1 downto 0); + signal reg_hdr_dat_mosi_arr : 
t_mem_mosi_arr(g_nof_streams - 1 downto 0); signal reg_hdr_dat_miso_arr : t_mem_miso_arr(g_nof_streams - 1 downto 0); @@ -103,34 +122,62 @@ begin assert c_dp_field_blk_snk_data_w <= c_dp_stream_data_w report "Number of header bits must fit in t_dp_sosi data field." severity FAILURE; + + gen_input : for i in 0 to g_nof_streams - 1 generate + -- Optional dp_pipeline_ready input to ease timing closure + pipe_inp : if g_pipeline_ready_inp generate + u_dp_pipeline_ready_inp : entity work.dp_pipeline_ready + port map( + rst => dp_rst, + clk => dp_clk, + + snk_out => snk_out_arr(i), + snk_in => snk_in_arr(i), + src_in => pipe_snk_out_arr(i), + src_out => pipe_snk_in_arr(i) + ); + end generate; + wire_inp : if not g_pipeline_ready_inp generate + snk_out_arr(i) <= pipe_snk_out_arr(i); + pipe_snk_in_arr(i) <= snk_in_arr(i); + end generate; + + capture_hdr : if c_capture_hdr_fields_in generate + hold_hdr_fields_in_arr(i) <= hdr_fields_in_arr(i) when rising_edge(dp_clk) and snk_in_arr(i).sop = '1'; + end generate; + wire_hdr : if not c_capture_hdr_fields_in generate + hold_hdr_fields_in_arr(i) <= hdr_fields_in_arr(i); + end generate; + end generate; + --------------------------------------------------------------------------------------- -- Create header block & concatenate header to offload stream. --------------------------------------------------------------------------------------- - p_wire_valid : process(snk_in_arr, hdr_fields_in_arr) + p_wire_hdr_fields : process(pipe_snk_in_arr, hold_hdr_fields_in_arr) begin for i in 0 to g_nof_streams - 1 loop -- default pass on the other snk_in_arr fields as well, especially the sync, bsn and channel can -- be useful to preserve for the packetized output, even though only the sosi.data of the -- packetized output will get transmitted. - dp_field_blk_snk_in_arr(i) <= snk_in_arr(i); + dp_field_blk_snk_in_arr(i) <= pipe_snk_in_arr(i); -- Prepare packet header as a data block with only one data word, so valid = sop = eop. 
If -- c_dp_field_blk_snk_data_w > c_dp_field_blk_src_data_w then dp_repack_data in dp_field_blk will -- repack this data word into a multi word header block, else dp_field_blk will pass on the -- dp_field_blk_snk_in_arr as a single word header block. - dp_field_blk_snk_in_arr(i).data <= RESIZE_DP_DATA(hdr_fields_in_arr(i)(field_slv_len(g_hdr_field_arr) - 1 downto 0)); - dp_field_blk_snk_in_arr(i).valid <= snk_in_arr(i).sop; - dp_field_blk_snk_in_arr(i).sop <= snk_in_arr(i).sop; -- necessary for single word header block - dp_field_blk_snk_in_arr(i).eop <= snk_in_arr(i).sop; -- necessary for single word header block + dp_field_blk_snk_in_arr(i).data <= RESIZE_DP_DATA(hold_hdr_fields_in_arr(i)(field_slv_len(g_hdr_field_arr) - 1 downto 0)); + dp_field_blk_snk_in_arr(i).valid <= pipe_snk_in_arr(i).sop; + dp_field_blk_snk_in_arr(i).sop <= pipe_snk_in_arr(i).sop; -- necessary for single word header block + dp_field_blk_snk_in_arr(i).eop <= pipe_snk_in_arr(i).sop; -- necessary for single word header block end loop; end process; - gen_dp_field_blk : for i in 0 to g_nof_streams - 1 generate + gen_packet : for i in 0 to g_nof_streams - 1 generate -- Both dp_concat inputs must be ready. One of the inputs toggles ready via dp_field_blk. 
- snk_out_arr(i).ready <= dp_field_blk_snk_out_arr(i).ready and dp_concat_snk_out_2arr(i)(0).ready; - snk_out_arr(i).xon <= src_in_arr(i).xon; + pipe_snk_out_arr(i).ready <= dp_field_blk_snk_out_arr(i).ready and dp_concat_snk_out_2arr(i)(0).ready; + pipe_snk_out_arr(i).xon <= wire_src_in_arr(i).xon; -- Wire hdr_fields_out_arr - p_hdr_fields_out_arr : process(mm_fields_slv_out_arr, hdr_fields_in_arr, field_override_arr) + p_hdr_fields_out_arr : process(mm_fields_slv_out_arr, hold_hdr_fields_in_arr, field_override_arr) variable v_hi : natural; variable v_lo : natural; begin @@ -144,14 +191,12 @@ begin if field_override_arr(j) = '1' then hdr_fields_out_arr(i)(v_hi downto v_lo) <= mm_fields_slv_out_arr(i)(v_hi downto v_lo); else - hdr_fields_out_arr(i)(v_hi downto v_lo) <= hdr_fields_in_arr(i)(v_hi downto v_lo); + hdr_fields_out_arr(i)(v_hi downto v_lo) <= hold_hdr_fields_in_arr(i)(v_hi downto v_lo); end if; end loop; end process; - --------------------------------------------------------------------------------------- -- mm_fields for MM access to each field - --------------------------------------------------------------------------------------- u_mm_fields_slv: entity mm_lib.mm_fields generic map( g_field_arr => field_arr_set_mode(g_hdr_field_arr, "RW") @@ -198,12 +243,10 @@ begin reg_slv_miso => reg_hdr_dat_miso_arr(i) ); + -- Prepend the header block to the input block dp_field_blk_src_in_arr(i) <= dp_concat_snk_out_2arr(i)(1); - end generate; - -- Prepend the header block to the input block - gen_dp_concat : for i in 0 to g_nof_streams - 1 generate - dp_concat_snk_in_2arr(i)(0) <= snk_in_arr(i); + dp_concat_snk_in_2arr(i)(0) <= pipe_snk_in_arr(i); dp_concat_snk_in_2arr(i)(1) <= dp_field_blk_src_out_arr(i); u_dp_concat : entity work.dp_concat @@ -218,11 +261,31 @@ begin snk_out_arr => dp_concat_snk_out_2arr(i), snk_in_arr => dp_concat_snk_in_2arr(i), - src_in => src_in_arr(i), - src_out => src_out_arr(i) + src_in => wire_src_in_arr(i), + src_out => 
wire_src_out_arr(i) ); end generate; + gen_output : for i in 0 to g_nof_streams - 1 generate + -- Optional dp_pipeline_ready output to ease timing closure + gen_dp_pipeline_ready_outp : if g_pipeline_ready_outp generate + u_dp_pipeline_ready_outp : entity work.dp_pipeline_ready + port map( + rst => dp_rst, + clk => dp_clk, + + snk_out => wire_src_in_arr(i), + snk_in => wire_src_out_arr(i), + src_in => src_in_arr(i), + src_out => src_out_arr(i) + ); + end generate; + wire_outp : if not g_pipeline_ready_outp generate + src_out_arr(i) <= wire_src_out_arr(i); + wire_src_in_arr(i) <= src_in_arr(i); + end generate; + end generate; + --------------------------------------------------------------------------------------- -- MM control & monitoring --------------------------------------------------------------------------------------- diff --git a/libraries/base/dp/tb/vhdl/tb_dp_offload_tx_v3.vhd b/libraries/base/dp/tb/vhdl/tb_dp_offload_tx_v3.vhd index cbac338cde2a89b1f5b1894cbb793c893562ad88..941d29f5af36243e7b5acddfa0138702ed249eb1 100644 --- a/libraries/base/dp/tb/vhdl/tb_dp_offload_tx_v3.vhd +++ b/libraries/base/dp/tb/vhdl/tb_dp_offload_tx_v3.vhd @@ -76,7 +76,11 @@ entity tb_dp_offload_tx_v3 is g_symbol_w : natural := 8; g_empty : natural := 6; -- number of empty symbols in header when g_symbol_w < g_data_w, must be < c_nof_symbols_per_data g_pkt_len : natural := 240; - g_pkt_gap : natural := 16 + g_pkt_gap : natural := 16; + g_pipeline_ready_hdr : boolean := false; + g_pipeline_ready_inp : boolean := false; + g_capture_hdr_fields_in : boolean := false; + g_pipeline_ready_outp : boolean := false ); end tb_dp_offload_tx_v3; @@ -494,11 +498,15 @@ begin u_tx : entity work.dp_offload_tx_v3 generic map ( - g_nof_streams => 1, - g_data_w => g_data_w, - g_symbol_w => g_symbol_w, - g_hdr_field_arr => c_udp_offload_hdr_field_arr, - g_hdr_field_sel => c_hdr_field_ovr_init + g_nof_streams => 1, + g_data_w => g_data_w, + g_symbol_w => g_symbol_w, + g_hdr_field_arr => 
c_udp_offload_hdr_field_arr, + g_hdr_field_sel => c_hdr_field_ovr_init, + g_pipeline_ready => g_pipeline_ready_hdr, + g_pipeline_ready_inp => g_pipeline_ready_inp, + g_capture_hdr_fields_in => g_capture_hdr_fields_in, + g_pipeline_ready_outp => g_pipeline_ready_outp ) port map ( mm_rst => mm_rst, @@ -562,7 +570,7 @@ begin link_offload_sosi_arr(0).sop <= tx_offload_sosi_arr(0).sop; link_offload_sosi_arr(0).eop <= tx_offload_sosi_arr(0).eop; - -- The dp_offload_tx_v3 cannot accept flow control via its src_in_arr + -- The dp_offload_rx cannot accept flow control via its src_in_arr, -- however the dp_offload_rx only lowers ready at eop, to request a -- one cycle gap between rx packets. The dp_offload_tx_v3 output via -- src_out_arr has a gap, when g_flow_control_stimuli is e_pulse or when @@ -649,7 +657,7 @@ begin -- Check dp_bsn and dp_sync of second packet after sync with dp_sync = 0 proc_common_wait_until_hi_lo(dp_clk, verify_snk_in.sop); -- wait some latency until header fields of this sync packet are available via MM - proc_common_wait_some_cycles(dp_clk, 10); + proc_common_wait_some_cycles(dp_clk, 20); -- dp_bsn lo proc_mem_mm_bus_rd(0, mm_clk, reg_dp_offload_rx_hdr_dat_mosi); proc_mem_mm_bus_rd_latency(c_mem_reg_rd_latency, mm_clk); diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd index 7a612c135a18c40a6aa024527a6d69e1cad8d1e8..26a18faa8e276e4eac31227f7b83342607a29041 100644 --- a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd @@ -210,7 +210,7 @@ begin begin proc_common_wait_until_low(dp_clk, mm_rst); proc_common_wait_until_low(dp_clk, dp_rst); - proc_common_wait_some_cycles(mm_clk, 5); + proc_common_wait_some_cycles(mm_clk, 10); -- Read stream enable bits, default '1' after power up for I in 0 to c_nof_streams - 1 loop diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_sync_scheduler.vhd 
b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_sync_scheduler.vhd index f35a8bd290a55d34f3ee9d2955233c8b57ca4ee5..a3565d0983b1f79e9e531b09d22233e64420f0ad 100644 --- a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_sync_scheduler.vhd +++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_sync_scheduler.vhd @@ -102,7 +102,7 @@ begin begin proc_common_wait_until_low(dp_clk, mm_rst); proc_common_wait_until_low(dp_clk, dp_rst); - proc_common_wait_some_cycles(mm_clk, 5); + proc_common_wait_some_cycles(mm_clk, 10); --------------------------------------------------------------------------- -- Initial check diff --git a/libraries/base/dp/tb/vhdl/tb_mms_dp_bsn_source_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mms_dp_bsn_source_v2.vhd index bd571b81e9033627b0983c66739032d173fd0cce..4f4439c85daee6a8e4cdee465f4bd665731b5e43 100644 --- a/libraries/base/dp/tb/vhdl/tb_mms_dp_bsn_source_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_mms_dp_bsn_source_v2.vhd @@ -53,11 +53,11 @@ architecture tb of tb_mms_dp_bsn_source_v2 is constant c_block_size : natural := 100; constant c_nof_block_per_sync : natural := 15; - constant c_sync_interval : natural := c_nof_block_per_sync * c_block_size; - constant c_bsn_init : natural := c_nof_block_per_sync; + constant c_nof_clk_per_sync : natural := c_nof_block_per_sync * c_block_size; + constant c_bsn_init : natural := 7; constant c_mm_addr_dp_on : natural := 0; - constant c_mm_addr_nof_block_per_sync : natural := 1; + constant c_mm_addr_nof_clk_per_sync : natural := 1; constant c_mm_addr_bsn_lo : natural := 2; constant c_mm_addr_bsn_hi : natural := 3; constant c_mm_addr_bsn_time_offset : natural := 4; @@ -75,8 +75,6 @@ architecture tb of tb_mms_dp_bsn_source_v2 is signal mm_dp_on_status : natural; signal mm_bsn : std_logic_vector(c_dp_stream_bsn_w - 1 downto 0) := (others => '0'); - signal mm_bsn_prev : std_logic_vector(c_dp_stream_bsn_w - 1 downto 0) := (others => '0'); - signal mm_bsn_time_offset : std_logic_vector(c_bsn_time_offset_w - 1 downto 0) := (others => '0'); signal 
mm_mosi : t_mem_mosi := c_mem_mosi_rst; @@ -93,15 +91,15 @@ begin proc_common_wait_some_cycles(clk, 10); -- Write initial BSN and number of block per sync interval - proc_mem_mm_bus_wr(c_mm_addr_bsn_lo, c_bsn_init, clk, mm_miso, mm_mosi); - proc_mem_mm_bus_wr(c_mm_addr_bsn_hi, 0, clk, mm_miso, mm_mosi); -- must also write hi part to trigger transfer accross clock domain - proc_mem_mm_bus_wr(c_mm_addr_nof_block_per_sync, c_nof_block_per_sync, clk, mm_miso, mm_mosi); + -- . -- must also write hi part to trigger transfer across clock domain + proc_mem_mm_bus_wr(c_mm_addr_bsn_lo, c_bsn_init, clk, mm_miso, mm_mosi); + proc_mem_mm_bus_wr(c_mm_addr_bsn_hi, 0, clk, mm_miso, mm_mosi); + proc_mem_mm_bus_wr(c_mm_addr_nof_clk_per_sync, c_nof_clk_per_sync, clk, mm_miso, mm_mosi); proc_common_wait_some_cycles(clk, c_cross_clock_domain_latency); -------------------------------------------------------------------------- -- DP on immediate -------------------------------------------------------------------------- - -- Wait until after PPS proc_common_wait_until_hi_lo(clk, pps); @@ -118,19 +116,21 @@ begin report "Wrong DP on status, expected DP on immediate." severity ERROR; - -- Read BSN twice in same PPS interval - proc_common_wait_some_cycles(clk, c_block_size); - + -- Read BSN in first sync interval + proc_common_wait_some_cycles(clk, c_nof_clk_per_sync / 2); proc_mem_mm_bus_rd(c_mm_addr_bsn_lo, clk, mm_miso, mm_mosi); proc_mem_mm_bus_rd_latency(1, clk); mm_bsn(c_word_w - 1 downto 0) <= mm_miso.rddata(c_word_w - 1 downto 0); proc_mem_mm_bus_rd(c_mm_addr_bsn_hi, clk, mm_miso, mm_mosi); proc_mem_mm_bus_rd_latency(1, clk); mm_bsn(2 * c_word_w - 1 downto c_word_w) <= mm_miso.rddata(c_word_w - 1 downto 0); + proc_common_wait_some_cycles(clk, 1); + assert TO_UINT(mm_bsn) = c_bsn_init + report "Wrong BSN at sync in first interval." 
+ severity ERROR; - proc_common_wait_some_cycles(clk, c_block_size); - - mm_bsn_prev <= mm_bsn; + -- Read BSN in second sync interval + proc_common_wait_some_cycles(clk, c_nof_clk_per_sync); proc_mem_mm_bus_rd(c_mm_addr_bsn_lo, clk, mm_miso, mm_mosi); proc_mem_mm_bus_rd_latency(1, clk); mm_bsn(c_word_w - 1 downto 0) <= mm_miso.rddata(c_word_w - 1 downto 0); @@ -138,15 +138,12 @@ begin proc_mem_mm_bus_rd_latency(1, clk); mm_bsn(2 * c_word_w - 1 downto c_word_w) <= mm_miso.rddata(c_word_w - 1 downto 0); proc_common_wait_some_cycles(clk, 1); - - -- Uncomment appropriate assert line dependent on fixed code for capture_bsn in mms_dp_bsn_source: - --ASSERT mm_bsn_prev<mm_bsn REPORT "Wrong BSN, expected incrementing BSN during PPS or sync interval." SEVERITY ERROR; - assert mm_bsn_prev = mm_bsn - report "Wrong BSN, expected constant BSN during PPS or sync interval." + assert TO_UINT(mm_bsn) = c_bsn_init + c_nof_block_per_sync + report "Wrong BSN at sync in second interval." severity ERROR; -- Run few sync intervals - proc_common_wait_some_cycles(clk, 3 * c_sync_interval); + proc_common_wait_some_cycles(clk, 3 * c_nof_clk_per_sync); -- Write DP off proc_mem_mm_bus_wr(c_mm_addr_dp_on, c_mm_dp_off, clk, mm_miso, mm_mosi); @@ -184,14 +181,14 @@ begin report "Wrong offset, expected 5" severity ERROR; - proc_common_wait_some_cycles(clk, c_sync_interval); + proc_common_wait_some_cycles(clk, c_nof_clk_per_sync); tb_end <= '1'; wait; end process; u_dut : entity work.mms_dp_bsn_source_v2 generic map ( - g_cross_clock_domain => true, -- use FALSE when mm_clk and st_clk are the same, else use TRUE to cross the clock domain + g_cross_clock_domain => true, g_block_size => c_block_size, g_nof_clk_per_sync => 200 * 10**6, -- overrule via MM write g_bsn_w => c_dp_stream_bsn_w diff --git a/libraries/base/dp/tb/vhdl/tb_tb_dp_offload_tx_v3.vhd b/libraries/base/dp/tb/vhdl/tb_tb_dp_offload_tx_v3.vhd index 615f42cfd52b8db909b618d93dd0380ef1bafbb2..b55c988a931bdb4273808f1a07724886c7e81444 100644 
--- a/libraries/base/dp/tb/vhdl/tb_tb_dp_offload_tx_v3.vhd +++ b/libraries/base/dp/tb/vhdl/tb_tb_dp_offload_tx_v3.vhd @@ -23,14 +23,17 @@ -- > as 5 -- > run -all -library IEEE; +library IEEE, common_lib; use IEEE.std_logic_1164.all; +use common_lib.common_pkg.all; use work.tb_dp_pkg.all; -- for t_dp_flow_control_enum entity tb_tb_dp_offload_tx_v3 is end tb_tb_dp_offload_tx_v3; architecture tb of tb_tb_dp_offload_tx_v3 is + constant c_p : t_boolean_arr(0 to 1) := (false, true); + signal tb_end : std_logic := '0'; -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end' begin -- -- general @@ -44,16 +47,20 @@ begin -- g_pkt_len : NATURAL := 240; -- g_pkt_gap : NATURAL := 16 - u_pls_act_data_w_64 : entity work.tb_dp_offload_tx_v3 generic map (e_pulse, e_active, false, 64, 64, 0, 240, 16); - u_act_act_data_w_64_no_gap : entity work.tb_dp_offload_tx_v3 generic map (e_active, e_active, false, 64, 64, 0, 240, 0); -- u_dp_fifo_sc does run almost full - u_pls_act_data_w_64_no_gap : entity work.tb_dp_offload_tx_v3 generic map (e_pulse, e_active, false, 64, 64, 0, 240, 0); - u_rnd_act_data_w_64 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 64, 0, 240, 16); - u_rnd_act_data_w_32 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 32, 32, 0, 240, 16); - --u_act_rnd_data_w : ENTITY work.tb_dp_offload_tx_v3 GENERIC MAP (e_active, e_random, FALSE, 64, 64, 0, 240, 16); -- dp_offload_rx requires e_active - u_rnd_act_data_64_symbol_8_empty_1 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 8, 1, 240, 16); - u_rnd_act_data_64_symbol_8_empty_6 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 8, 6, 240, 16); - u_rnd_act_data_64_symbol_16 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 16, 1, 240, 16); - u_rnd_act_data_64_symbol_32 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 32, 1, 240, 16); - 
u_rnd_act_data_32_symbol_8 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 32, 8, 1, 240, 16); - u_rnd_act_data_32_symbol_16 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 32, 16, 1, 240, 16); + gen : for P in 0 to 1 generate + + u_pls_act_data_w_64 : entity work.tb_dp_offload_tx_v3 generic map (e_pulse, e_active, false, 64, 64, 0, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_act_act_data_w_64_no_gap : entity work.tb_dp_offload_tx_v3 generic map (e_active, e_active, false, 64, 64, 0, 240, 0, c_p(P), c_p(P), c_p(P), c_p(P)); -- u_dp_fifo_sc does run almost full + u_pls_act_data_w_64_no_gap : entity work.tb_dp_offload_tx_v3 generic map (e_pulse, e_active, false, 64, 64, 0, 240, 0, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_w_64 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 64, 0, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_w_32 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 32, 32, 0, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + --u_act_rnd_data_w : ENTITY work.tb_dp_offload_tx_v3 GENERIC MAP (e_active, e_random, FALSE, 64, 64, 0, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); -- dp_offload_rx requires e_active + u_rnd_act_data_64_symbol_8_empty_1 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 8, 1, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_64_symbol_8_empty_6 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 8, 6, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_64_symbol_16 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 16, 1, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_64_symbol_32 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 64, 32, 1, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_32_symbol_8 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 
32, 8, 1, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + u_rnd_act_data_32_symbol_16 : entity work.tb_dp_offload_tx_v3 generic map (e_random, e_active, false, 32, 16, 1, 240, 16, c_p(P), c_p(P), c_p(P), c_p(P)); + + end generate; end tb; diff --git a/libraries/base/ring/src/vhdl/ring_tx.vhd b/libraries/base/ring/src/vhdl/ring_tx.vhd index 62b21f1b560c1970492d5853370fb8a030bf69a8..a7b0771330ce5b95470b1bc9ce21798da9470ac8 100644 --- a/libraries/base/ring/src/vhdl/ring_tx.vhd +++ b/libraries/base/ring/src/vhdl/ring_tx.vhd @@ -164,7 +164,11 @@ begin g_data_w => g_data_w, g_symbol_w => g_symbol_w, g_hdr_field_arr => c_hdr_field_arr, - g_hdr_field_sel => c_hdr_field_sel + g_hdr_field_sel => c_hdr_field_sel, + g_pipeline_ready => false, -- unpack MM header + g_pipeline_ready_inp => true, -- use true to ease timing closure for ring_tx + g_capture_hdr_fields_in => false, -- can use false when g_pipeline_ready_inp = true, because hdr_fields_in_reg captures hdr_fields_in + g_pipeline_ready_outp => false -- use false, because there is no src_in_arr flow control ) port map ( mm_rst => mm_rst, diff --git a/libraries/dsp/filter/src/vhdl/fil_ppf_ctrl.vhd b/libraries/dsp/filter/src/vhdl/fil_ppf_ctrl.vhd index 6ff4e8743bf7dc0355d80080804bef2ae08505a1..cdc8523a5dbc80f469038717c919fac6ee379ad4 100644 --- a/libraries/dsp/filter/src/vhdl/fil_ppf_ctrl.vhd +++ b/libraries/dsp/filter/src/vhdl/fil_ppf_ctrl.vhd @@ -71,7 +71,7 @@ architecture rtl of fil_ppf_ctrl is constant c_single_taps_vec_w : natural := g_fil_ppf.in_dat_w * g_fil_ppf.nof_taps; constant c_taps_vec_w : natural := c_single_taps_vec_w * g_fil_ppf.nof_streams; - type reg_type is record + type t_reg is record in_dat_arr : t_in_dat_delay; -- Input register for the data init_dly_cnt : integer range 0 to c_filter_zdly; -- Counter used to overcome the settling time of the filter. 
val_dly : std_logic_vector(c_tot_latency - 1 downto 0); -- Delay register for the valid signal @@ -82,12 +82,15 @@ architecture rtl of fil_ppf_ctrl is out_val_ena : std_logic; -- Output enable end record; - signal r, rin : reg_type; + constant c_reg_rst : t_reg := ((others => (others => '0')), 0, + (others => '0'), (others => '0'), (others => '0'), + '0', (others => '0'), '0'); + signal r : t_reg := c_reg_rst; + signal rin : t_reg := c_reg_rst; begin - comb : process(r, rst, in_val, in_dat, taps_in_vec) - variable v : reg_type; + variable v : t_reg; begin v := r; @@ -125,13 +128,14 @@ begin end if; if(rst = '1') then + -- Do not reset data vectors to ease timing closure, because data reset value is don't care v.init_dly_cnt := 0; - v.in_dat_arr := (others => (others => '0')); + --v.in_dat_arr := (others => (others => '0')); v.val_dly := (others => '0'); v.rd_addr := (others => '0'); v.wr_addr := (others => '0'); v.wr_en := '0'; - v.taps_out_vec := (others => '0'); + --v.taps_out_vec := (others => '0'); v.out_val_ena := '0'; end if; diff --git a/libraries/technology/mac_10g/tech_mac_10g.vhd b/libraries/technology/mac_10g/tech_mac_10g.vhd index 735d9dda0945bea3a3a701c2dcde26d2ccee168c..0d0d303aa9aef6891dfc5afb21579089ffc1a08c 100644 --- a/libraries/technology/mac_10g/tech_mac_10g.vhd +++ b/libraries/technology/mac_10g/tech_mac_10g.vhd @@ -66,7 +66,7 @@ -- because the ST interface uses RL=1 whereas e.g. the Altera mac_10g IP uses -- RL=0. -- The dp_latency_fifo is necessary because the rx_mac_src_in.ready must remain --- active during a receptiothe Rx clockn, otherwise the rx frame gets truncated due to +-- active during a reception, otherwise the rx frame gets truncated due to -- back pressure by dp_pad_insert. --