Skip to content
Snippets Groups Projects
Commit 9c872374 authored by Daniel van der Schuur's avatar Daniel van der Schuur
Browse files

-Implemented folder architecture.

parent b84b51d0
No related branches found
No related tags found
No related merge requests found
......@@ -27,25 +27,129 @@ USE dp_lib.dp_stream_pkg.ALL;
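-- Purpose: Reduce an array of streams to fewer streams by folding pairs of
-- inputs together with registered 2-input multiplexers, stage by stage.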
-- Description:
-- . nof_outputs = ceil_div(g_nof_inputs, 2^(g_nof_folds)) for g_nof_folds>=0
-- . Examples:
-- . g_nof_inputs=10, g_nof_folds=0 -> nof_outputs=10
-- . g_nof_inputs=10, g_nof_folds=1 -> nof_outputs= 5
-- . g_nof_inputs=10, g_nof_folds=2 -> nof_outputs= 3
-- . g_nof_inputs=10, g_nof_folds=3 -> nof_outputs= 2
-- . g_nof_inputs=10, g_nof_folds=4 -> nof_outputs= 1
-- . g_nof_inputs=10, g_nof_folds<0 -> nof_outputs= 1
-- Folder entity: takes g_nof_inputs streams and outputs a reduced number of
-- streams, see the nof_outputs formula and examples in the header above.
ENTITY corr_folder IS
GENERIC (
-- Number of input streams
g_nof_inputs : NATURAL;
-- g_nof_folds >0: Number of folds (each fold halves the number of streams, rounding up)
-- g_nof_folds =0: Wire out to in (no folding)
-- g_nof_folds <0: Fold until one output remains (default)
g_nof_folds : INTEGER := -1
);
PORT (
rst : IN STD_LOGIC; -- Reset for the fold stage registers (used asynchronously in the architecture)
clk : IN STD_LOGIC; -- Clock, registers update on the rising edge
snk_in_arr : IN t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0); -- Streams to fold
-- Folded streams. Length is nof_outputs:
-- . g_nof_folds>=0: ceil_div(g_nof_inputs, 2^g_nof_folds)
-- . g_nof_folds <0: 1
src_out_arr : OUT t_dp_sosi_arr(sel_a_b(g_nof_folds>=0, ceil_div(g_nof_inputs, ceil_pow2(g_nof_folds)), 1)-1 DOWNTO 0)
);
END corr_folder;
ARCHITECTURE str OF corr_folder IS
COMPONENT corr_folder IS
GENERIC (
g_nof_inputs : NATURAL;
g_folder_factor : NATURAL
g_nof_folds : INTEGER := -1
);
PORT (
rst : IN STD_LOGIC;
clk : IN STD_LOGIC;
snk_in_2arr_2 : IN t_dp_sosi_2arr_2(g_nof_inputs-1 DOWNTO 0);
snk_in_arr : IN t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0);
src_out_2arr_2 : OUT t_dp_sosi_2arr_2(g_nof_inputs/g_folder_factor-1 DOWNTO 0)
src_out_arr : OUT t_dp_sosi_arr(sel_a_b(g_nof_folds>=0, ceil_div(g_nof_inputs, ceil_pow2(g_nof_folds)), 1)-1 DOWNTO 0)
);
END corr_folder;
END COMPONENT;
CONSTANT c_nof_muxes : NATURAL := ceil_div(g_nof_inputs, 2);
SIGNAL mux_snk_in_arr : t_dp_sosi_arr(2*c_nof_muxes-1 DOWNTO 0);
SIGNAL mux_snk_in_2arr_2 : t_dp_sosi_2arr_2(c_nof_muxes-1 DOWNTO 0);
ARCHITECTURE rtl OF corr_folder IS
SIGNAL mux_src_out_arr : t_dp_sosi_arr(c_nof_muxes-1 DOWNTO 0);
SIGNAL nxt_mux_src_out_arr : t_dp_sosi_arr(c_nof_muxes-1 DOWNTO 0);
BEGIN
-----------------------------------------------------------------------------
-- One fold stage (g_nof_folds/=0):
-- . pair up the inputs and pass each pair through a 2-input mux
-- . register the mux outputs (each stage adds one clock cycle of latency)
-- . either output the registered result or recursively instantiate the
--   next fold stage
-----------------------------------------------------------------------------
gen_arch: IF g_nof_folds/=0 GENERATE

  -----------------------------------------------------------------------------
  -- Wire input array to mux_snk_in_arr to make sure we have an even number
  -- of buses to work with in case of an odd number of inputs
  -- . We want an even number of buses because we will wire up 2-input muxes
  -----------------------------------------------------------------------------
  gen_even_nof_buses: FOR i IN 0 TO g_nof_inputs-1 GENERATE
    mux_snk_in_arr(i) <= snk_in_arr(i);
  END GENERATE;

  -----------------------------------------------------------------------------
  -- With an odd number of inputs mux_snk_in_arr has one bus more than there
  -- are inputs. Drive that padding bus with the inactive reset value so the
  -- last mux never sees an undriven ('U') second input.
  -----------------------------------------------------------------------------
  gen_pad_odd_input: IF 2*c_nof_muxes>g_nof_inputs GENERATE
    mux_snk_in_arr(2*c_nof_muxes-1) <= c_dp_sosi_rst;
  END GENERATE;

  -----------------------------------------------------------------------------
  -- Wire inputs to the 2-input muxes
  -- . mux i gets buses 2*i (input 0) and 2*i+1 (input 1)
  -----------------------------------------------------------------------------
  gen_mux_inputs_0: FOR i IN 0 TO c_nof_muxes-1 GENERATE
    mux_snk_in_2arr_2(i)(0) <= mux_snk_in_arr(2*i);
    mux_snk_in_2arr_2(i)(1) <= mux_snk_in_arr(2*i+1);
  END GENERATE;

  -----------------------------------------------------------------------------
  -- Simple 2-input mux logic
  -- . Input 0 has priority: if both inputs are valid in the same cycle, the
  --   word on input 1 is not forwarded.
  -- . If neither input is valid the inactive value c_dp_sosi_rst is output.
  -----------------------------------------------------------------------------
  gen_mux_comb: FOR i IN 0 TO c_nof_muxes-1 GENERATE
    nxt_mux_src_out_arr(i) <= mux_snk_in_2arr_2(i)(0) WHEN mux_snk_in_2arr_2(i)(0).valid='1' ELSE
                              mux_snk_in_2arr_2(i)(1) WHEN mux_snk_in_2arr_2(i)(1).valid='1' ELSE
                              c_dp_sosi_rst;
  END GENERATE;

  -----------------------------------------------------------------------------
  -- If c_nof_muxes=1 or g_nof_folds=1, this is the last stage. Otherwise,
  -- add a stage.
  -- . g_nof_folds <0 : user wants to fold all the way to one output
  -- . g_nof_folds >0 : user wants to fold n times
  -- . The next stage gets the registered mux outputs of this stage as its
  --   inputs and one fold less to perform.
  -----------------------------------------------------------------------------
  gen_corr_folder: IF (g_nof_folds<0 AND c_nof_muxes>1) OR g_nof_folds>1 GENERATE
    u_corr_folder : corr_folder
    GENERIC MAP (
      g_nof_inputs => c_nof_muxes,
      g_nof_folds  => g_nof_folds-1
    )
    PORT MAP (
      rst         => rst,
      clk         => clk,
      snk_in_arr  => mux_src_out_arr,
      src_out_arr => src_out_arr
    );
  END GENERATE;

  -- Last stage: the registered mux outputs are the entity outputs
  gen_src_out_arr: IF (g_nof_folds<0 AND c_nof_muxes=1) OR g_nof_folds=1 GENERATE
    src_out_arr <= mux_src_out_arr;
  END GENERATE;

  -----------------------------------------------------------------------------
  -- Registers
  -- . Asynchronous reset to the inactive value, update on rising clock edge
  -----------------------------------------------------------------------------
  p_clk: PROCESS(clk, rst)
  BEGIN
    IF rst='1' THEN
      mux_src_out_arr <= (OTHERS=>c_dp_sosi_rst);
    ELSIF rising_edge(clk) THEN
      mux_src_out_arr <= nxt_mux_src_out_arr;
    END IF;
  END PROCESS;

END GENERATE;
-----------------------------------------------------------------------------
-- g_nof_folds=0: no folding. Every input stream is passed straight through
-- to the output with the same index, without a register stage.
-----------------------------------------------------------------------------
gen_wire_out_to_in: IF g_nof_folds=0 GENERATE
  gen_wire_stream: FOR i IN 0 TO g_nof_inputs-1 GENERATE
    src_out_arr(i) <= snk_in_arr(i);
  END GENERATE;
END GENERATE;
END str;
END rtl;
......@@ -57,16 +57,19 @@ ARCHITECTURE str OF correlator IS
CONSTANT c_acc_data_w : NATURAL := ceil_log2(c_nof_words_to_acc*(pow2(g_data_w)-1));
CONSTANT c_dp_mux_fifo_size : t_natural_arr := array_init(g_nof_acc_per_input, g_nof_mults);
CONSTANT c_dp_mux_fifo_fill : t_natural_arr := array_init(0, g_nof_mults); -- Start outputting right away
-- CONSTANT c_dp_mux_fifo_size : t_natural_arr := array_init(g_nof_acc_per_input, g_nof_mults);
-- CONSTANT c_dp_mux_fifo_fill : t_natural_arr := array_init(0, g_nof_mults); -- Start outputting right away
SIGNAL corr_permutator_src_out_2arr_2 : t_dp_sosi_2arr_2(g_nof_inputs*(g_nof_inputs+1)/2-1 DOWNTO 0); -- Array of pairs
SIGNAL corr_folder_src_out_2arr_2 : t_dp_sosi_2arr_2(g_nof_inputs*(g_nof_inputs+1)/2-1 DOWNTO 0); -- Array of pairs, not folded yet
SIGNAL corr_multiplier_src_out_arr : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
SIGNAL corr_accumulator_src_out_arr : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
SIGNAL dp_mux_snk_in_arr : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
SIGNAL dp_mux_src_out : t_dp_sosi;
SIGNAL dp_block_gen_snk_in : t_dp_sosi;
SIGNAL dp_fifo_sc_src_out_arr : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
-- SIGNAL dp_pipeline_src_out_arr : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
-- SIGNAL dp_mux_snk_in_arr : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
-- SIGNAL dp_mux_src_out : t_dp_sosi;
-- SIGNAL dp_block_gen_snk_in : t_dp_sosi;
BEGIN
......@@ -126,6 +129,22 @@ BEGIN
src_out_arr => corr_accumulator_src_out_arr
);
-----------------------------------------------------------------------------
-- Pre-mux pipeline stages to ease dp_mux routing
-----------------------------------------------------------------------------
-- gen_dp_pipeline : FOR i IN 0 TO g_nof_mults-1 GENERATE
-- u_dp_pipeline : ENTITY dp_lib.dp_pipeline
-- GENERIC MAP (
-- g_pipeline => 5
-- )
-- PORT MAP (
-- rst => rst,
-- clk => clk,
-- snk_in => corr_accumulator_src_out_arr(i),
-- src_out => dp_pipeline_src_out_arr(i)
-- );
-- END GENERATE;
-----------------------------------------------------------------------------
-- Multiplex the parallel visibility blocks onto one output stream.
-- . In : All channels per visibility
......@@ -136,66 +155,123 @@ BEGIN
-- .. |dp_mux| -> [0..0]..[63..63]
-- [0..63] -> |______|
-----------------------------------------------------------------------------
gen_concat_complex : FOR i IN 0 TO g_nof_mults-1 GENERATE
-- Concatenate real&imaginary parts
dp_mux_snk_in_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w) <= corr_accumulator_src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0);
dp_mux_snk_in_arr(i).data( c_acc_data_w-1 DOWNTO 0) <= corr_accumulator_src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0);
dp_mux_snk_in_arr(i).valid <= corr_accumulator_src_out_arr(i).valid;
-- SOP, EOP = valid. This creates blocks of one cycle. This makes dp_mux multiplex per single word instead of
-- per block of x different channels, effectively performing a transpose so dp_mux outputs blocks of the same channel.
dp_mux_snk_in_arr(i).sop <= corr_accumulator_src_out_arr(i).valid;
dp_mux_snk_in_arr(i).eop <= corr_accumulator_src_out_arr(i).valid;
END GENERATE;
-- gen_concat_complex : FOR i IN 0 TO g_nof_mults-1 GENERATE
-- -- Concatenate real&imaginary parts
-- dp_mux_snk_in_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w) <= dp_pipeline_src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0);
-- dp_mux_snk_in_arr(i).data( c_acc_data_w-1 DOWNTO 0) <= dp_pipeline_src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0);
-- dp_mux_snk_in_arr(i).valid <= dp_pipeline_src_out_arr(i).valid;
--
-- -- SOP, EOP = valid. This creates blocks of one cycle. This makes dp_mux multiplex per single word instead of
-- -- per block of x different channels, effectively performing a transpose so dp_mux outputs blocks of the same channel.
-- dp_mux_snk_in_arr(i).sop <= dp_pipeline_src_out_arr(i).valid;
-- dp_mux_snk_in_arr(i).eop <= dp_pipeline_src_out_arr(i).valid;
-- END GENERATE;
--
-- u_dp_mux : ENTITY dp_lib.dp_mux
-- GENERIC MAP (
-- g_data_w => 2*c_acc_data_w,
-- g_mode => 1,
-- g_nof_input => g_nof_mults,
-- g_use_fifo => TRUE,
-- g_fifo_size => c_dp_mux_fifo_size,
-- g_fifo_fill => c_dp_mux_fifo_fill,
-- g_fifo_af_margin => 0
-- )
-- PORT MAP (
-- rst => rst,
-- clk => clk,
-- -- ST sinks
-- snk_out_arr => OPEN,
-- snk_in_arr => func_dp_stream_arr_reverse_range(dp_mux_snk_in_arr), -- dp_mux uses TO range!
-- -- ST source
-- src_in => c_dp_siso_rdy,
-- src_out => dp_mux_src_out -- Note: dp_mux_src_out.channel outputs mux input index
-- );
--
-- -- Extract real&imaginary parts
-- dp_block_gen_snk_in.re(c_acc_data_w-1 DOWNTO 0) <= dp_mux_src_out.data(2*c_acc_data_w-1 DOWNTO c_acc_data_w);
-- dp_block_gen_snk_in.im(c_acc_data_w-1 DOWNTO 0) <= dp_mux_src_out.data( c_acc_data_w-1 DOWNTO 0);
--
-- -- Pad the rest with zeroes
-- dp_block_gen_snk_in.re(64-1 DOWNTO c_acc_data_w) <= (OTHERS=>'0');
-- dp_block_gen_snk_in.im(64-1 DOWNTO c_acc_data_w) <= (OTHERS=>'0');
--
-- dp_block_gen_snk_in.valid <= dp_mux_src_out.valid;
u_dp_mux : ENTITY dp_lib.dp_mux
------------------------------------------------------------------------------
-- Buffer the visibilities before the folding stage
-- . I/O delay is 3 cycles.
-- . FSM can monitor input valid and base read requests on that.
-- . If a read request was posted but no valid data emerged, reset the FSM
-- . this indicates a gap in the input data.
------------------------------------------------------------------------------
gen_dp_fifo_sc : FOR i IN 0 TO g_nof_mults-1 GENERATE
u_dp_fifo_sc : ENTITY dp_lib.dp_fifo_sc
GENERIC MAP (
g_data_w => 2*c_acc_data_w,
g_mode => 1,
g_nof_input => g_nof_mults,
g_use_fifo => TRUE,
g_fifo_size => c_dp_mux_fifo_size,
g_fifo_fill => c_dp_mux_fifo_fill,
g_bsn_w => 1,
g_empty_w => 1,
g_channel_w => 1,
g_error_w => 1,
g_use_bsn => FALSE,
g_use_empty => FALSE,
g_use_channel => FALSE,
g_use_error => FALSE,
g_use_sync => FALSE,
g_use_ctrl => FALSE,
g_use_complex => TRUE,
g_fifo_size => 64,
g_fifo_af_margin => 0
)
PORT MAP (
rst => rst,
clk => clk,
-- ST sinks
snk_out_arr => OPEN,
snk_in_arr => func_dp_stream_arr_reverse_range(dp_mux_snk_in_arr), -- dp_mux uses TO range!
-- ST source
src_in => c_dp_siso_rdy,
src_out => dp_mux_src_out -- Note: dp_mux_src_out.channel outputs mux input index
);
-- Extract real&imaginary parts
dp_block_gen_snk_in.re(c_acc_data_w-1 DOWNTO 0) <= dp_mux_src_out.data(2*c_acc_data_w-1 DOWNTO c_acc_data_w);
dp_block_gen_snk_in.im(c_acc_data_w-1 DOWNTO 0) <= dp_mux_src_out.data( c_acc_data_w-1 DOWNTO 0);
-- Pad the rest with zeroes
dp_block_gen_snk_in.re(64-1 DOWNTO c_acc_data_w) <= (OTHERS=>'0');
dp_block_gen_snk_in.im(64-1 DOWNTO c_acc_data_w) <= (OTHERS=>'0');
wr_ful => OPEN,
usedw => OPEN,
rd_emp => OPEN,
dp_block_gen_snk_in.valid <= dp_mux_src_out.valid;
snk_out => OPEN,
snk_in => corr_accumulator_src_out_arr(i),
src_in => c_dp_siso_rdy,
src_out => dp_fifo_sc_src_out_arr(i)
);
END GENERATE;
-----------------------------------------------------------------------------
-- Add proper SOP and EOP to mux output
-- . Output one block of g_nof_mults for each channel
-----------------------------------------------------------------------------
dp_block_gen: ENTITY dp_lib.dp_block_gen
------------------------------------------------------------------------------
-- Fold onto one stream
------------------------------------------------------------------------------
-- Fold all buffered visibility streams onto one output stream.
u_corr_folder : ENTITY work.corr_folder
GENERIC MAP (
g_use_src_in => FALSE,
g_nof_data => g_nof_mults,
g_nof_blk_per_sync => 10 -- Randomly chosen sync interval
g_nof_inputs => g_nof_mults, -- Must equal the length of dp_fifo_sc_src_out_arr (was hard-coded to 55)
g_nof_folds => -1 -- <0: fold until one output remains
)
PORT MAP (
rst => rst,
clk => clk,
snk_in => dp_block_gen_snk_in,
snk_in_arr => dp_fifo_sc_src_out_arr,
src_out => src_out_arr(0)
src_out_arr => src_out_arr
);
-- -----------------------------------------------------------------------------
-- -- Add proper SOP and EOP to mux output
-- -- . Output one block of g_nof_mults for each channel
-- -----------------------------------------------------------------------------
-- dp_block_gen: ENTITY dp_lib.dp_block_gen
-- GENERIC MAP (
-- g_use_src_in => FALSE,
-- g_nof_data => g_nof_mults,
-- g_nof_blk_per_sync => 10 -- Randomly chosen sync interval
-- )
-- PORT MAP (
-- rst => rst,
-- clk => clk,
--
-- snk_in => dp_block_gen_snk_in,
--
-- src_out => src_out_arr(0)
-- );
END str;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment