Where is the "-bp" MAP option and how do I use it for a Spartan-6 in Xilinx ISE, or can anyone suggest a better design which uses fewer resources?

Question

I am writing some code in VHDL. When I synthesize it, the report shows that the design is using 13036 slice LUTs [enter image description here]. After searching on Google I found the following link (http://www.xilinx.com/support/answers/15888.htm), which says that using the "-bp" MAP option can reduce the number of LUTs used. How is that possible, and how do I use the option in Xilinx ISE 14.4? This is the first time I have encountered utilized logic exceeding what is available.

The code is below:

library ieee;
use ieee.std_logic_1164.all;
--use ieee.std_logic_arith.all;
--use ieee.std_logic_unsigned.all;
use IEEE.NUMERIC_STD.ALL;

-- Top-level ports of the FIR filter: a serial ADC interface, the board
-- clock, and a single-bit serial output of the filter result register.
entity fir is
  port (
    adc_miso : in  std_logic;  -- serial data bit read from the ADC
    adc_sclk : out std_logic;  -- serial clock driven to the ADC (divided from CLK_IN)
    adc_cs   : out std_logic;  -- ADC chip select, driven low while a word is shifted in
    CLK_IN   : in  std_logic;  -- input clock; all internal clocks are derived from it
    bit_out  : out std_logic   -- one bit of the filter output register per output clock
  );
end entity fir;

-- 100-tap FIR filter fed by a serial ADC.
--
-- Structure:
--   * clock_divider : divides CLK_IN to produce the ADC/filter clock
--                     (clk_slow), the output bit clock (clk_slow_out) and
--                     adc_sclk.
--   * adc_and_filter: chip-select sequencing, 16-bit serial capture and the
--                     FIR computation, once per captured word.
--   * serialise_out : shifts the 32-bit result out on bit_out, one bit per
--                     falling edge of clk_slow_out.
--
-- Changes relative to the previous revision:
--   * The coefficient table is a constant instead of a variable that was
--     re-assigned on every process activation.
--   * The circular sample buffer (write pointer wrapping at 101 although the
--     buffer held 100 entries, read index computed with modulo-128
--     arithmetic) is replaced by a shift-register delay line with fixed tap
--     positions, so no index can leave the array and no dynamically indexed
--     reads are required.
--   * The accumulator is cleared for every output sample; previously each
--     result was added on top of all earlier results.
--   * The captured word uses the live adc_miso value for bit 15; previously
--     bit 15 was read from data_buffer before its update took effect.
--   * slv_reg0 / slv_reg1 have defined initial values.
--
-- NOTE(review): clk_slow and clk_slow_out are flip-flop outputs used as
-- clocks, and all NUM_TAPS multiply-adds are still evaluated within one
-- clk_slow cycle.  Spreading the sum over several clock cycles (one
-- multiply-accumulate per clock) is expected to reduce area further; that
-- would change output timing and is therefore not done here.
architecture Behavioral of fir is

  -- Number of filter taps; sizes the coefficient table and the delay line.
  constant NUM_TAPS : positive := 100;

  -- Filter coefficients, H(0) first.  Values fit in a signed 16-bit word and
  -- sum to less than 2**15, so a 32-bit accumulator cannot overflow with
  -- 16-bit samples.
  type coeff_table is array (0 to NUM_TAPS - 1) of integer range -32768 to 32767;
  constant H : coeff_table := (
     16,  16,  17,  18,  20,  22,  25,  28,  31,  35,   -- H(0)  .. H(9)
     39,  44,  49,  54,  60,  67,  73,  80,  88,  95,   -- H(10) .. H(19)
    103, 111, 120, 128, 137, 145, 154, 163, 172, 181,   -- H(20) .. H(29)
    190, 198, 207, 215, 223, 231, 238, 245, 252, 258,   -- H(30) .. H(39)
    264, 270, 274, 279, 283, 286, 289, 291, 292, 293,   -- H(40) .. H(49)
    292, 291, 289, 286, 283, 279, 274, 270, 264, 258,   -- H(50) .. H(59)
    252, 245, 238, 231, 223, 215, 207, 198, 190, 181,   -- H(60) .. H(69)
    172, 163, 154, 145, 137, 128, 120, 111, 103,  95,   -- H(70) .. H(79)
     88,  80,  73,  67,  60,  54,  49,  44,  39,  35,   -- H(80) .. H(89)
     31,  28,  25,  22,  20,  18,  17,  16,  16,  16    -- H(90) .. H(99)
  );

  -- Output serialiser: bit position of slv_reg1 currently driven on bit_out.
  signal index2 : unsigned(4 downto 0) := (others => '0');

  -- Free-running divider and the clocks tapped from it.
  signal q            : unsigned(5 downto 0) := (others => '0');
  signal clk_slow     : std_logic := '0';
  signal clk_slow_out : std_logic := '0';

  -- Serial capture state.
  signal index        : unsigned(3 downto 0) := (others => '0');   -- next bit position in data_buffer
  signal data_buffer  : std_logic_vector(15 downto 0) := (others => '0');
  signal cs_select    : unsigned(1 downto 0) := (others => '0');   -- "00" = conversion active
  signal count_2_bits : unsigned(1 downto 0) := (others => '0');   -- lead-in cycles before data bits

  -- Result registers: slv_reg0 holds the last captured word (lower 16 bits),
  -- slv_reg1 the last filter output.
  signal slv_reg0 : std_logic_vector(31 downto 0) := (others => '0');
  signal slv_reg1 : std_logic_vector(31 downto 0) := (others => '0');

begin

  -- Divide CLK_IN.  clk_slow follows q(4), clk_slow_out follows q(3), and
  -- adc_sclk is clk_slow delayed by one CLK_IN cycle.
  clock_divider : process (CLK_IN) is
  begin
    if rising_edge(CLK_IN) then
      q            <= q + 1;
      clk_slow     <= q(4);
      clk_slow_out <= q(3);
      adc_sclk     <= clk_slow;
    end if;
  end process clock_divider;

  -- ADC capture and FIR computation, clocked on the falling edge of clk_slow.
  adc_and_filter : process (clk_slow) is
    type sample_line is array (0 to NUM_TAPS - 1) of signed(15 downto 0);
    -- Delay line: Xin(0) is the newest sample, Xin(NUM_TAPS - 1) the oldest.
    variable Xin    : sample_line := (others => (others => '0'));
    variable sample : std_logic_vector(15 downto 0);
    variable acc    : signed(31 downto 0);
  begin
    if falling_edge(clk_slow) then
      case cs_select is

        when "00" =>
          -- Conversion phase: chip select active.
          adc_cs <= '0';
          case count_2_bits is

            when "10" =>
              -- Data phase: store one bit per clock.  index is 4 bits wide,
              -- so the increment wraps from 15 back to 0 by itself.
              data_buffer(to_integer(index)) <= adc_miso;
              index <= index + 1;

              if index = "1111" then
                -- Last bit of the word.  data_buffer(15) is only updated
                -- after this edge, so take bit 15 directly from adc_miso.
                sample     := data_buffer;
                sample(15) := adc_miso;
                slv_reg0(15 downto 0) <= sample;

                -- Shift the delay line by one and insert the new sample.
                Xin(1 to NUM_TAPS - 1) := Xin(0 to NUM_TAPS - 2);
                Xin(0) := signed(sample);

                -- y = sum over i of H(i) * x[n - i], restarted from zero for
                -- every output sample.
                acc := (others => '0');
                for i in 0 to NUM_TAPS - 1 loop
                  acc := acc + Xin(i) * to_signed(H(i), 16);
                end loop;
                slv_reg1 <= std_logic_vector(acc);

                -- Leave the conversion phase and re-arm the lead-in counter.
                cs_select    <= cs_select + 1;
                count_2_bits <= "00";
              end if;

            when others =>
              -- Lead-in cycles before the first data bit is stored.
              count_2_bits <= count_2_bits + 1;

          end case;

        when others =>
          -- Idle phase: chip select released until cs_select wraps to "00".
          adc_cs    <= '1';
          cs_select <= cs_select + 1;

      end case;
    end if;
  end process adc_and_filter;

  -- Drive one bit of the filter result per falling edge of clk_slow_out.
  -- index2 is 5 bits wide and wraps, so all 32 bits of slv_reg1 are visited
  -- in turn.
  Process_bit_out_clk_fast : process (clk_slow_out) is
  begin
    if falling_edge(clk_slow_out) then
      bit_out <= slv_reg1(to_integer(index2));
      index2  <= index2 + 1;
    end if;
  end process Process_bit_out_clk_fast;

end Behavioral;

Thanks Brian & baldy, I understood how the for-loop part can be converted into state-machine-style logic. I needed to make some multiplication and adder arrays (I am writing this for 60 taps; the same can be done for 100 taps):

-- Storage for the per-tap products and the running partial sums.
-- Both arrays have 61 entries (indices 60 downto 0) of 32-bit signed values
-- and start out as all zeros.
type MULT_TYPE is array (60 downto 0) of signed(31 downto 0);
type ADD_TYPE  is array (60 downto 0) of signed(31 downto 0);
constant ZERO : signed(31 downto 0) := (others => '0');
signal mult_array : MULT_TYPE := (others => ZERO);
signal ADD_array  : ADD_TYPE  := (others => ZERO);

and a new logic:

-- Multiply the current input by every coefficient; entry `tap` uses
-- coefficient H(60 - tap).  slv_reg0 is the filter input.
-- NOTE(review): the product width is the sum of the operand widths, so this
-- presumably relies on slv_reg0 being 16 bits wide here - confirm against
-- the enclosing declarations.
for tap in 0 to 60 loop
  mult_array(tap) <= signed(slv_reg0) * H(60 - tap);
end loop;

-- Adder chain: each entry adds its product to the previous partial sum.
-- All targets are signals, so every right-hand side reads the values from
-- before this activation, and the order of these statements is irrelevant.
ADD_array(0) <= ZERO + mult_array(0);
for tap in 1 to 60 loop
  ADD_array(tap) <= mult_array(tap) + ADD_array(tap - 1);
end loop;

-- The end of the chain is the filter output.
slv_reg1 <= std_logic_vector(ADD_array(60));

With the above replacements, instead of a for loop evaluated in one clock cycle, I am able to get better results: [enter image description here]

Yes, I can implement the design successfully, and it worked fine on the FPGA (xc6slx9).

Now I have tried to put this design into a MicroBlaze system. I made a peripheral and used the same VHDL code in the user logic. There, the same design uses more resources [enter image description here] and can't be implemented. So how is it possible to optimize it further? I can post my full VHDL code if it is necessary to understand what I am doing.

The -bp option will probably not help, as your design uses way too many LUTs. I'd suggest that you rethink your algorithm. The "for i in 0 to 99" loop is the "big guy". Make that part synchronous and use one clock edge per multiply/add. — baldyHDL, Mar 12 '14 at 07:48
Still doesn't tell me the sample rate. But the fact that the output bitrate is less than CLKIN points you in the right direction. You have multiple clock cycles per output sample. Which means you can try to follow the approach I gave in your previous question and baldyHDL repeated above. — , Mar 12 '14 at 15:26
The sampling rate is 223214 samples per second for the data coming in from the ADC, as each sample takes 21 clock cycles and the clock frequency is 4.6875 MHz. — user3217310, Mar 12 '14 at 16:13

user3217310 · Answer 1 · 2014-03-12T23:56:18.700

Thanks Brian & baldy, I understood how the for-loop part can be converted into state-machine-style logic. I needed to make some multiplication and adder arrays (I am writing this for 60 taps; the same can be done for 100 taps):

-- Storage for the per-tap products and the running partial sums.
-- Both arrays have 61 entries (indices 60 downto 0) of 32-bit signed values
-- and start out as all zeros.
type MULT_TYPE is array (60 downto 0) of signed(31 downto 0);
type ADD_TYPE  is array (60 downto 0) of signed(31 downto 0);
constant ZERO : signed(31 downto 0) := (others => '0');
signal mult_array : MULT_TYPE := (others => ZERO);
signal ADD_array  : ADD_TYPE  := (others => ZERO);

and a new logic:

-- Multiply the current input by every coefficient; entry `tap` uses
-- coefficient H(60 - tap).  slv_reg0 is the filter input.
-- NOTE(review): the product width is the sum of the operand widths, so this
-- presumably relies on slv_reg0 being 16 bits wide here - confirm against
-- the enclosing declarations.
for tap in 0 to 60 loop
  mult_array(tap) <= signed(slv_reg0) * H(60 - tap);
end loop;

-- Adder chain: each entry adds its product to the previous partial sum.
-- All targets are signals, so every right-hand side reads the values from
-- before this activation, and the order of these statements is irrelevant.
ADD_array(0) <= ZERO + mult_array(0);
for tap in 1 to 60 loop
  ADD_array(tap) <= mult_array(tap) + ADD_array(tap - 1);
end loop;

-- The end of the chain is the filter output.
slv_reg1 <= std_logic_vector(ADD_array(60));

With the above replacements, instead of a for loop evaluated in one clock cycle, I am able to get better results: [enter image description here]

Yes, I can implement the design successfully, and it worked fine on the FPGA (xc6slx9).

Now I have tried to put this design into a MicroBlaze system. I made a peripheral and used the same VHDL code in the user logic. There, the same design uses more resources [enter image description here] and can't be implemented. So how is it possible to optimize it further? I can post my full VHDL code if it is necessary to understand what I am doing.

Where is the "-bp" MAP option and how do I use it for a Spartan-6 in Xilinx ISE, or can anyone suggest a better design which uses fewer resources?

1 Answers1