I am writing some code in vhdl, when I synthesize it show desgin is using 13036 slice LUTs
After searching on google I found the following link ( http://www.xilinx.com/support/answers/15888.htm )which says If we use "-bp" MAP option we can reduce the used LUTs, So how it is possible. How to use the option in xilinx 14.4.
This is my first encounter when utilized logic is going more than available.
The code is below:
library ieee;
use ieee.std_logic_1164.all;
--use ieee.std_logic_arith.all;
--use ieee.std_logic_unsigned.all;
use IEEE.NUMERIC_STD.ALL;
entity fir is
port
(
-- ADD USER PORTS BELOW THIS LINE ------------------
adc_miso : in std_logic;
adc_sclk : out std_logic;
adc_cs : out std_logic;
CLK_IN : in std_logic;
bit_out : out std_logic
);
end fir;
architecture Behavioral of fir is
signal p : unsigned(6 downto 0) := (others => '0');
signal k : unsigned(6 downto 0) := (others => '0');
signal index2 : unsigned(4 downto 0) := (others => '0');
signal q : unsigned(5 downto 0) := (others => '0');
signal index : unsigned(3 downto 0) := (others => '0');
signal data_buffer : std_logic_vector(15 downto 0) := (others => '0');
signal clk_slow : std_logic := '0';
signal clk_slow_out : std_logic := '0';
signal cs_select : unsigned(1 downto 0) := (others => '0');
signal count_2_bits : unsigned(1 downto 0) := (others => '0');
signal slv_reg0 : std_logic_vector(31 downto 0);
signal slv_reg1 : std_logic_vector(31 downto 0);
begin
process ( CLK_IN ) is
begin
if CLK_IN'event and CLK_IN = '1' then
q <= q + 1;
clk_slow <= q(4);
clk_slow_out <= q(3);
adc_sclk <= clk_slow;
end if;
end process;
process( clk_slow ) is
variable fir_out : signed (31 downto 0):= (others => '0');
type array_signed1 is array(99 downto 0) of signed(15 downto 0);
variable H : array_signed1 := (others => "0000000000000000");
variable Xin : array_signed1 := (others => "0000000000000000");
begin
H(0) := to_signed(16,16);
H(1) := to_signed(16,16);
H(2) := to_signed(17,16);
H(3) := to_signed(18,16);
H(4) := to_signed(20,16);
H(5) := to_signed(22,16);
H(6) := to_signed(25,16);
H(7) := to_signed(28,16);
H(8) := to_signed(31,16);
H(9) := to_signed(35,16);
H(10) := to_signed(39,16);
H(11) := to_signed(44,16);
H(12) := to_signed(49,16);
H(13) := to_signed(54,16);
H(14) := to_signed(60,16);
H(15) := to_signed(67,16);
H(16) := to_signed(73,16);
H(17) := to_signed(80,16);
H(18) := to_signed(88,16);
H(19) := to_signed(95,16);
H(20) := to_signed(103,16);
H(21) := to_signed(111,16);
H(22) := to_signed(120,16);
H(23) := to_signed(128,16);
H(24) := to_signed(137,16);
H(25) := to_signed(145,16);
H(26) := to_signed(154,16);
H(27) := to_signed(163,16);
H(28) := to_signed(172,16);
H(29) := to_signed(181,16);
H(30) := to_signed(190,16);
H(31) := to_signed(198,16);
H(32) := to_signed(207,16);
H(33) := to_signed(215,16);
H(34) := to_signed(223,16);
H(35) := to_signed(231,16);
H(36) := to_signed(238,16);
H(37) := to_signed(245,16);
H(38) := to_signed(252,16);
H(39) := to_signed(258,16);
H(40) := to_signed(264,16);
H(41) := to_signed(270,16);
H(42) := to_signed(274,16);
H(43) := to_signed(279,16);
H(44) := to_signed(283,16);
H(45) := to_signed(286,16);
H(46) := to_signed(289,16);
H(47) := to_signed(291,16);
H(48) := to_signed(292,16);
H(49) := to_signed(293,16);
H(50) := to_signed(292,16);
H(51) := to_signed(291,16);
H(52) := to_signed(289,16);
H(53) := to_signed(286,16);
H(54) := to_signed(283,16);
H(55) := to_signed(279,16);
H(56) := to_signed(274,16);
H(57) := to_signed(270,16);
H(58) := to_signed(264,16);
H(59) := to_signed(258,16);
H(60) := to_signed(252,16);
H(61) := to_signed(245,16);
H(62) := to_signed(238,16);
H(63) := to_signed(231,16);
H(64) := to_signed(223,16);
H(65) := to_signed(215,16);
H(66) := to_signed(207,16);
H(67) := to_signed(198,16);
H(68) := to_signed(190,16);
H(69) := to_signed(181,16);
H(70) := to_signed(172,16);
H(71) := to_signed(163,16);
H(72) := to_signed(154,16);
H(73) := to_signed(145,16);
H(74) := to_signed(137,16);
H(75) := to_signed(128,16);
H(76) := to_signed(120,16);
H(77) := to_signed(111,16);
H(78) := to_signed(103,16);
H(79) := to_signed(95,16);
H(80) := to_signed(88,16);
H(81) := to_signed(80,16);
H(82) := to_signed(73,16);
H(83) := to_signed(67,16);
H(84) := to_signed(60,16);
H(85) := to_signed(54,16);
H(86) := to_signed(49,16);
H(87) := to_signed(44,16);
H(88) := to_signed(39,16);
H(89) := to_signed(35,16);
H(90) := to_signed(31,16);
H(91) := to_signed(28,16);
H(92) := to_signed(25,16);
H(93) := to_signed(22,16);
H(94) := to_signed(20,16);
H(95) := to_signed(18,16);
H(96) := to_signed(17,16);
H(97) := to_signed(16,16);
H(98) := to_signed(16,16);
H(99) := to_signed(16,16);
if falling_edge(clk_slow) then
Case cs_select is
when "00" =>
adc_cs <= '0';
case count_2_bits is
when "10" =>
data_buffer(to_integer(index)) <= adc_miso;
index <= index + 1;
if ( index = "1111" ) then
index <= "0000";
slv_reg0(15 downto 0) <= data_buffer;
Xin(to_integer(p)) := signed(data_buffer);
k <= p;
p <= p + 1;
if ( p = "1100101") then
p <= (others => '0');
end if;
for i in 0 to 99 loop
fir_out := fir_out + Xin(to_integer(k-i))*H(i);
end loop;
slv_reg1 <= std_logic_vector(fir_out);
cs_select <= cs_select + 1;
count_2_bits <= "00";
end if;
when others =>
count_2_bits <= count_2_bits + 1;
end case;
when others =>
adc_cs <= '1';
cs_select <= cs_select + 1;
end case;
end if;
end process;
Process_bit_out_clk_fast : process(clk_slow_out)
begin
if(falling_edge(clk_slow_out)) then
bit_out <= slv_reg1(to_integer(index2));
index2 <= index2 + 1;
end if;
end process;
end Behavioral;
Thanks Brian & baldy, I understood How can my for loop part can be converted in the state machine type of logic. Needed to make some multiplication and adder arrays (I am writing for 60 taps same can be for 100 taps):
type MULT_TYPE is array(60 downto 0) of signed(31 downto 0);
signal mult_array : MULT_TYPE := (others => "00000000000000000000000000000000");
type ADD_TYPE is array(60 downto 0) of signed(31 downto 0);
signal ADD_array : ADD_TYPE := (others => "00000000000000000000000000000000");
constant ZERO : signed(31 downto 0) := (others => '0');
and a new logic:
for i in 0 to 60 loop
mult_array(i) <= signed(slv_reg0)*H(60-i); -- slv_reg0 is fir_input
if i = 0 then
ADD_array(i) <= ZERO + mult_array(0);
else
ADD_array(i) <= mult_array(i) + ADD_array(i-1);
end if;
end loop;
slv_reg1 <= std_logic_vector(ADD_array(60)); -- slv_reg1 is the FIR_output
with the above replacements instead for a for loop in one clock cycle I am able to get better results :
Yes I can implement the design successfully and it worked fine for FPGA(xc6slx9).
Now I Tried to make this design in the Microblaze. Made a peripheral and in the user logic used the same vhdl code, In that this same design is using more resources
and can't be implemented So how is it possible to optimize it more ??
I can write my full vhdl code if it is necessary to understand what I am doing.