Within the scope of a paper I am writing at high school I chose to make my own audio-file-to-spectrogram-converter from scratch in order to create landscapes out of these spectrograms.
I already do have my implementation of an FFT and of using that to make a heightmap, a spectrogram. But I often get weird artifacts in the form of vertical stripes when the frequencies get dense, as you can see in the image below.
The example is right at the beginning with a window length of 2048 and on a log^2-scale. The FFT I am using is flawless, I've already compared it to others and they produce the same result.
This is the function which transforms the amplitudes into frequencies and stores them in a 2D-array:
private void transform(int from, int until) {
double val, step;
for(int i=from; i<until; i++) {
for(int j=0; j<n; j++)
chunk[j] = data[0][i*n+j+start];
fft.realForward(chunk);
for(int j=0; j<height; j++) {
val = Math.sqrt(chunk[2*j]*chunk[2*j] + chunk[2*j+1]*chunk[2*j+1]);
map[i][j] = val;
}
}
}
Now my Question: Where do these vertical stripes come from and how do I get rid of them?
I currently don't employ a window function and every calculation is stringed to one another, which means there is no overlapping. It is the simplest way you can think of making a spectrogram. Could it help introducing a window function or doing each calculation independent of whether the frame was already involved in a previous calculation, that is to say overlapping the frame-windows?
Also, what other ways are there to improve on my basic approach in order to get a better result?
This is the whole class. I feed it the data and all the necessary information from an audio file:
import java.awt.*;
import java.awt.event.*;
import java.awt.image.*;
import java.io.*;
import javax.imageio.ImageIO;
import javax.swing.*;
import org.jtransforms.fft.DoubleFFT_1D;
public class Heightmap extends JFrame implements WindowListener{
public static final int LOG_SCALE = 0;
public static final int LOG_SQUARE_SCALE = 1;
public static final int SQUARE_SCALE = 2;
public static final int LINEAR_SCALE = 3;
private BufferedImage heightmap;
private FileDialog chooser;
private JMenuBar menuBar;
private JMenu fileMenu;
private JMenuItem save, close;
private DoubleFFT_1D fft;
private int[][] data;
private double[][] map;
private double[] chunk;
private int width, height, n, start, scale;
private String name;
private boolean inactive;
public Heightmap(int[][] data, int resolution, int start,
int width, int height, int scale, String name) {
this.data = data;
this.n = resolution;
this.start = start;
this.width = width;
this.height = height;
this.scale = scale;
this.name = name;
fft = new DoubleFFT_1D(n);
map = new double[width][height];
heightmap = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
chunk = new double[n];
System.out.println("Starting transformation...");
long time;
time = System.currentTimeMillis();
transform();
time = System.currentTimeMillis() - time;
System.out.println("Time taken for calculation: "+time+" ms");
time = System.currentTimeMillis();
makeHeightmap();
initComponents();
time = System.currentTimeMillis() - time;
System.out.println("Time taken for drawing heightmap: "+time+" ms");
}
private void initComponents() {
this.setSize(width, height);
this.setDefaultCloseOperation(DISPOSE_ON_CLOSE);
this.setResizable(false);
this.setLocationRelativeTo(null);
this.setTitle(name);
createMenuBar();
chooser = new FileDialog(this, "Choose a directory", FileDialog.SAVE);
chooser.setDirectory("/Users/<user>/Desktop");
this.addMouseListener(new HeightmapMouseListener());
this.addKeyListener(new HeightmapKeyListener());
this.addWindowListener(this);
this.setVisible(true);
}
private void createMenuBar() {
menuBar = new JMenuBar();
fileMenu = new JMenu();
fileMenu.setText("File");
save = new JMenuItem("Save...", KeyEvent.VK_S);
save.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_S, InputEvent.META_DOWN_MASK));
save.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent arg0) {
chooser.setVisible(true);
String fileName = chooser.getFile();
String dir = chooser.getDirectory();
chooser.setDirectory(dir);
if(fileName != null) {
try {
File outputfile = new File(dir + fileName + ".png");
ImageIO.write(heightmap, "png", outputfile);
} catch (IOException e) {
e.printStackTrace();
}
System.out.println("Saved "+fileName+".png to "+dir);
}
}
});
close = new JMenuItem("Close", KeyEvent.VK_C);
close.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_W, InputEvent.META_DOWN_MASK));
close.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
setVisible(false);
dispose();
}
});
fileMenu.add(save);
fileMenu.addSeparator();
fileMenu.add(close);
menuBar.add(fileMenu);
this.setJMenuBar(menuBar);
}
public void paint(Graphics g) {
g.drawImage(heightmap, 0, 0, null);
}
private void transform() {
transform(0, width);
}
private void transform(int from, int until) {
double max = Double.MIN_VALUE;
double min = Double.MAX_VALUE;
double val, step;
for(int i=from; i<until; i++) {
for(int j=0; j<n; j++) {
chunk[j] = data[0][i*n+j+start];
}
fft.realForward(chunk);
for(int j=0; j<height; j++) {
val = Math.sqrt(chunk[2*j]*chunk[2*j] + chunk[2*j+1]*chunk[2*j+1]);
if(val > max)
max = val;
if(val < min)
min = val;
map[i][j] = val;
}
if(min != 0) {
step = max/(max-min);
for(int j=0; j<height; j++)
map[i][j] = (map[i][j]-min)*step;
}
}
}
/*
* Paints heightmap into the BufferedImage
*/
private void makeHeightmap() {
double max = 0;
switch(scale) {
case LOG_SCALE: max = Math.log(findMax(map)+1); break;
case LOG_SQUARE_SCALE: max = Math.pow(Math.log(findMax(map)+1), 2); break;
case SQUARE_SCALE: max = Math.sqrt(findMax(map)); break;
case LINEAR_SCALE: max = findMax(map); break;
default: max = Math.pow(Math.log(findMax(map)+1), 2); break;
}
double stepsize = 255.0/max;
int val, rgb;
for(int x=0; x<width; x++)
for(int y=0; y<height; y++) {
switch(scale) {
case LOG_SCALE: val = (int) (Math.log(map[x][y]+1)*stepsize); break;
case LOG_SQUARE_SCALE: val = (int) (Math.log(map[x][y]+1)*stepsize); val *= val; break;
case SQUARE_SCALE: val = (int) (Math.sqrt(map[x][y])*stepsize); break;
case LINEAR_SCALE: val = (int) (map[x][y]*stepsize); break;
default: val = (int) (Math.log(map[x][y]+1)*stepsize); val *= val; break;
}
rgb = 255<<24 | val<<16 | val<<8 | val;
heightmap.setRGB(x, height-y-1, rgb);
}
}
private double findMax(double[][] data) {
double max = 0;
for(double[] val1: data)
for(double d: val1)
if(d > max)
max = d;
return max;
}
private class HeightmapKeyListener implements KeyListener {
boolean busy = false;
public void keyPressed(KeyEvent e) {
if(e.getKeyCode() == KeyEvent.VK_RIGHT && !busy && start < data[0].length-width*n) {
busy = true;
for(int x=0; x<width-1; x++)
map[x] = map[x+1].clone();
start += n;
transform(width-1, width);
makeHeightmap();
repaint();
busy = false;
}
else if(e.getKeyCode() == KeyEvent.VK_LEFT && !busy && start > 0) {
busy = true;
for(int x=width-1; x>0; x--)
map[x] = map[x-1];
start -= n;
transform(0, 1);
makeHeightmap();
repaint();
busy = false;
}
}
public void keyReleased(KeyEvent e) { }
public void keyTyped(KeyEvent e) { }
}
private class HeightmapMouseListener implements MouseListener {
public void mouseClicked(MouseEvent e) {
if(inactive) {
inactive = false;
return;
}
long time = System.currentTimeMillis();
int posX = e.getX();
int diff = posX - width/2; //difference between old and new center in pixels
int oldStart = start;
start = start + diff*n;
if(start < 0) start = 0;
int maxFrame = data[0].length-width*n;
if(start > maxFrame) start = maxFrame;
if(start == oldStart) return;
System.out.println("Changing center...");
int absDiff = Math.abs(diff);
if(start < oldStart) { //shift the start backward, recalculate the start
for(int x=width-1; x>=absDiff; x--)
map[x] = map[x-absDiff].clone();
transform(0, absDiff);
}
else if(start > oldStart) { //shift the back forward, recalculate the back
for(int x=0; x<width-absDiff; x++)
map[x] = map[x+absDiff].clone();
transform(width-absDiff, width);
}
makeHeightmap();
repaint();
System.out.println("Time taken: "+(System.currentTimeMillis()-time)+" ms");
}
public void mousePressed(MouseEvent e) { }
public void mouseReleased(MouseEvent e) { }
public void mouseEntered(MouseEvent e) { }
public void mouseExited(MouseEvent e) { }
}
public void windowActivated(WindowEvent arg0) { }
public void windowClosed(WindowEvent arg0) { }
public void windowClosing(WindowEvent arg0) { }
public void windowDeactivated(WindowEvent arg0) {
inactive = true;
}
public void windowDeiconified(WindowEvent arg0) { }
public void windowIconified(WindowEvent arg0) { }
public void windowOpened(WindowEvent arg0) { }
}
EDIT: Implementing a window function improved the result drastically. I really didn't understand what a window function would do and therefore underestimated its effect.
However, after doing so I tried mapping a cosine wave with a frequency of 10kHz which (again) produced some strange artifacts:
What could be the cause of this one? I implemented a overflow protection by clipping everything under 0 to 0 and over 255 to 255 with no change whatsoever.