You will need to use Unicode-aware methods such as Character.isLetter().
Here is the code from http://rosettacode.org/wiki/Letter_frequency#Java:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
/**
 * Counts how often each of the 26 basic Latin letters (A-Z, case-insensitive)
 * occurs in a text file.
 */
public class LetterFreq {

    /** Number of basic Latin letters; also the length of the result array. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Reads the given file line by line and tallies the letters A-Z,
     * ignoring case.
     *
     * <p>Only characters that fall in the range {@code 'A'..'Z'} after
     * upper-casing are counted. Every other character (digits, punctuation,
     * accented or non-Latin letters) is skipped, because the result array has
     * exactly 26 slots and indexing it with any other letter would be out of
     * bounds.
     *
     * @param filename path of the text file to read
     * @return an array of length 26 where index 0 holds the count for 'A',
     *         index 1 the count for 'B', and so on up to 'Z'
     * @throws IOException if the file cannot be opened or read
     */
    public static int[] countLetters(String filename) throws IOException {
        int[] freqs = new int[ALPHABET_SIZE];
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                // Locale.ROOT keeps the case mapping independent of the JVM's
                // default locale (e.g. Turkish maps 'i' to a dotted capital I).
                String upper = line.toUpperCase(java.util.Locale.ROOT);
                for (char ch : upper.toCharArray()) {
                    // Character.isLetter() also accepts non-ASCII letters, which
                    // would index past the end of freqs; test the range directly.
                    if (ch >= 'A' && ch <= 'Z') {
                        freqs[ch - 'A']++;
                    }
                }
            }
        } finally {
            // Release the file handle even when reading fails part-way.
            in.close();
        }
        return freqs;
    }

    /**
     * Prints the letter counts for {@code filename.txt} as a 26-element array.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        System.out.println(Arrays.toString(countLetters("filename.txt")));
    }
}
Here is my modified version, which also counts non-ASCII Unicode letters:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Counts how often each letter occurs in a text file, without restricting the
 * alphabet to A-Z: any character accepted by {@link Character#isLetter(char)}
 * gets its own entry.
 */
public class LetterFreq {

    /**
     * Reads the given file line by line and tallies every letter, ignoring
     * case.
     *
     * <p>Each line is upper-cased first, so the map keys are the upper-case
     * forms. Text is inspected one UTF-16 {@code char} at a time, so a
     * supplementary character (encoded as a surrogate pair) is never counted
     * as a letter.
     *
     * @param filename path of the text file to read
     * @return a map from upper-cased letter to its number of occurrences;
     *         letters that never occur have no entry
     * @throws IOException if the file cannot be opened or read
     */
    public static Map<Character, Integer> countLetters(String filename) throws IOException {
        Map<Character, Integer> freqs = new HashMap<Character, Integer>();
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                // Locale.ROOT keeps the case mapping independent of the JVM's
                // default locale.
                String upper = line.toUpperCase(java.util.Locale.ROOT);
                for (char ch : upper.toCharArray()) {
                    if (!Character.isLetter(ch)) {
                        continue;
                    }
                    Integer seen = freqs.get(ch);
                    // The first sighting of a letter is one occurrence, not zero.
                    freqs.put(ch, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Release the file handle even when reading fails part-way.
            in.close();
        }
        return freqs;
    }

    /**
     * Prints one "Char: X Count: N" line per distinct letter found in
     * {@code filename.txt}.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        Map<Character, Integer> freqs = countLetters("filename.txt");
        // Iterate entries directly rather than looking each key up again.
        for (Map.Entry<Character, Integer> entry : freqs.entrySet()) {
            System.out.println("Char: " + entry.getKey() + " Count: " + entry.getValue());
        }
    }
}