Well, the answer is already there, I still post my code for the little work that has gone into it.
I think that a mysql db is not ideal for storing this kind of data. I would suggest something like memcachedb, so you can easily access a keyword by using it as an index to fetch the count from the db. Persisting those keywords in a high load environment may cause problems with a mysql db.
$keyWords = extractKeyWords($text);
saveWords($keyWords);
function extractKeyWords($text) {
$result = array();
if(preg_match_all('#([\w]+)\b#i', $text, $matches)) {
foreach($matches[1] as $key => $match) {
// encode found word to safely use as key in array
$encodedKey = base64_encode(strtolower($match));
if(wordIsValid($match)) {
if(array_key_exists($encodedKey, $result)) {
$result[$encodedKey] = ++$result[$encodedKey];
} else {
$result[$encodedKey] = 1;
}
}
}
}
return $result;
}
function wordIsValid($word) {
$wordsToIgnore = array("to", "and", "if", "or", "by", "me", "you", "it", "as", "be", "the", "in");
// don't use words with a single character
if(strlen($word) > 1) {
if(in_array(strtolower($word), $wordsToIgnore)) {
return false;
} else {
return true;
}
} else {
return false;
}
}
// not implemented yet ;)
function saveWords($arrayOfWords) {
foreach($arrayOfWords as $word => $count) {
echo base64_decode($word).":".$count."\n";
}
}