I'm trying to write code using NLP (I used PHP-ML Library), This is the code I tried to write but when I try check Non Book title for example phone (Tecno POP 7 Pro (Uyuni Blue, 2GB RAM,64GB Storage) | Type C Port | 12MP Dual Camera | Up to 4GB RAM with Memory Fusion). It is saying that it comes under book category, though it is non book category .Please help me to fix
<?php
require_once 'vendor/autoload.php';
use Phpml\Classification\KNearestNeighbors;
$book_titles = ["To Kill a Mockingbird", "The Great Gatsby", "1984", "The Lord of the Rings", "Pride and Prejudice"];
$non_book_titles = ["Breaking News", "How to Bake a Cake", "The Latest Technology Trends", "10 Best Places to Visit", "The History of Jazz"];
// Preprocess the dataset
function preprocess($text) {
$text = strtolower($text);
$text = preg_replace('/[^a-z0-9\s]/', '', $text);
$stopwords = ['a', 'an', 'the', 'and', 'or', 'in', 'on', 'at', 'by'];
$text = str_replace($stopwords, '', $text);
return $text;
}
// Preprocess the book titles
$book_titles_preprocessed = array();
foreach ($book_titles as $title) {
$preprocessed_title = preprocess($title);
$book_titles_preprocessed[] = $preprocessed_title;
}
// Preprocess the non-book titles
$non_book_titles_preprocessed = array();
foreach ($non_book_titles as $title) {
$preprocessed_title = preprocess($title);
$non_book_titles_preprocessed[] = $preprocessed_title;
}
// Combine the preprocessed book and non-book titles into one array
$titles_preprocessed = array_merge($book_titles_preprocessed, $non_book_titles_preprocessed);
// Create the training data for the k-nearest neighbors classifier
$trainingData = array();
$labels = array();
foreach ($titles_preprocessed as $title) {
if (in_array($title, $non_book_titles_preprocessed)) {
// Label non-book titles as "Other"
$trainingData[] = [$title];
$labels[] = "Other";
} else {
// Label book titles based on their category
$trainingData[] = [$title];
$labels[] = "Book";
}
}
// Create the k-nearest neighbors classifier
$classifier = new KNearestNeighbors();
$classifier->train($trainingData, $labels);
// Classify a new title and print the predicted category
$newTitle = "The Great Gatsby: A Novel by F. Scott Fitzgerald";
$newTitlePreprocessed = preprocess($newTitle);
$predictedCategory = $classifier->predict([$newTitlePreprocessed]);
if ($predictedCategory == "Other") {
echo "The title \"$newTitle\" is not a book.\n";
} else {
echo "The title \"$newTitle\" belongs to the books category.\n";
}
?>