I have this array of string:
let prompts = [
"This is some word",
"This is some more word",
"This is some more word",
"This is some more word",
"This is some more word",
"This is something else"
];
Is it possible to write a function that can find repetitions in the array (e.g: This is some more word
) and replace it with "REPEATED" while keeping the first one intact. We also need the ability to set a number for the words to be considered as "repetitions" otherwise it will match pretty much everything.
When I call the function. the output will be:
[
"This is some word",
"This is some more word",
"REPEATED",
"REPEATED",
"REPEATED",
"This is something else"
];
This has been troubling me for a long time before I reach out for help, because the string in the prompts
variable is totally random and I want to eliminate repetitions in OpenAI prompts. Thus this question.
Thanks in advance!
Update: This is the closest I got so far
function detectRepetitions(arr, minWords) {
const uniqueWords = new Set();
const repeatedWords = new Set();
// Iterate over the array and find repeated words
for (let i = 0; i < arr.length; i++) {
const words = arr[i].split(" ");
for (let j = 0; j <= words.length - minWords; j++) {
const subArray = words.slice(j, j + minWords);
const subString = subArray.join(" ");
if (uniqueWords.has(subString)) {
repeatedWords.add(subString);
} else {
uniqueWords.add(subString);
}
}
}
// Replace repeated words with 'REPEATED'
const result = arr.map((sentence) => {
let words = sentence.split(" ");
let repeatedFound = false;
for (let i = 0; i <= words.length - minWords; i++) {
const subArray = words.slice(i, i + minWords);
const subString = subArray.join(" ");
if (repeatedWords.has(subString)) {
if (repeatedFound) {
words.splice(i, minWords, "REPEATED");
} else {
repeatedFound = true;
}
}
}
return words.join(" ");
});
return result;
}
let prompt = [
"This is some word",
"This is some more word",
"This is some more word",
"This is some more word",
"This is some more word",
"This is something else",
];
const minWords = 4;
const result = detectRepetitions(prompt, minWords);
console.log(result);