// Module-level blacklist of words/phrases to exclude from extraction results.
// Starts empty; entries are added at runtime via addBlock().
const blacklist = [];

/**
 * Returns a new array containing each distinct value of `arr` exactly once.
 * A Set iterates in insertion order, so the first occurrence of every value
 * keeps its relative position.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} A fresh array without repeated values.
 */
function removeDuplicates(arr) {
  return Array.from(new Set(arr));
}

/**
 * Extracts the distinct English words found in `input`.
 *
 * Every "[...]" bracketed span is removed first; the remaining text is then
 * scanned for runs of ASCII letters and hyphens delimited by word boundaries.
 * Words present in the module-level `blacklist` (exact, case-sensitive match)
 * are dropped, and repeated words are collapsed by removeDuplicates().
 *
 * @param {string} input - Text to scan.
 * @returns {string[]} Unique, non-blacklisted words; an empty array when the
 *   text contains no match.
 */
export function extractEnglishWords(input) {
  // Strip every "[...]" span so its contents are never reported as words.
  const pureText = input.replace(/\[[^\]]*\]/g, '');

  // String.prototype.match with a /g regex yields null (not []) on no match.
  const matches = pureText.match(/\b[a-zA-Z-]+\b/g);
  if (!matches) {
    return [];
  }

  const allowedWords = matches.filter((word) => !blacklist.includes(word));
  return removeDuplicates(allowedWords);
}

/**
 * Builds candidate English phrases, one per segment of `input`.
 *
 * `input` is split on `seperator`; each segment is passed to
 * extractEnglishWords() and the returned words are joined with single spaces.
 * A phrase is kept only when it consists of at least two words and neither
 * the phrase itself nor its lower-cased form is in the module-level
 * `blacklist`. Every accepted phrase is also logged to the console.
 *
 * NOTE(review): extractEnglishWords() de-duplicates its result, so a word that
 * repeats inside one segment appears only once in the phrase — confirm this
 * is intended.
 *
 * @param {string} input - Text containing one candidate phrase per segment.
 * @param {string} [seperator='\n'] - Delimiter between segments (the
 *   parameter's spelling is kept as-is for compatibility).
 * @returns {string[]} Accepted phrases in segment order; phrases are not
 *   de-duplicated across segments.
 */
export function extractEnglishPhrases(input, seperator = '\n') {
  const result = [];

  for (const line of input.split(seperator)) {
    const words = extractEnglishWords(line);
    if (words.length < 2) {
      continue; // a single word (or nothing) is not a phrase
    }

    const unverifiedPhrase = words.join(' ').trim();
    if (!unverifiedPhrase) {
      continue;
    }

    // Reject the phrase when either its exact or its lower-cased form is blocked.
    const isBlocked =
      blacklist.includes(unverifiedPhrase) ||
      blacklist.includes(unverifiedPhrase.toLowerCase());
    if (isBlocked) {
      continue;
    }

    console.log(`unverifiedPhrase: "${unverifiedPhrase}"`);
    result.push(unverifiedPhrase);
  }

  return result;
}

/**
 * Adds `text` to the module-level blacklist so that later calls to
 * extractEnglishWords() / extractEnglishPhrases() no longer return it.
 * Words are matched exactly; phrases are additionally compared in their
 * lower-cased form. The call is logged to the console every time.
 *
 * @param {string} text - Word or phrase to block.
 * @returns {void}
 */
export function addBlock(text) {
  // Skip values already present: a duplicate entry never changes a lookup
  // result, it only lengthens every linear scan of the blacklist.
  if (!blacklist.includes(text)) {
    blacklist.push(text);
  }
  console.log(`addBlock: "${text}"`);
}

// Default export bundling the same three functions that are also exported by name.
export default { extractEnglishWords, extractEnglishPhrases, addBlock };
