// standardization.js (3.77 KB)
/* eslint-disable no-use-before-define */
/* eslint-disable no-restricted-syntax */
const { config } = require('dotenv');
const Hangul = require('hangul-js');

/**
 * Breaks `word` into individual letters with `Hangul.disassemble` and
 * replaces each one with its standardized form.
 *
 * Letters accepted by `isKorean` go through the consonant or vowel
 * standardizer. Every other letter is looked at together with its immediate
 * neighbours: `isVowelSpecial` decides whether it stands in for a vowel or
 * for a consonant, and the matching "special" standardizer is applied.
 *
 * @param {string} word - Text to standardize.
 * @returns {string} Concatenation of the standardized letters.
 */
const standardize = (word) => {
  const letters = Hangul.disassemble(word);
  const lastIndex = letters.length - 1;

  // Standardizes the letter at `position`, peeking at its neighbours when needed.
  const convert = (letter, position) => {
    if (isKorean(letter)) {
      if (Hangul.isConsonant(letter)) {
        return standardizeConsonent(letter);
      }
      return standardizeVowel(letter);
    }

    // A missing neighbour (start or end of the word) is represented by ''.
    const before = position > 0 ? letters[position - 1] : '';
    const after = position < lastIndex ? letters[position + 1] : '';

    if (isVowelSpecial(before, letter, after)) {
      return standardizeSpecialVowel(letter);
    }
    return standardizeSpecialConsonent(letter);
  };

  return letters.reduce((text, letter, position) => text + convert(letter, position), '');
};

/**
 * Reports whether `word` is free of ASCII digits, Latin letters and the
 * listed special characters.
 *
 * Note that this is a negative check: any text that contains none of those
 * characters (including an empty string) yields `true`.
 *
 * @param {string} word - Text (usually a single letter) to inspect.
 * @returns {boolean} `false` if any digit, Latin letter or listed special
 *   character is present, otherwise `true`.
 */
const isKorean = (word) => {
  const rejectedPatterns = [
    /[0-9]/, // digits
    /[a-zA-Z]/, // Latin letters
    /[~!@#$%^&*()_+|<>?:{}]/, // special characters
  ];

  return !rejectedPatterns.some((pattern) => pattern.test(word));
};

/**
 * Maps a Korean consonant onto the representative consonant of its group.
 *
 * Groups: ㄱ/ㄲ/ㅋ → ㄱ, ㄷ/ㄸ/ㅌ → ㄷ, ㅂ/ㅃ/ㅍ → ㅂ, ㅅ/ㅆ → ㅅ, ㅈ/ㅉ/ㅊ → ㅈ.
 *
 * @param {string} letter - A single consonant.
 * @returns {string} The group's representative consonant, or `letter`
 *   unchanged when it belongs to no group.
 */
const standardizeConsonent = (letter) => {
  // Every value needs its own `case` label: `case ('ㄱ', 'ㄲ', 'ㅋ'):` is a
  // comma expression that evaluates to its last operand only, so just 'ㅋ'
  // would ever match.
  switch (letter) {
    case 'ㄱ':
    case 'ㄲ':
    case 'ㅋ':
      return 'ㄱ';
    case 'ㄷ':
    case 'ㄸ':
    case 'ㅌ':
      return 'ㄷ';
    case 'ㅂ':
    case 'ㅃ':
    case 'ㅍ':
      return 'ㅂ';
    case 'ㅅ':
    case 'ㅆ':
      return 'ㅅ';
    case 'ㅈ':
    case 'ㅉ':
    case 'ㅊ':
      return 'ㅈ';
    default:
      return letter;
  }
};

/**
 * Maps a Korean vowel onto the representative vowel of its group.
 *
 * Groups: ㅏ/ㅑ → ㅏ, ㅗ/ㅛ → ㅗ, ㅐ/ㅒ/ㅔ/ㅖ/ㅙ/ㅚ/ㅝ/ㅞ → ㅐ, ㅜ/ㅠ → ㅜ,
 * ㅓ/ㅕ → ㅓ, ㅟ/ㅢ/ㅣ → ㅣ.
 *
 * @param {string} letter - A single vowel.
 * @returns {string} The group's representative vowel, or `letter` unchanged
 *   when it belongs to no group.
 */
const standardizeVowel = (letter) => {
  // Every value needs its own `case` label: a parenthesised, comma-separated
  // list is a comma expression and only its last operand would be compared.
  switch (letter) {
    case 'ㅏ':
    case 'ㅑ':
      return 'ㅏ';
    case 'ㅗ':
    case 'ㅛ':
      return 'ㅗ';
    case 'ㅐ':
    case 'ㅒ':
    case 'ㅔ':
    case 'ㅖ':
    case 'ㅙ':
    case 'ㅚ':
    case 'ㅝ':
    case 'ㅞ':
      return 'ㅐ';
    case 'ㅜ':
    case 'ㅠ':
      return 'ㅜ';
    case 'ㅓ':
    case 'ㅕ':
      return 'ㅓ';
    case 'ㅟ':
    case 'ㅢ':
    case 'ㅣ':
      return 'ㅣ';
    default:
      return letter;
  }
};

/**
 * Decides, from its neighbours alone, whether a non-Korean letter sits in a
 * vowel position.
 *
 * The answer is `true` when the preceding letter is a Korean consonant and
 * the following letter is either absent (`''`) or also a Korean consonant.
 * The letter itself is not inspected.
 *
 * @param {string} preLetter - Letter before the candidate, or `''` at the start.
 * @param {string} letter - The candidate letter (unused by the check).
 * @param {string} nextLetter - Letter after the candidate, or `''` at the end.
 * @returns {boolean} Whether the candidate should be treated as a vowel.
 */
const isVowelSpecial = (preLetter, letter, nextLetter) => {
  // True when `jamo` passes both the isKorean check and the consonant check.
  const isKoreanConsonant = (jamo) => Boolean(isKorean(jamo) && Hangul.isConsonant(jamo));

  if (preLetter === '') {
    return false;
  }
  if (!isKoreanConsonant(preLetter)) {
    return false;
  }
  if (nextLetter === '') {
    return true;
  }
  return isKoreanConsonant(nextLetter);
};

/**
 * Maps a non-Korean character that stands in for a consonant onto the
 * Korean consonant it represents.
 *
 * Groups: g/k/G/K → ㄱ, b/v/B/V → ㅂ, P/p/F/f → ㅍ, n/L/N → ㄴ,
 * s/A/S/C/c → ㅅ, d/t/E → ㄷ, o/O → ㅇ, r/R → ㄹ, j/z/J/Z → ㅈ,
 * m/口/M → ㅁ, h/H → ㅎ.
 *
 * @param {string} letter - A single character.
 * @returns {string} The mapped Korean consonant, or `letter` unchanged when
 *   no mapping exists.
 */
const standardizeSpecialConsonent = (letter) => {
  // Every value needs its own `case` label: a parenthesised, comma-separated
  // list is a comma expression and only its last operand would be compared.
  switch (letter) {
    case 'g':
    case 'k':
    case 'G':
    case 'K':
      return 'ㄱ';
    case 'b':
    case 'v':
    case 'B':
    case 'V':
      return 'ㅂ';
    case 'P':
    case 'p':
    case 'F':
    case 'f':
      return 'ㅍ';
    case 'n':
    case 'L':
    case 'N':
      return 'ㄴ';
    case 's':
    case 'A':
    case 'S':
    case 'C':
    case 'c':
      return 'ㅅ';
    case 'd':
    case 't':
    case 'E':
      return 'ㄷ';
    case 'o':
    case 'O':
      return 'ㅇ';
    case 'r':
    case 'R':
      return 'ㄹ';
    case 'j':
    case 'z':
    case 'J':
    case 'Z':
      return 'ㅈ';
    case 'm':
    case '口':
    case 'M':
      return 'ㅁ';
    case 'h':
    case 'H':
      return 'ㅎ';
    default:
      return letter;
  }
};

/**
 * Maps a non-Korean character that stands in for a vowel onto the Korean
 * vowel it represents.
 *
 * Groups: a/A → ㅏ, u/U → ㅓ, @/H → ㅐ, l/i/I/!/1 → ㅣ.
 *
 * @param {string} letter - A single character.
 * @returns {string} The mapped Korean vowel, or `letter` unchanged when no
 *   mapping exists.
 */
const standardizeSpecialVowel = (letter) => {
  // Every value needs its own `case` label: a parenthesised, comma-separated
  // list is a comma expression and only its last operand would be compared.
  switch (letter) {
    case 'a':
    case 'A':
      return 'ㅏ';
    case 'u':
    case 'U':
      return 'ㅓ';
    case '@':
    case 'H':
      return 'ㅐ';
    case 'l':
    case 'i':
    case 'I':
    case '!':
    case 1: // numeric 1 was accepted before; kept so existing callers behave the same
    case '1':
      return 'ㅣ';
    default:
      return letter;
  }
};

/**
 * Strips every ASCII digit, Latin letter and listed special character from
 * `word`, leaving all other characters in place.
 *
 * @param {string} word - Text to clean.
 * @returns {string} `word` without the removed characters.
 */
const removeSpecial = (word) => {
  const strippedPatterns = [
    /[0-9]/g, // digits
    /[a-zA-Z]/g, // Latin letters
    /[~!@#$%^&*()_+|<>?:{}]/g, // special characters
  ];

  // First pass calls `word.replace`, later passes work on the previous result.
  return strippedPatterns.reduce((text, pattern) => text.replace(pattern, ''), word);
};

// Only `standardize` and `removeSpecial` are exported; the other helpers in
// this file are not part of the exported object.
module.exports = { standardize, removeSpecial };