emtion-recognition-landmarks.ipynb 23.6 KB

Raw Blame History Permalink

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dlib\n",
    "import cv2\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow as tf\n",
    "import math\n",
    "import os\n",
    "import pathlib\n",
    "import time\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img\n",
    "from tensorflow.keras.models import load_model\n",
    "from tensorflow.keras import regularizers\n",
    "from tensorflow import keras\n",
    "from imutils import face_utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Face detector, landmark predictor and face-recognition model (dlib)\n",
    "LANDMARK_MODEL_PATH = \"./models/shape_predictor_68_face_landmarks.dat\"\n",
    "FACE_REC_MODEL_PATH = 'models/dlib_face_recognition_resnet_model_v1.dat'\n",
    "\n",
    "detector = dlib.get_frontal_face_detector()\n",
    "predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)\n",
    "facerec = dlib.face_recognition_model_v1(FACE_REC_MODEL_PATH)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       happy   sad  fear  surprise  neutral  angry\n",
      "train   7215  4830  4097      3171     4965   3995\n",
      "      happy   sad  fear  surprise  neutral  angry\n",
      "test   1774  1247  1024       831     1233    958\n"
     ]
    }
   ],
   "source": [
    "train_dir = '../fer2013/train/'\n",
    "test_dir = '../fer2013/test/'\n",
    "\n",
    "row, col = 48, 48\n",
    "classes = 7\n",
    "\n",
    "def count_exp(path, set_):\n",
    "    dict_={}\n",
    "    for expression in os.listdir(path):\n",
    "        dir_ = path + expression\n",
    "        dict_[expression] = len(os.listdir(dir_))\n",
    "    df = pd.DataFrame(dict_, index=[set_])\n",
    "    return df\n",
    "# Count the images per expression for both splits and print each table\n",
    "train_count, test_count = (\n",
    "    count_exp(split_dir, split_name)\n",
    "    for split_dir, split_name in ((train_dir, 'train'), (test_dir, 'test'))\n",
    ")\n",
    "for split_counts in (train_count, test_count):\n",
    "    print(split_counts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def euclidean_distance(pt1, pt2):\n",
    "    distance = 0\n",
    "    for i in range(2):\n",
    "        distance += (pt1[i] - pt2[i]) ** 2\n",
    "    return math.sqrt(distance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26388\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'numpy.ndarray' object has no attribute 'append'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0mTraceback (most recent call last)",
      "\u001b[0;32m<ipython-input-5-babf77cb1751>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     24\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m                     \u001b[0mDistLandmark\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0meuclidean_distance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     27\u001b[0m             \u001b[0mDistLandmark\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDistLandmark\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     28\u001b[0m             \u001b[0mDistExpression\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDistExpression\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDistLandmark\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'append'"
     ]
    }
   ],
   "source": [
    "# Train-set preprocessing.\n",
    "# For every training image: detect faces, extract the dlib landmarks and store the\n",
    "# flattened matrix of pairwise landmark distances, one row per detected face.\n",
    "# One landmarkDist.npy file is written per expression folder.\n",
    "\n",
    "for expression in os.listdir(train_dir):\n",
    "    image_dir = os.path.join(train_dir, expression)\n",
    "    if not os.path.isdir(image_dir):\n",
    "        continue  # ignore stray files next to the expression folders\n",
    "    # List the folder once: re-listing it for every image is quadratic and\n",
    "    # relies on os.listdir returning the same order on every call.\n",
    "    image_names = os.listdir(image_dir)\n",
    "    out_dir = '../fer2013_Distance/train/' + expression\n",
    "    filename = out_dir + '/landmarkDist.npy'\n",
    "    print(len(image_names))\n",
    "\n",
    "    face_rows = []\n",
    "    for k, image_name in enumerate(image_names):\n",
    "        if k % 100 == 0:\n",
    "            print(k)\n",
    "        img = dlib.load_rgb_image(os.path.join(image_dir, image_name))\n",
    "        faces = detector(img, 1)\n",
    "\n",
    "        # One feature row per detected face.\n",
    "        for face in faces:\n",
    "            # Predict the landmarks and convert them to a numpy array\n",
    "            shape = face_utils.shape_to_np(predictor(img, face))\n",
    "            # Pairwise distances between all landmarks, flattened row-major\n",
    "            # (entry i * n + j is the distance between landmarks i and j).\n",
    "            # A fresh array is built for every face: sharing one per-image list\n",
    "            # that gets turned into an ndarray raised\n",
    "            # \"'numpy.ndarray' object has no attribute 'append'\" on images\n",
    "            # containing more than one face.\n",
    "            offsets = shape[:, None, :] - shape[None, :, :]\n",
    "            DistLandmark = np.linalg.norm(offsets, axis=-1).reshape(-1)\n",
    "            face_rows.append(DistLandmark)\n",
    "\n",
    "    # Stack once at the end; np.append per face copies the whole array each time.\n",
    "    if face_rows:\n",
    "        DistExpression = np.vstack(face_rows)\n",
    "    else:\n",
    "        DistExpression = np.empty((0, 4624), float)  # 4624 = 68 * 68 landmark pairs\n",
    "    print(DistExpression.shape)\n",
    "    os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing folders\n",
    "    np.save(filename, DistExpression)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "958\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "(642, 4624)\n",
      "1024\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "(646, 4624)\n",
      "831\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "(611, 4624)\n",
      "1247\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "1100\n",
      "1200\n",
      "(665, 4624)\n",
      "1233\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "1100\n",
      "1200\n",
      "(931, 4624)\n",
      "1774\n",
      "0\n",
      "100\n",
      "200\n",
      "300\n",
      "400\n",
      "500\n",
      "600\n",
      "700\n",
      "800\n",
      "900\n",
      "1000\n",
      "1100\n",
      "1200\n",
      "1300\n",
      "1400\n",
      "1500\n",
      "1600\n",
      "1700\n",
      "(1381, 4624)\n"
     ]
    }
   ],
   "source": [
    "# Test-set preprocessing.\n",
    "# For every test image: detect faces, extract the dlib landmarks and store the\n",
    "# flattened matrix of pairwise landmark distances, one row per detected face.\n",
    "# One landmarkDist.npy file is written per expression folder.\n",
    "\n",
    "for expression in os.listdir(test_dir):\n",
    "    image_dir = os.path.join(test_dir, expression)\n",
    "    if not os.path.isdir(image_dir):\n",
    "        continue  # ignore stray files next to the expression folders\n",
    "    # List the folder once: re-listing it for every image is quadratic and\n",
    "    # relies on os.listdir returning the same order on every call.\n",
    "    image_names = os.listdir(image_dir)\n",
    "    out_dir = '../fer2013_Distance/test/' + expression\n",
    "    filename = out_dir + '/landmarkDist.npy'\n",
    "    print(len(image_names))\n",
    "\n",
    "    face_rows = []\n",
    "    for k, image_name in enumerate(image_names):\n",
    "        if k % 100 == 0:\n",
    "            print(k)\n",
    "        img = dlib.load_rgb_image(os.path.join(image_dir, image_name))\n",
    "        faces = detector(img, 1)\n",
    "\n",
    "        # One feature row per detected face.\n",
    "        for face in faces:\n",
    "            # Predict the landmarks and convert them to a numpy array\n",
    "            shape = face_utils.shape_to_np(predictor(img, face))\n",
    "            # Pairwise distances between all landmarks, flattened row-major\n",
    "            # (entry i * n + j is the distance between landmarks i and j).\n",
    "            # A fresh array is built for every face: sharing one per-image list\n",
    "            # that gets turned into an ndarray fails with an AttributeError on\n",
    "            # images containing more than one face.\n",
    "            offsets = shape[:, None, :] - shape[None, :, :]\n",
    "            DistLandmark = np.linalg.norm(offsets, axis=-1).reshape(-1)\n",
    "            face_rows.append(DistLandmark)\n",
    "\n",
    "    # Stack once at the end; np.append per face copies the whole array each time.\n",
    "    if face_rows:\n",
    "        DistExpression = np.vstack(face_rows)\n",
    "    else:\n",
    "        DistExpression = np.empty((0, 4624), float)  # 4624 = 68 * 68 landmark pairs\n",
    "    print(DistExpression.shape)\n",
    "    os.makedirs(out_dir, exist_ok=True)  # np.save does not create missing folders\n",
    "    np.save(filename, DistExpression)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of faces detected: 1\n"
     ]
    },
    {
     "ename": "error",
     "evalue": "OpenCV(4.5.1) /tmp/pip-req-build-_a0ur5ao/opencv/modules/highgui/src/window.cpp:651: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'\n",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m\u001b[0m",
      "\u001b[0;31merror\u001b[0mTraceback (most recent call last)",
      "\u001b[0;32m<ipython-input-97-5173bf530f2f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0;31m# Show the image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m     \u001b[0mcv2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Output\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     19\u001b[0m     \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmap\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'gray'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m     \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31merror\u001b[0m: OpenCV(4.5.1) /tmp/pip-req-build-_a0ur5ao/opencv/modules/highgui/src/window.cpp:651: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'\n"
     ]
    }
   ],
   "source": [
    "# Visual sanity check: draw the detected landmarks on one image per expression.\n",
    "for expression in os.listdir(train_dir):\n",
    "    image_dir = os.path.join(train_dir, expression)\n",
    "    # Index 1 = second directory entry, in os.listdir order\n",
    "    img = dlib.load_rgb_image(os.path.join(image_dir, os.listdir(image_dir)[1]))\n",
    "    faces = detector(img, 1)\n",
    "    # Print the number of detected faces\n",
    "    print(\"Number of faces detected: {}\".format(len(faces)))\n",
    "\n",
    "    # For each detected face, find the landmarks.\n",
    "    for face in faces:\n",
    "        # Make the prediction and transform it to a numpy array\n",
    "        shape = face_utils.shape_to_np(predictor(img, face))\n",
    "\n",
    "        for x, y in shape:\n",
    "            # A zero-length line marks a single pixel\n",
    "            cv2.line(img, (x, y), (x, y), (0, 0, 255), 1)\n",
    "\n",
    "    # Show the image inline with matplotlib only. cv2.imshow needs an OpenCV build\n",
    "    # with GUI support and raised \"The function is not implemented\" here, which\n",
    "    # aborted the cell before anything was drawn.\n",
    "    plt.imshow(img)  # RGB data, so a colormap argument would be ignored\n",
    "    plt.title(expression)\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels_dict_ = {'angry': 0, 'fear': 1, 'happy': 2, 'neutral': 3, 'sad': 4, 'surprise': 5}\n",
    "def get_key(val):\n",
    "    for key, value in labels_dict_.items():\n",
    "        if(value == val):\n",
    "            return key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "title =  매트릭스 2 - 네오 vs 스미스들 2\n",
      "video.rating =  4.9316239\n",
      "video.duration =  00:03:54\n",
      "best.resolution 1280x534\n",
      "frame_size=(1280, 534)\n",
      "fps=23\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-6-ca51bcf4e2ca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     74\u001b[0m     \u001b[0mcv2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'frame'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mframeBGR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m     \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwaitKey\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     77\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m         \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "import cv2\n",
    "import pafy\n",
    "\n",
    "########### Use a YouTube video as the source instead of a camera ############\n",
    "url = 'https://www.youtube.com/watch?v=BtkzHvIJFKc'\n",
    "video = pafy.new(url)\n",
    "print('title = ', video.title)\n",
    "print('video.rating = ', video.rating)\n",
    "print('video.duration = ', video.duration)\n",
    "\n",
    "best = video.getbest(preftype='mp4')     # 'webm','3gp'\n",
    "print('best.resolution', best.resolution)\n",
    "\n",
    "cap = cv2.VideoCapture(best.url)\n",
    "#########################################################\n",
    "\n",
    "#cap = cv2.VideoCapture(0) # camera 0\n",
    "\n",
    "# Read the frame size of the source video\n",
    "frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "\n",
    "# Grab one frame up front to derive the resized dimensions; fail early if the\n",
    "# stream cannot be read instead of crashing on img_bgr.shape.\n",
    "retval, img_bgr = cap.read()\n",
    "if not retval:\n",
    "    cap.release()\n",
    "    raise RuntimeError('Could not read a frame from ' + url)\n",
    "padding_size = 0\n",
    "resized_width = 1920\n",
    "video_size = (resized_width, int(img_bgr.shape[0] * resized_width // img_bgr.shape[1]))\n",
    "output_size = (resized_width, int(img_bgr.shape[0] * resized_width // img_bgr.shape[1] + padding_size * 2))\n",
    "\n",
    "# Frame rate of the source video\n",
    "frameRate = int(cap.get(cv2.CAP_PROP_FPS))\n",
    "\n",
    "frame_size = (frameWidth, frameHeight)\n",
    "print('frame_size={}'.format(frame_size))\n",
    "print('fps={}'.format(frameRate))\n",
    "\n",
    "# Codec\n",
    "#fourcc = cv2.VideoWriter_fourcc(*'DIVX')  # ('D', 'I', 'V', 'X')\n",
    "fourcc = cv2.VideoWriter_fourcc(*'XVID')\n",
    "timestamps = [cap.get(cv2.CAP_PROP_POS_MSEC)]\n",
    "prev_time = 0\n",
    "FPS = frameRate\n",
    "\n",
    "# Load the EfficientNet model.\n",
    "# NOTE: the per-face emotion prediction that used `model` is disabled in this\n",
    "# cell; frames are only recorded and displayed.\n",
    "model = load_model('../checkpoint/er-best-efficientNet1-bt32-model-SGD.h5')\n",
    "\n",
    "# Video file the frames are recorded to (frames are written at their native size)\n",
    "out1 = cv2.VideoWriter('./data/record0.mp4',fourcc, frameRate, frame_size)\n",
    "\n",
    "try:\n",
    "    while True:\n",
    "        # Read the video one frame at a time\n",
    "        retval, frameBGR = cap.read()\n",
    "        if not retval:\n",
    "            # End of stream or read failure: frameBGR is None, so stop here\n",
    "            # instead of passing None on to OpenCV.\n",
    "            break\n",
    "\n",
    "        # Write to the video file\n",
    "        out1.write(frameBGR)\n",
    "\n",
    "        # Show on screen\n",
    "        cv2.imshow('frame', frameBGR)\n",
    "\n",
    "        key = cv2.waitKey(25) & 0xFF\n",
    "        if key == 27:  # ESC\n",
    "            break\n",
    "finally:\n",
    "    # Always release the capture and the writer, including when the loop is\n",
    "    # interrupted, so the output file is finalised.\n",
    "    cap.release()\n",
    "    out1.release()\n",
    "\n",
    "    for i in range(1, 5):\n",
    "        cv2.destroyAllWindows()\n",
    "        cv2.waitKey(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.6666666666666666\n"
     ]
    }
   ],
   "source": [
    "import cv2\n",
    "import dlib\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "detector = dlib.get_frontal_face_detector()\n",
    "predictor = dlib.shape_predictor(\"./models/shape_predictor_68_face_landmarks.dat\")\n",
    "facerec = dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')\n",
    "model = load_model('../checkpoint/er-best-efficientNet1-bt32-model-SGD.h5')\n",
    "\n",
    "# Saved face descriptors keyed by person name.\n",
    "# SECURITY: allow_pickle=True can execute code stored in the file - only load a trusted file.\n",
    "descs = np.load('img/descs2.npy', allow_pickle=True)[()]\n",
    "\n",
    "video_path = './data/zoom_0.mp4'\n",
    "cap = cv2.VideoCapture(video_path)\n",
    "\n",
    "#cap = cv2.VideoCapture(0) # camera 0\n",
    "\n",
    "# Read the frame size and frame rate of the source video\n",
    "frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "frame_size = (frameWidth, frameHeight)\n",
    "fps = cap.get((cv2.CAP_PROP_FPS))\n",
    "\n",
    "# Grab one frame up front to derive the resized dimensions; fail early if the\n",
    "# file cannot be read instead of crashing on img_bgr.shape.\n",
    "retval, img_bgr = cap.read()\n",
    "if not retval:\n",
    "    cap.release()\n",
    "    raise RuntimeError('Could not read a frame from ' + video_path)\n",
    "padding_size = 0\n",
    "resized_width = 1920\n",
    "video_size = (resized_width, int(img_bgr.shape[0] * resized_width // img_bgr.shape[1]))\n",
    "timestamps = [cap.get(cv2.CAP_PROP_POS_MSEC)]\n",
    "prev_time = 0\n",
    "\n",
    "fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')\n",
    "# Every frame is resized to video_size before it is written, so the writer has\n",
    "# to be opened with video_size rather than the native frame_size.\n",
    "out1 = cv2.VideoWriter('./data/record0.mp4', fourcc, fps, video_size)\n",
    "\n",
    "try:\n",
    "    while True:\n",
    "        # Read the video one frame at a time\n",
    "        retval, frameBGR = cap.read()\n",
    "        if not retval:\n",
    "            # End of video or read failure: frameBGR is None, so stop here\n",
    "            # instead of passing None to cv2.resize.\n",
    "            break\n",
    "        current_time = time.time() - prev_time\n",
    "\n",
    "        frameBGR = cv2.resize(frameBGR, video_size)\n",
    "        frame = cv2.cvtColor(frameBGR, cv2.COLOR_BGR2RGB)\n",
    "\n",
    "        # Run detection only when more than 3 seconds have passed since the last run\n",
    "        if current_time > 3:\n",
    "            prev_time = time.time()\n",
    "            faces = detector(frame, 1)\n",
    "\n",
    "            for face in faces:\n",
    "                shape = predictor(frame, face)\n",
    "                face_descriptor = facerec.compute_face_descriptor(frame, shape)\n",
    "\n",
    "                # Clamp the box to the image: a negative coordinate would be read\n",
    "                # as an index from the end and give an empty or wrong crop.\n",
    "                top, bottom = max(face.top(), 0), min(face.bottom(), frame.shape[0])\n",
    "                left, right = max(face.left(), 0), min(face.right(), frame.shape[1])\n",
    "                if bottom <= top or right <= left:\n",
    "                    continue\n",
    "\n",
    "                img = cv2.resize(frame[top:bottom, left:right], dsize=(224, 224), interpolation=cv2.INTER_CUBIC)\n",
    "                imgarr = np.array(img).reshape(1, 224, 224, 3) / 255\n",
    "                # Predict once per face. argmax over the class scores replaces\n",
    "                # predict_classes, which newer TensorFlow releases no longer provide.\n",
    "                # Assumes a multi-class (softmax-style) output - TODO confirm.\n",
    "                class_id = int(np.argmax(model.predict(imgarr), axis=-1)[0])\n",
    "                # get_key returns None for an index without a label; fall back to a\n",
    "                # string so building the caption below cannot fail.\n",
    "                emotion = get_key(class_id) or 'unknown'\n",
    "                print(emotion)\n",
    "\n",
    "                last_found = {'name': 'unknown', 'dist': 0.6, 'color': (0,0,255)}\n",
    "\n",
    "                # Keep the saved descriptor closest to this face, if closer than 0.6.\n",
    "                # NOTE(review): the comparison assumes one descriptor per name - confirm.\n",
    "                for name, saved_desc in descs.items():\n",
    "                    dist = np.linalg.norm([face_descriptor] - saved_desc, axis=1)\n",
    "                    if dist < last_found['dist']:\n",
    "                        last_found = {'name': name, 'dist': dist, 'color': (255,255,255)}\n",
    "\n",
    "                cv2.rectangle(frameBGR, pt1=(face.left(), face.top()), pt2=(face.right(), face.bottom()), color=last_found['color'], thickness=2)\n",
    "                cv2.putText(frameBGR, last_found['name'] + ',' + emotion, org=(face.left(), face.top()), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=last_found['color'], thickness=2)\n",
    "\n",
    "            # NOTE(review): POS_MSEC is in milliseconds; dividing by 60 gives neither\n",
    "            # seconds nor minutes - confirm the intended unit.\n",
    "            print(cap.get(cv2.CAP_PROP_POS_MSEC) / 60)\n",
    "\n",
    "        # Write to the video file\n",
    "        out1.write(frameBGR)\n",
    "\n",
    "        # Show on screen\n",
    "        cv2.imshow('frame', frameBGR)\n",
    "\n",
    "        key = cv2.waitKey(25) & 0xFF\n",
    "        if key == 27:  # ESC\n",
    "            break\n",
    "finally:\n",
    "    # Always release the capture and the writer, including when the loop is\n",
    "    # interrupted, so the output file is finalised.\n",
    "    cap.release()\n",
    "    out1.release()\n",
    "\n",
    "    for i in range(1, 5):\n",
    "        cv2.destroyAllWindows()\n",
    "        cv2.waitKey(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}