Skip to content

Commit

Permalink
Huge refactoring to face recognition
Browse files Browse the repository at this point in the history
  • Loading branch information
henryruhs committed Aug 11, 2023
1 parent 83d913d commit 6f99280
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 75 deletions.
50 changes: 27 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,33 @@ Start the program with arguments:
```
python run.py [options]
-h, --help show this help message and exit
-s SOURCE_PATH, --source SOURCE_PATH select an source image
-t TARGET_PATH, --target TARGET_PATH select an target image or video
-o OUTPUT_PATH, --output OUTPUT_PATH select output file or directory
--frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] list of available frame processors (choices: face_swapper, face_enhancer, frame_enhancer, ...)
--ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] list of available ui layouts (choices: default, ...)
--keep-fps keep target fps
--keep-temp keep temporary frames
--skip-audio skip target audio
--many-faces process every face
--reference-face-position REFERENCE_FACE_POSITION position of the reference face
--reference-frame-number REFERENCE_FRAME_NUMBER number of the reference frame
--similar-face-distance SIMILAR_FACE_DISTANCE face distance used for recognition
--temp-frame-format {jpg,png} image format used for frame extraction
--temp-frame-quality [0-100] image quality used for frame extraction
--output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} encoder used for the output video
--output-video-quality [0-100] quality used for the output video
--max-memory MAX_MEMORY maximum amount of RAM in GB
--execution-providers {cpu} [{cpu} ...] list of available execution providers (choices: cpu, ...)
--execution-thread-count EXECUTION_THREAD_COUNT number of execution threads
--execution-queue-count EXECUTION_QUEUE_COUNT number of execution queries
-v, --version show program's version number and exit
-h, --help show this help message and exit
-s SOURCE_PATH, --source SOURCE_PATH select a source image
-t TARGET_PATH, --target TARGET_PATH select a target image or video
-o OUTPUT_PATH, --output OUTPUT_PATH select output file or directory
--frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] list of available frame processors (choices: face_swapper, face_enhancer, frame_enhancer, ...)
--ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] list of available ui layouts (choices: default, ...)
--keep-fps keep target fps
--keep-temp keep temporary frames
--skip-audio skip target audio
--face-recognition {reference,many} face recognition method
--face-analyser-direction {left-right,right-left,top-bottom,bottom-top,small-large,large-small} direction used for the face analyser
--face-analyser-age {children,teenager,adult,senior} age used for the face analyser
--face-analyser-gender {male,female} gender used for the face analyser
--reference-face-position REFERENCE_FACE_POSITION position of the reference face
--reference-face-distance REFERENCE_FACE_DISTANCE distance between reference face and target face
--reference-frame-number REFERENCE_FRAME_NUMBER number of the reference frame
--trim-frame-start TRIM_FRAME_START start frame used for extraction
--trim-frame-end TRIM_FRAME_END end frame used for extraction
--temp-frame-format {jpg,png} image format used for frame extraction
--temp-frame-quality [0-100] image quality used for frame extraction
--output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} encoder used for the output video
--output-video-quality [0-100] quality used for the output video
--max-memory MAX_MEMORY maximum amount of RAM in GB
--execution-providers {cpu} [{cpu} ...] list of available execution providers (choices: cpu, ...)
--execution-thread-count EXECUTION_THREAD_COUNT number of execution threads
--execution-queue-count EXECUTION_QUEUE_COUNT number of execution queues
-v, --version show program's version number and exit
```


Expand Down
16 changes: 12 additions & 4 deletions roop/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import argparse
import onnxruntime
import tensorflow


import roop.globals
import roop.metadata
from roop.predictor import predict_image, predict_video
Expand All @@ -35,10 +37,13 @@ def parse_args() -> None:
program.add_argument('--keep-fps', help='keep target fps', dest='keep_fps', action='store_true')
program.add_argument('--keep-temp', help='keep temporary frames', dest='keep_temp', action='store_true')
program.add_argument('--skip-audio', help='skip target audio', dest='skip_audio', action='store_true')
program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true')
program.add_argument('--face-recognition', help='face recognition method', dest='face_recognition', default='reference', choices=['reference', 'many'])
program.add_argument('--face-analyser-direction', help='direction used for the face analyser', dest='face_analyser_direction', choices=['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small'])
program.add_argument('--face-analyser-age', help='age used for the face analyser', dest='face_analyser_age', choices=['children', 'teenager', 'adult', 'senior'])
program.add_argument('--face-analyser-gender', help='gender used for the face analyser', dest='face_analyser_gender', choices=['male', 'female'])
program.add_argument('--reference-face-position', help='position of the reference face', dest='reference_face_position', type=int, default=0)
program.add_argument('--reference-face-distance', help='distance between reference face and target face', dest='reference_face_distance', type=float, default=0.85)
program.add_argument('--reference-frame-number', help='number of the reference frame', dest='reference_frame_number', type=int, default=0)
program.add_argument('--similar-face-distance', help='face distance used for recognition', dest='similar_face_distance', type=float, default=0.85)
program.add_argument('--trim-frame-start', help='start frame use for extraction', dest='trim_frame_start', type=int)
program.add_argument('--trim-frame-end', help='end frame use for extraction', dest='trim_frame_end', type=int)
program.add_argument('--temp-frame-format', help='image format used for frame extraction', dest='temp_frame_format', default='jpg', choices=['jpg', 'png'])
Expand All @@ -62,10 +67,13 @@ def parse_args() -> None:
roop.globals.keep_fps = args.keep_fps
roop.globals.keep_temp = args.keep_temp
roop.globals.skip_audio = args.skip_audio
roop.globals.many_faces = args.many_faces
roop.globals.face_recognition = args.face_recognition
roop.globals.face_analyser_direction = args.face_analyser_direction
roop.globals.face_analyser_age = args.face_analyser_age
roop.globals.face_analyser_gender = args.face_analyser_gender
roop.globals.reference_face_position = args.reference_face_position
roop.globals.reference_frame_number = args.reference_frame_number
roop.globals.similar_face_distance = args.similar_face_distance
roop.globals.reference_face_distance = args.reference_face_distance
roop.globals.trim_frame_start = args.trim_frame_start
roop.globals.trim_frame_end = args.trim_frame_end
roop.globals.temp_frame_format = args.temp_frame_format
Expand Down
57 changes: 52 additions & 5 deletions roop/face_analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy

import roop.globals
from roop.typing import Frame, Face
from roop.typing import Frame, Face, FaceAnalyserDirection, FaceAnalyserAge, FaceAnalyserGender

FACE_ANALYSER = None
THREAD_LOCK = threading.Lock()
Expand Down Expand Up @@ -38,21 +38,68 @@ def get_one_face(frame: Frame, position: int = 0) -> Optional[Face]:

def get_many_faces(frame: Frame) -> Optional[List[Face]]:
    """Detect every face in the frame, then apply the globally configured
    analyser post-processing: direction sort, age filter and gender filter.

    Returns None when the underlying analyser raises instead of yielding
    faces (e.g. no model loaded or an unreadable frame).
    """
    try:
        faces = get_face_analyser().get(frame)
        # each step is optional and only runs when its global is configured
        if roop.globals.face_analyser_direction:
            faces = sort_by_direction(faces, roop.globals.face_analyser_direction)
        if roop.globals.face_analyser_age:
            faces = filter_by_age(faces, roop.globals.face_analyser_age)
        if roop.globals.face_analyser_gender:
            faces = filter_by_gender(faces, roop.globals.face_analyser_gender)
        return faces
    except (AttributeError, ValueError):
        return None


def find_similar_face(frame: Frame, reference_face: Face, face_distance: float) -> Optional[Face]:
    """Return the first face in the frame whose squared euclidean distance
    between embeddings is below ``face_distance``, or None when no face
    qualifies.

    :param frame: frame to scan for candidate faces
    :param reference_face: face whose embedding candidates are compared against
    :param face_distance: maximum accepted squared embedding distance
    """
    many_faces = get_many_faces(frame)
    if many_faces:
        for face in many_faces:
            # both faces need an embedding before a distance can be computed
            if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'):
                current_face_distance = numpy.sum(numpy.square(face.normed_embedding - reference_face.normed_embedding))
                if current_face_distance < face_distance:
                    return face
    return None


def sort_by_direction(faces: 'List[Face]', direction: 'FaceAnalyserDirection') -> 'List[Face]':
    """Return the faces ordered according to the requested direction.

    Left/right and top/bottom use the bounding box origin; small/large use
    the bounding box area. Unknown directions leave the order untouched.
    """
    def bbox_left(face):
        return face['bbox'][0]

    def bbox_top(face):
        return face['bbox'][1]

    def bbox_area(face):
        return (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1])

    # direction -> (sort key, descending?)
    strategies = {
        'left-right': (bbox_left, False),
        'right-left': (bbox_left, True),
        'top-bottom': (bbox_top, False),
        'bottom-top': (bbox_top, True),
        'small-large': (bbox_area, False),
        'large-small': (bbox_area, True),
    }
    if direction in strategies:
        sort_key, descending = strategies[direction]
        return sorted(faces, key=sort_key, reverse=descending)
    return faces


def filter_by_age(faces: 'List[Face]', age: 'FaceAnalyserAge') -> 'List[Face]':
    """Keep only the faces whose estimated age falls inside the requested
    category.

    The previous chain of plain ``face['age'] < N`` tests overlapped: a
    5 year old matched 'teenager', 'adult' and 'senior' as well. Each
    category is now an exclusive half-open range [lower, upper).
    Unknown categories yield an empty list, matching the old behaviour.
    """
    age_ranges = {
        'children': (0, 10),
        'teenager': (10, 20),
        'adult': (20, 60),
        'senior': (60, 100),
    }
    if age not in age_ranges:
        return []
    lower, upper = age_ranges[age]
    return [face for face in faces if lower <= face['age'] < upper]


def filter_by_gender(faces: 'List[Face]', gender: 'FaceAnalyserGender') -> 'List[Face]':
    """Keep only the faces whose gender code matches the requested gender."""
    # the analyser encodes gender as 1 for male and 0 for female;
    # an unknown gender maps to None and therefore matches nothing
    gender_code = {'male': 1, 'female': 0}.get(gender)
    return [face for face in faces if face['gender'] == gender_code]


def get_faces_total(frame: Frame) -> int:
    """Return the number of detected (and filtered) faces in the frame.

    get_many_faces() returns None when the analyser fails; treat that as
    zero faces instead of crashing on len(None).
    """
    faces = get_many_faces(frame)
    return len(faces) if faces else 0
13 changes: 9 additions & 4 deletions roop/globals.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import List, Optional

from roop.typing import FaceRecognition, FaceAnalyserDirection, FaceAnalyserAge, FaceAnalyserGender, LogLevel, TempFrameFormat

source_path: Optional[str] = None
target_path: Optional[str] = None
output_path: Optional[str] = None
Expand All @@ -9,18 +11,21 @@
keep_fps: Optional[bool] = None
keep_temp: Optional[bool] = None
skip_audio: Optional[bool] = None
many_faces: Optional[bool] = None
face_recognition: Optional[FaceRecognition] = None
face_analyser_direction: Optional[FaceAnalyserDirection] = None
face_analyser_age: Optional[FaceAnalyserAge] = None
face_analyser_gender: Optional[FaceAnalyserGender] = None
reference_face_position: Optional[int] = None
reference_frame_number: Optional[int] = None
similar_face_distance: Optional[float] = None
reference_face_distance: Optional[float] = None
trim_frame_start: Optional[int] = None
trim_frame_end: Optional[int] = None
temp_frame_format: Optional[str] = None
temp_frame_format: Optional[TempFrameFormat] = None
temp_frame_quality: Optional[int] = None
output_video_encoder: Optional[str] = None
output_video_quality: Optional[int] = None
max_memory: Optional[int] = None
execution_providers: List[str] = []
execution_thread_count: Optional[int] = None
execution_queue_count: Optional[int] = None
log_level: str = 'error'
log_level: LogLevel = 'error'
16 changes: 8 additions & 8 deletions roop/processors/frame/__modules__/face_swapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,21 +61,21 @@ def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:


def process_frame(source_face: Face, reference_face: Face, temp_frame: Frame) -> Frame:
    """Swap source_face onto the target face(s) of temp_frame according to
    the configured recognition method.

    'reference' swaps only the face closest to reference_face within
    reference_face_distance; 'many' swaps every detected face. The checks
    use ``in`` so both can apply when the setting contains both tokens.
    """
    if 'reference' in roop.globals.face_recognition:
        target_face = find_similar_face(temp_frame, reference_face, roop.globals.reference_face_distance)
        if target_face:
            temp_frame = swap_face(source_face, target_face, temp_frame)
    if 'many' in roop.globals.face_recognition:
        many_faces = get_many_faces(temp_frame)
        if many_faces:
            for target_face in many_faces:
                temp_frame = swap_face(source_face, target_face, temp_frame)
    return temp_frame


def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
source_face = get_one_face(cv2.imread(source_path))
reference_face = get_face_reference() if not roop.globals.many_faces else None
reference_face = get_face_reference() if 'reference' in roop.globals.face_recognition else None
for temp_frame_path in temp_frame_paths:
temp_frame = cv2.imread(temp_frame_path)
result_frame = process_frame(source_face, reference_face, temp_frame)
Expand All @@ -87,13 +87,13 @@ def process_frames(source_path: str, temp_frame_paths: List[str], update: Callab
def process_image(source_path: str, target_path: str, output_path: str) -> None:
    """Swap the source face into the target image and write the result.

    :param source_path: image providing the face to insert
    :param target_path: image to process
    :param output_path: destination for the processed image
    """
    source_face = get_one_face(cv2.imread(source_path))
    target_frame = cv2.imread(target_path)
    # a reference face is only needed for the 'reference' recognition method
    reference_face = get_one_face(target_frame, roop.globals.reference_face_position) if 'reference' in roop.globals.face_recognition else None
    result_frame = process_frame(source_face, reference_face, target_frame)
    cv2.imwrite(output_path, result_frame)


def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
if not roop.globals.many_faces and not get_face_reference():
if 'reference' in roop.globals.face_recognition and not get_face_reference():
reference_frame = cv2.imread(temp_frame_paths[roop.globals.reference_frame_number])
reference_face = get_one_face(reference_frame, roop.globals.reference_face_position)
set_face_reference(reference_face)
Expand Down
10 changes: 9 additions & 1 deletion roop/typing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from typing import Any, Literal

from insightface.app.common import Face
import numpy

# re-export the insightface face object under the project namespace
Face = Face
# a frame is any numpy array (BGR image as produced by cv2.imread)
Frame = numpy.ndarray[Any, Any]

# closed sets of accepted string values, shared by argparse choices,
# roop.globals annotations and the analyser/swapper modules
FaceRecognition = Literal['reference', 'many']
FaceAnalyserDirection = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small']
FaceAnalyserAge = Literal['children', 'teenager', 'adult', 'senior']
FaceAnalyserGender = Literal['male', 'female']
TempFrameFormat = Literal['jpg', 'png']
OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc']
LogLevel = Literal['error']
Loading

0 comments on commit 6f99280

Please sign in to comment.