manual submit

This commit is contained in:
Khoa (Revenovich) Tran Gia
2026-03-07 21:49:16 +07:00
parent 1748cbf8d2
commit 6004b000a7
39 changed files with 5794 additions and 614 deletions

565
face_service.py Normal file
View File

@@ -0,0 +1,565 @@
"""
face_service.py
===============
FaceService: wrapper around insightface for face detection and recognition.
Runs CPU-bound work in a ThreadPoolExecutor(max_workers=1).
Falls back gracefully if insightface is not installed (available=False).
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import tempfile
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from typing import Optional
import numpy as np
logger = logging.getLogger(__name__)
# insightface is an optional dependency: probe for it once at import time so
# FaceService can degrade gracefully (available=False, every pipeline no-ops)
# instead of crashing the whole app when the package is missing.
try:
    from insightface.app import FaceAnalysis as _FaceAnalysis
    _INSIGHTFACE_AVAILABLE = True
except ImportError:
    _FaceAnalysis = None # type: ignore
    _INSIGHTFACE_AVAILABLE = False
# Minimum cosine similarity for find_best_match() to report a detected face
# as a known person; below this the best candidate is discarded.
_SIMILARITY_THRESHOLD = 0.4
@dataclass
class DetectedFace:
    """One face found in an image: its position, embedding and a JPEG crop."""
    face_index: int  # index of this face within the detector's result list
    bbox: dict # {x1, y1, x2, y2}
    embedding: np.ndarray  # float32 normalized embedding (insightface normed_embedding)
    crop_bytes: bytes # JPEG bytes of the face crop
@dataclass
class ScanResult:
    """Outcome of persisting one detected face during a scan pipeline."""
    detection_id: int  # row id returned by face_db.insert_detection
    face_index: int  # index of the face within its source image/frame
    bbox: dict  # {x1, y1, x2, y2} pixel coordinates
    matched_person_id: Optional[int]  # None when no known person matched
    matched_person_name: Optional[str]  # display name for matched_person_id, if any
class FaceService:
    """Face detection, recognition, clustering and crop pipelines.

    Wraps insightface for detection + embedding extraction. All CPU-bound work
    (image decode, detection, clustering, keyframe extraction) is funneled
    through a single-worker ThreadPoolExecutor, so at most one heavy job runs
    at a time and the insightface session stays on one thread. When insightface
    is unavailable, ``available`` is False and every pipeline short-circuits to
    an empty result instead of raising.
    """

    available: bool  # True only when insightface imported AND initialized cleanly

    def __init__(self) -> None:
        self.available = _INSIGHTFACE_AVAILABLE
        # max_workers=1 serializes all CPU-heavy jobs (see class docstring).
        self._executor = ThreadPoolExecutor(max_workers=1)
        self._app = None
        if self.available:
            try:
                self._app = _FaceAnalysis(providers=["CPUExecutionProvider"])
                self._app.prepare(ctx_id=0, det_size=(640, 640))
                logger.info("FaceService: insightface ready")
            except Exception as exc:
                logger.warning("FaceService: failed to init insightface: %s", exc)
                self.available = False

    # ------------------------------------------------------------------
    # Low-level detection
    # ------------------------------------------------------------------
    def _detect_sync(self, image_bytes: bytes) -> list[DetectedFace]:
        """CPU-bound: detect faces in image bytes.

        Returns one DetectedFace per detected face, each carrying a padded JPEG
        crop and a float32 normalized embedding. Decode or detection failures
        are logged and yield an empty list rather than raising.
        """
        import cv2
        arr = np.frombuffer(image_bytes, dtype=np.uint8)
        try:
            img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        except Exception as exc:
            logger.warning("FaceService: cv2.imdecode failed: %s", exc)
            return []
        if img is None:
            # imdecode signals unsupported/corrupt data by returning None.
            return []
        try:
            faces = self._app.get(img)
        except Exception as exc:
            logger.warning("FaceService: face detection failed: %s", exc)
            return []
        results = []
        for i, face in enumerate(faces):
            x1, y1, x2, y2 = (int(v) for v in face.bbox)
            bbox = {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
            emb = face.normed_embedding.astype(np.float32)
            # Crop with padding, clamped to the image bounds.
            pad = 20
            h, w = img.shape[:2]
            cx1 = max(0, x1 - pad)
            cy1 = max(0, y1 - pad)
            cx2 = min(w, x2 + pad)
            cy2 = min(h, y2 + pad)
            crop = img[cy1:cy2, cx1:cx2]
            if crop.size == 0:
                # Degenerate bbox entirely outside the frame: cv2.imencode
                # raises on an empty image, so skip this face instead.
                continue
            _, buf = cv2.imencode(".jpg", crop, [cv2.IMWRITE_JPEG_QUALITY, 85])
            crop_bytes = buf.tobytes()
            results.append(DetectedFace(
                face_index=i,
                bbox=bbox,
                embedding=emb,
                crop_bytes=crop_bytes,
            ))
        return results

    async def detect(self, image_bytes: bytes) -> list[DetectedFace]:
        """Async face detection wrapper: runs _detect_sync on the executor."""
        # get_running_loop() is the documented way to get the loop from inside
        # a coroutine; get_event_loop() is deprecated for this purpose.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, self._detect_sync, image_bytes)

    # ------------------------------------------------------------------
    # Matching
    # ------------------------------------------------------------------
    def find_best_match(
        self,
        embedding: np.ndarray,
        known_list: list[dict],
    ) -> tuple[Optional[int], float]:
        """Return (person_id, similarity) of the best cosine-similarity match, or (None, 0.0).

        Embeddings are assumed L2-normalized (insightface normed_embedding),
        so a plain dot product is the cosine similarity. A person id is only
        returned when the best similarity reaches _SIMILARITY_THRESHOLD;
        otherwise (None, best_sim) is returned.
        """
        if not known_list:
            return None, 0.0
        best_sim = 0.0
        best_id = None
        for entry in known_list:
            sim = float(np.dot(embedding, entry["embedding"]))
            if sim > best_sim:
                best_sim = sim
                best_id = entry["person_id"]
        if best_sim >= _SIMILARITY_THRESHOLD:
            return best_id, best_sim
        return None, best_sim

    # ------------------------------------------------------------------
    # Clustering
    # ------------------------------------------------------------------
    def _cluster_sync(self, embeddings: list[dict], threshold: float) -> list[list[int]]:
        """
        Union-find clustering of face embeddings by cosine similarity.
        Builds the full n x n similarity matrix, so memory grows O(n^2) —
        fine for the expected workload of at most a few thousand faces.
        Each entry must have "id" and "embedding" keys; callers must pass at
        least one entry (np.stack rejects an empty list).
        Returns list of detection-id lists, one per cluster with >= 2 members.
        """
        n = len(embeddings)
        parent = list(range(n))

        def find(x: int) -> int:
            # Iterative find with path halving.
            while parent[x] != x:
                parent[x] = parent[parent[x]]
                x = parent[x]
            return x

        def union(x: int, y: int) -> None:
            px, py = find(x), find(y)
            if px != py:
                parent[py] = px

        # Normalize defensively (embeddings should already be unit length),
        # then compute all pairwise cosine similarities in one matmul.
        M = np.stack([e["embedding"] for e in embeddings])
        norms = np.linalg.norm(M, axis=1, keepdims=True)
        M_norm = M / (norms + 1e-8)
        sim_matrix = M_norm @ M_norm.T
        pairs = np.argwhere(sim_matrix >= threshold)
        for i, j in pairs:
            if i < j:  # upper triangle only; skip self-pairs and duplicates
                union(int(i), int(j))
        groups: dict[int, list[int]] = {}
        for idx, e in enumerate(embeddings):
            root = find(idx)
            groups.setdefault(root, []).append(e["id"])
        # Singletons are not interesting groups.
        return [ids for ids in groups.values() if len(ids) >= 2]

    async def cluster_unidentified_faces(self, threshold: float = 0.45) -> list[list[int]]:
        """
        Cluster all unidentified detections by embedding similarity and persist groups to face_db.
        Clears existing groups before recomputing.
        """
        if not self.available:
            return []
        import face_db
        embeddings = face_db.get_unidentified_embeddings()
        if len(embeddings) < 2:
            # Nothing to cluster; also guards _cluster_sync against empty input.
            face_db.clear_all_groups()
            return []
        loop = asyncio.get_running_loop()
        groups = await loop.run_in_executor(
            self._executor, self._cluster_sync, embeddings, threshold
        )
        # Replace all previous groups with the freshly computed ones.
        face_db.clear_all_groups()
        for det_ids in groups:
            gid = face_db.create_group(threshold)
            for det_id in det_ids:
                face_db.assign_detection_to_group(det_id, gid)
        return groups

    def _assign_to_nearest_group_sync(self, embedding: np.ndarray) -> int | None:
        """
        Compare embedding against existing group centroids and return the best matching group_id,
        or None if no group exceeds its threshold.
        Fast enough to call synchronously (< 50 groups x < 50 members).
        """
        import face_db
        groups = face_db.get_all_group_embeddings_with_threshold()
        if not groups:
            return None
        norm_emb = embedding / (np.linalg.norm(embedding) + 1e-8)
        best_gid: int | None = None
        best_sim = -1.0
        for g in groups:
            # Mean similarity to every member of the group, computed in one matmul.
            M = np.stack(g["embeddings"])
            norms = np.linalg.norm(M, axis=1, keepdims=True)
            M_norm = M / (norms + 1e-8)
            mean_sim = float(np.mean(M_norm @ norm_emb))
            # Each group carries its own admission threshold.
            if mean_sim >= g["threshold"] and mean_sim > best_sim:
                best_sim = mean_sim
                best_gid = g["group_id"]
        return best_gid

    # ------------------------------------------------------------------
    # High-level pipelines
    # ------------------------------------------------------------------
    async def scan_input_image(self, source_id: int, image_bytes: bytes) -> list[ScanResult]:
        """Detect faces in an input image, auto-link if known, store to face_db.

        Returns one ScanResult per detected face (matched or not). Unmatched
        faces are additionally assigned to the nearest existing group when one
        accepts them.
        """
        if not self.available:
            return []
        import face_db
        faces = await self.detect(image_bytes)
        if not faces:
            return []
        known = face_db.get_all_embeddings()
        persons_cache: dict[int, str] = {p["id"]: p["name"] for p in face_db.list_persons()}
        results = []
        for face in faces:
            person_id, _ = self.find_best_match(face.embedding, known)
            person_name = persons_cache.get(person_id) if person_id is not None else None
            det_id = face_db.insert_detection(
                source_type="input",
                source_id=source_id,
                embedding=face.embedding,
                bbox=face.bbox,
                frame_index=0,
                face_index=face.face_index,
                person_id=person_id,
            )
            if person_id is None:
                gid = self._assign_to_nearest_group_sync(face.embedding)
                if gid is not None:
                    face_db.assign_detection_to_group(det_id, gid)
            results.append(ScanResult(
                detection_id=det_id,
                face_index=face.face_index,
                bbox=face.bbox,
                matched_person_id=person_id,
                matched_person_name=person_name,
            ))
        return results

    async def scan_output_image(self, source_id: int, image_bytes: bytes) -> list[ScanResult]:
        """Detect faces in a generated output image. Silent background scan.

        Unlike scan_input_image, only MATCHED faces are returned, and the
        embedding is not persisted (rescan_output_embedding backfills it later).
        """
        if not self.available:
            return []
        import face_db
        faces = await self.detect(image_bytes)
        if not faces:
            return []
        known = face_db.get_all_embeddings()
        persons_cache: dict[int, str] = {p["id"]: p["name"] for p in face_db.list_persons()}
        results = []
        for face in faces:
            person_id, _ = self.find_best_match(face.embedding, known)
            det_id = face_db.insert_detection(
                source_type="output",
                source_id=source_id,
                embedding=None, # discard; saves space; rescan fills on demand
                bbox=face.bbox,
                frame_index=0,
                face_index=face.face_index,
                person_id=person_id,
            )
            if person_id is None:
                # Grouping still uses the in-memory embedding even though it
                # is not stored.
                gid = self._assign_to_nearest_group_sync(face.embedding)
                if gid is not None:
                    face_db.assign_detection_to_group(det_id, gid)
            if person_id is not None:
                person_name = persons_cache.get(person_id)
                results.append(ScanResult(
                    detection_id=det_id,
                    face_index=face.face_index,
                    bbox=face.bbox,
                    matched_person_id=person_id,
                    matched_person_name=person_name,
                ))
        logger.info(
            "Face scan [output image source_id=%d]: %d face(s) detected, %d matched",
            source_id, len(faces), sum(1 for r in results if r.matched_person_id is not None),
        )
        return results

    def _extract_keyframes_sync(self, video_bytes: bytes, max_frames: int = 20) -> list:
        """Extract evenly-spaced keyframes from video bytes. Returns list of BGR numpy arrays.

        The bytes are spilled to a temp file because cv2.VideoCapture needs a
        path; the file is always unlinked afterwards.
        """
        import cv2
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as f:
            f.write(video_bytes)
            tmp_path = f.name
        try:
            cap = cv2.VideoCapture(tmp_path)
            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total <= 0:
                cap.release()
                return []
            n = min(max_frames, total)
            # Same sampling formula is reconstructed by _extract_frame_at_sync;
            # keep the two in sync.
            indices = [int(i * total / n) for i in range(n)]
            frames = []
            for idx in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ret, frame = cap.read()
                if ret:
                    frames.append(frame)
            cap.release()
            return frames
        finally:
            try:
                os.unlink(tmp_path)
            except Exception:
                pass

    async def scan_video(self, source_id: int, video_bytes: bytes, max_frames: int = 20) -> list[ScanResult]:
        """Detect faces across video keyframes. Silent background scan.

        Samples up to max_frames evenly spaced frames, scans each, and stores
        detections with frame_index set so crops can be re-derived later.
        Only matched faces are returned.
        """
        if not self.available:
            return []
        import cv2
        import face_db
        loop = asyncio.get_running_loop()
        frames = await loop.run_in_executor(
            self._executor, self._extract_keyframes_sync, video_bytes, max_frames
        )
        if not frames:
            return []
        known = face_db.get_all_embeddings()
        persons_cache: dict[int, str] = {p["id"]: p["name"] for p in face_db.list_persons()}
        results = []
        seen_det_ids: set[int] = set()  # defensive: never report one detection twice
        for frame_idx, frame in enumerate(frames):
            # Re-encode the frame to JPEG since detect() takes image bytes.
            _, buf = cv2.imencode(".jpg", frame)
            frame_bytes = buf.tobytes()
            faces = await self.detect(frame_bytes)
            for face in faces:
                person_id, _ = self.find_best_match(face.embedding, known)
                det_id = face_db.insert_detection(
                    source_type="output",
                    source_id=source_id,
                    embedding=None, # discard; saves space; rescan fills on demand
                    bbox=face.bbox,
                    frame_index=frame_idx,
                    face_index=face.face_index,
                    person_id=person_id,
                )
                if det_id not in seen_det_ids:
                    seen_det_ids.add(det_id)
                    if person_id is not None:
                        person_name = persons_cache.get(person_id)
                        results.append(ScanResult(
                            detection_id=det_id,
                            face_index=face.face_index,
                            bbox=face.bbox,
                            matched_person_id=person_id,
                            matched_person_name=person_name,
                        ))
        logger.info(
            "Face scan [output video source_id=%d]: %d frame(s), %d result(s) matched",
            source_id, len(frames), len(results),
        )
        return results

    async def rescan_output_embedding(self, source_id: int) -> int:
        """
        Re-detect faces in a stored output image and update NULL embeddings
        for existing detections by bbox proximity matching.
        Returns count of detections updated.

        A newly computed face is paired with the nearest stored detection by
        bbox-center distance; pairs further than 50 px apart are ignored.
        """
        if not self.available:
            return 0
        import sqlite3
        import face_db
        import generation_db
        conn = sqlite3.connect(str(generation_db._DB_PATH), check_same_thread=False)
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT file_data, mime_type FROM generation_files WHERE id = ?", (source_id,)
        ).fetchone()
        conn.close()
        if row is None:
            return 0
        file_bytes = bytes(row["file_data"])
        mime = (row["mime_type"] or "").lower()
        if mime.startswith("video/"):
            return 0 # skip videos — too expensive for backfill
        faces = await self.detect(file_bytes)
        if not faces:
            return 0
        # Only detections that still lack an embedding and have a usable bbox.
        existing = [
            d for d in face_db.get_detections_for_source("output", source_id)
            if d.get("embedding") is None and d.get("bbox_json") not in (None, "{}")
        ]
        if not existing:
            return 0
        updated = 0
        for face in faces:
            # Center of the freshly detected bbox.
            fx = (face.bbox["x1"] + face.bbox["x2"]) / 2
            fy = (face.bbox["y1"] + face.bbox["y2"]) / 2
            best_det = None
            best_dist = float("inf")
            for det in existing:
                b = json.loads(det["bbox_json"])
                dx = fx - (b["x1"] + b["x2"]) / 2
                dy = fy - (b["y1"] + b["y2"]) / 2
                dist = (dx * dx + dy * dy) ** 0.5
                if dist < best_dist:
                    best_dist = dist
                    best_det = det
            if best_det is not None and best_dist <= 50:
                face_db.update_detection_embedding(best_det["id"], face.embedding)
                # Each stored detection may only be claimed by one new face.
                existing = [d for d in existing if d["id"] != best_det["id"]]
                updated += 1
                if best_det.get("person_id") is None:
                    # Try to identify the face now that we have its embedding.
                    known = face_db.get_all_embeddings()
                    matched_pid, _ = self.find_best_match(face.embedding, known)
                    if matched_pid is not None:
                        face_db.link_detection_to_person(best_det["id"], matched_pid)
        return updated

    # ------------------------------------------------------------------
    # Utility
    # ------------------------------------------------------------------
    def _extract_frame_at_sync(
        self, video_bytes: bytes, frame_index: int, max_frames: int = 20,
        suffix: str = ".mp4",
    ) -> "np.ndarray | None":
        """
        Re-extract the specific video frame that was used during scan_video.
        frame_index is the enumeration index (0..n-1) used by scan_video, NOT the raw
        video frame number. We reconstruct the same sampling formula:
            actual_frame = int(frame_index * total / n)  where n = min(max_frames, total)
        Returns the BGR frame array, or None on any failure.
        """
        import cv2
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
            f.write(video_bytes)
            tmp_path = f.name
        try:
            cap = cv2.VideoCapture(tmp_path)
            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total <= 0:
                cap.release()
                return None
            n = min(max_frames, total)
            if frame_index >= n:
                # Index beyond what scan_video could have sampled.
                cap.release()
                return None
            actual_idx = int(frame_index * total / n)
            cap.set(cv2.CAP_PROP_POS_FRAMES, actual_idx)
            ret, frame = cap.read()
            cap.release()
            return frame if ret else None
        except Exception:
            return None
        finally:
            try:
                os.unlink(tmp_path)
            except Exception:
                pass

    def get_face_crop(self, detection_id: int) -> bytes | None:
        """Re-derive the face crop from the stored source image or video frame. Returns JPEG bytes or None.

        Crops are not stored; this recomputes the same padded crop as
        _detect_sync from the original source bytes and the stored bbox.
        """
        import cv2
        import face_db
        det = face_db.get_detection(detection_id)
        if det is None:
            return None
        source_type = det["source_type"]
        source_id = det["source_id"]
        bbox_raw = det["bbox_json"]
        if not bbox_raw:
            return None
        bbox = json.loads(bbox_raw)
        img = None
        if source_type == "input":
            from input_image_db import get_image_data
            image_bytes = get_image_data(source_id)
            if image_bytes is None:
                return None
            arr = np.frombuffer(image_bytes, dtype=np.uint8)
            try:
                img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
            except Exception:
                return None
        elif source_type == "output":
            import sqlite3
            import generation_db
            conn = sqlite3.connect(str(generation_db._DB_PATH), check_same_thread=False)
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT file_data, mime_type FROM generation_files WHERE id = ?", (source_id,)
            ).fetchone()
            conn.close()
            if row is None:
                return None
            file_bytes = bytes(row["file_data"])
            mime = (row["mime_type"] or "").lower()
            if mime.startswith("video/"):
                frame_index = det.get("frame_index", 0) or 0
                # Pick a matching temp-file suffix so OpenCV selects the right codec
                _mime_to_ext = {"video/mp4": ".mp4", "video/webm": ".webm",
                                "video/avi": ".avi", "video/quicktime": ".mov"}
                vsuffix = _mime_to_ext.get(mime, ".mp4")
                img = self._extract_frame_at_sync(file_bytes, frame_index, suffix=vsuffix)
            else:
                arr = np.frombuffer(file_bytes, dtype=np.uint8)
                try:
                    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
                except Exception:
                    return None
        if img is None:
            # Unknown source_type, missing data, or decode/frame-extract failure.
            return None
        # Same padded crop as _detect_sync, clamped to the image bounds.
        x1, y1, x2, y2 = bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]
        pad = 20
        h, w = img.shape[:2]
        cx1 = max(0, x1 - pad)
        cy1 = max(0, y1 - pad)
        cx2 = min(w, x2 + pad)
        cy2 = min(h, y2 + pad)
        crop = img[cy1:cy2, cx1:cx2]
        _, buf = cv2.imencode(".jpg", crop, [cv2.IMWRITE_JPEG_QUALITY, 85])
        return buf.tobytes()
# Lazily-created module-level singleton instance.
_face_service: FaceService | None = None
def get_face_service() -> FaceService:
    """Return the shared FaceService, constructing it on first use."""
    global _face_service
    svc = _face_service
    if svc is None:
        svc = FaceService()
        _face_service = svc
    return svc