manual submit
This commit is contained in:
131
sync_faces.py
Normal file
131
sync_faces.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
sync_faces.py
|
||||
=============
|
||||
|
||||
One-time backfill script: scan existing input_images and generation_files
|
||||
for faces and store detections in faces.db.
|
||||
|
||||
Usage:
|
||||
python sync_faces.py [--dry-run] [--input-only] [--output-only] [--cluster] [--cluster-threshold T]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import sqlite3
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _fetch_rows(db_path, query: str) -> list:
    """Run *query* against the SQLite file at *db_path* and return all rows.

    Rows are ``sqlite3.Row`` objects so columns can be read by name. The
    connection is closed even when the query raises.
    """
    conn = sqlite3.connect(str(db_path), check_same_thread=False)
    try:
        conn.row_factory = sqlite3.Row
        return conn.execute(query).fetchall()
    finally:
        conn.close()


def _count_matches(results) -> tuple[int, int]:
    """Return ``(matched, unidentified)`` counts for an iterable of scan results.

    A result counts as matched when its ``matched_person_id`` is not ``None``.
    """
    matched = 0
    unidentified = 0
    for result in results:
        if result.matched_person_id is not None:
            matched += 1
        else:
            unidentified += 1
    return matched, unidentified


async def main(
    dry_run: bool, input_only: bool, output_only: bool,
    cluster: bool, cluster_threshold: float,
) -> None:
    """Scan stored input images and generation output files for faces.

    Args:
        dry_run: When true, only list the rows that would be scanned; the
            face service is never called for them.
        input_only: Skip the generation output files.
        output_only: Skip the input images.
        cluster: After a real (non-dry) scan, ask the face service to cluster
            the unidentified faces.
        cluster_threshold: Threshold passed to
            ``cluster_unidentified_faces``.

    Returns early, after logging an error, when the face service reports that
    it is not available. A failure on a single row is logged as a warning and
    does not stop the run.
    """
    # Project modules are imported lazily, as in the original script.
    import face_db
    from face_service import get_face_service

    face_db.init_db()
    svc = get_face_service()

    if not svc.available:
        logger.error(
            "insightface is not available. "
            "Install: pip install insightface onnxruntime opencv-python"
        )
        return

    import generation_db
    import input_image_db

    total_faces = 0
    total_matched = 0
    total_unidentified = 0

    # Scan input images
    if not output_only:
        logger.info("Scanning input images…")
        rows = _fetch_rows(
            input_image_db.DB_PATH,
            "SELECT id, image_data FROM input_images WHERE image_data IS NOT NULL",
        )

        for row in rows:
            row_id = row["id"]
            image_bytes = bytes(row["image_data"])
            logger.info("  input image id=%d (%d bytes)", row_id, len(image_bytes))
            if dry_run:
                continue
            try:
                results = await svc.scan_input_image(row_id, image_bytes)
                matched, unidentified = _count_matches(results)
            except Exception as exc:
                # Best effort: one bad image must not abort the backfill.
                logger.warning("  Failed for input id=%d: %s", row_id, exc)
                continue
            total_faces += matched + unidentified
            total_matched += matched
            total_unidentified += unidentified

    # Scan generated output files
    if not input_only:
        logger.info("Scanning generation output files…")
        # Filter NULL blobs here, like the input query does; bytes(None)
        # would otherwise raise before the per-row error handling.
        rows = _fetch_rows(
            generation_db._DB_PATH,
            "SELECT id, file_data, mime_type FROM generation_files"
            " WHERE file_data IS NOT NULL",
        )

        for row in rows:
            file_id = row["id"]
            file_data = bytes(row["file_data"])
            mime_type = row["mime_type"] or ""
            logger.info(
                "  output file id=%d mime=%s (%d bytes)", file_id, mime_type, len(file_data)
            )
            if dry_run:
                continue
            try:
                if mime_type.startswith("image/"):
                    results = await svc.scan_output_image(file_id, file_data)
                elif mime_type.startswith("video/"):
                    results = await svc.scan_video(file_id, file_data)
                else:
                    # Neither image nor video: nothing to scan.
                    continue
                matched, unidentified = _count_matches(results)
            except Exception as exc:
                # Best effort: one bad file must not abort the backfill.
                logger.warning("  Failed for output id=%d: %s", file_id, exc)
                continue
            total_faces += matched + unidentified
            total_matched += matched
            # Output files now feed the unidentified total too, so the
            # summary line adds up.
            total_unidentified += unidentified

    if dry_run:
        logger.info("Dry run — no data written.")
        return

    logger.info(
        "Done. %d faces detected, %d matched to known persons, %d unidentified",
        total_faces,
        total_matched,
        total_unidentified,
    )
    # NOTE(review): clustering runs only after a real scan; it presumably
    # persists groups, which would contradict the dry-run message — confirm.
    if cluster:
        logger.info("Clustering unidentified faces (threshold=%.2f)…", cluster_threshold)
        groups = await svc.cluster_unidentified_faces(cluster_threshold)
        logger.info("Clustering: %d groups created", len(groups))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the flags, then run the backfill once.
    cli = argparse.ArgumentParser(
        description="Backfill face detections for existing media"
    )
    # Plain boolean switches that carry no help text.
    for switch in ("--dry-run", "--input-only", "--output-only"):
        cli.add_argument(switch, action="store_true")
    cli.add_argument(
        "--cluster",
        action="store_true",
        help="Run auto-clustering after scanning",
    )
    cli.add_argument(
        "--cluster-threshold",
        type=float,
        default=0.45,
        metavar="T",
        help="Cosine similarity threshold for clustering (default: 0.45)",
    )
    opts = cli.parse_args()

    backfill = main(
        opts.dry_run,
        opts.input_only,
        opts.output_only,
        opts.cluster,
        opts.cluster_threshold,
    )
    asyncio.run(backfill)
|
||||
Reference in New Issue
Block a user