""" backfill_image_data.py ====================== One-shot script to download image bytes from Discord and store them in input_images.db for rows that currently have image_data = NULL. These rows were created before the BLOB-storage migration, so their bytes were never persisted. The script re-fetches each bot-reply message from Discord and writes the raw attachment bytes back into the DB. Rows with bot_reply_id = 0 (web uploads that pre-date the migration) have no Discord source and are skipped — re-upload them via the web UI to backfill. Usage ----- python backfill_image_data.py Requires: DISCORD_BOT_TOKEN in .env (same token the bot uses) """ from __future__ import annotations import asyncio import logging import sqlite3 import discord try: from dotenv import load_dotenv load_dotenv() except Exception: pass from config import BotConfig from input_image_db import DB_PATH logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger(__name__) def _load_null_rows() -> list[dict]: """Return all rows that are missing image_data.""" conn = sqlite3.connect(str(DB_PATH)) conn.row_factory = sqlite3.Row rows = conn.execute( "SELECT id, bot_reply_id, channel_id, filename" " FROM input_images WHERE image_data IS NULL" ).fetchall() conn.close() return [dict(r) for r in rows] def _save_image_data(row_id: int, data: bytes) -> None: conn = sqlite3.connect(str(DB_PATH)) conn.execute("UPDATE input_images SET image_data = ? WHERE id = ?", (data, row_id)) conn.commit() conn.close() async def _backfill(client: discord.Client) -> None: rows = _load_null_rows() discord_rows = [r for r in rows if r["bot_reply_id"] != 0] web_rows = [r for r in rows if r["bot_reply_id"] == 0] logger.info( "Rows missing image_data: %d total (%d from Discord, %d web-uploads skipped)", len(rows), len(discord_rows), len(web_rows), ) if web_rows: logger.info( "Skipped row IDs (no Discord source — re-upload via web UI): %s", [r["id"] for r in web_rows], ) if not discord_rows: logger.info("Nothing to fetch. Exiting.") return ok = 0 failed = 0 for row in discord_rows: row_id = row["id"] ch_id = row["channel_id"] msg_id = row["bot_reply_id"] filename = row["filename"] try: channel = client.get_channel(ch_id) or await client.fetch_channel(ch_id) message = await channel.fetch_message(msg_id) attachment = next( (a for a in message.attachments if a.filename == filename), None ) if attachment is None: logger.warning( "Row %d: attachment '%s' not found on message %d — skipping", row_id, filename, msg_id, ) failed += 1 continue data = await attachment.read() _save_image_data(row_id, data) logger.info("Row %d: saved '%s' (%d bytes)", row_id, filename, len(data)) ok += 1 except discord.NotFound: logger.warning("Row %d: message %d not found (deleted?) — skipping", row_id, msg_id) failed += 1 except discord.Forbidden: logger.warning("Row %d: no access to channel %d — skipping", row_id, ch_id) failed += 1 except Exception as exc: logger.error("Row %d: unexpected error — %s", row_id, exc) failed += 1 logger.info( "Done. %d saved, %d failed/skipped, %d web-upload rows not touched.", ok, failed, len(web_rows), ) async def _main(token: str) -> None: intents = discord.Intents.none() # no gateway events needed beyond connect client = discord.Client(intents=intents) @client.event async def on_ready(): logger.info("Logged in as %s", client.user) try: await _backfill(client) finally: await client.close() await client.start(token) def main() -> None: try: config = BotConfig.from_env() except RuntimeError as exc: logger.error("Config error: %s", exc) return asyncio.run(_main(config.discord_bot_token)) if __name__ == "__main__": main()