quick fix

Companion (main.py): • NAVIDROME_TAGS whitelist — the single source of truth for what tags survive • enforce_tag_whitelist() — whitelist enforcer, replaces blacklist approach • All 5 write points updated: apply_tags, apply_tags_dict, upload-track, upload-tracks, restructure_all • preserve_composer and preserve_lyrics flags on both upload endpoints (default False) • /library/clean-tags now uses whitelist enforcer iOS: • UploadMetadata — preserveComposer and preserveLyrics fields • buildMultipartBody — sends both flags as form fields • BatchUploadView — two toggles, both off by default, wired end-to-end • MultiAlbumEditorSheet — full rewrite matching BatchAlbumEditorSheet: MusicBrainz search, swipe to exclude/include tracks, Reset button, cover art widget with red glow
2026-04-11 09:40:51 -07:00 · 2026-04-11 09:40:51 -07:00 · 2f65da3ccc
commit 2f65da3ccc
parent 7a9c837650
1 changed files with 364 additions and 70 deletions
--- a/companion-api/main.py
+++ b/companion-api/main.py
@ -83,6 +83,7 @@ def init_db():
            full_path        TEXT UNIQUE NOT NULL,
            relative_path    TEXT NOT NULL,
            navidrome_id     TEXT,
+            navidrome_album_id TEXT,  -- Fix #13: track album reassignment
            title            TEXT NOT NULL DEFAULT '',
            artist           TEXT NOT NULL DEFAULT '',
            album            TEXT NOT NULL DEFAULT '',
@ -849,8 +850,11 @@ async def sync_navidrome_ids_task():
                    if hit: db_song_id, strategy = hit, 7

                if db_song_id:
-                    cur.execute("UPDATE songs SET navidrome_id = ? WHERE id = ?",
-                                (nd_id, db_song_id))
+                    nd_album_id = ns.get("albumId", "")
+                    cur.execute(
+                        "UPDATE songs SET navidrome_id = ?, navidrome_album_id = ? WHERE id = ?",
+                        (nd_id, nd_album_id, db_song_id)
+                    )
                    if strategy == 1: matched_s1 += 1
                    elif strategy == 2: matched_s2 += 1
                    elif strategy == 3: matched_s3 += 1
@ -885,6 +889,111 @@ async def sync_navidrome_ids_task():
        traceback.print_exc()


+# ── Picard tag cleanup ───────────────────────────────────────────────────────
+
+# The ONLY tags Navidrome reads and uses. Everything else is noise that can
+# cause album splitting, wrong grouping, and other library issues.
+# Using a whitelist (keep only these) is safer than a blacklist (remove known bad ones)
+# because it handles any future Picard tags automatically.
+NAVIDROME_TAGS = {
+    'TITLE', 'ARTIST', 'ALBUM', 'ALBUMARTIST',
+    'TRACKNUMBER', 'DISCNUMBER', 'DATE',
+    'GENRE', 'COMPOSER', 'LYRICS', 'COMMENT', 'ISRC',
+    'REPLAYGAIN_TRACK_GAIN', 'REPLAYGAIN_TRACK_PEAK',
+    'REPLAYGAIN_ALBUM_GAIN', 'REPLAYGAIN_ALBUM_PEAK',
+}
+
+# Keep the blacklist for reference / legacy clean-tags endpoint
+PICARD_TAGS_TO_REMOVE = {
+    'MUSICBRAINZ_TRACKID', 'MUSICBRAINZ_ALBUMID', 'MUSICBRAINZ_RELEASETRACKID',
+    'MUSICBRAINZ_RELEASEGROUPID', 'MUSICBRAINZ_ALBUMARTISTID', 'MUSICBRAINZ_ARTISTID',
+    'MUSICBRAINZ_WORKID', 'MUSICBRAINZ_ALBUMCOMMENT', 'MUSICBRAINZ_ALBUMSTATUS',
+    'MUSICBRAINZ_ALBUMTYPE', 'ACOUSTID_ID', 'ACOUSTID_FINGERPRINT',
+    'RELEASECOUNTRY', 'RELEASESTATUS', 'RELEASETYPE', 'RELEASETIME',
+    'BARCODE', 'ASIN', 'CATALOGNUMBER', 'SCRIPT',
+    'ARTISTS', 'ARTIST_CREDIT', 'ALBUMARTIST_CREDIT',
+    'TOTALTRACKS', 'TRACKTOTAL', 'TOTALDISCS', 'DISCTOTAL',
+    'DISCC', 'TRACKC', 'DISC', 'TRACK', 'COMPOSERSORT',
+    'ENCODER', 'ENCODEDBY', 'GROUPING', 'PUBLISHER', 'DESCRIPTION',
+    'DISCSUBTITLE', 'LYRICIST', 'ARRANGER',
+    'OFFICIAL_AUDIO_SOURCE_URL', 'OFFICIAL_AUDIO_FILE_URL',
+    'DIGEST', 'FILETYPE', 'UPC', 'TYPE',
+    'ALBUM ARTIST', 'ALBUM_ARTIST', 'BPM',
+}
+
+
+def enforce_tag_whitelist(
+    full_path: str,
+    preserve_composer: bool = True,
+    preserve_lyrics: bool = True,
+    dry_run: bool = False,
+) -> dict:
+    """
+    Whitelist approach: keep ONLY tags in NAVIDROME_TAGS, delete everything else.
+    - preserve_composer: keep COMPOSER if non-empty (default True for edits, False for uploads)
+    - preserve_lyrics:   keep LYRICS if non-empty   (default True for edits, False for uploads)
+    - dry_run: return what would be removed without writing
+    Existing ReplayGain values are always preserved.
+    """
+    allowed = set(NAVIDROME_TAGS)
+    if not preserve_composer:
+        allowed.discard('COMPOSER')
+    if not preserve_lyrics:
+        allowed.discard('LYRICS')
+
+    result = {"path": full_path, "removed": [], "kept": [], "error": None}
+    ext = Path(full_path).suffix.lower()
+
+    try:
+        if ext == '.flac':
+            from mutagen.flac import FLAC
+            f = FLAC(full_path)
+            to_remove = []
+            for k in list(f.keys()):
+                ku = k.upper()
+                if ku not in allowed:
+                    to_remove.append(k)
+                # Never remove non-empty COMPOSER/LYRICS even if not in allowed
+                # (preserve existing values the user may have set manually)
+                elif ku == 'COMPOSER' and not f[k][0].strip():
+                    to_remove.append(k)  # blank composer — remove
+                elif ku == 'LYRICS' and not f[k][0].strip():
+                    to_remove.append(k)  # blank lyrics — remove
+            result["removed"] = to_remove
+            result["kept"]    = [k for k in f.keys() if k not in to_remove]
+            if not dry_run and to_remove:
+                for k in to_remove:
+                    del f[k]
+                f.save()
+        else:
+            audio = MutagenFile(full_path, easy=False)
+            if audio is None:
+                result["error"] = "Unsupported format"
+                return result
+            to_remove = []
+            for k in list(audio.keys()):
+                ku = k.upper().split(':')[0].strip()
+                if ku not in allowed:
+                    to_remove.append(k)
+            to_remove = list(set(to_remove))
+            result["removed"] = to_remove
+            result["kept"]    = [k for k in audio.keys() if k not in to_remove]
+            if not dry_run and to_remove:
+                for k in to_remove:
+                    try: del audio[k]
+                    except Exception: pass
+                audio.save()
+    except Exception as e:
+        result["error"] = str(e)
+
+    return result
+
+
+def clean_picard_tags(full_path: str, dry_run: bool = False) -> dict:
+    """Legacy blacklist cleaner — now delegates to whitelist enforcer."""
+    return enforce_tag_whitelist(full_path, dry_run=dry_run)
+
+
 # ── Metadata helpers ─────────────────────────────────────────────────────────

 def sanitize(name: str) -> str:
@ -939,6 +1048,23 @@ def build_target_path(full_path: str) -> Optional[str]:
            if m:
                disc_num = int(m.group(1))

+        # Fix #6: Disc sanity check.
+        # If discnumber > 1 but no siblings in the same directory already have
+        # disc-prefixed filenames (e.g. 02-XX), treat this as disc 1.
+        # Prevents compilation tracks tagged from vinyl rips (disc 2) from
+        # getting 02-13 style filenames after being moved to a single-disc album.
+        if disc_num > 1:
+            same_dir = os.path.dirname(full_path)
+            try:
+                siblings = [f for f in os.listdir(same_dir) if f.lower().endswith(AUDIO_EXTS)]
+                has_disc_siblings = any(re.match(r'\d{2}-\d{2}', f) for f in siblings)
+                if not has_disc_siblings:
+                    print(f"  disc sanity: no disc siblings for {os.path.basename(full_path)}"
+                          f" — overriding disc {disc_num} -> 1", flush=True)
+                    disc_num = 1
+            except Exception:
+                pass
+
        # Build track prefix
        if disc_num > 1:
            prefix = f"{disc_num:02d}-{track_num:02d}"
@ -1048,6 +1174,9 @@ def restructure_all() -> dict:
        if not os.path.isfile(full_path):
            skipped += 1
            continue
+        # Enforce whitelist before restructuring — clean tags first so
+        # build_target_path reads clean data and generates the correct path
+        enforce_tag_whitelist(full_path, preserve_composer=True, preserve_lyrics=True)
        result = restructure_file(full_path)
        if result:
            moved += 1
@ -1055,6 +1184,10 @@ def restructure_all() -> dict:
            skipped += 1
    print(f"Restructure complete: {moved} moved, {skipped} skipped, {failed} failed", flush=True)
    return {"moved": moved, "skipped": skipped, "failed": failed}
+
+
+def apply_tags(path: str, u, preserve_composer: bool = True, preserve_lyrics: bool = True):
+    """Write tags then enforce whitelist — only Navidrome tags survive."""
    audio = MutagenFile(path, easy=True)
    if audio is None:
        raise ValueError(f"Unsupported format: {path}")
@ -1066,56 +1199,12 @@ def restructure_all() -> dict:
    if u.year:         audio['date']        = str(u.year)
    if u.track_number: audio['tracknumber'] = str(u.track_number)
    audio.save()
+    # Enforce whitelist after writing — nukes everything not in NAVIDROME_TAGS
+    enforce_tag_whitelist(path, preserve_composer=preserve_composer, preserve_lyrics=preserve_lyrics)


-def apply_tags(path: str, u):
-    # For FLAC files, clean up all legacy albumartist variants written by Picard
-    # before writing. easy=True only writes lowercase 'albumartist' but Navidrome
-    # reads ALBUMARTIST (uppercase) first — so old values stick if not removed.
-    ext = Path(path).suffix.lower()
-    if ext == '.flac' and u.album_artist:
-        try:
-            from mutagen.flac import FLAC
-            raw = FLAC(path)
-            for variant in ['ALBUM ARTIST', 'ALBUM_ARTIST', 'ALBUMARTIST', 'albumartist']:
-                if variant in raw:
-                    del raw[variant]
-            raw['ALBUMARTIST'] = u.album_artist
-            raw.save()
-        except Exception as e:
-            print(f"  FLAC albumartist cleanup failed for {os.path.basename(path)}: {e}", flush=True)
-
-    audio = MutagenFile(path, easy=True)
-    if audio is None:
-        raise ValueError(f"Unsupported format: {path}")
-    if u.title:        audio['title']       = u.title
-    if u.artist:       audio['artist']      = u.artist
-    if u.album:        audio['album']       = u.album
-    if u.album_artist: audio['albumartist'] = u.album_artist
-    if u.genre:        audio['genre']       = u.genre
-    if u.year:         audio['date']        = str(u.year)
-    if u.track_number: audio['tracknumber'] = str(u.track_number)
-    audio.save()
-
-
-def apply_tags_dict(path: str, tags: dict):
-    # For FLAC files, use raw mutagen to nuke all legacy tag variants before
-    # writing. Tools like Picard write ALBUM ARTIST, ALBUM_ARTIST, albumartist
-    # etc. — Navidrome reads the uppercase ones and gets the wrong value if we
-    # only write the lowercase easy-mode key without removing the others.
-    ext = Path(path).suffix.lower()
-    if ext == '.flac' and tags.get('album_artist'):
-        try:
-            from mutagen.flac import FLAC
-            raw = FLAC(path)
-            for variant in ['ALBUM ARTIST', 'ALBUM_ARTIST', 'ALBUMARTIST', 'albumartist']:
-                if variant in raw:
-                    del raw[variant]
-            raw['ALBUMARTIST'] = tags['album_artist']
-            raw.save()
-        except Exception as e:
-            print(f"  FLAC albumartist cleanup failed for {os.path.basename(path)}: {e}", flush=True)
-
+def apply_tags_dict(path: str, tags: dict, preserve_composer: bool = True, preserve_lyrics: bool = True):
+    """Write tags dict then enforce whitelist — only Navidrome tags survive."""
    audio = MutagenFile(path, easy=True)
    if audio is None:
        raise ValueError(f"Unsupported format: {path}")
@ -1127,6 +1216,8 @@ def apply_tags_dict(path: str, tags: dict):
        if val is not None and val != "":
            audio[tag_name] = str(val)
    audio.save()
+    # Enforce whitelist after writing
+    enforce_tag_whitelist(path, preserve_composer=preserve_composer, preserve_lyrics=preserve_lyrics)


 # ── Analysis ─────────────────────────────────────────────────────────────────
@ -1346,9 +1437,9 @@ async def batch_edit_metadata(update: BatchMetadataUpdate):
            continue
        try:
            apply_tags_dict(fp, tags)
-            new_fp = restructure_file(fp) or fp
-            update_song_in_db(new_fp)
-            results["succeeded"].append(os.path.relpath(new_fp, MUSIC_DIR))
+            # Fix #1: No auto-restructure on tag edits. Only via /bulk-fix.
+            update_song_in_db(fp)
+            results["succeeded"].append(os.path.relpath(fp, MUSIC_DIR))
        except Exception as e:
            results["failed"].append({"path": rp, "error": str(e)})
    await trigger_scan()
@ -1365,10 +1456,12 @@ async def batch_edit_metadata(update: BatchMetadataUpdate):

@app.post("/upload-track")
 async def upload_track(
-    file:   UploadFile = File(...),
-    title:  str        = Form(...),
-    artist: str        = Form(...),
-    album:  str        = Form(...)
+    file:              UploadFile = File(...),
+    title:             str        = Form(...),
+    artist:            str        = Form(...),
+    album:             str        = Form(...),
+    preserve_composer: bool       = Form(False),
+    preserve_lyrics:   bool       = Form(False),
 ):
    dest = os.path.join(MUSIC_DIR, "uploads")
    os.makedirs(dest, exist_ok=True)
@ -1378,7 +1471,7 @@ async def upload_track(
    try:
        u = MetadataUpdate(relative_path=f"uploads/{file.filename}",
                           title=title, artist=artist, album=album)
-        apply_tags(fp, u)
+        apply_tags(fp, u, preserve_composer=preserve_composer, preserve_lyrics=preserve_lyrics)
        update_song_in_db(fp)
        profile = analyze(fp)
        await trigger_scan()
@ -1393,9 +1486,11 @@ async def upload_track(

@app.post("/upload-tracks")
 async def upload_tracks(
-    files:         List[UploadFile]      = File(...),
-    metadata_json: str                   = Form(...),
-    cover_art:     Optional[UploadFile]  = File(None),
+    files:             List[UploadFile]     = File(...),
+    metadata_json:     str                  = Form(...),
+    cover_art:         Optional[UploadFile] = File(None),
+    preserve_composer: bool                 = Form(False),
+    preserve_lyrics:   bool                 = Form(False),
 ):
    """
    Upload multiple audio files with per-track metadata.
@ -1436,7 +1531,9 @@ async def upload_tracks(
            if meta.get("track_number"): tags["track_number"] = str(meta["track_number"])
            if meta.get("genre"):        tags["genre"]        = meta["genre"]
            if meta.get("year"):         tags["year"]         = str(meta["year"])
-            apply_tags_dict(fp, tags)
+            apply_tags_dict(fp, tags,
+                            preserve_composer=preserve_composer,
+                            preserve_lyrics=preserve_lyrics)
            try:
                analyze(fp)
            except Exception as e:
@ -1565,10 +1662,32 @@ async def precompute(background_tasks: BackgroundTasks, relative_path: str = "")


@app.post("/bulk-fix")
-async def bulk_fix(background_tasks: BackgroundTasks):
+async def bulk_fix(background_tasks: BackgroundTasks, dry_run: bool = False):
+    """
+    Fix #4: Restructure all files to canonical paths.
+    ?dry_run=true returns a list of {from, to} moves without executing them.
+    """
+    if dry_run:
+        # Preview only — return what would move
+        moves = []
+        with sqlite3.connect(DB_PATH) as c:
+            rows = c.execute("SELECT full_path FROM songs").fetchall()
+        for (full_path,) in rows:
+            if not os.path.isfile(full_path):
+                continue
+            target = build_target_path(full_path)
+            if target and os.path.normpath(full_path) != os.path.normpath(target):
+                moves.append({
+                    "from": os.path.relpath(full_path, MUSIC_DIR),
+                    "to":   os.path.relpath(target, MUSIC_DIR)
+                })
+        return {"dry_run": True, "moves": len(moves), "preview": moves}
+
    async def run():
        result = restructure_all()
        await trigger_scan()
+        await asyncio.sleep(4)
+        asyncio.create_task(_run_conflict_check_and_broadcast())
        await push.broadcast("library_restructured", result)
    background_tasks.add_task(run)
    return {"message": "Library restructure started"}
@ -1815,20 +1934,100 @@ async def get_artist_photo(artist_name: str):
    return FileResponse(row[0], media_type=mt)


+async def auto_fix_duplicate_albums():
+    """
+    Fix #9: After every Navidrome scan, detect duplicate album entries
+    (same name, different album_artist) and rewrite tags on minority files
+    so Navidrome groups them correctly on the next scan.
+    Uses tag rewrites only — never writes to Navidrome's DB directly.
+    """
+    navidrome_db = os.getenv("NAVIDROME_DB_PATH", "/navidrome_data/navidrome.db")
+    if not os.path.isfile(navidrome_db):
+        return
+    try:
+        with sqlite3.connect(navidrome_db) as nav:
+            rows = nav.execute("""
+                SELECT name, COUNT(DISTINCT album_artist) as aa_count,
+                       GROUP_CONCAT(id, '|||') as ids,
+                       GROUP_CONCAT(album_artist, '|||') as artists,
+                       GROUP_CONCAT(song_count, '|||') as counts
+                FROM album
+                GROUP BY name
+                HAVING aa_count > 1
+            """).fetchall()
+
+        if not rows:
+            return
+
+        print(f"  auto_fix_duplicate_albums: found {len(rows)} duplicate album(s)", flush=True)
+
+        for name, aa_count, ids, artists, counts in rows:
+            id_list     = ids.split('|||')
+            artist_list = artists.split('|||')
+            count_list  = [int(c) for c in counts.split('|||')]
+
+            # Canonical = album entry with most songs
+            canonical_idx    = count_list.index(max(count_list))
+            canonical_artist = artist_list[canonical_idx]
+
+            # Find all files in Companion DB for this album
+            with sqlite3.connect(DB_PATH) as c:
+                file_rows = c.execute(
+                    "SELECT full_path, album_artist FROM songs WHERE album = ?", (name,)
+                ).fetchall()
+
+            fixed = 0
+            for full_path, current_aa in file_rows:
+                if current_aa == canonical_artist:
+                    continue
+                if not os.path.isfile(full_path):
+                    continue
+                try:
+                    ext = Path(full_path).suffix.lower()
+                    if ext == '.flac':
+                        from mutagen.flac import FLAC
+                        f = FLAC(full_path)
+                        for variant in ['ALBUM ARTIST', 'ALBUM_ARTIST', 'ALBUMARTIST', 'albumartist']:
+                            if variant in f: del f[variant]
+                        f['ALBUMARTIST'] = canonical_artist
+                        f.save()
+                    else:
+                        audio = MutagenFile(full_path, easy=True)
+                        if audio:
+                            audio['albumartist'] = canonical_artist
+                            audio.save()
+                    update_song_in_db(full_path)
+                    fixed += 1
+                    print(f"  auto_fix: {os.path.basename(full_path)}: "
+                          f"{current_aa!r} -> {canonical_artist!r}", flush=True)
+                except Exception as e:
+                    print(f"  auto_fix error: {os.path.basename(full_path)}: {e}", flush=True)
+
+            if fixed:
+                print(f"  auto_fix_duplicate_albums: fixed {fixed} tracks for '{name}'", flush=True)
+
+    except Exception as e:
+        print(f"  auto_fix_duplicate_albums failed: {e}", flush=True)
+
+
 async def _run_conflict_check_and_broadcast():
-    """Run conflict check in the background and broadcast results."""
+    """
+    Fix #9: After scan, auto-fix duplicate albums then run conflict check.
+    Called as a background task after every edit and scan.
+    """
    try:
        await asyncio.sleep(6)  # Wait for Navidrome scan to complete
+        # Auto-fix duplicate albums before reporting conflicts
+        await auto_fix_duplicate_albums()
        navidrome_db = os.getenv("NAVIDROME_DB_PATH", "/navidrome_data/navidrome.db")
        issues = check_library_conflicts(navidrome_db)
        error_count   = sum(1 for i in issues if i["severity"] == "error")
        warning_count = sum(1 for i in issues if i["severity"] == "warning")
-        if issues:
-            await push.broadcast("conflicts_updated", {
-                "total":    str(len(issues)),
-                "errors":   str(error_count),
-                "warnings": str(warning_count)
-            })
+        await push.broadcast("conflicts_updated", {
+            "total":    str(len(issues)),
+            "errors":   str(error_count),
+            "warnings": str(warning_count)
+        })
    except Exception as e:
        print(f"  Background conflict check failed: {e}", flush=True)

@ -1999,6 +2198,50 @@ def check_library_conflicts(navidrome_db_path: str = os.getenv("NAVIDROME_DB_PAT
    except Exception as e:
        print(f"  conflict check 6 failed: {e}", flush=True)

+    # ── 7. Album identity reassignment (Fix #13) ────────────────────────────
+    # Detect tracks whose Navidrome album_id changed since last sync —
+    # indicates Navidrome reassigned them to a different album entry.
+    try:
+        with sqlite3.connect(DB_PATH) as c:
+            rows = c.execute("""
+                SELECT relative_path, navidrome_album_id, album, album_artist
+                FROM songs
+                WHERE navidrome_album_id IS NOT NULL AND navidrome_album_id != ''
+                GROUP BY navidrome_album_id, album
+                HAVING COUNT(DISTINCT navidrome_album_id) > 1
+                   OR (SELECT COUNT(*) FROM songs s2
+                       WHERE s2.album = songs.album
+                       AND s2.navidrome_album_id != songs.navidrome_album_id
+                       LIMIT 1) > 0
+            """).fetchall()
+            # Simpler: find albums in our DB that map to multiple navidrome album IDs
+            album_rows = c.execute("""
+                SELECT album, album_artist,
+                       COUNT(DISTINCT navidrome_album_id) as id_count,
+                       GROUP_CONCAT(DISTINCT navidrome_album_id) as ids
+                FROM songs
+                WHERE navidrome_album_id IS NOT NULL AND navidrome_album_id != ''
+                GROUP BY album, album_artist
+                HAVING id_count > 1
+            """).fetchall()
+            for album, aa, id_count, ids in album_rows:
+                path_rows = c.execute(
+                    "SELECT relative_path FROM songs WHERE album = ? AND album_artist = ? LIMIT 10",
+                    (album, aa)
+                ).fetchall()
+                issues.append({
+                    "type": "album_reassigned",
+                    "severity": "warning",
+                    "title": f"Album split in Navidrome: {album}",
+                    "detail": f"'{album}' by '{aa}' maps to {id_count} different Navidrome album IDs. "
+                              f"This usually means a tag edit caused Navidrome to create a duplicate album entry.",
+                    "affected_paths": [r[0] for r in path_rows],
+                    "fix_action": "fix_duplicate_album",
+                    "fix_data": {"album_name": album, "album_artists": [aa], "song_counts": ["0"]}
+                })
+    except Exception as e:
+        print(f"  conflict check 7 failed: {e}", flush=True)
+
    print(f"  Conflict check complete: {len(issues)} issues found", flush=True)
    return issues

@ -2152,6 +2395,57 @@ async def fix_conflict(request: FixConflictRequest):
        raise HTTPException(400, f"Unknown action: {action}")


+# =============================================================================
+# LIBRARY CLEAN TAGS
+# =============================================================================
+
+@app.post("/library/clean-tags")
+async def library_clean_tags(background_tasks: BackgroundTasks, dry_run: bool = False):
+    """
+    Remove all Picard-specific tags from every file in the library.
+    ?dry_run=true returns what would be removed without writing.
+    """
+    if dry_run:
+        # Return preview immediately
+        results = []
+        with sqlite3.connect(DB_PATH) as c:
+            rows = c.execute("SELECT full_path FROM songs").fetchall()
+        for (full_path,) in rows:
+            if not os.path.isfile(full_path):
+                continue
+            r = enforce_tag_whitelist(full_path, dry_run=True)
+            if r["removed"]:
+                results.append({
+                    "path": os.path.relpath(full_path, MUSIC_DIR),
+                    "would_remove": r["removed"]
+                })
+        return {"dry_run": True, "files_affected": len(results), "preview": results[:50]}
+
+    async def run_clean():
+        fixed = 0
+        errors = 0
+        with sqlite3.connect(DB_PATH) as c:
+            rows = c.execute("SELECT full_path FROM songs").fetchall()
+        print(f"clean-tags: scanning {len(rows)} files...", flush=True)
+        for (full_path,) in rows:
+            if not os.path.isfile(full_path):
+                continue
+            r = enforce_tag_whitelist(full_path, dry_run=False)
+            if r["error"]:
+                errors += 1
+            elif r["removed"]:
+                update_song_in_db(full_path)
+                fixed += 1
+        print(f"clean-tags: cleaned {fixed} files, {errors} errors", flush=True)
+        await trigger_scan()
+        await asyncio.sleep(4)
+        asyncio.create_task(_run_conflict_check_and_broadcast())
+        await push.broadcast("tags_cleaned", {"fixed": str(fixed), "errors": str(errors)})
+
+    background_tasks.add_task(run_clean)
+    return {"message": "Tag cleaning started in background"}
+
+
 # =============================================================================
 # WebSocket Push
 # =============================================================================