diff --git a/companion-api/main.py b/companion-api/main.py index 14069f6..4bae094 100644 --- a/companion-api/main.py +++ b/companion-api/main.py @@ -83,6 +83,7 @@ def init_db(): full_path TEXT UNIQUE NOT NULL, relative_path TEXT NOT NULL, navidrome_id TEXT, + navidrome_album_id TEXT, -- Fix #13: track album reassignment title TEXT NOT NULL DEFAULT '', artist TEXT NOT NULL DEFAULT '', album TEXT NOT NULL DEFAULT '', @@ -849,8 +850,11 @@ async def sync_navidrome_ids_task(): if hit: db_song_id, strategy = hit, 7 if db_song_id: - cur.execute("UPDATE songs SET navidrome_id = ? WHERE id = ?", - (nd_id, db_song_id)) + nd_album_id = ns.get("albumId", "") + cur.execute( + "UPDATE songs SET navidrome_id = ?, navidrome_album_id = ? WHERE id = ?", + (nd_id, nd_album_id, db_song_id) + ) if strategy == 1: matched_s1 += 1 elif strategy == 2: matched_s2 += 1 elif strategy == 3: matched_s3 += 1 @@ -885,6 +889,111 @@ async def sync_navidrome_ids_task(): traceback.print_exc() +# ── Picard tag cleanup ─────────────────────────────────────────────────────── + +# The ONLY tags Navidrome reads and uses. Everything else is noise that can +# cause album splitting, wrong grouping, and other library issues. +# Using a whitelist (keep only these) is safer than a blacklist (remove known bad ones) +# because it handles any future Picard tags automatically. +NAVIDROME_TAGS = { + 'TITLE', 'ARTIST', 'ALBUM', 'ALBUMARTIST', + 'TRACKNUMBER', 'DISCNUMBER', 'DATE', + 'GENRE', 'COMPOSER', 'LYRICS', 'COMMENT', 'ISRC', + 'REPLAYGAIN_TRACK_GAIN', 'REPLAYGAIN_TRACK_PEAK', + 'REPLAYGAIN_ALBUM_GAIN', 'REPLAYGAIN_ALBUM_PEAK', +} + +# Keep the blacklist for reference / legacy clean-tags endpoint +PICARD_TAGS_TO_REMOVE = { + 'MUSICBRAINZ_TRACKID', 'MUSICBRAINZ_ALBUMID', 'MUSICBRAINZ_RELEASETRACKID', + 'MUSICBRAINZ_RELEASEGROUPID', 'MUSICBRAINZ_ALBUMARTISTID', 'MUSICBRAINZ_ARTISTID', + 'MUSICBRAINZ_WORKID', 'MUSICBRAINZ_ALBUMCOMMENT', 'MUSICBRAINZ_ALBUMSTATUS', + 'MUSICBRAINZ_ALBUMTYPE', 'ACOUSTID_ID', 'ACOUSTID_FINGERPRINT', + 'RELEASECOUNTRY', 'RELEASESTATUS', 'RELEASETYPE', 'RELEASETIME', + 'BARCODE', 'ASIN', 'CATALOGNUMBER', 'SCRIPT', + 'ARTISTS', 'ARTIST_CREDIT', 'ALBUMARTIST_CREDIT', + 'TOTALTRACKS', 'TRACKTOTAL', 'TOTALDISCS', 'DISCTOTAL', + 'DISCC', 'TRACKC', 'DISC', 'TRACK', 'COMPOSERSORT', + 'ENCODER', 'ENCODEDBY', 'GROUPING', 'PUBLISHER', 'DESCRIPTION', + 'DISCSUBTITLE', 'LYRICIST', 'ARRANGER', + 'OFFICIAL_AUDIO_SOURCE_URL', 'OFFICIAL_AUDIO_FILE_URL', + 'DIGEST', 'FILETYPE', 'UPC', 'TYPE', + 'ALBUM ARTIST', 'ALBUM_ARTIST', 'BPM', +} + + +def enforce_tag_whitelist( + full_path: str, + preserve_composer: bool = True, + preserve_lyrics: bool = True, + dry_run: bool = False, +) -> dict: + """ + Whitelist approach: keep ONLY tags in NAVIDROME_TAGS, delete everything else. + - preserve_composer: keep COMPOSER if non-empty (default True for edits, False for uploads) + - preserve_lyrics: keep LYRICS if non-empty (default True for edits, False for uploads) + - dry_run: return what would be removed without writing + Existing ReplayGain values are always preserved. + """ + allowed = set(NAVIDROME_TAGS) + if not preserve_composer: + allowed.discard('COMPOSER') + if not preserve_lyrics: + allowed.discard('LYRICS') + + result = {"path": full_path, "removed": [], "kept": [], "error": None} + ext = Path(full_path).suffix.lower() + + try: + if ext == '.flac': + from mutagen.flac import FLAC + f = FLAC(full_path) + to_remove = [] + for k in list(f.keys()): + ku = k.upper() + if ku not in allowed: + to_remove.append(k) + # Never remove non-empty COMPOSER/LYRICS even if not in allowed + # (preserve existing values the user may have set manually) + elif ku == 'COMPOSER' and not f[k][0].strip(): + to_remove.append(k) # blank composer — remove + elif ku == 'LYRICS' and not f[k][0].strip(): + to_remove.append(k) # blank lyrics — remove + result["removed"] = to_remove + result["kept"] = [k for k in f.keys() if k not in to_remove] + if not dry_run and to_remove: + for k in to_remove: + del f[k] + f.save() + else: + audio = MutagenFile(full_path, easy=False) + if audio is None: + result["error"] = "Unsupported format" + return result + to_remove = [] + for k in list(audio.keys()): + ku = k.upper().split(':')[0].strip() + if ku not in allowed: + to_remove.append(k) + to_remove = list(set(to_remove)) + result["removed"] = to_remove + result["kept"] = [k for k in audio.keys() if k not in to_remove] + if not dry_run and to_remove: + for k in to_remove: + try: del audio[k] + except Exception: pass + audio.save() + except Exception as e: + result["error"] = str(e) + + return result + + +def clean_picard_tags(full_path: str, dry_run: bool = False) -> dict: + """Legacy blacklist cleaner — now delegates to whitelist enforcer.""" + return enforce_tag_whitelist(full_path, dry_run=dry_run) + + # ── Metadata helpers ───────────────────────────────────────────────────────── def sanitize(name: str) -> str: @@ -939,6 +1048,23 @@ def build_target_path(full_path: str) -> Optional[str]: if m: disc_num = int(m.group(1)) + # Fix #6: Disc sanity check. + # If discnumber > 1 but no siblings in the same directory already have + # disc-prefixed filenames (e.g. 02-XX), treat this as disc 1. + # Prevents compilation tracks tagged from vinyl rips (disc 2) from + # getting 02-13 style filenames after being moved to a single-disc album. + if disc_num > 1: + same_dir = os.path.dirname(full_path) + try: + siblings = [f for f in os.listdir(same_dir) if f.lower().endswith(AUDIO_EXTS)] + has_disc_siblings = any(re.match(r'\d{2}-\d{2}', f) for f in siblings) + if not has_disc_siblings: + print(f" disc sanity: no disc siblings for {os.path.basename(full_path)}" + f" — overriding disc {disc_num} -> 1", flush=True) + disc_num = 1 + except Exception: + pass + # Build track prefix if disc_num > 1: prefix = f"{disc_num:02d}-{track_num:02d}" @@ -1048,6 +1174,9 @@ def restructure_all() -> dict: if not os.path.isfile(full_path): skipped += 1 continue + # Enforce whitelist before restructuring — clean tags first so + # build_target_path reads clean data and generates the correct path + enforce_tag_whitelist(full_path, preserve_composer=True, preserve_lyrics=True) result = restructure_file(full_path) if result: moved += 1 @@ -1055,6 +1184,10 @@ def restructure_all() -> dict: skipped += 1 print(f"Restructure complete: {moved} moved, {skipped} skipped, {failed} failed", flush=True) return {"moved": moved, "skipped": skipped, "failed": failed} + + +def apply_tags(path: str, u, preserve_composer: bool = True, preserve_lyrics: bool = True): + """Write tags then enforce whitelist — only Navidrome tags survive.""" audio = MutagenFile(path, easy=True) if audio is None: raise ValueError(f"Unsupported format: {path}") @@ -1066,56 +1199,12 @@ def restructure_all() -> dict: if u.year: audio['date'] = str(u.year) if u.track_number: audio['tracknumber'] = str(u.track_number) audio.save() + # Enforce whitelist after writing — nukes everything not in NAVIDROME_TAGS + enforce_tag_whitelist(path, preserve_composer=preserve_composer, preserve_lyrics=preserve_lyrics) -def apply_tags(path: str, u): - # For FLAC files, clean up all legacy albumartist variants written by Picard - # before writing. easy=True only writes lowercase 'albumartist' but Navidrome - # reads ALBUMARTIST (uppercase) first — so old values stick if not removed. - ext = Path(path).suffix.lower() - if ext == '.flac' and u.album_artist: - try: - from mutagen.flac import FLAC - raw = FLAC(path) - for variant in ['ALBUM ARTIST', 'ALBUM_ARTIST', 'ALBUMARTIST', 'albumartist']: - if variant in raw: - del raw[variant] - raw['ALBUMARTIST'] = u.album_artist - raw.save() - except Exception as e: - print(f" FLAC albumartist cleanup failed for {os.path.basename(path)}: {e}", flush=True) - - audio = MutagenFile(path, easy=True) - if audio is None: - raise ValueError(f"Unsupported format: {path}") - if u.title: audio['title'] = u.title - if u.artist: audio['artist'] = u.artist - if u.album: audio['album'] = u.album - if u.album_artist: audio['albumartist'] = u.album_artist - if u.genre: audio['genre'] = u.genre - if u.year: audio['date'] = str(u.year) - if u.track_number: audio['tracknumber'] = str(u.track_number) - audio.save() - - -def apply_tags_dict(path: str, tags: dict): - # For FLAC files, use raw mutagen to nuke all legacy tag variants before - # writing. Tools like Picard write ALBUM ARTIST, ALBUM_ARTIST, albumartist - # etc. — Navidrome reads the uppercase ones and gets the wrong value if we - # only write the lowercase easy-mode key without removing the others. - ext = Path(path).suffix.lower() - if ext == '.flac' and tags.get('album_artist'): - try: - from mutagen.flac import FLAC - raw = FLAC(path) - for variant in ['ALBUM ARTIST', 'ALBUM_ARTIST', 'ALBUMARTIST', 'albumartist']: - if variant in raw: - del raw[variant] - raw['ALBUMARTIST'] = tags['album_artist'] - raw.save() - except Exception as e: - print(f" FLAC albumartist cleanup failed for {os.path.basename(path)}: {e}", flush=True) - +def apply_tags_dict(path: str, tags: dict, preserve_composer: bool = True, preserve_lyrics: bool = True): + """Write tags dict then enforce whitelist — only Navidrome tags survive.""" audio = MutagenFile(path, easy=True) if audio is None: raise ValueError(f"Unsupported format: {path}") @@ -1127,6 +1216,8 @@ def apply_tags_dict(path: str, tags: dict): if val is not None and val != "": audio[tag_name] = str(val) audio.save() + # Enforce whitelist after writing + enforce_tag_whitelist(path, preserve_composer=preserve_composer, preserve_lyrics=preserve_lyrics) # ── Analysis ───────────────────────────────────────────────────────────────── @@ -1346,9 +1437,9 @@ async def batch_edit_metadata(update: BatchMetadataUpdate): continue try: apply_tags_dict(fp, tags) - new_fp = restructure_file(fp) or fp - update_song_in_db(new_fp) - results["succeeded"].append(os.path.relpath(new_fp, MUSIC_DIR)) + # Fix #1: No auto-restructure on tag edits. Only via /bulk-fix. + update_song_in_db(fp) + results["succeeded"].append(os.path.relpath(fp, MUSIC_DIR)) except Exception as e: results["failed"].append({"path": rp, "error": str(e)}) await trigger_scan() @@ -1365,10 +1456,12 @@ async def batch_edit_metadata(update: BatchMetadataUpdate): @app.post("/upload-track") async def upload_track( - file: UploadFile = File(...), - title: str = Form(...), - artist: str = Form(...), - album: str = Form(...) + file: UploadFile = File(...), + title: str = Form(...), + artist: str = Form(...), + album: str = Form(...), + preserve_composer: bool = Form(False), + preserve_lyrics: bool = Form(False), ): dest = os.path.join(MUSIC_DIR, "uploads") os.makedirs(dest, exist_ok=True) @@ -1378,7 +1471,7 @@ async def upload_track( try: u = MetadataUpdate(relative_path=f"uploads/{file.filename}", title=title, artist=artist, album=album) - apply_tags(fp, u) + apply_tags(fp, u, preserve_composer=preserve_composer, preserve_lyrics=preserve_lyrics) update_song_in_db(fp) profile = analyze(fp) await trigger_scan() @@ -1393,9 +1486,11 @@ async def upload_track( @app.post("/upload-tracks") async def upload_tracks( - files: List[UploadFile] = File(...), - metadata_json: str = Form(...), - cover_art: Optional[UploadFile] = File(None), + files: List[UploadFile] = File(...), + metadata_json: str = Form(...), + cover_art: Optional[UploadFile] = File(None), + preserve_composer: bool = Form(False), + preserve_lyrics: bool = Form(False), ): """ Upload multiple audio files with per-track metadata. @@ -1436,7 +1531,9 @@ async def upload_tracks( if meta.get("track_number"): tags["track_number"] = str(meta["track_number"]) if meta.get("genre"): tags["genre"] = meta["genre"] if meta.get("year"): tags["year"] = str(meta["year"]) - apply_tags_dict(fp, tags) + apply_tags_dict(fp, tags, + preserve_composer=preserve_composer, + preserve_lyrics=preserve_lyrics) try: analyze(fp) except Exception as e: @@ -1565,10 +1662,32 @@ async def precompute(background_tasks: BackgroundTasks, relative_path: str = "") @app.post("/bulk-fix") -async def bulk_fix(background_tasks: BackgroundTasks): +async def bulk_fix(background_tasks: BackgroundTasks, dry_run: bool = False): + """ + Fix #4: Restructure all files to canonical paths. + ?dry_run=true returns a list of {from, to} moves without executing them. + """ + if dry_run: + # Preview only — return what would move + moves = [] + with sqlite3.connect(DB_PATH) as c: + rows = c.execute("SELECT full_path FROM songs").fetchall() + for (full_path,) in rows: + if not os.path.isfile(full_path): + continue + target = build_target_path(full_path) + if target and os.path.normpath(full_path) != os.path.normpath(target): + moves.append({ + "from": os.path.relpath(full_path, MUSIC_DIR), + "to": os.path.relpath(target, MUSIC_DIR) + }) + return {"dry_run": True, "moves": len(moves), "preview": moves} + async def run(): result = restructure_all() await trigger_scan() + await asyncio.sleep(4) + asyncio.create_task(_run_conflict_check_and_broadcast()) await push.broadcast("library_restructured", result) background_tasks.add_task(run) return {"message": "Library restructure started"} @@ -1815,20 +1934,100 @@ async def get_artist_photo(artist_name: str): return FileResponse(row[0], media_type=mt) +async def auto_fix_duplicate_albums(): + """ + Fix #9: After every Navidrome scan, detect duplicate album entries + (same name, different album_artist) and rewrite tags on minority files + so Navidrome groups them correctly on the next scan. + Uses tag rewrites only — never writes to Navidrome's DB directly. + """ + navidrome_db = os.getenv("NAVIDROME_DB_PATH", "/navidrome_data/navidrome.db") + if not os.path.isfile(navidrome_db): + return + try: + with sqlite3.connect(navidrome_db) as nav: + rows = nav.execute(""" + SELECT name, COUNT(DISTINCT album_artist) as aa_count, + GROUP_CONCAT(id, '|||') as ids, + GROUP_CONCAT(album_artist, '|||') as artists, + GROUP_CONCAT(song_count, '|||') as counts + FROM album + GROUP BY name + HAVING aa_count > 1 + """).fetchall() + + if not rows: + return + + print(f" auto_fix_duplicate_albums: found {len(rows)} duplicate album(s)", flush=True) + + for name, aa_count, ids, artists, counts in rows: + id_list = ids.split('|||') + artist_list = artists.split('|||') + count_list = [int(c) for c in counts.split('|||')] + + # Canonical = album entry with most songs + canonical_idx = count_list.index(max(count_list)) + canonical_artist = artist_list[canonical_idx] + + # Find all files in Companion DB for this album + with sqlite3.connect(DB_PATH) as c: + file_rows = c.execute( + "SELECT full_path, album_artist FROM songs WHERE album = ?", (name,) + ).fetchall() + + fixed = 0 + for full_path, current_aa in file_rows: + if current_aa == canonical_artist: + continue + if not os.path.isfile(full_path): + continue + try: + ext = Path(full_path).suffix.lower() + if ext == '.flac': + from mutagen.flac import FLAC + f = FLAC(full_path) + for variant in ['ALBUM ARTIST', 'ALBUM_ARTIST', 'ALBUMARTIST', 'albumartist']: + if variant in f: del f[variant] + f['ALBUMARTIST'] = canonical_artist + f.save() + else: + audio = MutagenFile(full_path, easy=True) + if audio: + audio['albumartist'] = canonical_artist + audio.save() + update_song_in_db(full_path) + fixed += 1 + print(f" auto_fix: {os.path.basename(full_path)}: " + f"{current_aa!r} -> {canonical_artist!r}", flush=True) + except Exception as e: + print(f" auto_fix error: {os.path.basename(full_path)}: {e}", flush=True) + + if fixed: + print(f" auto_fix_duplicate_albums: fixed {fixed} tracks for '{name}'", flush=True) + + except Exception as e: + print(f" auto_fix_duplicate_albums failed: {e}", flush=True) + + async def _run_conflict_check_and_broadcast(): - """Run conflict check in the background and broadcast results.""" + """ + Fix #9: After scan, auto-fix duplicate albums then run conflict check. + Called as a background task after every edit and scan. + """ try: await asyncio.sleep(6) # Wait for Navidrome scan to complete + # Auto-fix duplicate albums before reporting conflicts + await auto_fix_duplicate_albums() navidrome_db = os.getenv("NAVIDROME_DB_PATH", "/navidrome_data/navidrome.db") issues = check_library_conflicts(navidrome_db) error_count = sum(1 for i in issues if i["severity"] == "error") warning_count = sum(1 for i in issues if i["severity"] == "warning") - if issues: - await push.broadcast("conflicts_updated", { - "total": str(len(issues)), - "errors": str(error_count), - "warnings": str(warning_count) - }) + await push.broadcast("conflicts_updated", { + "total": str(len(issues)), + "errors": str(error_count), + "warnings": str(warning_count) + }) except Exception as e: print(f" Background conflict check failed: {e}", flush=True) @@ -1999,6 +2198,50 @@ def check_library_conflicts(navidrome_db_path: str = os.getenv("NAVIDROME_DB_PAT except Exception as e: print(f" conflict check 6 failed: {e}", flush=True) + # ── 7. Album identity reassignment (Fix #13) ──────────────────────────── + # Detect tracks whose Navidrome album_id changed since last sync — + # indicates Navidrome reassigned them to a different album entry. + try: + with sqlite3.connect(DB_PATH) as c: + rows = c.execute(""" + SELECT relative_path, navidrome_album_id, album, album_artist + FROM songs + WHERE navidrome_album_id IS NOT NULL AND navidrome_album_id != '' + GROUP BY navidrome_album_id, album + HAVING COUNT(DISTINCT navidrome_album_id) > 1 + OR (SELECT COUNT(*) FROM songs s2 + WHERE s2.album = songs.album + AND s2.navidrome_album_id != songs.navidrome_album_id + LIMIT 1) > 0 + """).fetchall() + # Simpler: find albums in our DB that map to multiple navidrome album IDs + album_rows = c.execute(""" + SELECT album, album_artist, + COUNT(DISTINCT navidrome_album_id) as id_count, + GROUP_CONCAT(DISTINCT navidrome_album_id) as ids + FROM songs + WHERE navidrome_album_id IS NOT NULL AND navidrome_album_id != '' + GROUP BY album, album_artist + HAVING id_count > 1 + """).fetchall() + for album, aa, id_count, ids in album_rows: + path_rows = c.execute( + "SELECT relative_path FROM songs WHERE album = ? AND album_artist = ? LIMIT 10", + (album, aa) + ).fetchall() + issues.append({ + "type": "album_reassigned", + "severity": "warning", + "title": f"Album split in Navidrome: {album}", + "detail": f"'{album}' by '{aa}' maps to {id_count} different Navidrome album IDs. " + f"This usually means a tag edit caused Navidrome to create a duplicate album entry.", + "affected_paths": [r[0] for r in path_rows], + "fix_action": "fix_duplicate_album", + "fix_data": {"album_name": album, "album_artists": [aa], "song_counts": ["0"]} + }) + except Exception as e: + print(f" conflict check 7 failed: {e}", flush=True) + print(f" Conflict check complete: {len(issues)} issues found", flush=True) return issues @@ -2152,6 +2395,57 @@ async def fix_conflict(request: FixConflictRequest): raise HTTPException(400, f"Unknown action: {action}") +# ============================================================================= +# LIBRARY CLEAN TAGS +# ============================================================================= + +@app.post("/library/clean-tags") +async def library_clean_tags(background_tasks: BackgroundTasks, dry_run: bool = False): + """ + Remove all Picard-specific tags from every file in the library. + ?dry_run=true returns what would be removed without writing. + """ + if dry_run: + # Return preview immediately + results = [] + with sqlite3.connect(DB_PATH) as c: + rows = c.execute("SELECT full_path FROM songs").fetchall() + for (full_path,) in rows: + if not os.path.isfile(full_path): + continue + r = enforce_tag_whitelist(full_path, dry_run=True) + if r["removed"]: + results.append({ + "path": os.path.relpath(full_path, MUSIC_DIR), + "would_remove": r["removed"] + }) + return {"dry_run": True, "files_affected": len(results), "preview": results[:50]} + + async def run_clean(): + fixed = 0 + errors = 0 + with sqlite3.connect(DB_PATH) as c: + rows = c.execute("SELECT full_path FROM songs").fetchall() + print(f"clean-tags: scanning {len(rows)} files...", flush=True) + for (full_path,) in rows: + if not os.path.isfile(full_path): + continue + r = enforce_tag_whitelist(full_path, dry_run=False) + if r["error"]: + errors += 1 + elif r["removed"]: + update_song_in_db(full_path) + fixed += 1 + print(f"clean-tags: cleaned {fixed} files, {errors} errors", flush=True) + await trigger_scan() + await asyncio.sleep(4) + asyncio.create_task(_run_conflict_check_and_broadcast()) + await push.broadcast("tags_cleaned", {"fixed": str(fixed), "errors": str(errors)}) + + background_tasks.add_task(run_clean) + return {"message": "Tag cleaning started in background"} + + # ============================================================================= # WebSocket Push # =============================================================================