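Fix library management: AIFF raw-ID3 tag fallback, cursor-based rowcount checks, multi-strategy Navidrome ID matching, tag-derived sort keys on restructure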

2026-04-10 16:22:49 -07:00 · 2026-04-10 16:22:49 -07:00 · fde3df0d26
commit fde3df0d26
parent 5b319ad643
1 changed files with 286 additions and 73 deletions
--- a/companion-api/main.py
+++ b/companion-api/main.py
@ -175,47 +175,74 @@ def find_cover_art(song_path: str) -> Optional[str]:
 # ── Tag reader ──────────────────────────────────────────────────────────────

 def read_tags(full_path: str) -> dict:
+    """Read all display tags from an audio file using mutagen."""
+    audio_easy = None
    try:
-        audio = MutagenFile(full_path, easy=True)
+        audio_easy = MutagenFile(full_path, easy=True)
    except Exception:
-        audio = None
+        pass

-    def get(key):
-        if audio and key in audio and audio[key]:
-            return audio[key][0]
+    # AIFF files don't support easy=True — fall back to raw ID3 tags
+    audio_raw = None
+    ext = Path(full_path).suffix.lower()
+    if ext in ('.aiff', '.aif') and (audio_easy is None or not audio_easy):
+        try:
+            from mutagen.aiff import AIFF
+            audio_raw = AIFF(full_path)
+        except Exception:
+            pass
+
+    def get_easy(key):
+        if audio_easy and key in audio_easy and audio_easy[key]:
+            return audio_easy[key][0]
        return ''

-    title        = get('title')       or Path(full_path).stem
-    artist       = get('artist')      or 'Unknown Artist'
-    album        = get('album')       or 'Unknown Album'
-    album_artist = get('albumartist') or artist
-    genre        = get('genre')       or ''
+    def get_raw(frame_id):
+        """Read a raw ID3 frame value from AIFF."""
+        if audio_raw and audio_raw.tags:
+            frame = audio_raw.tags.get(frame_id)
+            if frame:
+                return str(frame.text[0]) if hasattr(frame, 'text') and frame.text else str(frame)
+        return ''
+
+    def get(easy_key, raw_id=None):
+        val = get_easy(easy_key)
+        if not val and raw_id:
+            val = get_raw(raw_id)
+        return val
+
+    title        = get('title', 'TIT2')       or Path(full_path).stem
+    artist       = get('artist', 'TPE1')      or 'Unknown Artist'
+    album        = get('album', 'TALB')       or 'Unknown Album'
+    album_artist = get('albumartist', 'TPE2') or artist
+    genre        = get('genre', 'TCON')       or ''

    year = None
-    raw = get('date')
-    if raw:
-        m = re.search(r'\d{4}', raw)
+    raw_date = get('date', 'TDRC') or get('date', 'TYER')
+    if raw_date:
+        m = re.search(r'\d{4}', str(raw_date))
        if m:
            year = int(m.group())

    track_number = None
-    raw = get('tracknumber')
-    if raw:
-        m = re.match(r'(\d+)', raw)
+    raw_track = get('tracknumber', 'TRCK')
+    if raw_track:
+        m = re.match(r'(\d+)', str(raw_track))
        if m:
            track_number = int(m.group(1))

    disc_number = None
-    raw = get('discnumber')
-    if raw:
-        m = re.match(r'(\d+)', raw)
+    raw_disc = get('discnumber', 'TPOS')
+    if raw_disc:
+        m = re.match(r'(\d+)', str(raw_disc))
        if m:
            disc_number = int(m.group(1))

    duration = None
-    if audio and hasattr(audio, 'info') and audio.info:
+    audio_for_info = audio_easy or audio_raw
+    if audio_for_info and hasattr(audio_for_info, 'info') and audio_for_info.info:
        try:
-            duration = float(audio.info.length)
+            duration = float(audio_for_info.info.length)
        except Exception:
            pass

@ -310,7 +337,8 @@ def update_song_in_db(full_path: str):
        mtime = fsize = None

    with sqlite3.connect(DB_PATH) as c:
-        c.execute("""UPDATE songs SET
+        cur = c.cursor()
+        cur.execute("""UPDATE songs SET
            title=?, artist=?, album=?, album_artist=?, genre=?,
            year=?, track_number=?, disc_number=?, duration=?,
            sort_title=?, sort_artist=?, sort_album=?, sort_album_artist=?,
@ -323,8 +351,8 @@ def update_song_in_db(full_path: str):
            sort_key(tags['album']),  sort_key(tags['album_artist']),
            cover, fsize, mtime, datetime.utcnow().isoformat(), song_id
        ))
-        if c.rowcount == 0:
-            c.execute("""INSERT OR REPLACE INTO songs (
+        if cur.rowcount == 0:
+            cur.execute("""INSERT OR REPLACE INTO songs (
                id, full_path, relative_path,
                title, artist, album, album_artist, genre,
                year, track_number, disc_number, duration,
@ -506,15 +534,27 @@ async def trigger_scan():


 async def sync_navidrome_ids_task():
-    """Fetch all songs from Navidrome and write navidrome_id into our songs table."""
+    """
+    Fetch all songs from Navidrome and match them into our songs table.
+
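+    Matching strategy (tried in order per song; the first hit wins):
+      1. title + artist                — primary; first DB row kept on duplicates
+      2. title + album                 — fallback when the artist field differs
+      3. title only                    — only when the title is unique in our DB
+      4. duration bucket + title[:8]   — only when unique
+      5. clean title + artist          — leading track/disc number prefix stripped; only when unique
+      6. duration bucket only          — only when unique per bucket
+      7. clean title + duration bucket — for untagged files; only when unique
+
+    Durations are compared via 2-second buckets (round(seconds / 2)), so two
+    durations that differ by less than 2s can still land in different buckets.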
+    """
    try:
        if not all([SUBSONIC_USER, SUBSONIC_TOKEN, SUBSONIC_SALT]):
            print("Subsonic credentials not set - cannot sync IDs")
            return
        print(f"Syncing Navidrome IDs... URL={NAVIDROME_URL}", flush=True)
-        base_params = {"u": SUBSONIC_USER, "t": SUBSONIC_TOKEN, "s": SUBSONIC_SALT,
-                       "v": "1.16.1", "c": "CompanionAPI", "f": "json",
-                       "albumCount": 0, "artistCount": 0, "songCount": 500, "query": ""}
+
+        # ── Fetch all songs from Navidrome ────────────────────────────────────
+        base_params = {
+            "u": SUBSONIC_USER, "t": SUBSONIC_TOKEN, "s": SUBSONIC_SALT,
+            "v": "1.16.1", "c": "CompanionAPI", "f": "json",
+            "albumCount": 0, "artistCount": 0, "songCount": 500, "query": ""
+        }
        all_songs = []
        offset = 0
        async with httpx.AsyncClient(timeout=60) as client:
@ -524,13 +564,9 @@ async def sync_navidrome_ids_task():
                        f"{NAVIDROME_URL}/rest/search3.view",
                        params={**base_params, "songOffset": offset}
                    )
-                    print(f"  Navidrome response: HTTP {r.status_code}", flush=True)
-                    body = r.json()
-                    # Check for auth errors
-                    resp = body.get("subsonic-response", {})
+                    resp = r.json().get("subsonic-response", {})
                    if resp.get("status") == "failed":
-                        err = resp.get("error", {})
-                        print(f"  Navidrome auth error: {err}", flush=True)
+                        print(f"  Navidrome auth error: {resp.get('error')}", flush=True)
                        return
                    songs = resp.get("searchResult3", {}).get("song", [])
                    print(f"  Page offset={offset}: {len(songs)} songs", flush=True)
@ -544,42 +580,212 @@ async def sync_navidrome_ids_task():
                    print(f"  Navidrome fetch error: {e}", flush=True)
                    break

-        matched = 0
+        print(f"  Navidrome total: {len(all_songs)} songs", flush=True)
+        if not all_songs:
+            return
+
+        # ── Show first 3 Navidrome songs for diagnosis ────────────────────────
+        for ns in all_songs[:3]:
+            print(f"  ND sample: title={repr(ns.get('title',''))} "
+                  f"artist={repr(ns.get('artist',''))} "
+                  f"album={repr(ns.get('album',''))} "
+                  f"duration={ns.get('duration')}", flush=True)
+
+        # ── Build lookup tables from our DB ───────────────────────────────────
+        def norm(s: str) -> str:
+            """Lowercase, strip, NFC-normalize."""
+            return unicodedata.normalize("NFC", (s or "").lower().strip())
+
+        def clean_title(s: str) -> str:
+            """Strip leading track/disc number prefix from filename-derived titles.
+            e.g. '09 Careless' -> 'careless', '01-02 Song' -> 'song'
+            """
+            s = norm(s)
+            # Strip patterns like "09 ", "09 - ", "1-02 ", "01. " etc
+            s = re.sub(r'^\d{1,2}[-\s\.]+\d{0,2}[-\s\.]*', '', s).strip()
+            s = re.sub(r'^\d{1,2}[-\s\.]+', '', s).strip()
+            return s
+
+        def dur_bucket(seconds) -> Optional[int]:
+            """Round to nearest 2-second bucket for fuzzy duration matching."""
+            if seconds is None:
+                return None
+            return int(round(float(seconds) / 2.0))
+
        with sqlite3.connect(DB_PATH) as c:
            cur = c.cursor()
-            total_db = cur.execute("SELECT COUNT(*) FROM songs").fetchone()[0]
-            print(f"  DB songs total: {total_db}", flush=True)
-
-            # Match by (title, artist) from ID3 tags — both Navidrome and Companion
-            # read the same tags so this is always consistent regardless of
-            # folder structure or filename format differences.
            db_rows = cur.execute(
-                "SELECT id, LOWER(TRIM(title)), LOWER(TRIM(artist)) FROM songs"
+                "SELECT id, title, artist, album, duration FROM songs"
            ).fetchall()
-            db_lookup = {}
-            for song_id, title, artist in db_rows:
-                key = (unicodedata.normalize("NFC", title),
-                       unicodedata.normalize("NFC", artist))
-                # If duplicate title+artist, keep first (edge case)
-                if key not in db_lookup:
-                    db_lookup[key] = song_id
-            print(f"  DB lookup built: {len(db_lookup)} entries", flush=True)

+        total_db = len(db_rows)
+        print(f"  DB songs total: {total_db}", flush=True)
+
+        # Show first 3 DB songs for comparison
+        for row in db_rows[:3]:
+            print(f"  DB sample: title={repr(row[1])} "
+                  f"artist={repr(row[2])} "
+                  f"album={repr(row[3])} "
+                  f"duration={row[4]}", flush=True)
+
+        # Strategy 1: title + artist
+        by_title_artist: dict[tuple, str] = {}
+        # Strategy 2: title + album
+        by_title_album: dict[tuple, str] = {}
+        # Strategy 3: title only (only stored if unique)
+        by_title: dict[str, Optional[str]] = {}
+        # Strategy 4: duration bucket + first 8 chars of title (unique)
+        by_dur: dict[tuple, Optional[str]] = {}
+        # Strategy 5: clean_title + artist (strips track number prefix)
+        by_clean_artist: dict[tuple, Optional[str]] = {}
+        # Strategy 6: duration bucket only (2-second buckets; only usable when unique per bucket)
+        by_dur_only: dict[int, Optional[str]] = {}
+
+        for song_id, title, artist, album, duration in db_rows:
+            nt  = norm(title)
+            na  = norm(artist)
+            nb  = norm(album)
+            ct  = clean_title(title)
+            dk  = dur_bucket(duration)
+
+            k1 = (nt, na)
+            if k1 not in by_title_artist:
+                by_title_artist[k1] = song_id
+
+            k2 = (nt, nb)
+            if k2 not in by_title_album:
+                by_title_album[k2] = song_id
+
+            if nt in by_title:
+                by_title[nt] = None
+            else:
+                by_title[nt] = song_id
+
+            if dk is not None:
+                k4 = (dk, nt[:8])
+                if k4 in by_dur:
+                    by_dur[k4] = None
+                else:
+                    by_dur[k4] = song_id
+
+            k5 = (ct, na)
+            if k5 not in by_clean_artist:
+                if k5 in by_clean_artist:
+                    by_clean_artist[k5] = None
+                else:
+                    by_clean_artist[k5] = song_id
+            else:
+                by_clean_artist[k5] = None  # ambiguous
+
+            if dk is not None:
+                if dk in by_dur_only:
+                    by_dur_only[dk] = None
+                else:
+                    by_dur_only[dk] = song_id
+
+        # Strategy 7: clean_title + duration bucket (catches untagged files
+        # where artist is missing but filename title + duration uniquely identify the song)
+        by_clean_dur: dict[tuple, Optional[str]] = {}
+
+        for song_id, title, artist, album, duration in db_rows:
+            ct = clean_title(title)
+            dk = dur_bucket(duration)
+            if dk is not None:
+                k7 = (ct, dk)
+                if k7 in by_clean_dur:
+                    by_clean_dur[k7] = None  # ambiguous
+                else:
+                    by_clean_dur[k7] = song_id
+
+        print(f"  Lookups: title+artist={len(by_title_artist)} "
+              f"title+album={len(by_title_album)} "
+              f"title_only={sum(1 for v in by_title.values() if v)} "
+              f"duration={sum(1 for v in by_dur.values() if v)} "
+              f"clean+artist={sum(1 for v in by_clean_artist.values() if v)} "
+              f"dur_only={sum(1 for v in by_dur_only.values() if v)} "
+              f"clean+dur={sum(1 for v in by_clean_dur.values() if v)}", flush=True)
+
+        matched_s1 = matched_s2 = matched_s3 = matched_s4 = 0
+        matched_s5 = matched_s6 = matched_s7 = unmatched = 0
+        unmatched_samples = []
+
+        with sqlite3.connect(DB_PATH) as c:
+            cur = c.cursor()
            for ns in all_songs:
-                nd_id     = ns.get("id", "")
-                nd_title  = unicodedata.normalize("NFC",
-                    (ns.get("title") or "").lower().strip())
-                nd_artist = unicodedata.normalize("NFC",
-                    (ns.get("artist") or "").lower().strip())
-                if not nd_id or not nd_title:
+                nd_id  = ns.get("id", "")
+                if not nd_id:
                    continue
-                key = (nd_title, nd_artist)
-                if key in db_lookup:
-                    cur.execute("UPDATE songs SET navidrome_id = ? WHERE id = ?",
-                                (nd_id, db_lookup[key]))
-                    matched += 1

-        print(f"Navidrome ID sync: {matched}/{len(all_songs)} matched", flush=True)
+                nt = norm(ns.get("title", ""))
+                na = norm(ns.get("artist", ""))
+                nb = norm(ns.get("album", ""))
+                ct = clean_title(ns.get("title", ""))
+                dk = dur_bucket(ns.get("duration"))
+
+                db_song_id = None
+                strategy   = 0
+
+                if not db_song_id:
+                    hit = by_title_artist.get((nt, na))
+                    if hit: db_song_id, strategy = hit, 1
+
+                if not db_song_id:
+                    hit = by_title_album.get((nt, nb))
+                    if hit: db_song_id, strategy = hit, 2
+
+                if not db_song_id:
+                    hit = by_title.get(nt)
+                    if hit: db_song_id, strategy = hit, 3
+
+                if not db_song_id and dk is not None:
+                    hit = by_dur.get((dk, nt[:8]))
+                    if hit: db_song_id, strategy = hit, 4
+
+                if not db_song_id:
+                    hit = by_clean_artist.get((ct, na))
+                    if hit: db_song_id, strategy = hit, 5
+
+                if not db_song_id and dk is not None:
+                    hit = by_dur_only.get(dk)
+                    if hit: db_song_id, strategy = hit, 6
+
+                # S7: clean title + duration bucket — catches untagged AIFF/files
+                # where artist is unknown but filename+duration uniquely identify song
+                if not db_song_id and dk is not None:
+                    hit = by_clean_dur.get((ct, dk))
+                    if hit: db_song_id, strategy = hit, 7
+
+                if db_song_id:
+                    cur.execute("UPDATE songs SET navidrome_id = ? WHERE id = ?",
+                                (nd_id, db_song_id))
+                    if strategy == 1: matched_s1 += 1
+                    elif strategy == 2: matched_s2 += 1
+                    elif strategy == 3: matched_s3 += 1
+                    elif strategy == 4: matched_s4 += 1
+                    elif strategy == 5: matched_s5 += 1
+                    elif strategy == 6: matched_s6 += 1
+                    else: matched_s7 += 1
+                else:
+                    unmatched += 1
+                    if len(unmatched_samples) < 10:
+                        unmatched_samples.append(
+                            f"title={repr(ns.get('title',''))} "
+                            f"artist={repr(ns.get('artist',''))} "
+                            f"duration={ns.get('duration')}"
+                        )
+
+        total_matched = matched_s1+matched_s2+matched_s3+matched_s4+matched_s5+matched_s6+matched_s7
+        print(f"Navidrome ID sync complete: {total_matched}/{len(all_songs)} matched", flush=True)
+        print(f"  Strategy breakdown: "
+              f"title+artist={matched_s1} title+album={matched_s2} "
+              f"title_only={matched_s3} dur+prefix={matched_s4} "
+              f"clean+artist={matched_s5} dur_only={matched_s6} "
+              f"clean+dur={matched_s7} unmatched={unmatched}", flush=True)
+        if unmatched_samples:
+            print(f"  Unmatched samples:", flush=True)
+            for s in unmatched_samples:
+                print(f"    {s}", flush=True)
+
    except Exception as e:
        import traceback
        print(f"sync_navidrome_ids_task FAILED: {e}", flush=True)
@ -692,32 +898,39 @@ def restructure_file(full_path: str) -> Optional[str]:
            else:
                break

-        # Update DB with new path
+        # Update DB with new path — re-read tags for accurate sort keys
        new_relative = os.path.relpath(target, MUSIC_DIR)
        song_id = hashlib.md5(full_path.encode()).hexdigest()
        new_id  = hashlib.md5(target.encode()).hexdigest()
+        tags    = read_tags(target)
        with sqlite3.connect(DB_PATH) as c:
-            # Update the existing row to reflect new path and new id
-            c.execute("""UPDATE songs SET
+            cur = c.cursor()
+            cur.execute("""UPDATE songs SET
                id=?, full_path=?, relative_path=?,
                sort_title=?, sort_artist=?, sort_album=?, sort_album_artist=?,
                file_mtime=?, date_modified=?
                WHERE id=?""", (
                new_id, target, new_relative,
-                sort_key(Path(target).stem),
-                sort_key(os.path.dirname(new_relative).split(os.sep)[0] if os.sep in new_relative else ''),
-                sort_key(os.path.dirname(new_relative).split(os.sep)[1] if new_relative.count(os.sep) > 0 else ''),
-                sort_key(os.path.dirname(new_relative).split(os.sep)[0] if os.sep in new_relative else ''),
+                sort_key(tags['title']),
+                sort_key(tags['artist']),
+                sort_key(tags['album']),
+                sort_key(tags['album_artist']),
                os.stat(target).st_mtime,
                datetime.utcnow().isoformat(),
                song_id
            ))
-            if c.rowcount == 0:
-                # Row used old id — try by full_path
-                c.execute("""UPDATE songs SET
-                    id=?, full_path=?, relative_path=?, file_mtime=?, date_modified=?
+            if cur.rowcount == 0:
+                # Row used old full_path as key — try matching by path
+                cur.execute("""UPDATE songs SET
+                    id=?, full_path=?, relative_path=?,
+                    sort_title=?, sort_artist=?, sort_album=?, sort_album_artist=?,
+                    file_mtime=?, date_modified=?
                    WHERE full_path=?""", (
                    new_id, target, new_relative,
+                    sort_key(tags['title']),
+                    sort_key(tags['artist']),
+                    sort_key(tags['album']),
+                    sort_key(tags['album_artist']),
                    os.stat(target).st_mtime,
                    datetime.utcnow().isoformat(),
                    full_path