// - AudioTapProcessor: shared MTAudioProcessingTap with lock-free PCM ring buffer
// - Pre-allocated vDSP FFT (1024-sample, Hann window, log-frequency 30-band output)
// - Zero per-frame heap allocation in FFT path
// - Shared tap serves both FFT visualizer and Shazam simultaneously
//
// Fixes (blockers for tap to work):
// - radioGoLive/radioSeekBack now update self.playerItem (was orphaned)
// - Tap reinstalled on every AVPlayerItem swap (seek, live, station change)
// - Tap removed on background, reinstalled on foreground
// - Tap removed on radio→music transition
//
// Shazam rework:
// - Uses shared AudioTapProcessor instead of creating its own tap
// - Fixes tap conflict where Shazam overwrote FFT audioMix
// - 500ms wait for tapPrepare callback (sourceFormat timing race)
// - Fixed pre-existing bug: stopAll() audio session never restored after mic fallback
//
// Debug capture:
// - Capture Audio Tap button in Visualizer Settings
// - Records 5s of raw tap PCM as playable WAV file
// - Uses actual stream sample rate (not hardcoded 44100)
// - Share sheet via Notification pattern (survives view dismiss)
// - Spinner auto-resets on appear if capture interrupted by background
//
// Also includes from main branch:
// - Edit History UI, batch undo, companion API 7-bug fix
// - Recently Played tab, Discover section, Play Queue sync, Share links
420 lines
16 KiB
Swift
420 lines
16 KiB
Swift
import Foundation
|
|
import AVFoundation
|
|
import Accelerate
|
|
|
|
// MARK: - Lock-free Ring Buffer for audio samples
|
|
|
|
/// Single-producer, single-consumer ring buffer of `Float` PCM samples.
///
/// The producer calls `write(_:count:)`; the consumer calls
/// `readMostRecent(into:count:)`. Neither path takes a lock or allocates.
///
/// NOTE(review): `_writeIndex` is a plain `Int` with no atomics or memory
/// barriers. The design relies on aligned word-sized stores being indivisible
/// on the target CPU; a reader may still observe samples that are being
/// overwritten concurrently. Confirm this is acceptable for every consumer.
final class PCMRingBuffer {
    /// Maximum number of samples retained.
    let capacity: Int

    /// Backing storage of exactly `capacity` floats, owned by this object.
    private let buffer: UnsafeMutablePointer<Float>

    /// Next slot to be written, always in `0..<capacity`.
    /// Stored by `write` and `reset`; only loaded by `readMostRecent`.
    private var _writeIndex: Int = 0

    /// Creates a zero-filled buffer.
    /// - Parameter capacity: Number of samples to retain. Must be positive,
    ///   because every index computation is taken modulo `capacity`.
    init(capacity: Int) {
        precondition(capacity > 0, "PCMRingBuffer capacity must be positive")
        self.capacity = capacity
        buffer = .allocate(capacity: capacity)
        buffer.initialize(repeating: 0, count: capacity)
    }

    deinit {
        buffer.deallocate()
    }

    /// Appends `count` samples, overwriting the oldest data. Lock-free, no allocation.
    ///
    /// If `count` exceeds `capacity`, only the newest `capacity` samples are
    /// stored, but the write index still advances by the full `count` so the
    /// final buffer layout matches a sample-by-sample write.
    /// - Parameters:
    ///   - samples: Pointer to at least `count` readable floats.
    ///   - count: Number of samples to append; values `<= 0` are ignored.
    func write(_ samples: UnsafePointer<Float>, count: Int) {
        guard count > 0 else { return }
        let startIndex = _writeIndex

        // Anything older than the last `capacity` samples would be overwritten anyway.
        let kept = min(count, capacity)
        let skipped = count - kept
        let source = samples + skipped
        let position = (startIndex + skipped) % capacity

        // Copy in at most two contiguous runs: up to the end of storage, then the wrap-around.
        // `initialize(from:count:)` on already-initialized memory is fine for the trivial type `Float`.
        let firstRun = min(kept, capacity - position)
        (buffer + position).initialize(from: source, count: firstRun)
        if kept > firstRun {
            buffer.initialize(from: source + firstRun, count: kept - firstRun)
        }

        // Publish the new index only after the samples are in place.
        _writeIndex = (startIndex + count) % capacity
    }

    /// Copies the most recently written samples into `dest`, oldest first. Lock-free.
    ///
    /// - Parameters:
    ///   - dest: Pointer to at least `min(count, capacity)` writable floats.
    ///   - count: Number of samples requested. Requests larger than `capacity`
    ///     are clamped, since the buffer never holds more than that.
    /// - Returns: The number of samples actually copied (`0` when `count <= 0`).
    @discardableResult
    func readMostRecent(into dest: UnsafeMutablePointer<Float>, count: Int) -> Int {
        let copied = min(count, capacity)
        guard copied > 0 else { return 0 }

        // Snapshot the index once so both runs use the same starting point.
        let writeIndex = _writeIndex
        // `copied <= capacity`, so the sum is non-negative and `%` cannot yield a negative index.
        let start = (writeIndex - copied + capacity) % capacity

        let firstRun = min(copied, capacity - start)
        dest.initialize(from: UnsafePointer(buffer + start), count: firstRun)
        if copied > firstRun {
            (dest + firstRun).initialize(from: UnsafePointer(buffer), count: copied - firstRun)
        }
        return copied
    }

    /// Rewinds the write index to zero and clears all stored samples.
    func reset() {
        _writeIndex = 0
        buffer.initialize(repeating: 0, count: capacity)
    }
}
|
|
|
|
// MARK: - Audio Tap Processor
|
|
|
|
/// Installs an MTAudioProcessingTap on an AVPlayerItem and makes raw PCM samples
/// available for FFT visualization and Shazam recognition.
///
/// Architecture:
///   AVPlayerItem → MTAudioProcessingTap (C callback on render thread)
///     → PCMRingBuffer (lock-free)
///     → caller (intended: a ~30fps timer) reads buffer via `computeFFTBands`
///     → Optional: Shazam consumer subscribes via `shazamHandler`
///
/// Thread safety:
///   - Tap callbacks write `ringBuffer`, read `shazamHandler` / `debugDumpEnabled`
///     and set `sourceFormat`; none of these properties is synchronized.
///   - `computeFFTBands` reuses shared scratch buffers and must only be called
///     from one thread at a time (intended: main thread).
///   - The debug capture path (`debugWriteSamples`) allocates and performs file
///     I/O on whatever thread the tap callback runs on. It is a diagnostic aid,
///     not real-time safe.
final class AudioTapProcessor {
    static let shared = AudioTapProcessor()

    /// Ring buffer: 8192 samples ≈ 186ms at 44.1kHz — plenty for 1024-sample FFT windows.
    let ringBuffer = PCMRingBuffer(capacity: 8192)

    /// Shazam consumer — invoked from the tap process callback with the raw buffer list
    /// and the number of frames produced. Set to `nil` to unsubscribe.
    var shazamHandler: ((UnsafeMutablePointer<AudioBufferList>, CMItemCount) -> Void)?

    // Pre-allocated FFT resources — created once in `init`, reused on every call.
    // Raw buffers (rather than Swift arrays) give stable pointers and rule out
    // copy-on-write allocations when vDSP operates in place.
    private let fftSize = 1024
    private let fftLog2n: vDSP_Length
    private let fftSetup: FFTSetup
    private let hannWindow: UnsafeMutablePointer<Float>      // fftSize elements
    private let fftTimeDomain: UnsafeMutablePointer<Float>   // fftSize elements
    private let fftRealp: UnsafeMutablePointer<Float>        // fftSize / 2 elements
    private let fftImagp: UnsafeMutablePointer<Float>        // fftSize / 2 elements
    private let fftMagnitudes: UnsafeMutablePointer<Float>   // fftSize / 2 elements

    /// Debug: when `true`, the tap callback forwards samples to `debugWriteSamples`.
    var debugDumpEnabled = false
    /// Location of the most recently started debug capture file.
    var debugDumpURL: URL?
    private var debugFileHandle: FileHandle?
    private var debugSamplesWritten: Int = 0
    private var debugMaxSamplesActual = 44100 * 5 // recalculated in startDebugDump from actual rate
    /// Sample rate currently stored in the capture file's WAV header.
    private var debugHeaderSampleRate: UInt32 = 44100

    /// Posted on main thread when debug capture completes. userInfo contains "url": URL.
    static let captureCompleteNotification = Notification.Name("AudioTapCaptureComplete")

    /// Source format detected by the tap's prepare callback; `nil` until it has run.
    var sourceFormat: AVAudioFormat?

    private init() {
        let halfSize = fftSize / 2
        // fftSize is a power of two, so its log2 is the number of trailing zero bits.
        fftLog2n = vDSP_Length(fftSize.trailingZeroBitCount)
        guard let setup = vDSP_create_fftsetup(fftLog2n, FFTRadix(kFFTRadix2)) else {
            fatalError("[AudioTap] vDSP_create_fftsetup failed")
        }
        fftSetup = setup
        hannWindow = AudioTapProcessor.makeZeroedBuffer(count: fftSize)
        fftTimeDomain = AudioTapProcessor.makeZeroedBuffer(count: fftSize)
        fftRealp = AudioTapProcessor.makeZeroedBuffer(count: halfSize)
        fftImagp = AudioTapProcessor.makeZeroedBuffer(count: halfSize)
        fftMagnitudes = AudioTapProcessor.makeZeroedBuffer(count: halfSize)
        vDSP_hann_window(hannWindow, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))
    }

    deinit {
        vDSP_destroy_fftsetup(fftSetup)
        hannWindow.deallocate()
        fftTimeDomain.deallocate()
        fftRealp.deallocate()
        fftImagp.deallocate()
        fftMagnitudes.deallocate()
    }

    /// Allocates `count` floats and sets them all to zero.
    private static func makeZeroedBuffer(count: Int) -> UnsafeMutablePointer<Float> {
        let pointer = UnsafeMutablePointer<Float>.allocate(capacity: count)
        pointer.initialize(repeating: 0, count: count)
        return pointer
    }

    // MARK: - Install / Remove Tap

    /// Install the shared tap on a player item.
    ///
    /// Any audio mix already set on `playerItem` is cleared first, so the call
    /// can be repeated. If `debugDumpEnabled` is set, a debug capture is started
    /// once the tap is attached.
    /// - Parameter playerItem: Item whose first audio track should be tapped.
    /// - Returns: `true` if the tap was created and attached; `false` if no audio
    ///   track could be loaded or tap creation failed.
    @MainActor
    func installTap(on playerItem: AVPlayerItem) async -> Bool {
        removeTap(from: playerItem)

        // Load audio tracks — async API (non-deprecated).
        let audioTrack: AVAssetTrack
        do {
            guard let track = try await playerItem.asset.loadTracks(withMediaType: .audio).first else {
                print("[AudioTap] No audio track found on playerItem")
                return false
            }
            audioTrack = track
        } catch {
            print("[AudioTap] Failed to load audio tracks: \(error)")
            return false
        }

        // The tap stores an unretained pointer to `self`; the callbacks read it back
        // through MTAudioProcessingTapGetStorage.
        var callbacks = MTAudioProcessingTapCallbacks(
            version: kMTAudioProcessingTapCallbacksVersion_0,
            clientInfo: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()),
            init: tapInit,
            finalize: nil,
            prepare: tapPrepare,
            unprepare: nil,
            process: tapProcess
        )

        var tapOut: MTAudioProcessingTap?
        let status = MTAudioProcessingTapCreate(
            kCFAllocatorDefault, &callbacks,
            kMTAudioProcessingTapCreationFlag_PostEffects, &tapOut
        )
        guard status == noErr, let tap = tapOut else {
            print("[AudioTap] MTAudioProcessingTapCreate failed: \(status)")
            return false
        }

        let inputParams = AVMutableAudioMixInputParameters(track: audioTrack)
        inputParams.audioTapProcessor = tap

        let mix = AVMutableAudioMix()
        mix.inputParameters = [inputParams]
        playerItem.audioMix = mix

        print("[AudioTap] Tap installed successfully")

        if debugDumpEnabled {
            startDebugDump()
        }
        return true
    }

    /// Remove the tap from a player item and finalize any running debug capture.
    /// - Parameter playerItem: Item whose audio mix is cleared; `nil` is allowed.
    func removeTap(from playerItem: AVPlayerItem?) {
        playerItem?.audioMix = nil
        // Finalize the capture while the stream format is still known.
        stopDebugDump()
        sourceFormat = nil
    }

    // MARK: - FFT Processing (called from main thread timer)

    /// Perform an FFT on the most recent 1024 samples and return frequency bands (0.0-1.0).
    ///
    /// All scratch buffers are pre-allocated; the returned array is the only
    /// allocation made here. Band edges are spaced quadratically over the first
    /// 300 FFT bins, which gives more resolution to bass and mids.
    /// - Parameter bandCount: Number of bands to produce. Values `<= 0` yield `[]`.
    /// - Returns: `bandCount` levels, each clamped to `0...1`.
    func computeFFTBands(bandCount: Int = 30) -> [Float] {
        guard bandCount > 0 else { return [] }

        let halfSize = fftSize / 2
        let sampleCount = vDSP_Length(fftSize)
        let binCount = vDSP_Length(halfSize)

        // Newest samples → scratch buffer, then apply the pre-computed Hann window
        // in place to reduce spectral leakage.
        ringBuffer.readMostRecent(into: fftTimeDomain, count: fftSize)
        vDSP_vmul(fftTimeDomain, 1, hannWindow, 1, fftTimeDomain, 1, sampleCount)

        // Pack the real signal into split-complex form. vDSP_ctoz overwrites every
        // element of both halves, so they need no clearing first.
        var splitComplex = DSPSplitComplex(realp: fftRealp, imagp: fftImagp)
        fftTimeDomain.withMemoryRebound(to: DSPComplex.self, capacity: halfSize) { interleaved in
            vDSP_ctoz(interleaved, 2, &splitComplex, 1, binCount)
        }

        vDSP_fft_zrip(fftSetup, &splitComplex, 1, fftLog2n, FFTDirection(kFFTDirection_Forward))
        vDSP_zvmags(&splitComplex, 1, fftMagnitudes, 1, binCount)

        // Convert to dB in place — magnitude 0 → -inf, clamped to 0 in the normalization step.
        // NOTE(review): vDSP_zvmags yields squared magnitudes while flag 1 selects the
        // amplitude conversion, so values are double the conventional dB figure. The
        // -50…15 window below appears tuned to that — confirm before changing either.
        var zeroReference: Float = 1.0
        vDSP_vdbcon(fftMagnitudes, 1, &zeroReference, fftMagnitudes, 1, binCount, 1)

        let maxBin = min(halfSize, 300) // Cap at ~13kHz (300/512 * 22050)
        let minDB: Float = -50
        let maxDB: Float = 15
        let dbRange = maxDB - minDB

        var bands = [Float](repeating: 0, count: bandCount)
        for band in 0..<bandCount {
            let lowFraction = Float(band) / Float(bandCount)
            let highFraction = Float(band + 1) / Float(bandCount)
            let lowBin = Int(powf(lowFraction, 2.0) * Float(maxBin))
            // Every band spans at least one bin, but never past maxBin.
            let highBin = min(max(lowBin + 1, Int(powf(highFraction, 2.0) * Float(maxBin))), maxBin)

            // An empty bin range leaves the raw level at 0 dB.
            var level: Float = 0
            if lowBin < highBin {
                var sum: Float = 0
                for bin in lowBin..<highBin {
                    sum += fftMagnitudes[bin]
                }
                level = sum / Float(highBin - lowBin)
            }

            // Normalize: map the dB window to 0.0-1.0.
            bands[band] = max(0, min(1, (level - minDB) / dbRange))
        }
        return bands
    }

    // MARK: - Debug Dump (WAV file)

    /// Start capturing audio tap output to a WAV file in the temporary directory.
    ///
    /// Any capture already running is finalized first. The capture stops itself
    /// after five seconds' worth of samples. If the file cannot be created the
    /// capture is not enabled.
    func startDebugDump() {
        stopDebugDump()
        let url = FileManager.default.temporaryDirectory
            .appendingPathComponent("audio_tap_capture_\(Int(Date().timeIntervalSince1970)).wav")
        debugDumpURL = url

        // Use the actual stream sample rate, default 44100 if unknown.
        let sampleRate = currentSampleRate() ?? 44100

        // Size fields in the header are placeholders; stopDebugDump patches them.
        let header = AudioTapProcessor.makeWAVHeader(sampleRate: sampleRate)
        guard FileManager.default.createFile(atPath: url.path, contents: header) else {
            print("[AudioTap] Debug capture could not create \(url.lastPathComponent)")
            debugDumpEnabled = false
            return
        }

        let handle: FileHandle
        do {
            handle = try FileHandle(forWritingTo: url)
            // Position after the header so samples are appended rather than overwriting it.
            try handle.seekToEnd()
        } catch {
            print("[AudioTap] Debug capture could not open \(url.lastPathComponent): \(error)")
            debugDumpEnabled = false
            return
        }

        debugHeaderSampleRate = sampleRate
        debugMaxSamplesActual = Int(sampleRate) * 5 // 5 seconds at actual rate
        debugSamplesWritten = 0
        debugFileHandle = handle
        debugDumpEnabled = true
        print("[AudioTap] Debug capture started: \(url.lastPathComponent) at \(sampleRate)Hz")
    }

    /// Stop capturing and finalize the WAV header with correct sizes.
    ///
    /// Does nothing when no capture file is open. If the stream's sample rate
    /// became known only after the capture started, the header's rate fields are
    /// corrected as well.
    func stopDebugDump() {
        guard let handle = debugFileHandle else { return }
        debugDumpEnabled = false
        debugFileHandle = nil

        let samplesWritten = debugSamplesWritten
        debugSamplesWritten = 0

        let finalRate = currentSampleRate() ?? debugHeaderSampleRate
        let dataSize = UInt32(samplesWritten * MemoryLayout<Float>.size)
        let riffSize = dataSize + 36 // 44 - 8 = 36

        do {
            defer { try? handle.close() }
            try handle.seek(toOffset: 4)
            try handle.write(contentsOf: AudioTapProcessor.littleEndianData(riffSize))
            if finalRate != debugHeaderSampleRate {
                // Sample rate (offset 24) is immediately followed by byte rate (offset 28).
                try handle.seek(toOffset: 24)
                try handle.write(contentsOf: AudioTapProcessor.littleEndianData(finalRate))
                try handle.write(contentsOf: AudioTapProcessor.littleEndianData(finalRate * 4))
                debugHeaderSampleRate = finalRate
            }
            try handle.seek(toOffset: 40)
            try handle.write(contentsOf: AudioTapProcessor.littleEndianData(dataSize))
        } catch {
            print("[AudioTap] Debug capture could not finalize WAV header: \(error)")
            return
        }

        if samplesWritten > 0 {
            let duration = Double(samplesWritten) / Double(finalRate)
            print("[AudioTap] Debug capture complete: \(String(format: "%.1f", duration))s, \(dataSize) bytes")
        }
    }

    /// Called from the tap process callback to append samples to the WAV file.
    ///
    /// Writes at most the number of samples remaining before the five-second cap,
    /// then schedules finalization on the main queue once the cap is reached or a
    /// write fails. This allocates a `Data` and performs file I/O on the calling
    /// thread.
    /// - Parameters:
    ///   - samples: Pointer to at least `count` readable floats.
    ///   - count: Number of samples available.
    func debugWriteSamples(_ samples: UnsafePointer<Float>, count: Int) {
        guard debugDumpEnabled, let handle = debugFileHandle else { return }

        let writable = min(count, debugMaxSamplesActual - debugSamplesWritten)
        if writable > 0 {
            let data = Data(bytes: samples, count: writable * MemoryLayout<Float>.size)
            do {
                try handle.write(contentsOf: data)
                debugSamplesWritten += writable
            } catch {
                print("[AudioTap] Debug capture write failed: \(error)")
                debugDumpEnabled = false
                finishDebugCaptureOnMain()
                return
            }
        }

        guard debugSamplesWritten >= debugMaxSamplesActual else { return }
        // Auto-stop after 5 seconds. Clear the flag here so later callbacks return early.
        debugDumpEnabled = false
        finishDebugCaptureOnMain()
    }

    /// Finalizes the capture on the main queue and announces the file via
    /// `captureCompleteNotification`.
    private func finishDebugCaptureOnMain() {
        DispatchQueue.main.async { [weak self] in
            guard let self = self else { return }
            self.stopDebugDump()
            guard let url = self.debugDumpURL else { return }
            NotificationCenter.default.post(
                name: AudioTapProcessor.captureCompleteNotification,
                object: nil,
                userInfo: ["url": url]
            )
        }
    }

    /// The detected stream sample rate as a whole number of Hz, or `nil` when it is
    /// unknown or outside the range a WAV header's rate and byte-rate fields can hold.
    private func currentSampleRate() -> UInt32? {
        guard let rate = sourceFormat?.sampleRate,
              rate >= 1,
              rate <= Double(UInt32.max / 4) else { return nil }
        return UInt32(rate)
    }

    /// Encodes an integer as its little-endian byte sequence (the WAV byte order).
    private static func littleEndianData<T: FixedWidthInteger>(_ value: T) -> Data {
        withUnsafeBytes(of: value.littleEndian) { Data($0) }
    }

    /// Builds a 44-byte WAV header for mono 32-bit IEEE-float audio.
    /// The RIFF and data chunk sizes are left as zero placeholders.
    private static func makeWAVHeader(sampleRate: UInt32) -> Data {
        var header = Data()
        header.reserveCapacity(44)
        header.append(contentsOf: Array("RIFF".utf8))
        header.append(littleEndianData(UInt32(0)))        // File size placeholder (patch later)
        header.append(contentsOf: Array("WAVE".utf8))
        header.append(contentsOf: Array("fmt ".utf8))
        header.append(littleEndianData(UInt32(16)))       // Chunk size: 16
        header.append(littleEndianData(UInt16(3)))        // Format: IEEE float (3)
        header.append(littleEndianData(UInt16(1)))        // Channels: 1
        header.append(littleEndianData(sampleRate))       // Sample rate
        header.append(littleEndianData(sampleRate * 4))   // Byte rate: sampleRate * 1ch * 4 bytes
        header.append(littleEndianData(UInt16(4)))        // Block align: 4
        header.append(littleEndianData(UInt16(32)))       // Bits per sample: 32
        header.append(contentsOf: Array("data".utf8))
        header.append(littleEndianData(UInt32(0)))        // Data size placeholder (patch later)
        return header
    }
}
|
|
|
|
// MARK: - C Tap Callbacks (free functions, not methods)
|
|
|
|
/// Tap `init` callback: hands the creator's `clientInfo` pointer to the tap as its
/// storage, so later callbacks can fetch it with `MTAudioProcessingTapGetStorage`.
private func tapInit(
    tap: MTAudioProcessingTap,
    clientInfo: UnsafeMutableRawPointer?,
    tapStorageOut: UnsafeMutablePointer<UnsafeMutableRawPointer?>
) {
    let storage = clientInfo
    tapStorageOut.pointee = storage
}
|
|
|
|
/// Tap `prepare` callback: records the processing format on the owning
/// `AudioTapProcessor` (read back from the tap's storage) and logs it.
private func tapPrepare(
    tap: MTAudioProcessingTap,
    maxFrames: CMItemCount,
    processingFormat: UnsafePointer<AudioStreamBasicDescription>
) {
    let storage = MTAudioProcessingTapGetStorage(tap)
    let owner = Unmanaged<AudioTapProcessor>.fromOpaque(storage).takeUnretainedValue()
    owner.sourceFormat = AVAudioFormat(streamDescription: processingFormat)

    // Log the stream description fields the rest of the pipeline depends on.
    let description = processingFormat.pointee
    let isFloat = description.mFormatFlags & kAudioFormatFlagIsFloat != 0
    print("[AudioTap] Prepared: \(description.mSampleRate)Hz, \(description.mChannelsPerFrame)ch, \(description.mBitsPerChannel)bit, float=\(isFloat)")
}
|
|
|
|
/// Tap `process` callback: pulls the source audio (passing it through to the player
/// unchanged) and fans the first buffer out to the ring buffer, the optional Shazam
/// handler and the optional debug capture.
///
/// NOTE(review): the first buffer is read as 32-bit float samples of a single
/// channel. That holds for non-interleaved float formats; confirm against the
/// format reported in `tapPrepare`.
private func tapProcess(
    tap: MTAudioProcessingTap,
    numberFrames: CMItemCount,
    flags: MTAudioProcessingTapFlags,
    bufferListInOut: UnsafeMutablePointer<AudioBufferList>,
    numberFramesOut: UnsafeMutablePointer<CMItemCount>,
    flagsOut: UnsafeMutablePointer<MTAudioProcessingTapFlags>
) {
    // Fetch audio from the source — passes through to player unchanged.
    let status = MTAudioProcessingTapGetSourceAudio(
        tap, numberFrames, bufferListInOut, flagsOut, nil, numberFramesOut
    )
    guard status == noErr else { return }

    let buffers = UnsafeMutableAudioBufferListPointer(bufferListInOut)
    guard let firstBuffer = buffers.first,
          let rawSamples = firstBuffer.mData else { return }

    let processor = Unmanaged<AudioTapProcessor>
        .fromOpaque(MTAudioProcessingTapGetStorage(tap))
        .takeUnretainedValue()

    // Never read past what the buffer says it holds, whatever frame count was reported.
    let samples = rawSamples.assumingMemoryBound(to: Float.self)
    let producedFrames = numberFramesOut.pointee
    let bufferSampleCapacity = Int(firstBuffer.mDataByteSize) / MemoryLayout<Float>.size
    let sampleCount = min(Int(producedFrames), bufferSampleCapacity)

    // Write to ring buffer (lock-free, no allocation).
    if sampleCount > 0 {
        processor.ringBuffer.write(samples, count: sampleCount)
    }

    // Forward to Shazam handler if active — it receives the untouched buffer list.
    processor.shazamHandler?(bufferListInOut, producedFrames)

    // Debug dump if enabled.
    if sampleCount > 0 && processor.debugDumpEnabled {
        processor.debugWriteSamples(samples, count: sampleCount)
    }
}
|