Fix UI freeze by using MediaStreamTrackProcessor for audio capture (restored PCM)

This commit is contained in:
srtk 2026-02-09 21:57:41 +05:30
parent d1d3ed3e14
commit ad37ce8296
5 changed files with 179 additions and 145 deletions

View file

@ -77,6 +77,12 @@ app.whenReady().then(() => {
network?.sendEncodedVideoChunk(payload.chunk, payload.isKeyFrame, payload.timestamp, payload.streamType);
});
// Original simple PCM audio sending
ipcMain.on('send-audio-frame', (_event, args) => {
  // Raw PCM bytes from the renderer; re-wrap as Uint8Array for transport.
  network?.sendAudioFrame(new Uint8Array(args.frame));
});
// Opus encoded audio (keeping for compatibility)
ipcMain.on('send-audio-chunk', (_event, payload) => {
  network?.sendEncodedAudioChunk(payload.chunk, payload.timestamp);
});

View file

@ -146,7 +146,11 @@ export class NetworkManager extends EventEmitter {
this.safeSend('chat-message', msg.data);
break;
case 'UpdateStream':
this.safeSend('peer-stream-update', msg.data);
// Ignore stream updates for self (we manage local state directly)
if (msg.data.user_id !== this.userId) {
console.log(`[Network] Peer Stream Update: User=${msg.data.user_id} Type=${msg.data.media_type} Active=${msg.data.active}`);
this.safeSend('peer-stream-update', msg.data);
}
break;
case 'Error':
console.error('WS Error Msg:', msg.data);
@ -221,54 +225,8 @@ export class NetworkManager extends EventEmitter {
const payload = msg.subarray(HEADER_SIZE);
if (mediaType === MediaType.Audio) {
// Audio can be fragmented now (PCM)
this.safeSend('video-chunk', { // Use 'video-chunk' handler in renderer for reassembly?
// Wait, App.tsx has separate 'audio-chunk' which doesn't reassemble.
// We need to reassemble here or change App.tsx.
// Reassembling in main process is easier or reusing video logic.
// Let's use 'audio-chunk' but we need to pass frag info?
// No, App.tsx 'audio-chunk' handler just decodes immediately.
// It expects a full frame.
// We MUST reassemble here or update App.tsx.
// Updating App.tsx to use the reassembler for Audio is cleaner.
// But 'video-chunk' in App.tsx calls 'handleIncomingVideoFragment' which uses 'MediaEngine.decodeVideoChunk'.
// Option: Treat Audio as "Video" for transport, but with streamType='audio'?
// MediaType.Audio is distinct.
// Let's implement reassembly here in NetworkManager?
// Or update App.tsx to use 'handleIncomingVideoFragment' for audio too?
// 'handleIncomingVideoFragment' does `decodeVideoChunk`.
// Let's change App.tsx to have `handleIncomingAudioFragment`?
// Or just reassemble here. UDP reassembly in Node.js is fine.
// Actually, App.tsx's `handleIncomingVideoFragment` is nice.
// Let's emit 'audio-fragment' and add a handler in App.tsx.
user_id: userId,
data: payload,
seq: this.audioSeq, // Wait, seq is in packet
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt,
isKeyFrame,
streamType: 'audio'
// We can't use 'video-chunk' channel because it calls decodeVideoChunk.
});
// Actually, let's just send it to 'audio-fragment' channel
this.safeSend('audio-fragment', {
user_id: userId,
data: payload,
seq: seq, // We need valid seq from packet
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt,
isKeyFrame
});
// Original simple approach - just forward to renderer (PCM)
this.safeSend('audio-frame', { user_id: userId, data: payload });
} else if (mediaType === MediaType.Video || mediaType === MediaType.Screen) {
// Differentiate based on MediaType
const streamType = mediaType === MediaType.Screen ? 'screen' : 'video';
@ -292,10 +250,15 @@ export class NetworkManager extends EventEmitter {
// Forward a payload to the renderer process, tolerating a missing or
// already-destroyed window (e.g. during shutdown or before first paint).
private safeSend(channel: string, data: any) {
  const win = this.mainWindow;
  if (!win || win.isDestroyed() || !win.webContents) {
    console.warn(`[Network] Cannot send ${channel}: mainWindow not ready`);
    return;
  }
  try {
    // Diagnostic trace for the audio reassembly path.
    if (channel === 'audio-fragment') {
      console.log(`[Network] safeSend audio-fragment to renderer, data size=${data.data?.length}`);
    }
    win.webContents.send(channel, data);
  } catch (e) {
    console.error(`Failed to send ${channel} to renderer:`, e);
  }
}
@ -344,6 +307,26 @@ export class NetworkManager extends EventEmitter {
}
}
// Simple audio frame sending (raw PCM) - matches original working implementation
// Builds one unfragmented packet (fragIdx=0, fragCnt=1) and enqueues it on
// the UDP pacer queue. Header layout is little-endian throughout.
sendAudioFrame(frame: Uint8Array) {
  if (!this.udp) return;
  const header = Buffer.alloc(HEADER_SIZE);
  let pos = 0;
  header.writeUInt8(1, pos); pos += 1;                      // Version
  header.writeUInt8(MediaType.Audio, pos); pos += 1;        // Media type
  header.writeUInt32LE(this.userId, pos); pos += 4;         // Sender id
  header.writeUInt32LE(this.audioSeq++, pos); pos += 4;     // Sequence number
  header.writeBigUInt64LE(BigInt(Date.now()), pos); pos += 8; // Timestamp (ms)
  header.writeUInt16LE(0, pos); pos += 2;                   // Frag idx
  header.writeUInt16LE(1, pos); pos += 2;                   // Frag cnt
  header.writeUInt16LE(0, pos);                             // Flags
  // Send directly via pacer queue
  this.udpQueue.push(Buffer.concat([header, Buffer.from(frame)]));
}
sendEncodedAudioChunk(chunk: Uint8Array, timestamp: number) {
if (!this.udp) {
console.warn('[Network] UDP Socket not ready for Audio');

View file

@ -1,8 +1,20 @@
import { contextBridge, ipcRenderer } from 'electron'
import { electronAPI } from '@electron-toolkit/preload'
// Custom APIs for renderer
const api = {}
// Custom APIs for renderer - extend with explicit audio/video IPC
// Factory: builds a subscribe function for a renderer-bound IPC channel.
// The returned subscriber registers `callback` and hands back a disposer
// that removes exactly that listener (safe to call more than once).
const makeChannelListener = (channel: string) =>
  (callback: (payload: any) => void) => {
    const handler = (_: any, payload: any) => callback(payload);
    ipcRenderer.on(channel, handler);
    return () => ipcRenderer.removeListener(channel, handler);
  };
const api = {
  // Explicitly expose receive channels for media
  onAudioFragment: makeChannelListener('audio-fragment'),
  onVideoChunk: makeChannelListener('video-chunk')
}
// Use `contextBridge` APIs to expose Electron APIs to
// renderer only if context isolation is enabled, otherwise

View file

@ -63,57 +63,7 @@ function App() {
// Audio Handling - Ref to context
const audioCtxRef = useRef<AudioContext | null>(null);
// Audio Reassembly Buffer (Moved out of useEffect)
// Key: `${user_id}-${seq}` — matches the key built in the handler below.
const audioFragmentBuffer = useRef<Map<string, { chunks: Uint8Array[], count: number, total: number, ts: number }>>(new Map());
// Reassembles fragmented PCM audio frames from UDP and hands each complete
// frame to the MediaEngine decoder. Single-fragment frames bypass the map
// and are decoded immediately.
const handleIncomingAudioFragment = useCallback((payload: any) => {
  const { user_id, data, seq, ts, fidx, fcnt } = payload;
  // If single fragment, decode immediately
  if (fcnt === 1) {
    if (mediaEngineRef.current) {
      const buffer = data instanceof Uint8Array ? data : new Uint8Array(data);
      mediaEngineRef.current.decodeAudioChunk(buffer, user_id, ts);
    }
    return;
  }
  const key = `${user_id}-${seq}`;
  const map = audioFragmentBuffer.current;
  if (!map.has(key)) {
    map.set(key, { chunks: new Array(fcnt), count: 0, total: fcnt, ts });
  }
  const entry = map.get(key)!;
  // Ignore duplicate fragments (UDP may redeliver).
  if (!entry.chunks[fidx]) {
    entry.chunks[fidx] = data instanceof Uint8Array ? data : new Uint8Array(data);
    entry.count++;
  }
  if (entry.count === entry.total) {
    // Reassemble fragments in index order into one contiguous frame.
    const totalLen = entry.chunks.reduce((acc, c) => acc + c.length, 0);
    const fullFrame = new Uint8Array(totalLen);
    let offset = 0;
    for (const c of entry.chunks) {
      fullFrame.set(c, offset);
      offset += c.length;
    }
    if (mediaEngineRef.current) {
      mediaEngineRef.current.decodeAudioChunk(fullFrame, user_id, ts);
    }
    map.delete(key);
  }
  // Cleanup: bound the buffer so frames with lost fragments can't grow it
  // without limit. Map preserves insertion order, so evicting from the
  // front drops the oldest (most likely abandoned) entries first.
  // (The previous loop parsed a seq it never used and deleted entries
  // while iterating regardless of age.)
  while (map.size > 20) {
    const oldestKey = map.keys().next().value;
    if (oldestKey === undefined) break;
    map.delete(oldestKey);
  }
}, []);
// Event Listeners
useEffect(() => {
@ -178,15 +128,20 @@ function App() {
let audioPacketCount = 0;
engine.on('decoded-audio', ({ userId, data }: { userId: number, data: AudioData }) => {
if (audioPacketCount % 50 === 0) console.log(`[App] Playing Audio packet #${audioPacketCount} from User ${userId}`);
audioPacketCount++;
if (audioPacketCount % 50 === 0) {
console.log(`[App] Playing Audio packet #${audioPacketCount} from User ${userId}, frames=${data.numberOfFrames}, sampleRate=${data.sampleRate}`);
}
const ctx = audioCtxRef.current;
if (!ctx) {
console.warn('[App] AudioContext not initialized, dropping audio');
data.close();
return;
}
if (ctx.state === 'suspended') {
console.log('[App] AudioContext suspended, attempting resume');
// Try to resume again
ctx.resume().catch(e => console.error("Audio resume failed in playback", e));
}
@ -255,22 +210,84 @@ function App() {
});
});
// Use explicit API for reliable video IPC
// @ts-ignore
const removeVideoChunk = window.electron.ipcRenderer.on("video-chunk", (_, payload) => {
handleIncomingVideoFragment(payload);
});
const removeVideoChunk = window.api?.onVideoChunk
? window.api.onVideoChunk((payload: any) => handleIncomingVideoFragment(payload))
// @ts-ignore
: window.electron.ipcRenderer.on("video-chunk", (_, payload) => handleIncomingVideoFragment(payload));
// @ts-ignore
const removeAudioFragment = window.electron.ipcRenderer.on("audio-fragment", (_, payload) => {
handleIncomingAudioFragment(payload);
});
const removeAudioFragment = () => { }; // No-op, removed
// @ts-ignore
const removeAudioChunk = window.electron.ipcRenderer.on("audio-chunk", (_, payload) => {
// Check if it's the old single chunk message (fallback)
if (mediaEngineRef.current) {
const data = payload.data instanceof Uint8Array ? payload.data : new Uint8Array(payload.data);
mediaEngineRef.current.decodeAudioChunk(data, payload.user_id, payload.ts);
const removeOpusChunk = () => { };
// window.electron.ipcRenderer.removeAllListeners("audio-fragment"); // Ensure clean slate
// --- Original Simple Audio Playback (PCM) ---
// Pre-buffer this long before starting playback to absorb network jitter.
const JITTER_BUFFER_MS = 60;
let playbackCtx: AudioContext | null = null;
let nextPlayTime = 0;
let audioBufferQueue: Float32Array[] = [];
let audioStarted = false;
// Schedule one mono 48 kHz chunk so it plays back-to-back with the
// previously scheduled chunk (gapless playback).
const scheduleAudioBuffer = (float32: Float32Array) => {
  if (!playbackCtx) return;
  const buffer = playbackCtx.createBuffer(1, float32.length, 48000);
  buffer.copyToChannel(float32 as any, 0);
  const source = playbackCtx.createBufferSource();
  source.buffer = buffer;
  source.connect(playbackCtx.destination);
  // If we've fallen behind the audio clock, restart slightly ahead of "now"
  // — AudioBufferSourceNode.start() in the past would clip the chunk.
  const now = playbackCtx.currentTime;
  if (now > nextPlayTime) {
    nextPlayTime = now + 0.01;
  }
  source.start(nextPlayTime);
  nextPlayTime += buffer.duration;
};
// Drain everything queued during the pre-buffering phase.
const flushAudioBuffer = () => {
  for (let queued = audioBufferQueue.shift(); queued !== undefined; queued = audioBufferQueue.shift()) {
    scheduleAudioBuffer(queued);
  }
};
// Receive raw PCM frames from the main process and play them through a
// lazily-created 48 kHz AudioContext, pre-buffering 3 packets for jitter.
// @ts-ignore
const removeAudioFrame = window.electron.ipcRenderer.on("audio-frame", (_, payload) => {
  try {
    const { data } = payload;
    if (!playbackCtx) {
      playbackCtx = new AudioContext({ sampleRate: 48000 });
      nextPlayTime = playbackCtx.currentTime + JITTER_BUFFER_MS / 1000;
    }
    if (playbackCtx.state === 'suspended') {
      playbackCtx.resume();
    }
    // Convert Uint8Array (bytes) to Int16 PCM then to Float32 in [-1, 1).
    const uint8 = new Uint8Array(data);
    // `>> 1` truncates to an integer sample count: Int16Array's length
    // argument must be an integer, so a stray odd-length payload would
    // otherwise throw a RangeError (uint8.length / 2 is fractional).
    const int16 = new Int16Array(uint8.buffer, uint8.byteOffset, uint8.length >> 1);
    const float32 = new Float32Array(int16.length);
    for (let i = 0; i < int16.length; i++) {
      float32[i] = int16[i] / 32768;
    }
    if (!audioStarted) {
      // Buffer a few packets before starting
      audioBufferQueue.push(float32);
      if (audioBufferQueue.length >= 3) {
        audioStarted = true;
        flushAudioBuffer();
      }
    } else {
      scheduleAudioBuffer(float32);
    }
  } catch (e) {
    console.error('[App] Audio playback error:', e);
  }
});
@ -305,18 +322,25 @@ function App() {
setPeersWithScreen(prev => {
const next = new Set(prev);
next.delete(userId);
console.log(`[App] peersWithScreen after removal:`, [...next]);
return next;
});
// Force clear canvas immediately
// Force clear canvas immediately and remove from refs
const key = `${userId}-screen`;
const canvas = peerCanvasRefs.current.get(key);
if (canvas) {
const ctx = canvas.getContext('2d');
if (ctx) ctx.clearRect(0, 0, canvas.width, canvas.height);
}
// Remove canvas reference to prevent stale rendering
peerCanvasRefs.current.delete(key);
} else {
console.log(`[App] Adding Screen Share for User ${userId}`);
setPeersWithScreen(prev => new Set(prev).add(userId));
setPeersWithScreen(prev => {
const next = new Set(prev).add(userId);
console.log(`[App] peersWithScreen after addition:`, [...next]);
return next;
});
}
}
});
@ -361,11 +385,13 @@ function App() {
removePeerJoined();
removePeerLeft();
removeVideoChunk();
removeAudioChunk();
removeAudioFrame();
removePeerStreamUpdate();
removeChatMessage();
// Don't close AudioContext here if we want to reuse it?
// Actually good practice to close it on component unmount
// Close playback context on unmount
if (playbackCtx) {
playbackCtx.close();
}
if (audioCtxRef.current) {
audioCtxRef.current.close();
audioCtxRef.current = null;
@ -617,17 +643,10 @@ function App() {
useEffect(() => {
let active = true;
let stream: MediaStream | null = null;
let reader: ReadableStreamDefaultReader<AudioData> | null = null;
const startAudio = async () => {
if (!audioEnabled || !mediaEngineRef.current) return;
addLog("[App] startAudio: Acquiring Lock");
const lock = acquireMediaLock();
await lock.wait();
if (!active) { lock.release(); return; }
addLog("[App] startAudio: Lock Acquired");
try {
addLog("Requesting Audio Access...");
stream = await navigator.mediaDevices.getUserMedia({
@ -635,62 +654,73 @@ function App() {
deviceId: selectedAudioDevice ? { exact: selectedAudioDevice } : undefined,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true
autoGainControl: true,
sampleRate: 48000,
channelCount: 1
}
});
addLog(`Audio Access Granted: ${stream.id}`);
const track = stream.getAudioTracks()[0];
// @ts-ignore
const processor = new MediaStreamTrackProcessor({ track });
reader = processor.readable.getReader();
if (!active) {
stream.getTracks().forEach(t => t.stop());
return;
}
// Signal ON
// @ts-ignore
window.electron.ipcRenderer.send('update-stream', { active: true, mediaType: 0 }); // 0 = Audio
lock.release();
addLog("[App] startAudio: Reading Loop Start");
let frameCount = 0;
const track = stream.getAudioTracks()[0];
// @ts-ignore
const processor = new MediaStreamTrackProcessor({ track });
const reader = processor.readable.getReader();
while (active) {
const result = await reader.read();
if (result.done) break;
if (result.value) {
if (frameCount % 100 === 0) console.log(`[App] Capturing Audio Frame ${frameCount}`);
mediaEngineRef.current?.encodeAudioData(result.value);
frameCount++;
const audioData = result.value;
// Convert AudioData to Int16 PCM
// We need to extract the data. format is usually f32-planar.
const float32 = new Float32Array(audioData.numberOfFrames * audioData.numberOfChannels);
audioData.copyTo(float32, { planeIndex: 0 }); // Assuming mono for now as requested in getUserMedia
const pcm = new Int16Array(float32.length);
for (let i = 0; i < float32.length; i++) {
let s = Math.max(-1, Math.min(1, float32[i]));
pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
}
// Send as raw bytes
// @ts-ignore
window.electron.ipcRenderer.send('send-audio-frame', { frame: new Uint8Array(pcm.buffer) });
audioData.close();
}
}
addLog("[App] startAudio: Reading Loop End");
} catch (e) {
console.error("Audio capture error", e);
setAudioEnabled(false);
lock.release();
}
};
if (audioEnabled) {
console.log("[App] Audio Enabled -> Starting");
startAudio();
} else {
console.log("[App] Audio Disabled");
if (connected) {
// @ts-ignore
window.electron.ipcRenderer.send('update-stream', { active: false, mediaType: 0 }); // 0 = Audio
window.electron.ipcRenderer.send('update-stream', { active: false, mediaType: 0 });
}
}
return () => {
active = false;
console.log("[App] Audio Cleanup");
if (stream) {
addLog("Stopping Audio Stream");
console.log("[App] Stopping Audio Stream");
stream.getTracks().forEach(t => t.stop());
}
if (reader) reader.cancel();
};
}, [audioEnabled, selectedAudioDevice, connected]);

View file

@ -230,6 +230,7 @@ export class MediaEngine extends SimpleEventEmitter {
}
decodeAudioChunk(chunkData: Uint8Array, userId: number, timestamp: number) {
console.log(`[MediaEngine] decodeAudioChunk called: userId=${userId}, dataLen=${chunkData.length}, ts=${timestamp}`);
const decoderKey = `${userId}-audio`;
let decoder = this.audioDecoders.get(decoderKey);
@ -260,6 +261,8 @@ export class MediaEngine extends SimpleEventEmitter {
} catch (e) {
console.error(`[MediaEngine] Audio Decode error ${decoderKey}:`, e);
}
} else {
console.warn(`[MediaEngine] AudioDecoder not configured, state=${decoder.state}`);
}
}