Fix UI freeze by using MediaStreamTrackProcessor for audio capture (restored PCM)

This commit is contained in:
srtk 2026-02-09 21:57:41 +05:30
parent d1d3ed3e14
commit ad37ce8296
5 changed files with 179 additions and 145 deletions

View file

@ -77,6 +77,12 @@ app.whenReady().then(() => {
network?.sendEncodedVideoChunk(payload.chunk, payload.isKeyFrame, payload.timestamp, payload.streamType);
});
// Original simple PCM audio sending
ipcMain.on('send-audio-frame', (_event, args) => {
  // Raw PCM bytes from the renderer; re-wrap as Uint8Array for transport.
  network?.sendAudioFrame(new Uint8Array(args.frame));
});
// Opus encoded audio (keeping for compatibility)
ipcMain.on('send-audio-chunk', (_event, payload) => {
  network?.sendEncodedAudioChunk(payload.chunk, payload.timestamp);
});

View file

@ -146,7 +146,11 @@ export class NetworkManager extends EventEmitter {
this.safeSend('chat-message', msg.data);
break;
case 'UpdateStream':
this.safeSend('peer-stream-update', msg.data);
// Ignore stream updates for self (we manage local state directly)
if (msg.data.user_id !== this.userId) {
console.log(`[Network] Peer Stream Update: User=${msg.data.user_id} Type=${msg.data.media_type} Active=${msg.data.active}`);
this.safeSend('peer-stream-update', msg.data);
}
break;
case 'Error':
console.error('WS Error Msg:', msg.data);
@ -221,54 +225,8 @@ export class NetworkManager extends EventEmitter {
const payload = msg.subarray(HEADER_SIZE);
if (mediaType === MediaType.Audio) {
// Audio can be fragmented now (PCM)
this.safeSend('video-chunk', { // Use 'video-chunk' handler in renderer for reassembly?
// Wait, App.tsx has separate 'audio-chunk' which doesn't reassemble.
// We need to reassemble here or change App.tsx.
// Reassembling in main process is easier or reusing video logic.
// Let's use 'audio-chunk' but we need to pass frag info?
// No, App.tsx 'audio-chunk' handler just decodes immediately.
// It expects a full frame.
// We MUST reassemble here or update App.tsx.
// Updating App.tsx to use the reassembler for Audio is cleaner.
// But 'video-chunk' in App.tsx calls 'handleIncomingVideoFragment' which uses 'MediaEngine.decodeVideoChunk'.
// Option: Treat Audio as "Video" for transport, but with streamType='audio'?
// MediaType.Audio is distinct.
// Let's implement reassembly here in NetworkManager?
// Or update App.tsx to use 'handleIncomingVideoFragment' for audio too?
// 'handleIncomingVideoFragment' does `decodeVideoChunk`.
// Let's change App.tsx to have `handleIncomingAudioFragment`?
// Or just reassemble here. UDP reassembly in Node.js is fine.
// Actually, App.tsx's `handleIncomingVideoFragment` is nice.
// Let's emit 'audio-fragment' and add a handler in App.tsx.
user_id: userId,
data: payload,
seq: this.audioSeq, // Wait, seq is in packet
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt,
isKeyFrame,
streamType: 'audio'
// We can't use 'video-chunk' channel because it calls decodeVideoChunk.
});
// Actually, let's just send it to 'audio-fragment' channel
this.safeSend('audio-fragment', {
user_id: userId,
data: payload,
seq: seq, // We need valid seq from packet
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt,
isKeyFrame
});
// Original simple approach - just forward to renderer (PCM)
this.safeSend('audio-frame', { user_id: userId, data: payload });
} else if (mediaType === MediaType.Video || mediaType === MediaType.Screen) {
// Differentiate based on MediaType
const streamType = mediaType === MediaType.Screen ? 'screen' : 'video';
@ -292,10 +250,15 @@ export class NetworkManager extends EventEmitter {
// Forward a payload to the renderer process, tolerating a missing or
// already-destroyed window (e.g. during shutdown or before first paint).
private safeSend(channel: string, data: any) {
  const win = this.mainWindow;
  if (!win || win.isDestroyed() || !win.webContents) {
    console.warn(`[Network] Cannot send ${channel}: mainWindow not ready`);
    return;
  }
  try {
    // Diagnostic trace for the audio reassembly path.
    if (channel === 'audio-fragment') {
      console.log(`[Network] safeSend audio-fragment to renderer, data size=${data.data?.length}`);
    }
    win.webContents.send(channel, data);
  } catch (e) {
    console.error(`Failed to send ${channel} to renderer:`, e);
  }
}
@ -344,6 +307,26 @@ export class NetworkManager extends EventEmitter {
}
}
// Simple audio frame sending (raw PCM) - matches original working implementation
// Builds one unfragmented packet (fragIdx=0, fragCnt=1) and enqueues it on
// the UDP pacer queue. Header layout is little-endian throughout.
sendAudioFrame(frame: Uint8Array) {
  if (!this.udp) return;
  const header = Buffer.alloc(HEADER_SIZE);
  let pos = 0;
  header.writeUInt8(1, pos); pos += 1;                      // Version
  header.writeUInt8(MediaType.Audio, pos); pos += 1;        // Media type
  header.writeUInt32LE(this.userId, pos); pos += 4;         // Sender id
  header.writeUInt32LE(this.audioSeq++, pos); pos += 4;     // Sequence number
  header.writeBigUInt64LE(BigInt(Date.now()), pos); pos += 8; // Timestamp (ms)
  header.writeUInt16LE(0, pos); pos += 2;                   // Frag idx
  header.writeUInt16LE(1, pos); pos += 2;                   // Frag cnt
  header.writeUInt16LE(0, pos);                             // Flags
  // Send directly via pacer queue
  this.udpQueue.push(Buffer.concat([header, Buffer.from(frame)]));
}
sendEncodedAudioChunk(chunk: Uint8Array, timestamp: number) {
if (!this.udp) {
console.warn('[Network] UDP Socket not ready for Audio');

View file

@ -1,8 +1,20 @@
import { contextBridge, ipcRenderer } from 'electron'
import { electronAPI } from '@electron-toolkit/preload'
// Custom APIs for renderer
const api = {}
// Custom APIs for renderer - extend with explicit audio/video IPC
// Factory: builds a subscribe function for a renderer-bound IPC channel.
// The returned subscriber registers `callback` and hands back a disposer
// that removes exactly that listener (safe to call more than once).
const makeChannelListener = (channel: string) =>
  (callback: (payload: any) => void) => {
    const handler = (_: any, payload: any) => callback(payload);
    ipcRenderer.on(channel, handler);
    return () => ipcRenderer.removeListener(channel, handler);
  };
const api = {
  // Explicitly expose receive channels for media
  onAudioFragment: makeChannelListener('audio-fragment'),
  onVideoChunk: makeChannelListener('video-chunk')
}
// Use `contextBridge` APIs to expose Electron APIs to
// renderer only if context isolation is enabled, otherwise

View file

@ -63,57 +63,7 @@ function App() {
// Audio Handling - Ref to context
const audioCtxRef = useRef<AudioContext | null>(null);
// Audio Reassembly Buffer (Moved out of useEffect)
// Key: `${user_id}-${seq}` — matches the key built in the handler below.
const audioFragmentBuffer = useRef<Map<string, { chunks: Uint8Array[], count: number, total: number, ts: number }>>(new Map());
// Reassembles fragmented PCM audio frames from UDP and hands each complete
// frame to the MediaEngine decoder. Single-fragment frames bypass the map
// and are decoded immediately.
const handleIncomingAudioFragment = useCallback((payload: any) => {
  const { user_id, data, seq, ts, fidx, fcnt } = payload;
  // If single fragment, decode immediately
  if (fcnt === 1) {
    if (mediaEngineRef.current) {
      const buffer = data instanceof Uint8Array ? data : new Uint8Array(data);
      mediaEngineRef.current.decodeAudioChunk(buffer, user_id, ts);
    }
    return;
  }
  const key = `${user_id}-${seq}`;
  const map = audioFragmentBuffer.current;
  if (!map.has(key)) {
    map.set(key, { chunks: new Array(fcnt), count: 0, total: fcnt, ts });
  }
  const entry = map.get(key)!;
  // Ignore duplicate fragments (UDP may redeliver).
  if (!entry.chunks[fidx]) {
    entry.chunks[fidx] = data instanceof Uint8Array ? data : new Uint8Array(data);
    entry.count++;
  }
  if (entry.count === entry.total) {
    // Reassemble fragments in index order into one contiguous frame.
    const totalLen = entry.chunks.reduce((acc, c) => acc + c.length, 0);
    const fullFrame = new Uint8Array(totalLen);
    let offset = 0;
    for (const c of entry.chunks) {
      fullFrame.set(c, offset);
      offset += c.length;
    }
    if (mediaEngineRef.current) {
      mediaEngineRef.current.decodeAudioChunk(fullFrame, user_id, ts);
    }
    map.delete(key);
  }
  // Cleanup: bound the buffer so frames with lost fragments can't grow it
  // without limit. Map preserves insertion order, so evicting from the
  // front drops the oldest (most likely abandoned) entries first.
  // (The previous loop parsed a seq it never used and deleted entries
  // while iterating regardless of age.)
  while (map.size > 20) {
    const oldestKey = map.keys().next().value;
    if (oldestKey === undefined) break;
    map.delete(oldestKey);
  }
}, []);
// Event Listeners
useEffect(() => {
@ -178,15 +128,20 @@ function App() {
let audioPacketCount = 0;
engine.on('decoded-audio', ({ userId, data }: { userId: number, data: AudioData }) => {
if (audioPacketCount % 50 === 0) console.log(`[App] Playing Audio packet #${audioPacketCount} from User ${userId}`);
audioPacketCount++;
if (audioPacketCount % 50 === 0) {
console.log(`[App] Playing Audio packet #${audioPacketCount} from User ${userId}, frames=${data.numberOfFrames}, sampleRate=${data.sampleRate}`);
}
const ctx = audioCtxRef.current;
if (!ctx) {
console.warn('[App] AudioContext not initialized, dropping audio');
data.close();
return;
}
if (ctx.state === 'suspended') {
console.log('[App] AudioContext suspended, attempting resume');
// Try to resume again
ctx.resume().catch(e => console.error("Audio resume failed in playback", e));
}
@ -255,22 +210,84 @@ function App() {
});
});
// Use explicit API for reliable video IPC
// @ts-ignore
const removeVideoChunk = window.electron.ipcRenderer.on("video-chunk", (_, payload) => {
handleIncomingVideoFragment(payload);
});
const removeVideoChunk = window.api?.onVideoChunk
? window.api.onVideoChunk((payload: any) => handleIncomingVideoFragment(payload))
// @ts-ignore
: window.electron.ipcRenderer.on("video-chunk", (_, payload) => handleIncomingVideoFragment(payload));
// @ts-ignore
const removeAudioFragment = window.electron.ipcRenderer.on("audio-fragment", (_, payload) => {
handleIncomingAudioFragment(payload);
});
const removeAudioFragment = () => { }; // No-op, removed
// @ts-ignore
const removeAudioChunk = window.electron.ipcRenderer.on("audio-chunk", (_, payload) => {
// Check if it's the old single chunk message (fallback)
if (mediaEngineRef.current) {
const data = payload.data instanceof Uint8Array ? payload.data : new Uint8Array(payload.data);
mediaEngineRef.current.decodeAudioChunk(data, payload.user_id, payload.ts);
const removeOpusChunk = () => { };
// window.electron.ipcRenderer.removeAllListeners("audio-fragment"); // Ensure clean slate
// --- Original Simple Audio Playback (PCM) ---
// Pre-buffer this long before starting playback to absorb network jitter.
const JITTER_BUFFER_MS = 60;
let playbackCtx: AudioContext | null = null;
let nextPlayTime = 0;
let audioBufferQueue: Float32Array[] = [];
let audioStarted = false;
// Schedule one mono 48 kHz chunk so it plays back-to-back with the
// previously scheduled chunk (gapless playback).
const scheduleAudioBuffer = (float32: Float32Array) => {
  if (!playbackCtx) return;
  const buffer = playbackCtx.createBuffer(1, float32.length, 48000);
  buffer.copyToChannel(float32 as any, 0);
  const source = playbackCtx.createBufferSource();
  source.buffer = buffer;
  source.connect(playbackCtx.destination);
  // If we've fallen behind the audio clock, restart slightly ahead of "now"
  // — AudioBufferSourceNode.start() in the past would clip the chunk.
  const now = playbackCtx.currentTime;
  if (now > nextPlayTime) {
    nextPlayTime = now + 0.01;
  }
  source.start(nextPlayTime);
  nextPlayTime += buffer.duration;
};
// Drain everything queued during the pre-buffering phase.
const flushAudioBuffer = () => {
  for (let queued = audioBufferQueue.shift(); queued !== undefined; queued = audioBufferQueue.shift()) {
    scheduleAudioBuffer(queued);
  }
};
// Receive raw PCM frames from the main process and play them through a
// lazily-created 48 kHz AudioContext, pre-buffering 3 packets for jitter.
// @ts-ignore
const removeAudioFrame = window.electron.ipcRenderer.on("audio-frame", (_, payload) => {
  try {
    const { data } = payload;
    if (!playbackCtx) {
      playbackCtx = new AudioContext({ sampleRate: 48000 });
      nextPlayTime = playbackCtx.currentTime + JITTER_BUFFER_MS / 1000;
    }
    if (playbackCtx.state === 'suspended') {
      playbackCtx.resume();
    }
    // Convert Uint8Array (bytes) to Int16 PCM then to Float32 in [-1, 1).
    const uint8 = new Uint8Array(data);
    // `>> 1` truncates to an integer sample count: Int16Array's length
    // argument must be an integer, so a stray odd-length payload would
    // otherwise throw a RangeError (uint8.length / 2 is fractional).
    const int16 = new Int16Array(uint8.buffer, uint8.byteOffset, uint8.length >> 1);
    const float32 = new Float32Array(int16.length);
    for (let i = 0; i < int16.length; i++) {
      float32[i] = int16[i] / 32768;
    }
    if (!audioStarted) {
      // Buffer a few packets before starting
      audioBufferQueue.push(float32);
      if (audioBufferQueue.length >= 3) {
        audioStarted = true;
        flushAudioBuffer();
      }
    } else {
      scheduleAudioBuffer(float32);
    }
  } catch (e) {
    console.error('[App] Audio playback error:', e);
  }
});
@ -305,18 +322,25 @@ function App() {
setPeersWithScreen(prev => {
const next = new Set(prev);
next.delete(userId);
console.log(`[App] peersWithScreen after removal:`, [...next]);
return next;
});
// Force clear canvas immediately
// Force clear canvas immediately and remove from refs
const key = `${userId}-screen`;
const canvas = peerCanvasRefs.current.get(key);
if (canvas) {
const ctx = canvas.getContext('2d');
if (ctx) ctx.clearRect(0, 0, canvas.width, canvas.height);
}
// Remove canvas reference to prevent stale rendering
peerCanvasRefs.current.delete(key);
} else {
console.log(`[App] Adding Screen Share for User ${userId}`);
setPeersWithScreen(prev => new Set(prev).add(userId));
setPeersWithScreen(prev => {
const next = new Set(prev).add(userId);
console.log(`[App] peersWithScreen after addition:`, [...next]);
return next;
});
}
}
});
@ -361,11 +385,13 @@ function App() {
removePeerJoined();
removePeerLeft();
removeVideoChunk();
removeAudioChunk();
removeAudioFrame();
removePeerStreamUpdate();
removeChatMessage();
// Don't close AudioContext here if we want to reuse it?
// Actually good practice to close it on component unmount
// Close playback context on unmount
if (playbackCtx) {
playbackCtx.close();
}
if (audioCtxRef.current) {
audioCtxRef.current.close();
audioCtxRef.current = null;
@ -617,17 +643,10 @@ function App() {
useEffect(() => {
let active = true;
let stream: MediaStream | null = null;
let reader: ReadableStreamDefaultReader<AudioData> | null = null;
const startAudio = async () => {
if (!audioEnabled || !mediaEngineRef.current) return;
addLog("[App] startAudio: Acquiring Lock");
const lock = acquireMediaLock();
await lock.wait();
if (!active) { lock.release(); return; }
addLog("[App] startAudio: Lock Acquired");
try {
addLog("Requesting Audio Access...");
stream = await navigator.mediaDevices.getUserMedia({
@ -635,62 +654,73 @@ function App() {
deviceId: selectedAudioDevice ? { exact: selectedAudioDevice } : undefined,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true
autoGainControl: true,
sampleRate: 48000,
channelCount: 1
}
});
addLog(`Audio Access Granted: ${stream.id}`);
const track = stream.getAudioTracks()[0];
// @ts-ignore
const processor = new MediaStreamTrackProcessor({ track });
reader = processor.readable.getReader();
if (!active) {
stream.getTracks().forEach(t => t.stop());
return;
}
// Signal ON
// @ts-ignore
window.electron.ipcRenderer.send('update-stream', { active: true, mediaType: 0 }); // 0 = Audio
lock.release();
addLog("[App] startAudio: Reading Loop Start");
let frameCount = 0;
const track = stream.getAudioTracks()[0];
// @ts-ignore
const processor = new MediaStreamTrackProcessor({ track });
const reader = processor.readable.getReader();
while (active) {
const result = await reader.read();
if (result.done) break;
if (result.value) {
if (frameCount % 100 === 0) console.log(`[App] Capturing Audio Frame ${frameCount}`);
mediaEngineRef.current?.encodeAudioData(result.value);
frameCount++;
const audioData = result.value;
// Convert AudioData to Int16 PCM
// We need to extract the data. format is usually f32-planar.
const float32 = new Float32Array(audioData.numberOfFrames * audioData.numberOfChannels);
audioData.copyTo(float32, { planeIndex: 0 }); // Assuming mono for now as requested in getUserMedia
const pcm = new Int16Array(float32.length);
for (let i = 0; i < float32.length; i++) {
let s = Math.max(-1, Math.min(1, float32[i]));
pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
}
// Send as raw bytes
// @ts-ignore
window.electron.ipcRenderer.send('send-audio-frame', { frame: new Uint8Array(pcm.buffer) });
audioData.close();
}
}
addLog("[App] startAudio: Reading Loop End");
} catch (e) {
console.error("Audio capture error", e);
setAudioEnabled(false);
lock.release();
}
};
if (audioEnabled) {
console.log("[App] Audio Enabled -> Starting");
startAudio();
} else {
console.log("[App] Audio Disabled");
if (connected) {
// @ts-ignore
window.electron.ipcRenderer.send('update-stream', { active: false, mediaType: 0 }); // 0 = Audio
window.electron.ipcRenderer.send('update-stream', { active: false, mediaType: 0 });
}
}
return () => {
active = false;
console.log("[App] Audio Cleanup");
if (stream) {
addLog("Stopping Audio Stream");
console.log("[App] Stopping Audio Stream");
stream.getTracks().forEach(t => t.stop());
}
if (reader) reader.cancel();
};
}, [audioEnabled, selectedAudioDevice, connected]);

View file

@ -230,6 +230,7 @@ export class MediaEngine extends SimpleEventEmitter {
}
decodeAudioChunk(chunkData: Uint8Array, userId: number, timestamp: number) {
console.log(`[MediaEngine] decodeAudioChunk called: userId=${userId}, dataLen=${chunkData.length}, ts=${timestamp}`);
const decoderKey = `${userId}-audio`;
let decoder = this.audioDecoders.get(decoderKey);
@ -260,6 +261,8 @@ export class MediaEngine extends SimpleEventEmitter {
} catch (e) {
console.error(`[MediaEngine] Audio Decode error ${decoderKey}:`, e);
}
} else {
console.warn(`[MediaEngine] AudioDecoder not configured, state=${decoder.state}`);
}
}