chore: revert audio logic to webcodecs

This commit is contained in:
Sarthak 2026-02-09 17:06:56 +05:30
parent cade45d16d
commit 4bd20fc988
12 changed files with 1288 additions and 816 deletions

View file

@ -1,25 +1,29 @@
import { app, shell, BrowserWindow, ipcMain, session, desktopCapturer } from 'electron'
import { app, shell, BrowserWindow, ipcMain, desktopCapturer } from 'electron'
import { join } from 'path'
import { electronApp, optimizer, is } from '@electron-toolkit/utils'
import { NetworkManager } from './network' // Import NetworkManager
// import icon from '../../resources/icon.png?asset'
import { NetworkManager } from './network'
let mainWindow: BrowserWindow | null = null;
let networkManager: NetworkManager | null = null;
let network: NetworkManager | null = null;
function createWindow(): void {
// Create the browser window.
mainWindow = new BrowserWindow({
width: 900,
height: 670,
width: 1280,
height: 720,
show: false,
autoHideMenuBar: true,
// ...(process.platform === 'linux' ? { icon } : {}),
webPreferences: {
preload: join(__dirname, '../preload/index.js'),
sandbox: false
sandbox: false,
contextIsolation: true,
nodeIntegration: false // Best practice
}
})
networkManager = new NetworkManager(mainWindow);
network = new NetworkManager(mainWindow);
mainWindow.on('ready-to-show', () => {
mainWindow?.show()
@ -43,83 +47,47 @@ app.whenReady().then(() => {
// Set app user model id for windows
electronApp.setAppUserModelId('com.electron')
// Grant permissions for camera/mic/screen
session.defaultSession.setPermissionRequestHandler((webContents, permission, callback) => {
console.log(`[Main] Requesting permission: ${permission}`);
// Grant all permissions for this valid local app
callback(true);
});
// Default open or close DevTools by F12 in development
// and ignore CommandOrControl + R in production.
// see https://github.com/alex8088/electron-toolkit/tree/master/packages/utils
app.on('browser-window-created', (_, window) => {
optimizer.watchWindowShortcuts(window)
})
// IPC Handlers
ipcMain.handle('connect', async (_, { serverUrl, roomCode, displayName }) => {
if (networkManager) {
return await networkManager.connect(serverUrl, roomCode, displayName);
}
return network?.connect(serverUrl, roomCode, displayName);
});
ipcMain.handle('disconnect', async () => {
if (networkManager) {
networkManager.disconnect();
}
network?.disconnect();
});
ipcMain.on('send-video-frame', (_, { frame }) => {
if (networkManager) {
networkManager.sendVideoFrame(frame);
}
ipcMain.handle('send-chat', (_, { message, displayName }) => {
network?.sendChat(message, displayName);
});
ipcMain.on('send-audio-frame', (_, { frame }) => {
if (networkManager) {
networkManager.sendAudioFrame(new Uint8Array(frame));
}
});
ipcMain.on('send-screen-frame', (_, { frame }) => {
if (networkManager) {
networkManager.sendScreenFrame(frame);
}
});
// Screen sharing: get available sources
ipcMain.handle('get-screen-sources', async () => {
const sources = await desktopCapturer.getSources({
types: ['screen', 'window'],
thumbnailSize: { width: 150, height: 150 }
});
return sources.map(s => ({
id: s.id,
name: s.name,
thumbnail: s.thumbnail.toDataURL()
const sources = await desktopCapturer.getSources({ types: ['window', 'screen'], thumbnailSize: { width: 150, height: 150 } });
return sources.map(source => ({
id: source.id,
name: source.name,
thumbnail: source.thumbnail.toDataURL()
}));
});
// Chat
ipcMain.handle('send-chat', (_, { message, displayName }) => {
if (networkManager) {
networkManager.sendChat(message, displayName);
}
ipcMain.on('send-video-chunk', (_, payload) => {
network?.sendEncodedVideoChunk(payload.chunk, payload.isKeyFrame, payload.timestamp, payload.streamType);
});
ipcMain.on('send-audio-chunk', (_, payload) => {
network?.sendEncodedAudioChunk(payload.chunk, payload.timestamp);
});
// Stream Updates
ipcMain.on('update-stream', (_, { active, mediaType }) => {
if (networkManager) {
networkManager.updateStream(active, mediaType);
}
network?.updateStream(active, mediaType);
});
createWindow()
app.on('activate', function () {
// On macOS it's common to re-create a window in the app when the
// dock icon is clicked and there are no other windows open.
if (BrowserWindow.getAllWindows().length === 0) createWindow()
})
})

View file

@ -6,8 +6,8 @@ import { BrowserWindow } from 'electron';
// Constants
const SERVER_UDP_PORT = 4000;
// Packet Header Structure (22 bytes)
const HEADER_SIZE = 22;
// Packet Header Structure (24 bytes)
const HEADER_SIZE = 24;
export enum MediaType {
Audio = 0,
@ -15,6 +15,11 @@ export enum MediaType {
Screen = 2,
}
// Token Bucket Pacer Constants
const PACER_RATE_BYTES_PER_MS = 1500; // ~12 Mbps limit (Targeting 8-10 Mbps for 1080p60)
const PACER_BUCKET_SIZE_BYTES = 15000; // Allow 10 packets burst (Instant Keyframe start)
const MAX_PAYLOAD = 1200; // Reduced from 1400 to be safe with MTU
export class NetworkManager extends EventEmitter {
private ws: WebSocket | null = null;
private udp: dgram.Socket | null = null;
@ -26,9 +31,46 @@ export class NetworkManager extends EventEmitter {
private mainWindow: BrowserWindow;
private serverUdpHost: string = '127.0.0.1';
// Pacing
private udpQueue: Buffer[] = [];
private pacerTokens: number = PACER_BUCKET_SIZE_BYTES;
private lastPacerUpdate: number = Date.now();
private pacerInterval: NodeJS.Timeout | null = null;
constructor(mainWindow: BrowserWindow) {
super();
this.mainWindow = mainWindow;
this.startPacer();
}
/**
 * Token-bucket pacer: every 2 ms, refill send credit proportional to elapsed
 * wall time (capped at the burst size) and flush as many queued UDP packets
 * as the credit allows. The queue head is left intact whenever the bucket
 * cannot cover the next packet, preserving send order.
 */
private startPacer() {
  this.pacerInterval = setInterval(() => {
    if (!this.udp) return;

    // Refill the bucket based on real elapsed time, clamped to burst size.
    const tickTime = Date.now();
    const deltaMs = tickTime - this.lastPacerUpdate;
    this.lastPacerUpdate = tickTime;
    this.pacerTokens = Math.min(
      this.pacerTokens + deltaMs * PACER_RATE_BYTES_PER_MS,
      PACER_BUCKET_SIZE_BYTES
    );

    // Drain from the head while we can afford the next packet.
    for (;;) {
      const head = this.udpQueue[0];
      if (head === undefined || this.pacerTokens < head.length) break;
      this.pacerTokens -= head.length;
      this.udpQueue.shift();
      this.udp.send(head, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
        if (err) console.error('UDP Send Error', err);
      });
    }
  }, 2); // 2 ms granularity keeps pacing smooth without burning CPU.
}
async connect(serverUrl: string, roomCode: string, displayName: string): Promise<any> {
@ -156,7 +198,7 @@ export class NetworkManager extends EventEmitter {
});
this.udp.on('message', (msg, rinfo) => {
console.log(`[UDP] Msg from ${rinfo.address}:${rinfo.port} - ${msg.length} bytes`);
// console.log(`[UDP] Msg from ${rinfo.address}:${rinfo.port} - ${msg.length} bytes`);
this.handleUdpMessage(msg);
});
@ -166,34 +208,83 @@ export class NetworkManager extends EventEmitter {
/**
 * Parse one inbound UDP media packet and forward it to the renderer for
 * reassembly and decoding.
 *
 * Header layout (24 bytes, little-endian — must match the senders in this
 * file): u8 version | u8 mediaType | u32 userId | u32 seq | u64 timestamp |
 * u16 fragIdx | u16 fragCnt | u16 flags (bit 0 = keyframe).
 *
 * @param msg Raw datagram; dropped silently if shorter than the header.
 */
handleUdpMessage(msg: Buffer) {
  if (msg.length < HEADER_SIZE) return; // Truncated datagram — drop.

  const mediaType = msg.readUInt8(1);
  const userId = msg.readUInt32LE(2);
  const seq = msg.readUInt32LE(6);
  const timestamp = Number(msg.readBigUInt64LE(10));
  const fragIdx = msg.readUInt16LE(18);
  const fragCnt = msg.readUInt16LE(20);
  const flags = msg.readUInt16LE(22);
  const isKeyFrame = (flags & 1) !== 0;
  const payload = msg.subarray(HEADER_SIZE);

  if (mediaType === MediaType.Audio) {
    // PCM audio frames exceed one MTU, so audio is fragmented like video;
    // the renderer reassembles fragments on the 'audio-fragment' channel.
    this.safeSend('audio-fragment', {
      user_id: userId,
      data: payload,
      seq,
      ts: timestamp,
      fidx: fragIdx,
      fcnt: fragCnt,
      isKeyFrame
    });
  } else if (mediaType === MediaType.Video || mediaType === MediaType.Screen) {
    // Camera and screen share the 'video-chunk' channel; streamType tells
    // the renderer which decoder pipeline to feed.
    const streamType = mediaType === MediaType.Screen ? 'screen' : 'video';
    if (mediaType === MediaType.Screen && fragIdx === 0) {
      console.log(`[Network] RX Screen Chunk User=${userId} Seq=${seq}`);
    }
    this.safeSend('video-chunk', {
      user_id: userId,
      data: payload,
      seq,
      ts: timestamp,
      fidx: fragIdx,
      fcnt: fragCnt,
      isKeyFrame,
      streamType
    });
  }
}
@ -208,88 +299,88 @@ export class NetworkManager extends EventEmitter {
}
}
/**
 * Legacy raw-frame video path: fragments `frame` into MTU-safe UDP packets
 * and sends them directly (unpaced).
 *
 * Fixed to emit the 24-byte header format that handleUdpMessage and the
 * encoded-chunk senders use (u16 frag idx at 18, u16 frag cnt at 20, u16
 * flags at 22); the previous u8 frag fields at 18/19 with flags at 20 would
 * be misparsed by the receiver. Also uses the module-level MAX_PAYLOAD
 * instead of shadowing it with a stale local 1400.
 *
 * @param frame Raw frame bytes to fragment and send.
 */
sendVideoFrame(frame: Uint8Array) {
  if (!this.udp || !this.userId) return;

  const buffer = Buffer.from(frame);
  const fragCount = Math.ceil(buffer.length / MAX_PAYLOAD);
  const seq = this.videoSeq++;     // One sequence number shared by all fragments.
  const ts = BigInt(Date.now());

  for (let i = 0; i < fragCount; i++) {
    const start = i * MAX_PAYLOAD;
    const end = Math.min(start + MAX_PAYLOAD, buffer.length);
    const chunk = buffer.subarray(start, end);

    // 24-byte header, little-endian (see handleUdpMessage for layout).
    const header = Buffer.alloc(HEADER_SIZE);
    header.writeUInt8(1, 0); // Version
    header.writeUInt8(MediaType.Video, 1);
    header.writeUInt32LE(this.userId, 2);
    header.writeUInt32LE(seq, 6);
    header.writeBigUInt64LE(ts, 10);
    header.writeUInt16LE(i, 18);         // Frag idx (u16)
    header.writeUInt16LE(fragCount, 20); // Frag cnt (u16)
    header.writeUInt16LE(0, 22);         // Flags (raw path carries no keyframe bit)

    const packet = Buffer.concat([header, chunk]);
    this.udp.send(packet, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
      if (err) console.error('UDP Video Send Error', err);
    });
  }
}
/**
 * Fragment one encoded video/screen chunk into MTU-safe UDP packets and
 * enqueue them on the pacer queue (the token bucket drains them at a
 * bounded rate instead of bursting onto the wire).
 *
 * Reconstructed from merged diff residue: the old sendAudioFrame body
 * (duplicate signature, a header built from an out-of-scope `frame`, and a
 * local MAX_PAYLOAD shadowing the module constant) is removed.
 *
 * @param chunk      Encoded bitstream bytes (Uint8Array/Buffer-like with
 *                   `.length` and `.slice` — TODO: tighten the `any` type).
 * @param isKeyFrame Sets flag bit 0 so receivers can resync on keyframes.
 * @param timestamp  Capture timestamp (ms) carried in the packet header.
 * @param streamType 'video' (camera, default) or 'screen'; selects the
 *                   MediaType and an independent per-stream sequence counter
 *                   so an idle stream cannot cause false gap detection.
 */
sendEncodedVideoChunk(chunk: any, isKeyFrame: boolean, timestamp: number, streamType: 'video' | 'screen' = 'video') {
  if (!this.udp) return;

  const totalSize = chunk.length;
  const seq = streamType === 'screen' ? this.screenSeq++ : this.videoSeq++;
  const mType = streamType === 'screen' ? MediaType.Screen : MediaType.Video;
  const fragmentCount = Math.ceil(totalSize / MAX_PAYLOAD);

  for (let i = 0; i < fragmentCount; i++) {
    const start = i * MAX_PAYLOAD;
    const end = Math.min(start + MAX_PAYLOAD, totalSize);
    const slice = chunk.slice(start, end);

    // 24-byte header, little-endian (see handleUdpMessage for layout).
    const header = Buffer.alloc(HEADER_SIZE);
    header.writeUInt8(1, 0); // Version
    header.writeUInt8(mType, 1);
    header.writeUInt32LE(this.userId, 2);
    header.writeUInt32LE(seq, 6); // Same seq for every fragment of this chunk.
    header.writeBigUInt64LE(BigInt(timestamp), 10);
    header.writeUInt16LE(i, 18);                  // Frag idx (u16)
    header.writeUInt16LE(fragmentCount, 20);      // Frag cnt (u16)
    header.writeUInt16LE(isKeyFrame ? 1 : 0, 22); // Flags (bit 0 = keyframe)

    // Enqueue for pacing rather than sending directly.
    this.udpQueue.push(Buffer.concat([header, Buffer.from(slice)]));
  }
}
/**
 * Fragment one encoded audio chunk into MTU-safe UDP packets and enqueue
 * them on the pacer queue. PCM frames (~2 KB per the original comments)
 * exceed a single MTU, so audio uses the same fragmentation scheme as video.
 *
 * Reconstructed from merged diff residue: the old sendScreenFrame body
 * (duplicate signature, `Buffer.from(frame)` on an out-of-scope `frame`,
 * u8 header writes, and a local MAX_PAYLOAD shadowing the module constant)
 * is removed.
 *
 * @param chunk     Encoded audio bytes for one frame.
 * @param timestamp Capture timestamp in ms; fractional values are floored.
 */
sendEncodedAudioChunk(chunk: Uint8Array, timestamp: number) {
  if (!this.udp) {
    console.warn('[Network] UDP Socket not ready for Audio');
    return;
  }

  const totalSize = chunk.length;
  const fragmentCount = Math.ceil(totalSize / MAX_PAYLOAD);

  // Sample ~5% of sends to confirm activity without log spam.
  if (Math.random() < 0.05) console.log(`[Network] Sending Audio Chunk size=${totalSize} frags=${fragmentCount}`);

  for (let i = 0; i < fragmentCount; i++) {
    const start = i * MAX_PAYLOAD;
    const end = Math.min(start + MAX_PAYLOAD, totalSize);
    const slice = chunk.slice(start, end);

    // 24-byte header, little-endian (see handleUdpMessage for layout).
    const header = Buffer.alloc(HEADER_SIZE);
    header.writeUInt8(1, 0); // Version
    header.writeUInt8(MediaType.Audio, 1);
    header.writeUInt32LE(this.userId, 2);
    header.writeUInt32LE(this.audioSeq, 6); // Same seq for all fragments of this frame.
    header.writeBigUInt64LE(BigInt(Math.floor(timestamp)), 10);
    header.writeUInt16LE(i, 18);             // Frag idx (u16)
    header.writeUInt16LE(fragmentCount, 20); // Frag cnt (u16)
    header.writeUInt16LE(1, 22);             // Flags: audio frames are always "key".

    this.udpQueue.push(Buffer.concat([header, Buffer.from(slice)]));
  }

  // Bump the counter once per frame, after all fragments share its value.
  this.audioSeq++;
}
startHeartbeat() {
@ -327,19 +418,23 @@ export class NetworkManager extends EventEmitter {
header.writeUInt32LE(this.userId, 2);
header.writeUInt32LE(0, 6); // Sequence
header.writeBigUInt64LE(BigInt(Date.now()), 10);
header.writeUInt8(0, 18); // Frag idx
header.writeUInt8(1, 19); // Frag cnt
header.writeUInt16LE(0, 20); // Flags
header.writeUInt16LE(0, 18); // Frag idx
header.writeUInt16LE(1, 20); // Frag cnt
header.writeUInt16LE(0, 22); // Flags
const packet = Buffer.concat([header, payload]);
console.log(`[UDP] Sending Handshake: userId=${this.userId}, room=${this.roomCode}, ${packet.length} bytes to ${this.serverUdpHost}:${SERVER_UDP_PORT}`);
// console.log(`[UDP] Sending Handshake: userId=${this.userId}, room=${this.roomCode}, ${packet.length} bytes to ${this.serverUdpHost}:${SERVER_UDP_PORT}`);
this.udp.send(packet, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
if (err) console.error('UDP Handshake Send Error', err);
});
}
disconnect() {
if (this.pacerInterval) {
clearInterval(this.pacerInterval);
this.pacerInterval = null;
}
if (this.heartbeatInterval) {
clearInterval(this.heartbeatInterval);
this.heartbeatInterval = null;