chore: revert audio logic to webcodecs

This commit is contained in:
Sarthak 2026-02-09 17:06:56 +05:30
parent cade45d16d
commit 4bd20fc988
12 changed files with 1288 additions and 816 deletions

View file

@ -6,8 +6,8 @@ import { BrowserWindow } from 'electron';
// Constants
const SERVER_UDP_PORT = 4000;
// Packet Header Structure (22 bytes)
const HEADER_SIZE = 22;
// Packet Header Structure (24 bytes)
const HEADER_SIZE = 24;
export enum MediaType {
Audio = 0,
@ -15,6 +15,11 @@ export enum MediaType {
Screen = 2,
}
// Token Bucket Pacer Constants
const PACER_RATE_BYTES_PER_MS = 1500; // ~12 Mbps limit (Targeting 8-10 Mbps for 1080p60)
const PACER_BUCKET_SIZE_BYTES = 15000; // Allow 10 packets burst (Instant Keyframe start)
const MAX_PAYLOAD = 1200; // Reduced from 1400 to be safe with MTU
export class NetworkManager extends EventEmitter {
private ws: WebSocket | null = null;
private udp: dgram.Socket | null = null;
@ -26,9 +31,46 @@ export class NetworkManager extends EventEmitter {
private mainWindow: BrowserWindow;
private serverUdpHost: string = '127.0.0.1';
// Pacing
private udpQueue: Buffer[] = [];
private pacerTokens: number = PACER_BUCKET_SIZE_BYTES;
private lastPacerUpdate: number = Date.now();
private pacerInterval: NodeJS.Timeout | null = null;
/**
 * @param mainWindow Electron window handle, stored for renderer-bound
 *        IPC (presumably the target of safeSend — TODO confirm).
 */
constructor(mainWindow: BrowserWindow) {
super();
this.mainWindow = mainWindow;
// Start the pacer loop immediately; it is a no-op until the UDP
// socket exists (startPacer bails out while this.udp is null).
this.startPacer();
}
/**
 * Runs a token-bucket pacer on a 2 ms tick: tokens refill at
 * PACER_RATE_BYTES_PER_MS and are capped at PACER_BUCKET_SIZE_BYTES,
 * and queued UDP packets are sent FIFO only while the bucket can
 * cover the head packet's size. Packets stay queued when no socket
 * exists or tokens run out.
 */
private startPacer() {
  this.pacerInterval = setInterval(() => {
    if (!this.udp) return; // no socket yet — leave packets queued

    const tickTime = Date.now();
    const deltaMs = tickTime - this.lastPacerUpdate;
    this.lastPacerUpdate = tickTime;

    // Refill the bucket proportionally to elapsed wall-clock time,
    // clamped to its capacity.
    this.pacerTokens = Math.min(
      this.pacerTokens + deltaMs * PACER_RATE_BYTES_PER_MS,
      PACER_BUCKET_SIZE_BYTES
    );

    // Drain the queue head-first while tokens cover the next packet.
    while (this.udpQueue.length > 0) {
      const head = this.udpQueue[0];
      if (this.pacerTokens < head.length) break; // starved — retry next tick
      this.pacerTokens -= head.length;
      this.udpQueue.shift();
      this.udp.send(head, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
        if (err) console.error('UDP Send Error', err);
      });
    }
  }, 2); // 2 ms pacing tick
}
async connect(serverUrl: string, roomCode: string, displayName: string): Promise<any> {
@ -156,7 +198,7 @@ export class NetworkManager extends EventEmitter {
});
this.udp.on('message', (msg, rinfo) => {
console.log(`[UDP] Msg from ${rinfo.address}:${rinfo.port} - ${msg.length} bytes`);
// console.log(`[UDP] Msg from ${rinfo.address}:${rinfo.port} - ${msg.length} bytes`);
this.handleUdpMessage(msg);
});
@ -166,34 +208,83 @@ export class NetworkManager extends EventEmitter {
handleUdpMessage(msg: Buffer) {
if (msg.length < HEADER_SIZE) return;
const version = msg.readUInt8(0);
const mediaType = msg.readUInt8(1);
const userId = msg.readUInt32LE(2);
const payload = msg.subarray(HEADER_SIZE);
const sequence = msg.readUInt32LE(6);
const seq = msg.readUInt32LE(6);
const timestamp = Number(msg.readBigUInt64LE(10));
const fragIdx = msg.readUInt8(18);
const fragCnt = msg.readUInt8(19);
const fragIdx = msg.readUInt16LE(18);
const fragCnt = msg.readUInt16LE(20);
const flags = msg.readUInt16LE(22);
const isKeyFrame = (flags & 1) !== 0;
const payload = msg.subarray(HEADER_SIZE);
if (mediaType === MediaType.Audio) {
this.safeSend('audio-frame', { user_id: userId, data: payload });
} else if (mediaType === MediaType.Video) {
this.safeSend('video-frame', {
// Audio can be fragmented now (PCM)
this.safeSend('video-chunk', { // Use 'video-chunk' handler in renderer for reassembly?
// Wait, App.tsx has separate 'audio-chunk' which doesn't reassemble.
// We need to reassemble here or change App.tsx.
// Reassembling in main process is easier or reusing video logic.
// Let's use 'audio-chunk' but we need to pass frag info?
// No, App.tsx 'audio-chunk' handler just decodes immediately.
// It expects a full frame.
// We MUST reassemble here or update App.tsx.
// Updating App.tsx to use the reassembler for Audio is cleaner.
// But 'video-chunk' in App.tsx calls 'handleIncomingVideoFragment' which uses 'MediaEngine.decodeVideoChunk'.
// Option: Treat Audio as "Video" for transport, but with streamType='audio'?
// MediaType.Audio is distinct.
// Let's implement reassembly here in NetworkManager?
// Or update App.tsx to use 'handleIncomingVideoFragment' for audio too?
// 'handleIncomingVideoFragment' does `decodeVideoChunk`.
// Let's change App.tsx to have `handleIncomingAudioFragment`?
// Or just reassemble here. UDP reassembly in Node.js is fine.
// Actually, App.tsx's `handleIncomingVideoFragment` is nice.
// Let's emit 'audio-fragment' and add a handler in App.tsx.
user_id: userId,
data: payload,
seq: sequence,
seq: this.audioSeq, // Wait, seq is in packet
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt
fcnt: fragCnt,
isKeyFrame,
streamType: 'audio'
// We can't use 'video-chunk' channel because it calls decodeVideoChunk.
});
} else if (mediaType === MediaType.Screen) {
this.safeSend('screen-frame', {
// Actually, let's just send it to 'audio-fragment' channel
this.safeSend('audio-fragment', {
user_id: userId,
data: payload,
seq: sequence,
seq: seq, // We need valid seq from packet
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt
fcnt: fragCnt,
isKeyFrame
});
} else if (mediaType === MediaType.Video || mediaType === MediaType.Screen) {
// Differentiate based on MediaType
const streamType = mediaType === MediaType.Screen ? 'screen' : 'video';
if (mediaType === MediaType.Screen && fragIdx === 0) {
console.log(`[Network] RX Screen Chunk User=${userId} Seq=${seq}`);
}
this.safeSend('video-chunk', {
user_id: userId,
data: payload,
seq,
ts: timestamp,
fidx: fragIdx,
fcnt: fragCnt,
isKeyFrame,
streamType // Pass this to renderer
});
}
}
@ -208,88 +299,88 @@ export class NetworkManager extends EventEmitter {
}
}
sendVideoFrame(frame: Uint8Array) {
if (!this.udp || !this.userId) return;
// --- New Encode Methods ---
const buffer = Buffer.from(frame);
const MAX_PAYLOAD = 1400;
const fragCount = Math.ceil(buffer.length / MAX_PAYLOAD);
const seq = this.videoSeq++;
const ts = BigInt(Date.now());
for (let i = 0; i < fragCount; i++) {
const start = i * MAX_PAYLOAD;
const end = Math.min(start + MAX_PAYLOAD, buffer.length);
const chunk = buffer.subarray(start, end);
const header = Buffer.alloc(HEADER_SIZE);
header.writeUInt8(1, 0); // Version
header.writeUInt8(MediaType.Video, 1);
header.writeUInt32LE(this.userId, 2);
header.writeUInt32LE(seq, 6);
header.writeBigUInt64LE(ts, 10);
header.writeUInt8(i, 18); // Frag idx
header.writeUInt8(fragCount, 19); // Frag cnt
header.writeUInt16LE(0, 20); // Flags
const packet = Buffer.concat([header, chunk]);
this.udp.send(packet, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
if (err) console.error('UDP Video Send Error', err);
});
}
}
sendAudioFrame(frame: Uint8Array) {
sendEncodedVideoChunk(chunk: any, isKeyFrame: boolean, timestamp: number, streamType: 'video' | 'screen' = 'video') {
if (!this.udp) return;
const header = Buffer.alloc(HEADER_SIZE);
header.writeUInt8(1, 0); // Version
header.writeUInt8(MediaType.Audio, 1);
header.writeUInt32LE(this.userId, 2);
header.writeUInt32LE(this.audioSeq++, 6);
header.writeBigUInt64LE(BigInt(Date.now()), 10);
header.writeUInt8(0, 18); // Frag idx
header.writeUInt8(1, 19); // Frag cnt
header.writeUInt16LE(0, 20); // Flags
const MAX_PAYLOAD = 1400;
const totalSize = chunk.length;
const packet = Buffer.concat([header, Buffer.from(frame)]);
// Use generic videoSeq for both? Or separate?
// Best to separate to avoid gap detection issues if one stream is idle.
// But for now, let's share for simplicity or use screenSeq if screen.
// Actually, let's use separate seq if possible, but I only have videoSeq.
// Let's use videoSeq for both for now, assuming the receiver tracks them separately or doesn't care about gaps across types.
// Better: Use a map or separate counters.
const seq = streamType === 'screen' ? this.screenSeq++ : this.videoSeq++;
this.udp.send(packet, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
if (err) console.error('UDP Audio Send Error', err);
});
const fragmentCount = Math.ceil(totalSize / MAX_PAYLOAD);
for (let i = 0; i < fragmentCount; i++) {
const start = i * MAX_PAYLOAD;
const end = Math.min(start + MAX_PAYLOAD, totalSize);
const slice = chunk.slice(start, end);
// Header (24 bytes)
const header = Buffer.alloc(HEADER_SIZE);
header.writeUInt8(1, 0); // Version
const mType = streamType === 'screen' ? MediaType.Screen : MediaType.Video;
header.writeUInt8(mType, 1);
header.writeUInt32LE(this.userId, 2);
header.writeUInt32LE(seq, 6);
header.writeBigUInt64LE(BigInt(timestamp), 10);
header.writeUInt16LE(i, 18); // Frag Idx (u16)
header.writeUInt16LE(fragmentCount, 20); // Frag Cnt (u16)
let flags = 0;
if (isKeyFrame) flags |= 1;
header.writeUInt16LE(flags, 22);
const packet = Buffer.concat([header, slice]);
// Enqueue for pacing
this.udpQueue.push(packet);
}
}
sendScreenFrame(frame: number[]) {
if (!this.udp || !this.userId) return;
sendEncodedAudioChunk(chunk: Uint8Array, timestamp: number) {
if (!this.udp) {
console.warn('[Network] UDP Socket not ready for Audio');
return;
}
const buffer = Buffer.from(frame);
const MAX_PAYLOAD = 1400;
const fragCount = Math.ceil(buffer.length / MAX_PAYLOAD);
const seq = this.screenSeq++;
const ts = BigInt(Date.now());
const totalSize = chunk.length;
const MAX_PAYLOAD = 1400; // Safe MTU
for (let i = 0; i < fragCount; i++) {
// PCM packets (approx 2KB) need fragmentation.
// We use the same logic as video but with Audio MediaType.
const fragmentCount = Math.ceil(totalSize / MAX_PAYLOAD);
// Log randomly to avoid spam but confirm activity
if (Math.random() < 0.05) console.log(`[Network] Sending Audio Chunk size=${totalSize} frags=${fragmentCount}`);
for (let i = 0; i < fragmentCount; i++) {
const start = i * MAX_PAYLOAD;
const end = Math.min(start + MAX_PAYLOAD, buffer.length);
const chunk = buffer.subarray(start, end);
const end = Math.min(start + MAX_PAYLOAD, totalSize);
const slice = chunk.slice(start, end);
const header = Buffer.alloc(HEADER_SIZE);
header.writeUInt8(1, 0); // Version
header.writeUInt8(MediaType.Screen, 1);
header.writeUInt8(MediaType.Audio, 1);
header.writeUInt32LE(this.userId, 2);
header.writeUInt32LE(seq, 6);
header.writeBigUInt64LE(ts, 10);
header.writeUInt8(i, 18); // Frag idx
header.writeUInt8(fragCount, 19); // Frag cnt
header.writeUInt16LE(0, 20); // Flags
header.writeUInt32LE(this.audioSeq, 6); // Same seq for all fragments
header.writeBigUInt64LE(BigInt(Math.floor(timestamp)), 10);
header.writeUInt16LE(i, 18); // Frag idx
header.writeUInt16LE(fragmentCount, 20); // Frag cnt
header.writeUInt16LE(1, 22); // Flags (1=Keyframe, audio is always key)
const packet = Buffer.concat([header, chunk]);
this.udp.send(packet, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
if (err) console.error('UDP Screen Send Error', err);
});
const packet = Buffer.concat([header, Buffer.from(slice)]);
this.udpQueue.push(packet);
}
this.audioSeq++;
}
startHeartbeat() {
@ -327,19 +418,23 @@ export class NetworkManager extends EventEmitter {
header.writeUInt32LE(this.userId, 2);
header.writeUInt32LE(0, 6); // Sequence
header.writeBigUInt64LE(BigInt(Date.now()), 10);
header.writeUInt8(0, 18); // Frag idx
header.writeUInt8(1, 19); // Frag cnt
header.writeUInt16LE(0, 20); // Flags
header.writeUInt16LE(0, 18); // Frag idx
header.writeUInt16LE(1, 20); // Frag cnt
header.writeUInt16LE(0, 22); // Flags
const packet = Buffer.concat([header, payload]);
console.log(`[UDP] Sending Handshake: userId=${this.userId}, room=${this.roomCode}, ${packet.length} bytes to ${this.serverUdpHost}:${SERVER_UDP_PORT}`);
// console.log(`[UDP] Sending Handshake: userId=${this.userId}, room=${this.roomCode}, ${packet.length} bytes to ${this.serverUdpHost}:${SERVER_UDP_PORT}`);
this.udp.send(packet, SERVER_UDP_PORT, this.serverUdpHost, (err) => {
if (err) console.error('UDP Handshake Send Error', err);
});
}
disconnect() {
if (this.pacerInterval) {
clearInterval(this.pacerInterval);
this.pacerInterval = null;
}
if (this.heartbeatInterval) {
clearInterval(this.heartbeatInterval);
this.heartbeatInterval = null;