diff options
author | metamuffin <metamuffin@disroot.org> | 2025-01-16 12:23:05 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2025-01-16 12:23:05 +0100 |
commit | 44220f3ff1015c056730c8ee4c95cb55a0759abc (patch) | |
tree | 6a003ab2f58ffae508c802ecdbfb8d063c40ccc4 /client | |
parent | d465b5e9edfa3147af900414a11484b3feb50337 (diff) | |
download | weareserver-44220f3ff1015c056730c8ee4c95cb55a0759abc.tar weareserver-44220f3ff1015c056730c8ee4c95cb55a0759abc.tar.bz2 weareserver-44220f3ff1015c056730c8ee4c95cb55a0759abc.tar.zst |
rnnoise and vad
Diffstat (limited to 'client')
-rw-r--r-- | client/Cargo.toml | 1 | ||||
-rw-r--r-- | client/src/audio.rs | 76 |
2 files changed, 68 insertions, 9 deletions
diff --git a/client/Cargo.toml b/client/Cargo.toml index 77149b2..c98374a 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -21,3 +21,4 @@ egui-wgpu = "0.30.0" egui = { version = "0.30.0", features = ["bytemuck"] } bytemuck = "1.21.0" xdg = "2.5.2" +nnnoiseless = "0.5.1" diff --git a/client/src/audio.rs b/client/src/audio.rs index 5751c0e..f93466d 100644 --- a/client/src/audio.rs +++ b/client/src/audio.rs @@ -17,6 +17,7 @@ use std::{ collections::{HashMap, VecDeque}, sync::mpsc::{Receiver, SyncSender, sync_channel}, + time::Instant, }; use anyhow::{Result, anyhow}; @@ -29,7 +30,8 @@ use cpal::{ traits::{DeviceTrait, HostTrait}, }; use glam::Vec3; -use log::warn; +use log::{debug, info, warn}; +use nnnoiseless::{DenoiseState, RnnModel}; pub struct Audio { _instream: Stream, @@ -105,38 +107,86 @@ pub struct APlayPacket { data: Vec<u8>, } +const AE_FRAME_SIZE: usize = 480; + pub struct AEncoder { encoder: Encoder, sender: SyncSender<Vec<u8>>, buffer: VecDeque<f32>, + noise_rnn: DenoiseState<'static>, + trigger: VadTrigger, +} + +struct VadTrigger { + last_sig: Instant, + transmitting: bool, } + impl AEncoder { pub fn new() -> Result<(Self, Receiver<Vec<u8>>)> { let (sender, rx) = sync_channel(1024); Ok(( Self { + noise_rnn: *DenoiseState::from_model(RnnModel::default()), encoder: Encoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)?, sender, buffer: VecDeque::new(), + trigger: VadTrigger { + last_sig: Instant::now(), + transmitting: false, + }, }, rx, )) } pub fn data(&mut self, samples: &[f32]) -> Result<()> { self.buffer.extend(samples); - while self.buffer.len() >= 120 { - let mut out = [0u8; 120]; - let mut inp = [0f32; 120]; - for i in 0..120 { - inp[i] = self.buffer.pop_front().unwrap(); + while self.buffer.len() >= AE_FRAME_SIZE { + let mut out = [0u8; AE_FRAME_SIZE]; + let mut denoise = [0f32; AE_FRAME_SIZE]; + let mut raw = [0f32; AE_FRAME_SIZE]; + for i in 0..AE_FRAME_SIZE { + raw[i] = self.buffer.pop_front().unwrap() * 32768.0; + } + self.noise_rnn.process_frame(&mut denoise, &raw); + for e in &mut denoise { + *e /= 32768.0; + } + let energy = measure_energy(&denoise); + let (tx, end_tx) = self.trigger.update(energy); + if tx { + let size = self.encoder.encode_float(&denoise, &mut out)?; + let _ = self.sender.try_send(out[..size].to_vec()); + } + if end_tx { + // TODO send end frame } - let size = self.encoder.encode_float(&inp, &mut out)?; - let _ = self.sender.try_send(out[..size].to_vec()); } Ok(()) } } +impl VadTrigger { + pub fn update(&mut self, energy: f32) -> (bool, bool) { + debug!("E={energy:.02}"); + let now = Instant::now(); + if energy > 1. { + self.last_sig = now; + } + let last_sig_elapsed = (now - self.last_sig).as_secs_f32(); + let prev_transmitting = self.transmitting; + self.transmitting = last_sig_elapsed < 0.5; + + match (prev_transmitting, self.transmitting) { + (false, true) => info!("start transmit"), + (true, false) => info!("end transmit"), + _ => (), + } + + (self.transmitting, prev_transmitting && !self.transmitting) + } +} + const BUFFER_SIZE: usize = 48_000; pub struct ADecoder { decoder: Decoder, @@ -162,7 +212,7 @@ impl ADecoder { pub fn data(&mut self, samples: &mut [f32]) -> Result<()> { while self.buffer.len() < samples.len() { if let Ok(p) = self.receiver.try_recv() { - let mut output = [0f32; 120]; + let mut output = [0f32; AE_FRAME_SIZE]; let size = self.decoder.decode_float( Some(p.data.as_slice()), output.as_mut_slice(), @@ -191,3 +241,11 @@ impl ADecoder { Ok(()) } } + +fn measure_energy(samples: &[f32]) -> f32 { + let mut e = 0.; + for s in samples { + e += *s * *s; + } + e +} |