diff options
author | metamuffin <metamuffin@disroot.org> | 2023-11-18 02:39:47 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2023-11-18 02:39:47 +0100 |
commit | 28af3138598e5c5f7e4d7c70218c26f4d2f46208 (patch) | |
tree | 83a8e1559f4798b2f33cc59e35cd5d1931be2683 | |
parent | ebd59fb09a4e094701f195d86662e1a9d00fed2b (diff) | |
download | video-codec-experiments-28af3138598e5c5f7e4d7c70218c26f4d2f46208.tar video-codec-experiments-28af3138598e5c5f7e4d7c70218c26f4d2f46208.tar.bz2 video-codec-experiments-28af3138598e5c5f7e4d7c70218c26f4d2f46208.tar.zst |
works very well
-rw-r--r-- | flowy/src/main.rs | 24 | ||||
-rw-r--r-- | flowy/src/motion/dec.rs | 2 | ||||
-rw-r--r-- | flowy/src/motion/dec.wgsl | 3 | ||||
-rw-r--r-- | flowy/src/motion/enc.rs | 10 | ||||
-rw-r--r-- | flowy/src/motion/enc.wgsl | 53 | ||||
-rw-r--r-- | flowy/src/motion/mod.rs | 1 |
6 files changed, 72 insertions, 21 deletions
diff --git a/flowy/src/main.rs b/flowy/src/main.rs index 557417f..1f84e3b 100644 --- a/flowy/src/main.rs +++ b/flowy/src/main.rs @@ -1,5 +1,6 @@ pub mod motion; +use log::debug; use motion::{dec::MotionDecoder, enc::MotionEncoder, CommonBuffers, Params}; use pollster::FutureExt; use std::io::{stdin, stdout, Read, Write}; @@ -11,6 +12,8 @@ use wgpu::{ use crate::motion::RoundParams; fn main() { + env_logger::init_from_env("LOG"); + let instance = Instance::new(InstanceDescriptor { backends: Backends::all(), ..Default::default() @@ -38,6 +41,7 @@ fn main() { .unwrap(); let (width, height) = (1920, 1080); + let bsize = 8; let params = Params { width, height, @@ -46,11 +50,11 @@ fn main() { height: height as u32, depth_or_array_layers: 1, }, - blocks_x: width / 8, - blocks_y: height / 8, - block_width: 8, - block_height: 8, - blocks: (width / 8) * (height / 8), + blocks_x: width / bsize, + blocks_y: height / bsize, + block_width: bsize, + block_height: bsize, + blocks: (width / bsize) * (height / bsize), }; let bufs = CommonBuffers::create(&device, &params); @@ -65,13 +69,13 @@ fn main() { let mut i = 0; loop { let rp = RoundParams { swap: i % 2 }; - eprintln!("read"); + debug!("read"); stdin().read_exact(&mut buffer).unwrap(); - eprintln!("upload"); + debug!("upload"); bufs.upload(&queue, &params, &rp, &buffer); - eprintln!("compute"); + debug!("compute"); let mut encoder = device.create_command_encoder(&Default::default()); @@ -84,10 +88,10 @@ fn main() { queue.submit(Some(encoder.finish())); device.poll(MaintainBase::Wait); - eprintln!("download"); + debug!("download"); bufs.download(&device, &mut buffer); - eprintln!("write"); + debug!("write"); stdout().write_all(&buffer).unwrap(); i += 1; } diff --git a/flowy/src/motion/dec.rs b/flowy/src/motion/dec.rs index 9d15259..518fbd2 100644 --- a/flowy/src/motion/dec.rs +++ b/flowy/src/motion/dec.rs @@ -33,7 +33,7 @@ impl MotionDecoder { mapped_at_creation: false, }); let uniform = DecoderUniform { - block_size: [8, 8], + 
block_size: [params.block_width as i32, params.block_height as i32], output_stride: (params.width / params.block_width) as i32, ..Default::default() }; diff --git a/flowy/src/motion/dec.wgsl b/flowy/src/motion/dec.wgsl index 982a1be..2fa3f3b 100644 --- a/flowy/src/motion/dec.wgsl +++ b/flowy/src/motion/dec.wgsl @@ -7,6 +7,7 @@ struct Params { struct BlockOffset { score: f32, offset: vec2<i32>, + tint: vec3<f32>, } @group(0) @binding(0) var<uniform> params: Params; @@ -22,7 +23,7 @@ struct BlockOffset { for (var x = 0; x < params.block_size.x; x++) { for (var y = 0; y < params.block_size.y; y++) { let base = uv+vec2(x,y); - let col = textureLoad(prev, base+bl.offset, 0); + let col = textureLoad(prev, base+bl.offset, 0)+vec4(bl.tint,1.); textureStore(next, base, col); }} } diff --git a/flowy/src/motion/enc.rs b/flowy/src/motion/enc.rs index 76a2c7f..cb672a7 100644 --- a/flowy/src/motion/enc.rs +++ b/flowy/src/motion/enc.rs @@ -20,7 +20,9 @@ pub struct MotionEncoder { #[derive(Debug, Clone, Copy, Pod, Zeroable, Default)] pub struct EncoderUniform { block_size: [i32; 2], - output_stride: i32, + output_stride: u32, + search_radius: i32, + skip_threshold: f32, _pad: u32, } @@ -33,8 +35,10 @@ impl MotionEncoder { mapped_at_creation: false, }); let uniform = EncoderUniform { - block_size: [8, 8], - output_stride: (params.width / params.block_width) as i32, + block_size: [params.block_width as i32, params.block_height as i32], + output_stride: (params.width / params.block_width) as u32, + search_radius: 16, + skip_threshold: 0.04, ..Default::default() }; diff --git a/flowy/src/motion/enc.wgsl b/flowy/src/motion/enc.wgsl index bb0139f..b94bac9 100644 --- a/flowy/src/motion/enc.wgsl +++ b/flowy/src/motion/enc.wgsl @@ -1,12 +1,15 @@ struct Params { block_size: vec2<i32>, - offsets_stride: u32 + offsets_stride: u32, + search_radius: i32, + skip_threshold: f32, } struct BlockOffset { score: f32, offset: vec2<i32>, + tint: vec3<f32>, } @group(0) @binding(0) var<uniform> params: 
Params; @@ -17,10 +20,46 @@ struct BlockOffset { @compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { let uv = vec2<i32>(global_id.xy) * params.block_size; + let _f = search_offset(uv); + let best_err = _f.error; + let best_offset = _f.offset; + + var best_tint = vec3(0.); + + var average_pcol = vec3(0.); + var average_ncol = vec3(0.); + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + average_pcol += textureLoad(prev, base+best_offset, 0).rgb; + average_ncol += textureLoad(next, base, 0).rgb; + }} + + let tint = (average_ncol - average_pcol) / f32(params.block_size.x * params.block_size.y); + + var err = 0.; + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + let pcol = textureLoad(prev, base+best_offset, 0).rgb+tint; + let ncol = textureLoad(next, base, 0).rgb; + err += distance(pcol, ncol); + }} + if err < best_err { + best_tint = tint; + } + + offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_err, best_offset, best_tint); +} + +struct SearchRes {offset: vec2<i32>, error: f32} + +fn search_offset(uv: vec2<i32>) -> SearchRes { var best_err = 100000000.; - var best_offset = vec2(0,0); - for (var ox = -5; ox <= 5; ox++) { - for (var oy = -5; oy <= 5; oy++) { + var best_offset = vec2(0); + // TODO: better ordering + for (var ox = -params.search_radius; ox <= params.search_radius; ox++) { + for (var oy = -params.search_radius; oy <= params.search_radius; oy++) { let offset = vec2(ox,oy); var err = 0.; @@ -35,10 +74,12 @@ struct BlockOffset { if err < best_err { best_err = err; best_offset = offset; + if err < params.skip_threshold { + return SearchRes(offset, err); + } } }} - - offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_err, best_offset); + return SearchRes(best_offset,best_err); } // fn colormap_vec(v: vec2<f32>) 
-> vec3<f32> { diff --git a/flowy/src/motion/mod.rs b/flowy/src/motion/mod.rs index 558b8d1..dfdf7a6 100644 --- a/flowy/src/motion/mod.rs +++ b/flowy/src/motion/mod.rs @@ -36,6 +36,7 @@ pub struct BlockOffset { score: f32, _pad: u32, offset: [f32; 2], + tint: [f32; 3], } impl CommonBuffers { |