diff options
author | metamuffin <metamuffin@disroot.org> | 2023-11-18 02:08:41 +0100 |
---|---|---|
committer | metamuffin <metamuffin@disroot.org> | 2023-11-18 02:08:41 +0100 |
commit | ebd59fb09a4e094701f195d86662e1a9d00fed2b (patch) | |
tree | 2e24901b15493612c73ede8fcb4cacde49e9692e | |
parent | 3deb911083605ad5b63a0ecd372e4ae437c11b4a (diff) | |
download | video-codec-experiments-ebd59fb09a4e094701f195d86662e1a9d00fed2b.tar video-codec-experiments-ebd59fb09a4e094701f195d86662e1a9d00fed2b.tar.bz2 video-codec-experiments-ebd59fb09a4e094701f195d86662e1a9d00fed2b.tar.zst |
rudimentary mcomp
-rw-r--r-- | flowy/src/main.rs | 16 | ||||
-rw-r--r-- | flowy/src/motion/debug.rs | 1 | ||||
-rw-r--r-- | flowy/src/motion/debug.wgsl | 0 | ||||
-rw-r--r-- | flowy/src/motion/dec.rs | 156 | ||||
-rw-r--r-- | flowy/src/motion/dec.wgsl | 30 | ||||
-rw-r--r-- | flowy/src/motion/enc.rs | 11 | ||||
-rw-r--r-- | flowy/src/motion/enc.wgsl | 50 | ||||
-rw-r--r-- | flowy/src/motion/mod.rs | 4 |
8 files changed, 231 insertions, 37 deletions
diff --git a/flowy/src/main.rs b/flowy/src/main.rs index 7755ab8..557417f 100644 --- a/flowy/src/main.rs +++ b/flowy/src/main.rs @@ -1,6 +1,6 @@ pub mod motion; -use motion::{enc::MotionEncoder, CommonBuffers, Params}; +use motion::{dec::MotionDecoder, enc::MotionEncoder, CommonBuffers, Params}; use pollster::FutureExt; use std::io::{stdin, stdout, Read, Write}; use wgpu::{ @@ -46,6 +46,8 @@ fn main() { height: height as u32, depth_or_array_layers: 1, }, + blocks_x: width / 8, + blocks_y: height / 8, block_width: 8, block_height: 8, blocks: (width / 8) * (height / 8), @@ -53,12 +55,16 @@ fn main() { let bufs = CommonBuffers::create(&device, ¶ms); let menc = MotionEncoder::create(&device, ¶ms, &bufs); + let mdec = MotionDecoder::create(&device, ¶ms, &bufs); let mut buffer = vec![0u8; width * height * 4]; + menc.write_uniforms(&queue); + mdec.write_uniforms(&queue); + let mut i = 0; loop { - let rp = RoundParams { swap: i }; + let rp = RoundParams { swap: i % 2 }; eprintln!("read"); stdin().read_exact(&mut buffer).unwrap(); @@ -69,7 +75,10 @@ fn main() { let mut encoder = device.create_command_encoder(&Default::default()); - menc.pass(&mut encoder, ¶ms, &rp); + if i > 1 { + menc.pass(&mut encoder, ¶ms, &rp); + mdec.pass(&mut encoder, ¶ms, &rp); + } bufs.prepare_texture_download(&mut encoder, ¶ms, &rp); queue.submit(Some(encoder.finish())); @@ -81,6 +90,5 @@ fn main() { eprintln!("write"); stdout().write_all(&buffer).unwrap(); i += 1; - i %= 2; } } diff --git a/flowy/src/motion/debug.rs b/flowy/src/motion/debug.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/flowy/src/motion/debug.rs @@ -0,0 +1 @@ + diff --git a/flowy/src/motion/debug.wgsl b/flowy/src/motion/debug.wgsl new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/flowy/src/motion/debug.wgsl diff --git a/flowy/src/motion/dec.rs b/flowy/src/motion/dec.rs index 84aeff7..9d15259 100644 --- a/flowy/src/motion/dec.rs +++ b/flowy/src/motion/dec.rs @@ -1,4 +1,158 @@ -pub struct MotionDecoder {} +use super::{CommonBuffers, Params, RoundParams}; +use bytemuck::{Pod, Zeroable}; +use std::mem::size_of; +use wgpu::{ + include_wgsl, BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + BindGroupLayoutEntry, BindingType, Buffer, BufferDescriptor, BufferUsages, CommandEncoder, + ComputePipeline, ComputePipelineDescriptor, Device, PipelineLayoutDescriptor, Queue, + ShaderStages, TextureSampleType, TextureViewDimension, +}; + +pub struct MotionDecoder { + pipeline: ComputePipeline, + bind_groups: [BindGroup; 2], + + uniform_buffer: Buffer, + uniform: DecoderUniform, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pod, Zeroable, Default)] +pub struct DecoderUniform { + block_size: [i32; 2], + output_stride: i32, + _pad: u32, +} + +impl MotionDecoder { + pub fn create(device: &Device, params: &Params, bufs: &CommonBuffers) -> Self { + let uniform_buffer = device.create_buffer(&BufferDescriptor { + label: Some("encoder uniforms"), + size: size_of::<DecoderUniform>() as u64, + usage: BufferUsages::UNIFORM | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let uniform = DecoderUniform { + block_size: [8, 8], + output_stride: (params.width / params.block_width) as i32, + ..Default::default() + }; + + let bind_group_layout = device.create_bind_group_layout(&BindGroupLayoutDescriptor { + label: None, + entries: &[ + BindGroupLayoutEntry { + binding: 0, + count: None, + ty: BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 1, + count: None, + ty: BindingType::Buffer { + has_dynamic_offset: false, + min_binding_size: None, + ty: wgpu::BufferBindingType::Storage { read_only: true }, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 2, + count: None, + ty: BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: wgpu::TextureFormat::Bgra8Unorm, + view_dimension: TextureViewDimension::D2, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 3, + count: None, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + visibility: ShaderStages::COMPUTE, + }, + ], + }); + + let bind_groups = [0, 1].map(|i| { + device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + BindGroupEntry { + binding: 0, + resource: uniform_buffer.as_entire_binding(), + }, + BindGroupEntry { + binding: 1, + resource: bufs.offsets.as_entire_binding(), + }, + BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[i].create_view(&Default::default()), + ), + }, + BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[1 - i].create_view(&Default::default()), + ), + }, + ], + }) + }); + + let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let module = device.create_shader_module(include_wgsl!("dec.wgsl")); + let pipeline = device.create_compute_pipeline(&ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &module, + entry_point: "main", + }); + + Self { + bind_groups, + uniform, + uniform_buffer, + pipeline, + } + } + + pub fn write_uniforms(&self, queue: &Queue) { + queue.write_buffer( + &self.uniform_buffer, + 0, + bytemuck::cast_slice(&[self.uniform]), + ) + } + + pub fn pass(&self, encoder: &mut CommandEncoder, params: &Params, rp: &RoundParams) { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + cpass.set_pipeline(&self.pipeline); + cpass.set_bind_group(0, &self.bind_groups[rp.swap], &[]); + cpass.dispatch_workgroups(params.blocks_x as u32, params.blocks_y as u32, 1); + } +} /* BindGroupLayoutEntry { diff --git a/flowy/src/motion/dec.wgsl b/flowy/src/motion/dec.wgsl new file mode 100644 index 0000000..982a1be --- /dev/null +++ b/flowy/src/motion/dec.wgsl @@ -0,0 +1,30 @@ + +struct Params { + block_size: vec2<i32>, + offsets_stride: u32 +} + +struct BlockOffset { + score: f32, + offset: vec2<i32>, +} + +@group(0) @binding(0) var<uniform> params: Params; +@group(0) @binding(1) var<storage, read> offsets: array<BlockOffset>; +@group(0) @binding(2) var next: texture_storage_2d<bgra8unorm, write>; +@group(0) @binding(3) var prev: texture_2d<f32>; + +@compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { + let uv = vec2<i32>(global_id.xy) * params.block_size; + + let bl = offsets[global_id.x + global_id.y * params.offsets_stride]; + + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + let col = textureLoad(prev, base+bl.offset, 0); + textureStore(next, base, col); + }} +} + + diff --git a/flowy/src/motion/enc.rs b/flowy/src/motion/enc.rs index aa45d7b..76a2c7f 100644 --- a/flowy/src/motion/enc.rs +++ b/flowy/src/motion/enc.rs @@ -1,12 +1,11 @@ -use std::mem::size_of; - use super::{CommonBuffers, Params, RoundParams}; use bytemuck::{Pod, Zeroable}; +use std::mem::size_of; use wgpu::{ include_wgsl, BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, BindingType, Buffer, BufferDescriptor, BufferUsages, CommandEncoder, ComputePipeline, ComputePipelineDescriptor, Device, PipelineLayoutDescriptor, Queue, - ShaderStages, TextureFormat, TextureSampleType, TextureViewDimension, + ShaderStages, TextureSampleType, TextureViewDimension, }; pub struct MotionEncoder { @@ -101,7 +100,7 @@ impl MotionEncoder { BindGroupEntry { binding: 2, resource: wgpu::BindingResource::TextureView( - &bufs.textures[1 - i].create_view(&Default::default()), + &bufs.textures[i].create_view(&Default::default()), ), }, BindGroupEntry { @@ -136,7 +135,7 @@ impl MotionEncoder { } } - pub fn write_uniforms(&self, queue: Queue) { + pub fn write_uniforms(&self, queue: &Queue) { queue.write_buffer( &self.uniform_buffer, 0, @@ -151,6 +150,6 @@ impl MotionEncoder { }); cpass.set_pipeline(&self.pipeline); cpass.set_bind_group(0, &self.bind_groups[rp.swap], &[]); - cpass.dispatch_workgroups(params.extent.width, params.extent.height, 1); + cpass.dispatch_workgroups(params.blocks_x as u32, params.blocks_y as u32, 1); } } diff --git a/flowy/src/motion/enc.wgsl b/flowy/src/motion/enc.wgsl index cc185e9..bb0139f 100644 --- a/flowy/src/motion/enc.wgsl +++ b/flowy/src/motion/enc.wgsl @@ -1,7 +1,7 @@ struct Params { block_size: vec2<i32>, - output_stride: i32 + offsets_stride: u32 } struct BlockOffset { @@ -10,37 +10,35 @@ struct BlockOffset { } @group(0) @binding(0) var<uniform> params: Params; -@group(0) @binding(1) var<storage, read_write> output: BlockOffset; -@group(0) @binding(2) var prev: texture_2d<f32>; -@group(0) @binding(3) var next: texture_2d<f32>; +@group(0) @binding(1) var<storage, read_write> offsets: array<BlockOffset>; +@group(0) @binding(2) var next: texture_2d<f32>; +@group(0) @binding(3) var prev: texture_2d<f32>; @compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { let uv = vec2<i32>(global_id.xy) * params.block_size; - // output[global_id.x + global_id.y * params.output_stride] = BlockOffset(0., uv); - - // let orig = textureLoad(texa, uv, 0); + var best_err = 100000000.; + var best_offset = vec2(0,0); + for (var ox = -5; ox <= 5; ox++) { + for (var oy = -5; oy <= 5; oy++) { + let offset = vec2(ox,oy); - // var best_sim = 1000.; - // var best_coord = vec2(0,0); - // for (var x = -SDIST; x <= SDIST; x++) { - // for (var y = -SDIST; y <= SDIST; y++) { - // let ov = uv + vec2(x,y); - // let samp = textureLoad(texb, ov, 0); - // let d = distance(samp.rgb, orig.rgb) - // + distance(sampnn.rgb, orignn.rgb) - // + distance(sampnp.rgb, orignp.rgb) - // + distance(samppn.rgb, origpn.rgb) - // + distance(samppp.rgb, origpp.rgb); - // if d < best_sim { - // best_sim = d; - // best_coord = vec2(x,y); - // } - // }} + var err = 0.; + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + let pcol = textureLoad(prev, base+offset, 0).rgb; + let ncol = textureLoad(next, base, 0).rgb; + err += distance(pcol, ncol); + }} + + if err < best_err { + best_err = err; + best_offset = offset; + } + }} - // let col = vec4(colormap_vec(vec2<f32>(best_coord) / f32(SDIST)), 1.); - - // textureStore(output, global_id.xy, col); + offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_err, best_offset); } // fn colormap_vec(v: vec2<f32>) -> vec3<f32> { diff --git a/flowy/src/motion/mod.rs b/flowy/src/motion/mod.rs index acb1396..558b8d1 100644 --- a/flowy/src/motion/mod.rs +++ b/flowy/src/motion/mod.rs @@ -1,3 +1,4 @@ +pub mod debug; pub mod dec; pub mod enc; @@ -13,6 +14,8 @@ pub struct Params { pub extent: Extent3d, pub block_width: usize, pub block_height: usize, + pub blocks_x: usize, + pub blocks_y: usize, pub blocks: usize, } @@ -46,6 +49,7 @@ impl CommonBuffers { dimension: TextureDimension::D2, format: TextureFormat::Bgra8Unorm, usage: TextureUsages::TEXTURE_BINDING + | TextureUsages::STORAGE_BINDING | TextureUsages::COPY_DST | TextureUsages::COPY_SRC, view_formats: &[], |