diff options
Diffstat (limited to 'old/flowy/src')
-rw-r--r-- | old/flowy/src/main.rs | 132 | ||||
-rw-r--r-- | old/flowy/src/motion/debug.rs | 176 | ||||
-rw-r--r-- | old/flowy/src/motion/debug.wgsl | 50 | ||||
-rw-r--r-- | old/flowy/src/motion/dec.rs | 156 | ||||
-rw-r--r-- | old/flowy/src/motion/dec.wgsl | 31 | ||||
-rw-r--r-- | old/flowy/src/motion/enc-old.wgsl | 88 | ||||
-rw-r--r-- | old/flowy/src/motion/enc.rs | 160 | ||||
-rw-r--r-- | old/flowy/src/motion/enc.wgsl | 121 | ||||
-rw-r--r-- | old/flowy/src/motion/mod.rs | 203 |
9 files changed, 1117 insertions, 0 deletions
diff --git a/old/flowy/src/main.rs b/old/flowy/src/main.rs new file mode 100644 index 0000000..535de7e --- /dev/null +++ b/old/flowy/src/main.rs @@ -0,0 +1,132 @@ +pub mod motion; + +use framework::{CodecMode, Framework}; +use log::{debug, info}; +use motion::{dec::MotionDecoder, enc::MotionEncoder, CommonBuffers, Params}; +use pollster::FutureExt; +use std::{ + io::{stdin, stdout, ErrorKind, Read, Write}, + process::exit, + time::Instant, +}; +use wgpu::{ + Backends, DeviceDescriptor, Extent3d, Features, Instance, InstanceDescriptor, Limits, + MaintainBase, PowerPreference, RequestAdapterOptions, +}; + +use crate::motion::{debug::MotionDebugger, RoundParams}; + +fn main() { + env_logger::init_from_env("LOG"); + info!("allocating some stuff"); + let t = Instant::now(); + + let instance = Instance::new(InstanceDescriptor { + backends: Backends::all(), + ..Default::default() + }); + + let adapter = instance + .request_adapter(&RequestAdapterOptions { + power_preference: PowerPreference::HighPerformance, + force_fallback_adapter: false, + compatible_surface: None, + }) + .block_on() + .unwrap(); + + let (device, queue) = adapter + .request_device( + &DeviceDescriptor { + label: None, + required_features: Features::empty(), + required_limits: Limits::default(), + }, + None, + ) + .block_on() + .unwrap(); + + let (mut framework, fparams) = Framework::init(); + + let (width, height) = (fparams.width, fparams.height); + let bsize = 8; + let params = Params { + width, + height, + extent: Extent3d { + width: width as u32, + height: height as u32, + depth_or_array_layers: 1, + }, + blocks_x: width / bsize, + blocks_y: height / bsize, + block_width: bsize, + block_height: bsize, + blocks: (width / bsize) * (height / bsize), + init_debug: true, + }; + + let bufs = CommonBuffers::create(&device, ¶ms); + let menc = MotionEncoder::create(&device, ¶ms, &bufs); + let mdec = MotionDecoder::create(&device, ¶ms, &bufs); + let mdeb = MotionDebugger::create(&device, ¶ms, &bufs); + + let mut buffer = vec![0u8; width * height * 4]; + + menc.write_uniforms(&queue); + mdec.write_uniforms(&queue); + mdeb.write_uniforms(&queue); + + info!("ready (setup took {:?})", t.elapsed()); + + let mut i = 0; + loop { + let rp = RoundParams { + swap: i % 2, + debug: fparams.debug == 2, + preview: fparams.debug > 0, + }; + debug!("{params:?} {rp:?}"); + debug!("read"); + match stdin().read_exact(&mut buffer) { + Ok(_) => (), + Err(e) if e.kind() == ErrorKind::UnexpectedEof => { + break; + } + Err(e) => Err(e).unwrap(), + } + + framework.next_frame_manual(); + + debug!("upload"); + bufs.upload_texture(&queue, ¶ms, &rp, &buffer); + + debug!("compute"); + let mut encoder = device.create_command_encoder(&Default::default()); + + if let CodecMode::Encode = fparams.mode { + menc.pass(&mut encoder, ¶ms, &rp); + } + mdec.pass(&mut encoder, ¶ms, &rp); + if rp.debug { + mdeb.pass(&mut encoder, ¶ms, &rp); + } + if rp.preview { + bufs.prepare_texture_download(&mut encoder, ¶ms, &rp); + } + + queue.submit(Some(encoder.finish())); + device.poll(MaintainBase::Wait); + + debug!("download"); + bufs.download_texture(&device, &mut buffer); + + framework.encode_done(&[]); + debug!("write"); + stdout().write_all(&buffer).unwrap(); + i += 1; + } + eprintln!("done"); + exit(0); +} diff --git a/old/flowy/src/motion/debug.rs b/old/flowy/src/motion/debug.rs new file mode 100644 index 0000000..5e5f4ee --- /dev/null +++ b/old/flowy/src/motion/debug.rs @@ -0,0 +1,176 @@ +use super::{CommonBuffers, Params, RoundParams}; +use bytemuck::{Pod, Zeroable}; +use std::mem::size_of; +use wgpu::{ + include_wgsl, BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + BindGroupLayoutEntry, BindingType, Buffer, BufferDescriptor, BufferUsages, CommandEncoder, + ComputePipeline, ComputePipelineDescriptor, Device, PipelineCompilationOptions, + PipelineLayoutDescriptor, Queue, ShaderStages, TextureSampleType, TextureViewDimension, +}; + +pub struct MotionDebugger { + pipeline: ComputePipeline, + bind_groups: [BindGroup; 2], + + uniform_buffer: Buffer, + uniform: DebuggerUniform, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pod, Zeroable, Default)] +pub struct DebuggerUniform { + block_size: [i32; 2], + output_stride: i32, + _pad: u32, +} + +impl MotionDebugger { + pub fn create(device: &Device, params: &Params, bufs: &CommonBuffers) -> Self { + let uniform_buffer = device.create_buffer(&BufferDescriptor { + label: Some("encoder uniforms"), + size: size_of::<DebuggerUniform>() as u64, + usage: BufferUsages::UNIFORM | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let uniform = DebuggerUniform { + block_size: [params.block_width as i32, params.block_height as i32], + output_stride: (params.width / params.block_width) as i32, + ..Default::default() + }; + + let bind_group_layout = device.create_bind_group_layout(&BindGroupLayoutDescriptor { + label: None, + entries: &[ + BindGroupLayoutEntry { + binding: 0, + count: None, + ty: BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 1, + count: None, + ty: BindingType::Buffer { + has_dynamic_offset: false, + min_binding_size: None, + ty: wgpu::BufferBindingType::Storage { read_only: true }, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 2, + count: None, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 3, + count: None, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 4, + count: None, + ty: BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: wgpu::TextureFormat::Rgba8Unorm, + view_dimension: TextureViewDimension::D2, + }, + visibility: ShaderStages::COMPUTE, + }, + ], + }); + + let bind_groups = [0, 1].map(|i| { + device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + BindGroupEntry { + binding: 0, + resource: uniform_buffer.as_entire_binding(), + }, + BindGroupEntry { + binding: 1, + resource: bufs.offsets.as_entire_binding(), + }, + BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[i].create_view(&Default::default()), + ), + }, + BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[1 - i].create_view(&Default::default()), + ), + }, + BindGroupEntry { + binding: 4, + resource: wgpu::BindingResource::TextureView( + &bufs + .debug_output + .as_ref() + .unwrap() + .create_view(&Default::default()), + ), + }, + ], + }) + }); + + let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let module = device.create_shader_module(include_wgsl!("debug.wgsl")); + let pipeline = device.create_compute_pipeline(&ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + module: &module, + entry_point: "main", + compilation_options: PipelineCompilationOptions::default(), + }); + + Self { + bind_groups, + uniform, + uniform_buffer, + pipeline, + } + } + + pub fn write_uniforms(&self, queue: &Queue) { + queue.write_buffer( + &self.uniform_buffer, + 0, + bytemuck::cast_slice(&[self.uniform]), + ) + } + + pub fn pass(&self, encoder: &mut CommandEncoder, params: &Params, rp: &RoundParams) { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + cpass.set_pipeline(&self.pipeline); + cpass.set_bind_group(0, &self.bind_groups[rp.swap], &[]); + cpass.dispatch_workgroups(params.blocks_x as u32, params.blocks_y as u32, 1); + } +} diff --git a/old/flowy/src/motion/debug.wgsl b/old/flowy/src/motion/debug.wgsl new file mode 100644 index 0000000..d2c56a4 --- /dev/null +++ b/old/flowy/src/motion/debug.wgsl @@ -0,0 +1,50 @@ + + +struct Params { + block_size: vec2<i32>, + offsets_stride: u32 +} + +struct BlockOffset { + score: f32, + offset: vec2<i32>, + tint: vec3<f32>, +} + +@group(0) @binding(0) var<uniform> params: Params; +@group(0) @binding(1) var<storage, read> offsets: array<BlockOffset>; +@group(0) @binding(2) var next: texture_2d<f32>; +@group(0) @binding(3) var prev: texture_2d<f32>; +@group(0) @binding(4) var out: texture_storage_2d<rgba8unorm, write>; + +@compute @workgroup_size(1)fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { + let uv = vec2<i32>(global_id.xy) * params.block_size; + + let bl = offsets[global_id.x + global_id.y * params.offsets_stride]; + + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv + vec2(x, y); + let col = vec4(colormap_vec(vec2<f32>(bl.offset) * 0.08),1.); + textureStore(out, base, col); + } + } + + // for (var x = 0; x < params.block_size.x; x++) { + // for (var y = 0; y < params.block_size.y; y++) { + // let base = uv + vec2(x, y); + // let col_dec = textureLoad(prev, base + bl.offset, 0) + vec4(bl.tint, 1.); + // let col_orig = textureLoad(next, base, 0); + + // let col = vec4( saturate((0.5) + (col_orig.rgb - col_dec.rgb) * 100.), 1.0); + + // textureStore(out, base, col); + // } + // } +} + + +fn colormap_vec(v: vec2<f32>) -> vec3<f32> { + return vec3(v.y, v.x - 0.5 * v.y, -v.x - 0.5 * v.y); +} + diff --git a/old/flowy/src/motion/dec.rs b/old/flowy/src/motion/dec.rs new file mode 100644 index 0000000..cc1812f --- /dev/null +++ b/old/flowy/src/motion/dec.rs @@ -0,0 +1,156 @@ +use super::{CommonBuffers, Params, RoundParams}; +use bytemuck::{Pod, Zeroable}; +use std::mem::size_of; +use wgpu::{ + include_wgsl, BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + BindGroupLayoutEntry, BindingType, Buffer, BufferDescriptor, BufferUsages, CommandEncoder, + ComputePipeline, ComputePipelineDescriptor, Device, PipelineCompilationOptions, + PipelineLayoutDescriptor, Queue, ShaderStages, TextureSampleType, TextureViewDimension, +}; + +pub struct MotionDecoder { + pipeline: ComputePipeline, + bind_groups: [BindGroup; 2], + + uniform_buffer: Buffer, + uniform: DecoderUniform, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pod, Zeroable, Default)] +pub struct DecoderUniform { + block_size: [i32; 2], + output_stride: i32, + _pad: u32, +} + +impl MotionDecoder { + pub fn create(device: &Device, params: &Params, bufs: &CommonBuffers) -> Self { + let uniform_buffer = device.create_buffer(&BufferDescriptor { + label: Some("encoder uniforms"), + size: size_of::<DecoderUniform>() as u64, + usage: BufferUsages::UNIFORM | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let uniform = DecoderUniform { + block_size: [params.block_width as i32, params.block_height as i32], + output_stride: (params.width / params.block_width) as i32, + ..Default::default() + }; + + let bind_group_layout = device.create_bind_group_layout(&BindGroupLayoutDescriptor { + label: None, + entries: &[ + BindGroupLayoutEntry { + binding: 0, + count: None, + ty: BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 1, + count: None, + ty: BindingType::Buffer { + has_dynamic_offset: false, + min_binding_size: None, + ty: wgpu::BufferBindingType::Storage { read_only: true }, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 2, + count: None, + ty: BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: wgpu::TextureFormat::Rgba8Unorm, + view_dimension: TextureViewDimension::D2, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 3, + count: None, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + visibility: ShaderStages::COMPUTE, + }, + ], + }); + + let bind_groups = [0, 1].map(|i| { + device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + BindGroupEntry { + binding: 0, + resource: uniform_buffer.as_entire_binding(), + }, + BindGroupEntry { + binding: 1, + resource: bufs.offsets.as_entire_binding(), + }, + BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[i].create_view(&Default::default()), + ), + }, + BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[1 - i].create_view(&Default::default()), + ), + }, + ], + }) + }); + + let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let module = device.create_shader_module(include_wgsl!("dec.wgsl")); + let pipeline = device.create_compute_pipeline(&ComputePipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + compilation_options: PipelineCompilationOptions::default(), + module: &module, + entry_point: "main", + }); + + Self { + bind_groups, + uniform, + uniform_buffer, + pipeline, + } + } + + pub fn write_uniforms(&self, queue: &Queue) { + queue.write_buffer( + &self.uniform_buffer, + 0, + bytemuck::cast_slice(&[self.uniform]), + ) + } + + pub fn pass(&self, encoder: &mut CommandEncoder, params: &Params, rp: &RoundParams) { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + cpass.set_pipeline(&self.pipeline); + cpass.set_bind_group(0, &self.bind_groups[rp.swap], &[]); + cpass.dispatch_workgroups(params.blocks_x as u32, params.blocks_y as u32, 1); + } +} diff --git a/old/flowy/src/motion/dec.wgsl b/old/flowy/src/motion/dec.wgsl new file mode 100644 index 0000000..f4db974 --- /dev/null +++ b/old/flowy/src/motion/dec.wgsl @@ -0,0 +1,31 @@ + +struct Params { + block_size: vec2<i32>, + offsets_stride: u32 +} + +struct BlockOffset { + score: f32, + offset: vec2<i32>, + tint: vec3<f32>, +} + +@group(0) @binding(0) var<uniform> params: Params; +@group(0) @binding(1) var<storage, read> offsets: array<BlockOffset>; +@group(0) @binding(2) var next: texture_storage_2d<rgba8unorm, write>; +@group(0) @binding(3) var prev: texture_2d<f32>; + +@compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { + let uv = vec2<i32>(global_id.xy) * params.block_size; + + let bl = offsets[global_id.x + global_id.y * params.offsets_stride]; + + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + let col = textureLoad(prev, base+bl.offset, 0)+vec4(bl.tint,1.); + textureStore(next, base, col); + }} +} + + diff --git a/old/flowy/src/motion/enc-old.wgsl b/old/flowy/src/motion/enc-old.wgsl new file mode 100644 index 0000000..b94bac9 --- /dev/null +++ b/old/flowy/src/motion/enc-old.wgsl @@ -0,0 +1,88 @@ + +struct Params { + block_size: vec2<i32>, + offsets_stride: u32, + search_radius: i32, + skip_threshold: f32, +} + +struct BlockOffset { + score: f32, + offset: vec2<i32>, + tint: vec3<f32>, +} + +@group(0) @binding(0) var<uniform> params: Params; +@group(0) @binding(1) var<storage, read_write> offsets: array<BlockOffset>; +@group(0) @binding(2) var next: texture_2d<f32>; +@group(0) @binding(3) var prev: texture_2d<f32>; + +@compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { + let uv = vec2<i32>(global_id.xy) * params.block_size; + + let _f = search_offset(uv); + let best_err = _f.error; + let best_offset = _f.offset; + + var best_tint = vec3(0.); + + var average_pcol = vec3(0.); + var average_ncol = vec3(0.); + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + average_pcol += textureLoad(prev, base+best_offset, 0).rgb; + average_ncol += textureLoad(next, base, 0).rgb; + }} + + let tint = (average_ncol - average_pcol) / f32(params.block_size.x * params.block_size.y); + + var err = 0.; + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + let pcol = textureLoad(prev, base+best_offset, 0).rgb+tint; + let ncol = textureLoad(next, base, 0).rgb; + err += distance(pcol, ncol); + }} + if err < best_err { + best_tint = tint; + } + + offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_err, best_offset, best_tint); +} + +struct SearchRes {offset: vec2<i32>, error: f32} + +fn search_offset(uv: vec2<i32>) -> SearchRes { + var best_err = 100000000.; + var best_offset = vec2(0); + // TODO: better ordering + for (var ox = -params.search_radius; ox <= params.search_radius; ox++) { + for (var oy = -params.search_radius; oy <= params.search_radius; oy++) { + let offset = vec2(ox,oy); + + var err = 0.; + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv+vec2(x,y); + let pcol = textureLoad(prev, base+offset, 0).rgb; + let ncol = textureLoad(next, base, 0).rgb; + err += distance(pcol, ncol); + }} + + if err < best_err { + best_err = err; + best_offset = offset; + if err < params.skip_threshold { + return SearchRes(offset, err); + } + } + }} + return SearchRes(best_offset,best_err); +} + +// fn colormap_vec(v: vec2<f32>) -> vec3<f32> { +// return vec3(v.y, v.x - 0.5 * v.y, -v.x - 0.5 * v.y); +// } + diff --git a/old/flowy/src/motion/enc.rs b/old/flowy/src/motion/enc.rs new file mode 100644 index 0000000..a8fd96f --- /dev/null +++ b/old/flowy/src/motion/enc.rs @@ -0,0 +1,160 @@ +use super::{CommonBuffers, Params, RoundParams}; +use bytemuck::{Pod, Zeroable}; +use std::mem::size_of; +use wgpu::{ + include_wgsl, BindGroup, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, + BindGroupLayoutEntry, BindingType, Buffer, BufferDescriptor, BufferUsages, CommandEncoder, + ComputePipeline, ComputePipelineDescriptor, Device, PipelineCompilationOptions, + PipelineLayoutDescriptor, Queue, ShaderStages, TextureSampleType, TextureViewDimension, +}; + +pub struct MotionEncoder { + pipeline: ComputePipeline, + bind_groups: [BindGroup; 2], + + uniform_buffer: Buffer, + uniform: EncoderUniform, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pod, Zeroable, Default)] +pub struct EncoderUniform { + block_size: [i32; 2], + output_stride: u32, + search_radius: i32, + skip_threshold: f32, + _pad: u32, +} + +impl MotionEncoder { + pub fn create(device: &Device, params: &Params, bufs: &CommonBuffers) -> Self { + let uniform_buffer = device.create_buffer(&BufferDescriptor { + label: Some("encoder uniforms"), + size: size_of::<EncoderUniform>() as u64, + usage: BufferUsages::UNIFORM | BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let uniform = EncoderUniform { + block_size: [params.block_width as i32, params.block_height as i32], + output_stride: (params.width / params.block_width) as u32, + search_radius: 24, + skip_threshold: 1., + ..Default::default() + }; + + let bind_group_layout = device.create_bind_group_layout(&BindGroupLayoutDescriptor { + label: None, + entries: &[ + BindGroupLayoutEntry { + binding: 0, + count: None, + ty: BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 1, + count: None, + ty: BindingType::Buffer { + has_dynamic_offset: false, + min_binding_size: None, + ty: wgpu::BufferBindingType::Storage { read_only: false }, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 2, + count: None, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + visibility: ShaderStages::COMPUTE, + }, + BindGroupLayoutEntry { + binding: 3, + count: None, + ty: BindingType::Texture { + sample_type: TextureSampleType::Float { filterable: false }, + view_dimension: TextureViewDimension::D2, + multisampled: false, + }, + visibility: ShaderStages::COMPUTE, + }, + ], + }); + + let bind_groups = [0, 1].map(|i| { + device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[ + BindGroupEntry { + binding: 0, + resource: uniform_buffer.as_entire_binding(), + }, + BindGroupEntry { + binding: 1, + resource: bufs.offsets.as_entire_binding(), + }, + BindGroupEntry { + binding: 2, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[i].create_view(&Default::default()), + ), + }, + BindGroupEntry { + binding: 3, + resource: wgpu::BindingResource::TextureView( + &bufs.textures[1 - i].create_view(&Default::default()), + ), + }, + ], + }) + }); + + let pipeline_layout = device.create_pipeline_layout(&PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let module = device.create_shader_module(include_wgsl!("enc.wgsl")); + let pipeline = device.create_compute_pipeline(&ComputePipelineDescriptor { + label: None, + compilation_options: PipelineCompilationOptions::default(), + layout: Some(&pipeline_layout), + module: &module, + entry_point: "main", + }); + + Self { + bind_groups, + uniform, + uniform_buffer, + pipeline, + } + } + + pub fn write_uniforms(&self, queue: &Queue) { + queue.write_buffer( + &self.uniform_buffer, + 0, + bytemuck::cast_slice(&[self.uniform]), + ) + } + + pub fn pass(&self, encoder: &mut CommandEncoder, params: &Params, rp: &RoundParams) { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + cpass.set_pipeline(&self.pipeline); + cpass.set_bind_group(0, &self.bind_groups[rp.swap], &[]); + cpass.dispatch_workgroups(params.blocks_x as u32, params.blocks_y as u32, 1); + } +} diff --git a/old/flowy/src/motion/enc.wgsl b/old/flowy/src/motion/enc.wgsl new file mode 100644 index 0000000..dbc6410 --- /dev/null +++ b/old/flowy/src/motion/enc.wgsl @@ -0,0 +1,121 @@ + +struct Params { + block_size: vec2<i32>, + offsets_stride: u32, + search_radius: i32, + skip_threshold: f32, +} + +struct BlockOffset { + score: f32, + offset: vec2<i32>, + tint: vec3<f32>, +} + +@group(0) @binding(0) var<uniform> params: Params; +@group(0) @binding(1) var<storage, read_write> offsets: array<BlockOffset>; +@group(0) @binding(2) var next: texture_2d<f32>; +@group(0) @binding(3) var prev: texture_2d<f32>; + +var<private> best_offset: vec2<i32> = vec2(0); +var<private> best_error: f32 = 100000.; +var<private> best_tint: vec3<f32> = vec3(0.); + +@compute @workgroup_size(1) +fn main(@builtin(global_invocation_id) global_id: vec3<u32>) { + let uv = vec2<i32>(global_id.xy) * params.block_size; + + loop { + test_offset(uv, vec2(0, 0)); + if best_error < params.skip_threshold { break; } + apply_tint(uv); + if best_error < params.skip_threshold { break; } + best_tint = vec3(0.); + best_error = 10000000.; + + do_dist(uv, 64); + if best_error < params.skip_threshold { break; } + do_dist(uv, 32); + if best_error < params.skip_threshold { break; } + do_dist(uv, 24); + if best_error < params.skip_threshold { break; } + do_dist(uv, 16); + if best_error < params.skip_threshold { break; } + do_dist(uv, 12); + if best_error < params.skip_threshold { break; } + do_dist(uv, 8); + if best_error < params.skip_threshold { break; } + do_dist(uv, 6); + if best_error < params.skip_threshold { break; } + do_dist(uv, 4); + if best_error < params.skip_threshold { break; } + do_dist(uv, 3); + if best_error < params.skip_threshold { break; } + do_dist(uv, 2); + if best_error < params.skip_threshold { break; } + do_dist(uv, 1); + if best_error < params.skip_threshold { break; } + + apply_tint(uv); + break; + } + + offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_error, best_offset, best_tint); +} + +fn do_dist(uv: vec2<i32>, n: i32) { + test_offset(uv, vec2(0, n)); + test_offset(uv, vec2(n, n)); + test_offset(uv, vec2(n, 0)); + test_offset(uv, vec2(n, -n)); + test_offset(uv, vec2(0, -n)); + test_offset(uv, vec2(-n, -n)); + test_offset(uv, vec2(-n, 0)); + test_offset(uv, vec2(-n, n)); +} + +fn apply_tint(uv: vec2<i32>) { + + var average_pcol = vec3(0.); + var average_ncol = vec3(0.); + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv + vec2(x, y); + average_pcol += textureLoad(prev, base + best_offset, 0).rgb; + average_ncol += textureLoad(next, base, 0).rgb; + } + } + + let tint = (average_ncol - average_pcol) / f32(params.block_size.x * params.block_size.y); + + var err = 0.; + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv + vec2(x, y); + let pcol = textureLoad(prev, base + best_offset, 0).rgb + tint; + let ncol = textureLoad(next, base, 0).rgb; + err += distance(pcol, ncol); + } + } + if err < best_error { + best_error = err; + best_tint = tint; + } +} + +fn test_offset(uv: vec2<i32>, offset: vec2<i32>) { + var err = 0.; + for (var x = 0; x < params.block_size.x; x++) { + for (var y = 0; y < params.block_size.y; y++) { + let base = uv + vec2(x, y); + let pcol = textureLoad(prev, base + offset, 0).rgb; + let ncol = textureLoad(next, base, 0).rgb; + err += distance(pcol, ncol); + } + } + if err < best_error { + best_error = err; + best_offset = offset; + } +} + diff --git a/old/flowy/src/motion/mod.rs b/old/flowy/src/motion/mod.rs new file mode 100644 index 0000000..337290f --- /dev/null +++ b/old/flowy/src/motion/mod.rs @@ -0,0 +1,203 @@ +pub mod debug; +pub mod dec; +pub mod enc; + +use std::mem::size_of; +use wgpu::{ + Buffer, BufferUsages, CommandEncoder, Device, Extent3d, ImageCopyTexture, Origin3d, Queue, + Texture, TextureDescriptor, TextureDimension, TextureFormat, TextureUsages, +}; + +#[derive(Debug)] +pub struct Params { + pub width: usize, + pub height: usize, + pub extent: Extent3d, + pub block_width: usize, + pub block_height: usize, + pub blocks_x: usize, + pub blocks_y: usize, + pub blocks: usize, + pub init_debug: bool, +} + +#[derive(Debug)] +pub struct RoundParams { + pub swap: usize, + pub debug: bool, + pub preview: bool, +} + +pub struct CommonBuffers { + textures: [Texture; 2], + offsets: Buffer, + offsets_download: Option<Buffer>, + debug_output: Option<Texture>, + texture_download: Option<Buffer>, +} + +#[repr(C)] +pub struct BlockOffset { + score: f32, + _pad: u32, + offset: [f32; 2], + tint: [f32; 3], +} + +impl CommonBuffers { + pub fn create(device: &Device, params: &Params) -> Self { + let textures = [(), ()].map(|_| { + device.create_texture(&TextureDescriptor { + label: None, + size: params.extent, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Rgba8Unorm, + usage: TextureUsages::TEXTURE_BINDING + | TextureUsages::STORAGE_BINDING + | TextureUsages::COPY_DST + | TextureUsages::COPY_SRC, + view_formats: &[], + }) + }); + + let debug_output = Some(device.create_texture(&TextureDescriptor { + label: None, + size: params.extent, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Rgba8Unorm, + usage: TextureUsages::STORAGE_BINDING | TextureUsages::COPY_SRC, + view_formats: &[], + })); + + let texture_download = Some(device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: (params.width * params.height * 4) as u64, + usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, + mapped_at_creation: false, + })); + + let offsets = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: (params.blocks * size_of::<BlockOffset>()) as u64, + usage: BufferUsages::COPY_DST | BufferUsages::STORAGE, + mapped_at_creation: false, + }); + let offsets_download = Some(device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: (params.blocks * size_of::<BlockOffset>()) as u64, + usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ, + mapped_at_creation: false, + })); + + Self { + debug_output, + textures, + offsets_download, + offsets, + texture_download, + } + } + + pub fn upload_texture(&self, queue: &Queue, params: &Params, rp: &RoundParams, buffer: &[u8]) { + queue.write_texture( + ImageCopyTexture { + aspect: wgpu::TextureAspect::All, + mip_level: 0, + origin: Origin3d::ZERO, + texture: &self.textures[rp.swap], + }, + buffer, + wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(params.extent.width * 4), + rows_per_image: Some(params.extent.height), + }, + params.extent, + ); + } + // pub fn upload_offsets(&self, queue: &Queue, params: &Params, rp: &RoundParams, buffer: &[u8]) { + // queue.write_texture( + // ImageCopyTexture { + // aspect: wgpu::TextureAspect::All, + // mip_level: 0, + // origin: Origin3d::ZERO, + // texture: &self.textures[rp.swap], + // }, + // buffer, + // wgpu::ImageDataLayout { + // offset: 0, + // bytes_per_row: Some(params.extent.width * 4), + // rows_per_image: Some(params.extent.height), + // }, + // params.extent, + // ); + // } + + pub fn prepare_texture_download( + &self, + encoder: &mut CommandEncoder, + params: &Params, + rp: &RoundParams, + ) { + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + texture: if rp.debug { + self.debug_output.as_ref().unwrap() + } else { + &self.textures[rp.swap] + }, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::ImageCopyBuffer { + buffer: self.texture_download.as_ref().unwrap(), + layout: wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(params.extent.width * 4), + rows_per_image: Some(params.extent.height), + }, + }, + params.extent, + ); + } + pub fn prepare_offsets_download(&self, encoder: &mut CommandEncoder, params: &Params) { + encoder.copy_buffer_to_buffer( + &self.offsets, + 0, + self.offsets_download.as_ref().unwrap(), + 0, + (params.blocks * size_of::<BlockOffset>()) as u64, + ); + } + + pub fn download_offsets(&self, device: &Device, buffer: &mut [u8]) { + let buffer_slice = self.offsets_download.as_ref().unwrap().slice(..); + let (sender, receiver) = oneshot::channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + device.poll(wgpu::Maintain::Wait); + receiver.recv().unwrap().unwrap(); + { + let view = buffer_slice.get_mapped_range(); + buffer.copy_from_slice(&view[..]); + } + self.offsets_download.as_ref().unwrap().unmap(); + } + + pub fn download_texture(&self, device: &Device, buffer: &mut [u8]) { + let buffer_slice = self.texture_download.as_ref().unwrap().slice(..); + let (sender, receiver) = oneshot::channel(); + buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap()); + device.poll(wgpu::Maintain::Wait); + receiver.recv().unwrap().unwrap(); + { + let view = buffer_slice.get_mapped_range(); + buffer.copy_from_slice(&view[..]); + } + self.texture_download.as_ref().unwrap().unmap(); + } +} |