aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormetamuffin <metamuffin@disroot.org>2023-11-18 02:39:47 +0100
committermetamuffin <metamuffin@disroot.org>2023-11-18 02:39:47 +0100
commit28af3138598e5c5f7e4d7c70218c26f4d2f46208 (patch)
tree83a8e1559f4798b2f33cc59e35cd5d1931be2683
parentebd59fb09a4e094701f195d86662e1a9d00fed2b (diff)
downloadvideo-codec-experiments-28af3138598e5c5f7e4d7c70218c26f4d2f46208.tar
video-codec-experiments-28af3138598e5c5f7e4d7c70218c26f4d2f46208.tar.bz2
video-codec-experiments-28af3138598e5c5f7e4d7c70218c26f4d2f46208.tar.zst
works very well
-rw-r--r--flowy/src/main.rs24
-rw-r--r--flowy/src/motion/dec.rs2
-rw-r--r--flowy/src/motion/dec.wgsl3
-rw-r--r--flowy/src/motion/enc.rs10
-rw-r--r--flowy/src/motion/enc.wgsl53
-rw-r--r--flowy/src/motion/mod.rs1
6 files changed, 72 insertions, 21 deletions
diff --git a/flowy/src/main.rs b/flowy/src/main.rs
index 557417f..1f84e3b 100644
--- a/flowy/src/main.rs
+++ b/flowy/src/main.rs
@@ -1,5 +1,6 @@
pub mod motion;
+use log::debug;
use motion::{dec::MotionDecoder, enc::MotionEncoder, CommonBuffers, Params};
use pollster::FutureExt;
use std::io::{stdin, stdout, Read, Write};
@@ -11,6 +12,8 @@ use wgpu::{
use crate::motion::RoundParams;
fn main() {
+ env_logger::init_from_env("LOG");
+
let instance = Instance::new(InstanceDescriptor {
backends: Backends::all(),
..Default::default()
@@ -38,6 +41,7 @@ fn main() {
.unwrap();
let (width, height) = (1920, 1080);
+ let bsize = 8;
let params = Params {
width,
height,
@@ -46,11 +50,11 @@ fn main() {
height: height as u32,
depth_or_array_layers: 1,
},
- blocks_x: width / 8,
- blocks_y: height / 8,
- block_width: 8,
- block_height: 8,
- blocks: (width / 8) * (height / 8),
+ blocks_x: width / bsize,
+ blocks_y: height / bsize,
+ block_width: bsize,
+ block_height: bsize,
+ blocks: (width / bsize) * (height / bsize),
};
let bufs = CommonBuffers::create(&device, &params);
@@ -65,13 +69,13 @@ fn main() {
let mut i = 0;
loop {
let rp = RoundParams { swap: i % 2 };
- eprintln!("read");
+ debug!("read");
stdin().read_exact(&mut buffer).unwrap();
- eprintln!("upload");
+ debug!("upload");
bufs.upload(&queue, &params, &rp, &buffer);
- eprintln!("compute");
+ debug!("compute");
let mut encoder = device.create_command_encoder(&Default::default());
@@ -84,10 +88,10 @@ fn main() {
queue.submit(Some(encoder.finish()));
device.poll(MaintainBase::Wait);
- eprintln!("download");
+ debug!("download");
bufs.download(&device, &mut buffer);
- eprintln!("write");
+ debug!("write");
stdout().write_all(&buffer).unwrap();
i += 1;
}
diff --git a/flowy/src/motion/dec.rs b/flowy/src/motion/dec.rs
index 9d15259..518fbd2 100644
--- a/flowy/src/motion/dec.rs
+++ b/flowy/src/motion/dec.rs
@@ -33,7 +33,7 @@ impl MotionDecoder {
mapped_at_creation: false,
});
let uniform = DecoderUniform {
- block_size: [8, 8],
+ block_size: [params.block_width as i32, params.block_height as i32],
output_stride: (params.width / params.block_width) as i32,
..Default::default()
};
diff --git a/flowy/src/motion/dec.wgsl b/flowy/src/motion/dec.wgsl
index 982a1be..2fa3f3b 100644
--- a/flowy/src/motion/dec.wgsl
+++ b/flowy/src/motion/dec.wgsl
@@ -7,6 +7,7 @@ struct Params {
struct BlockOffset {
score: f32,
offset: vec2<i32>,
+ tint: vec3<f32>,
}
@group(0) @binding(0) var<uniform> params: Params;
@@ -22,7 +23,7 @@ struct BlockOffset {
for (var x = 0; x < params.block_size.x; x++) {
for (var y = 0; y < params.block_size.y; y++) {
let base = uv+vec2(x,y);
- let col = textureLoad(prev, base+bl.offset, 0);
+ let col = textureLoad(prev, base+bl.offset, 0)+vec4(bl.tint,1.);
textureStore(next, base, col);
}}
}
diff --git a/flowy/src/motion/enc.rs b/flowy/src/motion/enc.rs
index 76a2c7f..cb672a7 100644
--- a/flowy/src/motion/enc.rs
+++ b/flowy/src/motion/enc.rs
@@ -20,7 +20,9 @@ pub struct MotionEncoder {
#[derive(Debug, Clone, Copy, Pod, Zeroable, Default)]
pub struct EncoderUniform {
block_size: [i32; 2],
- output_stride: i32,
+ output_stride: u32,
+ search_radius: i32,
+ skip_threshold: f32,
_pad: u32,
}
@@ -33,8 +35,10 @@ impl MotionEncoder {
mapped_at_creation: false,
});
let uniform = EncoderUniform {
- block_size: [8, 8],
- output_stride: (params.width / params.block_width) as i32,
+ block_size: [params.block_width as i32, params.block_height as i32],
+ output_stride: (params.width / params.block_width) as u32,
+ search_radius: 16,
+ skip_threshold: 0.04,
..Default::default()
};
diff --git a/flowy/src/motion/enc.wgsl b/flowy/src/motion/enc.wgsl
index bb0139f..b94bac9 100644
--- a/flowy/src/motion/enc.wgsl
+++ b/flowy/src/motion/enc.wgsl
@@ -1,12 +1,15 @@
struct Params {
block_size: vec2<i32>,
- offsets_stride: u32
+ offsets_stride: u32,
+ search_radius: i32,
+ skip_threshold: f32,
}
struct BlockOffset {
score: f32,
offset: vec2<i32>,
+ tint: vec3<f32>,
}
@group(0) @binding(0) var<uniform> params: Params;
@@ -17,10 +20,46 @@ struct BlockOffset {
@compute @workgroup_size(1) fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let uv = vec2<i32>(global_id.xy) * params.block_size;
+ let _f = search_offset(uv);
+ let best_err = _f.error;
+ let best_offset = _f.offset;
+
+ var best_tint = vec3(0.);
+
+ var average_pcol = vec3(0.);
+ var average_ncol = vec3(0.);
+ for (var x = 0; x < params.block_size.x; x++) {
+ for (var y = 0; y < params.block_size.y; y++) {
+ let base = uv+vec2(x,y);
+ average_pcol += textureLoad(prev, base+best_offset, 0).rgb;
+ average_ncol += textureLoad(next, base, 0).rgb;
+ }}
+
+ let tint = (average_ncol - average_pcol) / f32(params.block_size.x * params.block_size.y);
+
+ var err = 0.;
+ for (var x = 0; x < params.block_size.x; x++) {
+ for (var y = 0; y < params.block_size.y; y++) {
+ let base = uv+vec2(x,y);
+ let pcol = textureLoad(prev, base+best_offset, 0).rgb+tint;
+ let ncol = textureLoad(next, base, 0).rgb;
+ err += distance(pcol, ncol);
+ }}
+ if err < best_err {
+ best_tint = tint;
+ }
+
+ offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_err, best_offset, best_tint);
+}
+
+struct SearchRes {offset: vec2<i32>, error: f32}
+
+fn search_offset(uv: vec2<i32>) -> SearchRes {
var best_err = 100000000.;
- var best_offset = vec2(0,0);
- for (var ox = -5; ox <= 5; ox++) {
- for (var oy = -5; oy <= 5; oy++) {
+ var best_offset = vec2(0);
+ // TODO: better ordering
+ for (var ox = -params.search_radius; ox <= params.search_radius; ox++) {
+ for (var oy = -params.search_radius; oy <= params.search_radius; oy++) {
let offset = vec2(ox,oy);
var err = 0.;
@@ -35,10 +74,12 @@ struct BlockOffset {
if err < best_err {
best_err = err;
best_offset = offset;
+ if err < params.skip_threshold {
+ return SearchRes(offset, err);
+ }
}
}}
-
- offsets[global_id.x + global_id.y * params.offsets_stride] = BlockOffset(best_err, best_offset);
+ return SearchRes(best_offset,best_err);
}
// fn colormap_vec(v: vec2<f32>) -> vec3<f32> {
diff --git a/flowy/src/motion/mod.rs b/flowy/src/motion/mod.rs
index 558b8d1..dfdf7a6 100644
--- a/flowy/src/motion/mod.rs
+++ b/flowy/src/motion/mod.rs
@@ -36,6 +36,7 @@ pub struct BlockOffset {
score: f32,
_pad: u32,
offset: [f32; 2],
+ tint: [f32; 3],
}
impl CommonBuffers {