From 2167abcf72d978b4ac2f08fa7cbbddaada01f165 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Tue, 7 Mar 2023 08:00:00 +0100 Subject: a --- evc/Cargo.lock | 97 ++++++++++++++++++++++++++++++++++++++++ evc/Cargo.toml | 1 + evc/src/bin/decode.rs | 7 +-- evc/src/bin/encode.rs | 17 ++----- evc/src/codec/compress.rs | 9 ++-- evc/src/codec/decode.rs | 7 +-- evc/src/codec/encode/mod.rs | 4 +- lvc/.gitignore | 2 + lvc/Cargo.lock | 7 +++ lvc/Cargo.toml | 8 ++++ lvc/src/diff.rs | 105 ++++++++++++++++++++++++++++++++++++++++++++ lvc/src/encode.rs | 49 +++++++++++++++++++++ lvc/src/impls.rs | 68 ++++++++++++++++++++++++++++ lvc/src/lib.rs | 44 +++++++++++++++++++ lvc/src/main.rs | 72 ++++++++++++++++++++++++++++++ 15 files changed, 467 insertions(+), 30 deletions(-) create mode 100644 lvc/.gitignore create mode 100644 lvc/Cargo.lock create mode 100644 lvc/Cargo.toml create mode 100644 lvc/src/diff.rs create mode 100644 lvc/src/encode.rs create mode 100644 lvc/src/impls.rs create mode 100644 lvc/src/lib.rs create mode 100644 lvc/src/main.rs diff --git a/evc/Cargo.lock b/evc/Cargo.lock index 632b23f..ee6d574 100644 --- a/evc/Cargo.lock +++ b/evc/Cargo.lock @@ -91,6 +91,55 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -207,6 +256,7 @@ dependencies = [ "env_logger", "indicatif", "log", + "rayon", "rustdct", ] @@ -231,6 +281,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + [[package]] name = "num-complex" version = "0.4.2" @@ -259,6 +318,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" @@ -334,6 +403,28 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "regex" version = "1.7.0" @@ -389,6 +480,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "strength_reduce" version = "0.2.4" diff --git a/evc/Cargo.toml b/evc/Cargo.toml index 9669d1c..ceed3a6 100644 --- a/evc/Cargo.toml +++ b/evc/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" clap = { version = "*", features = ["derive"] } anyhow = "1.0.66" log = "0.4.17" +rayon = "1.7.0" env_logger = "0.10.0" rustdct = "0.7.1" indicatif = "*" diff --git a/evc/src/bin/decode.rs b/evc/src/bin/decode.rs index 4ac4fc8..fbf5624 100644 --- a/evc/src/bin/decode.rs +++ b/evc/src/bin/decode.rs @@ -17,9 +17,6 @@ use std::io::{BufReader, BufWriter}; pub struct DecodeArgs { #[arg(long)] debug: bool, - - #[arg(short, long, default_value = "8")] - jobs: usize, } fn main() -> anyhow::Result<()> { @@ -36,9 +33,7 @@ fn main() -> anyhow::Result<()> { } let size = header.resolution; - let config = DecodeConfig { - max_threads: args.jobs, - }; + let config = DecodeConfig {}; let progress_bar = ProgressBar::new(header.frame_count as u64); diff --git a/evc/src/bin/encode.rs b/evc/src/bin/encode.rs index e1f6730..6d18992 100644 --- a/evc/src/bin/encode.rs +++ b/evc/src/bin/encode.rs @@ -3,6 +3,7 @@ use clap::Parser; use indicatif::ProgressBar; use libreschmux::{ codec::{ + compress::compress_block, decode::{decode_block, DecodeConfig}, encode::{encode_block, EncodeConfig, EncodeMode}, }, @@ -11,6 +12,7 @@ use libreschmux::{ helpers::vector::Vec2, }; use log::info; +use rayon::{ThreadPool, ThreadPoolBuilder}; use std::io::{BufReader, BufWriter}; #[derive(Parser)] @@ -34,9 +36,6 @@ pub struct EncodeArgs { #[arg[short = 'L', long]] length: Option, - #[arg(short, long, default_value = "8")] - jobs: usize, - #[arg(short, long, default_value = "8")] min_block_size: isize, @@ -57,7 +56,6 @@ fn main() -> anyhow::Result<()> { weight_factor: 50.0, max_diff_area: 10_000, min_block_size: args.min_block_size, - max_threads: args.jobs, do_translate: !args.no_translate, do_linear_transform: !args.no_linear_transform, do_value_scale: !args.no_value_scale, @@ -75,8 +73,8 @@ fn main() -> anyhow::Result<()> { .context("writing header")?; let progress_bar = args.length.map(|len| ProgressBar::new(len as u64)); - let mut prev_frame = Frame::new(size); + for i in 0.. { info!("encode frame {i}"); let mut frame = Frame::read(&mut input, size).context("reading raw frame")?; @@ -106,14 +104,7 @@ fn main() -> anyhow::Result<()> { progress_bar.inc(1); } - decode_block( - &root, - frame.view_mut(), - prev_frame.view(), - &DecodeConfig { - max_threads: config.max_threads, - }, - ); + decode_block(&root, frame.view_mut(), prev_frame.view(), &DecodeConfig {}); prev_frame = frame; } diff --git a/evc/src/codec/compress.rs b/evc/src/codec/compress.rs index 688f04b..09d1f29 100644 --- a/evc/src/codec/compress.rs +++ b/evc/src/codec/compress.rs @@ -65,17 +65,20 @@ pub fn lit_compress(w: usize, h: usize, pixels: &[Pixel]) -> Vec { norm_dct_channel(w, h, &mut ch); for i in 0..w * h { - out.push(unsafe { std::mem::transmute(ch[i] as i8) }); + out.extend(unsafe { std::mem::transmute::<_, [u8; 4]>(ch[i]) }); } } out } + pub fn lit_decompress(compressed: &[u8], mut target: View<&mut Frame>) { let (w, h) = (target.size.x as usize, target.size.y as usize); for ci in 0..3 { let mut ch = compressed[ci * w * h..(ci + 1) * w * h] - .iter() - .map(|v| unsafe { std::mem::transmute::<_, i8>(*v) } as f32) + .chunks_exact(4) + .map(|v| unsafe { + std::mem::transmute::<_, f32>(TryInto::<[u8; 4]>::try_into(v).unwrap()) + }) .collect::>(); norm_idct_channel(w, h, &mut ch); for y in 0..h { diff --git a/evc/src/codec/decode.rs b/evc/src/codec/decode.rs index bb2aadf..b1f6a3d 100644 --- a/evc/src/codec/decode.rs +++ b/evc/src/codec/decode.rs @@ -3,9 +3,7 @@ use crate::{ block::Block, frame::Frame, helpers::threading::both_par, refsampler::Sampler, view::View, }; -pub struct DecodeConfig { - pub max_threads: usize, -} +pub struct DecodeConfig {} pub fn decode_block( block: &Block, @@ -24,10 +22,9 @@ pub fn decode_block( unsafe { std::mem::transmute::<_, [View<&'static Frame>; 2]>(prev.split()) }; let config = unsafe { std::mem::transmute::<_, &'static DecodeConfig>(config) }; - both_par( + rayon::join( move || decode_block(a, at, ap, config), move || decode_block(b, bt, bp, config), - config.max_threads, ); } Block::CompressedLiteral(data) => { diff --git a/evc/src/codec/encode/mod.rs b/evc/src/codec/encode/mod.rs index 8b7b342..43607aa 100644 --- a/evc/src/codec/encode/mod.rs +++ b/evc/src/codec/encode/mod.rs @@ -15,7 +15,6 @@ pub struct EncodeConfig { pub ref_thres: f64, pub max_diff_area: isize, pub min_block_size: isize, - pub max_threads: usize, pub weight_factor: f64, pub do_translate: bool, @@ -72,10 +71,9 @@ pub fn encode_block(view: View<&Frame>, prev: View<&Frame>, config: &EncodeConfi // only bother to do multithreading, when the block is big. let ((ad, a), (bd, b)) = if view.area() > 100 { - both_par( + rayon::join( || encode_block(av, ap, config), || encode_block(bv, bp, config), - config.max_threads, ) } else { (encode_block(av, ap, config), encode_block(bv, bp, config)) diff --git a/lvc/.gitignore b/lvc/.gitignore new file mode 100644 index 0000000..2f22d72 --- /dev/null +++ b/lvc/.gitignore @@ -0,0 +1,2 @@ +/data +/target diff --git a/lvc/Cargo.lock b/lvc/Cargo.lock new file mode 100644 index 0000000..15c8961 --- /dev/null +++ b/lvc/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "lvc" +version = "0.1.0" diff --git a/lvc/Cargo.toml b/lvc/Cargo.toml new file mode 100644 index 0000000..15df5d7 --- /dev/null +++ b/lvc/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "lvc" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/lvc/src/diff.rs b/lvc/src/diff.rs new file mode 100644 index 0000000..5c65c29 --- /dev/null +++ b/lvc/src/diff.rs @@ -0,0 +1,105 @@ +use crate::{Frame, Ref, View, P2}; +use std::simd::{i32x16, SimdInt}; + +// 4ms +pub fn diff([frame1, frame2]: [&Frame; 2], view: View, rp: Ref) -> u32 { + let mut k = 0; + for y in view.a.y..view.b.y { + for x in view.a.x..view.b.x { + let p1 = frame1[P2 { x, y }] + rp.color_off; + let p2 = frame2[P2 { x, y }]; + k += p1.r.abs_diff(p2.r) as u32 + + p1.g.abs_diff(p2.g) as u32 + + p1.b.abs_diff(p2.b) as u32; + } + } + k +} + +pub fn fast_diff([frame1, frame2]: [&Frame; 2], view: View, rp: Ref) -> u32 { + let mut k = 0; + + let mut diff_lanes = i32x16::from_array([0; 16]); + let mut k1 = [0; 16]; + let mut k2 = [0; 16]; + + let next_line = frame1.size.x as usize - view.size().x as usize; + let index_start = view.a.x as usize + view.a.y as usize * frame1.size.x as usize; + let index_end = view.b.x as usize + (view.b.y as usize - 1) * frame1.size.x as usize; + + let mut i = index_start; + let mut x = view.a.x; + let mut kfill = 0; + + while i < index_end { + k1[kfill] = frame1.pixels[i].r as i32; + k2[kfill] = frame2.pixels[i].r as i32; + kfill += 1; + k1[kfill] = frame1.pixels[i].g as i32; + k2[kfill] = frame2.pixels[i].g as i32; + kfill += 1; + k1[kfill] = frame1.pixels[i].b as i32; + k2[kfill] = frame2.pixels[i].b as i32; + kfill += 1; + + i += 1; + x += 1; + if x > view.b.x { + i += next_line; + x = view.a.x + } + + if kfill == 15 { + let pl1 = i32x16::from_array(k1); + let pl2 = i32x16::from_array(k2); + diff_lanes += (pl1 - pl2).abs(); + kfill = 0; + } + } + + return diff_lanes.reduce_sum() as u32; +} + +// pub fn fast_diff([frame1, frame2]: [&Frame; 2], view: View, rp: Ref) -> u32 { +// let mut k = 0; + +// let mut diff_lanes = i32x16::from_array([0; 16]); +// let mut k1 = [0; 16]; +// let mut k2 = [0; 16]; + +// let next_line = frame1.size.x as usize - view.size().x as usize; +// let index_start = view.a.x as usize + view.a.y as usize * frame1.size.x as usize; +// let index_end = view.b.x as usize + (view.b.y as usize - 1) * frame1.size.x as usize; + +// let mut i = index_start; +// let mut x = view.a.x; +// let mut kfill = 0; + +// while i < index_end { +// k1[kfill] = frame1.pixels[i].r as i32; +// k2[kfill] = frame2.pixels[i].r as i32; +// kfill += 1; +// k1[kfill] = frame1.pixels[i].g as i32; +// k2[kfill] = frame2.pixels[i].g as i32; +// kfill += 1; +// k1[kfill] = frame1.pixels[i].b as i32; +// k2[kfill] = frame2.pixels[i].b as i32; +// kfill += 1; + +// i += 1; +// x += 1; +// if x > view.b.x { +// i += next_line; +// x = view.a.x +// } + +// if kfill == 15 { +// let pl1 = i32x16::from_array(k1); +// let pl2 = i32x16::from_array(k2); +// diff_lanes += (pl1 - pl2).abs(); +// kfill = 0; +// } +// } + +// return diff_lanes.reduce_sum() as u32; +// } diff --git a/lvc/src/encode.rs b/lvc/src/encode.rs new file mode 100644 index 0000000..7737626 --- /dev/null +++ b/lvc/src/encode.rs @@ -0,0 +1,49 @@ +use crate::diff::diff; +use crate::{Frame, Ref, View, P2}; + +pub fn encode(last_frame: &Frame, frame: &Frame, view: View) { + let rp = Ref::default(); + + let d = diff([last_frame, frame], view, rp); +} + +pub fn split(view: View) -> [View; 2] { + let s = view.size(); + if s.x > s.y { + let mid_x = (view.a.x + view.b.x) / 2; + [ + View { + a: view.a, + b: P2 { + x: mid_x, + y: view.b.y, + }, + }, + View { + a: P2 { + x: mid_x, + y: view.a.y, + }, + b: view.b, + }, + ] + } else { + let mid_y = (view.a.y + view.b.y) / 2; + [ + View { + a: view.a, + b: P2 { + x: view.b.x, + y: mid_y, + }, + }, + View { + a: P2 { + x: view.a.x, + y: mid_y, + }, + b: view.b, + }, + ] + } +} diff --git a/lvc/src/impls.rs b/lvc/src/impls.rs new file mode 100644 index 0000000..01eb1c6 --- /dev/null +++ b/lvc/src/impls.rs @@ -0,0 +1,68 @@ +use crate::{Frame, Pixel, View, P2}; +use std::ops::{Add, Index, IndexMut, Sub}; + +impl Add for Pixel { + type Output = Pixel; + #[inline] + fn add(self, rhs: Self) -> Self::Output { + Self { + r: self.r + rhs.r, + g: self.g + rhs.g, + b: self.b + rhs.b, + } + } +} +impl P2 { + pub const ZERO: P2 = P2 { x: 0, y: 0 }; + #[inline] + pub fn area(&self) -> usize { + (self.x * self.y) as usize + } +} +impl View { + #[inline] + pub fn all(b: P2) -> Self { + Self { + a: P2::default(), + b, + } + } + #[inline] + pub fn size(&self) -> P2 { + self.b - self.a + } +} +impl Add for P2 { + type Output = P2; + #[inline] + fn add(self, rhs: Self) -> Self::Output { + Self { + x: self.x + rhs.x, + y: self.y + rhs.y, + } + } +} +impl Sub for P2 { + type Output = P2; + #[inline] + fn sub(self, rhs: Self) -> Self::Output { + Self { + x: self.x - rhs.x, + y: self.y - rhs.y, + } + } +} + +impl Index for Frame { + type Output = Pixel; + #[inline] + fn index(&self, P2 { x, y }: P2) -> &Self::Output { + &self.pixels[x as usize + (y as usize * self.size.x as usize)] + } +} +impl IndexMut for Frame { + #[inline] + fn index_mut(&mut self, P2 { x, y }: P2) -> &mut Self::Output { + &mut self.pixels[x as usize + (y as usize * self.size.x as usize)] + } +} diff --git a/lvc/src/lib.rs b/lvc/src/lib.rs new file mode 100644 index 0000000..ad284df --- /dev/null +++ b/lvc/src/lib.rs @@ -0,0 +1,44 @@ +#![feature(portable_simd)] + +pub mod diff; +pub mod encode; +pub mod impls; + +pub type PixelValue = u8; + +#[repr(C, align(2))] +#[derive(Debug, Clone, Copy, Default)] +pub struct Pixel { + pub r: PixelValue, + pub g: PixelValue, + pub b: PixelValue, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct P2 { + pub x: i32, + pub y: i32, +} + +pub struct Frame { + pub size: P2, + pub pixels: Vec, +} + +pub struct View { + pub a: P2, + pub b: P2, +} + +#[derive(Debug, Clone)] +pub enum Block { + Lit(Vec), + Split([Box; 2]), + Ref(Ref), +} + +#[derive(Debug, Clone, Default)] +pub struct Ref { + pub pos_off: P2, + pub color_off: Pixel, +} diff --git a/lvc/src/main.rs b/lvc/src/main.rs new file mode 100644 index 0000000..523f22c --- /dev/null +++ b/lvc/src/main.rs @@ -0,0 +1,72 @@ +use lvc::{diff::{diff, fast_diff}, Frame, Pixel, Ref, View, P2, PixelValue}; +use std::{ + io::{stdin, stdout, BufReader, BufWriter, Read, Write}, + time::Instant, +}; + +fn main() { + let size = P2 { x: 1920, y: 1080 }; + + // let frame1 = read_frame(size); + // let frame2 = read_frame(size); + + // encode(&frame1, &frame2, View::all(size)); + + let mut last_frame = Frame { + pixels: vec![Pixel::default(); size.area()], + size, + }; + + let mut stdin = BufReader::new(stdin()); + let mut stdout = BufWriter::new(stdout()); + + loop { + let frame = read_frame(&mut stdin, size); + + let t1 = Instant::now(); + let d = diff([&last_frame, &frame], View::all(size), Ref::default()); + let t2 = Instant::now(); + eprintln!("diff {:?} {d}", t2 - t1); + + let t1 = Instant::now(); + let d = fast_diff([&last_frame, &frame], View::all(size), Ref::default()); + let t2 = Instant::now(); + eprintln!("diff2 {:?} {d}", t2 - t1); + + write_frame(&mut stdout, &frame); + last_frame = frame; + } +} + +fn read_frame(inp: &mut impl Read, size: P2) -> Frame { + let mut f = Frame { + size, + pixels: vec![Pixel::default(); size.area()], + }; + + for y in 0..size.y { + for x in 0..size.x { + let mut cc = [0u8; 3]; + inp.read_exact(&mut cc).unwrap(); + f[P2 { x, y }] = Pixel { + r: cc[0] as PixelValue, + g: cc[1] as PixelValue, + b: cc[2] as PixelValue, + }; + } + } + f +} + +fn write_frame(out: &mut impl Write, frame: &Frame) { + for y in 0..frame.size.y { + for x in 0..frame.size.x { + let p = frame[P2 { x, y }]; + let mut cc = [0u8; 3]; + cc[0] = p.r as u8; + cc[1] = p.g as u8; + cc[2] = p.b as u8; + out.write_all(&mut cc).unwrap(); + } + } +} -- cgit v1.2.3-70-g09d2