From e2a36c1e5822e533983a9640d7c08cb82812be67 Mon Sep 17 00:00:00 2001 From: metamuffin Date: Tue, 6 May 2025 18:52:25 +0200 Subject: parallel bulk insert --- .cargo/config.toml | 2 + Cargo.lock | 52 ++++++++++++++++++++++++ mtree-test/Cargo.toml | 1 + mtree-test/src/bin/encode.rs | 94 ++++++++++++++++++++++++++++++++++++++------ mtree-test/src/lib.rs | 2 +- 5 files changed, 137 insertions(+), 14 deletions(-) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..ddff440 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C", "target-cpu=native"] diff --git a/Cargo.lock b/Cargo.lock index 00dd49d..44cff8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,11 +2,42 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "ebml-struct" version = "0.1.0" source = "git+https://codeberg.org/metamuffin/ebml-struct#fcefaa67b85b96b17cec2d1c7f7c53998520559b" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "framework" version = "0.1.0" @@ -26,4 +57,25 @@ version = "0.1.0" dependencies = [ "framework", "glam", + "rayon", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", ] diff --git a/mtree-test/Cargo.toml b/mtree-test/Cargo.toml index 5dbe38c..842c7f1 100644 --- a/mtree-test/Cargo.toml +++ b/mtree-test/Cargo.toml @@ -6,3 +6,4 @@ edition = "2024" [dependencies] framework = { path = "../framework" } glam = "0.30.3" +rayon = "1.10.0" diff --git a/mtree-test/src/bin/encode.rs b/mtree-test/src/bin/encode.rs index 5a166c8..80f3499 100644 --- a/mtree-test/src/bin/encode.rs +++ b/mtree-test/src/bin/encode.rs @@ -1,6 +1,10 @@ use framework::{BitstreamFilter, bitstream_filter_main}; use glam::{I16Vec2, i16vec2}; use mtree_test::{AbsRef, BLOCK_SIZE, Frame, LastFrames, frame_to_frame_rect_copy}; +use rayon::{ + iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}, + join, +}; use std::{collections::VecDeque, io::Result, time::Instant}; fn main() -> Result<()> { @@ -11,6 +15,9 @@ struct Enc { res: I16Vec2, last: LastFrames, tree: MTree, + frame_num: u64, + + use_bulk_insert: bool, } impl BitstreamFilter for Enc { const INPUT_CODEC_ID: &str = "V_UNCOMPRESSED"; @@ -27,6 +34,8 @@ impl BitstreamFilter for Enc { off: i16vec2(0, 0), frame: u64::MAX, }), + frame_num: 0, + use_bulk_insert: true, } } fn process_block(&mut self, frame: Vec) -> Vec { @@ -51,7 +60,7 @@ impl BitstreamFilter for Enc { boff, ) }; - if rdist < (BLOCK_SIZE * BLOCK_SIZE * 20) as u32 { + if rdist < BLOCK_SIZE as u32 * BLOCK_SIZE as u32 * 50 { out.push(1); out.extend(r.frame.to_le_bytes()); out.extend(r.off.x.to_le_bytes()); @@ -76,23 +85,39 @@ impl BitstreamFilter for Enc { self.last.frames.push_back(frame); let frame_index = self.last.frame_offset + self.last.frames.len() as u64 - 1; - let t = Instant::now(); - for xo in 0..self.res.x - BLOCK_SIZE { - for yo in 0..self.res.y - BLOCK_SIZE { - self.tree.insert( - self.res, - &self.last, - AbsRef { - frame: frame_index, - off: i16vec2(xo, yo), - }, - ) + if self.frame_num % 30 == 0 { + let t = Instant::now(); + if self.use_bulk_insert { + let mut rs = Vec::new(); + for xo in 0..self.res.x - BLOCK_SIZE { + for yo in 0..self.res.y - BLOCK_SIZE { + rs.push(AbsRef { + frame: frame_index, + off: i16vec2(xo, yo), + }); + } + } + self.tree.bulk_insert(self.res, &self.last, rs); + } else { + for xo in 0..self.res.x - BLOCK_SIZE { + for yo in 0..self.res.y - BLOCK_SIZE { + self.tree.insert( + self.res, + &self.last, + AbsRef { + frame: frame_index, + off: i16vec2(xo, yo), + }, + ) + } + } } + eprintln!("insert\t{:?}", t.elapsed()); } - eprintln!("insert\t{:?}", t.elapsed()); eprintln!("depth={} refs={num_refs}", self.tree.depth()); + self.frame_num += 1; out } } @@ -114,6 +139,49 @@ impl MTree { MTree::Leaf(r) => *r, } } + pub fn bulk_insert(&mut self, res: I16Vec2, last: &LastFrames, mut rs: Vec) { + match self { + MTree::Branch(b) => { + let dirs = rs + .par_iter() + .map(|r| { + distance_absref(res, last, b[0].center(), *r) + < distance_absref(res, last, b[1].center(), *r) + }) + .collect::>(); + + let mut rs0 = Vec::new(); + let mut rs1 = Vec::new(); + for (r, d) in rs.into_iter().zip(dirs) { + if d { + rs0.push(r); + } else { + rs1.push(r); + } + } + let [b0, b1] = b.as_mut(); + join( + || b0.bulk_insert(res, last, rs0), + || b1.bulk_insert(res, last, rs1), + ); + } + MTree::Leaf(l) => { + if rs.is_empty() { + return; + } + if l.frame == u64::MAX { + *l = rs.pop().unwrap(); + if rs.is_empty() { + return; + } + } + let mut b = + MTree::Branch(Box::new([MTree::Leaf(*l), MTree::Leaf(rs.pop().unwrap())])); + b.bulk_insert(res, last, rs); + *self = b; + } + } + } pub fn insert(&mut self, res: I16Vec2, last: &LastFrames, r: AbsRef) { match self { MTree::Branch(b) => { diff --git a/mtree-test/src/lib.rs b/mtree-test/src/lib.rs index bf0117d..fd282db 100644 --- a/mtree-test/src/lib.rs +++ b/mtree-test/src/lib.rs @@ -1,7 +1,7 @@ use glam::{I16Vec2, i16vec2}; use std::{collections::VecDeque, ops::Range}; -pub const BLOCK_SIZE: i16 = 4; +pub const BLOCK_SIZE: i16 = 8; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct AbsRef { -- cgit v1.2.3-70-g09d2