From 2d127740cf30cfbd3875a406ecc42ef6ebde60e4 Mon Sep 17 00:00:00 2001 From: Lia Lenckowski Date: Wed, 20 Sep 2023 13:34:35 +0200 Subject: ~3x speed improvement: replace priority queues and prim's algoritm with sorted vectors and krushkal's algorithm --- Cargo.lock | 44 +++++++++++++-------------------- Cargo.toml | 2 +- src/tsp_approx.rs | 73 +++++++++++++++---------------------------------------- 3 files changed, 37 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3871c28..235df0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,6 +77,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-vec" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f59bbe95d4e52a6398ec21238d31577f2b28a9d86807f06ca59d191d8440d0bb" + [[package]] name = "bit_field" version = "0.10.2" @@ -289,8 +295,8 @@ dependencies = [ "image", "indicatif", "multiset", + "partitions", "pathdiff", - "priority-queue", "rayon", "reflink-copy", "serde", @@ -428,12 +434,6 @@ dependencies = [ "crunchy", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "heck" version = "0.4.1" @@ -465,16 +465,6 @@ dependencies = [ "tiff", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown", -] - [[package]] name = "indicatif" version = "0.17.6" @@ -673,6 +663,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "partitions" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9249745fe5a60e2ebd69cc649af1baf28fa3f4606b24146490124405401510d8" +dependencies = [ + "bit-vec", + "rayon", +] + [[package]] name = "pathdiff" version = "0.2.1" @@ -718,16 +718,6 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b" -[[package]] -name = "priority-queue" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff39edfcaec0d64e8d0da38564fad195d2d51b680940295fcc307366e101e61" -dependencies = [ - "autocfg", - "indexmap", -] - [[package]] name = "proc-macro2" version = "1.0.66" diff --git a/Cargo.toml b/Cargo.toml index 20d6077..6fa36c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,6 @@ edition = "2021" image = "0" xdg = "2" clap = { version = "4", features = ["derive"] } -priority-queue = "1" rayon = "1" indicatif = { version = "0", features = ["rayon"] } sled = "0" @@ -21,3 +20,4 @@ anyhow = "1" pathdiff = "0" reflink-copy = "0" multiset = "0" +partitions = "0" diff --git a/src/tsp_approx.rs b/src/tsp_approx.rs index 6a19b22..4c886cd 100644 --- a/src/tsp_approx.rs +++ b/src/tsp_approx.rs @@ -1,72 +1,39 @@ use indicatif::{ProgressBar, ProgressStyle}; use multiset::HashMultiSet; -use priority_queue::PriorityQueue; -use std::{ - cmp::Ordering, - collections::{HashMap, HashSet}, -}; +use partitions::partition_vec::PartitionVec; +use std::collections::{HashMap, HashSet}; use crate::{MetricElem, TspAlg}; -// wrapper struct to -// - reverse the ordering -// - implement Ord, even though the type is backed by an f64 -#[repr(transparent)] -#[derive(Debug, PartialEq)] -struct DownOrd(f64); -impl Eq for DownOrd {} -impl PartialOrd for DownOrd { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} -impl Ord for DownOrd { - fn cmp(&self, other: &Self) -> Ordering { - self.0.partial_cmp(&other.0).unwrap().reverse() - } -} - fn get_mst(embeds: &[M]) -> HashMap> where M: MetricElem, { let num_embeds = embeds.len(); - let mut possible_edges = - PriorityQueue::with_capacity((num_embeds * num_embeds - num_embeds) / 2); + let mut possible_edges = Vec::with_capacity((num_embeds * num_embeds - num_embeds) / 2); let mut mst = HashMap::with_capacity(num_embeds); - // here, we start at 0. - // we might get a better result in the end if we started with a vertex next - // to the lowest-cost edge, but we don't know which one that is (though we - // could compute that without changing our asymptotic complexity) + // insert all edges we could ever use mst.insert(0, Vec::new()); - for i in 1..num_embeds { - possible_edges.push((0, i), DownOrd(embeds[0].dist(&embeds[i]))); + for a in 0..num_embeds { + for b in a + 1..num_embeds { + possible_edges.push((embeds[a].dist(&embeds[b]), a, b)); + } } - // prims algorithm or something like that - while mst.len() < num_embeds { - // find the edge with the least cost that connects us to a new vertex - let (new, old) = loop { - let ((a, b), _) = possible_edges.pop().unwrap(); - if !mst.contains_key(&a) { - break (a, b); - } else if !mst.contains_key(&b) { - break (b, a); - } - }; - mst.insert(new, vec![old]); - - // insert all the new edges we could take - mst.entry(old).and_modify(|v| v.push(new)); - for i in 0..num_embeds { - // don't consider edges taking us to nodes we already visited - if mst.contains_key(&i) { - continue; - } + let mut subtrees: PartitionVec = (0..num_embeds).collect(); + + possible_edges.sort_unstable_by(|(da, _, _), (db, _, _)| da.partial_cmp(db).unwrap()); + for (_, a, b) in possible_edges.into_iter() { + if !subtrees.same_set(a, b) { + mst.entry(a).or_default().push(b); + mst.entry(b).or_default().push(a); - possible_edges.push((new, i), DownOrd(embeds[new].dist(&embeds[i]))); + subtrees.union(a, b); + } + if mst.len() >= num_embeds { + break; } } @@ -110,7 +77,6 @@ where for &x in verts { for &y in verts { if x != y { - //possible_edges.push((x, y), DownOrd(embeds[x].dist(&embeds[y]))); possible_edges.push((embeds[x].dist(&embeds[y]), x, y)); } } @@ -165,7 +131,6 @@ fn euler_tour( match e.iter().next() { Some(&next) => { e.remove(&next); - // TODO das hier lässt vllt deduplizieren graph.get_mut(&next).unwrap().remove(&cur); r.entry(cur).or_default().push((pprev, prev, next)); -- cgit v1.2.3-70-g09d2