aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLia Lenckowski <lialenck@protonmail.com>2024-11-28 16:16:23 +0100
committerLia Lenckowski <lialenck@protonmail.com>2024-11-28 16:16:23 +0100
commite71c5d901beec2b15052785893f7250f958f7719 (patch)
tree832661f0b122f036adc8d85ebaa05ea062527b2a
parent552bc452de76fd6760c9946c641a2de8217ff1d2 (diff)
downloadembeddings-sort-e71c5d901beec2b15052785893f7250f958f7719.tar
embeddings-sort-e71c5d901beec2b15052785893f7250f958f7719.tar.bz2
embeddings-sort-e71c5d901beec2b15052785893f7250f958f7719.tar.zst
wrap help, add flag for path rotation, change default 2-opt iteration count
-rw-r--r--Cargo.lock13
-rw-r--r--Cargo.toml4
-rw-r--r--README.md9
-rw-r--r--src/main.rs15
-rw-r--r--src/tsp_approx.rs9
5 files changed, 39 insertions, 11 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8b439c2..0812379 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -290,6 +290,7 @@ dependencies = [
"anstyle",
"clap_lex",
"strsim",
+ "terminal_size",
]
[[package]]
@@ -526,7 +527,7 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "embeddings-sort"
-version = "0.3.1"
+version = "0.4.0"
dependencies = [
"ahash",
"anyhow",
@@ -1997,6 +1998,16 @@ dependencies = [
]
[[package]]
+name = "terminal_size"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef"
+dependencies = [
+ "rustix",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index de2391e..bfc9bd1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,12 +1,12 @@
[package]
name = "embeddings-sort"
-version = "0.3.1"
+version = "0.4.0"
edition = "2021"
[dependencies]
ahash = "0"
anyhow = "1"
-clap = { version = "4", features = ["derive"] }
+clap = { version = "4", features = ["derive", "wrap_help"] }
dirs = "5"
fastembed = "4"
image = "0"
diff --git a/README.md b/README.md
index e6189f8..160df50 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# embeddings-sort
-This program can sort images such that ones with similar motives are close together. This is accomplished by using [AI](https://github.com/minimaxir/imgbeddings) to extract the meaning of the image, and then approximating a travelling-salesperson-tour through all of them.
+This program can sort images such that ones with similar content are close together. This is accomplished by using AI to extract the meaning of the image, and then approximating a travelling-salesperson-tour through all of them.
As a bonus feature, this program can also sort the images by hue, brightness or color, though the results for this could be improved by using a less generalized algorithm.
The sorting can be accessed by letting the progam print the image paths in order, or by copying/symlinking the images into a new directory.
@@ -11,7 +11,7 @@ Detailed usage:
Usage: embeddings-sort [OPTIONS] [IMAGES]...
Arguments:
- [IMAGES]...
+ [IMAGES]...
Options:
-e, --embedder <EMBEDDER> Characteristic to sort by [default: content-angular-distance] [possible values: brightness, hue, color, content-euclidean, content-angular-distance, content-manhatten]
@@ -21,7 +21,8 @@ Options:
-0, --stdout0 Write sorted paths into stdout, null-separated. Overrides -c
-b, --benchmark Output total tour length to stderr
--tsp-approx <TSP_APPROX> Algorithm for TSP approximation. Leave as default if unsure [default: christofides] [possible values: mst-dfs, christofides]
- -r <REFINE> Number of 2-Opt refinement steps. Has quickly diminishing returns [default: 3]
+ -r <REFINE> Number of 2-Opt refinement steps. Has quickly diminishing returns [default: 5]
+ --no-rotate Don't try to improve result by rotating the output path such that less emphasis is put on the similarity of the first and last image
-i, --ignore-errors Ignore failed embeddings
--hash-seed <HASH_SEED> Seed for hashing. Random by default
-h, --help Print help
@@ -30,7 +31,7 @@ Options:
## Insides
The christofides implementation uses an approximated min-weight matching algorithm, which may be non-ideal, though I haven't benchmarked how much of a difference it makes (mainly due to the implementation complexity of an exact algorithm, which would also increase the implementations complexity from O(n²) to O(n³) where n is the number of given images).
-The 2-Opt refinement algorithm uses a doubly linked list with implicit iteration order to be able to do 2-Opt swaps in O(1), bringing a 2-Opt iteration to O(n²) complexity.
+The 2-Opt refinement algorithm uses a doubly linked list with implicit iteration order to be able to do 2-Opt swaps in O(1), bringing a 2-Opt iteration to O(n²) complexity. Unless `--no-rotate` is given, the calculated path is rotated in-between 2-Opt iterations such that the longest edge is between the first and the last image - as this edge is of no importance for the output of the algorithm, this improves output quality, though it does not improve the output according to the TSP problem formulation.
## Compatibility
`embeddings-sort` has, at some point, worked on both linux and windows.
diff --git a/src/main.rs b/src/main.rs
index 45621cf..3055768 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -67,9 +67,14 @@ struct Args {
tsp_approx: TspBaseAlg,
/// Number of 2-Opt refinement steps. Has quickly diminishing returns
- #[arg(short = 'r', default_value = "3")]
+ #[arg(short = 'r', default_value = "5")]
refine: usize,
+ /// Don't try to improve result by rotating the output path such that less emphasis is put on
+ /// the similarity of the first and last image
+ #[arg(long)]
+ no_rotate: bool,
+
/// Ignore failed embeddings
#[arg(short = 'i', long)]
ignore_errors: bool,
@@ -174,7 +179,13 @@ where
})
.unzip();
- let (tsp_path, total_dist) = tsp(&embeds, &args.tsp_approx, args.refine, &args.hash_seed);
+ let (tsp_path, total_dist) = tsp(
+ &embeds,
+ &args.tsp_approx,
+ args.refine,
+ !args.no_rotate,
+ &args.hash_seed,
+ );
Ok((
tsp_path.iter().map(|i| images[*i].clone()).collect(),
diff --git a/src/tsp_approx.rs b/src/tsp_approx.rs
index a23a726..3ce2bd5 100644
--- a/src/tsp_approx.rs
+++ b/src/tsp_approx.rs
@@ -413,6 +413,7 @@ pub(crate) fn tsp<M>(
embeds: &[M],
alg: &TspBaseAlg,
refinements: usize,
+ rotate: bool,
hash_seed: &Option<u64>,
) -> (Vec<usize>, f64)
where
@@ -441,7 +442,9 @@ where
};
for _ in 0..refinements {
- rotate_towards_optimum(&dc, &mut tour);
+ if rotate {
+ rotate_towards_optimum(&dc, &mut tour);
+ }
let res = refine_2_opt(&dc, tour);
tour = res.1;
if !res.0 {
@@ -449,7 +452,9 @@ where
}
}
- rotate_towards_optimum(&dc, &mut tour);
+ if rotate {
+ rotate_towards_optimum(&dc, &mut tour);
+ }
let mut total_dist = 0.;
for i in 0..tour.len() - 1 {