aboutsummaryrefslogtreecommitdiff
path: root/src/unityfs
diff options
context:
space:
mode:
authormetamuffin <metamuffin@disroot.org>2025-03-15 15:18:40 +0100
committermetamuffin <metamuffin@disroot.org>2025-03-15 15:18:40 +0100
commitd836e24357b81496c61f3cc9195ba36758523578 (patch)
tree0028aee5a453cc761dd39e92430a35c55147537f /src/unityfs
parent07fc3656274117c211ca0d6a54926d390a4d9b68 (diff)
downloadunity-tools-d836e24357b81496c61f3cc9195ba36758523578.tar
unity-tools-d836e24357b81496c61f3cc9195ba36758523578.tar.bz2
unity-tools-d836e24357b81496c61f3cc9195ba36758523578.tar.zst
more abstraction around unityfs to read multiple files from a single reader
Diffstat (limited to 'src/unityfs')
-rw-r--r--src/unityfs/block_reader.rs99
-rw-r--r--src/unityfs/header.rs175
-rw-r--r--src/unityfs/mod.rs95
-rw-r--r--src/unityfs/multi_reader.rs50
4 files changed, 419 insertions, 0 deletions
diff --git a/src/unityfs/block_reader.rs b/src/unityfs/block_reader.rs
new file mode 100644
index 0000000..8aa18c3
--- /dev/null
+++ b/src/unityfs/block_reader.rs
@@ -0,0 +1,99 @@
+use super::BlockInfo;
+use log::{debug, trace};
+use std::{
+ io::{ErrorKind, Read, Seek, SeekFrom},
+ sync::Arc,
+};
+
+/// Reader that yields the decompressed byte stream formed by concatenating
+/// a sequence of compressed blocks read from `inner`.
+pub struct BlockReader<T> {
+    /// Metadata (compressed/decompressed size, scheme) for every block.
+    blocks: Arc<Vec<BlockInfo>>,
+    /// Underlying reader supplying the compressed block data.
+    inner: T,
+    /// Absolute offset in `inner` where the first compressed block begins;
+    /// used as the base when computing compressed offsets in `seek`.
+    inner_seek_offset: u64,
+    /// Index of the *next* block to load (current block is `nblock_index - 1`).
+    nblock_index: usize,
+    /// Decompressed contents of the current block.
+    cblock_data: Vec<u8>,
+    /// Read cursor within `cblock_data`.
+    cblock_off: usize,
+}
+
+impl<T: Read> BlockReader<T> {
+    /// Creates a reader over `blocks`. `inner` is assumed to already be
+    /// positioned at `inner_seek_offset`, the start of the compressed data.
+    pub fn new(blocks: Arc<Vec<BlockInfo>>, inner: T, inner_seek_offset: u64) -> Self {
+        Self {
+            blocks,
+            inner,
+            inner_seek_offset,
+            nblock_index: 0,
+            cblock_data: Vec::new(),
+            cblock_off: 0,
+        }
+    }
+    /// Reads and decompresses the block at `nblock_index` from `inner`,
+    /// replacing the current block buffer and resetting the cursor.
+    ///
+    /// NOTE(review): indexes `self.blocks` without a bounds check, so
+    /// loading past the final block panics instead of signalling EOF.
+    pub fn load_next_block(&mut self) -> std::io::Result<()> {
+        trace!("loading block {}", self.nblock_index);
+        let block = &self.blocks[self.nblock_index];
+        let mut comp_buf = vec![0; block.comp_size as usize];
+        self.inner.read_exact(&mut comp_buf)?;
+        let decomp_buf = block
+            .comp_scheme
+            .decompress(comp_buf, block.decomp_size as usize)
+            // Map decompression failure onto io::Error so this method can sit
+            // behind the std `Read`/`Seek` traits.
+            .map_err(|e| {
+                std::io::Error::new(
+                    ErrorKind::InvalidData,
+                    format!("decompression failure: {e}"),
+                )
+            })?;
+        self.nblock_index += 1;
+        self.cblock_data = decomp_buf;
+        self.cblock_off = 0;
+        Ok(())
+    }
+}
+
+impl<T: Read> Read for BlockReader<T> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        // Current block exhausted (or nothing loaded yet): pull in the next one.
+        if self.cblock_off >= self.cblock_data.len() {
+            self.load_next_block()?;
+        }
+        // Serve at most the remainder of the current block; callers needing
+        // more data will call `read` again (standard short-read semantics).
+        let size = (self.cblock_data.len() - self.cblock_off).min(buf.len());
+        buf[..size].copy_from_slice(&self.cblock_data[self.cblock_off..self.cblock_off + size]);
+        self.cblock_off += size;
+        Ok(size)
+    }
+}
+impl<T: Seek + Read> Seek for BlockReader<T> {
+    /// Seeks within the decompressed stream. Only `SeekFrom::Start` is
+    /// supported; other variants panic via `unimplemented!`.
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        let SeekFrom::Start(pos) = pos else {
+            unimplemented!()
+        };
+        debug!("seek decomp to {pos}");
+        // Walk the block table, accumulating compressed and decompressed
+        // offsets until the block containing `pos` is found.
+        let mut comp_off = self.inner_seek_offset;
+        let mut decomp_off = 0;
+        let mut target_block = None;
+        for (i, b) in self.blocks.iter().enumerate() {
+            if pos <= decomp_off + b.decomp_size as u64 {
+                target_block = Some(i);
+                break;
+            }
+            decomp_off += b.decomp_size as u64;
+            comp_off += b.comp_size as u64;
+        }
+
+        let Some(i) = target_block else {
+            return Err(std::io::Error::new(
+                ErrorKind::UnexpectedEof,
+                "seek out of bounds",
+            ));
+        };
+
+        // Offset of the target position inside its block.
+        let block_off = pos - decomp_off;
+        debug!("target is block={i} offset={block_off}");
+        // `nblock_index` points one past the loaded block, so `i + 1` means
+        // the target block is already in `cblock_data`: just move the cursor.
+        if self.nblock_index == i + 1 {
+            debug!("intra-block seek")
+        } else {
+            debug!("seek comp to {comp_off}");
+            self.inner.seek(SeekFrom::Start(comp_off))?;
+            self.nblock_index = i;
+            self.load_next_block()?;
+        }
+        self.cblock_off = block_off as usize;
+
+        Ok(pos)
+    }
+}
diff --git a/src/unityfs/header.rs b/src/unityfs/header.rs
new file mode 100644
index 0000000..d4fc89f
--- /dev/null
+++ b/src/unityfs/header.rs
@@ -0,0 +1,175 @@
+use crate::helper::{AlignExt, ReadExt};
+use anyhow::{Result, anyhow, bail};
+use humansize::DECIMAL;
+use log::{debug, info};
+use std::io::{Cursor, Read, Seek, SeekFrom};
+
+/// Directory entry of a UnityFS archive: one named file ("node") located
+/// at `offset` within the decompressed block stream.
+#[derive(Debug, Clone)]
+pub struct NodeInfo {
+    pub name: String,
+    pub size: u64,
+    // Offset of this node within the decompressed data, not the raw file.
+    pub(super) offset: u64,
+    // Status/flags field from the directory; parsed but currently unused.
+    _status: u32,
+}
+
+/// Sizes and compression scheme of a single data block, as listed in the
+/// archive's block index.
+pub struct BlockInfo {
+    pub comp_size: u32,
+    pub decomp_size: u32,
+    pub comp_scheme: CompressionScheme,
+}
+
+/// Parsed UnityFS archive header: version strings plus the node directory.
+pub struct UnityFSHeader {
+    pub(crate) nodes: Vec<NodeInfo>,
+    pub file_version: u32,
+    pub player_version: String,
+    pub unity_version: String,
+}
+
+impl UnityFSHeader {
+    /// Parses the UnityFS signature, version strings and block index from
+    /// `file`, leaving the reader 16-byte aligned at the start of the block
+    /// data.
+    ///
+    /// Returns the header together with the list of block descriptors.
+    /// Fails on an unknown signature or compression scheme.
+    pub fn read(mut file: impl Read + Seek) -> Result<(Self, Vec<BlockInfo>)> {
+        let signature = file.read_cstr()?;
+        if signature.as_str() != "UnityFS" {
+            bail!("unknown signature {signature:?}")
+        }
+
+        // All header integers are big-endian.
+        let file_version = file.read_u32_be()?;
+        let player_version = file.read_cstr()?;
+        let unity_version = file.read_cstr()?;
+        let size = file.read_u64_be()?;
+        let blockindex_comp_size = file.read_u32_be()?;
+        let blockindex_decomp_size = file.read_u32_be()?;
+        let flags = file.read_u32_be()?;
+
+        // Low 6 bits of the flags select how the block index itself is
+        // compressed.
+        let meta_comp_scheme = CompressionScheme::from_flag_num(flags as u8).ok_or(anyhow!(
+            "unknown block compression 0x{:02x}",
+            (flags & 0x3f) as u8
+        ))?;
+        // Flag bits decoded below: 0x80 = block index stored at end of file,
+        // 0x40 = index contains a node directory, 0x200 = padding required.
+        let blockindex_eof = flags & 0x80 != 0;
+        let blockindex_has_directory = flags & 0x40 != 0;
+        // NOTE(review): this flag is decoded and logged but never acted on.
+        let blockindex_need_padding = flags & 0x200 != 0;
+
+        info!("File Version: {file_version:?}");
+        info!("Player Version: {player_version:?}");
+        info!("Unity Version: {unity_version:?}");
+        debug!("size={size:?}");
+        debug!("meta_comp_size={blockindex_comp_size:?}");
+        debug!("meta_decomp_size={blockindex_decomp_size:?}");
+        debug!("flags={flags:?}");
+        debug!("meta_comp_scheme={meta_comp_scheme:?}");
+        debug!("blockindex_eof={blockindex_eof:?}");
+        debug!("blockindex_has_directory={blockindex_has_directory:?}");
+        debug!("blockindex_need_padding={blockindex_need_padding:?}");
+
+        // Read the (possibly compressed) block index, which lives either
+        // directly after the header or at the very end of the file.
+        let mut blockindex = {
+            let restore_position = if blockindex_eof {
+                let pos = file.stream_position()?;
+                file.seek(SeekFrom::End(-(blockindex_comp_size as i64)))?;
+                Some(pos)
+            } else {
+                None
+            };
+
+            let mut blockindex = vec![0u8; blockindex_comp_size as usize];
+            file.read_exact(&mut blockindex)?;
+
+            // Jump back so parsing continues where the header ended.
+            if let Some(pos) = restore_position {
+                file.seek(SeekFrom::Start(pos))?;
+            }
+            let blockindex =
+                meta_comp_scheme.decompress(blockindex, blockindex_decomp_size as usize)?;
+            Cursor::new(blockindex)
+        };
+
+        // Block data starts at the next 16-byte boundary.
+        file.align(16)?;
+
+        // Skip 16 bytes at the start of the index (presumably a hash of the
+        // uncompressed data — TODO confirm); the value is discarded.
+        blockindex.read_u128_be()?;
+
+        let num_blocks = blockindex.read_u32_be()?;
+        info!("File has {num_blocks} blocks");
+        let mut blocks = Vec::new();
+        for _ in 0..num_blocks {
+            let decomp_size = blockindex.read_u32_be()?;
+            let comp_size = blockindex.read_u32_be()?;
+            let flags = blockindex.read_u16_be()?;
+            // Per-block flags use the same low-6-bit compression encoding.
+            let comp_scheme = CompressionScheme::from_flag_num(flags as u8)
+                .ok_or(anyhow!("unknown block compression 0x{:02x}", flags & 0x3f))?;
+            blocks.push(BlockInfo {
+                comp_size,
+                decomp_size,
+                comp_scheme,
+            })
+        }
+
+        // Node directory: offset, size, status and name of each contained file.
+        let num_nodes = blockindex.read_u32_be()?;
+        debug!("num_nodes={num_nodes:?}");
+        let mut nodes = Vec::new();
+        for _ in 0..num_nodes {
+            let offset = blockindex.read_u64_be()?;
+            let size = blockindex.read_u64_be()?;
+            let status = blockindex.read_u32_be()?;
+            let name = blockindex.read_cstr()?;
+            info!(
+                "found node {name:?} (size={}, status={status})",
+                humansize::format_size(size, DECIMAL)
+            );
+            nodes.push(NodeInfo {
+                offset,
+                size,
+                _status: status,
+                name,
+            })
+        }
+
+        Ok((
+            Self {
+                file_version,
+                player_version,
+                unity_version,
+                nodes,
+            },
+            blocks,
+        ))
+    }
+    /// All directory entries of the archive.
+    pub fn nodes(&self) -> &[NodeInfo] {
+        &self.nodes
+    }
+}
+
+/// Compression schemes used by UnityFS for the block index and data blocks.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum CompressionScheme {
+    None,
+    Lzma,
+    Lz4,
+    Lz4hc,
+    Lzham,
+}
+impl CompressionScheme {
+    /// Decodes the low 6 bits of a flags value into a scheme; returns
+    /// `None` for unknown values.
+    pub fn from_flag_num(n: u8) -> Option<CompressionScheme> {
+        Some(match n & 0x3f {
+            0 => CompressionScheme::None,
+            1 => CompressionScheme::Lzma,
+            2 => CompressionScheme::Lz4,
+            3 => CompressionScheme::Lz4hc,
+            4 => CompressionScheme::Lzham,
+            _ => return None,
+        })
+    }
+    /// Decompresses `block` into `decomp_size` bytes (the `None` scheme
+    /// returns the input unchanged).
+    ///
+    /// NOTE(review): `Lzham` is unimplemented and panics via `todo!`.
+    pub fn decompress(&self, block: Vec<u8>, decomp_size: usize) -> Result<Vec<u8>> {
+        match self {
+            CompressionScheme::None => Ok(block),
+            CompressionScheme::Lzma => {
+                let mut r = lzma::Reader::from(Cursor::new(block))?;
+                let mut buf = Vec::new();
+                r.read_to_end(&mut buf)?;
+                Ok(buf)
+            }
+            // LZ4 and LZ4HC share the same block format, so one decoder works
+            // for both.
+            CompressionScheme::Lz4hc | CompressionScheme::Lz4 => {
+                Ok(lz4_flex::block::decompress(&block, decomp_size)?)
+            }
+            // Alternative implementation using the `lz4` crate:
+            // CompressionScheme::LZ4HC | CompressionScheme::LZ4 => {
+            //     Ok(lz4::block::decompress(&block, Some(decomp_size as i32))?)
+            // }
+            CompressionScheme::Lzham => todo!(),
+        }
+    }
+}
diff --git a/src/unityfs/mod.rs b/src/unityfs/mod.rs
new file mode 100644
index 0000000..bc7e3ec
--- /dev/null
+++ b/src/unityfs/mod.rs
@@ -0,0 +1,95 @@
+pub mod block_reader;
+pub mod header;
+pub mod multi_reader;
+
+use anyhow::Result;
+use block_reader::BlockReader;
+use header::{BlockInfo, NodeInfo, UnityFSHeader};
+use log::debug;
+use multi_reader::MultiReader;
+use std::{
+ io::{Error, ErrorKind, Read, Seek, SeekFrom},
+ sync::Arc,
+};
+
+/// An opened UnityFS archive: the parsed header plus a cloneable reader
+/// over the compressed block data, so multiple nodes can be read from a
+/// single underlying source.
+pub struct UnityFS<T> {
+    reader: MultiReader<T>,
+    blocks: Arc<Vec<BlockInfo>>,
+    // Position in the underlying reader where the block data begins.
+    inner_seek_offset: u64,
+    pub header: UnityFSHeader,
+}
+
+/// Reader restricted to a single node (file) inside the archive,
+/// translating node-relative positions into the decompressed stream.
+pub struct NodeReader<T> {
+    inner: T,
+    // Current node-relative read position.
+    position: u64,
+    // Start of the node within the decompressed stream.
+    offset: u64,
+    // Total size of the node in bytes.
+    size: u64,
+}
+
+impl<T: Read + Seek> UnityFS<T> {
+    /// Parses the header of `file` and prepares shared access to the block
+    /// data that follows it.
+    pub fn open(mut file: T) -> Result<Self> {
+        let (header, blocks) = UnityFSHeader::read(&mut file)?;
+        // Header parsing leaves the reader at the start of the block data.
+        let inner_seek_offset = file.stream_position()?;
+
+        Ok(Self {
+            blocks: Arc::new(blocks),
+            header,
+            inner_seek_offset,
+            reader: MultiReader::new(file)?,
+        })
+    }
+
+    /// Heuristic for the primary asset file: the first node whose name is
+    /// not a resource/shared-assets side file.
+    pub fn find_main_file(&self) -> Option<&NodeInfo> {
+        self.header.nodes().iter().find(|n| {
+            !n.name.ends_with(".resource")
+                && !n.name.ends_with(".resS")
+                && !n.name.ends_with(".sharedAssets")
+        })
+    }
+
+    /// Creates an independent reader for `node`; cloning the underlying
+    /// `MultiReader` lets several nodes be read from one source.
+    ///
+    /// NOTE(review): the returned reader starts at decompressed offset 0,
+    /// not at `node.offset` — `node.offset` is only applied on `seek`, so a
+    /// `read` before any `seek` would not start at the node. Confirm callers
+    /// always seek first.
+    pub fn read<'a>(&'a self, node: &NodeInfo) -> Result<NodeReader<BlockReader<MultiReader<T>>>> {
+        let mut inner = self.reader.clone();
+        inner.seek(SeekFrom::Start(self.inner_seek_offset))?;
+        let br = BlockReader::new(self.blocks.clone(), inner, self.inner_seek_offset);
+        Ok(NodeReader {
+            size: node.size,
+            offset: node.offset,
+            position: 0,
+            inner: br,
+        })
+    }
+}
+
+impl<T: Read> Read for NodeReader<T> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        // Clamp the request so reads cannot run past the end of this node.
+        let bytes_left = self.size - self.position;
+        let end = buf.len().min(bytes_left as usize);
+        let size = self.inner.read(&mut buf[..end])?;
+        self.position += size as u64;
+        Ok(size)
+    }
+}
+impl<T: Seek + Read> Seek for NodeReader<T> {
+    /// Node-relative seek. Supports `Start(n)` and non-negative
+    /// `Current(n)`; all other variants panic via `unimplemented!`.
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        match pos {
+            // Forward relative seek: implemented by reading and discarding
+            // one byte at a time, reusing `read`'s bounds handling.
+            SeekFrom::Current(n) if n >= 0 => {
+                for _ in 0..n {
+                    self.read_exact(&mut [0u8])?;
+                }
+                Ok(self.stream_position()?)
+            }
+            SeekFrom::Start(n) => {
+                debug!("seek node to {n} (off={})", self.offset);
+                if n > self.size {
+                    return Err(Error::new(ErrorKind::NotSeekable, "seek out of bounds"));
+                }
+                self.position = n;
+                // Translate to an absolute position in the decompressed stream.
+                self.inner.seek(SeekFrom::Start(self.offset + n))
+            }
+            _ => unimplemented!(),
+        }
+    }
+    /// Node-relative position (not the inner reader's position).
+    fn stream_position(&mut self) -> std::io::Result<u64> {
+        Ok(self.position)
+    }
+}
diff --git a/src/unityfs/multi_reader.rs b/src/unityfs/multi_reader.rs
new file mode 100644
index 0000000..3de6cd5
--- /dev/null
+++ b/src/unityfs/multi_reader.rs
@@ -0,0 +1,50 @@
+use std::{
+ io::{Read, Seek, SeekFrom},
+ sync::{Arc, Mutex},
+};
+
+use anyhow::Result;
+
+/// Cheaply cloneable handle onto one shared reader. Each handle keeps its
+/// own logical position; the real reader position is synchronized lazily
+/// on `read`, under a mutex.
+pub struct MultiReader<T> {
+    // This handle's logical position.
+    position: u64,
+    // Shared (actual position, reader) pair.
+    inner: Arc<Mutex<(u64, T)>>,
+}
+impl<T: Seek> MultiReader<T> {
+    /// Wraps `inner`, starting at its current stream position.
+    pub fn new(mut inner: T) -> Result<Self> {
+        let position = inner.stream_position()?;
+        Ok(Self {
+            position,
+            inner: Arc::new(Mutex::new((position, inner))),
+        })
+    }
+}
+// Manual impl: `T` need not be `Clone`; only the Arc handle is duplicated,
+// and the new handle inherits the current logical position.
+impl<T> Clone for MultiReader<T> {
+    fn clone(&self) -> Self {
+        Self {
+            position: self.position,
+            inner: self.inner.clone(),
+        }
+    }
+}
+impl<T: Read + Seek> Read for MultiReader<T> {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let mut g = self.inner.lock().unwrap();
+        // Another handle may have moved the shared reader; re-seek to this
+        // handle's logical position before reading.
+        if g.0 != self.position {
+            g.1.seek(SeekFrom::Start(self.position))?;
+        }
+        let size = g.1.read(buf)?;
+        // Advance both the shared position and this handle's position.
+        g.0 += size as u64;
+        self.position += size as u64;
+        Ok(size)
+    }
+}
+impl<T: Seek> Seek for MultiReader<T> {
+    /// Seeks only this handle's logical position; the shared reader is not
+    /// touched until the next `read`. `SeekFrom::End` is unsupported.
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        self.position = match pos {
+            SeekFrom::Start(x) => x,
+            // Saturating to avoid underflow on large negative offsets.
+            SeekFrom::Current(x) => self.position.saturating_add_signed(x),
+            SeekFrom::End(_) => unimplemented!(),
+        };
+        Ok(self.position)
+    }
+}