use filetime::FileTime;
use crate::{entry, extension, Entry, State, Version};
mod entries;
/// Decoding of the header of an index file.
pub mod header;
mod error {
use crate::{decode, extension};
/// The error returned by [`State::from_bytes()`][crate::State::from_bytes()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error(transparent)]
Header(#[from] decode::header::Error),
#[error("Could not parse entry at index {index}")]
Entry { index: u32 },
#[error("Mandatory extension wasn't implemented or malformed.")]
Extension(#[from] extension::decode::Error),
#[error("Index trailer should have been {expected} bytes long, but was {actual}")]
UnexpectedTrailerLength { expected: usize, actual: usize },
#[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")]
ChecksumMismatch {
actual_checksum: gix_hash::ObjectId,
expected_checksum: gix_hash::ObjectId,
},
}
}
pub use error::Error;
use gix_features::parallel::InOrderIter;
use crate::util::read_u32;
/// Options to define how to decode an index state [from bytes][State::from_bytes()].
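///
/// A minimal sketch of overriding the defaults; the values shown are purely illustrative:
///
/// ```
/// let opts = gix_index::decode::Options {
///     thread_limit: Some(1), // stay on the calling thread
///     ..Default::default()
/// };
/// ```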
#[derive(Default, Clone, Copy)]
pub struct Options {
/// If `Some(N)` with `N > 0`, use at most `N` threads. If `Some(0)` or `None`, use as many threads
/// as there are logical cores.
///
/// Threading applies to loading extensions in parallel to entries if the common EOIE extension is available,
/// and to loading the entries themselves on multiple threads if the IEOT extension is present.
pub thread_limit: Option<usize>,
/// The minimum size of the extension block in bytes for it to be loaded on its own thread, assuming enough threads are available.
/// If set to 0, extensions will always be read on their own thread whenever a thread can be spared.
pub min_extension_block_in_bytes_for_threading: usize,
/// The expected checksum of this index, to be set if it is read as part of a `link` extension.
///
/// Reading this file will abort if its stored checksum doesn't match this value.
pub expected_checksum: Option<gix_hash::ObjectId>,
}
impl State {
/// Decode an index state from `data`, storing `timestamp` in the resulting instance for pass-through, and assuming `object_hash`
/// is used throughout the file. Also return the hash stored over all bytes in `data`, or `None` if none was written due to `index.skipHash`.
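///
/// A usage sketch; the index location and hash kind are assumptions for illustration:
///
/// ```no_run
/// use filetime::FileTime;
///
/// let data = std::fs::read(".git/index")?;
/// let (state, checksum) = gix_index::State::from_bytes(
///     &data,
///     FileTime::now(),
///     gix_hash::Kind::Sha1,
///     gix_index::decode::Options::default(),
/// )?;
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```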
pub fn from_bytes(
data: &[u8],
timestamp: FileTime,
object_hash: gix_hash::Kind,
Options {
thread_limit,
min_extension_block_in_bytes_for_threading,
expected_checksum,
}: Options,
) -> Result<(Self, Option<gix_hash::ObjectId>), Error> {
let _span = gix_features::trace::detail!("gix_index::State::from_bytes()");
let (version, num_entries, post_header_data) = header::decode(data, object_hash)?;
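// The end-of-index-entry (EOIE) extension, if present, records where the extension section begins,
// which is what allows decoding entries and extensions in parallel.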
let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash);
let mut num_threads = gix_features::parallel::num_threads(thread_limit);
let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes(
num_entries,
data.len(),
start_of_extensions,
object_hash,
version,
);
let (entries, ext, data) = match start_of_extensions {
Some(offset) if num_threads > 1 => {
let extensions_data = &data[offset..];
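// The index-entry-offset-table (IEOT) extension, if present, lists blocks of entries with their
// offsets, so the entries themselves can be decoded on multiple threads as well.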
let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash);
let (entries_res, ext_res) = gix_features::parallel::threads(|scope| {
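// Note that the block passed to `then()` is evaluated unconditionally, so one thread is
// reserved for extension loading even if the extension block turns out to be too small
// for a dedicated thread to be spawned.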
let extension_loading =
(extensions_data.len() > min_extension_block_in_bytes_for_threading).then({
num_threads -= 1;
|| {
gix_features::parallel::build_thread()
.name("gix-index.from_bytes.load-extensions".into())
.spawn_scoped(scope, || extension::decode::all(extensions_data, object_hash))
.expect("valid name")
}
});
let entries_res = match index_offsets_table {
Some(entry_offsets) => {
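// Split the offset table into roughly even chunks, one per available thread.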
let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize;
let num_chunks = entry_offsets.chunks(chunk_size).count();
let mut threads = Vec::with_capacity(num_chunks);
for (id, chunks) in entry_offsets.chunks(chunk_size).enumerate() {
let chunks = chunks.to_vec();
threads.push(
gix_features::parallel::build_thread()
.name(format!("gix-index.from_bytes.read-entries.{id}"))
.spawn_scoped(scope, move || {
let num_entries_for_chunks =
chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize;
let mut entries = Vec::with_capacity(num_entries_for_chunks);
let path_backing_buffer_size_for_chunks =
entries::estimate_path_storage_requirements_in_bytes(
num_entries_for_chunks as u32,
data.len() / num_chunks,
start_of_extensions.map(|ofs| ofs / num_chunks),
object_hash,
version,
);
let mut path_backing =
Vec::with_capacity(path_backing_buffer_size_for_chunks);
let mut is_sparse = false;
for offset in chunks {
let (
entries::Outcome {
is_sparse: chunk_is_sparse,
},
_data,
) = entries::chunk(
&data[offset.from_beginning_of_file as usize..],
&mut entries,
&mut path_backing,
offset.num_entries,
object_hash,
version,
)?;
is_sparse |= chunk_is_sparse;
}
Ok::<_, Error>((
id,
EntriesOutcome {
entries,
path_backing,
is_sparse,
},
))
})
.expect("valid name"),
);
}
let mut results =
InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap()));
let mut acc = results.next().expect("have at least one result, one per thread");
// We explicitly don't adjust the reserve in acc and rather allow for more copying
// to happen as vectors grow, to keep the peak memory size low.
// NOTE: one day, we might use a memory pool for paths. We could encode the block of memory
// in some bytes of the path offset. That way there is more indirection/slower access
// to the path, but it would save time here.
// As it stands, `git` is definitely more efficient at this and probably uses less memory too.
// Maybe benchmarks can tell if that is noticeable at 200/400GB/s memory bandwidth, or maybe just
// 100GB/s on a single core.
while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) {
match res {
Ok(rhs) => {
lhs.is_sparse |= rhs.is_sparse;
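// Paths of the incoming chunk are appended to the accumulated path backing,
// so its entry path ranges must be shifted by the backing's current length.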
let ofs = lhs.path_backing.len();
lhs.path_backing.extend(rhs.path_backing);
lhs.entries.extend(rhs.entries.into_iter().map(|mut e| {
e.path.start += ofs;
e.path.end += ofs;
e
}));
}
Err(err) => {
acc = Err(err);
}
}
}
acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..]))
}
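// Without an IEOT extension, entries are decoded sequentially, while extensions
// may still be loaded on their own thread.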
None => entries(
post_header_data,
path_backing_buffer_size,
num_entries,
object_hash,
version,
),
};
let ext_res = extension_loading.map_or_else(
|| extension::decode::all(extensions_data, object_hash),
|thread| thread.join().unwrap(),
);
(entries_res, ext_res)
});
let (ext, data) = ext_res?;
(entries_res?.0, ext, data)
}
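// Without a known start of extensions, or with just one thread, decode entries first
// and extensions afterwards, sequentially.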
None | Some(_) => {
let (entries, data) = entries(
post_header_data,
path_backing_buffer_size,
num_entries,
object_hash,
version,
)?;
let (ext, data) = extension::decode::all(data, object_hash)?;
(entries, ext, data)
}
};
if data.len() != object_hash.len_in_bytes() {
return Err(Error::UnexpectedTrailerLength {
expected: object_hash.len_in_bytes(),
actual: data.len(),
});
}
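// A null trailer hash means hashing was skipped, e.g. via `index.skipHash`, so there is no checksum to return.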
let checksum = gix_hash::ObjectId::from_bytes_or_panic(data);
let checksum = (!checksum.is_null()).then_some(checksum);
if let Some((expected_checksum, actual_checksum)) = expected_checksum.zip(checksum) {
if actual_checksum != expected_checksum {
return Err(Error::ChecksumMismatch {
actual_checksum,
expected_checksum,
});
}
}
let EntriesOutcome {
entries,
path_backing,
mut is_sparse,
} = entries;
let extension::decode::Outcome {
tree,
link,
resolve_undo,
untracked,
fs_monitor,
is_sparse: is_sparse_from_ext, // a marker is needed in case there are no directories
} = ext;
is_sparse |= is_sparse_from_ext;
Ok((
State {
object_hash,
timestamp,
version,
entries,
path_backing,
is_sparse,
tree,
link,
resolve_undo,
untracked,
fs_monitor,
},
checksum,
))
}
}
struct EntriesOutcome {
pub entries: Vec<Entry>,
pub path_backing: Vec<u8>,
pub is_sparse: bool,
}
fn entries(
post_header_data: &[u8],
path_backing_buffer_size: usize,
num_entries: u32,
object_hash: gix_hash::Kind,
version: Version,
) -> Result<(EntriesOutcome, &[u8]), Error> {
let mut entries = Vec::with_capacity(num_entries as usize);
let mut path_backing = Vec::with_capacity(path_backing_buffer_size);
entries::chunk(
post_header_data,
&mut entries,
&mut path_backing,
num_entries,
object_hash,
version,
)
.map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| {
(
EntriesOutcome {
entries,
path_backing,
is_sparse,
},
data,
)
})
}
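/// Decode the fixed-size `stat` information of an on-disk entry: ctime and mtime as seconds/nanoseconds pairs,
/// followed by `dev`, `ino`, `uid`, `gid` and the file `size`, each stored as a 32-bit big-endian integer.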
pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> {
let (ctime_secs, data) = read_u32(data)?;
let (ctime_nsecs, data) = read_u32(data)?;
let (mtime_secs, data) = read_u32(data)?;
let (mtime_nsecs, data) = read_u32(data)?;
let (dev, data) = read_u32(data)?;
let (ino, data) = read_u32(data)?;
let (uid, data) = read_u32(data)?;
let (gid, data) = read_u32(data)?;
let (size, data) = read_u32(data)?;
Some((
entry::Stat {
mtime: entry::stat::Time {
secs: mtime_secs,
nsecs: mtime_nsecs,
},
ctime: entry::stat::Time {
secs: ctime_secs,
nsecs: ctime_nsecs,
},
dev,
ino,
uid,
gid,
size,
},
data,
))
}