// blob: 0ee4259a8fc6dcb84890fc58246ff41973895116 [file] [log] [blame]
use std::sync::atomic::AtomicBool;
use gix_features::progress::{DynNestedProgress, Progress};
use gix_object::WriteTo;
use crate::index;
///
pub mod integrity {
use std::marker::PhantomData;
use gix_object::bstr::BString;
/// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
///
/// Each variant corresponds to one way in which the verification of an index, or of an object of its pack, can fail.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
/// Writing a decoded object into the re-encoding buffer failed.
#[error("Reserialization of an object failed")]
Io(#[from] std::io::Error),
/// The fan-out table of the index failed its ordering check, with `index` being the position reported by that check.
#[error("The fan at index {index} is out of order as it's larger then the following value.")]
Fan { index: usize },
/// The object `id` of the given `kind` could not be parsed, with `source` being the underlying decoding error.
#[error("{kind} object {id} could not be decoded")]
ObjectDecode {
source: gix_object::decode::Error,
kind: gix_object::Kind,
id: gix_hash::ObjectId,
},
/// Decoding and re-encoding the object `id` did not reproduce its original bytes.
///
/// `expected` holds the original object data, `actual` the bytes produced by re-encoding it.
#[error("{kind} object {id} wasn't re-encoded without change, wanted\n{expected}\n\nGOT\n\n{actual}")]
ObjectEncodeMismatch {
kind: gix_object::Kind,
id: gix_hash::ObjectId,
expected: BString,
actual: BString,
},
}
/// Returned by [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
pub struct Outcome {
/// The computed checksum of the index which matched the stored one.
pub actual_index_checksum: gix_hash::ObjectId,
/// The statistics gathered while traversing the pack, or `None` if no pack was provided and
/// only the checksum of the index was verified.
pub pack_traverse_statistics: Option<crate::index::traverse::Statistics>,
}
/// Additional options to define how the integrity should be verified.
///
/// `F` is the type of the function creating the pack cache, see `make_pack_lookup_cache`.
#[derive(Clone)]
pub struct Options<F> {
/// The thoroughness of the verification
pub verify_mode: crate::index::verify::Mode,
/// The way to traverse packs
pub traversal: crate::index::traverse::Algorithm,
/// The amount of threads to use if `Some(N)`, with `None|Some(0)` using all available cores.
pub thread_limit: Option<usize>,
/// A function to create a pack cache
pub make_pack_lookup_cache: F,
}
impl Default for Options<fn() -> crate::cache::Never> {
fn default() -> Self {
Options {
verify_mode: Default::default(),
traversal: Default::default(),
thread_limit: None,
make_pack_lookup_cache: || crate::cache::Never,
}
}
}
/// The progress ids used in [`index::File::verify_integrity()`][crate::index::File::verify_integrity()].
///
/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
#[derive(Debug, Copy, Clone)]
pub enum ProgressId {
/// The amount of bytes read to verify the index checksum.
///
/// It converts into the progress id `PTHI`.
ChecksumBytes,
/// A root progress for traversal which isn't actually used directly, but here to link to the respective `ProgressId` types.
///
/// It converts into the `UNKNOWN` progress id.
Traverse(PhantomData<crate::index::verify::index::traverse::ProgressId>),
}
impl From<ProgressId> for gix_features::progress::Id {
    /// Map each progress kind to its four-byte identifier, with the traversal placeholder
    /// being represented by the `UNKNOWN` id.
    fn from(v: ProgressId) -> Self {
        use gix_features::progress::UNKNOWN;
        match v {
            ProgressId::Traverse(_) => UNKNOWN,
            ProgressId::ChecksumBytes => *b"PTHI",
        }
    }
}
}
/// Types related to [`index::File::verify_checksum()`][crate::index::File::verify_checksum()].
pub mod checksum {
/// Returned by [`index::File::verify_checksum()`][crate::index::File::verify_checksum()].
///
/// This is an alias for the error type of [`crate::verify::checksum`].
pub type Error = crate::verify::checksum::Error;
}
/// Various ways in which a pack and index can be verified
///
/// The modes are ordered by increasing thoroughness, with the most thorough one being the default.
#[derive(Default, Debug, Eq, PartialEq, Hash, Clone, Copy)]
pub enum Mode {
/// Validate the object hash and CRC32
HashCrc32,
/// Validate hash and CRC32, and decode each non-Blob object.
/// Each object should be valid, i.e. be decodable.
HashCrc32Decode,
/// Validate hash and CRC32, and decode and encode each non-Blob object.
/// Each object should yield exactly the same bytes, and thus the same hash, when re-encoded.
#[default]
HashCrc32DecodeEncode,
}
/// Information to allow verifying the integrity of an index with the help of its corresponding pack.
///
/// Pass it to [`index::File::verify_integrity()`] to have the pack traversed and verified as well.
/// `F` is the type of the function creating the pack cache as stored in [`integrity::Options`].
pub struct PackContext<'a, F> {
/// The pack data file itself.
pub data: &'a crate::data::File,
/// The options further configuring the pack traversal and verification
pub options: integrity::Options<F>,
}
/// Verify and validate the content of the index file
impl index::File {
/// Returns the trailing hash stored in the last `hash_len` bytes of this index file.
///
/// It's a hash over all bytes of the index.
pub fn index_checksum(&self) -> gix_hash::ObjectId {
    let trailer_start = self.data.len() - self.hash_len;
    let stored_checksum = &self.data[trailer_start..];
    gix_hash::ObjectId::from_bytes_or_panic(stored_checksum)
}
/// Returns the hash of the pack data file that this index file corresponds to.
///
/// It is read from the `hash_len` bytes right before the trailing index checksum, and
/// should match [`crate::data::File::checksum()`] of the corresponding pack data file.
pub fn pack_checksum(&self) -> gix_hash::ObjectId {
    let start = self.data.len() - self.hash_len * 2;
    let end = start + self.hash_len;
    gix_hash::ObjectId::from_bytes_or_panic(&self.data[start..end])
}
/// Validate that our [`index_checksum()`][index::File::index_checksum()] matches the actual contents
/// of this index file, and return it if it does.
///
/// `progress` and `should_interrupt` are handed to the checksum computation as is.
pub fn verify_checksum(
    &self,
    progress: &mut dyn Progress,
    should_interrupt: &AtomicBool,
) -> Result<gix_hash::ObjectId, checksum::Error> {
    // The checksum stored in the file is what the computed one is compared against.
    let expected = self.index_checksum();
    crate::verify::checksum_on_disk_or_mmap(
        self.path(),
        &self.data,
        expected,
        self.object_hash,
        progress,
        should_interrupt,
    )
}
/// The most thorough validation of integrity of both index file and the corresponding pack data file, if provided.
/// Returns an [`integrity::Outcome`] holding the verified checksum of the index file and, if a `pack` was provided,
/// the statistics of the pack traversal.
///
/// The fan-out table is checked first, with a failure being reported as [`integrity::Error::Fan`].
///
/// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index.
/// It will be used to validate internal integrity of the pack before checking each objects integrity
/// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash.
/// The thoroughness of the verification, the traversal algorithm and the thread limit are taken from
/// [`PackContext::options`], whose `make_pack_lookup_cache` is a function returning an implementation of
/// [`crate::cache::DecodeEntry`] to be used if the [`index::traverse::Algorithm`] is `Lookup`.
/// Use existing implementations in the [`crate::cache`] module.
/// To set `pack` to `None`, use `None::<PackContext<'_, fn() -> crate::cache::Never>>`.
///
/// Without a `pack`, only the checksum of the index file itself is verified,
/// see [`verify_checksum()`][index::File::verify_checksum()].
///
/// # Progress
///
/// The given `progress` is only borrowed, so it remains available to the caller no matter whether
/// the verification succeeds or fails.
pub fn verify_integrity<C, F>(
&self,
pack: Option<PackContext<'_, F>>,
progress: &mut dyn DynNestedProgress,
should_interrupt: &AtomicBool,
) -> Result<integrity::Outcome, index::traverse::Error<index::verify::integrity::Error>>
where
C: crate::cache::DecodeEntry,
F: Fn() -> C + Send + Clone,
{
// Check the fan-out table before anything else and report the position returned by the check on failure.
if let Some(first_invalid) = crate::verify::fan(&self.fan) {
return Err(index::traverse::Error::Processor(integrity::Error::Fan {
index: first_invalid,
}));
}
match pack {
// With a pack: traverse it with all safety checks enabled, running `verify_entry` on each object.
Some(PackContext {
data: pack,
options:
integrity::Options {
verify_mode,
traversal,
thread_limit,
make_pack_lookup_cache,
},
}) => self
.traverse(
pack,
progress,
should_interrupt,
{
// Scratch buffer owned by the closure so that re-encoding can reuse its allocation between calls.
let mut encode_buf = Vec::with_capacity(2048);
move |kind, data, index_entry, progress| {
Self::verify_entry(verify_mode, &mut encode_buf, kind, data, index_entry, progress)
}
},
index::traverse::Options {
traversal,
thread_limit,
check: index::traverse::SafetyCheck::All,
make_pack_lookup_cache,
},
)
.map(|o| integrity::Outcome {
actual_index_checksum: o.actual_index_checksum,
pack_traverse_statistics: Some(o.statistics),
}),
// Without a pack: only the checksum of the index is verified, hence there are no traversal statistics.
None => self
.verify_checksum(
&mut progress
.add_child_with_id("Sha1 of index".into(), integrity::ProgressId::ChecksumBytes.into()),
should_interrupt,
)
.map_err(Into::into)
.map(|id| integrity::Outcome {
actual_index_checksum: id,
pack_traverse_statistics: None,
}),
}
}
#[allow(clippy::too_many_arguments)]
fn verify_entry(
verify_mode: Mode,
encode_buf: &mut Vec<u8>,
object_kind: gix_object::Kind,
buf: &[u8],
index_entry: &index::Entry,
_progress: &dyn gix_features::progress::Progress,
) -> Result<(), integrity::Error> {
if let Mode::HashCrc32Decode | Mode::HashCrc32DecodeEncode = verify_mode {
use gix_object::Kind::*;
match object_kind {
Tree | Commit | Tag => {
let object = gix_object::ObjectRef::from_bytes(object_kind, buf).map_err(|err| {
integrity::Error::ObjectDecode {
source: err,
kind: object_kind,
id: index_entry.oid,
}
})?;
if let Mode::HashCrc32DecodeEncode = verify_mode {
encode_buf.clear();
object.write_to(&mut *encode_buf)?;
if encode_buf.as_slice() != buf {
return Err(integrity::Error::ObjectEncodeMismatch {
kind: object_kind,
id: index_entry.oid,
expected: buf.into(),
actual: encode_buf.clone().into(),
});
}
}
}
Blob => {}
};
}
Ok(())
}
}