blob: 618e5dcb39640a33ccefd9ff2679a26c8361c21c [file] [log] [blame]
use std::{mem::size_of, ops::Range};
use crate::{
data,
index::{self, EntryIndex, PrefixLookupResult, FAN_LEN},
};
const N32_SIZE: usize = size_of::<u32>();
const N64_SIZE: usize = size_of::<u64>();
const V1_HEADER_SIZE: usize = FAN_LEN * N32_SIZE;
const V2_HEADER_SIZE: usize = N32_SIZE * 2 + FAN_LEN * N32_SIZE;
const N32_HIGH_BIT: u32 = 1 << 31;
/// Represents an entry within a pack index file, effectively mapping object [`IDs`][gix_hash::ObjectId] to pack data file locations.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Entry {
/// The ID of the object
pub oid: gix_hash::ObjectId,
/// The offset to the object's header in the pack data file
pub pack_offset: data::Offset,
/// The CRC32 hash over all bytes of the pack data entry.
///
/// This can be useful for direct copies of pack data entries from one pack to another with insurance there was no bit rot.
/// _Note_: Only available in index version 2 or newer
pub crc32: Option<u32>,
}
/// Iteration and access
impl index::File {
fn iter_v1(&self) -> impl Iterator<Item = Entry> + '_ {
match self.version {
index::Version::V1 => self.data[V1_HEADER_SIZE..]
.chunks(N32_SIZE + self.hash_len)
.take(self.num_objects as usize)
.map(|c| {
let (ofs, oid) = c.split_at(N32_SIZE);
Entry {
oid: gix_hash::ObjectId::from_bytes_or_panic(oid),
pack_offset: crate::read_u32(ofs) as u64,
crc32: None,
}
}),
_ => panic!("Cannot use iter_v1() on index of type {:?}", self.version),
}
}
fn iter_v2(&self) -> impl Iterator<Item = Entry> + '_ {
let pack64_offset = self.offset_pack_offset64_v2();
match self.version {
index::Version::V2 => izip!(
self.data[V2_HEADER_SIZE..].chunks(self.hash_len),
self.data[self.offset_crc32_v2()..].chunks(N32_SIZE),
self.data[self.offset_pack_offset_v2()..].chunks(N32_SIZE)
)
.take(self.num_objects as usize)
.map(move |(oid, crc32, ofs32)| Entry {
oid: gix_hash::ObjectId::from_bytes_or_panic(oid),
pack_offset: self.pack_offset_from_offset_v2(ofs32, pack64_offset),
crc32: Some(crate::read_u32(crc32)),
}),
_ => panic!("Cannot use iter_v2() on index of type {:?}", self.version),
}
}
/// Returns the object hash at the given index in our list of (sorted) sha1 hashes.
/// The index ranges from 0 to `self.num_objects()`
///
/// # Panics
///
/// If `index` is out of bounds.
pub fn oid_at_index(&self, index: EntryIndex) -> &gix_hash::oid {
let index = index as usize;
let start = match self.version {
index::Version::V2 => V2_HEADER_SIZE + index * self.hash_len,
index::Version::V1 => V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len) + N32_SIZE,
};
gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
}
/// Returns the offset into our pack data file at which to start reading the object at `index`.
///
/// # Panics
///
/// If `index` is out of bounds.
pub fn pack_offset_at_index(&self, index: EntryIndex) -> data::Offset {
let index = index as usize;
match self.version {
index::Version::V2 => {
let start = self.offset_pack_offset_v2() + index * N32_SIZE;
self.pack_offset_from_offset_v2(&self.data[start..][..N32_SIZE], self.offset_pack_offset64_v2())
}
index::Version::V1 => {
let start = V1_HEADER_SIZE + index * (N32_SIZE + self.hash_len);
crate::read_u32(&self.data[start..][..N32_SIZE]) as u64
}
}
}
/// Returns the CRC32 of the object at the given `index`.
///
/// _Note_: These are always present for index version 2 or higher.
/// # Panics
///
/// If `index` is out of bounds.
pub fn crc32_at_index(&self, index: EntryIndex) -> Option<u32> {
let index = index as usize;
match self.version {
index::Version::V2 => {
let start = self.offset_crc32_v2() + index * N32_SIZE;
Some(crate::read_u32(&self.data[start..start + N32_SIZE]))
}
index::Version::V1 => None,
}
}
/// Returns the `index` of the given hash for use with the [`oid_at_index()`][index::File::oid_at_index()],
/// [`pack_offset_at_index()`][index::File::pack_offset_at_index()] or [`crc32_at_index()`][index::File::crc32_at_index()].
// NOTE: pretty much the same things as in `multi_index::File::lookup`, change things there
// as well.
pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<EntryIndex> {
lookup(id.as_ref(), &self.fan, &|idx| self.oid_at_index(idx))
}
/// Given a `prefix`, find an object that matches it uniquely within this index and return `Some(Ok(entry_index))`.
/// If there is more than one object matching the object `Some(Err(())` is returned.
///
/// Finally, if no object matches the index, the return value is `None`.
///
/// Pass `candidates` to obtain the set of entry-indices matching `prefix`, with the same return value as
/// one would have received if it remained `None`. It will be empty if no object matched the `prefix`.
///
// NOTE: pretty much the same things as in `index::File::lookup`, change things there
// as well.
pub fn lookup_prefix(
&self,
prefix: gix_hash::Prefix,
candidates: Option<&mut Range<EntryIndex>>,
) -> Option<PrefixLookupResult> {
lookup_prefix(
prefix,
candidates,
&self.fan,
&|idx| self.oid_at_index(idx),
self.num_objects,
)
}
/// An iterator over all [`Entries`][Entry] of this index file.
pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = Entry> + 'a> {
match self.version {
index::Version::V2 => Box::new(self.iter_v2()),
index::Version::V1 => Box::new(self.iter_v1()),
}
}
/// Return a vector of ascending offsets into our respective pack data file.
///
/// Useful to control an iteration over all pack entries in a cache-friendly way.
pub fn sorted_offsets(&self) -> Vec<data::Offset> {
let mut ofs: Vec<_> = match self.version {
index::Version::V1 => self.iter().map(|e| e.pack_offset).collect(),
index::Version::V2 => {
let offset32_start = &self.data[self.offset_pack_offset_v2()..];
let pack_offset_64_start = self.offset_pack_offset64_v2();
offset32_start
.chunks(N32_SIZE)
.take(self.num_objects as usize)
.map(|offset| self.pack_offset_from_offset_v2(offset, pack_offset_64_start))
.collect()
}
};
ofs.sort_unstable();
ofs
}
#[inline]
fn offset_crc32_v2(&self) -> usize {
V2_HEADER_SIZE + self.num_objects as usize * self.hash_len
}
#[inline]
fn offset_pack_offset_v2(&self) -> usize {
self.offset_crc32_v2() + self.num_objects as usize * N32_SIZE
}
#[inline]
fn offset_pack_offset64_v2(&self) -> usize {
self.offset_pack_offset_v2() + self.num_objects as usize * N32_SIZE
}
#[inline]
fn pack_offset_from_offset_v2(&self, offset: &[u8], pack64_offset: usize) -> data::Offset {
debug_assert_eq!(self.version, index::Version::V2);
let ofs32 = crate::read_u32(offset);
if (ofs32 & N32_HIGH_BIT) == N32_HIGH_BIT {
let from = pack64_offset + (ofs32 ^ N32_HIGH_BIT) as usize * N64_SIZE;
crate::read_u64(&self.data[from..][..N64_SIZE])
} else {
ofs32 as u64
}
}
}
pub(crate) fn lookup_prefix<'a>(
prefix: gix_hash::Prefix,
candidates: Option<&mut Range<EntryIndex>>,
fan: &[u32; FAN_LEN],
oid_at_index: &dyn Fn(EntryIndex) -> &'a gix_hash::oid,
num_objects: u32,
) -> Option<PrefixLookupResult> {
let first_byte = prefix.as_oid().first_byte() as usize;
let mut upper_bound = fan[first_byte];
let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
// Bisect using indices
while lower_bound < upper_bound {
let mid = (lower_bound + upper_bound) / 2;
let mid_sha = oid_at_index(mid);
use std::cmp::Ordering::*;
match prefix.cmp_oid(mid_sha) {
Less => upper_bound = mid,
Equal => match candidates {
Some(candidates) => {
let first_past_entry = ((0..mid).rev())
.take_while(|prev| prefix.cmp_oid(oid_at_index(*prev)) == Equal)
.last();
let last_future_entry = ((mid + 1)..num_objects)
.take_while(|next| prefix.cmp_oid(oid_at_index(*next)) == Equal)
.last();
*candidates = match (first_past_entry, last_future_entry) {
(Some(first), Some(last)) => first..last + 1,
(Some(first), None) => first..mid + 1,
(None, Some(last)) => mid..last + 1,
(None, None) => mid..mid + 1,
};
return if candidates.len() > 1 {
Some(Err(()))
} else {
Some(Ok(mid))
};
}
None => {
let next = mid + 1;
if next < num_objects && prefix.cmp_oid(oid_at_index(next)) == Equal {
return Some(Err(()));
}
if mid != 0 && prefix.cmp_oid(oid_at_index(mid - 1)) == Equal {
return Some(Err(()));
}
return Some(Ok(mid));
}
},
Greater => lower_bound = mid + 1,
}
}
if let Some(candidates) = candidates {
*candidates = 0..0;
}
None
}
pub(crate) fn lookup<'a>(
id: &gix_hash::oid,
fan: &[u32; FAN_LEN],
oid_at_index: &dyn Fn(EntryIndex) -> &'a gix_hash::oid,
) -> Option<EntryIndex> {
let first_byte = id.first_byte() as usize;
let mut upper_bound = fan[first_byte];
let mut lower_bound = if first_byte != 0 { fan[first_byte - 1] } else { 0 };
while lower_bound < upper_bound {
let mid = (lower_bound + upper_bound) / 2;
let mid_sha = oid_at_index(mid);
use std::cmp::Ordering::*;
match id.cmp(mid_sha) {
Less => upper_bound = mid,
Equal => return Some(mid),
Greater => lower_bound = mid + 1,
}
}
None
}