blob: 8852b3804896dc52f7ae1222b4f36845ceacd1b7 [file] [log] [blame]
//! Defining custom `Reader`s quickly.
use alloc::borrow::Cow;
use alloc::rc::Rc;
use alloc::string::String;
use alloc::sync::Arc;
use core::fmt::Debug;
use core::ops::{Deref, Index, Range, RangeFrom, RangeTo};
use core::slice;
use core::str;
use stable_deref_trait::CloneStableDeref;
use crate::endianity::Endianity;
use crate::read::{Error, Reader, ReaderOffsetId, Result};
/// A reference counted, non-thread-safe slice of bytes and associated
/// endianity.
///
/// ```
/// # #[cfg(feature = "std")] {
/// use std::rc::Rc;
///
/// let buf = Rc::from(&[1, 2, 3, 4][..]);
/// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian);
/// # let _ = reader;
/// # }
/// ```
pub type EndianRcSlice<Endian> = EndianReader<Endian, Rc<[u8]>>;
/// An atomically reference counted, thread-safe slice of bytes and associated
/// endianity.
///
/// ```
/// # #[cfg(feature = "std")] {
/// use std::sync::Arc;
///
/// let buf = Arc::from(&[1, 2, 3, 4][..]);
/// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian);
/// # let _ = reader;
/// # }
/// ```
pub type EndianArcSlice<Endian> = EndianReader<Endian, Arc<[u8]>>;
/// An easy way to define a custom `Reader` implementation with a reference to a
/// generic buffer of bytes and an associated endianity.
///
/// Note that the whole original buffer is kept alive in memory even if there is
/// only one reader that references only a handful of bytes from that original
/// buffer. That is, `EndianReader` will not do any copying, moving, or
/// compacting in order to free up unused regions of the original buffer. If you
/// require this kind of behavior, it is up to you to implement `Reader`
/// directly by-hand.
///
/// # Example
///
/// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`.
/// You can wrap that `mmap`ed file up in a `MmapFile` type and use
/// `EndianReader<Rc<MmapFile>>` or `EndianReader<Arc<MmapFile>>` as readers as
/// long as `MmapFile` dereferences to the underlying `[u8]` data.
///
/// ```
/// use std::io;
/// use std::ops::Deref;
/// use std::path::Path;
/// use std::slice;
/// use std::sync::Arc;
///
/// /// A type that represents an `mmap`ed file.
/// #[derive(Debug)]
/// pub struct MmapFile {
/// ptr: *const u8,
/// len: usize,
/// }
///
/// impl MmapFile {
/// pub fn new(path: &Path) -> io::Result<MmapFile> {
/// // Call `mmap` and check for errors and all that...
/// # unimplemented!()
/// }
/// }
///
/// impl Drop for MmapFile {
/// fn drop(&mut self) {
/// // Call `munmap` to clean up after ourselves...
/// # unimplemented!()
/// }
/// }
///
/// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory.
/// impl Deref for MmapFile {
/// type Target = [u8];
/// fn deref(&self) -> &[u8] {
/// unsafe {
/// slice::from_raw_parts(self.ptr, self.len)
/// }
/// }
/// }
///
/// /// A type that represents a shared `mmap`ed file.
/// #[derive(Debug, Clone)]
/// pub struct ArcMmapFile(Arc<MmapFile>);
///
/// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory.
/// impl Deref for ArcMmapFile {
/// type Target = [u8];
/// fn deref(&self) -> &[u8] {
/// &self.0
/// }
/// }
///
/// // These are both valid for any `Rc` or `Arc`.
/// unsafe impl gimli::StableDeref for ArcMmapFile {}
/// unsafe impl gimli::CloneStableDeref for ArcMmapFile {}
///
/// /// A `gimli::Reader` that is backed by an `mmap`ed file!
/// pub type MmapFileReader<Endian> = gimli::EndianReader<Endian, ArcMmapFile>;
/// # fn test(_: &MmapFileReader<gimli::NativeEndian>) { }
/// ```
#[derive(Debug, Clone, Copy, Hash)]
pub struct EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
range: SubRange<T>,
endian: Endian,
}
impl<Endian, T1, T2> PartialEq<EndianReader<Endian, T2>> for EndianReader<Endian, T1>
where
Endian: Endianity,
T1: CloneStableDeref<Target = [u8]> + Debug,
T2: CloneStableDeref<Target = [u8]> + Debug,
{
fn eq(&self, rhs: &EndianReader<Endian, T2>) -> bool {
self.bytes() == rhs.bytes()
}
}
impl<Endian, T> Eq for EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
}
// This is separated out from `EndianReader` so that we can avoid running afoul
// of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call
// `self.endian.read_whatever` on the result. The problem is that the returned
// slice keeps the `&mut self` borrow active, so we wouldn't be able to access
// `self.endian`. Splitting the sub-range out from the endian lets us work
// around this, making it so that only the `self.range` borrow is held active,
// not all of `self`.
//
// This also serves to encapsulate the unsafe code concerning `CloneStableDeref`.
// The `bytes` member is held so that the bytes live long enough, and the
// `CloneStableDeref` ensures these bytes never move. The `ptr` and `len`
// members point inside `bytes`, and are updated during read operations.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct SubRange<T>
where
T: CloneStableDeref<Target = [u8]> + Debug,
{
bytes: T,
ptr: *const u8,
len: usize,
}
unsafe impl<T> Send for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Send {}
unsafe impl<T> Sync for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Sync {}
impl<T> SubRange<T>
where
T: CloneStableDeref<Target = [u8]> + Debug,
{
#[inline]
fn new(bytes: T) -> Self {
let ptr = bytes.as_ptr();
let len = bytes.len();
SubRange { bytes, ptr, len }
}
#[inline]
fn bytes(&self) -> &[u8] {
// Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified,
// and all operations that modify `ptr` and `len` ensure they stay in range.
unsafe { slice::from_raw_parts(self.ptr, self.len) }
}
#[inline]
fn len(&self) -> usize {
self.len
}
#[inline]
fn truncate(&mut self, len: usize) {
assert!(len <= self.len);
self.len = len;
}
#[inline]
fn skip(&mut self, len: usize) {
assert!(len <= self.len);
self.ptr = unsafe { self.ptr.add(len) };
self.len -= len;
}
#[inline]
fn read_slice(&mut self, len: usize) -> Option<&[u8]> {
if self.len() < len {
None
} else {
// Same as for `bytes()`.
let bytes = unsafe { slice::from_raw_parts(self.ptr, len) };
self.skip(len);
Some(bytes)
}
}
}
impl<Endian, T> EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
/// Construct a new `EndianReader` with the given bytes.
#[inline]
pub fn new(bytes: T, endian: Endian) -> EndianReader<Endian, T> {
EndianReader {
range: SubRange::new(bytes),
endian,
}
}
/// Return a reference to the raw bytes underlying this reader.
#[inline]
pub fn bytes(&self) -> &[u8] {
self.range.bytes()
}
}
/// # Range Methods
///
/// Unfortunately, `std::ops::Index` *must* return a reference, so we can't
/// implement `Index<Range<usize>>` to return a new `EndianReader` the way we
/// would like to. Instead, we abandon fancy indexing operators and have these
/// plain old methods.
impl<Endian, T> EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
/// Take the given `start..end` range of the underlying buffer and return a
/// new `EndianReader`.
///
/// ```
/// # #[cfg(feature = "std")] {
/// use gimli::{EndianReader, LittleEndian};
/// use std::sync::Arc;
///
/// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
/// let reader = EndianReader::new(buf.clone(), LittleEndian);
/// assert_eq!(reader.range(1..3),
/// EndianReader::new(&buf[1..3], LittleEndian));
/// # }
/// ```
///
/// # Panics
///
/// Panics if the range is out of bounds.
pub fn range(&self, idx: Range<usize>) -> EndianReader<Endian, T> {
let mut r = self.clone();
r.range.skip(idx.start);
r.range.truncate(idx.len());
r
}
/// Take the given `start..` range of the underlying buffer and return a new
/// `EndianReader`.
///
/// ```
/// # #[cfg(feature = "std")] {
/// use gimli::{EndianReader, LittleEndian};
/// use std::sync::Arc;
///
/// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
/// let reader = EndianReader::new(buf.clone(), LittleEndian);
/// assert_eq!(reader.range_from(2..),
/// EndianReader::new(&buf[2..], LittleEndian));
/// # }
/// ```
///
/// # Panics
///
/// Panics if the range is out of bounds.
pub fn range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T> {
let mut r = self.clone();
r.range.skip(idx.start);
r
}
/// Take the given `..end` range of the underlying buffer and return a new
/// `EndianReader`.
///
/// ```
/// # #[cfg(feature = "std")] {
/// use gimli::{EndianReader, LittleEndian};
/// use std::sync::Arc;
///
/// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
/// let reader = EndianReader::new(buf.clone(), LittleEndian);
/// assert_eq!(reader.range_to(..3),
/// EndianReader::new(&buf[..3], LittleEndian));
/// # }
/// ```
///
/// # Panics
///
/// Panics if the range is out of bounds.
pub fn range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T> {
let mut r = self.clone();
r.range.truncate(idx.end);
r
}
}
impl<Endian, T> Index<usize> for EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
type Output = u8;
fn index(&self, idx: usize) -> &Self::Output {
&self.bytes()[idx]
}
}
impl<Endian, T> Index<RangeFrom<usize>> for EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
type Output = [u8];
fn index(&self, idx: RangeFrom<usize>) -> &Self::Output {
&self.bytes()[idx]
}
}
impl<Endian, T> Deref for EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.bytes()
}
}
impl<Endian, T> Reader for EndianReader<Endian, T>
where
Endian: Endianity,
T: CloneStableDeref<Target = [u8]> + Debug,
{
type Endian = Endian;
type Offset = usize;
#[inline]
fn endian(&self) -> Endian {
self.endian
}
#[inline]
fn len(&self) -> usize {
self.range.len()
}
#[inline]
fn empty(&mut self) {
self.range.truncate(0);
}
#[inline]
fn truncate(&mut self, len: usize) -> Result<()> {
if self.len() < len {
Err(Error::UnexpectedEof(self.offset_id()))
} else {
self.range.truncate(len);
Ok(())
}
}
#[inline]
fn offset_from(&self, base: &EndianReader<Endian, T>) -> usize {
let base_ptr = base.bytes().as_ptr() as *const u8 as usize;
let ptr = self.bytes().as_ptr() as *const u8 as usize;
debug_assert!(base_ptr <= ptr);
debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len());
ptr - base_ptr
}
#[inline]
fn offset_id(&self) -> ReaderOffsetId {
ReaderOffsetId(self.bytes().as_ptr() as u64)
}
#[inline]
fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset> {
let id = id.0;
let self_id = self.bytes().as_ptr() as u64;
let self_len = self.bytes().len() as u64;
if id >= self_id && id <= self_id + self_len {
Some((id - self_id) as usize)
} else {
None
}
}
#[inline]
fn find(&self, byte: u8) -> Result<usize> {
self.bytes()
.iter()
.position(|x| *x == byte)
.ok_or_else(|| Error::UnexpectedEof(self.offset_id()))
}
#[inline]
fn skip(&mut self, len: usize) -> Result<()> {
if self.len() < len {
Err(Error::UnexpectedEof(self.offset_id()))
} else {
self.range.skip(len);
Ok(())
}
}
#[inline]
fn split(&mut self, len: usize) -> Result<Self> {
if self.len() < len {
Err(Error::UnexpectedEof(self.offset_id()))
} else {
let mut r = self.clone();
r.range.truncate(len);
self.range.skip(len);
Ok(r)
}
}
#[inline]
fn to_slice(&self) -> Result<Cow<[u8]>> {
Ok(self.bytes().into())
}
#[inline]
fn to_string(&self) -> Result<Cow<str>> {
match str::from_utf8(self.bytes()) {
Ok(s) => Ok(s.into()),
_ => Err(Error::BadUtf8),
}
}
#[inline]
fn to_string_lossy(&self) -> Result<Cow<str>> {
Ok(String::from_utf8_lossy(self.bytes()))
}
#[inline]
fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> {
match self.range.read_slice(buf.len()) {
Some(slice) => {
buf.copy_from_slice(slice);
Ok(())
}
None => Err(Error::UnexpectedEof(self.offset_id())),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::endianity::NativeEndian;
use crate::read::Reader;
fn native_reader<T: CloneStableDeref<Target = [u8]> + Debug>(
bytes: T,
) -> EndianReader<NativeEndian, T> {
EndianReader::new(bytes, NativeEndian)
}
const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
#[test]
fn test_reader_split() {
let mut reader = native_reader(BUF);
let left = reader.split(3).unwrap();
assert_eq!(left, native_reader(&BUF[..3]));
assert_eq!(reader, native_reader(&BUF[3..]));
}
#[test]
fn test_reader_split_out_of_bounds() {
let mut reader = native_reader(BUF);
assert!(reader.split(30).is_err());
}
#[test]
fn bytes_and_len_and_range_and_eq() {
let reader = native_reader(BUF);
assert_eq!(reader.len(), BUF.len());
assert_eq!(reader.bytes(), BUF);
assert_eq!(reader, native_reader(BUF));
let range = reader.range(2..8);
let buf_range = &BUF[2..8];
assert_eq!(range.len(), buf_range.len());
assert_eq!(range.bytes(), buf_range);
assert_ne!(range, native_reader(BUF));
assert_eq!(range, native_reader(buf_range));
let range_from = range.range_from(1..);
let buf_range_from = &buf_range[1..];
assert_eq!(range_from.len(), buf_range_from.len());
assert_eq!(range_from.bytes(), buf_range_from);
assert_ne!(range_from, native_reader(BUF));
assert_eq!(range_from, native_reader(buf_range_from));
let range_to = range_from.range_to(..4);
let buf_range_to = &buf_range_from[..4];
assert_eq!(range_to.len(), buf_range_to.len());
assert_eq!(range_to.bytes(), buf_range_to);
assert_ne!(range_to, native_reader(BUF));
assert_eq!(range_to, native_reader(buf_range_to));
}
#[test]
fn find() {
let mut reader = native_reader(BUF);
reader.skip(2).unwrap();
assert_eq!(
reader.find(5),
Ok(BUF[2..].iter().position(|x| *x == 5).unwrap())
);
}
#[test]
fn indexing() {
let mut reader = native_reader(BUF);
reader.skip(2).unwrap();
assert_eq!(reader[0], BUF[2]);
}
#[test]
#[should_panic]
fn indexing_out_of_bounds() {
let mut reader = native_reader(BUF);
reader.skip(2).unwrap();
let _ = reader[900];
}
#[test]
fn endian() {
let reader = native_reader(BUF);
assert_eq!(reader.endian(), NativeEndian);
}
#[test]
fn empty() {
let mut reader = native_reader(BUF);
assert!(!reader.is_empty());
reader.empty();
assert!(reader.is_empty());
assert!(reader.bytes().is_empty());
}
#[test]
fn truncate() {
let reader = native_reader(BUF);
let mut reader = reader.range(2..8);
reader.truncate(2).unwrap();
assert_eq!(reader.bytes(), &BUF[2..4]);
}
#[test]
fn offset_from() {
let reader = native_reader(BUF);
let sub = reader.range(2..8);
assert_eq!(sub.offset_from(&reader), 2);
}
#[test]
fn skip() {
let mut reader = native_reader(BUF);
reader.skip(2).unwrap();
assert_eq!(reader.bytes(), &BUF[2..]);
}
#[test]
fn to_slice() {
assert_eq!(
native_reader(BUF).range(2..5).to_slice(),
Ok(Cow::from(&BUF[2..5]))
);
}
#[test]
fn to_string_ok() {
let buf = b"hello, world!";
let reader = native_reader(&buf[..]);
let reader = reader.range_from(7..);
assert_eq!(reader.to_string(), Ok(Cow::from("world!")));
}
// The rocket emoji (🚀 = [0xf0, 0x9f, 0x9a, 0x80]) but rotated left by one
// to make it invalid UTF-8.
const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0];
#[test]
fn to_string_err() {
let reader = native_reader(BAD_UTF8);
assert!(reader.to_string().is_err());
}
#[test]
fn to_string_lossy() {
let reader = native_reader(BAD_UTF8);
assert_eq!(reader.to_string_lossy(), Ok(Cow::from("����")));
}
#[test]
fn read_u8_array() {
let mut reader = native_reader(BAD_UTF8);
reader.skip(1).unwrap();
let arr: [u8; 2] = reader.read_u8_array().unwrap();
assert_eq!(arr, &BAD_UTF8[1..3]);
assert_eq!(reader.bytes(), &BAD_UTF8[3..]);
}
}