// blob: 85f6a9b1861d2897a5fe6ce03340b96510a7d22a [file] [log] [blame]
use std::{io::Read, path::Path};
use bstr::BStr;
use crate::{driver, eol, ident, pipeline::util::Configuration, worktree, Pipeline};
/// Errors which can occur when reading the filter configuration from attributes.
pub mod configuration {
use bstr::BString;
/// Errors related to the configuration of filter attributes.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// An encoding was requested by `name`, but no encoding of that name is available.
#[error("The encoding named '{name}' isn't available")]
UnknownEncoding { name: BString },
/// The encoding was specified as a boolean instead of as the name of an encoding.
#[error("Encodings must be names, like UTF-16, and cannot be booleans.")]
InvalidEncoding,
}
}
/// Types used by [Pipeline::convert_to_git()][super::Pipeline::convert_to_git()].
pub mod to_git {
/// A function of the form `fn(&mut buf)` that fills `buf` with the data stored in the index for the file
/// that should be converted.
///
/// The boxed error type allows callers to report any kind of failure while obtaining that data.
pub type IndexObjectFn<'a> =
dyn FnMut(&mut Vec<u8>) -> Result<Option<()>, Box<dyn std::error::Error + Send + Sync>> + 'a;
/// The error returned by [Pipeline::convert_to_git()][super::Pipeline::convert_to_git()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// The `eol` conversion step failed.
#[error(transparent)]
Eol(#[from] crate::eol::convert_to_git::Error),
/// The encoding step that converts worktree data for storage in git failed.
#[error(transparent)]
Worktree(#[from] crate::worktree::encode_to_git::Error),
/// Applying the driver failed.
#[error(transparent)]
Driver(#[from] crate::driver::apply::Error),
/// The filter configuration could not be obtained.
#[error(transparent)]
Configuration(#[from] super::configuration::Error),
/// An IO error occurred while reading data into memory.
#[error("Copy of driver process output to memory failed")]
ReadProcessOutputToBuffer(#[from] std::io::Error),
}
}
/// Types used by [Pipeline::convert_to_worktree()][super::Pipeline::convert_to_worktree()].
pub mod to_worktree {
/// The error returned by [Pipeline::convert_to_worktree()][super::Pipeline::convert_to_worktree()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// The encoding step that converts data stored in git for use in the worktree failed.
#[error(transparent)]
Worktree(#[from] crate::worktree::encode_to_worktree::Error),
/// Applying the driver failed.
#[error(transparent)]
Driver(#[from] crate::driver::apply::Error),
/// The filter configuration could not be obtained.
#[error(transparent)]
Configuration(#[from] super::configuration::Error),
}
}
/// Access
impl Pipeline {
/// Convert a `src` stream (to be found at `rela_path`) to a representation suitable for storage in `git`
/// based on the `attributes` at `rela_path` which is passed as first argument.
/// When converting to `crlf`, and depending on the configuration, `index_object` might be called to obtain the index
/// version of `src` if available. It can return `Ok(None)` if this information isn't available.
///
/// The steps run in this order: driver (`clean`), encoding, `eol`, and finally undoing `ident`.
///
/// The returned outcome is
///
/// * `Process` if a driver produced output and none of the encoding, `eol` or `ident` steps is expected to run,
/// * `Buffer` if the data was read into the internal buffers,
/// * `Unchanged`, handing back `src`, otherwise.
pub fn convert_to_git<R>(
&mut self,
mut src: R,
rela_path: &Path,
attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome),
index_object: &mut to_git::IndexObjectFn<'_>,
) -> Result<ToGitOutcome<'_, R>, to_git::Error>
where
R: std::io::Read,
{
let bstr_path = gix_path::into_bstr(rela_path);
let Configuration {
driver,
digest,
_attr_digest: _,
encoding,
apply_ident_filter,
} = Configuration::at_path(
bstr_path.as_ref(),
&self.options.drivers,
&mut self.attrs,
attributes,
self.options.eol_config,
)?;
// Tracks whether the data to convert was read into `self.bufs.src`.
let mut in_buffer = false;
// this is just an approximation, but it's as good as it gets without reading the actual input.
// It probes the `eol` step with a lone CRLF, without index lookup and without round-trip check.
let would_convert_eol = eol::convert_to_git(
b"\r\n",
digest,
&mut self.bufs.dest,
&mut |_| Ok(None),
eol::convert_to_git::Options {
round_trip_check: None,
config: self.options.eol_config,
},
)?;
// The driver, if configured, always goes first.
if let Some(driver) = driver {
if let Some(mut read) = self.processes.apply(
driver,
&mut src,
driver::Operation::Clean,
self.context.with_path(bstr_path.as_ref()),
)? {
if !apply_ident_filter && encoding.is_none() && !would_convert_eol {
// Note that this is not typically a benefit in terms of saving memory as most filters
// aren't expected to make the output file larger. It's more about who is waiting for the filter's
// output to arrive, which won't be us now. For `git-lfs` it definitely won't matter though.
return Ok(ToGitOutcome::Process(read));
}
// More steps follow which operate on buffers, so the driver output has to be held in memory.
self.bufs.clear();
read.read_to_end(&mut self.bufs.src)?;
in_buffer = true;
}
}
// No driver output was buffered, but a buffer-based step is expected to run: read `src` itself.
// Note that IO errors of this read are reported as `to_git::Error::ReadProcessOutputToBuffer` as well.
if !in_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) {
self.bufs.clear();
src.read_to_end(&mut self.bufs.src)?;
in_buffer = true;
}
if let Some(encoding) = encoding {
worktree::encode_to_git(
&self.bufs.src,
encoding,
&mut self.bufs.dest,
// Only encodings listed in `encodings_with_roundtrip_check` request a failing round-trip check.
if self.options.encodings_with_roundtrip_check.contains(&encoding) {
worktree::encode_to_git::RoundTripCheck::Fail
} else {
worktree::encode_to_git::RoundTripCheck::Skip
},
)?;
// NOTE(review): presumably exchanges `src` and `dest` so this step's output feeds the next one - confirm in `bufs`.
self.bufs.swap();
}
// The actual `eol` step, now with the caller's index lookup and the configured round-trip check.
// The buffers are only swapped if it reports `true`.
if eol::convert_to_git(
&self.bufs.src,
digest,
&mut self.bufs.dest,
&mut |buf| index_object(buf),
eol::convert_to_git::Options {
round_trip_check: self.options.crlf_roundtrip_check.to_eol_roundtrip_check(rela_path),
config: self.options.eol_config,
},
)? {
self.bufs.swap();
}
// `ident::undo()` is only called if the ident filter is enabled, and the buffers are only swapped if it returns `true`.
if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest) {
self.bufs.swap();
}
// If nothing was read into the buffers, hand `src` back to the caller.
Ok(if in_buffer {
ToGitOutcome::Buffer(&self.bufs.src)
} else {
ToGitOutcome::Unchanged(src)
})
}
/// Convert a `src` buffer located at `rela_path` (in the index) from what's in `git` to the worktree representation,
/// asking for `attributes` with `rela_path` as first argument to configure the operation automatically.
/// `can_delay` defines if long-running processes can delay their response, and if they *choose* to the caller has to
/// specifically deal with it by interacting with the [`driver_state`][Pipeline::driver_state_mut()] directly.
///
/// The reason `src` is a buffer is to indicate that `git` generally doesn't do well streaming data, so it should be small enough
/// to be performant while being held in memory. This is typically the case, especially if `git-lfs` is used as intended.
///
/// The steps run in this order: `ident`, `eol`, encoding, and finally the driver (`smudge`).
/// If the driver yields a result, it is returned as `Process`, possibly delayed.
pub fn convert_to_worktree<'input>(
&mut self,
src: &'input [u8],
rela_path: &BStr,
attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome),
can_delay: driver::apply::Delay,
) -> Result<ToWorktreeOutcome<'input, '_>, to_worktree::Error> {
let Configuration {
driver,
digest,
_attr_digest: _,
encoding,
apply_ident_filter,
} = Configuration::at_path(
rela_path,
&self.options.drivers,
&mut self.attrs,
attributes,
self.options.eol_config,
)?;
// NOTE(review): presumably a view on the buffers which starts out reading from the caller's `src` - confirm in `with_src()`.
let mut bufs = self.bufs.with_src(src);
// `src_and_dest()` is called anew for each step as the preceding `swap()` may have changed what is source and destination.
let (src, dest) = bufs.src_and_dest();
if apply_ident_filter && ident::apply(src, self.options.object_hash, dest) {
bufs.swap();
}
let (src, dest) = bufs.src_and_dest();
if eol::convert_to_worktree(src, digest, dest, self.options.eol_config) {
bufs.swap();
};
if let Some(encoding) = encoding {
let (src, dest) = bufs.src_and_dest();
worktree::encode_to_worktree(src, encoding, dest)?;
bufs.swap();
}
// The driver goes last and receives the output of all previous steps as its input.
if let Some(driver) = driver {
let (mut src, _dest) = bufs.src_and_dest();
if let Some(maybe_delayed) = self.processes.apply_delayed(
driver,
&mut src,
driver::Operation::Smudge,
can_delay,
self.context.with_path(rela_path),
)? {
return Ok(ToWorktreeOutcome::Process(maybe_delayed));
}
}
// NOTE(review): `ro_src` still being set presumably means that no step replaced the input - confirm in `bufs`.
Ok(match bufs.ro_src {
Some(src) => ToWorktreeOutcome::Unchanged(src),
None => ToWorktreeOutcome::Buffer(bufs.src),
})
}
}
/// The outcome of converting data, with zero or more filters applied, into what is to be stored in git.
pub enum ToGitOutcome<'pipeline, R> {
    /// The input was left as is, and the original reader is handed back for consumption.
    Unchanged(R),
    /// Only an external filter was applied, and its output *must be consumed* by the caller.
    Process(Box<dyn Read + 'pipeline>),
    /// A view on the in-memory result of one or more filters, at least one of which can't stream.
    ///
    /// This is the case if an `eol`, `working-tree-encoding` or `ident` filter is applied, possibly after an external filter.
    Buffer(&'pipeline [u8]),
}
/// The result of a conversion with zero or more filters.
///
/// ### Panics
///
/// If `std::io::Read` is used on it and the output is delayed, a panic will occur. The caller is responsible for either disallowing delayed
/// results or, if allowed, handling them. Use [`is_delayed()`][Self::is_delayed()] to find out.
pub enum ToWorktreeOutcome<'input, 'pipeline> {
/// The original input wasn't changed and the original buffer is present.
Unchanged(&'input [u8]),
/// A reference to the result of one or more filters of which one didn't support streaming.
///
/// This can happen if an `eol`, `working-tree-encoding` or `ident` filter is applied, possibly on top of an external filter.
Buffer(&'pipeline [u8]),
/// An external filter (and only that) was applied and its results *have to be consumed*. Note that the output might be delayed,
/// which requires special handling to eventually receive it.
Process(driver::apply::MaybeDelayed<'pipeline>),
}
impl<'input, 'pipeline> ToWorktreeOutcome<'input, 'pipeline> {
    /// Tell whether the driver chose to delay its output.
    ///
    /// If `true`, reading from this instance via `std::io::Read` panics, so the delayed output has to be handled differently.
    pub fn is_delayed(&self) -> bool {
        match self {
            Self::Process(driver::apply::MaybeDelayed::Delayed(_)) => true,
            Self::Process(driver::apply::MaybeDelayed::Immediate(_)) | Self::Unchanged(_) | Self::Buffer(_) => false,
        }
    }

    /// Return `false` only if the input buffer is handed back as is, and `true` for every other outcome.
    pub fn is_changed(&self) -> bool {
        match self {
            Self::Unchanged(_) => false,
            Self::Buffer(_) | Self::Process(_) => true,
        }
    }

    /// Provide access to the contained bytes, or `None` if this is the output of a driver process.
    ///
    /// This is most useful if it's known that no driver is configured, as a driver may cause a stream to be returned
    /// instead of a buffer.
    pub fn as_bytes(&self) -> Option<&[u8]> {
        match self {
            Self::Process(_) => None,
            Self::Unchanged(bytes) => Some(bytes),
            Self::Buffer(bytes) => Some(bytes),
        }
    }

    /// Provide the stream to read the driver's output from, if there is one.
    ///
    /// `None` is returned for buffers and if the driver delayed its output.
    pub fn as_read(&mut self) -> Option<&mut (dyn std::io::Read + '_)> {
        match self {
            Self::Process(driver::apply::MaybeDelayed::Immediate(stream)) => Some(stream),
            Self::Process(driver::apply::MaybeDelayed::Delayed(_)) | Self::Unchanged(_) | Self::Buffer(_) => None,
        }
    }
}
impl<'input, 'pipeline> std::io::Read for ToWorktreeOutcome<'input, 'pipeline> {
    /// Read from the contained buffer or from the immediately available driver output.
    ///
    /// Panics if the driver output is delayed.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // Pick the source to read from first, then perform the read in a single place.
        let source: &mut dyn std::io::Read = match self {
            Self::Process(driver::apply::MaybeDelayed::Delayed(_)) => {
                panic!("BUG: must not try to read delayed output")
            }
            Self::Process(driver::apply::MaybeDelayed::Immediate(stream)) => stream,
            Self::Unchanged(bytes) => bytes,
            Self::Buffer(bytes) => bytes,
        };
        source.read(buf)
    }
}
impl<'pipeline, R> std::io::Read for ToGitOutcome<'pipeline, R>
where
R: std::io::Read,
{
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
match self {
ToGitOutcome::Unchanged(r) => r.read(buf),
ToGitOutcome::Process(r) => r.read(buf),
ToGitOutcome::Buffer(r) => r.read(buf),
}
}
}
impl<'a, R> ToGitOutcome<'a, R>
where
R: std::io::Read,
{
/// If we contain a buffer, and not a stream, return it.
pub fn as_bytes(&self) -> Option<&'a [u8]> {
match self {
ToGitOutcome::Unchanged(_) | ToGitOutcome::Process(_) => None,
ToGitOutcome::Buffer(b) => Some(b),
}
}
/// Return a stream to read the drivers output from. This is only possible if there is only a driver, and no other filter.
pub fn as_read(&mut self) -> Option<&mut (dyn std::io::Read + '_)> {
match self {
ToGitOutcome::Process(read) => Some(read),
ToGitOutcome::Unchanged(read) => Some(read),
ToGitOutcome::Buffer(_) => None,
}
}
/// Returns `true` if the input buffer was actually changed, or `false` if it is returned directly.
pub fn is_changed(&self) -> bool {
!matches!(self, ToGitOutcome::Unchanged(_))
}
}