| //! This module contains a build plan that is created during the Cargo build
| //! routine and stored afterwards, which can later be queried, given a list of
| //! dirty files, to retrieve a queue of compiler calls to be invoked (including
| //! appropriate arguments and env variables).
| //! The underlying structure is a dependency graph between simplified units
| //! (package id and crate target kind), as opposed to Cargo units (package with
| //! target info, including crate target kind, profile and host/target kind).
| //! This is used for quick-check recompilation and does not aim to reimplement
| //! all the intricacies of Cargo.
| //! The unit dependency graph in Cargo also distinguishes between compiling a
| //! build script, running it, collecting its output to modify subsequent
| //! compilations, etc. Since build script executions (as opposed to
| //! compilations) are not exposed via the `Executor` trait in Cargo, we simply
| //! coalesce every unit with the same package and crate target kind (e.g., both
| //! building and running build scripts).
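| //!
| //! For example, after a successful build, editing `src/lib.rs` of a workspace
| //! member should map to that member's `lib` unit and its reverse dependencies,
| //! yielding a short queue of cached compiler calls instead of a full Cargo run.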
| |
| use std::collections::{HashMap, HashSet}; |
| use std::fmt; |
| use std::path::{Path, PathBuf}; |
| use std::sync::Mutex; |
| |
| use cargo::core::compiler::{CompileKind, CompileMode, Context, Unit}; |
| use cargo::core::profiles::Profile; |
| use cargo::core::{PackageId, Target, TargetKind}; |
| use cargo::util::ProcessBuilder; |
| use cargo_metadata; |
| use log::{error, trace}; |
| |
| use crate::build::plan::{BuildGraph, BuildKey, JobQueue, WorkStatus}; |
| use crate::build::rustc::src_path; |
| use crate::build::PackageArg; |
| |
| /// Main key type by which `Unit`s will be distinguished in the build plan. |
| /// In `Target` we're mostly interested in `TargetKind` (Lib, Bin, ...) and name |
| /// (e.g., we can have 2 binary targets with different names). |
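| /// For example, a package's `lib` target and a `bin` target of the same name
| /// yield two distinct keys, as do two units for the same target that differ
| /// only in `CompileMode`.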
| #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] |
| pub(crate) struct UnitKey { |
| pkg_id: PackageId, |
| target: Target, |
| mode: CompileMode, |
| } |
| |
| /// Holds the information about how exactly the build will be performed for a
| /// given workspace with the specified features.
| #[derive(Debug, Default)] |
| pub(crate) struct CargoPlan { |
| /// Stores full Cargo `Unit` data for the first processed unit with a given key.
| pub(crate) units: HashMap<UnitKey, OwnedUnit>, |
| /// Main dependency graph between the simplified units. |
| pub(crate) dep_graph: HashMap<UnitKey, HashSet<UnitKey>>, |
| /// Reverse dependency graph that's used to construct a dirty compiler call queue. |
| pub(crate) rev_dep_graph: HashMap<UnitKey, HashSet<UnitKey>>, |
| /// Cached compiler calls used when creating a compiler call queue. |
| pub(crate) compiler_jobs: HashMap<UnitKey, ProcessBuilder>, |
| /// Calculated input files that units depend on.
| pub(crate) input_files: HashMap<UnitKey, Vec<PathBuf>>, |
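| /// Reverse mapping from an input file to the units it belongs to.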
| pub(crate) file_key_mapping: HashMap<PathBuf, HashSet<UnitKey>>, |
| /// An object for finding the package to which a file belongs, and thus
| /// inferring a package argument.
| package_map: Option<PackageMap>, |
| /// Packages (names) for which this build plan was prepared. |
| /// Used to detect if the plan can be reused when building certain packages.
| built_packages: HashSet<String>, |
| } |
| |
| impl CargoPlan { |
| pub(crate) fn with_manifest(manifest_path: &Path) -> CargoPlan { |
| CargoPlan { package_map: Some(PackageMap::new(manifest_path)), ..Default::default() } |
| } |
| |
| pub(crate) fn with_packages(manifest_path: &Path, pkgs: HashSet<String>) -> CargoPlan { |
| CargoPlan { built_packages: pkgs, ..Self::with_manifest(manifest_path) } |
| } |
| |
| /// Returns `true` if the build plan has cached compiler invocations and a dep
| /// graph, so it may be able to return a job queue via `prepare_work`.
| pub(crate) fn is_ready(&self) -> bool { |
| !self.compiler_jobs.is_empty() |
| } |
| |
| /// Caches a given compiler invocation in `ProcessBuilder` for a given
| /// `PackageId` and `TargetKind` in `Target`, to be used when processing a
| /// cached build plan.
| pub(crate) fn cache_compiler_job( |
| &mut self, |
| id: PackageId, |
| target: &Target, |
| mode: CompileMode, |
| cmd: &ProcessBuilder, |
| ) { |
| let unit_key = UnitKey { pkg_id: id, target: target.clone(), mode }; |
| self.compiler_jobs.insert(unit_key, cmd.clone()); |
| } |
| |
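| /// Caches the canonicalized input files for a given unit and records the
| /// reverse file -> unit mapping, which is later used to determine dirty
| /// units from a set of modified files.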
| pub(crate) fn cache_input_files( |
| &mut self, |
| id: PackageId, |
| target: &Target, |
| mode: CompileMode, |
| input_files: Vec<PathBuf>, |
| cwd: Option<&Path>, |
| ) { |
| let input_files: Vec<_> = input_files |
| .iter() |
| .filter_map(|file| src_path(cwd, file)) |
| .filter_map(|file| match std::fs::canonicalize(&file) { |
| Ok(file) => Some(file), |
| Err(err) => { |
| error!("Couldn't canonicalize `{}`: {}", file.display(), err); |
| None |
| } |
| }) |
| .collect(); |
| |
| let unit_key = UnitKey { pkg_id: id, target: target.clone(), mode }; |
| trace!("Caching these files: {:#?} for {:?} key", &input_files, &unit_key); |
| |
| // Create reverse file -> unit mapping (to be used for dirty unit calculation). |
| for file in &input_files { |
| self.file_key_mapping.entry(file.to_path_buf()).or_default().insert(unit_key.clone()); |
| } |
| |
| self.input_files.insert(unit_key, input_files); |
| } |
| |
| /// Places a given `Unit`, along with its `Unit` dependencies (recursively) |
| /// into the dependency graph as long as the passed `Unit` isn't filtered |
| /// out by the `filter` closure. |
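| /// An illustrative filter (hypothetical) might limit the graph to workspace
| /// members, e.g. `|unit: Unit<'_>| member_ids.contains(&unit.pkg.package_id())`.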
| pub(crate) fn emplace_dep_with_filter<'a, Filter>( |
| &mut self, |
| unit: Unit<'a>, |
| cx: &Context<'a, '_>, |
| filter: &Filter, |
| ) where |
| Filter: Fn(Unit<'a>) -> bool, |
| { |
| if !filter(unit) { |
| return; |
| } |
| |
| let key = UnitKey::from(unit); |
| // Process only units that are not yet in the dep graph.
| if self.dep_graph.get(&key).is_some() {
| return;
| }
|
| // Keep all the additional `Unit` information for a given unit. (It's worth
| // remembering that units are only discriminated by their `UnitKey` here, so
| // only the first occurrence will be saved.)
| self.units.insert(key.clone(), unit.into());
| |
| // Fetch and insert relevant unit dependencies to the forward dep graph. |
| let units = cx.dep_targets(&unit); |
| let dep_keys: HashSet<UnitKey> = units |
| .iter() |
| .copied() |
| // We might not want certain deps to be added transitively (e.g. |
| // when creating only a sub-dep-graph, limiting the scope). |
| .filter(|unit| filter(*unit)) |
| .map(UnitKey::from) |
| // Units can depend on others with different `Profile`s or host/target
| // `CompileKind`s despite having the same `UnitKey`.
| // We coalesce them here while creating the `UnitKey` dep graph.
| .filter(|dep| key != *dep) |
| .collect(); |
| self.dep_graph.insert(key.clone(), dep_keys.clone()); |
| |
| // We also need to track reverse dependencies here, as it's needed |
| // to quickly construct a work sub-graph from a set of dirty units. |
| self.rev_dep_graph.entry(key.clone()).or_insert_with(HashSet::new); |
| for unit in dep_keys { |
| let revs = self.rev_dep_graph.entry(unit).or_insert_with(HashSet::new); |
| revs.insert(key.clone()); |
| } |
| |
| // Recursively process other remaining forward dependencies. |
| for unit in units { |
| self.emplace_dep_with_filter(unit, cx, filter); |
| } |
| } |
| |
| /// TODO: improve detecting dirty crate targets for a set of dirty file paths.
| /// This uses a lousy heuristic of checking the path prefix for a given crate
| /// target to determine whether a given unit (crate target) is dirty. This can
| /// easily backfire, e.g., when a build script is under `src/`. Any change to a
| /// file under `src/` would imply the build script is always dirty, so we would
| /// never do the work ourselves and would always offload to Cargo in such a
| /// case. Because of that, build scripts are checked separately and only other
| /// crate targets are checked with path prefixes.
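| /// As a worked example (hypothetical paths): a modified `/ws/foo/src/lib.rs`
| /// shares 3 path components with a target rooted at `/ws/foo/src` but only 1
| /// with one rooted at `/ws/bar/src`, so only units under the former are
| /// marked dirty.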
| fn fetch_dirty_units<T: AsRef<Path>>(&self, files: &[T]) -> HashSet<UnitKey> { |
| let mut result = HashSet::new(); |
| |
| let build_scripts: HashMap<&Path, UnitKey> = self |
| .units |
| .iter() |
| .filter(|(UnitKey { target, .. }, _)| { |
| target.is_custom_build() && target.src_path().is_path() |
| }) |
| .map(|(key, unit)| (unit.target.src_path().path().unwrap(), key.clone())) |
| .collect(); |
| let other_targets: HashMap<UnitKey, &Path> = self |
| .units |
| .iter() |
| .filter(|(UnitKey { target, .. }, _)| !target.is_custom_build()) |
| .map(|(key, unit)| { |
| ( |
| key.clone(), |
| unit.target |
| .src_path() |
| .path() |
| .expect("normal targets should have a path") |
| .parent() |
| .expect("no parent for src_path"), |
| ) |
| }) |
| .collect(); |
| |
| for modified in files.iter().map(AsRef::as_ref) { |
| if let Some(unit) = build_scripts.get(modified) { |
| result.insert(unit.clone()); |
| } else { |
| // Not a build script, so we associate a dirty file with a
| // package by finding the longest (most specific) path prefix.
| let matching_prefix_components = |a: &Path, b: &Path| -> usize { |
| assert!(a.is_absolute() && b.is_absolute()); |
| a.components() |
| .zip(b.components()) |
| .skip(1) // Skip RootDir |
| .take_while(|&(x, y)| x == y) |
| .count() |
| }; |
| // Since a package can correspond to many units (e.g., compiled |
| // as a regular binary or a test harness for unit tests), we |
| // collect every unit having the longest path prefix. |
| let max_matching_prefix = other_targets |
| .values() |
| .map(|src_dir| matching_prefix_components(modified, src_dir)) |
| .max(); |
| |
| match max_matching_prefix { |
| Some(0) => error!( |
| "Modified file {} didn't correspond to any buildable unit!", |
| modified.display() |
| ), |
| Some(max) => { |
| let dirty_units = other_targets |
| .iter() |
| .filter(|(_, dir)| max == matching_prefix_components(modified, dir)) |
| .map(|(unit, _)| unit); |
| |
| result.extend(dirty_units.cloned()); |
| } |
| None => {} // Possible that only build scripts were modified |
| } |
| } |
| } |
| result |
| } |
| |
| /// For a given set of dirty units, returns the set of all units that have to
| /// be rebuilt transitively, i.e., the dirty units along with everything that
| /// (transitively) depends on them.
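| /// E.g., if `bin` depends on a dirty `lib`, then `bin` is returned as
| /// transitively dirty as well, since it consumes `lib`'s build artifacts.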
| fn transitive_dirty_units(&self, dirties: &HashSet<UnitKey>) -> HashSet<UnitKey> {
| let mut transitive = HashSet::new();
| // Walk through the rev dep graph using a stack of nodes to collect
| // transitively every dirty node.
| let mut to_process: Vec<_> = dirties.iter().cloned().collect();
| while let Some(top) = to_process.pop() {
| // `insert` returns `false` if the node was already visited.
| if !transitive.insert(top.clone()) {
| continue;
| }
|
| // Queue every not-yet-visited rev dep of the processed node, since
| // everything that depends on a dirty unit has to be rebuilt as well.
| let new_rev_deps = self
| .rev_dep_graph
| .get(&top)
| .expect("missing key in rev_dep_graph")
| .iter()
| .filter(|dep| !transitive.contains(*dep));
| for rev_dep in new_rev_deps {
| to_process.push(rev_dep.clone());
| }
| }
| transitive
| }
| |
| /// Creates a dirty reverse dependency graph using a set of given dirty units. |
| fn dirty_rev_dep_graph( |
| &self, |
| dirties: &HashSet<UnitKey>, |
| ) -> HashMap<UnitKey, HashSet<UnitKey>> { |
| let dirties = self.transitive_dirty_units(dirties); |
| trace!("transitive_dirty_units: {:?}", dirties); |
| |
| self.rev_dep_graph |
| .iter() |
| // Remove nodes that are not dirty. |
| .filter(|&(unit, _)| dirties.contains(unit)) |
| // Retain only dirty dependencies of the ones that are dirty. |
| .map(|(k, deps)| { |
| (k.clone(), deps.iter().cloned().filter(|d| dirties.contains(d)).collect()) |
| }) |
| .collect() |
| } |
| |
| /// Returns a topological ordering of a connected DAG of rev deps. The |
| /// output is a stack of units that can be linearly rebuilt, starting from |
| /// the last element. |
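| /// E.g., for a dirty `lib` whose only reverse dependency is `bin`, the output
| /// stack is `[bin, lib]`: popping from the end rebuilds `lib` before `bin`.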
| fn topological_sort(&self, dirties: &HashMap<UnitKey, HashSet<UnitKey>>) -> Vec<UnitKey> { |
| let mut visited = HashSet::new(); |
| let mut output = vec![]; |
| |
| for k in dirties.keys() { |
| if !visited.contains(k) { |
| dfs(k, dirties, &mut visited, &mut output);
| } |
| } |
| |
| return output; |
| |
| // Process the graph depth-first, recursively. A node is pushed to the
| // output only after all of its neighbours have been, ensuring a
| // topological ordering.
| fn dfs( |
| unit: &UnitKey, |
| graph: &HashMap<UnitKey, HashSet<UnitKey>>, |
| visited: &mut HashSet<UnitKey>, |
| output: &mut Vec<UnitKey>, |
| ) { |
| if !visited.contains(unit) { |
| visited.insert(unit.clone()); |
| for neighbour in graph.get(unit).into_iter().flatten() { |
| dfs(neighbour, graph, visited, output); |
| } |
| output.push(unit.clone()); |
| } |
| } |
| } |
| |
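| /// Given a set of modified files, returns the work required to rebuild:
| /// either a queue of cached compiler calls to replay, or a request to
| /// offload the build to Cargo (recreating the plan along the way).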
| pub(crate) fn prepare_work<T: AsRef<Path> + fmt::Debug>(&self, modified: &[T]) -> WorkStatus { |
| if !self.is_ready() || self.package_map.is_none() { |
| return WorkStatus::NeedsCargo(PackageArg::Default); |
| } |
| |
| let dirty_packages = self.package_map.as_ref().unwrap().compute_dirty_packages(modified); |
| |
| let needs_more_packages = dirty_packages.difference(&self.built_packages).next().is_some(); |
| |
| let needed_packages = self.built_packages.union(&dirty_packages).cloned().collect(); |
| |
| // We modified a file in a package that is not included in the cached
| // build plan -- run Cargo to recreate the build plan, including it.
| if needs_more_packages { |
| return WorkStatus::NeedsCargo(PackageArg::Packages(needed_packages)); |
| } |
| |
| let dirties = self.fetch_dirty_units(modified); |
| trace!("fetch_dirty_units: for files {:?}, these units are dirty: {:?}", modified, dirties,); |
| |
| if dirties.iter().any(|UnitKey { target, .. }| *target.kind() == TargetKind::CustomBuild) { |
| WorkStatus::NeedsCargo(PackageArg::Packages(needed_packages)) |
| } else { |
| let graph = self.dirty_rev_dep_graph(&dirties); |
| trace!("Constructed dirty rev dep graph: {:?}", graph); |
| |
| if graph.is_empty() { |
| return WorkStatus::NeedsCargo(PackageArg::Default); |
| } |
| |
| let queue = self.topological_sort(&graph); |
| trace!("Topologically sorted dirty graph: {:?} {}", queue, self.is_ready()); |
| let jobs: Option<Vec<_>> = |
| queue.iter().map(|x| self.compiler_jobs.get(x).cloned()).collect(); |
| |
| // It is possible that we want a job which is not in our cache (compiler_jobs):
| // for example, we might be building a workspace with an error in one crate,
| // so other crates within the workspace that depend on the erroring one have
| // never been built. In that case we need to build from scratch so that
| // everything is in our cache, or we cope with the error. In the error case,
| // jobs will be None.
| match jobs { |
| None => WorkStatus::NeedsCargo(PackageArg::Default), |
| Some(jobs) => { |
| assert!(!jobs.is_empty()); |
| WorkStatus::Execute(JobQueue::with_commands(jobs)) |
| } |
| } |
| } |
| } |
| } |
| |
| /// Maps paths to packages.
| ///
| /// The point of the `PackageMap` is to detect if additional packages need to
| /// be included in the cached build plan. The cache can represent only a subset
| /// of the entire workspace, hence we need to detect if a modified package lies
| /// outside the cached build plan - if so, we need to recreate the plan,
| /// including the new package.
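| /// For a workspace with members at `/ws/foo` and `/ws/bar` (hypothetical
| /// paths), `package_paths` maps those directories to `"foo"` and `"bar"`, so
| /// a modified `/ws/bar/src/main.rs` resolves to the `"bar"` package.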
| #[derive(Debug)] |
| struct PackageMap { |
| /// A map from a manifest directory to the package name.
| package_paths: HashMap<PathBuf, String>,
| /// A map from a file's path to the package it belongs to.
| map_cache: Mutex<HashMap<PathBuf, String>>,
| } |
| |
| impl PackageMap { |
| fn new(manifest_path: &Path) -> PackageMap { |
| PackageMap { |
| package_paths: Self::discover_package_paths(manifest_path), |
| map_cache: Mutex::new(HashMap::new()), |
| } |
| } |
| |
| /// Finds each package in the workspace and records its root directory and
| /// package name.
| fn discover_package_paths(manifest_path: &Path) -> HashMap<PathBuf, String> { |
| trace!("read metadata {:?}", manifest_path); |
| cargo_metadata::MetadataCommand::new() |
| .manifest_path(manifest_path) |
| .exec() |
| .iter() |
| .flat_map(|meta| meta.workspace_members.iter().map(move |id| &meta[id])) |
| .filter_map(|pkg| { |
| let dir = pkg.manifest_path.parent()?.to_path_buf(); |
| Some((dir, pkg.name.clone())) |
| }) |
| .collect() |
| } |
| |
| /// Given a set of modified files, returns the set of corresponding dirty packages.
| fn compute_dirty_packages<T: AsRef<Path> + fmt::Debug>( |
| &self, |
| modified_files: &[T], |
| ) -> HashSet<String> { |
| modified_files.iter().filter_map(|p| self.map(p.as_ref())).collect() |
| } |
| |
| /// Maps a file to the package to which it belongs.
| /// We do this by walking up the directory tree from `path` until we reach
| /// one of the recorded package root directories.
| fn map(&self, path: &Path) -> Option<String> { |
| if self.package_paths.is_empty() { |
| return None; |
| } |
| |
| let mut map_cache = self.map_cache.lock().unwrap(); |
| if map_cache.contains_key(path) { |
| return Some(map_cache[path].clone()); |
| } |
| |
| let result = Self::map_uncached(path, &self.package_paths)?; |
| |
| map_cache.insert(path.to_owned(), result.clone()); |
| Some(result) |
| } |
| |
| fn map_uncached(path: &Path, package_paths: &HashMap<PathBuf, String>) -> Option<String> { |
| if package_paths.is_empty() { |
| return None; |
| } |
| |
| match package_paths.get(path) { |
| Some(package) => Some(package.clone()), |
| None => Self::map_uncached(path.parent()?, package_paths), |
| } |
| } |
| } |
| |
| impl From<Unit<'_>> for UnitKey { |
| fn from(unit: Unit<'_>) -> UnitKey { |
| UnitKey { pkg_id: unit.pkg.package_id(), target: unit.target.clone(), mode: unit.mode } |
| } |
| } |
| |
| /// An owned version of `cargo::core::Unit`.
| #[derive(Hash, PartialEq, Eq, Debug, Clone)]
| pub(crate) struct OwnedUnit { |
| pub(crate) id: PackageId, |
| pub(crate) target: Target, |
| pub(crate) profile: Profile, |
| pub(crate) kind: CompileKind, |
| pub(crate) mode: CompileMode, |
| } |
| |
| impl From<Unit<'_>> for OwnedUnit { |
| fn from(unit: Unit<'_>) -> OwnedUnit { |
| OwnedUnit { |
| id: unit.pkg.package_id().to_owned(), |
| target: unit.target.clone(), |
| profile: unit.profile, |
| kind: unit.kind, |
| mode: unit.mode, |
| } |
| } |
| } |
| |
| impl From<&OwnedUnit> for UnitKey { |
| fn from(unit: &OwnedUnit) -> UnitKey { |
| UnitKey { pkg_id: unit.id, target: unit.target.clone(), mode: unit.mode } |
| } |
| } |
| |
| impl BuildKey for OwnedUnit { |
| type Key = UnitKey; |
| |
| fn key(&self) -> UnitKey { |
| UnitKey::from(self) |
| } |
| } |
| |
| impl BuildGraph for CargoPlan { |
| type Unit = OwnedUnit; |
| |
| fn units(&self) -> Vec<&Self::Unit> { |
| self.units.values().collect() |
| } |
| fn get(&self, key: <Self::Unit as BuildKey>::Key) -> Option<&Self::Unit> { |
| self.units.get(&key) |
| } |
| fn get_mut(&mut self, key: <Self::Unit as BuildKey>::Key) -> Option<&mut Self::Unit> { |
| self.units.get_mut(&key) |
| } |
| fn deps(&self, key: <Self::Unit as BuildKey>::Key) -> Vec<&Self::Unit> { |
| self.dep_graph |
| .get(&key) |
| .map(|d| d.iter().map(|d| &self.units[d]).collect()) |
| .unwrap_or_default() |
| } |
| |
| fn add<T: Into<Self::Unit>>(&mut self, unit: T, deps: Vec<T>) { |
| let unit = unit.into(); |
| // Units can depend on others with different `Profile`s or host/target
| // `CompileKind`s despite having the same `UnitKey`.
| // We coalesce them here while creating the `UnitKey` dep graph.
| // TODO: Are we sure? Can we figure that out? |
| let deps = deps.into_iter().map(|d| d.into()).filter(|dep| unit.key() != dep.key()); |
| |
| for dep in deps { |
| self.dep_graph.entry(unit.key()).or_insert_with(HashSet::new).insert(dep.key()); |
| self.rev_dep_graph.entry(dep.key()).or_insert_with(HashSet::new).insert(unit.key()); |
| |
| self.units.entry(dep.key()).or_insert(dep); |
| } |
| |
| // We expect these entries to be present for each unit in the graph. |
| self.dep_graph.entry(unit.key()).or_insert_with(HashSet::new); |
| self.rev_dep_graph.entry(unit.key()).or_insert_with(HashSet::new); |
| |
| self.units.entry(unit.key()).or_insert(unit); |
| } |
| |
| fn dirties<T: AsRef<Path>>(&self, files: &[T]) -> Vec<&Self::Unit> { |
| self.fetch_dirty_units(files) |
| .iter() |
| .map(|key| self.units.get(key).expect("dirties")) |
| .collect() |
| } |
| |
| fn dirties_transitive<T: AsRef<Path>>(&self, files: &[T]) -> Vec<&Self::Unit> { |
| let dirties = self.fetch_dirty_units(files); |
| |
| self.transitive_dirty_units(&dirties) |
| .iter() |
| .map(|key| self.units.get(key).expect("dirties_transitive")) |
| .collect() |
| } |
| |
| fn topological_sort(&self, units: Vec<&Self::Unit>) -> Vec<&Self::Unit> { |
| let keys = units.into_iter().map(BuildKey::key).collect(); |
| let graph = self.dirty_rev_dep_graph(&keys); |
| |
| CargoPlan::topological_sort(self, &graph) |
| .iter() |
| .map(|key| self.units.get(key).expect("topological_sort")) |
| .collect() |
| } |
| |
| fn prepare_work<T: AsRef<Path> + std::fmt::Debug>(&self, files: &[T]) -> WorkStatus { |
| CargoPlan::prepare_work(self, files) |
| } |
| } |
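|
| // A minimal illustrative test of the pure path-walking helper above; the
| // paths and package names are hypothetical, and this is a sketch rather than
| // exhaustive coverage.
| #[cfg(test)]
| mod tests {
| use super::*;
|
| #[test]
| fn map_uncached_walks_up_to_innermost_package_root() {
| let package_paths: HashMap<PathBuf, String> = vec![
| (PathBuf::from("/ws/foo"), "foo".to_string()),
| (PathBuf::from("/ws/foo/nested-bar"), "bar".to_string()),
| ]
| .into_iter()
| .collect();
|
| // A file under the nested package resolves to the innermost root...
| assert_eq!(
| PackageMap::map_uncached(Path::new("/ws/foo/nested-bar/src/lib.rs"), &package_paths),
| Some("bar".to_string())
| );
| // ...while a file outside any recorded root resolves to nothing.
| assert_eq!(
| PackageMap::map_uncached(Path::new("/elsewhere/main.rs"), &package_paths),
| None
| );
| }
| }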