diff --git a/binaries/cli/src/command/build/local.rs b/binaries/cli/src/command/build/local.rs index 32c7b319..7f6c2557 100644 --- a/binaries/cli/src/command/build/local.rs +++ b/binaries/cli/src/command/build/local.rs @@ -2,7 +2,7 @@ use std::{collections::BTreeMap, path::PathBuf}; use colored::Colorize; use dora_core::{ - build::{BuildInfo, BuildLogger, Builder, GitManager, LogLevelOrStdout}, + build::{BuildInfo, BuildLogger, Builder, GitManager, LogLevelOrStdout, PrevGitSource}, descriptor::{Descriptor, DescriptorExt}, }; use dora_message::{common::GitSource, id::NodeId}; @@ -52,13 +52,17 @@ async fn build_dataflow( let node_id = node.id.clone(); let git_source = git_sources.get(&node_id).cloned(); let prev_git_source = prev_git_sources.get(&node_id).cloned(); + let prev_git = prev_git_source.map(|prev_source| PrevGitSource { + still_needed_for_this_build: git_sources.values().any(|s| s == &prev_source), + git_source: prev_source, + }); let task = builder .clone() .build_node( node, git_source, - prev_git_source, + prev_git, LocalBuildLogger { node_id: node_id.clone(), }, diff --git a/binaries/daemon/src/lib.rs b/binaries/daemon/src/lib.rs index c1e39d2b..c108410d 100644 --- a/binaries/daemon/src/lib.rs +++ b/binaries/daemon/src/lib.rs @@ -2,7 +2,7 @@ use aligned_vec::{AVec, ConstAlign}; use coordinator::CoordinatorEvent; use crossbeam::queue::ArrayQueue; use dora_core::{ - build::{self, BuildInfo, GitManager}, + build::{self, BuildInfo, GitManager, PrevGitSource}, config::{DataId, Input, InputMapping, NodeId, NodeRunConfig, OperatorId}, descriptor::{ read_as_descriptor, CoreNodeKind, Descriptor, DescriptorExt, ResolvedNode, RuntimeNode, @@ -1011,6 +1011,10 @@ impl Daemon { logger.log(LogLevel::Info, "building").await; let git_source = git_sources.get(&node_id).cloned(); let prev_git_source = prev_git_sources.get(&node_id).cloned(); + let prev_git = prev_git_source.map(|prev_source| PrevGitSource { + still_needed_for_this_build: git_sources.values().any(|s| s == &prev_source), + git_source: prev_source, + }); let logger_cloned = logger .try_clone_impl() @@ -1028,7 +1032,7 @@ impl Daemon { .build_node( node, git_source, - prev_git_source, + prev_git, logger_cloned, &mut self.git_manager, ) diff --git a/libraries/core/src/build/git.rs b/libraries/core/src/build/git.rs index 9e938ad4..3a27e4e1 100644 --- a/libraries/core/src/build/git.rs +++ b/libraries/core/src/build/git.rs @@ -1,5 +1,5 @@ -use crate::build::BuildLogger; -use dora_message::{common::LogLevel, descriptor::GitRepoRev, DataflowId, SessionId}; +use crate::build::{BuildLogger, PrevGitSource}; +use dora_message::{common::LogLevel, DataflowId, SessionId}; use eyre::{bail, ContextCompat, WrapErr}; use git2::FetchOptions; use itertools::Itertools; @@ -30,13 +30,19 @@ impl GitManager { pub fn choose_clone_dir( &mut self, session_id: SessionId, - repo_url: Url, + repo: String, commit_hash: String, - prev_commit_hash: Option, + prev_git: Option, target_dir: &Path, ) -> eyre::Result { + let repo_url = Url::parse(&repo).context("failed to parse git repository URL")?; let clone_dir = Self::clone_dir_path(target_dir, &repo_url, &commit_hash)?; + let prev_commit_hash = prev_git + .as_ref() + .filter(|p| p.git_source.repo == repo) + .map(|p| &p.git_source.commit_hash); + if let Some(using) = self.clones_in_use.get(&clone_dir) { if !using.is_empty() { // The directory is currently in use by another dataflow. Rebuilding @@ -58,27 +64,31 @@ impl GitManager { } } else if let Some(previous_commit_hash) = prev_commit_hash { // we might be able to update a previous clone - let prev_clone_dir = - Self::clone_dir_path(target_dir, &repo_url, &previous_commit_hash)?; + let prev_clone_dir = Self::clone_dir_path(target_dir, &repo_url, previous_commit_hash)?; - if self - .clones_in_use - .get(&prev_clone_dir) - .map(|ids| !ids.is_empty()) - .unwrap_or(false) - { - // previous clone is still in use -> we cannot rename it, but we can copy it - ReuseOptions::CopyAndFetch { - from: prev_clone_dir, - target_dir: clone_dir.clone(), - commit_hash, - } - } else if prev_clone_dir.exists() { - // there is an unused previous clone that is not in use -> rename it - ReuseOptions::RenameAndFetch { - from: prev_clone_dir, - target_dir: clone_dir.clone(), - commit_hash, + if prev_clone_dir.exists() { + let still_needed = prev_git + .map(|g| g.still_needed_for_this_build) + .unwrap_or(false); + let used_by_others = self + .clones_in_use + .get(&prev_clone_dir) + .map(|ids| !ids.is_empty()) + .unwrap_or(false); + if still_needed || used_by_others { + // previous clone is still in use -> we cannot rename it, but we can copy it + ReuseOptions::CopyAndFetch { + from: prev_clone_dir, + target_dir: clone_dir.clone(), + commit_hash, + } + } else { + // there is an unused previous clone that is no longer needed -> rename it + ReuseOptions::RenameAndFetch { + from: prev_clone_dir, + target_dir: clone_dir.clone(), + commit_hash, + } } } else { // no existing clone associated with previous build id diff --git a/libraries/core/src/build/mod.rs b/libraries/core/src/build/mod.rs index b0449d35..5e7193d5 100644 --- a/libraries/core/src/build/mod.rs +++ b/libraries/core/src/build/mod.rs @@ -33,19 +33,17 @@ impl Builder { self, node: ResolvedNode, git: Option, - prev_git: Option, + prev_git: Option, mut logger: impl BuildLogger, git_manager: &mut GitManager, ) -> eyre::Result>> { let prepared_git = if let Some(GitSource { repo, commit_hash }) = git { - let repo_url = Url::parse(&repo).context("failed to parse git repository URL")?; let target_dir = self.base_working_dir.join("git"); - let prev_hash = prev_git.filter(|p| p.repo == repo).map(|p| p.commit_hash); let git_folder = git_manager.choose_clone_dir( self.session_id, - repo_url, + repo, commit_hash, - prev_hash, + prev_git, &target_dir, )?; Some(git_folder) @@ -142,3 +140,9 @@ pub struct BuiltNode { pub struct BuildInfo { pub node_working_dirs: BTreeMap, } + +pub struct PrevGitSource { + pub git_source: GitSource, + /// `True` if any nodes of this dataflow still require the source for building. + pub still_needed_for_this_build: bool, +}