Browse Source

Fix: Don't rename git dir if other nodes of same dataflow still need it

tags/v0.3.12-rc0
Philipp Oppermann 7 months ago
parent
commit
84084b1475
Failed to extract signature
4 changed files with 55 additions and 33 deletions
  1. +6
    -2
      binaries/cli/src/command/build/local.rs
  2. +6
    -2
      binaries/daemon/src/lib.rs
  3. +34
    -24
      libraries/core/src/build/git.rs
  4. +9
    -5
      libraries/core/src/build/mod.rs

+ 6
- 2
binaries/cli/src/command/build/local.rs View File

@@ -2,7 +2,7 @@ use std::{collections::BTreeMap, path::PathBuf};

use colored::Colorize;
use dora_core::{
build::{BuildInfo, BuildLogger, Builder, GitManager, LogLevelOrStdout},
build::{BuildInfo, BuildLogger, Builder, GitManager, LogLevelOrStdout, PrevGitSource},
descriptor::{Descriptor, DescriptorExt},
};
use dora_message::{common::GitSource, id::NodeId};
@@ -52,13 +52,17 @@ async fn build_dataflow(
let node_id = node.id.clone();
let git_source = git_sources.get(&node_id).cloned();
let prev_git_source = prev_git_sources.get(&node_id).cloned();
let prev_git = prev_git_source.map(|prev_source| PrevGitSource {
still_needed_for_this_build: git_sources.values().any(|s| s == &prev_source),
git_source: prev_source,
});

let task = builder
.clone()
.build_node(
node,
git_source,
prev_git_source,
prev_git,
LocalBuildLogger {
node_id: node_id.clone(),
},


+ 6
- 2
binaries/daemon/src/lib.rs View File

@@ -2,7 +2,7 @@ use aligned_vec::{AVec, ConstAlign};
use coordinator::CoordinatorEvent;
use crossbeam::queue::ArrayQueue;
use dora_core::{
build::{self, BuildInfo, GitManager},
build::{self, BuildInfo, GitManager, PrevGitSource},
config::{DataId, Input, InputMapping, NodeId, NodeRunConfig, OperatorId},
descriptor::{
read_as_descriptor, CoreNodeKind, Descriptor, DescriptorExt, ResolvedNode, RuntimeNode,
@@ -1011,6 +1011,10 @@ impl Daemon {
logger.log(LogLevel::Info, "building").await;
let git_source = git_sources.get(&node_id).cloned();
let prev_git_source = prev_git_sources.get(&node_id).cloned();
let prev_git = prev_git_source.map(|prev_source| PrevGitSource {
still_needed_for_this_build: git_sources.values().any(|s| s == &prev_source),
git_source: prev_source,
});

let logger_cloned = logger
.try_clone_impl()
@@ -1028,7 +1032,7 @@ impl Daemon {
.build_node(
node,
git_source,
prev_git_source,
prev_git,
logger_cloned,
&mut self.git_manager,
)


+ 34
- 24
libraries/core/src/build/git.rs View File

@@ -1,5 +1,5 @@
use crate::build::BuildLogger;
use dora_message::{common::LogLevel, descriptor::GitRepoRev, DataflowId, SessionId};
use crate::build::{BuildLogger, PrevGitSource};
use dora_message::{common::LogLevel, DataflowId, SessionId};
use eyre::{bail, ContextCompat, WrapErr};
use git2::FetchOptions;
use itertools::Itertools;
@@ -30,13 +30,19 @@ impl GitManager {
pub fn choose_clone_dir(
&mut self,
session_id: SessionId,
repo_url: Url,
repo: String,
commit_hash: String,
prev_commit_hash: Option<String>,
prev_git: Option<PrevGitSource>,
target_dir: &Path,
) -> eyre::Result<GitFolder> {
let repo_url = Url::parse(&repo).context("failed to parse git repository URL")?;
let clone_dir = Self::clone_dir_path(target_dir, &repo_url, &commit_hash)?;

let prev_commit_hash = prev_git
.as_ref()
.filter(|p| p.git_source.repo == repo)
.map(|p| &p.git_source.commit_hash);

if let Some(using) = self.clones_in_use.get(&clone_dir) {
if !using.is_empty() {
// The directory is currently in use by another dataflow. Rebuilding
@@ -58,27 +64,31 @@ impl GitManager {
}
} else if let Some(previous_commit_hash) = prev_commit_hash {
// we might be able to update a previous clone
let prev_clone_dir =
Self::clone_dir_path(target_dir, &repo_url, &previous_commit_hash)?;
let prev_clone_dir = Self::clone_dir_path(target_dir, &repo_url, previous_commit_hash)?;

if self
.clones_in_use
.get(&prev_clone_dir)
.map(|ids| !ids.is_empty())
.unwrap_or(false)
{
// previous clone is still in use -> we cannot rename it, but we can copy it
ReuseOptions::CopyAndFetch {
from: prev_clone_dir,
target_dir: clone_dir.clone(),
commit_hash,
}
} else if prev_clone_dir.exists() {
// there is an unused previous clone that is not in use -> rename it
ReuseOptions::RenameAndFetch {
from: prev_clone_dir,
target_dir: clone_dir.clone(),
commit_hash,
if prev_clone_dir.exists() {
let still_needed = prev_git
.map(|g| g.still_needed_for_this_build)
.unwrap_or(false);
let used_by_others = self
.clones_in_use
.get(&prev_clone_dir)
.map(|ids| !ids.is_empty())
.unwrap_or(false);
if still_needed || used_by_others {
// previous clone is still in use -> we cannot rename it, but we can copy it
ReuseOptions::CopyAndFetch {
from: prev_clone_dir,
target_dir: clone_dir.clone(),
commit_hash,
}
} else {
// there is an unused previous clone that is no longer needed -> rename it
ReuseOptions::RenameAndFetch {
from: prev_clone_dir,
target_dir: clone_dir.clone(),
commit_hash,
}
}
} else {
// no existing clone associated with previous build id


+ 9
- 5
libraries/core/src/build/mod.rs View File

@@ -33,19 +33,17 @@ impl Builder {
self,
node: ResolvedNode,
git: Option<GitSource>,
prev_git: Option<GitSource>,
prev_git: Option<PrevGitSource>,
mut logger: impl BuildLogger,
git_manager: &mut GitManager,
) -> eyre::Result<impl Future<Output = eyre::Result<BuiltNode>>> {
let prepared_git = if let Some(GitSource { repo, commit_hash }) = git {
let repo_url = Url::parse(&repo).context("failed to parse git repository URL")?;
let target_dir = self.base_working_dir.join("git");
let prev_hash = prev_git.filter(|p| p.repo == repo).map(|p| p.commit_hash);
let git_folder = git_manager.choose_clone_dir(
self.session_id,
repo_url,
repo,
commit_hash,
prev_hash,
prev_git,
&target_dir,
)?;
Some(git_folder)
@@ -142,3 +140,9 @@ pub struct BuiltNode {
pub struct BuildInfo {
pub node_working_dirs: BTreeMap<NodeId, PathBuf>,
}

pub struct PrevGitSource {
pub git_source: GitSource,
/// `True` if any nodes of this dataflow still require the source for building.
pub still_needed_for_this_build: bool,
}

Loading…
Cancel
Save