Browse Source

Fix GPU Memory not displayed

tags/v0.3.11-rc1
haixuanTao 10 months ago
parent
commit
26c3917470
4 changed files with 49 additions and 32 deletions
  1. +2
    -2
      Cargo.lock
  2. +35
    -19
      apis/rust/node/src/node/mod.rs
  3. +1
    -1
      libraries/extensions/telemetry/metrics/Cargo.toml
  4. +11
    -10
      node-hub/dora-llama-cpp-python/pyproject.toml

+ 2
- 2
Cargo.lock View File

@@ -6625,9 +6625,9 @@ dependencies = [

[[package]]
name = "opentelemetry-system-metrics"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61aae6ffd9e461eba3216e6bbf780724b607039441118e0c6284d98931fcbbad"
checksum = "febe29a01146e142a724009278d86d80e6924acc91cedb0f508e7e14ddd06670"
dependencies = [
"eyre",
"indexmap 2.8.0",


+ 35
- 19
apis/rust/node/src/node/mod.rs View File

@@ -35,7 +35,7 @@ use tracing::{info, warn};
use dora_metrics::init_meter_provider;
#[cfg(feature = "tracing")]
use dora_tracing::set_up_tracing;
use tokio::runtime::Handle;
use tokio::runtime::{Handle, Runtime};

pub mod arrow_utils;
mod control_channel;
@@ -43,6 +43,11 @@ mod drop_stream;

pub const ZERO_COPY_THRESHOLD: usize = 4096;

/// The Tokio runtime context a `DoraNode` holds on to (stored in its `_rt` field).
///
/// The variant is chosen during node initialization from the result of
/// `Handle::try_current()`.
enum TokioRuntime {
/// A multi-threaded runtime built by the node itself because
/// `Handle::try_current()` returned an error. The `Runtime` value is stored
/// here rather than only a clone of its handle.
// NOTE(review): presumably the owned runtime is kept so that it is not shut
// down when the initialization function returns — confirm against the Tokio
// `Runtime` drop semantics.
Runtime(Runtime),
/// A handle to the runtime that was already current when the node was
/// initialized (`Handle::try_current()` succeeded).
Handle(Handle),
}

pub struct DoraNode {
id: NodeId,
dataflow_id: DataflowId,
@@ -56,7 +61,7 @@ pub struct DoraNode {

dataflow_descriptor: Descriptor,
warned_unknown_output: BTreeSet<DataId>,
_rt: Handle,
_rt: TokioRuntime,
}

impl DoraNode {
@@ -138,26 +143,37 @@ impl DoraNode {
let input_config = run_config.inputs.clone();

let rt = match Handle::try_current() {
Ok(rt) => rt,
Err(_) => tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.enable_all()
.build()
.context("tokio runtime failed")?
.handle()
.clone(),
Ok(handle) => TokioRuntime::Handle(handle),
Err(_) => TokioRuntime::Runtime(
tokio::runtime::Builder::new_multi_thread()
.worker_threads(2)
.enable_all()
.build()
.context("tokio runtime failed")?,
),
};

let id = node_id.to_string();
let id = format!("{}/{}", dataflow_id, node_id);

#[cfg(feature = "metrics")]
rt.spawn(async {
if let Err(e) = init_meter_provider(id)
.await
.context("failed to init metrics provider")
{
warn!("could not create metric provider with err: {:#?}", e);
}
});
match &rt {
TokioRuntime::Runtime(rt) => rt.spawn(async {
if let Err(e) = init_meter_provider(id)
.await
.context("failed to init metrics provider")
{
warn!("could not create metric provider with err: {:#?}", e);
}
}),
TokioRuntime::Handle(handle) => handle.spawn(async {
if let Err(e) = init_meter_provider(id)
.await
.context("failed to init metrics provider")
{
warn!("could not create metric provider with err: {:#?}", e);
}
}),
};

let event_stream = EventStream::init(
dataflow_id,


+ 1
- 1
libraries/extensions/telemetry/metrics/Cargo.toml View File

@@ -18,4 +18,4 @@ opentelemetry-otlp = { version = "0.28.0", features = [
] }
opentelemetry_sdk = { version = "0.28.0", features = ["rt-tokio", "metrics"] }
eyre = "0.6.12"
opentelemetry-system-metrics = { version = "0.3.0" }
opentelemetry-system-metrics = { version = "0.3.1" }

+ 11
- 10
node-hub/dora-llama-cpp-python/pyproject.toml View File

@@ -8,20 +8,21 @@ readme = "README.md"
requires-python = ">=3.9"

dependencies = [
"dora-rs >= 0.3.9",
"torch == 2.4.0",
"torchvision >= 0.19",
"torchaudio >= 2.1.0",
"opencv-python >= 4.1.1",
"modelscope >= 1.18.1",
"mlx-lm>=0.21.1; sys_platform == 'darwin'",
"llama-cpp-python",
"dora-rs >= 0.3.9",
"torch == 2.4.0",
"torchvision >= 0.19",
"torchaudio >= 2.1.0",
"opencv-python >= 4.1.1",
"modelscope >= 1.18.1",
"huggingface-hub>=0.29.0",
"mlx-lm>=0.21.1; sys_platform == 'darwin'",
"llama-cpp-python",
]

[tool.uv.sources]
llama-cpp-python = [
{ index = "llama_cpp_python_metal", marker = "sys_platform == 'darwin'" },
{ index = "llama_cpp_python_cu121", marker = "sys_platform == 'linux'" },
{ index = "llama_cpp_python_metal", marker = "sys_platform == 'darwin'" },
{ index = "llama_cpp_python_cu121", marker = "sys_platform == 'linux'" },
]

[[tool.uv.index]]


Loading…
Cancel
Save