Browse Source

Convert Chinese characters to pinyin

tags/v0.3.12-fix
haixuantao 6 months ago
parent
commit
c790b65aff
4 changed files with 28 additions and 2 deletions
  1. +7
    -0
      Cargo.lock
  2. +1
    -1
      examples/speech-to-text/whisper-dev.yml
  3. +1
    -0
      node-hub/dora-rerun/Cargo.toml
  4. +19
    -1
      node-hub/dora-rerun/src/lib.rs

+ 7
- 0
Cargo.lock View File

@@ -3343,6 +3343,7 @@ dependencies = [
"eyre",
"k",
"ndarray 0.15.6",
"pinyin",
"pyo3",
"rand 0.9.1",
"rerun",
@@ -8420,6 +8421,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

[[package]]
name = "pinyin"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16f2611cd06a1ac239a0cea4521de9eb068a6ca110324ee00631aa68daa74fc0"

[[package]]
name = "piper"
version = "0.2.4"


+ 1
- 1
examples/speech-to-text/whisper-dev.yml View File

@@ -28,7 +28,7 @@ nodes:
# USE_MODELSCOPE_HUB: true

- id: dora-rerun
build: cargo build -p dora-rerun --release
build: pip install -e ../../node-hub/dora-rerun
path: dora-rerun
inputs:
original_text: dora-distil-whisper/text

+ 1
- 0
node-hub/dora-rerun/Cargo.toml View File

@@ -28,6 +28,7 @@ pyo3 = { workspace = true, features = [
], optional = true }
bytemuck = "1.20.0"
rand = "0.9.1"
pinyin = "0.10.0"


[lib]


+ 19
- 1
node-hub/dora-rerun/src/lib.rs View File

@@ -12,6 +12,7 @@ use dora_node_api::{
};
use eyre::{bail, eyre, Context, Result};

use pinyin::ToPinyin;
use rerun::{
components::ImageBuffer, external::log::warn, ImageFormat, Points2D, Points3D, SpawnOptions,
};
@@ -317,7 +318,24 @@ pub fn lib_main() -> Result<()> {
let buffer: StringArray = data.to_data().into();
buffer.iter().try_for_each(|string| -> Result<()> {
if let Some(str) = string {
rec.log(id.as_str(), &rerun::TextLog::new(str))
let chars = str.chars().collect::<Vec<_>>();
let mut new_string = vec![];
for char in chars {
// ASCII and control characters are not Chinese: keep them unchanged
if char.is_ascii() || char.is_control() {
new_string.push(char);
continue;
}
// If it is a Chinese character, replace it with its pinyin
if let Some(pinyin) = char.to_pinyin() {
for char in pinyin.with_tone().chars() {
new_string.push(char);
}
new_string.push(' ');
}
}
let pinyined_str = new_string.iter().collect::<String>();
rec.log(id.as_str(), &rerun::TextLog::new(pinyined_str))
.wrap_err("Could not log text")
} else {
Ok(())


Loading…
Cancel
Save