From 7732a55200c78f9f9cf2502b3a5af8d0b3975ced Mon Sep 17 00:00:00 2001
From: guoyuanplct
Date: Fri, 16 May 2025 18:24:46 +0800
Subject: [PATCH] Add retry mechanism after deadlock timeout for c910v.

---
 .github/workflows/c910v.yml | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/c910v.yml b/.github/workflows/c910v.yml
index c5b497316..9981c437b 100644
--- a/.github/workflows/c910v.yml
+++ b/.github/workflows/c910v.yml
@@ -83,9 +83,49 @@ jobs:
 
     - name: test
       run: |
-        export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
-        qemu-riscv64 ./utest/openblas_utest
-        qemu-riscv64 ./utest/openblas_utest_ext
+        # Run a command under `timeout`, retrying only when it timed out.
+        # Arguments: the command and its arguments, as separate words.
+        # Each attempt gets time_out seconds; every timeout adds 5 seconds.
+        # Returns 0 on the first success; exits the shell with the command's
+        # status on any other failure or after the last timed-out attempt.
+        run_with_retry() {
+          local time_out=10
+          local retries=10
+          local exit_code=0
+          local i
+
+          for ((i = 1; i <= retries; i++)); do
+            # timeout sends signal 12; with --preserve-status a run killed by
+            # that signal reports 128 + 12 = 140, which is checked below.
+            if timeout -s 12 --preserve-status "$time_out" "$@"; then
+              echo "Command succeeded on attempt $i."
+              return 0
+            else
+              exit_code=$?
+              if [ "$exit_code" -eq 140 ]; then
+                echo "Attempt $i timed out (retrying...)"
+                time_out=$((time_out + 5))
+              else
+                echo "Attempt $i failed with exit code $exit_code. Aborting workflow."
+                exit "$exit_code"
+              fi
+            fi
+          done
+          echo "All $retries attempts failed, giving up."
+          echo "Final failure was due to timeout."
+          echo "Aborting workflow."
+          exit "$exit_code"
+        }
+        export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
+        if ! QEMU_BIN=$(command -v qemu-riscv64); then
+          echo "qemu-riscv64 not found in PATH" >&2
+          exit 1
+        fi
+        export QEMU_BIN
+        echo "$QEMU_BIN"
+        run_with_retry "$QEMU_BIN" ./utest/openblas_utest
+        run_with_retry "$QEMU_BIN" ./utest/openblas_utest_ext
+
         OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
         OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
         OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1