|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405 |
- // Tencent is pleased to support the open source community by making ncnn available.
- //
- // Copyright (C) 2025 THL A29 Limited, a Tencent company. All rights reserved.
- //
- // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
- // in compliance with the License. You may obtain a copy of the License at
- //
- // https://opensource.org/licenses/BSD-3-Clause
- //
- // Unless required by applicable law or agreed to in writing, software distributed
- // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- // CONDITIONS OF ANY KIND, either express or implied. See the License for the
- // specific language governing permissions and limitations under the License.
-
#include "datareader.h"
#include "gpu.h"
#include "mat.h"
#include "net.h"
#include "pipelinecache.h"
#include "testutil.h"

#include <chrono>
#include <cstring>
#include <iostream>
#include <vector>
-
- // 一个空数据读取器,用于加载模型结构,权重将全部为0
- class DataReaderFromEmpty : public ncnn::DataReader
- {
- public:
- virtual int scan(const char* format, void* p) const
- {
- (void)format; // unused
- (void)p; // unused
- return 0;
- }
- virtual size_t read(void* buf, size_t size) const
- {
- memset(buf, 0, size);
- return size;
- }
- };
-
- // MobileNetV3 network structure as ncnn param text; passed to Net::load_param_mem() by the performance test.
- static const char* mobilenet_v3_param = R"delimiter(
- 7767517
- 145 163
- Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
- Convolution 313 1 1 data 313 -23330=4,3,112,112,16 0=16 1=3 3=2 4=1 5=1 6=432
- Split splitncnn_0 1 2 313 313_splitncnn_0 313_splitncnn_1 -23330=8,3,112,112,16,3,112,112,16
- HardSigmoid 319 1 1 313_splitncnn_1 319 -23330=4,3,112,112,16
- BinaryOp 320 2 1 313_splitncnn_0 319 320 -23330=4,3,112,112,16 0=2
- Split splitncnn_1 1 2 320 320_splitncnn_0 320_splitncnn_1 -23330=8,3,112,112,16,3,112,112,16
- ConvolutionDepthWise 321 1 1 320_splitncnn_1 323 -23330=4,3,112,112,16 0=16 1=3 4=1 5=1 6=144 7=16 9=1
- Convolution 324 1 1 323 324 -23330=4,3,112,112,16 0=16 1=1 5=1 6=256
- BinaryOp 326 2 1 320_splitncnn_0 324 326 -23330=4,3,112,112,16
- Convolution 327 1 1 326 329 -23330=4,3,112,112,64 0=64 1=1 5=1 6=1024 9=1
- ConvolutionDepthWise 330 1 1 329 332 -23330=4,3,56,56,64 0=64 1=3 3=2 4=1 5=1 6=576 7=64 9=1
- Convolution 333 1 1 332 333 -23330=4,3,56,56,24 0=24 1=1 5=1 6=1536
- Split splitncnn_2 1 2 333 333_splitncnn_0 333_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
- Convolution 335 1 1 333_splitncnn_1 337 -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
- ConvolutionDepthWise 338 1 1 337 340 -23330=4,3,56,56,72 0=72 1=3 4=1 5=1 6=648 7=72 9=1
- Convolution 341 1 1 340 341 -23330=4,3,56,56,24 0=24 1=1 5=1 6=1728
- BinaryOp 343 2 1 333_splitncnn_0 341 343 -23330=4,3,56,56,24
- Convolution 344 1 1 343 346 -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
- ConvolutionDepthWise 347 1 1 346 347 -23330=4,3,28,28,72 0=72 1=5 3=2 4=2 5=1 6=1800 7=72
- Split splitncnn_3 1 2 347 347_splitncnn_0 347_splitncnn_1 -23330=8,3,28,28,72,3,28,28,72
- Pooling 355 1 1 347_splitncnn_1 359 -23330=4,1,72,1,1 0=1 4=1
- InnerProduct 360 1 1 359 361 -23330=4,1,18,1,1 0=18 1=1 2=1296 9=1
- InnerProduct 362 1 1 361 362 -23330=4,1,72,1,1 0=72 1=1 2=1296
- HardSigmoid 367 1 1 362 367 -23330=4,1,72,1,1
- BinaryOp 376 2 1 347_splitncnn_0 367 376 -23330=4,3,28,28,72 0=2
- ReLU 377 1 1 376 377 -23330=4,3,28,28,72
- Convolution 378 1 1 377 378 -23330=4,3,28,28,40 0=40 1=1 5=1 6=2880
- Split splitncnn_4 1 2 378 378_splitncnn_0 378_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
- Convolution 380 1 1 378_splitncnn_1 382 -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
- ConvolutionDepthWise 383 1 1 382 383 -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120
- Split splitncnn_5 1 2 383 383_splitncnn_0 383_splitncnn_1 -23330=8,3,28,28,120,3,28,28,120
- Pooling 391 1 1 383_splitncnn_1 395 -23330=4,1,120,1,1 0=1 4=1
- InnerProduct 396 1 1 395 397 -23330=4,1,30,1,1 0=30 1=1 2=3600 9=1
- InnerProduct 398 1 1 397 398 -23330=4,1,120,1,1 0=120 1=1 2=3600
- HardSigmoid 403 1 1 398 403 -23330=4,1,120,1,1
- BinaryOp 412 2 1 383_splitncnn_0 403 412 -23330=4,3,28,28,120 0=2
- ReLU 413 1 1 412 413 -23330=4,3,28,28,120
- Convolution 414 1 1 413 414 -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
- BinaryOp 416 2 1 378_splitncnn_0 414 416 -23330=4,3,28,28,40
- Split splitncnn_6 1 2 416 416_splitncnn_0 416_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
- Convolution 417 1 1 416_splitncnn_1 419 -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
- ConvolutionDepthWise 420 1 1 419 420 -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120
- Split splitncnn_7 1 2 420 420_splitncnn_0 420_splitncnn_1 -23330=8,3,28,28,120,3,28,28,120
- Pooling 428 1 1 420_splitncnn_1 432 -23330=4,1,120,1,1 0=1 4=1
- InnerProduct 433 1 1 432 434 -23330=4,1,30,1,1 0=30 1=1 2=3600 9=1
- InnerProduct 435 1 1 434 435 -23330=4,1,120,1,1 0=120 1=1 2=3600
- HardSigmoid 440 1 1 435 440 -23330=4,1,120,1,1
- BinaryOp 449 2 1 420_splitncnn_0 440 449 -23330=4,3,28,28,120 0=2
- ReLU 450 1 1 449 450 -23330=4,3,28,28,120
- Convolution 451 1 1 450 451 -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
- BinaryOp 453 2 1 416_splitncnn_0 451 453 -23330=4,3,28,28,40
- Convolution 454 1 1 453 454 -23330=4,3,28,28,240 0=240 1=1 5=1 6=9600
- HardSwish 461 1 1 454 461 -23330=4,3,28,28,240
- ConvolutionDepthWise 462 1 1 461 462 -23330=4,3,14,14,240 0=240 1=3 3=2 4=1 5=1 6=2160 7=240
- HardSwish 469 1 1 462 469 -23330=4,3,14,14,240
- Convolution 470 1 1 469 470 -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
- Split splitncnn_8 1 2 470 470_splitncnn_0 470_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
- Convolution 472 1 1 470_splitncnn_1 472 -23330=4,3,14,14,200 0=200 1=1 5=1 6=16000
- HardSwish 479 1 1 472 479 -23330=4,3,14,14,200
- ConvolutionDepthWise 480 1 1 479 480 -23330=4,3,14,14,200 0=200 1=3 4=1 5=1 6=1800 7=200
- HardSwish 487 1 1 480 487 -23330=4,3,14,14,200
- Convolution 488 1 1 487 488 -23330=4,3,14,14,80 0=80 1=1 5=1 6=16000
- BinaryOp 490 2 1 470_splitncnn_0 488 490 -23330=4,3,14,14,80
- Split splitncnn_9 1 2 490 490_splitncnn_0 490_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
- Convolution 491 1 1 490_splitncnn_1 491 -23330=4,3,14,14,184 0=184 1=1 5=1 6=14720
- HardSwish 498 1 1 491 498 -23330=4,3,14,14,184
- ConvolutionDepthWise 499 1 1 498 499 -23330=4,3,14,14,184 0=184 1=3 4=1 5=1 6=1656 7=184
- HardSwish 506 1 1 499 506 -23330=4,3,14,14,184
- Convolution 507 1 1 506 507 -23330=4,3,14,14,80 0=80 1=1 5=1 6=14720
- BinaryOp 509 2 1 490_splitncnn_0 507 509 -23330=4,3,14,14,80
- Split splitncnn_10 1 2 509 509_splitncnn_0 509_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
- Convolution 510 1 1 509_splitncnn_1 510 -23330=4,3,14,14,184 0=184 1=1 5=1 6=14720
- HardSwish 517 1 1 510 517 -23330=4,3,14,14,184
- ConvolutionDepthWise 518 1 1 517 518 -23330=4,3,14,14,184 0=184 1=3 4=1 5=1 6=1656 7=184
- HardSwish 525 1 1 518 525 -23330=4,3,14,14,184
- Convolution 526 1 1 525 526 -23330=4,3,14,14,80 0=80 1=1 5=1 6=14720
- BinaryOp 528 2 1 509_splitncnn_0 526 528 -23330=4,3,14,14,80
- Convolution 529 1 1 528 529 -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400
- HardSwish 536 1 1 529 536 -23330=4,3,14,14,480
- ConvolutionDepthWise 537 1 1 536 537 -23330=4,3,14,14,480 0=480 1=3 4=1 5=1 6=4320 7=480
- Split splitncnn_11 1 2 537 537_splitncnn_0 537_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
- Pooling 545 1 1 537_splitncnn_1 549 -23330=4,1,480,1,1 0=1 4=1
- InnerProduct 550 1 1 549 551 -23330=4,1,120,1,1 0=120 1=1 2=57600 9=1
- InnerProduct 552 1 1 551 552 -23330=4,1,480,1,1 0=480 1=1 2=57600
- HardSigmoid 557 1 1 552 557 -23330=4,1,480,1,1
- BinaryOp 566 2 1 537_splitncnn_0 557 566 -23330=4,3,14,14,480 0=2
- HardSwish 572 1 1 566 572 -23330=4,3,14,14,480
- Convolution 573 1 1 572 573 -23330=4,3,14,14,112 0=112 1=1 5=1 6=53760
- Split splitncnn_12 1 2 573 573_splitncnn_0 573_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
- Convolution 575 1 1 573_splitncnn_1 575 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
- HardSwish 582 1 1 575 582 -23330=4,3,14,14,672
- ConvolutionDepthWise 583 1 1 582 583 -23330=4,3,14,14,672 0=672 1=3 4=1 5=1 6=6048 7=672
- Split splitncnn_13 1 2 583 583_splitncnn_0 583_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
- Pooling 591 1 1 583_splitncnn_1 595 -23330=4,1,672,1,1 0=1 4=1
- InnerProduct 596 1 1 595 597 -23330=4,1,168,1,1 0=168 1=1 2=112896 9=1
- InnerProduct 598 1 1 597 598 -23330=4,1,672,1,1 0=672 1=1 2=112896
- HardSigmoid 603 1 1 598 603 -23330=4,1,672,1,1
- BinaryOp 612 2 1 583_splitncnn_0 603 612 -23330=4,3,14,14,672 0=2
- HardSwish 618 1 1 612 618 -23330=4,3,14,14,672
- Convolution 619 1 1 618 619 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
- BinaryOp 621 2 1 573_splitncnn_0 619 621 -23330=4,3,14,14,112
- Convolution 622 1 1 621 622 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
- HardSwish 629 1 1 622 629 -23330=4,3,14,14,672
- ConvolutionDepthWise 630 1 1 629 630 -23330=4,3,14,14,672 0=672 1=5 4=2 5=1 6=16800 7=672
- Split splitncnn_14 1 2 630 630_splitncnn_0 630_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
- Pooling 638 1 1 630_splitncnn_1 642 -23330=4,1,672,1,1 0=1 4=1
- InnerProduct 643 1 1 642 644 -23330=4,1,168,1,1 0=168 1=1 2=112896 9=1
- InnerProduct 645 1 1 644 645 -23330=4,1,672,1,1 0=672 1=1 2=112896
- HardSigmoid 650 1 1 645 650 -23330=4,1,672,1,1
- BinaryOp 659 2 1 630_splitncnn_0 650 659 -23330=4,3,14,14,672 0=2
- HardSwish 665 1 1 659 665 -23330=4,3,14,14,672
- Convolution 666 1 1 665 666 -23330=4,3,14,14,160 0=160 1=1 5=1 6=107520
- Convolution 668 1 1 666 668 -23330=4,3,14,14,672 0=672 1=1 5=1 6=107520
- HardSwish 675 1 1 668 675 -23330=4,3,14,14,672
- ConvolutionDepthWise 676 1 1 675 676 -23330=4,3,7,7,672 0=672 1=5 3=2 4=2 5=1 6=16800 7=672
- Split splitncnn_15 1 2 676 676_splitncnn_0 676_splitncnn_1 -23330=8,3,7,7,672,3,7,7,672
- Pooling 684 1 1 676_splitncnn_1 688 -23330=4,1,672,1,1 0=1 4=1
- InnerProduct 689 1 1 688 690 -23330=4,1,168,1,1 0=168 1=1 2=112896 9=1
- InnerProduct 691 1 1 690 691 -23330=4,1,672,1,1 0=672 1=1 2=112896
- HardSigmoid 696 1 1 691 696 -23330=4,1,672,1,1
- BinaryOp 705 2 1 676_splitncnn_0 696 705 -23330=4,3,7,7,672 0=2
- HardSwish 711 1 1 705 711 -23330=4,3,7,7,672
- Convolution 712 1 1 711 712 -23330=4,3,7,7,160 0=160 1=1 5=1 6=107520
- Split splitncnn_16 1 2 712 712_splitncnn_0 712_splitncnn_1 -23330=8,3,7,7,160,3,7,7,160
- Convolution 714 1 1 712_splitncnn_1 714 -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600
- HardSwish 721 1 1 714 721 -23330=4,3,7,7,960
- ConvolutionDepthWise 722 1 1 721 722 -23330=4,3,7,7,960 0=960 1=5 4=2 5=1 6=24000 7=960
- Split splitncnn_17 1 2 722 722_splitncnn_0 722_splitncnn_1 -23330=8,3,7,7,960,3,7,7,960
- Pooling 730 1 1 722_splitncnn_1 734 -23330=4,1,960,1,1 0=1 4=1
- InnerProduct 735 1 1 734 736 -23330=4,1,240,1,1 0=240 1=1 2=230400 9=1
- InnerProduct 737 1 1 736 737 -23330=4,1,960,1,1 0=960 1=1 2=230400
- HardSigmoid 742 1 1 737 742 -23330=4,1,960,1,1
- BinaryOp 751 2 1 722_splitncnn_0 742 751 -23330=4,3,7,7,960 0=2
- HardSwish 757 1 1 751 757 -23330=4,3,7,7,960
- Convolution 758 1 1 757 758 -23330=4,3,7,7,160 0=160 1=1 5=1 6=153600
- BinaryOp 760 2 1 712_splitncnn_0 758 760 -23330=4,3,7,7,160
- Convolution 761 1 1 760 761 -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600
- HardSwish 768 1 1 761 768 -23330=4,3,7,7,960
- Pooling 769 1 1 768 769 -23330=4,1,960,1,1 0=1 4=1
- HardSwish 775 1 1 769 775 -23330=4,1,960,1,1
- Reshape 783 1 1 775 783 -23330=4,1,960,1,1 0=-1
- InnerProduct 784 1 1 783 784 -23330=4,1,1280,1,1 0=1280 1=1 2=1228800
- HardSwish 790 1 1 784 790 -23330=4,1,1280,1,1
- InnerProduct 791 1 1 790 791 -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
- Softmax prob 1 1 791 output -23330=4,1,1000,1,1
- )delimiter";
-
- /**
- * @brief 使用一个简单的 Sigmoid 网络预热并测试 Pipeline Cache 的基本保存和加载功能
- * @return 0 on success, -1 on failure
- */
- static int warmup_gpu_pipecache()
- {
- std::cout << "==================================================" << std::endl;
- std::cout << " Warmup: Testing Basic Cache IO " << std::endl;
- std::cout << "==================================================" << std::endl;
-
- // 1. 创建一个网络,运行一次以生成 pipeline
- ncnn::Net net;
- net.opt.use_vulkan_compute = true;
-
- net.load_param_mem("7767517\n2 2\nInput input0 0 1 input0\nSigmoid sigmoid0 1 1 input0 output0");
- net.load_model((unsigned char*)""); // 用于创建 pipeline
-
- ncnn::Mat input0 = RandomMat(224, 224);
- ncnn::Mat output0;
- {
- ncnn::Extractor ex = net.create_extractor();
- ex.input("input0", input0);
- ex.extract("output0", output0);
- }
-
- if (output0.empty())
- {
- std::cerr << "Warmup failed: initial extraction failed." << std::endl;
- return -1;
- }
-
- // 2. 保存 pipeline cache
- const char* cache_path = "./sigmoid_pipecache.bin";
- if (net.opt.pipeline_cache->save_cache(cache_path) != 0)
- {
- std::cerr << "Warmup failed: could not save pipeline cache to " << cache_path << std::endl;
- return -1;
- }
- std::cout << "Warmup: Pipeline cache saved successfully." << std::endl;
-
- // 3. 创建第二个网络,加载刚才保存的 cache
- ncnn::Net net2;
- net2.opt.use_vulkan_compute = true;
- net2.opt.pipeline_cache = new ncnn::PipelineCache(net.vulkan_device());
-
- net2.load_param_mem("7767517\n2 2\nInput input0 0 1 input0\nSigmoid sigmoid0 1 1 input0 output0");
- if (net2.opt.pipeline_cache->load_cache(cache_path) != 0)
- {
- std::cerr << "Warmup failed: could not load pipeline cache from " << cache_path << std::endl;
- return -1;
- }
- std::cout << "Warmup: Pipeline cache loaded successfully." << std::endl;
- net2.load_model((unsigned char*)""); // 创建 pipeline
-
- // 4. 再次推理并验证结果是否一致
- ncnn::Mat output0_2;
- {
- ncnn::Extractor ex2 = net2.create_extractor();
- ex2.input("input0", input0);
- ex2.extract("output0", output0_2);
- }
-
- if (output0_2.empty())
- {
- std::cerr << "Warmup failed: extraction after loading cache failed." << std::endl;
- return -1;
- }
-
- if (CompareMat(output0, output0_2, 0.001) != 0)
- {
- std::cerr << "Warmup failed: output mismatch after loading cache." << std::endl;
- return -1;
- }
-
- std::cout << "Warmup PASSED: Outputs are identical." << std::endl;
- return 0;
- }
-
- /**
- * @brief 对比使用和不使用 Pipeline Cache 时的模型加载性能
- * @return 0 on success, -1 on failure
- */
- static int test_gpu_pipecache_performance()
- {
- ncnn::Mat output_no_cache;
- double time_no_cache = 0;
-
- const char* cache_path = "./mobilenet_pipecache.bin";
- DataReaderFromEmpty dr;
- ncnn::Mat input = RandomMat(224, 224, 3);
-
- // -------------------------------------------------
- // 1. 不使用 Pipeline Cache (首次加载)
- // -------------------------------------------------
- std::cout << "\n==================================================" << std::endl;
- std::cout << " Performance Test: Without Pipeline Cache " << std::endl;
- std::cout << "==================================================" << std::endl;
- {
- ncnn::Net net_no_cache;
- net_no_cache.opt.use_vulkan_compute = true;
-
- auto start = std::chrono::high_resolution_clock::now();
-
- net_no_cache.load_param_mem(mobilenet_v3_param);
- net_no_cache.load_model(dr);
-
- auto end = std::chrono::high_resolution_clock::now();
- time_no_cache = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(end - start).count();
- std::cout << "Model loading time without cache: " << time_no_cache << " ms" << std::endl;
-
- // 推理以获得基准输出
- ncnn::Extractor ex = net_no_cache.create_extractor();
- ex.input("data", input);
- ex.extract("output", output_no_cache);
-
- if (output_no_cache.empty())
- {
- std::cerr << "Test failed: extraction without cache failed." << std::endl;
- return -1;
- }
-
- // 保存 cache 以供下一步使用
- if (net_no_cache.opt.pipeline_cache->save_cache(cache_path) != 0)
- {
- std::cerr << "Test failed: could not save pipeline cache to " << cache_path << std::endl;
- return -1;
- }
- std::cout << "Pipeline cache generated and saved to " << cache_path << std::endl;
- }
-
- // -------------------------------------------------
- // 2. 使用 Pipeline Cache (二次加载)
- // -------------------------------------------------
- ncnn::Mat output_with_cache;
- double time_with_cache = 0;
- std::cout << "\n==================================================" << std::endl;
- std::cout << " Performance Test: With Pipeline Cache " << std::endl;
- std::cout << "==================================================" << std::endl;
- {
- ncnn::Net net_with_cache;
- // 必须在加载模型前设置好 cache
- net_with_cache.opt.pipeline_cache = new ncnn::PipelineCache(ncnn::get_gpu_device());
- net_with_cache.opt.use_vulkan_compute = true;
-
- auto start = std::chrono::high_resolution_clock::now();
-
- // 从文件加载 cache
- if (net_with_cache.opt.pipeline_cache->load_cache(cache_path) != 0)
- {
- std::cerr << "Test failed: could not load pipeline cache from " << cache_path << std::endl;
- return -1;
- }
- net_with_cache.load_param_mem(mobilenet_v3_param);
- net_with_cache.load_model(dr);
-
- auto end = std::chrono::high_resolution_clock::now();
- time_with_cache = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(end - start).count();
- std::cout << "Model loading time with cache: " << time_with_cache << " ms" << std::endl;
-
- // 推理
- ncnn::Extractor ex2 = net_with_cache.create_extractor();
- ex2.input("data", input);
- ex2.extract("output", output_with_cache);
-
- if (output_with_cache.empty())
- {
- std::cerr << "Test failed: extraction with cache failed." << std::endl;
- return -1;
- }
- }
-
- // -------------------------------------------------
- // 3. 结果验证与总结
- // -------------------------------------------------
- std::cout << "\n==================================================" << std::endl;
- std::cout << " Verification and Summary " << std::endl;
- std::cout << "==================================================" << std::endl;
-
- bool is_output_same = (CompareMat(output_no_cache, output_with_cache, 0.001) == 0);
-
- std::cout << "Output verification: " << (is_output_same ? "SUCCESS" : "FAILURE") << std::endl;
- std::cout << "--------------------------------------------------" << std::endl;
- std::cout << "Performance Summary:" << std::endl;
- std::cout << " - Without Cache: " << time_no_cache << " ms" << std::endl;
- std::cout << " - With Cache: " << time_with_cache << " ms" << std::endl;
-
- if (time_no_cache > 0) {
- double speedup = (time_no_cache - time_with_cache) / time_no_cache * 100;
- std::cout << " - Speedup: " << speedup << "%" << std::endl;
- }
-
- if (!is_output_same)
- {
- std::cerr << "\nTest FAILED due to output mismatch." << std::endl;
- return -1;
- }
-
- std::cout << "\nTest PASSED." << std::endl;
- return 0;
- }
-
- int main()
- {
- // 运行预热测试,检查基本IO功能
- if (warmup_gpu_pipecache() != 0)
- {
- return -1;
- }
-
- // 运行性能对比测试
- return test_gpu_pipecache_performance();
- }
|