From 66e882aeedf387614316e0c94f7d59a815766c9b Mon Sep 17 00:00:00 2001 From: "Kumawat, Sachin" Date: Tue, 13 Jan 2026 14:14:27 -0800 Subject: [PATCH 1/4] Add VitisAI Plugin * Added VitisAI encoder module placeholder files * VitisAI build integration * VitisAI encoder offload functional * Clean up vitisai integration * Add c++17 requirement for Windows * Enabled preemption for windows runs * Add model cache override option * Remove vitisai premature log message * Add rai support through file mapping * Fixed flatbuffer loading * Fixed Windows file mapping issue * Update FlexmlRT resolution * Use Flexmlrt wheel pkg to build VitisAI plugin * Clean up * Remove prints * Change flexmlrt target from Shared to Interface * Add c++17 requirement for Windows * Enabled preemption for windows runs * Add rai support through file mapping * Fixed flatbuffer loading * Fixed Windows file mapping issue * Update FlexmlRT resolution * Use Flexmlrt wheel pkg to build VitisAI plugin * Clean up * Remove prints * Change flexmlrt target from Shared to Interface * Cleanup FlexmlRT integration * format fix * Adding AMD Licenses * Update CMakeLists.txt Co-authored-by: Kumawat, Sachin * Update src/CMakeLists.txt Co-authored-by: Kumawat, Sachin * Update whisper.cpp * Added VitisAI encoder readme section * Remove license headers from common files to whisper.cpp --------- Co-authored-by: Sachin Kumawat Co-authored-by: Jeff Lin Co-authored-by: Lin Co-authored-by: Lin, Jeff (DCG-ENG) Co-authored-by: Iswarya Alex Co-authored-by: Alex, Iswarya --- CMakeLists.txt | 1 + README.md | 29 ++++ src/CMakeLists.txt | 32 ++++ src/vitisai/whisper-vitisai-encoder.cpp | 204 ++++++++++++++++++++++++ src/vitisai/whisper-vitisai-encoder.h | 32 ++++ src/whisper.cpp | 61 ++++++- 6 files changed, 358 insertions(+), 1 deletion(-) create mode 100644 src/vitisai/whisper-vitisai-encoder.cpp create mode 100644 src/vitisai/whisper-vitisai-encoder.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b60bb045..a8c7347a 100644 
--- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,7 @@ endif() option(WHISPER_COREML "whisper: enable Core ML framework" OFF) option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF) option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF) +option(WHISPER_VITISAI "whisper: support for AMD Vitis AI" OFF) # Required for relocatable CMake package include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) diff --git a/README.md b/README.md index 6d4988e6..0369f142 100644 --- a/README.md +++ b/README.md @@ -312,6 +312,35 @@ This can result in significant speedup in encoder performance. Here are the inst For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037). +## VitisAI encoder support + +On AMD Ryzen AI NPU devices, you can run the Encoder via the VitisAI plugin to significantly accelerate the whisper models. + +- Prepare the AMD runtime packages (required before building): + + - Obtain the XRT package and the FlexmlRT package from AMD. Both are distributed as tarballs or wheels. + - Copy the downloaded archives to a local path, extract them, and run the setup script from each extracted package in your shell (for example `source /path/to/xrt/setup.sh` and `source /path/to/flexmlrt/setup.sh`). Run these in every new shell you use to build or run `whisper.cpp`. + +- Fetch the prebuilt VitisAI encoder cache: + + - Download the appropriate Whisper encoder `.rai` cache for your model size from the AMD collection on Hugging Face: https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models + - Place and rename the downloaded `.rai` file as `-encoder-vitisai.rai` alongside your ggml model files `.bin`. + +- Build `whisper.cpp` with VitisAI support: + + ```bash + cmake -B build -DWHISPER_VITISAI=1 + cmake --build build -j --config Release + ``` + +- Run the examples as usual. 
For example: + + ```text + $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav + ``` + +The VitisAI artifact from Huggingface is already optimized for Ryzen AI NPUs, there is no slow compilation needed. The acceleration advantage should be seen from first run itself apart from CPU caching overheads. + ## NVIDIA GPU support With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 095a2791..6cba1c6e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,6 +48,10 @@ if (WHISPER_OPENVINO) find_package(OpenVINO REQUIRED COMPONENTS Runtime) endif() +if (WHISPER_VITISAI) + find_package(FlexmlRT REQUIRED) +endif() + # # libraries # @@ -101,6 +105,30 @@ if (WHISPER_OPENVINO) set_target_properties(${TARGET} PROPERTIES FOLDER "libs") endif() +if (WHISPER_VITISAI) + set(TARGET whisper.vitisai) + + add_library(${TARGET} OBJECT + vitisai/whisper-vitisai-encoder.h + vitisai/whisper-vitisai-encoder.cpp + ) + + target_include_directories(${TARGET} PUBLIC + . 
+ ) + + set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON) + set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_VITISAI) + + # Add C++17 standard for MSVC + if (MSVC) + target_compile_options(${TARGET} PRIVATE /std:c++17) + endif() + + target_link_libraries(${TARGET} PRIVATE ggml flexmlrt::flexmlrt) + set_target_properties(${TARGET} PROPERTIES FOLDER "libs") +endif() + # whisper add_library(whisper @@ -137,6 +165,10 @@ if (WHISPER_OPENVINO) target_link_libraries(whisper PRIVATE whisper.openvino) endif() +if (WHISPER_VITISAI) + target_link_libraries(whisper PRIVATE whisper.vitisai) +endif() + if (WHISPER_MKL) target_link_libraries(whisper PRIVATE MKL::MKL) endif() diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp new file mode 100644 index 00000000..a6d20a88 --- /dev/null +++ b/src/vitisai/whisper-vitisai-encoder.cpp @@ -0,0 +1,204 @@ +// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. +#include "vitisai/whisper-vitisai-encoder.h" +#include "FlexMLClient.h" +#include "ggml.h" +#include "ggml-backend.h" + +#include +#include +#ifdef _WIN32 + #include +#else + #include + #include + #include +#endif +#include +#include + +struct whisper_vitisai_context { + std::string model_path; + std::shared_ptr runner; + uint8_t * fbs_buffer; + size_t fbs_buffer_size; +}; + +// Function to mmap rai file for Linux and MapViewOfFile for Windows +bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) { +#ifdef _WIN32 + // Open the file + HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Get the file size + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(hFile, &fileSize)) { + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to get file size for rai file 
'%s'\n", __func__, __LINE__, path); + return false; + } + + // Create a file mapping object + HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, fileSize.QuadPart, NULL); + if (hMapping == NULL) { + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to create file mapping for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Map the file + *buffer = (uint8_t *)MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, fileSize.QuadPart); + if (*buffer == NULL) { + CloseHandle(hMapping); + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to map rai file '%s'\n", __func__, __LINE__, path); + return false; + } + *size = fileSize.QuadPart; + return true; +#else + // Open the file + FILE * fd = fopen(path, "rb"); + if (!fd) { + std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Get the file size + struct stat st; + if (fstat(fileno(fd), &st) == -1) { + fclose(fd); + std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Mmap the file + *buffer = (uint8_t *)mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fileno(fd), 0); + if (*buffer == MAP_FAILED) { + fclose(fd); + std::fprintf(stderr, "%s: %d: Failed to mmap rai file '%s'\n", __func__, __LINE__, path); + return false; + } + *size = st.st_size; + return true; +#endif // _WIN32 +} + +void unmap_rai_file(uint8_t * buffer, size_t size) { +#ifdef _WIN32 + UnmapViewOfFile(buffer); +#else + munmap(buffer, size); +#endif // _WIN32 +} + +struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model) { + if (!path_model) { + std::fprintf(stderr, "%s: path_model is null\n", __func__); + return nullptr; + } + + auto * ctx = new whisper_vitisai_context(); + ctx->model_path = path_model; + + // Override the model path with the environment variable if it is set + if (const char * env_model_path = 
std::getenv("OVERRIDE_VITISAI_MODEL_PATH")) { + if (env_model_path[0] != '\0') { + ctx->model_path = env_model_path; + } + } + + // Step 1: Set up the model + flexmlrt::client::Options options; + options.modelPath = ctx->model_path; + options.deviceName = "stx"; + options.debug = false; + options.executeMode = 2; + options.extOptions["ai_analyzer_profiling"] = true; // Enable AIA profiling + options.extOptions["enable_preemption"] = true; + + // Check if model_path is rai file and if so, add fbs_buffer and fbs_buffer_size to the options + if (ctx->model_path.find(".rai") != std::string::npos) { + // mmap rai file for both Linux and Windows and pass the buffer to the options + ctx->fbs_buffer = nullptr; + ctx->fbs_buffer_size = 0; + if (map_rai_file(ctx->model_path.c_str(), &ctx->fbs_buffer, &ctx->fbs_buffer_size)) { + options.extOptions["fbs_buffer"] = ctx->fbs_buffer; + options.extOptions["fbs_buffer_size"] = ctx->fbs_buffer_size; + options.subgraphName = "vaiml_par_0"; + options.extOptions["cache_dir"] = std::string("."); + } else { + std::fprintf(stderr, "%s: Failed to mmap rai file '%s'\n", __func__, ctx->model_path.c_str()); + delete ctx; + return nullptr; + } + } + + try { + ctx->runner = std::make_shared(options); + + if (!ctx->runner->good()) { + throw std::runtime_error("Runner creation ran into an error"); + } + } catch (const std::exception & e) { + std::fprintf(stderr, "%s: Exception during Vitis AI runner creation: %s\n", __func__, e.what()); + whisper_vitisai_free(ctx); + return nullptr; + } + return ctx; +} + +void whisper_vitisai_free(struct whisper_vitisai_context * ctx) { + if (!ctx) { + return; + } + + std::fprintf(stderr, "%s: releasing Vitis AI encoder context for model '%s'\n", __func__, ctx->model_path.c_str()); + if (ctx->fbs_buffer) { + unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size); + } + delete ctx; +} + +int whisper_vitisai_encode(struct whisper_vitisai_context * ctx, struct ggml_tensor * mel, struct ggml_tensor * out) { + if (!ctx || !mel || 
!out) { + std::fprintf(stderr, "%s: ctx/mel/out must not be null\n", __func__); + return 0; + } + + if (ggml_n_dims(mel) != 2) { + std::fprintf(stderr, "%s: mel tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(mel)); + return 0; + } + + if (ggml_n_dims(out) != 2) { + std::fprintf(stderr, "%s: out tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(out)); + return 0; + } + + // setup input and output tensors for Vitis AI model + std::vector input_tensors, output_tensors; + auto model = ctx->runner; + + // Get tensors as CPU tensors (hwTensor = false) + input_tensors = model->getIOTensors("input", false); + output_tensors = model->getIOTensors("output", false); + + // TODO: add assert checks for tensor numbers and shapes + + input_tensors[0].data = mel->data; + output_tensors[0].data = out->data; + + try { + model->forward(input_tensors, output_tensors); + std::fprintf(stdout, "%s: Vitis AI model inference completed.\n", __func__); + } catch (const std::exception & e) { + std::fprintf(stderr, "%s: Exception during model inference: %s\n", __func__, e.what()); + return 0; + } + + return 1; +} diff --git a/src/vitisai/whisper-vitisai-encoder.h b/src/vitisai/whisper-vitisai-encoder.h new file mode 100644 index 00000000..05dc812b --- /dev/null +++ b/src/vitisai/whisper-vitisai-encoder.h @@ -0,0 +1,32 @@ +// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +#pragma once + +#include +#include +#include + +#if __cplusplus +extern "C" { +#endif + +struct whisper_vitisai_context; + +struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model); +void whisper_vitisai_free(struct whisper_vitisai_context * ctx); + +// Function to mmap rai file for Linux and MapViewOfFile for Windows +bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size); +// Function to unmap rai file for Linux and UnmapViewOfFile for Windows +void unmap_rai_file(uint8_t * buffer, size_t size); + +struct ggml_tensor; + +int whisper_vitisai_encode( + struct whisper_vitisai_context * ctx, + struct ggml_tensor * mel, + struct ggml_tensor * out); + +#if __cplusplus +} +#endif diff --git a/src/whisper.cpp b/src/whisper.cpp index 5b6e4b4b..59dd59c5 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -14,6 +14,10 @@ #include "openvino/whisper-openvino-encoder.h" #endif +#ifdef WHISPER_USE_VITISAI +#include "vitisai/whisper-vitisai-encoder.h" +#endif + #include #include #include @@ -903,6 +907,10 @@ struct whisper_state { whisper_openvino_context * ctx_openvino = nullptr; #endif +#ifdef WHISPER_USE_VITISAI + whisper_vitisai_context * ctx_vitisai = nullptr; +#endif + // [EXPERIMENTAL] token-level timestamps data int64_t t_beg = 0; int64_t t_last = 0; @@ -1970,7 +1978,13 @@ static bool whisper_encode_external(const whisper_state & wstate) { const bool use_openvino = wstate.ctx_openvino != nullptr; #endif - return use_coreml || use_openvino; +#ifndef WHISPER_USE_VITISAI + const bool use_vitisai = false; +#else + const bool use_vitisai = wstate.ctx_vitisai != nullptr; +#endif + + return use_coreml || use_openvino || use_vitisai; } static struct ggml_cgraph * whisper_build_graph_conv( @@ -2411,6 +2425,8 @@ static bool whisper_encode_internal( #if defined(WHISPER_USE_COREML) whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data); +#elif defined(WHISPER_USE_VITISAI) + 
whisper_vitisai_encode(wstate.ctx_vitisai, mel, wstate.embd_enc); #elif defined(WHISPER_USE_OPENVINO) whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc); #endif @@ -3346,6 +3362,20 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { } #endif +#ifdef WHISPER_USE_VITISAI +// replace extension with Vitis AI encoder artifact +static std::string whisper_get_vitisai_path_encoder_cache(std::string path_bin) { + auto pos = path_bin.rfind('.'); + if (pos != std::string::npos) { + path_bin = path_bin.substr(0, pos); + } + + path_bin += "-encoder-vitisai.rai"; + + return path_bin; +} +#endif + #ifdef WHISPER_USE_OPENVINO // replace .bin with-encoder-openvino.xml static std::string whisper_openvino_get_path_encoder(std::string path_bin) { @@ -3455,6 +3485,19 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { } #endif +#ifdef WHISPER_USE_VITISAI + const auto path_vitisai = whisper_get_vitisai_path_encoder_cache(ctx->path_model); + + state->ctx_vitisai = whisper_vitisai_init(path_vitisai.c_str()); + if (!state->ctx_vitisai) { + WHISPER_LOG_ERROR("%s: failed to load Vitis AI model from '%s'\n", __func__, path_vitisai.c_str()); + whisper_free_state(state); + return nullptr; + } else { + WHISPER_LOG_INFO("%s: Vitis AI model loaded\n", __func__); + } +#endif + state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx); state->batch = whisper_batch_init(ctx->model.hparams.n_text_ctx, WHISPER_MAX_DECODERS); @@ -3821,6 +3864,13 @@ void whisper_free_state(struct whisper_state * state) { } #endif +#ifdef WHISPER_USE_VITISAI + if (state->ctx_vitisai != nullptr) { + whisper_vitisai_free(state->ctx_vitisai); + state->ctx_vitisai = nullptr; + } +#endif + whisper_batch_free(state->batch); ggml_backend_sched_free(state->sched_conv.sched); @@ -4312,11 +4362,20 @@ static int whisper_has_openvino(void) { #endif } +static int whisper_has_vitisai(void) { +#ifdef WHISPER_USE_VITISAI + return 1; +#else + return 0; +#endif +} 
+ const char * whisper_print_system_info(void) { static std::string s; s = ""; s += "WHISPER : "; + s += "VITISAI = " + std::to_string(whisper_has_vitisai()) + " | "; s += "COREML = " + std::to_string(whisper_has_coreml()) + " | "; s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | "; From 1a98960e5c21e8d2aadf5316d65b8615cfdb2eee Mon Sep 17 00:00:00 2001 From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:51:47 -0800 Subject: [PATCH 2/4] Update README.md - RAI EULA Links - Updated for RAI Whisper instructions --- README.md | 56 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 0369f142..91a4b114 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp - [Vulkan support](#vulkan-gpu-support) - Support for CPU-only inference - [Efficient GPU support for NVIDIA](#nvidia-gpu-support) +- [AMD Ryzen AI NPU Support](#amd-ryzen-ai-support-for-npu) - [OpenVINO Support](#openvino-support) - [Ascend NPU Support](#ascend-npu-support) - [Moore Threads GPU Support](#moore-threads-gpu-support) @@ -312,34 +313,47 @@ This can result in significant speedup in encoder performance. Here are the inst For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037). -## VitisAI encoder support +## AMD Ryzen™ AI support for NPU -On AMD Ryzen AI NPU devices, you can run the Encoder via the VitisAI plugin to significantly accelerate the whisper models. +On AMD's Ryzen™ AI 300 Series with dedicated NPUs for acceleration, you can now run Whisper models with the ability to fully offload the encoder to NPU. This brings significant speedup compared to CPU-only. +> **Note:** +> **Ryzen™ AI NPU acceleration is currently supported on Windows only.** Linux support is planned for upcoming releases. 
+> For the latest updates on Ryzen AI, check out [the official documentation](https://ryzenai.docs.amd.com/en/latest/). -- Prepare the AMD runtime packages (required before building): +### Setup environment (Windows only) - - Obtain the XRT package and the FlexmlRT package from AMD. Both are distributed as tarballs or wheels. - - Copy the downloaded archives to a local path, extract them, and run the setup script from each extracted package in your shell (for example `source /path/to/xrt/setup.sh` and `source /path/to/flexmlrt/setup.sh`). Run these in every new shell you use to build or run `whisper.cpp`. - -- Fetch the prebuilt VitisAI encoder cache: - - - Download the appropriate Whisper encoder `.rai` cache for your model size from the AMD collection on Hugging Face: https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models - - Place and rename the downloaded `.rai` file as `-encoder-vitisai.rai` alongside your ggml model files `.bin`. - -- Build `whisper.cpp` with VitisAI support: - - ```bash - cmake -B build -DWHISPER_VITISAI=1 - cmake --build build -j --config Release +- **Driver:** Make sure you have NPU drivers version **.280 or newer** installed. [Download latest drivers from here](https://account.amd.com/en/forms/downloads/ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip) +- **Runtime libraries:** Download and install the necessary [runtime dependencies from here](https://account.amd.com/en/forms/downloads/ryzenai-eula-public-xef.html?filename=flexmlrt1.7.0-win.zip). +- **Environment:** Extract the runtime package and set up the environment: + ```powershell + tar xvf flexmlrt1.7.0-win.zip + flexmlrt\setup.bat ``` +Your environment is now ready. -- Run the examples as usual. 
For example: +### Build Whisper.cpp for Ryzen™ AI support - ```text - $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav - ``` +```bash +cmake -B build -DWHISPER_VITISAI=1 +cmake --build build -j --config Release +``` + +### Download NPU-optimized models + +- All NPU-supported Whisper models and their compiled `.rai` cache files are available in this collection: + https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models +- Download the pre-compiled `.rai` cache file matching your desired model, and place it in your `models/` directory alongside its corresponding `ggml-<...>.bin` file. + The cache file must be named with the `-encoder-vitisai.rai` suffix. For example, if your model file is named `ggml-small.bin`, the cache file should be named `ggml-small-encoder-vitisai.rai`. + + +> **Note:** The ".rai" models from Hugging Face are pre-optimized for Ryzen™ AI NPUs, delivering acceleration benefits from the very first run (aside from any initial CPU-side caching overhead). + +Run the examples as usual: + +```bash +./build/bin/whisper-cli -m models/ggml-small.bin -f samples/jfk.wav +``` -The VitisAI artifact from Huggingface is already optimized for Ryzen AI NPUs, there is no slow compilation needed. The acceleration advantage should be seen from first run itself apart from CPU caching overheads. 
## NVIDIA GPU support From 175b9a53451b13e103b37194c8cb9f66de5c91fa Mon Sep 17 00:00:00 2001 From: Sachin Kumawat Date: Thu, 26 Feb 2026 12:42:08 -0800 Subject: [PATCH 3/4] Cleanup and add runtime print debug guard --- src/vitisai/whisper-vitisai-encoder.cpp | 9 +++++---- src/vitisai/whisper-vitisai-encoder.h | 7 ------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp index a6d20a88..c10e1c37 100644 --- a/src/vitisai/whisper-vitisai-encoder.cpp +++ b/src/vitisai/whisper-vitisai-encoder.cpp @@ -1,4 +1,3 @@ -// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. #include "vitisai/whisper-vitisai-encoder.h" #include "FlexMLClient.h" #include "ggml.h" @@ -24,7 +23,7 @@ struct whisper_vitisai_context { }; // Function to mmap rai file for Linux and MapViewOfFile for Windows -bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) { +static bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) { #ifdef _WIN32 // Open the file HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); @@ -87,7 +86,7 @@ bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) { #endif // _WIN32 } -void unmap_rai_file(uint8_t * buffer, size_t size) { +static void unmap_rai_file(uint8_t * buffer, size_t size) { #ifdef _WIN32 UnmapViewOfFile(buffer); #else @@ -194,7 +193,9 @@ int whisper_vitisai_encode(struct whisper_vitisai_context * ctx, struct ggml_ten try { model->forward(input_tensors, output_tensors); - std::fprintf(stdout, "%s: Vitis AI model inference completed.\n", __func__); +#if defined(WHISPER_DEBUG) + std::fprintf(stderr, "%s: Vitis AI model inference completed.\n", __func__); +#endif } catch (const std::exception & e) { std::fprintf(stderr, "%s: Exception during model inference: %s\n", __func__, e.what()); return 0; diff --git a/src/vitisai/whisper-vitisai-encoder.h 
b/src/vitisai/whisper-vitisai-encoder.h index 05dc812b..840ce694 100644 --- a/src/vitisai/whisper-vitisai-encoder.h +++ b/src/vitisai/whisper-vitisai-encoder.h @@ -1,5 +1,3 @@ -// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. - #pragma once #include @@ -15,11 +13,6 @@ struct whisper_vitisai_context; struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model); void whisper_vitisai_free(struct whisper_vitisai_context * ctx); -// Function to mmap rai file for Linux and MapViewOfFile for Windows -bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size); -// Function to unmap rai file for Linux and UnmapViewOfFile for Windows -void unmap_rai_file(uint8_t * buffer, size_t size); - struct ggml_tensor; int whisper_vitisai_encode( From 988a4af6f1b7a1153b5d37e483d4e7f5caa605b4 Mon Sep 17 00:00:00 2001 From: Sachin Kumawat Date: Tue, 3 Mar 2026 16:37:29 -0800 Subject: [PATCH 4/4] turn off profiling --- src/vitisai/whisper-vitisai-encoder.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp index c10e1c37..580bcfe3 100644 --- a/src/vitisai/whisper-vitisai-encoder.cpp +++ b/src/vitisai/whisper-vitisai-encoder.cpp @@ -116,7 +116,6 @@ struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model) { options.deviceName = "stx"; options.debug = false; options.executeMode = 2; - options.extOptions["ai_analyzer_profiling"] = true; // Enable AIA profiling options.extOptions["enable_preemption"] = true; // Check if model_path is rai file and if so, add fbs_buffer and fbs_buffer_size to the options