This commit is contained in:
Daniel Bevenius 2026-04-24 09:06:22 +00:00 committed by GitHub
commit 03443a93f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 6007 additions and 1 deletions

View File

@ -634,6 +634,8 @@ jobs:
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
-DBUILD_SHARED_LIBS=ON
-DWHISPER_SDL2=${{ matrix.sdl2 }}
-DGGML_NATIVE=OFF
-DGGML_BMI2=OFF
- name: Build
run: |

View File

@ -179,12 +179,20 @@ set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
target_compile_definitions(whisper PRIVATE
WHISPER_VERSION="${PROJECT_VERSION}"
)
set_target_properties(parakeet PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/parakeet.h)
install(TARGETS parakeet LIBRARY PUBLIC_HEADER)
target_compile_definitions(parakeet PRIVATE
PARAKEET_VERSION="${PROJECT_VERSION}"
)
configure_package_config_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
@ -210,6 +218,35 @@ configure_file(cmake/whisper.pc.in
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
DESTINATION lib/pkgconfig)
# ---------------------------------------------------------------------------
# parakeet install rules: CMake package config, version file and pkg-config
# metadata. Mirrors the whisper install rules above.
# ---------------------------------------------------------------------------
set(PARAKEET_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(PARAKEET_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(PARAKEET_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
configure_package_config_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/parakeet-config.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/parakeet-config.cmake
    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/parakeet
    PATH_VARS
    PARAKEET_INCLUDE_INSTALL_DIR
    PARAKEET_LIB_INSTALL_DIR
    PARAKEET_BIN_INSTALL_DIR)
# BUGFIX: the version file must be named <pkg>-config-version.cmake for
# find_package(parakeet <version>) to locate it; a file named
# parakeet-version.cmake is ignored by CMake's package search.
write_basic_package_version_file(
    ${CMAKE_CURRENT_BINARY_DIR}/parakeet-config-version.cmake
    VERSION ${WHISPER_INSTALL_VERSION}
    COMPATIBILITY SameMajorVersion)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/parakeet-config.cmake
              ${CMAKE_CURRENT_BINARY_DIR}/parakeet-config-version.cmake
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/parakeet)
configure_file(cmake/parakeet.pc.in
        "${CMAKE_CURRENT_BINARY_DIR}/parakeet.pc"
        @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/parakeet.pc"
        DESTINATION lib/pkgconfig)
#
# programs, examples and tests
#

View File

@ -18,6 +18,6 @@ create_makefile "whisper" do |conf|
#{libs}: cmake-targets
cmake-targets:
#{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options}
#{"\t"}#{cmake} --build build --config Release --target common whisper
#{"\t"}#{cmake} --build build --config Release --target common whisper parakeet
EOF
end

View File

@ -0,0 +1,30 @@
# parakeet-config.cmake.in — template for the installed CMake package config
# file. Processed by configure_package_config_file(); every @VAR@ placeholder
# is substituted at configure time (see PATH_VARS in CMakeLists.txt).

# Version/build metadata exposed to consumers of find_package(parakeet).
# NOTE(review): values come from the WHISPER_* variables — presumably the
# parakeet library is versioned together with whisper; confirm.
set(PARAKEET_VERSION @WHISPER_INSTALL_VERSION@)
set(PARAKEET_BUILD_COMMIT @WHISPER_BUILD_COMMIT@)
set(PARAKEET_BUILD_NUMBER @WHISPER_BUILD_NUMBER@)
set(PARAKEET_SHARED_LIB @BUILD_SHARED_LIBS@)
@PACKAGE_INIT@
# Resolve install locations; set_and_check() aborts configuration if the
# directory does not exist on the consumer's system.
set_and_check(PARAKEET_INCLUDE_DIR "@PACKAGE_PARAKEET_INCLUDE_INSTALL_DIR@")
set_and_check(PARAKEET_LIB_DIR "@PACKAGE_PARAKEET_LIB_INSTALL_DIR@")
set_and_check(PARAKEET_BIN_DIR "@PACKAGE_PARAKEET_BIN_INSTALL_DIR@")
# parakeet links against ggml; locate its installed package config first.
find_package(ggml REQUIRED HINTS ${PARAKEET_LIB_DIR}/cmake)
find_library(parakeet_LIBRARY parakeet
REQUIRED
HINTS ${PARAKEET_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH
)
# Wrap the found library in an imported target carrying its usage
# requirements: public headers, transitive ggml link, and C++11.
add_library(parakeet UNKNOWN IMPORTED)
set_target_properties(parakeet
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${PARAKEET_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${parakeet_LIBRARY}"
INTERFACE_COMPILE_FEATURES cxx_std_11
POSITION_INDEPENDENT_CODE ON)
check_required_components(parakeet)

10
cmake/parakeet.pc.in Normal file
View File

@ -0,0 +1,10 @@
# pkg-config metadata template for libparakeet; @VAR@ placeholders are
# filled in by CMake's configure_file(... @ONLY).
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
# NOTE(review): hardcodes "lib" — diverges from CMAKE_INSTALL_LIBDIR on
# distros that install to lib64; confirm this matches the install rules.
libdir=${exec_prefix}/lib
includedir=${prefix}/include
Name: parakeet
Description: Port of NVIDIA's Parakeet model in C/C++
Version: @PROJECT_VERSION@
# Consumers must also link ggml, which parakeet depends on.
Libs: -L${libdir} -lggml -lggml-base -lparakeet
Cflags: -I${includedir}

View File

@ -107,6 +107,7 @@ else()
add_subdirectory(server)
add_subdirectory(quantize)
add_subdirectory(vad-speech-segments)
add_subdirectory(parakeet-cli)
if (WHISPER_SDL2)
add_subdirectory(stream)
add_subdirectory(command)

View File

@ -0,0 +1,8 @@
# Build the parakeet-cli example: a command-line transcriber for the
# Parakeet model (see parakeet-cli.cpp).
set(TARGET parakeet-cli)
add_executable(${TARGET} parakeet-cli.cpp)
include(DefaultTargetOptions)
# NOTE(review): FFMPEG_LIBRARIES and CMAKE_THREAD_LIBS_INIT are expected to
# be provided by the top-level build — confirm against the parent lists file.
target_link_libraries(${TARGET} PRIVATE common parakeet ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ${TARGET} RUNTIME)

View File

@ -0,0 +1,112 @@
# whisper.cpp/examples/parakeet-cli
This is an example of using the [Parakeet] model in whisper.cpp.
### Download converted model
```console
$ hf download danbev/parakeet parakeet-tdt-0.6b-v3.bin --local-dir models
```
### Building
```console
$ cmake -B build -S .
$ cmake --build build --target parakeet-cli -j 12
```
### Usage
```console
$ ./build/bin/parakeet-cli --help
usage: ./build/bin/parakeet-cli [options] file0 file1 ...
supported audio formats: flac, mp3, ogg, wav
options:
-h, --help [default] show this help message and exit
-t N, --threads N [4 ] number of threads to use during computation
-cl N, --chunk-length N [10000 ] chunk length in milliseconds
-lc N, --left-context N [10000 ] left context in milliseconds
-rc N, --right-context N [4960 ] right context in milliseconds
-m, --model FILE [models/ggml-parakeet-tdt-0.6b-v3.bin] model path
-f, --file FILE [ ] input audio file
-ng, --no-gpu [false ] disable GPU
-dev N, --device N [0 ] GPU device to use
-fa, --flash-attn [false ] enable flash attention
-nfa, --no-flash-attn [false ] disable flash attention
-ps, --print-segments [false ] print segment information
```
### Example
```console
$ ./build/bin/parakeet-cli -m models/parakeet-tdt-0.6b-v3.bin -f samples/jfk.wav
Processing audio (176000 samples, 11.00 seconds)
Processing audio: total_frames=1101, chunk_size=1101
parakeet_decode: starting decode with n_frames=138
And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
```
To print segment information:
```console
$ ./build/bin/parakeet-cli -m models/parakeet-tdt-0.6b-v3.bin -f samples/jfk.wav --print-segments
Processing audio (176000 samples, 11.00 seconds)
Processing audio: total_frames=1101, chunk_size=1101
parakeet_decode: starting decode with n_frames=138
And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
Segments (1):
Segment 0: [0 -> 1101] "And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country."
Tokens [38]:
[ 0] id= 1976 frame= 3 dur_idx= 4 dur_val= 4 p=0.9996 plog=-15.6206 t0= 24 t1= 56 word_start=true "▁And"
[ 1] id= 547 frame= 7 dur_idx= 4 dur_val= 4 p=0.9999 plog=-18.7922 t0= 56 t1= 88 word_start=true "▁so"
[ 2] id= 7877 frame= 11 dur_idx= 2 dur_val= 2 p=0.8451 plog=-14.5929 t0= 88 t1= 88 word_start=false ","
[ 3] id= 1103 frame= 13 dur_idx= 3 dur_val= 3 p=0.9996 plog=-15.6127 t0= 104 t1= 128 word_start=true "▁my"
[ 4] id= 309 frame= 16 dur_idx= 1 dur_val= 1 p=0.9912 plog=-11.9635 t0= 128 t1= 136 word_start=true "▁f"
[ 5] id= 530 frame= 17 dur_idx= 2 dur_val= 2 p=1.0000 plog=-13.5239 t0= 136 t1= 152 word_start=false "ell"
[ 6] id= 596 frame= 19 dur_idx= 3 dur_val= 3 p=1.0000 plog=-16.3120 t0= 152 t1= 176 word_start=false "ow"
[ 7] id= 3213 frame= 22 dur_idx= 4 dur_val= 4 p=0.9999 plog=-10.1462 t0= 176 t1= 208 word_start=true "▁Amer"
[ 8] id= 404 frame= 26 dur_idx= 4 dur_val= 4 p=1.0000 plog=-25.0910 t0= 208 t1= 240 word_start=false "ic"
[ 9] id= 667 frame= 30 dur_idx= 4 dur_val= 4 p=1.0000 plog=-27.1707 t0= 240 t1= 272 word_start=false "ans"
[10] id= 7877 frame= 37 dur_idx= 4 dur_val= 4 p=0.9094 plog=-16.3405 t0= 272 t1= 272 word_start=false ","
[11] id= 279 frame= 41 dur_idx= 4 dur_val= 4 p=0.9980 plog=-19.7244 t0= 328 t1= 360 word_start=true "▁a"
[12] id= 583 frame= 45 dur_idx= 4 dur_val= 4 p=1.0000 plog=-24.5312 t0= 360 t1= 392 word_start=false "sk"
[13] id= 1491 frame= 53 dur_idx= 4 dur_val= 4 p=1.0000 plog=-23.2991 t0= 424 t1= 456 word_start=true "▁not"
[14] id= 3470 frame= 65 dur_idx= 4 dur_val= 4 p=0.9995 plog=-16.7306 t0= 520 t1= 552 word_start=true "▁what"
[15] id= 3629 frame= 69 dur_idx= 2 dur_val= 2 p=0.8139 plog=-11.6486 t0= 552 t1= 568 word_start=true "▁your"
[16] id= 867 frame= 75 dur_idx= 1 dur_val= 1 p=0.9980 plog=-12.5265 t0= 600 t1= 608 word_start=true "▁co"
[17] id= 331 frame= 76 dur_idx= 2 dur_val= 2 p=1.0000 plog=-11.6697 t0= 608 t1= 624 word_start=false "un"
[18] id= 958 frame= 78 dur_idx= 2 dur_val= 2 p=1.0000 plog=-11.3621 t0= 624 t1= 640 word_start=false "tr"
[19] id= 7893 frame= 80 dur_idx= 2 dur_val= 2 p=1.0000 plog=-14.3245 t0= 640 t1= 656 word_start=false "y"
[20] id= 2059 frame= 82 dur_idx= 3 dur_val= 3 p=1.0000 plog=-17.7694 t0= 656 t1= 680 word_start=true "▁can"
[21] id= 458 frame= 85 dur_idx= 4 dur_val= 4 p=1.0000 plog=-23.2510 t0= 680 t1= 712 word_start=true "▁do"
[22] id= 509 frame= 89 dur_idx= 4 dur_val= 4 p=1.0000 plog=-23.0688 t0= 712 t1= 744 word_start=true "▁for"
[23] id= 1180 frame= 93 dur_idx= 4 dur_val= 4 p=0.9999 plog=-25.0567 t0= 744 t1= 776 word_start=true "▁you"
[24] id= 7877 frame= 98 dur_idx= 4 dur_val= 4 p=0.8820 plog=-14.2549 t0= 776 t1= 776 word_start=false ","
[25] id= 279 frame=102 dur_idx= 3 dur_val= 3 p=0.9992 plog=-16.8176 t0= 816 t1= 840 word_start=true "▁a"
[26] id= 583 frame=105 dur_idx= 4 dur_val= 4 p=1.0000 plog=-21.0352 t0= 840 t1= 872 word_start=false "sk"
[27] id= 3470 frame=109 dur_idx= 3 dur_val= 3 p=0.9999 plog=-15.4659 t0= 872 t1= 896 word_start=true "▁what"
[28] id= 1180 frame=112 dur_idx= 4 dur_val= 4 p=0.9997 plog=-17.6392 t0= 896 t1= 928 word_start=true "▁you"
[29] id= 2059 frame=116 dur_idx= 3 dur_val= 3 p=0.9999 plog=-15.5484 t0= 928 t1= 952 word_start=true "▁can"
[30] id= 458 frame=119 dur_idx= 2 dur_val= 2 p=1.0000 plog=-15.9953 t0= 952 t1= 968 word_start=true "▁do"
[31] id= 509 frame=121 dur_idx= 3 dur_val= 3 p=1.0000 plog=-15.9605 t0= 968 t1= 992 word_start=true "▁for"
[32] id= 3629 frame=124 dur_idx= 2 dur_val= 2 p=0.9994 plog=-12.2083 t0= 992 t1=1008 word_start=true "▁your"
[33] id= 867 frame=126 dur_idx= 2 dur_val= 2 p=0.9969 plog=-9.1252 t0=1008 t1=1024 word_start=true "▁co"
[34] id= 331 frame=128 dur_idx= 1 dur_val= 1 p=0.9999 plog=-12.6911 t0=1024 t1=1032 word_start=false "un"
[35] id= 958 frame=129 dur_idx= 1 dur_val= 1 p=1.0000 plog=-8.8885 t0=1032 t1=1040 word_start=false "tr"
[36] id= 7893 frame=130 dur_idx= 2 dur_val= 2 p=1.0000 plog=-14.1441 t0=1040 t1=1056 word_start=false "y"
[37] id= 7883 frame=132 dur_idx= 4 dur_val= 4 p=0.9567 plog=-11.5227 t0=1056 t1=1056 word_start=false "."
```
### Model conversion
Clone the original model from Hugging Face:
```console
$ git clone https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3
```
Convert the model:
```console
(venv) $ python models/convert-parakeet-to-ggml.py \
--model <path to cloned model> \
--use-f32 \
--out-dir models \
--out-name ggml-parakeet-tdt-0.6b-v3.bin
```
[Parakeet]: https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3

View File

@ -0,0 +1,220 @@
#include "parakeet.h"
#include "common-whisper.h"
#include <cstdio>
#include <string>
#include <thread>
#include <vector>
#include <cstring>
// command-line parameters for the parakeet-cli example
struct parakeet_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); // compute threads (at most 4)
int32_t chunk_length_ms = 10000; // chunk length in milliseconds
int32_t left_context_ms = 10000; // left context window in milliseconds
int32_t right_context_ms = 4960; // right context window in milliseconds
bool use_gpu = true; // disabled with -ng/--no-gpu
bool flash_attn = true; // flash attention (see -fa/-nfa flags)
int32_t gpu_device = 0; // GPU device index; also settable via PARAKEET_ARG_DEVICE env var
bool print_segments = false; // print per-segment/per-token details after decoding
std::string model = "models/ggml-parakeet-tdt-0.6b-v3.bin"; // default model path
std::vector<std::string> fname_inp = {}; // input audio files (positional args, "-", or -f)
};
static void parakeet_print_usage(int argc, char ** argv, const parakeet_params & params);

// Report a flag that is missing its required value and terminate the process.
// Declared as returning char * only so it can appear as the alternative in
// the ARGV_NEXT ternary below; it never actually returns (calls exit(1)).
static char * requires_value_error(const std::string & arg) {
fprintf(stderr, "error: argument %s requires value\n", arg.c_str());
exit(1);
}
static bool parakeet_params_parse(int argc, char ** argv, parakeet_params & params) {
if (const char * env_device = std::getenv("PARAKEET_ARG_DEVICE")) {
params.gpu_device = std::stoi(env_device);
}
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-"){
params.fname_inp.push_back(arg);
continue;
}
if (arg[0] != '-') {
params.fname_inp.push_back(arg);
continue;
}
if (arg == "-h" || arg == "--help") {
parakeet_print_usage(argc, argv, params);
exit(0);
}
#define ARGV_NEXT (((i + 1) < argc) ? argv[++i] : requires_value_error(arg))
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(ARGV_NEXT); }
else if (arg == "-cl" || arg == "--chunk-length") { params.chunk_length_ms = std::stoi(ARGV_NEXT); }
else if (arg == "-lc" || arg == "--left-context") { params.left_context_ms = std::stoi(ARGV_NEXT); }
else if (arg == "-rc" || arg == "--right-context") { params.right_context_ms = std::stoi(ARGV_NEXT); }
else if (arg == "-m" || arg == "--model") { params.model = ARGV_NEXT; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(ARGV_NEXT); }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-dev" || arg == "--device") { params.gpu_device = std::stoi(ARGV_NEXT); }
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = false; }
else if (arg == "-nfa" || arg == "--no-flash-attn") { params.flash_attn = false; }
else if (arg == "-ps" || arg == "--print-segments") { params.print_segments = true; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
parakeet_print_usage(argc, argv, params);
exit(1);
}
}
return true;
}
// Print the CLI usage/help text to stderr.
// The defaults shown in brackets are taken from params, so --help reflects
// any values already applied (e.g. the PARAKEET_ARG_DEVICE environment
// override picked up by parakeet_params_parse()).
static void parakeet_print_usage(int /*argc*/, char ** argv, const parakeet_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options] file0 file1 ...\n", argv[0]);
fprintf(stderr, "supported audio formats: flac, mp3, ogg, wav\n");
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, "  -h,        --help              [default] show this help message and exit\n");
fprintf(stderr, "  -t N,      --threads N         [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, "  -cl N,     --chunk-length N    [%-7d] chunk length in milliseconds\n", params.chunk_length_ms);
fprintf(stderr, "  -lc N,     --left-context N    [%-7d] left context in milliseconds\n", params.left_context_ms);
fprintf(stderr, "  -rc N,     --right-context N   [%-7d] right context in milliseconds\n", params.right_context_ms);
fprintf(stderr, "  -m,        --model FILE        [%-7s] model path\n", params.model.c_str());
fprintf(stderr, "  -f,        --file FILE         [%-7s] input audio file\n", "");
fprintf(stderr, "  -ng,       --no-gpu            [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, "  -dev N,    --device N          [%-7d] GPU device to use\n", params.gpu_device);
fprintf(stderr, "  -fa,       --flash-attn        [%-7s] enable flash attention\n", params.flash_attn ? "true" : "false");
fprintf(stderr, "  -nfa,      --no-flash-attn     [%-7s] disable flash attention\n", !params.flash_attn ? "true" : "false");
fprintf(stderr, "  -ps,       --print-segments    [%-7s] print segment information\n", params.print_segments ? "true" : "false");
fprintf(stderr, "\n");
}
// Callback invoked for every newly predicted token: converts the token id to
// display text and streams it to stdout immediately.
void token_callback(parakeet_context * ctx, parakeet_state * state, const parakeet_token_data * token_data, void * user_data) {
    (void) state;     // unused
    (void) user_data; // unused — main() registers this callback with nullptr
    // NOTE(review): function-local static, so the "first token" state is
    // shared across all files processed in one run — the first token of the
    // second and later input files is not treated as first. Confirm whether
    // that is intended.
    static bool is_first = true;
    const char * token_str = parakeet_token_to_str(ctx, token_data->id);
    char text_buf[256];
    parakeet_token_to_text(token_str, is_first, text_buf, sizeof(text_buf));
    printf("%s", text_buf);
    fflush(stdout);
    is_first = false;
}
// Entry point: parse command-line arguments, then transcribe each input
// audio file with the Parakeet model, streaming tokens to stdout via
// token_callback as they are decoded.
int main(int argc, char ** argv) {
// make all compiled-in ggml backends (CPU/GPU) available
ggml_backend_load_all();
parakeet_params params;
if (parakeet_params_parse(argc, argv, params) == false) {
return 1;
}
if (params.fname_inp.empty()) {
fprintf(stderr, "error: no input files specified\n");
parakeet_print_usage(argc, argv, params);
return 1;
}
// Process each input file
// NOTE(review): the model is re-loaded from disk on every loop iteration;
// hoisting the load above the loop would avoid redundant work when several
// files are given.
for (const auto & fname : params.fname_inp) {
fprintf(stderr, "\nProcessing file: %s\n", fname.c_str());
// decode the audio file into mono (pcmf32) and per-channel (pcmf32s) PCM
std::vector<float> pcmf32;
std::vector<std::vector<float>> pcmf32s;
if (!read_audio_data(fname.c_str(), pcmf32, pcmf32s, false)) {
fprintf(stderr, "error: failed to read audio file '%s'\n", fname.c_str());
continue;
}
if (pcmf32.empty()) {
fprintf(stderr, "error: no audio data in file '%s'\n", fname.c_str());
continue;
}
fprintf(stderr, "Loading Parakeet model from: %s\n", params.model.c_str());
struct parakeet_context_params ctx_params = parakeet_context_default_params();
ctx_params.use_gpu = params.use_gpu;
ctx_params.flash_attn = params.flash_attn;
ctx_params.gpu_device = params.gpu_device;
struct parakeet_context * pctx = parakeet_init_from_file_with_params(params.model.c_str(), ctx_params);
if (pctx == nullptr) {
// a missing/corrupt model is fatal, unlike per-file audio errors above
fprintf(stderr, "error: failed to load Parakeet model from '%s'\n", params.model.c_str());
return 1;
}
fprintf(stderr, "Successfully loaded Parakeet model\n");
fprintf(stderr, "Processing audio (%zu samples, %.2f seconds)\n",
pcmf32.size(), (float)pcmf32.size() / PARAKEET_SAMPLE_RATE);
struct parakeet_full_params full_params = parakeet_full_default_params(PARAKEET_SAMPLING_GREEDY);
full_params.n_threads = params.n_threads;
full_params.chunk_length_ms = params.chunk_length_ms;
full_params.left_context_ms = params.left_context_ms;
full_params.right_context_ms = params.right_context_ms;
// stream decoded tokens to stdout as soon as they are produced
full_params.new_token_callback = token_callback;
full_params.new_token_callback_user_data = nullptr;
// if the whole clip fits within the model's audio context, disable
// chunked processing (chunk_length_ms = 0)
const int mel_frames = (int)(pcmf32.size() / PARAKEET_HOP_LENGTH);
if (mel_frames <= parakeet_n_audio_ctx(pctx)) {
full_params.chunk_length_ms = 0;
}
int ret = parakeet_full(pctx, full_params, pcmf32.data(), pcmf32.size());
if (ret != 0) {
fprintf(stderr, "error: failed to process audio file '%s'\n", fname.c_str());
parakeet_free(pctx);
continue;
}
printf("\n");
if (params.print_segments) {
// dump segment boundaries and detailed per-token data to stderr
const int n_segments = parakeet_full_n_segments(pctx);
fprintf(stderr, "\nSegments (%d):\n", n_segments);
for (int i = 0; i < n_segments; i++) {
const char * text = parakeet_full_get_segment_text(pctx, i);
const int64_t t0 = parakeet_full_get_segment_t0(pctx, i);
const int64_t t1 = parakeet_full_get_segment_t1(pctx, i);
const int n_tokens = parakeet_full_n_tokens(pctx, i);
fprintf(stderr, "Segment %d: [%lld -> %lld] \"%s\"\n", i, (long long)t0, (long long)t1, text);
fprintf(stderr, "Tokens [%d]:\n", n_tokens);
for (int j = 0; j < n_tokens; j++) {
parakeet_token_data token_data = parakeet_full_get_token_data(pctx, i, j);
const char * token_str = parakeet_token_to_str(pctx, token_data.id);
fprintf(stderr, "  [%2d] id=%5d frame=%3d dur_idx=%2d dur_val=%2d p=%.4f plog=%.4f t0=%4lld t1=%4lld word_start=%s \"%s\"\n",
j,
token_data.id,
token_data.frame_index,
token_data.duration_idx,
token_data.duration_value,
token_data.p,
token_data.plog,
(long long)token_data.t0,
(long long)token_data.t1,
token_data.is_word_start ? "true": "false",
token_str);
}
}
}
parakeet_free(pctx);
}
return 0;
}

383
include/parakeet.h Normal file
View File

@ -0,0 +1,383 @@
#ifndef PARAKEET_H
#define PARAKEET_H
#include "ggml.h"
#include "ggml-cpu.h"
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#ifdef __GNUC__
# define PARAKEET_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
# define PARAKEET_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
# define PARAKEET_DEPRECATED(func, hint) func
#endif
#ifdef PARAKEET_SHARED
# ifdef _WIN32
# ifdef PARAKEET_BUILD
# define PARAKEET_API __declspec(dllexport)
# else
# define PARAKEET_API __declspec(dllimport)
# endif
# else
# define PARAKEET_API __attribute__ ((visibility ("default")))
# endif
#else
# define PARAKEET_API
#endif
#define PARAKEET_SAMPLE_RATE 16000
#define PARAKEET_HOP_LENGTH 160
#ifdef __cplusplus
extern "C" {
#endif
struct parakeet_context;
struct parakeet_state;
struct parakeet_full_params;
typedef int32_t parakeet_pos;
typedef int32_t parakeet_token;
typedef int32_t parakeet_seq_id;
struct parakeet_context_params {
bool use_gpu;
bool flash_attn;
int gpu_device; // CUDA device
};
// Per-token result data produced during decoding.
typedef struct parakeet_token_data {
parakeet_token id; // the BPE subword ID (0-8191)
int duration_idx; // index into the model's durations array
int duration_value; // actual duration value (presumably in encoder frames — TODO confirm)
int frame_index; // frame at which the token was emitted — assumed encoder frame; confirm
float p; // token probability
float plog; // log-probability (NOTE(review): magnitudes suggest an accumulated score, not log(p) — confirm)
int64_t t0; // token start time (units not established here — confirm)
int64_t t1; // token end time (units not established here — confirm)
bool is_word_start; // true if this token begins a new word
} parakeet_token_data;
typedef struct parakeet_model_loader {
void * context;
size_t (*read)(void * ctx, void * output, size_t read_size);
bool (*eof)(void * ctx);
void (*close)(void * ctx);
} parakeet_model_loader;
PARAKEET_API const char * parakeet_version(void);
// Various functions for loading a ggml parakeet model.
// Allocate (almost) all memory needed for the model.
// Return NULL on failure
PARAKEET_API struct parakeet_context * parakeet_init_from_file_with_params (const char * path_model, struct parakeet_context_params params);
PARAKEET_API struct parakeet_context * parakeet_init_from_buffer_with_params(void * buffer, size_t buffer_size, struct parakeet_context_params params);
PARAKEET_API struct parakeet_context * parakeet_init_with_params (struct parakeet_model_loader * loader, struct parakeet_context_params params);
// These are the same as the above, but the internal state of the context is not allocated automatically
// It is the responsibility of the caller to allocate the state using parakeet_init_state() (#523)
PARAKEET_API struct parakeet_context * parakeet_init_from_file_with_params_no_state (const char * path_model, struct parakeet_context_params params);
PARAKEET_API struct parakeet_context * parakeet_init_from_buffer_with_params_no_state(void * buffer, size_t buffer_size, struct parakeet_context_params params);
PARAKEET_API struct parakeet_context * parakeet_init_with_params_no_state (struct parakeet_model_loader * loader, struct parakeet_context_params params);
PARAKEET_API struct parakeet_state * parakeet_init_state(struct parakeet_context * ctx);
// Frees all allocated memory
PARAKEET_API void parakeet_free (struct parakeet_context * ctx);
PARAKEET_API void parakeet_free_state(struct parakeet_state * state);
PARAKEET_API void parakeet_free_params(struct parakeet_full_params * params);
PARAKEET_API void parakeet_free_context_params(struct parakeet_context_params * params);
// Convert RAW PCM audio to log mel spectrogram.
// The resulting spectrogram is stored inside the default state of the provided parakeet context.
// Returns 0 on success
PARAKEET_API int parakeet_pcm_to_mel(
struct parakeet_context * ctx,
const float * samples,
int n_samples,
int n_threads);
PARAKEET_API int parakeet_pcm_to_mel_with_state(
struct parakeet_context * ctx,
struct parakeet_state * state,
const float * samples,
int n_samples,
int n_threads);
// This can be used to set a custom log mel spectrogram inside the default state of the provided parakeet context.
// Use this instead of parakeet_pcm_to_mel() if you want to provide your own log mel spectrogram.
// n_mel must be 80
// Returns 0 on success
PARAKEET_API int parakeet_set_mel(
struct parakeet_context * ctx,
const float * data,
int n_len,
int n_mel);
PARAKEET_API int parakeet_set_mel_with_state(
struct parakeet_context * ctx,
struct parakeet_state * state,
const float * data,
int n_len,
int n_mel);
// Run the Parakeet encoder on the log mel spectrogram stored inside the default state in the provided parakeet context.
// Make sure to call parakeet_pcm_to_mel() or parakeet_set_mel() first.
// offset can be used to specify the offset of the first frame in the spectrogram.
// Returns 0 on success
PARAKEET_API int parakeet_encode(
struct parakeet_context * ctx,
int offset,
int n_threads);
PARAKEET_API int parakeet_encode_with_state(
struct parakeet_context * ctx,
struct parakeet_state * state,
int offset,
int n_threads);
// Convert the provided text into tokens.
// The tokens pointer must be large enough to hold the resulting tokens.
// Returns the number of tokens on success, no more than n_max_tokens
// Returns a negative number on failure - the number of tokens that would have been returned
// TODO: not sure if correct
PARAKEET_API int parakeet_tokenize(
struct parakeet_context * ctx,
const char * text,
parakeet_token * tokens,
int n_max_tokens);
// Return the number of tokens in the provided text
// Equivalent to: -parakeet_tokenize(ctx, text, NULL, 0)
int parakeet_token_count(struct parakeet_context * ctx, const char * text);
PARAKEET_API int parakeet_n_len (struct parakeet_context * ctx); // mel length
PARAKEET_API int parakeet_n_len_from_state(struct parakeet_state * state); // mel length
PARAKEET_API int parakeet_n_vocab (struct parakeet_context * ctx);
PARAKEET_API int parakeet_n_audio_ctx (struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_n_vocab (struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_n_audio_ctx (struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_n_audio_state(struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_n_audio_head (struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_n_audio_layer(struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_n_mels (struct parakeet_context * ctx);
PARAKEET_API int parakeet_model_ftype (struct parakeet_context * ctx);
// Token logits obtained from the last call to parakeet_full/parakeet_chunk
// The logits for the last token are stored in the last row
// Rows: n_tokens
// Cols: n_vocab
PARAKEET_API float * parakeet_get_logits (struct parakeet_context * ctx);
PARAKEET_API float * parakeet_get_logits_from_state(struct parakeet_state * state);
// Token Id -> String. Uses the vocabulary in the provided context
PARAKEET_API const char * parakeet_token_to_str(struct parakeet_context * ctx, parakeet_token token);
PARAKEET_API int parakeet_token_to_text(const char * token_str, bool is_first, char * output, int max_len);
// Special tokens
PARAKEET_API parakeet_token parakeet_token_blank (struct parakeet_context * ctx);
PARAKEET_API parakeet_token parakeet_token_unk (struct parakeet_context * ctx);
PARAKEET_API parakeet_token parakeet_token_bos(struct parakeet_context * ctx);
// Performance information from the default state.
struct parakeet_timings {
float sample_ms;
float encode_ms;
float decode_ms;
float batchd_ms;
float prompt_ms;
};
PARAKEET_API struct parakeet_timings * parakeet_get_timings(struct parakeet_context * ctx);
PARAKEET_API void parakeet_print_timings(struct parakeet_context * ctx);
PARAKEET_API void parakeet_reset_timings(struct parakeet_context * ctx);
// Print system information
PARAKEET_API const char * parakeet_print_system_info(void);
// Available sampling strategies
enum parakeet_sampling_strategy {
PARAKEET_SAMPLING_GREEDY,
};
// Token callback.
// Called for each new predicted token.
// Use the parakeet_full_...() functions to obtain the text segments
typedef void (*parakeet_new_token_callback)(
struct parakeet_context * ctx,
struct parakeet_state * state,
const parakeet_token_data * token_data,
void * user_data);
// Text segment callback
// Called on every newly generated text segment
// Use the parakeet_full_...() functions to obtain the text segments
typedef void (*parakeet_new_segment_callback)(struct parakeet_context * ctx, struct parakeet_state * state, int n_new, void * user_data);
// Progress callback
typedef void (*parakeet_progress_callback)(struct parakeet_context * ctx, struct parakeet_state * state, int progress, void * user_data);
// Encoder begin callback
// If not NULL, called before the encoder starts
// If it returns false, the computation is aborted
typedef bool (*parakeet_encoder_begin_callback)(struct parakeet_context * ctx, struct parakeet_state * state, void * user_data);
// Parameters for the parakeet_full() function
// If you change the order or add new parameters, make sure to update the default values in parakeet.cpp:
// parakeet_full_default_params()
struct parakeet_full_params {
enum parakeet_sampling_strategy strategy;
int n_threads;
int offset_ms; // start offset in ms
int duration_ms; // audio duration to process in ms
bool no_context; // do not use past transcription (if any) as context
// [EXPERIMENTAL] speed-up techniques
int audio_ctx; // overwrite the audio context size (0 = use default)
int chunk_length_ms; // length of each chunk in ms
int left_context_ms; // left context in ms
int right_context_ms; // right context in ms
// called for every newly generated text segment
parakeet_new_segment_callback new_segment_callback;
void * new_segment_callback_user_data;
// called for every newly generated token
parakeet_new_token_callback new_token_callback;
void * new_token_callback_user_data;
// called on each progress update
parakeet_progress_callback progress_callback;
void * progress_callback_user_data;
// called each time before the encoder starts
parakeet_encoder_begin_callback encoder_begin_callback;
void * encoder_begin_callback_user_data;
// called each time before ggml computation starts
ggml_abort_callback abort_callback;
void * abort_callback_user_data;
};
// NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see parakeet_free_context_params() & parakeet_free_params()
PARAKEET_API struct parakeet_context_params * parakeet_context_default_params_by_ref(void);
PARAKEET_API struct parakeet_context_params parakeet_context_default_params (void);
PARAKEET_API struct parakeet_full_params * parakeet_full_default_params_by_ref(enum parakeet_sampling_strategy strategy);
PARAKEET_API struct parakeet_full_params parakeet_full_default_params (enum parakeet_sampling_strategy strategy);
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
// Not thread safe for same context
PARAKEET_API int parakeet_full(
struct parakeet_context * ctx,
struct parakeet_full_params params,
const float * samples,
int n_samples);
PARAKEET_API int parakeet_full_with_state(
struct parakeet_context * ctx,
struct parakeet_state * state,
struct parakeet_full_params params,
const float * samples,
int n_samples);
// Split the input audio in chunks and process each chunk separately using parakeet_full_with_state()
// Result is stored in the default state of the context
// Not thread safe if executed in parallel on the same context.
PARAKEET_API int parakeet_full_parallel(
struct parakeet_context * ctx,
struct parakeet_full_params params,
const float * samples,
int n_samples,
int n_processors);
// Process a single chunk of audio data that fits within the model's audio context window.
// This is more efficient than parakeet_full() for short audio clips.
PARAKEET_API int parakeet_chunk(
struct parakeet_context * ctx,
struct parakeet_state * state,
struct parakeet_full_params params,
const float * samples,
int n_samples);
// Initialize streaming state for a new stream.
PARAKEET_API int parakeet_stream_init(
struct parakeet_context * ctx,
struct parakeet_state * state,
struct parakeet_full_params params);
// Push audio samples in streaming mode. Internally this function will arrange
// the samples in a buffer with a left context, a center chunk, and a
// right context. The encoder sees the complete buffer, which enables it
// to get boundary context for the target/center audio chunk. This avoids hard
// cutoffs at the chunk boundaries. The joint network then only sees the
// center chunk; this function internally handles the context windowing.
PARAKEET_API int parakeet_stream_push(
struct parakeet_context * ctx,
struct parakeet_state * state,
const float * samples,
int n_samples);
// Flush the final partial chunk at end-of-stream.
PARAKEET_API int parakeet_stream_flush(
struct parakeet_context * ctx,
struct parakeet_state * state);
// Number of generated text segments
PARAKEET_API int parakeet_full_n_segments (struct parakeet_context * ctx);
PARAKEET_API int parakeet_full_n_segments_from_state(struct parakeet_state * state);
// Get the start and end time of the specified segment
PARAKEET_API int64_t parakeet_full_get_segment_t0 (struct parakeet_context * ctx, int i_segment);
PARAKEET_API int64_t parakeet_full_get_segment_t0_from_state(struct parakeet_state * state, int i_segment);
PARAKEET_API int64_t parakeet_full_get_segment_t1 (struct parakeet_context * ctx, int i_segment);
PARAKEET_API int64_t parakeet_full_get_segment_t1_from_state(struct parakeet_state * state, int i_segment);
// Get the text of the specified segment
PARAKEET_API const char * parakeet_full_get_segment_text (struct parakeet_context * ctx, int i_segment);
PARAKEET_API const char * parakeet_full_get_segment_text_from_state(struct parakeet_state * state, int i_segment);
// Get number of tokens in the specified segment
PARAKEET_API int parakeet_full_n_tokens (struct parakeet_context * ctx, int i_segment);
PARAKEET_API int parakeet_full_n_tokens_from_state(struct parakeet_state * state, int i_segment);
// Get the token text of the specified token in the specified segment
PARAKEET_API const char * parakeet_full_get_token_text (struct parakeet_context * ctx, int i_segment, int i_token);
PARAKEET_API const char * parakeet_full_get_token_text_from_state(struct parakeet_context * ctx, struct parakeet_state * state, int i_segment, int i_token);
// Get the token id of the specified token in the specified segment
PARAKEET_API parakeet_token parakeet_full_get_token_id (struct parakeet_context * ctx, int i_segment, int i_token);
PARAKEET_API parakeet_token parakeet_full_get_token_id_from_state(struct parakeet_state * state, int i_segment, int i_token);
// Get token data for the specified token in the specified segment
PARAKEET_API parakeet_token_data parakeet_full_get_token_data (struct parakeet_context * ctx, int i_segment, int i_token);
PARAKEET_API parakeet_token_data parakeet_full_get_token_data_from_state(struct parakeet_state * state, int i_segment, int i_token);
// Get the probability of the specified token in the specified segment
PARAKEET_API float parakeet_full_get_token_p (struct parakeet_context * ctx, int i_segment, int i_token);
PARAKEET_API float parakeet_full_get_token_p_from_state(struct parakeet_state * state, int i_segment, int i_token);
// Control logging output; default behavior is to print to stderr
PARAKEET_API void parakeet_log_set(ggml_log_callback log_callback, void * user_data);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,331 @@
#!/usr/bin/env python3
# Convert Parakeet TDT model from NeMo format to ggml format
#
# Usage: python convert-parakeet-to-ggml.py --model parakeet-model.nemo --out-dir output-dir [--use-f32]
#
# The NeMo file is a tar archive containing:
# - model_weights.ckpt (PyTorch checkpoint)
# - model_config.yaml (model configuration)
# - tokenizer files
#
# This script extracts the NeMo archive, loads the model weights and configuration,
# and saves them in ggml format compatible with whisper.cpp.
#
import torch
import argparse
import io
import os
import sys
import struct
import tarfile
import tempfile
import shutil
import yaml
import numpy as np
from pathlib import Path
from typing import Optional
def hz_to_mel(freq):
    """Convert a frequency in Hz to the mel scale (2595 * log10 formula)."""
    ratio = 1.0 + freq / 700.0
    return 2595.0 * np.log10(ratio)
def mel_to_hz(mel):
    """Inverse of hz_to_mel: convert a mel-scale value back to Hz."""
    scaled = mel / 2595.0
    return 700.0 * (np.power(10.0, scaled) - 1.0)
def create_relative_positional_encoding(d_model: int, n_pos_max_len: int) -> np.ndarray:
    """Build the sinusoidal relative positional-encoding table.

    Positions run from +(max_len // 2) at row 0 down to -(max_len // 2) at the
    last row, where max_len = 2 * n_pos_max_len - 1. Even feature columns hold
    sin(position * div_term), odd columns hold cos(position * div_term).

    Args:
        d_model:       encoder feature dimension (number of columns).
        n_pos_max_len: maximum sequence length; the table covers both
                       positive and negative relative offsets.

    Returns:
        float32 array of shape (2 * n_pos_max_len - 1, d_model).

    Fixes: the original per-element loop raised IndexError for odd d_model
    (it unconditionally wrote pe[idx, i + 1]); this version handles odd
    d_model and is vectorized with numpy.
    """
    max_len = n_pos_max_len * 2 - 1
    log_10000 = np.log(10000.0)

    # Relative positions: +max_len//2 ... 0 ... -max_len//2.
    positions = (max_len // 2) - np.arange(max_len, dtype=np.float64)

    # One frequency per (sin, cos) column pair: 10000^(-i / d_model).
    even_idx = np.arange(0, d_model, 2, dtype=np.float64)
    div_term = np.exp(-even_idx * log_10000 / float(d_model))

    angles = positions[:, None] * div_term[None, :]

    pe = np.zeros((max_len, d_model), dtype=np.float32)
    pe[:, 0::2] = np.sin(angles)
    # For odd d_model there is one fewer cos column than sin column.
    pe[:, 1::2] = np.cos(angles)[:, : d_model // 2]
    return pe
def extract_nemo_archive(nemo_path, extract_dir):
    """Extract a .nemo tar archive into extract_dir.

    Fixes: the original called tar.extractall() unchecked, which allows a
    crafted archive member (e.g. "../evil" or an absolute path) to write
    outside extract_dir. All member paths are validated first; a ValueError
    is raised before anything is extracted if any member would escape.
    """
    print(f"Extracting {nemo_path} to {extract_dir}")
    with tarfile.open(nemo_path, 'r') as tar:
        base = os.path.realpath(extract_dir)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(extract_dir, member.name))
            if target != base and not target.startswith(base + os.sep):
                raise ValueError(f"Unsafe member path in archive: {member.name}")
        tar.extractall(path=extract_dir)
    print("Extraction complete")
def load_model_config(config_path):
    """Read the NeMo model_config.yaml and return it as a Python dict."""
    with open(config_path, 'r') as config_file:
        return yaml.safe_load(config_file)
def load_tokenizer(extract_dir, config):
    """Locate the tokenizer files in the extracted archive and load the vocab.

    Scans extract_dir for '*_tokenizer.model' and '*tokenizer.vocab' files.
    The vocab is read line by line; the first tab-separated column of each
    line is the token text and the line number is its id.

    Returns:
        dict mapping UTF-8 encoded token bytes -> integer token id.

    Raises:
        FileNotFoundError if either tokenizer file is missing.
    """
    model_path = None
    vocab_path = None
    for entry in os.listdir(extract_dir):
        full = os.path.join(extract_dir, entry)
        if entry.endswith('_tokenizer.model'):
            model_path = full
        elif entry.endswith('tokenizer.vocab'):
            vocab_path = full

    if not model_path:
        raise FileNotFoundError("Tokenizer model file not found")
    if not vocab_path:
        raise FileNotFoundError("Tokenizer vocab file not found")

    tokens = {}
    with open(vocab_path, 'r', encoding='utf-8') as vocab_file:
        for idx, line in enumerate(vocab_file):
            columns = line.strip().split('\t')
            if columns:
                tokens[columns[0].encode('utf-8')] = idx

    print(f"Loaded {len(tokens)} tokens from {os.path.basename(vocab_path)}")
    if len(tokens) != 8192:
        print(f"WARNING: Expected 8192 tokens, got {len(tokens)}")
    return tokens
def write_tensor(fout, name, data, use_f16=True, force_f32=False):
    """Serialize one tensor to the ggml file.

    Layout: (n_dims, name_length, ftype) as three int32, then the dims in
    reverse order (ggml ordering), the UTF-8 name bytes, and the raw data.
    ftype is 1 for float16 payloads and 0 for float32. 1-D tensors and any
    tensor whose name contains 'bias' or 'norm' are always stored as float32.
    """
    # NeMo stores pre-encoder conv biases as 1-D; expand to 4-D so ggml can
    # broadcast them over conv outputs.
    if 'pre_encode.conv' in name and 'bias' in name and len(data.shape) == 1:
        data = data.reshape(1, -1, 1, 1)
        print(f" Reshaped conv bias {name} to {data.shape}")

    n_dims = len(data.shape)

    # Pick the storage dtype for this tensor.
    keep_f32 = (force_f32 or not use_f16 or
                n_dims < 2 or 'bias' in name or 'norm' in name)
    if keep_f32:
        data = data.astype(np.float32)
        ftype = 0
    else:
        data = data.astype(np.float16)
        ftype = 1

    dims_reversed = [data.shape[n_dims - 1 - i] for i in range(n_dims)]
    print(f"Processing: {name} {list(data.shape)}, dtype: {data.dtype}, n_dims: {n_dims}, reversed: {dims_reversed}")

    name_bytes = name.encode('utf-8')
    fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype))
    for dim in dims_reversed:
        fout.write(struct.pack("i", dim))
    fout.write(name_bytes)
    data.tofile(fout)
def convert_parakeet_to_ggml(nemo_path, output_dir, use_f16=True, out_name=None):
    # Convert a NeMo .nemo archive to a single ggml binary.
    #
    # nemo_path  : path to the .nemo tar archive
    # output_dir : directory for the output file (created if missing)
    # use_f16    : store eligible 2-D weights as float16 (see write_tensor)
    # out_name   : optional output file name; defaults to ggml-model.bin,
    #              or ggml-model-f32.bin when use_f16 is False
    #
    # Raises ValueError if the mel filterbank or window tensor is missing,
    # or if the TDT duration count does not match the config.
    nemo_path = Path(nemo_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # Create temporary directory for extraction
    with tempfile.TemporaryDirectory() as temp_dir:
        extract_nemo_archive(nemo_path, temp_dir)
        config_path = os.path.join(temp_dir, 'model_config.yaml')
        config = load_model_config(config_path)
        print("Model configuration:")
        print(f" Sample rate: {config['sample_rate']}")
        print(f" Encoder layers: {config['encoder']['n_layers']}")
        print(f" Encoder d_model: {config['encoder']['d_model']}")
        print(f" Mel features: {config['preprocessor']['features']}")
        weights_path = os.path.join(temp_dir, 'model_weights.ckpt')
        print(f"\nLoading model weights from {weights_path}")
        checkpoint = torch.load(weights_path, map_location='cpu')
        # Extract state dict (some checkpoints nest it under 'state_dict')
        if 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            state_dict = checkpoint
        print(f"Loaded {len(state_dict)} tensors")
        # Load tokenizer
        print("\nLoading tokenizer...")
        tokens = load_tokenizer(temp_dir, config)
        print(f"Loaded {len(tokens)} tokens")
        # Prepare hyperparameters for the Parakeet ggml format.
        # NOTE(review): n_audio_ctx is hard-coded to 5000 rather than read
        # from the config - confirm this matches the model.
        hparams = {
            'n_audio_ctx': 5000,
            'n_audio_state': config['encoder']['d_model'],
            'n_audio_head': config['encoder']['n_heads'],
            'n_audio_layer': config['encoder']['n_layers'],
            'n_mels': config['preprocessor']['features'],
            'n_fft': config['preprocessor']['n_fft'],
            'subsampling_factor': config['encoder']['subsampling_factor'],
            'n_subsampling_channels': config['encoder']['subsampling_conv_channels'],
            'n_pos_max_len': config['encoder']['pos_emb_max_len'],
            'n_pred_dim': config['decoder']['prednet']['pred_hidden'],
            'n_pred_layers': config['decoder']['prednet']['pred_rnn_layers'],
            'n_vocab': config['decoder']['vocab_size'],
            'n_tdt_durations': config['model_defaults']['num_tdt_durations'],
            'n_max_tokens': config['decoding']['greedy']['max_symbols'],
        }
        print("\nGGML hyperparameters:")
        for key, value in hparams.items():
            print(f" {key}: {value}")
        # The positional-encoding table is generated here, not loaded from
        # the checkpoint; it is written last (see write_tensor call below).
        pe = create_relative_positional_encoding(hparams['n_audio_state'], hparams['n_pos_max_len'])
        print(f"\nGenerated positional encoding tensor 'encoder.pe' with shape {pe.shape}")
        # Create output file
        if out_name:
            fname_out = output_dir / out_name
        else:
            fname_out = output_dir / ("ggml-model-f32.bin" if not use_f16 else "ggml-model.bin")
        print(f"\nWriting to {fname_out}")
        with open(fname_out, 'wb') as fout:
            # Write magic number
            fout.write(struct.pack("i", 0x67676d6c)) # 'ggml' in hex
            # Write hyperparameters (field order is the file-format contract;
            # the loader must read in exactly this order)
            fout.write(struct.pack("i", hparams['n_vocab']))
            fout.write(struct.pack("i", hparams['n_audio_ctx']))
            fout.write(struct.pack("i", hparams['n_audio_state']))
            fout.write(struct.pack("i", hparams['n_audio_head']))
            fout.write(struct.pack("i", hparams['n_audio_layer']))
            fout.write(struct.pack("i", hparams['n_mels']))
            fout.write(struct.pack("i", 1 if use_f16 else 0))
            fout.write(struct.pack("i", hparams['n_fft']))
            fout.write(struct.pack("i", hparams['subsampling_factor']))
            fout.write(struct.pack("i", hparams['n_subsampling_channels']))
            fout.write(struct.pack("i", hparams['n_pos_max_len']))
            fout.write(struct.pack("i", hparams['n_pred_dim']))
            fout.write(struct.pack("i", hparams['n_pred_layers']))
            fout.write(struct.pack("i", hparams['n_tdt_durations']))
            fout.write(struct.pack("i", hparams['n_max_tokens']))
            # Extract mel filterbank from model
            fb_key = None
            for key in state_dict.keys():
                if 'featurizer.fb' in key or 'filterbank' in key.lower():
                    fb_key = key
                    break
            if not fb_key:
                print("\nERROR: Mel filterbank not found in model!")
                print("Expected tensor with 'featurizer.fb' or 'filterbank' in name")
                print("\nAvailable preprocessor tensors:")
                for key in sorted(state_dict.keys()):
                    if 'preprocessor' in key or 'featurizer' in key:
                        print(f" {key}: {state_dict[key].shape}")
                raise ValueError("Mel filterbank tensor not found in model")
            print(f"\nUsing model's mel filterbank from: {fb_key}")
            mel_filters = state_dict[fb_key].squeeze().numpy().astype(np.float32)
            print(f" Filterbank shape: {mel_filters.shape}")
            print(f" Filterbank min/max values: {mel_filters.min():.6f} / {mel_filters.max():.6f}")
            print(f" Filterbank non-zero elements: {np.count_nonzero(mel_filters)} / {mel_filters.size}")
            print(f" First row sum: {mel_filters[0].sum():.6f}")
            if len(mel_filters.shape) != 2:
                raise ValueError(f"Expected 2D filterbank, got shape {mel_filters.shape}")
            n_mels, n_freqs = mel_filters.shape
            fout.write(struct.pack("i", n_mels)) # n_mel
            fout.write(struct.pack("i", n_freqs)) # n_fb (frequency bins)
            # Write mel filterbank
            for i in range(n_mels):
                for j in range(n_freqs):
                    fout.write(struct.pack("f", mel_filters[i, j]))
            # Extract window function from model
            window_key = None
            for key in state_dict.keys():
                # NOTE(review): `or` binds looser than `and`, so this matches
                # 'featurizer.window' OR ('preproc' AND 'window') - appears
                # intended, but worth confirming.
                if 'featurizer.window' in key or 'preproc' in key and 'window' in key:
                    window_key = key
                    break
            if not window_key:
                print("\nERROR: Window function not found in model!")
                print("Expected tensor with 'featurizer.window' in name")
                raise ValueError("Window function tensor not found in model")
            print(f"\nUsing model's window function from: {window_key}")
            window = state_dict[window_key].squeeze().numpy().astype(np.float32)
            print(f" Window shape: {window.shape}")
            print(f" Window min/max values: {window.min():.6f} / {window.max():.6f}")
            print(f" Window non-zero elements: {np.count_nonzero(window)} / {window.size}")
            print(f" Window sum: {window.sum():.6f}")
            if len(window.shape) != 1:
                raise ValueError(f"Expected 1D window, got shape {window.shape}")
            n_window = window.shape[0]
            fout.write(struct.pack("i", n_window))
            # Write window function
            for i in range(n_window):
                fout.write(struct.pack("f", window[i]))
            # Write TDT durations
            tdt_durations = config['model_defaults']['tdt_durations']
            if len(tdt_durations) != hparams['n_tdt_durations']:
                raise ValueError(f"TDT durations count mismatch: {len(tdt_durations)} vs {hparams['n_tdt_durations']}")
            for duration in tdt_durations:
                fout.write(struct.pack("I", duration))
            # Write vocabulary: count, then (length, bytes) per token sorted
            # by token id so ids match line positions in the vocab file
            fout.write(struct.pack("i", len(tokens)))
            for token_bytes, idx in sorted(tokens.items(), key=lambda x: x[1]):
                fout.write(struct.pack("i", len(token_bytes)))
                fout.write(token_bytes)
            print("\nConverting model weights...")
            for name, tensor in state_dict.items():
                # Skip the filterbank and window - already written in preprocessing section
                if name == fb_key:
                    continue
                if name == window_key:
                    continue
                # Don't squeeze Conv2d weights - they need to preserve all 4 dimensions
                if 'conv' in name and 'weight' in name and len(tensor.shape) == 4:
                    data = tensor.numpy()
                else:
                    data = tensor.squeeze().numpy()
                write_tensor(fout, name, data, use_f16=use_f16)
            # The generated positional encoding is always stored as f32
            write_tensor(fout, "encoder.pe", pe, use_f16=use_f16, force_f32=True)
        print(f"\nConversion complete!")
        print(f"Output file: {fname_out}")
        print(f"File size: {fname_out.stat().st_size / (1024**2):.2f} MB")
if __name__ == '__main__':
    # Command-line entry point: parse arguments, validate the input path,
    # then run the conversion.
    parser = argparse.ArgumentParser(
        description='Convert Parakeet TDT model from NeMo format to ggml format'
    )
    parser.add_argument('--model', type=str, required=True,
                        help='Path to Parakeet .nemo model file')
    parser.add_argument('--out-dir', type=str, required=True,
                        help='Directory to write ggml model file')
    parser.add_argument('--use-f32', action='store_true', default=False,
                        help='Use f32 instead of f16 (default: f16)')
    parser.add_argument('--out-name', type=str, default=None,
                        help='Output file name (default: ggml-model.bin or ggml-model-f32.bin)')
    args = parser.parse_args()
    if not os.path.exists(args.model):
        print(f"Error: {args.model} not found")
        sys.exit(1)
    # f16 output is the default; --use-f32 opts out.
    use_f16 = not args.use_f32
    convert_parakeet_to_ggml(args.model, args.out_dir, use_f16, args.out_name)

View File

@ -0,0 +1 @@
pyyaml

View File

@ -0,0 +1,75 @@
import os
from huggingface_hub import HfApi, create_repo
# TODO: change to ggml-org once merged.
USER_NAME = "danbev"
REPO_ID = f"{USER_NAME}/parakeet"
LOCAL_GGUF_PATH = "models/ggml-parakeet-tdt-0.6b-v3.bin"
REMOTE_GGUF_NAME = "parakeet-tdt-0.6b-v3.bin"
MODEL_CARD_CONTENT = f"""---
license: apache-2.0
base_model: {USER_NAME}/parakeet
tags:
- gguf
---
# Parakeet Model Card
## Description
This is an iterative release of the Parakeet model in whisper.cpp format.
## Usage
You can use this file with [parakeet-cli](https://github.com/danbev/whisper.cpp/tree/parakeet-support/examples/parakeet-cli).
Build parakeet-cli:
```console
$ git clone -b parakeet-support https://github.com/danbev/whisper.cpp.git
$ cd whisper.cpp
$ cmake -B build -S .
$ cmake --build build --target parakeet-cli -j 12
```
Download the model:
```console
$ hf download danbev/parakeet parakeet-tdt-0.6b-v3.bin --local-dir models
```
Run:
```console
$ ./build/bin/parakeet-cli -m models/parakeet-tdt-0.6b-v3.bin -f samples/jfk.wav
```
"""
api = HfApi()

def deploy_iteration():
    # Upload one iteration of the converted model to the Hugging Face Hub:
    # create the repo if it does not exist, refresh the model card, then
    # upload the model binary. Uses the module-level REPO_ID,
    # MODEL_CARD_CONTENT, LOCAL_GGUF_PATH and REMOTE_GGUF_NAME constants.
    create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)
    print("Updating Model Card...")
    api.upload_file(
        path_or_fileobj=MODEL_CARD_CONTENT.encode(),
        path_in_repo="README.md",
        repo_id=REPO_ID,
        repo_type="model",
        commit_message="Update README.md"
    )
    print(f"Uploading {REMOTE_GGUF_NAME}...")
    api.upload_file(
        path_or_fileobj=LOCAL_GGUF_PATH,
        path_in_repo=REMOTE_GGUF_NAME,
        repo_id=REPO_ID,
        repo_type="model",
        commit_message="Upload new parakeet iteration"
    )
    print(f"\nDeployment successful!")
    print(f"URL: https://huggingface.co/{REPO_ID}")

if __name__ == "__main__":
    # Only attempt the upload when the local model file actually exists.
    if os.path.exists(LOCAL_GGUF_PATH):
        deploy_iteration()
    else:
        print(f"Error: {LOCAL_GGUF_PATH} not found.")

View File

@ -109,23 +109,43 @@ add_library(whisper
whisper.cpp
)
# Parakeet library target.
# Fix: Threads::Threads was linked before find_package(Threads) ran (it only
# appears further down, just before the whisper target links it). Locate the
# package first so the imported target exists when it is used here.
find_package(Threads REQUIRED)

add_library(parakeet
    ../include/parakeet.h
    parakeet-arch.h
    parakeet.cpp
)

target_include_directories(parakeet PUBLIC . ../include)
target_compile_features   (parakeet PUBLIC cxx_std_11)
target_link_libraries     (parakeet PUBLIC ggml Threads::Threads)
# Set the version numbers
set_target_properties(whisper PROPERTIES
    VERSION ${PROJECT_VERSION}
    SOVERSION ${SOVERSION}
)

set_target_properties(parakeet PROPERTIES
    VERSION ${PROJECT_VERSION}
    SOVERSION ${SOVERSION}
)

target_include_directories(whisper PUBLIC . ../include)
target_compile_features   (whisper PUBLIC cxx_std_11) # don't bump

# On big-endian hosts, define the *_BIG_ENDIAN macros for both libraries.
if (CMAKE_CXX_BYTE_ORDER STREQUAL "BIG_ENDIAN")
    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_BIG_ENDIAN)
    set(PARAKEET_EXTRA_FLAGS ${PARAKEET_EXTRA_FLAGS} -DPARAKEET_BIG_ENDIAN)
endif()

if (WHISPER_EXTRA_FLAGS)
    target_compile_options(whisper PRIVATE ${WHISPER_EXTRA_FLAGS})
endif()

if (PARAKEET_EXTRA_FLAGS)
    target_compile_options(parakeet PRIVATE ${PARAKEET_EXTRA_FLAGS})
endif()

# Locate Threads so the Threads::Threads imported target can be linked.
find_package(Threads REQUIRED)

target_link_libraries(whisper PUBLIC ggml Threads::Threads)
@ -144,4 +164,7 @@ endif()
if (BUILD_SHARED_LIBS)
    # Shared builds: enable PIC and define the *_SHARED/*_BUILD macros
    # consumed by the public headers for both libraries.
    set_target_properties(whisper PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_compile_definitions(whisper PRIVATE WHISPER_SHARED WHISPER_BUILD)
    set_target_properties(parakeet PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_compile_definitions(parakeet PRIVATE PARAKEET_SHARED PARAKEET_BUILD)
endif()

194
src/parakeet-arch.h Normal file
View File

@ -0,0 +1,194 @@
#pragma once

#include "ggml.h"

#include <map>

// Identifier for every tensor in a Parakeet ggml model file.
// Per-layer encoder and LSTM tensors share one id across layers; the layer
// index is substituted into the "%d" placeholder of the matching entry in
// PARAKEET_TENSOR_NAMES.
enum parakeet_tensor {
    // Encoder pre_encode
    PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_OUT_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS,
    PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT,
    PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS,
    PARAKEET_TENSOR_ENC_PE,
    // Encoder layers (per-layer)
    PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_FF1_BIAS,
    PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT,
    PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_CONV_BIAS,
    PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT,
    PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT,
    PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT,
    PARAKEET_TENSOR_ENC_CONV_BN_BIAS,
    PARAKEET_TENSOR_ENC_CONV_BN_MEAN,
    PARAKEET_TENSOR_ENC_CONV_BN_VAR,
    PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES,
    PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS,
    PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U,
    PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V,
    PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT,
    PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_FF2_BIAS,
    PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT,
    PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT,
    PARAKEET_TENSOR_ENC_NORM_OUT_BIAS,
    // Prediction network
    PARAKEET_TENSOR_PRED_EMBED_WEIGHT,
    PARAKEET_TENSOR_PRED_LSTM_WEIGHT_IH,
    PARAKEET_TENSOR_PRED_LSTM_WEIGHT_HH,
    PARAKEET_TENSOR_PRED_LSTM_BIAS_IH,
    PARAKEET_TENSOR_PRED_LSTM_BIAS_HH,
    // Joint network
    PARAKEET_TENSOR_JOINT_PRED_WEIGHT,
    PARAKEET_TENSOR_JOINT_PRED_BIAS,
    PARAKEET_TENSOR_JOINT_ENC_WEIGHT,
    PARAKEET_TENSOR_JOINT_ENC_BIAS,
    PARAKEET_TENSOR_JOINT_NET_WEIGHT,
    PARAKEET_TENSOR_JOINT_NET_BIAS,
};
// Checkpoint tensor name for each parakeet_tensor id. Per-layer entries
// contain a "%d" placeholder for the layer index.
static const std::map<parakeet_tensor, const char *> PARAKEET_TENSOR_NAMES = {
    // Encoder pre_encode
    {PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT, "encoder.pre_encode.out.weight"},
    {PARAKEET_TENSOR_ENC_PRE_OUT_BIAS, "encoder.pre_encode.out.bias"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT, "encoder.pre_encode.conv.0.weight"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS, "encoder.pre_encode.conv.0.bias"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT, "encoder.pre_encode.conv.2.weight"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS, "encoder.pre_encode.conv.2.bias"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT, "encoder.pre_encode.conv.3.weight"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS, "encoder.pre_encode.conv.3.bias"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT, "encoder.pre_encode.conv.5.weight"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS, "encoder.pre_encode.conv.5.bias"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT, "encoder.pre_encode.conv.6.weight"},
    {PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS, "encoder.pre_encode.conv.6.bias"},
    {PARAKEET_TENSOR_ENC_PE, "encoder.pe"},
    // Encoder layers (use %d for layer number)
    {PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT, "encoder.layers.%d.norm_feed_forward1.weight"},
    {PARAKEET_TENSOR_ENC_NORM_FF1_BIAS, "encoder.layers.%d.norm_feed_forward1.bias"},
    {PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT, "encoder.layers.%d.feed_forward1.linear1.weight"},
    {PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT, "encoder.layers.%d.feed_forward1.linear2.weight"},
    {PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT, "encoder.layers.%d.norm_conv.weight"},
    {PARAKEET_TENSOR_ENC_NORM_CONV_BIAS, "encoder.layers.%d.norm_conv.bias"},
    {PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT, "encoder.layers.%d.conv.pointwise_conv1.weight"},
    {PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT, "encoder.layers.%d.conv.depthwise_conv.weight"},
    {PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT, "encoder.layers.%d.conv.batch_norm.weight"},
    {PARAKEET_TENSOR_ENC_CONV_BN_BIAS, "encoder.layers.%d.conv.batch_norm.bias"},
    {PARAKEET_TENSOR_ENC_CONV_BN_MEAN, "encoder.layers.%d.conv.batch_norm.running_mean"},
    {PARAKEET_TENSOR_ENC_CONV_BN_VAR, "encoder.layers.%d.conv.batch_norm.running_var"},
    {PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES, "encoder.layers.%d.conv.batch_norm.num_batches_tracked"},
    {PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT, "encoder.layers.%d.conv.pointwise_conv2.weight"},
    {PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT, "encoder.layers.%d.norm_self_att.weight"},
    {PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS, "encoder.layers.%d.norm_self_att.bias"},
    {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U, "encoder.layers.%d.self_attn.pos_bias_u"},
    {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V, "encoder.layers.%d.self_attn.pos_bias_v"},
    {PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT, "encoder.layers.%d.self_attn.linear_q.weight"},
    {PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT, "encoder.layers.%d.self_attn.linear_k.weight"},
    {PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT, "encoder.layers.%d.self_attn.linear_v.weight"},
    {PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT, "encoder.layers.%d.self_attn.linear_out.weight"},
    {PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT, "encoder.layers.%d.self_attn.linear_pos.weight"},
    {PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT, "encoder.layers.%d.norm_feed_forward2.weight"},
    {PARAKEET_TENSOR_ENC_NORM_FF2_BIAS, "encoder.layers.%d.norm_feed_forward2.bias"},
    {PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT, "encoder.layers.%d.feed_forward2.linear1.weight"},
    {PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT, "encoder.layers.%d.feed_forward2.linear2.weight"},
    {PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT, "encoder.layers.%d.norm_out.weight"},
    {PARAKEET_TENSOR_ENC_NORM_OUT_BIAS, "encoder.layers.%d.norm_out.bias"},
    // Prediction network
    {PARAKEET_TENSOR_PRED_EMBED_WEIGHT, "decoder.prediction.embed.weight"},
    {PARAKEET_TENSOR_PRED_LSTM_WEIGHT_IH, "decoder.prediction.dec_rnn.lstm.weight_ih_l%d"},
    {PARAKEET_TENSOR_PRED_LSTM_WEIGHT_HH, "decoder.prediction.dec_rnn.lstm.weight_hh_l%d"},
    {PARAKEET_TENSOR_PRED_LSTM_BIAS_IH, "decoder.prediction.dec_rnn.lstm.bias_ih_l%d"},
    {PARAKEET_TENSOR_PRED_LSTM_BIAS_HH, "decoder.prediction.dec_rnn.lstm.bias_hh_l%d"},
    // Joint network
    {PARAKEET_TENSOR_JOINT_PRED_WEIGHT, "joint.pred.weight"},
    {PARAKEET_TENSOR_JOINT_PRED_BIAS, "joint.pred.bias"},
    {PARAKEET_TENSOR_JOINT_ENC_WEIGHT, "joint.enc.weight"},
    {PARAKEET_TENSOR_JOINT_ENC_BIAS, "joint.enc.bias"},
    {PARAKEET_TENSOR_JOINT_NET_WEIGHT, "joint.joint_net.2.weight"},
    {PARAKEET_TENSOR_JOINT_NET_BIAS, "joint.joint_net.2.bias"},
};
// ggml operation associated with each tensor (weights -> MUL_MAT/IM2COL,
// scales -> MUL, biases -> ADD, etc.).
// NOTE(review): presumably used by parakeet.cpp for backend/op support
// checks when loading - confirm against the implementation.
static const std::map<parakeet_tensor, ggml_op> PARAKEET_TENSOR_INFO = {
    // Encoder pre_encode
    {PARAKEET_TENSOR_ENC_PRE_OUT_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_PRE_OUT_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_PRE_CONV_0_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_PRE_CONV_0_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_PRE_CONV_2_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_PRE_CONV_2_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_PRE_CONV_3_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_PRE_CONV_3_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_PRE_CONV_5_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_PRE_CONV_5_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_PRE_CONV_6_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_PRE_CONV_6_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_PE, GGML_OP_ADD},
    // Encoder layers
    {PARAKEET_TENSOR_ENC_NORM_FF1_WEIGHT, GGML_OP_MUL},
    {PARAKEET_TENSOR_ENC_NORM_FF1_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_FF1_LINEAR1_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_FF1_LINEAR2_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_NORM_CONV_WEIGHT, GGML_OP_MUL},
    {PARAKEET_TENSOR_ENC_NORM_CONV_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_CONV_PW1_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_CONV_DW_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_CONV_BN_WEIGHT, GGML_OP_MUL},
    {PARAKEET_TENSOR_ENC_CONV_BN_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_CONV_BN_MEAN, GGML_OP_SUB},
    {PARAKEET_TENSOR_ENC_CONV_BN_VAR, GGML_OP_DIV},
    // num_batches_tracked is bookkeeping only - no op consumes it
    {PARAKEET_TENSOR_ENC_CONV_BN_NUM_BATCHES, GGML_OP_NONE},
    {PARAKEET_TENSOR_ENC_CONV_PW2_WEIGHT, GGML_OP_IM2COL},
    {PARAKEET_TENSOR_ENC_NORM_ATTN_WEIGHT, GGML_OP_MUL},
    {PARAKEET_TENSOR_ENC_NORM_ATTN_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_U, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_ATTN_POS_BIAS_V, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_ATTN_Q_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_ATTN_K_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_ATTN_V_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_ATTN_OUT_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_ATTN_POS_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_NORM_FF2_WEIGHT, GGML_OP_MUL},
    {PARAKEET_TENSOR_ENC_NORM_FF2_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_ENC_FF2_LINEAR1_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_FF2_LINEAR2_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_ENC_NORM_OUT_WEIGHT, GGML_OP_MUL},
    {PARAKEET_TENSOR_ENC_NORM_OUT_BIAS, GGML_OP_ADD},
    // Prediction network
    {PARAKEET_TENSOR_PRED_EMBED_WEIGHT, GGML_OP_GET_ROWS},
    {PARAKEET_TENSOR_PRED_LSTM_WEIGHT_IH, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_PRED_LSTM_WEIGHT_HH, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_PRED_LSTM_BIAS_IH, GGML_OP_ADD},
    {PARAKEET_TENSOR_PRED_LSTM_BIAS_HH, GGML_OP_ADD},
    // Joint network
    {PARAKEET_TENSOR_JOINT_PRED_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_JOINT_PRED_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_JOINT_ENC_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_JOINT_ENC_BIAS, GGML_OP_ADD},
    {PARAKEET_TENSOR_JOINT_NET_WEIGHT, GGML_OP_MUL_MAT},
    {PARAKEET_TENSOR_JOINT_NET_BIAS, GGML_OP_ADD},
};

4282
src/parakeet.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -110,3 +110,32 @@ target_compile_definitions(${VAD_TEST} PRIVATE
SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/jfk.wav")
add_test(NAME ${VAD_TEST} COMMAND ${VAD_TEST})
set_tests_properties(${VAD_TEST} PROPERTIES LABELS "base;en")
# Parakeet model loading test
# Fix: PARAKEET_TEST is reassigned for each test below, so the single
# trailing set_tests_properties() call only labeled test-parakeet-full.
# Apply the labels after each add_test instead, matching the VAD test above.
set(PARAKEET_TEST test-parakeet)
add_executable(${PARAKEET_TEST} ${PARAKEET_TEST}.cpp)
target_include_directories(${PARAKEET_TEST} PRIVATE ../include ../ggml/include ../examples)
target_link_libraries(${PARAKEET_TEST} PRIVATE parakeet common)
target_compile_definitions(${PARAKEET_TEST} PRIVATE
    PARAKEET_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/ggml-parakeet-tdt-0.6b-v3.bin"
    SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/jfk.wav")
add_test(NAME ${PARAKEET_TEST} COMMAND ${PARAKEET_TEST})
set_tests_properties(${PARAKEET_TEST} PROPERTIES LABELS "parakeet;unit")

# Parakeet streaming test
set(PARAKEET_TEST test-parakeet-stream)
add_executable(${PARAKEET_TEST} ${PARAKEET_TEST}.cpp)
target_include_directories(${PARAKEET_TEST} PRIVATE ../include ../ggml/include ../examples)
target_link_libraries(${PARAKEET_TEST} PRIVATE parakeet common)
target_compile_definitions(${PARAKEET_TEST} PRIVATE
    PARAKEET_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/ggml-parakeet-tdt-0.6b-v3.bin"
    SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/gb1.wav")
add_test(NAME ${PARAKEET_TEST} COMMAND ${PARAKEET_TEST})
set_tests_properties(${PARAKEET_TEST} PROPERTIES LABELS "parakeet;unit")

# Parakeet full-pipeline test
set(PARAKEET_TEST test-parakeet-full)
add_executable(${PARAKEET_TEST} ${PARAKEET_TEST}.cpp)
target_include_directories(${PARAKEET_TEST} PRIVATE ../include ../ggml/include ../examples)
target_link_libraries(${PARAKEET_TEST} PRIVATE parakeet common)
target_compile_definitions(${PARAKEET_TEST} PRIVATE
    PARAKEET_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/ggml-parakeet-tdt-0.6b-v3.bin"
    SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/gb1.wav")
add_test(NAME ${PARAKEET_TEST} COMMAND ${PARAKEET_TEST})
set_tests_properties(${PARAKEET_TEST} PROPERTIES LABELS "parakeet;unit")

View File

@ -0,0 +1,62 @@
#include "parakeet.h"
#include "common-whisper.h"
#include <cstdio>
#include <string>
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>
// Print each newly decoded token to stdout as soon as it is emitted.
// `is_first` is function-local static state passed to parakeet_token_to_text
// so the very first token can be formatted differently.
// Fix: removed the dead local `time_ms` (computed but never used; triggers
// -Wunused-variable) and silenced the unused callback parameters.
void token_callback(parakeet_context * ctx, parakeet_state * state, const parakeet_token_data * token_data, void * user_data) {
    (void) state;     // unused - required by the callback signature
    (void) user_data; // unused - required by the callback signature

    static bool is_first = true;

    const char * token_str = parakeet_token_to_str(ctx, token_data->id);

    char text_buf[256];
    parakeet_token_to_text(token_str, is_first, text_buf, sizeof(text_buf));

    printf("%s", text_buf);
    fflush(stdout);

    is_first = false;
}
// End-to-end test for parakeet_full(): load the model, decode a sample file,
// and assert success. PARAKEET_MODEL_PATH / SAMPLE_PATH are injected by CMake.
// Fix: the success message claimed parakeet_full_parallel() was tested, but
// the test calls parakeet_full().
int main() {
    std::string model_path  = PARAKEET_MODEL_PATH;
    std::string sample_path = SAMPLE_PATH;

    std::vector<float> pcmf32;
    std::vector<std::vector<float>> pcmf32s;
    assert(read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false));
    assert(pcmf32.size() > 0);
    assert(pcmf32s.size() == 0); // no stereo vector

    printf("Loading Parakeet model from: %s\n", model_path.c_str());
    struct parakeet_context_params ctx_params = parakeet_context_default_params();
    struct parakeet_context * pctx = parakeet_init_from_file_with_params(model_path.c_str(), ctx_params);
    if (pctx == nullptr) {
        fprintf(stderr, "Failed to load Parakeet model\n");
        return 1;
    }
    printf("Successfully loaded Parakeet model\n");

    struct parakeet_full_params params = parakeet_full_default_params(PARAKEET_SAMPLING_GREEDY);
    params.new_token_callback           = token_callback;
    params.new_token_callback_user_data = nullptr;
    params.chunk_length_ms  = 10000;
    params.left_context_ms  = 10000;
    params.right_context_ms = 4960;

    int ret = parakeet_full(pctx, params, pcmf32.data(), pcmf32.size());
    assert(ret == 0);

    parakeet_free(pctx);

    printf("\nTest passed: parakeet_full succeeded!\n");
    return 0;
}

View File

@ -0,0 +1,107 @@
#include "parakeet.h"
#include "common-whisper.h"
#include <cstdio>
#include <string>
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>
// Print each newly decoded token to stdout as soon as it is emitted.
// `is_first` is function-local static state passed to parakeet_token_to_text
// so the very first token can be formatted differently.
// Fix: removed the dead local `time_ms` (computed but never used; triggers
// -Wunused-variable) and silenced the unused callback parameters.
void token_callback(parakeet_context * ctx, parakeet_state * state, const parakeet_token_data * token_data, void * user_data) {
    (void) state;     // unused - required by the callback signature
    (void) user_data; // unused - required by the callback signature

    static bool is_first = true;

    const char * token_str = parakeet_token_to_str(ctx, token_data->id);

    char text_buf[256];
    parakeet_token_to_text(token_str, is_first, text_buf, sizeof(text_buf));

    printf("%s", text_buf);
    fflush(stdout);

    is_first = false;
}
// Called when segments are finalized; prints the time range, text, and full
// per-token details of each newly added segment.
// `user_data` is unused here.
void segment_callback(parakeet_context * ctx, parakeet_state * state, int n_new, void * user_data) {
    const int n_segments = parakeet_full_n_segments_from_state(state);
    // Index of the first segment added by this callback invocation.
    const int s0 = n_segments - n_new;
    printf("\nSegment Callback: %d new segment(s)\n", n_new);
    for (int i = s0; i < n_segments; i++) {
        const char * text = parakeet_full_get_segment_text_from_state(state, i);
        const int64_t t0 = parakeet_full_get_segment_t0_from_state(state, i);
        const int64_t t1 = parakeet_full_get_segment_t1_from_state(state, i);
        printf("Segment %d: [%lld -> %lld] \"%s\"\n", i, (long long)t0, (long long)t1, text);
        printf("Tokens:\n");
        const int n_tokens = parakeet_full_n_tokens_from_state(state, i);
        for (int j = 0; j < n_tokens; j++) {
            parakeet_token_data token_data = parakeet_full_get_token_data_from_state(state, i, j);
            const char * token_str = parakeet_token_to_str(ctx, token_data.id);
            printf(" [%2d] id=%5d frame=%3d dur_idx=%2d dur_val=%2d p=%.4f plog=%.4f t0=%4lld t1=%4lld word_start=%d \"%s\"\n",
                j,
                token_data.id,
                token_data.frame_index,
                token_data.duration_idx,
                token_data.duration_value,
                token_data.p,
                token_data.plog,
                (long long)token_data.t0,
                (long long)token_data.t1,
                token_data.is_word_start,
                token_str);
        }
    }
    printf("\n");
}
// Streaming test: feed the sample audio to the streaming API in small
// batches and flush at end-of-stream. PARAKEET_MODEL_PATH / SAMPLE_PATH are
// injected by the test's CMake target.
int main() {
    std::string model_path = PARAKEET_MODEL_PATH;
    std::string sample_path = SAMPLE_PATH;

    std::vector<float> pcmf32;
    std::vector<std::vector<float>> pcmf32s;
    assert(read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false));
    assert(pcmf32.size() > 0);

    // Context is created without a default state; an explicit state is used.
    struct parakeet_context_params ctx_params = parakeet_context_default_params();
    struct parakeet_context * pctx = parakeet_init_from_file_with_params_no_state(model_path.c_str(), ctx_params);
    if (pctx == nullptr) { return 1; }

    struct parakeet_full_params params = parakeet_full_default_params(PARAKEET_SAMPLING_GREEDY);
    params.new_token_callback = token_callback;
    params.left_context_ms = 10000;
    params.chunk_length_ms = 10000;
    params.right_context_ms = 4960;

    parakeet_state * state = parakeet_init_state(pctx);

    // initialize streaming state
    assert(parakeet_stream_init(pctx, state, params) == 0);

    // Push the audio in fixed-size batches of 1600 samples
    // (100 ms at a 16 kHz sample rate - TODO confirm the sample rate).
    const int samples_batch_size = 1600;
    int position = 0;
    while (position < (int)pcmf32.size()) {
        int samples_to_push = std::min(samples_batch_size, (int)pcmf32.size() - position);
        int ret = parakeet_stream_push(pctx, state, pcmf32.data() + position, samples_to_push);
        assert(ret == 0);
        position += samples_to_push;
    }

    // flush remaining samples.
    assert(parakeet_stream_flush(pctx, state) == 0);

    parakeet_free_state(state);
    parakeet_free(pctx);

    printf("\n\nTest passed: Streaming logic.\n");
    return 0;
}

99
tests/test-parakeet.cpp Normal file
View File

@ -0,0 +1,99 @@
#include "parakeet.h"
#include "common-whisper.h"
#include <cstdio>
#include <string>
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>
// Streams each newly decoded token to stdout as soon as it is produced.
// The static flag tracks whether this is the very first token of the run so
// parakeet_token_to_text can format it accordingly; this makes the callback
// non-reentrant and single-run-per-process (acceptable for this test binary).
// Fix: dropped the unused local `time_ms` (dead computation; -Wunused-variable).
void token_callback(parakeet_context * ctx, parakeet_state * state, const parakeet_token_data * token_data, void * user_data) {
    static bool is_first = true;
    const char * token_str = parakeet_token_to_str(ctx, token_data->id);
    char text_buf[256];
    parakeet_token_to_text(token_str, is_first, text_buf, sizeof(text_buf));
    printf("%s", text_buf);
    fflush(stdout); // flush so streamed output appears immediately
    is_first = false;
}
// Prints every newly finalized segment along with per-token diagnostics.
// Only the trailing n_new segments are new; earlier ones were reported by a
// previous invocation of this callback.
void segment_callback(parakeet_context * ctx, parakeet_state * state, int n_new, void * user_data) {
    const int total = parakeet_full_n_segments_from_state(state);
    const int first_new = total - n_new;
    printf("\nSegment Callback: %d new segment(s)\n", n_new);
    for (int seg = first_new; seg < total; seg++) {
        const char *  segment_text = parakeet_full_get_segment_text_from_state(state, seg);
        const int64_t seg_t0       = parakeet_full_get_segment_t0_from_state(state, seg);
        const int64_t seg_t1       = parakeet_full_get_segment_t1_from_state(state, seg);
        printf("Segment %d: [%lld -> %lld] \"%s\"\n", seg, (long long)seg_t0, (long long)seg_t1, segment_text);
        printf("Tokens:\n");
        const int token_count = parakeet_full_n_tokens_from_state(state, seg);
        for (int tok = 0; tok < token_count; tok++) {
            const parakeet_token_data td = parakeet_full_get_token_data_from_state(state, seg, tok);
            printf(" [%2d] id=%5d frame=%3d dur_idx=%2d dur_val=%2d p=%.4f plog=%.4f t0=%4lld t1=%4lld word_start=%d \"%s\"\n",
                   tok,
                   td.id,
                   td.frame_index,
                   td.duration_idx,
                   td.duration_value,
                   td.p,
                   td.plog,
                   (long long)td.t0,
                   (long long)td.t1,
                   td.is_word_start,
                   parakeet_token_to_str(ctx, td.id));
        }
    }
    printf("\n");
}
// Smoke test for the one-shot chunk API: load the model, run a single
// parakeet_chunk() call over the whole sample with both callbacks wired up,
// then tear everything down. NDEBUG is undefined above so the asserts (which
// wrap side-effecting calls) always execute.
int main() {
    std::string model_path  = PARAKEET_MODEL_PATH;
    std::string sample_path = SAMPLE_PATH;

    // Load the sample audio file
    std::vector<float> pcmf32;
    std::vector<std::vector<float>> pcmf32s;
    assert(read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false));
    assert(pcmf32.size() > 0);
    assert(pcmf32s.size() == 0);

    printf("Loading Parakeet model from: %s\n", model_path.c_str());
    struct parakeet_context_params ctx_params = parakeet_context_default_params();
    struct parakeet_context * pctx = parakeet_init_from_file_with_params_no_state(model_path.c_str(), ctx_params);
    if (pctx == nullptr) {
        fprintf(stderr, "Failed to load Parakeet model\n");
        return 1;
    }
    printf("Successfully loaded Parakeet model\n");

    // Greedy sampling with both token- and segment-level callbacks attached.
    struct parakeet_full_params params = parakeet_full_default_params(PARAKEET_SAMPLING_GREEDY);
    params.new_token_callback             = token_callback;
    params.new_token_callback_user_data   = nullptr;
    params.new_segment_callback           = segment_callback;
    params.new_segment_callback_user_data = nullptr;

    parakeet_state * state = parakeet_init_state(pctx);
    int rc = parakeet_chunk(pctx, state, params, pcmf32.data(), pcmf32.size());
    assert(rc == 0);

    parakeet_free_state(state);
    parakeet_free(pctx);

    printf("\nTest passed: Parakeet model loaded and freed successfully\n");
    return 0;
}