#!/usr/bin/env bash
#
# Benchmark harness for whisper-cli.
#
# Validates the model and audio inputs against a lock file, runs a fixed
# number of warm-up and measured whisper-cli invocations per audio sample,
# and passes the run directory to parse_results.py. Run with --help for the
# command-line interface.
set -euo pipefail

# Directory layout, resolved relative to the location of this script so the
# harness can be started from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
AUDIO_DIR="${SCRIPT_DIR}/audio"
REFS_DIR="${SCRIPT_DIR}/references"
RESULTS_ROOT="${SCRIPT_DIR}/results"
LOCK_FILE="${AUDIO_DIR}/lock.json"

# Fixed benchmark configuration for reproducibility.
WARMUP_RUNS=1
MEASURED_RUNS=5
THREADS=8
PROCESSORS=1
VARIANT="metal-baseline"                 # overridable with --variant
MODEL_REL_DEFAULT="models/ggml-small.en.bin"  # relative to REPO_ROOT
MAX_WER=0.02
MAX_CER=0.02
ENFORCE_CORRECTNESS=1                    # "1" adds --enforce-correctness to the parser call
AUDIO_KEYS=()                            # filled by --audio / --all-audio
CLI_BIN="${REPO_ROOT}/build/bin/whisper-cli"

# Flags appended to every whisper-cli invocation, in addition to the
# per-run -m/-f/-t/-p arguments. Kept as an array so each flag and value
# stays a separate word.
CLI_ARGS=(
  "-l" "en"
  "-tp" "0"
  "-tpi" "0"
  "-nf"
  "-bs" "1"
  "-bo" "1"
  "-fa"
)
#######################################
# Prints the command-line help text.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr on errors)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage:
  benchmark/bench.sh --create-lock
  benchmark/bench.sh [--variant <name>] [--audio <short|medium|long>]...
  benchmark/bench.sh [--variant <name>] --all-audio

Description:
  --create-lock   Validates benchmark/audio/{short,medium,long}.wav and writes benchmark/audio/lock.json
                  with file hashes + durations. This lock is required for benchmark runs.

  --variant       Logical variant label in output tables (default: metal-baseline).
  --audio         Add one audio key to run: short, medium, or long.
                  If omitted, only short is run (development default).
  --all-audio     Run short + medium + long.

Notes:
  - This harness runs 1 warm-up + 5 measured runs per audio sample.
  - Runs are always sequential and use fixed model + fixed CLI flags.
  - Audio files must be 16 kHz, mono, 16-bit WAV.
  - Correctness is computed via WER/CER against benchmark/references/{short,medium,long}.txt.
EOF
}
# Execution mode: "run" performs a benchmark run (default); "create-lock"
# only writes the lock file.
MODE="run"

#######################################
# Reports an option that was given without its required value and exits.
# Arguments: $1 - the option name as typed by the user
# Outputs:   diagnostic + usage text on stderr
# Returns:   never; exits with status 2
#######################################
die_missing_value() {
  echo "Missing value for $1" >&2
  usage >&2
  exit 2
}

#######################################
# Parses the command line.
# Globals:   MODE, VARIANT, AUDIO_KEYS (written)
# Arguments: the script's command-line arguments
# Outputs:   diagnostics on stderr for invalid input; help text on --help
# Returns:   0 on success; exits 2 on invalid input, 0 after --help
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --create-lock)
        MODE="create-lock"
        shift
        ;;
      --variant)
        # Without this check a trailing --variant aborts with an
        # "unbound variable" error under `set -u`.
        [[ $# -ge 2 ]] || die_missing_value "$1"
        VARIANT="$2"
        shift 2
        ;;
      --audio)
        [[ $# -ge 2 ]] || die_missing_value "$1"
        case "$2" in
          short|medium|long)
            AUDIO_KEYS+=( "$2" )
            ;;
          *)
            echo "Invalid --audio value: $2 (expected short|medium|long)" >&2
            exit 2
            ;;
        esac
        shift 2
        ;;
      --all-audio)
        # Replaces (does not extend) any keys collected so far.
        AUDIO_KEYS=( "short" "medium" "long" )
        shift
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Aborts the script unless the given path is an existing regular file.
# Arguments: $1 - path to check
# Outputs:   diagnostic on stderr when the file is missing
# Returns:   0 when the file exists; otherwise exits with status 1
#######################################
require_file() {
  local path="$1"
  if [[ ! -f "$path" ]]; then
    echo "Required file not found: $path" >&2
    exit 1
  fi
}
#######################################
# Writes the lock file describing the benchmark inputs.
#
# Records the model's relative path and SHA-256, and for each of
# short/medium/long.wav its SHA-256, duration and format. Every WAV must be
# mono, 16000 Hz, 2 bytes per sample; otherwise the lock is not written.
#
# Globals:   AUDIO_DIR, REPO_ROOT, MODEL_REL_DEFAULT, LOCK_FILE (read)
# Arguments: none
# Outputs:   "Wrote lock file: <path>" on stdout; diagnostics on stderr
# Returns:   0 on success; exits 1 if a required file is missing; non-zero
#            if a WAV fails inspection
#######################################
create_lock_file() {
  mkdir -p "${AUDIO_DIR}"
  require_file "${REPO_ROOT}/${MODEL_REL_DEFAULT}"
  require_file "${AUDIO_DIR}/short.wav"
  require_file "${AUDIO_DIR}/medium.wav"
  require_file "${AUDIO_DIR}/long.wav"

  python3 - "${LOCK_FILE}" "${REPO_ROOT}" "${MODEL_REL_DEFAULT}" "${AUDIO_DIR}" <<'PY'
import contextlib
import hashlib
import json
import os
import sys
import wave
from pathlib import Path

lock_path = Path(sys.argv[1])
repo_root = Path(sys.argv[2])
model_rel = sys.argv[3]
audio_dir = Path(sys.argv[4])


def sha256(path: Path) -> str:
    """Return the hex SHA-256 of a file, read in 1 MiB chunks."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            h.update(chunk)
    return h.hexdigest()


def inspect_wav(path: Path):
    """Check the WAV format and return its lock entry."""
    try:
        with contextlib.closing(wave.open(str(path), "rb")) as wf:
            channels = wf.getnchannels()
            sample_rate = wf.getframerate()
            sample_width = wf.getsampwidth()
            frames = wf.getnframes()
    except (wave.Error, EOFError) as exc:
        # Report an unreadable file as a one-line error, not a traceback.
        raise SystemExit(f"{path}: not a readable PCM WAV file: {exc}")

    if channels != 1:
        raise SystemExit(f"{path}: expected mono (1 channel), got {channels}")
    if sample_rate != 16000:
        raise SystemExit(f"{path}: expected 16000 Hz, got {sample_rate}")
    if sample_width != 2:
        raise SystemExit(f"{path}: expected 16-bit PCM (2 bytes), got {sample_width}")

    duration_s = frames / float(sample_rate)
    return {
        "sha256": sha256(path),
        "duration_s": duration_s,
        "sample_rate_hz": sample_rate,
        "channels": channels,
        "sample_width_bytes": sample_width,
    }


model_path = repo_root / model_rel
if not model_path.is_file():
    raise SystemExit(f"Model not found: {model_path}")

lock = {
    "schema_version": 1,
    "model_rel": model_rel,
    "model_sha256": sha256(model_path),
    "audio": {
        "short": inspect_wav(audio_dir / "short.wav"),
        "medium": inspect_wav(audio_dir / "medium.wav"),
        "long": inspect_wav(audio_dir / "long.wav"),
    },
}

# Write to a sibling temp file and rename, so a failed write never leaves a
# half-written lock in place.
tmp = lock_path.with_suffix(".json.tmp")
tmp.write_text(json.dumps(lock, indent=2, sort_keys=True) + "\n", encoding="utf-8")
os.replace(tmp, lock_path)
print(f"Wrote lock file: {lock_path}")
PY
}
#######################################
# Verifies the model and all three audio files against the lock file and
# writes a JSON summary of the validated inputs.
#
# Checks: the lock's model_rel equals MODEL_REL_DEFAULT, the model and each
# of short/medium/long.wav match their locked SHA-256, and each WAV is mono,
# 16000 Hz, 2 bytes per sample.
#
# Globals:   LOCK_FILE, CLI_BIN, REPO_ROOT, AUDIO_DIR, MODEL_REL_DEFAULT (read)
# Arguments: $1 - path of the JSON file to write
# Outputs:   diagnostics on stderr
# Returns:   0 on success; exits 1 if the lock or CLI binary is missing;
#            otherwise the validator's non-zero status. The output file is
#            only created when validation succeeds.
#######################################
validate_inputs_against_lock() {
  local validation_json="$1"
  local tmp_json="${validation_json}.tmp"
  local rc=0

  require_file "${LOCK_FILE}"
  require_file "${CLI_BIN}"

  # Capture into a temp file so a failed validation cannot leave a truncated
  # JSON file at the final path.
  python3 - "${LOCK_FILE}" "${REPO_ROOT}" "${AUDIO_DIR}" "${MODEL_REL_DEFAULT}" > "${tmp_json}" <<'PY' || rc=$?
import contextlib
import hashlib
import json
import sys
import wave
from pathlib import Path

lock_path = Path(sys.argv[1])
repo_root = Path(sys.argv[2])
audio_dir = Path(sys.argv[3])
configured_model_rel = sys.argv[4]


def sha256(path: Path) -> str:
    """Return the hex SHA-256 of a file, read in 1 MiB chunks."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            h.update(chunk)
    return h.hexdigest()


def inspect_wav(path: Path):
    """Return (channels, sample_rate, sample_width, duration_s) of a WAV."""
    with contextlib.closing(wave.open(str(path), "rb")) as wf:
        channels = wf.getnchannels()
        sample_rate = wf.getframerate()
        sample_width = wf.getsampwidth()
        frames = wf.getnframes()
    duration_s = frames / float(sample_rate)
    return channels, sample_rate, sample_width, duration_s


lock = json.loads(lock_path.read_text(encoding="utf-8"))
if lock.get("model_rel") != configured_model_rel:
    raise SystemExit(
        f"Lock model_rel mismatch: lock={lock.get('model_rel')} script={configured_model_rel}"
    )

model_rel = lock["model_rel"]
model_path = repo_root / model_rel
if not model_path.is_file():
    raise SystemExit(f"Model file not found: {model_path}")

actual_model_sha = sha256(model_path)
# .get(): a lock without the key reports a mismatch instead of a KeyError.
expected_model_sha = lock.get("model_sha256")
if actual_model_sha != expected_model_sha:
    raise SystemExit(
        f"Model checksum mismatch for {model_path}. expected={expected_model_sha} actual={actual_model_sha}"
    )

locked_audio = lock.get("audio")
if not isinstance(locked_audio, dict):
    raise SystemExit(f"Lock file {lock_path} has no 'audio' section")

validated_audio = {}
for key in ("short", "medium", "long"):
    wav_path = audio_dir / f"{key}.wav"
    if not wav_path.is_file():
        raise SystemExit(f"Audio file missing: {wav_path}")

    expected = locked_audio.get(key)
    if not isinstance(expected, dict):
        raise SystemExit(f"Lock file {lock_path} has no entry for audio key '{key}'")

    actual_sha = sha256(wav_path)
    expected_sha = expected.get("sha256")
    if actual_sha != expected_sha:
        raise SystemExit(
            f"Checksum mismatch for {wav_path}. expected={expected_sha} actual={actual_sha}"
        )

    channels, sample_rate, sample_width, duration_s = inspect_wav(wav_path)
    if channels != 1 or sample_rate != 16000 or sample_width != 2:
        raise SystemExit(
            f"Format mismatch for {wav_path}: channels={channels}, sample_rate={sample_rate}, sample_width={sample_width}"
        )

    validated_audio[key] = {
        "path": str(wav_path),
        "duration_s": duration_s,
        "sha256": actual_sha,
    }

print(
    json.dumps(
        {
            "model_rel": model_rel,
            "model_abs": str(model_path),
            "model_sha256": actual_model_sha,
            "audio": validated_audio,
        },
        indent=2,
        sort_keys=True,
    )
)
PY

  if (( rc != 0 )); then
    rm -f -- "${tmp_json}"
    return "${rc}"
  fi
  mv -- "${tmp_json}" "${validation_json}"
}
#######################################
# Writes the run's config.json: variant, run policy, validated model and
# audio details, CLI binary and flags, and a snapshot of the environment.
#
# Globals:   VARIANT, THREADS, PROCESSORS, WARMUP_RUNS, MEASURED_RUNS,
#            REPO_ROOT, CLI_BIN, CLI_ARGS (read)
# Arguments: $1 - path of the config JSON to write
#            $2 - path of the validated-inputs JSON to read
# Returns:   0 on success, non-zero if the writer fails
#######################################
write_config_file() {
  local config_path="$1"
  local validation_json="$2"

  python3 - "${config_path}" "${validation_json}" "${VARIANT}" "${THREADS}" "${PROCESSORS}" "${WARMUP_RUNS}" "${MEASURED_RUNS}" "${REPO_ROOT}" "${CLI_BIN}" "${CLI_ARGS[@]}" <<'PY'
import datetime as dt
import json
import platform
import subprocess
import sys
from pathlib import Path

config_path = Path(sys.argv[1])
validation_path = Path(sys.argv[2])
variant = sys.argv[3]
threads = int(sys.argv[4])
processors = int(sys.argv[5])
warmup_runs = int(sys.argv[6])
measured_runs = int(sys.argv[7])
repo_root = sys.argv[8]
cli_bin = sys.argv[9]
cli_args = sys.argv[10:]  # every remaining argument is a fixed CLI flag

validation = json.loads(validation_path.read_text(encoding="utf-8"))


def run_cmd(args):
    """Return the command's combined output, or "" if it cannot be run.

    Environment probing is best-effort: a missing or failing tool must not
    abort the benchmark.
    """
    try:
        return subprocess.check_output(args, stderr=subprocess.STDOUT, text=True).strip()
    except Exception:
        return ""


env = {
    "git_commit": run_cmd(["git", "-C", repo_root, "rev-parse", "HEAD"]),
    "git_short_commit": run_cmd(["git", "-C", repo_root, "rev-parse", "--short", "HEAD"]),
    "sw_vers": run_cmd(["sw_vers"]),
    "uname": run_cmd(["uname", "-a"]),
    "hw_memsize": run_cmd(["sysctl", "-n", "hw.memsize"]),
    "cpu_brand_string": run_cmd(["sysctl", "-n", "machdep.cpu.brand_string"]),
    "xcodebuild_version": run_cmd(["xcodebuild", "-version"]),
    "clang_version": run_cmd(["clang", "--version"]),
    "cmake_version": run_cmd(["cmake", "--version"]),
    "python_version": platform.python_version(),
}

cfg = {
    "created_at_utc": dt.datetime.now(dt.timezone.utc).isoformat(),
    "variant": variant,
    "run_policy": {
        "warmup_runs": warmup_runs,
        "measured_runs": measured_runs,
        "sequential_execution": True,
    },
    "model": {
        "rel_path": validation["model_rel"],
        "abs_path": validation["model_abs"],
        "sha256": validation["model_sha256"],
    },
    "audio": validation["audio"],
    "cli": {
        "binary": cli_bin,
        "args": cli_args,
        "threads": threads,
        "processors": processors,
    },
    "environment": env,
}

config_path.write_text(json.dumps(cfg, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY
}
#######################################
# Runs one command, logging its combined stdout/stderr and recording timing
# metadata as JSON.
#
# The metadata holds wall-clock runtime, the exit code, and the latency
# until the first output line that starts with a
# "[HH:MM:SS.mmm --> HH:MM:SS.mmm]" timestamp pair (null if none appears).
#
# Arguments: $1 - log file to write
#            $2 - metadata JSON file to write
#            $3 - audio key (recorded as-is)
#            $4 - audio duration in seconds (recorded as a float)
#            $5 - run kind label (recorded as-is)
#            $6 - run index (integer)
#            $7... - command and arguments to execute
# Returns:   the exit status passed on from the executed command
#######################################
run_one() {
  local log_path="$1"
  local meta_path="$2"
  local audio_key="$3"
  local audio_duration="$4"
  local run_kind="$5"
  local run_index="$6"
  shift 6
  local cmd=( "$@" )

  python3 - "${log_path}" "${meta_path}" "${audio_key}" "${audio_duration}" "${run_kind}" "${run_index}" "${cmd[@]}" <<'PY'
import datetime as dt
import json
import re
import subprocess
import sys
import time
from pathlib import Path

log_path = Path(sys.argv[1])
meta_path = Path(sys.argv[2])
audio_key = sys.argv[3]
audio_duration_s = float(sys.argv[4])
run_kind = sys.argv[5]
run_index = int(sys.argv[6])
cmd = sys.argv[7:]

# A line beginning with a "[start --> end]" timestamp pair marks the first
# transcribed segment.
segment_re = re.compile(r"^\[\d{2}:\d{2}:\d{2}\.\d{3}\s+-->\s+\d{2}:\d{2}:\d{2}\.\d{3}\]")

start_utc = dt.datetime.now(dt.timezone.utc).isoformat()
start = time.perf_counter()
first_inference_s = None

with log_path.open("w", encoding="utf-8") as logf:
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        # Decode leniently: invalid bytes in the child's output must not
        # crash the harness with UnicodeDecodeError mid-run.
        encoding="utf-8",
        errors="replace",
        bufsize=1,  # line-buffered so segment lines are timestamped promptly
    )
    assert proc.stdout is not None

    for line in proc.stdout:
        logf.write(line)
        if first_inference_s is None and segment_re.search(line.strip()):
            first_inference_s = time.perf_counter() - start

    rc = proc.wait()

end = time.perf_counter()
end_utc = dt.datetime.now(dt.timezone.utc).isoformat()

meta = {
    "audio_key": audio_key,
    "audio_duration_s": audio_duration_s,
    "run_kind": run_kind,
    "run_index": run_index,
    "command": cmd,
    "log_path": str(log_path),
    "start_utc": start_utc,
    "end_utc": end_utc,
    "wall_clock_runtime_s": end - start,
    "first_inference_latency_s": first_inference_s,
    "exit_code": rc,
}

meta_path.write_text(json.dumps(meta, indent=2, sort_keys=True) + "\n", encoding="utf-8")
sys.exit(rc)
PY
}
# ---------------------------------------------------------------------------
# Main flow: optionally create the lock, otherwise validate inputs, run the
# benchmark series for each selected audio key, then parse the results.
# ---------------------------------------------------------------------------

#######################################
# Prints one value from a JSON file by descending through the given keys.
# Arguments: $1 - JSON file path
#            $2... - successive object keys
# Outputs:   the selected value on stdout
#######################################
validation_field() {
  python3 - "$@" <<'PY'
import json
import sys
from pathlib import Path

value = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
for key in sys.argv[2:]:
    value = value[key]
print(value)
PY
}

#######################################
# Runs a numbered series of whisper-cli invocations for one audio key.
# Globals:   RAW_DIR, AUDIO_DIR, CLI_BIN, MODEL_PATH, THREADS, PROCESSORS,
#            CLI_ARGS (read)
# Arguments: $1 - run kind recorded in the metadata ("warmup" / "measured")
#            $2 - file name prefix for the log/meta files
#            $3 - label used in progress messages
#            $4 - number of runs
#            $5 - audio key
#            $6 - audio duration in seconds
# Outputs:   one progress line per run on stdout
# Returns:   non-zero as soon as a run fails (aborts the script under set -e)
#######################################
run_series() {
  local run_kind="$1"
  local file_prefix="$2"
  local label="$3"
  local total="$4"
  local audio_key="$5"
  local audio_duration="$6"
  local out_dir="${RAW_DIR}/${audio_key}"
  local i idx

  # Arithmetic loop, not $(seq 1 N): BSD/macOS seq counts DOWN when N < 1,
  # which would execute runs "1" and "0" for a count of zero.
  for (( i = 1; i <= total; i++ )); do
    printf -v idx "%02d" "${i}"
    echo "[${audio_key}] ${label} ${i}/${total}"
    run_one \
      "${out_dir}/${file_prefix}_${idx}.log" \
      "${out_dir}/${file_prefix}_${idx}.meta.json" \
      "${audio_key}" \
      "${audio_duration}" \
      "${run_kind}" \
      "${i}" \
      "${CLI_BIN}" -m "${MODEL_PATH}" -f "${AUDIO_DIR}/${audio_key}.wav" -t "${THREADS}" -p "${PROCESSORS}" "${CLI_ARGS[@]}"
  done
}

if [[ "${MODE}" == "create-lock" ]]; then
  create_lock_file
  exit 0
fi

# Development default: run only the short sample when none was selected.
if [[ "${#AUDIO_KEYS[@]}" -eq 0 ]]; then
  AUDIO_KEYS=( "short" )
fi

mkdir -p "${RESULTS_ROOT}"

# One results directory per invocation, named by local timestamp + variant.
RUN_ID="$(date '+%Y%m%d_%H%M%S')"
RUN_DIR="${RESULTS_ROOT}/${RUN_ID}_${VARIANT}"
RAW_DIR="${RUN_DIR}/raw"
mkdir -p "${RAW_DIR}"

VALIDATION_JSON="${RUN_DIR}/validated_inputs.json"
validate_inputs_against_lock "${VALIDATION_JSON}"
write_config_file "${RUN_DIR}/config.json" "${VALIDATION_JSON}"

MODEL_PATH="$(validation_field "${VALIDATION_JSON}" model_abs)"

echo "Benchmark run directory: ${RUN_DIR}"
echo "Variant: ${VARIANT}"
echo "Model: ${MODEL_PATH}"
echo "Warm-up runs: ${WARMUP_RUNS}, measured runs: ${MEASURED_RUNS}"
echo "Threads: ${THREADS}, processors: ${PROCESSORS}"
echo "Fixed CLI flags: ${CLI_ARGS[*]}"
echo "Audio set: ${AUDIO_KEYS[*]}"

for audio_key in "${AUDIO_KEYS[@]}"; do
  audio_duration="$(validation_field "${VALIDATION_JSON}" audio "${audio_key}" duration_s)"
  mkdir -p "${RAW_DIR}/${audio_key}"

  run_series "warmup" "warmup" "warm-up" "${WARMUP_RUNS}" "${audio_key}" "${audio_duration}"
  run_series "measured" "run" "measured" "${MEASURED_RUNS}" "${audio_key}" "${audio_duration}"
done

PARSE_ARGS=(
  "--run-dir" "${RUN_DIR}"
  "--refs-dir" "${REFS_DIR}"
  "--max-wer" "${MAX_WER}"
  "--max-cer" "${MAX_CER}"
)
if [[ "${ENFORCE_CORRECTNESS}" == "1" ]]; then
  PARSE_ARGS+=( "--enforce-correctness" )
fi
python3 "${SCRIPT_DIR}/parse_results.py" "${PARSE_ARGS[@]}"

echo "Completed benchmark parsing:"
echo "  ${RUN_DIR}/runs.csv"
echo "  ${RUN_DIR}/summary.csv"
echo "  ${RUN_DIR}/summary.md"
echo "  ${RUN_DIR}/correctness.json"