This commit is contained in:
Octopus 2026-04-21 03:34:38 +02:00 committed by GitHub
commit c9cd97a5a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 147 additions and 2 deletions

View File

@ -0,0 +1,134 @@
import sys
import os
import json
import argparse
import subprocess
import tempfile
import urllib.request
MINIMAX_VOICES = [
'English_Graceful_Lady',
'English_Insightful_Speaker',
'English_radiant_girl',
'English_Persuasive_Man',
'English_Lucky_Robot',
'English_expressive_narrator',
]
parser = argparse.ArgumentParser(add_help=False,
formatter_class=argparse.RawTextHelpFormatter,
description='MiniMax TTS client for whisper.cpp talk-llama example')
modes = parser.add_argument_group("action")
modes.add_argument("inputfile", metavar="TEXTFILE",
nargs='?', type=argparse.FileType(), default=sys.stdin,
help="read the text file (default: stdin)")
modes.add_argument("-l", "--list", action="store_true",
help="show the list of voices and exit")
modes.add_argument("-h", "--help", action="help",
help="show this help and exit")
selopts = parser.add_argument_group("voice selection")
selmodes = selopts.add_mutually_exclusive_group()
selmodes.add_argument("-n", "--name",
default="English_Graceful_Lady",
help="voice ID to use (default: English_Graceful_Lady)")
selmodes.add_argument("-v", "--voice", type=int, metavar="NUMBER",
help="voice by index number (see --list)")
outmodes = parser.add_argument_group("output")
outgroup = outmodes.add_mutually_exclusive_group()
outgroup.add_argument("-s", "--save", metavar="FILE",
default="audio.mp3",
help="save the TTS to a file (default: audio.mp3)")
outgroup.add_argument("-p", "--play", action="store_true",
help="play the TTS with ffplay")
apiopts = parser.add_argument_group("API options")
apiopts.add_argument("-k", "--api-key", metavar="KEY",
default=os.environ.get("MINIMAX_API_KEY", ""),
help="MiniMax API key (default: $MINIMAX_API_KEY)")
apiopts.add_argument("-m", "--model",
default="speech-2.8-hd",
help="TTS model to use (default: speech-2.8-hd)")
apiopts.add_argument("-b", "--base-url",
default=os.environ.get("MINIMAX_BASE_URL", "https://api.minimax.io"),
help="MiniMax base URL (default: https://api.minimax.io)")
args = parser.parse_args()
if args.list:
for i, v in enumerate(MINIMAX_VOICES):
print(str(i) + ": " + v)
sys.exit()
if not args.api_key:
print("MiniMax API key is required. Set MINIMAX_API_KEY environment variable or use -k.")
sys.exit(1)
if args.voice is not None:
voice_id = MINIMAX_VOICES[args.voice % len(MINIMAX_VOICES)]
else:
voice_id = args.name
text = args.inputfile.read()
url = args.base_url.rstrip("/") + "/v1/t2a_v2"
payload = json.dumps({
"model": args.model,
"text": text,
"stream": True,
"voice_setting": {
"voice_id": voice_id,
"speed": 1,
"vol": 1,
"pitch": 0,
},
"audio_setting": {
"sample_rate": 32000,
"bitrate": 128000,
"format": "mp3",
"channel": 1,
},
}).encode("utf-8")
req = urllib.request.Request(url, data=payload, method="POST")
req.add_header("Content-Type", "application/json")
req.add_header("Authorization", "Bearer " + args.api_key)
audio_chunks = []
buffer = b""
with urllib.request.urlopen(req) as resp:
for raw_line in resp:
line = raw_line.decode("utf-8", errors="replace").rstrip("\n\r")
if not line.startswith("data:"):
continue
json_str = line[5:].strip()
if not json_str or json_str == "[DONE]":
continue
try:
event = json.loads(json_str)
audio_hex = event.get("data", {}).get("audio", "")
if audio_hex:
audio_chunks.append(bytes.fromhex(audio_hex))
except (json.JSONDecodeError, ValueError):
pass
audio = b"".join(audio_chunks)
if args.play:
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
tmp.write(audio)
tmp_path = tmp.name
try:
subprocess.run(
["ffplay", "-autoexit", "-nodisp", "-loglevel", "quiet",
"-hide_banner", "-i", tmp_path],
check=False,
)
finally:
os.unlink(tmp_path)
else:
with open(args.save, "wb") as f:
f.write(audio)

View File

@ -32,9 +32,20 @@ elif installed python3 && \
#python3 $script -q -s ./audio.mp3 -v $1 $2 >/dev/null 2>&1
#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
# MiniMax TTS
elif [ -n "$MINIMAX_API_KEY" ] && installed python3 && installed ffplay; then
wd=$(dirname $0)
script=$wd/minimax-tts.py
python3 $script -p -v $1 $2 >/dev/null 2>&1
# Uncomment to keep the audio file
#python3 $script -s ./audio.mp3 -v $1 $2 >/dev/null 2>&1
#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
else
echo 'Install espeak ("brew install espeak" or "apt-get install espeak"),'
echo 'piper ("pip install piper-tts" or https://github.com/rhasspy/piper) with aplay,'
echo 'or elevenlabs ("pip install elevenlabs") with ffplay.'
echo '(export ELEVEN_API_KEY if you have an api key from https://beta.elevenlabs.io)'
echo 'or elevenlabs ("pip install elevenlabs") with ffplay,'
echo 'or set MINIMAX_API_KEY and install ffplay for MiniMax TTS.'
echo '(See https://platform.minimax.io for a MiniMax API key)'
fi