#!/bin/bash
# izSynth - TTS/Text To Speech synthesizer, background music overlay assembler and audio file converter for PBX and Home Automation Systems
# Home Page: http://www.initzero.it/products/opensource/izsynth
# GitHub: https://github.com/ugoviti/izsynth
# Written by: Ugo Viti
#
# izSynth is a bash script running under Linux, written to automate the synthesis of voices used in the izPBX System or for realtime TTS (Text To Speech) in Home Automation solutions.
# It can use offline synthesis software like Loquendo (through the Wine environment), eSpeak and Festival, as well as popular online TTS services such as VoiceRSS and NaturalReaders,
# to synthesize audio voices from ASCII text files and automatically merge the audio with background music (mp3 and wav formats are supported), adding silences and fade in and out.
#
# For INSTALL, LICENSE, README, HowTo, ChangeLog and Contributors go to the end of this script
###########################################################################################
# script variables
NAME="izsynth"
DESCRIPTION="TTS/Text To Speech synthesizer, background music overlay assembler and audio file converter for PBX and Home Automation Systems"
VERSION="5.0"
VERSION_DATE="20190213"
#####################################
## default suggested user variables to change
## you can put these variables into $HOME/.config/izsynth/izsynth.conf to override the script variables
# default TTS engine
TTS_ENGINE="naturalreaders"
# default TTS voice (null = let the TTS engine set its own default voice; it will use english)
TTS_VOICE="Peter"
# re-synthesize the file even if it already exists
OUTPUT_OVERWRITE="no"
# remove the synthesized file after playback
OUTPUT_REMOVE="no"
# assemble and export converted PBX recordings
EXPORT_AUDIO="no"
# default audio formats
EXPORT_AUDIO_FORMATS="44khz.wav wav gsm ulaw alaw slin"
# split destination audio files into one sub directory per audio format
EXPORT_AUDIO_FORMAT_SUBDIR="yes"
# play back the synthesized audio instead of generating PBX files
PLAYBACK="yes"
# play the file in background, otherwise in foreground
PLAYBACK_BACKGROUND="yes"
# command used to play the synthesized audio file (mplayer example)
PLAYBACK_ENGINE="mplayer"
# default playback command options (mplayer example)
#PLAYBACK_ENGINE_OPTS="-quiet -nolirc -noconsolecontrols"
# default playback device (mplayer example)
#PLAYBACK_DEVICE="alsa"
# sound card hardware playback volume (use an integer from 0 to 100)
#PLAYBACK_VOLUME="30"
###########################################################################################
## system variables - edit with care
# default mandatory commands
COMMANDS="curl md5sum file iconv soxi sox lame"
# default base temp directory (comment it out if you want to use the system default directory base, ex. /tmp)
TMP_DIR_BASE="/dev/shm"
# by default the output is redirected to the temp directory
OUT_DIR="$TMP_DIR_BASE"
# default TTS volume
#TTS_VOLUME="0.4"
# default: wait n. seconds before speaking the TTS
TTS_PAD_BEGIN="5"
# default: wait n. seconds after the TTS has been spoken
TTS_PAD_END="5"
# default: start the music after n. seconds
MUSIC_START="0"
# default: fade the start and the end of the music for 5 seconds
MUSIC_FADE="5"
# default music volume
#MUSIC_VOLUME="0.1"
# detect the HOME variable if the current shell doesn't set it
#[ -z "$HOME" ] && HOME="$(cd >/dev/null 2>&1; pwd)"
# fall back to the current working directory when HOME is empty
[ -z "$HOME" ] && HOME="$(pwd)"
# read the external izsynth config file (used for service API keys and to override the variables above)
# IZSYNTH_CONFIG_DIR keeps a value already present in the environment, otherwise it defaults to $HOME/.config/$NAME
: ${IZSYNTH_CONFIG_DIR:="$HOME/.config/$NAME"}
IZSYNTH_CONFIG_FILE="$IZSYNTH_CONFIG_DIR/$NAME.conf"
# create the default izsynth config directory (with its engines sub directory) if it does not exist
[ ! -e "$IZSYNTH_CONFIG_DIR/engines" ] && mkdir -p "$IZSYNTH_CONFIG_DIR/engines"
# source the user config file: every variable assigned there replaces the defaults set above
[ -e "$IZSYNTH_CONFIG_FILE" ] && . "$IZSYNTH_CONFIG_FILE"
## compatibility with deprecated variables
# the old PLAYBACK_OVERWRITE / PLAYBACK_REMOVE names, when set, take precedence over the OUTPUT_* ones
[ ! -z "$PLAYBACK_OVERWRITE" ] && OUTPUT_OVERWRITE="$PLAYBACK_OVERWRITE"
[ ! -z "$PLAYBACK_REMOVE" ] && OUTPUT_REMOVE="$PLAYBACK_REMOVE"
###########################################################################################
# main script - don't edit anything below
###########################################################################################
###########################################################################################
# START TTS ENGINES
# add others TTS engines here
#######################################
# iSpeech remote TTS engine (http://www.ispeech.org).
# Globals:
#   IZSYNTH_CONFIG_FILE        - read by "help"
#   TTS_ENGINE_ISPEECH_APIKEY  - read by "synthesize" (mandatory)
#   TTS_EXT, TTS_LENGTH        - written by "env"
#   TTS_LANGUAGE, TTS_VOICE    - set by "env" only when they are empty
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - voice id, or whole word(s) of its description (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output audio file (synthesize)
# Outputs:
#   the requested information on stdout; "synthesize" lets curl write $4
# Returns:
#   exits the shell with status 1 on an unknown action or a missing API key
#######################################
tts_engine_ispeech() {
  local ACTION="$1"
  local VOICE="$2"
  local IN="$3"
  local OUT="$4"

  case "$ACTION" in
    description)
      echo "2017-09-09 Remote iSpeech YES Custom http://www.ispeech.org"
      ;;
    help)
      echo "Get the iSpeech API KEY from: https://www.ispeech.org/developers"
      echo "and put the following variable info '$IZSYNTH_CONFIG_FILE' config file:"
      echo
      echo "TTS_ENGINE_ISPEECH_APIKEY=\"developerdemokeydeveloperdemokey\""
      ;;
    env)
      # default output file format
      TTS_EXT="mp3"
      # default max allowed chars
      TTS_LENGTH="150"
      # default TTS voice
      [ -z "$TTS_LANGUAGE" ] && TTS_LANGUAGE="English"
      [ -z "$TTS_VOICE" ] && TTS_VOICE="English"
      ;;
    commands)
      echo "curl"
      ;;
    voices)
      # extractor command (kept as found; it appears to be truncated):
      # curl -s http://www.ispeech.org/html/tts-demo.php | grep "\([^"]\+\)<\/a.*/\1;\2/p' | sort -t':' -k2 | column -s ";" -c1 -t
      # extracted on: 2017-09-09
      echo "Voice Language"
      echo "-------------------- ---------------------"
      cat <<'VOICES'
arabicmale Arabic Male
auenglishfemale Australian English Female
brportuguesefemale Brazilian Portuguese Female
caenglishfemale Canadian English Female
cafrenchfemale Canadian French Female
cafrenchmale Canadian French Male
chchinesefemale Chinese Female
eurcatalanfemale European Catalan Female
eurczechfemale European Czech Female
eurdanishfemale European Danish Female
eurdutchfemale European Dutch Female
eurfinnishfemale European Finnish Female
eurfrenchfemale European French Female
eurfrenchmale European French Male
eurgermanfemale European German Female
eurgermanmale European German Male
eurgreekfemale European Greek Female
euritalianfemale European Italian Female
euritalianmale European Italian Male
eurnorwegianfemale European Norweigian Female
eurpolishfemale European Polish Female
eurportuguesefemale European Portuguese Female
eurportuguesemale European Portuguese Male
eurspanishfemale European Spanish Female
eurspanishmale European Spanish Male
eurturkishfemale European Turkish Female
eurturkishmale European Turkish Male
hkchinesefemale Hong Kong Catonese Female
huhungarianfemale Hungarian Female
jpjapanesefemale Japanese Female
krkoreanfemale Korean Female
rurussianfemale Russian Female
rurussianmale Russian Male
swswedishfemale Swedish Female
twchinesefemale Taiwan Chinese Female
ukenglishfemale UK English Female
ukenglishmale UK English Male
usenglishfemale US English Female
usenglishmale US English Male
usspanishfemale US Spanish Female
usspanishmale US Spanish Male
VOICES
      ;;
    voice)
      # first table row containing $VOICE as whole word(s), case-insensitive;
      # the first column is the id expected by the remote API
      "${FUNCNAME[0]}" voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1 | awk '{print $1}'
      ;;
    synthesize)
      local text voice_id
      text="$(urlencode "$(parse_tts_string < "$IN")")"
      # call the function directly: going through eval with an unquoted
      # voice split multi-word names and executed their content as code
      voice_id="$("${FUNCNAME[0]}" voice "$VOICE")"
      if [ -z "$TTS_ENGINE_ISPEECH_APIKEY" ]; then
        echo
        print_help_config
        exit 1
      fi
      curl -s "http://api.ispeech.org/api/rest?apikey=$TTS_ENGINE_ISPEECH_APIKEY&action=convert&voice=$voice_id&text=$text" -o "$OUT"
      # old from demo area
      #curl -s "https://www.ispeech.org/p/generic/getaudio?text=$TTS_STRING_CONTENT&voice=$TTS_VOICE&speed=0&action=convert" -o "$OUT"
      ;;
    *)
      echo "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
#######################################
# Google Translate remote TTS engine (http://translate.google.com).
# Globals:
#   TTS_EXT, TTS_LENGTH - written by "env"
#   TTS_VOICE           - set by "env" only when it is empty
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - language code, or whole word(s) of its description (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output audio file (synthesize)
# Outputs:
#   the requested information on stdout; "synthesize" lets curl write $4
# Returns:
#   exits the shell with status 1 on an unknown action
#######################################
tts_engine_google() {
  local ACTION="$1"
  local VOICE="$2"
  local IN="$3"
  local OUT="$4"

  case "$ACTION" in
    description)
      echo "2017-09-09 Remote GoogleVoice NO Google http://translate.google.com"
      ;;
    help)
      echo "this engine is limited to 160 chars max"
      ;;
    env)
      # default output file format
      TTS_EXT="mp3"
      # default max allowed chars
      TTS_LENGTH="150"
      # default TTS voice
      [ -z "$TTS_VOICE" ] && TTS_VOICE="en"
      ;;
    commands)
      echo "curl"
      ;;
    voices)
      # extractor command (kept as found; it appears to be truncated):
      # curl -s "https://cloud.google.com/speech/docs/languages" | grep -e ^"/,+1 d' | sed ':a;N;$!ba;s/<\/td>\n/<\/td>/g' | sed -e 's/<[^>]*>/;/g' | awk -F";" '{print $2";"$4}' | sort | column -s ";" -c1 -t
      # extracted on: 2017-09-08
      cat <<'VOICES'
af-ZA Afrikaans (South Africa)
am-ET Amharic (Ethiopia)
ar-AE Arabic (United Arab Emirates)
ar-BH Arabic (Bahrain)
ar-DZ Arabic (Algeria)
ar-EG Arabic (Egypt)
ar-IL Arabic (Israel)
ar-IQ Arabic (Iraq)
ar-JO Arabic (Jordan)
ar-KW Arabic (Kuwait)
ar-LB Arabic (Lebanon)
ar-MA Arabic (Morocco)
ar-OM Arabic (Oman)
ar-PS Arabic (State of Palestine)
ar-QA Arabic (Qatar)
ar-SA Arabic (Saudi Arabia)
ar-TN Arabic (Tunisia)
az-AZ Azerbaijani (Azerbaijan)
bg-BG Bulgarian (Bulgaria)
bn-BD Bengali (Bangladesh)
bn-IN Bengali (India)
ca-ES Catalan (Spain)
cmn-Hans-CN Chinese, Mandarin (Simplified, China)
cmn-Hans-HK Chinese, Mandarin (Simplified, Hong Kong)
cmn-Hant-TW Chinese, Mandarin (Traditional, Taiwan)
cs-CZ Czech (Czech Republic)
da-DK Danish (Denmark)
de-DE German (Germany)
el-GR Greek (Greece)
en-AU English (Australia)
en-CA English (Canada)
en-GB English (United Kingdom)
en-GH English (Ghana)
en-IE English (Ireland)
en-IN English (India)
en-KE English (Kenya)
en-NG English (Nigeria)
en-NZ English (New Zealand)
en-PH English (Philippines)
en-TZ English (Tanzania)
en-US English (United States)
en-ZA English (South Africa)
es-AR Spanish (Argentina)
es-BO Spanish (Bolivia)
es-CL Spanish (Chile)
es-CO Spanish (Colombia)
es-CR Spanish (Costa Rica)
es-DO Spanish (Dominican Republic)
es-EC Spanish (Ecuador)
es-ES Spanish (Spain)
es-GT Spanish (Guatemala)
es-HN Spanish (Honduras)
es-MX Spanish (Mexico)
es-NI Spanish (Nicaragua)
es-PA Spanish (Panama)
es-PE Spanish (Peru)
es-PR Spanish (Puerto Rico)
es-PY Spanish (Paraguay)
es-SV Spanish (El Salvador)
es-US Spanish (United States)
es-UY Spanish (Uruguay)
es-VE Spanish (Venezuela)
eu-ES Basque (Spain)
fa-IR Persian (Iran)
fi-FI Finnish (Finland)
fil-PH Filipino (Philippines)
fr-CA French (Canada)
fr-FR French (France)
gl-ES Galician (Spain)
gu-IN Gujarati (India)
he-IL Hebrew (Israel)
hi-IN Hindi (India)
hr-HR Croatian (Croatia)
hu-HU Hungarian (Hungary)
hy-AM Armenian (Armenia)
id-ID Indonesian (Indonesia)
is-IS Icelandic (Iceland)
it-IT Italian (Italy)
ja-JP Japanese (Japan)
jv-ID Javanese (Indonesia)
ka-GE Georgian (Georgia)
km-KH Khmer (Cambodia)
kn-IN Kannada (India)
ko-KR Korean (South Korea)
lo-LA Lao (Laos)
lt-LT Lithuanian (Lithuania)
lv-LV Latvian (Latvia)
ml-IN Malayalam (India)
mr-IN Marathi (India)
ms-MY Malay (Malaysia)
nb-NO Norwegian Bokmål (Norway)
ne-NP Nepali (Nepal)
nl-NL Dutch (Netherlands)
pl-PL Polish (Poland)
pt-BR Portuguese (Brazil)
pt-PT Portuguese (Portugal)
ro-RO Romanian (Romania)
ru-RU Russian (Russia)
si-LK Sinhala (Srilanka)
sk-SK Slovak (Slovakia)
sl-SI Slovenian (Slovenia)
sr-RS Serbian (Serbia)
su-ID Sundanese (Indonesia)
sv-SE Swedish (Sweden)
sw-KE Swahili (Kenya)
sw-TZ Swahili (Tanzania)
ta-IN Tamil (India)
ta-LK Tamil (Sri Lanka)
ta-MY Tamil (Malaysia)
ta-SG Tamil (Singapore)
te-IN Telugu (India)
th-TH Thai (Thailand)
tr-TR Turkish (Turkey)
uk-UA Ukrainian (Ukraine)
ur-IN Urdu (India)
ur-PK Urdu (Pakistan)
vi-VN Vietnamese (Vietnam)
yue-Hant-HK Chinese, Cantonese (Traditional, Hong Kong)
zu-ZA Zulu (South Africa)
VOICES
      ;;
    voice)
      # first table row containing $VOICE as whole word(s), case-insensitive;
      # the first column is the language code sent to the service
      "${FUNCNAME[0]}" voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1 | awk '{print $1}'
      ;;
    synthesize)
      local text voice_id
      text="$(urlencode "$(parse_tts_string < "$IN")")"
      # call the function directly: going through eval with an unquoted voice
      # broke on names such as "Spanish (Spain)" and executed them as code
      voice_id="$("${FUNCNAME[0]}" voice "$VOICE")"
      curl -s -G "http://translate.google.com/translate_tts?ie=UTF-8&total=1&idx=0&client=tw-ob&&tl=$voice_id&q=$text" -A "Mozilla" --compressed -o "$OUT"
      ;;
    *)
      echo "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
#######################################
# VoiceRSS remote TTS engine (http://www.voicerss.org).
# Globals:
#   IZSYNTH_CONFIG_FILE         - read by "help"
#   TTS_ENGINE_VOICERSS_APIKEY  - read by "synthesize" (mandatory)
#   TTS_EXT                     - written by "env"
#   TTS_LANGUAGE, TTS_VOICE     - set by "env" only when they are empty
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - language code, or whole word(s) of its description (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output audio file (synthesize)
# Outputs:
#   the requested information on stdout; "synthesize" lets curl write $4
# Returns:
#   exits the shell with status 1 on an unknown action or a missing API key
#######################################
tts_engine_voicerss() {
  local ACTION="$1"
  local VOICE="$2"
  local IN="$3"
  local OUT="$4"

  case "$ACTION" in
    description)
      echo "2016-04-18 Remote VoiceRSS YES Custom http://www.voicerss.org"
      ;;
    help)
      echo "Get the VoiceRSS API KEY from: http://www.voicerss.org/personel"
      echo "and put the following variable info '$IZSYNTH_CONFIG_FILE' config file:"
      echo
      echo "TTS_ENGINE_VOICERSS_APIKEY=\"1234567890abcdefghilmnopqtwxyz\""
      ;;
    env)
      # default output file format
      TTS_EXT="mp3"
      # default TTS voice
      [ -z "$TTS_LANGUAGE" ] && TTS_LANGUAGE="English"
      [ -z "$TTS_VOICE" ] && TTS_VOICE="English"
      ;;
    commands)
      echo "curl"
      ;;
    voices)
      # extractor command:
      # curl -s "http://www.voicerss.org/api/demo.aspx" | grep "option value=" | grep -v "value=\"\"" | awk -F \" '{print $2 $3}' | awk -F "[><]" '{print $1" "$2}'
      # extracted on: 2017-09-08
      cat <<'VOICES'
ca-es Catalan
zh-cn Chinese (China)
zh-hk Chinese (Hong Kong)
zh-tw Chinese (Taiwan)
da-dk Danish
nl-nl Dutch
en-au English (Australia)
en-ca English (Canada)
en-gb English (Great Britain)
en-in English (India)
en-us English (United States)
fi-fi Finnish
fr-ca French (Canada)
fr-fr French (France)
de-de German
it-it Italian
ja-jp Japanese
ko-kr Korean
nb-no Norwegian
pl-pl Polish
pt-br Portuguese (Brazil)
pt-pt Portuguese (Portugal)
ru-ru Russian
es-mx Spanish (Mexico)
es-es Spanish (Spain)
sv-se Swedish (Sweden)
VOICES
      ;;
    voice)
      # first table row containing $VOICE as whole word(s), case-insensitive;
      # the first column is the language code sent to the service
      "${FUNCNAME[0]}" voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1 | awk '{print $1}'
      ;;
    synthesize)
      local text voice_id
      text="$(urlencode "$(parse_tts_string < "$IN")")"
      # call the function directly: going through eval with an unquoted voice
      # broke on names such as "Chinese (Hong Kong)" and executed them as code
      voice_id="$("${FUNCNAME[0]}" voice "$VOICE")"
      if [ -z "$TTS_ENGINE_VOICERSS_APIKEY" ]; then
        echo
        print_help_config
        exit 1
      fi
      curl -s "http://api.voicerss.org/?key=$TTS_ENGINE_VOICERSS_APIKEY&src=$text&f=44khz_16bit_mono&hl=$voice_id" -o "$OUT"
      ;;
    *)
      echo "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
#######################################
# NaturalReaders remote TTS engine (http://www.naturalreaders.com).
# The voices table has four columns: language, name, type and numeric id.
# Globals:
#   TTS_ENGINE - read by "help"
#   TTS_EXT    - written by "env"
#   TTS_VOICE  - set by "env" only when it is empty
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - voice name, or any whole word of its table row (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output audio file (synthesize)
# Outputs:
#   the requested information on stdout; "synthesize" lets curl write $4
# Returns:
#   "synthesize" returns 1 when the voice type cannot be resolved;
#   exits the shell with status 1 on an unknown action
#######################################
tts_engine_naturalreaders() {
  local ACTION="$1"
  local VOICE="$2"
  local IN="$3"
  local OUT="$4"

  case "$ACTION" in
    description)
      echo "2019-02-13 Remote NaturalReader NO Custom http://www.naturalreaders.com"
      ;;
    help)
      echo "No help available for $TTS_ENGINE"
      ;;
    env)
      # default output file format
      TTS_EXT="mp3"
      # default TTS voice
      [ -z "$TTS_VOICE" ] && TTS_VOICE="en"
      ;;
    commands)
      echo "curl"
      ;;
    voices)
      # extractor command:
      ## setup
      # npm -g install js-beautify
      ## extract voices list
      # curl -s https://www.naturalreaders.com/online/main.9dc28aee981ea86ef48d.js | js-beautify | grep "return this.premiumVoices.length > 0 ? this.premiumVoices : (e = new Pt(" | sed 's/return this.premiumVoices.length > 0 ? this.premiumVoices : (e = new Pt(//' | sed 's/), this.premiumVoices.push(e), e = new Pt(/\n/g' | sed 's/), this.premiumVoices.push(e), this.premiumVoices);//' | sed 's/"//g' | awk -F, '{print $2" "$1" "$3" "$4}' | sort | column -c1 -t
      # extracted on 2019-02-13
      cat <<'VOICES'
de-DE Bertha mac 11
de-DE Jakob mac 12
de-DE Klara att 6
de-DE Klaus aca 7
de-DE Reiner att 5
de-DE Sarah aca 14
en-GB Audrey att 3
en-GB Charles att 2
en-GB Darren mac 3
en-GB Graham aca 4
en-GB Peter aca 10
en-GB Rachel aca 11
en-GB Selene mac 4
en-US Amanda mac 1
en-US Mike att 1
en-US Rod aca 20
en-US Ryan aca 12
en-US Sharon aca 21
en-US Susan mac 2
en-US Tim mac 0
en-US Tracy aca 16
es-CA Joaquin mac 8
es-ES Alberto att 19
es-ES Paula mac 7
es-ES Rosa att 20
es-MX Camila mac 6
es-MX Diego mac 5
fr-CA Emmanuel mac 9
fr-CA Louice aca 22
fr-CA Marie mac 10
fr-FR Alain att 7
fr-FR Alice aca 0
fr-FR Bruno aca 1
fr-FR Juliette att 8
it-IT Chiara aca 2
it-IT Mario mac 13
it-IT Valentina mac 14
it-IT Vittorio aca 15
nl-NL Anika mac 21
nl-NL Markus mac 22
pt-BR Renata mac 16
pt-PT Andrea mac 18
pt-PT Celia aca 23
pt-PT Julieta mac 17
sv-SE Emma aca 24
sv-SE Erik aca 25
sv-SE Gus mac 20
sv-SE Maja mac 19
VOICES
      ;;
    voice)
      # numeric id (4th column) of the first row containing $VOICE as a whole word
      "${FUNCNAME[0]}" voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1 | tr -d '[:cntrl:]' | awk '{print $4}'
      ;;
    synthesize)
      local text payload voice_row voice_type voice_id
      local speed=1
      local bs='\' dq='"'
      text="$(parse_tts_string < "$IN")"
      # resolve the row once and take type (3rd column) and id (4th column);
      # no eval, so multi-word or hostile voice names are only ever data
      voice_row="$("${FUNCNAME[0]}" voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1)"
      read -r _ _ voice_type voice_id _ <<< "$voice_row"
      # the text travels inside a JSON string: escape backslashes first,
      # then double quotes, otherwise quoted text produces an invalid body
      payload=${text//"$bs"/"$bs$bs"}
      payload=${payload//"$dq"/"$bs$dq"}
      case "$voice_type" in
        aca|att|mac)
          curl -s "https://kfiuqykx63.execute-api.us-east-1.amazonaws.com/Dev/tts?r=${voice_id}&s=${speed}&l=0&v=${voice_type}" --data "{\"t\":\"$payload\"}" -o "$OUT"
          ;;
        # old 2017-09-08 release
        #aca|att) curl -s -G "http://api.naturalreaders.com/v0/tts/?t=$TTS_STRING_CONTENT&r=$TTS_VOICE&s=$TTS_SPEED" -o "$OUT" ;;
        #mac) curl -s -G "http://api.naturalreaders.com/v4/tts/macspeak?apikey=b98x9xlfs54ws4k0wc0o8g4gwc0w8ss&src=pw&r=$TTS_VOICE&s=$TTS_SPEED&t=$TTS_STRING_CONTENT" -o "$OUT" ;;
        *)
          # unknown voice: report it instead of silently doing nothing
          echo "${FUNCNAME[0]}: unable to resolve the voice '$VOICE'" >&2
          return 1
          ;;
      esac
      # deprecated old method
      #local REQUESTTOKEN="$(curl -s -G "http://api.naturalreaders.com/v2/auth/requesttoken?appid=pelc790w2bx&appsecret=2ma3jkhafcyscswg8wgk00w0kwsog4s" | awk -F":" '{print $3}' | tr -d [:punct:])"
      #curl -s -G "http://api.naturalreaders.com/v2/tts/?t=$TTS_STRING_CONTENT&r=$TTS_VOICE&s=$TTS_SPEED&requesttoken=$REQUESTTOKEN" -o "$OUT"
      ;;
    *)
      echo "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
# FromTextToSpeech remote TTS engine (http://www.fromtexttospeech.com).
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - voice name (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output audio file (synthesize)
# Prints the requested information on stdout; exits 1 on an unknown action.
tts_engine_fromtexttospeech() {
local ACTION="$1"
shift
local VOICE="$1"
shift
local IN="$1"
shift
local OUT="$1"
case $ACTION in
description)
echo "2015-12-19 Remote FromtextToSpeech NO IVONA http://www.fromtexttospeech.com"
;;
help)
echo "No help available for $TTS_ENGINE"
;;
env)
# default output file format
TTS_EXT="mp3"
# default TTS voice (only applied when the variables are empty)
[ -z "$TTS_LANGUAGE" ] && TTS_LANGUAGE="English"
[ -z "$TTS_VOICE" ] && TTS_VOICE="English"
;;
commands)
echo "curl"
;;
voices)
# extractor command:
# curl -s http://www.fromtexttospeech.com | grep "voice_indexes\['" | awk -F = '{print $2}' | sed '/^$/d' | sed "s/ '//" | sed "s/';//"
# extracted on 2017-09-08
# NOTE(review): "CΘline22" below looks like a mis-encoded "Céline22" - confirm against the service
echo "IVONA Amy22 (UK English)
IVONA Brian22 (UK English)
IVONA CΘline22 (French)
IVONA Mathieu22 (French)
IVONA Marlene22 (German)
IVONA Hans22 (German)
IVONA Tatyana22 (Russian)
IVONA Eric22
IVONA Jennifer22
IVONA Joey22
IVONA Kimberly22
IVONA Salli22
IVONA Carla22 (Italian)
IVONA Giorgio22 (Italian)
IVONA Conchita22 (Spanish [Modern])
IVONA Enrique22 (Spanish [Modern])
IVONA Cristiano22 (Portuguese)"
;;
voice)
# first row of the voices table containing $VOICE as whole word(s), case-insensitive;
# the rows contain no ':' so the awk stage prints the whole row
eval ${FUNCNAME} voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1 | tr -d [:cntrl:] | awk -F: '{print $1}'
;;
synthesize)
local TTS_STRING_CONTENT="$(cat $IN)"
local TTS_SPEED=0
# NOTE(review): the next line is damaged - it stops in the middle of the awk program and continues
# with the tail of a sed expression, so the quoting no longer balances. ENGINE_URL and OUTPUT_FIlE,
# used by the curl call below, are not assigned anywhere in the visible code; presumably their
# assignments were part of the lost text. Restore this section from the upstream project before use.
local TTS_LANGUAGE="$(eval ${FUNCNAME} voice $VOICE | awk -F "[()]" '{ for (i=2; i]*\)'.*/\1/p")"
curl -s $ENGINE_URL$OUTPUT_FIlE -o "$OUT"
;;
*)
echo "${FUNCNAME}: wrong method called - abort"
exit 1
;;
esac
}
#######################################
# Festival local TTS engine (http://www.cstr.ed.ac.uk/projects/festival).
# Globals:
#   TTS_ENGINE, HOME - read by "help" / "voices"
#   TTS_EXT          - written by "env"
#   TTS_VOICE        - set by "env" only when it is empty
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - voice (accepted but not used: the engine voice is fixed)
#   $3 - input text file (synthesize)
#   $4 - output wav file (synthesize)
# Returns:
#   exits the shell with status 1 on an unknown action
#######################################
tts_engine_festival() {
  local ACTION="$1" VOICE="$2" IN="$3" OUT="$4"

  case "$ACTION" in
    description)
      printf '%s\n' "2015-12-19 Local Festival NO OpenSource http://www.cstr.ed.ac.uk/projects/festival"
      ;;
    help)
      printf '%s\n' "No help available for $TTS_ENGINE"
      ;;
    env)
      # output file format produced by text2wave
      TTS_EXT="wav"
      # fall back to the default voice only when none was requested
      [[ -z "$TTS_VOICE" ]] && TTS_VOICE="nitech_us_slt_arctic_hts"
      ;;
    commands)
      printf '%s\n' "text2wave"
      ;;
    voices)
      printf '%s\n' \
        "festival voices list not supported" \
        "run 'festival' command and type: (voice.list)" \
        "CTRL-D to exit" \
        "configure the desired language into $HOME/.festivalrc"
      ;;
    voice)
      # the voice cannot be selected here, so the same name is always reported
      printf '%s\n' nitech_us_slt_arctic_hts
      ;;
    synthesize)
      # text2wave reads the text from stdin and writes the wav to $OUT
      cat "$IN" | text2wave -o "$OUT"
      ;;
    *)
      printf '%s\n' "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
#######################################
# eSpeak local TTS engine (http://espeak.sourceforge.net).
# Globals:
#   TTS_ENGINE - read by "help"
#   TTS_EXT    - written by "env"
#   TTS_VOICE  - set by "env" when empty; fallback voice for "synthesize"
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - voice name as listed by "voices" (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output wav file (synthesize)
# Returns:
#   exits the shell with status 1 on an unknown action
#######################################
tts_engine_espeak() {
  local ACTION="$1"
  local VOICE="$2"
  local IN="$3"
  local OUT="$4"

  case "$ACTION" in
    description)
      echo "2015-12-19 Local eSpeak NO OpenSource http://espeak.sourceforge.net"
      ;;
    help)
      echo "No help available for $TTS_ENGINE"
      ;;
    env)
      # default output file format
      TTS_EXT="wav"
      # default TTS voice
      [ -z "$TTS_VOICE" ] && TTS_VOICE="en"
      ;;
    commands)
      echo "espeak"
      ;;
    voices)
      # second column of "espeak --voices", without the header row
      espeak --voices | awk '{print $2}' | grep -v '^Language$'
      ;;
    voice)
      # exact, literal match of a whole line of the voices list
      "${FUNCNAME[0]}" voices 2>&1 | grep -x -F -- "$VOICE"
      ;;
    synthesize)
      # quote every path (file names may contain spaces) and honour the
      # requested voice, falling back to the global default when it is empty
      espeak -w "$OUT" -v "${VOICE:-$TTS_VOICE}" -f "$IN"
      ;;
    *)
      echo "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
#######################################
# Microsoft SAPI5 local TTS engine, driven through sapi2wav.exe.
# Globals:
#   TTS_ENGINE - read by "help"
#   TMP_DIR    - read by "synthesize": directory of the re-encoded text file
#   TTS_EXT    - written by "env"
#   TTS_VOICE  - set by "env" only when it is empty
# Arguments:
#   $1 - action: description|help|env|commands|voices|voice|synthesize
#   $2 - voice name, or whole word(s) of its row in the list (voice, synthesize)
#   $3 - input text file (synthesize)
#   $4 - output wav file (synthesize)
# Returns:
#   "synthesize" returns 1 when the text cannot be re-encoded;
#   exits the shell with status 1 on an unknown action
#######################################
tts_engine_sapi5() {
  local ACTION="$1"
  local VOICE="$2"
  local IN="$3"
  local OUT="$4"

  case "$ACTION" in
    description)
      echo "2015-12-19 Local SAPI5 NO Microsoft http://www.microsoft.com/en-us/download/details.aspx?id=10121"
      ;;
    help)
      echo "To use $TTS_ENGINE you must install:"
      echo "- wine (to run Windows applications inside Linux) from http://www.winehq.org/"
      echo "- Microsoft-English-TTS-51.msi (SAPI5 Speech SDK 5.1) from https://www.microsoft.com/en-us/download/details.aspx?id=10121"
      echo "- sapi2wav.exe (Synth and save TTS wav files using Windows SAPI library) from https://gitlab.mister-muffin.de/josch/novel2audio/raw/master/sapi2wav.exe"
      echo "- Buy and install commercial TTS engines for Windows SAPI environment, like Loquendo and IVONA voices"
      ;;
    env)
      # default output file format
      TTS_EXT="wav"
      # default TTS voice
      [ -z "$TTS_VOICE" ] && TTS_VOICE="Giulia"
      ;;
    commands)
      echo "sapi2wav.exe"
      ;;
    voices)
      # hide the wineboot.exe lines from the listing
      sapi2wav.exe -list 2>&1 | grep -v "wineboot.exe"
      ;;
    voice)
      # first listed row containing $VOICE, reduced to the text before the first ':'
      "${FUNCNAME[0]}" voices 2>&1 | grep -i -F -w -- "$VOICE" | head -n1 | tr -d '[:cntrl:]' | awk -F: '{print $1}'
      ;;
    synthesize)
      # convert the utf-8 text file to iso-8859-15; the same variable is now
      # used for the conversion target and for the file handed to sapi2wav.exe
      # (previously an undefined name was used and both received an empty path)
      local in_iso8859
      in_iso8859="$TMP_DIR/$(basename -- "$IN")"
      if ! iconv --to-code ISO-8859-15 "$IN" -o "$in_iso8859"; then
        echo "${FUNCNAME[0]}: unable to convert '$IN' to ISO-8859-15" >&2
        return 1
      fi
      sapi2wav.exe "$OUT" "$("${FUNCNAME[0]}" voice "$VOICE")" -f "$in_iso8859" 2>&1 | grep -v "wineboot.exe"
      ;;
    *)
      echo "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
# END TTS ENGINES
###########################################################################################
###########################################################################################
# START PLAYBACK ENGINES
# add others PLAYBACK engines here
#######################################
# Generic playback engine: runs whatever command is stored in
# PLAYBACK_COMMAND, without device or volume handling.
# Globals:
#   PLAYBACK_COMMAND      - read: command used to play the file
#   PLAYBACK_ENGINE_OPTS  - read: extra options, split on whitespace
# Arguments:
#   $1 - action: description|help|env|commands|play
#   $2 - audio file (play)
# Returns:
#   exits the shell with status 1 on an unknown action
#######################################
playback_engine_generic() {
  local ACTION="$1" IN="$2"

  case "$ACTION" in
    description)
      printf '%s\n' "2018-04-08 Generic Playback Engine"
      ;;
    help)
      printf '%s\n' \
        "Generic Playback Engine: $PLAYBACK_COMMAND" \
        "No support for default playback device, volume, options"
      ;;
    env)
      # nothing to prepare for the generic engine; kept as placeholders:
      # set default options
      #[ -z "$PLAYBACK_ENGINE_OPTS" ] && PLAYBACK_ENGINE_OPTS+=""
      # change playback device. default: do not change playback device (suggested: alsa)
      #[ -z "$PLAYBACK_DEVICE" ] || PLAYBACK_ENGINE_OPTS+=""
      # change playback volume. default: do not change output volume
      #[ -z "$PLAYBACK_VOLUME" ] || PLAYBACK_ENGINE_OPTS+=""
      ;;
    commands)
      printf '%s\n' "$PLAYBACK_COMMAND"
      ;;
    play)
      # command and options are left unquoted on purpose: both may carry
      # several whitespace separated words
      $PLAYBACK_COMMAND $PLAYBACK_ENGINE_OPTS "$IN"
      ;;
    *)
      printf '%s\n' "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
#######################################
# MPlayer playback engine (http://www.mplayerhq.hu/).
# Globals:
#   PLAYBACK_COMMAND      - written on every call: always "mplayer"
#   PLAYBACK_ENGINE_OPTS  - extended by "env", read by "play"
#   PLAYBACK_DEVICE       - read by "env": adds "-ao <device>" when set
#   PLAYBACK_VOLUME       - read by "env": adds "-volume <n>" when set
# Arguments:
#   $1 - action: description|help|env|commands|play
#   $2 - audio file (play)
# Returns:
#   exits the shell with status 1 on an unknown action
#######################################
playback_engine_mplayer() {
  local ACTION="$1" IN="$2"
  PLAYBACK_COMMAND="mplayer"

  case "$ACTION" in
    description)
      printf '%s\n' "2016-12-18 MPlayer http://www.mplayerhq.hu/"
      ;;
    help)
      printf '%s\n' "MPlayer - The Movie Player"
      printf '%s\n' "MPlayer is a movie player which runs on many systems (see the documentation). It plays most MPEG/VOB, AVI, Ogg/OGM, VIVO, ASF/WMA/WMV, QT/MOV/MP4, RealMedia, Matroska, NUT, NuppelVideo, FLI, YUV4MPEG, FILM, RoQ, PVA files, supported by many native, XAnim, and Win32 DLL codecs. You can watch VideoCD, SVCD, DVD, 3ivx, DivX 3/4/5, WMV and even H.264 movies."
      ;;
    env)
      # default options, only when the user did not provide any
      if [[ -z "$PLAYBACK_ENGINE_OPTS" ]]; then
        PLAYBACK_ENGINE_OPTS+=" -quiet -nolirc -noconsolecontrols -afm mp3lib,ffmpeg"
      fi
      # playback device: untouched unless explicitly requested (suggested: alsa)
      if [[ -n "$PLAYBACK_DEVICE" ]]; then
        PLAYBACK_ENGINE_OPTS+=" -ao $PLAYBACK_DEVICE"
      fi
      # playback volume: untouched unless explicitly requested
      if [[ -n "$PLAYBACK_VOLUME" ]]; then
        PLAYBACK_ENGINE_OPTS+=" -volume $PLAYBACK_VOLUME"
      fi
      ;;
    commands)
      printf '%s\n' "$PLAYBACK_COMMAND"
      ;;
    play)
      # options are left unquoted on purpose: they are a whitespace separated list
      $PLAYBACK_COMMAND $PLAYBACK_ENGINE_OPTS "$IN"
      ;;
    *)
      printf '%s\n' "${FUNCNAME[0]}: wrong method called - abort"
      exit 1
      ;;
  esac
}
# END PLAYBACK ENGINES
###########################################################################################
#######################################
# Write the user configuration file, filled with the current value of
# every supported setting (the two engine API keys are written commented out).
# Globals:
#   IZSYNTH_CONFIG_FILE - destination file, overwritten
#   every variable named in the template below - read
# Returns:
#   status of the write to IZSYNTH_CONFIG_FILE
#######################################
write_default_config() {
  # OUT_DIR is written from $OUT_DIR: it used to be filled with
  # $TMP_DIR_BASE, so a customised output directory was never saved
  cat > "$IZSYNTH_CONFIG_FILE" <<EOF
# ENGINE SPECIFIC API KEYS
#TTS_ENGINE_VOICERSS_APIKEY="$TTS_ENGINE_VOICERSS_APIKEY"
#TTS_ENGINE_ISPEECH_APIKEY="$TTS_ENGINE_ISPEECH_APIKEY"
# default tts engine
TTS_ENGINE="$TTS_ENGINE"
# default tts voice (null = let tts engine to set default voice. it will use english)
TTS_VOICE="$TTS_VOICE"
# resynth the file if it already exist
OUTPUT_OVERWRITE="$OUTPUT_OVERWRITE"
# remove synthesized file after playback
OUTPUT_REMOVE="$OUTPUT_REMOVE"
# assemble and export converted PBX recordings
EXPORT_AUDIO="$EXPORT_AUDIO"
# assemble and export to the following audio formats
EXPORT_AUDIO_FORMATS="$EXPORT_AUDIO_FORMATS"
# split destination audio files into audio formats sub directories
EXPORT_AUDIO_FORMAT_SUBDIR="$EXPORT_AUDIO_FORMAT_SUBDIR"
# realtime audio playback variables
PLAYBACK="$PLAYBACK"
# play the file in background, otherwise foreground
PLAYBACK_BACKGROUND="$PLAYBACK_BACKGROUND"
# command used to play the synthesized audio file
PLAYBACK_ENGINE="$PLAYBACK_ENGINE"
# default playback command options
PLAYBACK_ENGINE_OPTS="$PLAYBACK_ENGINE_OPTS"
# default playback device
PLAYBACK_DEVICE="$PLAYBACK_DEVICE"
# sound card playback volume
PLAYBACK_VOLUME="$PLAYBACK_VOLUME"
# default base temp directory (comment if you want use the system default directory base, ex. /tmp)
TMP_DIR_BASE="$TMP_DIR_BASE"
# default redirect to tmp dir
OUT_DIR="$OUT_DIR"
# default tts volume
TTS_VOLUME="$TTS_VOLUME"
# default wait n. seconds before speaking tts
TTS_PAD_BEGIN="$TTS_PAD_BEGIN"
# default wait n. seconds after speaked tts
TTS_PAD_END="$TTS_PAD_END"
# default start music after n. seconds
MUSIC_START="$MUSIC_START"
# default fade the start and end of music with 5 seconds
MUSIC_FADE="$MUSIC_FADE"
# default music volume
MUSIC_VOLUME="$MUSIC_VOLUME"

EOF
}
#######################################
# Load all external TTS engines: source every *.conf file found below
# the "engines" directory of the izsynth configuration directory.
# Globals:
#   IZSYNTH_CONFIG_DIR - read
# Returns:
#   0 when the engines directory does not exist
#######################################
tts_engines_load() {
  local engines_dir="$IZSYNTH_CONFIG_DIR/engines/"
  local engine_file

  [[ -d "$engines_dir" ]] || return 0

  # NUL-delimited names keep paths with spaces or newlines intact; the list
  # is read from fd 3 so the sourced files still see the caller's stdin
  while IFS= read -r -d '' engine_file <&3; do
    . "$engine_file"
  done 3< <(find "$engines_dir" -type f -name "*.conf" -print0)
}
#######################################
# List all supported TTS engines as an aligned table: one row for every
# function named tts_engine_<name>, made of <name> and its "description".
# External engines are loaded first (inside the pipeline only).
# Outputs:
#   the table on stdout, aligned by column(1)
#######################################
tts_engines_list(){
  print_tts_engines_table() {
    local engine
    echo "Engine Version Type Provider APIKEY Technology HomePage"
    echo "---------------- ---------- ------ ---------------- ------ ---------- --------"
    tts_engines_load
    # "declare -F" prints one "declare -f NAME" line per defined function;
    # scanning the full function source instead listed any body line that
    # merely looked like a function header
    while read -r _ _ engine <&3; do
      [[ "$engine" == tts_engine_* ]] || continue
      printf '%s %s\n' "${engine#tts_engine_}" "$("$engine" description)"
    done 3< <(declare -F)
  }
  print_tts_engines_table | column -c1 -t
}
#######################################
# Verify that the given TTS engine exists, i.e. that a function named
# tts_engine_<name> is defined once the external engines are loaded.
# Arguments:
#   $1 - engine name
# Outputs:
#   an error and the list of the available engines when the check fails
# Returns:
#   exits the shell with status 1 when the engine is empty or unknown
#######################################
tts_engines_check() {
  # look for the function itself: grepping the printed table accepted any
  # word of any column (e.g. "Local" or "YES") as a valid engine name.
  # The subshell keeps the external engines out of the calling shell.
  if [[ -z "$1" ]] || ! ( tts_engines_load; declare -F -- "tts_engine_$1" ) >/dev/null; then
    echo "ERROR: wrong TTS engine synthesizer specified: '$1'"
    echo
    echo "please select an engine from the following list:"
    echo
    tts_engines_list
    exit 1
  fi
}
#######################################
# Verify that a voice is supported by a TTS engine, i.e. that the
# engine's "voice" action prints something for it.
# Arguments:
#   $1 - engine name
#   $2 - voice
# Outputs:
#   an error and the engine's voices list when the check fails
# Returns:
#   exits the shell with status 1 when the voice is empty or unknown
#######################################
tts_engines_check_voice() {
  # the voice is passed quoted: multi-word voices such as "UK English"
  # used to reach the engine truncated to their first word
  if [[ -z "$2" ]] || [[ -z "$("tts_engine_$1" voice "$2")" ]]; then
    echo "ERROR: the TTS voice '$2' doesn't exist, or is not supported by the engine '$1'"
    echo
    echo "please select a voice from the following list:"
    echo
    "tts_engine_$1" voices
    exit 1
  fi
}
#######################################
# List all supported playback engines as an aligned table: one row for
# every function named playback_engine_<name>, made of <name> and its
# "description".
# Outputs:
#   the table on stdout, aligned by column(1)
#######################################
playback_engines_list(){
  print_playback_engines_table() {
    local engine
    echo "Engine Version Name HomePage"
    echo "---------------- ---------- ---------------- --------"
    # "declare -F" prints one "declare -f NAME" line per defined function;
    # scanning the full function source instead listed any body line that
    # merely looked like a function header
    while read -r _ _ engine <&3; do
      [[ "$engine" == playback_engine_* ]] || continue
      printf '%s %s\n' "${engine#playback_engine_}" "$("$engine" description)"
    done 3< <(declare -F)
  }
  print_playback_engines_table | column -c1 -t
}
#######################################
# Verify that the given playback engine exists; when it does not, treat
# the configured engine name as a plain command and switch to the
# "generic" engine.
# Globals:
#   PLAYBACK_ENGINE  - read; set to "generic" when the engine is unknown
#   PLAYBACK_COMMAND - written with the previous PLAYBACK_ENGINE value
# Arguments:
#   $1 - playback engine name
#######################################
playback_engines_check() {
  # look for the engine function itself: grepping the printed table accepted
  # any word of the descriptions (e.g. "MPlayer" or "Generic") as an engine
  if [[ -z "$1" ]] || ! declare -F -- "playback_engine_$1" >/dev/null; then
    #echo "INFO: using generic engine for playback with command: '$1'"
    PLAYBACK_COMMAND="$PLAYBACK_ENGINE"
    PLAYBACK_ENGINE="generic"
  fi
}
#######################################
# Percent-encode a string for use inside a URL.
# ASCII letters, digits and the characters . ~ _ - are copied as they
# are; everything else is written as %XX.
# Arguments:
#   $1 - string to encode
# Outputs:
#   the encoded string on stdout, without a trailing newline
#######################################
urlencode() {
  # force the C locale for the whole function: LC_ALL wins over LANG, so
  # the bracket ranges below are plain ASCII ranges and the string is
  # walked one byte at a time even when the caller exports LC_ALL
  local LANG=C LC_ALL=C
  local text="$1"
  local length="${#text}"
  # the index is local so that the caller's own "i" is never clobbered
  local i c

  for (( i = 0; i < length; i++ )); do
    c="${text:i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      # a leading quote makes printf use the numeric code of the character
      *) printf '%%%02X' "'$c" ;;
    esac
  done
}
#######################################
# Normalize the text read from stdin into a single line, to avoid
# problems when it is later encoded into a URL.
# Outputs:
#   the normalized text on stdout
#######################################
parse_tts_string() {
  #tr '\n' ' ' | sed 's/ \+/ /g' | sed 's/^[ \t]*//;s/[ \t]*$//'
  #tr '\n' ' ' | sed 's/ \+/ /g' | sed 's/^[ \t]*//;s/[ \t]*$//' | sed 's/ *\([[:punct:]]\) */\1 /g'
  # 1. join all the lines, 2. squeeze runs of spaces, 3. drop the spaces
  # before , ; : . and leave exactly one after, 4. trim both ends
  tr '\n' ' ' \
    | sed -e 's/ \+/ /g' \
          -e 's/ *\([,;:.]\) */\1 /g' \
          -e 's/^[ \t]*//' \
          -e 's/[ \t]*$//'
}
#######################################
# Count the non-whitespace bytes of the text read from stdin. Some
# engines accept only a limited text length, so callers can use the
# count to pass on only what the engine allows.
# Outputs:
#   the count, as printed by "wc -c"
#######################################
count_chars() {
  # the process substitution inherits this function's stdin
  wc -c < <(tr -d '[:space:]')
}
#######################################
# Explain how to create the user configuration file, then print the
# help of the currently selected TTS engine.
# Globals:
#   NAME, IZSYNTH_CONFIG_FILE, TTS_ENGINE - read
# Outputs:
#   the instructions on stdout
#######################################
print_help_config() {
  printf '%s\n' \
    "ERROR: Wrong or missing $NAME user config file." \
    "Step 1: Create this file: $IZSYNTH_CONFIG_FILE (use the command $0 -C)" \
    "Step 2: Modify variables values you want to override from the default" \
    "Step 3: $TTS_ENGINE engine specific config and variables:" \
    ""
  # engine specific part of the instructions
  tts_engine_${TTS_ENGINE} help
}
#######################################
# Ask for a confirmation and read a single character as the answer.
# Globals:
#   response - written with the answer
# Arguments:
#   $1 - prompt (default: "Are you sure? [y/N]")
# Outputs:
#   an empty line on stdout once the answer has been read
# Returns:
#   0 when the answer is y or Y, 1 otherwise
#######################################
confirm () {
  # call with a prompt string or use a default; -n 1 stops after one character
  read -r -p "${1:-Are you sure? [y/N]} " -n 1 response
  echo
  [[ $response == [yY][eE][sS] || $response == [yY] ]]
}
#######################################
# Print a horizontal rule with a message written over it, starting two
# columns from the left.
# Arguments:
#   $1 - message (backslash escapes are interpreted)
#   $2 - rule character (default "-"; an empty string gives an empty rule)
#   $3 - rule width (default: terminal width, or 80 when it is unknown)
# Outputs:
#   the rule and the message on stdout
# Returns:
#   1 when called without arguments or with an invalid width
#######################################
rulem() {
  if [ $# -eq 0 ]; then
    # the usage now reflects the real argument order (character, then width)
    echo "Usage: rulem MESSAGE [RULE_CHARACTER] [WIDTH]"
    return 1
  fi

  local rule_char="${2--}"
  local width="$3"
  local hr

  if [[ -z "$width" ]]; then
    width="$(tput cols 2>/dev/null)" || width=80
  fi

  # build a run of spaces of the requested width, then turn every space
  # into the rule character
  printf -v hr '%*s' "$width" '' || return 1
  hr=${hr// /"$rule_char"}

  # the rule is printed quoted: unquoted, a rule made of "*" was expanded
  # to the file names of the current directory.
  # Then: reset the cursor, move 2 columns right, print the message.
  echo -en "$hr" && echo -e "\r\033[2C$1"
}
#######################################
# Verify if the given variable is an indexed array.
# Arguments:
#   $1 - variable name
# Returns:
#   0 when the variable is declared as an indexed array, 1 otherwise
#   (unset variables, scalars and associative arrays included)
#######################################
is_array() {
  local __is_array_decl
  __is_array_decl="$(declare -p -- "$1" 2>/dev/null)" || return 1
  # only the attribute field at the start of the declaration is checked:
  # an unanchored match also accepted scalars whose value contains "declare -a"
  [[ "$__is_array_decl" =~ ^declare\ -a ]]
}
#######################################
# Show the mime type of the given file(s), long version (e.g. audio/mpeg).
# Arguments:
#   $@ - files, handed over to file(1)
#######################################
show_mime() {
  file -b --mime-type "$@"
}
#######################################
# Show the mime type of the given file(s), brief version: only the part
# before the "/" (e.g. "audio" for audio/mpeg).
# Arguments:
#   $@ - files, handed over to file(1)
#######################################
show_mime_brief() {
  file -b --mime-type "$@" | cut -d/ -f1
}
#######################################
# Show the human readable size of the given file(s), as reported by
# "du -sh"; errors from du are discarded.
# Arguments:
#   $@ - files, handed over to du(1)
#######################################
show_size() {
  local du_line
  # keep only the first whitespace separated field of every du line
  du -sh "$@" 2>/dev/null | while read -r du_line _; do
    printf '%s\n' "$du_line"
  done
}
#######################################
# Synthesize a text file into an audio file with the selected TTS
# engine and report the result on a single log line.
# Globals:
#   TTS_ENGINE, TTS_VOICE - read: element $ID selects engine and voice
#   ID                    - read: index into the two variables above
# Arguments:
#   $1 - input text file
#   $2 - output audio file
# Outputs:
#   the log line on stdout
# Returns:
#   exits the shell with status 1 when the engine fails or the output is
#   neither an "audio" nor an "application" mime type
#######################################
synthesize_file() {
  local IN="$1"
  local OUT="$2"
  local engine="${TTS_ENGINE[$ID]}"
  local voice="${TTS_VOICE[$ID]}"
  local RETVAL mime_class

  # every value is printed as data: the voice used to be expanded unquoted,
  # so it was word-split and glob-expanded inside the log line
  printf 'Synthesizing - input:[%s] engine:[%s] voice:[%s] output:[%s] ' \
    "$IN" "$engine" "$voice" "$OUT"

  "tts_engine_${engine}" synthesize "$voice" "$IN" "$OUT"
  RETVAL=$?

  printf 'size:[%s] type:[%s] result:' "$(show_size "$OUT")" "$(show_mime "$OUT")"

  # "application" is accepted as a workaround for the providers that do not
  # set the mime type correctly
  mime_class="$(show_mime_brief "$OUT")"
  if [[ $RETVAL -eq 0 && ( "$mime_class" == "audio" || "$mime_class" == "application" ) ]]; then
    printf '[SUCCESS]\n'
  else
    printf '[ERROR]\n'
    exit 1
  fi
}
#######################################
# Play back the synthesized file with the configured playback engine.
# Globals:
#   PLAYBACK_ENGINE      - read (may be replaced by playback_engines_check)
#   PLAYBACK_COMMAND     - read for the log line
#   PLAYBACK_ENGINE_OPTS - read for the log line
#   OUTPUT_REMOVE        - read: "yes" forces a foreground playback
#   PLAYBACK_BACKGROUND  - read, set to "no" when OUTPUT_REMOVE is "yes"
# Arguments:
#   $1 - audio file to play
# Outputs:
#   one log line on stdout; the player's own stdout is discarded
#######################################
playback_file () {
  local IN="$1"

  # verify if the given PLAYBACK_ENGINE engine exists, then load its env
  playback_engines_check $PLAYBACK_ENGINE
  playback_engine_$PLAYBACK_ENGINE env

  # a file that must be removed afterwards cannot be played in background
  if [ "$OUTPUT_REMOVE" = "yes" ]; then
    PLAYBACK_BACKGROUND="no"
  fi

  printf '%s\n' "Playing - input:[$IN] command:[$PLAYBACK_COMMAND] options:[$PLAYBACK_ENGINE_OPTS]"

  case "$PLAYBACK_BACKGROUND" in
    yes)
      # do not block the command execution
      playback_engine_$PLAYBACK_ENGINE play "$IN" 1>/dev/null &
      ;;
    *)
      # wait for the playback to finish before returning
      playback_engine_$PLAYBACK_ENGINE play "$IN" 1>/dev/null
      ;;
  esac
}
#######################################
# Save a file into the output location, overwriting the destination.
# Arguments:
#   $1 - source file
#   $2 - destination file
# Outputs:
#   one log line on stdout
# Returns:
#   status of the copy
#######################################
save_file() {
  local IN="$1" OUT="$2"
  local size
  size="$(show_size "$IN")"
  printf '%s\n' "Saving - input:[$IN] size:[$size] output:[$OUT]"
  cp -f "$IN" "$OUT"
}
#######################################
# Convert the input file to wav and make sure it is sampled at 44100 Hz,
# the rate used before processing and mixing with the background music.
# Globals:
#   SOX_OPTS - read: extra sox options, split on whitespace
# Arguments:
#   $1 - input audio file (mp3 files are decoded with lame first)
#   $2 - output wav file
# Outputs:
#   log lines on stdout
#######################################
convert_file() {
  local IN="$1"
  local OUT="$2"
  local IN_SR OUT_SR RETVAL

  # every path below is quoted: unquoted, a file name containing spaces was
  # split into several arguments for print_ext, soxi, print_path and print_name
  case "$(print_ext "$IN")" in
    mp3)
      printf '%s' "Converting - input:[$IN] input_size:[$(show_size "$IN")] "
      lame --quiet --resample 44.1 --decode "$IN" "$OUT" 2>&1 | grep -v "Can't step back"
      printf '%s\n' "output:[$OUT] output_size:[$(show_size "$OUT")]"
      # from now on the decoded wav file is the input file
      IN="$OUT"
      ;;
  esac

  # normalize: read the sample rate of the input and resample when needed
  IN_SR="$(soxi -r "$IN")"
  if [ "$IN_SR" != 44100 ]; then
    # resample into a temporary name next to the destination
    OUT="$(print_path "$OUT")/$(print_name "$OUT").normalized.44khz.wav"
    # SOX_OPTS is left unquoted on purpose: it is a list of options
    sox $SOX_OPTS "$IN" -r 44100 "$OUT"
    RETVAL=$?
    OUT_SR="$(soxi -r "$OUT")"
    echo "Normalizing - input:[$IN] input_size:[$(show_size "$IN")] input_rate:[$IN_SR] output_size:[$(show_size "$OUT")] output_rate:[$OUT_SR]"
    # on success the resampled file replaces the input file
    [ $RETVAL = 0 ] && mv -f "$OUT" "$IN"
  elif [[ ! -e "$OUT" || "$IN" != "$OUT" ]]; then
    # already at 44100 Hz: a plain copy is enough
    cp -a "$IN" "$OUT"
  fi
}
# Convert the input file to every audio format listed in EXPORT_AUDIO_FORMATS.
# Arguments: $1 - input audio file
#            $2 - output directory
# Globals:   EXPORT_AUDIO_FORMATS, EXPORT_AUDIO_FORMAT_SUBDIR,
#            OUTPUT_OVERWRITE, SOX_OPTS (read)
# Outputs:   one log line per format on stdout
#
# Each format is written to "<out dir>/<name>.<format>", or to
# "<out dir>/<format>/<name>.<format>" when EXPORT_AUDIO_FORMAT_SUBDIR=yes.
export_file() {
  local IN="$1"
  shift
  local OUT_DIR="$1"
  local AUDIO_FORMAT
  local SOX_OPTS_FORMAT
  local OUT

  for AUDIO_FORMAT in $EXPORT_AUDIO_FORMATS; do
    # set the destination pbx file name
    if [ "$EXPORT_AUDIO_FORMAT_SUBDIR" = "yes" ]; then
      mkdir -p "$OUT_DIR/$AUDIO_FORMAT"
      OUT="$OUT_DIR/$AUDIO_FORMAT/$(print_name "$IN").$AUDIO_FORMAT"
    else
      OUT="$OUT_DIR/$(print_name "$IN").$AUDIO_FORMAT"
    fi

    # sox output options of the known PBX formats
    case "$AUDIO_FORMAT" in
      44khz.wav)
        SOX_OPTS_FORMAT="-t wav -r 44100"
        ;;
      wav)
        SOX_OPTS_FORMAT="-t wav -r 8000 -c 1"
        ;;
      gsm)
        SOX_OPTS_FORMAT="-t gsm -r 8000 -c 1"
        ;;
      ulaw)
        SOX_OPTS_FORMAT="-t ul -r 8000 -c 1"
        ;;
      alaw)
        SOX_OPTS_FORMAT="-t al -r 8000 -c 1"
        ;;
      slin)
        SOX_OPTS_FORMAT="-t raw -r 8000 -c 1"
        ;;
      *)
        # unlisted format: never reuse the options of the previous format,
        # pass no type options so sox picks the type from the file extension
        SOX_OPTS_FORMAT=""
        ;;
    esac

    # overwrite only if input and output file are not the same
    if [[ "$IN" != "$OUT" ]] && [[ ! -e "$OUT" || "$OUTPUT_OVERWRITE" = "yes" ]]; then
      # convert to pbx formats
      sox -V1 $SOX_OPTS "$IN" $SOX_OPTS_FORMAT "$OUT" lowpass 4000
      # display log informations
      echo "Saving PBX - input:[$IN] output:[$OUT] size:[$(show_size "$OUT")] type:[$(show_mime "$OUT")]"
    else
      echo "File PBX exist - input:[$OUT]"
    fi
  done
}
# Remix the input file applying the volume options and, when MUSIC_FILE is set,
# mixing it with the background music (padding, trim and fade in/out).
# Arguments: $1 - input wav file
#            $2 - output wav file
# Globals:   SOX_OPTS, SOX_OPTS_MUSIC, TTS_VOLUME, MUSIC_VOLUME, MUSIC_FILE,
#            MUSIC_START, MUSIC_FADE, TTS_PAD_BEGIN, TTS_PAD_END (all read only)
# Outputs:   a progress log line on stdout
# Returns:   0 on success; exits the script with status 1 when sox fails
remix_file() {
  local IN="$1"
  shift
  local OUT="$1"
  local RETVAL IN_DURATION TTS_PAD_BEGIN_REAL
  # work on private copies of the sox options: appending "-v" to the globals
  # would pile up on every call and leak into every later sox invocation
  local TTS_SOX_OPTS="$SOX_OPTS"
  local MUSIC_SOX_OPTS="$SOX_OPTS_MUSIC"

  [ -n "$TTS_VOLUME" ] && TTS_SOX_OPTS="$TTS_SOX_OPTS -v $TTS_VOLUME"
  [ -n "$MUSIC_VOLUME" ] && MUSIC_SOX_OPTS="$MUSIC_SOX_OPTS -v $MUSIC_VOLUME"

  # total duration in whole seconds: samples / sample rate + begin and end padding
  IN_DURATION=$(( $(soxi -s "$IN") / $(soxi -r "$IN") + TTS_PAD_BEGIN + TTS_PAD_END ))

  if [ -n "$MUSIC_FILE" ]; then
    # the voice starts after the music offset plus the requested padding
    TTS_PAD_BEGIN_REAL=$(( MUSIC_START + TTS_PAD_BEGIN ))
    echo -e -n "Remixing - input:[$IN] music:[$MUSIC_FILE] music_volume:[$MUSIC_VOLUME] tts_volume:[$TTS_VOLUME] output:[$OUT] "
    # mix tts voice with background music
    sox -V1 $MUSIC_SOX_OPTS -m "$MUSIC_FILE" "| sox $TTS_SOX_OPTS $IN -p pad $TTS_PAD_BEGIN_REAL" "$OUT" trim $MUSIC_START fade h $MUSIC_FADE $IN_DURATION $MUSIC_FADE remix 1,2v0.2 1,2v0.2
    RETVAL=$?
    echo -e -n "size:[$(show_size "$OUT")] duration:[$IN_DURATION] result:"
  else
    echo -e -n "Remixing - input:[$IN] tts_volume:[$TTS_VOLUME] output:[$OUT] "
    # remix without background music
    sox -V1 $TTS_SOX_OPTS "$IN" "$OUT"
    RETVAL=$?
    echo -e -n "size:[$(show_size "$OUT")] duration:[$IN_DURATION] result:"
  fi

  if [ $RETVAL = 0 ]; then echo -e -n "[SUCCESS]\n" ; else echo -e -n "[ERROR]\n" ; exit 1; fi
}
# Join every file collected in IN_FILE_MEGAMIX into one single audio file.
# Arguments: $1 - output file
# Globals:   SOX_OPTS, IN_FILE_MEGAMIX (read)
# Outputs:   a log line on stdout
megamix_file() {
  local OUT="$1"
  # sox concatenates all the given input files into the last (output) one
  local -a megamix_cmd=(sox $SOX_OPTS "${IN_FILE_MEGAMIX[@]}" "$OUT")

  "${megamix_cmd[@]}"
  # $OUT is left unquoted in show_size exactly as the script always did
  echo "Megamixing - input:[${IN_FILE_MEGAMIX[@]}] output:[$OUT] size:[$(show_size $OUT)]"
}
# Process one input file: synthesize it when it is a text file, then (when
# remixing or not playing back) convert, remix/megamix and save the result.
# Arguments: $1 - input file (txt, wav or mp3)
#            $2 - final output file
# Globals read:    TMP_DIR, ID, TTS_ENGINE[], TTS_VOICE[], TTS_EXT, REMIX,
#                  PLAYBACK, MEGAMIX, IN_FILE[]
# Globals written: IN_FILE_MEGAMIX[$ID] (only when MEGAMIX=yes)
# Exits with status 1 when the file extension is not txt, wav or mp3.
# NOTE(review): ID is not a parameter; it is expected to be set by the caller's loop.
assemble_file() {
local IN="$1"
shift
local OUT="$1"
# intermediate files, all created inside the temporary directory and named after the input file
local OUT_TMP="$TMP_DIR/$(print_name $IN).44khz.wav"
local OUT_TMP_REMIXED="$TMP_DIR/$(print_name $IN).remixed.44khz.wav"
local OUT_TMP_MEGAMIXED="$TMP_DIR/$(print_name $IN).megamixed.44khz.wav"
# if the input file is a text file, synthesize it
case "$(print_ext $IN)" in
txt)
# load any external TTS engines
tts_engines_load
# verify if the TTS_ENGINE exist
tts_engines_check ${TTS_ENGINE[$ID]}
# verify if the selected TTS voice is supported by the TTS engine
tts_engines_check_voice ${TTS_ENGINE[$ID]} ${TTS_VOICE[$ID]}
# import global tts_engine variables
tts_engine_${TTS_ENGINE[$ID]} env
# the synthesized audio uses the TTS_EXT extension
local TTS_FILE="$TMP_DIR/$(print_name $IN).$TTS_EXT"
if [[ "$REMIX" = "yes" || "$PLAYBACK" = "no" ]]; then
# further processing is needed: synthesize into the temporary directory
synthesize_file "$IN" "$TTS_FILE"
else
# to speed up the playback process don't convert and use the wav version, directly use the tts_engine encoding instead
synthesize_file "$IN" "$OUT"
fi
# from now on the synthesized audio file is the input file
IN="$TTS_FILE"
;;
esac
# to speed up the playback process don't convert and use the wav version, directly use the tts_engine encoding instead
# (plain playback without remix stops here: the block below is skipped)
if [[ "$REMIX" = "yes" || "$PLAYBACK" = "no" ]]; then
case "$(print_ext $IN)" in
txt|wav|mp3)
# convert and normalize all given files
convert_file "$IN" "$OUT_TMP"
# if not megamixing, remix and save all single files
if [ "$MEGAMIX" != "yes" ]
then
if [ "$REMIX" = "yes" ]; then
remix_file "$OUT_TMP" "$OUT_TMP_REMIXED"
save_file "$OUT_TMP_REMIXED" "$OUT"
else
# save the generated file if they are not in the same location
[ "$OUT_TMP" != "$OUT" ] && save_file "$OUT_TMP" "$OUT"
fi
else
# save into this global array all files to megamix (synthesized files and input wav/mp3 files)
IN_FILE_MEGAMIX[$ID]="$OUT_TMP"
# if this is the last file on megamix array, then make all remixing
# (ID is compared with the number of elements of IN_FILE)
if [ $ID = ${#IN_FILE[@]} ]
then
rulem " [$ID/${#IN_FILE[@]}] " - 80
megamix_file "$OUT_TMP_MEGAMIXED"
remix_file "$OUT_TMP_MEGAMIXED" "$OUT_TMP_REMIXED"
save_file "$OUT_TMP_REMIXED" "$OUT"
fi
fi
;;
*)
# anything that is not txt, wav or mp3 can't be processed
echo "ERROR: Invalid file extension found: $(print_ext $IN) input:[$IN]"
exit 1
;;
esac
fi
}
# Turn a dotted version string into a fixed width, zero padded number that can
# be compared with the integer operators of test (e.g. 14.4.0 -> 014004000000).
# Arguments: $@ - version string; components are separated by dots or blanks
# Outputs:   twelve digits (four groups of three) on stdout, no trailing newline
#
# Only the first four components are used, missing ones count as 0. Components
# are always read as base 10 (so "08" is 8, not an invalid octal number) and any
# non digit character is ignored.
check_ver() {
  local IFS=$' \t\n'
  local -a raw=() fields=()
  local field

  read -r -a raw <<< "${*//./ }"
  for field in "${raw[@]:0:4}"; do
    field="${field//[^0-9]/}"
    fields+=("$((10#${field:-0}))")
  done
  printf "%03d%03d%03d%03d" "${fields[@]}"
}
# Split a text file into several smaller text files, each one no longer than
# TTS_LENGTH chars, to bypass the length limit of the TTS engine.
# Arguments: $1 - text file to split
#            $2 - index of the file inside the TTS_ENGINE/TTS_VOICE arrays
# Globals read:    TTS_LENGTH, TTS_LENGTH_CUR, TMP_DIR
# Globals written: SID (left global, as it always was), TTS_ENGINE[], TTS_VOICE[],
#                  IN_FILE_SPLIT[] (one entry appended per generated file)
# Outputs:   an INFO line on stdout
#
# The chunks are written to "$TMP_DIR/<name>-<n>.<ext>". The text is written
# verbatim: chars like * or ? are not expanded as file name patterns.
split_tts_file() {
  local IN="$1"
  shift
  local ID="$1"
  local chunk IN_FILE_SPLIT_TMP

  echo "INFO: Maximun allowed chars reached for engine ${TTS_ENGINE[$ID]}. allowed:$TTS_LENGTH - current:$TTS_LENGTH_CUR"
  SID=0
  while read -r chunk; do
    SID=$((SID + 1))
    # every chunk inherits engine and voice of the file it comes from
    TTS_ENGINE[$SID]="${TTS_ENGINE[$ID]}"
    TTS_VOICE[$SID]="${TTS_VOICE[$ID]}"
    IN_FILE_SPLIT_TMP="$TMP_DIR/$(print_name "$IN")-$SID.$(print_ext "$IN")"
    IN_FILE_SPLIT+=("$IN_FILE_SPLIT_TMP")
    printf '%s\n' "$chunk" > "$IN_FILE_SPLIT_TMP"
  # the command substitution drops the trailing newlines, the final \n puts one back
  done < <(echo -e -n "$(cat "$IN" | parse_tts_string | fold -sw "$TTS_LENGTH")\n")
}
# file name manipulation
# Print the directory part of a path (everything before the last slash).
# Arguments: $@ - the path (several words are joined with a blank)
# Outputs:   the directory on stdout; "." when the path has no slash and
#            "/" when the file is directly inside the root directory
print_path() {
  local path="$*"

  case "$path" in
    */*)
      path="${path%/*}"
      printf '%s\n' "${path:-/}"
      ;;
    *)
      printf '%s\n' "."
      ;;
  esac
}
# Print the file name (extensions included) of a path, without its directory.
# Arguments: $@ - the path (several words are joined with a blank)
# Outputs:   the file name on stdout, printed verbatim (no pattern expansion)
print_fullname() {
  local path="$*"

  printf '%s\n' "${path##*/}"
}
# Print the bare name of a file: no directory and nothing from the first dot
# of the file name onwards (e.g. /tmp/hello.44khz.wav -> hello).
# Arguments: $@ - the path (several words are joined with a blank)
# Outputs:   the bare name on stdout
print_name() {
  local name="$*"

  # strip the directory first, so dots inside directory names are never considered
  name="${name##*/}"
  printf '%s\n' "${name%%.*}"
}
# Print the extension of a file (the text after the last dot of its name).
# Arguments: $@ - the path (several words are joined with a blank)
# Outputs:   the extension on stdout; when the file name has no dot the whole
#            file name is printed
print_ext() {
  local name="$*"

  # look only at the file name, dots inside directory names don't count
  name="${name##*/}"
  printf '%s\n' "${name##*.}"
}
# Dump on stdout the main working variables, for troubleshooting purposes.
# The report starts and ends with an empty line.
debug() {
  cat <<EOF

IN_FILE ARRAY FIRST VALUE = $IN_FILE'
IN_FILE ARRAY ALL VALUES = ${IN_FILE[@]}
IN_FILE_MEGAMIX ARRAY FIRST VALUE = $IN_FILE_MEGAMIX'
IN_FILE_MEGAMIX ARRAY ALL VALUES = ${IN_FILE_MEGAMIX[@]}
other variables:
================
TMP_DIR = $TMP_DIR
PWD = $PWD
IN = $IN
OUT = $OUT
TTS_LANGUAGE = $TTS_LANGUAGE
TTS_VOICE = $TTS_VOICE
IN_FILE = $IN_FILE
OUT_FILE = $OUT_FILE
OUT_PBX = $OUT_PBX
OUT_DIR = $OUT_DIR
REMIX = $REMIX
MEGAMIX = $MEGAMIX

EOF
}
# temp file management
# Create the temporary working directory used by this run.
# Globals read:    NAME, TMP_DIR_BASE (optional base directory)
# Globals written: TMP_NAME, TMP_DIR, and TMP_DIR_BASE when it was empty (it is
#                  set to the directory that contains TMP_DIR)
# Exits with status 1 when the directory can't be created or isn't writable.
make_tmp() {
  # the main temp directory base name
  TMP_NAME="$NAME-$(date +"%Y%m%d-%H%M%S")"

  if [ -z "$TMP_DIR_BASE" ]; then
    # let mktemp pick a unique, not predictable directory (the template needs the X's)
    if ! TMP_DIR="$(mktemp -d -t "${TMP_NAME}-XXXXXX" 2>/dev/null)" || [ -z "$TMP_DIR" ]; then
      # for old systems that doesn't understand 'mktemp -t' option
      TMP_DIR="/tmp/${TMP_NAME}"
      mkdir -p "${TMP_DIR}"
    fi
    # keep the base directory visible to the rest of the script, which reads
    # TMP_DIR_BASE later (a 'local' here would throw the value away)
    TMP_DIR_BASE="${TMP_DIR%/*}"
    TMP_DIR_BASE="${TMP_DIR_BASE:-/}"
  else
    # make the final temp directory
    TMP_DIR="${TMP_DIR_BASE}/${TMP_NAME}"
    mkdir -p "${TMP_DIR}"
  fi

  # the temporary directory must exist AND be writable
  if [ ! -d "${TMP_DIR}" ] || [ ! -w "${TMP_DIR}" ]; then
    echo "ERROR: the directory '${TMP_DIR}' doesn't exist or isn't writable" >&2
    echo >&2
    exit 1
  fi
}
# Validate the runtime environment and the command line input, and build the
# global arrays used by main.
# Globals written (as visible below): SOX_OPTS, TTS_EXT, OUTPUT_OVERWRITE,
#   IN_FILE[], IN_FILE_ARRAY[], IN_FILE_SPLIT[], TTS_ENGINE[], TTS_VOICE[], ID,
#   TTS_LENGTH_CUR, TTS_VOLUME, OUT_DIR, REMIX, MEGAMIX
# After this function IN_FILE, TTS_ENGINE and TTS_VOICE are indexed starting
# from 1 (ID is incremented before every assignment).
# Exits with status 1 on missing/invalid input, music file or output directory.
check_env() {
# defining all global variables
# verify if all commands are installed in the system paths
#for COMMAND in $COMMANDS $(tts_engine_$TTS_ENGINE commands)
# do
# which $COMMAND >/dev/null 2>&1
# [ $? -ne 0 ] && echo "ERROR: the command '$COMMAND' doesn't exist in any path, please install it and retry" && exit 1
# done
# compatibility workarounds
# the version is the third word printed by 'sox --version', with letters removed
local SOX_VER="$(sox --version | awk '{print $3}' | tr -d '[:alpha:]')"
# the -G option is passed to sox only from version 14.4.0 onwards
if [ $(check_ver $SOX_VER) -ge $(check_ver 14.4.0) ]
then
#echo "Current SOX version: $SOX_VER"
SOX_OPTS="-G"
fi
# default extension for syntesized files
TTS_EXT="mp3"
# import global tts_engine variables (suppress errors if the engine doesn't exist)
# NOTE(review): presumably the engine 'env' call can override TTS_EXT and set TTS_LENGTH - confirm in the engine code
tts_engine_${TTS_ENGINE[$ID]} env >/dev/null 2>&1
# set default variables
[ -z "$OUTPUT_OVERWRITE" ] && OUTPUT_OVERWRITE="yes"
# if exporting to pbx format, always overwrite the destination
[ "$EXPORT_AUDIO" = "yes" ] && OUTPUT_OVERWRITE="yes"
# check if no args are given and make some sanity checks
[[ -z "${TTS_STRING}" && -z "${IN_FILE}" ]] && echo "ERROR: no text STRING/FILE/DIRECTORY specified, use '-t' or '-i' option - abort." >&2 && exit 1
# make an input file where paste the given text string
# [ -n "$TTS_STRING" ] && IN_FILE=("$TMP_DIR/$(echo "$TTS_STRING" | sed -e 's/[^A-Za-z0-9_-]/_/g' | cut -c-64).txt") && echo "$TTS_STRING" > "$IN_FILE"
# the file name is made by the first 64 chars of the text (unsafe chars replaced by _)
# followed by the cksum of text, engine, voice and volume
[ -n "$TTS_STRING" ] && IN_FILE=("$TMP_DIR/$(echo "$TTS_STRING" | sed -e 's/[^A-Za-z0-9_-]/_/g' | cut -c-64)-$(echo "$TTS_STRING-$TTS_ENGINE-$TTS_VOICE-$TTS_VOLUME" | cksum | cut -d" " -f1).txt") && echo "$TTS_STRING" > "$IN_FILE"
#if is_array IN_FILE ; then echo IN_FILE is an array; fi
#[[ -z "${TTS_STRING}" && ! -f "${IN_FILE}" && ! -d "${IN_FILE}" ]] && echo "ERROR: ${IN_FILE}: invalid input file/directory - abort." >&2 && exit 1
# if input file is a directory then process all files creating the main array
if [ -d "${IN_FILE}" ]
then
# when input is a directory we never megamixing
MEGAMIX="no"
# only the regular files directly inside the directory are taken (no recursion)
IN_FILE_ARRAY=($(find "$IN_FILE" -maxdepth 1 -type f))
else
# parse IN_FILE array and recreate special variables
IN_FILE_ARRAY=("${IN_FILE[@]}")
fi
# destroy IN_FILE and recreate the array
#echo IN_FILE_ARRAY=${IN_FILE_ARRAY[*]}
unset IN_FILE
# recreate the array parsing the command line
ID=0 ; for IN in ${IN_FILE_ARRAY[@]}; do
let ID+=1
if echo "$IN" | grep ":" >/dev/null 2>&1
then
# parse input files when provided with megamix syntax
# (engine:voice:file, split on the ':' char)
TTS_ENGINE[$ID]="$(echo "$IN" | awk -F ":" '{print $1}')"
TTS_VOICE[$ID]="$(echo "$IN" | awk -F ":" '{print $2}')"
IN_FILE[$ID]="$(echo "$IN" | awk -F ":" '{print $3}')"
#echo ID: $ID - ${IN_FILE[$ID]}
else
# parse input files when provided with default syntax
TTS_ENGINE[$ID]="$TTS_ENGINE"
TTS_VOICE[$ID]="$TTS_VOICE"
IN_FILE[$ID]="$IN"
fi
# verify if the specified input file exist
[ ! -e "${IN_FILE[$ID]}" ] && echo "ERROR: '${IN_FILE[$ID]}' invalid input file/directory - abort." >&2 && exit 1
done
# TTS_LENGTH auto splitting feature
# recreate the array if the engine have chars limit, splitting the input file into many sub files
if [ ! -z "$TTS_LENGTH" ]; then
#for ((ID=1;ID<=${#IN_FILE[@]};ID++)); do
# NOTE: ID is incremented only for txt files, other input files are skipped by this loop
ID=0 ; for IN in ${IN_FILE[@]}; do
if [ "$(print_ext $IN)" = "txt" ]; then
let ID+=1
# destroy the original input file and use the splitted version
unset IN_FILE[$ID]
#echo
#echo ID=$ID IN_FILE=${IN_FILE[@]}
#echo ID=$ID IN_FILE_ARRAY=${IN_FILE_ARRAY[@]}
TTS_LENGTH_CUR="$(cat "$IN" | parse_tts_string | count_chars)"
if [ $TTS_LENGTH_CUR -ge $TTS_LENGTH ]; then
# text too long for the engine: split it and megamix the pieces
MEGAMIX="yes"
split_tts_file "$IN" $ID
else
# short enough: keep the file as is
TTS_ENGINE[$ID]="${TTS_ENGINE[$ID]}"
TTS_VOICE[$ID]="${TTS_VOICE[$ID]}"
IN_FILE_SPLIT+=($IN)
fi
fi
done
# recreate again the array with all engines, voices and splitted input files
if [ ! -z "${IN_FILE_SPLIT[*]}" ]; then
ID=0 ; for IN in ${IN_FILE_SPLIT[@]}; do
let ID+=1
# fix me, this doesn't works very well when using the special input file filter: engine:voice:file
[ -z "${TTS_ENGINE[$ID]}" ] && TTS_ENGINE[$ID]="$TTS_ENGINE"
[ -z "${TTS_VOICE[$ID]}" ] && TTS_VOICE[$ID]="$TTS_VOICE"
IN_FILE[$ID]="$IN"
done
fi
fi
# verify if a music file is given and the file exist
[[ ! -z "$MUSIC_FILE" && ! -e "$MUSIC_FILE" ]] && echo "ERROR: the music file '$MUSIC_FILE' doesn't exist or isn't readable" && exit 1
# use MUSIC_VOLUME as TTS_VOLUME when MUSIC_FILE and TTS_VOLUME is not specified
[[ -z "$MUSIC_FILE" && -z "$TTS_VOLUME" && ! -z "$MUSIC_VOLUME" ]] && TTS_VOLUME="$MUSIC_VOLUME"
# we will use the tmp base dir for reusability purpose, because we will speak the audio sound using the internal soundcard and the synthesized audio
# will be used directly again if exist
[ -z "${OUT_DIR}" ] && OUT_DIR="$TMP_DIR_BASE" && echo "INFO: no output directory specified, redirecting playback to system soundcard"
[ ! -d "${OUT_DIR}" ] && echo "ERROR: invalid output directory specified: '${OUT_DIR}' - abort." >&2 && exit 1
[ ! -w "${OUT_DIR}" ] && echo "ERROR: not writable output directory specified: '${OUT_DIR}' - abort." >&2 && exit 1
# if we give a music file, or change the output volume then remix always
[[ ! -z "$MUSIC_FILE" || ! -z "$TTS_VOLUME" ]] && REMIX="yes" || REMIX="no"
# if you pass more that one file as input then megamix them
[[ "$MEGAMIX" != "no" && ${#IN_FILE[@]} -ge 2 ]] && REMIX="yes" && MEGAMIX="yes"
}
# all begin here
# Run the whole job: create the temporary directory, validate the environment,
# convert the music file when needed, then assemble every input file (one
# output per input, or one single megamix output) and finally play it back or
# export it to the PBX audio formats.
# Globals read: MUSIC_FILE, MEGAMIX, REMIX, PLAYBACK, EXPORT_AUDIO,
#               OUTPUT_OVERWRITE, OUTPUT_REMOVE, OUT_DIR, TMP_DIR, TTS_EXT, IN_FILE[]
# Globals written: MUSIC_FILE (replaced by its converted copy), ID, IN
main() {
# disable system wide command aliases if configured
unalias mv cp 2>/dev/null
# make temporary files
make_tmp
# verify the environment if all is good before starting conversion
check_env
# convert music file before assembling
# (only when its extension is not wav; the converted copy lives in the temporary directory)
if [[ ! -z "$MUSIC_FILE" && "$(print_ext $MUSIC_FILE)" != "wav" ]]; then
convert_file "$MUSIC_FILE" "$TMP_DIR/$(print_name $MUSIC_FILE).44khz.wav"
MUSIC_FILE="$TMP_DIR/$(print_name $MUSIC_FILE).44khz.wav"
fi
# assemble all given files processing array contents
if [ "$MEGAMIX" != "yes" ] ; then
# one output file for every input file
ID=0 ; for IN in "${IN_FILE[@]}"; do
let ID+=1
#debug
rulem " [$ID/${#IN_FILE[@]}] - $NAME Name:[$(print_name $IN)] " = 80
# define the output file name
# (remixed files are always 44khz wav, otherwise the TTS engine extension is kept)
[ "$REMIX" = "yes" ] && local OUT="$OUT_DIR/$(print_name $IN).44khz.wav" || local OUT="$OUT_DIR/$(print_name $IN).$TTS_EXT"
# deprecated
#if [[ "$PLAYBACK" = "yes" ]]; then
# to speed up the playback process don't convert and use the wav version, directly use the tts_engine encoding instead
# [ "$REMIX" = "yes" ] && local OUT="$OUT_DIR/$(print_name $IN).44khz.wav" || local OUT="$OUT_DIR/$(print_name $IN).$TTS_EXT"
#elif [[ "$EXPORT_AUDIO" = "yes" && "$OUTPUT_OVERWRITE" = "no" ]]; then
# # to speed up the playback process don't convert and use the wav version, directly use the tts_engine encoding instead
# [ "$REMIX" = "yes" ] && local OUT="$OUT_DIR/$(print_name $IN).44khz.wav" || local OUT="$OUT_DIR/$(print_name $IN).$TTS_EXT"
# else
# local OUT="$TMP_DIR/$(print_name $IN).44khz.wav"
#fi
if [[ -e "$OUT" && "$OUTPUT_OVERWRITE" = "no" ]]; then
# resynth the audio file only if the previous saved file was corrupted or with wrong format
if [ "$(show_mime_brief "$OUT")" != "audio" ]; then
echo "File exist - input:[$OUT] status:[corrupted] action:[resinthesizing]"
assemble_file "$IN" "$OUT"
else
#echo "File exist - input:[$OUT] size:[$(show_size "$OUT")] status:[good]"
echo "File exist - input:[$OUT]"
fi
else
# reset the output file name path
# (when exporting, the intermediate file is always the 44khz wav version)
[ "$EXPORT_AUDIO" = "yes" ] && local OUT="$(print_path $OUT)/$(print_name $OUT).44khz.wav"
assemble_file "$IN" "$OUT"
fi
# playback and export are mutually exclusive
if [[ "$PLAYBACK" = "yes" && "$EXPORT_AUDIO" = "no" ]]; then
playback_file "$OUT"
elif [[ "$PLAYBACK" = "no" && "$EXPORT_AUDIO" = "yes" ]]; then
export_file "$OUT" "$OUT_DIR"
fi
# if required, remove the synthesized file
[ "$OUTPUT_REMOVE" = "yes" ] && echo "Removing - output:[$OUT]" && rm -f "$OUT"
rulem " [$ID/${#IN_FILE[@]}] " - 80
done
else
# the megamix file name is always taken from the first input name
# (saved into OUT_DIR when playing back, otherwise into the temporary directory)
[ "$PLAYBACK" = "yes" ] && local OUT_MEGAMIX="$OUT_DIR/$(print_name ${IN_FILE[1]}).44khz.wav" || local OUT_MEGAMIX="$TMP_DIR/$(print_name ${IN_FILE[1]}).44khz.wav"
if [[ ! -e "$OUT_MEGAMIX" || "$OUTPUT_OVERWRITE" = "yes" ]]; then
# remix all given files in one big file
# (assemble_file does the real megamix when it receives the last file)
ID=0 ; for IN in "${IN_FILE[@]}"; do
let ID+=1
rulem " [$ID/${#IN_FILE[@]}] - $NAME Name:[$(print_name $IN)] == MEGAMIX " = 80
assemble_file "$IN" "$OUT_MEGAMIX"
rulem " [$ID/${#IN_FILE[@]}] " - 80
done
fi
# one more step in the progress counter for the final playback/export
let ID+=1
rulem " [$ID/${#IN_FILE[@]}] - $NAME Name:[$(print_name $IN)] " = 80
if [[ "$PLAYBACK" = "yes" && "$EXPORT_AUDIO" = "no" ]]; then
playback_file "$OUT_MEGAMIX"
elif [[ "$PLAYBACK" = "no" && "$EXPORT_AUDIO" = "yes" ]]; then
export_file "$OUT_MEGAMIX" "$OUT_DIR"
fi
# if required, remove the synthesized file
[ "$OUTPUT_REMOVE" = "yes" ] && echo "Removing - output:[$OUT_MEGAMIX]" && rm -f "$OUT_MEGAMIX"
rulem " [$ID/${#IN_FILE[@]}] " - 80
fi
}
# Print a list of ready to use command line examples.
# Globals: NAME, TTS_ENGINE, TTS_VOICE (read)
# Outputs: the examples on stdout
#
# Every double quote shown in the examples is escaped: an unescaped one would
# close the string, leaving the engine and voice values unquoted and printing
# the examples without their quotes.
usage_examples() {
  echo "$NAME usage examples:
--------------------------------------------------------------------------------
# simple command line using script default variables (must set PLAYBACK=yes into $NAME to works)
$NAME Hello World. This is a test message
# sinthesize using standard input as source, useful when using an external command piped to $NAME
echo \"Welcome home, mr Stark\" | $NAME -
# sinthesize using standard input as source, and adding other $NAME options
echo Good morning boss | $NAME -w -m /tmp/music.mp3 -P 10 -F 7 -
# specifing engine and language and reusing the synthesized file for every run (much faster execution)
$NAME -e \"$TTS_ENGINE\" -v \"$TTS_VOICE\" -t \"This is only a test\"
# same example but resynthesizing every time the text and overwriting the destination file
$NAME -e \"$TTS_ENGINE\" -v \"$TTS_VOICE\" -t \"This is only a test\" -w
# change the output soundcard volume to 80%
$NAME -e \"$TTS_ENGINE\" -v \"$TTS_VOICE\" -t \"This is only a test\" -w -W 80
# batch creating the pbx's audio file using an input txt file
$NAME -i /tmp/src/company-welcome.txt -o /tmp/dst -m /tmp/music.mp3 -e \"$TTS_ENGINE\" -v \"$TTS_VOICE\" -p 7 -P 10 -F 7 -S 10 -x
# batch creating the pbx's audio file using as input a directory containing *.txt files
$NAME -i /tmp/src -o /tmp/dst -m /tmp/music.mp3 -e \"$TTS_ENGINE\" -v \"$TTS_VOICE\" -p 7 -P 10 -F 7 -S 10 -x
# synthesize multiple file with different languages using a single pipe line (named MEGAMIX feature)
$NAME -i naturalreaders:Luca:txt/it/company-it-welcome.txt -i voicerss:en-gb:txt/en/company-en-welcome.txt -o sounds -m audio/musiconhold.mp3 -p 7 -P 10 -F 7 -S 10 -x"
}
# command prompt menu
# Print the help screen: every option with its description and, where it makes
# sense, the value currently in use.
# Globals: NAME, DESCRIPTION, VERSION, VERSION_DATE and the option variables (read)
# Outputs: the help text on stdout
usage(){
  local progname=$0
  # the -d line shows the configured playback device, or "default" when none is set
  echo "$NAME - $DESCRIPTION
written by Ugo Viti
version: $VERSION released: $VERSION_DATE
usage: $progname [options]
Option: Argument: Description:
--------------------------------------------------------------------------------
Input/Output file management:
- Synthesize using standard input
-t Text string to synthesize
-i Input txt/wav/mp3 file or directory to synthesize/convert (this disable '-t' option)
-o Output directory to save synthesized and assembled files (current: $OUT_DIR)
TextToSpeech management:
-e TTS voice engine (current: $TTS_ENGINE)
-v TTS voice language (current: $TTS_VOICE)
-T TTS volume (current: $([ ! -z "$TTS_VOLUME" ] && echo -e -n "$TTS_VOLUME" || echo -e -n "none) (use a number from 0.01 to 1"))
-L List usable TTS engines
-l List usable TTS voices
Background music management:
-m Input mp3/wav audio file to use as background music
-S Start background music after specified time (current: $MUSIC_START sec)
-p Pad the begin of the TTS audio using the specified time (current: $TTS_PAD_BEGIN sec)
-P Pad the end of the TTS audio using the specified time (current: $TTS_PAD_END sec)
-F Fade the begin and the end of the wav music using the specified time (current: $MUSIC_FADE sec)
-M Background music volume (current: $([ ! -z "$MUSIC_VOLUME" ] && echo -e -n "$MUSIC_VOLUME" || echo -e -n "none) (use a number from 0.01 to 1"))
Soundcard audio playback options:
-b Playback the generated files using the soundcard speakers (current: $PLAYBACK)
-B Playback in background detaching the $NAME command (current: $PLAYBACK_BACKGROUND) (only if PLAYBACK=yes)
-k Playback engine (current: $PLAYBACK_ENGINE)
-K Playback engine options (current: $([ ! -z "$PLAYBACK_ENGINE_OPTS" ] && echo -e -n "$PLAYBACK_ENGINE_OPTS" || echo -e -n "none"))
-d Playback output device (current: $([ -z "$PLAYBACK_DEVICE" ] && echo -e -n "default" || echo -e -n "$PLAYBACK_DEVICE"))
-D List available playback output devices
-W Change the playback soundcard output volume (current: $([ ! -z "$PLAYBACK_VOLUME" ] && echo -e -n "$PLAYBACK_VOLUME" || echo -e -n "none) (use a number from 0 to 100"))
-r Remove synthesized file after playback (current: $OUTPUT_REMOVE) (no = keep the generated high quality audio file for later usage)
Output file options:
-x Export the generated audio file into PBX formats (current: $EXPORT_AUDIO) (yes = this will assume PLAYBACK=no and OUTPUT_OVERWRITE=yes)
-X Export using the following audio formats (current: \"$EXPORT_AUDIO_FORMATS\") (valid only if EXPORT_AUDIO=yes)
-w Overwrite the synthesized file (current: $OUTPUT_OVERWRITE)
-s Split destination audio files into audio formats sub directories (current: $EXPORT_AUDIO_FORMAT_SUBDIR) (valid only if EXPORT_AUDIO=yes)
Misc options:
-C Write/Overwrite the config file using the command arguments or the default izsynth variables (file: $IZSYNTH_CONFIG_FILE)
-h Display this help menu
-H Display the '$TTS_ENGINE' TTS Engine help menu
-E Display $NAME usage examples
-V Display $NAME version
"
}
###################################################################
#################### default program menu and arguments validations
NO_ARGS=0
E_OPTERROR=65
# invoked without any command line argument: explain the usage and stop
if (( $# == NO_ARGS )); then
  usage
  exit "$E_OPTERROR"
fi
# Usage: scriptname -options
# Note: dash (-) necessary
# Parse the command line options, in the order they are given.
# Options followed by ':' in the getopts string take an argument, the others
# are flags. Several flags (-s -x -w -b -B) don't set a fixed value: they flip
# the current yes/no value of their variable, so the result depends on the
# defaults. The informational options (-l -L -D -C -h -H -E -V) do their job
# and exit immediately.
while getopts "t:i:o:m:S:p:P:F:M:T:e:v:d:k:K:W:X:slLrxwbBDChHEV" option ; do
case $option in
t) # text string to synthesize
TTS_STRING="${OPTARG}"
# parse stdin if the special char - is used
[ "$TTS_STRING" = "-" ] && TTS_STRING="$(cat -)"
;;
i) # input file or directory to synthesize or convert (create an array with the input files path)
IN_FILE+=("$OPTARG")
# parse stdin if the special char - is used
# ("$IN_FILE" is the first element of the array: only a leading '-i -' is detected)
[ "$IN_FILE" = "-" ] && unset IN_FILE && TTS_STRING="$(cat -)"
;;
o) # output destination directory to save synthesized text
OUT_DIR="${OPTARG}"
;;
m) # wav file to use as background music
MUSIC_FILE="${OPTARG}"
;;
S) # start background music after specified seconds
MUSIC_START="${OPTARG}"
;;
p) # pad the begin of the TTS audio using the specified time
TTS_PAD_BEGIN="${OPTARG}"
;;
P) # pad the end of the TTS audio using the specified time
TTS_PAD_END="${OPTARG}"
;;
F) # fade the begin and the end of the wav music using the specified time
MUSIC_FADE="${OPTARG}"
;;
M) # music volume attenuation
MUSIC_VOLUME="${OPTARG}"
;;
T) # TTS volume attenuation
TTS_VOLUME="${OPTARG}"
;;
e) # use specified voice synthesizer engine
TTS_ENGINE="${OPTARG}"
;;
v) # use specified voice pack
TTS_VOICE="${OPTARG}"
;;
s) # split destination audio files into audio formats sub directories
# (toggle: flips the current yes/no value)
[ "$EXPORT_AUDIO_FORMAT_SUBDIR" != "yes" ] && EXPORT_AUDIO_FORMAT_SUBDIR="yes" || EXPORT_AUDIO_FORMAT_SUBDIR="no"
;;
l) # list installed TTS voice pack
# (of the engine selected so far: a '-e' given after '-l' is not considered)
tts_engines_load
tts_engines_check $TTS_ENGINE
tts_engine_$TTS_ENGINE voices
exit 0
;;
L) # list installed TTS voice engines synthesizer
tts_engines_list
exit 0
;;
W) # change the playback volume
PLAYBACK_VOLUME="${OPTARG}"
;;
r) # remove synthesized file after playback
OUTPUT_REMOVE="yes"
;;
x) # assemble and export converted PBX recordings
# (toggle of EXPORT_AUDIO; the playback is always disabled)
[ "$EXPORT_AUDIO" != "yes" ] && EXPORT_AUDIO="yes" || EXPORT_AUDIO="no"
PLAYBACK="no"
;;
X) # exported file format
xEXPORT_AUDIO_FORMATS="${OPTARG}"
# the requested value must match, as whole words, the current list of formats
if [ -z "$(echo "$EXPORT_AUDIO_FORMATS" | grep -w "$xEXPORT_AUDIO_FORMATS")" ]; then
echo "Invalid audio format specified: $xEXPORT_AUDIO_FORMATS"
echo "Supported audio formats: \"$EXPORT_AUDIO_FORMATS\""
exit 1
else
EXPORT_AUDIO_FORMATS="$xEXPORT_AUDIO_FORMATS"
fi
;;
w) # overWrite the synthesized file
# (toggle: flips the current yes/no value)
[ "$OUTPUT_OVERWRITE" != "no" ] && OUTPUT_OVERWRITE="no" || OUTPUT_OVERWRITE="yes"
;;
b) # playback the generated files using the soundcard speakers
# (toggle: flips the current yes/no value)
[ "$PLAYBACK" != "yes" ] && PLAYBACK="yes" || PLAYBACK="no"
;;
B) # Playback in background detaching the command
# (toggle: flips the current yes/no value)
[ "$PLAYBACK_BACKGROUND" != "yes" ] && PLAYBACK_BACKGROUND="yes" || PLAYBACK_BACKGROUND="no"
;;
d) # hardware playback device
PLAYBACK_DEVICE="${OPTARG}"
;;
D) # list hardware playback devices (alsa-utils package must be installed)
# helper printing one "device description" line for every card reported by 'aplay -l'
list_playback_devices() {
echo "Device: Description:"
echo "------- ------------"
aplay -l | grep ^card | while read devices ; do
card=$(echo $devices | awk '{print $2}' | tr -d ":")
device=$(echo $devices | awk -F", " '{print $2}' | awk '{print $2}' | tr -d ":")
description=$(echo $devices | sed 's/\[/(/g' | sed 's/\]/)/g' | awk -F "[()]" '{print $4}' | sed 's/ /-/g')
echo "alsa:device=hw=$card.$device $description"
done
# list all user custom software devices
cat $HOME/.asoundrc 2>/dev/null | grep ^pcm | awk '{print "alsa:device="$1}' | sed 's/pcm.//'
}
list_playback_devices | column -c1 -t
exit 0
;;
k) # playback engine
PLAYBACK_ENGINE="${OPTARG}"
;;
K) # playback engine extra options
PLAYBACK_ENGINE_OPTS="${OPTARG}"
;;
C) # write/overwrite user config file
# (an existing file is replaced only after confirmation; the exit status is always 0)
if [ -e "$IZSYNTH_CONFIG_FILE" ]; then
echo "WARNING: the user config file '$IZSYNTH_CONFIG_FILE' already exist"
confirm "Do you want overwrite it? [y/N]:" && write_default_config
[ $? = 0 ] && echo "'$IZSYNTH_CONFIG_FILE' config file written successfully" || echo "ERROR writing '$IZSYNTH_CONFIG_FILE' config file"
else
write_default_config
[ $? = 0 ] && echo "'$IZSYNTH_CONFIG_FILE' config file written successfully" || echo "ERROR writing '$IZSYNTH_CONFIG_FILE' config file"
fi
exit 0
;;
h) # display this help menu
usage
exit 0
;;
H) # display engine help menu
tts_engine_$TTS_ENGINE help
exit 0
;;
E) # display usage examples
usage_examples
exit 0
;;
V) # display current version
echo "$VERSION"
#echo $NAME version: $VERSION released: $VERSION_DATE
exit 0
;;
*)
# unknown option, or missing argument of an option that requires it
usage
echo "invalid switch specified - abort." >&2
exit 1
;;
esac
done
# Discard the option arguments already processed, leaving only the operands.
shift "$((OPTIND - 1))"
# Pick the text to synthesize from the operands:
#  - a first operand of "-" means the text is read from standard input
#  - otherwise all operands joined together become the text, but only when
#    neither a TTS string nor an input file has been set already
case "$1" in
  -)
    TTS_STRING="$(cat)"
    ;;
  *)
    if [[ -n "$*" && -z "$TTS_STRING" && -z "$IN_FILE" ]]; then
      TTS_STRING="$*"
    fi
    ;;
esac
# izsynth starts running here
main
# If no error occurred (main returned instead of exiting), clean up all
# temporary files and directories when the script terminates.
# The handler is single-quoted so that ${TMP_DIR} is expanded, still inside
# double quotes, when the handler runs: a path containing whitespace stays a
# single argument instead of breaking the trap command line.
# An empty TMP_DIR means there is nothing to remove and counts as success.
trap 'if [ -z "${TMP_DIR}" ] || rm -rf -- "${TMP_DIR}"; then exit 0; else exit 1; fi' 0
# END
###########################################################################################
# License:
# ============================================================================
# Written by: Ugo Viti
# visit http://www.initzero.it for commercial support
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# ============================================================================
# QUICK INSTALL:
# ============================================================================
# CentOS: yum install -y sox lame mplayer curl espeak wine
# Debian: sudo apt-get install -y sox lame mplayer curl espeak wine
# ============================================================================
# TODO:
# ============================================================================
# - add support for automatically no keep long synthesized sentences (>= 255 chars)
# - add support for http://www.vocalware.com (investigating licence issues)
# - add support for http://www.readspeaker.com (investigating licence issues)
# - add support for http://www.acapela-group.com (investigating licence issues)
# ============================================================================
# ChangeLog:
# ============================================================================
# 5.0 - 20190213
# - Restored naturalreaders engine support
# 4.9 - 20181114
# - Implemented generic playback engine support. You can use -k to specify your playback command, and -K for the command options
# - fix: when exporting pbx files doesn't rename 44khz.wav to mp3
# 4.8 - 20170909
# - Implemented cksum hash file names when creating tts files from cli (thanks to Jean-Marc Leglise for the idea). So every txt/tts file will always have an unique file name based on contents.
# - updated engine: naturalreaders
# - updated engine: google
# - updated engine: ispeech (now needs an API key)
# - now tts voices can be case insensitive
# - renamed VOICERSS_APIKEY config field to TTS_ENGINE_VOICERSS_APIKEY
# - misc optimizations
# 4.7 - 20170428
# - renamed command line option: -x to -b
# - renamed command line option: -b to -B
# - renamed PLAYBACK_OVERWRITE variable to OUTPUT_OVERWRITE
# - renamed PLAYBACK_REMOVE variable to OUTPUT_REMOVE
# - new config file variable: EXPORT_AUDIO and new option switch: -x (assemble and save PBX recordings)
# - new config file variable: EXPORT_AUDIO_FORMATS and new option switch: -X
# - fix print_name function
# 4.6 - 20161228
# - some sox command optimizations
# - add sox "lowpass 4000" when converting to pbx format for better results
# 4.5 - 20161218
# - introduced playback engines (currently implemented mplayer only)
# - many script refactoring
# 4.4 - 20161028
# - implemented -C option to write/overwrite the izsynth user config file
# - some sanity checks to detect the $HOME directory of user
# 4.3 - 20160618
# - implemented PLAYBACK_DEVICE and new -d (specify the output device) -D (list available output devices) options
# - it is now possible to override the playback command (-k) and playback command options (-K)
# - help menu enhancements
# 4.2 - 20160524
# - don't create tts files name longer than 64 chars
# - dynamic voices listing for fromtexttospeech engine (not more static list)
# - some code cleanup
# 4.1 - 20160509
# - added support for auto splitting sentences longer than the supported engine chars
# each engine now has a TTS_LENGTH variable if engine usage is limited. This feature makes it possible to synthesize very long sentences
# - sped up the synthesizing process by avoiding the intermediate conversion to wav format when not remixing or megamixing (this gained 500ms in 4 seconds playback)
# - more intuitive feedback when synthesizing
# - changed default engine to 'naturalreaders' and voice 'Peter' (english language)
# - restored google tts support (limited to 160 chars max)
# - added -r option for removing synthesized file after playback
# - added support for stdin as input stream, example:
# echo this is only a test | izsynth -
# - ispeech tts engine added (http://www.ispeech.org/)
# 4.0 - 20160427
# - added external engine support (put your TTS engines into $HOME/.config/izsynth/engines/YOURENGINE.conf)
# - changed default engine to voicerss (before using it, get an APIKEY! type izsynth -H for more info)
# - added tts_engine help menu
# - enhanced output logging
# - minor script syntax fixes
# - MEGAMIX feature added (read the examples with -E switch)
# - major izsynth restructuration with added smartness ;)
# - now is possible to change on the fly the output volume for string TTS
# - sox command version check with extra options workarounds
# - added support for voicerss TTS engine
# - added external config file to override the script variables and use external services APIKEYs
# 3.0 - 20160302
# - project rename from ivrwizard to izsynth
# - some cleanup and restructuration
# - added option -G to sox command to remove samples clipping
# 2.2 - 20151219
# - many improvements and sanity checks
# - added mime type log of generated audio file and made sanity checks to discover synthesizing problems
# - added support for NaturalReader engine
# - added support for FromtextToSpeech (Using IVONA Voices) engine
# - NB. because api change, right now the google voice support is not reliable
# 2.1 - 20151024
# - implemented PLAYBACK_BACKGROUND variable to detach the process when playing the voice
# (useful when used as TTS engine inside a home automation system, so that command execution is not blocked)
# 2.0 - 20150908
# - direct playback to audio soundcard support
# - major code cleanup and functions rewrite
# - google voice support (via google translate service)
# - fixes for sox >= 14.4.x
# 1.5 - 20150805
# - changed sapi5 voice assignment (now you must use the name and not the voice id number)
# 1.4 - 20130221
# - mp3 conversion fixes
# 1.3 - 20120627
# - first public version
# ============================================================================