viewtopic.php?f=3&t=6841&p=44362
viewtopic.php?f=3&t=9365&p=60816
viewtopic.php?f=3&t=7785&p=51864
viewtopic.php?f=3&t=6917&p=44979
and some more.
They use temporary VBScripts, JScript invoked by mshta.exe, or a short Batch-JScript hybrid. However, the Speech API supports a lot of settings that the existing code here at DosTips does not yet take into account. E.g. if I use those scripts, the voice token defaults to German on my machine, and English text is then pronounced completely wrong, as you can imagine. It's quite simple to change that. But once I had dived in, I thought I could make the most of it. The result is this little "speak.bat" script. I tried to avoid overwhelming you with too many options. That's why the script programmatically detects what kind of source you passed with the only required option, /s.
The "speak_test.bat" script displays the help message, and its 9 examples should give you an idea of the capabilities.
speak.bat
Code: Select all
@if (0)==(0) (if "%~1" neq "" (for /f "tokens=* delims=:" %%H in ('findstr.exe /bc:"::" "%~fs0"') do @echo(%%H)|more.com
for %%C in (^"cscript.exe //nologo //e:jscript "%~fs0" ^") do @(set speak=%%~C&set speak_async=start /b %%~C))&goto :eof @end
var args=WScript.Arguments.Named, s=args('s'),
v=parseInt(args('v')), r=parseInt(args('r')), p=parseInt(args('p')),
n=args('n') && args('n').toLowerCase(), g=args('g'), l=parseInt(args('l'), 16),
fs=new ActiveXObject('Scripting.FileSystemObject'),
vc=new ActiveXObject('SAPI.SpVoice'), vt=vc.GetVoices(g && 'Gender='+g),
isFilename=0x4, parseSapi=0x80, parseSsml=0x100;
if(v>-1 && v<100) vc.Volume=v;
if(r>-11 && r<11) vc.Rate=r;
if(p>0 && p<3) vc.Priority=p;
if(n || g || l) for(var i=0; i<vt.Count; ++i) {
if(!((n && vt.Item(i).GetAttribute('Name').toLowerCase().indexOf(n)==-1) || (l && parseInt(vt.Item(i).GetAttribute('Language'), 16)!=l))) {
vc.Voice=vt.Item(i);
break;
}
}
if(fs.FileExists(s)) {
if(fs.GetExtensionName(s).toLowerCase()=='.wav') {
var fs=new ActiveXObject('SAPI.SpFileStream');
fs.Open(s);
vc.SpeakStream(fs);
}
else if(s.toLowerCase()=='conin$') vc.Speak(WScript.StdIn.ReadAll(), parseSapi|parseSsml);
else vc.Speak(fs.GetAbsolutePathName(s), isFilename|parseSapi|parseSsml);
WScript.Quit();
}
vc.Speak(new ActiveXObject('WScript.Shell').Environment('Process')(s) || s, parseSapi|parseSsml);
/* --- help ---
::Call "speak.bat" once to initialize variables %speak% and %speak_async%.
::Use %speak% for synchronous and %speak_async% for asynchronous execution as
:: described here:
::%speak%|%speak_async% /s:"<source>" [/v:<volume>] [/r:<rate>]
:: [/p:<priority>] [/n:<voice name>] [/g:<gender>] [/l:<language ID>]
::
:: <source> If <source> is the name of an existing .wav file then the
:: content of the file is played,
:: else if <source> is "CONIN$" then the text to be spoken is read
:: from the standard input stream,
:: else if <source> is the name of another existing file then the
:: content of the file is the text to be spoken,
:: else if <source> is the name of a defined environment variable
:: then the content of the variable is the text to be spoken,
:: else <source> is the text to be spoken.
:: The text to be spoken may consist of
:: - plain text,
:: - or SAPI XML TTS markup, - see:
:: https://docs.microsoft.com/en-us/previous-versions/windows/desktop/ee431815(v=vs.85)
:: - or SSML 1.0 markup. - see:
:: https://www.w3.org/TR/2004/REC-speech-synthesis-20040907/
:: <volume> Percentage (0..100) of the currently set system volume to be
:: used. The default is 100. Has no effect for .wav files played.
:: <rate> Speaking rate (-10..10) of the text to be spoken. The default
:: is 0. Has no effect for .wav files played.
:: <priority> Priority level (0..2) of the voice.
:: 0 (default) Normal voice. Streams spoken by a normal voice are
:: appended to the voice queue.
:: 1 Alert voice. An alert voice will interrupt a normal voice
:: for the time the alert voice speaks.
:: 2 Over voice. Streams spoken by an over voice don't interrupt
:: other voices, but are merged with the voices of lower
:: priorities.
:: <voice name> Unique part of the name attribute of the voice token to be used
:: (such like "Zira" or "David").
:: <gender> Gender of the voice token (either "male" or "female").
:: <language ID> Language identifier of the voice token. - see:
:: https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings
::
::To explore installed voice tokens on your computer see registry key:
:: HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens
::Values defined in the command line options are used to update the basic
:: settings of the voice object. Attributes specified in markup text take
:: priority over these settings though.
::If all values of the specified options /n, /g, and /l match at least one
:: installed voice token then the first matching voice token is used. Otherwise
:: the default voice token is used.
*/
Code: Select all
@echo off
:: Demo for "speak.bat": shows the help text and then runs nine examples that
:: exercise the different kinds of source accepted by the /s option.

:: Initialize the %speak% and %speak_async% variables and display the help
:: message (passing any argument triggers the help output).
call "speak.bat" help
:: Alternative: initialize the %speak% and %speak_async% variables without
:: displaying the help message.
::call "speak.bat"
:: loud enough for the beginning, I don't want to make you jump ;)
set "volume=30"
echo *** examples ***
echo 1. play WAVE file
%speak% /s:"%SystemRoot%\Media\tada.wav" /v:%volume%
echo(
echo 2. plain text
%speak% /s:"That's just a plain-text example." /v:%volume% /g:female /l:409
echo(
echo 3. SAPI TTS XML
:: (Use single quotes for the attribute values since double quotes are removed in JScript arguments.)
%speak% /s:"<voice required='Gender=Female;Language=409'><volume level='%volume%' />Another <pitch middle='-4'>example</pitch>.<silence msec='100' /> This time using the Speech Application Programming Interface <pitch middle='5'>text-to-speech</pitch> <pitch middle='-3'>XML</pitch>.</voice>"
echo(
echo 4. SSML 1.0
:: (Use single quotes for the attribute values since double quotes are removed in JScript arguments.)
:: The SSML namespace is an identifier and is defined with the http scheme.
%speak% /s:"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice gender='female'><prosody volume='%volume%'>And <prosody pitch='+35%%'>another</prosody> one.<break time='100ms' /> The <prosody pitch='+25%%'>Speech Synthesis Markup</prosody> Language is used here.</prosody></voice></speak>"
echo(
echo 5. environment variable
:: The variable NAME is passed; speak.bat looks up its content.
set "txtvar=This rapidly spoken text was saved in an environment variable."
%speak% /s:txtvar /v:%volume% /r:2 /n:David
echo(
echo 6. file (SSML in this case)
:: Write a temporary SSML document to the current directory, speak it, delete it.
>"voice.ssml" (
echo ^<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US"^>
echo ^<prosody volume="%volume%"^>
echo ^<audio src="%SystemRoot%\Media\chimes.wav" /^>
echo ^<voice gender="male"^>
echo ^<s^>SSML text was ^<prosody pitch="+25%%"^>read from a^</prosody^> file.^</s^>
echo ^</voice^>
echo ^<voice gender="female"^>
echo ^<s^>Ain't that ^<prosody rate="-30%%"^>^<emphasis level="strong"^>impressive?^</emphasis^>^</prosody^>^</s^>
echo ^</voice^>
echo ^</prosody^>
echo ^</speak^>
)
%speak% /s:"voice.ssml"
del "voice.ssml"
echo(
echo 7. redirection to StdIn (SAPI TTS XML in this case)
:: The block is piped, so it is parsed twice; hence the tripled carets.
(
echo ^^^<voice required="Gender=Male;Language=409"^^^>
echo ^^^<volume level="%volume%" /^^^>
echo ^^^<pitch middle="8"^^^>This^^^</pitch^^^> text is redirected to the standard input ^^^<pitch middle="-5"^^^>stream^^^</pitch^^^>.
echo ^^^</voice^^^>
)|%speak% /s:conin$
echo(
echo 8. asynchronous execution (the TIMEOUT prompt will appear before this text has been spoken)
%speak_async% /s:"And this text is spoken while the command processor already continued with the next line of code." /v:%volume% /g:male /l:409
timeout /t 3
echo 9. speak over (SAPI TTS XML)
:: Priority 2 (over voice) is merged with the still-running voice of example 8.
%speak% /s:"<voice required='Gender=Female;Language=409'>I'm <emph>over you</emph><rate speed='-5'> boy!</rate></voice>" /v:%volume% /p:2
echo(
pause
- The first example plays a WAVE file. But the speech API is no replacement for a media player. It only supports the WAVE format, and the purpose of such sound sequences is rather to decorate spoken text, e.g. to draw attention or to add emphasis.
- The fifth example uses the /n option to specify the voice name. If you still hear your default voice or a female voice, you probably don't have a "David" voice token installed. Follow the registry path that I wrote in the help text to explore the installed voice tokens on your computer. In general it might be better to specify a voice by its gender and language.
Steffen