diff --git a/server/jsonic.py b/server/jsonic.py index f656f8b..ed7cf23 100644 --- a/server/jsonic.py +++ b/server/jsonic.py @@ -172,7 +172,7 @@ def post(self): self.start_time = time.time() args = json_decode(self.request.body) pool = self.application.settings['pool'] - engine = synthesizer.get_class(args.get('engine', 'espeak')) + engine = synthesizer.get_class(args['properties'].get('engine', 'espeak')) if engine is None: self.send_json_error({'description' : 'unknown speech engine'}) return @@ -262,7 +262,7 @@ def get(self, name=None): :param name: str ''' if name is None: - names = synthesizer.SYNTHS.keys() + names = synthesizer.AVAILABLE_SYNTHS.keys() ret = {'success' : True, 'result' : names} self.write(json_encode(ret)) else: @@ -388,6 +388,7 @@ def run(port=8888, processes=4, debug=False, static=False, pid=None): # log to console logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') + synthesizer.init() kwargs = {} kwargs['pool'] = pool = multiprocessing.Pool(processes=processes) if static: diff --git a/server/synthesizer/__init__.py b/server/synthesizer/__init__.py new file mode 100644 index 0000000..5c3be9b --- /dev/null +++ b/server/synthesizer/__init__.py @@ -0,0 +1,144 @@ +''' +Speech synthesizer implementations for JSonic. + +:var IMPLEMENTED_SYNTHS: Names of modules containing ISynthesizer + implementations +:type IMPLEMENTED_SYNTHS: list +:var AVAILABLE_SYNTHS: Names paired with ISynthesizer implementations that + are available to this instance of JSonic +:type AVAILABLE_SYNTHS: dict + +:requires: Python 2.6 +:copyright: Peter Parente 2010, Roger Que 2010 +:license: BSD +''' +import imp +import linecache +import logging +import os.path +import sys + +class SynthesizerError(Exception): + ''' + Exception to throw for any synthesis error, including a human readable + description of what went wrong. + ''' + pass + +class ISynthesizer(object): + ''' + All synthesizers must implement this instance and class interface. 
+ ''' + def __init__(self, path, properties): + ''' + Constructor. + + :param path: Path to where synthesized files are stored + :type path: str + :param properties: Speech properties for any synthesis performed using + this instance of the synthesizer. The supported properties are + dictated by the synthesizer implementation as returned by the + get_info class method. + :type properties: dict + ''' + raise NotImplementedError + + def write_wav(self, utterance): + ''' + Synthesizes an utterance to a WAV file on disk in the cache folder. + The name of the file must be in the following format: + + <utterance hash>-<properties hash>.wav + + :param utterance: Unicode text to synthesize as speech + :type utterance: unicode + :return: Root name of the WAV file on disk, sans extension + :rtype: str + ''' + raise NotImplementedError + + @classmethod + def get_info(cls): + ''' + Gets information about the speech properties supported by this + synthesizer. Caches this information whenever possible to speed future + queries. + + :return: A dictionary describing the properties supported by this + synthesizer. The common properties are defined as follows: + + { + 'rate' : { // always in words per minute + 'minimum' : <number>, + 'maximum' : <number>, + 'default' : <number> + }, + 'pitch' : { + 'minimum' : <number>, + 'maximum' : <number>, + 'default' : <number> + }, + 'voices' : { + 'values' : [<str>, <str>, ...], + 'default' : <str> + } + } + + If any of these properties are not supported, they should be left + out of the dictionary. If additional properties are supported they + can be included in the dictionary in a similar format. + :rtype: dict + :raises: RuntimeError if the engine is not available on the server + ''' + raise NotImplementedError + +# A dictionary containing the names of modules containing synth implementations +# provided by JSonic, mapped to a boolean value indicating whether support for +# this module is required. 
# For example, the espeak module is used as a default
# synthesizer module in other parts of JSonic, so an exception should be thrown
# if it is not successfully imported.
IMPLEMENTED_SYNTHS = { 'espeak': True, 'macosx': False }

# init() populates the AVAILABLE_SYNTHS dictionary with synthesizer
# implementations that are available to JSonic. Synthesizer modules that are
# successfully imported are assumed to function properly; prerequisites can be
# checked in the main body of a module (outside classes and functions) and
# exceptions raised to prevent synthesizers from being added to
# AVAILABLE_SYNTHS on platforms where they do not work.
AVAILABLE_SYNTHS = {}

# Look for synth modules in these directories.
synth_path = [os.path.dirname(__file__)]

# init() would ordinarily be module-level code, but since it performs logging
# of successful and unsuccessful module imports, it must be called after the
# run() method in jsonic.py has initialized the logging module.
def init():
    '''
    Populates JSonic's dictionary of available synthesizer classes.

    Every module named in IMPLEMENTED_SYNTHS is located on synth_path and
    imported. A module that imports cleanly and exposes a SynthClass attribute
    is registered in AVAILABLE_SYNTHS under its module name. A module that
    cannot be found, fails while importing, or lacks SynthClass is logged and
    skipped, unless it is flagged as required.

    :raises SynthesizerError: if a module flagged as required in
        IMPLEMENTED_SYNTHS could not be imported
    '''
    for synth, required in IMPLEMENTED_SYNTHS.items():
        module_file = None
        try:
            # find_module is inside the try block so that a missing optional
            # module is treated like any other unavailable synthesizer
            # instead of aborting start-up with a raw ImportError.
            module_file, pathname, description = imp.find_module(synth,
                                                                 synth_path)
            module = imp.load_module(synth, module_file, pathname, description)
            synth_class = module.SynthClass
        except Exception:
            # Exception rather than a bare except: KeyboardInterrupt and
            # SystemExit must not be mistaken for a broken synth module.
            # Drop any half-initialized module left behind by the import.
            sys.modules.pop(synth, None)
            logging.info('Could not import synth module "%s"', synth,
                         exc_info=True)
            if required:
                raise SynthesizerError(
                    'Required synth module "%s" is unavailable' % synth)
        else:
            AVAILABLE_SYNTHS[synth] = synth_class
            logging.info('Successfully imported synth module "%s"', synth)
        finally:
            # The caller of imp.find_module owns the returned file object and
            # must close it, whether or not load_module succeeded.
            if module_file is not None:
                module_file.close()

    # NOTE(review): presumably refreshes cached source lines after the
    # import attempts above so later tracebacks are accurate -- confirm.
    linecache.checkcache()

def get_class(name):
    '''
    Gets the synthesizer class associated with the given synth engine name.

    Only engines registered by init() can be found; before init() has run
    every lookup returns None.

    :param name: Name of the synthesizer
    :type name: str
    :return: ISynthesizer class or None if the name is unknown
    :rtype: cls
    '''
    return AVAILABLE_SYNTHS.get(name)
- The name of the file must be in the following format: - - -.wav - - :param utterance: Unicode text to synthesize as speech - :type utterance: unicode - :return: Root name of the WAV file on disk, sans extension - :rtype: str - ''' - raise NotImplementedError - - @classmethod - def get_info(cls): - ''' - Gets information about the speech properties supported by this - synthesizer. Caches this information whenever possible to speed future - queries. - - :return: A dictionary describing the properties supported by this - synthesizer. The common properties are defined as follows: - - { - 'rate' : { // always in words per minute - 'minimum' : , - 'maximum' : , - 'default' : - }, - 'pitch' : { - 'minimum' : , - 'maximum' : , - 'default' : - }, - 'voices' : { - 'values' : [, , ...], - 'default' : - } - } - - If any of these properties are not supported, they should be left - out of the dictionary. If additional properties are supported they - can be included in dictionary in a similar format. - :rtype: dict - :raises: RuntimeError if the engine is not available on the server - ''' - raise NotImplementedError +import os.path class EspeakSynth(ISynthesizer): ''' @@ -189,17 +114,7 @@ def get_info(cls): } return cls.INFO -# global list of available synth implementations -# @todo: add these dynamically if the synths actually work on the platform -SYNTHS = {'espeak' : EspeakSynth} +SynthClass = EspeakSynth -def get_class(name): - ''' - Gets the synthesizer class associated with the given synth engine name. - - :param name: Name of the synthesizer - :type name: str - :return: ISynthesizer class or None if the name is unknown - :rtype: cls - ''' - return SYNTHS.get(name, None) \ No newline at end of file +# Make sure that espeak is installed and functioning by asking for voices. 
iterpipes.check_call(iterpipes.linecmd('speak --voices'))

# ---------------------------------------------------------------------------
# server/synthesizer/macosx.py
# ---------------------------------------------------------------------------
'''
Mac OS X speech synthesizer implementation for JSonic using PyObjC.

:requires: Python 2.6, Mac OS X 10.6
:copyright: Roger Que 2010
:license: BSD
'''
from synthesizer import *

import AppKit
from PyObjCTools.AppHelper import installMachInterrupt
import QTKit

import hashlib
import os.path
import struct
import subprocess
import sys

class MacOSXSpeechSynth(ISynthesizer):
    '''
    Synthesizes speech using NSSpeechSynthesizer (Mac OS X 10.6 or later).

    :ivar _path: Output cache path
    :ivar _opts: NSSpeechSynthesizer options list (rate, then voice, both as
        strings) passed to the helper process on its command line
    :ivar _optHash: SHA-1 hex digest of the engine name plus options, used as
        the property portion of output file names
    :cvar MIN_RATE: Minimum rate supported in WPM
    :cvar MAX_RATE: Maximum rate supported in WPM
    :cvar INFO: Dictionary of all supported engine properties cached for
        fast responses to queries
    '''
    MIN_RATE = 80
    MAX_RATE = 390
    INFO = None

    def __init__(self, path, properties):
        '''
        Implements ISynthesizer constructor.

        :param path: Path to where synthesized files are stored
        :type path: str
        :param properties: Speech properties; 'rate' and 'voice' are read,
            both optional
        :type properties: dict
        :raises SynthesizerError: if the rate is not an integer value or the
            voice is not one reported by get_info
        '''
        # path where to write the file
        self._path = path
        # NSSpeechSynthesizer options for this synth instance
        self._opts = []

        try:
            rate = int(properties['rate'])
        except KeyError:
            self._opts.append('200')
        except (TypeError, ValueError):
            # int() raises ValueError for non-numeric strings and TypeError
            # for unsupported types such as None; both mean a bad rate.
            raise SynthesizerError('invalid rate')
        else:
            # clamp to the supported range rather than rejecting
            rate = min(max(rate, self.MIN_RATE), self.MAX_RATE)
            self._opts.append(str(rate))

        try:
            voice = str(properties['voice'])
        except KeyError:
            self._opts.append('default')
        else:
            # Explicit check instead of assert, which is stripped when Python
            # runs with -O and would let unknown voices through.
            if voice not in MacOSXSpeechSynth.get_info()['voices']['values']:
                raise SynthesizerError('invalid voice')
            self._opts.append(voice)

        # store property portion of filename
        self._optHash = hashlib.sha1('macosx' + str(self._opts)).hexdigest()

    def write_wav(self, utterance):
        '''
        Implements ISynthesizer.write_wav.

        :param utterance: Unicode text to synthesize as speech
        :type utterance: unicode
        :return: Root name of the WAV file on disk, sans extension
        :rtype: str
        '''
        utf8Utterance = utterance.encode('utf-8')
        utterHash = hashlib.sha1(utf8Utterance).hexdigest()
        hashFn = '%s-%s' % (utterHash, self._optHash)

        # Invoke the __main__ portion of this file on the command line, passing
        # in the rate, voice, and output prefix name as arguments, and the text
        # to utter on standard input.
        prefix = os.path.join(self._path, hashFn)
        if not os.path.isfile(prefix + '.wav'):
            args = [sys.executable, __file__] + self._opts + [os.path.abspath(prefix)]
            # NOTE(review): env replaces the whole environment of the child
            # and PYTHONPATH '.' is relative to the current working
            # directory -- confirm this is intended.
            p = subprocess.Popen(args, stdin=subprocess.PIPE,
                                 env={'PYTHONPATH': '.'})
            # The child decodes standard input as UTF-8, so send the encoded
            # bytes; sending the unicode object would fall back to an implicit
            # ASCII encoding and fail on non-ASCII text.
            p.communicate(utf8Utterance)
        return hashFn

    @classmethod
    def get_info(cls):
        '''
        Implements ISynthesizer.get_info.

        The result is built once and cached in INFO.

        :return: Supported 'rate' range and 'voices' list with defaults
        :rtype: dict
        '''
        if cls.INFO is None:
            voices = AppKit.NSSpeechSynthesizer.availableVoices()
            cls.INFO = {
                'rate' : {
                    'minimum' : cls.MIN_RATE,
                    'maximum' : cls.MAX_RATE,
                    'default' : 200
                },
                'voices' : {
                    'values' : list(voices) + ['default'],
                    'default' : 'default'
                }
            }
        return cls.INFO

SynthClass = MacOSXSpeechSynth

# Setting up NSApplication delegates requires that Cocoa code be executed in a
# separate process. This module invokes itself in MacOSXSpeechSynth.write_wav.
if __name__ == '__main__':
    # Command line: <rate> <voice> <output prefix>; utterance on stdin.
    rate = float(sys.argv[1])
    voice = sys.argv[2]
    if voice == 'default':
        # None is passed to setVoice_ below in place of a voice name
        voice = None
    prefix = sys.argv[3]
    utterance = sys.stdin.read().decode('utf-8')

    # NOTE(review): the URL is built by plain concatenation, so the prefix is
    # not percent-escaped -- confirm that prefixes needing escaping (for
    # example ones containing spaces) cannot occur.
    aiff_url = 'file://' + prefix + '.aiff'
    wav_file = prefix + '.wav'

    def long_from_string(s):
        '''
        Unpacks a human-readable QuickTime file type code to an NSNumber used
        internally by QTKit.

        :param s: Four-character type code, read as a big-endian signed long
        :type s: str
        '''
        return AppKit.NSNumber.numberWithLong_(struct.unpack('>l', s)[0])

    class MacOSXSynthError(SynthesizerError):
        '''
        Wrapper for NSError instances thrown during the speech synthesis and
        file conversion process.
        '''

        @classmethod
        def from_nserror(cls, nserror):
            '''Builds an error from the localized description of an NSError.'''
            return cls(nserror.userInfo()['NSLocalizedDescription'])

    class SynthDelegate(AppKit.NSObject):
        '''
        NSApplication delegate for initiating speech synthesis and converting
        the AIFF output of NSSpeechSynthesizer to WAV through QTKit.
        '''

        def applicationDidFinishLaunching_(self, app):
            '''Called when the NSApplication has finished initialization.'''
            speech = AppKit.NSSpeechSynthesizer.alloc().init()
            speech.setDelegate_(self)

            # Setting the voice resets the speaking rate, so the former must be
            # set before the latter.
            speech.setVoice_(voice)
            speech.setRate_(rate)

            speech.startSpeakingString_toURL_(utterance,
                AppKit.NSURL.URLWithString_(aiff_url))

        def speechSynthesizer_didFinishSpeaking_(self, synth, finishedSpeaking):
            '''Called when a speech synthesis operation has finished.'''

            # finishedSpeaking is supposed to indicate whether speech was
            # synthesized successfully; however, it is False even in many cases
            # when speech synthesis has encountered no visible issues, so this
            # function ignores its value.

            movie, error = QTKit.QTMovie.movieWithURL_error_(
                AppKit.NSURL.URLWithString_(aiff_url), None)

            if movie is None:
                raise MacOSXSynthError.from_nserror(error)

            out_attrs = {
                'QTMovieExport': True,
                'QTMovieExportType': long_from_string('WAVE')
            }
            status, error = movie.writeToFile_withAttributes_error_(
                wav_file, out_attrs, None)

            if not status:
                raise MacOSXSynthError.from_nserror(error)

            # Clean up after ourselves by removing the original AIFF file.
            os.remove(prefix + '.aiff')

            AppKit.NSApp().terminate_(self)

    app = AppKit.NSApplication.sharedApplication()
    delegate = SynthDelegate.alloc().init()
    app.setDelegate_(delegate)
    installMachInterrupt()
    app.run()