Source code
Revision control
Copy as Markdown
Other Tools
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
#include "SpeechDispatcherService.h"
#include "mozilla/dom/nsSpeechTask.h"
#include "mozilla/dom/nsSynthVoiceRegistry.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_media.h"
#include "nsEscape.h"
#include "nsISupports.h"
#include "nsPrintfCString.h"
#include "nsReadableUtils.h"
#include "nsServiceManagerUtils.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "prlink.h"
#include <math.h>
#include <stdlib.h>
#define URI_PREFIX "urn:moz-tts:speechd:"
#define MAX_RATE static_cast<float>(2.5)
#define MIN_RATE static_cast<float>(0.5)
// Some structures for libspeechd
typedef enum {
SPD_EVENT_BEGIN,
SPD_EVENT_END,
SPD_EVENT_INDEX_MARK,
SPD_EVENT_CANCEL,
SPD_EVENT_PAUSE,
SPD_EVENT_RESUME
} SPDNotificationType;
typedef enum {
SPD_BEGIN = 1,
SPD_END = 2,
SPD_INDEX_MARKS = 4,
SPD_CANCEL = 8,
SPD_PAUSE = 16,
SPD_RESUME = 32,
SPD_ALL = 0x3f
} SPDNotification;
typedef enum { SPD_MODE_SINGLE = 0, SPD_MODE_THREADED = 1 } SPDConnectionMode;
typedef void (*SPDCallback)(size_t msg_id, size_t client_id,
SPDNotificationType state);
typedef void (*SPDCallbackIM)(size_t msg_id, size_t client_id,
SPDNotificationType state, char* index_mark);
struct SPDConnection {
SPDCallback callback_begin;
SPDCallback callback_end;
SPDCallback callback_cancel;
SPDCallback callback_pause;
SPDCallback callback_resume;
SPDCallbackIM callback_im;
/* partial, more private fields in structure */
};
struct SPDVoice {
char* name;
char* language;
char* variant;
};
typedef enum {
SPD_IMPORTANT = 1,
SPD_MESSAGE = 2,
SPD_TEXT = 3,
SPD_NOTIFICATION = 4,
SPD_PROGRESS = 5
} SPDPriority;
#define SPEECHD_FUNCTIONS \
FUNC(spd_open, SPDConnection*, \
(const char*, const char*, const char*, SPDConnectionMode)) \
FUNC(spd_close, void, (SPDConnection*)) \
FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \
FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \
FUNC(spd_cancel, int, (SPDConnection*)) \
FUNC(spd_set_volume, int, (SPDConnection*, int)) \
FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \
FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \
FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \
FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification))
#define FUNC(name, type, params) \
typedef type(*_##name##_fn) params; \
static _##name##_fn _##name;
SPEECHD_FUNCTIONS
#undef FUNC
#define spd_open _spd_open
#define spd_close _spd_close
#define spd_list_synthesis_voices _spd_list_synthesis_voices
#define spd_say _spd_say
#define spd_cancel _spd_cancel
#define spd_set_volume _spd_set_volume
#define spd_set_voice_rate _spd_set_voice_rate
#define spd_set_voice_pitch _spd_set_voice_pitch
#define spd_set_synthesis_voice _spd_set_synthesis_voice
#define spd_set_notification_on _spd_set_notification_on
static PRLibrary* speechdLib = nullptr;
typedef void (*nsSpeechDispatcherFunc)();
struct nsSpeechDispatcherDynamicFunction {
const char* functionName;
nsSpeechDispatcherFunc* function;
};
namespace mozilla::dom {
StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton;
class SpeechDispatcherVoice {
public:
SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage)
: mName(aName), mLanguage(aLanguage) {}
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice)
// Voice name
nsString mName;
// Voice language, in BCP-47 syntax
nsString mLanguage;
private:
~SpeechDispatcherVoice() = default;
};
class SpeechDispatcherCallback final : public nsISpeechTaskCallback {
public:
SpeechDispatcherCallback(nsISpeechTask* aTask,
SpeechDispatcherService* aService)
: mTask(aTask), mService(aService) {}
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback,
nsISpeechTaskCallback)
NS_DECL_NSISPEECHTASKCALLBACK
bool OnSpeechEvent(SPDNotificationType state);
private:
~SpeechDispatcherCallback() = default;
// This pointer is used to dispatch events
nsCOMPtr<nsISpeechTask> mTask;
// By holding a strong reference to the service we guarantee that it won't be
// destroyed before this runnable.
RefPtr<SpeechDispatcherService> mService;
TimeStamp mStartTime;
};
NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask);
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback)
NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback)
NS_IMETHODIMP
SpeechDispatcherCallback::OnPause() {
// XXX: Speech dispatcher does not pause immediately, but waits for the speech
// to reach an index mark so that it could resume from that offset.
// There is no support for word or sentence boundaries, so index marks would
// only occur in explicit SSML marks, and we don't support that yet.
// What in actuality happens, is that if you call spd_pause(), it will speak
// the utterance in its entirety, dispatch an end event, and then put speechd
// in a 'paused' state. Since it is after the utterance ended, we don't get
// that state change, and our speech api is in an unrecoverable state.
// So, since it is useless anyway, I am not implementing pause.
return NS_OK;
}
NS_IMETHODIMP
SpeechDispatcherCallback::OnResume() {
// XXX: Unsupported, see OnPause().
return NS_OK;
}
NS_IMETHODIMP
SpeechDispatcherCallback::OnCancel() {
if (spd_cancel(mService->mSpeechdClient) < 0) {
return NS_ERROR_FAILURE;
}
return NS_OK;
}
NS_IMETHODIMP
SpeechDispatcherCallback::OnVolumeChanged(float aVolume) {
// XXX: This currently does not change the volume mid-utterance, but it
// doesn't do anything bad either. So we could put this here with the hopes
// that speechd supports this in the future.
if (spd_set_volume(mService->mSpeechdClient,
static_cast<int>(aVolume * 100)) < 0) {
return NS_ERROR_FAILURE;
}
return NS_OK;
}
bool SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) {
bool remove = false;
switch (state) {
case SPD_EVENT_BEGIN:
mStartTime = TimeStamp::Now();
mTask->DispatchStart();
break;
case SPD_EVENT_PAUSE:
mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
break;
case SPD_EVENT_RESUME:
mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
break;
case SPD_EVENT_CANCEL:
case SPD_EVENT_END:
mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
remove = true;
break;
case SPD_EVENT_INDEX_MARK:
// Not yet supported
break;
default:
break;
}
return remove;
}
static void speechd_cb(size_t msg_id, size_t client_id,
SPDNotificationType state) {
SpeechDispatcherService* service =
SpeechDispatcherService::GetInstance(false);
if (service) {
NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>(
"dom::SpeechDispatcherService::EventNotify", service,
&SpeechDispatcherService::EventNotify, static_cast<uint32_t>(msg_id),
state));
}
}
NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService)
NS_INTERFACE_MAP_ENTRY(nsISpeechService)
NS_INTERFACE_MAP_ENTRY(nsIObserver)
NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
NS_INTERFACE_MAP_END
NS_IMPL_ADDREF(SpeechDispatcherService)
NS_IMPL_RELEASE(SpeechDispatcherService)
SpeechDispatcherService::SpeechDispatcherService()
: mInitialized(false), mSpeechdClient(nullptr) {}
void SpeechDispatcherService::Init() {
if (!StaticPrefs::media_webspeech_synth_enabled() ||
Preferences::GetBool("media.webspeech.synth.test")) {
return;
}
// While speech dispatcher has a "threaded" mode, only spd_say() is async.
// Since synchronous socket i/o could impact startup time, we do
// initialization in a separate thread.
DebugOnly<nsresult> rv =
NS_NewNamedThread("speechd init", getter_AddRefs(mInitThread));
MOZ_ASSERT(NS_SUCCEEDED(rv));
rv = mInitThread->Dispatch(
NewRunnableMethod("dom::SpeechDispatcherService::Setup", this,
&SpeechDispatcherService::Setup),
NS_DISPATCH_NORMAL);
MOZ_ASSERT(NS_SUCCEEDED(rv));
}
SpeechDispatcherService::~SpeechDispatcherService() {
if (mInitThread) {
mInitThread->Shutdown();
}
if (mSpeechdClient) {
spd_close(mSpeechdClient);
}
}
void SpeechDispatcherService::Setup() {
#define FUNC(name, type, params) {#name, (nsSpeechDispatcherFunc*)&_##name},
static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = {
SPEECHD_FUNCTIONS};
#undef FUNC
MOZ_ASSERT(!mInitialized);
speechdLib = PR_LoadLibrary("libspeechd.so.2");
if (!speechdLib) {
NS_WARNING("Failed to load speechd library");
NotifyError(u"lib-missing"_ns);
return;
}
if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) {
// There is no version getter function, so we rely on a symbol that was
// introduced in release 0.8.2 in order to check for ABI compatibility.
NS_WARNING("Unsupported version of speechd detected");
NotifyError(u"lib-too-old"_ns);
return;
}
for (uint32_t i = 0; i < std::size(kSpeechDispatcherSymbols); i++) {
*kSpeechDispatcherSymbols[i].function = PR_FindFunctionSymbol(
speechdLib, kSpeechDispatcherSymbols[i].functionName);
if (!*kSpeechDispatcherSymbols[i].function) {
NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'",
kSpeechDispatcherSymbols[i].functionName)
.get());
NotifyError(u"missing-symbol"_ns);
return;
}
}
mSpeechdClient =
spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED);
if (!mSpeechdClient) {
NS_WARNING("Failed to call spd_open");
NotifyError(u"open-fail"_ns);
return;
}
// Get all the voices from sapi and register in the SynthVoiceRegistry
SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient);
mSpeechdClient->callback_begin = speechd_cb;
mSpeechdClient->callback_end = speechd_cb;
mSpeechdClient->callback_cancel = speechd_cb;
mSpeechdClient->callback_pause = speechd_cb;
mSpeechdClient->callback_resume = speechd_cb;
spd_set_notification_on(mSpeechdClient, SPD_BEGIN);
spd_set_notification_on(mSpeechdClient, SPD_END);
spd_set_notification_on(mSpeechdClient, SPD_CANCEL);
if (list != NULL) {
for (int i = 0; list[i]; i++) {
nsAutoString uri;
uri.AssignLiteral(URI_PREFIX);
nsAutoCString name;
NS_EscapeURL(list[i]->name, -1,
esc_OnlyNonASCII | esc_Spaces | esc_AlwaysCopy, name);
uri.Append(NS_ConvertUTF8toUTF16(name));
uri.AppendLiteral("?");
nsAutoCString lang(list[i]->language);
uri.Append(NS_ConvertUTF8toUTF16(lang));
mVoices.InsertOrUpdate(uri, MakeRefPtr<SpeechDispatcherVoice>(
NS_ConvertUTF8toUTF16(list[i]->name),
NS_ConvertUTF8toUTF16(lang)));
}
}
if (mVoices.Count() == 0) {
NotifyError(u"no-voices"_ns);
}
NS_DispatchToMainThread(
NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices", this,
&SpeechDispatcherService::RegisterVoices));
// mInitialized = true;
}
// private methods
void SpeechDispatcherService::NotifyError(const nsString& aError) {
if (!NS_IsMainThread()) {
NS_DispatchToMainThread(NewRunnableMethod<const nsString>(
"dom::SpeechDispatcherService::NotifyError", this,
&SpeechDispatcherService::NotifyError, aError));
return;
}
RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
DebugOnly<nsresult> rv = registry->NotifyVoicesError(aError);
}
void SpeechDispatcherService::RegisterVoices() {
RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
for (const auto& entry : mVoices) {
const RefPtr<SpeechDispatcherVoice>& voice = entry.GetData();
// This service can only speak one utterance at a time, so we set
// aQueuesUtterances to true in order to track global state and schedule
// access to this service.
DebugOnly<nsresult> rv =
registry->AddVoice(this, entry.GetKey(), voice->mName, voice->mLanguage,
voice->mName.EqualsLiteral("default"), true);
NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice");
}
mInitThread->Shutdown();
mInitThread = nullptr;
mInitialized = true;
registry->NotifyVoicesChanged();
}
// nsIObserver
NS_IMETHODIMP
SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic,
const char16_t* aData) {
return NS_OK;
}
// nsISpeechService
// TODO: Support SSML
NS_IMETHODIMP
SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri,
float aVolume, float aRate, float aPitch,
nsISpeechTask* aTask) {
if (NS_WARN_IF(!mInitialized)) {
return NS_ERROR_NOT_AVAILABLE;
}
RefPtr<SpeechDispatcherCallback> callback =
new SpeechDispatcherCallback(aTask, this);
bool found = false;
SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found);
if (NS_WARN_IF(!(found))) {
return NS_ERROR_NOT_AVAILABLE;
}
spd_set_synthesis_voice(mSpeechdClient,
NS_ConvertUTF16toUTF8(voice->mName).get());
// We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100.
spd_set_volume(mSpeechdClient, static_cast<int>(aVolume * 100));
// aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate.
// speechd expects -100 to 100 with 0 being normal rate.
float rate = 0;
if (aRate > 1) {
// Each step to 100 is logarithmically distributed up to 2.5x.
rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100;
} else if (aRate < 1) {
// Each step to -100 is logarithmically distributed down to 0.5x.
rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100;
}
spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate));
// We provide a pitch of 0 to 2 with 1 being the default.
// speech-dispatcher expects -100 to 100 with 0 being default.
spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100));
nsresult rv = aTask->Setup(callback);
if (NS_FAILED(rv)) {
return rv;
}
if (aText.Length()) {
int msg_id = spd_say(mSpeechdClient, SPD_MESSAGE,
NS_ConvertUTF16toUTF8(aText).get());
if (msg_id < 0) {
return NS_ERROR_FAILURE;
}
mCallbacks.InsertOrUpdate(msg_id, std::move(callback));
} else {
// Speech dispatcher does not work well with empty strings.
// In that case, don't send empty string to speechd,
// and just emulate a speechd start and end event.
NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
"dom::SpeechDispatcherCallback::OnSpeechEvent", callback,
&SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN));
NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
"dom::SpeechDispatcherCallback::OnSpeechEvent", callback,
&SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END));
}
return NS_OK;
}
SpeechDispatcherService* SpeechDispatcherService::GetInstance(bool create) {
if (XRE_GetProcessType() != GeckoProcessType_Default) {
MOZ_ASSERT(
false,
"SpeechDispatcherService can only be started on main gecko process");
return nullptr;
}
if (!sSingleton && create) {
sSingleton = new SpeechDispatcherService();
sSingleton->Init();
ClearOnShutdown(&sSingleton);
}
return sSingleton;
}
already_AddRefed<SpeechDispatcherService>
SpeechDispatcherService::GetInstanceForService() {
MOZ_ASSERT(NS_IsMainThread());
RefPtr<SpeechDispatcherService> sapiService = GetInstance();
return sapiService.forget();
}
void SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) {
SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId);
if (callback) {
if (callback->OnSpeechEvent((SPDNotificationType)aState)) {
mCallbacks.Remove(aMsgId);
}
}
}
} // namespace mozilla::dom