Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

android synthesis enhancements #114

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions jni/src/main/java/com/samtupy/nvgt/TTS.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
import java.util.Set;
import java.util.stream.Collectors;
import org.libsdl.app.SDL;
import android.os.storage.StorageManager;
import android.os.ProxyFileDescriptorCallback;
import android.os.Handler;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.nio.file.Path;
import android.os.ParcelFileDescriptor;
import java.io.IOException;

public class TTS {
// First the static screen reader methods.
Expand Down Expand Up @@ -161,8 +169,7 @@ public void setVolume(float volume) {
ttsVolume = volume;
}

@Override
public void finalize() {
public void shutdown() {
if (isActive()) {
tts.shutdown();
}
Expand Down Expand Up @@ -205,4 +212,58 @@ public float getVolume() {
public float getPan() {
return ttsPan;
}

/**
 * Queues synthesis of {@code text} into an audio file, using the current volume and pan.
 *
 * A relative {@code filename} is resolved against the application's files directory
 * (as reported by the SDL context); an absolute one is used unchanged.
 *
 * @param filename absolute or relative path of the file to write.
 * @param text the text to synthesize.
 * @return true when {@code synthesizeToFile} reports {@code TextToSpeech.SUCCESS};
 *         false when the text exceeds the engine's maximum input length, the file is
 *         not writable, or the engine reports anything other than success.
 * @throws IOException if the output file cannot be created.
 */
public boolean speakToFile(String filename, String text) throws IOException {
	// Validate the text before touching the file system, so that a request which is
	// going to be refused anyway does not leave an empty file behind.
	if (text.length() > tts.getMaxSpeechInputLength()) {
		return false;
	}
	Path p = Path.of(filename);
	Path actual_path;
	if (p.isAbsolute()) {
		actual_path = p;
	} else {
		actual_path = Path.of(SDL.getContext().getFilesDir().getAbsolutePath(), filename);
	}
	File f = actual_path.toFile();
	// The return value is deliberately ignored: an already existing file is fine,
	// the writability check below is what decides whether we can proceed.
	f.createNewFile();
	if (!f.canWrite()) return false;
	Bundle params = new Bundle();
	params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, ttsVolume);
	params.putFloat(TextToSpeech.Engine.KEY_PARAM_PAN, ttsPan);
	return tts.synthesizeToFile(text, params, f, null) == TextToSpeech.SUCCESS;
}

/**
 * Queues synthesis of {@code text} into an in-memory buffer, using the current volume and pan.
 *
 * The engine is handed a proxy file descriptor whose write callback appends to a
 * {@link ByteArrayOutputStream}; the contents of that stream are returned.
 *
 * NOTE(review): the bytes are snapshotted as soon as synthesizeToFile returns. If the
 * engine only queues the request and writes later, the snapshot may be empty or partial —
 * confirm, and wait for utterance completion if so.
 *
 * @param text the text to synthesize.
 * @return the bytes collected so far when {@code synthesizeToFile} reports
 *         {@code TextToSpeech.SUCCESS}, otherwise null.
 * @throws IOException if the proxy file descriptor cannot be opened or closed.
 */
public byte[] speakToMemory(String text) throws IOException {
	// It is much simpler to use the storage manager interface instead of memfd_create here
	// To do: investigate whether it would be worth it creating a separate looper so we do not potentially risk blocking the main one
	ByteArrayOutputStream stream = new ByteArrayOutputStream();
	Context context = SDL.getContext();
	Handler handler = Handler.createAsync(context.getMainLooper());
	StorageManager manager = (StorageManager)context.getSystemService(Context.STORAGE_SERVICE);
	ProxyFileDescriptorCallback sink = new ProxyFileDescriptorCallback() {
		@Override
		public void onFsync () { }

		@Override
		public long onGetSize () {
			return stream.size();
		}

		@Override
		public int onRead (long offset, int size, byte[] data) {
			// The descriptor is opened write-only, so there is never anything to read.
			return 0;
		}

		@Override
		public void onRelease () { }

		@Override
		public int onWrite (long offset, int size, byte[] data) {
			// Only the first `size` bytes of `data` are payload; the array itself may be
			// larger, so copying all of it would append garbage to the output.
			// NOTE(review): `offset` is ignored, i.e. writes are assumed to be strictly
			// sequential. A writer that seeks back (e.g. to patch a header) would end up
			// with that data appended instead — confirm against the engine's behavior.
			stream.write(data, 0, size);
			return size;
		}
	};
	Bundle params = new Bundle();
	params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, ttsVolume);
	params.putFloat(TextToSpeech.Engine.KEY_PARAM_PAN, ttsPan);
	// try-with-resources so our end of the descriptor is always released.
	// NOTE(review): assumes the engine keeps its own duplicate of the descriptor once the
	// request has been handed over — confirm.
	try (ParcelFileDescriptor pfd = manager.openProxyFileDescriptor(ParcelFileDescriptor.MODE_WRITE_ONLY, sink, handler)) {
		if (tts.synthesizeToFile(text, params, pfd, null) == TextToSpeech.SUCCESS) {
			return stream.toByteArray();
		}
		return null;
	}
}
}
60 changes: 57 additions & 3 deletions src/tts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <Poco/Exception.h>
#include <Poco/Format.h>
#include <SDL3/SDL.h>
#include <stdexcept>
#endif

char* minitrim(char* data, unsigned long* bufsize, int bitrate, int channels) {
Expand Down Expand Up @@ -66,7 +67,12 @@ tts_voice::tts_voice(const std::string& builtin_voice_name) {
audioout = 0;
}
/// Destructor. On Android it asks the Java-side TTS object to shut down; on other
/// platforms it does nothing.
tts_voice::~tts_voice() {
#ifdef __ANDROID__
	// Manual cleanup: the call is only possible once the JNI environment, the Java
	// object and the shutdown method ID have all been obtained.
	if (!env || !TTSObj || !midShutdown) return;
	env->CallVoidMethod(TTSObj, midShutdown);
#endif
}
void tts_voice::setup() {
#ifdef _WIN32
Expand Down Expand Up @@ -106,7 +112,10 @@ void tts_voice::setup() {
midGetPitch = env->GetMethodID(TTSClass, "getPitch", "()F");
midGetPan = env->GetMethodID(TTSClass, "getPan", "()F");
midGetVolume = env->GetMethodID(TTSClass, "getVolume", "()F");
if (!midIsActive || !midIsSpeaking || !midSpeak || !midSilence || !midGetVoice || !midSetRate || !midSetPitch || !midSetPan || !midSetVolume || !midGetVoices || !midSetVoice || !midGetMaxSpeechInputLength || !midGetPitch || !midGetPan || !midGetRate || !midGetVolume) throw Poco::Exception("One or more methods on the TTS class could not be retrieved from JNI!");
// JNI descriptors spell every object type as "Lpkg/Class;" — the trailing ';' is required
// on each parameter, including the last one before ')'. Without it the lookup fails.
midSpeakToFile = env->GetMethodID(TTSClass, "speakToFile", "(Ljava/lang/String;Ljava/lang/String;)Z");
midSpeakToMemory = env->GetMethodID(TTSClass, "speakToMemory", "(Ljava/lang/String;)[B");
midShutdown = env->GetMethodID(TTSClass, "shutdown", "()V");
if (!midIsActive || !midIsSpeaking || !midSpeak || !midSilence || !midGetVoice || !midSetRate || !midSetPitch || !midSetPan || !midSetVolume || !midGetVoices || !midSetVoice || !midGetMaxSpeechInputLength || !midGetPitch || !midGetPan || !midGetRate || !midGetVolume || !midSpeakToFile || !midSpeakToMemory || !midShutdown) throw Poco::Exception("One or more methods on the TTS class could not be retrieved from JNI!");
if (!env->CallBooleanMethod(TTSObj, midIsActive)) throw Poco::Exception("TTS engine could not be initialized!");
voice_index = 1;
#else
Expand Down Expand Up @@ -237,7 +246,26 @@ bool tts_voice::speak_to_file(const std::string& filename, const std::string& te
}
#elif defined(__ANDROID__)
else {
	// Android: hand the request to TTS.speakToFile(filename, text) on the Java side and
	// report its boolean result. A Java exception is rethrown as std::runtime_error
	// carrying the Throwable's message.
	jstring jtext = env->NewStringUTF(text.c_str());
	jstring jfile = env->NewStringUTF(filename.c_str());
	const jboolean res = env->CallBooleanMethod(TTSObj, midSpeakToFile, jfile, jtext);
	if (jthrowable exc = env->ExceptionOccurred(); exc) {
		// The pending exception has to be cleared before any further JNI lookups or
		// calls are made; `exc` keeps the Throwable alive for inspection.
		env->ExceptionClear();
		env->DeleteLocalRef(jfile);
		env->DeleteLocalRef(jtext);
		jclass ThrowableClass = env->FindClass("java/lang/Throwable");
		if (!ThrowableClass) {
			env->ExceptionClear();
			env->DeleteLocalRef(exc);
			throw Poco::Exception("This JVM implementation is broken: could not translate Java exception!");
		}
		jmethodID midGetMessage = env->GetMethodID(ThrowableClass, "getMessage", "()Ljava/lang/String;");
		env->DeleteLocalRef(ThrowableClass);
		if (!midGetMessage) {
			env->ExceptionClear();
			env->DeleteLocalRef(exc);
			throw Poco::Exception("This JVM implementation is broken: Throwable does not have getMessage method!");
		}
		jstring message = static_cast<jstring>(env->CallObjectMethod(exc, midGetMessage));
		if (env->ExceptionCheck()) {
			// getMessage() itself threw; fall back to the generic text below.
			env->ExceptionClear();
			message = nullptr;
		}
		// getMessage() may legitimately return null, so never dereference it blindly.
		std::string msg = "Java exception without a message";
		if (message) {
			if (const char* msg_chars = env->GetStringUTFChars(message, nullptr)) {
				msg = msg_chars;
				env->ReleaseStringUTFChars(message, msg_chars);
			}
			env->DeleteLocalRef(message);
		}
		env->DeleteLocalRef(exc);
		throw std::runtime_error(msg);
	}
	env->DeleteLocalRef(jfile);
	env->DeleteLocalRef(jtext);
	return res != JNI_FALSE;
}
#endif
char* ptr = minitrim(data, &bufsize, bitrate, channels);
Expand All @@ -254,6 +282,7 @@ std::string tts_voice::speak_to_memory(const std::string& text) {
if (text.empty()) return "";
unsigned long bufsize;
char* data;
std::string data_res;
if (voice_index == builtin_index) {
if (samprate != 48000 || bitrate != 16 || channels != 2) {
samprate = 48000;
Expand Down Expand Up @@ -284,6 +313,31 @@ std::string tts_voice::speak_to_memory(const std::string& text) {
else {
return ""; // Not implemented yet.
}
#elif defined(__ANDROID__)
else {
	// Android: ask TTS.speakToMemory(text) on the Java side for the synthesized bytes and
	// copy them into data_res. A Java exception is rethrown as std::runtime_error carrying
	// the Throwable's message.
	jstring jtext = env->NewStringUTF(text.c_str());
	jbyteArray jbytes = static_cast<jbyteArray>(env->CallObjectMethod(TTSObj, midSpeakToMemory, jtext));
	if (jthrowable exc = env->ExceptionOccurred(); exc) {
		// The pending exception has to be cleared before any further JNI lookups or
		// calls are made; `exc` keeps the Throwable alive for inspection.
		env->ExceptionClear();
		env->DeleteLocalRef(jtext);
		jclass ThrowableClass = env->FindClass("java/lang/Throwable");
		if (!ThrowableClass) {
			env->ExceptionClear();
			env->DeleteLocalRef(exc);
			throw Poco::Exception("This JVM implementation is broken: could not translate Java exception!");
		}
		jmethodID midGetMessage = env->GetMethodID(ThrowableClass, "getMessage", "()Ljava/lang/String;");
		env->DeleteLocalRef(ThrowableClass);
		if (!midGetMessage) {
			env->ExceptionClear();
			env->DeleteLocalRef(exc);
			throw Poco::Exception("This JVM implementation is broken: Throwable does not have getMessage method!");
		}
		jstring message = static_cast<jstring>(env->CallObjectMethod(exc, midGetMessage));
		if (env->ExceptionCheck()) {
			// getMessage() itself threw; fall back to the generic text below.
			env->ExceptionClear();
			message = nullptr;
		}
		// getMessage() may legitimately return null, so never dereference it blindly.
		std::string msg = "Java exception without a message";
		if (message) {
			if (const char* msg_chars = env->GetStringUTFChars(message, nullptr)) {
				msg = msg_chars;
				env->ReleaseStringUTFChars(message, msg_chars);
			}
			env->DeleteLocalRef(message);
		}
		env->DeleteLocalRef(exc);
		throw std::runtime_error(msg);
	}
	env->DeleteLocalRef(jtext);
	// The Java method returns null when the engine did not accept the request.
	if (!jbytes) return "";
	const jsize length = env->GetArrayLength(jbytes);
	if (length <= 0) {
		env->DeleteLocalRef(jbytes);
		return "";
	}
	// Copy the whole array in one call instead of appending byte by byte.
	data_res.resize(static_cast<size_t>(length));
	env->GetByteArrayRegion(jbytes, 0, length, reinterpret_cast<jbyte*>(data_res.data()));
	env->DeleteLocalRef(jbytes);
	// bufsize is read by the minitrim() call that follows this branch, so it must be set.
	bufsize = static_cast<unsigned long>(data_res.size());
	// NOTE(review): data now points into data_res, which owns the memory — confirm that
	// the code after this branch does not free() data on this platform.
	data = data_res.data();
}
#endif
if (!data) return "";
char* ptr = minitrim(data, &bufsize, bitrate, channels);
Expand Down
2 changes: 1 addition & 1 deletion src/tts.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class tts_voice {
AVTTSVoice* inst;
#elifdef __ANDROID__
jclass TTSClass;
jmethodID constructor, midIsActive, midIsSpeaking, midSpeak, midSilence, midGetVoice, midSetRate, midSetPitch, midSetPan, midSetVolume, midGetVoices, midSetVoice, midGetMaxSpeechInputLength, midGetPitch, midGetPan, midGetRate, midGetVolume;
jmethodID constructor, midIsActive, midIsSpeaking, midSpeak, midSilence, midGetVoice, midSetRate, midSetPitch, midSetPan, midSetVolume, midGetVoices, midSetVoice, midGetMaxSpeechInputLength, midGetPitch, midGetPan, midGetRate, midGetVolume, midSpeakToFile, midSpeakToMemory, midShutdown;
JNIEnv* env;
jobject TTSObj;
#endif
Expand Down