-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtranscriber.js
95 lines (86 loc) · 2.8 KB
/
transcriber.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
const { EndBehaviorType, VoiceReceiver } = require('@discordjs/voice');
const { pipeline, Readable } = require("stream");
const witClient = require('node-witai-speech');
const prism = require("prism-media");
const util = require('util');
const fs = require("fs");
/**
 * Captures a Discord user's voice, converts it to mono PCM and sends it to
 * Wit.ai for speech-to-text.
 */
class Transcriber {
  /**
   * @param {string} apiKey - Wit.ai access token passed to every speech request.
   */
  constructor(apiKey) {
    this.WITAPIKEY = apiKey;
    // Millisecond timestamp of the last Wit.ai call; null until the first call.
    this.witAI_lastcallTS = null;
  }

  /**
   * Resolve after the given delay.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>}
   */
  sleep(ms) {
    return new Promise((resolve) => {
      setTimeout(resolve, ms);
    });
  }

  /**
   * Down-mix interleaved 16-bit stereo PCM to mono by keeping the first
   * channel's sample (first two bytes) of every 4-byte frame.
   *
   * @param {Buffer|Uint8Array|number[]|null|undefined} input - Raw PCM bytes.
   * @returns {Promise<Buffer>} PCM bytes with half the length of the input.
   *   A trailing partial frame is dropped; null/undefined yields an empty buffer.
   * @throws Re-throws (after logging) any error raised during conversion.
   */
  async convert_audio(input) {
    try {
      if (input == null) {
        return Buffer.alloc(0);
      }
      const BYTES_PER_STEREO_FRAME = 4; // 2 channels x 2 bytes per sample
      const BYTES_PER_MONO_FRAME = 2;
      const frameCount = Math.floor(input.length / BYTES_PER_STEREO_FRAME);
      const mono = Buffer.alloc(frameCount * BYTES_PER_MONO_FRAME);
      for (let frame = 0; frame < frameCount; frame++) {
        const src = frame * BYTES_PER_STEREO_FRAME;
        const dst = frame * BYTES_PER_MONO_FRAME;
        mono[dst] = input[src];
        mono[dst + 1] = input[src + 1];
      }
      return mono;
    } catch (e) {
      console.log('convert_audio: ', e);
      throw e;
    }
  }

  /**
   * Send PCM audio to Wit.ai and return the recognised result.
   *
   * Calls are spaced at least one second apart, measured from the completion
   * of the previous call (successful or not).
   *
   * @param {Buffer} buffer - Audio matching the content type sent below
   *   (signed 16-bit little-endian, 48 kHz).
   * @param {boolean} [raw] - When truthy, return the response exactly as the
   *   Wit.ai client delivered it.
   * @returns {Promise<object|string>} The response object; for string
   *   responses, the last JSON object in the response with periods stripped
   *   from `text`. Resolves to `{}` when anything fails (best effort; the
   *   error is logged, never thrown).
   */
  async transcribe(buffer, raw) {
    const MIN_CALL_INTERVAL_MS = 1000;
    let stream;
    try {
      if (this.witAI_lastcallTS != null) {
        const remaining = MIN_CALL_INTERVAL_MS - (Date.now() - this.witAI_lastcallTS);
        if (remaining > 0) {
          await this.sleep(remaining);
        }
      }

      const extractSpeechIntent = util.promisify(witClient.extractSpeechIntent);
      stream = Readable.from(buffer);
      const contenttype = "audio/raw;encoding=signed-integer;bits=16;rate=48k;endian=little";

      let output;
      try {
        output = await extractSpeechIntent(this.WITAPIKEY, stream, contenttype);
      } finally {
        // A failed request still counts as a call for throttling purposes.
        this.witAI_lastcallTS = Date.now();
      }

      if (raw) return output;
      if (typeof output === "object") return output;

      // A string response may hold several concatenated JSON objects
      // ("{...}{...}"); only the last one is parsed and returned.
      const joined = output.split("\n").map((item) => item.trim()).join("");
      const start = joined.lastIndexOf("}{") + 1;
      const end = joined.lastIndexOf("}") + 1;
      const parsed = JSON.parse(joined.substring(start, end).trim());
      if (parsed !== null && typeof parsed.text === "string") {
        parsed.text = parsed.text.replace(/\./g, "");
      }
      return parsed;
    } catch (e) {
      console.log("Transcriber-error: ", e);
      return {};
    } finally {
      // Release the stream on every exit path, not only after parsing.
      if (stream) stream.destroy();
    }
  }

  /**
   * Record one utterance from a user and transcribe it.
   *
   * Subscribes to the user's audio until 300 ms of silence, decodes Opus to
   * 48 kHz stereo PCM, and transcribes clips longer than 1 s and shorter
   * than 19 s.
   *
   * @param {object} receiver - Voice receiver exposing `subscribe(userId, options)`.
   * @param {string} userId - Id of the user to subscribe to.
   * @param {*} user - Value echoed back in the result's `user` field.
   * @returns {Promise<{user: *, transcript: object}>} The transcript, or an
   *   empty object as transcript when the clip is outside the accepted
   *   duration range. Rejects if the audio stream or the decoder fails.
   */
  listen(receiver, userId, user) {
    const SAMPLE_RATE = 48000;
    const CHANNELS = 2;
    const BYTES_PER_SAMPLE = 2;
    const BYTES_PER_SECOND = SAMPLE_RATE * CHANNELS * BYTES_PER_SAMPLE;
    const MIN_SECONDS = 1.0;
    const MAX_SECONDS = 19;

    return new Promise((resolve, reject) => {
      const stream = receiver.subscribe(userId, {
        end: {
          behavior: EndBehaviorType.AfterSilence,
          duration: 300,
        },
      });
      const decoder = new prism.opus.Decoder({ frameSize: 960, channels: CHANNELS, rate: SAMPLE_RATE });
      const chunks = [];

      // pipe() does not forward errors, so both ends need their own handler.
      stream.on("error", (err) => {
        decoder.destroy();
        reject(err);
      });
      decoder.on("error", reject);

      decoder.on("data", (chunk) => {
        chunks.push(chunk);
      });
      decoder.on("end", async () => {
        try {
          const pcm = Buffer.concat(chunks);
          const duration = pcm.length / BYTES_PER_SECOND;
          if (duration <= MIN_SECONDS || duration >= MAX_SECONDS) {
            // Settle anyway so callers never wait on a skipped clip.
            resolve({ user: user, transcript: {} });
            return;
          }
          const transcript = await this.transcribe(await this.convert_audio(pcm));
          resolve({ user: user, transcript: transcript });
        } catch (err) {
          reject(err);
        }
      });

      stream.pipe(decoder);
    });
  }
}
// Expose the Transcriber class as this module's CommonJS export.
module.exports = Transcriber;