`createTranscript`
1
const functions = require('firebase-functions');
2
const admin = require('firebase-admin');
3
const path = require('path');
4
const os = require('os');
5
const fs = require('fs');
6
const ffmpeg = require('fluent-ffmpeg');
7
​
8
​
9
// File extension of the converted audio; convertToAudio uses it to build the
// `_output.<ext>` suffix of the file it uploads.
const AUDIO_EXTENSION = 'ogg';
// Sample rate (Hz) used both for the ffmpeg conversion and for the STT request.
const SAMPLE_RATE_HERTZ = 16000; // or 48000
// Options handed to functions.runWith() for the createTranscript trigger.
const MAX_RUNTIME_OPTS = {
  timeoutSeconds: 540, // 9 minutes
  memory: '2GB',
};
15
​
16
​
17
/**
 * Returns a new Date that is `minutes` minutes after `date`.
 * The input date is not modified.
 *
 * @param {Date} date - Starting point in time.
 * @param {number} minutes - Minutes to add (may be negative or fractional).
 * @returns {Date} A fresh Date shifted by the requested amount.
 */
const addMinutes = (date, minutes) => {
  const offsetInMs = minutes * 60000;
  const shiftedTimestamp = date.getTime() + offsetInMs;
  return new Date(shiftedTimestamp);
};
20
​
21
/**
 * Converts a Date to whole seconds since the Unix epoch,
 * rounding the millisecond timestamp to the nearest second.
 *
 * @param {Date} date - The point in time to convert.
 * @returns {number} Seconds elapsed since 1970-01-01T00:00:00Z.
 */
const getSecondsSinceEpoch = (date) => {
  const millisSinceEpoch = date.getTime();
  return Math.round(millisSinceEpoch / 1000);
};
24
​
25
/**
 * Helper function to convert video or audio media to audio that meets the
 * GCP Speech-to-Text specs: mono, `SAMPLE_RATE_HERTZ` sample rate, Opus codec.
 *
 * The media is read by ffmpeg from `downloadURLLink`, written to a temporary
 * file in the OS temp directory, uploaded to the default storage bucket next
 * to the source file, and the temporary file is then removed.
 *
 * @param {object} admin - Initialised firebase-admin instance (used for `admin.storage().bucket()`).
 * @param {string} storageRef - Storage path of the source media; its directory and base name
 *   determine where the converted file is uploaded.
 * @param {string} downloadURLLink - Input handed to ffmpeg as the media source.
 * @param {string} AUDIO_EXTENSION - Extension of the converted file, without the dot (e.g. 'ogg').
 * @param {number} SAMPLE_RATE_HERTZ - Target audio sample rate in hertz.
 * @returns {Promise<string>} Storage path of the uploaded `<name>_output.<ext>` file.
 * @throws {Error} If `storageRef` already points at a converted file, or if the
 *   ffmpeg conversion or the upload fails (the promise rejects).
 */
const convertToAudio = async (admin, storageRef, downloadURLLink, AUDIO_EXTENSION, SAMPLE_RATE_HERTZ) => {
  const fileName = path.basename(storageRef);
  const outputSuffix = `_output.${AUDIO_EXTENSION}`;

  // Refuse to convert a file that is already the output of a previous conversion.
  // Throwing here stops the function; nothing is downloaded or uploaded.
  if (fileName.endsWith(outputSuffix)) {
    console.error('Already a converted audio.');
    throw new Error(`Already a converted audio: ${storageRef}`);
  }

  const bucket = admin.storage().bucket();
  // Strip the original extension and add the `_output.<ext>` suffix.
  // That is the name the converted audio gets, both locally and in storage.
  const targetTempFileName = `${fileName.replace(/\.[^/.]+$/, '')}${outputSuffix}`;
  const targetTempFilePath = path.join(os.tmpdir(), targetTempFileName);
  const targetStorageFilePath = path.join(path.dirname(storageRef), targetTempFileName);

  try {
    // Only the callback-based ffmpeg run is wrapped in a Promise; everything
    // else uses plain await so that failures propagate to the caller.
    await new Promise((resolve, reject) => {
      ffmpeg(downloadURLLink)
        .noVideo()
        .audioChannels(1)
        .audioFrequency(SAMPLE_RATE_HERTZ)
        .audioCodec('libopus')
        .output(targetTempFilePath)
        .on('end', () => {
          resolve();
        })
        .on('error', (err) => {
          reject(err);
        })
        .run();
    });

    // Upload the converted audio to Google Cloud Storage.
    await bucket.upload(targetTempFilePath, {
      destination: targetStorageFilePath,
      // without resumable false, this seems to fail
      resumable: false,
    });

    return targetStorageFilePath;
  } finally {
    // Always delete the local file to free up disk space, whether or not the
    // conversion/upload succeeded. A missing file (ffmpeg never produced it)
    // is not an error.
    try {
      fs.unlinkSync(targetTempFilePath);
    } catch (cleanupError) {
      if (cleanupError.code !== 'ENOENT') {
        console.error('Could not remove temporary audio file', cleanupError);
      }
    }
  }
};
69
​
70
​
71
// TODO: Google cloud function triggers Goolge Cloud task
72
// Goolge Cloud task calls cloud function that calls STT SDK
73
// this function returns null.
74
// timeout 1min to 9min https://cloud.google.com/functions/docs/concepts/exec#timeout
75
// https://firebase.google.com/docs/functions/firestore-events
76
exports.createTranscript = functions
77
.runWith(MAX_RUNTIME_OPTS)
78
.firestore.document('projects/{projectId}/transcripts/{transcriptId}')
79
.onCreate(async (change, context) => {
80
// Get an object representing the document
81
const newValue = change.data();
82
// access a particular field as you would any JS property
83
let storageRef = newValue.storageRefName;
84
const downloadURLLink = newValue.downloadURL;
85
// https://firebase.google.com/docs/storage/admin/start
86
const storage = admin.storage();
87
// https://github.com/firebase/firebase-tools/issues/1573#issuecomment-517000981
88
const bucket = storage.appInternal.options.storageBucket;
89
90
// Convert video or audio to audio that meets GCP STT Specs
91
const audioForSttRef = await convertToAudio(admin, storageRef, downloadURLLink, AUDIO_EXTENSION, SAMPLE_RATE_HERTZ);
92
// save audio ref to firestore
93
change.ref.set(
94
{
95
audioUrl: audioForSttRef,
96
},
97
{
98
merge: true,
99
}
100
);
101
102
// STT
103
const client = new speech.SpeechClient();
104
const request = {
105
config: {
106
// https://cloud.google.com/speech-to-text/docs/async-time-offsets
107
enableWordTimeOffsets: true,
108
// https://cloud.google.com/speech-to-text/docs/automatic-punctuation
109
enableAutomaticPunctuation: true,
110
// https://cloud.google.com/speech-to-text/docs/multiple-voices
111
enableSpeakerDiarization: true,
112
diarizationConfig: {
113
enableSpeakerDiarization: true,
114
// If not set, the default value is 2.
115
// minSpeakerCount: 2,
116
// If not set, the default value is 6.
117
// maxSpeakerCount: 3,
118
},
119
encoding: 'OGG_OPUS',
120
// in RecognitionConfig must either be unspecified or match the value in the FLAC header `16000`;
121
sampleRateHertz: Number(SAMPLE_RATE_HERTZ).toString(),
122
languageCode: 'en-US',
123
// https://cloud.google.com/speech-to-text/docs/multiple-languages
124
// alternativeLanguageCodes: ['es-ES', 'en-US'],
125
// https://cloud.google.com/speech-to-text/docs/reference/rest/v1p1beta1/RecognitionConfig
126
// model: 'video'
127
},
128
audio: {
129
uri: `gs://${bucket}/${audioForSttRef}`,
130
},
131
};
132
133
// This creates a recognition job that you can wait for now, or get its result later.
134
// initialApiResponse.name is the operation name/"id"
135
// initialApiResponse.done is the status of the operation
136
const [operation, initialApiResponse] = await client.longRunningRecognize(request);
137
console.log('initialApiResponse', initialApiResponse.name);
138
139
const sttOperationName = initialApiResponse.name;
140
const sttOperationStatus = initialApiResponse.done;
141
142
// TODO: I don't think the first response will have just have the results as is?
143
if (sttOperationStatus && initialApiResponse.response && initialApiResponse.response.results) {
144
// const [response] = await operation.promise();
145
const transcript = gcpToDpe(initialApiResponse.response);
146
const { paragraphs, words } = transcript;
147
return change.ref.set(
148
{
149
paragraphs,
150
words,
151
status: 'done',
152
},
153
{
154
merge: true,
155
}
156
);
157
} else {
158
// Start a cloud task that triggers cloud function to check progress of GCP STT operation at latest stage
159
const project = admin.instanceId().app.options.projectId;
160
// https://firebase.google.com/docs/functions/locations
161
const location = 'us-central1';
162
const queue = 'firestore-stt';
163
164
const tasksClient = new CloudTasksClient();
165
const queuePath = tasksClient.queuePath(project, location, queue);
166
167
const url = `https://${location}-${project}.cloudfunctions.net/firestoreCheckSTT`;
168
console.log('url firestoreCheckSTT', url);
169
const docPath = change.ref.path;
170
171
const payload = { sttOperationName, docPath };
172
173
// time of expiration expressed in epoch seconds
174
const now = new Date();
175
const timeFromNowWhenToCheckAgainInMinutes = 5;
176
const timeFromNowWhenToCheckAgainAsDate = addMinutes(now, timeFromNowWhenToCheckAgainInMinutes);
177
// Epoch, also known as Unix timestamps, is the number of seconds (not milliseconds!) that have elapsed since January 1, 1970 at 00:00:00 GMT
178
const secondsSinceEpoch = getSecondsSinceEpoch(timeFromNowWhenToCheckAgainAsDate);
179
180
// For troubleshooting
181
change.ref.set(
182
{
183
sttOperationName,
184
nextSttProgressCheckAt: timeFromNowWhenToCheckAgainAsDate,
185
},
186
{
187
merge: true,
188
}
189
);
190
191
const task = {
192
httpRequest: {
193
httpMethod: 'POST',
194
url,
195
body: Buffer.from(JSON.stringify(payload)).toString('base64'),
196
headers: {
197
'Content-Type': 'application/json',
198
},
199
},
200
scheduleTime: {
201
seconds: secondsSinceEpoch,
202
},
203
};
204
const [response] = await tasksClient.createTask({ parent: queuePath, task });
205
console.log(`Created task ${response.name}`);
206
return null;
207
}
208
});
Copied!
Copy link