> For the complete documentation index, see [llms.txt](https://textav.gitbook.io/firebase-react-notes/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://textav.gitbook.io/firebase-react-notes/stt/stt-+-cloud-function-+-cloud-task/createtranscript.md).

# \`createTranscript\`

```javascript
const functions = require('firebase-functions');
const admin = require('firebase-admin');
const path = require('path');
const os = require('os');
const fs = require('fs');
const ffmpeg = require('fluent-ffmpeg');


// File extension of the converted audio; also used to build the `_output.<ext>` file name.
const AUDIO_EXTENSION = 'ogg';
// Sample rate, in hertz, used for the converted audio and the STT request (48000 is the alternative).
const SAMPLE_RATE_HERTZ = 16000;
// Options handed to `functions.runWith` for the transcript trigger.
const MAX_RUNTIME_OPTS = {
  memory: '2GB',
  timeoutSeconds: 9 * 60, // 540 seconds, i.e. 9 minutes
};


/**
 * Returns a new Date that lies `minutes` minutes after `date`.
 * The input date is left untouched.
 */
const addMinutes = (date, minutes) => {
  const MS_PER_MINUTE = 60000;
  const shiftedTimestamp = date.getTime() + minutes * MS_PER_MINUTE;
  return new Date(shiftedTimestamp);
};

/**
 * Converts a Date to whole seconds since the Unix epoch,
 * rounding the millisecond timestamp to the nearest second.
 */
const getSecondsSinceEpoch = date => {
  const MS_PER_SECOND = 1000;
  const timestampMs = date.getTime();
  return Math.round(timestampMs / MS_PER_SECOND);
};

/**
 * Helper function to convert video or audio media to mono Opus audio for GCP STT.
 *
 * Runs ffmpeg on `downloadURLLink`, writes the result to a temporary file, uploads it to the
 * bucket returned by `admin.storage().bucket()` next to the source file as
 * `<name>_output.<AUDIO_EXTENSION>`, and always removes the temporary file afterwards.
 *
 * @param {object} admin - firebase-admin instance; only `admin.storage().bucket()` is used.
 * @param {string} storageRef - Storage path of the source media file.
 * @param {string} downloadURLLink - Input handed to ffmpeg.
 * @param {string} AUDIO_EXTENSION - Extension of the converted file, e.g. 'ogg'.
 * @param {number} SAMPLE_RATE_HERTZ - Audio frequency requested from ffmpeg.
 * @returns {Promise<string>} Storage path of the uploaded, converted audio.
 * @throws {Error} If `storageRef` already names a converted file, or if the ffmpeg
 *   conversion or the upload fails.
 */
const convertToAudio = async (admin, storageRef, downloadURLLink, AUDIO_EXTENSION, SAMPLE_RATE_HERTZ) => {
  const fileName = path.basename(storageRef);
  const convertedSuffix = `_output.${AUDIO_EXTENSION}`;

  // Stop before doing any work if the audio is already converted.
  if (fileName.endsWith(convertedSuffix)) {
    console.error('Already a converted audio.');
    throw new Error('Already a converted audio.');
  }

  const bucket = admin.storage().bucket();
  // Replace the source extension with the '_output.<ext>' suffix; that is the converted file's name.
  const targetTempFileName = `${fileName.replace(/\.[^/.]+$/, '')}${convertedSuffix}`;
  const targetTempFilePath = path.join(os.tmpdir(), targetTempFileName);
  const targetStorageFilePath = path.join(path.dirname(storageRef), targetTempFileName);

  try {
    // Only the callback-based ffmpeg run is wrapped in a Promise; everything else is awaited
    // directly so that any failure rejects the returned promise instead of being lost.
    await new Promise((resolve, reject) => {
      ffmpeg(downloadURLLink)
        .noVideo()
        .audioChannels(1)
        .audioFrequency(SAMPLE_RATE_HERTZ)
        .audioCodec('libopus')
        .output(targetTempFilePath)
        .on('end', () => resolve())
        .on('error', reject)
        .run();
    });

    // Upload the converted audio to Google Cloud Storage.
    await bucket.upload(targetTempFilePath, {
      destination: targetStorageFilePath,
      // without resumable false, this seems to fail
      resumable: false,
    });
  } finally {
    // Free up disk space whether or not conversion/upload succeeded. Cleanup is best effort:
    // a missing file (ffmpeg never produced output) is expected and must not mask the real error.
    try {
      fs.unlinkSync(targetTempFilePath);
    } catch (unlinkError) {
      if (unlinkError.code !== 'ENOENT') {
        console.error('Could not delete temporary audio file.', unlinkError);
      }
    }
  }

  return targetStorageFilePath;
};


// TODO: Google cloud function triggers Goolge Cloud task
//  Goolge Cloud task calls cloud function that calls STT SDK
// this function returns null.
// timeout 1min to 9min https://cloud.google.com/functions/docs/concepts/exec#timeout
// https://firebase.google.com/docs/functions/firestore-events
exports.createTranscript = functions
  .runWith(MAX_RUNTIME_OPTS)
  .firestore.document('projects/{projectId}/transcripts/{transcriptId}')
  .onCreate(async (change, context) => {
    // Get an object representing the document
    const newValue = change.data();
    // access a particular field as you would any JS property
    let storageRef = newValue.storageRefName;
    const downloadURLLink = newValue.downloadURL;
    // https://firebase.google.com/docs/storage/admin/start
    const storage = admin.storage();
    // https://github.com/firebase/firebase-tools/issues/1573#issuecomment-517000981
    const bucket = storage.appInternal.options.storageBucket;
  
    // Convert video or audio to audio that meets GCP STT Specs
    const audioForSttRef = await convertToAudio(admin, storageRef, downloadURLLink, AUDIO_EXTENSION, SAMPLE_RATE_HERTZ);
    // save audio ref to firestore
    change.ref.set(
      {
        audioUrl: audioForSttRef,
      },
      {
        merge: true,
      }
    );
  
    // STT
    const client = new speech.SpeechClient();
    const request = {
      config: {
        // https://cloud.google.com/speech-to-text/docs/async-time-offsets
        enableWordTimeOffsets: true,
        // https://cloud.google.com/speech-to-text/docs/automatic-punctuation
        enableAutomaticPunctuation: true,
        // https://cloud.google.com/speech-to-text/docs/multiple-voices
        enableSpeakerDiarization: true,
        diarizationConfig: {
          enableSpeakerDiarization: true,
          //  If not set, the default value is 2.
          // minSpeakerCount: 2,
          //  If not set, the default value is 6.
          // maxSpeakerCount: 3,
        },
        encoding: 'OGG_OPUS',
        // in RecognitionConfig must either be unspecified or match the value in the FLAC header `16000`;
        sampleRateHertz: Number(SAMPLE_RATE_HERTZ).toString(),
        languageCode: 'en-US',
        // https://cloud.google.com/speech-to-text/docs/multiple-languages
        // alternativeLanguageCodes: ['es-ES', 'en-US'],
        // https://cloud.google.com/speech-to-text/docs/reference/rest/v1p1beta1/RecognitionConfig
        // model: 'video'
      },
      audio: {
        uri: `gs://${bucket}/${audioForSttRef}`,
      },
    };
  
    // This creates a recognition job that you can wait for now, or get its result later.
    // initialApiResponse.name is the operation name/"id"
    // initialApiResponse.done is the status of the operation
    const [operation, initialApiResponse] = await client.longRunningRecognize(request);
    console.log('initialApiResponse', initialApiResponse.name);
  
    const sttOperationName = initialApiResponse.name;
    const sttOperationStatus = initialApiResponse.done;
  
    // TODO: I don't think the first response will have just have the results as is?
    if (sttOperationStatus && initialApiResponse.response && initialApiResponse.response.results) {
      //  const [response] = await operation.promise();
      const transcript = gcpToDpe(initialApiResponse.response);
      const { paragraphs, words } = transcript;
      return change.ref.set(
        {
          paragraphs,
          words,
          status: 'done',
        },
        {
          merge: true,
        }
      );
    } else {
      // Start a cloud task that triggers cloud function to check progress of GCP STT operation at latest stage
      const project = admin.instanceId().app.options.projectId;
      // https://firebase.google.com/docs/functions/locations
      const location = 'us-central1';
      const queue = 'firestore-stt';
  
      const tasksClient = new CloudTasksClient();
      const queuePath = tasksClient.queuePath(project, location, queue);
  
      const url = `https://${location}-${project}.cloudfunctions.net/firestoreCheckSTT`;
      console.log('url firestoreCheckSTT', url);
      const docPath = change.ref.path;
  
      const payload = { sttOperationName, docPath };
  
      // time of expiration expressed in epoch seconds
      const now = new Date();
      const timeFromNowWhenToCheckAgainInMinutes = 5;
      const timeFromNowWhenToCheckAgainAsDate = addMinutes(now, timeFromNowWhenToCheckAgainInMinutes);
      // Epoch, also known as Unix timestamps, is the number of seconds (not milliseconds!) that have elapsed since January 1, 1970 at 00:00:00 GMT
      const secondsSinceEpoch = getSecondsSinceEpoch(timeFromNowWhenToCheckAgainAsDate);
  
      // For troubleshooting
      change.ref.set(
        {
          sttOperationName,
          nextSttProgressCheckAt: timeFromNowWhenToCheckAgainAsDate,
        },
        {
          merge: true,
        }
      );
  
      const task = {
        httpRequest: {
          httpMethod: 'POST',
          url,
          body: Buffer.from(JSON.stringify(payload)).toString('base64'),
          headers: {
            'Content-Type': 'application/json',
          },
        },
        scheduleTime: {
          seconds: secondsSinceEpoch,
        },
      };
      const [response] = await tasksClient.createTask({ parent: queuePath, task });
      console.log(`Created task ${response.name}`);
      return null;
    }
  });
```


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter, and the optional `goal` query parameter:

```
GET https://textav.gitbook.io/firebase-react-notes/stt/stt-+-cloud-function-+-cloud-task/createtranscript.md?ask=<question>&goal=<endgoal>
```

`ask` is the immediate question: it should be specific, self-contained, and written in natural language.
`goal` is optional and describes the broader end goal you are ultimately trying to accomplish on behalf of the user. GitBook uses it to tailor the answer towards what is most useful for that goal.

The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
