C API Usage example

Creating a model instance and loading model

The following excerpt from the example client's `main` function creates the model and, when a language model is supplied, enables the decoder that uses it:

// Excerpt from the example client's entry point: creates a DeepSpeech model
// and, if requested, enables the decoder that uses a language model.
// Returns 1 on any failure shown here. The rest of main is not part of this
// excerpt.
// NOTE(review): model, alphabet, lm, trie and load_without_trie are defined
// outside this excerpt — presumably populated by ProcessArgs; confirm.
int
main(int argc, char **argv)
{
  // Exit with a failure code when ProcessArgs rejects the arguments.
  if (!ProcessArgs(argc, argv)) {
    return 1;
  }

  // Initialise DeepSpeech: DS_CreateModel writes the new model handle into
  // ctx; any non-zero status is treated as an error.
  ModelState* ctx;
  int status = DS_CreateModel(model, alphabet, BEAM_WIDTH, &ctx);
  if (status != 0) {
    fprintf(stderr, "Could not create model.\n");
    return 1;
  }

  // The LM-backed decoder is enabled only when a language model is given and
  // either a trie is given or load_without_trie is set.
  if (lm && (trie || load_without_trie)) {
    // NOTE(review): this `status` shadows the outer variable of the same name.
    int status = DS_EnableDecoderWithLM(ctx,
                                        lm,
                                        trie,
                                        LM_ALPHA,
                                        LM_BETA);
    if (status != 0) {
      fprintf(stderr, "Could not enable CTC decoder with LM.\n");
      return 1;
    }
  }

Performing inference

The following function from the example client runs inference on an audio buffer, either in a single call or by streaming the buffer in chunks, and records the CPU time spent:

/**
 * Run speech-to-text on an audio buffer and measure the CPU time it took.
 *
 * Exactly one of four paths is taken, checked in this order:
 *   - extended_output: DS_SpeechToTextWithMetadata, rendered with
 *     metadataToString();
 *   - json_output: DS_SpeechToTextWithMetadata, rendered with JSONOutput();
 *   - stream_size > 0: the buffer is fed to a stream in chunks of at most
 *     stream_size elements; after each chunk the intermediate transcript is
 *     printed to stdout if it differs from the last one printed, and the
 *     final transcript comes from DS_FinishStream();
 *   - otherwise: a single DS_SpeechToText() call.
 *
 * @param aCtx            Model handle passed through to the DS_* calls.
 * @param aBuffer         Audio data; indexed as an array of short.
 * @param aBufferSize     Number of elements in aBuffer.
 * @param aSampleRate     Sample rate passed through to the DS_* calls.
 * @param extended_output Select the metadataToString() path.
 * @param json_output     Select the JSONOutput() path (only consulted when
 *                        extended_output is false).
 * @return A ds_result whose `string` holds the transcript and whose
 *         `cpu_time_overall` is the elapsed clock() time in seconds. If the
 *         stream cannot be created, `string` is an empty strdup()'d string
 *         and `cpu_time_overall` stays zero.
 */
ds_result
LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
           int aSampleRate, bool extended_output, bool json_output)
{
  ds_result res = {0};

  clock_t ds_start_time = clock();

  if (extended_output) {
    Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, aSampleRate);
    res.string = metadataToString(metadata);
    DS_FreeMetadata(metadata);
  } else if (json_output) {
    Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, aSampleRate);
    res.string = JSONOutput(metadata);
    DS_FreeMetadata(metadata);
  } else if (stream_size > 0) {
    StreamingState* ctx;
    int status = DS_CreateStream(aCtx, aSampleRate, &ctx);
    if (status != DS_ERR_OK) {
      // NOTE(review): this string comes from strdup(), unlike the other
      // paths, which return strings produced by the DS_* API.
      res.string = strdup("");
      return res;
    }
    size_t off = 0;
    // Most recently printed intermediate transcript; owned by this function
    // until it is replaced or the loop ends.
    const char *last = nullptr;
    while (off < aBufferSize) {
      // Feed at most stream_size elements per iteration.
      size_t cur = aBufferSize - off > stream_size ? stream_size : aBufferSize - off;
      DS_FeedAudioContent(ctx, aBuffer + off, cur);
      off += cur;
      const char* partial = DS_IntermediateDecode(ctx);
      if (last == nullptr || strcmp(last, partial)) {
        printf("%s\n", partial);
        // Release the transcript being replaced; without this every changed
        // intermediate result would leak.
        if (last != nullptr) {
          DS_FreeString((char *) last);
        }
        last = partial;
      } else {
        // Same text as before: keep `last`, drop the duplicate.
        DS_FreeString((char *) partial);
      }
    }
    if (last != nullptr) {
      DS_FreeString((char *) last);
    }
    res.string = DS_FinishStream(ctx);
  } else {
    res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize, aSampleRate);
  }

  clock_t ds_end_infer = clock();

  res.cpu_time_overall =
    ((double) (ds_end_infer - ds_start_time)) / CLOCKS_PER_SEC;

  return res;
}

Full source code

See the full source code of the example client for the complete program.