sherpa-onnx C API 1.0
Public C API and C++ wrapper for sherpa-onnx
Loading...
Searching...
No Matches
c-api.h
Go to the documentation of this file.
1// sherpa-onnx/c-api/c-api.h
2//
3// Copyright (c) 2023 Xiaomi Corporation
71#ifndef SHERPA_ONNX_C_API_C_API_H_
72#define SHERPA_ONNX_C_API_C_API_H_
73
74#include <stdint.h>
75
76#ifdef __cplusplus
77extern "C" {
78#endif
79
80// See https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h
81// We will set SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB in
82// CMakeLists.txt
83
84#if defined(__GNUC__)
85#pragma GCC diagnostic push
86#pragma GCC diagnostic ignored "-Wattributes"
87#endif
88
89#if defined(_WIN32)
90#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
91#define SHERPA_ONNX_EXPORT __declspec(dllexport)
92#define SHERPA_ONNX_IMPORT __declspec(dllimport)
93#else
94#define SHERPA_ONNX_EXPORT
95#define SHERPA_ONNX_IMPORT
96#endif
97#else // _WIN32
98#define SHERPA_ONNX_EXPORT __attribute__((visibility("default")))
99
100#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT
101#endif // _WIN32
102
103#if defined(SHERPA_ONNX_BUILD_MAIN_LIB)
104#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
105#else
106#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
107#endif
108
109#ifndef SHERPA_ONNX_DEPRECATED
110#if defined(_MSC_VER)
111#define SHERPA_ONNX_DEPRECATED(msg) __declspec(deprecated(msg))
112#elif defined(__GNUC__) || defined(__clang__)
113#define SHERPA_ONNX_DEPRECATED(msg) __attribute__((deprecated(msg)))
114#else
115#define SHERPA_ONNX_DEPRECATED(msg)
116#endif
117#endif
118
132
142
152
165SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);
166
182
196
204
210
216
270
284
292
302
383
393 const char *text;
394
400 const char *tokens;
401
403 const char *const *tokens_arr;
404
413
415 int32_t count;
416
418 const char *json;
420
425
464
476 const SherpaOnnxOnlineRecognizer *recognizer);
477
496 const SherpaOnnxOnlineRecognizer *recognizer);
497
514 const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords);
515
528 const SherpaOnnxOnlineStream *stream);
529
550 const SherpaOnnxOnlineStream *stream, int32_t sample_rate,
551 const float *samples, int32_t n);
552
566SHERPA_ONNX_API int32_t
568 const SherpaOnnxOnlineStream *stream);
569
586 const SherpaOnnxOnlineRecognizer *recognizer,
587 const SherpaOnnxOnlineStream *stream);
588
605 const SherpaOnnxOnlineRecognizer *recognizer,
606 const SherpaOnnxOnlineStream **streams, int32_t n);
607
629 const SherpaOnnxOnlineStream *stream);
630
643
660 const SherpaOnnxOnlineRecognizer *recognizer,
661 const SherpaOnnxOnlineStream *stream);
662
675
692 const SherpaOnnxOnlineRecognizer *recognizer,
693 const SherpaOnnxOnlineStream *stream);
694
707 const SherpaOnnxOnlineStream *stream);
708
724 const SherpaOnnxOnlineStream *stream, const char *key, const char *value);
725
740 const SherpaOnnxOnlineStream *stream, const char *key);
741
754 const SherpaOnnxOnlineStream *stream, const char *key);
755
769SHERPA_ONNX_API int32_t
771 const SherpaOnnxOnlineStream *stream);
772
779
792 int32_t max_word_per_line);
793
800
813 int32_t idx, const char *s);
814// ============================================================
815// For offline ASR (i.e., non-streaming ASR)
816// ============================================================
817
829
835
841
863
877
891
899
905
919
925
933
943
949
955
961
967
971 const char *encoder_adaptor;
973 const char *llm;
975 const char *embedding;
977 const char *tokenizer;
979 const char *system_prompt;
981 const char *user_prompt;
987 float top_p;
989 int32_t seed;
991 const char *language;
993 int32_t itn;
995 const char *hotwords;
997
1022
1028
1101
1183
1186
1189
1248 const SherpaOnnxOfflineRecognizerConfig *config);
1249
1261 const SherpaOnnxOfflineRecognizer *recognizer,
1262 const SherpaOnnxOfflineRecognizerConfig *config);
1263
1275 const SherpaOnnxOfflineRecognizer *recognizer);
1276
1292 const SherpaOnnxOfflineRecognizer *recognizer);
1293
1310 const SherpaOnnxOfflineRecognizer *recognizer, const char *hotwords);
1311
1324 const SherpaOnnxOfflineStream *stream);
1325
1352 const SherpaOnnxOfflineStream *stream, int32_t sample_rate,
1353 const float *samples, int32_t n);
1354
1367 const SherpaOnnxOfflineStream *stream, const char *key, const char *value);
1368
1383 const SherpaOnnxOfflineStream *stream, const char *key);
1384
1398 const SherpaOnnxOfflineStream *stream, const char *key);
1399
1413 const SherpaOnnxOfflineRecognizer *recognizer,
1414 const SherpaOnnxOfflineStream *stream);
1415
1432 const SherpaOnnxOfflineRecognizer *recognizer,
1433 const SherpaOnnxOfflineStream **streams, int32_t n);
1434
1444 const char *text;
1445
1454
1456 int32_t count;
1457
1463 const char *tokens;
1464
1466 const char *const *tokens_arr;
1467
1469 const char *json;
1470
1472 const char *lang;
1473
1475 const char *emotion;
1476
1478 const char *event;
1479
1482
1485
1489
1492 const float *segment_durations;
1493
1495 const char *segment_texts;
1496
1498 const char *const *segment_texts_arr;
1499
1503
1526
1539
1554 const SherpaOnnxOfflineStream *stream);
1555
1568
1569// ============================================================
1570// For keyword spotting
1571// ============================================================
1584 const char *keyword;
1585
1591 const char *tokens;
1592
1598 const char *const *tokens_arr;
1599
1601 int32_t count;
1602
1610
1613
1619 const char *json;
1621
1683
1686
1695 const SherpaOnnxKeywordSpotterConfig *config);
1696
1703 const SherpaOnnxKeywordSpotter *spotter);
1704
1714 const SherpaOnnxKeywordSpotter *spotter);
1715
1735 const SherpaOnnxKeywordSpotter *spotter, const char *keywords);
1736
1745SHERPA_ONNX_API int32_t
1747 const SherpaOnnxOnlineStream *stream);
1748
1759 const SherpaOnnxKeywordSpotter *spotter,
1760 const SherpaOnnxOnlineStream *stream);
1761
1773 const SherpaOnnxKeywordSpotter *spotter,
1774 const SherpaOnnxOnlineStream *stream);
1775
1787 const SherpaOnnxKeywordSpotter *spotter,
1788 const SherpaOnnxOnlineStream **streams, int32_t n);
1789
1812 const SherpaOnnxKeywordSpotter *spotter,
1813 const SherpaOnnxOnlineStream *stream);
1814
1821 const SherpaOnnxKeywordResult *r);
1822
1833 const SherpaOnnxKeywordSpotter *spotter,
1834 const SherpaOnnxOnlineStream *stream);
1835
1842
1843// ============================================================
1844// For VAD
1845// ============================================================
1846
1867
1888
1934
1937
1951 int32_t capacity);
1952
1964 const SherpaOnnxCircularBuffer *buffer);
1965
1978 const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n);
1979
1995 const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n);
1996
1999
2007 const SherpaOnnxCircularBuffer *buffer, int32_t n);
2008
2015SHERPA_ONNX_API int32_t
2017
2027SHERPA_ONNX_API int32_t
2029
2036 const SherpaOnnxCircularBuffer *buffer);
2037
2052
2055
2084 float buffer_size_in_seconds);
2085
2093
2110 const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n);
2111
2118SHERPA_ONNX_API int32_t
2120
2129
2148
2156
2178
2186 const SherpaOnnxSpeechSegment *p);
2187
2195
2209
2210// ============================================================
2211// For offline Text-to-Speech (i.e., non-streaming TTS)
2212// ============================================================
2213
2233
2253
2257 const char *model;
2259 const char *voices;
2261 const char *tokens;
2263 const char *data_dir;
2267 const char *dict_dir;
2269 const char *lexicon;
2271 const char *lang;
2273
2287
2311
2331
2349
2389
2419
2434
2443typedef int32_t (*SherpaOnnxGeneratedAudioCallback)(const float *samples,
2444 int32_t n);
2445
2450typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
2451 int32_t n,
2452 void *arg);
2453
2463 const float *samples, int32_t n, float p);
2464
2470 const float *samples, int32_t n, float p, void *arg);
2471
2474
2495 const SherpaOnnxOfflineTtsConfig *config);
2496
2503 const SherpaOnnxOfflineTts *tts);
2504
2511SHERPA_ONNX_API int32_t
2513
2522SHERPA_ONNX_API int32_t
2524
2545SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
2546 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2548 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
2549 float speed);
2550
2568SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
2569 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2571 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
2572 float speed, SherpaOnnxGeneratedAudioCallback callback);
2573
2598SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
2599 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2601 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
2602 float speed, SherpaOnnxGeneratedAudioProgressCallback callback);
2603
2618SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
2619 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2622 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
2623 float speed,
2625 void *arg);
2626
2642SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
2643 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2645 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
2646 float speed, SherpaOnnxGeneratedAudioCallbackWithArg callback,
2647 void *arg);
2648
2654SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
2655 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2657 const SherpaOnnxOfflineTts *tts, const char *text,
2658 const char *prompt_text, const float *prompt_samples, int32_t n_prompt,
2659 int32_t prompt_sr, float speed, int32_t num_steps);
2660
2699
2730 const SherpaOnnxOfflineTts *tts, const char *text,
2731 const SherpaOnnxGenerationConfig *config,
2733
2741 const SherpaOnnxGeneratedAudio *p);
2742
2757SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
2758 int32_t sample_rate,
2759 const char *filename);
2760
2767SHERPA_ONNX_API int64_t SherpaOnnxWaveFileSize(int32_t n_samples);
2768
2780 int32_t n, int32_t sample_rate,
2781 char *buffer);
2782
2794 const float *const *samples, int32_t n, int32_t sample_rate,
2795 int32_t num_channels, const char *filename);
2796
2802typedef struct SherpaOnnxWave {
2804 const float *samples;
2810
2828
2838 const char *data, int32_t n);
2839
2845
2864
2874
2881 const SherpaOnnxMultiChannelWave *wave);
2882
2883// ============================================================
2884// For spoken language identification
2885// ============================================================
2886
2907
2934
2938
2949
2958
2973
2985
3013 const SherpaOnnxOfflineStream *s);
3014
3023
3024// ============================================================
3025// For speaker embedding extraction
3026// ============================================================
3051
3055
3066
3074
3083
3098
3109 const SherpaOnnxOnlineStream *s);
3110
3136SHERPA_ONNX_API const float *
3139 const SherpaOnnxOnlineStream *s);
3140
3149 const float *v);
3150
3154
3168
3176
3185SHERPA_ONNX_API int32_t
3187 const char *name, const float *v);
3188
3206 const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
3207 const float **v);
3208
3222 const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
3223 const float *v, int32_t n);
3224
3233 const SherpaOnnxSpeakerEmbeddingManager *p, const char *name);
3234
3246 const SherpaOnnxSpeakerEmbeddingManager *p, const float *v,
3247 float threshold);
3248
3256 const char *name);
3257
3267
3279
3292 const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold,
3293 int32_t n);
3294
3303
3314 const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
3315 const float *v, float threshold);
3316
3325 const SherpaOnnxSpeakerEmbeddingManager *p, const char *name);
3326
3335
3346SHERPA_ONNX_API const char *const *
3349
3358 const char *const *names);
3359
3360// ============================================================
3361// For audio tagging
3362// ============================================================
3368
3399
3424
3428typedef struct SherpaOnnxAudioEvent {
3430 const char *name;
3432 int32_t index;
3434 float prob;
3436
3439
3448 const SherpaOnnxAudioTaggingConfig *config);
3449
3456 const SherpaOnnxAudioTagging *tagger);
3457
3467
3494 const SherpaOnnxOfflineStream *s, int32_t top_k);
3495
3502 const SherpaOnnxAudioEvent *const *p);
3503
3504// ============================================================
3505// For punctuation
3506// ============================================================
3507
3532
3538
3541
3552
3559 const SherpaOnnxOfflinePunctuation *punct);
3560
3570 const SherpaOnnxOfflinePunctuation *punct, const char *text);
3571
3578
3605
3611
3614
3624 const SherpaOnnxOnlinePunctuationConfig *config);
3625
3632 const SherpaOnnxOnlinePunctuation *punctuation);
3633
3651 const SherpaOnnxOnlinePunctuation *punctuation, const char *text);
3652
3659
3660// For resampling
3663
3684SherpaOnnxCreateLinearResampler(int32_t samp_rate_in_hz,
3685 int32_t samp_rate_out_hz,
3686 float filter_cutoff_hz, int32_t num_zeros);
3687
3694 const SherpaOnnxLinearResampler *p);
3695
3702 const SherpaOnnxLinearResampler *p);
3703
3715
3729 const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim,
3730 int32_t flush);
3731
3738 const SherpaOnnxResampleOut *p);
3739
3747 const SherpaOnnxLinearResampler *p);
3748
3756 const SherpaOnnxLinearResampler *p);
3757
3758// =========================================================================
3759// For offline speaker diarization (i.e., non-streaming speaker diarization)
3760// =========================================================================
3766
3783
3798
3826
3830
3841
3849
3858
3870
3874
3886
3896
3906
3921
3931
3939 int32_t num_processed_chunks, int32_t num_total_chunks, void *arg);
3940
3946 int32_t num_processed_chunks, int32_t num_total_chunks);
3947
3959 const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
3960 int32_t n);
3961
3982 const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
3984 void *arg);
3985
3999 const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
4000 int32_t n,
4002
4011
4012// =========================================================================
4013// For offline speech enhancement
4014// =========================================================================
4020
4026
4045
4051
4054
4073
4081
4090
4104
4126 const float *samples, int32_t n,
4127 int32_t sample_rate);
4128
4137 const SherpaOnnxDenoisedAudio *p);
4138
4139// =========================================================================
4140// For streaming speech enhancement
4141// =========================================================================
4147
4150
4161
4169
4178
4189
4205 const float *samples, int32_t n,
4206 int32_t sample_rate);
4207
4219
4227
4228// =========================================================================
4229// Source separation
4230// =========================================================================
4231
4239
4245
4254
4259
4263
4274
4282
4291
4302
4312
4322
4341 const SherpaOnnxOfflineSourceSeparation *ss, const float *const *samples,
4342 int32_t num_channels, int32_t num_samples, int32_t sample_rate);
4343
4351
4352#ifdef __OHOS__
4353
4360typedef struct NativeResourceManager NativeResourceManager;
4361
4373SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
4375 NativeResourceManager *mgr);
4376
4388SherpaOnnxCreateOnlineSpeechDenoiserOHOS(
4390 NativeResourceManager *mgr);
4391
4403SherpaOnnxCreateOnlineRecognizerOHOS(
4404 const SherpaOnnxOnlineRecognizerConfig *config, NativeResourceManager *mgr);
4405
4417SherpaOnnxCreateOfflineRecognizerOHOS(
4419 NativeResourceManager *mgr);
4420
4433SherpaOnnxCreateVoiceActivityDetectorOHOS(
4434 const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
4435 NativeResourceManager *mgr);
4436
4447SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
4448 const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);
4449
4461SherpaOnnxCreateOfflinePunctuationOHOS(
4463 NativeResourceManager *mgr);
4464
4476SherpaOnnxCreateOnlinePunctuationOHOS(
4478 NativeResourceManager *mgr);
4479
4492SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
4494 NativeResourceManager *mgr);
4495
4507SherpaOnnxCreateKeywordSpotterOHOS(const SherpaOnnxKeywordSpotterConfig *config,
4508 NativeResourceManager *mgr);
4509
4522SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
4524 NativeResourceManager *mgr);
4525
4538SherpaOnnxCreateOfflineSourceSeparationOHOS(
4540 NativeResourceManager *mgr);
4541#endif
4542
4543#if defined(__GNUC__)
4544#pragma GCC diagnostic pop
4545#endif
4546
4547#ifdef __cplusplus
4548} /* extern "C" */
4549#endif
4550
4551#endif // SHERPA_ONNX_C_API_C_API_H_
int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Return the number of diarization segments.
int32_t SherpaOnnxVoiceActivityDetectorDetected(const SherpaOnnxVoiceActivityDetector *p)
Check whether the detector is currently inside speech.
const SherpaOnnxSpeechSegment * SherpaOnnxVoiceActivityDetectorFront(const SherpaOnnxVoiceActivityDetector *p)
Get the first queued speech segment.
void SherpaOnnxOfflineRecognizerSetConfig(const SherpaOnnxOfflineRecognizer *recognizer, const SherpaOnnxOfflineRecognizerConfig *config)
Update the configuration of an existing offline recognizer.
const SherpaOnnxMultiChannelWave * SherpaOnnxReadWaveMultiChannel(const char *filename)
Read a multi-channel 16-bit PCM WAVE file.
void SherpaOnnxFreeWave(const SherpaOnnxWave *wave)
Destroy a wave object returned by SherpaOnnxReadWave() or SherpaOnnxReadWaveFromBinaryData().
struct SherpaOnnxSpokenLanguageIdentification SherpaOnnxSpokenLanguageIdentification
Opaque spoken-language identification handle.
Definition c-api.h:2936
const SherpaOnnxOfflineSpeakerDiarizationResult * SherpaOnnxOfflineSpeakerDiarizationProcess(const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, int32_t n)
Run offline speaker diarization.
void SherpaOnnxDestroySpokenLanguageIdentification(const SherpaOnnxSpokenLanguageIdentification *slid)
Destroy a spoken-language identifier.
int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(const SherpaOnnxSpeakerEmbeddingManager *p)
Return the number of enrolled speakers.
int32_t(* SherpaOnnxGeneratedAudioCallback)(const float *samples, int32_t n)
Callback invoked during incremental generation.
Definition c-api.h:2443
void SherpaOnnxOnlineSpeechDenoiserReset(const SherpaOnnxOnlineSpeechDenoiser *sd)
Reset an online denoiser so it can process a new stream.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithZipvoice(const SherpaOnnxOfflineTts *tts, const char *text, const char *prompt_text, const float *prompt_samples, int32_t n_prompt, int32_t prompt_sr, float speed, int32_t num_steps)
Deprecated ZipVoice-specific generation API.
const char * SherpaOnnxOfflineStreamGetOption(const SherpaOnnxOfflineStream *stream, const char *key)
Get a per-stream runtime option for offline ASR.
int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name)
Remove a speaker from the manager.
const SherpaOnnxSpeakerEmbeddingExtractor * SherpaOnnxCreateSpeakerEmbeddingExtractor(const SherpaOnnxSpeakerEmbeddingExtractorConfig *config)
Create a speaker embedding extractor.
struct SherpaOnnxOfflineSpeechDenoiser SherpaOnnxOfflineSpeechDenoiser
Opaque offline speech denoiser handle.
Definition c-api.h:4053
void SherpaOnnxDestroyKeywordSpotter(const SherpaOnnxKeywordSpotter *spotter)
Destroy a keyword spotter.
void SherpaOnnxDestroyOnlineSpeechDenoiser(const SherpaOnnxOnlineSpeechDenoiser *sd)
Destroy an online speech denoiser.
const SherpaOnnxLinearResampler * SherpaOnnxCreateLinearResampler(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, int32_t num_zeros)
Create a linear resampler.
int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(const SherpaOnnxOfflineSpeechDenoiser *sd)
Return the expected sample rate for the denoiser.
const SherpaOnnxWave * SherpaOnnxReadWave(const char *filename)
Read a mono 16-bit PCM WAVE file.
void SherpaOnnxOnlineStreamAcceptWaveform(const SherpaOnnxOnlineStream *stream, int32_t sample_rate, const float *samples, int32_t n)
Append audio samples to a streaming ASR stream.
void SherpaOnnxDestroyOnlineStreamResultJson(const char *s)
Free a JSON string returned by SherpaOnnxGetOnlineStreamResultAsJson().
void SherpaOnnxAudioTaggingFreeResults(const SherpaOnnxAudioEvent *const *p)
Destroy results returned by SherpaOnnxAudioTaggingCompute().
void SherpaOnnxDestroyOfflineSpeakerDiarization(const SherpaOnnxOfflineSpeakerDiarization *sd)
Destroy an offline speaker diarizer.
const SherpaOnnxOfflineRecognizer * SherpaOnnxCreateOfflineRecognizer(const SherpaOnnxOfflineRecognizerConfig *config)
Create a non-streaming ASR recognizer.
void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(const float *v)
Destroy an embedding vector returned by SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding().
int32_t(* SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(int32_t num_processed_chunks, int32_t num_total_chunks)
Same as SherpaOnnxOfflineSpeakerDiarizationProgressCallback but without a user pointer.
Definition c-api.h:3945
const SherpaOnnxOfflineTts * SherpaOnnxCreateOfflineTts(const SherpaOnnxOfflineTtsConfig *config)
Create an offline TTS engine.
void SherpaOnnxVoiceActivityDetectorFlush(const SherpaOnnxVoiceActivityDetector *p)
Flush buffered tail samples and force final segmentation.
void SherpaOnnxDestroyOfflineStream(const SherpaOnnxOfflineStream *stream)
Destroy a non-streaming ASR stream.
const float * SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(const SherpaOnnxSpeakerEmbeddingExtractor *p, const SherpaOnnxOnlineStream *s)
Compute the embedding for a stream.
const SherpaOnnxOfflineRecognizerResult * SherpaOnnxGetOfflineStreamResult(const SherpaOnnxOfflineStream *stream)
Get the recognition result for an offline ASR stream.
void SherpaOnnxFreeMultiChannelWave(const SherpaOnnxMultiChannelWave *wave)
Destroy a multi-channel wave object.
void SherpaOnnxOnlineStreamSetOption(const SherpaOnnxOnlineStream *stream, const char *key, const char *value)
Set a per-stream runtime option.
const char * SherpaOnnxGetOfflineStreamResultAsJson(const SherpaOnnxOfflineStream *stream)
Get the offline ASR result as JSON.
int32_t(* SherpaOnnxOfflineSpeakerDiarizationProgressCallback)(int32_t num_processed_chunks, int32_t num_total_chunks, void *arg)
Progress callback for offline speaker diarization.
Definition c-api.h:3938
const char * SherpaOnnxGetKeywordResultAsJson(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Get the current keyword spotting result as JSON.
const float * SherpaOnnxCircularBufferGet(const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n)
Copy out a slice of samples from a circular buffer.
const char * SherpaOnnxOnlinePunctuationAddPunct(const SherpaOnnxOnlinePunctuation *punctuation, const char *text)
Add punctuation to one text chunk using the online punctuation model.
int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float **v)
Add multiple enrollment embeddings for one speaker.
void SherpaOnnxDecodeOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer, const SherpaOnnxOfflineStream *stream)
Run offline ASR on one stream.
int32_t SherpaOnnxIsKeywordStreamReady(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Check whether a keyword stream has enough audio for decoding.
int32_t SherpaOnnxSpeakerEmbeddingManagerAdd(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float *v)
Add one enrollment embedding for a speaker.
void SherpaOnnxDecodeKeywordStream(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Decode one ready keyword stream.
void SherpaOnnxDestroySpeakerEmbeddingManager(const SherpaOnnxSpeakerEmbeddingManager *p)
Destroy a speaker embedding manager.
const SherpaOnnxOfflineSpeakerDiarizationResult * SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback, void *arg)
Run offline speaker diarization with a progress callback.
const SherpaOnnxResampleOut * SherpaOnnxLinearResamplerResample(const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, int32_t flush)
Resample one chunk of input audio.
struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream
Non-streaming decoding state for one utterance.
Definition c-api.h:1188
#define SHERPA_ONNX_API
Definition c-api.h:106
void SherpaOnnxOnlinePunctuationFreeText(const char *text)
Free a string returned by SherpaOnnxOnlinePunctuationAddPunct().
void SherpaOnnxCircularBufferPush(const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n)
Append samples to a circular buffer.
const char * SherpaOnnxGetOnlineStreamResultAsJson(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Get the current streaming ASR result as JSON.
struct SherpaOnnxOnlineSpeechDenoiser SherpaOnnxOnlineSpeechDenoiser
Opaque online speech denoiser handle.
Definition c-api.h:4149
const SherpaOnnxOnlinePunctuation * SherpaOnnxCreateOnlinePunctuation(const SherpaOnnxOnlinePunctuationConfig *config)
Create an online punctuation processor.
struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream
Streaming decoding state for one utterance or stream.
Definition c-api.h:424
void SherpaOnnxDestroyLinearResampler(const SherpaOnnxLinearResampler *p)
Destroy a linear resampler.
const SherpaOnnxDenoisedAudio * SherpaOnnxOnlineSpeechDenoiserRun(const SherpaOnnxOnlineSpeechDenoiser *sd, const float *samples, int32_t n, int32_t sample_rate)
Process one chunk of streaming audio.
const SherpaOnnxOfflineSourceSeparation * SherpaOnnxCreateOfflineSourceSeparation(const SherpaOnnxOfflineSourceSeparationConfig *config)
Create a source-separation engine.
void SherpaOnnxDestroyOfflineRecognizer(const SherpaOnnxOfflineRecognizer *recognizer)
Destroy a non-streaming recognizer.
void SherpaOnnxLinearResamplerReset(const SherpaOnnxLinearResampler *p)
Reset a linear resampler to its initial state.
void SherpaOnnxDestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r)
Destroy a result returned by SherpaOnnxGetOnlineStreamResult().
const SherpaOnnxAudioEvent *const * SherpaOnnxAudioTaggingCompute(const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s, int32_t top_k)
Run audio tagging on an offline stream.
void SherpaOnnxLinearResamplerResampleFree(const SherpaOnnxResampleOut *p)
Destroy a resampler output chunk.
int32_t SherpaOnnxOfflineSourceSeparationGetOutputSampleRate(const SherpaOnnxOfflineSourceSeparation *ss)
Return the output sample rate of the source-separation engine.
void SherpaOnnxDecodeMultipleKeywordStreams(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream **streams, int32_t n)
Decode multiple ready keyword streams in parallel.
int32_t SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer)
Return the number of currently stored samples.
void SherpaOnnxVoiceActivityDetectorReset(const SherpaOnnxVoiceActivityDetector *p)
Reset a voice activity detector so it can process a new stream.
const SherpaOnnxOnlineSpeechDenoiser * SherpaOnnxCreateOnlineSpeechDenoiser(const SherpaOnnxOnlineSpeechDenoiserConfig *config)
Create an online speech denoiser.
int32_t(* SherpaOnnxGeneratedAudioProgressCallback)(const float *samples, int32_t n, float p)
Progress callback invoked during incremental generation.
Definition c-api.h:2462
void SherpaOnnxDestroyOfflineTtsGeneratedAudio(const SherpaOnnxGeneratedAudio *p)
Destroy audio returned by a TTS generation API.
const SherpaOnnxOfflineStream * SherpaOnnxAudioTaggingCreateOfflineStream(const SherpaOnnxAudioTagging *tagger)
Create an offline stream for audio tagging.
const char * SherpaOfflinePunctuationAddPunct(const SherpaOnnxOfflinePunctuation *punct, const char *text)
Add punctuation to a complete input text.
int32_t SherpaOnnxVoiceActivityDetectorEmpty(const SherpaOnnxVoiceActivityDetector *p)
Check whether the detector currently has any completed speech segment.
int32_t SherpaOnnxIsOnlineStreamReady(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Check whether a streaming ASR stream is ready to decode.
int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(const SherpaOnnxSpeakerEmbeddingExtractor *p, const SherpaOnnxOnlineStream *s)
Check whether enough audio has been provided to compute an embedding.
void SherpaOnnxCircularBufferPop(const SherpaOnnxCircularBuffer *buffer, int32_t n)
Drop samples from the front of a circular buffer.
int32_t SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(const SherpaOnnxOnlineSpeechDenoiser *sd)
Return the recommended chunk size in samples for streaming input.
void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(const char *name)
Free a string returned by SherpaOnnxSpeakerEmbeddingManagerSearch().
void SherpaOnnxDestroySpeakerEmbeddingExtractor(const SherpaOnnxSpeakerEmbeddingExtractor *p)
Destroy a speaker embedding extractor.
const SherpaOnnxOfflineSpeakerDiarization * SherpaOnnxCreateOfflineSpeakerDiarization(const SherpaOnnxOfflineSpeakerDiarizationConfig *config)
Create an offline speaker diarization pipeline.
struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer
Non-streaming recognizer handle.
Definition c-api.h:1185
const char * SherpaOnnxSpeakerEmbeddingManagerSearch(const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold)
Search for the best matching enrolled speaker.
void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Destroy a diarization result.
const SherpaOnnxSpeakerEmbeddingManager * SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim)
Create a speaker embedding manager.
int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(const SherpaOnnxLinearResampler *p)
Return the resampler output sample rate.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithProgressCallback(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioProgressCallback callback)
Generate speech with a progress callback.
void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger)
Destroy an audio tagger.
int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts)
Return the output sample rate of a TTS engine.
void SherpaOnnxOfflineSpeakerDiarizationSetConfig(const SherpaOnnxOfflineSpeakerDiarization *sd, const SherpaOnnxOfflineSpeakerDiarizationConfig *config)
Update clustering-related settings of an existing diarizer.
void SherpaOnnxDestroyCircularBuffer(const SherpaOnnxCircularBuffer *buffer)
Destroy a circular buffer.
void SherpaOnnxDestroyOfflineStreamResultJson(const char *s)
Free a JSON string returned by SherpaOnnxGetOfflineStreamResultAsJson().
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithCallback(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioCallback callback)
Generate speech and receive incremental audio chunks through a callback.
int32_t SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(const SherpaOnnxOfflineSpeakerDiarization *sd)
Return the expected input sample rate.
void SherpaOnnxAcceptWaveformOffline(const SherpaOnnxOfflineStream *stream, int32_t sample_rate, const float *samples, int32_t n)
Provide the full utterance to an offline ASR stream.
const char *const * SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(const SherpaOnnxSpeakerEmbeddingManager *p)
Return all enrolled speaker names.
const SherpaOnnxOnlineRecognizerResult * SherpaOnnxGetOnlineStreamResult(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Get the current streaming ASR result for a stream.
struct SherpaOnnxSpeakerEmbeddingManager SherpaOnnxSpeakerEmbeddingManager
Opaque speaker embedding manager handle.
Definition c-api.h:3152
struct SherpaOnnxKeywordSpotter SherpaOnnxKeywordSpotter
Opaque keyword spotter handle.
Definition c-api.h:1685
int32_t SherpaOnnxSpeakerEmbeddingManagerContains(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name)
Check whether a speaker is enrolled.
int32_t SherpaOnnxOfflineStreamHasOption(const SherpaOnnxOfflineStream *stream, const char *key)
Check whether a per-stream runtime option exists.
void SherpaOnnxDestroyOnlineRecognizer(const SherpaOnnxOnlineRecognizer *recognizer)
Destroy a streaming recognizer.
void SherpaOnnxWriteWaveToBuffer(const float *samples, int32_t n, int32_t sample_rate, char *buffer)
Write a mono 16-bit WAVE file to a caller-provided buffer.
int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float *v, float threshold)
Verify whether a query embedding matches a named speaker.
struct SherpaOnnxOnlinePunctuation SherpaOnnxOnlinePunctuation
Opaque online punctuation handle.
Definition c-api.h:3613
const SherpaOnnxOfflineStream * SherpaOnnxCreateOfflineStreamWithHotwords(const SherpaOnnxOfflineRecognizer *recognizer, const char *hotwords)
Create a non-streaming ASR input stream with per-stream hotwords.
int32_t SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer)
Return the current head index of the buffer timeline.
struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer
Streaming recognizer handle.
Definition c-api.h:422
void SherpaOnnxDestroyOfflineSpeechDenoiser(const SherpaOnnxOfflineSpeechDenoiser *sd)
Destroy an offline speech denoiser.
int32_t SherpaOnnxOnlineStreamIsEndpoint(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Check whether endpoint detection has triggered for a stream.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerate(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed)
Generate speech from text using the simple sid/speed interface.
void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(const char *const *names)
Free an array returned by SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers().
const SherpaOnnxDenoisedAudio * SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n, int32_t sample_rate)
Run offline speech denoising on a complete waveform.
void SherpaOnnxDestroySourceSeparationOutput(const SherpaOnnxSourceSeparationOutput *p)
Destroy the output of a source-separation run.
const SherpaOnnxKeywordSpotter * SherpaOnnxCreateKeywordSpotter(const SherpaOnnxKeywordSpotterConfig *config)
Create a keyword spotter.
void SherpaOnnxVoiceActivityDetectorPop(const SherpaOnnxVoiceActivityDetector *p)
Remove the front speech segment from the detector queue.
struct SherpaOnnxCircularBuffer SherpaOnnxCircularBuffer
Opaque circular-buffer handle used by helper APIs.
Definition c-api.h:1936
void SherpaOnnxDestroyKeywordResult(const SherpaOnnxKeywordResult *r)
Destroy a keyword result snapshot.
void SherpaOnnxOfflineStreamSetOption(const SherpaOnnxOfflineStream *stream, const char *key, const char *value)
Set a per-stream runtime option for offline ASR.
int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Return the number of speakers in a diarization result.
const SherpaOnnxOnlineStream * SherpaOnnxCreateKeywordStreamWithKeywords(const SherpaOnnxKeywordSpotter *spotter, const char *keywords)
Create a keyword spotting stream with extra or replacement keywords.
void SherpaOnnxDestroyDisplay(const SherpaOnnxDisplay *display)
Destroy a display helper.
const SherpaOnnxWave * SherpaOnnxReadWaveFromBinaryData(const char *data, int32_t n)
Read a mono 16-bit PCM WAVE file from binary memory.
void SherpaOnnxDestroyOfflineRecognizerResult(const SherpaOnnxOfflineRecognizerResult *r)
Destroy a result returned by SherpaOnnxGetOfflineStreamResult().
const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult * SherpaOnnxSpeakerEmbeddingManagerGetBestMatches(const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold, int32_t n)
Return up to n best matches above a similarity threshold.
struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts
Opaque offline TTS handle.
Definition c-api.h:2473
void SherpaOnnxDecodeMultipleOnlineStreams(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream **streams, int32_t n)
Decode multiple streaming ASR streams in parallel.
int32_t SherpaOnnxFileExists(const char *filename)
Check whether a file exists.
struct SherpaOnnxOfflineSpeakerDiarizationResult SherpaOnnxOfflineSpeakerDiarizationResult
Opaque offline speaker diarization result.
Definition c-api.h:3872
void SherpaOfflinePunctuationFreeText(const char *text)
Free a string returned by SherpaOfflinePunctuationAddPunct().
void SherpaOnnxFreeKeywordResultJson(const char *s)
Free a JSON string returned by SherpaOnnxGetKeywordResultAsJson().
const SherpaOnnxDisplay * SherpaOnnxCreateDisplay(int32_t max_word_per_line)
Create a display helper.
const SherpaOnnxSpokenLanguageIdentification * SherpaOnnxCreateSpokenLanguageIdentification(const SherpaOnnxSpokenLanguageIdentificationConfig *config)
Create a spoken-language identifier.
void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(const SherpaOnnxOfflineSpeakerDiarizationSegment *s)
Destroy a segment array returned by SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime().
void SherpaOnnxDestroyOfflineSourceSeparation(const SherpaOnnxOfflineSourceSeparation *ss)
Destroy a source-separation engine.
const SherpaOnnxOfflineStream * SherpaOnnxCreateOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer)
Create a non-streaming ASR input stream.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithConfig(const SherpaOnnxOfflineTts *tts, const char *text, const SherpaOnnxGenerationConfig *config, SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg)
Generate speech using the advanced configuration interface.
struct SherpaOnnxLinearResampler SherpaOnnxLinearResampler
Opaque linear resampler handle.
Definition c-api.h:3662
void SherpaOnnxDecodeMultipleOfflineStreams(const SherpaOnnxOfflineRecognizer *recognizer, const SherpaOnnxOfflineStream **streams, int32_t n)
Run offline ASR on multiple streams in parallel.
void SherpaOnnxCircularBufferReset(const SherpaOnnxCircularBuffer *buffer)
Clear a circular buffer and reset its head index.
void SherpaOnnxCircularBufferFree(const float *p)
Free an array returned by SherpaOnnxCircularBufferGet().
int32_t SherpaOnnxOnlineSpeechDenoiserGetSampleRate(const SherpaOnnxOnlineSpeechDenoiser *sd)
Return the expected input sample rate for the online denoiser.
void SherpaOnnxVoiceActivityDetectorAcceptWaveform(const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n)
Feed audio samples to the VAD.
const SherpaOnnxSpokenLanguageIdentificationResult * SherpaOnnxSpokenLanguageIdentificationCompute(const SherpaOnnxSpokenLanguageIdentification *slid, const SherpaOnnxOfflineStream *s)
Run spoken-language identification on an offline stream.
struct SherpaOnnxOfflineSourceSeparation SherpaOnnxOfflineSourceSeparation
Opaque source-separation engine handle.
Definition c-api.h:4261
struct SherpaOnnxOfflineSpeakerDiarization SherpaOnnxOfflineSpeakerDiarization
Opaque offline speaker diarization handle.
Definition c-api.h:3828
const SherpaOnnxOnlineStream * SherpaOnnxSpeakerEmbeddingExtractorCreateStream(const SherpaOnnxSpeakerEmbeddingExtractor *p)
Create a streaming feature buffer for embedding extraction.
const SherpaOnnxDenoisedAudio * SherpaOnnxOnlineSpeechDenoiserFlush(const SherpaOnnxOnlineSpeechDenoiser *sd)
Flush buffered samples and reset the online denoiser.
void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p)
Destroy a speech segment returned by SherpaOnnxVoiceActivityDetectorFront().
void SherpaOnnxDestroyOnlineStream(const SherpaOnnxOnlineStream *stream)
Destroy a streaming ASR state object.
void SherpaOnnxDestroyVoiceActivityDetector(const SherpaOnnxVoiceActivityDetector *p)
Destroy a voice activity detector.
const SherpaOnnxOnlineStream * SherpaOnnxCreateOnlineStreamWithHotwords(const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords)
Create a streaming ASR state object with per-stream hotwords.
void SherpaOnnxVoiceActivityDetectorClear(const SherpaOnnxVoiceActivityDetector *p)
Remove all queued speech segments.
const SherpaOnnxVoiceActivityDetector * SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds)
Create a voice activity detector.
const SherpaOnnxSourceSeparationOutput * SherpaOnnxOfflineSourceSeparationProcess(const SherpaOnnxOfflineSourceSeparation *ss, const float *const *samples, int32_t num_channels, int32_t num_samples, int32_t sample_rate)
Run source separation on multi-channel audio.
int64_t SherpaOnnxWaveFileSize(int32_t n_samples)
Return the number of bytes needed for a mono 16-bit WAVE file.
SherpaOnnxOfflineStream * SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(const SherpaOnnxSpokenLanguageIdentification *slid)
Create an offline stream for spoken-language identification.
const char * SherpaOnnxGetGitSha1()
Return the Git SHA1 used to build the library.
const SherpaOnnxOnlineStream * SherpaOnnxCreateKeywordStream(const SherpaOnnxKeywordSpotter *spotter)
Create a keyword spotting stream using the spotter's built-in keyword list.
int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts)
Return the number of available speaker IDs.
const SherpaOnnxOfflineSpeakerDiarizationSegment * SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Return segments sorted by start time.
void SherpaOnnxOnlineStreamReset(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Reset a streaming ASR stream after an endpoint or utterance boundary.
void SherpaOnnxDestroySpokenLanguageIdentificationResult(const SherpaOnnxSpokenLanguageIdentificationResult *r)
Destroy a spoken-language identification result.
const SherpaOnnxOfflineSpeechDenoiser * SherpaOnnxCreateOfflineSpeechDenoiser(const SherpaOnnxOfflineSpeechDenoiserConfig *config)
Create an offline speech denoiser.
struct SherpaOnnxSpeakerEmbeddingExtractor SherpaOnnxSpeakerEmbeddingExtractor
Opaque speaker embedding extractor handle.
Definition c-api.h:3053
void SherpaOnnxDestroyOnlinePunctuation(const SherpaOnnxOnlinePunctuation *punctuation)
Destroy an online punctuation processor.
int32_t(* SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples, int32_t n, void *arg)
Same as SherpaOnnxGeneratedAudioCallback but with an extra user pointer.
Definition c-api.h:2450
struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging
Opaque audio tagger handle.
Definition c-api.h:3438
int32_t SherpaOnnxOnlineStreamHasOption(const SherpaOnnxOnlineStream *stream, const char *key)
Check whether a per-stream runtime option exists.
struct SherpaOnnxDisplay SherpaOnnxDisplay
Helper for pretty-printing incremental recognition results.
Definition c-api.h:778
const SherpaOnnxAudioTagging * SherpaOnnxCreateAudioTagging(const SherpaOnnxAudioTaggingConfig *config)
Create an audio tagger.
void SherpaOnnxOnlineStreamInputFinished(const SherpaOnnxOnlineStream *stream)
Signal end-of-input for a streaming ASR stream.
void SherpaOnnxDestroyOfflinePunctuation(const SherpaOnnxOfflinePunctuation *punct)
Destroy an offline punctuation processor.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg)
Generate speech with a progress callback that receives a user pointer.
int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename)
Write floating-point PCM to a mono 16-bit WAVE file.
const SherpaOnnxOnlineStream * SherpaOnnxCreateOnlineStream(const SherpaOnnxOnlineRecognizer *recognizer)
Create a streaming ASR state object.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg)
Same as SherpaOnnxOfflineTtsGenerateWithCallback() but with a user pointer.
int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(const SherpaOnnxSpeakerEmbeddingExtractor *p)
Return the embedding dimension produced by the extractor.
void SherpaOnnxResetKeywordStream(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Reset a keyword stream after a keyword is detected.
void SherpaOnnxDecodeOnlineStream(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Decode one step of a streaming ASR stream.
const SherpaOnnxKeywordResult * SherpaOnnxGetKeywordResult(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Get the current keyword spotting result for a stream.
const char * SherpaOnnxOnlineStreamGetOption(const SherpaOnnxOnlineStream *stream, const char *key)
Get a per-stream runtime option.
void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p)
Destroy denoised audio returned by a speech enhancement API.
struct SherpaOnnxOfflinePunctuation SherpaOnnxOfflinePunctuation
Opaque offline punctuation handle.
Definition c-api.h:3540
const SherpaOnnxOfflineSpeakerDiarizationResult * SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback)
Run offline speaker diarization with a progress callback that has no user pointer.
void SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches(const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult *r)
Destroy a best-matches result.
const SherpaOnnxOnlineRecognizer * SherpaOnnxCreateOnlineRecognizer(const SherpaOnnxOnlineRecognizerConfig *config)
Create a streaming ASR recognizer.
const char * SherpaOnnxGetGitDate()
Return the Git commit date of the source used to build the library.
int32_t SherpaOnnxOfflineSourceSeparationGetNumberOfStems(const SherpaOnnxOfflineSourceSeparation *ss)
Return the number of stems produced by the engine.
struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector
Opaque voice activity detector handle.
Definition c-api.h:2054
int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(const SherpaOnnxLinearResampler *p)
Return the resampler input sample rate.
const SherpaOnnxCircularBuffer * SherpaOnnxCreateCircularBuffer(int32_t capacity)
Create a floating-point circular buffer.
int32_t(* SherpaOnnxGeneratedAudioProgressCallbackWithArg)(const float *samples, int32_t n, float p, void *arg)
Same as SherpaOnnxGeneratedAudioProgressCallback but with an extra user pointer.
Definition c-api.h:2469
const char * SherpaOnnxGetVersionStr()
Return the sherpa-onnx version string.
int32_t SherpaOnnxWriteWaveMultiChannel(const float *const *samples, int32_t n, int32_t sample_rate, int32_t num_channels, const char *filename)
Write multi-channel audio to a WAVE file (16-bit PCM).
const SherpaOnnxOfflinePunctuation * SherpaOnnxCreateOfflinePunctuation(const SherpaOnnxOfflinePunctuationConfig *config)
Create an offline punctuation processor.
int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float *v, int32_t n)
Add multiple enrollment embeddings packed in one flat array.
void SherpaOnnxPrint(const SherpaOnnxDisplay *display, int32_t idx, const char *s)
Print one line of text using the display helper.
void SherpaOnnxDestroyOfflineTts(const SherpaOnnxOfflineTts *tts)
Destroy an offline TTS engine.
One audio-tagging prediction.
Definition c-api.h:3428
const char * name
Definition c-api.h:3430
Configuration for audio tagging.
Definition c-api.h:3415
SherpaOnnxAudioTaggingModelConfig model
Definition c-api.h:3417
Audio-tagging model configuration.
Definition c-api.h:3387
SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer
Definition c-api.h:3389
Denoised audio returned by offline or online speech enhancement APIs.
Definition c-api.h:4096
const float * samples
Definition c-api.h:4098
Fast clustering configuration.
Definition c-api.h:3791
Feature extraction settings for ASR.
Definition c-api.h:277
Generated waveform returned by TTS APIs.
Definition c-api.h:2426
const float * samples
Definition c-api.h:2428
Generation-time parameters shared by advanced TTS APIs.
Definition c-api.h:2679
const char * reference_text
Definition c-api.h:2693
const float * reference_audio
Definition c-api.h:2687
Configuration for homophone replacement.
Definition c-api.h:294
Snapshot of the current keyword spotting result.
Definition c-api.h:1577
const char * keyword
Definition c-api.h:1584
const char * json
Definition c-api.h:1619
const char *const * tokens_arr
Definition c-api.h:1598
const char * tokens
Definition c-api.h:1591
Configuration for keyword spotting.
Definition c-api.h:1662
SherpaOnnxOnlineModelConfig model_config
Definition c-api.h:1666
SherpaOnnxFeatureConfig feat_config
Definition c-api.h:1664
Decoded multi-channel WAVE file content.
Definition c-api.h:2851
const float *const * samples
Definition c-api.h:2856
Configuration for a Canary model.
Definition c-api.h:865
Configuration for a Cohere Transcribe model.
Definition c-api.h:879
Configuration for a Dolphin model.
Definition c-api.h:945
Configuration for a FireRedAsr CTC model.
Definition c-api.h:901
Configuration for a FireRedAsr encoder/decoder model.
Definition c-api.h:893
Configuration for an offline FunASR Nano model.
Definition c-api.h:969
Configuration for an offline language model.
Definition c-api.h:927
const char * model
Definition c-api.h:929
Configuration for a MedASR CTC model.
Definition c-api.h:1024
Model configuration shared by offline ASR recognizers.
Definition c-api.h:1046
SherpaOnnxOfflineQwen3ASRModelConfig qwen3_asr
Definition c-api.h:1097
SherpaOnnxOfflineParaformerModelConfig paraformer
Definition c-api.h:1050
const char * modeling_unit
Definition c-api.h:1069
SherpaOnnxOfflineDolphinModelConfig dolphin
Definition c-api.h:1081
SherpaOnnxOfflineCanaryModelConfig canary
Definition c-api.h:1085
SherpaOnnxOfflineMoonshineModelConfig moonshine
Definition c-api.h:1077
SherpaOnnxOfflineFunASRNanoModelConfig funasr_nano
Definition c-api.h:1093
const char * telespeech_ctc
Definition c-api.h:1073
SherpaOnnxOfflineFireRedAsrCtcModelConfig fire_red_asr_ctc
Definition c-api.h:1095
SherpaOnnxOfflineOmnilingualAsrCtcModelConfig omnilingual
Definition c-api.h:1089
SherpaOnnxOfflineSenseVoiceModelConfig sense_voice
Definition c-api.h:1075
SherpaOnnxOfflineWhisperModelConfig whisper
Definition c-api.h:1054
SherpaOnnxOfflineTdnnModelConfig tdnn
Definition c-api.h:1056
SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc
Definition c-api.h:1052
SherpaOnnxOfflineMedAsrCtcModelConfig medasr
Definition c-api.h:1091
SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr
Definition c-api.h:1079
SherpaOnnxOfflineCohereTranscribeModelConfig cohere_transcribe
Definition c-api.h:1099
SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc
Definition c-api.h:1083
SherpaOnnxOfflineTransducerModelConfig transducer
Definition c-api.h:1048
SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc
Definition c-api.h:1087
Configuration for a Moonshine model.
Definition c-api.h:907
Configuration for a non-streaming NeMo CTC model.
Definition c-api.h:837
Configuration for an omnilingual offline CTC model.
Definition c-api.h:963
Configuration for a non-streaming Paraformer model.
Definition c-api.h:831
Configuration for offline punctuation.
Definition c-api.h:3534
SherpaOnnxOfflinePunctuationModelConfig model
Definition c-api.h:3536
Offline punctuation model configuration.
Definition c-api.h:3522
Configuration for an offline Qwen3-ASR model.
Definition c-api.h:999
Configuration for a non-streaming ASR recognizer.
Definition c-api.h:1155
SherpaOnnxOfflineModelConfig model_config
Definition c-api.h:1159
SherpaOnnxFeatureConfig feat_config
Definition c-api.h:1157
SherpaOnnxHomophoneReplacerConfig hr
Definition c-api.h:1181
SherpaOnnxOfflineLMConfig lm_config
Definition c-api.h:1161
Recognition result for a non-streaming ASR stream.
Definition c-api.h:1442
const char *const * segment_texts_arr
Definition c-api.h:1498
const char *const * tokens_arr
Definition c-api.h:1466
Configuration for a SenseVoice model.
Definition c-api.h:935
Top-level source-separation configuration.
Definition c-api.h:4256
SherpaOnnxOfflineSourceSeparationModelConfig model
Definition c-api.h:4257
Source-separation model configuration.
Definition c-api.h:4247
SherpaOnnxOfflineSourceSeparationUvrModelConfig uvr
Definition c-api.h:4249
SherpaOnnxOfflineSourceSeparationSpleeterModelConfig spleeter
Definition c-api.h:4248
Spleeter source-separation model configuration.
Definition c-api.h:4233
UVR (MDX-Net) source-separation model configuration.
Definition c-api.h:4241
Configuration for offline speaker diarization.
Definition c-api.h:3814
SherpaOnnxFastClusteringConfig clustering
Definition c-api.h:3820
SherpaOnnxSpeakerEmbeddingExtractorConfig embedding
Definition c-api.h:3818
SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation
Definition c-api.h:3816
Segmentation model configuration for offline speaker diarization.
Definition c-api.h:3773
SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote
Definition c-api.h:3775
Pyannote speaker-segmentation model configuration.
Definition c-api.h:3762
Configuration for offline speech denoising.
Definition c-api.h:4047
SherpaOnnxOfflineSpeechDenoiserModelConfig model
Definition c-api.h:4049
DPDFNet offline denoiser model configuration.
Definition c-api.h:4022
GTCRN offline denoiser model configuration.
Definition c-api.h:4016
Speech denoiser model configuration shared by offline and online APIs.
Definition c-api.h:4033
SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn
Definition c-api.h:4035
SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig dpdfnet
Definition c-api.h:4043
Configuration for a TDNN model.
Definition c-api.h:921
Configuration for a non-streaming transducer model.
Definition c-api.h:821
Configuration for offline text-to-speech.
Definition c-api.h:2407
const char * rule_fars
Definition c-api.h:2415
SherpaOnnxOfflineTtsModelConfig model
Definition c-api.h:2409
const char * rule_fsts
Definition c-api.h:2411
Configuration for a Kitten TTS model.
Definition c-api.h:2275
Configuration for a Kokoro TTS model.
Definition c-api.h:2255
Configuration for a Matcha TTS model.
Definition c-api.h:2235
Configuration shared by offline TTS models.
Definition c-api.h:2367
SherpaOnnxOfflineTtsVitsModelConfig vits
Definition c-api.h:2369
SherpaOnnxOfflineTtsKokoroModelConfig kokoro
Definition c-api.h:2379
SherpaOnnxOfflineTtsSupertonicModelConfig supertonic
Definition c-api.h:2387
SherpaOnnxOfflineTtsKittenModelConfig kitten
Definition c-api.h:2381
SherpaOnnxOfflineTtsPocketModelConfig pocket
Definition c-api.h:2385
SherpaOnnxOfflineTtsMatchaModelConfig matcha
Definition c-api.h:2377
SherpaOnnxOfflineTtsZipvoiceModelConfig zipvoice
Definition c-api.h:2383
Configuration for a Pocket TTS model.
Definition c-api.h:2313
Configuration for a Supertonic TTS model.
Definition c-api.h:2333
Configuration for a VITS TTS model.
Definition c-api.h:2215
Configuration for a ZipVoice TTS model.
Definition c-api.h:2289
Configuration for an offline WeNet CTC model.
Definition c-api.h:957
Configuration for a non-streaming Whisper model.
Definition c-api.h:845
Zipformer audio-tagging model configuration.
Definition c-api.h:3364
Configuration for an offline Zipformer CTC model.
Definition c-api.h:951
Configuration for HLG/FST-based online CTC decoding.
Definition c-api.h:286
Model configuration shared by streaming ASR recognizers.
Definition c-api.h:232
const char * model_type
Definition c-api.h:248
SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2_ctc
Definition c-api.h:238
const char * modeling_unit
Definition c-api.h:257
SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc
Definition c-api.h:266
const char * provider
Definition c-api.h:244
SherpaOnnxOnlineToneCtcModelConfig t_one_ctc
Definition c-api.h:268
const char * bpe_vocab
Definition c-api.h:259
const char * tokens_buf
Definition c-api.h:262
SherpaOnnxOnlineTransducerModelConfig transducer
Definition c-api.h:234
SherpaOnnxOnlineParaformerModelConfig paraformer
Definition c-api.h:236
Configuration for a streaming NeMo CTC model.
Definition c-api.h:206
Configuration for a streaming Paraformer model.
Definition c-api.h:190
Configuration for online punctuation.
Definition c-api.h:3607
SherpaOnnxOnlinePunctuationModelConfig model
Definition c-api.h:3609
Online punctuation model configuration.
Definition c-api.h:3593
Configuration for a streaming ASR recognizer.
Definition c-api.h:337
SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config
Definition c-api.h:368
SherpaOnnxOnlineModelConfig model_config
Definition c-api.h:341
SherpaOnnxHomophoneReplacerConfig hr
Definition c-api.h:381
SherpaOnnxFeatureConfig feat_config
Definition c-api.h:339
Incremental recognition result for a streaming ASR stream.
Definition c-api.h:391
const char *const * tokens_arr
Definition c-api.h:403
Configuration for streaming speech denoising.
Definition c-api.h:4143
SherpaOnnxOfflineSpeechDenoiserModelConfig model
Definition c-api.h:4145
Configuration for a streaming T-One CTC model.
Definition c-api.h:212
Configuration for a streaming transducer model.
Definition c-api.h:174
Configuration for a streaming Zipformer2 CTC model.
Definition c-api.h:200
Output chunk returned by SherpaOnnxLinearResamplerResample().
Definition c-api.h:3709
const float * samples
Definition c-api.h:3711
Configuration for a Silero VAD model.
Definition c-api.h:1848
Output of a source-separation run.
Definition c-api.h:4314
const SherpaOnnxSourceSeparationStem * stems
Definition c-api.h:4316
A single stem (one output track) with one or more channels.
Definition c-api.h:4304
Configuration for speaker embedding extraction.
Definition c-api.h:3041
Collection of best speaker matches.
Definition c-api.h:3273
const SherpaOnnxSpeakerEmbeddingManagerSpeakerMatch * matches
Definition c-api.h:3275
One speaker match returned by the best-matches API.
Definition c-api.h:3261
One detected speech segment returned by the VAD.
Definition c-api.h:2044
Configuration for spoken language identification.
Definition c-api.h:2924
SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper
Definition c-api.h:2926
Result of spoken-language identification.
Definition c-api.h:2979
Whisper-based model files for spoken language identification.
Definition c-api.h:2899
Configuration for a Ten VAD model.
Definition c-api.h:1869
Configuration shared by voice activity detectors.
Definition c-api.h:1920
SherpaOnnxSileroVadModelConfig silero_vad
Definition c-api.h:1922
SherpaOnnxTenVadModelConfig ten_vad
Definition c-api.h:1932
const char * provider
Definition c-api.h:1928
Decoded mono WAVE file content.
Definition c-api.h:2802
int32_t num_samples
Definition c-api.h:2808
const float * samples
Definition c-api.h:2804
int32_t sample_rate
Definition c-api.h:2806