71#ifndef SHERPA_ONNX_C_API_C_API_H_
72#define SHERPA_ONNX_C_API_C_API_H_
85#pragma GCC diagnostic push
86#pragma GCC diagnostic ignored "-Wattributes"
90#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
91#define SHERPA_ONNX_EXPORT __declspec(dllexport)
92#define SHERPA_ONNX_IMPORT __declspec(dllimport)
94#define SHERPA_ONNX_EXPORT
95#define SHERPA_ONNX_IMPORT
98#define SHERPA_ONNX_EXPORT __attribute__((visibility("default")))
100#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT
103#if defined(SHERPA_ONNX_BUILD_MAIN_LIB)
104#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
106#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
109#ifndef SHERPA_ONNX_DEPRECATED
111#define SHERPA_ONNX_DEPRECATED(msg) __declspec(deprecated(msg))
112#elif defined(__GNUC__) || defined(__clang__)
113#define SHERPA_ONNX_DEPRECATED(msg) __attribute__((deprecated(msg)))
115#define SHERPA_ONNX_DEPRECATED(msg)
551 const float *samples, int32_t n);
792 int32_t max_word_per_line);
813 int32_t idx, const char *s);
1353 const float *samples, int32_t n);
2084 float buffer_size_in_seconds);
2463 const float *samples, int32_t n, float p);
2470 const float *samples, int32_t n, float p, void *arg);
2546 "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
2658 const char *prompt_text, const float *prompt_samples, int32_t n_prompt,
2659 int32_t prompt_sr, float speed, int32_t num_steps);
2758 int32_t sample_rate,
2759 const char *filename);
2780 int32_t n, int32_t sample_rate,
2794 const float *const *samples, int32_t n, int32_t sample_rate,
2795 int32_t num_channels, const char *filename);
2838 const char *data, int32_t n);
3187 const char *name, const float *v);
3223 const float *v, int32_t n);
3315 const float *v, float threshold);
3358 const char *const *names);
3685 int32_t samp_rate_out_hz,
3686 float filter_cutoff_hz, int32_t num_zeros);
3939 int32_t num_processed_chunks, int32_t num_total_chunks, void *arg);
3946 int32_t num_processed_chunks, int32_t num_total_chunks);
4126 const float *samples, int32_t n,
4127 int32_t sample_rate);
4205 const float *samples, int32_t n,
4206 int32_t sample_rate);
4342 int32_t num_channels, int32_t num_samples, int32_t sample_rate);
4360typedef struct NativeResourceManager NativeResourceManager;
4373SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
4375 NativeResourceManager *mgr);
4388SherpaOnnxCreateOnlineSpeechDenoiserOHOS(
4390 NativeResourceManager *mgr);
4403SherpaOnnxCreateOnlineRecognizerOHOS(
4417SherpaOnnxCreateOfflineRecognizerOHOS(
4419 NativeResourceManager *mgr);
4433SherpaOnnxCreateVoiceActivityDetectorOHOS(
4435 NativeResourceManager *mgr);
4461SherpaOnnxCreateOfflinePunctuationOHOS(
4463 NativeResourceManager *mgr);
4476SherpaOnnxCreateOnlinePunctuationOHOS(
4478 NativeResourceManager *mgr);
4492SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
4494 NativeResourceManager *mgr);
4508 NativeResourceManager *mgr);
4522SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
4524 NativeResourceManager *mgr);
4538SherpaOnnxCreateOfflineSourceSeparationOHOS(
4540 NativeResourceManager *mgr);
4543#if defined(__GNUC__)
4544#pragma GCC diagnostic pop
int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Return the number of diarization segments.
int32_t SherpaOnnxVoiceActivityDetectorDetected(const SherpaOnnxVoiceActivityDetector *p)
Check whether the detector is currently inside speech.
const SherpaOnnxSpeechSegment * SherpaOnnxVoiceActivityDetectorFront(const SherpaOnnxVoiceActivityDetector *p)
Get the first queued speech segment.
void SherpaOnnxOfflineRecognizerSetConfig(const SherpaOnnxOfflineRecognizer *recognizer, const SherpaOnnxOfflineRecognizerConfig *config)
Update the configuration of an existing offline recognizer.
const SherpaOnnxMultiChannelWave * SherpaOnnxReadWaveMultiChannel(const char *filename)
Read a multi-channel 16-bit PCM WAVE file.
void SherpaOnnxFreeWave(const SherpaOnnxWave *wave)
Destroy a wave object returned by SherpaOnnxReadWave() or SherpaOnnxReadWaveFromBinaryData().
struct SherpaOnnxSpokenLanguageIdentification SherpaOnnxSpokenLanguageIdentification
Opaque spoken-language identification handle.
const SherpaOnnxOfflineSpeakerDiarizationResult * SherpaOnnxOfflineSpeakerDiarizationProcess(const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, int32_t n)
Run offline speaker diarization.
void SherpaOnnxDestroySpokenLanguageIdentification(const SherpaOnnxSpokenLanguageIdentification *slid)
Destroy a spoken-language identifier.
int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(const SherpaOnnxSpeakerEmbeddingManager *p)
Return the number of enrolled speakers.
int32_t(* SherpaOnnxGeneratedAudioCallback)(const float *samples, int32_t n)
Callback invoked during incremental generation.
void SherpaOnnxOnlineSpeechDenoiserReset(const SherpaOnnxOnlineSpeechDenoiser *sd)
Reset an online denoiser so it can process a new stream.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithZipvoice(const SherpaOnnxOfflineTts *tts, const char *text, const char *prompt_text, const float *prompt_samples, int32_t n_prompt, int32_t prompt_sr, float speed, int32_t num_steps)
Deprecated ZipVoice-specific generation API.
const char * SherpaOnnxOfflineStreamGetOption(const SherpaOnnxOfflineStream *stream, const char *key)
Get a per-stream runtime option for offline ASR.
int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name)
Remove a speaker from the manager.
const SherpaOnnxSpeakerEmbeddingExtractor * SherpaOnnxCreateSpeakerEmbeddingExtractor(const SherpaOnnxSpeakerEmbeddingExtractorConfig *config)
Create a speaker embedding extractor.
struct SherpaOnnxOfflineSpeechDenoiser SherpaOnnxOfflineSpeechDenoiser
Opaque offline speech denoiser handle.
void SherpaOnnxDestroyKeywordSpotter(const SherpaOnnxKeywordSpotter *spotter)
Destroy a keyword spotter.
void SherpaOnnxDestroyOnlineSpeechDenoiser(const SherpaOnnxOnlineSpeechDenoiser *sd)
Destroy an online speech denoiser.
const SherpaOnnxLinearResampler * SherpaOnnxCreateLinearResampler(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, int32_t num_zeros)
Create a linear resampler.
int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(const SherpaOnnxOfflineSpeechDenoiser *sd)
Return the expected sample rate for the denoiser.
const SherpaOnnxWave * SherpaOnnxReadWave(const char *filename)
Read a mono 16-bit PCM WAVE file.
void SherpaOnnxOnlineStreamAcceptWaveform(const SherpaOnnxOnlineStream *stream, int32_t sample_rate, const float *samples, int32_t n)
Append audio samples to a streaming ASR stream.
void SherpaOnnxDestroyOnlineStreamResultJson(const char *s)
Free a JSON string returned by SherpaOnnxGetOnlineStreamResultAsJson().
void SherpaOnnxAudioTaggingFreeResults(const SherpaOnnxAudioEvent *const *p)
Destroy results returned by SherpaOnnxAudioTaggingCompute().
void SherpaOnnxDestroyOfflineSpeakerDiarization(const SherpaOnnxOfflineSpeakerDiarization *sd)
Destroy an offline speaker diarizer.
const SherpaOnnxOfflineRecognizer * SherpaOnnxCreateOfflineRecognizer(const SherpaOnnxOfflineRecognizerConfig *config)
Create a non-streaming ASR recognizer.
void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(const float *v)
Destroy an embedding vector returned by SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding().
int32_t(* SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(int32_t num_processed_chunks, int32_t num_total_chunks)
Same as SherpaOnnxOfflineSpeakerDiarizationProgressCallback but without a user pointer.
const SherpaOnnxOfflineTts * SherpaOnnxCreateOfflineTts(const SherpaOnnxOfflineTtsConfig *config)
Create an offline TTS engine.
void SherpaOnnxVoiceActivityDetectorFlush(const SherpaOnnxVoiceActivityDetector *p)
Flush buffered tail samples and force final segmentation.
void SherpaOnnxDestroyOfflineStream(const SherpaOnnxOfflineStream *stream)
Destroy a non-streaming ASR stream.
const float * SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(const SherpaOnnxSpeakerEmbeddingExtractor *p, const SherpaOnnxOnlineStream *s)
Compute the embedding for a stream.
const SherpaOnnxOfflineRecognizerResult * SherpaOnnxGetOfflineStreamResult(const SherpaOnnxOfflineStream *stream)
Get the recognition result for an offline ASR stream.
void SherpaOnnxFreeMultiChannelWave(const SherpaOnnxMultiChannelWave *wave)
Destroy a multi-channel wave object.
void SherpaOnnxOnlineStreamSetOption(const SherpaOnnxOnlineStream *stream, const char *key, const char *value)
Set a per-stream runtime option.
const char * SherpaOnnxGetOfflineStreamResultAsJson(const SherpaOnnxOfflineStream *stream)
Get the offline ASR result as JSON.
int32_t(* SherpaOnnxOfflineSpeakerDiarizationProgressCallback)(int32_t num_processed_chunks, int32_t num_total_chunks, void *arg)
Progress callback for offline speaker diarization.
const char * SherpaOnnxGetKeywordResultAsJson(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Get the current keyword spotting result as JSON.
const float * SherpaOnnxCircularBufferGet(const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n)
Copy out a slice of samples from a circular buffer.
const char * SherpaOnnxOnlinePunctuationAddPunct(const SherpaOnnxOnlinePunctuation *punctuation, const char *text)
Add punctuation to one text chunk using the online punctuation model.
int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float **v)
Add multiple enrollment embeddings for one speaker.
void SherpaOnnxDecodeOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer, const SherpaOnnxOfflineStream *stream)
Run offline ASR on one stream.
int32_t SherpaOnnxIsKeywordStreamReady(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Check whether a keyword stream has enough audio for decoding.
int32_t SherpaOnnxSpeakerEmbeddingManagerAdd(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float *v)
Add one enrollment embedding for a speaker.
void SherpaOnnxDecodeKeywordStream(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Decode one ready keyword stream.
void SherpaOnnxDestroySpeakerEmbeddingManager(const SherpaOnnxSpeakerEmbeddingManager *p)
Destroy a speaker embedding manager.
const SherpaOnnxOfflineSpeakerDiarizationResult * SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback, void *arg)
Run offline speaker diarization with a progress callback.
const SherpaOnnxResampleOut * SherpaOnnxLinearResamplerResample(const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, int32_t flush)
Resample one chunk of input audio.
struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream
Non-streaming decoding state for one utterance.
void SherpaOnnxOnlinePunctuationFreeText(const char *text)
Free a string returned by SherpaOnnxOnlinePunctuationAddPunct().
void SherpaOnnxCircularBufferPush(const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n)
Append samples to a circular buffer.
const char * SherpaOnnxGetOnlineStreamResultAsJson(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Get the current streaming ASR result as JSON.
struct SherpaOnnxOnlineSpeechDenoiser SherpaOnnxOnlineSpeechDenoiser
Opaque online speech denoiser handle.
const SherpaOnnxOnlinePunctuation * SherpaOnnxCreateOnlinePunctuation(const SherpaOnnxOnlinePunctuationConfig *config)
Create an online punctuation processor.
struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream
Streaming decoding state for one utterance or stream.
void SherpaOnnxDestroyLinearResampler(const SherpaOnnxLinearResampler *p)
Destroy a linear resampler.
const SherpaOnnxDenoisedAudio * SherpaOnnxOnlineSpeechDenoiserRun(const SherpaOnnxOnlineSpeechDenoiser *sd, const float *samples, int32_t n, int32_t sample_rate)
Process one chunk of streaming audio.
const SherpaOnnxOfflineSourceSeparation * SherpaOnnxCreateOfflineSourceSeparation(const SherpaOnnxOfflineSourceSeparationConfig *config)
Create a source-separation engine.
void SherpaOnnxDestroyOfflineRecognizer(const SherpaOnnxOfflineRecognizer *recognizer)
Destroy a non-streaming recognizer.
void SherpaOnnxLinearResamplerReset(const SherpaOnnxLinearResampler *p)
Reset a linear resampler to its initial state.
void SherpaOnnxDestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r)
Destroy a result returned by SherpaOnnxGetOnlineStreamResult().
const SherpaOnnxAudioEvent *const * SherpaOnnxAudioTaggingCompute(const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s, int32_t top_k)
Run audio tagging on an offline stream.
void SherpaOnnxLinearResamplerResampleFree(const SherpaOnnxResampleOut *p)
Destroy a resampler output chunk.
int32_t SherpaOnnxOfflineSourceSeparationGetOutputSampleRate(const SherpaOnnxOfflineSourceSeparation *ss)
Return the output sample rate of the source-separation engine.
void SherpaOnnxDecodeMultipleKeywordStreams(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream **streams, int32_t n)
Decode multiple ready keyword streams in parallel.
int32_t SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer)
Return the number of currently stored samples.
void SherpaOnnxVoiceActivityDetectorReset(const SherpaOnnxVoiceActivityDetector *p)
Reset a voice activity detector so it can process a new stream.
const SherpaOnnxOnlineSpeechDenoiser * SherpaOnnxCreateOnlineSpeechDenoiser(const SherpaOnnxOnlineSpeechDenoiserConfig *config)
Create an online speech denoiser.
int32_t(* SherpaOnnxGeneratedAudioProgressCallback)(const float *samples, int32_t n, float p)
Progress callback invoked during incremental generation.
void SherpaOnnxDestroyOfflineTtsGeneratedAudio(const SherpaOnnxGeneratedAudio *p)
Destroy audio returned by a TTS generation API.
const SherpaOnnxOfflineStream * SherpaOnnxAudioTaggingCreateOfflineStream(const SherpaOnnxAudioTagging *tagger)
Create an offline stream for audio tagging.
const char * SherpaOfflinePunctuationAddPunct(const SherpaOnnxOfflinePunctuation *punct, const char *text)
Add punctuation to a complete input text.
int32_t SherpaOnnxVoiceActivityDetectorEmpty(const SherpaOnnxVoiceActivityDetector *p)
Check whether the detector currently has any completed speech segment.
int32_t SherpaOnnxIsOnlineStreamReady(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Check whether a streaming ASR stream is ready to decode.
int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(const SherpaOnnxSpeakerEmbeddingExtractor *p, const SherpaOnnxOnlineStream *s)
Check whether enough audio has been provided to compute an embedding.
void SherpaOnnxCircularBufferPop(const SherpaOnnxCircularBuffer *buffer, int32_t n)
Drop samples from the front of a circular buffer.
int32_t SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(const SherpaOnnxOnlineSpeechDenoiser *sd)
Return the recommended chunk size in samples for streaming input.
void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(const char *name)
Free a string returned by SherpaOnnxSpeakerEmbeddingManagerSearch().
void SherpaOnnxDestroySpeakerEmbeddingExtractor(const SherpaOnnxSpeakerEmbeddingExtractor *p)
Destroy a speaker embedding extractor.
const SherpaOnnxOfflineSpeakerDiarization * SherpaOnnxCreateOfflineSpeakerDiarization(const SherpaOnnxOfflineSpeakerDiarizationConfig *config)
Create an offline speaker diarization pipeline.
struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer
Non-streaming recognizer handle.
const char * SherpaOnnxSpeakerEmbeddingManagerSearch(const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold)
Search for the best matching enrolled speaker.
void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Destroy a diarization result.
const SherpaOnnxSpeakerEmbeddingManager * SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim)
Create a speaker embedding manager.
int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(const SherpaOnnxLinearResampler *p)
Return the resampler output sample rate.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithProgressCallback(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioProgressCallback callback)
Generate speech with a progress callback.
void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger)
Destroy an audio tagger.
int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts)
Return the output sample rate of a TTS engine.
void SherpaOnnxOfflineSpeakerDiarizationSetConfig(const SherpaOnnxOfflineSpeakerDiarization *sd, const SherpaOnnxOfflineSpeakerDiarizationConfig *config)
Update clustering-related settings of an existing diarizer.
void SherpaOnnxDestroyCircularBuffer(const SherpaOnnxCircularBuffer *buffer)
Destroy a circular buffer.
void SherpaOnnxDestroyOfflineStreamResultJson(const char *s)
Free a JSON string returned by SherpaOnnxGetOfflineStreamResultAsJson().
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithCallback(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioCallback callback)
Generate speech and receive incremental audio chunks through a callback.
int32_t SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(const SherpaOnnxOfflineSpeakerDiarization *sd)
Return the expected input sample rate.
void SherpaOnnxAcceptWaveformOffline(const SherpaOnnxOfflineStream *stream, int32_t sample_rate, const float *samples, int32_t n)
Provide the full utterance to an offline ASR stream.
const char *const * SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(const SherpaOnnxSpeakerEmbeddingManager *p)
Return all enrolled speaker names.
const SherpaOnnxOnlineRecognizerResult * SherpaOnnxGetOnlineStreamResult(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Get the current streaming ASR result for a stream.
struct SherpaOnnxSpeakerEmbeddingManager SherpaOnnxSpeakerEmbeddingManager
Opaque speaker embedding manager handle.
struct SherpaOnnxKeywordSpotter SherpaOnnxKeywordSpotter
Opaque keyword spotter handle.
int32_t SherpaOnnxSpeakerEmbeddingManagerContains(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name)
Check whether a speaker is enrolled.
int32_t SherpaOnnxOfflineStreamHasOption(const SherpaOnnxOfflineStream *stream, const char *key)
Check whether a per-stream runtime option exists.
void SherpaOnnxDestroyOnlineRecognizer(const SherpaOnnxOnlineRecognizer *recognizer)
Destroy a streaming recognizer.
void SherpaOnnxWriteWaveToBuffer(const float *samples, int32_t n, int32_t sample_rate, char *buffer)
Write a mono 16-bit WAVE file to a caller-provided buffer.
int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float *v, float threshold)
Verify whether a query embedding matches a named speaker.
struct SherpaOnnxOnlinePunctuation SherpaOnnxOnlinePunctuation
Opaque online punctuation handle.
const SherpaOnnxOfflineStream * SherpaOnnxCreateOfflineStreamWithHotwords(const SherpaOnnxOfflineRecognizer *recognizer, const char *hotwords)
Create a non-streaming ASR input stream with per-stream hotwords.
int32_t SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer)
Return the current head index of the buffer timeline.
struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer
Streaming recognizer handle.
void SherpaOnnxDestroyOfflineSpeechDenoiser(const SherpaOnnxOfflineSpeechDenoiser *sd)
Destroy an offline speech denoiser.
int32_t SherpaOnnxOnlineStreamIsEndpoint(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Check whether endpoint detection has triggered for a stream.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerate(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed)
Generate speech from text using the simple sid/speed interface.
void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(const char *const *names)
Free an array returned by SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers().
const SherpaOnnxDenoisedAudio * SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n, int32_t sample_rate)
Run offline speech denoising on a complete waveform.
void SherpaOnnxDestroySourceSeparationOutput(const SherpaOnnxSourceSeparationOutput *p)
Destroy the output of a source-separation run.
const SherpaOnnxKeywordSpotter * SherpaOnnxCreateKeywordSpotter(const SherpaOnnxKeywordSpotterConfig *config)
Create a keyword spotter.
void SherpaOnnxVoiceActivityDetectorPop(const SherpaOnnxVoiceActivityDetector *p)
Remove the front speech segment from the detector queue.
struct SherpaOnnxCircularBuffer SherpaOnnxCircularBuffer
Opaque circular-buffer handle used by helper APIs.
void SherpaOnnxDestroyKeywordResult(const SherpaOnnxKeywordResult *r)
Destroy a keyword result snapshot.
void SherpaOnnxOfflineStreamSetOption(const SherpaOnnxOfflineStream *stream, const char *key, const char *value)
Set a per-stream runtime option for offline ASR.
int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Return the number of speakers in a diarization result.
const SherpaOnnxOnlineStream * SherpaOnnxCreateKeywordStreamWithKeywords(const SherpaOnnxKeywordSpotter *spotter, const char *keywords)
Create a keyword spotting stream with extra or replacement keywords.
void SherpaOnnxDestroyDisplay(const SherpaOnnxDisplay *display)
Destroy a display helper.
const SherpaOnnxWave * SherpaOnnxReadWaveFromBinaryData(const char *data, int32_t n)
Read a mono 16-bit PCM WAVE file from binary memory.
void SherpaOnnxDestroyOfflineRecognizerResult(const SherpaOnnxOfflineRecognizerResult *r)
Destroy a result returned by SherpaOnnxGetOfflineStreamResult().
const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult * SherpaOnnxSpeakerEmbeddingManagerGetBestMatches(const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold, int32_t n)
Return up to n best matches above a similarity threshold.
struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts
Opaque offline TTS handle.
void SherpaOnnxDecodeMultipleOnlineStreams(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream **streams, int32_t n)
Decode multiple streaming ASR streams in parallel.
int32_t SherpaOnnxFileExists(const char *filename)
Check whether a file exists.
struct SherpaOnnxOfflineSpeakerDiarizationResult SherpaOnnxOfflineSpeakerDiarizationResult
Opaque offline speaker diarization result.
void SherpaOfflinePunctuationFreeText(const char *text)
Free a string returned by SherpaOfflinePunctuationAddPunct().
void SherpaOnnxFreeKeywordResultJson(const char *s)
Free a JSON string returned by SherpaOnnxGetKeywordResultAsJson().
const SherpaOnnxDisplay * SherpaOnnxCreateDisplay(int32_t max_word_per_line)
Create a display helper.
const SherpaOnnxSpokenLanguageIdentification * SherpaOnnxCreateSpokenLanguageIdentification(const SherpaOnnxSpokenLanguageIdentificationConfig *config)
Create a spoken-language identifier.
void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(const SherpaOnnxOfflineSpeakerDiarizationSegment *s)
Destroy a segment array returned by SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime().
void SherpaOnnxDestroyOfflineSourceSeparation(const SherpaOnnxOfflineSourceSeparation *ss)
Destroy a source-separation engine.
const SherpaOnnxOfflineStream * SherpaOnnxCreateOfflineStream(const SherpaOnnxOfflineRecognizer *recognizer)
Create a non-streaming ASR input stream.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithConfig(const SherpaOnnxOfflineTts *tts, const char *text, const SherpaOnnxGenerationConfig *config, SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg)
Generate speech using the advanced configuration interface.
struct SherpaOnnxLinearResampler SherpaOnnxLinearResampler
Opaque linear resampler handle.
void SherpaOnnxDecodeMultipleOfflineStreams(const SherpaOnnxOfflineRecognizer *recognizer, const SherpaOnnxOfflineStream **streams, int32_t n)
Run offline ASR on multiple streams in parallel.
void SherpaOnnxCircularBufferReset(const SherpaOnnxCircularBuffer *buffer)
Clear a circular buffer and reset its head index.
void SherpaOnnxCircularBufferFree(const float *p)
Free an array returned by SherpaOnnxCircularBufferGet().
int32_t SherpaOnnxOnlineSpeechDenoiserGetSampleRate(const SherpaOnnxOnlineSpeechDenoiser *sd)
Return the expected input sample rate for the online denoiser.
void SherpaOnnxVoiceActivityDetectorAcceptWaveform(const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n)
Feed audio samples to the VAD.
const SherpaOnnxSpokenLanguageIdentificationResult * SherpaOnnxSpokenLanguageIdentificationCompute(const SherpaOnnxSpokenLanguageIdentification *slid, const SherpaOnnxOfflineStream *s)
Run spoken-language identification on an offline stream.
struct SherpaOnnxOfflineSourceSeparation SherpaOnnxOfflineSourceSeparation
Opaque source-separation engine handle.
struct SherpaOnnxOfflineSpeakerDiarization SherpaOnnxOfflineSpeakerDiarization
Opaque offline speaker diarization handle.
const SherpaOnnxOnlineStream * SherpaOnnxSpeakerEmbeddingExtractorCreateStream(const SherpaOnnxSpeakerEmbeddingExtractor *p)
Create a streaming feature buffer for embedding extraction.
const SherpaOnnxDenoisedAudio * SherpaOnnxOnlineSpeechDenoiserFlush(const SherpaOnnxOnlineSpeechDenoiser *sd)
Flush buffered samples and reset the online denoiser.
void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p)
Destroy a speech segment returned by SherpaOnnxVoiceActivityDetectorFront().
void SherpaOnnxDestroyOnlineStream(const SherpaOnnxOnlineStream *stream)
Destroy a streaming ASR state object.
void SherpaOnnxDestroyVoiceActivityDetector(const SherpaOnnxVoiceActivityDetector *p)
Destroy a voice activity detector.
const SherpaOnnxOnlineStream * SherpaOnnxCreateOnlineStreamWithHotwords(const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords)
Create a streaming ASR state object with per-stream hotwords.
void SherpaOnnxVoiceActivityDetectorClear(const SherpaOnnxVoiceActivityDetector *p)
Remove all queued speech segments.
const SherpaOnnxVoiceActivityDetector * SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds)
Create a voice activity detector.
const SherpaOnnxSourceSeparationOutput * SherpaOnnxOfflineSourceSeparationProcess(const SherpaOnnxOfflineSourceSeparation *ss, const float *const *samples, int32_t num_channels, int32_t num_samples, int32_t sample_rate)
Run source separation on multi-channel audio.
int64_t SherpaOnnxWaveFileSize(int32_t n_samples)
Return the number of bytes needed for a mono 16-bit WAVE file.
SherpaOnnxOfflineStream * SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(const SherpaOnnxSpokenLanguageIdentification *slid)
Create an offline stream for spoken-language identification.
const char * SherpaOnnxGetGitSha1()
Return the Git SHA1 used to build the library.
const SherpaOnnxOnlineStream * SherpaOnnxCreateKeywordStream(const SherpaOnnxKeywordSpotter *spotter)
Create a keyword spotting stream using the spotter's built-in keyword list.
int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts)
Return the number of available speaker IDs.
const SherpaOnnxOfflineSpeakerDiarizationSegment * SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(const SherpaOnnxOfflineSpeakerDiarizationResult *r)
Return segments sorted by start time.
void SherpaOnnxOnlineStreamReset(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Reset a streaming ASR stream after an endpoint or utterance boundary.
void SherpaOnnxDestroySpokenLanguageIdentificationResult(const SherpaOnnxSpokenLanguageIdentificationResult *r)
Destroy a spoken-language identification result.
const SherpaOnnxOfflineSpeechDenoiser * SherpaOnnxCreateOfflineSpeechDenoiser(const SherpaOnnxOfflineSpeechDenoiserConfig *config)
Create an offline speech denoiser.
struct SherpaOnnxSpeakerEmbeddingExtractor SherpaOnnxSpeakerEmbeddingExtractor
Opaque speaker embedding extractor handle.
void SherpaOnnxDestroyOnlinePunctuation(const SherpaOnnxOnlinePunctuation *punctuation)
Destroy an online punctuation processor.
int32_t(* SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples, int32_t n, void *arg)
Same as SherpaOnnxGeneratedAudioCallback but with an extra user pointer.
struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging
Opaque audio tagger handle.
int32_t SherpaOnnxOnlineStreamHasOption(const SherpaOnnxOnlineStream *stream, const char *key)
Check whether a per-stream runtime option exists.
struct SherpaOnnxDisplay SherpaOnnxDisplay
Helper for pretty-printing incremental recognition results.
const SherpaOnnxAudioTagging * SherpaOnnxCreateAudioTagging(const SherpaOnnxAudioTaggingConfig *config)
Create an audio tagger.
void SherpaOnnxOnlineStreamInputFinished(const SherpaOnnxOnlineStream *stream)
Signal end-of-input for a streaming ASR stream.
void SherpaOnnxDestroyOfflinePunctuation(const SherpaOnnxOfflinePunctuation *punct)
Destroy an offline punctuation processor.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg)
Generate speech with a progress callback that receives a user pointer.
int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename)
Write floating-point PCM to a mono 16-bit WAVE file.
const SherpaOnnxOnlineStream * SherpaOnnxCreateOnlineStream(const SherpaOnnxOnlineRecognizer *recognizer)
Create a streaming ASR state object.
const SherpaOnnxGeneratedAudio * SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed, SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg)
Same as SherpaOnnxOfflineTtsGenerateWithCallback() but with a user pointer.
int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(const SherpaOnnxSpeakerEmbeddingExtractor *p)
Return the embedding dimension produced by the extractor.
void SherpaOnnxResetKeywordStream(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Reset a keyword stream after a keyword is detected.
void SherpaOnnxDecodeOnlineStream(const SherpaOnnxOnlineRecognizer *recognizer, const SherpaOnnxOnlineStream *stream)
Decode one step of a streaming ASR stream.
const SherpaOnnxKeywordResult * SherpaOnnxGetKeywordResult(const SherpaOnnxKeywordSpotter *spotter, const SherpaOnnxOnlineStream *stream)
Get the current keyword spotting result for a stream.
const char * SherpaOnnxOnlineStreamGetOption(const SherpaOnnxOnlineStream *stream, const char *key)
Get a per-stream runtime option.
void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p)
Destroy denoised audio returned by a speech enhancement API.
struct SherpaOnnxOfflinePunctuation SherpaOnnxOfflinePunctuation
Opaque offline punctuation handle.
const SherpaOnnxOfflineSpeakerDiarizationResult * SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback)
Run offline speaker diarization with a progress callback that has no user pointer.
void SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches(const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult *r)
Destroy a best-matches result.
const SherpaOnnxOnlineRecognizer * SherpaOnnxCreateOnlineRecognizer(const SherpaOnnxOnlineRecognizerConfig *config)
Create a streaming ASR recognizer.
const char * SherpaOnnxGetGitDate()
Return the Git build date used to build the library.
int32_t SherpaOnnxOfflineSourceSeparationGetNumberOfStems(const SherpaOnnxOfflineSourceSeparation *ss)
Return the number of stems produced by the engine.
struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector
Opaque voice activity detector handle.
int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(const SherpaOnnxLinearResampler *p)
Return the resampler input sample rate.
const SherpaOnnxCircularBuffer * SherpaOnnxCreateCircularBuffer(int32_t capacity)
Create a floating-point circular buffer.
int32_t(* SherpaOnnxGeneratedAudioProgressCallbackWithArg)(const float *samples, int32_t n, float p, void *arg)
Same as SherpaOnnxGeneratedAudioProgressCallback but with an extra user pointer.
const char * SherpaOnnxGetVersionStr()
Return the sherpa-onnx version string.
int32_t SherpaOnnxWriteWaveMultiChannel(const float *const *samples, int32_t n, int32_t sample_rate, int32_t num_channels, const char *filename)
Write multi-channel audio to a WAVE file (16-bit PCM).
const SherpaOnnxOfflinePunctuation * SherpaOnnxCreateOfflinePunctuation(const SherpaOnnxOfflinePunctuationConfig *config)
Create an offline punctuation processor.
int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(const SherpaOnnxSpeakerEmbeddingManager *p, const char *name, const float *v, int32_t n)
Add multiple enrollment embeddings packed in one flat array.
void SherpaOnnxPrint(const SherpaOnnxDisplay *display, int32_t idx, const char *s)
Print one line of text using the display helper.
void SherpaOnnxDestroyOfflineTts(const SherpaOnnxOfflineTts *tts)
Destroy an offline TTS engine.
One audio-tagging prediction.
Configuration for audio tagging.
SherpaOnnxAudioTaggingModelConfig model
Audio-tagging model configuration.
SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer
Denoised audio returned by offline or online speech enhancement APIs.
Fast clustering configuration.
Feature extraction settings for ASR.
Generated waveform returned by TTS APIs.
Generation-time parameters shared by advanced TTS APIs.
const char * reference_text
const float * reference_audio
int32_t reference_audio_len
int32_t reference_sample_rate
Configuration for homophone replacement.
Snapshot of the current keyword spotting result.
const char *const * tokens_arr
Configuration for keyword spotting.
SherpaOnnxOnlineModelConfig model_config
SherpaOnnxFeatureConfig feat_config
const char * keywords_file
const char * keywords_buf
int32_t keywords_buf_size
int32_t num_trailing_blanks
Decoded multi-channel WAVE file content.
const float *const * samples
Configuration for a Canary model.
Configuration for a Cohere Transcribe model.
Configuration for a Dolphin model.
Configuration for a FireRedAsr CTC model.
Configuration for a FireRedAsr encoder/decoder model.
Configuration for an offline FunASR Nano model.
const char * encoder_adaptor
const char * system_prompt
Configuration for an offline language model.
Configuration for a MedASR CTC model.
Model configuration shared by offline ASR recognizers.
SherpaOnnxOfflineQwen3ASRModelConfig qwen3_asr
SherpaOnnxOfflineParaformerModelConfig paraformer
const char * modeling_unit
SherpaOnnxOfflineDolphinModelConfig dolphin
SherpaOnnxOfflineCanaryModelConfig canary
SherpaOnnxOfflineMoonshineModelConfig moonshine
SherpaOnnxOfflineFunASRNanoModelConfig funasr_nano
const char * telespeech_ctc
SherpaOnnxOfflineFireRedAsrCtcModelConfig fire_red_asr_ctc
SherpaOnnxOfflineOmnilingualAsrCtcModelConfig omnilingual
SherpaOnnxOfflineSenseVoiceModelConfig sense_voice
SherpaOnnxOfflineWhisperModelConfig whisper
SherpaOnnxOfflineTdnnModelConfig tdnn
SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc
SherpaOnnxOfflineMedAsrCtcModelConfig medasr
SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr
SherpaOnnxOfflineCohereTranscribeModelConfig cohere_transcribe
SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc
SherpaOnnxOfflineTransducerModelConfig transducer
SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc
Configuration for a Moonshine model.
const char * uncached_decoder
const char * preprocessor
const char * cached_decoder
const char * merged_decoder
Configuration for a non-streaming NeMo CTC model.
Configuration for an omnilingual offline CTC model.
Configuration for offline punctuation.
SherpaOnnxOfflinePunctuationModelConfig model
Offline punctuation model configuration.
const char * ct_transformer
Configuration for an offline Qwen3-ASR model.
const char * conv_frontend
Configuration for a non-streaming ASR recognizer.
SherpaOnnxOfflineModelConfig model_config
const char * hotwords_file
SherpaOnnxFeatureConfig feat_config
SherpaOnnxHomophoneReplacerConfig hr
SherpaOnnxOfflineLMConfig lm_config
const char * decoding_method
Recognition result for a non-streaming ASR stream.
const char *const * segment_texts_arr
const float * segment_durations
const float * segment_timestamps
const char *const * tokens_arr
const char * segment_texts
Configuration for a SenseVoice model.
Top-level source-separation configuration.
SherpaOnnxOfflineSourceSeparationModelConfig model
Source-separation model configuration.
SherpaOnnxOfflineSourceSeparationUvrModelConfig uvr
SherpaOnnxOfflineSourceSeparationSpleeterModelConfig spleeter
Spleeter source-separation model configuration.
const char * accompaniment
UVR (MDX-Net) source-separation model configuration.
Configuration for offline speaker diarization.
SherpaOnnxFastClusteringConfig clustering
SherpaOnnxSpeakerEmbeddingExtractorConfig embedding
SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation
Segmentation model configuration for offline speaker diarization.
SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote
Pyannote speaker-segmentation model configuration.
Configuration for offline speech denoising.
SherpaOnnxOfflineSpeechDenoiserModelConfig model
DPDFNet offline denoiser model configuration.
GTCRN offline denoiser model configuration.
Speech denoiser model configuration shared by offline and online APIs.
SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn
SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig dpdfnet
Configuration for a TDNN model.
Configuration for a non-streaming transducer model.
Configuration for offline text-to-speech.
int32_t max_num_sentences
SherpaOnnxOfflineTtsModelConfig model
Configuration for a Kitten TTS model.
Configuration for a Kokoro TTS model.
Configuration for a Matcha TTS model.
const char * acoustic_model
Configuration shared by offline TTS models.
SherpaOnnxOfflineTtsVitsModelConfig vits
SherpaOnnxOfflineTtsKokoroModelConfig kokoro
SherpaOnnxOfflineTtsSupertonicModelConfig supertonic
SherpaOnnxOfflineTtsKittenModelConfig kitten
SherpaOnnxOfflineTtsPocketModelConfig pocket
SherpaOnnxOfflineTtsMatchaModelConfig matcha
SherpaOnnxOfflineTtsZipvoiceModelConfig zipvoice
Configuration for a Pocket TTS model.
const char * text_conditioner
const char * token_scores_json
int32_t voice_embedding_cache_capacity
Configuration for a Supertonic TTS model.
const char * vector_estimator
const char * text_encoder
const char * duration_predictor
const char * unicode_indexer
Configuration for a VITS TTS model.
Configuration for a ZipVoice TTS model.
Configuration for an offline WeNet CTC model.
Configuration for a non-streaming Whisper model.
int32_t enable_segment_timestamps
int32_t enable_token_timestamps
Configuration for HLG/FST-based online CTC decoding.
Model configuration shared by streaming ASR recognizers.
SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2_ctc
const char * modeling_unit
SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc
SherpaOnnxOnlineToneCtcModelConfig t_one_ctc
SherpaOnnxOnlineTransducerModelConfig transducer
SherpaOnnxOnlineParaformerModelConfig paraformer
Configuration for a streaming NeMo CTC model.
Configuration for online punctuation.
SherpaOnnxOnlinePunctuationModelConfig model
Online punctuation model configuration.
Configuration for a streaming ASR recognizer.
SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config
SherpaOnnxOnlineModelConfig model_config
SherpaOnnxHomophoneReplacerConfig hr
float rule3_min_utterance_length
const char * decoding_method
float rule1_min_trailing_silence
int32_t hotwords_buf_size
float rule2_min_trailing_silence
const char * hotwords_buf
const char * hotwords_file
SherpaOnnxFeatureConfig feat_config
Incremental recognition result for a streaming ASR stream.
const char *const * tokens_arr
Configuration for streaming speech denoising.
SherpaOnnxOfflineSpeechDenoiserModelConfig model
Configuration for a streaming T-One CTC model.
Configuration for a streaming transducer model.
Output chunk returned by SherpaOnnxLinearResamplerResample().
Configuration for a Silero VAD model.
float min_silence_duration
float max_speech_duration
float min_speech_duration
Output of a source-separation run.
const SherpaOnnxSourceSeparationStem * stems
A single stem (one output track) with one or more channels.
Collection of best speaker matches.
const SherpaOnnxSpeakerEmbeddingManagerSpeakerMatch * matches
One speaker match returned by the best-matches API.
One detected speech segment returned by the VAD.
Configuration for spoken language identification.
SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper
Result of spoken-language identification.
Whisper-based model files for spoken language identification.
Configuration for a Ten VAD model.
float min_silence_duration
float max_speech_duration
float min_speech_duration
Configuration shared by voice activity detectors.
SherpaOnnxSileroVadModelConfig silero_vad
SherpaOnnxTenVadModelConfig ten_vad
Decoded mono WAVE file content.