sherpa-onnx C API 1.0
Public C API and C++ wrapper for sherpa-onnx
Loading...
Searching...
No Matches
cxx-api.h
Go to the documentation of this file.
1// sherpa-onnx/c-api/cxx-api.h
2//
3// Copyright (c) 2024 Xiaomi Corporation
44#ifndef SHERPA_ONNX_C_API_CXX_API_H_
45#define SHERPA_ONNX_C_API_CXX_API_H_
46
47#include <memory>
48#include <string>
49#include <unordered_map>
50#include <vector>
51
53
55
56// ============================================================================
57// Streaming ASR
58// ============================================================================
62 std::string encoder;
64 std::string decoder;
66 std::string joiner;
67};
68
72 std::string encoder;
74 std::string decoder;
75};
76
80 std::string model;
81};
82
86 std::string model;
87};
88
92 std::string model;
93};
94
149
153 int32_t sample_rate = 16000;
155 int32_t feature_dim = 80;
156};
157
161 std::string graph;
163 int32_t max_active = 3000;
164};
165
169 std::string dict_dir;
171 std::string lexicon;
173 std::string rule_fsts;
174};
175
243
247 std::string text;
249 std::vector<std::string> tokens;
251 std::vector<float> timestamps;
253 std::string json;
254};
255
/// Audio held as a C++ value object: the type returned by ReadWave() and
/// accepted by WriteWave().
struct Wave {
  /// Audio samples. Empty in a default-constructed Wave.
  /// NOTE(review): presumably normalized mono samples -- confirm against
  /// ReadWave()'s implementation.
  std::vector<float> samples;
  /// Sample rate of `samples` (presumably in Hz -- confirm). Defaults to 0.
  int32_t sample_rate = 0;
};
263
272SHERPA_ONNX_API Wave ReadWave(const std::string &filename);
273
281SHERPA_ONNX_API bool WriteWave(const std::string &filename, const Wave &wave);
282
/// Base class for move-only RAII wrappers around C handles.
///
/// Uses CRTP: whenever a non-null handle has to be released, this class calls
/// `static_cast<Derived *>(this)->Destroy(handle)`.
///
/// @tparam Derived Concrete wrapper type; must provide `Destroy(const T *)`.
/// @tparam T       Type of the wrapped C handle.
//
// NOTE(review): the class-head line was missing from this listing and has
// been rebuilt from the member index; re-apply any export/visibility macro
// the original declaration carried.
template <typename Derived, typename T>
class MoveOnly {
 public:
  /// Construct an empty wrapper (holds nullptr).
  MoveOnly() = default;

  /// Construct a wrapper from a raw C handle. The handle is passed to
  /// Derived::Destroy() when this wrapper is destroyed or assigned over.
  explicit MoveOnly(const T *p) : p_(p) {}

  /// Destroy the wrapped handle if present.
  ///
  /// This runs after the Derived part of the object has already been
  /// destroyed, so Derived::Destroy() must not touch Derived data members.
  ~MoveOnly() { Destroy(); }

  MoveOnly(const MoveOnly &) = delete;

  MoveOnly &operator=(const MoveOnly &) = delete;

  /// Take over the handle held by `other`; `other` is left empty.
  ///
  /// noexcept: only a raw pointer changes hands, and containers such as
  /// std::vector fall back to copying (impossible here) or lose their strong
  /// guarantee when a move constructor may throw.
  MoveOnly(MoveOnly &&other) noexcept : p_(other.Release()) {}

  /// Release the currently held handle (if any), then take over the handle
  /// held by `other`; `other` is left empty. Self-assignment is a no-op.
  ///
  /// noexcept relies on Derived::Destroy() not throwing, which the
  /// (implicitly noexcept) destructor already requires.
  MoveOnly &operator=(MoveOnly &&other) noexcept {
    if (&other == this) {
      return *this;
    }

    Destroy();

    p_ = other.Release();

    return *this;
  }

  /// Return the wrapped raw pointer without transferring ownership.
  const T *Get() const { return p_; }

  /// Release ownership of the wrapped raw pointer: the wrapper becomes empty
  /// and the caller is responsible for the returned handle.
  const T *Release() {
    const T *p = p_;
    p_ = nullptr;
    return p;
  }

 private:
  /// Hand the held handle to Derived::Destroy() and reset to empty.
  /// Does nothing when no handle is held.
  void Destroy() {
    if (p_ == nullptr) {
      return;
    }

    static_cast<Derived *>(this)->Destroy(p_);

    p_ = nullptr;
  }

 protected:
  /// The wrapped handle; nullptr when the wrapper is empty.
  const T *p_ = nullptr;
};
342
344 : public MoveOnly<OnlineStream, SherpaOnnxOnlineStream> {
345 public:
348
350 void AcceptWaveform(int32_t sample_rate, const float *samples,
351 int32_t n) const;
352
354 void InputFinished() const;
355
357 void SetOption(const char *key, const char *value) const;
359 const char *GetOption(const char *key) const;
361 int32_t HasOption(const char *key) const;
362
364 void Destroy(const SherpaOnnxOnlineStream *p) const;
365};
366
385 : public MoveOnly<OnlineRecognizer, SherpaOnnxOnlineRecognizer> {
386 public:
389
392
395
397 OnlineStream CreateStream(const std::string &hotwords) const;
398
400 bool IsReady(const OnlineStream *s) const;
401
403 void Decode(const OnlineStream *s) const;
404
406 void Decode(const OnlineStream *ss, int32_t n) const;
407
410
412 void Reset(const OnlineStream *s) const;
413
415 bool IsEndpoint(const OnlineStream *s) const;
416
417 private:
419};
420
421// ============================================================================
422// Non-streaming ASR
423// ============================================================================
427 std::string encoder;
429 std::string decoder;
431 std::string joiner;
432};
433
437 std::string model;
438};
439
443 std::string model;
444};
445
449 std::string encoder;
451 std::string decoder;
453 std::string language;
455 std::string task = "transcribe";
457 int32_t tail_paddings = -1;
462};
463
467 std::string encoder;
469 std::string decoder;
471 std::string src_lang;
473 std::string tgt_lang;
475 bool use_pnc = true;
476};
477
481 std::string encoder;
483 std::string decoder;
485 std::string language;
487 bool use_punct = true;
489 bool use_itn = true;
490};
491
495 std::string encoder;
497 std::string decoder;
498};
499
503 std::string model;
504};
505
509 std::string model;
510};
511
515 std::string model;
517 std::string language;
519 bool use_itn = false;
520};
521
525 std::string model;
526};
527
531 std::string model;
532};
533
537 std::string model;
538};
539
545
549 std::string model;
550};
551
555 std::string preprocessor;
557 std::string encoder;
559 std::string uncached_decoder;
561 std::string cached_decoder;
563 std::string merged_decoder;
564};
565
569 std::string encoder_adaptor;
571 std::string llm;
573 std::string embedding;
575 std::string tokenizer;
577 std::string system_prompt = "You are a helpful assistant.";
579 std::string user_prompt = "语音转写:";
581 int32_t max_new_tokens = 512;
583 float temperature = 1e-6f;
585 float top_p = 0.8f;
587 int32_t seed = 42;
589 std::string language;
591 bool itn = true;
593 std::string hotwords;
594};
595
599 std::string conv_frontend;
601 std::string encoder;
603 std::string decoder;
605 std::string tokenizer;
608 std::string hotwords;
610 int32_t max_total_len = 512;
612 int32_t max_new_tokens = 128;
614 float temperature = 1e-6f;
616 float top_p = 0.8f;
618 int32_t seed = 42;
619};
620
682
686 std::string model;
688 float scale = 1.0;
689};
690
750
754 std::string text;
756 std::vector<float> timestamps;
758 std::vector<std::string> tokens;
760 std::string json;
762 std::string lang;
764 std::string emotion;
766 std::string event;
767
769 std::vector<float> durations;
770};
771
774 : public MoveOnly<OfflineStream, SherpaOnnxOfflineStream> {
775 public:
778
780 void AcceptWaveform(int32_t sample_rate, const float *samples,
781 int32_t n) const;
782
784 void SetOption(const char *key, const char *value) const;
786 const char *GetOption(const char *key) const;
788 int32_t HasOption(const char *key) const;
789
791 void Destroy(const SherpaOnnxOfflineStream *p) const;
792};
793
801 : public MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer> {
802 public:
805
808
811
813 OfflineStream CreateStream(const std::string &hotwords) const;
814
816 void Decode(const OfflineStream *s) const;
817
819 void Decode(const OfflineStream *ss, int32_t n) const;
820
823
830 std::shared_ptr<OfflineRecognizerResult> GetResultPtr(
831 const OfflineStream *s) const;
832
834 void SetConfig(const OfflineRecognizerConfig &config) const;
835
836 private:
838};
839
840// ============================================================================
841// Non-streaming TTS
842// ============================================================================
846 std::string model;
848 std::string lexicon;
850 std::string tokens;
852 std::string data_dir;
854 std::string dict_dir;
855
857 float noise_scale = 0.667;
859 float noise_scale_w = 0.8;
861 float length_scale = 1.0;
862};
863
867 std::string acoustic_model;
869 std::string vocoder;
871 std::string lexicon;
873 std::string tokens;
875 std::string data_dir;
877 std::string dict_dir;
878
880 float noise_scale = 0.667;
882 float length_scale = 1.0;
883};
884
888 std::string model;
890 std::string voices;
892 std::string tokens;
894 std::string data_dir;
896 std::string dict_dir;
898 std::string lexicon;
900 std::string lang;
901
903 float length_scale = 1.0;
904};
905
909 std::string model;
911 std::string voices;
913 std::string tokens;
915 std::string data_dir;
916
918 float length_scale = 1.0;
919};
920
924 std::string tokens;
926 std::string encoder;
928 std::string decoder;
930 std::string vocoder;
932 std::string data_dir;
934 std::string lexicon;
935
937 float feat_scale = 0.1;
939 float t_shift = 0.5;
941 float target_rms = 0.1;
943 float guidance_scale = 1.0;
944};
945
949 std::string lm_flow;
951 std::string lm_main;
953 std::string encoder;
955 std::string decoder;
957 std::string text_conditioner;
958
960 std::string vocab_json;
962 std::string token_scores_json;
965};
966
972 std::string text_encoder;
974 std::string vector_estimator;
976 std::string vocoder;
978 std::string tts_json;
980 std::string unicode_indexer;
982 std::string voice_style;
983};
984
1014
1018 float silence_scale = 0.2;
1020 float speed = 1.0;
1022 int32_t sid = 0;
1024 std::vector<float> reference_audio;
1028 std::string reference_text;
1030 int32_t num_steps = 5;
1031
1033 std::unordered_map<std::string, std::string> extra;
1034};
1035
1049
1053 std::vector<float> samples;
1055 int32_t sample_rate = 0;
1056};
1057
/// TTS progress callback, accepted by OfflineTts::Generate() and
/// OfflineTts::Generate2().
///
/// @param samples     Pointer to `num_samples` float samples.
/// @param num_samples Number of samples available at `samples`.
/// @param progress    Progress value reported by the engine
///                    (presumably in [0, 1] -- confirm).
/// @param arg         The opaque `arg` pointer given to Generate()/Generate2().
/// @return NOTE(review): the meaning of the return value (e.g. whether it can
///         stop generation) is not visible in this header -- confirm against
///         the C API documentation.
using OfflineTtsCallback = int32_t (*)(const float *samples,
                                       int32_t num_samples, float progress,
                                       void *arg);
1066
1091 : public MoveOnly<OfflineTts, SherpaOnnxOfflineTts> {
1092 public:
1094 static OfflineTts Create(const OfflineTtsConfig &config);
1095
1097 void Destroy(const SherpaOnnxOfflineTts *p) const;
1098
1100 int32_t SampleRate() const;
1101
1103 int32_t NumSpeakers() const;
1104
1111 GeneratedAudio Generate(const std::string &text, int32_t sid = 0,
1112 float speed = 1.0,
1113 OfflineTtsCallback callback = nullptr,
1114 void *arg = nullptr) const;
1115
1117 GeneratedAudio Generate(const std::string &text,
1118 const GenerationConfig &config,
1119 OfflineTtsCallback callback = nullptr,
1120 void *arg = nullptr) const;
1121
1123 std::shared_ptr<GeneratedAudio> Generate2(
1124 const std::string &text, int32_t sid = 0, float speed = 1.0,
1125 OfflineTtsCallback callback = nullptr, void *arg = nullptr) const;
1126
1129 std::shared_ptr<GeneratedAudio> Generate2(
1130 const std::string &text, const GenerationConfig &config,
1131 OfflineTtsCallback callback = nullptr, void *arg = nullptr) const;
1132
1133 private:
1134 explicit OfflineTts(const SherpaOnnxOfflineTts *p);
1135};
1136
1137// ============================================================
1138// For Keyword Spotter
1139// ============================================================
1140
1144 std::string keyword;
1146 std::vector<std::string> tokens;
1148 std::vector<float> timestamps;
1150 float start_time = 0.0f;
1152 std::string json;
1153};
1154
1174
1177 : public MoveOnly<KeywordSpotter, SherpaOnnxKeywordSpotter> {
1178 public:
1181
1183 void Destroy(const SherpaOnnxKeywordSpotter *p) const;
1184
1187
1190 OnlineStream CreateStream(const std::string &keywords) const;
1191
1193 bool IsReady(const OnlineStream *s) const;
1194
1196 void Decode(const OnlineStream *s) const;
1197
1199 void Decode(const OnlineStream *ss, int32_t n) const;
1200
1202 void Reset(const OnlineStream *s) const;
1203
1206
1207 private:
1208 explicit KeywordSpotter(const SherpaOnnxKeywordSpotter *p);
1209};
1210
1216
1222
1241
1247
1251 std::vector<float> samples;
1253 int32_t sample_rate = 0;
1254};
1255
1258 : public MoveOnly<OfflineSpeechDenoiser, SherpaOnnxOfflineSpeechDenoiser> {
1259 public:
1262 const OfflineSpeechDenoiserConfig &config);
1263
1266
1268 DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const;
1269
1271 int32_t GetSampleRate() const;
1272
1273 private:
1275};
1276
1282
1285 : public MoveOnly<OnlineSpeechDenoiser, SherpaOnnxOnlineSpeechDenoiser> {
1286 public:
1289
1292
1294 DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const;
1295
1298
1300 void Reset() const;
1301
1303 int32_t GetSampleRate() const;
1304
1307 int32_t GetFrameShiftInSamples() const;
1308
1309 private:
1311};
1312
1313// ==============================
1314// VAD
1315// ==============================
1316
1320 std::string model;
1322 float threshold = 0.5;
1328 int32_t window_size = 512;
1331};
1332
1336 std::string model;
1338 float threshold = 0.5;
1344 int32_t window_size = 256;
1347};
1348
1360
1362 int32_t sample_rate = 16000;
1364 int32_t num_threads = 1;
1366 std::string provider = "cpu";
1368 bool debug = false;
1369};
1370
1374 int32_t start = 0;
1376 std::vector<float> samples;
1377};
1378
1381 : public MoveOnly<CircularBuffer, SherpaOnnxCircularBuffer> {
1382 public:
1384 static CircularBuffer Create(int32_t capacity);
1385
1387 void Destroy(const SherpaOnnxCircularBuffer *p) const;
1388
1390 void Push(const float *p, int32_t n) const;
1391
1393 std::vector<float> Get(int32_t start_index, int32_t n) const;
1394
1396 void Pop(int32_t n) const;
1397
1399 int32_t Size() const;
1400
1402 int32_t Head() const;
1403
1405 void Reset() const;
1406
1407 private:
1408 explicit CircularBuffer(const SherpaOnnxCircularBuffer *p);
1409};
1410
1418 : public MoveOnly<VoiceActivityDetector, SherpaOnnxVoiceActivityDetector> {
1419 public:
1422 float buffer_size_in_seconds);
1423
1426
1428 void AcceptWaveform(const float *samples, int32_t n) const;
1429
1431 bool IsEmpty() const;
1432
1434 bool IsDetected() const;
1435
1437 void Pop() const;
1438
1440 void Clear() const;
1441
1444
1446 std::shared_ptr<SpeechSegment> FrontPtr() const;
1447
1449 void Reset() const;
1450
1452 void Flush() const;
1453
1454 private:
1456};
1457
1460 : public MoveOnly<LinearResampler, SherpaOnnxLinearResampler> {
1461 public:
1463 LinearResampler() = default;
1465 static LinearResampler Create(int32_t samp_rate_in_hz,
1466 int32_t samp_rate_out_hz,
1467 float filter_cutoff_hz, int32_t num_zeros);
1468
1471
1473 void Reset() const;
1474
1476 std::vector<float> Resample(const float *input, int32_t input_dim,
1477 bool flush) const;
1478
1480 int32_t GetInputSamplingRate() const;
1482 int32_t GetOutputSamplingRate() const;
1483
1484 private:
1485 explicit LinearResampler(const SherpaOnnxLinearResampler *p);
1486};
1487
1495SHERPA_ONNX_API bool FileExists(const std::string &filename);
1496
1497// ============================================================================
1498// Offline Punctuation
1499// ============================================================================
1503 std::string ct_transformer;
1505 int32_t num_threads = 1;
1507 bool debug = false;
1509 std::string provider = "cpu";
1510};
1511
1517
1520 : public MoveOnly<OfflinePunctuation, SherpaOnnxOfflinePunctuation> {
1521 public:
1524
1527
1529 std::string AddPunctuation(const std::string &text) const;
1530
1531 private:
1533};
1534
1535// ============================================================================
1536// Online Punctuation
1537// ============================================================================
1541 std::string cnn_bilstm;
1543 std::string bpe_vocab;
1545 int32_t num_threads = 1;
1547 bool debug = false;
1549 std::string provider = "cpu";
1550};
1551
1557
1560 : public MoveOnly<OnlinePunctuation, SherpaOnnxOnlinePunctuation> {
1561 public:
1564
1567
1569 std::string AddPunctuation(const std::string &text) const;
1570
1571 private:
1573};
1574
1575// ============================================================================
1576// Audio tagging
1577// ============================================================================
1583
1602
1612
1616 std::string name;
1618 int32_t index;
1620 float prob;
1621};
1622
1625 : public MoveOnly<AudioTagging, SherpaOnnxAudioTagging> {
1626 public:
1629
1631 void Destroy(const SherpaOnnxAudioTagging *p) const;
1632
1641 std::vector<AudioEvent> Compute(const OfflineStream *s, int32_t top_k = -1);
1642
1645 std::shared_ptr<std::vector<AudioEvent>> ComputePtr(const OfflineStream *s,
1646 int32_t top_k = -1);
1647
1648 private:
1649 explicit AudioTagging(const SherpaOnnxAudioTagging *p);
1650};
1651
1652// ==============================
1653// Source Separation
1654// ==============================
1655
1663
1669
1687
1693
1697 std::vector<std::vector<float>> samples;
1698};
1699
1703 std::vector<SourceSeparationStem> stems;
1705 int32_t sample_rate = 0;
1706};
1707
1710 : public MoveOnly<OfflineSourceSeparation,
1711 SherpaOnnxOfflineSourceSeparation> {
1712 public:
1715 const OfflineSourceSeparationConfig &config);
1716
1719
1729 SourceSeparationOutput Process(const float *const *samples,
1730 int32_t num_channels, int32_t num_samples,
1731 int32_t sample_rate) const;
1732
1734 int32_t GetOutputSampleRate() const;
1735
1737 int32_t GetNumberOfStems() const;
1738
1739 private:
1741};
1742
1743} // namespace sherpa_onnx::cxx
1744
1745#endif // SHERPA_ONNX_C_API_CXX_API_H_
Public C API for sherpa-onnx.
struct SherpaOnnxOfflineSpeechDenoiser SherpaOnnxOfflineSpeechDenoiser
Opaque offline speech denoiser handle.
Definition c-api.h:4053
struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream
Non-streaming decoding state for one utterance.
Definition c-api.h:1188
#define SHERPA_ONNX_API
Definition c-api.h:106
struct SherpaOnnxOnlineSpeechDenoiser SherpaOnnxOnlineSpeechDenoiser
Opaque online speech denoiser handle.
Definition c-api.h:4149
struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream
Streaming decoding state for one utterance or stream.
Definition c-api.h:424
struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer
Non-streaming recognizer handle.
Definition c-api.h:1185
struct SherpaOnnxKeywordSpotter SherpaOnnxKeywordSpotter
Opaque keyword spotter handle.
Definition c-api.h:1685
struct SherpaOnnxOnlinePunctuation SherpaOnnxOnlinePunctuation
Opaque online punctuation handle.
Definition c-api.h:3613
struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer
Streaming recognizer handle.
Definition c-api.h:422
struct SherpaOnnxCircularBuffer SherpaOnnxCircularBuffer
Opaque circular-buffer handle used by helper APIs.
Definition c-api.h:1936
struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts
Opaque offline TTS handle.
Definition c-api.h:2473
struct SherpaOnnxLinearResampler SherpaOnnxLinearResampler
Opaque linear resampler handle.
Definition c-api.h:3662
struct SherpaOnnxOfflineSourceSeparation SherpaOnnxOfflineSourceSeparation
Opaque source-separation engine handle.
Definition c-api.h:4261
struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging
Opaque audio tagger handle.
Definition c-api.h:3438
struct SherpaOnnxOfflinePunctuation SherpaOnnxOfflinePunctuation
Opaque offline punctuation handle.
Definition c-api.h:3540
struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector
Opaque voice activity detector handle.
Definition c-api.h:2054
RAII wrapper for audio tagging.
Definition cxx-api.h:1625
std::vector< AudioEvent > Compute(const OfflineStream *s, int32_t top_k=-1)
Run audio tagging and return copied results.
OfflineStream CreateStream() const
Create an offline stream for tagging.
std::shared_ptr< std::vector< AudioEvent > > ComputePtr(const OfflineStream *s, int32_t top_k=-1)
Like Compute(), but returns the result vector in a shared pointer.
void Destroy(const SherpaOnnxAudioTagging *p) const
Destroy the wrapped C handle.
static AudioTagging Create(const AudioTaggingConfig &config)
Create an audio tagger.
RAII wrapper for the circular buffer helper used by VAD.
Definition cxx-api.h:1381
static CircularBuffer Create(int32_t capacity)
Create a circular buffer with the given capacity in samples.
int32_t Head() const
Return the current head index.
void Destroy(const SherpaOnnxCircularBuffer *p) const
Destroy the wrapped C handle.
std::vector< float > Get(int32_t start_index, int32_t n) const
Copy a contiguous span from the buffer.
void Push(const float *p, int32_t n) const
Append samples to the buffer.
int32_t Size() const
Return the number of stored samples.
void Reset() const
Reset the buffer to empty.
void Pop(int32_t n) const
Remove samples from the head of the buffer.
RAII wrapper for keyword spotting.
Definition cxx-api.h:1177
OnlineStream CreateStream(const std::string &keywords) const
Create a keyword stream with inline extra or replacement keywords.
bool IsReady(const OnlineStream *s) const
Check whether the stream has enough data to decode.
void Decode(const OnlineStream *ss, int32_t n) const
Decode multiple ready streams in parallel.
void Destroy(const SherpaOnnxKeywordSpotter *p) const
Destroy the wrapped C handle.
OnlineStream CreateStream() const
Create a keyword stream using configured keywords.
void Decode(const OnlineStream *s) const
Decode one ready stream.
static KeywordSpotter Create(const KeywordSpotterConfig &config)
Create a keyword spotter from a config struct.
void Reset(const OnlineStream *s) const
Reset a stream after a keyword trigger.
KeywordResult GetResult(const OnlineStream *s) const
Return the copied keyword spotting result for a stream.
RAII wrapper for linear resampling.
Definition cxx-api.h:1460
static LinearResampler Create(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, int32_t num_zeros)
Create a linear resampler.
LinearResampler()=default
Construct an empty wrapper.
int32_t GetInputSamplingRate() const
Return the input sample rate in Hz.
void Destroy(const SherpaOnnxLinearResampler *p) const
Destroy the wrapped C handle.
std::vector< float > Resample(const float *input, int32_t input_dim, bool flush) const
Resample one chunk of input audio.
int32_t GetOutputSamplingRate() const
Return the output sample rate in Hz.
void Reset() const
Reset the resampler state.
Base class for move-only RAII wrappers around C handles.
Definition cxx-api.h:290
MoveOnly(const T *p)
Construct a wrapper from a raw C handle.
Definition cxx-api.h:295
const T * Release()
Release ownership of the wrapped raw pointer.
Definition cxx-api.h:322
MoveOnly(const MoveOnly &)=delete
~MoveOnly()
Destroy the wrapped handle if present.
Definition cxx-api.h:298
MoveOnly()=default
Construct an empty wrapper.
MoveOnly & operator=(const MoveOnly &)=delete
MoveOnly & operator=(MoveOnly &&other)
Definition cxx-api.h:306
MoveOnly(MoveOnly &&other)
Definition cxx-api.h:304
const T * Get() const
Return the wrapped raw pointer without transferring ownership.
Definition cxx-api.h:319
RAII wrapper for offline punctuation restoration.
Definition cxx-api.h:1520
static OfflinePunctuation Create(const OfflinePunctuationConfig &config)
Create an offline punctuation model.
std::string AddPunctuation(const std::string &text) const
Add punctuation to a complete input text.
void Destroy(const SherpaOnnxOfflinePunctuation *p) const
Destroy the wrapped C handle.
RAII wrapper for an offline recognizer.
Definition cxx-api.h:801
std::shared_ptr< OfflineRecognizerResult > GetResultPtr(const OfflineStream *s) const
Convenience wrapper that returns the result inside a shared pointer.
static OfflineRecognizer Create(const OfflineRecognizerConfig &config)
Create an offline recognizer from a config struct.
void Decode(const OfflineStream *ss, int32_t n) const
Decode multiple offline streams in parallel.
void Decode(const OfflineStream *s) const
Decode one offline stream.
OfflineRecognizerResult GetResult(const OfflineStream *s) const
Return the copied recognition result for one stream.
void SetConfig(const OfflineRecognizerConfig &config) const
Update recognizer runtime configuration after creation.
OfflineStream CreateStream() const
Create a stream using the recognizer's configured hotwords.
void Destroy(const SherpaOnnxOfflineRecognizer *p) const
Destroy the wrapped C handle.
OfflineStream CreateStream(const std::string &hotwords) const
Create a stream with inline hotwords.
RAII wrapper for offline source separation.
Definition cxx-api.h:1711
void Destroy(const SherpaOnnxOfflineSourceSeparation *p) const
Destroy the wrapped C handle.
int32_t GetNumberOfStems() const
Return the number of stems produced.
int32_t GetOutputSampleRate() const
Return the output sample rate.
static OfflineSourceSeparation Create(const OfflineSourceSeparationConfig &config)
Create an offline source separation engine.
SourceSeparationOutput Process(const float *const *samples, int32_t num_channels, int32_t num_samples, int32_t sample_rate) const
Run source separation on multi-channel audio.
RAII wrapper for offline speech denoising.
Definition cxx-api.h:1258
DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const
Run denoising on a complete waveform.
int32_t GetSampleRate() const
Return the expected input sample rate.
static OfflineSpeechDenoiser Create(const OfflineSpeechDenoiserConfig &config)
Create an offline speech denoiser.
void Destroy(const SherpaOnnxOfflineSpeechDenoiser *p) const
Destroy the wrapped C handle.
RAII wrapper for an offline decoding stream.
Definition cxx-api.h:774
void SetOption(const char *key, const char *value) const
Set a per-stream string option.
const char * GetOption(const char *key) const
Get a per-stream string option.
void Destroy(const SherpaOnnxOfflineStream *p) const
Destroy the wrapped C handle.
void AcceptWaveform(int32_t sample_rate, const float *samples, int32_t n) const
Provide the complete waveform for offline decoding.
int32_t HasOption(const char *key) const
Check whether a per-stream option exists.
OfflineStream(const SherpaOnnxOfflineStream *p)
Wrap an existing C offline stream handle.
RAII wrapper for offline TTS.
Definition cxx-api.h:1091
GeneratedAudio Generate(const std::string &text, int32_t sid=0, float speed=1.0, OfflineTtsCallback callback=nullptr, void *arg=nullptr) const
Generate speech using the simple speaker-id and speed interface.
GeneratedAudio Generate(const std::string &text, const GenerationConfig &config, OfflineTtsCallback callback=nullptr, void *arg=nullptr) const
Generate speech using the advanced generation configuration.
int32_t NumSpeakers() const
Return the number of supported speakers.
static OfflineTts Create(const OfflineTtsConfig &config)
Create an offline TTS engine.
std::shared_ptr< GeneratedAudio > Generate2(const std::string &text, const GenerationConfig &config, OfflineTtsCallback callback=nullptr, void *arg=nullptr) const
Like the advanced Generate() overload, but returns a shared pointer.
std::shared_ptr< GeneratedAudio > Generate2(const std::string &text, int32_t sid=0, float speed=1.0, OfflineTtsCallback callback=nullptr, void *arg=nullptr) const
Like Generate(), but returns a shared pointer to the result.
void Destroy(const SherpaOnnxOfflineTts *p) const
Destroy the wrapped C handle.
int32_t SampleRate() const
Return the output sample rate of generated audio.
RAII wrapper for online punctuation restoration.
Definition cxx-api.h:1560
void Destroy(const SherpaOnnxOnlinePunctuation *p) const
Destroy the wrapped C handle.
static OnlinePunctuation Create(const OnlinePunctuationConfig &config)
Create an online punctuation model.
std::string AddPunctuation(const std::string &text) const
Add punctuation to one input text chunk.
RAII wrapper for a streaming recognizer.
Definition cxx-api.h:385
void Decode(const OnlineStream *s) const
Decode one ready stream.
OnlineRecognizerResult GetResult(const OnlineStream *s) const
Return the current recognition result for a stream.
static OnlineRecognizer Create(const OnlineRecognizerConfig &config)
Create a streaming recognizer from a config struct.
void Reset(const OnlineStream *s) const
Reset a stream after endpointing or utterance completion.
OnlineStream CreateStream(const std::string &hotwords) const
Create a stream with inline hotwords.
void Destroy(const SherpaOnnxOnlineRecognizer *p) const
Destroy the wrapped C handle.
bool IsReady(const OnlineStream *s) const
Check whether the given stream has enough data to decode.
bool IsEndpoint(const OnlineStream *s) const
Check whether endpointing has triggered for a stream.
OnlineStream CreateStream() const
Create a stream that uses the recognizer's configured hotwords.
void Decode(const OnlineStream *ss, int32_t n) const
Decode multiple ready streams in parallel.
RAII wrapper for online speech denoising.
Definition cxx-api.h:1285
int32_t GetFrameShiftInSamples() const
Return the recommended frame shift in samples for streaming input.
DenoisedAudio Flush() const
Flush buffered audio and reset the denoiser.
DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const
Process one chunk of streaming audio.
int32_t GetSampleRate() const
Return the expected input sample rate.
void Destroy(const SherpaOnnxOnlineSpeechDenoiser *p) const
Destroy the wrapped C handle.
void Reset() const
Reset the denoiser for a new stream.
static OnlineSpeechDenoiser Create(const OnlineSpeechDenoiserConfig &config)
Create an online speech denoiser.
void InputFinished() const
Indicate that no more input audio will be provided.
void AcceptWaveform(int32_t sample_rate, const float *samples, int32_t n) const
Append audio samples to the stream.
void Destroy(const SherpaOnnxOnlineStream *p) const
Destroy the wrapped C handle.
int32_t HasOption(const char *key) const
Check whether a per-stream option exists.
OnlineStream(const SherpaOnnxOnlineStream *p)
Wrap an existing C online stream handle.
void SetOption(const char *key, const char *value) const
Set a per-stream string option.
const char * GetOption(const char *key) const
Get a per-stream string option.
RAII wrapper for voice activity detection.
Definition cxx-api.h:1418
void Clear() const
Remove all queued speech segments.
void Pop() const
Remove the front queued speech segment.
void Destroy(const SherpaOnnxVoiceActivityDetector *p) const
Destroy the wrapped C handle.
std::shared_ptr< SpeechSegment > FrontPtr() const
Like Front(), but returns the segment in a shared pointer.
static VoiceActivityDetector Create(const VadModelConfig &config, float buffer_size_in_seconds)
Create a VAD instance.
void AcceptWaveform(const float *samples, int32_t n) const
Feed more audio samples to the detector.
bool IsEmpty() const
Check whether no speech segments are currently queued.
bool IsDetected() const
Check whether speech is currently detected.
void Flush() const
Flush buffered context at end of input.
SpeechSegment Front() const
Return the front queued speech segment.
void Reset() const
Reset the detector state.
std::string GetGitSha1()
Return the build Git SHA1 as a C++ string.
int32_t(*)(const float *samples, int32_t num_samples, float progress, void *arg) OfflineTtsCallback
TTS progress callback.
Definition cxx-api.h:1065
std::string GetVersionStr()
Return the sherpa-onnx version string as a C++ string.
Wave ReadWave(const std::string &filename)
Read a mono WAVE file into a C++ value object.
std::string GetGitDate()
Return the build Git date as a C++ string.
bool FileExists(const std::string &filename)
Return true if a file exists.
bool WriteWave(const std::string &filename, const Wave &wave)
Write a mono WAVE file from a C++ value object.
One audio-tagging event returned by the C++ wrapper.
Definition cxx-api.h:1614
Configuration for audio tagging.
Definition cxx-api.h:1604
AudioTaggingModelConfig model
Definition cxx-api.h:1606
Audio-tagging model configuration.
Definition cxx-api.h:1590
OfflineZipformerAudioTaggingModelConfig zipformer
Definition cxx-api.h:1592
Denoised waveform returned by speech enhancement wrappers.
Definition cxx-api.h:1249
std::vector< float > samples
Definition cxx-api.h:1251
Feature extraction settings shared by ASR and KWS wrappers.
Definition cxx-api.h:151
Generated audio returned by the C++ TTS wrapper.
Definition cxx-api.h:1051
std::vector< float > samples
Definition cxx-api.h:1053
Generation-time options for advanced TTS synthesis.
Definition cxx-api.h:1016
std::unordered_map< std::string, std::string > extra
Definition cxx-api.h:1033
std::vector< float > reference_audio
Definition cxx-api.h:1024
Homophone replacement resources used by some Chinese ASR setups.
Definition cxx-api.h:167
Current keyword spotting result copied into C++ containers.
Definition cxx-api.h:1142
std::vector< float > timestamps
Definition cxx-api.h:1148
std::vector< std::string > tokens
Definition cxx-api.h:1146
Configuration for the C++ keyword spotting wrapper.
Definition cxx-api.h:1156
Offline Canary model configuration.
Definition cxx-api.h:465
Offline Cohere Transcribe model configuration.
Definition cxx-api.h:479
Offline Dolphin model file.
Definition cxx-api.h:523
Offline FireRed ASR CTC model file.
Definition cxx-api.h:501
Offline FireRed ASR model files.
Definition cxx-api.h:493
Offline FunASR Nano model configuration.
Definition cxx-api.h:567
Optional language-model rescoring configuration for offline ASR.
Definition cxx-api.h:684
Offline MedASR CTC model file.
Definition cxx-api.h:547
Acoustic model configuration for offline ASR.
Definition cxx-api.h:627
OfflineMedAsrCtcModelConfig medasr
Definition cxx-api.h:672
OfflineTdnnModelConfig tdnn
Definition cxx-api.h:637
OfflineParaformerModelConfig paraformer
Definition cxx-api.h:631
OfflineSenseVoiceModelConfig sense_voice
Definition cxx-api.h:656
OfflineZipformerCtcModelConfig zipformer_ctc
Definition cxx-api.h:664
OfflineFireRedAsrCtcModelConfig fire_red_asr_ctc
Definition cxx-api.h:676
OfflineMoonshineModelConfig moonshine
Definition cxx-api.h:658
OfflineCanaryModelConfig canary
Definition cxx-api.h:666
OfflineWhisperModelConfig whisper
Definition cxx-api.h:635
OfflineCohereTranscribeModelConfig cohere_transcribe
Definition cxx-api.h:680
OfflineDolphinModelConfig dolphin
Definition cxx-api.h:662
OfflineWenetCtcModelConfig wenet_ctc
Definition cxx-api.h:668
OfflineFunASRNanoModelConfig funasr_nano
Definition cxx-api.h:674
OfflineFireRedAsrModelConfig fire_red_asr
Definition cxx-api.h:660
OfflineTransducerModelConfig transducer
Definition cxx-api.h:629
OfflineNemoEncDecCtcModelConfig nemo_ctc
Definition cxx-api.h:633
OfflineQwen3ASRModelConfig qwen3_asr
Definition cxx-api.h:678
OfflineOmnilingualAsrCtcModelConfig omnilingual
Definition cxx-api.h:670
Offline Moonshine model configuration.
Definition cxx-api.h:553
Offline NeMo EncDec CTC model file.
Definition cxx-api.h:441
Offline omnilingual ASR CTC model file.
Definition cxx-api.h:541
Offline Paraformer model file.
Definition cxx-api.h:435
Configuration for offline punctuation.
Definition cxx-api.h:1513
OfflinePunctuationModelConfig model
Definition cxx-api.h:1515
Offline punctuation model configuration.
Definition cxx-api.h:1501
Offline Qwen3-ASR model configuration.
Definition cxx-api.h:597
Configuration for offline ASR.
Definition cxx-api.h:723
Offline ASR result copied into C++ containers.
Definition cxx-api.h:752
std::vector< std::string > tokens
Definition cxx-api.h:758
Offline SenseVoice model configuration.
Definition cxx-api.h:513
Configuration for offline source separation.
Definition cxx-api.h:1689
OfflineSourceSeparationModelConfig model
Definition cxx-api.h:1691
Source-separation model configuration.
Definition cxx-api.h:1675
OfflineSourceSeparationUvrModelConfig uvr
Definition cxx-api.h:1679
OfflineSourceSeparationSpleeterModelConfig spleeter
Definition cxx-api.h:1677
Spleeter source-separation model configuration.
Definition cxx-api.h:1657
UVR (MDX-Net) source-separation model configuration.
Definition cxx-api.h:1665
Configuration for offline speech denoising.
Definition cxx-api.h:1243
OfflineSpeechDenoiserModelConfig model
Definition cxx-api.h:1245
DPDFNet speech denoiser model configuration.
Definition cxx-api.h:1218
GTCRN speech denoiser model configuration.
Definition cxx-api.h:1212
Speech denoiser model configuration.
Definition cxx-api.h:1229
OfflineSpeechDenoiserGtcrnModelConfig gtcrn
Definition cxx-api.h:1231
OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet
Definition cxx-api.h:1233
Offline transducer model files.
Definition cxx-api.h:425
Configuration for offline TTS.
Definition cxx-api.h:1037
OfflineTtsModelConfig model
Definition cxx-api.h:1039
Model configuration for offline TTS.
Definition cxx-api.h:991
OfflineTtsKittenModelConfig kitten
Definition cxx-api.h:999
OfflineTtsSupertonicModelConfig supertonic
Definition cxx-api.h:1005
OfflineTtsVitsModelConfig vits
Definition cxx-api.h:993
OfflineTtsKokoroModelConfig kokoro
Definition cxx-api.h:997
OfflineTtsMatchaModelConfig matcha
Definition cxx-api.h:995
OfflineTtsPocketModelConfig pocket
Definition cxx-api.h:1003
OfflineTtsZipvoiceModelConfig zipvoice
Definition cxx-api.h:1001
Pocket TTS model configuration.
Definition cxx-api.h:947
Supertonic model configuration.
Definition cxx-api.h:968
ZipVoice model configuration.
Definition cxx-api.h:922
Offline WeNet CTC model file.
Definition cxx-api.h:535
Offline Whisper model configuration.
Definition cxx-api.h:447
Zipformer audio-tagging model configuration.
Definition cxx-api.h:1579
Offline Zipformer CTC model file.
Definition cxx-api.h:529
Decoder graph configuration for online CTC + FST decoding.
Definition cxx-api.h:159
Acoustic model configuration for streaming ASR.
Definition cxx-api.h:120
OnlineNemoCtcModelConfig nemo_ctc
Definition cxx-api.h:128
OnlineZipformer2CtcModelConfig zipformer2_ctc
Definition cxx-api.h:126
OnlineToneCtcModelConfig t_one_ctc
Definition cxx-api.h:130
OnlineParaformerModelConfig paraformer
Definition cxx-api.h:124
OnlineTransducerModelConfig transducer
Definition cxx-api.h:122
Streaming NeMo CTC model file.
Definition cxx-api.h:84
Streaming Paraformer model files.
Definition cxx-api.h:70
Configuration for online punctuation.
Definition cxx-api.h:1553
OnlinePunctuationModelConfig model
Definition cxx-api.h:1555
Online punctuation model configuration.
Definition cxx-api.h:1539
Configuration for streaming ASR.
Definition cxx-api.h:199
OnlineCtcFstDecoderConfig ctc_fst_decoder_config
Definition cxx-api.h:230
Current streaming ASR result copied into C++ containers.
Definition cxx-api.h:245
std::vector< std::string > tokens
Definition cxx-api.h:249
Configuration for online speech denoising.
Definition cxx-api.h:1278
OfflineSpeechDenoiserModelConfig model
Definition cxx-api.h:1280
Streaming T-One CTC model file.
Definition cxx-api.h:90
Streaming transducer model files.
Definition cxx-api.h:60
Streaming Zipformer2 CTC model file.
Definition cxx-api.h:78
Silero VAD model configuration.
Definition cxx-api.h:1318
Output of a source-separation run.
Definition cxx-api.h:1701
std::vector< SourceSeparationStem > stems
Definition cxx-api.h:1703
A single stem (output track) with one or more channels.
Definition cxx-api.h:1695
std::vector< std::vector< float > > samples
Definition cxx-api.h:1697
One speech segment produced by the VAD wrapper.
Definition cxx-api.h:1372
std::vector< float > samples
Definition cxx-api.h:1376
TEN VAD model configuration.
Definition cxx-api.h:1334
VAD model configuration.
Definition cxx-api.h:1355
SileroVadModelConfig silero_vad
Definition cxx-api.h:1357
Mono PCM waveform used by the helper I/O functions.
Definition cxx-api.h:257
std::vector< float > samples
Definition cxx-api.h:259