TTS: Matcha (Chinese)
Generate speech with the Matcha Chinese (baker) model. This model requires a vocoder and rule FSTs for Chinese text normalization. It supports both synchronous and asynchronous generation.
For model documentation, see Matcha Chinese.
Source files
Synchronous generation
1// Copyright (c) 2025 Xiaomi Corporation
2//
3// Text-to-speech with the Matcha Chinese (baker) model.
4// Requires a separate vocoder model (vocos-22khz-univ.onnx) and
5// rule FSTs for phone, date, and number normalization.
6//
7// Usage:
8// node tts_matcha_zh.js
9//
10const sherpa_onnx = require('sherpa-onnx-node');
11
/**
 * Build the offline TTS engine for the Matcha Chinese (baker) model.
 *
 * The configuration names the acoustic model, the vocoder, the lexicon, the
 * token table and the three rule FSTs (phone, date, number) using relative
 * paths, enables debug output, and runs on the CPU with a single thread.
 *
 * @returns {object} A new `sherpa_onnx.OfflineTts` instance.
 */
function createOfflineTts() {
  const matcha = {
    acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
    vocoder: './vocos-22khz-univ.onnx',
    lexicon: './matcha-icefall-zh-baker/lexicon.txt',
    tokens: './matcha-icefall-zh-baker/tokens.txt',
  };

  // Rule FSTs for Chinese text normalization (phone, date, number); the
  // config takes them as a single comma-separated string.
  const ruleFsts = [
    './matcha-icefall-zh-baker/phone.fst',
    './matcha-icefall-zh-baker/date.fst',
    './matcha-icefall-zh-baker/number.fst',
  ].join(',');

  return new sherpa_onnx.OfflineTts({
    model: {
      matcha,
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
    ruleFsts,
  });
}
32
// Build the engine once; it is used for the single synthesis call below.
const tts = createOfflineTts();

// Sample input: besides ordinary sentences it contains a date, phone numbers
// and an amount of money, i.e. the kinds of text the rule FSTs are configured
// for.
const text =
    '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 0,
  speed: 1.0,
  silenceScale: 0.2,
});

// Time only the synthesis call. Neither timestamp is ever reassigned, so both
// are `const`.
const start = Date.now();
const audio = tts.generate({text, generationConfig});
const stop = Date.now();

// Real-time factor = processing time / duration of the generated audio.
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

// Save the generated samples as a wave file in the current directory.
const filename = 'test-matcha-zh.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Asynchronous generation
1// Copyright (c) 2026 Xiaomi Corporation
2//
3// Asynchronous text-to-speech with the Matcha Chinese model.
4//
5// Usage:
6// node tts_matcha_zh_async.js
7//
8const sherpa_onnx = require('sherpa-onnx-node');
9
/**
 * Asynchronously build the offline TTS engine for the Matcha Chinese model.
 *
 * The configuration names the acoustic model, the vocoder, the lexicon, the
 * token table and the three rule FSTs (phone, date, number) using relative
 * paths; debug output is off and inference runs on the CPU with one thread.
 *
 * @returns {Promise<object>} Resolves to the value produced by
 *     `sherpa_onnx.OfflineTts.createAsync(config)`.
 */
async function createOfflineTts() {
  const matcha = {
    acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
    vocoder: './vocos-22khz-univ.onnx',
    lexicon: './matcha-icefall-zh-baker/lexicon.txt',
    tokens: './matcha-icefall-zh-baker/tokens.txt',
  };

  // The rule FSTs are passed as a single comma-separated string.
  const ruleFsts = [
    './matcha-icefall-zh-baker/phone.fst',
    './matcha-icefall-zh-baker/date.fst',
    './matcha-icefall-zh-baker/number.fst',
  ].join(',');

  const config = {
    model: {
      matcha,
      debug: false,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
    ruleFsts,
  };

  const tts = await sherpa_onnx.OfflineTts.createAsync(config);
  return tts;
}
29
/**
 * Synthesize the sample sentence asynchronously, print progress and timing
 * information, and save the result to `test-matcha-zh-async.wav`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const engine = await createOfflineTts();

  const text =
      '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

  const generationConfig = new sherpa_onnx.GenerationConfig({
    sid: 0,
    speed: 1.0,
    silenceScale: 0.2,
  });

  // Progress reporter: the trailing '\r' makes each update overwrite the
  // previous one on the same console line.
  // NOTE(review): the return value 1 presumably tells the engine to keep
  // generating - confirm against the sherpa-onnx-node API.
  const reportProgress = ({samples, progress}) => {
    const percent = (progress * 100).toFixed(1);
    process.stdout.write(`Progress: ${percent}%, Samples: ${samples.length}\r`);
    return 1;
  };

  const startedAt = Date.now();
  const audio = await engine.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: reportProgress,
  });

  // Terminate the in-place progress line before printing the summary.
  console.log('');
  const finishedAt = Date.now();

  // Real-time factor = processing time / duration of the generated audio.
  const elapsedSeconds = (finishedAt - startedAt) / 1000;
  const waveSeconds = audio.samples.length / audio.sampleRate;
  const rtf = elapsedSeconds / waveSeconds;
  console.log(`Wave duration ${waveSeconds.toFixed(3)} seconds`);
  console.log(`Elapsed ${elapsedSeconds.toFixed(3)} seconds`);
  console.log(
      `RTF = ${elapsedSeconds.toFixed(3)}/${waveSeconds.toFixed(3)} = ` +
      rtf.toFixed(3));

  const outputPath = 'test-matcha-zh-async.wav';
  sherpa_onnx.writeWave(
      outputPath, {samples: audio.samples, sampleRate: audio.sampleRate});
  console.log(`Saved to ${outputPath}`);
}
71
// Run the example. On failure, log the error and mark the process as failed:
// without a non-zero exit code a rejected run would still exit with status 0,
// hiding the failure from shells and CI.
main().catch((err) => {
  console.error('Error:', err);
  process.exitCode = 1;
});
How to run
Install the package:
npm install sherpa-onnx-node
Download the model, vocoder, and rule FSTs:
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
Set the library path and run:
# macOS
export DYLD_LIBRARY_PATH=$(npm root)/sherpa-onnx-node/lib:$DYLD_LIBRARY_PATH
# Linux
export LD_LIBRARY_PATH=$(npm root)/sherpa-onnx-node/lib:$LD_LIBRARY_PATH
# Choose one:
node tts_matcha_zh.js
node tts_matcha_zh_async.js
Notes
In addition to the `matcha` config fields, the Chinese model uses:
- `lexicon`: Maps Chinese characters to phonemes.
- `ruleFsts`: Comma-separated FST files for phone, date, and number normalization (e.g., `phone.fst,date.fst,number.fst`).

The text contains dates, phone numbers, and monetary amounts that are normalized by the rule FSTs before synthesis.
The sync API uses `new sherpa_onnx.OfflineTts(config)` and `tts.generate({text, generationConfig})`.
The async API uses `OfflineTts.createAsync()` and `tts.generateAsync()` with an `onProgress` callback.
For English, see TTS: Matcha (English).