TTS: Kitten
Generate speech with the Kitten Nano English model. Kitten is a lightweight TTS model that supports both synchronous and asynchronous generation.
For model documentation, see Kitten TTS.
Source files
Synchronous generation
// Copyright (c) 2026 Xiaomi Corporation
//
// Synchronous text-to-speech with the Kitten Nano model.
//
// Usage:
//   node tts_kitten_sync.js
//
const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Builds the offline TTS engine for the Kitten Nano English model.
 *
 * The model files are expected under `./kitten-nano-en-v0_1-fp16/`
 * (the directory produced by extracting the model archive).
 *
 * @returns {sherpa_onnx.OfflineTts} The constructed TTS engine.
 */
function createOfflineTts() {
  const modelDir = './kitten-nano-en-v0_1-fp16';

  const config = {
    model: {
      kitten: {
        model: `${modelDir}/model.fp16.onnx`,
        voices: `${modelDir}/voices.bin`,
        tokens: `${modelDir}/tokens.txt`,
        dataDir: `${modelDir}/espeak-ng-data`,
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 6,             // speaker ID
  speed: 1.0,         // 1.0 = normal speed
  silenceScale: 0.2,  // controls pause length
});

// Time only the generate() call so the real-time factor reflects synthesis
// cost and nothing else.
const startMs = Date.now();
const audio = tts.generate({text, generationConfig});
const stopMs = Date.now();

const elapsedSeconds = (stopMs - startMs) / 1000;
const durationSeconds = audio.samples.length / audio.sampleRate;
// RTF < 1 means audio was generated faster than it takes to play back.
const realTimeFactor = elapsedSeconds / durationSeconds;

console.log('Wave duration', durationSeconds.toFixed(3), 'seconds');
console.log('Elapsed', elapsedSeconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsedSeconds.toFixed(3)}/${durationSeconds.toFixed(3)} =`,
    realTimeFactor.toFixed(3));

const filename = 'test-kitten-en-sync.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Asynchronous generation
// Copyright (c) 2025 Xiaomi Corporation
//
// Text-to-speech with the Kitten Nano model (async generation).
//
// Usage:
//   node tts_kitten_async.js
//
const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Asynchronously builds the offline TTS engine for the Kitten Nano English
 * model.
 *
 * The model files are expected under `./kitten-nano-en-v0_1-fp16/`
 * (the directory produced by extracting the model archive).
 *
 * @returns {Promise<sherpa_onnx.OfflineTts>} Resolves with the TTS engine.
 */
async function createOfflineTts() {
  const modelDir = './kitten-nano-en-v0_1-fp16';

  const config = {
    model: {
      kitten: {
        model: `${modelDir}/model.fp16.onnx`,
        voices: `${modelDir}/voices.bin`,
        tokens: `${modelDir}/tokens.txt`,
        dataDir: `${modelDir}/espeak-ng-data`,
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

/**
 * Generates speech for a fixed sample text, reports progress and timing,
 * and writes the result to `test-kitten-en.wav`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const tts = await createOfflineTts();

  const text =
      'Today as always, men fall into two groups: slaves and free men. ' +
      'Whoever does not have two-thirds of his day for himself, is a slave, ' +
      'whatever he may be: a statesman, a businessman, an official, or a scholar.';

  console.log('Number of speakers:', tts.numSpeakers);
  console.log('Sample rate:', tts.sampleRate);

  const generationConfig = new sherpa_onnx.GenerationConfig({
    sid: 6,             // speaker ID
    speed: 1.0,         // 1.0 = normal speed
    silenceScale: 0.2,  // controls pause length
  });

  // Time only the generateAsync() call (config construction and the trailing
  // log line are excluded) so the RTF is comparable with the sync example.
  const startMs = Date.now();
  const audio = await tts.generateAsync({
    text,
    generationConfig,
    // Receives each audio chunk and the overall progress while generating.
    // The return value tells the engine whether to continue; this example
    // never cancels.
    onProgress({samples, progress}) {
      process.stdout.write(`\rGenerating... ${
          (progress * 100).toFixed(1)}% (chunk length: ${samples.length})`);
      return true;
    },
  });
  const stopMs = Date.now();

  console.log('\nGeneration finished.');

  const elapsedSeconds = (stopMs - startMs) / 1000;
  const durationSeconds = audio.samples.length / audio.sampleRate;
  // RTF < 1 means audio was generated faster than it takes to play back.
  const realTimeFactor = elapsedSeconds / durationSeconds;

  console.log('Wave duration:', durationSeconds.toFixed(3), 'seconds');
  console.log('Elapsed time:', elapsedSeconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsedSeconds.toFixed(3)} / ${durationSeconds.toFixed(3)} =`,
      realTimeFactor.toFixed(3));

  const filename = 'test-kitten-en.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });

  console.log(`Saved to ${filename}`);
}

// Report any failure and exit non-zero so callers/scripts can detect it.
main().catch((err) => {
  console.error('TTS failed:', err);
  process.exit(1);
});
How to run
Install the package:
npm install sherpa-onnx-node
Download the model:
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2
Set the library path and run:
# macOS
export DYLD_LIBRARY_PATH=$(npm root)/sherpa-onnx-node/lib:$DYLD_LIBRARY_PATH

# Linux
export LD_LIBRARY_PATH=$(npm root)/sherpa-onnx-node/lib:$LD_LIBRARY_PATH

# Choose one:
node tts_kitten_sync.js
node tts_kitten_async.js
Notes
- `GenerationConfig` fields: `sid` (speaker ID), `speed` (1.0 = normal), `silenceScale` (controls pause length).
- The sync API uses `new sherpa_onnx.OfflineTts(config)` and `tts.generate({text, generationConfig})`.
- The async API uses `OfflineTts.createAsync()` and `tts.generateAsync()` with an `onProgress` callback that receives audio chunks as they are generated. Return `1` to continue, `0` to cancel (the async example above returns `true` to continue).