Spoken Language Identification
Identify the language spoken in a WAV file using a Whisper multilingual model. This example classifies audio files into their corresponding languages.
Source file
nodejs-addon-examples/test_spoken_language_identification.js
Code
// Copyright (c) 2023-2024 Xiaomi Corporation
//
// Spoken language identification using a Whisper multilingual model.
//
// Usage:
// node test_spoken_language_identification.js
//
8const sherpa_onnx = require('sherpa-onnx-node');
9
/**
 * Create a spoken-language identifier that uses the Whisper tiny int8
 * encoder/decoder pair.
 *
 * Download Whisper multilingual models from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 * The model paths in the config are relative ('./sherpa-onnx-whisper-tiny/...'),
 * so the extracted model directory must be reachable from where the script
 * is run.
 *
 * @returns {object} A new `sherpa_onnx.SpokenLanguageIdentification` instance
 *     constructed from the config below.
 */
function createSpokenLanguageID() {
  const config = {
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    },
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };
  return new sherpa_onnx.SpokenLanguageIdentification(config);
}
24
const slid = createSpokenLanguageID();

// One sample recording per language; the files come from the
// spoken-language-identification-test-wavs archive.
const testWaves = [
  './spoken-language-identification-test-wavs/ar-arabic.wav',
  './spoken-language-identification-test-wavs/de-german.wav',
  './spoken-language-identification-test-wavs/en-english.wav',
  './spoken-language-identification-test-wavs/fr-french.wav',
  './spoken-language-identification-test-wavs/pt-portuguese.wav',
  './spoken-language-identification-test-wavs/es-spanish.wav',
  './spoken-language-identification-test-wavs/zh-chinese.wav',
];

// Intl.DisplayNames converts ISO language codes to human-readable names.
const display = new Intl.DisplayNames(['en'], {type: 'language'});

// Identify each file independently: a new stream is created per file, fed the
// whole waveform, and then passed to compute() to get the language code.
for (const f of testWaves) {
  const stream = slid.createStream();

  const wave = sherpa_onnx.readWave(f);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

  const lang = slid.compute(stream);
  // Prints "<path>: <code> (<English language name>)".
  console.log(`${f}: ${lang} (${display.of(lang)})`);
}
How to run
Install the package:
npm install sherpa-onnx-node
Download the Whisper multilingual model and test files:
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
Set the library path and run:
# macOS
export DYLD_LIBRARY_PATH=$(npm root)/sherpa-onnx-node/lib:$DYLD_LIBRARY_PATH
# Linux
export LD_LIBRARY_PATH=$(npm root)/sherpa-onnx-node/lib:$LD_LIBRARY_PATH
# Run the example
node test_spoken_language_identification.js
Expected output
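./spoken-language-identification-test-wavs/ar-arabic.wav: ar (Arabic)
./spoken-language-identification-test-wavs/de-german.wav: de (German)
./spoken-language-identification-test-wavs/en-english.wav: en (English)
./spoken-language-identification-test-wavs/fr-french.wav: fr (French)
./spoken-language-identification-test-wavs/pt-portuguese.wav: pt (Portuguese)
./spoken-language-identification-test-wavs/es-spanish.wav: es (Spanish)
./spoken-language-identification-test-wavs/zh-chinese.wav: zh (Chinese)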
Notes
SpokenLanguageIdentification requires a Whisper multilingual model (not an English-only model).
compute() returns an ISO 639-1 language code (e.g., en, zh, fr).
Intl.DisplayNames is a built-in JavaScript API that converts language codes to human-readable names.