IPython demo Pykaldi decoders on short noise wav
Pykaldi: Demonstrating the Python extension and a customized Kaldi decoder
Pykaldi (a fork of Kaldi), used for building the Python wrapper and the decoder, is available at:
Pykaldi itself depends on forked pyfst
The decoder is used in the Alex dialogue system:
https://github.com/UFAL-DSG/alex
For Czech: Test it for FREE at 800 899 998!
More about the public transport information can be found at:
- In English: http://ufal.mff.cuni.cz/alex-dialogue-systems-framework/
- In Czech (with updates): http://ufal.mff.cuni.cz/alex-dialogue-systems-framework/ptics
In [2]:
from pykaldi.decoders import PyGmmLatgenWrapper
from pykaldi.utils import load_wav
from IPython.display import display # display multiple SVG in one cell
import fst # fork of pyfst: https://github.com/UFAL-DSG/pyfst
# Test recording used for the demo; test_pcm is whatever load_wav returns
# for it and is handed to the decoder further below.
test_wav = '/ha/projects/vystadial/data/asr/cs/voip/test/all-2012-06-08-13-32-40.800581.recorded-0148.75-0150.09.wav'
test_pcm = load_wav(test_wav)

# Option flags of the decoder (feature config, verbosity, beams, limits).
decoder_options = [
    '--config=/ha/work/people/oplatek/alex-dsg/alex/resources/asr/voip_cs/kaldi/mfcc.conf',
    '--verbose=0',
    '--max-mem=10000000000',
    '--lat-lm-scale=10',
    '--beam=12.0',
    '--lattice-beam=6.0',
    '--max-active=5000',
]

# Positional arguments, in the order the decoder expects them.
model_args = [
    # acoustic model (AM)
    '/ha/work/people/oplatek/alex-dsg/alex/resources/asr/voip_cs/kaldi/tri2b_bmmi.mdl',
    # decoding graph HCLG.fst
    '/ha/work/people/oplatek/alex-dsg/alex/applications/PublicTransportInfoCS/hclg/models/HCLG_tri2b_bmmi.fst',
    # NOTE(review): presumably the colon-separated silence phone ids - confirm
    '1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25',
    # NOTE(review): presumably the feature transform matrix - confirm
    '/ha/work/people/oplatek/alex-dsg/alex/resources/asr/voip_cs/kaldi/tri2b_bmmi.mat',
]

# Same argument vector as a single flat list: options first, then positionals.
argv = decoder_options + model_args

d = PyGmmLatgenWrapper()
d.setup(argv)
# Usually only a few frames are sent at a time and forward decoding is run
# after each batch. Here the whole test wav is buffered at once.
d.frame_in(test_pcm)

# Decode in steps of at most 10 frames until decode() reports that no more
# frames were decoded; `total` accumulates the number of decoded frames.
decoded, total = d.decode(max_frames=10), 0
while decoded > 0:
    total += decoded
    decoded = d.decode(max_frames=10)

# The buffer is drained: run the final pruning step, then fetch the
# utterance likelihood together with the resulting lattice.
d.prune_final()
utt_lik, lat = d.get_lattice()
# Printing results.
# Each print takes a single parenthesised expression, so the output is the
# same under the Python 2 print statement and the Python 3 print function.
print('The likelihood of posterior lattice is %f' % utt_lik)
print('Forward decoded frames: %d' % total)

# Replace the integer label ids on both sides of the lattice with the word
# symbols read from words.txt, so the displayed lattice is human-readable.
lat.isyms = lat.osyms = fst.read_symbols_text('/ha/work/people/oplatek/alex-dsg/alex/applications/PublicTransportInfoCS/hclg/models/words.txt')
display(lat)

# The reference transcription is stored next to the wav file as <wav>.trn.
with open(test_wav+'.trn', 'r') as r:
    print('REFERENCE %s ' % r.read())