speech_recognition
kaldi_gstreamer_app.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 #
3 # Kaldi Gstreamer App
4 
5 # Make python 2/3 compatible
6 from __future__ import (absolute_import, division,
7  print_function, unicode_literals)
8 
9 # System imports
10 import os
11 import subprocess
12 
13 # Gstreamer imports
14 import gi
15 gi.require_version('Gst', '1.0')
16 from gi.repository import Gst
17 
18 # Speech recognition
19 from .gstreamer_app import GstApp
20 from .kaldi_grammar import Grammar
21 
23  """Kaldi Gstreamer Application"""
def __init__(self, model_path, grammar, target, ispreemt_requested):
    """Initialize a KaldiGstApp object.

    Builds the Grammar, runs ``mkdynamicgraph.bash`` to prepare the grammar
    and decoding graphs, creates and configures the
    ``onlinegmmdecodefaster`` Gstreamer element, links it between the
    audioresample element and the fakesink, and sets the pipeline to
    PLAYING.

    :param model_path: directory containing the Kaldi model files
        (HCLG.fst, final.mdl, words.txt and, optionally, final.mat)
    :param grammar: grammar specification, forwarded to Grammar
    :param target: grammar target, forwarded to Grammar
    :param ispreemt_requested: accepted for interface compatibility; it is
        not used anywhere in this method
    """
    GstApp.__init__(self)

    self.type = 'Kaldi-Gst-App'
    self.grammar = Grammar(model_path, grammar, target)

    # Prepare grammar and decoding graphs
    subp_status = subprocess.call(["mkdynamicgraph.bash",
                                   self.grammar.model_path,
                                   self.grammar.model_path_tmp])

    # Any non-zero exit status signals failure, not only status 1
    if subp_status != 0:
        self._error("mkdynamicgraph failed")

    self.pub_str = ""
    self.sentence = None
    self.asr = Gst.ElementFactory.make("onlinegmmdecodefaster", "asr")

    if self.asr:
        if not os.path.isdir(model_path):
            # Both placeholders need a value; a single argument here would
            # raise TypeError instead of reporting the actual problem
            self._error("Model (%s) not downloaded. Place the model at (%s) first"
                        % (model_path, model_path))

        # os.path.join gives the same result as plain concatenation when
        # model_path ends with a separator and a correct one when it does not
        self.asr.set_property("fst", os.path.join(model_path, "HCLG.fst"))

        # Add LDA matrix if it exists
        lda_matrix = os.path.join(model_path, "final.mat")
        if os.path.exists(lda_matrix):
            self.asr.set_property("lda-mat", lda_matrix)

        self.asr.set_property("model", os.path.join(model_path, "final.mdl"))
        self.asr.set_property("word-syms", os.path.join(model_path, "words.txt"))
        self.asr.set_property("silence-phones", "1:2:3:4:5")
        self.asr.set_property("max-active", 4000)
        self.asr.set_property("beam", 12.0)
        self.asr.set_property("acoustic-scale", 0.0769)
    else:
        print_msg = "Couldn't create the onlinegmmdecodefaster element.\n"
        if "GST_PLUGIN_PATH" in os.environ:
            print_msg += "Kaldi Gstreamer Plugin probably not compiled."
        else:
            print_msg += "GST_PLUGIN_PATH unset.\nTry running: export GST_PLUGIN_PATH=$KALDI_ROOT/src/gst-plugin"
        self._error(print_msg)

    # Complete Gstreamer pipeline and start playing
    # NOTE(review): if _error() returns instead of raising/exiting, the lines
    # below run with self.asr set to None -- confirm against GstApp._error
    self.pipeline.add(self.asr)
    self.audioresample.link(self.asr)
    self.asr.link(self.fakesink)
    self.asr.connect('hyp-word', self._wait_for_sentence)
    self.pipeline.set_state(Gst.State.PLAYING)
71 
def _wait_for_sentence(self, asr, word):
    """Accumulate recognized words and publish them as a sentence on silence.

    Connected to the 'hyp-word' signal of the asr element. Each word is
    lower-cased and appended to ``self.pub_str``; when the silence token
    ``<#s>`` arrives, the accumulated text is moved to ``self.sentence``
    and the buffer is cleared.

    :param asr: the emitting element (not used here)
    :param word: the recognized word, or "<#s>" for silence
    """
    # TODO: If the Gstreamer-kaldi plugin is ever changed to emit whole
    # sentences instead of single words, this handler has to change too.
    # A sentence is only published once a pause is registered, which may be
    # less robust than publishing single words when pauses go undetected
    # (e.g. because of noise or talking in the background).
    if word == "<#s>":  # Silence
        self.sentence, self.pub_str = self.pub_str, ""
        return

    lowered = word.lower()
    # No leading space at the start of a new sentence
    self.pub_str = lowered if self.pub_str == "" else self.pub_str + " " + lowered
speech_recognition.kaldi_grammar.Grammar
Definition: kaldi_grammar.py:14
speech_recognition.gstreamer_app.GstApp.audioresample
audioresample
Definition: gstreamer_app.py:22
speech_recognition.gstreamer_app.GstApp.type
type
Definition: gstreamer_app.py:16
speech_recognition.kaldi_gstreamer_app.KaldiGstApp.__init__
def __init__(self, model_path, grammar, target, ispreemt_requested)
Definition: kaldi_gstreamer_app.py:24
speech_recognition.kaldi_gstreamer_app.KaldiGstApp.grammar
grammar
Definition: kaldi_gstreamer_app.py:29
speech_recognition.kaldi_gstreamer_app.KaldiGstApp
Definition: kaldi_gstreamer_app.py:22
speech_recognition.kaldi_gstreamer_app.KaldiGstApp.sentence
sentence
Definition: kaldi_gstreamer_app.py:39
speech_recognition.gstreamer_app.GstApp
Definition: gstreamer_app.py:12
speech_recognition.gstreamer_app.GstApp.fakesink
fakesink
Definition: gstreamer_app.py:23
speech_recognition.gstreamer_app.GstApp._error
def _error(self, *args, **kwargs)
Definition: gstreamer_app.py:32
speech_recognition.gstreamer_app.GstApp.pipeline
pipeline
Definition: gstreamer_app.py:26
speech_recognition.kaldi_gstreamer_app.KaldiGstApp.asr
asr
Definition: kaldi_gstreamer_app.py:40
speech_recognition.kaldi_gstreamer_app.KaldiGstApp.pub_str
pub_str
Definition: kaldi_gstreamer_app.py:38
speech_recognition.kaldi_gstreamer_app.KaldiGstApp._wait_for_sentence
def _wait_for_sentence(self, asr, word)
Definition: kaldi_gstreamer_app.py:72