4 from __future__
import (absolute_import, division,
5 print_function, unicode_literals)
10 from grammar_parser.cfgparser
import CFGParser
11 from graphviz
import render
16 Class Grammar uses as input a grammar file with extension '.fcfg' and has two functions:
17 get_rule_element: extracts the defined grammar rules
18 get_words: extracts the unique words and creates 'corpus.txt' which is used to build 'G.fst'
# NOTE(review): this file is a garbled extraction — the original source's own
# line numbers (e.g. "20", "27") are fused into the text and many intervening
# lines are missing. Code is left byte-identical; comments only.
20 def __init__(self, model_path, grammar_file_string, target):
# Fail fast when the model directory is absent. The os.path.exists() guard and
# the assignment of self.model_path are on missing lines — TODO confirm against
# the full source.
27 raise Exception(
"Model path '{}' does not exist".format(self.
model_path))
# grammar_file_string doubles as either a filesystem path to a grammar file or
# the grammar text itself: parse from file when the path exists...
35 if os.path.exists(grammar_file_string):
36 self.
parser = CFGParser.fromfile(grammar_file_string)
# ...otherwise parse the string directly (the `else:` header is on a missing
# line).
40 self.
parser = CFGParser.fromstring(grammar_file_string)
# Fragment of get_words (its `def` line is missing from this extraction):
# walks every rule -> option -> conjunct and collects the non-variable conjunct
# names into the unique-word set used to build the corpus file.
52 Extracts list with all the unique words, used within the grammar and
53 create file 'corpus.txt' which is used to build 'G.fst'
# .iteritems() marks this as Python 2 code (consistent with raw_input below);
# `rules` and the `words` set are presumably initialised on missing lines.
62 for key, value
in rules.iteritems():
64 options = value.options
65 for option
in options:
67 conjuncts = option.conjuncts
68 for conjunct
in conjuncts:
# Variables are grammar nonterminals; only terminal words belong in the corpus.
70 if not conjunct.is_variable:
71 words.add(conjunct.name)
# Upper-case the vocabulary (Kaldi corpora are conventionally upper-case —
# TODO confirm); note `words` changes type from set to list here.
73 words = [word.upper()
for word
in list(words)]
# Write corpus.txt for the G.fst build; the write loop body is on missing lines.
78 with open(corpus_path,
"w")
as f:
# Fragment of the interactive recognition loop (its `def` line is missing):
# reads one recognised word at a time, filters the expanded sentence list down
# to sentences starting with that word, and accumulates the chosen words.
84 # TODO: expand the full tree, not only the first words
85 # replace raw_input with the speech recognition output
86 # add an option to skip a word if it is not a match and to check the
89 recognised_sentence = []
# First word: raw_input (Python 2) stands in for the recogniser output.
91 recognition = raw_input(
"Recognised word: ")
95 initial_list, recognised = self.
check_word(recognition)
# pop(0) consumes the first word of every surviving sentence IN PLACE, so
# sentence_list below holds the remaining suffixes.
100 first_word = [line.pop(0)
for line
in initial_list]
101 recognised_sentence.append(first_word[0])
102 sentence_list = initial_list
104 print(
'Initial filtered list: \n')
# Loop until the first surviving sentence has no words left; raises IndexError
# if sentence_list ever becomes empty — presumably guarded on a missing line,
# TODO confirm.
107 while len(sentence_list[0]) > 0:
108 next_recognition = raw_input(
"Next recognised word: ")
# NOTE(review): bare `type(...)` call — result is discarded, so this line is a
# no-op; looks like leftover debugging.
109 type(next_recognition)
112 new_initial_list, recognised = self.
check_word(next_recognition, sentence_list)
118 next_word = [line.pop(0)
for line
in new_initial_list]
119 recognised_sentence.append(next_word[0])
120 sentence_list = new_initial_list
122 print(
'New filtered list: \n')
125 print(
'Recognised sentence: \n' + str(recognised_sentence))
126 return recognised_sentence
# Fragment of check_word (its `def` line is missing from this extraction).
130 Checks if the recognised word is matching with the first element in the expanded sentences
131 As output it keeps a list of only the sentences, starting with the recognised word.
133 :param recognition: the recognised word
134 :param initial_list: list of candidate sentences to filter; expanded from the grammar tree when empty (presumed — TODO confirm)
135 :return filtered_list: sentence list, filtered by its first word
# Lazily expand the grammar tree on the first call, when no candidate list was
# passed in.
140 if len(initial_list) == 0:
141 initial_list = self.expand_tree()
# First branch: items are conjunct objects (fresh expansion), so map to their
# .name strings before comparing. The branch header itself is on a missing line.
144 for sentence
in initial_list:
145 line = [item.name
for item
in sentence]
146 if line[0] == recognition:
147 filtered_list.append(line)
# Second branch: items are already plain strings (list passed back in from a
# previous call), so copy them as-is — presumed, TODO confirm branch condition
# on the missing lines 148-151.
152 for sentence
in initial_list:
153 line = [item
for item
in sentence]
154 if line[0] == recognition:
155 filtered_list.append(line)
# `recognised` is presumably set True/False around here on missing lines.
158 if len(filtered_list) > 0:
161 print(
'Filtered list: \n')
164 return filtered_list, recognised
# Fragment of the pretty-printer (its `def` line is missing): joins each
# sentence's items with spaces and prints one sentence per line.
168 Prints cleanly the output of the tree traversal functions
170 :param sentence_list: list of possible completions
172 for sentence
in sentence_list:
# Shallow copy; assumes items are already strings — TODO confirm.
173 line = [item
for item
in sentence]
174 print(
" ".join(line))
179 Expands the grammar tree based on the words in the grammar rules for the
182 :return: tree of sentence nodes
190 Parses the input sentence to generate the semantics for the pre-set
193 :param sentence: The sentence to be parsed
201 Wrapper around the print_graphviz function to print the current tree
208 A node in a sentence.
209 :ivar edges: Edges to the next node.
210 :ivar done: Reached the end of the sentence.
219 An edge in a sentence.
220 :ivar word: The word to be understood.
221 :ivar node: Node for the remainder of the sentence.
# Fragment of the module-level expand_tree (its `def` line is missing):
# worklist-driven construction of the sentence graph. Nodes are shared via
# available_nodes; edges are grouped by common first word.
230 Expands the grammar tree based on the words in the grammar rules.
232 :param rules: Extracted rules from the grammar file.
233 :param target: Target rule to expand, default is 'T'.
234 :return: The root of the expanded tree.
# Seed the worklist with copies of every alternative of the target rule.
# available_nodes / work_list / the enclosing `while work_list:` loop are on
# missing lines — TODO confirm.
244 root_list = [opt.conjuncts[:]
for opt
in rules[target].options]
245 root_node =
assign_node(root_list, available_nodes, work_list, rules)
247 node, expanded_list = work_list.pop()
# Group every suffix by its first word: prefix_dict maps word -> list of
# remaining suffixes (item[1:]).
251 for item
in expanded_list:
252 successors = prefix_dict.get(item[0].name)
255 successors.append(item[1:])
258 prefix_dict[item[0].name] = [item[1:]]
# One outgoing edge per distinct first word; the SentenceEdge construction is
# on a missing line.
261 for word, successors
in prefix_dict.items():
263 nextnode =
assign_node(successors, available_nodes, work_list, rules)
265 node.edges.append(edge)
# Fragment of expand_sentences (its `def` line is missing): repeatedly rewrites
# any sentence whose FIRST conjunct is a variable by substituting each of that
# rule's alternatives, until every first conjunct is a terminal word.
272 Expands the grammar rules until elimination of all variables at the first position
274 :param sentence_list: List of grammar rules
275 :param rules: Rules of the grammar
276 :return: Expanded list, and whether an end of a sentence was found.
# First pass presumably detects empty sentences / first-position variables
# (bodies on missing lines) — TODO confirm.
282 for item
in sentence_list:
290 if item[0].is_variable:
302 if not item[0].is_variable:
303 expanded_list.append(item)
# Variable at the front: replace it with each alternative's conjuncts,
# prepended to the rest of the sentence.
306 for opt
in rules[item[0].name].options:
307 d = opt.conjuncts + item[1:]
308 expanded_list.append(d)
310 sentence_list = expanded_list
312 return end_found, sentence_list
# Fragment of the suffix-to-string converter (its `def` line is missing):
# renders each suffix as a space-joined string of conjunct names and collects
# them in a set (duplicates collapse). `sentence_set` is initialised and
# returned on missing lines — TODO confirm.
317 Convert the current rule suffixes to string form.
319 :param expanded_list: List of rule suffixes to convert.
320 :return: Set of suffixes, after converting each to a string.
323 for sentence
in expanded_list:
324 sentence_text =
" ".join(conjunct.name
for conjunct
in sentence)
325 sentence_set.add(sentence_text)
# NOTE(review): fragment — lines are missing between the numbered statements
# (e.g. the computation of sentence_set / end_found before line 349, and the
# `if node is None:` guard presumed around lines 353-354).
329 def assign_node(sentence_list, available_nodes, work_list, rules):
331 For a given list of rule suffixes, find or add a node, and update the work list if necessary.
333 :param sentence_list: List of rule suffixes to find or add a node for.
334 :type sentence_list: List of rule alternatives (a list of conjuncts, partly expanded to words,
335 in particular, the first conjunct should not be a variable).
337 :param available_nodes: Known set of rule suffixes and their associated nodes. May be updated.
338 :type available_nodes: Dict of str to SentenceNode
340 :param work_list: List of rule suffixes that need further processing. May be updated.
341 :type work_list: List of pairs (node, rule suffixes).
343 :param rules: Rules of the grammar.
345 :return: Node associated with the provided sentence_list.
# frozenset makes the suffix set hashable, so it can key available_nodes and
# let equivalent nodes be shared.
349 sentence_set = frozenset(sentence_set)
350 node = available_nodes.get(sentence_set)
353 node.done = end_found
354 available_nodes[sentence_set] = node
# Only non-empty suffixes need further expansion; the filter condition is on a
# missing line (presumably `if len(sentence) > 0:` — TODO confirm).
356 non_empty_sentences = []
357 for sentence
in sentence_list:
359 non_empty_sentences.append(sentence)
363 work_list.append((node, non_empty_sentences))
# Fragment of the Graphviz dumper (its `def` line is missing): walks the node
# graph with a worklist, assigns each node a number on first sight, emits DOT
# text, writes grammar_tree.dot, and renders it to PDF via graphviz.render.
369 Prints Graphviz input of the tree.
371 :param root_node: Root of the tree
# node_numbers / next_free_number are initialised on missing lines — TODO
# confirm.
374 work_list = [root_node]
376 printed_numbers = set()
379 graphviz_dotfile_string =
"digraph G {\n"
# Pop the next node; allocate a fresh number if it has never been seen.
382 node = work_list.pop()
383 number = node_numbers.get(node)
385 node_numbers[node] = next_free_number
386 number = next_free_number
387 next_free_number += 1
# Skip nodes already emitted (the graph shares nodes, so revisits happen).
389 if number
in printed_numbers:
397 node_text =
"node{}".format(number)
398 printed_numbers.add(number)
# `shape` is chosen on a missing line (presumably based on node.done — TODO
# confirm).
399 graphviz_dotfile_string +=
"{} [shape={}];".format(node_text, shape) \
# Emit one DOT edge per outgoing word edge, numbering and queueing unseen
# destination nodes along the way.
403 for edge
in node.edges:
404 number = node_numbers.get(edge.node)
406 node_numbers[edge.node] = next_free_number
407 number = next_free_number
408 next_free_number += 1
409 dest_text =
"node{}".format(number)
410 work_list.append(edge.node)
411 graphviz_dotfile_string +=
"{} -> {} [label={}];".format(node_text,
412 dest_text, edge.word) +
"\n"
414 graphviz_dotfile_string +=
"}"
# Persist the DOT file next to the model output and render it to PDF.
417 dotfile_path = os.path.join(outpath,
"grammar_tree.dot")
418 with open(dotfile_path,
'w')
as f:
419 f.write(graphviz_dotfile_string)
422 render(
"dot",
"pdf", dotfile_path)