speech_recognition
egs
voxforge
local
make_trans.py
Go to the documentation of this file.
1
#!/usr/bin/env python
2
3
# Copyright 2012 Vassil Panayotov
4
# Apache 2.0
5
6
"""
7
Takes a "PROMPTS" file with lines like:
8
1snoke-20120412-hge/mfc/a0405 IT SEEMED THE ORDAINED ORDER OF THINGS THAT DOGS SHOULD WORK
9
10
, an ID prefix and a list of audio file names (e.g. for above example the list will contain "a0405").
11
It looks up every audio file from the list in the prompts file; files whose transcription is
12
missing or not properly normalized are reported on stderr, and each remaining file gets a line in the format:
13
prefix-a0405 IT SEEMED THE ORDAINED ORDER OF THINGS THAT DOGS SHOULD WORK
14
"""
15
16
import
sys
17
18
def err(msg):
    """Write ``msg`` followed by a newline to standard error.

    ``sys.stderr.write`` is used instead of the Python 2-only
    ``print >> sys.stderr, msg`` statement so the helper produces the same
    output under both Python 2 and Python 3.

    Args:
        msg: The message to report; non-string values are converted with
            ``%s`` formatting, as the print statement did.
    """
    # Wrap msg in a 1-tuple so a tuple argument is formatted as a whole
    # rather than being unpacked as multiple format arguments.
    sys.stderr.write("%s\n" % (msg,))
20
21
# The prompts file and the ID prefix are both mandatory; the utterance IDs
# that follow them on the command line may be empty.
if len(sys.argv) < 3:
    usage = "Usage: %s <prompts-file> <id-prefix> <utt-id1> <utt-id2> ... "
    err(usage % sys.argv[0])
    sys.exit(1)
24
25
#err(str(sys.argv))
26
# Positional arguments: argv[1] is the PROMPTS file, argv[2] the ID prefix,
# and everything after that is the list of utterance IDs to look up.
id_prefix = sys.argv[2]
utt_ids = sys.argv[3:]
utt2trans = dict()  # utterance ID -> normalized transcript
unnorm_utt = set()  # utterance IDs whose transcript failed the checks below

# open() exists on both Python 2 and 3 (the file() builtin is Python 2 only),
# and the with-block closes the handle as soon as parsing is finished.
with open(sys.argv[1]) as prompts:
    for line in prompts:
        # First whitespace-delimited field is the utterance path, the rest
        # of the line is the transcript.
        fields = line.split(None, 1)
        if len(fields) < 2:
            # Blank line, or an ID with no transcript text: there is nothing
            # to record, so skip it instead of failing on the unpack. Such
            # an utterance is simply absent from utt2trans.
            continue
        u, trans = fields
        # Keep only the last path component, e.g. ".../mfc/a0405" -> "a0405".
        u = u.strip().split('/')[-1]
        # Hyphens are treated as word separators.
        trans = trans.strip().replace("-", " ")
        # A transcript counts as normalized when it is upper case and, apart
        # from spaces and apostrophes, consists only of letters.
        letters_only = trans.replace(' ', '').replace("'", "")
        if not trans.isupper() or not letters_only.isalpha():
            err("The transcript for '%s'(user '%s') is not properly normalized - skipped!"
                % (u, id_prefix))
            err(trans)
            unnorm_utt.add(u)
            continue
        utt2trans[u] = trans
42
43
# Write one "<id-prefix>-<utt-id> <transcript>" line to stdout for every
# requested utterance ID that has a usable transcript.
for uid in utt_ids:
    if uid in unnorm_utt:
        continue  # avoid double reporting the same problem
    if uid not in utt2trans:
        err("No transcript found for %s_%s" % (id_prefix, uid))
        continue
    # A single parenthesized argument is valid both as the Python 2 print
    # statement and as the Python 3 print() function, with identical output.
    print("%s-%s %s" % (id_prefix, uid, utt2trans[uid]))
50
make_trans.err
def err(msg)
Definition:
make_trans.py:18
Generated on Wed Apr 16 2025 04:36:08 for speech_recognition by
1.8.17