Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#
# JiWER - Jitsi Word Error Rate
#
# Copyright @ 2018 - present 8x8, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Provide a simple CLI wrapper for JiWER. The CLI does not support custom transforms.
"""
import click
import pathlib
import jiwer
def _read_sentences(path: pathlib.Path) -> list:
    """
    Read the new-line delimited text file at `path` and return its sentences.

    Every line is stripped of surrounding whitespace, and lines which are empty
    after stripping are skipped. Sentences consisting of a single character
    (e.g. "I" or "a") are kept.
    """
    with path.open("r") as f:
        stripped_lines = (ln.strip() for ln in f)
        return [ln for ln in stripped_lines if ln]


@click.command()
@click.option(
    "-r",
    "--reference",
    "reference_file",
    type=pathlib.Path,
    required=True,
    help="Path to new-line delimited text file of reference sentences.",
)
@click.option(
    "-h",
    "--hypothesis",
    "hypothesis_file",
    type=pathlib.Path,
    required=True,
    help="Path to new-line delimited text file of hypothesis sentences.",
)
@click.option(
    "--cer",
    "-c",
    "compute_cer",
    is_flag=True,
    default=False,
    help="Compute CER instead of WER.",
)
@click.option(
    "--align",
    "-a",
    "show_alignment",
    is_flag=True,
    default=False,
    help="Print alignment of each sentence.",
)
@click.option(
    "--global",
    "-g",
    "global_alignment",
    is_flag=True,
    default=False,
    help="Apply a global minimal alignment between reference and hypothesis sentences "
    "before computing the WER.",
)
def cli(
    reference_file: pathlib.Path,
    hypothesis_file: pathlib.Path,
    compute_cer: bool,
    show_alignment: bool,
    global_alignment: bool,
):
    """
    JiWER is a python tool for computing the word-error-rate of ASR systems. To use
    this CLI, store the reference and hypothesis sentences in a text file, where
    each sentence is delimited by a new-line character.
    The text files are expected to have an equal number of lines, unless the `-g` flag
    is used. The `-g` flag joins computation of the WER by doing a global minimal
    alignment.
    """
    # NOTE: the docstring above is what click prints for `--help`, so the
    # developer-facing documentation lives in these comments instead.
    #
    # Parameters (all filled in by click from the options declared above):
    #   reference_file:   text file with one reference sentence per line.
    #   hypothesis_file:  text file with one hypothesis sentence per line.
    #   compute_cer:      use `jiwer.process_characters` instead of
    #                     `jiwer.process_words`.
    #   show_alignment:   print `jiwer.visualize_alignment` output instead of
    #                     the bare measures.
    #   global_alignment: pass `jiwer.wer_contiguous` as the transform for both
    #                     sides and skip the equal-line-count requirement.
    #
    # Raises:
    #   ValueError: if the line counts differ without `--global`, or if
    #               `--global` and `--cer` are combined.
    reference_sentences = _read_sentences(reference_file)
    hypothesis_sentences = _read_sentences(hypothesis_file)

    # Sentences are paired by position unless a global alignment is requested,
    # so in that case both files must contribute the same number of sentences.
    if not global_alignment and len(reference_sentences) != len(hypothesis_sentences):
        raise ValueError(
            f"Number of sentences does not match. "
            f"{reference_file} contains {len(reference_sentences)} lines. "
            f"{hypothesis_file} contains {len(hypothesis_sentences)} lines."
        )

    if global_alignment and compute_cer:
        raise ValueError("--global and --cer are mutually exclusive.")

    if compute_cer:
        out = jiwer.process_characters(
            reference_sentences,
            hypothesis_sentences,
        )
    elif global_alignment:
        out = jiwer.process_words(
            reference_sentences,
            hypothesis_sentences,
            reference_transform=jiwer.wer_contiguous,
            hypothesis_transform=jiwer.wer_contiguous,
        )
    else:
        out = jiwer.process_words(reference_sentences, hypothesis_sentences)

    if show_alignment:
        print(jiwer.visualize_alignment(out, show_measures=True))
    elif compute_cer:
        print(out.cer)
    else:
        # Machine-readable output: a single comma-separated line of measures.
        print(
            out.wer,
            out.wil,
            out.wip,
            out.mer,
            out.hits,
            out.substitutions,
            out.insertions,
            out.deletions,
            sep=",",
        )
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    cli()