Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • transcribe-educational-videos/preliminary-study-dai2023
1 result
Show changes
Commits on Source (2)
The cli.py is updated to print the hits, substitutions, deletions, and insertions.
Update cli.py at <venv>/lib/python3.10/site-packages/jiwer/cli.py
venv/lib/python3.10/site-packages/jiwer/cli.py
\ No newline at end of file
#
# JiWER - Jitsi Word Error Rate
#
# Copyright @ 2018 - present 8x8, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Provide a simple CLI wrapper for JiWER. The CLI does not support custom transforms.
"""
import click
import pathlib
import jiwer
def _read_sentences(path: pathlib.Path) -> list:
    """Return the lines of *path*, whitespace-stripped, skipping blank lines."""
    with path.open("r") as f:
        stripped_lines = (line.strip() for line in f)
        # Keep every non-empty line. A one-character sentence ("I", "a", or a
        # single character when computing CER) is a valid sentence; dropping
        # it would shift the reference/hypothesis pairing.
        return [line for line in stripped_lines if line]


@click.command()
@click.option(
    "-r",
    "--reference",
    "reference_file",
    type=pathlib.Path,
    required=True,
    help="Path to new-line delimited text file of reference sentences.",
)
@click.option(
    "-h",
    "--hypothesis",
    "hypothesis_file",
    type=pathlib.Path,
    required=True,
    help="Path to new-line delimited text file of hypothesis sentences.",
)
@click.option(
    "--cer",
    "-c",
    "compute_cer",
    is_flag=True,
    default=False,
    help="Compute CER instead of WER.",
)
@click.option(
    "--align",
    "-a",
    "show_alignment",
    is_flag=True,
    default=False,
    help="Print alignment of each sentence.",
)
@click.option(
    "--global",
    "-g",
    "global_alignment",
    is_flag=True,
    default=False,
    help="Apply a global minimal alignment between reference and hypothesis sentences "
    "before computing the WER.",
)
def cli(
    reference_file: pathlib.Path,
    hypothesis_file: pathlib.Path,
    compute_cer: bool,
    show_alignment: bool,
    global_alignment: bool,
):
    """
    JiWER is a python tool for computing the word-error-rate of ASR systems. To use
    this CLI, store the reference and hypothesis sentences in a text file, where
    each sentence is delimited by a new-line character.
    The text files are expected to have an equal number of lines, unless the `-g` flag
    is used. The `-g` flag joins computation of the WER by doing a global minimal
    alignment.
    """
    # NOTE: the docstring above is rendered by click as the `--help` text, so
    # developer-facing notes live in comments instead.
    #
    # Raises ValueError when `--global` and `--cer` are combined, or when the
    # two files hold a different number of sentences and `--global` is not set.
    # Prints either the alignment visualisation (`--align`), the CER (`--cer`),
    # or one comma-separated line:
    #   wer,wil,wip,mer,hits,substitutions,insertions,deletions

    # Reject the conflicting flag combination before touching the file system.
    if global_alignment and compute_cer:
        raise ValueError("--global and --cer are mutually exclusive.")

    reference_sentences = _read_sentences(reference_file)
    hypothesis_sentences = _read_sentences(hypothesis_file)

    # Sentences are paired by position unless a global alignment is requested,
    # so the two files must then contain the same number of sentences.
    if not global_alignment and len(reference_sentences) != len(hypothesis_sentences):
        raise ValueError(
            f"Number of sentences does not match. "
            f"{reference_file} contains {len(reference_sentences)} lines. "
            f"{hypothesis_file} contains {len(hypothesis_sentences)} lines."
        )

    if compute_cer:
        out = jiwer.process_characters(
            reference_sentences,
            hypothesis_sentences,
        )
    elif global_alignment:
        out = jiwer.process_words(
            reference_sentences,
            hypothesis_sentences,
            reference_transform=jiwer.wer_contiguous,
            hypothesis_transform=jiwer.wer_contiguous,
        )
    else:
        out = jiwer.process_words(reference_sentences, hypothesis_sentences)

    if show_alignment:
        print(jiwer.visualize_alignment(out, show_measures=True))
    elif compute_cer:
        print(out.cer)
    else:
        # Error rates first, then the raw edit-operation counts, as one CSV row.
        print(
            out.wer,
            out.wil,
            out.wip,
            out.mer,
            out.hits,
            out.substitutions,
            out.insertions,
            out.deletions,
            sep=",",
        )
# Invoke the click command only when this file is executed as a script,
# so importing the module does not trigger argument parsing.
if __name__ == "__main__":
    cli()