def calculate_wer(reference, hypothesis):
    """Estimate the Word Error Rate (WER) with a naive positional comparison.

    Words are compared index by index without any alignment, so a single
    missing or extra word early in the hypothesis shifts every later word
    and inflates the substitution count. Use an edit-distance based
    implementation when an exact WER is required.

    Args:
        reference: Ground-truth transcript; tokenised with ``str.split()``.
        hypothesis: Predicted transcript; tokenised with ``str.split()``.

    Returns:
        ``(substitutions + deletions + insertions) / number of reference
        words`` as a float. When the reference has no words the result is
        ``0.0`` if the hypothesis is empty too, otherwise ``float("inf")``.
    """
    ref_words = reference.split()
    hyp_words = hypothesis.split()

    # Mismatches among the positions that both sequences share.
    substitutions = sum(ref != hyp for ref, hyp in zip(ref_words, hyp_words))

    # The length difference is either missing words (deletions) or extra
    # words (insertions), never both. Each count is clamped at zero so the
    # two terms cannot cancel each other out.
    length_gap = len(ref_words) - len(hyp_words)
    deletions = max(length_gap, 0)
    insertions = max(-length_gap, 0)

    errors = substitutions + deletions + insertions
    total_words = len(ref_words)

    # An empty reference would otherwise divide by zero.
    if total_words == 0:
        return 0.0 if errors == 0 else float("inf")

    return errors / total_words
if __name__ == "__main__":
    # Demo pair: the hypothesis drops three of the six reference words.
    reference = "the cat sat on the mat"
    hypothesis = "the cat mat"

    print(calculate_wer(reference, hypothesis))
# --- wer_accurate.py ---
import numpy as np
def calculate_wer(reference, hypothesis):
    """Compute the Word Error Rate (WER) via word-level edit distance.

    The minimum number of word substitutions, insertions and deletions
    needed to turn the hypothesis into the reference is found with dynamic
    programming, then divided by the number of reference words.

    Args:
        reference: Ground-truth transcript; tokenised with ``str.split()``.
        hypothesis: Predicted transcript; tokenised with ``str.split()``.

    Returns:
        The WER as a ``numpy.float64``. When the reference has no words the
        result is ``0.0`` if the hypothesis is empty too, otherwise ``inf``.
    """
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    n_ref, n_hyp = len(ref_words), len(hyp_words)

    # An empty reference would otherwise divide by zero (nan/inf plus a
    # NumPy RuntimeWarning), so its result is defined explicitly.
    if n_ref == 0:
        return np.float64(0.0 if n_hyp == 0 else np.inf)

    # d[i, j] is the edit distance between the first i reference words and
    # the first j hypothesis words. The extra row and column cover the case
    # where one of the two prefixes is empty.
    d = np.zeros((n_ref + 1, n_hyp + 1))
    d[:, 0] = np.arange(n_ref + 1)  # empty hypothesis: delete every word
    d[0, :] = np.arange(n_hyp + 1)  # empty reference: insert every word

    for i, ref_word in enumerate(ref_words, start=1):
        for j, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                # Matching words cost nothing: carry the diagonal forward.
                d[i, j] = d[i - 1, j - 1]
            else:
                substitution = d[i - 1, j - 1] + 1
                insertion = d[i, j - 1] + 1
                deletion = d[i - 1, j] + 1
                d[i, j] = min(substitution, insertion, deletion)

    # The bottom-right cell holds the distance between the full sequences.
    return d[n_ref, n_hyp] / n_ref
if __name__ == "__main__":
    # Demo pair: one word is substituted and one word is dropped.
    reference = "The cat is sleeping on the mat."
    hypothesis = "The cat is playing on mat."

    print(calculate_wer(reference, hypothesis))
# --- wer_jiwer.py ---
from jiwer import wer
if __name__ == "__main__":
    # Alternative pair to try:
    #   reference = "the cat sat on the mat"
    #   hypothesis = "the cat mat"
    reference = "The cat is sleeping on the mat."
    hypothesis = "The cat is playing on mat."

    # Score the pair with jiwer's reference WER implementation.
    print(wer(reference, hypothesis))
# --- wer_evaluate.py ---
import evaluate
# Alternative pair to try:
#   reference = "the cat sat on the mat"
#   hypothesis = "the cat mat"
reference = "The cat is sleeping on the mat."
hypothesis = "The cat is playing on mat."

# Load the "wer" metric, then score the single reference/prediction pair.
wer = evaluate.load("wer")
print(
    wer.compute(
        references=[reference],
        predictions=[hypothesis],
    )
)