// Include guard for the BLEU implementation header, followed by the template
// header (ElemType, PrecisionType) of the definition at original line 22.
// NOTE(review): that definition's signature and body (presumably the BLEU
// constructor) are not part of this listing — confirm against the full header.
12 #ifndef MLPACK_CORE_METRICS_BLEU_IMPL_HPP 13 #define MLPACK_CORE_METRICS_BLEU_IMPL_HPP 21 template <
typename ElemType,
typename PrecisionType>
// Member template taking one token sequence (`segment`) and working on a map
// keyed by n-gram (each key is itself a WordVector).
// NOTE(review): presumably this is GetNGrams, which Evaluate calls on every
// reference sentence; the return type, braces, the statement that updates
// ngramsCount and the return statement are not present in this listing —
// confirm against the full header.
30 template <
typename ElemType,
typename PrecisionType>
31 template <
typename WordVector>
33 const WordVector& segment)
// Accumulator: n-gram -> count.
35 std::map<WordVector, size_t> ngramsCount;
// Visit every n-gram order from 1 up to and including maxOrder.
36 for (
size_t order = 1; order < maxOrder + 1; ++order)
// Slide a window of `order` tokens over the segment. The bound is written as
// `i + order < size() + 1` rather than `i <= size() - order`; presumably this
// avoids unsigned wrap-around when the segment is shorter than `order`.
38 for (
size_t i = 0; i + order < segment.size() + 1; ++i)
// Copy tokens [i, i + order) into a fresh WordVector: this is the n-gram.
40 WordVector seq = WordVector(segment.cbegin() + i,
41 segment.cbegin() + i + order);
// Evaluate: scores translationCorpus against referenceCorpus. For each
// (reference set, translation) pair it accumulates clipped n-gram matches and
// the number of candidate n-grams per order, then combines the per-order
// precisions (geometric mean) with a brevity penalty into bleuScore.
// NOTE(review): the signature line, braces and several statements are absent
// from this listing (original line numbers are embedded at the start of each
// statement); comments below describe only what the visible lines show.
48 template <
typename ElemType,
typename PrecisionType>
49 template <
typename ReferenceCorpusType,
typename TranslationCorpusType>
51 const ReferenceCorpusType& referenceCorpus,
52 const TranslationCorpusType& translationCorpus,
// A WordVector is one element of the translation corpus (one token sequence).
57 typedef typename TranslationCorpusType::value_type WordVector;
// Clipped n-gram matches, indexed by (order - 1).
61 std::vector<size_t> matchesByOrder(maxOrder, 0);
// Number of n-grams available in the translations, indexed by (order - 1).
65 std::vector<size_t> possibleMatchesByOrder(maxOrder, 0);
// Reset the corpus-level length accumulators before this evaluation.
70 referenceLength = 0, translationLength = 0;
// Walk both corpora in lockstep; iteration stops at the end of the shorter.
72 auto refIt = referenceCorpus.cbegin();
73 auto trIt = translationCorpus.cbegin();
74 for (; refIt != referenceCorpus.cend() && trIt != translationCorpus.cend();
// `min` starts at the largest size_t and is added to referenceLength below.
// NOTE(review): the body of the loop over the references is not shown;
// presumably it lowers `min` to the shortest reference length — confirm.
77 size_t min = std::numeric_limits<size_t>::max();
78 for (
const auto& t : *refIt)
// Still at the sentinel: the loop above never changed `min`.
// NOTE(review): the statement guarded by this check is not shown.
86 if (min == std::numeric_limits<size_t>::max())
89 referenceLength += min;
90 translationLength += trIt->size();
// For every n-gram, keep the largest count seen in any single reference.
// operator[] default-inserts 0 for an n-gram not seen before, so the max is
// well defined on first sight.
95 std::map<WordVector, size_t> mergedRefNGramCounts;
96 for (
const auto& t : *refIt)
99 const std::map<WordVector, size_t> ngrams = GetNGrams(t);
100 for (
auto it = ngrams.cbegin(); it != ngrams.cend(); ++it)
102 mergedRefNGramCounts[it->first] = std::max(it->second,
103 mergedRefNGramCounts[it->first]);
// NOTE(review): the initializer is not shown; presumably GetNGrams(*trIt).
108 const std::map<WordVector, size_t> translationNGramCounts
// Clipped counts: an n-gram of the translation is credited at most as many
// times as it occurs in the merged reference counts; n-grams absent from the
// references get no entry.
113 std::map<WordVector, size_t> overlap;
114 for (
auto it = translationNGramCounts.cbegin();
115 it != translationNGramCounts.cend();
118 auto mergedIt = mergedRefNGramCounts.find(it->first);
119 if (mergedIt != mergedRefNGramCounts.end())
124 overlap[it->first] = std::min(mergedIt->second, it->second);
// The n-gram's length is its order; add its clipped count to that bucket.
128 for (
auto it = overlap.cbegin(); it != overlap.cend(); ++it)
130 matchesByOrder[it->first.size() - 1] += it->second;
// A translation of length n has (n - order + 1) n-grams of a given order;
// the guard skips orders longer than the translation.
133 for (
size_t order = 1; order < maxOrder + 1; ++order)
135 if (order < trIt->size() + 1)
136 possibleMatchesByOrder[order - 1] += trIt->size() - order + 1;
// Start every per-order precision at zero.
140 precisions = PrecisionType(maxOrder, 0.0);
// Add-one form of the precision: (matches + 1) / (possible + 1).
// NOTE(review): the guarding condition and the assignment target are not
// shown; presumably this is the `smooth` branch writing precisions[i].
144 for (
size_t i = 0; i < maxOrder; ++i)
147 = (matchesByOrder[i] + 1.0) / (possibleMatchesByOrder[i] + 1.0);
// Plain precision: matches / possible. Orders with no candidate n-grams keep
// the zero assigned above, which also avoids dividing by zero.
152 for (
size_t i = 0; i < maxOrder; ++i)
154 if (possibleMatchesByOrder[i] > 0)
155 precisions[i] = ElemType(matchesByOrder[i]) / possibleMatchesByOrder[i];
// Find the smallest per-order precision.
161 ElemType minPrecision = std::numeric_limits<ElemType>::max();
162 for (
size_t i = 0; i < maxOrder; ++i)
164 if (minPrecision > precisions[i])
165 minPrecision = precisions[i];
// Geometric mean computed as exp(mean(log p_i)); only taken when every
// precision is positive, since log(0) is not finite.
// NOTE(review): the branch for minPrecision <= 0 is not shown.
168 ElemType geometricMean;
169 if (minPrecision > 0)
171 ElemType pLogSum = 0.0;
172 for (
const auto& t : precisions)
174 pLogSum += (1.0 / maxOrder) * std::log(t);
176 geometricMean = std::exp(pLogSum);
// ratio = translationLength / referenceLength; the division is skipped when
// referenceLength is zero.
181 ratio = ElemType(translationLength);
182 if (referenceLength > 0)
183 ratio /= referenceLength;
// Brevity penalty: 1 when ratio > 1, otherwise exp(1 - 1 / ratio).
185 brevityPenalty = (ratio > 1.0) ? 1.0 : std::exp(1.0 - 1.0 / ratio);
186 bleuScore = geometricMean * brevityPenalty;
// serialize: hands maxOrder to the archive as a name-value pair
// (CEREAL_NVP). No other member is passed to the archive in the visible
// lines, and `version` is not used in them.
// NOTE(review): the signature line and braces are not present in this listing.
191 template <
typename ElemType,
typename PrecisionType>
192 template <
typename Archive>
194 const uint32_t version)
196 ar(CEREAL_NVP(maxOrder));
BLEU(const size_t maxOrder=4)
Create an instance of the BLEU class.
Definition: bleu_impl.hpp:22
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
void serialize(Archive &ar, const uint32_t version)
Serialize the metric.
Definition: bleu_impl.hpp:193
ElemType Evaluate(const ReferenceCorpusType &referenceCorpus, const TranslationCorpusType &translationCorpus, const bool smooth=false)
Computes the BLEU Score.
Definition: bleu_impl.hpp:50
BLEU, or the Bilingual Evaluation Understudy, is an algorithm for evaluating the quality of text which has been machine translated from one natural language to another.
Definition: bleu.hpp:53