60 mean_document_length(0),
61 length_correction(document_lengths.size())
63 double one_minus_b = 1.0 - b;
66 for (
auto length : document_lengths)
69 mean_document_length =
static_cast<double>(sum) / static_cast<double>(document_lengths.size() - 1);
71 auto correction = &length_correction[0];
72 for (
auto length : document_lengths)
73 *correction++ = k1 * (one_minus_b + b *
static_cast<double>(length) / mean_document_length);
106 idf = log((
double)documents_in_collection / (
double)document_frequency);
143 double tf = term_frequency;
144 return idf * (top_row / (tf + length_correction[document_id]));
157 std::vector<compress_integer::integer> lengths{30, 40, 50, 60, 70};
164 JASS_assert(static_cast<uint32_t>(rsv * 1000) == 1635);
165 puts(
"ranking_function_atire_bm25::PASSED");
~ranking_function_atire_bm25()
Destructor.
Definition: ranking_function_atire_bm25.h:83
uint32_t integer
This class and descendants will work on integers of this size. Do not change without also changing JA...
Definition: compress_integer.h:40
The ATIRE version of BM25.
Definition: ranking_function_atire_bm25.h:36
std::vector< float > length_correction
most of the bottom row of BM25 (k1 * ((1 - b) + b * length / mean_document_length)) for the current t...
Definition: ranking_function_atire_bm25.h:43
#define JASS_assert(expression)
Drop in replacement for assert() that aborts in Release as well as Debug.
Definition: asserts.h:33
Compression codexes for integer sequences.
Holder class for an impact ordered postings list.
operating system and compiler independent definition of forceinline
forceinline void compute_tf_component(index_postings_impact::impact_type term_frequency)
Compute and store internally the term-frequency based component of the ranking function (useful when ...
Definition: ranking_function_atire_bm25.h:117
double k1_plus_1
k1 + 1
Definition: ranking_function_atire_bm25.h:41
static void unittest(void)
Unit test this class.
Definition: ranking_function_atire_bm25.h:154
double idf
the IDF of the term being processed
Definition: ranking_function_atire_bm25.h:39
forceinline double compute_score(compress_integer::integer document_id, index_postings_impact::impact_type term_frequency)
Compute BM25 from the given document, assuming pieces have already been computed. ...
Definition: ranking_function_atire_bm25.h:132
ranking_function_atire_bm25(double k1, double b, std::vector< compress_integer::integer > &document_lengths)
Constructor.
Definition: ranking_function_atire_bm25.h:56
Definition: document_id.h:16
uint16_t impact_type
An impact value (i.e. a term frequency value) is of this type.
Definition: index_postings_impact.h:41
double mean_document_length
the mean of the document lengths
Definition: ranking_function_atire_bm25.h:42
double top_row
the top-row of the ranking function for the term being processed (tf(td) * (k1 + 1)) ...
Definition: ranking_function_atire_bm25.h:40
Definition: compress_integer_elias_delta_simd.c:23
forceinline void compute_idf_component(compress_integer::integer document_frequency, compress_integer::integer documents_in_collection)
Called once per term (per query). Computes the IDF component of the ranking function and stores it in...
Definition: ranking_function_atire_bm25.h:97