JASSv2
stem_porter.h
Go to the documentation of this file.
1 /*
2  STEM_PORTER.H
3  -------------
4 */
11 #pragma once
12 
13 #include <stdio.h>
14 
15 #include "stem.h"
16 #include "ascii.h"
17 
18 #define MAX_TERM_LENGTH 1024 // NOTE(review): not referenced anywhere in this header; presumably a term-buffer bound used by the implementation file - confirm.
19 
20 namespace JASS
21  {
22  /*
23  CLASS STEM_PORTER
24  -----------------
25  */
32  class stem_porter : public stem
33  {
34  private:
35  std::string workspace; // temporary workspace used to reverse the string.
36 
37  private:
38  /*
39  STEM_PORTER::ISVOWELY()
40  -----------------------
41  */
53  bool isvowely(const char *c)
54  {
55  return *c != '\0' && (ascii::isvowel(*c) || (*c == 'y' && !ascii::isvowel(*(c + 1))));
56  }
57 
58  /*
59  STEM_PORTER::CVC()
60  ------------------
61  the stem ends cvc, where the second c is not W, X or Y (e.g. -WIL, -HOP).
62  */
68  bool cvc(const char *what)
69  {
70  return ((strchr("aeiouwxy", *what) == NULL) && (isvowely(what + 1)) && (!isvowely(what + 2))) ? true : false;
71  }
72 
73  /*
74  STEM_PORTER::LENGTH()
75  ---------------------
76  */
82  size_t length(const char *reversed);
83 
84  /*
85  STEM_PORTER::HAS_VOWEL()
86  ------------------------
87  */
93  bool has_vowel(const char *what);
94 
95  public:
96  /*
97  STEM_PORTER::~STEM_PORTER()
98  ---------------------------
99  */
103  virtual ~stem_porter()
104  {
105  /* Nothing */
106  }
107 
108  /*
109  STEM_PORTER::NAME()
110  -------------------
111  */
116  virtual std::string name(void)
117  {
118  return "Porter";
119  }
120 
121  /*
122  STEM_PORTER::TOSTEM()
123  ---------------------
124  */
133  using stem::tostem;
134  virtual size_t tostem(char *destination, const char *source, size_t source_length);
135 
136  /*
137  STEM_PORTER::UNITTEST()
138  -----------------------
139  */
143  static void unittest(void);
144  } ;
145 
146  }
virtual size_t tostem(char *destination, const char *source, size_t source_length)=0
Stem from source into destination.
Baseclass for stemming algorithms.
Definition: stem.h:26
virtual size_t tostem(char *destination, const char *source, size_t source_length)
Stem from source into destination.
Definition: stem_porter.cpp:72
virtual ~stem_porter()
Destructor.
Definition: stem_porter.h:103
bool cvc(const char *what)
Porter's "*o" rule, the stem ends cvc, where the second c is not W, X or Y (e.g. -WIL, -HOP).
Definition: stem_porter.h:68
static void unittest(void)
Unit test this class.
Definition: stem_porter.cpp:326
static int isvowel(uint8_t c)
Is this character a vowel (i.e. in {aeiouAEIOU})
Definition: ascii.h:260
bool has_vowel(const char *what)
Does the string contain a vowel?
Definition: stem_porter.cpp:56
virtual std::string name(void)
Return the name of the stemming algorithm.
Definition: stem_porter.h:116
Generate the stem of a word using Porter's algorithm (version 1)
Definition: stem_porter.h:32
size_t length(const char *reversed)
Return Porter's m in [C](VC)^m[V], the length of the stem.
Definition: stem_porter.cpp:19
Definition: compress_integer_elias_delta_simd.c:23
Baseclass for stemmers.
bool isvowely(const char *c)
Is the character at this point a vowel according to Porter's definition.
Definition: stem_porter.h:53
fast locale-ignoring version of the C runtime library ctype methods for plain 7-bit ASCII...