Pakman
utils.cc
1 #include <string>
2 #include <vector>
3 #include <sstream>
4 #include <stdexcept>
5 #include <string.h>
6 #include <map>
7 #include <assert.h>
8 
9 #include "utils.h"
10 
// Report whether `letter` is one of the separator characters recognised by
// the command tokenizer: space, horizontal tab or newline.  No other
// character (carriage return included) counts as whitespace here.
bool is_whitespace(const char letter)
{
    switch (letter)
    {
        case ' ':
        case '\t':
        case '\n':
            return true;

        default:
            return false;
    }
}
17 
18 std::vector<std::string> parse_command_tokens(const std::string& raw_command)
19 {
20  // Define states of finite state machine
21  enum state_t { start, unquoted, quote_next_letter, singly_quoted,
22  doubly_quoted };
23 
24  // Initialize
25  state_t state = start;
26  std::vector<std::string> cmd_tokens;
27  std::stringstream token_strm;
28 
29  // Iterate over raw_command string
30  for (auto it = raw_command.cbegin(); it != raw_command.cend(); it++)
31  {
32  // Get current letter
33  char letter = *it;
34 
35  switch (state)
36  {
37 
38  // Starting state
39  case start:
40 
41  // If whitespace, stay in starting state
42  while (is_whitespace(letter))
43  {
44  it++;
45  if (it == raw_command.cend()) goto endloop;
46  letter = *it;
47  }
48 
49  // If backslash, transition to quote_next_letter
50  if (letter == '\\')
51  {
52  state = quote_next_letter;
53  }
54 
55  // If single quote, transition to singly_quoted
56  else if (letter == '\'')
57  {
58  state = singly_quoted;
59  }
60 
61  // If double quote, transition to doubly_quoted
62  else if (letter == '\"')
63  {
64  state = doubly_quoted;
65  }
66 
67  // If normal character, add character
68  // and transition to unquoted
69  else
70  {
71  token_strm << letter;
72  state = unquoted;
73  }
74 
75  break;
76 
77  // Quote next letter state
78  case quote_next_letter:
79 
80  // Add next character literally
81  token_strm << letter;
82 
83  // Transition to unquoted
84  state = unquoted;
85 
86  break;
87 
88  // Singly quoted state
89  case singly_quoted:
90 
91  // If any other character than single quote,
92  // add character and stay in singly quoted state
93  while (letter != '\'')
94  {
95  token_strm << letter;
96  it++;
97  if (it == raw_command.cend()) goto endloop;
98  letter = *it;
99  }
100 
101  // If single quote, transition to unquoted
102  state = unquoted;
103 
104  break;
105 
106  // Double quoted state
107  case doubly_quoted:
108 
109  // If any other character than double quote,
110  // add character and stay in doubly quoted state
111  while (letter != '\"')
112  {
113  token_strm << letter;
114  it++;
115  if (it == raw_command.cend()) goto endloop;
116  letter = *it;
117  }
118 
119  // If double quote, transition to unquoted
120  state = unquoted;
121 
122  break;
123 
124  // Unquoted state
125  case unquoted:
126 
127  // If normal character, add and stay in
128  // unquoted state
129  while ( !is_whitespace(letter) &&
130  (letter != '\\') &&
131  (letter != '\'') &&
132  (letter != '\"') )
133  {
134  token_strm << letter;
135  it++;
136  if (it == raw_command.cend()) goto endloop;
137  letter = *it;
138  }
139 
140  // If whitespace, push token and
141  // transition to starting state
142  if (is_whitespace(letter))
143  {
144  cmd_tokens.push_back(token_strm.str());
145  token_strm.str("");
146  state = start;
147  }
148 
149  // If backslash, transition
150  // to quote_next_letter state
151  else if (letter == '\\')
152  state = quote_next_letter;
153 
154  // If single quote, transition
155  // to singly quoted state
156  else if (letter == '\'')
157  state = singly_quoted;
158 
159  // If double quote, transition
160  // to doubly quoted state
161  else if (letter == '\"')
162  state = doubly_quoted;
163 
164  break;
165  }
166  }
167 
168 endloop:
169 
170  // Check for unfinished quotations
171  if ( (state == singly_quoted) || (state == doubly_quoted) ||
172  (state == quote_next_letter))
173  {
174  std::string error_msg;
175  error_msg += "Encountered unfinished quotations "
176  "while parsing command: ";
177  error_msg += raw_command;
178  throw std::runtime_error(error_msg);
179  }
180 
181  // Push back last token
182  if (state == unquoted)
183  {
184  cmd_tokens.push_back(token_strm.str());
185  }
186 
187  return cmd_tokens;
188 }
189 
// Split `str` into the non-empty pieces that lie between delimiter
// characters.
//
// Every character of `delimiters` acts as a separator on its own.  Leading,
// trailing and consecutive separators are collapsed, so no empty token is
// ever produced.  An empty `str`, or one made up solely of separators,
// yields an empty vector; an empty `delimiters` yields `str` as a single
// token (if it is non-empty).
//
// The search is done directly on the std::string instead of through
// strdup/strtok/free.  strtok keeps hidden static state (not reentrant, not
// thread-safe), and the temporary C buffer leaked whenever push_back threw.
// As a side effect, embedded NUL bytes no longer truncate the input or the
// delimiter set.
std::vector<std::string> parse_tokens(const std::string& str,
        const std::string& delimiters)
{
    std::vector<std::string> str_vector;

    // Start of the first token: first character that is not a separator
    std::string::size_type token_begin = str.find_first_not_of(delimiters);

    while (token_begin != std::string::npos)
    {
        // One past the end of the token; npos if it runs to the end of str
        const std::string::size_type token_end =
            str.find_first_of(delimiters, token_begin);

        // substr clamps the count, so token_end == npos takes the whole tail
        str_vector.push_back(str.substr(token_begin, token_end - token_begin));

        // Skip the run of separators; searching from npos returns npos
        token_begin = str.find_first_not_of(delimiters, token_end);
    }

    return str_vector;
}
208 
// Parse a string of the form "key=value;key=value;..." into a map.
//
// The first '=' after the start of a pair separates key from value; further
// '=' characters belong to the value.  A ';' ends the value and starts the
// next pair.  A backslash makes the following character literal, in both
// keys and values, so '=', ';' and '\' themselves can be escaped.  When a key
// occurs more than once, the last value wins.
//
// Throws std::runtime_error if the input ends directly after a backslash
// inside a value ("unfinished quotations"), or if it ends while a key is
// still being read, which includes empty input and a trailing ';'
// ("incomplete key-value pairs").
std::map<std::string, std::string> parse_key_value_pairs(const std::string& str)
{
    // Modes of the parser's finite state machine
    enum class mode
    {
        key,            // collecting characters of a key
        value,          // collecting characters of a value
        escaped_key,    // previous character was a backslash inside a key
        escaped_value   // previous character was a backslash inside a value
    };

    mode current = mode::key;
    std::map<std::string, std::string> pairs;
    std::string key_buf;
    std::string value_buf;

    for (const char ch : str)
    {
        switch (current)
        {
            case mode::escaped_key:

                // Character after a backslash is taken verbatim
                key_buf.push_back(ch);
                current = mode::key;
                break;

            case mode::escaped_value:

                // Character after a backslash is taken verbatim
                value_buf.push_back(ch);
                current = mode::value;
                break;

            case mode::key:

                if (ch == '\\')
                    current = mode::escaped_key;
                else if (ch == '=')
                    current = mode::value;
                else
                    key_buf.push_back(ch);
                break;

            case mode::value:

                if (ch == '\\')
                    current = mode::escaped_value;
                else if (ch == ';')
                {
                    // Pair is complete: store it and start over with a
                    // fresh key
                    pairs[key_buf] = value_buf;
                    key_buf.clear();
                    value_buf.clear();
                    current = mode::key;
                }
                else
                    value_buf.push_back(ch);
                break;
        }
    }

    // Input stopped right after a backslash inside a value
    if (current == mode::escaped_value)
    {
        std::string error_msg;
        error_msg += "Encountered unfinished quotations "
            "while parsing key-value pairs: ";
        error_msg += str;
        throw std::runtime_error(error_msg);
    }

    // Input stopped before the '=' of the current pair was seen
    if (current != mode::value)
    {
        std::string error_msg;
        error_msg += "Encountered incomplete key-value pairs "
            "while parsing key-value pairs: ";
        error_msg += str;
        throw std::runtime_error(error_msg);
    }

    // Sanity check: the only mode left is reading a value
    assert(current == mode::value);

    // The final pair has no terminating ';', so store it here
    pairs[key_buf] = value_buf;

    return pairs;
}
std::map< std::string, std::string > parse_key_value_pairs(const std::string &str)
Definition: utils.cc:209
void read_key(const LineString &key, std::istream &in)
bool is_whitespace(const char letter)
Definition: utils.cc:11
std::vector< std::string > parse_tokens(const std::string &str, const std::string &delimiters=" ")
Definition: utils.cc:190
std::vector< std::string > parse_command_tokens(const std::string &raw_command)
Definition: utils.cc:18
std::string read_value(std::istream &in)