-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTokenizer.h
58 lines (45 loc) · 1.9 KB
/
Tokenizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#pragma once
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "Token.h"
/*
 * Turns a piece of shell input into a list of Tokens.
 *
 * The tokenizer only stores a std::string_view of the input, i.e. it does not
 * own the text: the buffer the view points into must outlive the Tokenizer.
 */
class Tokenizer {
public:
    /* Creates a tokenizer over `input`. Only the view is stored, no copy is made. */
    explicit Tokenizer(const std::string_view input)
        : input(input)
    { }

    /* Knobs for a single tokenize() call; mostly useful when subtokenizing. */
    struct Options {
        // NOTE(review): kept user-provided instead of `= default`; presumably so that
        // `Options()` is usable as the default argument of tokenize() below while the
        // members carry default initializers. Confirm before changing.
        Options() {}

        /* which character to count until `until` is satisfied
         * for example: for a subtokenizer tokenizing the inside of `$( ... )`,
         * Options#until should equal ')' and Options#countToUntil should be '('.
         * so that $( (cmd; (cmd)) ) is tokenized properly
         *
         * TODO: counting parens is incorrect because the case statement has
         * unmatched parens. This will work fine until case is implemented.
         * It also breaks in the case of `x=$(echo ${a/)/})`
         */
        std::optional<char> countToUntil;
        std::optional<char> until;

        /* should the tokenizer handle comments? it's useful to have this option when
         * subtokenizing ${}, where # means string length and not a comment
         */
        bool handleComments = true;

        /* in subtokenizing we don't care about the tokenized output (it's swallowed as one
         * whole string), so make it possible to disable delimiting then to conserve time
         */
        bool delimit = true;
    };

    /* Error value carrying a human-readable explanation of what is wrong with the input.
     * NOTE(review): presumably thrown by tokenize(); the throw sites are not in this header.
     */
    struct SyntaxError {
        SyntaxError(const std::string &explanation)
            : explanation(explanation)
        {}

        std::string explanation;
    };

    /* Tokenizes the input according to `opt` (default-constructed Options when omitted)
     * and returns the resulting tokens.
     */
    std::vector<Token> tokenize(const Options &opt = Options());

    /* NOTE(review): presumably the number of input characters consumed so far
     * (see input_i); defined out of line, so verify against the implementation.
     */
    size_t consumedChars();

    /* Clears throwOnIncompleteInput and returns *this so the call can be chained,
     * e.g. Tokenizer(text).dontThrowOnIncompleteInput().tokenize().
     */
    Tokenizer &dontThrowOnIncompleteInput() { throwOnIncompleteInput = false; return *this; }

private:
    // non-owning view of the text being tokenized
    std::string_view input;
    // NOTE(review): presumably the current read offset into `input`
    size_t input_i = 0;

    // set to false (via dontThrowOnIncompleteInput()) when tokenizing input on <tab> presses
    bool throwOnIncompleteInput = true;

    /* NOTE(review): presumably flushes `current_token` into `output` as a token of
     * `token_type` located at `position`; defined out of line, so confirm there.
     */
    void delimit(std::vector<Token> &output, std::string &current_token, Token::Type token_type, int position);
};