Lexer.rb
# Our lexer is used like so: `Lexer.new.tokenize("program.simpl")` for a file,
# or `tokenize_line("line")` for a single line. It fills `tokens`, an array
# whose entries are one-element arrays, each wrapping a `Token` (a label/text pair).
# This lexer is for single lines of SimPL as provided in the CS 401 Ruby assignment.
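#
# For example, tokenizing the line `x := x + 1;` produces, in order:
#   [:VAR, "x"], [:ASSIGN, ":="], [:VAR, "x"],
#   [:VOPS, "+"], [:NUMBERS, "1"], [:END, ";"]
# (label/text pairs shown; each is wrapped in a Token object below).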
require_relative 'Token'
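# Token (defined in Token.rb) is assumed to be a simple label/text pair,
# roughly equivalent to this sketch:
#   class Token
#     attr_reader :label, :text
#     def initialize(label, text)
#       @label = label
#       @text = text
#     end
#   end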
class Lexer
  KEYWORDS = ["if", "then", "else", "end", "while", "do", "and", "true", "false"]
  OPS  = ['<=', '<', '=', '(', ')'] # comparison and grouping operators
  VOPS = %w[+ - * /]                # arithmetic (value) operators

  attr_reader :tokens

  def initialize
    @tokens = []   # each entry is a one-element array wrapping a Token
    @position = 0  # index of the current token for the get_* accessors
  end

  def tokenize_line(line)
    # Advance one character at a time until we find something to tokenize.
    # We use regular expressions anchored at the current position (`i`)
    # and matched against the rest of the line.
    i = 0 # current character position
    while i < line.size
      chunk = line[i..-1] # [i..-1] means from i through the last character
      # Each of the following `if/elsif`s tests the current chunk with a
      # regular expression. Order matters: to match `if` as a keyword
      # rather than a variable name, the keyword check must come first.
      #
      # First, scan for names: reserved keywords such as `if`, `while`
      # and `true`, and variable names (identifiers).
      if (id = chunk[/\A([A-Za-z]+)/, 1])
        if KEYWORDS.include?(id) # keywords are labeled :IDENTIFIER, e.g. [:IDENTIFIER, "if"]
          @tokens << [Token.new(:IDENTIFIER, id)]
        else # anything else is a variable name
          @tokens << [Token.new(:VAR, id)]
        end
        i += id.size # skip past what we just consumed
      # Scan for integer literals.
      elsif (number = chunk[/\A([0-9]+)/, 1])
        @tokens << [Token.new(:NUMBERS, number)]
        i += number.size
      # Scan for the statement terminator `;`.
      elsif (stop = chunk[/\A;/])
        @tokens << [Token.new(:END, stop)]
        i += stop.size
      # Skip whitespace (this also consumes any trailing newline).
      elsif (space = chunk[/\A(\s+)/, 1])
        i += space.size
      # Skip comments, assumed to start with `//`; requiring two slashes
      # keeps the division operator `/` from being treated as a comment.
      elsif (comment = chunk[/\A\/\/.*/])
        i += comment.size
      # Scan for the assignment operator `:=`.
      elsif (assign = chunk[/\A:=/])
        @tokens << [Token.new(:ASSIGN, assign)]
        i += assign.size
      # Newlines (rarely reached, since `\s+` above matches them first).
      elsif (nline = chunk[/\A\n+/])
        @tokens << [Token.new(:NLINE, nline)]
        i += nline.size
      # Scan for operators. `<=` is listed before `<` and `=` so the
      # two-character operator matches first. (`:=` is already consumed
      # by the :ASSIGN branch above.)
      elsif (ops = chunk[/\A(\*|\/|\+|-|\(|\)|<=|<|=)/])
        if VOPS.include?(ops)
          @tokens << [Token.new(:VOPS, ops)]
        else # OPS covers the remaining matches
          @tokens << [Token.new(:OPS, ops)]
        end
        i += ops.size # advance by the full operator (2 for `<=`)
      else
        i += 1 # skip any character we don't recognize
      end
    end
  end

  # Tokenizes an entire file, appending an :NLINE token after each line
  # and a single :EOF token at the end.
  def tokenize(filename)
    File.foreach(filename) do |line|
      tokenize_line(line)
      @tokens << [Token.new(:NLINE, 'newline')]
    end
    @tokens << [Token.new(:EOF, 'eof')] # end-of-file marker
  end

  # Returns the text of the token at the current position.
  def get_token_text
    @tokens[@position][0].text
  end

  # Returns the label (type) of the token at the current position.
  def get_token_label
    @tokens[@position][0].label
  end

  # Returns the text of the last token in the stream.
  def get_last_value
    @tokens[-1][0].text
  end

  # Consumes the current token and returns it, or returns false when
  # the stream is exhausted.
  def get_next
    return false unless has_next
    @position += 1
    @tokens[@position - 1]
  end

  # Checks whether there is a next token.
  def has_next
    @position < @tokens.size
  end

  # Moves the token pointer back one position.
  def push_back
    @position -= 1
  end

  # Resets the position counter to 0.
  def start_over
    @position = 0
  end
end
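
# Example usage (a sketch; `program.simpl` is a hypothetical input file):
#
#   lexer = Lexer.new
#   lexer.tokenize('program.simpl')
#   while lexer.has_next
#     token = lexer.get_next[0] # unwrap the one-element array
#     puts "#{token.label}: #{token.text}"
#   end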