Skip to content

Commit

Permalink
Lexer refactoring.
Browse files: browse the repository at this point in the history
  • Loading branch information
Uralstech committed Dec 24, 2024
1 parent 5488fec commit 68877eb
Showing 1 changed file with 35 additions and 34 deletions.
69 changes: 35 additions & 34 deletions src/Syntax/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class Lexer
/// The <see cref="Position"/> of the current lexing iteration in the script.
/// </summary>
private Position _position;

/// <summary>
/// The character at the <see cref="Position"/> of the current lexing iteration in the script.
/// </summary>
Expand Down Expand Up @@ -92,21 +92,16 @@ private void ReverseTo(int index)
{
switch (_currentChar)
{
case '\r':
case '\t':
case ' ':
case '\r' or '\t' or ' ':
Advance();
break;
case ';':
case '\n':
case ';' or '\n':
tokens.Add(CompileNewLines());
break;
case '@':
SkipComment();
break;
case '"':
case '`':
case '\'':
case '"' or '`' or '\'':
tokens.Add(CompileStringLike(out EzrSyntaxError? error));
if (error is not null)
return error;
Expand Down Expand Up @@ -236,8 +231,7 @@ private void ReverseTo(int index)
tokens.Add(new Token(TokenType.Tilde, TokenTypeGroup.Symbol, string.Empty, _position));
Advance();
break;
case '#':
case '_':
case '#' or '_':
case char current when char.IsLetter(current):
tokens.Add(CompileIdentifier(out error));
if (error is not null)
Expand Down Expand Up @@ -275,7 +269,7 @@ private void SkipComment()
private Token CompileNumber()
{
Position startPosition = _position;
bool hasPeriod = false;
bool isFloat = false;

while (!_reachedEnd && (char.IsDigit(_currentChar) || _currentChar == '.'))
{
Expand All @@ -284,17 +278,17 @@ private Token CompileNumber()
int peekIndex = _position.Index + 1;
char next = peekIndex < _script.Length ? _script[peekIndex] : '\0';

if (hasPeriod || !char.IsDigit(next))
if (isFloat || !char.IsDigit(next))
break;

hasPeriod = true;
isFloat = true;
}

Advance();
}

return new Token(
hasPeriod ? TokenType.FloatingPoint : TokenType.Integer,
isFloat ? TokenType.FloatingPoint : TokenType.Integer,
TokenTypeGroup.Value,
_script[startPosition.Index.._position.Index],
startPosition,
Expand Down Expand Up @@ -349,15 +343,15 @@ private Token CompileStringLike(out EzrSyntaxError? error)
{
if (_currentChar == '\\')
{
ProcessEscapeSequence(toReturn, ref error);
ProcessEscapeSequence(toReturn, out error);
if (error is not null)
return Token.Empty;

continue;
}
else
{
toReturn.Append(_currentChar);
Advance();
}

toReturn.Append(_currentChar);
Advance();
}

if (_currentChar != enclosingChar)
Expand Down Expand Up @@ -395,18 +389,19 @@ private Token CompileStringLike(out EzrSyntaxError? error)
/// </summary>
/// <param name="builder">The <see cref="StringBuilder"/> to append the special character to.</param>
/// <param name="error">Any <see cref="EzrSyntaxError"/> that occurred in the process; <see langword="null"/> if none occurred.</param>
private void ProcessEscapeSequence(StringBuilder builder, ref EzrSyntaxError? error)
private void ProcessEscapeSequence(StringBuilder builder, out EzrSyntaxError? error)
{
error = null;
Position startPosition = _position;
Advance();

switch (_currentChar)
{
case 'u':
builder.Append(ProcessUtf16Sequence(ref error));
builder.Append(ProcessUtf16Sequence(out error));
break;
case 'U':
builder.Append(ProcessUtf32Sequence(ref error));
builder.Append(ProcessUtf32Sequence(out error));
break;
case 'n':
builder.Append('\n');
Expand Down Expand Up @@ -436,10 +431,7 @@ private void ProcessEscapeSequence(StringBuilder builder, ref EzrSyntaxError? er
builder.Append('\v');
Advance();
break;
case '"':
case '\'':
case '`':
case '\\':
case '"' or '\'' or '`' or '\\':
builder.Append(_currentChar);
Advance();
break;
Expand All @@ -454,12 +446,15 @@ private void ProcessEscapeSequence(StringBuilder builder, ref EzrSyntaxError? er
/// </summary>
/// <param name="error">Any <see cref="EzrSyntaxError"/> that occurred in the process; <see langword="null"/> if none occurred.</param>
/// <returns>The UTF-16 character.</returns>
private char[] ProcessUtf16Sequence(ref EzrSyntaxError? error)
private char[] ProcessUtf16Sequence(out EzrSyntaxError? error)
{
const int Utf16SequenceLength = 4;

error = null;
Advance();

Position startPosition = _position;
for (int i = 0; i < 4; i++)
for (int i = 0; i < Utf16SequenceLength; i++)
{
if (_currentChar is (not >= 'a' or not <= 'f') and (not >= 'A' or not <= 'F') and (not >= '0' or not <= '9'))
{
Expand All @@ -470,20 +465,26 @@ private char[] ProcessUtf16Sequence(ref EzrSyntaxError? error)
Advance();
}

return Encoding.Unicode.GetChars([Convert.ToByte(_script[(startPosition.Index + 2).._position.Index], 16), Convert.ToByte(_script[startPosition.Index..(_position.Index - 2)], 16)]);
string upper = _script[(startPosition.Index + 2).._position.Index];
string lower = _script[startPosition.Index..(_position.Index - 2)];

return Encoding.Unicode.GetChars([Convert.ToByte(upper, 16), Convert.ToByte(lower, 16)]);
}

/// <summary>
/// Processes a UTF-32 escaped sequence in a stringlike.
/// </summary>
/// <param name="error">Any <see cref="EzrSyntaxError"/> that occurred in the process; <see langword="null"/> if none occurred.</param>
/// <returns>The UTF-32 character.</returns>
private string ProcessUtf32Sequence(ref EzrSyntaxError? error)
private string ProcessUtf32Sequence(out EzrSyntaxError? error)
{
const int Utf32SequenceLength = 4;

error = null;
Advance();

Position startPosition = _position;
for (int i = 0; i < 6; i++)
for (int i = 0; i < Utf32SequenceLength; i++)
{
if (_currentChar is (not >= 'a' or not <= 'f') and (not >= 'A' or not <= 'F') and (not >= '0' or not <= '9'))
{
Expand Down Expand Up @@ -569,7 +570,7 @@ private Token CompileIdentifier(out EzrSyntaxError? error)
Advance();
if (!char.IsLetterOrDigit(_currentChar) && _currentChar != '_')
{
error = new EzrSyntaxError(EzrSyntaxError.UnexpectedCharacter, "The hash symbol should only be used before identifiers to escape keyword detection.", startPosition, _position);
error = new EzrSyntaxError(EzrSyntaxError.UnexpectedCharacter, "The hash (#) symbol should only be used before identifiers to escape keyword detection.", startPosition, _position);
return Token.Empty;
}
}
Expand Down

0 comments on commit 68877eb

Please sign in to comment.