0% found this document useful (0 votes)
15 views8 pages

Lexer

Uploaded by

dizzydile
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
15 views8 pages

Lexer

Uploaded by

dizzydile
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 8

-- MIT License

--
-- Copyright (c) 2018 LoganDark
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.

--- Turn a string or an array into a set-style lookup table.
-- For a string, every single character becomes a key; for a table, every
-- array element (indices 1..#src) becomes a key. All keys map to `true`.
-- Any other kind of `src` adds nothing.
-- @param src   string or array-like table whose items become keys
-- @param list  optional existing table to add the keys to (it is mutated)
-- @return      `list` when one was given, otherwise a fresh table
local function lookupify(src, list)
	local set = list or {}
	local kind = type(src)

	if kind == 'string' then
		for index = 1, #src do
			local char = string.sub(src, index, index)
			set[char] = true
		end
	elseif kind == 'table' then
		for index = 1, #src do
			local entry = src[index]
			set[entry] = true
		end
	end

	return set
end

-- Raw character sets that the lookup tables below are built from.
local base_ident = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
local base_digits = '0123456789'
local base_operators = '+-*/^%#'

-- Character-class lookup tables. Each entry maps single characters to `true`;
-- some entries also carry named sub-tables (multi-character keys, so they can
-- never collide with a single-character lookup).
local chars = {}

chars.whitespace = lookupify(' \n\t\r')
chars.validEscapes = lookupify('abfnrtv"\'\\')

-- Characters allowed inside an identifier; `.start` holds those allowed first.
chars.ident = lookupify(base_ident .. base_digits)
chars.ident.start = lookupify(base_ident)

-- Decimal digits; `.hex` holds the hexadecimal digits.
chars.digits = lookupify(base_digits)
chars.digits.hex = lookupify(base_digits .. 'abcdefABCDEF')

-- Operator and punctuation characters; `.equality` holds the characters that
-- may be followed by '=', `.operators` the plain operator characters.
chars.symbols = lookupify(base_operators .. ',{}[]();.:')
chars.symbols.equality = lookupify('~=><')
chars.symbols.operators = lookupify(base_operators)

-- Reserved words, split into two lookup tables keyed by the word itself.
local keywords = {}

-- Words classified as 'keyword' tokens by the lexer.
keywords.structure = lookupify({
	'and', 'break', 'do', 'else', 'elseif', 'end',
	'for', 'function', 'goto', 'if', 'in', 'local',
	'not', 'or', 'repeat', 'return', 'then', 'until',
	'while'
})

-- Literal words classified as 'value' tokens by the lexer.
keywords.values = lookupify({ 'true', 'false', 'nil' })

--- Split Lua source text into lines of tokens.
--
-- Each token is a table `{ type, data, posFirst, posLast }`. `data` is the
-- token's text; `posFirst`/`posLast` are its source indices minus the total
-- length of the text pushed on earlier lines. Consecutive tokens of the same
-- type on one line are merged into a single token, and empty tokens are never
-- stored. Newline characters end a line and are not stored as tokens.
--
-- Token types produced: whitespace, comment, string_start, string, escape,
-- string_end, keyword, value, ident, number, symbol, operator, vararg,
-- label_start, label, label_end and unidentified.
--
-- @param text  string containing the source to tokenise
-- @return      array of lines, each an array of token tables
return function(text)
	local pos = 1      -- index of the next unread character
	local start = 1    -- index where the token being built begins
	local buffer = {}  -- tokens collected for the current line
	local lines = {}   -- finished lines

	-- Peek at the character `delta` positions from `pos` without consuming it.
	-- Returns '' past the end of the input.
	local function look(delta)
		local index = pos + (delta or 0)

		return text:sub(index, index)
	end

	-- Consume and return the next character ('' at end of input).
	local function get()
		pos = pos + 1

		return look(-1)
	end

	-- Called just after an opening '['. If what follows is `=*[`, consume it
	-- and return the number of '=' signs; otherwise consume nothing and
	-- return nil.
	local function getDataLevel()
		local num = 0

		while look(num) == '=' do
			num = num + 1
		end

		if look(num) == '[' then
			pos = pos + num + 1

			return num
		end
	end

	-- Text consumed since the last pushed token.
	local function getCurrentTokenText()
		return text:sub(start, pos - 1)
	end

	local currentLineLength = 0  -- length of text pushed on the current line
	local lineoffset = 0         -- length of text pushed on all earlier lines

	-- Emit the pending text (or `data`, when given) as a token of type `kind`.
	-- If the previous token on this line has the same type the text is
	-- appended to it instead; empty text never creates a stored token.
	-- Always resets `start` to `pos`. Returns the token table that was
	-- created or extended.
	local function pushToken(kind, data)
		data = data or getCurrentTokenText()

		local tk = buffer[#buffer]

		if not tk or tk.type ~= kind then
			tk = {
				type = kind,
				data = data,
				posFirst = start - lineoffset,
				posLast = pos - 1 - lineoffset
			}

			if tk.data ~= '' then
				buffer[#buffer + 1] = tk
			end
		else
			tk.data = tk.data .. data
			tk.posLast = tk.posLast + data:len()
		end

		currentLineLength = currentLineLength + data:len()
		start = pos

		return tk
	end

	-- Finish the current line and consume the newline character at `pos`.
	local function newline()
		lines[#lines + 1] = buffer
		buffer = {}

		get()
		-- Push the newline only for its bookkeeping side effects (line length
		-- and `start`), then drop it so the new line begins empty.
		pushToken('newline')
		buffer[1] = nil

		lineoffset = lineoffset + currentLineLength
		currentLineLength = 0
	end

	-- Consume the body of a long bracket of the given level. Stops with `pos`
	-- on the first ']' of the matching closer, or at end of input. Each line
	-- break inside the body flushes the text read so far as a `kind` token
	-- and starts a new line.
	local function getData(level, kind)
		while true do
			local char = get()

			if char == '' then
				return
			elseif char == '\n' then
				pos = pos - 1
				pushToken(kind)
				newline()
			elseif char == ']' then
				local valid = true

				for _ = 1, level do
					if look() == '=' then
						pos = pos + 1
					else
						valid = false
						break
					end
				end

				if valid and look() == ']' then
					-- Step back onto the first ']' so the caller decides how
					-- to tokenise the closing bracket.
					pos = pos - level - 1

					return
				end
			end
		end
	end

	-- Consume the rest of the current line as a 'comment' token, including
	-- the terminating newline handling. Stops at end of input as well.
	local function getLineComment()
		while true do
			local char = get()

			if char == '' or char == '\n' then
				pos = pos - 1
				pushToken('comment')

				if char == '\n' then
					newline()
				end

				return
			end
		end
	end

	-- Consume any run of whitespace, emitting 'whitespace' tokens and
	-- starting a new line at every newline character.
	local function chompWhitespace()
		while true do
			local char = look()

			if char == '\n' then
				pushToken('whitespace')
				newline()
			elseif chars.whitespace[char] then
				pos = pos + 1
			else
				break
			end
		end

		pushToken('whitespace')
	end

	while true do
		chompWhitespace()

		local char = get()

		if char == '' then
			break
		elseif char == '-' and look() == '-' then
			pos = pos + 1

			if look() == '[' then
				pos = pos + 1

				local level = getDataLevel()

				if level then
					-- Long comment: body, then the closing `]=*]`.
					getData(level, 'comment')

					pos = pos + level + 2
					pushToken('comment')
				else
					getLineComment()
				end
			else
				getLineComment()
			end

			pushToken('comment')
		elseif char == '\'' or char == '"' then
			pushToken('string_start')

			while true do
				local char2 = get()

				if char2 == '\\' then
					-- Flush the string text before the backslash, then
					-- re-consume the backslash as part of the escape.
					pos = pos - 1
					pushToken('string')
					get()

					local char3 = get()

					if chars.digits[char3] then
						-- Decimal escape: up to two more digits.
						for _ = 1, 2 do
							if chars.digits[look()] then
								pos = pos + 1
							end
						end
					elseif char3 == 'x' then
						if chars.digits.hex[look()] and
							chars.digits.hex[look(1)] then
							pos = pos + 2
						else
							pushToken('unidentified')
						end
					elseif char3 == '\n' then
						pos = pos - 1
						pushToken('escape')
						newline()
					elseif not chars.validEscapes[char3] then
						pushToken('unidentified')
					end

					pushToken('escape')
				elseif char2 == '\n' then
					-- Unterminated string: it ends at the line break.
					pos = pos - 1
					pushToken('string')
					newline()

					break
				elseif char2 == char or char2 == '' then
					pos = pos - 1
					pushToken('string')
					get()

					break
				end
			end

			pushToken('string_end')
		elseif chars.ident.start[char] then
			while chars.ident[look()] do
				pos = pos + 1
			end

			local word = getCurrentTokenText()

			if keywords.structure[word] then
				pushToken('keyword')
			elseif keywords.values[word] then
				pushToken('value')
			else
				pushToken('ident')
			end
		elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
			-- Lua accepts both '0x' and '0X' as the hexadecimal prefix.
			if char == '0' and look():lower() == 'x' then
				pos = pos + 1

				while chars.digits.hex[look()] do
					pos = pos + 1
				end
			else
				while chars.digits[look()] do
					pos = pos + 1
				end

				if look() == '.' then
					pos = pos + 1

					while chars.digits[look()] do
						pos = pos + 1
					end
				end

				if look():lower() == 'e' then
					pos = pos + 1

					-- The exponent may carry an explicit sign of either kind.
					local sign = look()

					if sign == '-' or sign == '+' then
						pos = pos + 1
					end

					while chars.digits[look()] do
						pos = pos + 1
					end
				end
			end

			pushToken('number')
		elseif char == '[' then
			local level = getDataLevel()

			if level then
				-- Long string: opener, body, then the closing `]=*]`.
				pushToken('string_start')

				getData(level, 'string')
				pushToken('string')

				pos = pos + level + 2
				pushToken('string_end')
			else
				pushToken('symbol')
			end
		elseif char == '.' then
			if look() == '.' then
				pos = pos + 1

				if look() == '.' then
					pos = pos + 1
				end
			end

			if getCurrentTokenText():len() == 3 then
				pushToken('vararg')
			else
				pushToken('symbol')
			end
		elseif char == ':' and look() == ':' then
			get()

			pushToken('label_start')

			chompWhitespace()

			if chars.ident.start[look()] then
				get()

				while chars.ident[look()] do
					get()
				end

				pushToken('label')

				chompWhitespace()

				if look() == ':' and look(1) == ':' then
					get()
					get()

					pushToken('label_end')
				end
			end
		elseif chars.symbols.equality[char] then
			if look() == '=' then
				pos = pos + 1
			end

			pushToken('operator')
		elseif chars.symbols[char] then
			if chars.symbols.operators[char] then
				pushToken('operator')
			else
				pushToken('symbol')
			end
		else
			pushToken('unidentified')
		end
	end

	-- The last line has no trailing newline to flush it.
	lines[#lines + 1] = buffer

	return lines
end

You might also like