/*
 * Combined listing of the three files this change touches, in order:
 * include/globals.h, include/lex.h, src/lex.c.
 *
 * The project-local includes ("globals.h" in lex.h, "lex.h" in lex.c) are
 * shown as comments because the declarations they provide appear directly
 * above them in this listing.
 */

/* ========================= include/globals.h ========================= */
#pragma once

#include <stdint.h>

/** Kinds of token. ERROR is the last enumerator. */
typedef enum {
    LBRACK,
    RBRACK,
    LCURLY,
    RCURLY,
    LPAREN,
    RPAREN,

    SEMI,
    COMMA,
    ARROW,
    STAR,
    ANDREF,
    DOLLAR,
    COMMENT,

    ID,
    NUM,
    STRING_LITERAL,

    RETURN,
    IF,
    ELSE,

    ERROR
} TokenType;

/** A single token: its kind, an associated text pointer, and a line number. */
typedef struct {
    TokenType type;
    char *data;     /* NOTE(review): ownership of this pointer is not defined here */
    uint32_t line;
} Token;

/* =========================== include/lex.h =========================== */
#pragma once

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* #include "globals.h"  -- TokenType and Token, defined above */

/* Size in bytes of the lexer's read buffer. */
#define LEX_BUF_SIZE 4096

/**
 * Buffered character reader over a FILE stream.
 *
 * The buffer holds up to LEX_BUF_SIZE bytes; a NUL byte marks the end of the
 * input once the stream has been exhausted.
 */
typedef struct Lexer {
    uint32_t linepos;       /* reset to 0 by new_lexer / lexer_set_source */
    uint32_t lineno;        /* reset to 0 by new_lexer / lexer_set_source */

    FILE *source;           /* input stream; not closed by free_lexer */

    int flag_EOF;           /* non-zero once fread has returned a short count */

    char *buffer;           /* LEX_BUF_SIZE bytes, owned by the lexer */
    char *curr;             /* current character, always inside buffer */

    size_t bytes_in_buffer; /* number of valid input bytes in buffer */
} Lexer;

Lexer *new_lexer(void);

void lexer_set_source(Lexer *lexer, FILE *source);

void lexer_fill_buffer(Lexer *lexer);

void lexer_consume(Lexer *lexer);

char lexer_curr(Lexer *lexer);

char lexer_peek(Lexer *lexer);

Token lexer_next_token(Lexer *lexer);

void free_lexer(Lexer *lexer);

/* ============================= src/lex.c ============================= */
/* #include "lex.h"  -- declarations above */

/**
 * Allocate a lexer with an empty buffer and no source attached.
 *
 * @return the new lexer, or NULL if either allocation fails. Release it with
 *         free_lexer().
 */
Lexer *new_lexer(void) {
    Lexer *lexer = malloc(sizeof *lexer);
    if (lexer == NULL) {
        return NULL;
    }

    /* calloc zero-fills, so buffer[0] is already the NUL end marker. */
    lexer->buffer = calloc(LEX_BUF_SIZE, sizeof *lexer->buffer);
    if (lexer->buffer == NULL) {
        free(lexer);
        return NULL;
    }

    lexer->linepos = 0;
    lexer->lineno = 0;
    lexer->source = NULL;
    lexer->flag_EOF = 0;
    lexer->curr = lexer->buffer;
    lexer->bytes_in_buffer = 0;

    return lexer;
}

/**
 * Attach an input stream and reset the lexer to its initial state.
 *
 * Besides the line counters, the EOF flag and buffer are cleared so that
 * nothing left over from a previously attached stream is read.
 *
 * @param lexer  lexer to update (must not be NULL)
 * @param source stream to read from; the caller keeps ownership
 */
void lexer_set_source(Lexer *lexer, FILE *source) {
    lexer->lineno = 0;
    lexer->linepos = 0;

    lexer->source = source;

    lexer->flag_EOF = 0;
    lexer->curr = lexer->buffer;
    lexer->buffer[0] = '\0';
    lexer->bytes_in_buffer = 0;
}

/**
 * Load the next chunk of input into the buffer and rewind the cursor.
 *
 * Two cases, chosen by the character under the cursor:
 *  - NUL (buffer empty or exhausted): read a full LEX_BUF_SIZE bytes.
 *  - otherwise: keep the final byte of the old buffer as the new first byte
 *    and read LEX_BUF_SIZE - 1 bytes after it. lexer_consume() calls this
 *    with the cursor on the second-to-last byte, so the kept byte is the
 *    next unread character.
 *
 * Any short read is treated as end of input: flag_EOF is set and a NUL is
 * written after the last byte read. Does nothing once flag_EOF is set or
 * when no source is attached.
 */
void lexer_fill_buffer(Lexer *lexer) {
    if (lexer->flag_EOF || lexer->source == NULL) {
        return;
    }

    size_t bytes_read;
    if (*lexer->curr == '\0') {
        bytes_read = fread(lexer->buffer, sizeof(char), LEX_BUF_SIZE,
                           lexer->source);
        if (bytes_read < LEX_BUF_SIZE) {
            lexer->flag_EOF = 1;
            lexer->buffer[bytes_read] = '\0';
        }
        lexer->bytes_in_buffer = bytes_read;
    } else {
        char carried = lexer->buffer[LEX_BUF_SIZE - 1];
        /* Read *into* buffer + 1; the stream pointer itself is unchanged. */
        bytes_read = fread(lexer->buffer + 1, sizeof(char), LEX_BUF_SIZE - 1,
                           lexer->source);
        lexer->buffer[0] = carried;
        if (bytes_read < LEX_BUF_SIZE - 1) {
            lexer->flag_EOF = 1;
            lexer->buffer[1 + bytes_read] = '\0';
        }
        lexer->bytes_in_buffer = bytes_read + 1;
    }

    lexer->curr = lexer->buffer;
}

/**
 * Advance the cursor by one character.
 *
 * Does nothing when the cursor is already past the last valid byte. While
 * more input may remain, the buffer is refilled when the cursor sits on the
 * second-to-last byte, so the byte after the cursor is always available to
 * lexer_peek(). After EOF the cursor simply steps forward until it reaches
 * the NUL end marker.
 */
void lexer_consume(Lexer *lexer) {
    size_t read = (size_t)(lexer->curr - lexer->buffer);
    if (read >= lexer->bytes_in_buffer) {
        return;
    }
    if (read == LEX_BUF_SIZE - 2 && !lexer->flag_EOF) {
        lexer_fill_buffer(lexer);
    } else {
        lexer->curr++;
    }
}

/** Return the character under the cursor ('\0' at end of input). */
char lexer_curr(Lexer *lexer) {
    return *(lexer->curr);
}

/**
 * Return the character after the cursor without consuming anything.
 *
 * Yields '\0' when that position is beyond the valid bytes, which avoids
 * reading stale data or stepping past the end of the buffer.
 */
char lexer_peek(Lexer *lexer) {
    size_t next = (size_t)(lexer->curr - lexer->buffer) + 1;
    if (next >= lexer->bytes_in_buffer) {
        return '\0';
    }
    return lexer->buffer[next];
}

/**
 * Produce the next token from the input.
 *
 * Not implemented yet: always returns an ERROR token with no data, stamped
 * with the lexer's current line number, so callers get a defined value.
 */
Token lexer_next_token(Lexer *lexer) {
    // TODO: implement
    Token token = { .type = ERROR, .data = NULL, .line = lexer->lineno };
    return token;
}

/**
 * Release a lexer created by new_lexer(). Passing NULL is a no-op.
 * The attached source stream is not closed.
 */
void free_lexer(Lexer *lexer) {
    if (lexer == NULL) {
        return;
    }
    free(lexer->buffer);
    free(lexer);
}