implement lexical analysis function for c-minus
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,2 +1,4 @@
|
||||
.idea
|
||||
.vscode
|
||||
|
||||
*.o
|
||||
38
notes/1.md
38
notes/1.md
@@ -1,9 +1,41 @@
|
||||
컴파일러 1
|
||||
===
|
||||
# ISA & Compiler Basics
|
||||
|
||||
## Basic Computer
|
||||
|
||||
To get a task done by a general-purpose computer, we need:
|
||||
|
||||
* Program: A sequence of instructions
|
||||
* Instruction set: A set of possible instructions
|
||||
|
||||
### Von Neumann Architecture
|
||||
|
||||
Both instructions and data are stored in the memory.
|
||||
Instructions dictate:
|
||||
|
||||
1. which and how data are manipulated
|
||||
2. which instruction should be next
|
||||
|
||||
The memory is independent of the CPU.
|
||||
|
||||
|
||||
# Interpreter in Modern Processors
|
||||
### How to load a program
|
||||
```
|
||||
*.c -(compiler)-> *.s: Assembly program
|
||||
*.s -(assembler)-> *.o: Object file
|
||||
*.o -(linker)-[with library(*.o)]-> *.exe: Executable
|
||||
*.exe -(loader)-> loaded into memory
|
||||
```
|
||||
|
||||
### Program
|
||||
|
||||
A computer is essentially a complex state machine.
|
||||
**programmer visible state**:
|
||||
|
||||
* Memory
|
||||
* Registers
|
||||
* Program Counter
|
||||
|
||||
Instructions (the program) specify how to transform the values of the programmer-visible state.
|
||||
|
||||
# Compiler
|
||||
|
||||
|
||||
@@ -23,17 +23,17 @@
|
||||
#endif
|
||||
|
||||
/* MAXRESERVED = the number of reserved words */
|
||||
#define MAXRESERVED 8
|
||||
#define MAXRESERVED 6
|
||||
|
||||
typedef enum
|
||||
/* book-keeping tokens */
|
||||
{ENDFILE,ERROR,
|
||||
/* reserved words */
|
||||
IF,THEN,ELSE,END,REPEAT,UNTIL,READ,WRITE,
|
||||
IF,ELSE,WHILE,RETURN,INT,VOID,
|
||||
/* multicharacter tokens */
|
||||
ID,NUM,
|
||||
/* special symbols */
|
||||
ASSIGN,EQ,LT,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,SEMI
|
||||
ASSIGN,EQ,NE,LT,LE,GT,GE,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,LBRACE,RBRACE,LCURLY,RCURLY,SEMI,COMMA
|
||||
} TokenType;
|
||||
|
||||
extern FILE* source; /* source code text file */
|
||||
|
||||
75
src/lex/tiny.l
Normal file
75
src/lex/tiny.l
Normal file
@@ -0,0 +1,75 @@
|
||||
/****************************************************/
|
||||
/* File: tiny.l */
|
||||
/* Lex specification for TINY */
|
||||
/* Compiler Construction: Principles and Practice */
|
||||
/* Kenneth C. Louden */
|
||||
/****************************************************/
|
||||
|
||||
%{
|
||||
#include "globals.h"
|
||||
#include "util.h"
|
||||
#include "scan.h"
|
||||
/* lexeme of identifier or reserved word */
|
||||
char tokenString[MAXTOKENLEN+1];
|
||||
%}
|
||||
|
||||
digit [0-9]
|
||||
number {digit}+
|
||||
letter [a-zA-Z]
|
||||
identifier {letter}+
|
||||
newline \n
|
||||
whitespace [ \t]+
|
||||
|
||||
%%
|
||||
|
||||
"if" {return IF;}
|
||||
"then" {return THEN;}
|
||||
"else" {return ELSE;}
|
||||
"end" {return END;}
|
||||
"repeat" {return REPEAT;}
|
||||
"until" {return UNTIL;}
|
||||
"read" {return READ;}
|
||||
"write" {return WRITE;}
|
||||
":=" {return ASSIGN;}
|
||||
"=" {return EQ;}
|
||||
"<" {return LT;}
|
||||
"+" {return PLUS;}
|
||||
"-" {return MINUS;}
|
||||
"*" {return TIMES;}
|
||||
"/" {return OVER;}
|
||||
"(" {return LPAREN;}
|
||||
")" {return RPAREN;}
|
||||
";" {return SEMI;}
|
||||
{number} {return NUM;}
|
||||
{identifier} {return ID;}
|
||||
{newline} {lineno++;}
|
||||
{whitespace} {/* skip whitespace */}
|
||||
"{" { char c;
|
||||
do
|
||||
{ c = input();
|
||||
if (c == EOF) break;
|
||||
if (c == '\n') lineno++;
|
||||
} while (c != '}');
|
||||
}
|
||||
. {return ERROR;}
|
||||
|
||||
%%
|
||||
|
||||
TokenType getToken(void)
|
||||
{ static int firstTime = TRUE;
|
||||
TokenType currentToken;
|
||||
if (firstTime)
|
||||
{ firstTime = FALSE;
|
||||
lineno++;
|
||||
yyin = source;
|
||||
yyout = listing;
|
||||
}
|
||||
currentToken = yylex();
|
||||
strncpy(tokenString,yytext,MAXTOKENLEN);
|
||||
if (TraceScan) {
|
||||
fprintf(listing,"\t%d: ",lineno);
|
||||
printToken(currentToken,tokenString);
|
||||
}
|
||||
return currentToken;
|
||||
}
|
||||
|
||||
107
src/main.c
107
src/main.c
@@ -8,14 +8,14 @@
|
||||
#include "globals.h"
|
||||
|
||||
/* set NO_PARSE to TRUE to get a scanner-only compiler */
|
||||
#define NO_PARSE FALSE
|
||||
#define NO_PARSE TRUE
|
||||
/* set NO_ANALYZE to TRUE to get a parser-only compiler */
|
||||
#define NO_ANALYZE FALSE
|
||||
#define NO_ANALYZE TRUE
|
||||
|
||||
/* set NO_CODE to TRUE to get a compiler that does not
|
||||
* generate code
|
||||
*/
|
||||
#define NO_CODE FALSE
|
||||
#define NO_CODE TRUE
|
||||
|
||||
#include "util.h"
|
||||
#if NO_PARSE
|
||||
@@ -32,71 +32,70 @@
|
||||
|
||||
/* allocate global variables */
|
||||
int lineno = 0;
|
||||
FILE * source;
|
||||
FILE * listing;
|
||||
FILE * code;
|
||||
FILE *source;
|
||||
FILE *listing;
|
||||
FILE *code;
|
||||
|
||||
/* allocate and set tracing flags */
|
||||
int EchoSource = FALSE;
|
||||
int TraceScan = FALSE;
|
||||
int TraceScan = TRUE;
|
||||
int TraceParse = FALSE;
|
||||
int TraceAnalyze = FALSE;
|
||||
int TraceCode = FALSE;
|
||||
|
||||
int Error = FALSE;
|
||||
|
||||
main( int argc, char * argv[] )
|
||||
{ TreeNode * syntaxTree;
|
||||
char pgm[120]; /* source code file name */
|
||||
if (argc != 2)
|
||||
{ fprintf(stderr,"usage: %s <filename>\n",argv[0]);
|
||||
exit(1);
|
||||
main(int argc, char *argv[]) {
|
||||
TreeNode *syntaxTree;
|
||||
char pgm[120]; /* source code file name */
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "usage: %s <filename>\n", argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
strcpy(pgm,argv[1]) ;
|
||||
if (strchr (pgm, '.') == NULL)
|
||||
strcat(pgm,".tny");
|
||||
source = fopen(pgm,"r");
|
||||
if (source==NULL)
|
||||
{ fprintf(stderr,"File %s not found\n",pgm);
|
||||
exit(1);
|
||||
}
|
||||
listing = stdout; /* send listing to screen */
|
||||
fprintf(listing,"\nTINY COMPILATION: %s\n",pgm);
|
||||
strcpy(pgm, argv[1]);
|
||||
if (strchr(pgm, '.') == NULL)
|
||||
strcat(pgm, ".tny");
|
||||
source = fopen(pgm, "r");
|
||||
if (source == NULL) {
|
||||
fprintf(stderr, "File %s not found\n", pgm);
|
||||
exit(1);
|
||||
}
|
||||
listing = stdout; /* send listing to screen */
|
||||
fprintf(listing, "\nC-MINUS COMPILATION: %s\n", pgm);
|
||||
#if NO_PARSE
|
||||
while (getToken()!=ENDFILE);
|
||||
while (getToken() != ENDFILE);
|
||||
#else
|
||||
syntaxTree = parse();
|
||||
if (TraceParse) {
|
||||
fprintf(listing,"\nSyntax tree:\n");
|
||||
printTree(syntaxTree);
|
||||
}
|
||||
#if !NO_ANALYZE
|
||||
if (! Error)
|
||||
{ if (TraceAnalyze) fprintf(listing,"\nBuilding Symbol Table...\n");
|
||||
buildSymtab(syntaxTree);
|
||||
if (TraceAnalyze) fprintf(listing,"\nChecking Types...\n");
|
||||
typeCheck(syntaxTree);
|
||||
if (TraceAnalyze) fprintf(listing,"\nType Checking Finished\n");
|
||||
}
|
||||
#if !NO_CODE
|
||||
if (! Error)
|
||||
{ char * codefile;
|
||||
int fnlen = strcspn(pgm,".");
|
||||
codefile = (char *) calloc(fnlen+4, sizeof(char));
|
||||
strncpy(codefile,pgm,fnlen);
|
||||
strcat(codefile,".tm");
|
||||
code = fopen(codefile,"w");
|
||||
if (code == NULL)
|
||||
{ printf("Unable to open %s\n",codefile);
|
||||
exit(1);
|
||||
syntaxTree = parse();
|
||||
if (TraceParse) {
|
||||
fprintf(listing, "\nSyntax tree:\n");
|
||||
printTree(syntaxTree);
|
||||
}
|
||||
#if !NO_ANALYZE
|
||||
if (!Error) {
|
||||
if (TraceAnalyze) fprintf(listing, "\nBuilding Symbol Table...\n");
|
||||
buildSymtab(syntaxTree);
|
||||
if (TraceAnalyze) fprintf(listing, "\nChecking Types...\n");
|
||||
typeCheck(syntaxTree);
|
||||
if (TraceAnalyze) fprintf(listing, "\nType Checking Finished\n");
|
||||
}
|
||||
#if !NO_CODE
|
||||
if (!Error) {
|
||||
char *codefile;
|
||||
int fnlen = strcspn(pgm, ".");
|
||||
codefile = (char *) calloc(fnlen + 4, sizeof(char));
|
||||
strncpy(codefile, pgm, fnlen);
|
||||
strcat(codefile, ".tm");
|
||||
code = fopen(codefile, "w");
|
||||
if (code == NULL) {
|
||||
printf("Unable to open %s\n", codefile);
|
||||
exit(1);
|
||||
}
|
||||
codeGen(syntaxTree, codefile);
|
||||
fclose(code);
|
||||
}
|
||||
codeGen(syntaxTree,codefile);
|
||||
fclose(code);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
fclose(source);
|
||||
return 0;
|
||||
fclose(source);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
396
src/scan.c
396
src/scan.c
@@ -5,69 +5,81 @@
|
||||
/* Kenneth C. Louden */
|
||||
/****************************************************/
|
||||
|
||||
#include "scan.h"
|
||||
#include "globals.h"
|
||||
#include "util.h"
|
||||
#include "scan.h"
|
||||
|
||||
/* states in scanner DFA */
|
||||
typedef enum
|
||||
{ START,INASSIGN,INCOMMENT,INNUM,INID,DONE }
|
||||
StateType;
|
||||
typedef enum { START,
|
||||
INOVER,
|
||||
INCOMMENT,
|
||||
ASTERCOMMENT,
|
||||
INASSIGN,
|
||||
INLT /* < */,
|
||||
INGT /* > */,
|
||||
INNE /* !*/,
|
||||
INNUM,
|
||||
INID,
|
||||
DONE } StateType;
|
||||
|
||||
/* lexeme of identifier or reserved word */
|
||||
char tokenString[MAXTOKENLEN+1];
|
||||
char tokenString[MAXTOKENLEN + 1];
|
||||
|
||||
/* BUFLEN = length of the input buffer for
|
||||
source code lines */
|
||||
#define BUFLEN 256
|
||||
|
||||
static char lineBuf[BUFLEN]; /* holds the current line */
|
||||
static int linepos = 0; /* current position in LineBuf */
|
||||
static int bufsize = 0; /* current size of buffer string */
|
||||
static int linepos = 0; /* current position in LineBuf */
|
||||
static int bufsize = 0; /* current size of buffer string */
|
||||
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */
|
||||
|
||||
/* getNextChar fetches the next character (blanks included)
|
||||
from lineBuf, reading in a new line if lineBuf is
|
||||
exhausted */
|
||||
static int getNextChar(void)
|
||||
{ if (!(linepos < bufsize))
|
||||
{ lineno++;
|
||||
if (fgets(lineBuf,BUFLEN-1,source))
|
||||
{ if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf);
|
||||
bufsize = strlen(lineBuf);
|
||||
linepos = 0;
|
||||
return lineBuf[linepos++];
|
||||
}
|
||||
else
|
||||
{ EOF_flag = TRUE;
|
||||
return EOF;
|
||||
}
|
||||
}
|
||||
else return lineBuf[linepos++];
|
||||
static int getNextChar(void) {
|
||||
if (!(linepos < bufsize)) {
|
||||
lineno++;
|
||||
if (fgets(lineBuf, BUFLEN - 1, source)) {
|
||||
if (EchoSource) fprintf(listing, "%4d: %s", lineno, lineBuf);
|
||||
bufsize = strlen(lineBuf);
|
||||
linepos = 0;
|
||||
return lineBuf[linepos++];
|
||||
} else {
|
||||
EOF_flag = TRUE;
|
||||
return EOF;
|
||||
}
|
||||
} else
|
||||
return lineBuf[linepos++];
|
||||
}
|
||||
|
||||
/* ungetNextChar backtracks one character
|
||||
in lineBuf */
|
||||
static void ungetNextChar(void)
|
||||
{ if (!EOF_flag) linepos-- ;}
|
||||
static void ungetNextChar(void) {
|
||||
if (!EOF_flag) linepos--;
|
||||
}
|
||||
|
||||
/* lookup table of reserved words */
|
||||
static struct
|
||||
{ char* str;
|
||||
TokenType tok;
|
||||
} reservedWords[MAXRESERVED]
|
||||
= {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
|
||||
{"repeat",REPEAT},{"until",UNTIL},{"read",READ},
|
||||
{"write",WRITE}};
|
||||
static struct {// keywords
|
||||
char *str;
|
||||
TokenType tok;
|
||||
} reservedWords[MAXRESERVED] = {
|
||||
{"if", IF},
|
||||
{"else", ELSE},
|
||||
{"while", WHILE},
|
||||
{"return", RETURN},
|
||||
{"int", INT},
|
||||
{"void", VOID},
|
||||
};
|
||||
|
||||
/* lookup an identifier to see if it is a reserved word */
|
||||
/* uses linear search */
|
||||
static TokenType reservedLookup (char * s)
|
||||
{ int i;
|
||||
for (i=0;i<MAXRESERVED;i++)
|
||||
if (!strcmp(s,reservedWords[i].str))
|
||||
return reservedWords[i].tok;
|
||||
return ID;
|
||||
static TokenType reservedLookup(char *s) {
|
||||
int i;
|
||||
for (i = 0; i < MAXRESERVED; i++)
|
||||
if (!strcmp(s, reservedWords[i].str))
|
||||
return reservedWords[i].tok;
|
||||
return ID;
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
@@ -76,128 +88,190 @@ static TokenType reservedLookup (char * s)
|
||||
/* function getToken returns the
|
||||
* next token in source file
|
||||
*/
|
||||
TokenType getToken(void)
|
||||
{ /* index for storing into tokenString */
|
||||
int tokenStringIndex = 0;
|
||||
/* holds current token to be returned */
|
||||
TokenType currentToken;
|
||||
/* current state - always begins at START */
|
||||
StateType state = START;
|
||||
/* flag to indicate save to tokenString */
|
||||
int save;
|
||||
while (state != DONE)
|
||||
{ int c = getNextChar();
|
||||
save = TRUE;
|
||||
switch (state)
|
||||
{ case START:
|
||||
if (isdigit(c))
|
||||
state = INNUM;
|
||||
else if (isalpha(c))
|
||||
state = INID;
|
||||
else if (c == ':')
|
||||
state = INASSIGN;
|
||||
else if ((c == ' ') || (c == '\t') || (c == '\n'))
|
||||
save = FALSE;
|
||||
else if (c == '{')
|
||||
{ save = FALSE;
|
||||
state = INCOMMENT;
|
||||
}
|
||||
else
|
||||
{ state = DONE;
|
||||
switch (c)
|
||||
{ case EOF:
|
||||
save = FALSE;
|
||||
currentToken = ENDFILE;
|
||||
break;
|
||||
case '=':
|
||||
currentToken = EQ;
|
||||
break;
|
||||
case '<':
|
||||
currentToken = LT;
|
||||
break;
|
||||
case '+':
|
||||
currentToken = PLUS;
|
||||
break;
|
||||
case '-':
|
||||
currentToken = MINUS;
|
||||
break;
|
||||
case '*':
|
||||
currentToken = TIMES;
|
||||
break;
|
||||
case '/':
|
||||
currentToken = OVER;
|
||||
break;
|
||||
case '(':
|
||||
currentToken = LPAREN;
|
||||
break;
|
||||
case ')':
|
||||
currentToken = RPAREN;
|
||||
break;
|
||||
case ';':
|
||||
currentToken = SEMI;
|
||||
break;
|
||||
default:
|
||||
currentToken = ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case INCOMMENT:
|
||||
save = FALSE;
|
||||
if (c == EOF)
|
||||
{ state = DONE;
|
||||
currentToken = ENDFILE;
|
||||
}
|
||||
else if (c == '}') state = START;
|
||||
break;
|
||||
case INASSIGN:
|
||||
state = DONE;
|
||||
if (c == '=')
|
||||
currentToken = ASSIGN;
|
||||
else
|
||||
{ /* backup in the input */
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
currentToken = ERROR;
|
||||
}
|
||||
break;
|
||||
case INNUM:
|
||||
if (!isdigit(c))
|
||||
{ /* backup in the input */
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
state = DONE;
|
||||
currentToken = NUM;
|
||||
}
|
||||
break;
|
||||
case INID:
|
||||
if (!isalpha(c))
|
||||
{ /* backup in the input */
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
state = DONE;
|
||||
currentToken = ID;
|
||||
}
|
||||
break;
|
||||
case DONE:
|
||||
default: /* should never happen */
|
||||
fprintf(listing,"Scanner Bug: state= %d\n",state);
|
||||
state = DONE;
|
||||
currentToken = ERROR;
|
||||
break;
|
||||
}
|
||||
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
|
||||
tokenString[tokenStringIndex++] = (char) c;
|
||||
if (state == DONE)
|
||||
{ tokenString[tokenStringIndex] = '\0';
|
||||
if (currentToken == ID)
|
||||
currentToken = reservedLookup(tokenString);
|
||||
}
|
||||
}
|
||||
if (TraceScan) {
|
||||
fprintf(listing,"\t%d: ",lineno);
|
||||
printToken(currentToken,tokenString);
|
||||
}
|
||||
return currentToken;
|
||||
} /* end getToken */
|
||||
TokenType getToken(void) { /* index for storing into tokenString */
|
||||
int tokenStringIndex = 0;
|
||||
/* holds current token to be returned */
|
||||
TokenType currentToken;
|
||||
/* current state - always begins at START */
|
||||
StateType state = START;
|
||||
/* flag to indicate save to tokenString */
|
||||
int save;
|
||||
while (state != DONE) {
|
||||
int c = getNextChar();
|
||||
save = TRUE;
|
||||
/* main char process (state transition)*/
|
||||
switch (state) {
|
||||
case START:
|
||||
if (isdigit(c))
|
||||
state = INNUM;
|
||||
else if (isalpha(c))
|
||||
state = INID;
|
||||
else if ((c == ' ') || (c == '\t') || (c == '\n'))
|
||||
save = FALSE;
|
||||
else if (c == '=')
|
||||
state = INASSIGN;
|
||||
else if (c == '<') {
|
||||
state = INLT;
|
||||
} else if (c == '>') {
|
||||
state = INGT;
|
||||
} else if (c == '/') {// comment in or not
|
||||
save = FALSE;
|
||||
state = INOVER;
|
||||
} else {
|
||||
state = DONE;
|
||||
switch (c) {
|
||||
case EOF:
|
||||
save = FALSE;
|
||||
currentToken = ENDFILE;
|
||||
break;
|
||||
case '+':
|
||||
currentToken = PLUS;
|
||||
break;
|
||||
case '-':
|
||||
currentToken = MINUS;
|
||||
break;
|
||||
case '*':
|
||||
currentToken = TIMES;
|
||||
break;
|
||||
case '[':
|
||||
currentToken = LBRACE;
|
||||
break;
|
||||
case ']':
|
||||
currentToken = RBRACE;
|
||||
break;
|
||||
case '{':
|
||||
currentToken = LCURLY;
|
||||
break;
|
||||
case '}':
|
||||
currentToken = RCURLY;
|
||||
break;
|
||||
case '(':
|
||||
currentToken = LPAREN;
|
||||
break;
|
||||
case ')':
|
||||
currentToken = RPAREN;
|
||||
break;
|
||||
case ';':
|
||||
currentToken = SEMI;
|
||||
break;
|
||||
case ',':
|
||||
currentToken = COMMA;
|
||||
break;
|
||||
default:
|
||||
currentToken = ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
/* begin comment process */
|
||||
case INCOMMENT:
|
||||
save = FALSE;
|
||||
if (c == EOF) {
|
||||
state = DONE;
|
||||
currentToken = ENDFILE;
|
||||
} else if (c == '*')// comment out
|
||||
state = ASTERCOMMENT;
|
||||
break;
|
||||
case INOVER:
|
||||
if (c == '*') {
|
||||
save = FALSE;
|
||||
state = INCOMMENT;
|
||||
} else {
|
||||
state = DONE;
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
currentToken = OVER;
|
||||
}
|
||||
break;
|
||||
case ASTERCOMMENT:
|
||||
save = FALSE;
|
||||
if (c == EOF) {
|
||||
state = DONE;
|
||||
currentToken = ENDFILE;
|
||||
} else if (c == '/') {
|
||||
state = START;
|
||||
}
|
||||
|
||||
else {
|
||||
state = INCOMMENT;
|
||||
}
|
||||
break;
|
||||
/* end comment process */
|
||||
case INASSIGN:
|
||||
state = DONE;
|
||||
if (c == '=')
|
||||
currentToken = EQ;
|
||||
else { /* backup in the input */
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
currentToken = ASSIGN;
|
||||
}
|
||||
break;
|
||||
case INNE:
|
||||
state = DONE;
|
||||
if (c == '=') {
|
||||
currentToken = NE;
|
||||
} else {
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
currentToken = ERROR;
|
||||
}
|
||||
break;
|
||||
|
||||
case INLT:
|
||||
state = DONE;
|
||||
if (c == '=') {
|
||||
currentToken = LE;
|
||||
} else {
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
currentToken = LT;
|
||||
}
|
||||
break;
|
||||
case INGT:
|
||||
state = DONE;
|
||||
if (c == '=') {
|
||||
currentToken = GE;
|
||||
} else {
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
currentToken = GT;
|
||||
}
|
||||
break;
|
||||
case INNUM:
|
||||
if (!isdigit(c)) { /* backup in the input */
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
state = DONE;
|
||||
currentToken = NUM;
|
||||
}
|
||||
break;
|
||||
case INID:
|
||||
if (!isalpha(c)) { /* backup in the input */
|
||||
ungetNextChar();
|
||||
save = FALSE;
|
||||
state = DONE;
|
||||
currentToken = ID;
|
||||
}
|
||||
break;
|
||||
case DONE:
|
||||
default: /* should never happen */
|
||||
fprintf(listing, "Scanner Bug: state= %d\n", state);
|
||||
state = DONE;
|
||||
currentToken = ERROR;
|
||||
break;
|
||||
}
|
||||
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
|
||||
tokenString[tokenStringIndex++] = (char) c;
|
||||
if (state == DONE) {
|
||||
tokenString[tokenStringIndex] = '\0';
|
||||
if (currentToken == ID)
|
||||
currentToken = reservedLookup(tokenString);
|
||||
}
|
||||
}
|
||||
if (TraceScan) {
|
||||
fprintf(listing, "\t%d: ", lineno);
|
||||
printToken(currentToken, tokenString);
|
||||
}
|
||||
return currentToken;
|
||||
} /* end getToken */
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#ifndef _SCAN_H_
|
||||
#define _SCAN_H_
|
||||
#include "globals.h"
|
||||
|
||||
/* MAXTOKENLEN is the maximum size of a token */
|
||||
#define MAXTOKENLEN 40
|
||||
|
||||
16
src/test.cm
Normal file
16
src/test.cm
Normal file
@@ -0,0 +1,16 @@
|
||||
/* A program to perform Euclid's
|
||||
Algorithm to compute gcd */
|
||||
|
||||
int gcd(int u, int v)
|
||||
{
|
||||
if(v == 0) return u;
|
||||
else return gcd(v, u- u/v * v);
|
||||
/* hello u-u/v*v == u mod v */
|
||||
}
|
||||
|
||||
void main()
|
||||
{
|
||||
int x; int y;
|
||||
x = input(); y = input();
|
||||
print(gcd(x, y));
|
||||
}
|
||||
311
src/util.c
311
src/util.c
@@ -6,103 +6,151 @@
|
||||
/* Kenneth C. Louden */
|
||||
/****************************************************/
|
||||
|
||||
#include "globals.h"
|
||||
#include "util.h"
|
||||
#include "globals.h"
|
||||
|
||||
/* Procedure printToken prints a token
|
||||
* and its lexeme to the listing file
|
||||
*/
|
||||
void printToken( TokenType token, const char* tokenString )
|
||||
{ switch (token)
|
||||
{ case IF:
|
||||
case THEN:
|
||||
case ELSE:
|
||||
case END:
|
||||
case REPEAT:
|
||||
case UNTIL:
|
||||
case READ:
|
||||
case WRITE:
|
||||
fprintf(listing,
|
||||
"reserved word: %s\n",tokenString);
|
||||
break;
|
||||
case ASSIGN: fprintf(listing,":=\n"); break;
|
||||
case LT: fprintf(listing,"<\n"); break;
|
||||
case EQ: fprintf(listing,"=\n"); break;
|
||||
case LPAREN: fprintf(listing,"(\n"); break;
|
||||
case RPAREN: fprintf(listing,")\n"); break;
|
||||
case SEMI: fprintf(listing,";\n"); break;
|
||||
case PLUS: fprintf(listing,"+\n"); break;
|
||||
case MINUS: fprintf(listing,"-\n"); break;
|
||||
case TIMES: fprintf(listing,"*\n"); break;
|
||||
case OVER: fprintf(listing,"/\n"); break;
|
||||
case ENDFILE: fprintf(listing,"EOF\n"); break;
|
||||
case NUM:
|
||||
fprintf(listing,
|
||||
"NUM, val= %s\n",tokenString);
|
||||
break;
|
||||
case ID:
|
||||
fprintf(listing,
|
||||
"ID, name= %s\n",tokenString);
|
||||
break;
|
||||
case ERROR:
|
||||
fprintf(listing,
|
||||
"ERROR: %s\n",tokenString);
|
||||
break;
|
||||
default: /* should never happen */
|
||||
fprintf(listing,"Unknown token: %d\n",token);
|
||||
}
|
||||
void printToken(TokenType token, const char *tokenString) {
|
||||
switch (token) {
|
||||
case IF:
|
||||
case ELSE:
|
||||
case WHILE:
|
||||
case RETURN:
|
||||
case INT:
|
||||
case VOID:
|
||||
fprintf(listing,
|
||||
"reserved word: %s\n", tokenString);
|
||||
break;
|
||||
case ASSIGN:
|
||||
fprintf(listing, "=\n");
|
||||
break;
|
||||
case EQ:
|
||||
fprintf(listing, "==\n");
|
||||
break;
|
||||
case NE:
|
||||
fprintf(listing, "!=\n");
|
||||
break;
|
||||
case LT:
|
||||
fprintf(listing, "<\n");
|
||||
break;
|
||||
case LE:
|
||||
fprintf(listing, "<=\n");
|
||||
break;
|
||||
case GT:
|
||||
fprintf(listing, ">\n");
|
||||
break;
|
||||
case GE:
|
||||
fprintf(listing, ">=\n");
|
||||
break;
|
||||
case PLUS:
|
||||
fprintf(listing, "+\n");
|
||||
break;
|
||||
case MINUS:
|
||||
fprintf(listing, "-\n");
|
||||
break;
|
||||
case TIMES:
|
||||
fprintf(listing, "*\n");
|
||||
break;
|
||||
case OVER:
|
||||
fprintf(listing, "/\n");
|
||||
break;
|
||||
case LPAREN:
|
||||
fprintf(listing, "(\n");
|
||||
break;
|
||||
case RPAREN:
|
||||
fprintf(listing, ")\n");
|
||||
break;
|
||||
case LBRACE:
|
||||
fprintf(listing, "[\n");
|
||||
break;
|
||||
case RBRACE:
|
||||
fprintf(listing, "]\n");
|
||||
break;
|
||||
case LCURLY:
|
||||
fprintf(listing, "{\n");
|
||||
break;
|
||||
case RCURLY:
|
||||
fprintf(listing, "}\n");
|
||||
break;
|
||||
case SEMI:
|
||||
fprintf(listing, ";\n");
|
||||
break;
|
||||
case COMMA:
|
||||
fprintf(listing, ",\n");
|
||||
break;
|
||||
case ENDFILE:
|
||||
fprintf(listing, "EOF\n");
|
||||
break;
|
||||
case NUM:
|
||||
fprintf(listing,
|
||||
"NUM, val= %s\n", tokenString);
|
||||
break;
|
||||
case ID:
|
||||
fprintf(listing,
|
||||
"ID, name= %s\n", tokenString);
|
||||
break;
|
||||
case ERROR:
|
||||
fprintf(listing,
|
||||
"ERROR: %s\n", tokenString);
|
||||
break;
|
||||
default: /* should never happen */
|
||||
fprintf(listing, "Unknown token: %d\n", token);
|
||||
}
|
||||
}
|
||||
|
||||
/* Function newStmtNode creates a new statement
|
||||
* node for syntax tree construction
|
||||
*/
|
||||
TreeNode * newStmtNode(StmtKind kind)
|
||||
{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));
|
||||
int i;
|
||||
if (t==NULL)
|
||||
fprintf(listing,"Out of memory error at line %d\n",lineno);
|
||||
else {
|
||||
for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;
|
||||
t->sibling = NULL;
|
||||
t->nodekind = StmtK;
|
||||
t->kind.stmt = kind;
|
||||
t->lineno = lineno;
|
||||
}
|
||||
return t;
|
||||
TreeNode *newStmtNode(StmtKind kind) {
|
||||
TreeNode *t = (TreeNode *) malloc(sizeof(TreeNode));
|
||||
int i;
|
||||
if (t == NULL)
|
||||
fprintf(listing, "Out of memory error at line %d\n", lineno);
|
||||
else {
|
||||
for (i = 0; i < MAXCHILDREN; i++) t->child[i] = NULL;
|
||||
t->sibling = NULL;
|
||||
t->nodekind = StmtK;
|
||||
t->kind.stmt = kind;
|
||||
t->lineno = lineno;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Function newExpNode creates a new expression
|
||||
* node for syntax tree construction
|
||||
*/
|
||||
TreeNode * newExpNode(ExpKind kind)
|
||||
{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));
|
||||
int i;
|
||||
if (t==NULL)
|
||||
fprintf(listing,"Out of memory error at line %d\n",lineno);
|
||||
else {
|
||||
for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;
|
||||
t->sibling = NULL;
|
||||
t->nodekind = ExpK;
|
||||
t->kind.exp = kind;
|
||||
t->lineno = lineno;
|
||||
t->type = Void;
|
||||
}
|
||||
return t;
|
||||
TreeNode *newExpNode(ExpKind kind) {
|
||||
TreeNode *t = (TreeNode *) malloc(sizeof(TreeNode));
|
||||
int i;
|
||||
if (t == NULL)
|
||||
fprintf(listing, "Out of memory error at line %d\n", lineno);
|
||||
else {
|
||||
for (i = 0; i < MAXCHILDREN; i++) t->child[i] = NULL;
|
||||
t->sibling = NULL;
|
||||
t->nodekind = ExpK;
|
||||
t->kind.exp = kind;
|
||||
t->lineno = lineno;
|
||||
t->type = Void;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Function copyString allocates and makes a new
|
||||
* copy of an existing string
|
||||
*/
|
||||
char * copyString(char * s)
|
||||
{ int n;
|
||||
char * t;
|
||||
if (s==NULL) return NULL;
|
||||
n = strlen(s)+1;
|
||||
t = malloc(n);
|
||||
if (t==NULL)
|
||||
fprintf(listing,"Out of memory error at line %d\n",lineno);
|
||||
else strcpy(t,s);
|
||||
return t;
|
||||
char *copyString(char *s) {
|
||||
int n;
|
||||
char *t;
|
||||
if (s == NULL) return NULL;
|
||||
n = strlen(s) + 1;
|
||||
t = malloc(n);
|
||||
if (t == NULL)
|
||||
fprintf(listing, "Out of memory error at line %d\n", lineno);
|
||||
else
|
||||
strcpy(t, s);
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Variable indentno is used by printTree to
|
||||
@@ -111,67 +159,66 @@ char * copyString(char * s)
|
||||
static int indentno = 0; /* explicit type: implicit int is not valid since C99 */
|
||||
|
||||
/* macros to increase/decrease indentation */
|
||||
#define INDENT indentno+=2
|
||||
#define UNINDENT indentno-=2
|
||||
#define INDENT indentno += 2
|
||||
#define UNINDENT indentno -= 2
|
||||
|
||||
/* printSpaces indents by printing spaces */
|
||||
static void printSpaces(void)
|
||||
{ int i;
|
||||
for (i=0;i<indentno;i++)
|
||||
fprintf(listing," ");
|
||||
static void printSpaces(void) {
|
||||
int i;
|
||||
for (i = 0; i < indentno; i++)
|
||||
fprintf(listing, " ");
|
||||
}
|
||||
|
||||
/* procedure printTree prints a syntax tree to the
|
||||
* listing file using indentation to indicate subtrees
|
||||
*/
|
||||
void printTree( TreeNode * tree )
|
||||
{ int i;
|
||||
INDENT;
|
||||
while (tree != NULL) {
|
||||
printSpaces();
|
||||
if (tree->nodekind==StmtK)
|
||||
{ switch (tree->kind.stmt) {
|
||||
case IfK:
|
||||
fprintf(listing,"If\n");
|
||||
break;
|
||||
case RepeatK:
|
||||
fprintf(listing,"Repeat\n");
|
||||
break;
|
||||
case AssignK:
|
||||
fprintf(listing,"Assign to: %s\n",tree->attr.name);
|
||||
break;
|
||||
case ReadK:
|
||||
fprintf(listing,"Read: %s\n",tree->attr.name);
|
||||
break;
|
||||
case WriteK:
|
||||
fprintf(listing,"Write\n");
|
||||
break;
|
||||
default:
|
||||
fprintf(listing,"Unknown ExpNode kind\n");
|
||||
break;
|
||||
}
|
||||
void printTree(TreeNode *tree) {
|
||||
int i;
|
||||
INDENT;
|
||||
while (tree != NULL) {
|
||||
printSpaces();
|
||||
if (tree->nodekind == StmtK) {
|
||||
switch (tree->kind.stmt) {
|
||||
case IfK:
|
||||
fprintf(listing, "If\n");
|
||||
break;
|
||||
case RepeatK:
|
||||
fprintf(listing, "Repeat\n");
|
||||
break;
|
||||
case AssignK:
|
||||
fprintf(listing, "Assign to: %s\n", tree->attr.name);
|
||||
break;
|
||||
case ReadK:
|
||||
fprintf(listing, "Read: %s\n", tree->attr.name);
|
||||
break;
|
||||
case WriteK:
|
||||
fprintf(listing, "Write\n");
|
||||
break;
|
||||
default:
|
||||
fprintf(listing, "Unknown ExpNode kind\n");
|
||||
break;
|
||||
}
|
||||
} else if (tree->nodekind == ExpK) {
|
||||
switch (tree->kind.exp) {
|
||||
case OpK:
|
||||
fprintf(listing, "Op: ");
|
||||
printToken(tree->attr.op, "\0");
|
||||
break;
|
||||
case ConstK:
|
||||
fprintf(listing, "Const: %d\n", tree->attr.val);
|
||||
break;
|
||||
case IdK:
|
||||
fprintf(listing, "Id: %s\n", tree->attr.name);
|
||||
break;
|
||||
default:
|
||||
fprintf(listing, "Unknown ExpNode kind\n");
|
||||
break;
|
||||
}
|
||||
} else
|
||||
fprintf(listing, "Unknown node kind\n");
|
||||
for (i = 0; i < MAXCHILDREN; i++)
|
||||
printTree(tree->child[i]);
|
||||
tree = tree->sibling;
|
||||
}
|
||||
else if (tree->nodekind==ExpK)
|
||||
{ switch (tree->kind.exp) {
|
||||
case OpK:
|
||||
fprintf(listing,"Op: ");
|
||||
printToken(tree->attr.op,"\0");
|
||||
break;
|
||||
case ConstK:
|
||||
fprintf(listing,"Const: %d\n",tree->attr.val);
|
||||
break;
|
||||
case IdK:
|
||||
fprintf(listing,"Id: %s\n",tree->attr.name);
|
||||
break;
|
||||
default:
|
||||
fprintf(listing,"Unknown ExpNode kind\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else fprintf(listing,"Unknown node kind\n");
|
||||
for (i=0;i<MAXCHILDREN;i++)
|
||||
printTree(tree->child[i]);
|
||||
tree = tree->sibling;
|
||||
}
|
||||
UNINDENT;
|
||||
UNINDENT;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user