commit 28a4827cb1d7e9f4868563b5194332825b9faf82 Author: yenru0 Date: Tue Sep 9 14:12:03 2025 +0900 initial commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..66271d0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +[attr]lfs-file filter=lfs diff=lfs merge=lfs -text + +*.pdf lfs-file \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d48c759 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea +.vscode \ No newline at end of file diff --git a/1.md b/1.md new file mode 100644 index 0000000..cfa494d --- /dev/null +++ b/1.md @@ -0,0 +1,62 @@ +컴파일러 1 +=== + + +# Interpreter in Modern Processors + + +# Compiler + +* Front-End + +보이는 부분(HW에 신경을 안써도 됨) + +* Back-End + +HW에 밀접한 최적화를 해줌 + + +## General Structure of a Modern Compiler + +* Front-End + * Lexical Analysis + * Syntax Analysis + * Semantic Analysis +* Code Generation - 1 +* Back-End + * Control/DataFlow Analysis + * Optimization + * Code Generation - 2 + +이런 모듈식 구조는 다양한 언어와 하드웨어에 쉽게 적용할 수 있도록 돕는다. + +### Lexical Analysis (Scanner) + +프로그램을 `token`의 의미 단위로 나눔. +그리고 의미가 없는 단위를 지움. + +보통 FSA로 구현함. + +### Syntax Analysis (Parser) + +미리 있는 Grammar를 바탕으로 Syntax Correctness를 진행함. + +### Semantic Analysis + +* identifier 정의 등 +* 타입 체크 +* 스코프 체크 +* 오버로딩 모호화 해소 +* IR로 변환 + +### Optimization + +최적화함 +* 상수 최적화 +* 안쓰는 변수 +* loop에서 안바뀌는 변수 +* 다시 똑같이 계산하는 변수 제거 +* ... + +### Code Generation + diff --git a/2.md b/2.md new file mode 100644 index 0000000..eeca19e --- /dev/null +++ b/2.md @@ -0,0 +1,72 @@ +Lexical Analysis +=== + +포트란은 모든 whitespace를 지움. + +```fortran +do 5 I = 1.25 + +``` + +``` +do 5 I = 1,25 +``` + + +## Tokens + +대표적인 토큰의 예시 + +* Identifiers +* Keywords +* Integers +* Floating-points +* Symbols +* Strings + +하기 위해서 하는 것 + +* Specification + +확실하게 명세를 해줘야함. 
+ +* Recognition + +DFA를 이용해서 패턴 매칭 + +* Automation + +RE로 부터 DFA를 generate해야함 + +Lex라는 툴을 이용 + +그러나 내부적으로는 Thompson's construction (RE -> NFA), Subset Construction(NFA -> DFA)도 알아야함 + + +## Specification + +Regular Expression + +Multiple Matches + +`elsex = 0`이라는 코드에서 + +`else / x / = / 0` +또는 + +`elsex / = 0` 두가지 선택지가 있음. 둘 중 하나를 무조건 골라야함. 이때 가장 긴 토큰이 선택된다. + +* `elsex`가 `else`보다 더 길어서 `elsex`가 선택됨. + +만약에 두 경우가 모두 똑같다면 토큰 종류의 우선순위에 따라 선택된다. + +* `Keyword`가 `Identifier`보다 우선순위가 더 높음. + +## Recognition + +FSA를 이용함. + +DFA와 NFA의 표현력은 동일하나 DFA는 편하게 구현할 수 있다는 장점이 있음. +NFA는 RE로부터 쉽게 변환가능하다는 장점이 있음. + +**Lexical Analysis** \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6efa8c4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive LANG=C.UTF-8 LC_ALL=C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential gcc g++ make cmake ninja-build \ + vim nano sudo \ + flex bison libfl-dev \ + git ca-certificates pkg-config \ + python3 python3-pip \ + curl wget \ + gdb valgrind \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /work + +# Default: interactive shell inside the container +CMD ["bash"] diff --git a/pdf/L0.pdf b/pdf/L0.pdf new file mode 100644 index 0000000..1ab0dea Binary files /dev/null and b/pdf/L0.pdf differ diff --git a/pdf/L1.pdf b/pdf/L1.pdf new file mode 100644 index 0000000..89ab3f5 Binary files /dev/null and b/pdf/L1.pdf differ diff --git a/pdf/L2.pdf b/pdf/L2.pdf new file mode 100644 index 0000000..1534084 Binary files /dev/null and b/pdf/L2.pdf differ diff --git a/src/analyze.c b/src/analyze.c new file mode 100644 index 0000000..61d2400 --- /dev/null +++ b/src/analyze.c @@ -0,0 +1,159 @@ +/****************************************************/ +/* File: analyze.c */ +/* Semantic analyzer implementation */ +/* for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#include "globals.h" +#include "symtab.h" +#include "analyze.h" + +/* counter for variable memory locations */ +static int location = 0; + +/* Procedure traverse is a generic recursive + * syntax tree traversal routine: + * it applies preProc in preorder and postProc + * in postorder to tree pointed to by t + */ +static void traverse( TreeNode * t, + void (* preProc) (TreeNode *), + void (* postProc) (TreeNode *) ) +{ if (t != NULL) + { preProc(t); + { int i; + for (i=0; i < MAXCHILDREN; i++) + traverse(t->child[i],preProc,postProc); + } + postProc(t); + traverse(t->sibling,preProc,postProc); + } +} + +/* nullProc is a do-nothing procedure to + * generate preorder-only or postorder-only + * traversals from traverse + */ +static void nullProc(TreeNode * t) +{ if (t==NULL) return; + else return; +} + +/* Procedure insertNode inserts + * identifiers stored in t into + * the symbol table + */ +static void insertNode( TreeNode * t) +{ switch (t->nodekind) + { case StmtK: + switch (t->kind.stmt) + { case AssignK: + case ReadK: + if (st_lookup(t->attr.name) == -1) + /* not yet in table, so treat as new definition */ + st_insert(t->attr.name,t->lineno,location++); + else + /* already in table, so ignore location, + add line number of use only */ + st_insert(t->attr.name,t->lineno,0); + break; + default: + break; + } + break; + case ExpK: + switch (t->kind.exp) + { case IdK: + if (st_lookup(t->attr.name) == -1) + /* not yet in table, so treat as new definition */ + st_insert(t->attr.name,t->lineno,location++); + else + /* already in table, so ignore location, + add line number of use only */ + st_insert(t->attr.name,t->lineno,0); + break; + default: + break; + } + break; + default: + break; + } +} + +/* Function buildSymtab constructs the symbol + * table by preorder traversal of the syntax tree + */ +void buildSymtab(TreeNode * syntaxTree) +{ traverse(syntaxTree,insertNode,nullProc); + if (TraceAnalyze) + 
{ fprintf(listing,"\nSymbol table:\n\n"); + printSymTab(listing); + } +} + +static void typeError(TreeNode * t, char * message) +{ fprintf(listing,"Type error at line %d: %s\n",t->lineno,message); + Error = TRUE; +} + +/* Procedure checkNode performs + * type checking at a single tree node + */ +static void checkNode(TreeNode * t) +{ switch (t->nodekind) + { case ExpK: + switch (t->kind.exp) + { case OpK: + if ((t->child[0]->type != Integer) || + (t->child[1]->type != Integer)) + typeError(t,"Op applied to non-integer"); + if ((t->attr.op == EQ) || (t->attr.op == LT)) + t->type = Boolean; + else + t->type = Integer; + break; + case ConstK: + case IdK: + t->type = Integer; + break; + default: + break; + } + break; + case StmtK: + switch (t->kind.stmt) + { case IfK: + if (t->child[0]->type == Integer) + typeError(t->child[0],"if test is not Boolean"); + break; + case AssignK: + if (t->child[0]->type != Integer) + typeError(t->child[0],"assignment of non-integer value"); + break; + case WriteK: + if (t->child[0]->type != Integer) + typeError(t->child[0],"write of non-integer value"); + break; + case RepeatK: + if (t->child[1]->type == Integer) + typeError(t->child[1],"repeat test is not Boolean"); + break; + default: + break; + } + break; + default: + break; + + } +} + +/* Procedure typeCheck performs type checking + * by a postorder syntax tree traversal + */ +void typeCheck(TreeNode * syntaxTree) +{ traverse(syntaxTree,nullProc,checkNode); +} diff --git a/src/analyze.h b/src/analyze.h new file mode 100644 index 0000000..f838bb8 --- /dev/null +++ b/src/analyze.h @@ -0,0 +1,23 @@ +/****************************************************/ +/* File: analyze.h */ +/* Semantic analyzer interface for TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
/* Procedure genStmt generates code at a statement node.
 * It dispatches on tree->kind.stmt.  Sub-trees are compiled
 * through cGen; forward jumps are produced by reserving an
 * instruction slot with emitSkip(1) and filling that slot in
 * later with emitBackup / emitRM_Abs / emitRestore once the
 * jump target is known.
 */
static void genStmt( TreeNode * tree)
{ TreeNode * p1, * p2, * p3;
  int savedLoc1,savedLoc2,currentLoc;
  int loc;
  switch (tree->kind.stmt) {

      case IfK :
         if (TraceCode) emitComment("-> if") ;
         p1 = tree->child[0] ;   /* test */
         p2 = tree->child[1] ;   /* then part */
         p3 = tree->child[2] ;   /* else part */
         /* generate code for test expression */
         cGen(p1);
         /* reserve one slot for the jump over the then part */
         savedLoc1 = emitSkip(1) ;
         emitComment("if: jump to else belongs here");
         /* recurse on then part */
         cGen(p2);
         /* reserve one slot for the jump over the else part */
         savedLoc2 = emitSkip(1) ;
         emitComment("if: jump to end belongs here");
         /* emitSkip(0) only reports the current location: start of else */
         currentLoc = emitSkip(0) ;
         /* go back and fill the first reserved slot */
         emitBackup(savedLoc1) ;
         emitRM_Abs("JEQ",ac,currentLoc,"if: jmp to else");
         emitRestore() ;
         /* recurse on else part */
         cGen(p3);
         /* current location is now the end of the whole statement */
         currentLoc = emitSkip(0) ;
         /* go back and fill the second reserved slot */
         emitBackup(savedLoc2) ;
         emitRM_Abs("LDA",pc,currentLoc,"jmp to end") ;
         emitRestore() ;
         if (TraceCode) emitComment("<- if") ;
         break; /* if_k */

      case RepeatK:
         if (TraceCode) emitComment("-> repeat") ;
         p1 = tree->child[0] ;   /* body */
         p2 = tree->child[1] ;   /* test */
         /* remember where the body starts; no slot is reserved */
         savedLoc1 = emitSkip(0);
         emitComment("repeat: jump after body comes back here");
         /* generate code for body */
         cGen(p1);
         /* generate code for test */
         cGen(p2);
         /* backward jump: the target is already known, no backpatch needed */
         emitRM_Abs("JEQ",ac,savedLoc1,"repeat: jmp back to body");
         if (TraceCode) emitComment("<- repeat") ;
         break; /* repeat */

      case AssignK:
         if (TraceCode) emitComment("-> assign") ;
         /* generate code for rhs */
         cGen(tree->child[0]);
         /* now store value */
         /* presumably the location assigned to the name by buildSymtab -
            confirm against symtab.c */
         loc = st_lookup(tree->attr.name);
         emitRM("ST",ac,loc,gp,"assign: store value");
         if (TraceCode) emitComment("<- assign") ;
         break; /* assign_k */

      case ReadK:
         /* input goes to ac, then is stored at the variable's offset from gp */
         emitRO("IN",ac,0,0,"read integer value");
         loc = st_lookup(tree->attr.name);
         emitRM("ST",ac,loc,gp,"read: store value");
         break;
      case WriteK:
         /* generate code for expression to write */
         cGen(tree->child[0]);
         /* now output it */
         emitRO("OUT",ac,0,0,"write ac");
         break;
      default:
         break;
    }
} /* genStmt */
code for ac = right operand */ + cGen(p2); + /* now load left operand */ + emitRM("LD",ac1,++tmpOffset,mp,"op: load left"); + switch (tree->attr.op) { + case PLUS : + emitRO("ADD",ac,ac1,ac,"op +"); + break; + case MINUS : + emitRO("SUB",ac,ac1,ac,"op -"); + break; + case TIMES : + emitRO("MUL",ac,ac1,ac,"op *"); + break; + case OVER : + emitRO("DIV",ac,ac1,ac,"op /"); + break; + case LT : + emitRO("SUB",ac,ac1,ac,"op <") ; + emitRM("JLT",ac,2,pc,"br if true") ; + emitRM("LDC",ac,0,ac,"false case") ; + emitRM("LDA",pc,1,pc,"unconditional jmp") ; + emitRM("LDC",ac,1,ac,"true case") ; + break; + case EQ : + emitRO("SUB",ac,ac1,ac,"op ==") ; + emitRM("JEQ",ac,2,pc,"br if true"); + emitRM("LDC",ac,0,ac,"false case") ; + emitRM("LDA",pc,1,pc,"unconditional jmp") ; + emitRM("LDC",ac,1,ac,"true case") ; + break; + default: + emitComment("BUG: Unknown operator"); + break; + } /* case op */ + if (TraceCode) emitComment("<- Op") ; + break; /* OpK */ + + default: + break; + } +} /* genExp */ + +/* Procedure cGen recursively generates code by + * tree traversal + */ +static void cGen( TreeNode * tree) +{ if (tree != NULL) + { switch (tree->nodekind) { + case StmtK: + genStmt(tree); + break; + case ExpK: + genExp(tree); + break; + default: + break; + } + cGen(tree->sibling); + } +} + +/**********************************************/ +/* the primary function of the code generator */ +/**********************************************/ +/* Procedure codeGen generates code to a code + * file by traversal of the syntax tree. 
The + * second parameter (codefile) is the file name + * of the code file, and is used to print the + * file name as a comment in the code file + */ +void codeGen(TreeNode * syntaxTree, char * codefile) +{ char * s = malloc(strlen(codefile)+7); + strcpy(s,"File: "); + strcat(s,codefile); + emitComment("TINY Compilation to TM Code"); + emitComment(s); + /* generate standard prelude */ + emitComment("Standard prelude:"); + emitRM("LD",mp,0,ac,"load maxaddress from location 0"); + emitRM("ST",ac,0,ac,"clear location 0"); + emitComment("End of standard prelude."); + /* generate code for TINY program */ + cGen(syntaxTree); + /* finish */ + emitComment("End of execution."); + emitRO("HALT",0,0,0,""); +} diff --git a/src/cgen.h b/src/cgen.h new file mode 100644 index 0000000..c20cb2b --- /dev/null +++ b/src/cgen.h @@ -0,0 +1,19 @@ +/****************************************************/ +/* File: cgen.h */ +/* The code generator interface to the TINY compiler*/ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +#ifndef _CGEN_H_ +#define _CGEN_H_ + +/* Procedure codeGen generates code to a code + * file by traversal of the syntax tree. The + * second parameter (codefile) is the file name + * of the code file, and is used to print the + * file name as a comment in the code file + */ +void codeGen(TreeNode * syntaxTree, char * codefile); + +#endif diff --git a/src/code.c b/src/code.c new file mode 100644 index 0000000..743c188 --- /dev/null +++ b/src/code.c @@ -0,0 +1,97 @@ +/****************************************************/ +/* File: code.c */ +/* TM Code emitting utilities */ +/* implementation for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#include "globals.h" +#include "code.h" + +/* TM location number for current instruction emission */ +static int emitLoc = 0 ; + +/* Highest TM location emitted so far + For use in conjunction with emitSkip, + emitBackup, and emitRestore */ +static int highEmitLoc = 0; + +/* Procedure emitComment prints a comment line + * with comment c in the code file + */ +void emitComment( char * c ) +{ if (TraceCode) fprintf(code,"* %s\n",c);} + +/* Procedure emitRO emits a register-only + * TM instruction + * op = the opcode + * r = target register + * s = 1st source register + * t = 2nd source register + * c = a comment to be printed if TraceCode is TRUE + */ +void emitRO( char *op, int r, int s, int t, char *c) +{ fprintf(code,"%3d: %5s %d,%d,%d ",emitLoc++,op,r,s,t); + if (TraceCode) fprintf(code,"\t%s",c) ; + fprintf(code,"\n") ; + if (highEmitLoc < emitLoc) highEmitLoc = emitLoc ; +} /* emitRO */ + +/* Procedure emitRM emits a register-to-memory + * TM instruction + * op = the opcode + * r = target register + * d = the offset + * s = the base register + * c = a comment to be printed if TraceCode is TRUE + */ +void emitRM( char * op, int r, int d, int s, char *c) +{ fprintf(code,"%3d: %5s %d,%d(%d) ",emitLoc++,op,r,d,s); + if (TraceCode) fprintf(code,"\t%s",c) ; + fprintf(code,"\n") ; + if (highEmitLoc < emitLoc) highEmitLoc = emitLoc ; +} /* emitRM */ + +/* Function emitSkip skips "howMany" code + * locations for later backpatch. 
It also + * returns the current code position + */ +int emitSkip( int howMany) +{ int i = emitLoc; + emitLoc += howMany ; + if (highEmitLoc < emitLoc) highEmitLoc = emitLoc ; + return i; +} /* emitSkip */ + +/* Procedure emitBackup backs up to + * loc = a previously skipped location + */ +void emitBackup( int loc) +{ if (loc > highEmitLoc) emitComment("BUG in emitBackup"); + emitLoc = loc ; +} /* emitBackup */ + +/* Procedure emitRestore restores the current + * code position to the highest previously + * unemitted position + */ +void emitRestore(void) +{ emitLoc = highEmitLoc;} + +/* Procedure emitRM_Abs converts an absolute reference + * to a pc-relative reference when emitting a + * register-to-memory TM instruction + * op = the opcode + * r = target register + * a = the absolute location in memory + * c = a comment to be printed if TraceCode is TRUE + */ +void emitRM_Abs( char *op, int r, int a, char * c) +{ fprintf(code,"%3d: %5s %d,%d(%d) ", + emitLoc,op,r,a-(emitLoc+1),pc); + ++emitLoc ; + if (TraceCode) fprintf(code,"\t%s",c) ; + fprintf(code,"\n") ; + if (highEmitLoc < emitLoc) highEmitLoc = emitLoc ; +} /* emitRM_Abs */ diff --git a/src/code.h b/src/code.h new file mode 100644 index 0000000..9778619 --- /dev/null +++ b/src/code.h @@ -0,0 +1,86 @@ +/****************************************************/ +/* File: code.h */ +/* Code emitting utilities for the TINY compiler */ +/* and interface to the TM machine */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#ifndef _CODE_H_ +#define _CODE_H_ + +/* pc = program counter */ +#define pc 7 + +/* mp = "memory pointer" points + * to top of memory (for temp storage) + */ +#define mp 6 + +/* gp = "global pointer" points + * to bottom of memory for (global) + * variable storage + */ +#define gp 5 + +/* accumulator */ +#define ac 0 + +/* 2nd accumulator */ +#define ac1 1 + +/* code emitting utilities */ + +/* Procedure emitComment prints a comment line + * with comment c in the code file + */ +void emitComment( char * c ); + +/* Procedure emitRO emits a register-only + * TM instruction + * op = the opcode + * r = target register + * s = 1st source register + * t = 2nd source register + * c = a comment to be printed if TraceCode is TRUE + */ +void emitRO( char *op, int r, int s, int t, char *c); + +/* Procedure emitRM emits a register-to-memory + * TM instruction + * op = the opcode + * r = target register + * d = the offset + * s = the base register + * c = a comment to be printed if TraceCode is TRUE + */ +void emitRM( char * op, int r, int d, int s, char *c); + +/* Function emitSkip skips "howMany" code + * locations for later backpatch. 
It also + * returns the current code position + */ +int emitSkip( int howMany); + +/* Procedure emitBackup backs up to + * loc = a previously skipped location + */ +void emitBackup( int loc); + +/* Procedure emitRestore restores the current + * code position to the highest previously + * unemitted position + */ +void emitRestore(void); + +/* Procedure emitRM_Abs converts an absolute reference + * to a pc-relative reference when emitting a + * register-to-memory TM instruction + * op = the opcode + * r = target register + * a = the absolute location in memory + * c = a comment to be printed if TraceCode is TRUE + */ +void emitRM_Abs( char *op, int r, int a, char * c); + +#endif diff --git a/src/globals.h b/src/globals.h new file mode 100644 index 0000000..d3407b3 --- /dev/null +++ b/src/globals.h @@ -0,0 +1,104 @@ +/****************************************************/ +/* File: globals.h */ +/* Global types and vars for TINY compiler */ +/* must come before other include files */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
/* One node of the syntax tree.  nodekind says whether the
 * node is a statement or an expression and thereby which
 * member of `kind` is meaningful.
 */
typedef struct treeNode
   { struct treeNode * child[MAXCHILDREN]; /* sub-trees: operands, tests, statement bodies */
     struct treeNode * sibling;            /* next node of the same sequence, or NULL */
     int lineno;                           /* line number used when reporting errors at this node */
     NodeKind nodekind;                    /* StmtK or ExpK */
     union { StmtKind stmt; ExpKind exp;} kind;
     union { TokenType op;                 /* operator token of an OpK node */
             int val;                      /* value of a ConstK node */
             char * name; } attr;          /* identifier of IdK, AssignK and ReadK nodes */
     ExpType type; /* for type checking of exps */
   } TreeNode;
TRUE causes the syntax tree to be + * printed to the listing file in linearized form + * (using indents for children) + */ +extern int TraceParse; + +/* TraceAnalyze = TRUE causes symbol table inserts + * and lookups to be reported to the listing file + */ +extern int TraceAnalyze; + +/* TraceCode = TRUE causes comments to be written + * to the TM code file as code is generated + */ +extern int TraceCode; + +/* Error = TRUE prevents further passes if an error occurs */ +extern int Error; +#endif diff --git a/src/lex/cminus.l b/src/lex/cminus.l new file mode 100644 index 0000000..aa96caa --- /dev/null +++ b/src/lex/cminus.l @@ -0,0 +1,75 @@ +/****************************************************/ +/* File: tiny.l */ +/* Lex specification for TINY */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +%{ +#include "globals.h" +#include "util.h" +#include "scan.h" +/* lexeme of identifier or reserved word */ +char tokenString[MAXTOKENLEN+1]; +%} + +digit [0-9] +number {digit}+ +letter [a-zA-Z] +identifier {letter}+ +newline \n +whitespace [ \t]+ + +%% + +"if" {return IF;} +"then" {return THEN;} +"else" {return ELSE;} +"end" {return END;} +"repeat" {return REPEAT;} +"until" {return UNTIL;} +"read" {return READ;} +"write" {return WRITE;} +":=" {return ASSIGN;} +"=" {return EQ;} +"<" {return LT;} +"+" {return PLUS;} +"-" {return MINUS;} +"*" {return TIMES;} +"/" {return OVER;} +"(" {return LPAREN;} +")" {return RPAREN;} +";" {return SEMI;} +{number} {return NUM;} +{identifier} {return ID;} +{newline} {lineno++;} +{whitespace} {/* skip whitespace */} +"{" { char c; + do + { c = input(); + if (c == EOF) break; + if (c == '\n') lineno++; + } while (c != '}'); + } +. 
{return ERROR;} + +%% + +TokenType getToken(void) +{ static int firstTime = TRUE; + TokenType currentToken; + if (firstTime) + { firstTime = FALSE; + lineno++; + yyin = source; + yyout = listing; + } + currentToken = yylex(); + strncpy(tokenString,yytext,MAXTOKENLEN); + if (TraceScan) { + fprintf(listing,"\t%d: ",lineno); + printToken(currentToken,tokenString); + } + return currentToken; +} + diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..680cc7b --- /dev/null +++ b/src/main.c @@ -0,0 +1,102 @@ +/****************************************************/ +/* File: main.c */ +/* Main program for TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +#include "globals.h" + +/* set NO_PARSE to TRUE to get a scanner-only compiler */ +#define NO_PARSE FALSE +/* set NO_ANALYZE to TRUE to get a parser-only compiler */ +#define NO_ANALYZE FALSE + +/* set NO_CODE to TRUE to get a compiler that does not + * generate code + */ +#define NO_CODE FALSE + +#include "util.h" +#if NO_PARSE +#include "scan.h" +#else +#include "parse.h" +#if !NO_ANALYZE +#include "analyze.h" +#if !NO_CODE +#include "cgen.h" +#endif +#endif +#endif + +/* allocate global variables */ +int lineno = 0; +FILE * source; +FILE * listing; +FILE * code; + +/* allocate and set tracing flags */ +int EchoSource = FALSE; +int TraceScan = FALSE; +int TraceParse = FALSE; +int TraceAnalyze = FALSE; +int TraceCode = FALSE; + +int Error = FALSE; + +main( int argc, char * argv[] ) +{ TreeNode * syntaxTree; + char pgm[120]; /* source code file name */ + if (argc != 2) + { fprintf(stderr,"usage: %s \n",argv[0]); + exit(1); + } + strcpy(pgm,argv[1]) ; + if (strchr (pgm, '.') == NULL) + strcat(pgm,".tny"); + source = fopen(pgm,"r"); + if (source==NULL) + { fprintf(stderr,"File %s not found\n",pgm); + exit(1); + } + listing = stdout; /* send listing to screen */ + fprintf(listing,"\nTINY COMPILATION: 
%s\n",pgm); +#if NO_PARSE + while (getToken()!=ENDFILE); +#else + syntaxTree = parse(); + if (TraceParse) { + fprintf(listing,"\nSyntax tree:\n"); + printTree(syntaxTree); + } +#if !NO_ANALYZE + if (! Error) + { if (TraceAnalyze) fprintf(listing,"\nBuilding Symbol Table...\n"); + buildSymtab(syntaxTree); + if (TraceAnalyze) fprintf(listing,"\nChecking Types...\n"); + typeCheck(syntaxTree); + if (TraceAnalyze) fprintf(listing,"\nType Checking Finished\n"); + } +#if !NO_CODE + if (! Error) + { char * codefile; + int fnlen = strcspn(pgm,"."); + codefile = (char *) calloc(fnlen+4, sizeof(char)); + strncpy(codefile,pgm,fnlen); + strcat(codefile,".tm"); + code = fopen(codefile,"w"); + if (code == NULL) + { printf("Unable to open %s\n",codefile); + exit(1); + } + codeGen(syntaxTree,codefile); + fclose(code); + } +#endif +#endif +#endif + fclose(source); + return 0; +} + diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..3ae324b --- /dev/null +++ b/src/parse.c @@ -0,0 +1,215 @@ +/****************************************************/ +/* File: parse.c */ +/* The parser implementation for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#include "globals.h" +#include "util.h" +#include "scan.h" +#include "parse.h" + +static TokenType token; /* holds current token */ + +/* function prototypes for recursive calls */ +static TreeNode * stmt_sequence(void); +static TreeNode * statement(void); +static TreeNode * if_stmt(void); +static TreeNode * repeat_stmt(void); +static TreeNode * assign_stmt(void); +static TreeNode * read_stmt(void); +static TreeNode * write_stmt(void); +static TreeNode * exp(void); +static TreeNode * simple_exp(void); +static TreeNode * term(void); +static TreeNode * factor(void); + +static void syntaxError(char * message) +{ fprintf(listing,"\n>>> "); + fprintf(listing,"Syntax error at line %d: %s",lineno,message); + Error = TRUE; +} + +static void match(TokenType expected) +{ if (token == expected) token = getToken(); + else { + syntaxError("unexpected token -> "); + printToken(token,tokenString); + fprintf(listing," "); + } +} + +TreeNode * stmt_sequence(void) +{ TreeNode * t = statement(); + TreeNode * p = t; + while ((token!=ENDFILE) && (token!=END) && + (token!=ELSE) && (token!=UNTIL)) + { TreeNode * q; + match(SEMI); + q = statement(); + if (q!=NULL) { + if (t==NULL) t = p = q; + else /* now p cannot be NULL either */ + { p->sibling = q; + p = q; + } + } + } + return t; +} + +TreeNode * statement(void) +{ TreeNode * t = NULL; + switch (token) { + case IF : t = if_stmt(); break; + case REPEAT : t = repeat_stmt(); break; + case ID : t = assign_stmt(); break; + case READ : t = read_stmt(); break; + case WRITE : t = write_stmt(); break; + default : syntaxError("unexpected token -> "); + printToken(token,tokenString); + token = getToken(); + break; + } /* end case */ + return t; +} + +TreeNode * if_stmt(void) +{ TreeNode * t = newStmtNode(IfK); + match(IF); + if (t!=NULL) t->child[0] = exp(); + match(THEN); + if (t!=NULL) t->child[1] = stmt_sequence(); + if (token==ELSE) { + match(ELSE); + if (t!=NULL) 
t->child[2] = stmt_sequence(); + } + match(END); + return t; +} + +TreeNode * repeat_stmt(void) +{ TreeNode * t = newStmtNode(RepeatK); + match(REPEAT); + if (t!=NULL) t->child[0] = stmt_sequence(); + match(UNTIL); + if (t!=NULL) t->child[1] = exp(); + return t; +} + +TreeNode * assign_stmt(void) +{ TreeNode * t = newStmtNode(AssignK); + if ((t!=NULL) && (token==ID)) + t->attr.name = copyString(tokenString); + match(ID); + match(ASSIGN); + if (t!=NULL) t->child[0] = exp(); + return t; +} + +TreeNode * read_stmt(void) +{ TreeNode * t = newStmtNode(ReadK); + match(READ); + if ((t!=NULL) && (token==ID)) + t->attr.name = copyString(tokenString); + match(ID); + return t; +} + +TreeNode * write_stmt(void) +{ TreeNode * t = newStmtNode(WriteK); + match(WRITE); + if (t!=NULL) t->child[0] = exp(); + return t; +} + +TreeNode * exp(void) +{ TreeNode * t = simple_exp(); + if ((token==LT)||(token==EQ)) { + TreeNode * p = newExpNode(OpK); + if (p!=NULL) { + p->child[0] = t; + p->attr.op = token; + t = p; + } + match(token); + if (t!=NULL) + t->child[1] = simple_exp(); + } + return t; +} + +TreeNode * simple_exp(void) +{ TreeNode * t = term(); + while ((token==PLUS)||(token==MINUS)) + { TreeNode * p = newExpNode(OpK); + if (p!=NULL) { + p->child[0] = t; + p->attr.op = token; + t = p; + match(token); + t->child[1] = term(); + } + } + return t; +} + +TreeNode * term(void) +{ TreeNode * t = factor(); + while ((token==TIMES)||(token==OVER)) + { TreeNode * p = newExpNode(OpK); + if (p!=NULL) { + p->child[0] = t; + p->attr.op = token; + t = p; + match(token); + p->child[1] = factor(); + } + } + return t; +} + +TreeNode * factor(void) +{ TreeNode * t = NULL; + switch (token) { + case NUM : + t = newExpNode(ConstK); + if ((t!=NULL) && (token==NUM)) + t->attr.val = atoi(tokenString); + match(NUM); + break; + case ID : + t = newExpNode(IdK); + if ((t!=NULL) && (token==ID)) + t->attr.name = copyString(tokenString); + match(ID); + break; + case LPAREN : + match(LPAREN); + t = exp(); + 
match(RPAREN); + break; + default: + syntaxError("unexpected token -> "); + printToken(token,tokenString); + token = getToken(); + break; + } + return t; +} + +/****************************************/ +/* the primary function of the parser */ +/****************************************/ +/* Function parse returns the newly + * constructed syntax tree + */ +TreeNode * parse(void) +{ TreeNode * t; + token = getToken(); + t = stmt_sequence(); + if (token!=ENDFILE) + syntaxError("Code ends before file\n"); + return t; +} diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..a8ca95b --- /dev/null +++ b/src/parse.h @@ -0,0 +1,16 @@ +/****************************************************/ +/* File: parse.h */ +/* The parser interface for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +#ifndef _PARSE_H_ +#define _PARSE_H_ + +/* Function parse returns the newly + * constructed syntax tree + */ +TreeNode * parse(void); + +#endif diff --git a/src/scan.c b/src/scan.c new file mode 100644 index 0000000..a9cfdde --- /dev/null +++ b/src/scan.c @@ -0,0 +1,203 @@ +/****************************************************/ +/* File: scan.c */ +/* The scanner implementation for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#include "globals.h" +#include "util.h" +#include "scan.h" + +/* states in scanner DFA */ +typedef enum + { START,INASSIGN,INCOMMENT,INNUM,INID,DONE } + StateType; + +/* lexeme of identifier or reserved word */ +char tokenString[MAXTOKENLEN+1]; + +/* BUFLEN = length of the input buffer for + source code lines */ +#define BUFLEN 256 + +static char lineBuf[BUFLEN]; /* holds the current line */ +static int linepos = 0; /* current position in LineBuf */ +static int bufsize = 0; /* current size of buffer string */ +static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */ + +/* getNextChar fetches the next non-blank character + from lineBuf, reading in a new line if lineBuf is + exhausted */ +static int getNextChar(void) +{ if (!(linepos < bufsize)) + { lineno++; + if (fgets(lineBuf,BUFLEN-1,source)) + { if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf); + bufsize = strlen(lineBuf); + linepos = 0; + return lineBuf[linepos++]; + } + else + { EOF_flag = TRUE; + return EOF; + } + } + else return lineBuf[linepos++]; +} + +/* ungetNextChar backtracks one character + in lineBuf */ +static void ungetNextChar(void) +{ if (!EOF_flag) linepos-- ;} + +/* lookup table of reserved words */ +static struct + { char* str; + TokenType tok; + } reservedWords[MAXRESERVED] + = {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END}, + {"repeat",REPEAT},{"until",UNTIL},{"read",READ}, + {"write",WRITE}}; + +/* lookup an identifier to see if it is a reserved word */ +/* uses linear search */ +static TokenType reservedLookup (char * s) +{ int i; + for (i=0;i +#include +#include +#include "symtab.h" + +/* SIZE is the size of the hash table */ +#define SIZE 211 + +/* SHIFT is the power of two used as multiplier + in hash function */ +#define SHIFT 4 + +/* the hash function */ +static int hash ( char * key ) +{ int temp = 0; + int i = 0; + while (key[i] != '\0') + { temp = ((temp << SHIFT) + key[i]) % SIZE; + 
++i; + } + return temp; +} + +/* the list of line numbers of the source + * code in which a variable is referenced + */ +typedef struct LineListRec + { int lineno; + struct LineListRec * next; + } * LineList; + +/* The record in the bucket lists for + * each variable, including name, + * assigned memory location, and + * the list of line numbers in which + * it appears in the source code + */ +typedef struct BucketListRec + { char * name; + LineList lines; + int memloc ; /* memory location for variable */ + struct BucketListRec * next; + } * BucketList; + +/* the hash table */ +static BucketList hashTable[SIZE]; + +/* Procedure st_insert inserts line numbers and + * memory locations into the symbol table + * loc = memory location is inserted only the + * first time, otherwise ignored + */ +void st_insert( char * name, int lineno, int loc ) +{ int h = hash(name); + BucketList l = hashTable[h]; + while ((l != NULL) && (strcmp(name,l->name) != 0)) + l = l->next; + if (l == NULL) /* variable not yet in table */ + { l = (BucketList) malloc(sizeof(struct BucketListRec)); + l->name = name; + l->lines = (LineList) malloc(sizeof(struct LineListRec)); + l->lines->lineno = lineno; + l->memloc = loc; + l->lines->next = NULL; + l->next = hashTable[h]; + hashTable[h] = l; } + else /* found in table, so just add line number */ + { LineList t = l->lines; + while (t->next != NULL) t = t->next; + t->next = (LineList) malloc(sizeof(struct LineListRec)); + t->next->lineno = lineno; + t->next->next = NULL; + } +} /* st_insert */ + +/* Function st_lookup returns the memory + * location of a variable or -1 if not found + */ +int st_lookup ( char * name ) +{ int h = hash(name); + BucketList l = hashTable[h]; + while ((l != NULL) && (strcmp(name,l->name) != 0)) + l = l->next; + if (l == NULL) return -1; + else return l->memloc; +} + +/* Procedure printSymTab prints a formatted + * listing of the symbol table contents + * to the listing file + */ +void printSymTab(FILE * listing) +{ int i; 
+ fprintf(listing,"Variable Name Location Line Numbers\n"); + fprintf(listing,"------------- -------- ------------\n"); + for (i=0;ilines; + fprintf(listing,"%-14s ",l->name); + fprintf(listing,"%-8d ",l->memloc); + while (t != NULL) + { fprintf(listing,"%4d ",t->lineno); + t = t->next; + } + fprintf(listing,"\n"); + l = l->next; + } + } + } +} /* printSymTab */ diff --git a/src/symtab.h b/src/symtab.h new file mode 100644 index 0000000..bf30bbf --- /dev/null +++ b/src/symtab.h @@ -0,0 +1,32 @@ +/****************************************************/ +/* File: symtab.h */ +/* Symbol table interface for the TINY compiler */ +/* (allows only one symbol table) */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +#ifndef _SYMTAB_H_ +#define _SYMTAB_H_ + +#include "globals.h" + +/* Procedure st_insert inserts line numbers and + * memory locations into the symbol table + * loc = memory location is inserted only the + * first time, otherwise ignored + */ +void st_insert( char * name, int lineno, int loc ); + +/* Function st_lookup returns the memory + * location of a variable or -1 if not found + */ +int st_lookup ( char * name ); + +/* Procedure printSymTab prints a formatted + * listing of the symbol table contents + * to the listing file + */ +void printSymTab(FILE * listing); + +#endif diff --git a/src/util.c b/src/util.c new file mode 100644 index 0000000..cc45fcb --- /dev/null +++ b/src/util.c @@ -0,0 +1,177 @@ +/****************************************************/ +/* File: util.c */ +/* Utility function implementation */ +/* for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#include "globals.h" +#include "util.h" + +/* Procedure printToken prints a token + * and its lexeme to the listing file + */ +void printToken( TokenType token, const char* tokenString ) +{ switch (token) + { case IF: + case THEN: + case ELSE: + case END: + case REPEAT: + case UNTIL: + case READ: + case WRITE: + fprintf(listing, + "reserved word: %s\n",tokenString); + break; + case ASSIGN: fprintf(listing,":=\n"); break; + case LT: fprintf(listing,"<\n"); break; + case EQ: fprintf(listing,"=\n"); break; + case LPAREN: fprintf(listing,"(\n"); break; + case RPAREN: fprintf(listing,")\n"); break; + case SEMI: fprintf(listing,";\n"); break; + case PLUS: fprintf(listing,"+\n"); break; + case MINUS: fprintf(listing,"-\n"); break; + case TIMES: fprintf(listing,"*\n"); break; + case OVER: fprintf(listing,"/\n"); break; + case ENDFILE: fprintf(listing,"EOF\n"); break; + case NUM: + fprintf(listing, + "NUM, val= %s\n",tokenString); + break; + case ID: + fprintf(listing, + "ID, name= %s\n",tokenString); + break; + case ERROR: + fprintf(listing, + "ERROR: %s\n",tokenString); + break; + default: /* should never happen */ + fprintf(listing,"Unknown token: %d\n",token); + } +} + +/* Function newStmtNode creates a new statement + * node for syntax tree construction + */ +TreeNode * newStmtNode(StmtKind kind) +{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode)); + int i; + if (t==NULL) + fprintf(listing,"Out of memory error at line %d\n",lineno); + else { + for (i=0;ichild[i] = NULL; + t->sibling = NULL; + t->nodekind = StmtK; + t->kind.stmt = kind; + t->lineno = lineno; + } + return t; +} + +/* Function newExpNode creates a new expression + * node for syntax tree construction + */ +TreeNode * newExpNode(ExpKind kind) +{ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode)); + int i; + if (t==NULL) + fprintf(listing,"Out of memory error at line %d\n",lineno); + else { + for (i=0;ichild[i] = NULL; + t->sibling = 
NULL; + t->nodekind = ExpK; + t->kind.exp = kind; + t->lineno = lineno; + t->type = Void; + } + return t; +} + +/* Function copyString allocates and makes a new + * copy of an existing string + */ +char * copyString(char * s) +{ int n; + char * t; + if (s==NULL) return NULL; + n = strlen(s)+1; + t = malloc(n); + if (t==NULL) + fprintf(listing,"Out of memory error at line %d\n",lineno); + else strcpy(t,s); + return t; +} + +/* Variable indentno is used by printTree to + * store current number of spaces to indent + */ +static indentno = 0; + +/* macros to increase/decrease indentation */ +#define INDENT indentno+=2 +#define UNINDENT indentno-=2 + +/* printSpaces indents by printing spaces */ +static void printSpaces(void) +{ int i; + for (i=0;inodekind==StmtK) + { switch (tree->kind.stmt) { + case IfK: + fprintf(listing,"If\n"); + break; + case RepeatK: + fprintf(listing,"Repeat\n"); + break; + case AssignK: + fprintf(listing,"Assign to: %s\n",tree->attr.name); + break; + case ReadK: + fprintf(listing,"Read: %s\n",tree->attr.name); + break; + case WriteK: + fprintf(listing,"Write\n"); + break; + default: + fprintf(listing,"Unknown ExpNode kind\n"); + break; + } + } + else if (tree->nodekind==ExpK) + { switch (tree->kind.exp) { + case OpK: + fprintf(listing,"Op: "); + printToken(tree->attr.op,"\0"); + break; + case ConstK: + fprintf(listing,"Const: %d\n",tree->attr.val); + break; + case IdK: + fprintf(listing,"Id: %s\n",tree->attr.name); + break; + default: + fprintf(listing,"Unknown ExpNode kind\n"); + break; + } + } + else fprintf(listing,"Unknown node kind\n"); + for (i=0;ichild[i]); + tree = tree->sibling; + } + UNINDENT; +} diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..36cfb83 --- /dev/null +++ b/src/util.h @@ -0,0 +1,38 @@ +/****************************************************/ +/* File: util.h */ +/* Utility functions for the TINY compiler */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. 
Louden */ +/****************************************************/ + +#ifndef _UTIL_H_ +#define _UTIL_H_ + +#include "globals.h" + +/* Procedure printToken prints a token + * and its lexeme to the listing file + */ +void printToken( TokenType, const char* ); + +/* Function newStmtNode creates a new statement + * node for syntax tree construction + */ +TreeNode * newStmtNode(StmtKind); + +/* Function newExpNode creates a new expression + * node for syntax tree construction + */ +TreeNode * newExpNode(ExpKind); + +/* Function copyString allocates and makes a new + * copy of an existing string + */ +char * copyString( char * ); + +/* procedure printTree prints a syntax tree to the + * listing file using indentation to indicate subtrees + */ +void printTree( TreeNode * ); + +#endif diff --git a/src/yacc/globals.h b/src/yacc/globals.h new file mode 100644 index 0000000..cdd41c7 --- /dev/null +++ b/src/yacc/globals.h @@ -0,0 +1,120 @@ +/****************************************************/ +/* File: globals.h */ +/* Yacc/Bison Version */ +/* Global types and vars for TINY compiler */ +/* must come before other include files */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ + +#ifndef _GLOBALS_H_ +#define _GLOBALS_H_ + +#include +#include +#include +#include + +/* Yacc/Bison generates internally its own values + * for the tokens. 
Other files can access these values + * by including the tab.h file generated using the + * Yacc/Bison option -d ("generate header") + * + * The YYPARSER flag prevents inclusion of the tab.h + * into the Yacc/Bison output itself + */ + +#ifndef YYPARSER + +/* the name of the following file may change */ +#include "y.tab.h" + +/* ENDFILE is implicitly defined by Yacc/Bison, + * and not included in the tab.h file + */ +#define ENDFILE 0 + +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +/* MAXRESERVED = the number of reserved words */ +#define MAXRESERVED 8 + +/* Yacc/Bison generates its own integer values + * for tokens + */ +typedef int TokenType; + +extern FILE* source; /* source code text file */ +extern FILE* listing; /* listing output text file */ +extern FILE* code; /* code text file for TM simulator */ + +extern int lineno; /* source line number for listing */ + +/**************************************************/ +/*********** Syntax tree for parsing ************/ +/**************************************************/ + +typedef enum {StmtK,ExpK} NodeKind; +typedef enum {IfK,RepeatK,AssignK,ReadK,WriteK} StmtKind; +typedef enum {OpK,ConstK,IdK} ExpKind; + +/* ExpType is used for type checking */ +typedef enum {Void,Integer,Boolean} ExpType; + +#define MAXCHILDREN 3 + +typedef struct treeNode + { struct treeNode * child[MAXCHILDREN]; + struct treeNode * sibling; + int lineno; + NodeKind nodekind; + union { StmtKind stmt; ExpKind exp;} kind; + union { TokenType op; + int val; + char * name; } attr; + ExpType type; /* for type checking of exps */ + } TreeNode; + +/**************************************************/ +/*********** Flags for tracing ************/ +/**************************************************/ + +/* EchoSource = TRUE causes the source program to + * be echoed to the listing file with line numbers + * during parsing + */ +extern int EchoSource; + +/* TraceScan = TRUE causes token information to be + 
* printed to the listing file as each token is + * recognized by the scanner + */ +extern int TraceScan; + +/* TraceParse = TRUE causes the syntax tree to be + * printed to the listing file in linearized form + * (using indents for children) + */ +extern int TraceParse; + +/* TraceAnalyze = TRUE causes symbol table inserts + * and lookups to be reported to the listing file + */ +extern int TraceAnalyze; + +/* TraceCode = TRUE causes comments to be written + * to the TM code file as code is generated + */ +extern int TraceCode; + +/* Error = TRUE prevents further passes if an error occurs */ +extern int Error; +#endif diff --git a/src/yacc/tiny.y b/src/yacc/tiny.y new file mode 100644 index 0000000..b1d6bc0 --- /dev/null +++ b/src/yacc/tiny.y @@ -0,0 +1,164 @@ +/****************************************************/ +/* File: tiny.y */ +/* The TINY Yacc/Bison specification file */ +/* Compiler Construction: Principles and Practice */ +/* Kenneth C. Louden */ +/****************************************************/ +%{ +#define YYPARSER /* distinguishes Yacc output from other code files */ + +#include "globals.h" +#include "util.h" +#include "scan.h" +#include "parse.h" + +#define YYSTYPE TreeNode * +static char * savedName; /* for use in assignments */ +static int savedLineNo; /* ditto */ +static TreeNode * savedTree; /* stores syntax tree for later return */ +static int yylex(void); // added 11/2/11 to ensure no conflict with lex + +%} + +%token IF THEN ELSE END REPEAT UNTIL READ WRITE +%token ID NUM +%token ASSIGN EQ LT PLUS MINUS TIMES OVER LPAREN RPAREN SEMI +%token ERROR + +%% /* Grammar for TINY */ + +program : stmt_seq + { savedTree = $1;} + ; +stmt_seq : stmt_seq SEMI stmt + { YYSTYPE t = $1; + if (t != NULL) + { while (t->sibling != NULL) + t = t->sibling; + t->sibling = $3; + $$ = $1; } + else $$ = $3; + } + | stmt { $$ = $1; } + ; +stmt : if_stmt { $$ = $1; } + | repeat_stmt { $$ = $1; } + | assign_stmt { $$ = $1; } + | read_stmt { $$ = $1; } + | write_stmt 
{ $$ = $1; } + | error { $$ = NULL; } + ; +if_stmt : IF exp THEN stmt_seq END + { $$ = newStmtNode(IfK); + $$->child[0] = $2; + $$->child[1] = $4; + } + | IF exp THEN stmt_seq ELSE stmt_seq END + { $$ = newStmtNode(IfK); + $$->child[0] = $2; + $$->child[1] = $4; + $$->child[2] = $6; + } + ; +repeat_stmt : REPEAT stmt_seq UNTIL exp + { $$ = newStmtNode(RepeatK); + $$->child[0] = $2; + $$->child[1] = $4; + } + ; +assign_stmt : ID { savedName = copyString(tokenString); + savedLineNo = lineno; } + ASSIGN exp + { $$ = newStmtNode(AssignK); + $$->child[0] = $4; + $$->attr.name = savedName; + $$->lineno = savedLineNo; + } + ; +read_stmt : READ ID + { $$ = newStmtNode(ReadK); + $$->attr.name = + copyString(tokenString); + } + ; +write_stmt : WRITE exp + { $$ = newStmtNode(WriteK); + $$->child[0] = $2; + } + ; +exp : simple_exp LT simple_exp + { $$ = newExpNode(OpK); + $$->child[0] = $1; + $$->child[1] = $3; + $$->attr.op = LT; + } + | simple_exp EQ simple_exp + { $$ = newExpNode(OpK); + $$->child[0] = $1; + $$->child[1] = $3; + $$->attr.op = EQ; + } + | simple_exp { $$ = $1; } + ; +simple_exp : simple_exp PLUS term + { $$ = newExpNode(OpK); + $$->child[0] = $1; + $$->child[1] = $3; + $$->attr.op = PLUS; + } + | simple_exp MINUS term + { $$ = newExpNode(OpK); + $$->child[0] = $1; + $$->child[1] = $3; + $$->attr.op = MINUS; + } + | term { $$ = $1; } + ; +term : term TIMES factor + { $$ = newExpNode(OpK); + $$->child[0] = $1; + $$->child[1] = $3; + $$->attr.op = TIMES; + } + | term OVER factor + { $$ = newExpNode(OpK); + $$->child[0] = $1; + $$->child[1] = $3; + $$->attr.op = OVER; + } + | factor { $$ = $1; } + ; +factor : LPAREN exp RPAREN + { $$ = $2; } + | NUM + { $$ = newExpNode(ConstK); + $$->attr.val = atoi(tokenString); + } + | ID { $$ = newExpNode(IdK); + $$->attr.name = + copyString(tokenString); + } + | error { $$ = NULL; } + ; + +%% + +int yyerror(char * message) +{ fprintf(listing,"Syntax error at line %d: %s\n",lineno,message); + fprintf(listing,"Current token: 
"); + printToken(yychar,tokenString); + Error = TRUE; + return 0; +} + +/* yylex calls getToken to make Yacc/Bison output + * compatible with ealier versions of the TINY scanner + */ +static int yylex(void) +{ return getToken(); } + +TreeNode * parse(void) +{ yyparse(); + return savedTree; +} +