initial commit

This commit is contained in:
2025-09-09 14:12:03 +09:00
commit 28a4827cb1
27 changed files with 2143 additions and 0 deletions

3
.gitattributes vendored Normal file
View File

@@ -0,0 +1,3 @@
# Macro attribute: sends a path through the "lfs" filter/diff/merge drivers
# and unsets the text attribute for it.
[attr]lfs-file filter=lfs diff=lfs merge=lfs -text
# All PDF files use the lfs-file macro.
*.pdf lfs-file

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# Local editor/IDE settings directories (presumably JetBrains and VS Code)
.idea
.vscode

62
1.md Normal file
View File

@@ -0,0 +1,62 @@
컴파일러 1
===
# Interpreter in Modern Processors
# Compiler
* Front-End
보이는 부분(HW에 신경을 안써도 됨)
* Back-End
HW에 밀접한 최적화를 해줌
## General Structure of a Modern Compiler
* Front-End
* Lexical Analysis
* Syntax Analysis
* Semantic Analysis
* Code Generation - 1
* Back-End
* Control/DataFlow Analysis
* Optimization
* Code Generation - 2
이런 모듈식 구조는 다양한 언어와 하드웨어에 쉽게 적용할 수 있도록 돕는다.
### Lexical Analysis (Scanner)
프로그램을 `token`의 의미 단위로 나눔.
그리고 의미가 없는 단위를 지움.
보통 FSA로 구현함.
### Syntax Analysis (Parser)
미리 정의된 Grammar를 바탕으로 Syntax Correctness를 검사함.
### Semantic Analysis
* identifier 정의 등
* 타입 체크
* 스코프 체크
* 오버로딩 모호성 해소
* IR로 변환
### Optimization
최적화함
* 상수 최적화
* 안쓰는 변수
* loop에서 안바뀌는 변수
* 다시 똑같이 계산하는 변수 제거
* ...
### Code Generation

72
2.md Normal file
View File

@@ -0,0 +1,72 @@
Lexical Analysis
===
포트란은 모든 whitespace를 지움.
```fortran
do 5 I = 1.25
```
```
do 5 I = 1,25
```
## Tokens
대표적인 토큰의 예시
* Identifiers
* Keywords
* Integers
* Floating-points
* Symbols
* Strings
Lexical Analysis를 하기 위해서 필요한 것
* Specification
확실하게 명세를 해줘야함.
* Recognition
DFA를 이용해서 패턴 매칭
* Automation
RE로 부터 DFA를 generate해야함
Lex라는 툴을 이용
그러나 내부적으로는 Thompson's construction (RE -> NFA), Subset Construction (NFA -> DFA)도 알아야함
## Specification
Regular Expression
Multiple Matches
`elsex = 0`이라는 코드에서
`else / x / = / 0`
또는
`elsex / = 0` 두가지 선택지가 있음. 둘 중 하나를 무조건 골라야함. 이때 가장 긴 토큰이 선택된다.
* `elsex`가 `else`보다 더 길어서 `elsex`가 선택됨.
만약에 두 경우가 모두 똑같다면 토큰 종류의 우선순위에 따라 선택된다.
* `Keyword`가 `Identifier`보다 우선순위가 더 높음.
## Recognition
FSA를 이용함.
DFA와 NFA의 표현력은 동일하나 DFA는 편하게 구현할 수 있다는 장점이 있음.
NFA는 RE로부터 쉽게 변환가능하다는 장점이 있음.
**Lexical Analysis**

18
Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
# Build/debug environment image for the compiler sources in this repository.
FROM ubuntu:22.04
# Non-interactive apt front end and a UTF-8 locale for all later steps.
ENV DEBIAN_FRONTEND=noninteractive LANG=C.UTF-8 LC_ALL=C.UTF-8
# Install the C/C++ toolchain, build tools, editors, flex/bison, git,
# Python 3, download tools and debuggers in a single layer, then delete
# the apt package lists that were fetched by "apt-get update".
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential gcc g++ make cmake ninja-build \
vim nano sudo \
flex bison libfl-dev \
git ca-certificates pkg-config \
python3 python3-pip \
curl wget \
gdb valgrind \
&& rm -rf /var/lib/apt/lists/*
# Working directory used by the container.
WORKDIR /work
# Default: interactive shell inside the container
CMD ["bash"]

BIN
pdf/L0.pdf Normal file

Binary file not shown.

BIN
pdf/L1.pdf Normal file

Binary file not shown.

BIN
pdf/L2.pdf Normal file

Binary file not shown.

159
src/analyze.c Normal file
View File

@@ -0,0 +1,159 @@
/****************************************************/
/* File: analyze.c */
/* Semantic analyzer implementation */
/* for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "symtab.h"
#include "analyze.h"
/* counter for variable memory locations; insertNode passes its
 * current value to st_insert and then increments it each time a
 * name is not yet in the symbol table
 */
static int location = 0;
/* Procedure traverse walks the syntax tree rooted at t.
 * For each node it calls preProc, then traverses every child
 * slot, then calls postProc; afterwards it continues with the
 * node's sibling. A NULL tree is a no-op.
 */
static void traverse( TreeNode * t,
                      void (* preProc) (TreeNode *),
                      void (* postProc) (TreeNode *) )
{ TreeNode * node;
  /* the sibling chain is followed iteratively; the sibling link is
     read only after postProc has run on the current node */
  for (node = t; node != NULL; node = node->sibling)
  { int k;
    preProc(node);
    for (k = 0; k < MAXCHILDREN; k++)
      traverse(node->child[k],preProc,postProc);
    postProc(node);
  }
}
/* nullProc does nothing. It is passed to traverse as the
 * preProc or postProc argument when only one of the two
 * visiting orders is wanted.
 */
static void nullProc(TreeNode * t)
{ (void) t; /* argument is intentionally ignored */
}
/* insertName records the identifier t->attr.name in the symbol
 * table together with the line number of node t.
 */
static void insertName( TreeNode * t)
{ if (st_lookup(t->attr.name) == -1)
    /* not yet in table, so treat as new definition */
    st_insert(t->attr.name,t->lineno,location++);
  else
    /* already in table, so ignore location,
       add line number of use only */
    st_insert(t->attr.name,t->lineno,0);
}

/* Procedure insertNode inserts the identifier stored in t
 * into the symbol table. Only assignment statements, read
 * statements and identifier expressions carry a name; every
 * other node is left alone.
 */
static void insertNode( TreeNode * t)
{ if (t->nodekind == StmtK)
  { if ((t->kind.stmt == AssignK) || (t->kind.stmt == ReadK))
      insertName(t);
  }
  else if (t->nodekind == ExpK)
  { if (t->kind.exp == IdK)
      insertName(t);
  }
}
/* Function buildSymtab fills the symbol table by running
 * insertNode over the syntax tree in preorder. When
 * TraceAnalyze is set the finished table is printed to the
 * listing file.
 */
void buildSymtab(TreeNode * syntaxTree)
{ traverse(syntaxTree,insertNode,nullProc);
  if (!TraceAnalyze)
    return;
  fprintf(listing,"\nSymbol table:\n\n");
  printSymTab(listing);
}
/* typeError reports a type error for node t on the listing
 * file, using the node's line number, and sets the global
 * Error flag.
 */
static void typeError(TreeNode * t, char * message)
{ Error = TRUE;
  fprintf(listing,"Type error at line %d: %s\n",t->lineno,message);
}
/* Procedure checkNode performs type checking at a single
 * tree node. Expression nodes get their type field set;
 * statement nodes only have their operand types verified.
 * It reads the type of the node's children, so the children
 * must have been checked first.
 */
static void checkNode(TreeNode * t)
{ if (t->nodekind == ExpK)
  { if (t->kind.exp == OpK)
    { /* both operands must be Integer; the second operand is
         examined only when the first one is Integer */
      if (!((t->child[0]->type == Integer) &&
            (t->child[1]->type == Integer)))
        typeError(t,"Op applied to non-integer");
      /* comparisons yield Boolean, arithmetic yields Integer */
      t->type = ((t->attr.op == EQ) || (t->attr.op == LT))
                  ? Boolean : Integer;
    }
    else if ((t->kind.exp == ConstK) || (t->kind.exp == IdK))
      t->type = Integer;
    return;
  }
  if (t->nodekind != StmtK)
    return;
  switch (t->kind.stmt)
  { case IfK:
      if (t->child[0]->type == Integer)
        typeError(t->child[0],"if test is not Boolean");
      break;
    case AssignK:
      if (t->child[0]->type != Integer)
        typeError(t->child[0],"assignment of non-integer value");
      break;
    case WriteK:
      if (t->child[0]->type != Integer)
        typeError(t->child[0],"write of non-integer value");
      break;
    case RepeatK:
      if (t->child[1]->type == Integer)
        typeError(t->child[1],"repeat test is not Boolean");
      break;
    default:
      break;
  }
}
/* Procedure typeCheck type-checks the whole syntax tree.
 * checkNode is applied in postorder, so every node is
 * checked after its children.
 */
void typeCheck(TreeNode * syntaxTree)
{
  traverse(syntaxTree, nullProc, checkNode);
}

23
src/analyze.h Normal file
View File

@@ -0,0 +1,23 @@
/****************************************************/
/* File: analyze.h */
/* Semantic analyzer interface for TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _ANALYZE_H_
#define _ANALYZE_H_
/* globals.h supplies the TreeNode type used in the prototypes below */
#include "globals.h"
/* Function buildSymtab constructs the symbol
 * table by preorder traversal of the syntax tree.
 * The argument is the root of the syntax tree.
 */
void buildSymtab(TreeNode *);
/* Procedure typeCheck performs type checking
 * by a postorder syntax tree traversal.
 * The argument is the root of the syntax tree.
 */
void typeCheck(TreeNode *);
#endif

212
src/cgen.c Normal file
View File

@@ -0,0 +1,212 @@
/****************************************************/
/* File: cgen.c */
/* The code generator implementation */
/* for the TINY compiler */
/* (generates code for the TM machine) */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "symtab.h"
#include "code.h"
#include "cgen.h"
/* tmpOffset is the memory offset for temps.
   It is decremented each time a temp is
   stored, and incremented when loaded again
   (see the OpK case of genExp).
*/
static int tmpOffset = 0;
/* prototype for internal recursive code generator;
   needed because genStmt and genExp call cGen before it is defined */
static void cGen (TreeNode * tree);
/* Procedure genStmt generates code at a statement node.
 * It dispatches on tree->kind.stmt; statement kinds without a
 * case emit nothing. Child code is generated through cGen.
 */
static void genStmt( TreeNode * tree)
{ TreeNode * p1, * p2, * p3;
int savedLoc1,savedLoc2,currentLoc;
int loc;
switch (tree->kind.stmt) {
case IfK :
if (TraceCode) emitComment("-> if") ;
/* child[0] = test, child[1] = then part, child[2] = else part */
p1 = tree->child[0] ;
p2 = tree->child[1] ;
p3 = tree->child[2] ;
/* generate code for test expression */
cGen(p1);
/* reserve one location; the jump to the else part is
   backpatched into it below */
savedLoc1 = emitSkip(1) ;
emitComment("if: jump to else belongs here");
/* recurse on then part */
cGen(p2);
/* reserve one location for the jump over the else part */
savedLoc2 = emitSkip(1) ;
emitComment("if: jump to end belongs here");
/* emitSkip(0) skips nothing; it only returns the current location,
   which is where the else part starts */
currentLoc = emitSkip(0) ;
/* go back and fill in the first reserved location */
emitBackup(savedLoc1) ;
emitRM_Abs("JEQ",ac,currentLoc,"if: jmp to else");
emitRestore() ;
/* recurse on else part */
cGen(p3);
/* current location is now the end of the whole if statement */
currentLoc = emitSkip(0) ;
/* go back and fill in the second reserved location */
emitBackup(savedLoc2) ;
emitRM_Abs("LDA",pc,currentLoc,"jmp to end") ;
emitRestore() ;
if (TraceCode) emitComment("<- if") ;
break; /* if_k */
case RepeatK:
if (TraceCode) emitComment("-> repeat") ;
/* child[0] = body, child[1] = test */
p1 = tree->child[0] ;
p2 = tree->child[1] ;
/* remember where the body starts; nothing is skipped */
savedLoc1 = emitSkip(0);
emitComment("repeat: jump after body comes back here");
/* generate code for body */
cGen(p1);
/* generate code for test */
cGen(p2);
/* conditional jump back to the start of the body */
emitRM_Abs("JEQ",ac,savedLoc1,"repeat: jmp back to body");
if (TraceCode) emitComment("<- repeat") ;
break; /* repeat */
case AssignK:
if (TraceCode) emitComment("-> assign") ;
/* generate code for rhs */
cGen(tree->child[0]);
/* now store value at the variable's symbol-table location,
   addressed relative to gp */
loc = st_lookup(tree->attr.name);
emitRM("ST",ac,loc,gp,"assign: store value");
if (TraceCode) emitComment("<- assign") ;
break; /* assign_k */
case ReadK:
/* read a value into ac, then store it in the named variable */
emitRO("IN",ac,0,0,"read integer value");
loc = st_lookup(tree->attr.name);
emitRM("ST",ac,loc,gp,"read: store value");
break;
case WriteK:
/* generate code for expression to write */
cGen(tree->child[0]);
/* now output it */
emitRO("OUT",ac,0,0,"write ac");
break;
default:
break;
}
} /* genStmt */
/* Procedure genExp generates code at an expression node.
 * It dispatches on tree->kind.exp; each case emits code that
 * targets register ac.
 */
static void genExp( TreeNode * tree)
{ int loc;
TreeNode * p1, * p2;
switch (tree->kind.exp) {
case ConstK :
if (TraceCode) emitComment("-> Const") ;
/* gen code to load integer constant using LDC */
emitRM("LDC",ac,tree->attr.val,0,"load const");
if (TraceCode) emitComment("<- Const") ;
break; /* ConstK */
case IdK :
if (TraceCode) emitComment("-> Id") ;
/* load the variable from its symbol-table location, relative to gp */
loc = st_lookup(tree->attr.name);
emitRM("LD",ac,loc,gp,"load id value");
if (TraceCode) emitComment("<- Id") ;
break; /* IdK */
case OpK :
if (TraceCode) emitComment("-> Op") ;
p1 = tree->child[0];
p2 = tree->child[1];
/* gen code for ac = left arg */
cGen(p1);
/* gen code to push left operand: store at the current temp
   offset (relative to mp), then decrement the offset */
emitRM("ST",ac,tmpOffset--,mp,"op: push left");
/* gen code for ac = right operand */
cGen(p2);
/* now load left operand into ac1; the pre-increment undoes
   the decrement made by the push above */
emitRM("LD",ac1,++tmpOffset,mp,"op: load left");
switch (tree->attr.op) {
case PLUS :
emitRO("ADD",ac,ac1,ac,"op +");
break;
case MINUS :
emitRO("SUB",ac,ac1,ac,"op -");
break;
case TIMES :
emitRO("MUL",ac,ac1,ac,"op *");
break;
case OVER :
emitRO("DIV",ac,ac1,ac,"op /");
break;
case LT :
/* subtract, then use a conditional jump to select the
   constant 1 (true case) or 0 (false case) for ac */
emitRO("SUB",ac,ac1,ac,"op <") ;
emitRM("JLT",ac,2,pc,"br if true") ;
emitRM("LDC",ac,0,ac,"false case") ;
emitRM("LDA",pc,1,pc,"unconditional jmp") ;
emitRM("LDC",ac,1,ac,"true case") ;
break;
case EQ :
/* same five-instruction pattern as LT, with JEQ as the test */
emitRO("SUB",ac,ac1,ac,"op ==") ;
emitRM("JEQ",ac,2,pc,"br if true");
emitRM("LDC",ac,0,ac,"false case") ;
emitRM("LDA",pc,1,pc,"unconditional jmp") ;
emitRM("LDC",ac,1,ac,"true case") ;
break;
default:
emitComment("BUG: Unknown operator");
break;
} /* case op */
if (TraceCode) emitComment("<- Op") ;
break; /* OpK */
default:
break;
}
} /* genExp */
/* Procedure cGen generates code for a node and for every
 * node that follows it on the sibling chain. Statement nodes
 * go to genStmt, expression nodes to genExp; any other node
 * kind emits nothing. A NULL tree is a no-op.
 */
static void cGen( TreeNode * tree)
{ for ( ; tree != NULL; tree = tree->sibling)
  { if (tree->nodekind == StmtK)
      genStmt(tree);
    else if (tree->nodekind == ExpK)
      genExp(tree);
  }
}
/**********************************************/
/* the primary function of the code generator */
/**********************************************/
/* Procedure codeGen generates code to a code
 * file by traversal of the syntax tree.
 *   syntaxTree = root of the tree to translate
 *   codefile   = file name of the code file; it is only used
 *                to print the name as a comment in the code file
 * The temporary "File: <name>" string is released before
 * returning. If it cannot be allocated, that single comment
 * line is omitted and code generation continues.
 */
void codeGen(TreeNode * syntaxTree, char * codefile)
{ static const char prefix[] = "File: ";
  /* sizeof prefix includes the terminating NUL, so the buffer holds
     exactly prefix + codefile + NUL */
  char * s = malloc(strlen(codefile) + sizeof prefix);
  emitComment("TINY Compilation to TM Code");
  if (s != NULL)
  { strcpy(s,prefix);
    strcat(s,codefile);
    emitComment(s);
    /* emitComment only prints the string, so it can be freed here */
    free(s);
  }
  /* generate standard prelude */
  emitComment("Standard prelude:");
  emitRM("LD",mp,0,ac,"load maxaddress from location 0");
  emitRM("ST",ac,0,ac,"clear location 0");
  emitComment("End of standard prelude.");
  /* generate code for TINY program */
  cGen(syntaxTree);
  /* finish */
  emitComment("End of execution.");
  emitRO("HALT",0,0,0,"");
}

19
src/cgen.h Normal file
View File

@@ -0,0 +1,19 @@
/****************************************************/
/* File: cgen.h */
/* The code generator interface to the TINY compiler*/
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _CGEN_H_
#define _CGEN_H_
/* NOTE: this header uses TreeNode but does not include globals.h
 * itself; globals.h must be included before it.
 */
/* Procedure codeGen generates code to a code
 * file by traversal of the syntax tree. The
 * second parameter (codefile) is the file name
 * of the code file, and is used to print the
 * file name as a comment in the code file
 */
void codeGen(TreeNode * syntaxTree, char * codefile);
#endif

97
src/code.c Normal file
View File

@@ -0,0 +1,97 @@
/****************************************************/
/* File: code.c */
/* TM Code emitting utilities */
/* implementation for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "code.h"
/* TM location number for current instruction emission;
   advanced by every emit routine and by emitSkip */
static int emitLoc = 0 ;
/* Highest TM location emitted so far
For use in conjunction with emitSkip,
emitBackup, and emitRestore.
It is raised whenever emitLoc moves past it. */
static int highEmitLoc = 0;
/* Procedure emitComment writes the line "* <c>" to the code
 * file. Nothing is written unless TraceCode is set.
 */
void emitComment( char * c )
{ if (!TraceCode)
    return;
  fprintf(code,"* %s\n",c);
}
/* Procedure emitRO emits a register-only
 * TM instruction at the current location and
 * advances the location by one
 * op = the opcode
 * r = target register
 * s = 1st source register
 * t = 2nd source register
 * c = a comment to be printed if TraceCode is TRUE
 */
void emitRO( char *op, int r, int s, int t, char *c)
{ int here = emitLoc;
  emitLoc = here + 1;
  fprintf(code,"%3d: %5s %d,%d,%d ",here,op,r,s,t);
  if (TraceCode)
    fprintf(code,"\t%s",c);
  fputc('\n',code);
  /* keep track of the highest location reached */
  if (emitLoc > highEmitLoc)
    highEmitLoc = emitLoc;
} /* emitRO */
/* Procedure emitRM emits a register-to-memory
 * TM instruction at the current location and
 * advances the location by one
 * op = the opcode
 * r = target register
 * d = the offset
 * s = the base register
 * c = a comment to be printed if TraceCode is TRUE
 */
void emitRM( char * op, int r, int d, int s, char *c)
{ int here = emitLoc;
  emitLoc = here + 1;
  fprintf(code,"%3d: %5s %d,%d(%d) ",here,op,r,d,s);
  if (TraceCode)
    fprintf(code,"\t%s",c);
  fputc('\n',code);
  /* keep track of the highest location reached */
  if (emitLoc > highEmitLoc)
    highEmitLoc = emitLoc;
} /* emitRM */
/* Function emitSkip advances the current code location by
 * "howMany" without emitting anything, leaving room for a
 * later backpatch. It returns the location that was current
 * before the skip; emitSkip(0) therefore just reports the
 * current location.
 */
int emitSkip( int howMany)
{ int start = emitLoc;
  emitLoc = start + howMany;
  if (emitLoc > highEmitLoc)
    highEmitLoc = emitLoc;
  return start;
} /* emitSkip */
/* Procedure emitBackup moves the current code location to
 * loc = a previously skipped location.
 * A loc beyond the highest location reached so far is
 * reported with a comment in the code file, but the move
 * is still performed.
 */
void emitBackup( int loc)
{ if (highEmitLoc < loc)
    emitComment("BUG in emitBackup");
  emitLoc = loc;
} /* emitBackup */
/* Procedure emitRestore sets the current code location
 * back to the highest location reached so far, i.e. the
 * first position that has not been emitted yet.
 */
void emitRestore(void)
{
  emitLoc = highEmitLoc;
}
/* Procedure emitRM_Abs emits a register-to-memory TM
 * instruction whose operand is given as an absolute
 * location; it is written pc-relative, as the offset
 * a - (current location + 1) with pc as the base register
 * op = the opcode
 * r = target register
 * a = the absolute location in memory
 * c = a comment to be printed if TraceCode is TRUE
 */
void emitRM_Abs( char *op, int r, int a, char * c)
{ int here = emitLoc;
  int offset = a - (here + 1);
  fprintf(code,"%3d: %5s %d,%d(%d) ",
          here,op,r,offset,pc);
  emitLoc = here + 1;
  if (TraceCode)
    fprintf(code,"\t%s",c);
  fputc('\n',code);
  /* keep track of the highest location reached */
  if (emitLoc > highEmitLoc)
    highEmitLoc = emitLoc;
} /* emitRM_Abs */

86
src/code.h Normal file
View File

@@ -0,0 +1,86 @@
/****************************************************/
/* File: code.h */
/* Code emitting utilities for the TINY compiler */
/* and interface to the TM machine */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _CODE_H_
#define _CODE_H_
/* Register numbers of the TM machine as used by the code generator.
 * NOTE: these are lower-case macros, so the names pc, mp, gp, ac and
 * ac1 cannot be used as identifiers in any file that includes this header.
 */
/* pc = program counter */
#define pc 7
/* mp = "memory pointer" points
 * to top of memory (for temp storage)
 */
#define mp 6
/* gp = "global pointer" points
 * to bottom of memory for (global)
 * variable storage
 */
#define gp 5
/* accumulator */
#define ac 0
/* 2nd accumulator */
#define ac1 1
/* code emitting utilities */
/* Procedure emitComment prints a comment line
 * with comment c in the code file
 * (only when TraceCode is TRUE)
 */
void emitComment( char * c );
/* Procedure emitRO emits a register-only
 * TM instruction
 * op = the opcode
 * r = target register
 * s = 1st source register
 * t = 2nd source register
 * c = a comment to be printed if TraceCode is TRUE
 */
void emitRO( char *op, int r, int s, int t, char *c);
/* Procedure emitRM emits a register-to-memory
 * TM instruction
 * op = the opcode
 * r = target register
 * d = the offset
 * s = the base register
 * c = a comment to be printed if TraceCode is TRUE
 */
void emitRM( char * op, int r, int d, int s, char *c);
/* Function emitSkip skips "howMany" code
 * locations for later backpatch. It also
 * returns the current code position
 * (the position before the skip)
 */
int emitSkip( int howMany);
/* Procedure emitBackup backs up to
 * loc = a previously skipped location
 */
void emitBackup( int loc);
/* Procedure emitRestore restores the current
 * code position to the highest previously
 * unemitted position
 */
void emitRestore(void);
/* Procedure emitRM_Abs converts an absolute reference
 * to a pc-relative reference when emitting a
 * register-to-memory TM instruction
 * op = the opcode
 * r = target register
 * a = the absolute location in memory
 * c = a comment to be printed if TraceCode is TRUE
 */
void emitRM_Abs( char *op, int r, int a, char * c);
#endif

104
src/globals.h Normal file
View File

@@ -0,0 +1,104 @@
/****************************************************/
/* File: globals.h */
/* Global types and vars for TINY compiler */
/* must come before other include files */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _GLOBALS_H_
#define _GLOBALS_H_
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
/* MAXRESERVED = the number of reserved words;
 * it must match the number of reserved-word enumerators below
 * (IF through WRITE) and sizes the reservedWords table in scan.c
 */
#define MAXRESERVED 8
/* TokenType enumerates every token the scanner can return.
 * ENDFILE is the first enumerator and therefore has the value 0.
 */
typedef enum
/* book-keeping tokens */
{ENDFILE,ERROR,
/* reserved words */
IF,THEN,ELSE,END,REPEAT,UNTIL,READ,WRITE,
/* multicharacter tokens */
ID,NUM,
/* special symbols */
ASSIGN,EQ,LT,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,SEMI
} TokenType;
/* The following globals are defined in main.c */
extern FILE* source; /* source code text file */
extern FILE* listing; /* listing output text file */
extern FILE* code; /* code text file for TM simulator */
extern int lineno; /* source line number for listing */
/**************************************************/
/*********** Syntax tree for parsing ************/
/**************************************************/
/* NodeKind selects which member of TreeNode.kind is valid */
typedef enum {StmtK,ExpK} NodeKind;
/* kinds of statement nodes (nodekind == StmtK) */
typedef enum {IfK,RepeatK,AssignK,ReadK,WriteK} StmtKind;
/* kinds of expression nodes (nodekind == ExpK) */
typedef enum {OpK,ConstK,IdK} ExpKind;
/* ExpType is used for type checking */
typedef enum {Void,Integer,Boolean} ExpType;
/* maximum number of children of a tree node */
#define MAXCHILDREN 3
/* One node of the syntax tree.
 * child[]  = sub-trees of this node
 * sibling  = next node on the same level (e.g. next statement in a sequence)
 * lineno   = source line number associated with the node
 * nodekind = statement or expression; selects the valid member of kind
 * attr     = op for OpK nodes, val for ConstK nodes, name for nodes
 *            that carry an identifier (IdK, AssignK, ReadK)
 */
typedef struct treeNode
{ struct treeNode * child[MAXCHILDREN];
struct treeNode * sibling;
int lineno;
NodeKind nodekind;
union { StmtKind stmt; ExpKind exp;} kind;
union { TokenType op;
int val;
char * name; } attr;
ExpType type; /* for type checking of exps */
} TreeNode;
/**************************************************/
/*********** Flags for tracing ************/
/**************************************************/
/* All flags below are defined (and initialised to FALSE) in main.c */
/* EchoSource = TRUE causes the source program to
 * be echoed to the listing file with line numbers
 * during parsing
 */
extern int EchoSource;
/* TraceScan = TRUE causes token information to be
 * printed to the listing file as each token is
 * recognized by the scanner
 */
extern int TraceScan;
/* TraceParse = TRUE causes the syntax tree to be
 * printed to the listing file in linearized form
 * (using indents for children)
 */
extern int TraceParse;
/* TraceAnalyze = TRUE causes symbol table inserts
 * and lookups to be reported to the listing file
 * (in the code shown here it prints the finished symbol
 * table and the analysis progress messages)
 */
extern int TraceAnalyze;
/* TraceCode = TRUE causes comments to be written
 * to the TM code file as code is generated
 */
extern int TraceCode;
/* Error = TRUE prevents further passes if an error occurs */
extern int Error;
#endif

75
src/lex/cminus.l Normal file
View File

@@ -0,0 +1,75 @@
/****************************************************/
/* File: tiny.l */
/* Lex specification for TINY */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
%{
/* C prologue copied into the generated scanner */
#include "globals.h"
#include "util.h"
#include "scan.h"
/* lexeme of identifier or reserved word */
/* NOTE(review): scan.c defines tokenString as well, so presumably only
   one of the two scanners is linked into a build - confirm */
char tokenString[MAXTOKENLEN+1];
%}
/* Named patterns used by the rules below.
   Numbers are digit sequences and identifiers are letter sequences. */
digit [0-9]
number {digit}+
letter [a-zA-Z]
identifier {letter}+
newline \n
whitespace [ \t]+
%%
"if"            {return IF;}
"then"          {return THEN;}
"else"          {return ELSE;}
"end"           {return END;}
"repeat"        {return REPEAT;}
"until"         {return UNTIL;}
"read"          {return READ;}
"write"         {return WRITE;}
":="            {return ASSIGN;}
"="             {return EQ;}
"<"             {return LT;}
"+"             {return PLUS;}
"-"             {return MINUS;}
"*"             {return TIMES;}
"/"             {return OVER;}
"("             {return LPAREN;}
")"             {return RPAREN;}
";"             {return SEMI;}
{number}        {return NUM;}
{identifier}    {return ID;}
{newline}       {lineno++;}
{whitespace}    {/* skip whitespace */}
"{"             { /* skip a comment up to its closing brace, counting
                     the newlines inside it; c is an int so that EOF
                     can be told apart from every character value */
                  int c;
                  do
                  { c = input();
                    if (c == EOF) break;
                    if (c == '\n') lineno++;
                  } while (c != '}');
                }
.               {return ERROR;}
%%
/* function getToken returns the next token in the source file,
 * using the scanner generated from the rules above.
 * On the first call it bumps lineno and points the scanner's
 * input at source and its output at listing.
 * The lexeme is copied to tokenString, truncated to MAXTOKENLEN
 * characters. When TraceScan is set, the token is printed to the
 * listing file.
 */
TokenType getToken(void)
{ static int firstTime = TRUE;
  TokenType currentToken;
  if (firstTime)
  { firstTime = FALSE;
    lineno++;
    yyin = source;
    yyout = listing;
  }
  currentToken = yylex();
  strncpy(tokenString,yytext,MAXTOKENLEN);
  /* strncpy adds no terminator when the lexeme fills the whole
     count, so terminate explicitly */
  tokenString[MAXTOKENLEN] = '\0';
  if (TraceScan) {
    fprintf(listing,"\t%d: ",lineno);
    printToken(currentToken,tokenString);
  }
  return currentToken;
}

102
src/main.c Normal file
View File

@@ -0,0 +1,102 @@
/****************************************************/
/* File: main.c */
/* Main program for TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
/* set NO_PARSE to TRUE to get a scanner-only compiler */
#define NO_PARSE FALSE
/* set NO_ANALYZE to TRUE to get a parser-only compiler */
#define NO_ANALYZE FALSE
/* set NO_CODE to TRUE to get a compiler that does not
* generate code
*/
#define NO_CODE FALSE
#include "util.h"
#if NO_PARSE
#include "scan.h"
#else
#include "parse.h"
#if !NO_ANALYZE
#include "analyze.h"
#if !NO_CODE
#include "cgen.h"
#endif
#endif
#endif
/* allocate global variables (declared extern in globals.h) */
int lineno = 0;
FILE * source;
FILE * listing;
FILE * code;
/* allocate and set tracing flags; change a value to TRUE here
   to enable the corresponding trace output */
int EchoSource = FALSE;
int TraceScan = FALSE;
int TraceParse = FALSE;
int TraceAnalyze = FALSE;
int TraceCode = FALSE;
/* set to TRUE by the passes when they report an error */
int Error = FALSE;
/* Driver of the compiler.
 * Usage: <program> <filename>
 * If the file-name component of the argument contains no '.',
 * ".tny" is appended. The source is scanned/parsed, and - depending
 * on NO_PARSE, NO_ANALYZE and NO_CODE - analysed and translated to
 * a code file whose name is the source name with everything from
 * the first '.' of the file-name component replaced by ".tm".
 * Only the part after the last '/' is searched for '.', so dots in
 * directory names (e.g. "./prog.tny") do not affect either name.
 * Returns 0 on completion; exits with status 1 on a usage error,
 * an over-long file name, a file that cannot be opened, or
 * memory exhaustion.
 */
int main( int argc, char * argv[] )
{ TreeNode * syntaxTree;
  char pgm[120]; /* source code file name */
  char * base;   /* start of the file-name component inside pgm */
  size_t len;
  if (argc != 2)
  { fprintf(stderr,"usage: %s <filename>\n",argv[0]);
    exit(1);
  }
  /* refuse names that do not fit into pgm instead of overflowing it */
  len = strlen(argv[1]);
  if (len >= sizeof pgm)
  { fprintf(stderr,"File name too long: %s\n",argv[1]);
    exit(1);
  }
  strcpy(pgm,argv[1]) ;
  base = strrchr(pgm,'/');
  base = (base == NULL) ? pgm : base + 1;
  if (strchr (base, '.') == NULL)
  { /* sizeof ".tny" counts the terminating NUL as well */
    if (len + sizeof ".tny" > sizeof pgm)
    { fprintf(stderr,"File name too long: %s\n",argv[1]);
      exit(1);
    }
    strcat(pgm,".tny");
  }
  source = fopen(pgm,"r");
  if (source==NULL)
  { fprintf(stderr,"File %s not found\n",pgm);
    exit(1);
  }
  listing = stdout; /* send listing to screen */
  fprintf(listing,"\nTINY COMPILATION: %s\n",pgm);
#if NO_PARSE
  while (getToken()!=ENDFILE);
#else
  syntaxTree = parse();
  if (TraceParse) {
    fprintf(listing,"\nSyntax tree:\n");
    printTree(syntaxTree);
  }
#if !NO_ANALYZE
  if (! Error)
  { if (TraceAnalyze) fprintf(listing,"\nBuilding Symbol Table...\n");
    buildSymtab(syntaxTree);
    if (TraceAnalyze) fprintf(listing,"\nChecking Types...\n");
    typeCheck(syntaxTree);
    if (TraceAnalyze) fprintf(listing,"\nType Checking Finished\n");
  }
#if !NO_CODE
  if (! Error)
  { char * codefile;
    /* length of the directory part plus the file name up to its first '.' */
    size_t fnlen = (size_t) (base - pgm) + strcspn(base,".");
    /* room for the stem, ".tm" and the terminating NUL; calloc zero-fills,
       so the copied stem is already terminated */
    codefile = calloc(fnlen+4, sizeof(char));
    if (codefile == NULL)
    { fprintf(stderr,"Out of memory\n");
      exit(1);
    }
    memcpy(codefile,pgm,fnlen);
    strcat(codefile,".tm");
    code = fopen(codefile,"w");
    if (code == NULL)
    { printf("Unable to open %s\n",codefile);
      exit(1);
    }
    codeGen(syntaxTree,codefile);
    fclose(code);
    free(codefile);
  }
#endif
#endif
#endif
  fclose(source);
  return 0;
}

215
src/parse.c Normal file
View File

@@ -0,0 +1,215 @@
/****************************************************/
/* File: parse.c */
/* The parser implementation for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"
/* holds current token; it is the one-token lookahead shared by all
   parsing routines below and is advanced through getToken() */
static TokenType token; /* holds current token */
/* function prototypes for recursive calls
   (one routine per grammar construct) */
static TreeNode * stmt_sequence(void);
static TreeNode * statement(void);
static TreeNode * if_stmt(void);
static TreeNode * repeat_stmt(void);
static TreeNode * assign_stmt(void);
static TreeNode * read_stmt(void);
static TreeNode * write_stmt(void);
static TreeNode * exp(void);
static TreeNode * simple_exp(void);
static TreeNode * term(void);
static TreeNode * factor(void);
static void syntaxError(char * message)
{ fprintf(listing,"\n>>> ");
fprintf(listing,"Syntax error at line %d: %s",lineno,message);
Error = TRUE;
}
/* match advances to the next token when the current token is
 * the expected one. Otherwise it reports a syntax error,
 * prints the offending token, and leaves the current token
 * unchanged.
 */
static void match(TokenType expected)
{ if (token != expected)
  { syntaxError("unexpected token -> ");
    printToken(token,tokenString);
    fprintf(listing," ");
    return;
  }
  token = getToken();
}
/* stmt_sequence parses statements separated by SEMI until
 * ENDFILE, END, ELSE or UNTIL is the current token. The
 * statements are linked through their sibling fields and the
 * first one is returned; statements that failed to parse
 * (NULL) are left out of the chain.
 */
TreeNode * stmt_sequence(void)
{ TreeNode * head = statement();
  TreeNode * tail = head;
  for (;;)
  { TreeNode * next;
    if ((token==ENDFILE) || (token==END) ||
        (token==ELSE) || (token==UNTIL))
      break;
    match(SEMI);
    next = statement();
    if (next == NULL)
      continue;
    if (head == NULL)
      head = next;            /* first statement that parsed */
    else
      tail->sibling = next;   /* tail is non-NULL whenever head is */
    tail = next;
  }
  return head;
}
/* statement selects the statement parser that matches the
 * current token. For any other token it reports a syntax
 * error, skips that token and returns NULL.
 */
TreeNode * statement(void)
{ if (token == IF)     return if_stmt();
  if (token == REPEAT) return repeat_stmt();
  if (token == ID)     return assign_stmt();
  if (token == READ)   return read_stmt();
  if (token == WRITE)  return write_stmt();
  syntaxError("unexpected token -> ");
  printToken(token,tokenString);
  token = getToken();
  return NULL;
}
/* if_stmt parses
 *   IF exp THEN stmt_sequence [ ELSE stmt_sequence ] END
 * into an IfK node: child[0] = test, child[1] = then part,
 * child[2] = else part (only set when ELSE is present).
 * When the node could not be created the sub-parsers are
 * not called; only the keywords are matched.
 */
TreeNode * if_stmt(void)
{ TreeNode * node = newStmtNode(IfK);
  int haveNode = (node != NULL);
  match(IF);
  if (haveNode)
    node->child[0] = exp();
  match(THEN);
  if (haveNode)
    node->child[1] = stmt_sequence();
  if (token == ELSE)
  { match(ELSE);
    if (haveNode)
      node->child[2] = stmt_sequence();
  }
  match(END);
  return node;
}
/* repeat_stmt parses  REPEAT stmt_sequence UNTIL exp
 * into a RepeatK node: child[0] = body, child[1] = test.
 * When the node could not be created the sub-parsers are
 * not called; only the keywords are matched.
 */
TreeNode * repeat_stmt(void)
{ TreeNode * node = newStmtNode(RepeatK);
  int haveNode = (node != NULL);
  match(REPEAT);
  if (haveNode)
    node->child[0] = stmt_sequence();
  match(UNTIL);
  if (haveNode)
    node->child[1] = exp();
  return node;
}
/* assign_stmt parses  ID ASSIGN exp  into an AssignK node.
 * attr.name receives a copy of the identifier's lexeme and
 * child[0] the right-hand side expression.
 */
TreeNode * assign_stmt(void)
{ TreeNode * node = newStmtNode(AssignK);
  if (node != NULL)
  { /* the lexeme must be copied before match() fetches the next token */
    if (token == ID)
      node->attr.name = copyString(tokenString);
  }
  match(ID);
  match(ASSIGN);
  if (node != NULL)
    node->child[0] = exp();
  return node;
}
/* read_stmt parses  READ ID  into a ReadK node whose
 * attr.name is a copy of the identifier's lexeme.
 */
TreeNode * read_stmt(void)
{ TreeNode * node = newStmtNode(ReadK);
  match(READ);
  if (node != NULL)
  { /* the lexeme must be copied before match() fetches the next token */
    if (token == ID)
      node->attr.name = copyString(tokenString);
  }
  match(ID);
  return node;
}
/* write_stmt parses  WRITE exp  into a WriteK node whose
 * child[0] is the expression to be written.
 */
TreeNode * write_stmt(void)
{ TreeNode * node = newStmtNode(WriteK);
  match(WRITE);
  if (node == NULL)
    return node;
  node->child[0] = exp();
  return node;
}
TreeNode * exp(void)
{ TreeNode * t = simple_exp();
if ((token==LT)||(token==EQ)) {
TreeNode * p = newExpNode(OpK);
if (p!=NULL) {
p->child[0] = t;
p->attr.op = token;
t = p;
}
match(token);
if (t!=NULL)
t->child[1] = simple_exp();
}
return t;
}
TreeNode * simple_exp(void)
{ TreeNode * t = term();
while ((token==PLUS)||(token==MINUS))
{ TreeNode * p = newExpNode(OpK);
if (p!=NULL) {
p->child[0] = t;
p->attr.op = token;
t = p;
match(token);
t->child[1] = term();
}
}
return t;
}
TreeNode * term(void)
{ TreeNode * t = factor();
while ((token==TIMES)||(token==OVER))
{ TreeNode * p = newExpNode(OpK);
if (p!=NULL) {
p->child[0] = t;
p->attr.op = token;
t = p;
match(token);
p->child[1] = factor();
}
}
return t;
}
/* factor parses  NUM | ID | LPAREN exp RPAREN.
 * NUM gives a ConstK node with the value in attr.val, ID gives
 * an IdK node with a copy of the lexeme in attr.name, and a
 * parenthesised expression gives the node of the inner
 * expression. For any other token a syntax error is reported,
 * the token is skipped and NULL is returned.
 */
TreeNode * factor(void)
{ TreeNode * node = NULL;
  if (token == NUM)
  { node = newExpNode(ConstK);
    /* read the lexeme before match() fetches the next token */
    if (node != NULL)
      node->attr.val = atoi(tokenString);
    match(NUM);
  }
  else if (token == ID)
  { node = newExpNode(IdK);
    /* copy the lexeme before match() fetches the next token */
    if (node != NULL)
      node->attr.name = copyString(tokenString);
    match(ID);
  }
  else if (token == LPAREN)
  { match(LPAREN);
    node = exp();
    match(RPAREN);
  }
  else
  { syntaxError("unexpected token -> ");
    printToken(token,tokenString);
    token = getToken();
  }
  return node;
}
/****************************************/
/* the primary function of the parser   */
/****************************************/
/* Function parse reads the first token, parses a statement
 * sequence and returns the newly constructed syntax tree.
 * A syntax error is reported when tokens remain after the
 * sequence; the tree is returned in either case.
 */
TreeNode * parse(void)
{ TreeNode * tree;
  token = getToken();
  tree = stmt_sequence();
  if (token == ENDFILE)
    return tree;
  syntaxError("Code ends before file\n");
  return tree;
}

16
src/parse.h Normal file
View File

@@ -0,0 +1,16 @@
/****************************************************/
/* File: parse.h */
/* The parser interface for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _PARSE_H_
#define _PARSE_H_
/* NOTE: this header uses TreeNode but does not include globals.h
 * itself; globals.h must be included before it.
 */
/* Function parse returns the newly
 * constructed syntax tree
 */
TreeNode * parse(void);
#endif

203
src/scan.c Normal file
View File

@@ -0,0 +1,203 @@
/****************************************************/
/* File: scan.c */
/* The scanner implementation for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "util.h"
#include "scan.h"
/* states in scanner DFA; getToken starts in START and
   returns a token once DONE is reached */
typedef enum
{ START,INASSIGN,INCOMMENT,INNUM,INID,DONE }
StateType;
/* lexeme of identifier or reserved word */
char tokenString[MAXTOKENLEN+1];
/* BUFLEN = length of the input buffer for
source code lines */
#define BUFLEN 256
static char lineBuf[BUFLEN]; /* holds the current line */
static int linepos = 0; /* current position in LineBuf */
static int bufsize = 0; /* current size of buffer string */
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */
/* getNextChar returns the next character from lineBuf
   (blanks and newlines included), reading in a new line
   when lineBuf is exhausted. Each refill attempt increments
   lineno. Characters are returned as unsigned char values, so
   they are never negative and cannot be confused with EOF,
   which is returned (and EOF_flag set) when no further line
   can be read. */
static int getNextChar(void)
{ if (linepos < bufsize)
    return (unsigned char) lineBuf[linepos++];
  lineno++;
  if (fgets(lineBuf,BUFLEN-1,source) == NULL)
  { EOF_flag = TRUE;
    return EOF;
  }
  if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf);
  bufsize = strlen(lineBuf);
  linepos = 0;
  return (unsigned char) lineBuf[linepos++];
}
/* ungetNextChar steps back one character in lineBuf.
   Once end of file has been seen there is no character
   to give back, so nothing happens. */
static void ungetNextChar(void)
{ if (EOF_flag)
    return;
  linepos--;
}
/* lookup table of reserved words: spelling and token */
static struct
    { char* str;
      TokenType tok;
    } reservedWords[MAXRESERVED]
   = {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
      {"repeat",REPEAT},{"until",UNTIL},{"read",READ},
      {"write",WRITE}};

/* reservedLookup returns the reserved-word token whose
   spelling equals s, or ID when s is not a reserved word.
   The table is searched linearly. */
static TokenType reservedLookup (char * s)
{ int idx = 0;
  while (idx < MAXRESERVED)
  { if (strcmp(s,reservedWords[idx].str) == 0)
      return reservedWords[idx].tok;
    idx++;
  }
  return ID;
}
/****************************************/
/* the primary function of the scanner  */
/****************************************/
/* function getToken returns the
 * next token in source file.
 * It runs the scanner DFA from START until DONE, collecting the
 * lexeme in tokenString. At most MAXTOKENLEN characters are
 * stored, so that the terminating NUL always fits inside
 * tokenString; longer lexemes are truncated. Identifiers are
 * looked up in the reserved-word table. When TraceScan is set the
 * token is printed to the listing file.
 */
TokenType getToken(void)
{  /* index for storing into tokenString */
   int tokenStringIndex = 0;
   /* holds current token to be returned */
   TokenType currentToken;
   /* current state - always begins at START */
   StateType state = START;
   /* flag to indicate save to tokenString */
   int save;
   while (state != DONE)
   { int c = getNextChar();
     save = TRUE;
     switch (state)
     { case START:
         if (isdigit(c))
           state = INNUM;
         else if (isalpha(c))
           state = INID;
         else if (c == ':')
           state = INASSIGN;
         else if ((c == ' ') || (c == '\t') || (c == '\n'))
           save = FALSE;
         else if (c == '{')
         { save = FALSE;
           state = INCOMMENT;
         }
         else
         { state = DONE;
           switch (c)
           { case EOF:
               save = FALSE;
               currentToken = ENDFILE;
               break;
             case '=':
               currentToken = EQ;
               break;
             case '<':
               currentToken = LT;
               break;
             case '+':
               currentToken = PLUS;
               break;
             case '-':
               currentToken = MINUS;
               break;
             case '*':
               currentToken = TIMES;
               break;
             case '/':
               currentToken = OVER;
               break;
             case '(':
               currentToken = LPAREN;
               break;
             case ')':
               currentToken = RPAREN;
               break;
             case ';':
               currentToken = SEMI;
               break;
             default:
               currentToken = ERROR;
               break;
           }
         }
         break;
       case INCOMMENT:
         save = FALSE;
         if (c == EOF)
         { state = DONE;
           currentToken = ENDFILE;
         }
         else if (c == '}') state = START;
         break;
       case INASSIGN:
         state = DONE;
         if (c == '=')
           currentToken = ASSIGN;
         else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = ERROR;
         }
         break;
       case INNUM:
         if (!isdigit(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = NUM;
         }
         break;
       case INID:
         if (!isalpha(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = ID;
         }
         break;
       case DONE:
       default: /* should never happen */
         fprintf(listing,"Scanner Bug: state= %d\n",state);
         state = DONE;
         currentToken = ERROR;
         break;
     }
     /* strictly less than MAXTOKENLEN: index MAXTOKENLEN is the last
        element of tokenString and is reserved for the NUL below */
     if ((save) && (tokenStringIndex < MAXTOKENLEN))
       tokenString[tokenStringIndex++] = (char) c;
     if (state == DONE)
     { tokenString[tokenStringIndex] = '\0';
       if (currentToken == ID)
         currentToken = reservedLookup(tokenString);
     }
   }
   if (TraceScan) {
     fprintf(listing,"\t%d: ",lineno);
     printToken(currentToken,tokenString);
   }
   return currentToken;
} /* end getToken */

22
src/scan.h Normal file
View File

@@ -0,0 +1,22 @@
/****************************************************/
/* File: scan.h */
/* The scanner interface for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _SCAN_H_
#define _SCAN_H_
/* MAXTOKENLEN is the maximum number of characters
 * kept for the lexeme of a single token
 */
#define MAXTOKENLEN 40
/* tokenString array stores the lexeme of each token;
 * the one extra element leaves room for the
 * terminating '\0'
 */
extern char tokenString[MAXTOKENLEN+1];
/* function getToken returns the
 * next token in source file.
 * NOTE(review): TokenType is not declared in this
 * header; presumably globals.h has to be included
 * before it -- confirm against the includers
 */
TokenType getToken(void);
#endif

122
src/symtab.c Normal file
View File

@@ -0,0 +1,122 @@
/****************************************************/
/* File: symtab.c */
/* Symbol table implementation for the TINY compiler*/
/* (allows only one symbol table) */
/* Symbol table is implemented as a chained */
/* hash table */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"
/* SIZE is the size of the hash table */
#define SIZE 211
/* SHIFT is the power of two used as multiplier
   in hash function */
#define SHIFT 4
/* the hash function
 *
 * Folds the characters of the NUL-terminated string key
 * into a bucket index: the running value is multiplied by
 * 2^SHIFT, the next character is added, and the sum is
 * reduced modulo SIZE at every step, so the intermediate
 * value stays small and the result is always in 0..SIZE-1.
 * key must not be NULL.
 */
static int hash ( char * key )
{ int temp = 0;
  int i = 0;
  while (key[i] != '\0')
  { /* read the byte as unsigned char: where plain char is
     * signed, bytes >= 0x80 would otherwise be negative and
     * drive the result (a table index) below zero
     */
    temp = ((temp << SHIFT) + (unsigned char) key[i]) % SIZE;
    ++i;
  }
  return temp;
}
/* the list of line numbers of the source
 * code in which a variable is referenced
 *
 * Singly linked and NULL-terminated. Note that
 * LineList is a pointer type: it names a pointer
 * to a node, not the node itself.
 */
typedef struct LineListRec
{ int lineno;
struct LineListRec * next;
} * LineList;
/* The record in the bucket lists for
 * each variable, including name,
 * assigned memory location, and
 * the list of line numbers in which
 * it appears in the source code
 *
 * Records whose names hash to the same index are
 * chained through next. BucketList is a pointer type.
 */
typedef struct BucketListRec
{ char * name;
LineList lines;
int memloc ; /* memory location for variable */
struct BucketListRec * next;
} * BucketList;
/* the hash table: one chain head per hash value.
 * Being a file-scope static array, every entry
 * starts out as NULL (empty chain).
 */
static BucketList hashTable[SIZE];
/* Procedure st_insert inserts line numbers and
* memory locations into the symbol table
* loc = memory location is inserted only the
* first time, otherwise ignored
*/
void st_insert( char * name, int lineno, int loc )
{ int h = hash(name);
BucketList l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->name) != 0))
l = l->next;
if (l == NULL) /* variable not yet in table */
{ l = (BucketList) malloc(sizeof(struct BucketListRec));
l->name = name;
l->lines = (LineList) malloc(sizeof(struct LineListRec));
l->lines->lineno = lineno;
l->memloc = loc;
l->lines->next = NULL;
l->next = hashTable[h];
hashTable[h] = l; }
else /* found in table, so just add line number */
{ LineList t = l->lines;
while (t->next != NULL) t = t->next;
t->next = (LineList) malloc(sizeof(struct LineListRec));
t->next->lineno = lineno;
t->next->next = NULL;
}
} /* st_insert */
/* Function st_lookup searches the bucket chain selected
 * by the hash of name and returns the memory location
 * recorded for that variable, or -1 when no record with
 * that name exists
 */
int st_lookup ( char * name )
{ BucketList entry;
  for (entry = hashTable[hash(name)]; entry != NULL; entry = entry->next)
  { if (strcmp(name,entry->name) == 0)
      return entry->memloc;
  }
  return -1;
}
/* Procedure printSymTab writes a formatted listing of
 * the symbol table to the listing file: a two-line
 * heading, then one line per variable with its name,
 * memory location and every recorded line number.
 * Buckets are visited in index order and each chain
 * from its head.
 */
void printSymTab(FILE * listing)
{ int bucket;
  BucketList entry;
  LineList ref;
  fprintf(listing,"Variable Name Location Line Numbers\n");
  fprintf(listing,"------------- -------- ------------\n");
  for (bucket = 0; bucket < SIZE; ++bucket)
  { for (entry = hashTable[bucket]; entry != NULL; entry = entry->next)
    { fprintf(listing,"%-14s ",entry->name);
      fprintf(listing,"%-8d ",entry->memloc);
      for (ref = entry->lines; ref != NULL; ref = ref->next)
        fprintf(listing,"%4d ",ref->lineno);
      fprintf(listing,"\n");
    }
  }
} /* printSymTab */

32
src/symtab.h Normal file
View File

@@ -0,0 +1,32 @@
/****************************************************/
/* File: symtab.h */
/* Symbol table interface for the TINY compiler */
/* (allows only one symbol table) */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _SYMTAB_H_
#define _SYMTAB_H_
#include "globals.h"
/* Procedure st_insert inserts line numbers and
 * memory locations into the symbol table
 * loc = memory location is inserted only the
 * first time, otherwise ignored
 *
 * The table keeps the name pointer itself rather
 * than a copy of the string, so the caller must
 * keep that storage alive while the table is in use.
 */
void st_insert( char * name, int lineno, int loc );
/* Function st_lookup returns the memory
 * location of a variable or -1 if not found
 */
int st_lookup ( char * name );
/* Procedure printSymTab prints a formatted
 * listing of the symbol table contents
 * to the listing file
 *
 * The output stream is the parameter passed in,
 * which is also named listing.
 */
void printSymTab(FILE * listing);
#endif

177
src/util.c Normal file
View File

@@ -0,0 +1,177 @@
/****************************************************/
/* File: util.c */
/* Utility function implementation */
/* for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "util.h"
/* Procedure printToken writes one line describing a token
 * to the listing file. Reserved words, NUM, ID and ERROR
 * are printed together with the lexeme in tokenString;
 * operators and ENDFILE are printed as fixed text; any
 * other value is reported as an unknown token number.
 */
void printToken( TokenType token, const char* tokenString )
{ const char * fixedText = NULL; /* text of tokens that need no lexeme */
  switch (token)
  { case IF:
    case THEN:
    case ELSE:
    case END:
    case REPEAT:
    case UNTIL:
    case READ:
    case WRITE:
      fprintf(listing,
              "reserved word: %s\n",tokenString);
      return;
    case NUM:
      fprintf(listing,
              "NUM, val= %s\n",tokenString);
      return;
    case ID:
      fprintf(listing,
              "ID, name= %s\n",tokenString);
      return;
    case ERROR:
      fprintf(listing,
              "ERROR: %s\n",tokenString);
      return;
    case ASSIGN:  fixedText = ":=\n";  break;
    case LT:      fixedText = "<\n";   break;
    case EQ:      fixedText = "=\n";   break;
    case LPAREN:  fixedText = "(\n";   break;
    case RPAREN:  fixedText = ")\n";   break;
    case SEMI:    fixedText = ";\n";   break;
    case PLUS:    fixedText = "+\n";   break;
    case MINUS:   fixedText = "-\n";   break;
    case TIMES:   fixedText = "*\n";   break;
    case OVER:    fixedText = "/\n";   break;
    case ENDFILE: fixedText = "EOF\n"; break;
    default: /* should never happen */
      fprintf(listing,"Unknown token: %d\n",token);
      return;
  }
  /* only the fixed-text cases reach this point */
  fputs(fixedText,listing);
}
/* Function newStmtNode allocates a statement node of the
 * given kind for syntax tree construction. All children
 * and the sibling link start as NULL and the node records
 * the current source line (lineno). If allocation fails,
 * a message is written to the listing file and NULL is
 * returned.
 */
TreeNode * newStmtNode(StmtKind kind)
{ TreeNode * node = malloc(sizeof *node);
  int slot = 0;
  if (node == NULL)
  { fprintf(listing,"Out of memory error at line %d\n",lineno);
    return NULL;
  }
  while (slot < MAXCHILDREN)
    node->child[slot++] = NULL;
  node->sibling = NULL;
  node->nodekind = StmtK;
  node->kind.stmt = kind;
  node->lineno = lineno;
  return node;
}
/* Function newExpNode allocates an expression node of the
 * given kind for syntax tree construction. All children
 * and the sibling link start as NULL, the node records the
 * current source line (lineno) and its type starts as Void.
 * If allocation fails, a message is written to the listing
 * file and NULL is returned.
 */
TreeNode * newExpNode(ExpKind kind)
{ TreeNode * node = malloc(sizeof *node);
  int slot = 0;
  if (node == NULL)
  { fprintf(listing,"Out of memory error at line %d\n",lineno);
    return NULL;
  }
  while (slot < MAXCHILDREN)
    node->child[slot++] = NULL;
  node->sibling = NULL;
  node->nodekind = ExpK;
  node->kind.exp = kind;
  node->lineno = lineno;
  node->type = Void;
  return node;
}
/* Function copyString allocates and makes a new
 * copy of an existing string
 *
 * s - NUL-terminated string to duplicate, or NULL
 *
 * Returns a malloc'ed copy of s. Returns NULL when s is
 * NULL, and also when allocation fails, in which case a
 * message is written to the listing file first.
 */
char * copyString(char * s)
{ size_t n; /* bytes to copy, including the terminator */
  char * t;
  if (s==NULL) return NULL;
  /* keep the length in size_t: narrowing it to int would
   * corrupt the allocation size for very long strings
   */
  n = strlen(s)+1;
  t = malloc(n);
  if (t==NULL)
    fprintf(listing,"Out of memory error at line %d\n",lineno);
  else memcpy(t,s,n);
  return t;
}
/* Variable indentno is used by printTree to
 * store current number of spaces to indent
 * (the type is spelled out: implicit int is not
 * valid C since C99)
 */
static int indentno = 0;
/* macros to increase/decrease indentation by one level
 * (two spaces); parenthesized so they expand to a single
 * expression
 */
#define INDENT (indentno+=2)
#define UNINDENT (indentno-=2)
/* printSpaces writes indentno blanks to the listing file
 * (nothing when indentno is zero or negative)
 */
static void printSpaces(void)
{ int remaining = indentno;
  while (remaining-- > 0)
    fputc(' ',listing);
}
/* procedure printTree prints a syntax tree to the
 * listing file using indentation to indicate subtrees
 *
 * tree - first node of a sibling list; may be NULL, in
 *        which case nothing is printed
 *
 * Each node is printed on its own line, followed by all of
 * its children one indentation level deeper, and then the
 * walk moves on to the node's sibling at the same level.
 */
void printTree( TreeNode * tree )
{ int i;
  INDENT;
  for ( ; tree != NULL; tree = tree->sibling)
  { printSpaces();
    if (tree->nodekind==StmtK)
    { switch (tree->kind.stmt) {
        case IfK:
          fprintf(listing,"If\n");
          break;
        case RepeatK:
          fprintf(listing,"Repeat\n");
          break;
        case AssignK:
          fprintf(listing,"Assign to: %s\n",tree->attr.name);
          break;
        case ReadK:
          fprintf(listing,"Read: %s\n",tree->attr.name);
          break;
        case WriteK:
          fprintf(listing,"Write\n");
          break;
        default:
          /* this branch handles statement nodes, so the
           * message names them (it used to say ExpNode)
           */
          fprintf(listing,"Unknown StmtNode kind\n");
          break;
      }
    }
    else if (tree->nodekind==ExpK)
    { switch (tree->kind.exp) {
        case OpK:
          fprintf(listing,"Op: ");
          /* operators print as fixed text; no lexeme needed */
          printToken(tree->attr.op,"\0");
          break;
        case ConstK:
          fprintf(listing,"Const: %d\n",tree->attr.val);
          break;
        case IdK:
          fprintf(listing,"Id: %s\n",tree->attr.name);
          break;
        default:
          fprintf(listing,"Unknown ExpNode kind\n");
          break;
      }
    }
    else fprintf(listing,"Unknown node kind\n");
    /* NULL children print nothing, so every slot can be visited */
    for (i=0;i<MAXCHILDREN;i++)
      printTree(tree->child[i]);
  }
  UNINDENT;
}

38
src/util.h Normal file
View File

@@ -0,0 +1,38 @@
/****************************************************/
/* File: util.h */
/* Utility functions for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _UTIL_H_
#define _UTIL_H_
#include "globals.h"
/* Procedure printToken prints a token
 * and its lexeme to the listing file
 *
 * The second argument is the lexeme; it is printed
 * only for reserved words, NUM, ID and ERROR tokens.
 */
void printToken( TokenType, const char* );
/* Function newStmtNode creates a new statement
 * node for syntax tree construction
 *
 * Returns NULL (after writing a message to the
 * listing file) when memory cannot be allocated.
 */
TreeNode * newStmtNode(StmtKind);
/* Function newExpNode creates a new expression
 * node for syntax tree construction
 *
 * Returns NULL (after writing a message to the
 * listing file) when memory cannot be allocated.
 */
TreeNode * newExpNode(ExpKind);
/* Function copyString allocates and makes a new
 * copy of an existing string
 *
 * Returns NULL for a NULL argument and also when
 * memory cannot be allocated. The copy comes from
 * malloc.
 */
char * copyString( char * );
/* procedure printTree prints a syntax tree to the
 * listing file using indentation to indicate subtrees
 *
 * A NULL tree prints nothing.
 */
void printTree( TreeNode * );
#endif

120
src/yacc/globals.h Normal file
View File

@@ -0,0 +1,120 @@
/****************************************************/
/* File: globals.h */
/* Yacc/Bison Version */
/* Global types and vars for TINY compiler */
/* must come before other include files */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#ifndef _GLOBALS_H_
#define _GLOBALS_H_
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* Yacc/Bison generates internally its own values
* for the tokens. Other files can access these values
* by including the tab.h file generated using the
* Yacc/Bison option -d ("generate header")
*
* The YYPARSER flag prevents inclusion of the tab.h
* into the Yacc/Bison output itself
*/
#ifndef YYPARSER
/* the name of the following file may change */
#include "y.tab.h"
/* ENDFILE is implicitly defined by Yacc/Bison,
* and not included in the tab.h file
*/
#define ENDFILE 0
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
/* MAXRESERVED = the number of reserved words */
#define MAXRESERVED 8
/* Yacc/Bison generates its own integer values
* for tokens
*/
typedef int TokenType;
extern FILE* source; /* source code text file */
extern FILE* listing; /* listing output text file */
extern FILE* code; /* code text file for TM simulator */
extern int lineno; /* source line number for listing */
/**************************************************/
/*********** Syntax tree for parsing ************/
/**************************************************/
/* NodeKind says whether a node is a statement or an
 * expression; it selects which member of the kind
 * union below is meaningful
 */
typedef enum {StmtK,ExpK} NodeKind;
/* the statement forms of the language */
typedef enum {IfK,RepeatK,AssignK,ReadK,WriteK} StmtKind;
/* the expression forms: operator application,
 * integer constant, identifier reference
 */
typedef enum {OpK,ConstK,IdK} ExpKind;
/* ExpType is used for type checking */
typedef enum {Void,Integer,Boolean} ExpType;
/* most children any node has: an if statement with an
 * else part uses all three (test, then-part, else-part)
 */
#define MAXCHILDREN 3
/* One node of the syntax tree. */
typedef struct treeNode
{ struct treeNode * child[MAXCHILDREN];
/* sibling: next statement in the same statement sequence */
struct treeNode * sibling;
/* lineno: source line recorded when the node was created */
int lineno;
NodeKind nodekind;
union { StmtKind stmt; ExpKind exp;} kind;
/* attr: op for OpK nodes, val for ConstK nodes,
 * name for IdK, AssignK and ReadK nodes
 */
union { TokenType op;
int val;
char * name; } attr;
ExpType type; /* for type checking of exps */
} TreeNode;
/**************************************************/
/*********** Flags for tracing ************/
/**************************************************/
/* EchoSource = TRUE causes the source program to
* be echoed to the listing file with line numbers
* during parsing
*/
extern int EchoSource;
/* TraceScan = TRUE causes token information to be
* printed to the listing file as each token is
* recognized by the scanner
*/
extern int TraceScan;
/* TraceParse = TRUE causes the syntax tree to be
* printed to the listing file in linearized form
* (using indents for children)
*/
extern int TraceParse;
/* TraceAnalyze = TRUE causes symbol table inserts
* and lookups to be reported to the listing file
*/
extern int TraceAnalyze;
/* TraceCode = TRUE causes comments to be written
* to the TM code file as code is generated
*/
extern int TraceCode;
/* Error = TRUE prevents further passes if an error occurs */
extern int Error;
#endif

164
src/yacc/tiny.y Normal file
View File

@@ -0,0 +1,164 @@
/****************************************************/
/* File: tiny.y */
/* The TINY Yacc/Bison specification file */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
%{
#define YYPARSER /* distinguishes Yacc output from other code files */
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"
/* every grammar symbol carries a syntax tree node as its value */
#define YYSTYPE TreeNode *
static char * savedName; /* for use in assignments */
static int savedLineNo; /* ditto */
static TreeNode * savedTree; /* stores syntax tree for later return */
static int yylex(void); /* added 11/2/11 to ensure no conflict with lex */
/* the generated parser calls yyerror before its definition at
 * the end of this file, so it has to be declared here; calling
 * an undeclared function is not valid C since C99
 */
int yyerror(char * message);
%}
%token IF THEN ELSE END REPEAT UNTIL READ WRITE
%token ID NUM
%token ASSIGN EQ LT PLUS MINUS TIMES OVER LPAREN RPAREN SEMI
%token ERROR
%% /* Grammar for TINY */
/* program: the whole input is one statement sequence; its
 * tree is kept in savedTree, which parse() returns
 */
program : stmt_seq
{ savedTree = $1;}
;
/* stmt_seq: statements separated by SEMI. The statements are
 * linked left to right through their sibling pointers, and the
 * value of the sequence is its first statement.
 */
stmt_seq : stmt_seq SEMI stmt
{ YYSTYPE t = $1;
if (t != NULL)
{ /* walk to the last statement so far and append the new one */
while (t->sibling != NULL)
t = t->sibling;
t->sibling = $3;
$$ = $1; }
/* nothing before the SEMI produced a tree (for example an
 * error statement, whose value is NULL): start with this one
 */
else $$ = $3;
}
| stmt { $$ = $1; }
;
/* stmt: any one statement form; a statement that fails to
 * parse is reduced through the error token to a NULL tree
 */
stmt : if_stmt { $$ = $1; }
| repeat_stmt { $$ = $1; }
| assign_stmt { $$ = $1; }
| read_stmt { $$ = $1; }
| write_stmt { $$ = $1; }
| error { $$ = NULL; }
;
/* if_stmt: child[0] is the test, child[1] the then-part and,
 * when present, child[2] the else-part
 */
if_stmt : IF exp THEN stmt_seq END
{ $$ = newStmtNode(IfK);
$$->child[0] = $2;
$$->child[1] = $4;
}
| IF exp THEN stmt_seq ELSE stmt_seq END
{ $$ = newStmtNode(IfK);
$$->child[0] = $2;
$$->child[1] = $4;
$$->child[2] = $6;
}
;
/* repeat_stmt: child[0] is the loop body, child[1] the
 * UNTIL test
 */
repeat_stmt : REPEAT stmt_seq UNTIL exp
{ $$ = newStmtNode(RepeatK);
$$->child[0] = $2;
$$->child[1] = $4;
}
;
/* assign_stmt: the action placed right after ID saves the
 * identifier and its line at that point, because tokenString
 * and lineno move on while the right-hand side is scanned.
 * That mid-rule action counts as a component of the rule,
 * which is why the expression is the fourth one.
 */
assign_stmt : ID { savedName = copyString(tokenString);
savedLineNo = lineno; }
ASSIGN exp
{ $$ = newStmtNode(AssignK);
$$->child[0] = $4;
$$->attr.name = savedName;
$$->lineno = savedLineNo;
}
;
/* read_stmt: the variable name is copied from tokenString.
 * NOTE(review): this assumes tokenString still holds the ID
 * lexeme when the rule is reduced, i.e. that the parser has
 * not yet fetched another token -- confirm for the parser
 * generator in use
 */
read_stmt : READ ID
{ $$ = newStmtNode(ReadK);
$$->attr.name =
copyString(tokenString);
}
;
/* write_stmt: child[0] is the expression to write */
write_stmt : WRITE exp
{ $$ = newStmtNode(WriteK);
$$->child[0] = $2;
}
;
/* exp: at most one comparison (LT or EQ) between two simple
 * expressions. Every operator node in the rules below has its
 * left operand in child[0], its right operand in child[1] and
 * the operator token in attr.op.
 */
exp : simple_exp LT simple_exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = LT;
}
| simple_exp EQ simple_exp
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = EQ;
}
| simple_exp { $$ = $1; }
;
/* simple_exp: PLUS and MINUS; the left recursion makes
 * them group from the left
 */
simple_exp : simple_exp PLUS term
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = PLUS;
}
| simple_exp MINUS term
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = MINUS;
}
| term { $$ = $1; }
;
/* term: TIMES and OVER, one level below PLUS and MINUS so
 * they bind tighter; also grouping from the left
 */
term : term TIMES factor
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = TIMES;
}
| term OVER factor
{ $$ = newExpNode(OpK);
$$->child[0] = $1;
$$->child[1] = $3;
$$->attr.op = OVER;
}
| factor { $$ = $1; }
;
/* factor: a parenthesized expression (which adds no node of
 * its own), a number, or an identifier. NUM and ID take
 * their value from tokenString.
 * NOTE(review): this assumes tokenString still holds that
 * token's lexeme when the rule is reduced -- confirm for the
 * parser generator in use
 */
factor : LPAREN exp RPAREN
{ $$ = $2; }
| NUM
{ $$ = newExpNode(ConstK);
/* atoi gives no indication of malformed or out-of-range input */
$$->attr.val = atoi(tokenString);
}
| ID { $$ = newExpNode(IdK);
$$->attr.name =
copyString(tokenString);
}
| error { $$ = NULL; }
;
%%
/* yyerror reports a syntax error to the listing file: the
 * current line number and the parser's message, followed by
 * a description of the token in yychar. It also sets the
 * global Error flag and always returns 0.
 */
int yyerror(char * message)
{ Error = TRUE;
  fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
  fputs("Current token: ",listing);
  printToken(yychar,tokenString);
  return 0;
}
/* yylex is the scanner entry point the generated parser
 * calls; it forwards to getToken so that the Yacc/Bison
 * output stays compatible with earlier versions of the
 * TINY scanner
 */
static int yylex(void)
{ int token = getToken();
  return token;
}
/* parse runs the generated parser over the input and returns
 * the syntax tree stored by the program rule. The status
 * returned by yyparse is not examined; syntax errors are
 * reported through yyerror.
 */
TreeNode * parse(void)
{ TreeNode * root;
  (void) yyparse();
  root = savedTree;
  return root;
}