编译原理 词法分析Word文档格式.docx
《编译原理 词法分析Word文档格式.docx》由会员分享,可在线阅读,更多相关《编译原理 词法分析Word文档格式.docx(22页珍藏版)》请在冰点文库上搜索。
[_A-Za-z][0-9_A-Za-z]*
标识符
ID
整型常量
INT
[0-9]+
浮点常量
FLOAT
[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)?
界符
DELIM
[,;\[\]\{\}\(\)]
运算符
OPT
[!%\^&\*\+\-<>?/]|&&|<=|>=|==|\|\|
字符常量
CHAR
^\'
$\'
字符串常量
STR
^\"
$\"
源程序:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdarg.h>
/* Token category codes; each value is also the index of its name in TYPE[]. */
#define KEY 0
#define ID 1
#define INT 2
#define FLOAT 3
#define CHAR 4
#define STR 5
#define DELIM 6
#define OPT 7

/* Printable name of each token category, indexed by the codes above. */
char *TYPE[] = {"KEY", "ID", "INT", "FLOAT",
                "CHAR", "STR", "DELIM", "OPT"};
char filename[256]; /* name of the file to process */
long length;        /* length of the file */
/* C keywords recognised by the lexer; the table ends with a NULL entry. */
char *key[] = {
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "int", "long", "register",
    "return", "short", "signed", "sizeof", "static", "struct",
    "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while",
    NULL};

/* Number of keywords in key[], not counting the trailing NULL entry. */
const int KEYCOUNT = 32;
char *source;     /* in-memory buffer holding the source text */
long pos = 0;     /* current read offset into source */
long line = 1;    /* line number */
long column = 0;  /* column number */
char token[1024]; /* scratch buffer for the token being assembled */
/*
 * Print a printf-style message to stderr and terminate the program with
 * EXIT_FAILURE. This function does not return.
 *
 * error: format string, followed by the arguments it consumes.
 */
void error(const char *error, ...)
{
    va_list arglist;

    va_start(arglist, error);
    vfprintf(stderr, error, arglist);
    va_end(arglist);
    exit(EXIT_FAILURE);
}
/*
 * Print a printf-style message to stderr and return to the caller.
 *
 * NOTE(review): only the signature of warning() survived in the extracted
 * listing. The body below is an assumption modelled on error() without the
 * exit() call -- confirm against the original document.
 */
void warning(const char *error, ...)
{
    va_list arglist;

    va_start(arglist, error);
    vfprintf(stderr, error, arglist);
    va_end(arglist);
}
/* Return 1 if ch is a delimiter character, 0 otherwise. */
int isdelim(const char ch)
{
    switch (ch) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case ',':
    case ';':
        return 1;
    default:
        break;
    }
    return 0;
}
/*
 * Return 1 if ch is an operator character, 0 otherwise.
 *
 * NOTE(review): the extracted listing lost the switch scaffolding and some
 * case labels of this function. '>' and '&' are restored here because the
 * report's operator pattern and sample output treat them as operators;
 * whether the original also listed '?' cannot be determined -- confirm
 * against the original document.
 */
int isoperator(const char ch)
{
    switch (ch) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '<':
    case '>':
    case '=':
    case '!':
    case '~':
    case '%':
    case '&':
    case '|':
    case '^':
    case '.':
        return 1;
    default:
        break;
    }
    return 0;
}
intiskey(constchar*str){
for(inti=0;
i<
KEYCOUNT;
i++){
if(strcmp(key[i],str)==0)
returni+1;
//读入源码的一个字符
charreadSource(){
if(pos<
length){
charch=source[pos++];
if(ch=='
\n'
){
line++;
column=0;
else
column++;
returnch;
else{
error("
未预料的EOF\n"
);
//回退源码指针
voidback(){
if(pos==0){
errorinback():
posnowis0\n"
pos--;
/*
 * Return the offset that ftell() reports at the end of the open stream fp,
 * which the caller uses as the file length. The stream's current position
 * is saved first and restored before returning.
 *
 * Returns -1 if the position cannot be queried or the stream cannot be
 * repositioned.
 */
long getFileLength(FILE *fp)
{
    long saved = ftell(fp);
    long size;

    if (saved < 0 || fseek(fp, 0, SEEK_END) != 0)
        return -1L;
    size = ftell(fp);
    /* Put the stream back where the caller left it. */
    if (fseek(fp, saved, SEEK_SET) != 0)
        return -1L;
    return size;
}
voidpreprocess(){
for(charch;
ch=readSource();
#'
inti=0;
token[i++]=ch;
while((ch=readSource())!
='
)
token[i]='
\0'
if(token[0]=='
printf("
%d\t%s\n"
line,token);
if(ch!
back();
break;
voidgetOperator(charch){
token[0]=ch;
ch=readSource();
token[1]=ch;
token[2]='
switch(token[0]){
if(token[0]==token[1]||token[1]=='
if(token[0]==token[1])
if(token[1]=='
default:
token[1]='
%ld\t%s\t%s\n"
line,TYPE[OPT],token);
voidcomment(){
charch=readSource();
while
(1){
elseif(ch=='
getOperator('
voidgetChar(charch){
for(;
(ch=readSource())!
\'
token[i]=ch;
\\'
token[++i]=ch;
if(token[0]!
&
strlen(token)>
1){
字符常量字符多余1个\n"
%ld\t%s\t\'
%s\'
\n"
line,TYPE[CHAR],token);
voidgetString(charch){
"
i--;
%ld\t%s\t\"
%s\"
line,TYPE[STR],token);
voidgetIdentify(charch){
inti=1;
!
isspace(ch=readSource());
if(isalpha(ch)||isdigit(ch)||ch=='
_'
token[i]=ch;
}
elseif(isoperator(ch)||isdelim(ch)||ch=='
else{
error("
非法标识符字符\n"
filename,line,column);
token[i++]='
if(iskey(token))
line,TYPE[KEY],token);
line,TYPE[ID],token);
voidgetNum(charch){
inthasDot=0;
inttype=0;
//0isint1isfloat
intafterE=0;
hasDot=1;
type=1;
token[0]=ch;
for(i=1;
i<
256;
i++){
i==1){
if(!
isdigit(ch)){
return;
hasDot){
数字中小数点多于1个\n"
E'
||ch=='
e'
afterE=1;
if(afterE){
afterE=0;
elseif(!
strlen(token)==1){
//error("
未预料的符号.\n"
getOperator(ch);
if(type==0){
line,TYPE[INT],token);
elseif(type==1){
line,TYPE[FLOAT],token);
//获得界符与运算符
voidgetDelim(charch){
%ld\t%s\t%c\n"
line,TYPE[DELIM],ch);
voidlexer(){
pos=0;
line=1;
preprocess();
pos<
length;
charch=readSource();
//printf("
%ld%ld%c\n"
line,column,ch);
if(isalpha(ch)||ch=='
getIdentify(ch);
elseif(isdigit(ch)||ch=='
getNum(ch);
elseif(isspace(ch)){
elseif(ch=='
getChar(ch);
getString(ch);
comment();
elseif(isoperator(ch)){
elseif(isdelim(ch)){
getDelim(ch);
%ld\tPRE\t%c\n"
line,ch);
/*命令行参数
*lexerfilename
*/
intmain(intargc,char**argv){
if(argc!
=2){
Usage:
%sfilename\n"
argv[0]);
strcpy(filename,argv[1]);
FILE*file;
if(NULL==(file=fopen(argv[1],"
r"
))){
Cannotopen%s.\n"
filename);
longfileLength=getFileLength(file);
source=(char*)malloc(fileLength+1);
fseek(file,0,SEEK_SET);
inthasRead;
for(hasRead=0;
feof(file);
intcount=fread(source+hasRead,1,1024,file);
hasRead+=count;
source[hasRead]=0;
length=hasRead;
lexer();
//调用词法处理器
fclose(file);
测试程序:
intgetSum(intnum){
intsum=0;
while(i<
=num){
sum+=i;
i++;
returnsum;
/*Thisfunctionistogettwonumandcalculatetheresum
*andprintthesumof12...to10
intmain(){
intt=10;
floata=10E-5,b=5.2;
scanf("
%f%f"
&
a,&
b);
a=%f,b=%f\n"
a,b);
if(a==b)
aisequaltob\n"
elseif(a<
b)
a<
=b\n"
a>
b\n"
//invokeafunction
sum=%d\n"
sum(t));
测试结果:
2#include<
3KEYint
3IDgetSum
3DELIM(
3IDnum
3DELIM)
3DELIM{
4KEYint
4IDi
4OPT=
4INT0
4DELIM;
5KEYint
5IDsum
5OPT=
5INT0
5DELIM;
6KEYwhile
6DELIM(
6IDi
6OPT<
=
6IDnum
6DELIM)
6DELIM{
7IDsum
7OPT+=
7IDi
7DELIM;
8IDi
8OPT++
8DELIM;
9DELIM}
10KEYreturn
10IDsum
10DELIM;
11DELIM}
17KEYint
17IDmain
17DELIM(
17DELIM)
17DELIM{
18KEYint
18IDt
18OPT=
18INT10
18DELIM;
19KEYfloat
19IDa
19OPT=
19FLOAT10E-5
19DELIM,
19IDb
19FLOAT5.2
19DELIM;
21IDscanf
21DELIM(
21STR"
21DELIM,
21OPT&
21IDa
21IDb
21DELIM)
21DELIM;
22IDprintf
22DELIM(
22STR"
22DELIM,
22IDa
22IDb
22DELIM)
22DELIM;
23KEYif
23DELIM(
23IDa
23OPT==
23IDb
23DELIM)
24IDprintf
24DELIM(
24STR"
24DELIM)
24DELIM;
25KEYelse
25KEYif
25DELIM(
25IDa
25OPT<
25IDb
25DELIM)
26IDprintf
26DELIM(
26STR"
26DELIM)
26DELIM;
28KEYelse
28IDprintf
28DELIM(
28STR"
28DELIM)
28DELIM;
31IDprintf
31DELIM(
31STR"
31DELIM,
31IDsum
31IDt
31DELIM)
31DELIM;
32KEYreturn
32INT0
32DELIM;
33DELIM}
实验感想:
1.开始时的程序整体无框架,所有程序都写在了一个函数里,写到中间发现不易维护,可读性差,后来就整体重新写了一遍,把不相关的功能剥离,提高模块性及可读性。
2.开始时注释处理在符号处理后面,后来发现处理不了注释,经过排查后发现把注释处理放到最前面就行了。
3.本程序添加了许多错误处理,使代码更加健壮。