编译原理词法分析器ll1lr0python实现代码.docx
《编译原理词法分析器ll1lr0python实现代码.docx》由会员分享,可在线阅读,更多相关《编译原理词法分析器ll1lr0python实现代码.docx(29页珍藏版)》请在冰点文库上搜索。
编译原理词法分析器ll1lr0python实现代码
计算机科学与通信工程学院
编译原理实验报告
题目:
1.词法分析器
2.LL
(1)分析器
3.LR(0)分析器
班级:
:
学号:
指导老师:
2017年月
一、实验题目
1.词法分析器
分析一段程序代码,将代码中的单词符号分解出来,并对其进行检查,输出token表和error表
2.LL
(1)文法分析器
分析给定文法。
求出文法的FIRST集,FOLLOW集,并构建分析表,对给定输入串进行分析。
3.LR(0)文法分析器
分析给定文法。
用Ꜫ_CLOSURE方法构造文法的LR(0)项目集规族,根据状态转换函数GO构造出文法的DFA,并转换为分析表,对给定输入串进行分析。
二、实验目的和要求
1.学会词法分析器的实现思路。
2.学会求解FIRST集,FOLLOW集,构造LL
(1)分析表。
3.学会Ꜫ_CLOSURE方法,状态转换函数GO,构造LR(0)分析表。
三、代码实现
1.词法分析器
program.txt中存放要分析的文法:
E->TR
R->+TR|-TR|~
T->FG
G->*FG|/FG|~
F->(E)|i
代码:
KEYWORD_LIST=['while','if','else','switch','case']
SEPARATOR_LIST=[';',':
',',','(',')','[',']','{','}']
OPERATOR_LIST1=['+','-','*']
OPERATOR_LIST2=['<=','<','==','=','>','>=']
CATEGORY_DICT={
#KEYWORD
"while":
{"while":
""},
"if":
{"if":
""},
"else":
{"else":
""},
"switch":
{"switch":
""},
"case":
{"case":
""},
#OPERATOR
"+":
{"+":
""},
"-":
{"-":
""},
"*":
{"*":
""},
"<=":
{"relop":
"LE"},
"<":
{"relop":
"LT"},
">=":
{"relop":
"GE"},
">":
{"relop":
"GT"},
"==":
{"relop":
"EQ"},
"=":
{"=":
""},
#SEPARATOR
";":
{";":
""},
":
":
{":
":
""},
",":
{",":
""},
"(":
{"(":
""},
")":
{")":
""},
"[":
{"]":
""},
"]":
{"]":
""},
"{":
{"{":
""},
"}":
{"}":
""},
}
CONSTANTTABLE=[]
TOKENTABLE=[]
OPERATORTABLE=[]
KEYWORDTABLE=[]
SEPARATORTABLE=[]
UNDEFINEDTABLE=[]
#READFILE
defread_file(path,method):
temp_str=""
try:
file=open(path,method)
forlineinfile:
line=line.replace('\n',"")
temp_str+=line
temp_str=str(temp_str)
exceptIOErrorase:
print(e)
exit()
finally:
file.close()
returntemp_str.strip()+""
#GETBE
defgetbe():
globaltoken
getchar()
token=""
return
#GETCHAR
defgetchar():
globalcharacter
globallocation
whileall_string[location]=="":
location=location+1
character=all_string[location]
returncharacter
#LINKTOKEN
defconcatenation():
globaltoken
globalcharacter
token=token+character
#ISNUMBER
defdigit():
if'0'<=character<='9':
returnTrue
returnFalse
#ISALPHABET
defletter():
if'A'<=character<='Z'or'a'<=character<='z':
returnTrue
returnFalse
#ISIDENTIFIER
defreserve():
iftokeninKEYWORD_LIST:
returnCATEGORY_DICT[token]
else:
return0
#RETRACT
defretract():
globallocation
globalcharacter
#location=location-1
character=""
return
#MAINFUNCTION
defmain():
globaltoken
globalcharacter
globallocation
s=getchar()
getbe()
if'a'<=s<='z'or'A'<=s<='Z':
whileletter()ordigit():
concatenation()
location=location+1
character=all_string[location]
retract()
c=reserve()
ifc==0:
TOKENTABLE.append(token)
print("这是标识符:
{'",token,"':
'",TOKENTABLE.index(token),"'}")
else:
KEYWORDTABLE.append(token)
print("这是保留字:
",CATEGORY_DICT[token])
elif'0'<=s<='9':
whiledigit():
concatenation()
location=location+1
character=all_string[location]
retract()
CONSTANTTABLE.append(token)
print("这是常数:
{'",token,"':
'",CONSTANTTABLE.index(token),"'}")
elifsinOPERATOR_LIST1:
location=location+1
OPERATORTABLE.append(s)
print("这是单操作符:
",CATEGORY_DICT[s])
elifsinOPERATOR_LIST2:
location=location+1
character=all_string[location]
ifcharacter=='=':
OPERATORTABLE.append(s+character)
print("这是双操作符:
",CATEGORY_DICT[s+character])
else:
retract()
location=location+1
OPERATORTABLE.append(s)
print("这是单操作符:
",CATEGORY_DICT[s])
elifsinSEPARATOR_LIST:
location=location+1
SEPARATORTABLE.append(s)
print("这是分隔符:
",CATEGORY_DICT[s])
else:
location+=1
UNDEFINEDTABLE.append(s)
print("error:
undefinedidentity:
'",s,"'")
if__name__=='__main__':
character=""
token=""
all_string=read_file("program.txt","r")
location=0
whilelocation+1main()
print('KEYWORDTABLE:
',KEYWORDTABLE)
print('TOKENTABLE:
',TOKENTABLE)
print('CONSTANTTABLE:
',CONSTANTTABLE)
print('OPERATORTABLE:
',OPERATORTABLE)
print('SEPARATORTABLE:
',SEPARATORTABLE)
运行结果:
2.LL
(1)分析器
program.txt中存放要分析的文法:
E->TR
R->+TR|-TR|~
T->FG
G->*FG|/FG|~
F->(E)|i
输入串:
i+i*i
代码:
NonTermSet=set()#非终结符集合
TermSet=set()#终结符集合
First={}#First集
Follow={}#Follow集
GramaDict={}#处理过的产生式
Code=[]#读入的产生式
AnalysisList={}#分析表
StartSym=""#开始符号
EndSym='#'#结束符号为“#“
Epsilon="~"#由于没有epsilon符号用“~”代替
#构造First集
defgetFirst():
globalNonTermSet,TermSet,First,Follow,FirstA
forXinNonTermSet:
First[X]=set()#初始化非终结符First集为空
forXinTermSet:
First[X]=set(X)#初始化终结符First集为自己
Change=True
whileChange:
#当First集没有更新则算法结束
Change=False
forXinNonTermSet:
forYinGramaDict[X]:
k=0
Continue=True
whileContinueandkifnotFirst[Y[k]]-set(Epsilon)<=First[X]:
#没有一样的就添加,并且改变标志
ifEpsilonnotinFirst[Y[k]]andY[k]inNonTermSetandk>0:
#Y1到Yi候选式都有~存在
Continue=False
else:
First[X]|=First[Y[k]]-set(Epsilon)
Change=True
ifEpsilonnotinFirst[Y[k]]:
Continue=False
k+=1
ifContinue:
#X->~或者Y1到Yk均有~产生式
First[X]|=set(Epsilon)
#FirstA[Y]|=set(Epsilon)
#构造Follow集
defgetFollow():
globalNonTermSet,TermSet,First,Follow,StartSym
forAinNonTermSet:
Follow[A]=set()
Follow[StartSym].add(EndSym)#将结束符号加入Follow[开始符号]中
Change=True
whileChange:
#当Follow集没有更新算法结束
Change=False
forXinNonTermSet:
forYinGramaDict[X]:
foriinrange(len(Y)):
ifY[i]inTermSet:
continue
Flag=True
forjinrange(i+1,len(Y)):
#continue
ifnotFirst[Y[j]]-set(Epsilon)<=Follow[Y[i]]:
Follow[Y[i]]|=First[Y[j]]-set(Epsilon)#步骤2FIRST(β)/~加入到FOLLOW(B)中。
Change=True
ifEpsilonnotinFirst[Y[j]]:
Flag=False
break
ifFlag:
ifnotFollow[X]<=Follow[Y[i]]:
#步骤3β->~,把FOLLOW(A)加到FOLLOW(B)中
Follow[Y[i]]|=Follow[X]
Change=True
#构造分析表
defgetAnalysisList():
fornonXinNonTermSet:
AnalysisList[nonX]=dict()
row=AnalysisList[nonX]
flag=True
forYinGramaDict[nonX]:
forterminTermSet:
ifterminFirst[Y[0]]andterminFirst[nonX]:
row[term]=nonX+'->'+Y
ifEpsiloninFirst[nonX]andflag:
flag=False
fortmpinFollow[nonX]:
row[tmp]=nonX+'->'+Epsilon
print('分析表:
')
fornonXinNonTermSet:
print('',nonX,AnalysisList[nonX])
#查询分析表
deffindAnalysisList(non,ter):
try:
tmp=AnalysisList[non][ter]
X,Y=tmp.split('->')
exceptExceptionase:
print('finderror')#M[A,a]为空,发现语法错误
print(e)
pass
returnY
#显示格式
defdisplay(show_list):
foriteminshow_list:
print('%-25s'%item,end='')
print()
#LL
(1)分析器
defanalyzer():
head=["Stack","StackTop",'NowStr',"InputStr","Action"]
#inputStr='i+i*i'+EndSym
inputStr=input("请输入表达式:
")+EndSym
print('分析过程:
')
display(head)
stack=[]
location=0
stack.append(EndSym)
stack.append(StartSym)
stack_top=stack.pop()
whilestack_top!
=EndSymandlocationifstack_topinTermSetandinputStr[location]==stack_top:
#x=a!
='#',
mess='匹配,弹出栈顶符号'+stack_top+'并读入输入串的下一符号'+inputStr[location+1]
display([stack,stack_top,inputStr[location],inputStr[location+1:
len(inputStr)],mess])
location+=1
stack_top=stack.pop()
elifstack_topinNonTermSetandinputStr[location]inTermSet:
#x为一非终结符A,则查M[A,a]
result=findAnalysisList(stack_top,inputStr[location])
ifresult==Epsilon:
#M[A,a]中的产生式为A->~,只将A弹出
mess='弹出栈顶符号'+stack_top+'因M['+stack_top+','+inputStr[location]+']中为'+stack_top
mess=mess+'->~,故不压栈'
else:
#M[A,a]中的产生式右部符号串按逆序逐一压入栈中
mess='弹出栈顶符号'+stack_top+',将M['+stack_top+','+inputStr[
location]+']中的'+stack_top+'->'+result+'的'+result
mess=mess+'逆序压栈'
tmp_list=[]
forcharinresult:
tmp_list.append(char)
tmp_list.reverse()
stack.extend(tmp_list)
display([stack,stack_top,inputStr[location],inputStr[location+1:
len(inputStr)],mess])
stack_top=stack.pop()
else:
break
ifstack_top==EndSymandinputStr[location]==EndSym:
#x=a='#'分析成功,分析器停止工作
display([[],'#','#','','匹配,分析成功'])
print()
print('************************')
print('*AnalysisSuccess*')
print('************************')
else:
print('AnalysisError')
#读取文法
defreadGrammar():
try:
file=open('grammar.txt','r')
forlineinfile:
line=line.replace('\n',"")
Code.append(line)
exceptIOErrorase:
print(e)
exit()
finally:
file.close()
returnCode
#初始化
definit():
globalNonTermSet,TermSet,First,Follow,StartSym,Code
Code=readGrammar()
n=int(len(Code))
print('产生式个数:
',n)
StartSym=Code[0][0]
print("开始符号:
",StartSym)
print('产生式:
G[',StartSym,']:
')
foriinrange(n):
X,Y=Code[i].split('->')
print('',Code[i])
NonTermSet.add(X)
Y=Y.split('|')
forYiinY:
TermSet|=set(Yi)
ifXnotinGramaDict:
GramaDict[X]=set()
GramaDict[X]|=set(Y)#生成产生式集
TermSet-=NonTermSet
print('非终结符:
',NonTermSet)
print('终结符:
',TermSet)
getFirst()
getFollow()
print("FIRST集:
")
forkinNonTermSet:
print('FIRST[',k,']:
',First[k])
print("FOLLOW集:
")
fork,vinFollow.items():
print('FOLLOW[',k,']:
',v)
TermSet-=set(Epsilon)
TermSet|=set(EndSym)
getAnalysisList()
analyzer()
init()
运行结果:
3.LR(0)分析器
program.txt中存放要分析的文法:
X->S
S->BB
B->aB
B->b
输入串:
abab
代码:
VN=[]#非终结符
VT=[]#终结符
NFA=[]#NFA表
DFA=[]#DFA表
grammar=[]#读入的文法
doted_grammar=[]#加点后的文法
VN2Int={}#非终结符映射
VT2Int={}#终结符映射
action=[]#action表
goto=[]#goto表
DFA_node=[]#DFA节点表
status_stack=[]#状态栈
symbol_stack=[]#符号栈
now