编译原理——词法分析C语言程序JAVA源代码
一、实验目的:
加深对词法分析器的工作过程的理解;加强对词法分析方法的掌握;能够采用一种编程语言实现简单的词法分析程序;能够使用自己编写的分析程序对简单的程序段进行词法分析。
二、实验内容:
1.要识别的词素:
(1)保留字或关键字:如:BEGIN、END、VAR、INTEGER、REAL、IF、THEN、READ、WRITE、WHILE。
(2)运算符: 如:+、-、*、/、:=、=、>、<、>=、<=
(3)标识符: 用户定义的变量名、常数名、过程名
(4)常数: 如:10、25、100、2.3等整数或实数
(5)界符: 如:‘,’、‘.’、‘;’、‘(’、‘)’、‘{’、‘}’、‘:’
2.词法分析过程所要完成的任务:
(1)给出源程序(要求一段完整的代码)
(2)滤空格
(3)识别保留字
(4)识别标识符
(5)拼数
(6)拼复合词(如:=)
(7)输出源程序的token(词法单元)序列。
需要进行词法分析的源程序:

识别后的输出序列:
源代码:
package cifaapp;import java.io.*;
import java.util.*;//存放token序列
/**
 * One lexical unit produced by the analyser: a category code paired with the
 * exact text that was recognised.
 *
 * <p>Both fields are assigned once in the constructor and never change, so a
 * token can be shared freely between collections.
 */
class token {
    /** Category code of the token (keyword, identifier, constant, operator, ...). */
    final Integer key;

    /** Source text of the token. */
    final String value;

    /**
     * Creates a token.
     *
     * @param key   category code of the token
     * @param value source text of the token
     */
    token(Integer key, String value) {
        this.key = key;
        this.value = value;
    }
}
public class app {// 单词种别码, 1-17为关键字种别码public static final int CHAR = 1;public static final int SHORT = 2;public static final int INT = 3;public static final int LONG = 4;public static final int FLOAT = 5;public static final int DOUBLE = 6;public static final int FINAL = 7;public static final int STATIC = 8;public static final int IF = 9;public static final int ELSE = 10;public static final int WHILE = 11;public static final int DO = 12;public static final int FOR = 13;public static final int BREAK = 14;public static final int CONTINUE = 15;public static final int VOID = 16;public static final int RETURN = 17;public static String key[]={"char","short","int","long","float","double","final","static","if","else","while","do","for","break","continue","void","return"}; // 20为标识符种别码public static final int ID = 20;// 30为常量种别码public static final int NUM = 30;// 31-40为运算符种别码public static final int AS = 31; // =public static final int EQ = 32; // ==public static final int GT = 33; // >public static final int LT = 34; // =public static final int LE = 36; // <=public static final int ADD = 37; // +public static final int SUB = 38; // -public static final int MUL = 39; // *public static final int DIV = 40; // /// 41-49为界限符种别码public static final int LP = 41; // (public static final int RP = 42; // )public static final int LBT = 43; // [public static final int RBT = 44; // ]public static final int LBS = 45; // {public static final int RBS = 46; // }public static final int COM = 47; // ,public static final int COL = 48; // :public static final int SEM = 49; // ;// -1为无法识别的字符标志码public static final int ERROR = -1;public static int errorNum = 0; // 记录词法分析错误的个数public static LinkedList list = new LinkedList();public static LinkedList tokenlist = new LinkedList();//以行为单位读取文件内容public static void readFileByLines() {File file = new File("code.txt");BufferedReader reader = null;try {reader = new BufferedReader(new FileReader(file));String tempString = null;// 一次读入一行,直到读入null为文件结束while 
((tempString = reader.readLine()) != null) {//System.out.println(tempString);separate(tempString);}reader.close();} catch (IOException e) {} finally {if (reader != null) {try {reader.close();} catch (IOException e1) {}}}}//正则表达式分离字符串,放入链表中public static void separate(String linestring) {String temp[]=linestring.split("\\s+|\\n|(?<=\\+)|(?=\\+)|(?<=-)|(?=-)|(?<=\\*)|(?=\\*)|(?<=/)|(?=/)|(?<=\\>)|(?=\\>)|(?<=\\<)|(?=\\<)|(?<==)|(?==)|(?<=\\()|(?=\\()|(?<=\\))|(?=\\))|(?<=\\[)|(?=\\[)|(?<=])|(?=])|(?<=\\{)|(?=\\{)|(?<=})|(?=})|(?<=,)|(?=,)|(?<=:)|(?=:)|(?<=;)|(?=;)");for(int i=0;i")) { // 运算符">"if (list.get(i+1).equals("=")) { // 若后面跟的是"=",则是运算符">="tokenlist.add(new token(GE, list.get(i) + list.get(++i)));} else { // 否则是运算符">"tokenlist.add(new token(GT, list.get(i)));}} else if (list.get(i).equals("<")) { // 运算符"<"if (list.get(i+1).equals("=")) { // 若后面跟的是"=",则是运算符"<="tokenlist.add(new token(LE, list.get(i) + list.get(++i)));} else { // 否则是运算符"<"tokenlist.add(new token(LT, list.get(i)));}} else if (list.get(i).equals("+")) { // 运算符"+"if ((list.get(i-1).equals("=") || list.get(i-1).equals("("))&& isNum(list.get(i+1))) { // 判断是否是有符号常量(正数)tokenlist.add(new token(NUM, list.get(i) + list.get(++i)));} else { // 否则是运算符"+"tokenlist.add(new token(ADD, list.get(i)));}} else if (list.get(i).equals("-")) { // 运算符"-"if ((list.get(i-1).equals("=") || list.get(i-1).equals("("))&& isNum(list.get(i+1))) { // 判断是否是有符号常量(负数)tokenlist.add(new token(NUM, list.get(i) + list.get(++i)));} else { // 否则是运算符"-"tokenlist.add(new token(SUB, list.get(i)));}} else if (list.get(i).equals("*")) { // 运算符"*"tokenlist.add(new token(MUL, list.get(i)));} else if (list.get(i).equals("/")) { // 运算符"/"tokenlist.add(new token(DIV, list.get(i)));} else if (list.get(i).equals("(")) { // 界限符"("tokenlist.add(new token(LP, list.get(i)));} else if (list.get(i).equals(")")) { // 界限符")"tokenlist.add(new token(RP, list.get(i)));} else if (list.get(i).equals("[")) { // 界限符"["tokenlist.add(new token(LBT, list.get(i)));} 
else if (list.get(i).equals("]")) { // 界限符"]"tokenlist.add(new token(RBT, list.get(i)));} else if (list.get(i).equals("{")) { // 界限符"{"tokenlist.add(new token(LBS, list.get(i)));} else if (list.get(i).equals("}")) { // 界限符"}"tokenlist.add(new token(RBS, list.get(i)));} else if (list.get(i).equals(",")) { // 界限符","tokenlist.add(new token(COM, list.get(i)));} else if (list.get(i).equals(":")) { // 界限符":"tokenlist.add(new token(COL, list.get(i)));} else if (list.get(i).equals(";")) { // 界限符";"tokenlist.add(new token(SEM, list.get(i)));} else if (list.get(i).charAt(0) >= '0' && list.get(i).charAt(0) <= '9') { // 判断是否是一位数字常量tokenlist.add(new token(NUM, list.get(i)));} else if (isLetter(list.get(i).charAt(0))) { // 判断是否是一位字母标识符tokenlist.add(new token(ID, list.get(i)));} else { // 否则是无法识别的字符tokenlist.add(new token(ERROR, list.get(i)));errorNum++;}} else if ((list.get(i).charAt(0) >= '0' && list.get(i).charAt(0) <= '9')|| list.get(i).charAt(0) == '.') { // 判断是否是正确的常量if (!isNum(list.get(i))) { // 不是常量,则是无法识别的字符tokenlist.add(new token(ERROR, list.get(i)));errorNum++;} else if ((list.get(i+1).charAt(0) == '+' || list.get(i+1).charAt(0) == '-')&& isNum(list.get(i+2))) { // 判断是否是有符号的常量tokenlist.add(new token(NUM, list.get(i) + list.get(++i) + list.get(++i)));} else { // 否则是无符号的常量tokenlist.add(new token(NUM, list.get(i)));}} else if (isKeyID(list.get(i)) != 0) { // 判断是否为关键字tokenlist.add(new token(isKeyID(list.get(i)), list.get(i)));} else if (isLetter(list.get(i).charAt(0)) || list.get(i).charAt(0) == '_') { // 判断是否为标识符(以字母或者下划线开头)tokenlist.add(new token(ID, list.get(i)));} else { // 否则是无法识别的单词tokenlist.add(new token(ERROR, list.get(i)));errorNum++;}}}//判断是否为数字public static boolean isNum(String str){for (int i = str.length();--i>=0;){if (!Character.isDigit(str.charAt(i))){return false;}}return true;}//判断是否为关键字static int isKeyID(String s){int i;for(i=0;i<6;i++){if(s.equals(key[i])) {return i;}}return 0;}static boolean isLetter(char c){if(c>='a' && c<='z')return true;return 
false;}public static void main(String args[]) {readFileByLines();analyse();System.out.println(list);System.out.println("词法分析结果如下:\n<单词种别码,单词> //所属类别");for(int i=0;i ");if (tokenlist.get(i).key > 0 && tokenlist.get(i).key < 20) {System.out.println("//关键字");} else if (tokenlist.get(i).key == 20) {System.out.println("//标识符");} else if (tokenlist.get(i).key == 30) {System.out.println("//常量");} else if (tokenlist.get(i).key > 30 && tokenlist.get(i).key <= 40) {System.out.println("//运算符");} else if (tokenlist.get(i).key > 40 && tokenlist.get(i).key < 50) {System.out.println("//界限符");} else if (tokenlist.get(i).key == -1) {System.out.println("//无法识别的符号");}}System.out.println("词法分析结束!共" + errorNum + "个无法识别的符号");}
}
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
