From c2219325c9d6c6ca945040776095cefe4ed16b34 Mon Sep 17 00:00:00 2001 From: AlexVanin Date: Fri, 21 Feb 2014 15:02:01 +0400 Subject: [PATCH] Added first final version of Compiler for VaninVM --- CompilerVVM/ByteCode.cs | 456 ++++++++++++++++++++++++++ CompilerVVM/CodeGen.cs | 68 ++++ CompilerVVM/CustomExceptions.cs | 35 ++ CompilerVVM/Parser.cs | 558 ++++++++++++++++++++++++++++++++ CompilerVVM/ProcedureScanner.cs | 53 +++ CompilerVVM/Program.cs | 43 +++ CompilerVVM/TokenScanner.cs | 293 +++++++++++++++++ CompilerVVM/grammar.txt | 43 +++ 8 files changed, 1549 insertions(+) create mode 100644 CompilerVVM/ByteCode.cs create mode 100644 CompilerVVM/CodeGen.cs create mode 100644 CompilerVVM/CustomExceptions.cs create mode 100644 CompilerVVM/Parser.cs create mode 100644 CompilerVVM/ProcedureScanner.cs create mode 100644 CompilerVVM/Program.cs create mode 100644 CompilerVVM/TokenScanner.cs create mode 100644 CompilerVVM/grammar.txt diff --git a/CompilerVVM/ByteCode.cs b/CompilerVVM/ByteCode.cs new file mode 100644 index 0000000..7fb0dfc --- /dev/null +++ b/CompilerVVM/ByteCode.cs @@ -0,0 +1,456 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; + +namespace CompilerVVM +{ + class pair + { + public int start; + public int end; + public pair(int one, int two) + { + start = one; + end = two; + } + } + + class str_header + { + public byte[] signature + { get; set; } + public int version + { get; set; } + public int const_count + { get; set; } + public int size_const + { get; set; } + + public str_header() + { + signature = new byte[2]; + signature[0] = 0xBA; + signature[1] = 0xBA; + version = 0100; + } + + public byte[] Serialize() + { + using (MemoryStream m = new MemoryStream()) + { + using (BinaryWriter writer = new BinaryWriter(m)) + { + writer.Write(signature); + writer.Write(version); + writer.Write(const_count); + writer.Write(size_const); 
+ } + return m.ToArray(); + } + } + } + + class funcH_common + { + public ushort start_id + { get; set; } + public int count_of_funcs + { get; set; } + + public funcH_common(ushort id, int count) + { + start_id = id; + count_of_funcs = count; + } + + public byte[] Serialize() + { + using (MemoryStream m = new MemoryStream()) + { + using (BinaryWriter writer = new BinaryWriter(m)) + { + writer.Write(start_id); + writer.Write(count_of_funcs); + + } + return m.ToArray(); + } + } + } + + class funcH_signature + { + public int size_func + { get; set; } + public int size_bytecode + { get; set; } + public int size_signature + { get; set; } + + public byte[] Serialize() + { + using (MemoryStream m = new MemoryStream()) + { + using (BinaryWriter writer = new BinaryWriter(m)) + { + writer.Write(size_func); + writer.Write(size_bytecode); + writer.Write(size_signature); + } + return m.ToArray(); + } + } + } + + class funcH_bytecode + { + public ushort id + { get; set; } + public int count_locals + { get; set; } + public int count_args + { get; set; } + + public byte[] Serialize() + { + using (MemoryStream m = new MemoryStream()) + { + using (BinaryWriter writer = new BinaryWriter(m)) + { + writer.Write(id); + writer.Write(count_locals); + writer.Write(count_args); + } + return m.ToArray(); + } + } + + public funcH_bytecode() + { + count_locals = 0; + count_args = 0; + } + } + + public class ByteCode + { + public static void GenerateByteCode(string[] code, string outname) + { + Dictionary ConstID_VALUE = new Dictionary(); + Dictionary ConstKEY_ID = new Dictionary(); + Dictionary CodeMARK_POS = new Dictionary(); + + List FuncPos = new List(); + + int[] pos_d = new int[2] { 0, 0 }; + string[] source = code; + List src = PositionAnalyse(source, ref pos_d, ref FuncPos, ref CodeMARK_POS); + + + using (var bw = new BinaryWriter(File.Open(outname, FileMode.OpenOrCreate))) + { + + HeaderAnalyse(src, pos_d, bw, ref ConstID_VALUE, ref ConstKEY_ID); + funcH_common FuncCommonH = new 
funcH_common(CRC16_alg("main"), FuncPos.Count); + bw.Write(FuncCommonH.Serialize()); + for (int i = 0; i < FuncPos.Count; i++) + { + FuncAnalyse(src, FuncPos[i], bw, ConstKEY_ID, CodeMARK_POS); + } + } + } + + private static List PositionAnalyse(string[] input, ref int[] posD, ref List posC, ref Dictionary marks) + { + List src = new List(); + bool func_flag = false; + int pos1 = 0, pos2 = 0, numline = 0; + foreach (string s in input) + if (s != "") + { + if (func_flag == true && Regex.IsMatch(s, @"\w+:")) + { + marks.Add(s.Trim(' ', '\t', ':'), numline); + } + else + { + src.Add(s.Trim(' ', '\t')); + + if (s.Contains(".data")) + posD[0] = src.Count - 1; + if (s.Contains(".endd")) + posD[1] = src.Count - 1; + + if (s.Contains(".proc")) + { + numline = 0; + pos1 = src.Count - 1; + func_flag = true; + } + + if (s.Contains(".endp")) + { + pos2 = src.Count - 1; + if (func_flag == true) + { + func_flag = false; + posC.Add(new pair(pos1, pos2)); + } + } + numline++; + } + } + return src; + } + private static void HeaderAnalyse(List src, int[] pos, BinaryWriter bw, ref Dictionary id_v, ref Dictionary k_id) + { + str_header ConstH = new str_header(); + string pattern = "\".*\""; + int j = 1; + for (int i = pos[0] + 1; i < pos[1]; i++) + { + int position = src[i].IndexOf(" "); + string key = src[i].Substring(0, position); + string value = Regex.Match(src[i], pattern).ToString().Trim('"').Replace(@"\n", "\n").Replace(@"\r", "\r") + "\0"; + id_v.Add(j, value); k_id.Add(key, j++); + ConstH.const_count++; ConstH.size_const += (value.Length); + } + bw.Write(ConstH.Serialize()); + for (int i = 1; i < j; i++) + { + bw.Write(Encoding.ASCII.GetBytes(id_v[i])); + } + } + private static void FuncAnalyse(List code, pair pos, BinaryWriter bw, Dictionary dictStr, Dictionary dictJmp) + { + string name = ""; + MemoryStream str = new MemoryStream(); + funcH_signature sign = new funcH_signature(); + funcH_bytecode bc = new funcH_bytecode(); + + + string[] current_str = 
code[pos.start].Split(' '); + switch (current_str.Length) + { + case 4: + bc.count_args = System.Convert.ToInt32(current_str[3]); + bc.count_locals = System.Convert.ToInt32(current_str[2]); + name = current_str[1]; + break; + case 3: + bc.count_locals = System.Convert.ToInt32(current_str[2]); + name = current_str[1]; + break; + + case 2: + name = current_str[1]; + break; + } + bc.id = CRC16_alg(name); + name += "\0"; + sign.size_signature = name.Length; + using (BinaryWriter writer = new BinaryWriter(str)) + { + int j = 1; + for (int i = pos.start + 1; i < pos.end; i++) + { + current_str = code[i].Split(' '); + opcode current_opc = (opcode)Enum.Parse(typeof(opcode), current_str[0].ToUpper()); + writer.Write((byte)current_opc); + + if (current_opc == opcode.DLOAD) + writer.Write(Convert.ToDouble(current_str[1])); + else if (current_opc == opcode.ILOAD) + writer.Write(Convert.ToInt64(current_str[1])); + else if (current_opc == opcode.SLOAD) + writer.Write((ushort)dictStr[current_str[1]]); + else if (current_opc == opcode.CALL) + writer.Write(CRC16_alg(current_str[1])); + else if (threebytes.Contains(current_opc)) + writer.Write(ushort.Parse(current_str[1])); + else if (fivebytes.Contains(current_opc)) + { + writer.Write(CRC16_alg(current_str[1])); + writer.Write(ushort.Parse(current_str[2])); + } + else if (jumps.Contains(current_opc)) + writer.Write(FindOffset(code, pos, j, ((ushort)dictJmp[current_str[1]] - j))); + j++; + } + } + + byte[] bcode = str.ToArray(); + sign.size_bytecode = bcode.Length; + sign.size_func = 22 + sign.size_bytecode + sign.size_signature; + + bw.Write(sign.Serialize()); + bw.Write(Encoding.ASCII.GetBytes(name)); + bw.Write(bc.Serialize()); + bw.Write(bcode); + } + + private static short FindOffset(List code, pair pos, int curr_pos, int off) + { + short result = 0; + if (off > 0) + { + for (int i = curr_pos + 1; i < curr_pos + off; i++) + { + result += OpCodeSize((opcode)Enum.Parse(typeof(opcode), code[pos.start + i].Split(' 
')[0].ToUpper())); + } + } + else + { + for (int i = curr_pos; i >= curr_pos + off; i--) + { + result -= OpCodeSize((opcode)Enum.Parse(typeof(opcode), code[pos.start + i].Split(' ')[0].ToUpper())); + } + } + return result; + } + + private static short OpCodeSize(opcode opc) + { + short result = 0; + if (jumps.Contains(opc) || threebytes.Contains(opc)) + result += 3; + else if (fivebytes.Contains(opc)) + result += 5; + else if (ninebytes.Contains(opc)) + result += 9; + else result++; + return result; + + } + + private static ushort CRC16_alg(string msg) + { + byte[] text = Encoding.ASCII.GetBytes(msg); + const ushort polinom = 0xa001; + ushort code = 0xffff; + + for (int i = 0, size = text.Length; i < size; ++i) + { + code ^= (ushort)(text[i] << 8); + + for (uint j = 0; j < 8; ++j) + { + code >>= 1; + if ((code & 0x01) != 0) code ^= polinom; + } + } + + return code; + } + + /*static List onebyte = new List{opcode.INVALID, opcode.DLOAD0, opcode.ILOAD0, opcode.SLOAD0, opcode.DLOAD1, opcode.ILOAD1, opcode.DLOADM1, + opcode.ILOADM1, opcode.DADD, opcode.IADD, opcode.DSUB, opcode.ISUB, opcode.DMUL, opcode.IMUL, opcode.DDIV, opcode.IDIV, opcode.IMOD, opcode.DNEG, + opcode.INEG, opcode.IAOR, opcode.IAAND, opcode.IAXOR, opcode.IPRINT, opcode.DPRINT, opcode.SPRINT, opcode.I2D, opcode.D2I, opcode.S2I, opcode.SWAP, + opcode.POP, opcode.LOADDVAR0, opcode.LOADDVAR1, opcode.LOADDVAR2, opcode.LOADDVAR3, opcode.LOADIVAR0, opcode.LOADIVAR1, opcode.LOADIVAR2, opcode.LOADIVAR3, + opcode.LOADSVAR0, opcode.LOADSVAR1, opcode.LOADSVAR2, opcode.LOADSVAR3, opcode.STOREDVAR0, opcode.STOREDVAR1, opcode.STOREDVAR2, opcode.STOREDVAR3, + opcode.STOREIVAR0, opcode.STOREIVAR1, opcode.STOREIVAR2, opcode.STOREIVAR3, opcode.STORESVAR0, opcode.STORESVAR1, opcode.STORESVAR2, opcode.STORESVAR3, + opcode.ICMP, opcode.DCMP, opcode.DUMP, opcode.STOP, opcode.RETURN, opcode.BREAK};*/ + + static List ninebytes = new List { opcode.DLOAD, opcode.ILOAD }; + + static List threebytes = new List { opcode.LOADDVAR, 
opcode.LOADIVAR, opcode.LOADSVAR, opcode.STOREDVAR, + opcode.STOREIVAR, opcode.STORESVAR, opcode.SLOAD, opcode.CALL}; + + static List fivebytes = new List {opcode.LOADCTXDVAR, opcode.LOADCTXIVAR, opcode.LOADCTXSVAR, opcode.STORECTXDVAR, + opcode.STORECTXIVAR, opcode.STORECTXSVAR}; + + static List jumps = new List {opcode.JA, opcode.IFICMPE, opcode.IFICMPG, opcode.IFICMPGE, opcode.IFICMPL, + opcode.IFICMPLE, opcode.IFICMPNE}; + } + + enum opcode + { + INVALID, + DLOAD, + ILOAD, + SLOAD, + DLOAD0, + ILOAD0, + SLOAD0, + DLOAD1, + ILOAD1, + DLOADM1, + ILOADM1, + DADD, + IADD, + DSUB, + ISUB, + DMUL, + IMUL, + DDIV, + IDIV, + IMOD, + DNEG, + INEG, + IAOR, + IAAND, + IAXOR, + IPRINT, + DPRINT, + SPRINT, + I2D, + D2I, + S2I, + SWAP, + POP, + LOADDVAR0, + LOADDVAR1, + LOADDVAR2, + LOADDVAR3, + LOADIVAR0, + LOADIVAR1, + LOADIVAR2, + LOADIVAR3, + LOADSVAR0, + LOADSVAR1, + LOADSVAR2, + LOADSVAR3, + STOREDVAR0, + STOREDVAR1, + STOREDVAR2, + STOREDVAR3, + STOREIVAR0, + STOREIVAR1, + STOREIVAR2, + STOREIVAR3, + STORESVAR0, + STORESVAR1, + STORESVAR2, + STORESVAR3, + LOADDVAR, + LOADIVAR, + LOADSVAR, + STOREDVAR, + STOREIVAR, + STORESVAR, + LOADCTXDVAR, + LOADCTXIVAR, + LOADCTXSVAR, + STORECTXDVAR, + STORECTXIVAR, + STORECTXSVAR, + DCMP, + ICMP, + JA, + IFICMPNE, + IFICMPE, + IFICMPG, + IFICMPGE, + IFICMPL, + IFICMPLE, + DUMP, + STOP, + CALL, + RETURN, + BREAK + }; +} diff --git a/CompilerVVM/CodeGen.cs b/CompilerVVM/CodeGen.cs new file mode 100644 index 0000000..dea005a --- /dev/null +++ b/CompilerVVM/CodeGen.cs @@ -0,0 +1,68 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace CompilerVVM +{ + class CodeGen + { + Dictionary ConstData; + Dictionary Methods; + public string[] asm { get; set; } + + + public CodeGen(Dictionary ConstData, Dictionary Methods) + { + this.ConstData = ConstData; + this.Methods = Methods; + asm = null; + + foreach (KeyValuePair p in Methods) + { + Optimization(p.Value); + 
} + + + FillAsmCode(); + } + + private void FillAsmCode() + { + List Code = new List(); + if (ConstData.Count > 0) + { + Code.Add(".data"); + foreach (KeyValuePair pair in ConstData) + { + Code.Add(string.Format("{0} \"{1}\"", pair.Value, pair.Key)); + } + Code.Add(".endd"); + } + + foreach (KeyValuePair entry in Methods) + { + for (int i = 0; i < entry.Value.Code.Count; i++) + Code.Add(entry.Value.Code[i]); + } + + asm = Code.ToArray(); + } + + private void Optimization(MethodAtom Method) + { + List OptimizationSub = new List(){"LOADDVAR", "LOADIVAR", "LOADSVAR", "STOREDVAR", "STOREIVAR", + "STORESVAR"}; + + for (int i = 0; i < Method.Code.Count; i++ ) + { + string[] parts = Method.Code[i].Split(' '); + if (OptimizationSub.Contains(parts[0]) && int.Parse(parts[1]) < 4) + { + Method.Code[i] = Method.Code[i].Replace(" ", ""); + } + } + } + } +} diff --git a/CompilerVVM/CustomExceptions.cs b/CompilerVVM/CustomExceptions.cs new file mode 100644 index 0000000..9a4fff8 --- /dev/null +++ b/CompilerVVM/CustomExceptions.cs @@ -0,0 +1,35 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace CompilerVVM +{ + public class ScannerException : Exception + { + public ScannerException(string message) + : base(message) + { + ; + } + } + + public class ProcedureException : Exception + { + public ProcedureException(string message) + : base(message) + { + ; + } + } + + public class ParserException : Exception + { + public ParserException(string message) + : base(message) + { + ; + } + } +} diff --git a/CompilerVVM/Parser.cs b/CompilerVVM/Parser.cs new file mode 100644 index 0000000..614d62d --- /dev/null +++ b/CompilerVVM/Parser.cs @@ -0,0 +1,558 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace CompilerVVM +{ + public class Parser + { + int pointer; + IList Tokens; + int idvar; + Dictionary Constants; + Dictionary 
Methods; + int jumpid; + + public Parser(IList tokens, Dictionary TextConst, Dictionary Methods) + { + Tokens = tokens; + pointer = 0; + jumpid = 0; + Constants = TextConst; + this.Methods = Methods; + ParseMethod(); + } + + private void ParseMethod() + { + // := + + while (pointer != Tokens.Count) + { + // := method (*) + // := int | double | string + // := | void + MethodAtom method = null; + if (!Tokens[pointer++].Equals("method")) + throw new ParserException("No method detected"); + else + { + method = Methods[Tokens[++pointer].ToString()]; + idvar = method.NumOfParams; + method.Code.Add(String.Format(".proc {0}", method.Name, method.NumOfParams)); + + pointer += 3 + (2 * method.NumOfParams); + if (Tokens[pointer].Equals(OP.OpenBlock)) + { + pointer++; + ParseBlock(method); + } + else throw new ParserException("No code block in method: " + method.Name); + + if (method.Name == "main") + method.Code.Add("STOP"); + else if (method.Type == "void") + method.Code.Add("RETURN"); + else + { + if (method.Code[method.Code.Count - 1] != "RETURN") + throw new ParserException("No return sequence in method:" + method.Name); + } + method.Code.Add(".endp"); + + method.NumOfLocals = method.Variables.Count; + method.Code[0] = String.Format(".proc {0} {1} {2}", method.Name, method.NumOfLocals, method.NumOfParams); + } + } + + } + + private void ParseBlock(MethodAtom method) + { + while (!Tokens[pointer].Equals(OP.CloseBlock)) + { + if (Tokens[pointer].Equals(OP.Semicolon)) + { + pointer++; + } + else if (code_types.Contains(Tokens[pointer].ToString())) + DeclareVar(method); + else if (Tokens[pointer + 1].Equals(OP.Assigment)) + { + List NeededVars = method.Variables.FindAll(x => x.Name == Tokens[pointer].ToString()); + if (NeededVars.Count == 0) + throw new ParserException(string.Format("Variable {0} is not defined", Tokens[pointer].ToString())); + else + { + pointer += 2; + ParseExpression(method, NeededVars[NeededVars.Count - 1].Type); + switch (NeededVars[NeededVars.Count - 
1].Type) + { + case "int": + method.Code.Add(string.Format("STOREIVAR {0}", NeededVars[NeededVars.Count - 1].ID)); + break; + case "double": + method.Code.Add(string.Format("STOREDVAR {0}", NeededVars[NeededVars.Count - 1].ID)); + break; + case "string": + method.Code.Add(string.Format("STORESVAR {0}", NeededVars[NeededVars.Count - 1].ID)); + break; + } + } + } + else if (Tokens[pointer].ToString() == "return") + { + ParseSingleExpr(method, pointer + 1, method.Type); + method.Code.Add("RETURN"); + pointer += 3; + } + else if (Tokens[pointer].ToString() == "if") + { + if (!Tokens[pointer + 1].Equals(OP.OpenParam) || !Tokens[pointer + 5].Equals(OP.CloseParam)) + throw new ParserException("Wrong if statement"); + string var1 = GetTypeOfVar(method, pointer + 2); + string var2 = GetTypeOfVar(method, pointer + 4); + if (var1 != var2) + throw new ParserException("Incompatible variable types in if statement"); + int jump = jumpid++; + ParseSingleExpr(method, pointer + 4, var2); + ParseSingleExpr(method, pointer + 2, var1); + + switch ((OP)Tokens[pointer + 3]) + { + case OP.Equal: + method.Code.Add(string.Format("IFICMPNE jump_{0}", jump)); + break; + case OP.NotEqual: + method.Code.Add(string.Format("IFICMPE jump_{0}", jump)); + break; + case OP.Greater: + method.Code.Add(string.Format("IFICMPLE jump_{0}", jump)); + break; + case OP.GreaterEqual: + method.Code.Add(string.Format("IFICMPL jump_{0}", jump)); + break; + case OP.Less: + method.Code.Add(string.Format("IFICMPGE jump_{0}", jump)); + break; + case OP.LessEqual: + method.Code.Add(string.Format("IFICMPG jump_{0}", jump)); + break; + } + method.Code.Add("POP"); + method.Code.Add("POP"); + if (!Tokens[pointer + 6].Equals(OP.OpenBlock)) + throw new ParserException("No { code } in if statement"); + pointer += 7; + ParseBlock(method); + if (Tokens[pointer].ToString() != "else") + { + method.Code.Add(string.Format("jump_{0}:", jump)); + method.Code.Add("POP"); + method.Code.Add("POP"); + } + else + { + int jump2 = 
jumpid++; + method.Code.Add(string.Format("JA jump_{0}", jump2)); + if (!Tokens[pointer + 1].Equals(OP.OpenBlock)) + throw new ParserException("No { code } in else statement"); + method.Code.Add(string.Format("jump_{0}:", jump)); + method.Code.Add("POP"); + method.Code.Add("POP"); + pointer += 2; + ParseBlock(method); + method.Code.Add(string.Format("jump_{0}:", jump2)); + } + } + else if (Tokens[pointer].ToString() == "while") + { + if (!Tokens[pointer + 1].Equals(OP.OpenParam) || !Tokens[pointer + 5].Equals(OP.CloseParam)) + throw new ParserException("Wrong while statement"); + string var1 = GetTypeOfVar(method, pointer + 2); + string var2 = GetTypeOfVar(method, pointer + 4); + if (var1 != var2) + throw new ParserException("Incompatible variable types in while statement"); + int jump = jumpid++; + int jump2 = jumpid++; + + method.Code.Add(string.Format("jump_{0}:", jump)); + ParseSingleExpr(method, pointer + 4, var2); + ParseSingleExpr(method, pointer + 2, var1); + + switch ((OP)Tokens[pointer + 3]) + { + case OP.Equal: + method.Code.Add(string.Format("IFICMPNE jump_{0}", jump2)); + break; + case OP.NotEqual: + method.Code.Add(string.Format("IFICMPE jump_{0}", jump2)); + break; + case OP.Greater: + method.Code.Add(string.Format("IFICMPLE jump_{0}", jump2)); + break; + case OP.GreaterEqual: + method.Code.Add(string.Format("IFICMPL jump_{0}", jump2)); + break; + case OP.Less: + method.Code.Add(string.Format("IFICMPGE jump_{0}", jump2)); + break; + case OP.LessEqual: + method.Code.Add(string.Format("IFICMPG jump_{0}", jump2)); + break; + } + method.Code.Add("POP"); + method.Code.Add("POP"); + if (!Tokens[pointer + 6].Equals(OP.OpenBlock)) + throw new ParserException("No { code } in while statement"); + pointer += 7; + ParseBlock(method); + method.Code.Add(string.Format("JA jump_{0}", jump)); + method.Code.Add(string.Format("jump_{0}:", jump2)); + } + else if (Tokens[pointer].ToString() == "do") + { + int jump = jumpid++; + int jump2 = jumpid++; + 
method.Code.Add(string.Format("JA jump_{0}", jump2)); + method.Code.Add(string.Format("jump_{0}:", jump)); + method.Code.Add("POP"); + method.Code.Add("POP"); + method.Code.Add(string.Format("jump_{0}:", jump2)); + pointer += 2; + ParseBlock(method); + + if (!Tokens[pointer + 1].Equals(OP.OpenParam) || !Tokens[pointer + 5].Equals(OP.CloseParam) || + Tokens[pointer].ToString() != "until") + throw new ParserException("Wrong until statement"); + string var1 = GetTypeOfVar(method, pointer + 2); + string var2 = GetTypeOfVar(method, pointer + 4); + if (var1 != var2) + throw new Exception("Incompatible variable types in until statement"); + + ParseSingleExpr(method, pointer + 4, var2); + ParseSingleExpr(method, pointer + 2, var1); + + switch ((OP)Tokens[pointer + 3]) + { + case OP.Equal: + method.Code.Add(string.Format("IFICMPNE jump_{0}", jump)); + break; + case OP.NotEqual: + method.Code.Add(string.Format("IFICMPE jump_{0}", jump)); + break; + case OP.Greater: + method.Code.Add(string.Format("IFICMPLE jump_{0}", jump)); + break; + case OP.GreaterEqual: + method.Code.Add(string.Format("IFICMPL jump_{0}", jump)); + break; + case OP.Less: + method.Code.Add(string.Format("IFICMPGE jump_{0}", jump)); + break; + case OP.LessEqual: + method.Code.Add(string.Format("IFICMPG jump_{0}", jump)); + break; + } + method.Code.Add("POP"); + method.Code.Add("POP"); + pointer += 7; + } + else if (Tokens[pointer].ToString() == "print") + { + pointer += 2; + string type = GetTypeOfVar(method, pointer); + ParseSingleExpr(method, pointer, type); + pointer += 3; + switch (type) + { + case "int": + method.Code.Add("IPRINT"); + break; + case "double": + method.Code.Add("DPRINT"); + break; + case "string": + method.Code.Add("SPRINT"); + break; + } + } + else if (Tokens[pointer+1].Equals(OP.OpenParam)) + { + ParseCall(method); + } + } + pointer++; + } + private void ParseExpression(MethodAtom method, string type) + { + if (Tokens[pointer + 1].Equals(OP.Semicolon)) + { + ParseSingleExpr(method, 
pointer, type); + pointer += 2; + } + else if (Tokens[pointer + 1].Equals(OP.OpenParam)) + { + ParseCall(method, type); + } + else if (Tokens[pointer].Equals(OP.Sub)) + { + ParseSingleExpr(method, pointer + 1, type); + switch (type) + { + case "int": + method.Code.Add("INEG"); + break; + case "double": + method.Code.Add("DNEG"); + break; + default: + throw new ParserException("Incompatible types"); + } + } + else if (Tokens[pointer + 1].Equals(OP.Add)) + { + int prevpointer = pointer; + pointer += 2; + + ParseExpression(method, type); + ParseSingleExpr(method, prevpointer, type); + + switch (type) + { + case "int": + method.Code.Add("IADD"); + break; + case "double": + method.Code.Add("DADD"); + break; + default: + throw new ParserException("Incompatible types"); + } + } + else if (Tokens[pointer + 1].Equals(OP.Sub)) + { + int prevpointer = pointer; + pointer += 2; + + ParseExpression(method, type); + ParseSingleExpr(method, prevpointer, type); + + switch (type) + { + case "int": + method.Code.Add("ISUB"); + break; + case "double": + method.Code.Add("DSUB"); + break; + default: + throw new ParserException("Incompatible types"); + } + } + else if (Tokens[pointer + 1].Equals(OP.Mul)) + { + int prevpointer = pointer; + pointer += 2; + + ParseExpression(method, type); + ParseSingleExpr(method, prevpointer, type); + + switch (type) + { + case "int": + method.Code.Add("IMUL"); + break; + case "double": + method.Code.Add("DMUL"); + break; + default: + throw new ParserException("Incompatible types"); + } + } + else if (Tokens[pointer + 1].Equals(OP.Div)) + { + int prevpointer = pointer; + pointer += 2; + + ParseExpression(method, type); + ParseSingleExpr(method, prevpointer, type); + + switch (type) + { + case "int": + method.Code.Add("IDIV"); + break; + case "double": + method.Code.Add("DDIV"); + break; + default: + throw new ParserException("Incompatible types"); + } + } + + else if (Tokens[pointer + 1].Equals(OP.Mod)) + { + int prevpointer = pointer; + pointer += 2; + + 
ParseExpression(method, type); + ParseSingleExpr(method, prevpointer, type); + + switch (type) + { + case "int": + method.Code.Add("IMOD"); + break; + default: + throw new ParserException("Incompatible types"); + } + } + } + + private void ParseCall(MethodAtom method, string type = null) + { + if (Methods.ContainsKey(Tokens[pointer].ToString())) + { + MethodAtom CallMethod = Methods[Tokens[pointer].ToString()]; + if (type != null && type != CallMethod.Type) + throw new ParserException("Incompatible types when call method" + CallMethod.Name); + + pointer += 2; + List param = new List(); + while (!Tokens[pointer++].Equals(OP.CloseParam)) + { + param.Add(Tokens[pointer]); + } + + if (param.Count != CallMethod.NumOfParams) + throw new ParserException("Wrong params when call method" + CallMethod.Name); + + for (int i = 0; i < param.Count; i++) + { + ParseSingleExpr(method, pointer - 2 - i, CallMethod.Variables[i].Type); + } + + method.Code.Add(string.Format("CALL {0}", CallMethod.Name)); + + } + else throw new ParserException("Undefined method to call"); + } + + private void ParseSingleExpr(MethodAtom method, int pointer, string type) + { + if (Tokens[pointer] is StringBuilder) + { + if (type == "string") + method.Code.Add(string.Format("SLOAD {0}", Constants[Tokens[pointer++].ToString()])); + else + throw new ParserException("Incompatible type"); + } + else if (Tokens[pointer] is Number) + { + switch (type) + { + case "int": + if (!Tokens[pointer].ToString().Contains(".")) + { + method.Code.Add(string.Format("ILOAD {0}", Tokens[pointer++].ToString())); + break; + } + else throw new ParserException("Incompatible type"); + + case "double": + method.Code.Add(string.Format("DLOAD {0}", Tokens[pointer++].ToString())); + break; + } + } + else + { + List NeededVars = method.Variables.FindAll(x => x.Name == Tokens[pointer].ToString()); + if (NeededVars.Count != 0) + { + if (NeededVars[NeededVars.Count - 1].Type != type) + throw new ParserException("Incompatible type"); + else 
+ { + switch (type) + { + case "int": + method.Code.Add(string.Format("LOADIVAR {0}", NeededVars[NeededVars.Count - 1].ID)); + break; + case "double": + method.Code.Add(string.Format("LOADDVAR {0}", NeededVars[NeededVars.Count - 1].ID)); + break; + case "string": + method.Code.Add(string.Format("LOADSVAR {0}", NeededVars[NeededVars.Count - 1].ID)); + break; + } + } + } + else throw new ParserException("Can't parse sequence"); + } + } + + private void DeclareVar(MethodAtom method) + { + method.Variables.Add(new Variable(idvar++, Tokens[pointer].ToString(), Tokens[pointer + 1].ToString())); + if (Tokens[pointer + 2].Equals(OP.Semicolon)) + { + pointer += 3; return; + } + else if (Tokens[pointer + 2].Equals(OP.Assigment)) + { + pointer += 3; + ParseExpression(method, method.Variables[idvar - 1].Type); + switch (method.Variables[idvar - 1].Type) + { + case "int": + method.Code.Add(string.Format("STOREIVAR {0}", idvar - 1)); + break; + + case "double": + method.Code.Add(string.Format("STOREDVAR {0}", idvar - 1)); + break; + + case "string": + method.Code.Add(string.Format("STORESVAR {0}", idvar - 1)); + break; + } + + if (Tokens[pointer].Equals(OP.Semicolon)) + { + pointer++; return; + } + } + else + throw new ParserException("Wrong variable defenition"); + } + + private string GetTypeOfVar(MethodAtom method, int pointer) + { + string result = null; + if (Tokens[pointer] is StringBuilder) + result = "string"; + else if (Tokens[pointer] is Number) + { + if (Tokens[pointer].ToString().Contains(".")) + result = "double"; + else result = "int"; + } + else if (Methods.ContainsKey(Tokens[pointer].ToString())) + { + result = Methods[Tokens[pointer].ToString()].Type; + } + else + { + List isVariable = method.Variables.FindAll(x => x.Name == Tokens[pointer].ToString()); + if (isVariable.Count == 0) + throw new ParserException("No defined variable with name "+Tokens[pointer].ToString()); + else result = isVariable[isVariable.Count - 1].Type; + } + return result; + } + + List 
code_types = new List() { "int", "double", "string" }; + } +} diff --git a/CompilerVVM/ProcedureScanner.cs b/CompilerVVM/ProcedureScanner.cs new file mode 100644 index 0000000..bd16810 --- /dev/null +++ b/CompilerVVM/ProcedureScanner.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace CompilerVVM +{ + class ProcedureScanner + { + public Dictionary Methods { get; set; } + int pointer; + + public ProcedureScanner(IList Tokens) + { + // TODO: Complete member initialization + pointer = 0; + ScanForMethods(Tokens); + } + + private void ScanForMethods(IList Tokens) + { + Methods = new Dictionary(); + while (pointer != Tokens.Count) + { + MethodAtom method = null; + if (Tokens[pointer++].Equals("method")) + { + if (!Tokens[pointer].Equals("void") && !Tokens[pointer].Equals("int") && + !Tokens[pointer].Equals("double") && !Tokens[pointer].Equals("string")) + { + throw new ProcedureException("Wrong method defenition"); + } + int idvar = 0; + method = new MethodAtom(Tokens[++pointer].ToString()); + method.Type = Tokens[pointer++ - 1].ToString(); + if (!Tokens[pointer++].Equals(OP.OpenParam)) + throw new ProcedureException("Wrong method defenition of method: "+method.Name); + else + { + while (!Tokens[pointer].Equals(OP.CloseParam)) + { + Variable a = new Variable(idvar++, Tokens[pointer].ToString(), Tokens[pointer + 1].ToString()); + method.Variables.Add(a); + method.NumOfParams++; + pointer += 2; + } + } + Methods.Add(method.Name, method); + } + } + } + } +} diff --git a/CompilerVVM/Program.cs b/CompilerVVM/Program.cs new file mode 100644 index 0000000..1226d5a --- /dev/null +++ b/CompilerVVM/Program.cs @@ -0,0 +1,43 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace CompilerVVM +{ + class Program + { + static void Main(string[] args) + { + if (args.Length != 1) + { + Console.WriteLine("No input file"); + return; + } + 
namespace CompilerVVM
{
    /// <summary>
    /// Splits source text into a flat list of tokens.
    /// </summary>
    /// <remarks>
    /// The kind of a token is carried by its runtime type:
    /// <c>string</c> for identifiers and keywords, <c>StringBuilder</c> for string
    /// literals, <c>Number</c> for numeric literals and <c>OP</c> for punctuation
    /// and operators.
    /// NOTE(review): the generic type arguments (<c>IList&lt;object&gt;</c>,
    /// <c>Dictionary&lt;string, string&gt;</c>) are inferred from how the collections
    /// are used in this class - confirm they match the Parser and Program signatures.
    /// </remarks>
    class TokenScanner
    {
        // Sequence number used to name the next new string constant ("text_const_N").
        int idconst = 1;
        private IList<object> tokens;

        /// <summary>Tokens produced by the scan, in source order.</summary>
        public IList<object> Tokens { get { return tokens; } }

        /// <summary>
        /// Reads <paramref name="input"/> to the end and appends one token per lexeme
        /// to <see cref="Tokens"/>.
        /// </summary>
        /// <param name="input">Reader positioned at the start of the source text.</param>
        /// <param name="dict">Receives a "text_const_N" name for every distinct string literal.</param>
        /// <exception cref="ScannerException">
        /// Unterminated string or parameter section, or an unrecognized character.
        /// </exception>
        private void Scan(System.IO.TextReader input, Dictionary<string, string> dict)
        {
            int next;
            while ((next = input.Peek()) != -1)
            {
                char ch = (char)next;

                if (char.IsWhiteSpace(ch))
                {
                    input.Read();
                }
                else if (ch == '(')
                {
                    ScanParameterSection(input, dict);
                }
                else if (!TryScanValue(input, dict, ch))
                {
                    ScanOperator(input, ch);
                }
            }
        }

        /// <summary>
        /// Scans an identifier, a numeric literal or a string literal if
        /// <paramref name="ch"/> (the character currently peeked) starts one.
        /// </summary>
        /// <returns><c>true</c> when a token was added; <c>false</c> when nothing was consumed.</returns>
        private bool TryScanValue(TextReader input, Dictionary<string, string> dict, char ch)
        {
            if (char.IsLetter(ch) || ch == '_')
            {
                this.tokens.Add(ReadWhile(input, IsIdentifierPart));
                return true;
            }

            if (char.IsDigit(ch))
            {
                this.tokens.Add(new Number(ReadWhile(input, IsNumberPart)));
                return true;
            }

            if (ch == '"')
            {
                this.tokens.Add(ReadStringLiteral(input, dict));
                return true;
            }

            return false;
        }

        /// <summary>
        /// Scans everything from '(' up to and including the matching ')'.
        /// Commas and whitespace are skipped; comparison operators are only
        /// recognized here, not at the top level.
        /// </summary>
        private void ScanParameterSection(TextReader input, Dictionary<string, string> dict)
        {
            this.tokens.Add(OP.OpenParam);
            input.Read(); // consume '('

            while (true)
            {
                int next = input.Peek();
                if (next == -1)
                {
                    // Previously only an EOF directly after '(' was detected; an EOF
                    // later in the section made the loop spin forever.
                    throw new ScannerException("Unterminated parameter section");
                }

                char ch = (char)next;
                if (ch == ')')
                {
                    break;
                }

                if (char.IsWhiteSpace(ch) || ch == ',')
                {
                    input.Read();
                }
                else if (!TryScanValue(input, dict, ch))
                {
                    ScanComparison(input, ch);
                }
            }

            this.tokens.Add(OP.CloseParam);
            input.Read(); // consume ')'
        }

        /// <summary>
        /// Scans one of the operators allowed inside a parameter section:
        /// =, ==, &lt;, &lt;=, &gt;, &gt;=, !=.
        /// </summary>
        private void ScanComparison(TextReader input, char ch)
        {
            switch (ch)
            {
                case '=':
                    this.tokens.Add(ReadOptionalEquals(input) ? OP.Equal : OP.Assigment);
                    break;
                case '<':
                    this.tokens.Add(ReadOptionalEquals(input) ? OP.LessEqual : OP.Less);
                    break;
                case '>':
                    this.tokens.Add(ReadOptionalEquals(input) ? OP.GreaterEqual : OP.Greater);
                    break;
                case '!':
                    if (!ReadOptionalEquals(input))
                    {
                        throw new ScannerException("Expected '=' after '!'");
                    }
                    this.tokens.Add(OP.NotEqual);
                    break;
                default:
                    // Without this branch the character was never consumed and the
                    // enclosing loop never terminated.
                    throw new ScannerException("Scanner encountered unrecognized character '" + ch + "'");
            }
        }

        /// <summary>
        /// Scans a single punctuation or arithmetic token at the top level
        /// (outside any parameter section).
        /// </summary>
        private void ScanOperator(TextReader input, char ch)
        {
            OP op;
            switch (ch)
            {
                case ';': op = OP.Semicolon; break;
                case '{': op = OP.OpenBlock; break;
                case '}': op = OP.CloseBlock; break;
                case '+': op = OP.Add; break;
                case '-': op = OP.Sub; break;
                case '*': op = OP.Mul; break;
                case '/': op = OP.Div; break;
                case '%': op = OP.Mod; break;
                case '=':
                    this.tokens.Add(ReadOptionalEquals(input) ? OP.Equal : OP.Assigment);
                    return;
                default:
                    throw new ScannerException("Scanner encountered unrecognized character '" + ch + "'");
            }

            input.Read();
            this.tokens.Add(op);
        }

        /// <summary>
        /// Consumes the current character and, if the next one is '=', consumes that too.
        /// </summary>
        /// <returns><c>true</c> when a trailing '=' was consumed.</returns>
        private static bool ReadOptionalEquals(TextReader input)
        {
            input.Read();
            if (input.Peek() == '=')
            {
                input.Read();
                return true;
            }
            return false;
        }

        /// <summary>
        /// Consumes a double-quoted literal (the reader must be positioned on the
        /// opening quote) and registers its text in <paramref name="dict"/>.
        /// </summary>
        /// <returns>The literal's text, without the quotes, as a <c>StringBuilder</c>.</returns>
        private StringBuilder ReadStringLiteral(TextReader input, Dictionary<string, string> dict)
        {
            input.Read(); // opening quote

            StringBuilder accum = new StringBuilder();
            while (true)
            {
                int next = input.Peek();
                if (next == -1)
                {
                    throw new ScannerException("Unterminated string");
                }
                if (next == '"')
                {
                    break;
                }
                accum.Append((char)next);
                input.Read();
            }

            input.Read(); // closing quote

            // A literal that occurs more than once keeps the name it was given first;
            // Dictionary.Add would throw ArgumentException on the repeated key.
            string text = accum.ToString();
            if (!dict.ContainsKey(text))
            {
                dict.Add(text, string.Format("text_const_{0}", idconst++));
            }
            return accum;
        }

        /// <summary>
        /// Consumes characters while <paramref name="accept"/> holds and returns them.
        /// Stops at end of input without throwing.
        /// </summary>
        private static string ReadWhile(TextReader input, Func<char, bool> accept)
        {
            StringBuilder accum = new StringBuilder();
            int next;
            while ((next = input.Peek()) != -1 && accept((char)next))
            {
                accum.Append((char)next);
                input.Read();
            }
            return accum.ToString();
        }

        // Characters allowed after the first character of an identifier. The same rule
        // is now applied inside parameter sections, where digits used to end the name.
        private static bool IsIdentifierPart(char c)
        {
            return char.IsLetter(c) || c == '_' || char.IsNumber(c);
        }

        // Characters accepted in a numeric literal; the text is handed to Number unvalidated.
        private static bool IsNumberPart(char c)
        {
            return char.IsDigit(c) || c == '.';
        }

        /// <summary>
        /// Scans <paramref name="input"/> completely; the result is available through
        /// <see cref="Tokens"/>.
        /// </summary>
        /// <param name="input">Source text to tokenize.</param>
        /// <param name="TextConstant">Filled with string-literal text mapped to a generated constant name.</param>
        /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
        /// <exception cref="ScannerException">The source text is lexically invalid.</exception>
        public TokenScanner(TextReader input, Dictionary<string, string> TextConstant)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }
            if (TextConstant == null)
            {
                throw new ArgumentNullException("TextConstant");
            }

            tokens = new List<object>();
            this.Scan(input, TextConstant);
        }
    }

    /// <summary>Punctuation and operator tokens emitted by <see cref="TokenScanner"/>.</summary>
    enum OP
    {
        Semicolon,      // ;
        OpenBlock,      // {
        CloseBlock,     // }
        OpenParam,      // (
        CloseParam,     // )
        Assigment,      // =  (spelling kept: the member name is part of the enum's interface)
        Equal,          // ==
        NotEqual,       // !=
        Less,           // <
        Greater,        // >
        LessEqual,      // <=
        GreaterEqual,   // >=
        Add,            // +
        Sub,            // -
        Mul,            // *
        Div,            // /
        Mod             // %
    }
}
+ := | + := 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 + + := + + := . + := " ? " + + := + + := ; + | = ; + | ; + | = ; + | if + | if else + | while + | do until ; + + := + := < | > | == | <= | >= + + := + | + | + | + | + | + | + + := + | - | * | / + + + + +