Minor changes: more comments, changed pattern for constant matching

This commit is contained in:
Alex Vanin 2015-02-16 18:26:06 +08:00
parent c2219325c9
commit 6ccb8b56fb

View file

@ -140,33 +140,34 @@ namespace AssemblerVVM
{ {
static int Main(string[] args) static int Main(string[] args)
{ {
/*
*
*/
if (args.Length < 1) if (args.Length < 1)
{ {
Console.WriteLine("No input files"); Console.WriteLine("No input files");
return 1; return 1;
} }
Dictionary<int, string> ConstID_VALUE = new Dictionary<int, string>(); Dictionary<int, string> ConstID_VALUE = new Dictionary<int, string>();
Dictionary<string, int> ConstKEY_ID = new Dictionary<string, int>(); Dictionary<string, int> ConstKEY_ID = new Dictionary<string, int>();
Dictionary<string, int> CodeMARK_POS = new Dictionary<string, int>(); Dictionary<string, int> CodeMARK_POS = new Dictionary<string, int>();
List<pair> FuncPos = new List<pair>(); List<pair> FuncPos = new List<pair>(); //This array defines first and last line of every procedure
string outname = args[0]+".vvm"; string outname = args[0].Replace(".vasm","")+".vvm";
if (args.Length > 1) if (args.Length > 1)
outname = args[1]; outname = args[1];
int[] pos_d = new int[2] { 0, 0 }; //This array defines first and last line of data segment
string[] source = System.IO.File.ReadAllLines(args[0]); //Array of source code
int[] pos_d = new int[2] { 0, 0 };
string[] source = System.IO.File.ReadAllLines(args[0]);
List<string> src = PositionAnalyse(source, ref pos_d, ref FuncPos, ref CodeMARK_POS); List<string> src = PositionAnalyse(source, ref pos_d, ref FuncPos, ref CodeMARK_POS);
using (var bw = new BinaryWriter(File.Open(outname, FileMode.OpenOrCreate))) //Writing into a file with bytecode
using (var bw = new BinaryWriter(File.Open(outname, FileMode.OpenOrCreate)))
{ {
HeaderAnalyse(src, pos_d, bw, ref ConstID_VALUE, ref ConstKEY_ID);
HeaderAnalyse(src, pos_d, bw, ref ConstID_VALUE, ref ConstKEY_ID); funcH_common FuncCommonH = new funcH_common(CRC16_alg("main"), FuncPos.Count); //We define there that start procedure calls "main"
funcH_common FuncCommonH = new funcH_common(CRC16_alg("main"), FuncPos.Count); bw.Write(FuncCommonH.Serialize()); //We also define there number of procedures in our code
bw.Write(FuncCommonH.Serialize());
for (int i = 0; i < FuncPos.Count; i++) for (int i = 0; i < FuncPos.Count; i++)
{ {
FuncAnalyse(src, FuncPos[i], bw, ConstKEY_ID, CodeMARK_POS); FuncAnalyse(src, FuncPos[i], bw, ConstKEY_ID, CodeMARK_POS);
@ -178,13 +179,22 @@ namespace AssemblerVVM
static List<string> PositionAnalyse(string[] input, ref int[] posD, ref List<pair> posC, ref Dictionary<string, int> marks) static List<string> PositionAnalyse(string[] input, ref int[] posD, ref List<pair> posC, ref Dictionary<string, int> marks)
{ {
/*
* This function fills arrays that define positions of functions and data in source text
* Return value - is an array of source text without comments, empty strings etc
* ---
* input - is an array of source text
* posD - is an array of start and end position of data segment
* posC - is an array of start and end position of every procedure
* marks - is a dictionary of labels
*/
List<string> src = new List<string>(); List<string> src = new List<string>();
bool func_flag = false; bool func_flag = false;
int pos1 = 0, pos2 = 0, numline = 0; int pos1 = 0, pos2 = 0, numline = 0;
foreach (string s in input) foreach (string s in input)
if (s != "") if (s != "") //Skip empty strings
{ {
if (func_flag == true && Regex.IsMatch(s, @"\w+:")) if (func_flag == true && Regex.IsMatch(s, @"\w+:")) //Labels cannot be outside of procedure
{ {
marks.Add(s.Trim(' ', '\t', ':'), numline); marks.Add(s.Trim(' ', '\t', ':'), numline);
} }
@ -192,12 +202,12 @@ namespace AssemblerVVM
{ {
src.Add(s.Trim(' ', '\t')); src.Add(s.Trim(' ', '\t'));
if (s.Contains(".data")) if (s.Contains(".data")) //Checking data segment
posD[0] = src.Count - 1; posD[0] = src.Count - 1;
if (s.Contains(".endd")) if (s.Contains(".endd"))
posD[1] = src.Count - 1; posD[1] = src.Count - 1;
if (s.Contains(".proc")) if (s.Contains(".proc")) //Checking procedure segment
{ {
numline = 0; numline = 0;
pos1 = src.Count - 1; pos1 = src.Count - 1;
@ -218,18 +228,29 @@ namespace AssemblerVVM
} }
return src; return src;
} }
static void HeaderAnalyse(List<string> src, int[] pos, BinaryWriter bw, ref Dictionary<int, string> id_v, ref Dictionary<string, int> k_id) static void HeaderAnalyse(List<string> src, int[] pos, BinaryWriter bw, ref Dictionary<int, string> id_v, ref Dictionary<string, int> k_id)
{ {
str_header ConstH = new str_header(); /*
string pattern = "\".*\""; * This function creates bytecode header. Header contains signature, version, text constants and their size
* ---
* src - clear source text
* pos - position of data segment in source text
* bw - writer to a file
* id_v, k_id - dictionaries for text constants
*/
str_header ConstH = new str_header(); //Object that stores all text constants.
string pattern = "\".*\""; //Pattern to take text constants (to delete)
string pattern_adv = "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\""; //Advanced pattern (allows escaped quotes inside a constant)
int j = 1; int j = 1;
for (int i = pos[0] + 1; i < pos[1]; i++) for (int i = pos[0] + 1; i < pos[1]; i++) //pos[0] = .data ; pos[0]+1 = first text const
{ {
int position = src[i].IndexOf(" "); int position = src[i].IndexOf(" ");
string key = src[i].Substring(0, position); string key = src[i].Substring(0, position);
string value = Regex.Match(src[i], pattern).ToString().Trim('"').Replace(@"\n", "\n").Replace(@"\r", "\r") + "\0"; string value = Regex.Match(src[i], pattern_adv).ToString();
id_v.Add(j, value); k_id.Add(key, j++); value = value.Substring(1, value.Length-2).Replace(@"\n", "\n").Replace(@"\r", "\r").Replace("\\\"","\"") + "\0";
ConstH.const_count++; ConstH.size_const += (value.Length); id_v.Add(j, value); k_id.Add(key, j++); //All constants have their numeric equivalent, so we store both.
ConstH.const_count++; ConstH.size_const += (value.Length); //Defining total size of constants
} }
bw.Write(ConstH.Serialize()); bw.Write(ConstH.Serialize());
for (int i = 1; i < j; i++) for (int i = 1; i < j; i++)
@ -237,32 +258,41 @@ namespace AssemblerVVM
bw.Write(Encoding.ASCII.GetBytes(id_v[i])); bw.Write(Encoding.ASCII.GetBytes(id_v[i]));
} }
} }
static void FuncAnalyse(List<string> code, pair pos, BinaryWriter bw, Dictionary<string, int> dictStr, Dictionary<string, int> dictJmp) static void FuncAnalyse(List<string> code, pair pos, BinaryWriter bw, Dictionary<string, int> dictStr, Dictionary<string, int> dictJmp)
{ {
/*
* This function writes an actual procedure in bytecode.
* It will decode the text name of each instruction into bytecode, as well as the arguments for the instruction
* ---
* code - clear source code
* pos - position of procedures
* dictStr - dictionary for text constants
* dictJmp - dictionary for every jump
*/
string name = ""; string name = "";
MemoryStream str = new MemoryStream(); MemoryStream str = new MemoryStream();
funcH_signature sign = new funcH_signature(); funcH_signature sign = new funcH_signature();
funcH_bytecode bc = new funcH_bytecode(); funcH_bytecode bc = new funcH_bytecode();
string[] current_str = code[pos.start].Split(' '); //Spliting string in case of arguments for instruction
string[] current_str = code[pos.start].Split(' ');
switch (current_str.Length) switch (current_str.Length)
{ {
case 4: case 4: //2 arg instruction
bc.count_args = System.Convert.ToInt32(current_str[3]); bc.count_args = System.Convert.ToInt32(current_str[3]);
bc.count_locals = System.Convert.ToInt32(current_str[2]); bc.count_locals = System.Convert.ToInt32(current_str[2]);
name = current_str[1]; name = current_str[1];
break; break;
case 3: case 3: //1 arg instruction
bc.count_locals = System.Convert.ToInt32(current_str[2]); bc.count_locals = System.Convert.ToInt32(current_str[2]);
name = current_str[1]; name = current_str[1];
break; break;
case 2: case 2: //No arg
name = current_str[1]; name = current_str[1];
break; break;
} }
bc.id = CRC16_alg(name); bc.id = CRC16_alg(name); //Hash encode for function name
name += "\0"; name += "\0";
sign.size_signature = name.Length; sign.size_signature = name.Length;
using (BinaryWriter writer = new BinaryWriter(str)) using (BinaryWriter writer = new BinaryWriter(str))
@ -289,7 +319,7 @@ namespace AssemblerVVM
writer.Write(CRC16_alg(current_str[1])); writer.Write(CRC16_alg(current_str[1]));
writer.Write(ushort.Parse(current_str[2])); writer.Write(ushort.Parse(current_str[2]));
} }
else if (jumps.Contains(current_opc)) else if (jumps.Contains(current_opc)) //Pain in the arse
writer.Write(FindOffset(code, pos, j, ((ushort)dictJmp[current_str[1]]-j))); writer.Write(FindOffset(code, pos, j, ((ushort)dictJmp[current_str[1]]-j)));
j++; j++;
} }
@ -297,7 +327,7 @@ namespace AssemblerVVM
byte[] bcode = str.ToArray(); byte[] bcode = str.ToArray();
sign.size_bytecode = bcode.Length; sign.size_bytecode = bcode.Length;
sign.size_func = 22 + sign.size_bytecode + sign.size_signature; sign.size_func = 22 + sign.size_bytecode + sign.size_signature; //Magic number 22 - size of meta-info for
bw.Write(sign.Serialize()); bw.Write(sign.Serialize());
bw.Write(Encoding.ASCII.GetBytes(name)); bw.Write(Encoding.ASCII.GetBytes(name));
@ -307,15 +337,18 @@ namespace AssemblerVVM
public static short FindOffset(List<string> code, pair pos, int curr_pos, int off) public static short FindOffset(List<string> code, pair pos, int curr_pos, int off)
{ {
/*
* This function calculates the offset in bytes for a jump to a label.
*/
short result = 0; short result = 0;
if (off > 0) if (off > 0) //Jumping forward
{ {
for (int i = curr_pos + 1; i < curr_pos + off; i++) for (int i = curr_pos + 1; i < curr_pos + off; i++)
{ {
result += OpCodeSize((opcode)Enum.Parse(typeof(opcode), code[pos.start+i].Split(' ')[0].ToUpper())); result += OpCodeSize((opcode)Enum.Parse(typeof(opcode), code[pos.start+i].Split(' ')[0].ToUpper()));
} }
} }
else else //Jumping backward
{ {
for (int i = curr_pos; i >= curr_pos + off; i--) for (int i = curr_pos; i >= curr_pos + off; i--)
{ {
@ -341,6 +374,9 @@ namespace AssemblerVVM
public static ushort CRC16_alg(string msg) public static ushort CRC16_alg(string msg)
{ {
/*
* Hash function based on the cyclic redundancy check (CRC-16) algorithm
*/
byte[] text = Encoding.ASCII.GetBytes(msg); byte[] text = Encoding.ASCII.GetBytes(msg);
const ushort polinom = 0xa001; const ushort polinom = 0xa001;
ushort code = 0xffff; ushort code = 0xffff;