using System;
using System.Linq;
using System.Collections.Generic;
using System.Text;
using Sunweaver.Commands;
using Sunweaver.Commands.Abstracts;
using Sunweaver.VM;
using Sunweaver.MetaTags;
using Sunweaver;
using System.IO;

namespace Sunweaver.Parsing
{
    /// <summary>
    /// Converts DNA source text into a <see cref="Chromosome"/>: tokenizes the text, applies metatags,
    /// splits the token stream into codules, assigns codule slots and finally parses every codule
    /// into base pairs. Problems are reported as <see cref="CompilationMessage"/> entries instead of
    /// being thrown wherever the code below can recover.
    /// </summary>
    public static class Parser
    {
        /// <summary>Prefix of the generated identifier given to codules that have no name of their own.</summary>
        public const string AnonymousCoduleLabel = "__anon";

        /// <summary>Identifier of the implicit outermost codule, which always uses slot 0.</summary>
        public const string EntryCoduleName = "__main";

        /// <summary>
        /// Parses <paramref name="DNAFile"/> into a freshly created chromosome.
        /// The compilation messages produced along the way are discarded.
        /// </summary>
        /// <param name="DNAFile">The complete DNA source text.</param>
        /// <param name="dna">Receives the new chromosome.</param>
        public static void ParseDNA(string DNAFile, out Chromosome dna)
        {
            List<CompilationMessage> messages;
            dna = new Chromosome();
            ParseDNA(DNAFile, ref dna, out messages);
        }

        /// <summary>
        /// Parses <paramref name="DNAFile"/> into the existing chromosome <paramref name="returnMe"/>.
        /// </summary>
        /// <param name="DNAFile">The complete DNA source text.</param>
        /// <param name="returnMe">The chromosome that receives metatags, codule handles and base pairs.</param>
        /// <param name="messages">Receives every message produced by the individual phases.</param>
        public static void ParseDNA(string DNAFile, ref Chromosome returnMe, out List<CompilationMessage> messages)
        {
            messages = new List<CompilationMessage>();

            // Phase 1: tokenize DNA and parse any metatags we find
            var tokens = TokenizeAndMetatags(DNAFile, returnMe, messages);

            // Phase 2: Extract codules
            int implicitCoduleCount = 0;
            var codules = ExtractCodules(tokens, returnMe, messages, ref implicitCoduleCount);

            // Phase 3: Create handles for all explicit codules which are reserving a memory location
            CreateExplicitCoduleHandles(messages, returnMe, codules);

            // Phase 4: Now assign out codule indices on a first come, first serve basis.
            CreateImplicitCoduleHandles(messages, returnMe, codules);

            // Phase 5: Now that codules have been assigned out, run the metatags.
            RunMetaTags(returnMe, messages);

            // Phase 6: Now parse the codules themselves.
            ParseCodules(returnMe, codules, messages);
        }

        /// <summary>
        /// Calls <c>Implement</c> on every constant tag and then on every macro tag stored on
        /// <paramref name="returnMe"/>.
        /// </summary>
        public static void RunMetaTags(Chromosome returnMe, List<CompilationMessage> messages)
        {
            // TODO: if we need to add more MetaTags, we should not just iterate over their lists one at a time.
            foreach (var tag in returnMe.ConstantTags)
            {
                tag.Implement(returnMe, messages);
            }

            foreach (var tag in returnMe.MacroTags.Values)
            {
                tag.Implement(returnMe, messages);
            }
        }

        /// <summary>
        /// Turns a raw line in to a collection of tokens.
        /// </summary>
        /// <param name="line">One line of DNA text. Everything from the first <c>'</c> or <c>//</c> on is dropped as a comment.</param>
        /// <param name="lineNumber">Line number recorded on every produced token.</param>
        /// <returns>The words of the line (separated by spaces or tabs), each with its 1-based column.</returns>
        public static IList<Token> TokenizeLine(string line, int lineNumber)
        {
            var returnMe = new List<Token>();

            // Crop comments
            int quoteComment = line.IndexOf('\'');
            if (quoteComment >= 0)
            {
                line = line.Substring(0, quoteComment);
            }

            int slashComment = line.IndexOf(@"//", StringComparison.Ordinal);
            if (slashComment >= 0)
            {
                line = line.Substring(0, slashComment);
            }

            string[] splitLine = line.Split(" \t".ToCharArray());

            // Every element of splitLine is followed by exactly one separator character, so the start
            // offset of each element is simply the running sum of (length + 1) of its predecessors.
            int wordStart = 0;
            foreach (string rawWord in splitLine)
            {
                var word = rawWord.Trim();
                if (!string.IsNullOrEmpty(word))
                {
                    returnMe.Add(new Token
                    {
                        line = lineNumber,
                        // Store the trimmed word so stray whitespace other than ' ' and '\t'
                        // never ends up inside a token.
                        token = word,
                        column = wordStart + 1,
                    });
                }

                wordStart += rawWord.Length + 1;
            }

            return returnMe;
        }

        /// <summary>
        /// Checks whether the first token of a line names a registered metatag and, if so, creates the
        /// tag and stores it on <paramref name="dna"/>.
        /// </summary>
        /// <param name="tokens">The tokens of a single line.</param>
        /// <param name="dna">The chromosome receiving the tag.</param>
        /// <param name="errors">Receives a message when the tag cannot be constructed or a macro is redefined differently.</param>
        /// <returns><c>true</c> when the line was a metatag line (even a faulty one); otherwise <c>false</c>.</returns>
        /// <exception cref="NotImplementedException">The registered tag type is neither <c>@const</c> nor <c>macro</c>.</exception>
        public static bool TryToParseAsMetaTag(IList<Token> tokens, Chromosome dna, List<CompilationMessage> errors)
        {
            Type type;
            if (tokens.Count == 0 || !DNASystem.MetaTags.TryGetValue(tokens[0].token, out type))
            {
                return false;
            }

            MetaTag tag;
            try
            {
                tag = (MetaTag)System.Activator.CreateInstance(type, tokens);
            }
            catch (Exception exc)
            {
                // Eat the exception and just warn. Constructor failures arrive wrapped, but other
                // failures (e.g. no matching constructor) carry no inner exception at all.
                var cause = exc.InnerException ?? exc;
                errors.Add(new CompilationMessage
                {
                    message = String.Format("The {0} is not defined properly: {1}", type, cause.Message),
                    token = tokens[0],
                });
                return true;
            }

            if (tag.GetType() == typeof(@const))
            {
                tag.PostTokenizeImplement(dna, errors);
                dna.ConstantTags.Add((@const)tag);
            }
            else if (tag.GetType() == typeof(macro))
            {
                var macroTag = (macro)tag;
                if (dna.MacroTags.ContainsKey(macroTag.Label))
                {
                    // An identical redefinition is silently accepted; only a differing one is reported.
                    if (!dna.MacroTags[macroTag.Label].CodeFragment.SequenceEqual(macroTag.CodeFragment))
                    {
                        errors.Add(new CompilationMessage
                        {
                            message = String.Format("The macro '{0}' was defined more than once. " +
                                "Ignoring second definition...", macroTag.Label),
                            token = tokens[0],
                        });
                    }
                }
                else
                {
                    tag.PostTokenizeImplement(dna, errors);
                    dna.MacroTags.Add(macroTag.Label, macroTag);
                }
            }
            else
            {
                throw new NotImplementedException();
            }

            return true;
        }

        /// <summary>
        /// Same as the four-argument overload, discarding the line count.
        /// </summary>
        public static List<Token> TokenizeAndMetatags(string DNAFile, Chromosome returnMe, List<CompilationMessage> errors)
        {
            int dummy;
            return TokenizeAndMetatags(DNAFile, returnMe, errors, out dummy);
        }

        /// <summary>
        /// Lower-cases the DNA text, tokenizes it line by line and hands every line to
        /// <see cref="TryToParseAsMetaTag"/>; lines that are not metatags contribute their tokens to the result.
        /// </summary>
        /// <param name="DNAFile">The complete DNA source text.</param>
        /// <param name="returnMe">The chromosome that receives any metatags found.</param>
        /// <param name="errors">Receives messages produced while creating metatags.</param>
        /// <param name="lineNumber">Receives the number of lines that were processed.</param>
        /// <returns>All non-metatag tokens in source order.</returns>
        public static List<Token> TokenizeAndMetatags(string DNAFile, Chromosome returnMe,
            List<CompilationMessage> errors, out int lineNumber)
        {
            int currLine = 0;
            var tokens = new List<Token>();

            // Invariant lower-casing keeps identifiers stable regardless of the current culture
            // (a culture-sensitive ToLower maps 'I' to a dotless i under e.g. tr-TR).
            string[] lines = DNAFile.ToLowerInvariant().Split(new string[] { "\r\n", "\n" }, StringSplitOptions.None);
            foreach (string line in lines)
            {
                currLine++;
                var lineTokens = TokenizeLine(line, currLine);
                if (!TryToParseAsMetaTag(lineTokens, returnMe, errors))
                {
                    tokens.AddRange(lineTokens);
                }
                // Otherwise it was a metatag, and its action was applied to the DNA.
            }

            lineNumber = currLine;
            return tokens;
        }

        /// <summary>
        /// Splits the flat token stream into codule definitions by matching tokens containing
        /// <c>{</c> and <c>}</c>. Nested codules are replaced in their parent by a placeholder token.
        /// </summary>
        /// <param name="tokens">All non-metatag tokens of the DNA.</param>
        /// <param name="dna">The chromosome whose personal constants are checked against codule names.</param>
        /// <param name="errors">Receives messages about unbalanced or malformed braces and name conflicts.</param>
        /// <param name="implicitCoduleCount">Counter used to generate unique anonymous codule names.</param>
        /// <returns>The codules in the order in which they were closed; the entry codule is included.</returns>
        public static List<CoduleDefinition> ExtractCodules(List<Token> tokens, Chromosome dna,
            List<CompilationMessage> errors, ref int implicitCoduleCount)
        {
            var codules = new List<CoduleDefinition>();
            int currentIndex = 0;
            var embeddedCodules = new Stack<CoduleDefinition>();

            // Closing a codule moves it from the nesting stack to the result list.
            Action PopCodule = () => codules.Add(embeddedCodules.Pop());

            embeddedCodules.Push(new CoduleDefinition
            {
                identifier = EntryCoduleName,
                desiredCoduleIndex = 0,
                coduleDefinitionToken = null,
            });

            while (currentIndex <= tokens.Count)
            {
                // Search tokens for any containing { inside the current codule bounds
                var currentCodule = embeddedCodules.Peek();
                int nextCoduleStart = tokens.FindIndex(currentIndex, x => x.token.Contains("{"));
                int nextCoduleEnd = tokens.FindIndex(currentIndex, x => x.token.Contains("}"));

                if (nextCoduleEnd < 0 && nextCoduleStart < 0)
                {
                    // No more codule starts or ends to be found. Assume one auto closer for the main codule,
                    // and then warn for the rest
                    currentCodule.tokens.AddRange(tokens.GetRange(currentIndex, tokens.Count - currentIndex));
                    currentIndex = tokens.Count;

                    while (embeddedCodules.Count > 1)
                    {
                        errors.Add(new CompilationMessage
                        {
                            token = embeddedCodules.Peek().coduleDefinitionToken,
                            message = "Could not find matching end brace for codule before end of file. " +
                                "Automatically closing...",
                        });
                        PopCodule();
                    }

                    PopCodule();

                    // Step past the end so the loop condition fails.
                    currentIndex = tokens.Count + 1;
                }
                else if (nextCoduleEnd == nextCoduleStart)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = tokens[nextCoduleEnd],
                        message = "Opening and closing brace found in a single word. Ignoring it...",
                    });
                    currentCodule.tokens.AddRange(tokens.GetRange(currentIndex, nextCoduleStart - currentIndex));
                    currentIndex = nextCoduleStart + 1;
                    continue;
                }
                else if (nextCoduleStart >= 0 && (nextCoduleStart < nextCoduleEnd || nextCoduleEnd < 0))
                {
                    // Found the start of another codule before the current one is done.
                    // Add what isn't another codule to the current codule.
                    currentCodule.tokens.AddRange(tokens.GetRange(currentIndex, nextCoduleStart - currentIndex));

                    // Starting another codule
                    string coduleToken;
                    var newCodule = _NewCoduleStart(tokens, errors, ref implicitCoduleCount, currentIndex,
                        currentCodule, nextCoduleStart, dna, out coduleToken);

                    // Add placeholder for new codule
                    currentCodule.tokens.Add(tokens[nextCoduleStart].CloneAndRename(coduleToken));
                    embeddedCodules.Push(newCodule);
                    currentIndex = nextCoduleStart + 1;
                }
                else
                {
                    // Found the end of the current codule.
                    currentCodule.tokens.AddRange(tokens.GetRange(currentIndex, nextCoduleEnd - currentIndex));
                    _CoduleEnd(tokens[nextCoduleEnd], errors, currentCodule);
                    PopCodule();
                    currentIndex = nextCoduleEnd + 1;

                    if (embeddedCodules.Count == 0)
                    {
                        // The brace closed the entry codule itself, so there is nothing left to nest into.
                        errors.Add(new CompilationMessage
                        {
                            token = tokens[nextCoduleEnd],
                            message = "Unmatched closing brace. Ignoring rest of DNA...",
                        });
                        break;
                    }
                }
            }

            // Check for conflicts between Codule names and personal constants.
            foreach (var codule in codules)
            {
                if (!dna.PersonalConstants.ContainsKey(codule.identifier))
                {
                    continue;
                }

                if (codule.desiredCoduleIndex > 0 &&
                    dna.PersonalConstants[codule.identifier] != codule.desiredCoduleIndex)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = codule.coduleDefinitionToken,
                        message = String.Format(
                            "Codule has name '{0}' and requests slot {1}, but there is a constant with name {0} " +
                            "defined as {2}. This constant will hide this codule.",
                            codule.identifier,
                            codule.desiredCoduleIndex,
                            dna.PersonalConstants[codule.identifier]),
                    });
                }

                // A codule without an explicit slot adopts the value of the same-named constant.
                if (codule.desiredCoduleIndex < 0 &&
                    dna.PersonalConstants[codule.identifier] != codule.desiredCoduleIndex)
                {
                    codule.desiredCoduleIndex = dna.PersonalConstants[codule.identifier];
                }
            }

            return codules;
        }

        /// <summary>
        /// Interprets the text following <c>}</c> in a closing token as an optional slot request and,
        /// when valid, stores it on <paramref name="currentCodule"/>.
        /// </summary>
        private static void _CoduleEnd(Token coduleEndToken, List<CompilationMessage> errors,
            CoduleDefinition currentCodule)
        {
            // Padding with spaces guarantees a part before and after the brace, so split[1] always exists.
            var split = (" " + coduleEndToken.token + " ").Split('}');
            if (split.Length != 2)
            {
                errors.Add(new CompilationMessage
                {
                    token = coduleEndToken,
                    message = "Syntax error found on closing brace",
                });
            }

            string requested = split[1].Trim();
            if (string.IsNullOrEmpty(requested))
            {
                return;
            }

            Int16 desiredIndex;
            if (!Int16.TryParse(requested, out desiredIndex))
            {
                errors.Add(new CompilationMessage
                {
                    token = coduleEndToken,
                    message = "Desired codule index invalid syntax. Ignoring...",
                });
            }
            else if (desiredIndex >= 0 && currentCodule.desiredCoduleIndex >= 0 &&
                     currentCodule.desiredCoduleIndex != desiredIndex)
            {
                errors.Add(new CompilationMessage
                {
                    token = coduleEndToken,
                    message = "Codule tail desired different index from codule head. Ignoring tail's request...",
                });
            }
            else if (desiredIndex < 0)
            {
                errors.Add(new CompilationMessage
                {
                    token = coduleEndToken,
                    message = "Cannot request negative codule indices. Reverting to automatic assignment.",
                });
            }
            else if (desiredIndex >= Chromosome.CoduleSlots)
            {
                errors.Add(new CompilationMessage
                {
                    token = coduleEndToken,
                    message = "Cannot request codule indices larger or equal to " + Chromosome.CoduleSlots +
                        ". Reverting to automatic assignment.",
                });
            }
            else
            {
                currentCodule.desiredCoduleIndex = desiredIndex;
            }
        }

        /// <summary>
        /// Builds the definition for the codule opened by <c>tokens[nextCoduleStart]</c>. Text before the
        /// <c>{</c> is kept as punctuation, text after it is either the codule name or a requested slot number.
        /// </summary>
        /// <param name="uniqueID">Counter used (and advanced) when an anonymous name has to be generated.</param>
        /// <param name="token">Receives the placeholder text (punctuation + identifier) for the parent codule.</param>
        private static CoduleDefinition _NewCoduleStart(List<Token> tokens, List<CompilationMessage> errors,
            ref int uniqueID, int currentIndex, CoduleDefinition currentCodule, int nextCoduleStart,
            Chromosome dna, out string token)
        {
            string identifier;
            string punctuation;
            {
                var word = tokens[nextCoduleStart].token;

                // Padding with spaces guarantees a part before and after the brace.
                var split = (" " + word + " ").Split("{".ToCharArray());
                if (split.Length != 2)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = tokens[nextCoduleStart],
                        message = "Too many { detected in token when parsing codule definition. " +
                            "Ignoring everything after the second {",
                    });
                }

                identifier = split[1].Trim();
                punctuation = split[0].Trim();

                if (string.IsNullOrEmpty(identifier))
                {
                    identifier = AnonymousCoduleLabel + uniqueID++;
                }
            }

            Int16 desiredIndex;
            if (!Int16.TryParse(identifier, out desiredIndex))
            {
                // A real name: the slot is assigned implicitly later on.
                desiredIndex = -1;
            }
            else
            {
                // The "name" was a slot number, so the codule itself is anonymous.
                identifier = AnonymousCoduleLabel + uniqueID++;

                if (desiredIndex < 0)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = tokens[nextCoduleStart],
                        message = "Codules cannot request negative codule indices. Reverting to implicit assignment.",
                    });
                    desiredIndex = -1;
                }
                else if (desiredIndex >= Chromosome.CoduleSlots)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = tokens[nextCoduleStart],
                        message = "Codules cannot request codule indices larger or equal to " +
                            Chromosome.CoduleSlots + ". Reverting to implicit assignment.",
                    });
                    desiredIndex = -1;
                }
            }

            var newCodule = new CoduleDefinition
            {
                identifier = identifier,
                desiredCoduleIndex = desiredIndex,
                coduleDefinitionToken = tokens[nextCoduleStart],
            };

            token = punctuation + identifier;
            return newCodule;
        }

        /// <summary>
        /// Registers a name and an empty <see cref="Codule"/> for every codule that requests a specific slot.
        /// Codules that cannot be registered are replaced by <c>null</c> in <paramref name="codules"/>;
        /// codules whose slot is already taken are switched to implicit assignment.
        /// </summary>
        public static void CreateExplicitCoduleHandles(List<CompilationMessage> errors, Chromosome returnMe,
            IList<CoduleDefinition> codules)
        {
            var currentChromosome = returnMe;
            for (int i = 0; i < codules.Count; ++i)
            {
                var codule = codules[i];
                if (codule == null || codule.desiredCoduleIndex < 0)
                {
                    continue;
                }

                if (codule.desiredCoduleIndex == 0 && codule.coduleDefinitionToken != null)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = codule.coduleDefinitionToken,
                        message = "Codule " + codule.identifier + " is attempting to use codule slot 0, " +
                            "but that is reserved for the main codule.",
                    });
                    codules[i] = null;
                    continue;
                }

                if (codule.desiredCoduleIndex == 0)
                {
                    // definition already will exist
                    continue;
                }

                if (currentChromosome.CoduleNames.ContainsKey(codule.identifier))
                {
                    errors.Add(new CompilationMessage
                    {
                        token = codule.coduleDefinitionToken,
                        message = "Codule " + codule.identifier + " is already defined. " +
                            "Redefinition will not be compiled.",
                    });
                    codules[i] = null;
                    continue;
                }

                if (currentChromosome.Codules.ContainsKey(codule.desiredCoduleIndex))
                {
                    // Report the owner's name only (not the whole key/value pair), and do not throw
                    // when the slot is occupied without any name pointing at it.
                    var coduleName = currentChromosome.CoduleNames
                        .FirstOrDefault(x => x.Value == codule.desiredCoduleIndex).Key;
                    errors.Add(new CompilationMessage
                    {
                        token = codule.coduleDefinitionToken,
                        message = "Codule " + codule.identifier + " desires slot " + codule.desiredCoduleIndex +
                            " but that slot is already taken by codule " + coduleName +
                            ". Falling back to implicit assignment of index.",
                    });
                    codule.desiredCoduleIndex = -1;
                    continue;
                }

                currentChromosome.CoduleNames.Add(codule.identifier, codule.desiredCoduleIndex);
                currentChromosome.Codules.Add(codule.desiredCoduleIndex, new Codule());
            }
        }

        /// <summary>
        /// Registers a name and an empty <see cref="Codule"/> for every codule without a requested slot,
        /// using the first open slot reported by the chromosome. Codules that cannot be registered are
        /// replaced by <c>null</c> in <paramref name="codules"/>.
        /// </summary>
        public static void CreateImplicitCoduleHandles(List<CompilationMessage> errors, Chromosome returnMe,
            IList<CoduleDefinition> codules)
        {
            var currentChromosome = returnMe;
            for (int i = 0; i < codules.Count; ++i)
            {
                var codule = codules[i];
                if (codule == null || codule.desiredCoduleIndex >= 0)
                {
                    continue;
                }

                if (currentChromosome.CoduleNames.ContainsKey(codule.identifier))
                {
                    errors.Add(new CompilationMessage
                    {
                        token = codule.coduleDefinitionToken,
                        message = String.Format("Codule {0} is already defined. Redefinition will not be compiled.",
                            codule.identifier),
                    });
                    codules[i] = null;
                    continue;
                }

                Int16 index = currentChromosome.FindFirstOpenCoduleSlot();
                if (index < 0)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = codule.coduleDefinitionToken,
                        message = "Cannot add implicit codule because we are out of codule slots. " +
                            "Skipping codule compilation.",
                    });
                    codules[i] = null;
                    continue;
                }

                currentChromosome.CoduleNames.Add(codule.identifier, index);
                currentChromosome.Codules.Add(index, new Codule());
            }
        }

        /// <summary>
        /// Tries to convert <paramref name="word"/> into a label without any leading punctuation.
        /// </summary>
        public static bool TryParseLabel(string word, Chromosome dna, out IList<BasePair> codeFragment)
        {
            return TryParseLabel(null, word, dna, out codeFragment);
        }

        /// <summary>
        /// Tries to convert word into a label using a DNA's custom
        /// label list, as well as sysvars
        /// </summary>
        /// <param name="punctuation">Punctuation character that preceded the word, if any.</param>
        /// <param name="word">The word to resolve.</param>
        /// <param name="dna">Chromosome providing object macros, personal constants and codule names; may be <c>null</c>.</param>
        /// <param name="codeFragment">Receives the resulting base pairs, or <c>null</c> on failure.</param>
        /// <returns><c>true</c> when the word was resolved; otherwise <c>false</c>.</returns>
        public static bool TryParseLabel(char? punctuation, string word, Chromosome dna,
            out IList<BasePair> codeFragment)
        {
            Int16? number = null;
            codeFragment = null;

            if (dna != null)
            {
                if (dna.ObjectMacros != null && dna.ObjectMacros.ContainsKey(word))
                {
                    // An object macro expands to a whole fragment, so it cannot carry punctuation.
                    if (punctuation.HasValue)
                    {
                        codeFragment = null;
                        return false;
                    }

                    codeFragment = dna.ObjectMacros[word];
                    return true;
                }

                if (dna.PersonalConstants != null && dna.PersonalConstants.ContainsKey(word))
                {
                    number = dna.PersonalConstants[word];
                }
                else if (dna.CoduleNames != null && dna.CoduleNames.ContainsKey(word))
                {
                    number = dna.CoduleNames[word];
                }
            }

            // The first run of lower-case letters in the word is the sysvar candidate.
            string possibleSysvar = System.Text.RegularExpressions.Regex.Match(word, "([a-z]+)").Groups[0].Value;

            // Fall back to sysvars
            if (number == null && DNASystem.Sysvars.ContainsKey(possibleSysvar))
            {
                number = DNASystem.Sysvars[possibleSysvar].Memloc;
            }

            if (!number.HasValue)
            {
                codeFragment = null;
                return false;
            }

            Type punctType;
            if (DNASystem.Punctuations.TryGetValue(punctuation ?? (char)0, out punctType))
            {
                var punct = (BasePair)System.Activator.CreateInstance(punctType, word, number.Value);
                codeFragment = new BasePair[] { punct };
            }
            else
            {
                codeFragment = new BasePair[] { new Label(word, number.Value) };
            }

            return true;
        }

        /// <summary>
        /// Converts a single token into base pairs: a number, a (possibly punctuated) label, or a command.
        /// </summary>
        /// <param name="token">The token to parse.</param>
        /// <param name="dna">The chromosome used to resolve labels.</param>
        /// <param name="errors">Receives a message when the token cannot be parsed.</param>
        /// <returns>The resulting base pairs; empty when the token is empty or could not be parsed.</returns>
        public static IList<BasePair> ParseToken(Token token, Chromosome dna, List<CompilationMessage> errors)
        {
            var word = token.token;
            if (String.IsNullOrEmpty(word))
            {
                return new BasePair[] { };
            }

            char? punctuation = null;
            if (char.IsPunctuation(word[0]) && word[0] != '_' && word[0] != '!')
            {
                if (word.Length == 1)
                {
                    errors.Add(new CompilationMessage
                    {
                        token = token,
                        message = "Lone punctuation found. Ignoring...",
                    });
                    return new BasePair[] { };
                }

                punctuation = word[0];
                word = word.Substring(1, word.Length - 1);
            }

            Int16 number;
            if (char.IsDigit(word[0]))
            {
                // It's just a negative number, so combine the number and the '-' sign again
                if (punctuation == '-')
                {
                    word = "-" + word;
                    punctuation = null;
                }

                //TODO: We're using Constant's min/max values, but we should probably customize it for whatever
                // type the base pair is going to be parsed as.
                if (Int16.TryParse(word, out number) && number <= Constant.MaxValue && number >= Constant.MinValue)
                {
                    if (!punctuation.HasValue)
                    {
                        return new BasePair[] { new Constant(number) };
                    }

                    Type type;
                    if (DNASystem.Punctuations.TryGetValue(punctuation.Value, out type))
                    {
                        return new BasePair[] { (BasePair)System.Activator.CreateInstance(type, word, number) };
                    }

                    errors.Add(new CompilationMessage
                    {
                        token = token,
                        message = "Unrecognized punctuation " + punctuation,
                    });
                    return new BasePair[] { };
                }

                errors.Add(new CompilationMessage
                {
                    token = token,
                    message = "Can't parse '" + word + "' as number: either too large or not a number.",
                });
                return new BasePair[] { };
            }

            IList<BasePair> codeFragment;
            if (TryParseLabel(punctuation, word, dna, out codeFragment))
            {
                return codeFragment;
            }
            else if (punctuation.HasValue)
            {
                errors.Add(new CompilationMessage
                {
                    token = token,
                    message = "Punctuation next to unrecognized label. Ignoring token...",
                });
                return new BasePair[] { };
            }

            if (DNASystem.Commands.ContainsKey(word))
            {
                return new BasePair[]
                {
                    (BasePair)System.Activator.CreateInstance(DNASystem.Commands[word]),
                };
            }

            errors.Add(new CompilationMessage
            {
                token = token,
                message = "Unknown token " + token.token + ".",
            });
            return new BasePair[] { };
        }

        /// <summary>
        /// Parses every token of a codule and concatenates the resulting base pairs in token order.
        /// </summary>
        public static List<BasePair> ParseCodule(IList<Token> tokens, Chromosome dna, List<CompilationMessage> errors)
        {
            List<BasePair> Final = new List<BasePair>();
            foreach (var token in tokens)
            {
                Final.AddRange(ParseToken(token, dna, errors));
            }

            return Final;
        }

        /// <summary>
        /// Parses every remaining (non-<c>null</c>) codule definition and appends its base pairs to the
        /// codule registered under its slot; the entry codule always goes to slot 0.
        /// </summary>
        public static void ParseCodules(Chromosome returnMe, List<CoduleDefinition> codules,
            List<CompilationMessage> errors)
        {
            var currentChromosome = returnMe;
            for (int i = 0; i < codules.Count; ++i)
            {
                var codule = codules[i];
                if (codule == null)
                {
                    // Dropped by one of the handle-creation phases.
                    continue;
                }

                var basePairs = ParseCodule(codule.tokens, returnMe, errors);

                Int16 index;
                if (codule.identifier == EntryCoduleName)
                {
                    index = 0;
                }
                else
                {
                    index = currentChromosome.CoduleNames[codule.identifier];
                }

                currentChromosome.Codules[index].BasePairs.AddRange(basePairs);
            }
        }
    }
}