I am trying to write a programming language, but I have recently gotten stuck. I am trying to add a way for my Java code to recognize when something is surrounded by round brackets ('(' and ')'). I call such a bracketed group a "Context" (mainly because I don't know what else to call it, since it is basically just there to provide context for functions, if statements, etc.). I am currently not getting the result I want. If you have any questions about the code, please ask. And yes, I know my code is messy; I'll refactor it later.
Here’s my source code:
Lexer.java:
import java.sql.SQLOutput;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
public class Lexer {
private String input;
private int position;
private int tests = 1;
public int tokenNumber = -1;
public static final String redColoredText = "u001B[31m";
public static final String yellowColoredText = "u001B[33m";
public static final String resetColoredText = "u001B[0m";
public Lexer(String input) {
this.input = input;
this.position = 0;
}
public List<Token> tokenize() {
List<String> recentTokens = new ArrayList<>();
List<Token> tokens = new ArrayList<>();
Token previousToken = null;
while (position < input.length()) {
char currentChar = input.charAt(position);
List<Character> special_characters = Arrays.asList('!', '$', '+', '-', '/', '*', '.', ':', '§', ',', '%', '_',
'§', '?', '=', '"', ''', '²', '³', '&',
'~');
List<Character> bracket_special_characters = Arrays.asList('(', ')', '[', ']', '{', '}');
tokenNumber++;
if (!Character.isWhitespace(currentChar)) {
if (Character.isDigit(currentChar)) {
tokens.add(new Token(TokenType.NUMBER, Character.toString(currentChar)));
} else if (currentChar == '+') {
tokens.add(new Token(TokenType.PLUS, "+"));
} else if (currentChar == '-') {
tokens.add(new Token(TokenType.MINUS, "-"));
} else if (currentChar == '*') {
tokens.add(new Token(TokenType.MULTIPLY, "*"));
} else if (currentChar == '/') {
tokens.add(new Token(TokenType.DIVIDE, "/"));
} else if (special_characters.contains(currentChar) || bracket_special_characters.contains(currentChar)) {
tokens.add(new Token(TokenType.SPECIAL_CHARACTER, "" + currentChar));
} else {
tokens.add(new Token(TokenType.LETTER, Character.toString(currentChar)));
}
StringBuilder identifier = new StringBuilder();
Pattern pattern = Pattern.compile("[a-zA-Z]");
Pattern p_special_characters = Pattern.compile("[+\-*/_!.;:§,"²³$%&{\[()\]}=?`´°^u00A7]");
Matcher sc_matcher = p_special_characters.matcher(identifier);
Matcher matcher = pattern.matcher(identifier);
/*if ((matcher.find() || sc_matcher.find()) && !Character.isDigit(currentChar)) {
identifier.append(currentChar);
}*/
if ((Character.isLetter(currentChar) || sc_matcher.find()) && !Character.isDigit(currentChar)) {
identifier.append(currentChar);
}
if (special_characters.contains(currentChar)) {
identifier.append(currentChar);
}
while (position + 1 < input.length() && (Character.isLetterOrDigit(input.charAt(position + 1)) || special_characters.contains(input.charAt(position+1)))) {
identifier.append(input.charAt(position + 1));
position++;
}
if (identifier.length() > 0) {
if (identifier.toString().matches("\d+")) {
tokens.add(new Token(TokenType.CONSTANT, identifier.toString()));
} else {
if (identifier.length() > 1) {
tokens.add(new Token(TokenType.KEYWORD, identifier.toString()));
}
}
}
recentTokens.add(identifier.toString());
String command = identifier.toString();
// Checking for Keywords
switch (command) {
case "!void" -> System.out.println("{void}");
case "!echo" -> {
String output = input.substring(input.indexOf("!echo ") + 6);
System.out.println(output);
}
case "!test" -> {
System.out.println("Running Test " + tests + "...");
tests++;
}
case "_abstract" -> System.out.println("{abstract}");
case "$boolean" -> System.out.println("{boolean}");
case "!break" -> System.out.println("{break}");
case "$byte" -> System.out.println("{byte}");
case "case" -> System.out.println("{case}");
case "catch" -> System.out.println("{catch}");
case "$char" -> System.out.println("{char}");
case "@class" -> System.out.println("{class}");
case "*const" -> System.out.println("{const}");
case "_continue_" -> System.out.println("{continue}");
case ":default:" -> System.out.println("{default}");
case "!do" -> System.out.println("{do}");
case "$double" -> System.out.println("{double}");
case "else" -> System.out.println("{else}");
case "Enum" -> System.out.println("{enum}");
case "extends" -> System.out.println("{extends}");
case "final" -> System.out.println("{final}");
case "FINAL" -> System.out.println("{finally}");
case "$float" -> System.out.println("{float}");
case "for" -> System.out.println("{for}");
case "*goto" -> System.out.println("{goto}");
case "goto" -> System.out.println("{goto_line}");
case "if" -> System.out.println("{if}");
case "implements_" -> System.out.println("{implements");
case "§import" /*u00A7 = §*/ -> System.out.println("{import}");
case "?instanceof" -> System.out.println("{instanceof}");
case "$int" -> System.out.println("{int}");
case "interface" -> System.out.println("{interface}");
case "$long" -> System.out.println("{long}");
case "native" -> System.out.println("{native}");
case "new" -> System.out.println("{new}");
case "package" -> System.out.println("{package}");
case "private:" -> System.out.println("{private}");
case "protected" -> System.out.println("{protected}");
case "public" -> System.out.println("{public}");
case ":return" -> System.out.println("{return}");
case "$short" -> System.out.println("{short}");
case "static" -> System.out.println("{static}");
case "strict" -> System.out.println("{sctrictfp}");
case "super" -> System.out.println("{super}");
case "switch" -> System.out.println("{switch}");
case "synchronized" -> System.out.println("{synchronized}");
case "this" -> System.out.println("{this}");
case "throw" -> System.out.println("{throw}");
case "throws" -> System.out.println("{throws}");
case "transcient" -> System.out.println("{transcient}");
case "try" -> System.out.println("{try}");
case "volatile" -> System.out.println("{volatile}");
case "while" -> System.out.println("{while}");
case "encode" -> System.out.println("{encode}");
case "decode" -> System.out.println("{decode}");
case "!res" -> System.out.println(resetColoredText);
}
// End of Checking for Keywords
/*if (tokens.get(tokenNumber).value.startsWith(tokens.get(tokenNumber - 1).value) && tokens.get(tokenNumber - 1) != null) {
tokens.remove(tokenNumber - 1);
}*/
}
position++;
}
//Deleting useless and annoying letter, special character and number tokens
for (int i = 0; i < tokens.size(); i++) {
try {
if (tokens.size() > -1 && i > 0) {
if (tokens.get(i).value.matches("\d+") && tokens.get(i - 1) != null &&
tokens.get(i).type == TokenType.CONSTANT && tokens.get(i - 1).type == TokenType.NUMBER) {
tokens.get(i).value = tokens.get(i - 1).value + tokens.get(i).value;
tokens.remove(i - 1);
i--;
// This up there was the code to clean up useless number tokens
} if (tokens.get(i).type == TokenType.KEYWORD && !tokens.get(i - 1).value.matches("[(){}\[\]]")) {
// Code to clean up useless letter tokens
//tokens.get(tokenNumber).value = tokens.get(tokenNumber - 1).value + tokens.get(tokenNumber).value;
tokens.remove(i - 1);
i--;
}
}
} catch (Exception e) {
//throw new RuntimeException(e);
System.out.println(redColoredText + "Error 001: Corruption of Lexer.java.n Corrupted line beginning: ~198" + yellowColoredText);
System.out.println(e.toString() + "nnn");
}
}
return tokens;
}
public static void main(String[] arg) {
long startTime = System.currentTimeMillis();
Scanner scanner = new Scanner(System.in);
//while (true) {
Lexer lexer = new Lexer("new !void(some (value (12))) 1921987 test(test)");
List<Token> tokens = lexer.tokenize();
for (Token token : tokens) {
System.out.println(token);
} new Parser(tokens);
long endTime = System.currentTimeMillis();
long elapsedTime = endTime - startTime;
double elapsedTimeSeconds = elapsedTime / 1000.0;
System.out.println("Runtime = " + elapsedTime + ", End_Time = " + endTime + ", R_Runtime = " + elapsedTimeSeconds + "s");//}R standing for Readable
}
}
class Token {
TokenType type;
String value;
public Token(TokenType type, String value) {
this.type = type;
this.value = value;
}
@Override
public String toString() {
return "Token{" +
"type=" + type +
", value='" + value + ''' +
'}';
}
}
/**
 * Categories a {@code Token} can belong to.
 *
 * <p>The declaration order is part of the type's behaviour ({@code ordinal()},
 * {@code values()}), so constants must only ever be appended.
 */
enum TokenType {
    /** A single digit character. */
    NUMBER,

    /** The {@code +} character. */
    PLUS,

    /** The {@code -} character. */
    MINUS,

    /** The {@code *} character. */
    MULTIPLY,

    /** The {@code /} character. */
    DIVIDE,

    /** Any single character that is neither a digit nor a special or bracket character. */
    LETTER,

    /** Declared but not emitted by the lexer in this file. */
    ASSIGN,

    /** Declared but not emitted by the lexer in this file. */
    IDENTIFIER,

    /** A word longer than one character that is not made up solely of digits. */
    KEYWORD,

    /** A word made up solely of digits. */
    CONSTANT,

    /** A special character or bracket other than the four arithmetic operators. */
    SPECIAL_CHARACTER
}
//
And here’s the parser.java where the problem is:
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.Callable;
import org.json.*;
public class Parser {
private Queue<Token> Tokens;
public List<Integer> ints;
public int contextCounter = 0;
public int addIntIFromContext = 0;
public List<Boolean> ClosedContextWithIDList;
private int nestedContextCounter = 0;
public int testingInt = 0;
/*public static void main(String[] arg) {
/*LexersToken = new ArrayList<>();
LexersToken.add("A token");
new Parser(LexersToken);
}*/ public Parser(List<Token> tokenList) {
ClosedContextWithIDList = new ArrayList<>();
System.out.println("Parser began...");
this.Tokens = new LinkedList<>();
System.out.println(tokenList.get(0));
System.out.println("Parser ran...");
JSONObject one = new JSONObject();
int currentToken = 0;
int contextCounter = 0; // //
try {
for (int i = 0; i < tokenList.size(); i++) {
if (i + 1 < tokenList.size() && tokenList.get(i + 1) != null) {
JSONObject cT = new JSONObject();
cT.put("TokenType", tokenList.get(i).type);
cT.put("Value", tokenList.get(i).value);
one.put(String.valueOf(i), cT);
/*if (i + 1 < tokenList.size() && tokenList.get(i + 1) != null && tokenList.get(i).value.equals("(")) {
JSONObject context = new JSONObject();
boolean whileLoop = true;
i++;
while (whileLoop) {
if (i + 1 < tokenList.size() || tokenList.get(i + 1) == null || tokenList.get(i).value.equals(")")) {
whileLoop = false;
}
JSONObject cT2 = new JSONObject();
context.put(String.valueOf(i), cT2);
cT2.put("TokenType", tokenList.get(i).type);
cT2.put("Value", tokenList.get(i).value);
cT.put("Context", context);
i++;
}
}*/
/*
if (tokenList.get(i + 1).value.equals("(") && tokenList.get(i + 1) != null) {
JSONObject context = new JSONObject();
contextCounter++; // //
ClosedContextWithIDList.add(contextCounter, false);
i++;
boolean whileLoop = true;
tokenList.remove(i);
while (whileLoop) {
if (i + 1 > tokenList.size() || tokenList.get(i + 1) == null || tokenList.get(i).value.equals(")")) {
if (tokenList.get(i).value.equals(")")) {
tokenList.remove(i);
ClosedContextWithIDList.set(contextCounter, true);
}
whileLoop = false;
cT.put("Context", context);
} else {
JSONObject cT2 = new JSONObject();
cT2.put("TokenType", tokenList.get(i).type);
cT2.put("Value", tokenList.get(i).value);
context.put(JSONObject.valueToString(i), cT2);
System.out.println(cT2);
i++;
}
}
}
*/
if (tokenList.get(i + 1).value.equals("(") && tokenList.get(i + 1) != null && nestedContextCounter == 0) {
cT.put("Context", parseContext(tokenList, ++i));
i += addIntIFromContext;
addIntIFromContext = 0;
}
}
}
JSONObject cT = new JSONObject();
String c = new String(Files.readAllBytes(Paths.get("AST.json"))); // this variable will never get called again so fuck the naming
if (tokenList.get(tokenList.size() - 1) != null && !c.contains(""" + (tokenList.size() -1) + "":")) {
cT.put("TokenType", tokenList.get(tokenList.size() - 1).type);
cT.put("Value", tokenList.get(tokenList.size() - 1).value);
one.put(String.valueOf(tokenList.size() - 1), cT);
}
} catch(Exception e) {
System.out.println(Lexer.redColoredText + "Error 002: Unexpected Error when adding json objects." + Lexer.yellowColoredText);
System.out.println(e.toString() + "nnn");
throw new RuntimeException(e);
}
try(FileWriter fw = new FileWriter("AST.json")) {
fw.write(one.toString(+2));
fw.close();
//FileReader fr = new FileReader("AST.json")
} catch (Exception e) {
throw new RuntimeException(e);
}
try (FileReader fr = new FileReader("AST.json")) {
//JSON
} catch (Exception e) {
System.out.println(Lexer.redColoredText + "Error 003: Unexpected Error when reading json objects." + Lexer.yellowColoredText);
System.out.println(e.toString() + "nnn");
}
System.out.println("Parser ran...");
}
public JSONObject parseContext(List<Token> tokenList, int i) {
int localContextCounter = contextCounter + 1; // Increment contextCounter for unique ID //
i += addIntIFromContext;
testingInt++;
System.out.println("AOBR: " + testingInt);
JSONObject context = new JSONObject();
ClosedContextWithIDList.add(contextCounter, false);
context.put("ID", contextCounter);
i++;
addIntIFromContext++;
boolean whileLoop = true;
if (tokenList.size() > i && tokenList.get(i).value.equals("(")) tokenList.remove(i);
while (whileLoop && i < tokenList.size()) {
if (i + 1 >= tokenList.size() || tokenList.get(i + 1) == null || tokenList.get(i).value.equals(")")) {
//if (tokenList.get(i).value.equals(")")) {
tokenList.remove(i);
if (contextCounter < ClosedContextWithIDList.size()) {
ClosedContextWithIDList.set(contextCounter, true);
}
if (nestedContextCounter > 0) {
nestedContextCounter--;
}
//}
whileLoop = false;
//cT.put("Context", context);
} else if (tokenList.get(i).value.equals("(") /*&& ClosedContextWithIDList.get(contextCounter).equals(false)*/) {
// Start a new nested context
nestedContextCounter++;
System.out.println("nCC: " + nestedContextCounter);
JSONObject nestedContext = parseContext(tokenList, i + 1);
System.out.println(nestedContext.get("ID"));
context.put("Context", nestedContext);
i += addIntIFromContext;
addIntIFromContext = 0;
} else {
JSONObject cT2 = new JSONObject();
cT2.put("TokenType", tokenList.get(i).type);
cT2.put("Value", tokenList.get(i).value);
context.put(JSONObject.valueToString(i), cT2);
System.out.println("cT2: " + cT2 + ", nCC: " + nestedContextCounter);
i++;
addIntIFromContext++;
/*if (tokenList.get(i + 1).value.equals("(") && tokenList.get(i + 1) != null) {
context.put("Context",parseContext(tokenList, i));
}*/
}
}
contextCounter++; // //
return context;
}
}
/**
 * Empty class: it declares no fields and no methods, only a public no-argument constructor.
 */
class AST {

    /** Creates an instance; there is no state to initialise. */
    public AST() {
        super();
    }
}
I tried adding a nestedContextCounter to see whether that would help me fix the problem. I also asked an AI, but it couldn't help.
I am expecting the AST.json file to contain output that looks like this:
`{
"0": {
"TokenType": "KEYWORD",
"Value": "new"
},
"1": {
"Context": {
"2": {
"Context": {
"3": {
"TokenType": "KEYWORD",
"Value": "value"
},
"4": {
"TokenType": "CONSTANT",
"Value": 12
},
"ID": 1
},
"TokenType": "KEYWORD",
"Value": "some"
},
"ID": 0
},
"TokenType": "KEYWORD",
"Value": "!void"
},
"5": {
"TokenType": "CONSTANT",
"Value": 1921987
},
"6": {
"Context": {
"7": {
"TokenType": "KEYWORD",
"Value": "test"
},
"ID": 2
},
"TokenType": "KEYWORD",
"Value": "test"
}
}`
But the only output I am receiving is:
`{
"0": {
"TokenType": "KEYWORD",
"Value": "new"
},
"1": {
"Context": {
"Context": {"ID": 0},
"3": {
"TokenType": "KEYWORD",
"Value": "some"
},
"7": {
"TokenType": "CONSTANT",
"Value": "12"
},
"ID": 0
},
"TokenType": "KEYWORD",
"Value": "!void"
},
"4": {
"TokenType": "SPECIAL_CHARACTER",
"Value": "("
},
"5": {
"Context": {
"7": {
"TokenType": "CONSTANT",
"Value": "12"
},
"ID": 2
},
"TokenType": "KEYWORD",
"Value": "value"
},
"9": {
"Context": {
"11": {
"TokenType": "KEYWORD",
"Value": "test"
},
"ID": 3
},
"TokenType": "KEYWORD",
"Value": "test"
}
}