Description:
I’m developing a C++ program that converts .jack files into XML format using a JackTokenizer class. The program is supposed to read each .jack file, tokenize its content, and generate a corresponding XML file. However, I’m encountering an issue with the XML structure of the output.
Problem Details:
Expected Behavior:
The output XML file for each .jack file should start with <tokens> and end with </tokens>, encapsulating all tokenized elements within.
Actual Behavior:
Currently, the generated XML files are missing the <tokens> tag at the beginning and </tokens> at the end. Instead, each XML file starts directly with token tags like <keyword>, <identifier>, <symbol>, etc., without the expected enclosing tags.
Steps Taken:
Ensured correct reading and processing of .jack files from the specified directory.
Confirmed that the JackTokenizer class correctly tokenizes lines from .jack files and outputs the corresponding XML tag for each token type.
Verified file handling and tokenizer logic within the convertFile function to ensure tokens are processed correctly.
JackAnalyzer.cpp
#include <iostream>
#include "../Compiler/JackCompiler/JackTokenizer.h"
#include "../Compiler/Utility.h"
// Entry point of the analyzer.
//
// Repeatedly asks the user for a directory, collects the .jack files it
// contains and converts each of them to "<file>.xml" through JackTokenizer.
// Returns 0 when standard input is exhausted and 1 when an exception escapes
// the conversion loop.
int main() {
    try {
        while (true) {
            Ui::uiLogo();
            std::string directoryPath = File::askDirectoryPath();
            // Once standard input is exhausted every further read fails and
            // yields an empty path, so the prompt loops would spin forever.
            if (!std::cin) {
                return 0;
            }

            // Check for valid directory and clear console.
            bool isValidDir = File::isValidDirectory(directoryPath);
            Ui::clear();

            // If not valid directory, force user to re-enter.
            while (!isValidDir) {
                Ui::uiLogo();
                std::cout << "Invalid directory!" << std::endl;
                directoryPath = File::askDirectoryPath();
                if (!std::cin) {
                    return 0;
                }
                isValidDir = File::isValidDirectory(directoryPath);
                Ui::clear();
            }

            const std::vector<std::string> jackFiles = File::getJackFiles(directoryPath);
            if (jackFiles.empty()) {
                std::cerr << "No .jack files found in the directory!" << std::endl;
                continue;
            }

            JackTokenizer jackTokenizer;
            try {
                std::cout << "Files successfully opened" << std::endl;
                for (const auto& file : jackFiles) {
                    const std::string xmlFileName = file + ".xml";
                    // Converting each .jack file to corresponding .xml file
                    std::cout << "Converting " << file << " to " << xmlFileName << std::endl;
                    jackTokenizer.convertFile(file, xmlFileName);
                    std::cout << "Finished converting " << file << " to " << xmlFileName << "!" << std::endl;
                }
            }
            catch (const std::runtime_error& e) {
                // A failed conversion aborts the current batch only; the user
                // is prompted for another directory afterwards.
                std::cerr << "Error during translating: " << e.what() << std::endl;
            }
        }
    }
    catch (const std::exception& e) {
        // Terminate the message with a real newline (the original printed the
        // letter 'n' instead).
        std::cerr << "Unhandled exception: " << e.what() << '\n';
        return 1;
    }
    return 0;
}
JackTokenizer.h
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <stdexcept>
#include "../Compiler/JackCompiler/Utility/Utility.h"
#include "Utility.h"
using namespace std;
class JackTokenizer {
public:
JackTokenizer() = default;
void convertFile(const string& inputPath, const string& outputPath) {
ifstream input(inputPath);
if (!input.is_open()) {
cerr << "Error opening input file: " << inputPath << endl;
throw runtime_error("Failed to open input file.");
}
ofstream output(outputPath);
if (!output.is_open()) {
cerr << "Error opening output file: " << outputPath << endl;
throw runtime_error("Failed to open output file.");
}
XmlTokenizer xmlTokenizer(output);
bool advComment = false;
string line;
output << "<tokens>" << endl;
while (getline(input, line)) {
line = Parser::removeComments(line);
if (Parser::isNotCommentLine(line, advComment)) {
try {
string validLine = Parser::cleanAndValidateLine(line);
if (!validLine.empty()) {
xmlTokenizer.xmlTokenizer(validLine);
}
}
catch (const runtime_error& e) {
cerr << "Error: " << e.what() << endl;
}
}
}
output << "</tokens>" << endl;
input.close();
output.close();
}
};
Utility.h
#pragma once
#include <cctype>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
using namespace std;
namespace fs = filesystem;
// Console presentation helpers.
class Ui {
public:
    // Prints the application banner. The banner body was removed from this
    // listing, so the function currently prints nothing.
    static void uiLogo() {
        //I deleted it for more compact code
    }

    // Clears the terminal by writing ANSI escape sequences to standard output:
    // ESC[2J erases the display and ESC[H moves the cursor to the top-left.
    // The ESC byte must be written as the escape \x1B; without the backslash
    // the literal characters "x1B[2Jx1B[H" are printed instead.
    static void clear() {
        std::cout << "\x1B[2J\x1B[H";
    }
};
// File-system and prompt helpers used to locate the .jack sources.
class File {
public:
    // Prompts on standard output and returns one line read from standard
    // input. The returned string is empty when the read fails.
    static std::string askDirectoryPath() {
        std::cout << "Please insert directory path containing .jack files\n";
        std::cout << "Directory path -> ";
        std::string path;
        std::getline(std::cin, path);
        return path;
    }

    // Returns true when `path` names an existing directory. Uses the
    // non-throwing overload so that an OS-level error (for example a
    // permission problem) is reported as "not a valid directory" instead of
    // escaping as an exception.
    static bool isValidDirectory(const std::string& path) {
        std::error_code ec;
        return std::filesystem::is_directory(path, ec);
    }

    // Returns the paths of the regular files directly inside `directoryPath`
    // whose extension is ".jack". Sub-directories are not searched, and a
    // directory that merely happens to be named "*.jack" is skipped because it
    // cannot be opened as a source file. The order of the result is whatever
    // order the directory iterator yields.
    static std::vector<std::string> getJackFiles(const std::string& directoryPath) {
        std::vector<std::string> jackFiles;
        for (const auto& entry : std::filesystem::directory_iterator(directoryPath)) {
            if (entry.is_regular_file() && entry.path().extension() == ".jack") {
                jackFiles.push_back(entry.path().string());
            }
        }
        return jackFiles;
    }
};
// Line-level text helpers: whitespace normalisation and comment detection.
class Parser {
public:
    // Collapses every run of whitespace in `line` into a single space and
    // removes leading and trailing spaces. Returns an empty string when the
    // line contains nothing but whitespace.
    //
    // NOTE(review): whitespace inside double-quoted string constants is
    // collapsed as well.
    static std::string cleanAndValidateLine(const std::string& line) {
        // A raw string literal keeps the backslash: written as "\s+" the
        // pattern degrades to "s+", which replaces every 's' with a space.
        // Built once because constructing a std::regex is costly.
        static const std::regex whitespaceRun(R"(\s+)");
        const std::string collapsed = std::regex_replace(line, whitespaceRun, " ");

        const std::size_t firstNonSpace = collapsed.find_first_not_of(' ');
        if (firstNonSpace == std::string::npos) {
            return std::string();
        }
        const std::size_t lastNonSpace = collapsed.find_last_not_of(' ');
        return collapsed.substr(firstNonSpace, lastNonSpace - firstNonSpace + 1);
    }

    // Reports whether `line` still contains anything once block comments
    // ("/* ... */") are removed. Only the verdict is returned, not the
    // stripped text.
    //
    // `inMultilineComment` carries state between calls: it must be true when a
    // previous line opened a block comment that is still open, and it is
    // updated to describe the state at the end of this line. Every block
    // comment on the line is removed, not just the first one.
    static bool isNotCommentLine(const std::string& line, bool& inMultilineComment) {
        std::string remaining = cleanAndValidateLine(line);

        if (inMultilineComment) {
            const std::size_t endComment = remaining.find("*/");
            if (endComment == std::string::npos) {
                return false;  // the whole line is inside the open comment
            }
            inMultilineComment = false;
            remaining.erase(0, endComment + 2);
        }

        std::size_t startComment = remaining.find("/*");
        while (startComment != std::string::npos) {
            const std::size_t endComment = remaining.find("*/", startComment + 2);
            if (endComment == std::string::npos) {
                // The comment continues on the following line(s).
                inMultilineComment = true;
                remaining.erase(startComment);
                break;
            }
            remaining.erase(startComment, endComment + 2 - startComment);
            startComment = remaining.find("/*");
        }

        return !cleanAndValidateLine(remaining).empty();
    }

    // Returns `line` truncated at the first "//" that is not inside a
    // double-quoted string constant, so text such as "http://host" inside a
    // string survives. The line is returned unchanged when it holds no such
    // marker.
    static std::string removeComments(const std::string& line) {
        bool inString = false;
        for (std::size_t i = 0; i < line.size(); ++i) {
            if (line[i] == '"') {
                inString = !inString;
            }
            else if (!inString && line.compare(i, 2, "//") == 0) {
                return line.substr(0, i);
            }
        }
        return line;
    }

    // Returns true when every character of `str` is a decimal digit. An empty
    // string therefore yields true.
    static bool is_digits(const std::string& str) {
        for (const char ch : str) {
            // std::isdigit requires a value representable as unsigned char;
            // passing a negative char is undefined behaviour.
            if (!std::isdigit(static_cast<unsigned char>(ch))) {
                return false;
            }
        }
        return true;
    }
};
// Splits an already comment-free, whitespace-normalised line of Jack code
// into tokens and writes one XML element per token to an output stream.
class XmlTokenizer {
private:
    // Destination stream; the caller keeps ownership and must keep it alive
    // for as long as this object is used.
    std::ostream& output;
    std::unordered_set<std::string> keywords;
    std::unordered_set<char> symbols;

    // Replaces the characters that have a special meaning in XML by their
    // entity references so the generated document stays well-formed.
    static std::string escapeXml(const std::string& text) {
        std::string escaped;
        escaped.reserve(text.size());
        for (const char ch : text) {
            switch (ch) {
            case '<': escaped += "&lt;"; break;
            case '>': escaped += "&gt;"; break;
            case '&': escaped += "&amp;"; break;
            case '"': escaped += "&quot;"; break;
            default: escaped += ch; break;
            }
        }
        return escaped;
    }

    // Writes the pending token, if any, and empties the buffer.
    void flushToken(std::string& token) {
        if (!token.empty()) {
            output << classifyToken(token) << '\n';
            token.clear();
        }
    }

public:
    // Binds the tokenizer to `output`. Any std::ostream is accepted, so
    // existing callers that pass a std::ofstream keep working.
    XmlTokenizer(std::ostream& output) : output(output) {
        keywords = {
            "class", "constructor", "function", "method", "field", "static", "var",
            "int", "char", "boolean", "void", "true", "false", "null", "this",
            "let", "do", "if", "else", "while", "return"
        };
        // '[' and ']' are Jack symbols too; without them an array access such
        // as a[i] is emitted as the single identifier "a[i]".
        symbols = {
            '{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/', '&', '|',
            '<', '>', '=', '~'
        };
    }

    // Returns the XML element for one token. Checks are made in this order:
    // keyword, integer constant (first character is a digit), string constant
    // (enclosed in double quotes, which are dropped), symbol (first character
    // is a known symbol), and identifier for everything else. The token text
    // is XML-escaped. Callers are expected to pass a non-empty token; an empty
    // one is reported as an empty identifier.
    std::string classifyToken(const std::string& token) const {
        if (keywords.find(token) != keywords.end()) {
            return "<keyword> " + token + " </keyword>";
        }
        if (!token.empty() && std::isdigit(static_cast<unsigned char>(token.front()))) {
            return "<integerConstant> " + token + " </integerConstant>";
        }
        if (token.size() >= 2 && token.front() == '"' && token.back() == '"') {
            return "<stringConstant> " + escapeXml(token.substr(1, token.size() - 2)) + " </stringConstant>";
        }
        if (!token.empty() && symbols.find(token.front()) != symbols.end()) {
            return "<symbol> " + escapeXml(token) + " </symbol>";
        }
        return "<identifier> " + escapeXml(token) + " </identifier>";
    }

    // Tokenizes `validLine` and writes one XML element per line to the output
    // stream. Whitespace separates tokens, every symbol character is a token
    // of its own, and everything between two double quotes is one string
    // constant.
    //
    // Throws std::runtime_error when a string constant is still open at the
    // end of the line; tokens that precede it have already been written.
    void xmlTokenizer(const std::string& validLine) {
        bool inString = false;
        std::string token;

        for (const char ch : validLine) {
            if (inString) {
                token += ch;
                if (ch == '"') {  // closing quote completes the constant
                    flushToken(token);
                    inString = false;
                }
            }
            else if (ch == '"') {
                flushToken(token);  // a quote ends whatever preceded it
                token += ch;
                inString = true;
            }
            else if (std::isspace(static_cast<unsigned char>(ch))) {
                flushToken(token);
            }
            else if (symbols.find(ch) != symbols.end()) {
                flushToken(token);
                output << classifyToken(std::string(1, ch)) << '\n';
            }
            else {
                token += ch;
            }
        }

        if (inString) {
            // Previously the partial constant was dropped without any notice.
            throw std::runtime_error("Unterminated string constant: " + token);
        }
        flushToken(token);
    }
};
Console result:
_ _ _____ _ _
| | | | / ____| (_) | |
| | __ _ ___ | | __ | | ___ _ __ ___ _ __ _ | | ___ _ __
_ | | / _` | / __| | |/ / | | / _ | '_ ` _ | '_ | | | | / _ | '__|
| |__| | | (_| | | (__ | < | |____ | (_) | | | | | | | | |_) | | | | | | __/ | |
____/ __,_| ___| |_|_ _____| ___/ |_| |_| |_| | .__/ |_| |_| ___| |_|
| |
|_|
Please insert directory path containing .jack files
Directory path -> Tests/ArrayTest
Files successfully opened
Converting Tests/ArrayTestMain.jack to Tests/ArrayTestMain.jack.xml
Finished converting Tests/ArrayTestMain.jack to Tests/ArrayTestMain.jack.xml!
_ _ _____ _ _
| | | | / ____| (_) | |
| | __ _ ___ | | __ | | ___ _ __ ___ _ __ _ | | ___ _ __
_ | | / _` | / __| | |/ / | | / _ | '_ ` _ | '_ | | | | / _ | '__|
| |__| | | (_| | | (__ | < | |____ | (_) | | | | | | | | |_) | | | | | | __/ | |
____/ __,_| ___| |_|_ _____| ___/ |_| |_| |_| | .__/ |_| |_| ___| |_|
| |
|_|
Please insert directory path containing .jack files
Directory path ->
Resulting XML file (missing the enclosing <tokens> and </tokens> tags):
//Here -> should be <tokens>
<keyword> class </keyword>
<identifier> Test </identifier>
<symbol> { </symbol>
<keyword> function </keyword>
<keyword> void </keyword>
<identifier> main </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> { </symbol>
<keyword> do </keyword>
<identifier> Output </identifier>
<symbol> . </symbol>
<identifier> printString </identifier>
<symbol> ( </symbol>
<stringConstant> Hello, world! </stringConstant>
<symbol> ) </symbol>
<symbol> ; </symbol>
<keyword> return </keyword>
<symbol> ; </symbol>
<symbol> } </symbol>
<symbol> } </symbol>
//Here -> should be </tokens>
I need help modifying the convertFile function in JackTokenizer so that each generated XML file starts with <tokens> and ends with </tokens>, encompassing all tokenized elements from the .jack file.