I am working on a Tic-Tac-Toe engine using the Monte Carlo Tree Search (MCTS) algorithm. However, I’ve encountered a bug where the AI sometimes fails to block the opponent’s winning moves, resulting in losses. Additionally, the program occasionally crashes due to a segmentation fault.
I’ve included MCVE of my code below:
main.cc
#include "node.hpp"
#include <cassert>
#include <cfloat>
#include <iostream>
#include <string>
#include <cctype>
#include <deque>
void print_board(uint16_t ai_board, uint16_t enemy_board, char ai, char player) {
char board[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '};
for (int i = 0; i < 9; i++) {
if (ai_board & (1 << i))
board[i] = ai;
else if (enemy_board & (1 << i))
board[i] = player;
}
std::cout << " A B Cnn";
std::cout << " " << board[0] << " | " << board[1] << " | " << board[2] << " 1n";
std::cout << "---|---|---n";
std::cout << " " << board[3] << " | " << board[4] << " | " << board[5] << " 2n";
std::cout << "---|---|---n";
std::cout << " " << board[6] << " | " << board[7] << " | " << board[8] << " 3n";
}
bool is_valid_coordinate(char col, char row) {
return (col >= 'A' && col <= 'C') && (row >= '1' && row <= '3');
}
int coordinate_to_idx(char col, char row) {
return (col - 'A') + 3 * (row - '1');
}
int main(int argc, char **argv) {
char player;
for(;;) {
std::string selection;
std::cout << "Select your symbol (X/O): ";
std::cin >> selection;
if(selection.length() != 1) {
std::cout << "Invalid input format. Try again.n";
continue;
}
player = std::toupper(selection[0]);
if(player != 'X' && player != 'O') {
std::cout << "Invalid selection. Try again.n";
continue;
}
break;
}
char ai = player == 'O' ? 'X' : 'O';
bool is_ai_turn = ai == 'X';
std::deque<Node> arena;
Node root(&arena, is_ai_turn);
Node *curr_node = &root;
if(!is_ai_turn)
print_board(0, 0, ai, player);
curr_node->enemy_board = 0b101000000;
while (true) {
if (curr_node->GetWinner()) {
std::cout << "Game Over! ";
if (curr_node->GetWinner() == 1)
std::cout << "AI wins!" << std::endl;
else
std::cout << "You win!" << std::endl;
break;
}
if (curr_node->is_ai_turn) {
curr_node = curr_node->CalculateBestMove(10000);
std::cout << "AI's move:" << std::endl;
print_board(curr_node->ai_board, curr_node->enemy_board, ai, player);
}
else {
std::cout << "Enter your move (e.g., A1, B2): ";
std::string move;
std::cin >> move;
if (move.size() != 2) {
std::cout << "Invalid input format. Try again.n";
continue;
}
char col = std::toupper(move[0]);
char row = move[1];
if (!std::isalpha(col) || !std::isdigit(row) || !is_valid_coordinate(col, row)) {
std::cout << "Invalid input format. Try again.n";
continue;
}
int idx = coordinate_to_idx(col, row);
if (curr_node->ai_board & (1 << idx) || curr_node->enemy_board & (1 << idx)) {
std::cout << "The position is already taken. Try again.n";
continue;
}
curr_node = curr_node->SearchEnemyMove(idx);
assert(curr_node != nullptr);
std::cout << "Your move:" << std::endl;
print_board(curr_node->ai_board, curr_node->enemy_board, ai, player);
}
}
return 0;
}
node.hpp
#ifndef NODE_HPP
#define NODE_HPP
#include <cstddef>
#include <cstdint>
#include <deque>
struct Node {
int64_t eval;
size_t visit_count;
size_t win_count;
Node *parent;
uint16_t ai_board;
uint16_t enemy_board;
uint8_t child_count;
Node *children[9];
std::deque<Node>* arena;
bool is_ai_turn;
Node(std::deque<Node> *arena, bool is_ai_turn);
Node *CalculateBestMove(size_t iter_count);
Node *SearchEnemyMove(int move);
int GetWinner();
private:
Node *GetBestChild();
Node *FindBestLeafNode();
void CreateChildren();
void SimulateAndPropagate();
double GetUcbScore();
};
#endif // NODE_HPP
node.cc
#include "node.hpp"
#include <cassert>
#include <cfloat>
#include <cmath>
#include <iostream>
#include <random>
#include <vector>
Node::Node(std::deque<Node> *arena, bool is_ai_turn) {
this->eval = 0;
this->visit_count = 0;
this->win_count = 0;
this->parent = nullptr;
this->ai_board = 0;
this->enemy_board = 0;
this->child_count = 0;
this->arena = arena;
this->is_ai_turn = is_ai_turn;
}
Node *Node::GetBestChild() {
double best_score = DBL_MIN;
Node* best_child;
for (auto i = 0; i < this->child_count; i++) {
auto score = this->children[i]->GetUcbScore();
if (score > best_score) {
best_score = score;
best_child = this->children[i];
}
}
return best_child;
}
Node *Node::FindBestLeafNode() {
if (this->child_count == 0)
return this;
return this->GetBestChild()->FindBestLeafNode();
}
Node *Node::SearchEnemyMove(int move) {
this->CreateChildren();
for(int i = 0; i < this->child_count; i++) {
if(this->children[i]->enemy_board & (1 << move))
return this->children[i];
}
assert(false);
return nullptr;
}
void Node::CreateChildren() {
if(this->GetWinner() || this->child_count > 0)
return;
for(int pos = 0; pos < 9; pos++) {
bool p1 = this->ai_board & (1 << pos);
bool p2 = this->enemy_board & (1 << pos);
if (!p1 && !p2) {
Node n(this->arena, !this->is_ai_turn);
n.parent = this;
n.ai_board = this->ai_board;
n.enemy_board = this->enemy_board;
if(this->is_ai_turn)
n.ai_board |= 1 << pos;
else
n.enemy_board |= 1 << pos;
this->arena->push_back(n);
assert(this->child_count < 9);
this->children[this->child_count++] = &this->arena->back();
}
}
}
void Node::SimulateAndPropagate() {
int eval = this->GetWinner();
uint16_t saved_ai_board = this->ai_board;
uint16_t saved_enemy_board = this->enemy_board;
bool saved_turn = this->is_ai_turn;
std::random_device rd;
std::mt19937 gen(rd());
assert((saved_ai_board & saved_enemy_board) == 0);
while (!eval) {
std::vector<int> available_moves;
for (int pos = 0; pos < 9; pos++) {
bool p1 = this->ai_board & (1 << pos);
bool p2 = this->enemy_board & (1 << pos);
if (!p1 && !p2)
available_moves.push_back(pos);
}
if (available_moves.empty())
break;
std::uniform_int_distribution<> dis(0, available_moves.size() - 1);
int move = available_moves[dis(gen)];
if (this->is_ai_turn) {
this->ai_board |= 1 << move;
} else {
this->enemy_board |= 1 << move;
}
this->is_ai_turn = !this->is_ai_turn;
eval = this->GetWinner();
assert((this->ai_board & this->enemy_board) == 0);
}
this->ai_board = saved_ai_board;
this->enemy_board = saved_enemy_board;
this->is_ai_turn = saved_turn;
auto curr_node = this;
while (curr_node != nullptr) {
curr_node->visit_count++;
curr_node->eval += eval;
if (eval == 1) {
curr_node->win_count++;
}
curr_node = curr_node->parent;
}
}
Node *Node::CalculateBestMove(size_t iter_count) {
std::random_device rd;
std::mt19937 gen(rd());
for (size_t i = 0; i < iter_count; i++) {
Node *leaf = this->FindBestLeafNode();
leaf->CreateChildren();
if (leaf->child_count > 0) {
std::uniform_int_distribution<> dis(0, leaf->child_count - 1);
leaf = leaf->children[dis(gen)];
}
if(leaf->ai_board == 128 && leaf->enemy_board == 320) {
std::cout << "Wow!n";
}
leaf->SimulateAndPropagate();
}
int64_t best_eval = INT64_MIN;
Node *best_node;
for (int i = 0; i < this->child_count; i++) {
if (this->children[i]->eval > best_eval) {
best_eval = this->children[i]->eval;
best_node = this->children[i];
}
}
return best_node;
}
int Node::GetWinner() {
for (const uint16_t mask :
{0b000000111, 0b000111000, 0b111000000, 0b001001001, 0b010010010,
0b100100100, 0b100010001, 0b001010100}) {
if ((this->ai_board & mask) == mask) {
return 1;
} else if ((this->enemy_board & mask) == mask) {
return -1;
}
}
return 0;
}
double Node::GetUcbScore() {
auto parent = this->parent ? this->parent : this;
if (this->visit_count == 0)
return DBL_MAX;
constexpr double c = 1.4;
double exploitation = (double)this->win_count / this->visit_count;
double exploration = c * sqrt(log(parent->visit_count) / this->visit_count);
return exploration + exploitation;
}
Compilation Command
$ clang++ -o mcts -g -std=c++11 -fsanitize=undefined src/main.cc src/node.cc
Example Bug
Here is an example where the AI (X) makes a move that ignores the opponent’s winning move:
A B C
| X | 1
---|---|---
| | 2
---|---|---
O | | O 3