I am trying to implement Huffman code encoder in C++:
#include <iostream>
#include <fstream>
#include <string>
#include <list>
#include <algorithm>
using namespace std;
struct node{
char c;
int weight;
string code = "";
node *left_child;
node *right_child;
};
string read_file(string input_file){
string input = "";
ifstream fin;
fin.open(input_file.c_str());
if ( fin.fail() ){
cout << "open file fail" << endl;
}
else{
string line;
while (getline(fin, line)){
input += line;
}
}
fin.close();
return input;
}
bool operator <(const node & a, const node & b) { // set up the logic to sort the nodes in the list
if (a.weight < b.weight) return true;
if (a.weight > b.weight) return false;
return a.c < b.c;
}
/* this set up tree is moved to mian function for easier debugging
void setup_tree(list<node> & node_list){ // the node_list here is sorted in increasing order with required logic
while (node_list.size() != 1){ // keep removing the front two trees to form a new tree until only one tree is left
node * n3 = new node;
if (node_list.begin()->c < next(node_list.begin())->c){
n3->left_child = & (*node_list.begin());
node_list.pop_front();
n3->right_child = & (*node_list.begin());
node_list.pop_front();
}
else{
n3->right_child = & (*node_list.begin());
node_list.pop_front();
n3->left_child = & (*node_list.begin());
node_list.pop_front();
}
n3->c = n3->left_child->c;
cout << "building tree" << endl;
cout << n3->left_child << endl;
cout << n3->right_child << endl;
n3->weight = n3->left_child->weight + n3->right_child->weight;
node_list.push_back((*n3)); // put combined tree back to list
node_list.sort(); // sort the list again for another combination
}
}
*/
void output_code(string code, node *n, list<node> & node_list, int height){
if ((n->left_child == nullptr) && (n->right_child == nullptr)){
n->code = code;
node_list.push_back((*n));
return;
}
output_code(code + '0', n->left_child, node_list, height + 1);
output_code(code + '1', n->right_child, node_list, height + 1);
}
void generate_code_file(list<node> n){
ofstream fout;
fout.open("code.txt");
list<node>::iterator itr;
for (itr = n.begin(); itr != n.end(); itr++) {
fout << itr->code << endl;
}
fout.close();
}
int main(int argc, char *argv[]) {
string input_file = argv[1];
int freq_count[95]; // space complexity = O(95) = O(C)
for (int i = 0; i<95; i++){// initialize freq_count, time complexity = O(95) = O(C)
freq_count[i] = 0;
}
string input = read_file(input_file);
for (int i=0; i<input.length(); i++){ // count frequency of characters, time complexity = O(n), n = num of characters
freq_count[input[i]-32] += 1; // printable character start from code 32 to 126
}
list<node> node_list; //set up a list to store all nodes, space complexity = O(95) = O(C)
for (int i = 0; i<95; i++){// form nodes and then put into list, time complexity = O(95) = O(C)
if (freq_count[i] != 0){
node n;
n.c = static_cast<char>(i+32); // convert ASCII back to characters
n.weight = freq_count[i];
n.left_child = nullptr;
n.right_child = nullptr;
node_list.push_back(n);
}
}
node_list.sort(); //this sorting function is provided by <algorithm>. Since num of nodes is at most 95, time complexity is constant
/////////////////////setup_tree(node_list);
while (node_list.size() != 1){ // keep removing the front two trees to form a new tree until only one tree is left
node * n3 = new node;
cout << "here is address of new node: " << n3 << endl;
node *left_child = &(*node_list.begin());
node_list.pop_front();
node *right_child = &(*node_list.begin());
node_list.pop_front();
if (left_child->c < right_child->c){
n3->left_child = left_child;
n3->right_child = right_child;
}
else{
n3->right_child = left_child;
n3->left_child = right_child;
}
n3->c = n3->left_child->c;
cout << "building tree" << endl; ////debug printing
n3->weight = n3->left_child->weight + n3->right_child->weight;
node_list.push_back((*n3)); // put combined tree back to list
node_list.sort(); // sort the list again for another combination
list<node>::iterator itr;
for (itr = node_list.begin(); itr != node_list.end(); itr++) { // print all the address of all element in the list
cout << &(*itr) << endl;
}
}
node *result_tree = & (*node_list.begin());
//debug printing
node *p = result_tree;
cout << "check the address of nodes in a tree" << endl;
while ((p->left_child != nullptr) || (p->right_child != nullptr)){
cout << p->left_child << endl;
p = p->left_child;
}
output_code("", result_tree, node_list, 0);
node_list.sort();
generate_code_file(node_list);
return 0;
}
However, I found that the building of tree:
while (node_list.size() != 1){ // keep removing the front two trees to form a new tree until only one tree is left
node * n3 = new node;
cout << "here is address of new node: " << n3 << endl;
node *left_child = &(*node_list.begin());
node_list.pop_front();
node *right_child = &(*node_list.begin());
node_list.pop_front();
if (left_child->c < right_child->c){
n3->left_child = left_child;
n3->right_child = right_child;
}
else{
n3->right_child = left_child;
n3->left_child = right_child;
}
n3->c = n3->left_child->c;
cout << "building tree" << endl; ////debug printing
n3->weight = n3->left_child->weight + n3->right_child->weight;
node_list.push_back((*n3)); // put combined tree back to list
node_list.sort(); // sort the list again for another combination
list<node>::iterator itr;
for (itr = node_list.begin(); itr != node_list.end(); itr++) { // print all the address of all element in the list
cout << &(*itr) << endl;
}
}
in the main function results in inappropriate address of nodes in the tree.
Here is the compiling code of my program:
g++ hmencoder.cpp -o hmencoder -std=c++17
Here is the running code of my program:
./hmencoder input1.txt
Here is the content of input file “input1.txt”:
“Hello world!”
Here is the output of my program:
“here is address of new node: 0x1067318
building tree
0x1066ed0
0x10671d0
0x1067208
0x10672b0
0x10672e8
0x1066e60
0x1067278
0x1067240
here is address of new node: 0x1066e90
building tree
0x1067208
0x10672b0
0x10672e8
0x1066e60
0x10671d0
0x1067278
0x1067240
here is address of new node: 0x1066ec8
building tree
0x10672e8
0x1066e60
0x10671d0
0x10672b0
0x1067278
0x1067240
here is address of new node: 0x1067200
building tree
0x10671d0
0x10672b0
0x1067278
0x1066e60
0x1067240
here is address of new node: 0x10672e0
building tree
0x1067278
0x1066e60
0x1067240
0x10672b0
here is address of new node: 0x10671c8
building tree
0x1067240
0x10672b0
0x1066e60
here is address of new node: 0x1067270
building tree
0x1066e60
0x10672b0
here is address of new node: 0x1067238
building tree
0x10672b0
check the address of nodes in a tree
0x1066e60
0x1066e60
0x1066e60
.
.
.
// output the same address forever…”
I tried to apply a tree in Huffman code encoder by first storing all nodes in a list with increasing order of weight, then repeat the process of taking out first two nodes, combining as a new tree and pushing it back to list until only one tree is left (subtree with smaller ASCII code will be the left subtree).
However, when I check the address of nodes in the list, I found that address of new nodes are not inside the list throughout the process. While the address of combined nodes appears again in the list which is not expected. This further results in the bug that left child of root of tree contains the address of itself (see checking the address of node in output).
Can anyone suggest what I am doing wrong and how to fix it , please?
chan billy is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.
1