The problem is simple, given an array/vector/whatever of uint8_t
, that is a sequence of bytes, represent it in a format like this: b'Hello, World!x03xb3T'
, it is the format used by Python. The input is a sequence of bytes and the output is std::string
, the output is directed to std::cout
.
I need this for reverse engineering purposes.
I don’t know if it has already been done before, but I am unable to find it. Anyway below is my cobbled together solution in C++20, I am still a beginner in C++.
#include <algorithm>
#include <iostream>
#include <format>
#include <fstream>
#include <string>
#include <unordered_map>
#include <vector>
typedef std::vector<uint8_t> bytes;
using std::string;
using std::cout;
std::unordered_map<uint8_t, std::string> ASCII = {
{0, "\x00"},
{1, "\x01"},
{2, "\x02"},
{3, "\x03"},
{4, "\x04"},
{5, "\x05"},
{6, "\x06"},
{7, "\x07"},
{8, "\x08"},
{9, "\t"},
{10, "\n"},
{11, "\x0b"},
{12, "\x0c"},
{13, "\r"},
{14, "\x0e"},
{15, "\x0f"},
{16, "\x10"},
{17, "\x11"},
{18, "\x12"},
{19, "\x13"},
{20, "\x14"},
{21, "\x15"},
{22, "\x16"},
{23, "\x17"},
{24, "\x18"},
{25, "\x19"},
{26, "\x1a"},
{27, "\x1b"},
{28, "\x1c"},
{29, "\x1d"},
{30, "\x1e"},
{31, "\x1f"},
{32, " "},
{33, "!"},
{34, """},
{35, "#"},
{36, "$"},
{37, "%"},
{38, "&"},
{39, "'"},
{40, "("},
{41, ")"},
{42, "*"},
{43, "+"},
{44, ","},
{45, "-"},
{46, "."},
{47, "/"},
{48, "0"},
{49, "1"},
{50, "2"},
{51, "3"},
{52, "4"},
{53, "5"},
{54, "6"},
{55, "7"},
{56, "8"},
{57, "9"},
{58, ":"},
{59, ";"},
{60, "<"},
{61, "="},
{62, ">"},
{63, "?"},
{64, "@"},
{65, "A"},
{66, "B"},
{67, "C"},
{68, "D"},
{69, "E"},
{70, "F"},
{71, "G"},
{72, "H"},
{73, "I"},
{74, "J"},
{75, "K"},
{76, "L"},
{77, "M"},
{78, "N"},
{79, "O"},
{80, "P"},
{81, "Q"},
{82, "R"},
{83, "S"},
{84, "T"},
{85, "U"},
{86, "V"},
{87, "W"},
{88, "X"},
{89, "Y"},
{90, "Z"},
{91, "["},
{92, "\\"},
{93, "]"},
{94, "^"},
{95, "_"},
{96, "`"},
{97, "a"},
{98, "b"},
{99, "c"},
{100, "d"},
{101, "e"},
{102, "f"},
{103, "g"},
{104, "h"},
{105, "i"},
{106, "j"},
{107, "k"},
{108, "l"},
{109, "m"},
{110, "n"},
{111, "o"},
{112, "p"},
{113, "q"},
{114, "r"},
{115, "s"},
{116, "t"},
{117, "u"},
{118, "v"},
{119, "w"},
{120, "x"},
{121, "y"},
{122, "z"},
{123, "{"},
{124, "|"},
{125, "}"},
{126, "~"},
{127, "\x7f"},
{128, "\x80"},
{129, "\x81"},
{130, "\x82"},
{131, "\x83"},
{132, "\x84"},
{133, "\x85"},
{134, "\x86"},
{135, "\x87"},
{136, "\x88"},
{137, "\x89"},
{138, "\x8a"},
{139, "\x8b"},
{140, "\x8c"},
{141, "\x8d"},
{142, "\x8e"},
{143, "\x8f"},
{144, "\x90"},
{145, "\x91"},
{146, "\x92"},
{147, "\x93"},
{148, "\x94"},
{149, "\x95"},
{150, "\x96"},
{151, "\x97"},
{152, "\x98"},
{153, "\x99"},
{154, "\x9a"},
{155, "\x9b"},
{156, "\x9c"},
{157, "\x9d"},
{158, "\x9e"},
{159, "\x9f"},
{160, "\xa0"},
{161, "¡"},
{162, "¢"},
{163, "£"},
{164, "¤"},
{165, "¥"},
{166, "¦"},
{167, "§"},
{168, "¨"},
{169, "©"},
{170, "ª"},
{171, "«"},
{172, "¬"},
{173, "\xad"},
{174, "®"},
{175, "¯"},
{176, "°"},
{177, "±"},
{178, "²"},
{179, "³"},
{180, "´"},
{181, "µ"},
{182, "¶"},
{183, "·"},
{184, "¸"},
{185, "¹"},
{186, "º"},
{187, "»"},
{188, "¼"},
{189, "½"},
{190, "¾"},
{191, "¿"},
{192, "À"},
{193, "Á"},
{194, "Â"},
{195, "Ã"},
{196, "Ä"},
{197, "Å"},
{198, "Æ"},
{199, "Ç"},
{200, "È"},
{201, "É"},
{202, "Ê"},
{203, "Ë"},
{204, "Ì"},
{205, "Í"},
{206, "Î"},
{207, "Ï"},
{208, "Ð"},
{209, "Ñ"},
{210, "Ò"},
{211, "Ó"},
{212, "Ô"},
{213, "Õ"},
{214, "Ö"},
{215, "×"},
{216, "Ø"},
{217, "Ù"},
{218, "Ú"},
{219, "Û"},
{220, "Ü"},
{221, "Ý"},
{222, "Þ"},
{223, "ß"},
{224, "à"},
{225, "á"},
{226, "â"},
{227, "ã"},
{228, "ä"},
{229, "å"},
{230, "æ"},
{231, "ç"},
{232, "è"},
{233, "é"},
{234, "ê"},
{235, "ë"},
{236, "ì"},
{237, "í"},
{238, "î"},
{239, "ï"},
{240, "ð"},
{241, "ñ"},
{242, "ò"},
{243, "ó"},
{244, "ô"},
{245, "õ"},
{246, "ö"},
{247, "÷"},
{248, "ø"},
{249, "ù"},
{250, "ú"},
{251, "û"},
{252, "ü"},
{253, "ý"},
{254, "þ"},
{255, "ÿ"}
};
template<typename T>
inline bytes LittleEndian(const T& number) {
size_t size = sizeof(T);
bytes _bytes(size);
uint64_t mask = 255;
int shift = 0;
for (size_t i = 0; i < size; i++) {
_bytes[i] = (number & mask) >> shift;
mask <<= 8;
shift += 8;
}
return _bytes;
}
template<typename T>
inline bytes BigEndian(const T& number) {
bytes _bytes = LittleEndian<T>(number);
std::reverse(_bytes.begin(), _bytes.end());
return _bytes;
}
static inline string hexlify(bytes arr) {
string repr = "";
for (auto& chr : arr) {
repr += std::format(" {:02x}", chr);
}
repr.erase(0, 1);
return repr;
}
static inline string binascii(bytes arr) {
string repr = "";
for (auto& chr : arr) {
repr += ASCII[chr];
}
return repr;
}
int main() {
cout << hexlify(LittleEndian<uint32_t>(123456789)) << "n";
cout << hexlify(BigEndian<uint32_t>(123456789)) << "n";
cout << binascii({
84, 104, 105, 115, 32, 105, 115, 32, 97,
110, 32, 101, 120, 97, 109, 112, 108, 101,
32, 115, 116, 114, 105, 110, 103, 44, 32,
105, 116, 32, 105, 115, 32, 97, 32, 109,
105, 120, 116, 117, 114, 101, 32, 111, 102,
32, 116, 101, 120, 116, 32, 97, 110, 100,
32, 110, 117, 109, 98, 101, 114, 115, 44,
32, 97, 102, 116, 101, 114, 32, 116, 104,
101, 32, 116, 101, 120, 116, 32, 112, 97,
114, 116, 32, 105, 115, 32, 97, 110, 32,
97, 114, 114, 97, 121, 32, 111, 102, 32,
116, 104, 101, 32, 102, 105, 114, 115, 116,
32, 51, 50, 32, 70, 105, 98, 111, 110,
97, 99, 99, 105, 32, 110, 117, 109, 98,
101, 114, 115, 32, 101, 110, 99, 111, 100,
101, 100, 32, 105, 110, 32, 85, 73, 110,
116, 51, 50, 32, 66, 69, 58, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 2, 0, 0, 0, 3,
0, 0, 0, 5, 0, 0, 0, 8, 0,
0, 0, 13, 0, 0, 0, 21, 0, 0,
0, 34, 0, 0, 0, 55, 0, 0, 0,
89, 0, 0, 0, 144, 0, 0, 0, 233,
0, 0, 1, 121, 0, 0, 2, 98, 0,
0, 3, 219, 0, 0, 6, 61, 0, 0,
10, 24, 0, 0, 16, 85, 0, 0, 26,
109, 0, 0, 42, 194, 0, 0, 69, 47,
0, 0, 111, 241, 0, 0, 181, 32, 0,
1, 37, 17, 0, 1, 218, 49, 0, 2,
255, 66, 0, 4, 217, 115, 0, 7, 216,
181, 0, 12, 178, 40, 0, 20, 138, 221
});
}
That ASCII
table was generated with Python’s chr
, and below is the output:
15 cd 5b 07
07 5b cd 15
This is an example string, it is a mixture of text and numbers, after the text part is an array of the first 32 Fibonacci numbers encoded in UInt32 BE:x00x00x00x00x00x00x00x01x00x00x00x01x00x00x00x02x00x00x00x03x00x00x00x05x00x00x00x08x00x00x00rx00x00x00x15x00x00x00"x00x00x007x00x00x00Yx00x00x00x90x00x00x00Θx00x00x01yx00x00x02bx00x00x03█x00x00x06=x00x00nx18x00x00x10Ux00x00x1amx00x00*┬x00x00E/x00x00o±x00x00╡ x00x01%x11x00x01┌1x00x02 Bx00x04┘sx00x07╪╡x00x0c▓(x00x14x8a▌
The last line differs from Python’s representation:
bytearray(b'This is an example string, it is a mixture of text and numbers, after the text part is an array of the first 32 Fibonacci numbers encoded in UInt32 BE:x00x00x00x00x00x00x00x01x00x00x00x01x00x00x00x02x00x00x00x03x00x00x00x05x00x00x00x08x00x00x00rx00x00x00x15x00x00x00"x00x00x007x00x00x00Yx00x00x00x90x00x00x00xe9x00x00x01yx00x00x02bx00x00x03xdbx00x00x06=x00x00nx18x00x00x10Ux00x00x1amx00x00*xc2x00x00E/x00x00oxf1x00x00xb5 x00x01%x11x00x01xda1x00x02xffBx00x04xd9sx00x07xd8xb5x00x0cxb2(x00x14x8axdd')
I wonder if there is already a standardized way to do this.