I need to split a file into multiple smaller files, and I am trying to figure out the most efficient way to read from and write to a file in C++. I tried two approaches:
- Using ifstream and ofstream directly to read and write a file line by line.
- Using an ostringstream buffer to read all the data from the input file into memory and then writing it to the output file.
On benchmarking, I found that the 2nd approach is much faster than the 1st.
Is there anything I am missing that could affect the performance of the code?
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <chrono>
/// Size, in bytes, of the buffer FileWriter hands to its output stream.
const int BUFFER_SIZE = 4096;

/// Writes the accumulated contents of string streams to a single output file.
///
/// The file is opened (and truncated) on construction and closed on
/// destruction. Open failures are not reported by this class; writes to a
/// stream that failed to open are silently dropped by the standard library.
class FileWriter {
private:
    // Declared before the stream so the buffer's storage is set up first and
    // is still around when the stream is closed.
    char m_CharBuffer[BUFFER_SIZE];
    std::ofstream m_OutFile;

public:
    /// Opens `filename` for writing, using m_CharBuffer as the stream buffer.
    FileWriter(const std::string& filename) {
        // The buffer has to be installed BEFORE the file is opened: the
        // effect of setbuf is implementation-defined, and libstdc++ ignores
        // the call once the filebuf is associated with a file.
        m_OutFile.rdbuf()->pubsetbuf(m_CharBuffer, BUFFER_SIZE);
        m_OutFile.open(filename.c_str(), std::ios::out);
    }

    /// Flushes pending data and closes the file.
    ~FileWriter() {
        m_OutFile.close();
    }

    // The stream buffer points into this object, so copying (or moving) an
    // instance would leave the stream referring to another object's storage.
    FileWriter(const FileWriter&) = delete;
    FileWriter& operator=(const FileWriter&) = delete;

    /// Writes everything accumulated in `oStr` to the file, byte for byte,
    /// then resets `oStr` so it can be reused for the next chunk.
    ///
    /// @param oStr  Stream holding the data to write; empty and in a good
    ///              state on return.
    void WriteToFile(std::ostringstream& oStr) {
        // No std::ends / std::endl here: they would add a '\0' and a '\n'
        // that were never part of the data, and std::endl would also force
        // a flush on every call.
        const std::string data = oStr.str();
        m_OutFile.write(data.data(), static_cast<std::streamsize>(data.size()));

        oStr.str("");
        // Also drop any error flags (e.g. failbit left behind by inserting
        // an empty streambuf) so the caller can keep appending.
        oStr.clear();
    }
};
/// Copies the file `inp` to `out` by reading the whole input into an
/// in-memory string stream and handing that to a FileWriter in one call.
///
/// If the input file cannot be opened, an error is printed to std::cerr and
/// the output file is left untouched.
///
/// @param inp  Path of the file to read.
/// @param out  Path of the file to create or overwrite.
void fileWriterV2 (std::string inp, std::string out) {
    // Open and validate the input first, so a bad input path does not
    // create or truncate the output file.
    std::ifstream inputFile(inp);
    if (!inputFile.is_open()) {
        std::cerr << "Failed to open input file: " << inp << std::endl;
        return;
    }

    FileWriter writer(out);

    // Inserting the file's streambuf copies the remaining input in bulk,
    // without any per-line parsing.
    std::ostringstream oStr;
    oStr << inputFile.rdbuf();

    writer.WriteToFile(oStr);
}
/// Copies `inputFilename` to `outputFilename` one line at a time.
///
/// Errors (input or output cannot be opened, or writing fails) are reported
/// on std::cerr. The output file is only created once the input has been
/// opened successfully.
///
/// @param inputFilename   Path of the file to read.
/// @param outputFilename  Path of the file to create or overwrite.
void fileWriterV1 (std::string inputFilename, std::string outputFilename){
    std::ifstream inputFile(inputFilename);
    if (!inputFile.is_open()) {
        std::cerr << "Failed to open input file: " << inputFilename << std::endl;
        return;
    }

    std::ofstream outputFile(outputFilename);
    if (!outputFile.is_open()) {
        std::cerr << "Failed to open output file: " << outputFilename << std::endl;
        return;
    }

    std::string line;
    while (std::getline(inputFile, line)) {
        outputFile << line;
        // getline sets eofbit when the last line ended without a newline;
        // only re-emit the delimiter when one was actually consumed, so the
        // copy matches the input exactly.
        if (!inputFile.eof()) {
            // '\n' instead of std::endl: endl flushes the stream on every
            // line, which dominates the running time on large files.
            outputFile << '\n';
        }
    }

    // Flush once at the end so write errors surface here rather than being
    // lost in the destructor.
    outputFile.flush();
    if (!outputFile) {
        std::cerr << "Failed to write output file: " << outputFilename << std::endl;
    }
    // Both streams close themselves when they go out of scope.
}
/// Entry point: copies argv[1] to argv[2] and reports how long the copy took.
///
/// @return 0 on completion, 1 when the two file arguments are missing.
int main(int argc, char** argv) {
    // argv[1] and argv[2] are only valid when at least two arguments were
    // actually supplied.
    if (argc < 3) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "program")
                  << " <input-file> <output-file>" << std::endl;
        return 1;
    }
    const std::string inputFilename = argv[1];
    const std::string outputFilename = argv[2];

    // steady_clock is monotonic, which is what interval timing needs.
    const auto start = std::chrono::steady_clock::now();
    // fileWriterV1(inputFilename, outputFilename);
    fileWriterV2(inputFilename, outputFilename);
    const auto end = std::chrono::steady_clock::now();

    const auto duration =
        std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
    // The value is in microseconds, so label it as such.
    std::cout << "File writing complete in " << duration << " us" << std::endl;
    return 0;
}