Thiết kế website giá rẻ

Question

I want to write a C++ library named cppdub that mimics the Python module pydub.

One of its main functions exports an AudioSegment to a file in a specific format (for example, mp3).

The code is:


AudioSegment AudioSegment::from_file(const std::string& file_path, const std::string& format, const std::string& codec,
    const std::map<std::string, int>& parameters, int start_second, int duration) {

    avformat_network_init();
    av_log_set_level(AV_LOG_ERROR); // Adjust logging level as needed

    AVFormatContext* format_ctx = nullptr;
    if (avformat_open_input(&format_ctx, file_path.c_str(), nullptr, nullptr) != 0) {
        std::cerr << "Error: Could not open audio file." << std::endl;
        return AudioSegment();  // Return an empty AudioSegment on failure
    }

    if (avformat_find_stream_info(format_ctx, nullptr) < 0) {
        std::cerr << "Error: Could not find stream information." << std::endl;
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    int audio_stream_index = -1;
    for (unsigned int i = 0; i < format_ctx->nb_streams; i++) {
        if (format_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audio_stream_index = i;
            break;
        }
    }

    if (audio_stream_index == -1) {
        std::cerr << "Error: Could not find audio stream." << std::endl;
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    AVCodecParameters* codec_par = format_ctx->streams[audio_stream_index]->codecpar;
    const AVCodec* my_codec = avcodec_find_decoder(codec_par->codec_id);
    AVCodecContext* codec_ctx = avcodec_alloc_context3(my_codec);

    if (!codec_ctx) {
        std::cerr << "Error: Could not allocate codec context." << std::endl;
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    if (avcodec_parameters_to_context(codec_ctx, codec_par) < 0) {
        std::cerr << "Error: Could not initialize codec context." << std::endl;
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    if (avcodec_open2(codec_ctx, my_codec, nullptr) < 0) {
        std::cerr << "Error: Could not open codec." << std::endl;
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    SwrContext* swr_ctx = swr_alloc();
    if (!swr_ctx) {
        std::cerr << "Error: Could not allocate SwrContext." << std::endl;
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }
    codec_ctx->sample_rate = 44100;
    // Set up resampling context to convert to S16 format with 2 bytes per sample
    av_opt_set_chlayout(swr_ctx, "in_chlayout", &codec_ctx->ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate", codec_ctx->sample_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", codec_ctx->sample_fmt, 0);

    AVChannelLayout dst_ch_layout;
    av_channel_layout_copy(&dst_ch_layout, &codec_ctx->ch_layout);
    av_channel_layout_uninit(&dst_ch_layout);
    av_channel_layout_default(&dst_ch_layout, 2);

    av_opt_set_chlayout(swr_ctx, "out_chlayout", &dst_ch_layout, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate", codec_ctx->sample_rate, 0);  // Match input sample rate
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);  // Force S16 format

    if (swr_init(swr_ctx) < 0) {
        std::cerr << "Error: Failed to initialize the resampling context" << std::endl;
        swr_free(&swr_ctx);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    AVPacket packet;
    AVFrame* frame = av_frame_alloc();
    if (!frame) {
        std::cerr << "Error: Could not allocate frame." << std::endl;
        swr_free(&swr_ctx);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&format_ctx);
        return AudioSegment();
    }

    std::vector<char> output;
    while (av_read_frame(format_ctx, &packet) >= 0) {
        if (packet.stream_index == audio_stream_index) {
            if (avcodec_send_packet(codec_ctx, &packet) == 0) {
                while (avcodec_receive_frame(codec_ctx, frame) == 0) {
                    if (frame->pts != AV_NOPTS_VALUE) {
                        frame->pts = av_rescale_q(frame->pts, codec_ctx->time_base, format_ctx->streams[audio_stream_index]->time_base);
                    }

                    uint8_t* output_buffer;
                    int output_samples = av_rescale_rnd(
                        swr_get_delay(swr_ctx, codec_ctx->sample_rate) + frame->nb_samples,
                        codec_ctx->sample_rate, codec_ctx->sample_rate, AV_ROUND_UP);

                    int output_buffer_size = av_samples_get_buffer_size(
                        nullptr, 2, output_samples, AV_SAMPLE_FMT_S16, 1);

                    output_buffer = (uint8_t*)av_malloc(output_buffer_size);

                    if (output_buffer) {
                        memset(output_buffer, 0, output_buffer_size); // Zero padding to avoid random noise
                        int converted_samples = swr_convert(swr_ctx, &output_buffer, output_samples,
                            (const uint8_t**)frame->extended_data, frame->nb_samples);

                        if (converted_samples >= 0) {
                            output.insert(output.end(), output_buffer, output_buffer + output_buffer_size);
                        }
                        else {
                            std::cerr << "Error: Failed to convert audio samples." << std::endl;
                        }
                        // Make sure output_buffer is valid before freeing
                        if (output_buffer != nullptr) {
                            av_free(output_buffer);
                            output_buffer = nullptr; // Prevent double-free
                        }
                    }
                    else {
                        std::cerr << "Error: Could not allocate output buffer." << std::endl;
                    }
                }
            }
            else {
                std::cerr << "Error: Failed to send packet to codec context." << std::endl;
            }
        }
        av_packet_unref(&packet);
    }

    int frame_width = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * 2;  // Use 2 bytes per sample and 2 channels

    std::map<std::string, int> metadata = {
        {"sample_width", 2},  // S16 format has 2 bytes per sample
        {"frame_rate", codec_ctx->sample_rate},  // Use the input sample rate
        {"channels", 2},  // Assuming stereo output
        {"frame_width", frame_width}
    };

    av_frame_free(&frame);
    swr_free(&swr_ctx);
    avcodec_free_context(&codec_ctx);
    avformat_close_input(&format_ctx);

    return AudioSegment(static_cast<const char*>(output.data()), output.size(), metadata);
}









std::ofstream AudioSegment::export_segment(const std::string& out_f,
    const std::string& format,
    const std::string& codec,
    const std::string& bitrate,
    const std::vector<std::string>& parameters,
    const std::map<std::string, std::string>& tags,
    const std::string& id3v2_version,
    const std::string& cover) {

    av_log_set_level(AV_LOG_DEBUG);
    AVCodecContext* codec_ctx = nullptr;
    AVFormatContext* format_ctx = nullptr;
    AVStream* stream = nullptr;
    AVFrame* frame = nullptr;
    AVPacket* pkt = nullptr;
    SwrContext* swr_ctx = nullptr;
    int ret;

    // Initialize format context
    if (avformat_alloc_output_context2(&format_ctx, nullptr, format.c_str(), out_f.c_str()) < 0) {
        throw std::runtime_error("Could not allocate format context.");
    }

    // Find encoder
    const AVCodec* codec_ptr = avcodec_find_encoder_by_name(codec.c_str());
    if (!codec_ptr) {
        throw std::runtime_error("Codec not found.");
    }

    // Add stream
    stream = avformat_new_stream(format_ctx, codec_ptr);
    if (!stream) {
        throw std::runtime_error("Failed to create new stream.");
    }

    // Allocate codec context
    codec_ctx = avcodec_alloc_context3(codec_ptr);
    if (!codec_ctx) {
        throw std::runtime_error("Could not allocate audio codec context.");
    }

    // Set codec parameters
    codec_ctx->bit_rate = std::stoi(bitrate);
    codec_ctx->sample_rate = this->get_frame_rate(); // Ensure this returns the correct sample rate
    av_channel_layout_default(&codec_ctx->ch_layout, 2);
    codec_ctx->sample_fmt = codec_ptr->sample_fmts ? codec_ptr->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;

    // Open codec
    if (avcodec_open2(codec_ctx, codec_ptr, nullptr) < 0) {
        throw std::runtime_error("Could not open codec.");
    }

    // Set codec parameters to the stream
    if (avcodec_parameters_from_context(stream->codecpar, codec_ctx) < 0) {
        throw std::runtime_error("Could not initialize stream codec parameters.");
    }

    // Open output file
    std::ofstream out_file(out_f, std::ios::binary);
    if (!out_file) {
        throw std::runtime_error("Failed to open output file.");
    }

    if (!(format_ctx->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&format_ctx->pb, out_f.c_str(), AVIO_FLAG_WRITE) < 0) {
            throw std::runtime_error("Could not open output file.");
        }
    }

    // Write file header
    if (avformat_write_header(format_ctx, nullptr) < 0) {
        throw std::runtime_error("Error occurred when opening output file.");
    }

    // Initialize packet
    pkt = av_packet_alloc();
    if (!pkt) {
        throw std::runtime_error("Could not allocate AVPacket.");
    }

    // Initialize frame
    frame = av_frame_alloc();
    if (!frame) {
        throw std::runtime_error("Could not allocate AVFrame.");
    }
    frame->nb_samples = codec_ctx->frame_size;
    frame->format = codec_ctx->sample_fmt;
    frame->ch_layout = codec_ctx->ch_layout;

    // Allocate data buffer
    if (av_frame_get_buffer(frame, 0) < 0) {
        throw std::runtime_error("Could not allocate audio data buffers.");
    }

    // Initialize SwrContext for resampling
    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        throw std::runtime_error("Could not allocate SwrContext.");
    }

    // Set options for resampling
    av_opt_set_chlayout(swr_ctx, "in_chlayout", &codec_ctx->ch_layout, 0);
    av_opt_set_chlayout(swr_ctx, "out_chlayout", &codec_ctx->ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate", codec_ctx->sample_rate, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate", codec_ctx->sample_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0); // Assuming input is S16
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", codec_ctx->sample_fmt, 0);

    // Initialize the resampling context
    if (swr_init(swr_ctx) < 0) {
        throw std::runtime_error("Failed to initialize SwrContext.");
    }

    int samples_read = 0;
    int total_samples = data_.size() / (av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * 2); // Assuming input is stereo

    while (samples_read < total_samples) {
        if (av_frame_make_writable(frame) < 0) {
            throw std::runtime_error("Frame not writable.");
        }

        int num_samples = std::min(codec_ctx->frame_size, total_samples - samples_read);

        // Prepare input data
        const uint8_t* input_data[2] = { reinterpret_cast<const uint8_t*>(data_.data() + samples_read * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * 2), nullptr };
        int output_samples = swr_convert(swr_ctx, frame->data, frame->nb_samples,
            input_data, num_samples);

        if (output_samples < 0) {
            throw std::runtime_error("Error converting audio samples.");
        }

        frame->nb_samples = output_samples;

        // Send the frame for encoding
        if (avcodec_send_frame(codec_ctx, frame) < 0) {
            throw std::runtime_error("Error sending frame for encoding.");
        }

        // Receive and write packets
        while (avcodec_receive_packet(codec_ctx, pkt) >= 0) {
            out_file.write(reinterpret_cast<char*>(pkt->data), pkt->size);
            av_packet_unref(pkt);
        }

        samples_read += num_samples;
    }

    // Flush the encoder
    if (avcodec_send_frame(codec_ctx, nullptr) < 0) {
        throw std::runtime_error("Error flushing the encoder.");
    }

    while (avcodec_receive_packet(codec_ctx, pkt) >= 0) {
        out_file.write(reinterpret_cast<char*>(pkt->data), pkt->size);
        av_packet_unref(pkt);
    }

    // Write file trailer
    av_write_trailer(format_ctx);

    // Cleanup
    av_frame_free(&frame);
    av_packet_free(&pkt);
    swr_free(&swr_ctx);
    avcodec_free_context(&codec_ctx);

    if (!(format_ctx->oformat->flags & AVFMT_NOFILE)) {
        avio_closep(&format_ctx->pb);
    }
    avformat_free_context(format_ctx);

    out_file.close();
    return out_file;
}

//declaration
/*
std::ofstream export_segment(const std::string& out_f,
    const std::string& format = "mp3",
    const std::string& codec = "libmp3lame",
    const std::string& bitrate = "128000",
    const std::vector<std::string>& parameters = {},
    const std::map<std::string, std::string>& tags = {},
    const std::string& id3v2_version = "4",
    const std::string& cover = "");
*/

This code only works for the mp3 format. I also want to export to aac, ogg, flv, wav, and other popular formats.

Thiết kế website giá rẻ

Danh mục

C++ ffmpeg lib version 7.0 – export audio to different format