Hello everyone,
I’m working on a project that involves trimming a video using FFmpeg libraries in C++. While I can successfully append and trim the end of the video, I’m facing an issue with trimming the start. The output video is always a correct length clip of a single frame from the original video, rather than the expected trimmed segment.
Here is the primary code I’m using:
int addPacketsToOutput(AVFormatContext* in_format_context, AVFormatContext* out_format_context,
int64_t* last_video_pts, int64_t* last_audio_pts,
int64_t trim_start_millisec, int64_t trim_end_millisec) {
AVPacket pkt;
uint64_t temp_video_pts = 0;
uint64_t temp_audio_pts = 0;
bool is_video_over_trim = false;
bool is_audio_over_trim = false;
bool is_trim_start = trim_start_millisec > 0;
bool is_trim_end = trim_end_millisec > 0;
bool is_video_started = false;
bool is_audio_started = false;
while (av_read_frame(in_format_context, &pkt) >= 0) {
AVStream *in_stream = in_format_context->streams[pkt.stream_index];
AVStream *out_stream = out_format_context->streams[pkt.stream_index];
const AVCodecDescriptor* codec_desc = avcodec_descriptor_get(out_stream->codecpar->codec_id);
bool is_video = codec_desc->type == AVMEDIA_TYPE_VIDEO;
bool is_audio = codec_desc->type == AVMEDIA_TYPE_AUDIO;
pkt.pts = av_rescale_q_rnd(pkt.pts + 1, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF);
pkt.dts = av_rescale_q_rnd(pkt.dts + 1, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF);
uint64_t current_duration = (av_q2d(out_stream->time_base) * pkt.pts) * 1000;
if (is_trim_start) {
if (current_duration < trim_start_millisec) {
av_packet_unref(&pkt);
continue;
} else if (is_video && !is_video_started) {
*last_video_pts = -pkt.pts;
is_video_started = true;
} else if (is_audio && !is_audio_started) {
*last_audio_pts = -pkt.pts;
is_audio_started = true;
} else if (is_audio_started && is_video_started) {
is_trim_start = false;
}
}
int offset = (is_video) ? *last_video_pts : *last_audio_pts;
pkt.pts = pkt.pts + offset;
pkt.dts = pkt.dts + offset;
if (is_video && !is_video_over_trim) temp_video_pts = pkt.pts;
else if (is_audio && !is_audio_over_trim) temp_audio_pts = pkt.pts;
std::cout << "pts: " << pkt.pts << " current duration: " << current_duration << " > " << trim_start_millisec << " last video pts: " << *last_video_pts << " last audio pts: " << *last_audio_pts << std::endl;
current_duration = (av_q2d(out_stream->time_base) * pkt.pts) * 1000;
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
if (is_trim_end && (current_duration > trim_end_millisec)) {
av_packet_unref(&pkt);
if (is_audio_over_trim && is_video_over_trim) break;
if (is_video) is_video_over_trim = true;
if (is_audio) is_audio_over_trim = true;
continue;
}
// Write packet
if (av_interleaved_write_frame(out_format_context, &pkt) < 0) {
std::cerr << "Error muxing packetn";
break;
}
av_packet_unref(&pkt);
}
*last_video_pts = temp_video_pts;
*last_audio_pts = temp_audio_pts;
return 0;
}
void Video::trimVideo(const double start_millisecond, const double end_millisecond, const std::string& output_path) {
AVFormatContext* out_format_ctx = nullptr;
AVFormatContext* video_format_ctx = nullptr;
std::string temp_output_path = (output_path == file_path) ? getPathToFile(output_path) + "/temp" + getFileFormat(output_path)
: output_path;
initInputContext(file_path, &video_format_ctx);
initOutputContext(temp_output_path, &out_format_ctx);
copyStreamParameters(video_format_ctx, out_format_ctx);
initOutputFile(temp_output_path, out_format_ctx);
int64_t last_video_pts = 0;
int64_t last_audio_pts = 0;
addPacketsToOutput(video_format_ctx, out_format_ctx, &last_video_pts, &last_audio_pts, start_millisecond, end_millisecond);
av_write_trailer(out_format_ctx);
// Cleanup
if (!(out_format_ctx->oformat->flags & AVFMT_NOFILE))
avio_closep(&out_format_ctx->pb);
avformat_free_context(out_format_ctx);
avformat_close_input(&video_format_ctx);
}
int main() {
Video video1("path_to_video/example1.mp4");
Video video2("path_to_video/example2.mp4");
Video video3("path_to_video/example1.mp4");
std::vector<Video> videos;
videos.push_back(video2);
videos.push_back(video3);
video1.trimVideo(1000, 000, "path_to_video/temp_example1.mp4");
//video1.appendVideos(videos, "path_to_video/temp_example2.mp4");
return 0;
}
Problem Description:
The trimming works fine for the end of the video.
Trimming the start results in a video clip of the correct length, but it’s just a single frame of the original video, not the intended trimmed section.
There are no errors during execution.
Additional Information:
The goal is to trim both the start and the end of the video correctly.
The output video should be a continuous segment from the specified start to end times.
For testing i have just done the trimming of the end, and start seperately for now.
Start and end of debug:
pts: 0 current duration: 1000 > 1000 last video pts: -90003 last audio pts: 0
pts: 3000 current duration: 1033 > 1000 last video pts: -90003 last audio pts: 0
pts: 6000 current duration: 1066 > 1000 last video pts: -90003 last audio pts: 0
pts: 9000 current duration: 1100 > 1000 last video pts: -90003 last audio pts: 0
pts: 12000 current duration: 1133 > 1000 last video pts: -90003 last audio pts: 0
pts: 15000 current duration: 1166 > 1000 last video pts: -90003 last audio pts: 0
pts: 18000 current duration: 1200 > 1000 last video pts: -90003 last audio pts: 0
pts: 21000 current duration: 1233 > 1000 last video pts: -90003 last audio pts: 0
pts: 24000 current duration: 1266 > 1000 last video pts: -90003 last audio pts: 0
pts: 0 current duration: 1002 > 1000 last video pts: -90003 last audio pts: -48129
pts: 1024 current duration: 1024 > 1000 last video pts: -90003 last audio pts: -48129
pts: 2048 current duration: 1045 > 1000 last video pts: -90003 last audio pts: -48129
...
pts: 171000 current duration: 2900 > 1000 last video pts: -90003 last audio pts: -48129
pts: 174000 current duration: 2933 > 1000 last video pts: -90003 last audio pts: -48129
pts: 177000 current duration: 2966 > 1000 last video pts: -90003 last audio pts: -48129
pts: 180000 current duration: 3000 > 1000 last video pts: -90003 last audio pts: -48129
pts: 90112 current duration: 2880 > 1000 last video pts: -90003 last audio pts: -48129
pts: 91136 current duration: 2901 > 1000 last video pts: -90003 last audio pts: -48129
pts: 92160 current duration: 2922 > 1000 last video pts: -90003 last audio pts: -48129
pts: 93184 current duration: 2944 > 1000 last video pts: -90003 last audio pts: -48129
pts: 94208 current duration: 2965 > 1000 last video pts: -90003 last audio pts: -48129
pts: 95232 current duration: 2986 > 1000 last video pts: -90003 last audio pts: -48129
pts: 96256 current duration: 3008 > 1000 last video pts: -90003 last audio pts: -48129
Attempts to Resolve:
Verified the time base rescaling logic.
Ensured correct PTS/DTS handling and packet writing.
Does anyone have insights or suggestions on why trimming the start might not be working correctly? Any help or pointers would be greatly appreciated!
ChatGPT used for better writing.