/*
 * Telegram-Android/TMessagesProj/jni/video_audio_out.c
 *
 * Repository-viewer header captured together with the file:
 * 367 lines, 12 KiB, C (Raw / Normal View / History),
 * timestamp 2013-12-20 23:25:49 +04:00.
 */
#include "video.h"

#include <string.h>

#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>

#include "log.h"
/*
 * File-scope state shared by the functions below. It is filled in by
 * prepare_for_video_conversion() and released by cleanup_out().
 */

/* Output container and the output format descriptor taken from it. */
AVFormatContext *oc = NULL;
AVOutputFormat *fmt = NULL;

/* Output streams; each stays NULL when that stream was not created. */
AVStream *video_st = NULL;
AVStream *audio_st = NULL;

/* Video path: optional scaler, the frame handed to the encoder, and the
 * picture buffer whose planes back that frame. */
struct SwsContext *sws_ctx = NULL;
AVFrame *out_frame = NULL;
AVPicture dst_picture;

/* Audio path: resampler and its output sample buffer. */
SwrContext *swr_ctx = NULL;
uint8_t **dst_samples_data = NULL;

/* Staging area for resampled audio waiting to be encoded:
 * current_in_buff counts the bytes held in buff, current_n_out the samples. */
uint8_t buff[2 * 4096];
int current_in_buff = 0;
int current_n_out = 0;
/* Return the smaller of the two arguments (val2 when they are equal). */
int min(int val1, int val2) {
    if (val1 < val2) {
        return val1;
    }
    return val2;
}
/**
 * Set up the MP4 output: allocate the output context, open the destination
 * file, create and open the MPEG-4 video and AAC audio encoders, build the
 * scaler/resampler and write the container header.
 *
 * The video stream is created only when video_dec_ctx, src_video_stream and
 * fmt_ctx are all non-NULL; the audio stream only when audio_dec_ctx and
 * src_audio_stream are both non-NULL.
 *
 * @param dst_filename     path of the file to write
 * @param video_dec_ctx    source video decoder context (may be NULL)
 * @param audio_dec_ctx    source audio decoder context (may be NULL)
 * @param fmt_ctx          source container, used for its metadata (may be NULL)
 * @param src_video_stream source video stream (metadata and frame rate)
 * @param src_audio_stream source audio stream (metadata)
 * @param bitr             video bit rate handed to the encoder
 * @return 0 on success; -1 on failure, after onError() has been called
 */
int prepare_for_video_conversion(const char *dst_filename, AVCodecContext *video_dec_ctx, AVCodecContext *audio_dec_ctx, AVFormatContext *fmt_ctx, AVStream *src_video_stream, AVStream *src_audio_stream, int bitr) {
    if (!video_dec_ctx && !audio_dec_ctx) {
        onError();
        return -1;
    }

    avformat_alloc_output_context2(&oc, NULL, "mp4", dst_filename);
    if (!oc) {
        onError();
        return -1;
    }
    fmt = oc->oformat;
    /* fmt_ctx is optional (it is NULL-checked below), so guard it here too. */
    if (fmt_ctx) {
        av_dict_copy(&oc->metadata, fmt_ctx->metadata, 0);
    }

    int ret = 0;
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&oc->pb, dst_filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            LOGD("Could not open '%s': %s\n", dst_filename, av_err2str(ret));
            onError();
            return -1;
        }
    }

    AVCodecContext *c;
    if (video_dec_ctx && src_video_stream && fmt_ctx) {
        /* Cap the longer side at 480 pixels, keeping the aspect ratio. */
        int dst_width = video_dec_ctx->width, dst_height = video_dec_ctx->height;
        if (video_dec_ctx->width > video_dec_ctx->height) {
            if (video_dec_ctx->width > 480) {
                float scale = video_dec_ctx->width / 480.0f;
                dst_width = 480;
                dst_height = ceilf(video_dec_ctx->height / scale);
            }
        } else if (video_dec_ctx->height > 480) {
            /* Portrait/square: height is the longer side, so test it
             * (testing width here left tall, narrow clips unscaled). */
            float scale = video_dec_ctx->height / 480.0f;
            dst_height = 480;
            dst_width = ceilf(video_dec_ctx->width / scale);
        }

        /* A scaler is needed only when size or pixel format changes. */
        if (video_dec_ctx->height != dst_height || video_dec_ctx->width != dst_width || video_dec_ctx->pix_fmt != AV_PIX_FMT_YUV420P) {
            sws_ctx = sws_getContext(video_dec_ctx->width, video_dec_ctx->height, video_dec_ctx->pix_fmt, dst_width, dst_height, AV_PIX_FMT_YUV420P, SWS_BILINEAR, NULL, NULL, NULL);
            if (!sws_ctx) {
                LOGD("Could not initialize the conversion context\n");
                onError();
                return -1;
            }
        }

        /* Create the video stream. */
        AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
        if (!codec) {
            LOGD("Could not find encoder for '%s'\n", avcodec_get_name(AV_CODEC_ID_MPEG4));
            onError();
            return -1;
        }
        video_st = avformat_new_stream(oc, codec);
        if (!video_st) {
            LOGD("Could not allocate stream\n");
            onError();
            return -1;
        }
        video_st->id = oc->nb_streams - 1;
        av_dict_copy(&video_st->metadata, src_video_stream->metadata, 0);

        /* Without a valid average frame rate the time base below would be
         * computed from NaN/inf, so fail explicitly instead. */
        if (src_video_stream->avg_frame_rate.num <= 0 || src_video_stream->avg_frame_rate.den <= 0) {
            LOGD("Could not determine source frame rate\n");
            onError();
            return -1;
        }

        c = video_st->codec;
        c->codec_id = AV_CODEC_ID_MPEG4;
        c->bit_rate = bitr;
        c->width = dst_width;
        c->height = dst_height;
        double fps = (double)src_video_stream->avg_frame_rate.num / (double)src_video_stream->avg_frame_rate.den;
        /* time_base = num/den seconds per frame; both terms use 65535. */
        c->time_base.den = 65535;
        c->time_base.num = floor(65535 / fps);
        c->gop_size = 12;
        c->pix_fmt = AV_PIX_FMT_YUV420P;
        if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
            c->flags |= CODEC_FLAG_GLOBAL_HEADER;
        }
        ret = avcodec_open2(c, codec, NULL);
        if (ret < 0) {
            LOGD("Could not open video codec: %s\n", av_err2str(ret));
            onError();
            return -1;
        }

        out_frame = avcodec_alloc_frame();
        if (!out_frame) {
            LOGD("Could not allocate video frame\n");
            onError();
            return -1;
        }
        ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
        if (ret < 0) {
            LOGD("Could not allocate picture: %s\n", av_err2str(ret));
            onError();
            return -1;
        }
        /* Point the frame's planes at the freshly allocated picture. */
        *((AVPicture *)out_frame) = dst_picture;
    }

    /* Create the audio stream. */
    if (audio_dec_ctx && src_audio_stream) {
        AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
        if (!codec) {
            LOGD("Could not find encoder for '%s'\n", avcodec_get_name(AV_CODEC_ID_AAC));
            onError();
            return -1;
        }
        audio_st = avformat_new_stream(oc, codec);
        if (!audio_st) {
            LOGD("Could not allocate stream\n");
            onError();
            return -1;
        }
        audio_st->id = oc->nb_streams - 1;
        av_dict_copy(&audio_st->metadata, src_audio_stream->metadata, 0);

        c = audio_st->codec;
        c->sample_fmt = AV_SAMPLE_FMT_FLTP;
        c->bit_rate = 40000;
        c->sample_rate = min(audio_dec_ctx->sample_rate, 44100);
        c->channels = 1;
        if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
            c->flags |= CODEC_FLAG_GLOBAL_HEADER;
        }
        c->strict_std_compliance = -2;

        /* Resample from the decoder's layout/format/rate to mono output. */
        swr_ctx = swr_alloc_set_opts(NULL, AV_CH_LAYOUT_MONO, c->sample_fmt, c->sample_rate, audio_dec_ctx->channel_layout, audio_dec_ctx->sample_fmt, audio_dec_ctx->sample_rate, 0, NULL);
        if (!swr_ctx) {
            LOGD("Could not allocate resampler context\n");
            onError();
            return -1;
        }
        if ((ret = swr_init(swr_ctx)) < 0) {
            LOGD("Failed to initialize the resampling context\n");
            onError();
            return -1;
        }
        ret = avcodec_open2(c, codec, NULL);
        if (ret < 0) {
            LOGD("Could not open audio codec: %s\n", av_err2str(ret));
            onError();
            return -1;
        }
    }

    /* The header must be written whether or not there is an audio stream;
     * it used to live inside the audio branch, so video-only output never
     * got one. */
    av_dump_format(oc, 0, dst_filename, 1);
    ret = avformat_write_header(oc, NULL);
    if (ret < 0) {
        LOGD("Error occurred when opening output file: %s\n", av_err2str(ret));
        onError();
        return -1;
    }
    if (out_frame) {
        out_frame->pts = 0;
    }
    return 0;
}
/**
 * Release everything prepare_for_video_conversion() and the write functions
 * allocated, and reset the file-scope state so a new conversion can start.
 * Every pointer is cleared after it is released.
 */
void cleanup_out() {
    if (video_st) {
        avcodec_close(video_st->codec);
        /* dst_picture.data is an array, so it is never NULL itself; free the
         * first plane and clear it so a later cleanup cannot free it again. */
        av_free(dst_picture.data[0]);
        dst_picture.data[0] = NULL;
        if (out_frame) {
            av_free(out_frame);
            out_frame = NULL;
        }
        video_st = NULL;
    }

    if (audio_st) {
        avcodec_close(audio_st->codec);
        if (dst_samples_data) {
            /* av_samples_alloc_array_and_samples() makes two allocations:
             * the sample buffer and the pointer array. Free both. */
            av_free(dst_samples_data[0]);
            av_free(dst_samples_data);
            dst_samples_data = NULL;
        }
        audio_st = NULL;
    }

    if (oc) {
        /* Close the output file only if one was actually opened. */
        if (fmt && !(fmt->flags & AVFMT_NOFILE) && oc->pb) {
            avio_close(oc->pb);
            oc->pb = NULL;
        }
        avformat_free_context(oc);
        oc = NULL;
    }
    /* fmt points into oc, so it must not outlive it. */
    fmt = NULL;

    if (sws_ctx) {
        sws_freeContext(sws_ctx);
        sws_ctx = NULL;
    }
    if (swr_ctx) {
        swr_free(&swr_ctx);
        swr_ctx = NULL;
    }

    current_n_out = 0;
    current_in_buff = 0;
}
/**
 * Encode one decoded video frame and write the resulting packet, if any,
 * to the output container.
 *
 * When a scaler exists the source frame is converted into out_frame's own
 * planes; otherwise out_frame borrows the source frame's plane pointers.
 *
 * @param src_frame decoded source frame
 * @return 0 on success; -1 on failure, after onError() has been called
 */
int write_video_frame(AVFrame *src_frame) {
    int status;

    if (!sws_ctx) {
        /* No conversion needed: point the output frame at the source planes. */
        int plane = 0;
        while (plane < 4) {
            out_frame->data[plane] = src_frame->data[plane];
            out_frame->linesize[plane] = src_frame->linesize[plane];
            plane++;
        }
    } else {
        status = sws_scale(sws_ctx, (const uint8_t * const *)src_frame->data, src_frame->linesize, 0, src_frame->height, out_frame->data, out_frame->linesize);
        if (status < 0) {
            LOGD("scale error: %s\n", av_err2str(status));
            onError();
            return -1;
        }
    }

    AVPacket packet = { 0 };
    int have_packet;
    av_init_packet(&packet);

    status = avcodec_encode_video2(video_st->codec, &packet, out_frame, &have_packet);
    if (status < 0) {
        LOGD("Error encoding video frame: %s\n", av_err2str(status));
        onError();
        return -1;
    }

    /* Write only when the encoder actually produced a non-empty packet. */
    if (status == 0 && have_packet && packet.size) {
        packet.stream_index = video_st->index;
        status = av_interleaved_write_frame(oc, &packet);
        if (status != 0) {
            LOGD("Error while writing video frame: %s\n", av_err2str(status));
            onError();
            return -1;
        }
    }

    /* Advance the pts by one codec tick expressed in stream time-base units. */
    out_frame->pts += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
    return 0;
}
int check_write_packet(int flush) {
int got_packet, ret;
int writed = 0;
int dst_samples_size = av_samples_get_buffer_size(NULL, audio_st->codec->channels, audio_st->codec->frame_size, audio_st->codec->sample_fmt, 1);
while (current_n_out > audio_st->codec->frame_size || (flush && current_n_out)) {
AVFrame *frame = avcodec_alloc_frame();
AVPacket pkt2 = { 0 };
av_init_packet(&pkt2);
frame->nb_samples = min(audio_st->codec->frame_size, current_n_out);
int nb_samples_size = min(dst_samples_size, current_in_buff);
ret = avcodec_fill_audio_frame(frame, audio_st->codec->channels, audio_st->codec->sample_fmt, buff + writed, nb_samples_size, 1);
if (ret < 0) {
LOGD("Error fill frame: %s\n", av_err2str(ret));
onError();
return -1;
}
ret = avcodec_encode_audio2(audio_st->codec, &pkt2, frame, &got_packet);
if (ret < 0) {
LOGD("Error encoding audio frame: %s\n", av_err2str(ret));
onError();
return -1;
}
if (got_packet) {
pkt2.stream_index = audio_st->index;
ret = av_interleaved_write_frame(oc, &pkt2);
if (ret != 0) {
LOGD("Error while writing audio frame: %s\n", av_err2str(ret));
onError();
return -1;
}
}
writed += dst_samples_size;
current_n_out -= frame->nb_samples;
current_in_buff -= nb_samples_size;
avcodec_free_frame(&frame);
}
if (current_in_buff != 0 && writed != 0) {
memcpy(buff, buff + writed, current_in_buff);
}
return 0;
}
/**
 * Resample one decoded audio frame to the encoder's format, stage the result
 * in buff and let check_write_packet() encode whatever full frames are ready.
 *
 * @param src_frame decoded source frame
 * @param src_codec source decoder context; used only as a fallback for the
 *                  sample rate when the frame does not carry one
 * @return 0 on success; -1 on failure, after onError() has been called
 */
int write_audio_frame(AVFrame *src_frame, AVCodecContext *src_codec) {
    /* Samples per channel that dst_samples_data can currently hold. */
    static int dst_samples_capacity = 0;

    int src_rate = src_frame->sample_rate;
    if (src_rate <= 0 && src_codec) {
        src_rate = src_codec->sample_rate;
    }
    if (src_rate <= 0) {
        /* Would otherwise divide by zero when computing the ratio below. */
        LOGD("Invalid source sample rate\n");
        onError();
        return -1;
    }

    /* Upper bound on output samples: scaled input, slack, resampler delay. */
    const int n_in = src_frame->nb_samples;
    double ratio = (double)audio_st->codec->sample_rate / src_rate;
    int n_out = n_in * ratio + 32;
    int64_t delay = swr_get_delay(swr_ctx, audio_st->codec->sample_rate);
    if (delay > 0) {
        n_out += delay;
    }

    /* The buffer was previously sized once, from the first frame; a later,
     * larger request would have overrun it. Grow it when needed. */
    if (dst_samples_data && n_out > dst_samples_capacity) {
        av_free(dst_samples_data[0]);
        av_free(dst_samples_data);
        dst_samples_data = NULL;
    }
    if (!dst_samples_data) {
        int ret = av_samples_alloc_array_and_samples(&dst_samples_data, NULL, audio_st->codec->channels, n_out, audio_st->codec->sample_fmt, 0);
        if (ret < 0) {
            dst_samples_capacity = 0;
            LOGD("Could not allocate destination samples\n");
            onError();
            return -1;
        }
        dst_samples_capacity = n_out;
    }

    n_out = swr_convert(swr_ctx, dst_samples_data, n_out, (const uint8_t **)src_frame->extended_data, src_frame->nb_samples);
    if (n_out < 0) {
        LOGD("Error while converting\n");
        onError();
        return -1;
    }
    if (n_out == 0) {
        /* Not an error: the resampler produced no output for this input. */
        return 0;
    }

    int total_size = av_samples_get_buffer_size(NULL, audio_st->codec->channels, n_out, audio_st->codec->sample_fmt, 1);
    if (total_size <= 0) {
        LOGD("Error while converting\n");
        onError();
        return -1;
    }
    /* Bytes per sample frame; only plane 0 is copied, which matches the mono
     * encoder configured in this file. */
    int bytes_per_sample = total_size / n_out;
    if (bytes_per_sample <= 0) {
        LOGD("Error while converting\n");
        onError();
        return -1;
    }

    /* Stage the samples in pieces that fit buff, draining after each piece,
     * so a large frame cannot write past the end of the buffer. */
    const uint8_t *src = dst_samples_data[0];
    int remaining = total_size;
    while (remaining > 0) {
        int space = (int)sizeof(buff) - current_in_buff;
        int chunk = min(remaining, space);
        chunk -= chunk % bytes_per_sample; /* whole samples only */
        if (chunk <= 0) {
            LOGD("Audio staging buffer is full\n");
            onError();
            return -1;
        }
        memcpy(buff + current_in_buff, src, chunk);
        current_in_buff += chunk;
        current_n_out += chunk / bytes_per_sample;
        src += chunk;
        remaining -= chunk;

        /* check_write_packet() reports its own errors through onError(). */
        if (check_write_packet(0) < 0) {
            return -1;
        }
    }
    return 0;
}
void post_video_conversion() {
check_write_packet(1);
av_write_trailer(oc);
}