mirror of
https://github.com/DrKLO/Telegram.git
synced 2025-01-05 18:27:11 +01:00
545 lines
16 KiB
C++
545 lines
16 KiB
C++
#include "StreamingPart.h"
|
|
|
|
#include "rtc_base/logging.h"
|
|
#include "rtc_base/third_party/base64/base64.h"
|
|
|
|
extern "C" {
|
|
#include <libavutil/timestamp.h>
|
|
#include <libavformat/avformat.h>
|
|
#include <libavcodec/avcodec.h>
|
|
}
|
|
|
|
#include <string>
|
|
#include <set>
|
|
#include <map>
|
|
|
|
namespace tgcalls {
|
|
|
|
namespace {
|
|
|
|
/// Reads four bytes from `data` at `offset` and returns them as a 32-bit
/// unsigned integer in host byte order (the bytes are copied verbatim).
///
/// @param data    Buffer to read from.
/// @param offset  Read cursor in bytes; advanced by 4 on success and left
///                untouched on failure.
/// @return The decoded value, or `absl::nullopt` when `offset` is negative or
///         fewer than four bytes remain past it.
static absl::optional<uint32_t> readInt32(std::string const &data, int &offset) {
    // A negative cursor must be rejected explicitly: with mixed signed/unsigned
    // arithmetic a value such as -1 would pass a plain "offset + 4 > length"
    // test and cause a read in front of the buffer.
    if (offset < 0) {
        return absl::nullopt;
    }

    const size_t position = static_cast<size_t>(offset);
    if (position > data.size() || data.size() - position < sizeof(uint32_t)) {
        return absl::nullopt;
    }

    uint32_t value = 0;
    memcpy(&value, data.data() + position, sizeof(value));
    offset += static_cast<int>(sizeof(value));

    return value;
}
|
|
|
|
/// One entry of the channel-update table carried in a part's metadata.
struct ChannelUpdate {
    /// Index of the frame this update is associated with.
    int frameIndex{0};
    /// Channel identifier as stored in the table.
    int id{0};
    /// SSRC value as stored in the table.
    uint32_t ssrc{0};
};
|
|
|
|
static std::vector<ChannelUpdate> parseChannelUpdates(std::string const &data, int &offset) {
|
|
std::vector<ChannelUpdate> result;
|
|
|
|
auto channels = readInt32(data, offset);
|
|
if (!channels) {
|
|
return {};
|
|
}
|
|
|
|
auto count = readInt32(data, offset);
|
|
if (!count) {
|
|
return {};
|
|
}
|
|
|
|
for (int i = 0; i < count.value(); i++) {
|
|
auto frameIndex = readInt32(data, offset);
|
|
if (!frameIndex) {
|
|
return {};
|
|
}
|
|
|
|
auto channelId = readInt32(data, offset);
|
|
if (!channelId) {
|
|
return {};
|
|
}
|
|
|
|
auto ssrc = readInt32(data, offset);
|
|
if (!ssrc) {
|
|
return {};
|
|
}
|
|
|
|
ChannelUpdate update;
|
|
update.frameIndex = frameIndex.value();
|
|
update.id = channelId.value();
|
|
update.ssrc = ssrc.value();
|
|
|
|
result.push_back(update);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
class AVIOContextImpl {
|
|
public:
|
|
AVIOContextImpl(std::vector<uint8_t> &&fileData) :
|
|
_fileData(std::move(fileData)) {
|
|
_buffer.resize(4 * 1024);
|
|
_context = avio_alloc_context(_buffer.data(), (int)_buffer.size(), 0, this, &AVIOContextImpl::read, NULL, &AVIOContextImpl::seek);
|
|
}
|
|
|
|
~AVIOContextImpl() {
|
|
av_free(_context);
|
|
}
|
|
|
|
static int read(void *opaque, unsigned char *buffer, int bufferSize) {
|
|
AVIOContextImpl *instance = static_cast<AVIOContextImpl *>(opaque);
|
|
|
|
int bytesToRead = std::min(bufferSize, ((int)instance->_fileData.size()) - instance->_fileReadPosition);
|
|
if (bytesToRead < 0) {
|
|
bytesToRead = 0;
|
|
}
|
|
|
|
if (bytesToRead > 0) {
|
|
memcpy(buffer, instance->_fileData.data() + instance->_fileReadPosition, bytesToRead);
|
|
instance->_fileReadPosition += bytesToRead;
|
|
|
|
return bytesToRead;
|
|
} else {
|
|
return AVERROR_EOF;
|
|
}
|
|
}
|
|
|
|
static int64_t seek(void *opaque, int64_t offset, int whence) {
|
|
AVIOContextImpl *instance = static_cast<AVIOContextImpl *>(opaque);
|
|
|
|
if (whence == 0x10000) {
|
|
return (int64_t)instance->_fileData.size();
|
|
} else {
|
|
int64_t seekOffset = std::min(offset, (int64_t)instance->_fileData.size());
|
|
if (seekOffset < 0) {
|
|
seekOffset = 0;
|
|
}
|
|
instance->_fileReadPosition = (int)seekOffset;
|
|
return seekOffset;
|
|
}
|
|
}
|
|
|
|
AVIOContext *getContext() {
|
|
return _context;
|
|
}
|
|
|
|
private:
|
|
std::vector<uint8_t> _fileData;
|
|
int _fileReadPosition = 0;
|
|
|
|
std::vector<uint8_t> _buffer;
|
|
AVIOContext *_context = nullptr;
|
|
};
|
|
|
|
}
|
|
|
|
/// Outcome of a single readPcm() call.
struct ReadPcmResult {
    /// Number of samples (per channel) that were written to the output.
    int numSamples{0};
    /// Number of interleaved channels in the output.
    int numChannels{0};
};
|
|
|
|
class StreamingPartInternal {
|
|
public:
|
|
StreamingPartInternal(std::vector<uint8_t> &&fileData) :
|
|
_avIoContext(std::move(fileData)) {
|
|
int ret = 0;
|
|
|
|
_frame = av_frame_alloc();
|
|
|
|
AVInputFormat *inputFormat = av_find_input_format("ogg");
|
|
if (!inputFormat) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
_inputFormatContext = avformat_alloc_context();
|
|
if (!_inputFormatContext) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
_inputFormatContext->pb = _avIoContext.getContext();
|
|
|
|
if ((ret = avformat_open_input(&_inputFormatContext, "", inputFormat, nullptr)) < 0) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
if ((ret = avformat_find_stream_info(_inputFormatContext, nullptr)) < 0) {
|
|
_didReadToEnd = true;
|
|
|
|
avformat_close_input(&_inputFormatContext);
|
|
_inputFormatContext = nullptr;
|
|
return;
|
|
}
|
|
|
|
AVCodecParameters *audioCodecParameters = nullptr;
|
|
AVStream *audioStream = nullptr;
|
|
for (int i = 0; i < _inputFormatContext->nb_streams; i++) {
|
|
AVStream *inStream = _inputFormatContext->streams[i];
|
|
|
|
AVCodecParameters *inCodecpar = inStream->codecpar;
|
|
if (inCodecpar->codec_type != AVMEDIA_TYPE_AUDIO) {
|
|
continue;
|
|
}
|
|
audioCodecParameters = inCodecpar;
|
|
audioStream = inStream;
|
|
|
|
_durationInMilliseconds = (int)((inStream->duration + inStream->first_dts) * 1000 / 48000);
|
|
|
|
if (inStream->metadata) {
|
|
AVDictionaryEntry *entry = av_dict_get(inStream->metadata, "TG_META", nullptr, 0);
|
|
if (entry && entry->value) {
|
|
std::string result;
|
|
size_t data_used = 0;
|
|
std::string sourceBase64 = (const char *)entry->value;
|
|
rtc::Base64::Decode(sourceBase64, rtc::Base64::DO_LAX, &result, &data_used);
|
|
|
|
if (result.size() != 0) {
|
|
int offset = 0;
|
|
_channelUpdates = parseChannelUpdates(result, offset);
|
|
}
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if (audioCodecParameters && audioStream) {
|
|
AVCodec *codec = avcodec_find_decoder(audioCodecParameters->codec_id);
|
|
if (codec) {
|
|
_codecContext = avcodec_alloc_context3(codec);
|
|
ret = avcodec_parameters_to_context(_codecContext, audioCodecParameters);
|
|
if (ret < 0) {
|
|
_didReadToEnd = true;
|
|
|
|
avcodec_free_context(&_codecContext);
|
|
_codecContext = nullptr;
|
|
} else {
|
|
_codecContext->pkt_timebase = audioStream->time_base;
|
|
|
|
_channelCount = _codecContext->channels;
|
|
|
|
ret = avcodec_open2(_codecContext, codec, nullptr);
|
|
if (ret < 0) {
|
|
_didReadToEnd = true;
|
|
|
|
avcodec_free_context(&_codecContext);
|
|
_codecContext = nullptr;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
~StreamingPartInternal() {
|
|
if (_frame) {
|
|
av_frame_unref(_frame);
|
|
}
|
|
if (_codecContext) {
|
|
avcodec_close(_codecContext);
|
|
avcodec_free_context(&_codecContext);
|
|
}
|
|
if (_inputFormatContext) {
|
|
avformat_close_input(&_inputFormatContext);
|
|
}
|
|
}
|
|
|
|
ReadPcmResult readPcm(std::vector<int16_t> &outPcm) {
|
|
int outPcmSampleOffset = 0;
|
|
ReadPcmResult result;
|
|
|
|
int readSamples = (int)outPcm.size() / _channelCount;
|
|
|
|
result.numChannels = _channelCount;
|
|
|
|
while (outPcmSampleOffset < readSamples) {
|
|
if (_pcmBufferSampleOffset >= _pcmBufferSampleSize) {
|
|
fillPcmBuffer();
|
|
|
|
if (_pcmBufferSampleOffset >= _pcmBufferSampleSize) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
int readFromPcmBufferSamples = std::min(_pcmBufferSampleSize - _pcmBufferSampleOffset, readSamples - outPcmSampleOffset);
|
|
if (readFromPcmBufferSamples != 0) {
|
|
std::copy(_pcmBuffer.begin() + _pcmBufferSampleOffset * _channelCount, _pcmBuffer.begin() + _pcmBufferSampleOffset * _channelCount + readFromPcmBufferSamples * _channelCount, outPcm.begin() + outPcmSampleOffset * _channelCount);
|
|
_pcmBufferSampleOffset += readFromPcmBufferSamples;
|
|
outPcmSampleOffset += readFromPcmBufferSamples;
|
|
result.numSamples += readFromPcmBufferSamples;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
int getDurationInMilliseconds() {
|
|
return _durationInMilliseconds;
|
|
}
|
|
|
|
int getChannelCount() {
|
|
return _channelCount;
|
|
}
|
|
|
|
std::vector<ChannelUpdate> const &getChannelUpdates() {
|
|
return _channelUpdates;
|
|
}
|
|
|
|
private:
|
|
static int16_t sampleFloatToInt16(float sample) {
|
|
return av_clip_int16 (static_cast<int32_t>(lrint(sample*32767)));
|
|
}
|
|
|
|
void fillPcmBuffer() {
|
|
_pcmBufferSampleSize = 0;
|
|
_pcmBufferSampleOffset = 0;
|
|
|
|
if (_didReadToEnd) {
|
|
return;
|
|
}
|
|
if (!_inputFormatContext) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
if (!_codecContext) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
int ret = 0;
|
|
do {
|
|
ret = av_read_frame(_inputFormatContext, &_packet);
|
|
if (ret < 0) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
ret = avcodec_send_packet(_codecContext, &_packet);
|
|
if (ret < 0) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
int bytesPerSample = av_get_bytes_per_sample(_codecContext->sample_fmt);
|
|
if (bytesPerSample != 2 && bytesPerSample != 4) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
ret = avcodec_receive_frame(_codecContext, _frame);
|
|
} while (ret == AVERROR(EAGAIN));
|
|
|
|
if (ret != 0) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
if (_frame->channels != _channelCount || _frame->channels > 8) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
if (_pcmBuffer.size() < _frame->nb_samples * _frame->channels) {
|
|
_pcmBuffer.resize(_frame->nb_samples * _frame->channels);
|
|
}
|
|
|
|
switch (_codecContext->sample_fmt) {
|
|
case AV_SAMPLE_FMT_S16: {
|
|
memcpy(_pcmBuffer.data(), _frame->data[0], _frame->nb_samples * 2 * _frame->channels);
|
|
} break;
|
|
|
|
case AV_SAMPLE_FMT_S16P: {
|
|
int16_t *to = _pcmBuffer.data();
|
|
for (int sample = 0; sample < _frame->nb_samples; ++sample) {
|
|
for (int channel = 0; channel < _frame->channels; ++channel) {
|
|
int16_t *shortChannel = (int16_t*)_frame->data[channel];
|
|
*to++ = shortChannel[sample];
|
|
}
|
|
}
|
|
} break;
|
|
|
|
case AV_SAMPLE_FMT_FLT: {
|
|
float *floatData = (float *)&_frame->data[0];
|
|
for (int i = 0; i < _frame->nb_samples * _frame->channels; i++) {
|
|
_pcmBuffer[i] = sampleFloatToInt16(floatData[i]);
|
|
}
|
|
} break;
|
|
|
|
case AV_SAMPLE_FMT_FLTP: {
|
|
int16_t *to = _pcmBuffer.data();
|
|
for (int sample = 0; sample < _frame->nb_samples; ++sample) {
|
|
for (int channel = 0; channel < _frame->channels; ++channel) {
|
|
float *floatChannel = (float*)_frame->data[channel];
|
|
*to++ = sampleFloatToInt16(floatChannel[sample]);
|
|
}
|
|
}
|
|
} break;
|
|
|
|
default: {
|
|
//RTC_FATAL() << "Unexpected sample_fmt";
|
|
} break;
|
|
}
|
|
|
|
_pcmBufferSampleSize = _frame->nb_samples;
|
|
_pcmBufferSampleOffset = 0;
|
|
}
|
|
|
|
private:
|
|
AVIOContextImpl _avIoContext;
|
|
|
|
AVFormatContext *_inputFormatContext = nullptr;
|
|
AVPacket _packet;
|
|
AVCodecContext *_codecContext = nullptr;
|
|
AVFrame *_frame = nullptr;
|
|
|
|
bool _didReadToEnd = false;
|
|
|
|
int _durationInMilliseconds = 0;
|
|
int _channelCount = 0;
|
|
|
|
std::vector<ChannelUpdate> _channelUpdates;
|
|
|
|
std::vector<int16_t> _pcmBuffer;
|
|
int _pcmBufferSampleOffset = 0;
|
|
int _pcmBufferSampleSize = 0;
|
|
};
|
|
|
|
class StreamingPartState {
|
|
struct ChannelMapping {
|
|
uint32_t ssrc = 0;
|
|
int channelIndex = 0;
|
|
|
|
ChannelMapping(uint32_t ssrc_, int channelIndex_) :
|
|
ssrc(ssrc_), channelIndex(channelIndex_) {
|
|
}
|
|
};
|
|
|
|
public:
|
|
StreamingPartState(std::vector<uint8_t> &&data) :
|
|
_parsedPart(std::move(data)) {
|
|
if (_parsedPart.getChannelUpdates().size() == 0) {
|
|
_didReadToEnd = true;
|
|
return;
|
|
}
|
|
|
|
_remainingMilliseconds = _parsedPart.getDurationInMilliseconds();
|
|
_pcm10ms.resize(480 * _parsedPart.getChannelCount());
|
|
|
|
for (const auto &it : _parsedPart.getChannelUpdates()) {
|
|
_allSsrcs.insert(it.ssrc);
|
|
}
|
|
}
|
|
|
|
~StreamingPartState() {
|
|
}
|
|
|
|
int getRemainingMilliseconds() const {
|
|
return _remainingMilliseconds;
|
|
}
|
|
|
|
std::vector<StreamingPart::StreamingPartChannel> get10msPerChannel() {
|
|
if (_didReadToEnd) {
|
|
return {};
|
|
}
|
|
|
|
for (const auto &update : _parsedPart.getChannelUpdates()) {
|
|
if (update.frameIndex == _frameIndex) {
|
|
updateCurrentMapping(update.ssrc, update.id);
|
|
}
|
|
}
|
|
|
|
auto readResult = _parsedPart.readPcm(_pcm10ms);
|
|
if (readResult.numSamples <= 0) {
|
|
_didReadToEnd = true;
|
|
return {};
|
|
}
|
|
|
|
std::vector<StreamingPart::StreamingPartChannel> resultChannels;
|
|
for (const auto ssrc : _allSsrcs) {
|
|
StreamingPart::StreamingPartChannel emptyPart;
|
|
emptyPart.ssrc = ssrc;
|
|
resultChannels.push_back(emptyPart);
|
|
}
|
|
|
|
for (auto &channel : resultChannels) {
|
|
auto mappedChannelIndex = getCurrentMappedChannelIndex(channel.ssrc);
|
|
|
|
if (mappedChannelIndex) {
|
|
int sourceChannelIndex = mappedChannelIndex.value();
|
|
for (int j = 0; j < readResult.numSamples; j++) {
|
|
channel.pcmData.push_back(_pcm10ms[sourceChannelIndex + j * readResult.numChannels]);
|
|
}
|
|
} else {
|
|
for (int j = 0; j < readResult.numSamples; j++) {
|
|
channel.pcmData.push_back(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
_remainingMilliseconds -= 10;
|
|
if (_remainingMilliseconds < 0) {
|
|
_remainingMilliseconds = 0;
|
|
}
|
|
_frameIndex++;
|
|
|
|
return resultChannels;
|
|
}
|
|
|
|
private:
|
|
absl::optional<int> getCurrentMappedChannelIndex(uint32_t ssrc) {
|
|
for (const auto &it : _currentChannelMapping) {
|
|
if (it.ssrc == ssrc) {
|
|
return it.channelIndex;
|
|
}
|
|
}
|
|
return absl::nullopt;
|
|
}
|
|
|
|
void updateCurrentMapping(uint32_t ssrc, int channelIndex) {
|
|
for (int i = (int)_currentChannelMapping.size() - 1; i >= 0; i--) {
|
|
const auto &entry = _currentChannelMapping[i];
|
|
if (entry.ssrc == ssrc && entry.channelIndex == channelIndex) {
|
|
return;
|
|
} else if (entry.ssrc == ssrc || entry.channelIndex == channelIndex) {
|
|
_currentChannelMapping.erase(_currentChannelMapping.begin() + i);
|
|
}
|
|
}
|
|
_currentChannelMapping.emplace_back(ssrc, channelIndex);
|
|
}
|
|
|
|
private:
|
|
StreamingPartInternal _parsedPart;
|
|
std::set<uint32_t> _allSsrcs;
|
|
|
|
std::vector<int16_t> _pcm10ms;
|
|
std::vector<ChannelMapping> _currentChannelMapping;
|
|
int _frameIndex = 0;
|
|
int _remainingMilliseconds = 0;
|
|
|
|
bool _didReadToEnd = false;
|
|
};
|
|
|
|
/// Creates the decoding state for `data`; no state is created for empty input.
StreamingPart::StreamingPart(std::vector<uint8_t> &&data) {
    if (data.empty()) {
        return;
    }
    _state = new StreamingPartState(std::move(data));
}
|
|
|
|
/// Releases the decoding state, if one was created.
StreamingPart::~StreamingPart() {
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete _state;
}
|
|
|
|
int StreamingPart::getRemainingMilliseconds() const {
|
|
return _state ? _state->getRemainingMilliseconds() : 0;
|
|
}
|
|
|
|
std::vector<StreamingPart::StreamingPartChannel> StreamingPart::get10msPerChannel() {
|
|
return _state
|
|
? _state->get10msPerChannel()
|
|
: std::vector<StreamingPart::StreamingPartChannel>();
|
|
}
|
|
|
|
}
|