Telegram-Android/TMessagesProj/jni/libtgvoip3/VoIPGroupController.cpp
2020-04-24 12:21:58 +03:00

602 lines
21 KiB
C++

//
// libtgvoip is free and unencumbered public domain software.
// For more information, see http://unlicense.org or the UNLICENSE file
// you should have received with this source code distribution.
//
#include "logging.h"
#include "PrivateDefines.h"
#include "VoIPGroupController.h"
#include "VoIPServerConfig.h"
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
using namespace tgvoip;
VoIPGroupController::VoIPGroupController(std::int32_t timeDifference)
: m_audioMixer(new AudioMixer())
, m_timeDifference(timeDifference)
, m_userSelfID(0)
{
std::memset(&m_callbacks, 0, sizeof(m_callbacks));
LOGV("Created VoIPGroupController; timeDifference=%d", timeDifference);
}
VoIPGroupController::~VoIPGroupController()
{
if (m_audioOutput != nullptr)
{
m_audioOutput->Stop();
}
LOGD("before stop audio mixer");
m_audioMixer->Stop();
delete m_audioMixer;
for (GroupCallParticipant& participant : m_participants)
{
delete participant.levelMeter;
}
}
void VoIPGroupController::SetGroupCallInfo(std::uint8_t* encryptionKey, std::uint8_t* reflectorGroupTag, std::uint8_t* reflectorSelfTag,
std::uint8_t* reflectorSelfSecret, std::uint8_t* reflectorSelfTagHash, std::int32_t selfUserID,
const NetworkAddress& reflectorAddress, const NetworkAddress& reflectorAddressV6, std::uint16_t reflectorPort)
{
Endpoint e;
e.address = reflectorAddress;
e.v6address = reflectorAddressV6;
e.port = reflectorPort;
std::memcpy(e.peerTag, reflectorGroupTag, 16);
e.type = Endpoint::Type::UDP_RELAY;
e.id = FOURCC('G', 'R', 'P', 'R');
m_endpoints[e.id] = e;
m_groupReflector = e;
m_currentEndpoint = e.id;
std::memcpy(m_encryptionKey, encryptionKey, 256);
std::memcpy(m_reflectorSelfTag, reflectorSelfTag, 16);
std::memcpy(m_reflectorSelfSecret, reflectorSelfSecret, 16);
std::memcpy(m_reflectorSelfTagHash, reflectorSelfTagHash, 16);
std::uint8_t sha256[SHA256_LENGTH];
crypto.sha256(encryptionKey, 256, sha256);
std::memcpy(m_callID, sha256 + (SHA256_LENGTH - 16), 16);
std::memcpy(m_keyFingerprint, sha256 + (SHA256_LENGTH - 16), 8);
m_userSelfID = selfUserID;
}
void VoIPGroupController::AddGroupCallParticipant(std::int32_t userID, std::uint8_t* memberTagHash, std::uint8_t* serializedStreams, std::size_t streamsLength)
{
if (userID == m_userSelfID)
return;
if (m_userSelfID == 0)
return;
MutexGuard m(m_participantsMutex);
LOGV("Adding group call user %d, streams length %u", userID, static_cast<unsigned int>(streamsLength));
for (const GroupCallParticipant& participant : m_participants)
{
if (participant.userID == userID)
{
LOGE("user %d already added", userID);
std::abort();
}
}
GroupCallParticipant participant;
participant.userID = userID;
std::memcpy(participant.memberTagHash, memberTagHash, sizeof(participant.memberTagHash));
participant.levelMeter = new AudioLevelMeter();
BufferInputStream ss(serializedStreams, streamsLength);
std::vector<std::shared_ptr<Stream>> streams = DeserializeStreams(ss);
std::uint8_t audioStreamID = 0;
for (std::shared_ptr<Stream>& stream : streams)
{
stream->userID = userID;
if (stream->type == StreamType::AUDIO && stream->codec == CODEC_OPUS && !audioStreamID)
{
audioStreamID = stream->id;
stream->jitterBuffer = std::make_shared<JitterBuffer>(nullptr, stream->frameDuration);
if (stream->frameDuration > 50)
stream->jitterBuffer->SetMinPacketCount(static_cast<std::uint32_t>(ServerConfig::GetSharedInstance()->GetInt("jitter_initial_delay_60", 2)));
else if (stream->frameDuration > 30)
stream->jitterBuffer->SetMinPacketCount(static_cast<std::uint32_t>(ServerConfig::GetSharedInstance()->GetInt("jitter_initial_delay_40", 4)));
else
stream->jitterBuffer->SetMinPacketCount(static_cast<std::uint32_t>(ServerConfig::GetSharedInstance()->GetInt("jitter_initial_delay_20", 6)));
stream->callbackWrapper = std::make_shared<CallbackWrapper>();
stream->decoder = std::make_shared<OpusDecoder>(stream->callbackWrapper.get(), false, false);
stream->decoder->SetJitterBuffer(stream->jitterBuffer);
stream->decoder->SetFrameDuration(stream->frameDuration);
stream->decoder->SetDTX(true);
stream->decoder->SetLevelMeter(participant.levelMeter);
m_audioMixer->AddInput(stream->callbackWrapper);
}
m_incomingStreams.emplace_back(stream);
}
if (audioStreamID == 0)
{
LOGW("User %d has no usable audio stream", userID);
}
participant.streams.insert(participant.streams.end(), streams.begin(), streams.end());
m_participants.emplace_back(participant);
LOGI("Added group call participant %d", userID);
}
void VoIPGroupController::RemoveGroupCallParticipant(std::int32_t userID)
{
MutexGuard m(m_participantsMutex);
for (auto it = m_incomingStreams.begin(); it != m_incomingStreams.end();)
{
if ((*it)->userID == userID)
{
LOGI("Removed stream %d belonging to user %d", (*it)->id, userID);
m_audioMixer->RemoveInput((*it)->callbackWrapper);
(*it)->decoder->Stop();
it = m_incomingStreams.erase(it);
continue;
}
++it;
}
for (auto it = m_participants.begin(); it != m_participants.end(); ++it)
{
if (it->userID == userID)
{
delete it->levelMeter;
m_participants.erase(it);
LOGI("Removed group call participant %d", userID);
break;
}
}
}
std::vector<std::shared_ptr<VoIPController::Stream>> VoIPGroupController::DeserializeStreams(BufferInputStream& in)
{
std::vector<std::shared_ptr<Stream>> res;
try
{
std::uint8_t count = in.ReadUInt8();
for (int i = 0; i < count; ++i)
{
std::uint16_t len = in.ReadUInt16();
BufferInputStream inner = in.GetPartBuffer(len, true);
std::shared_ptr<Stream> s = std::make_shared<Stream>();
s->id = inner.ReadUInt8();
s->type = static_cast<StreamType>(inner.ReadUInt8());
s->codec = inner.ReadUInt32();
std::uint32_t flags = inner.ReadUInt32();
s->enabled = (flags & STREAM_FLAG_ENABLED) == STREAM_FLAG_ENABLED;
s->frameDuration = inner.ReadUInt16();
res.emplace_back(s);
}
}
catch (const std::out_of_range& exception)
{
LOGW("Error deserializing streams.\nwhat():\n%s", exception.what());
}
return res;
}
void VoIPGroupController::SetParticipantStreams(std::int32_t userID, std::uint8_t* serializedStreams, std::size_t length)
{
LOGD("Set participant streams for %d", userID);
MutexGuard m(m_participantsMutex);
for (const GroupCallParticipant& participant : m_participants)
{
if (participant.userID != userID)
continue;
BufferInputStream in(serializedStreams, length);
std::vector<std::shared_ptr<Stream>> streams = DeserializeStreams(in);
for (const std::shared_ptr<Stream>& ns : streams)
{
bool found = false;
for (const std::shared_ptr<Stream>& s : participant.streams)
{
if (s->id == ns->id)
{
s->enabled = ns->enabled;
if (m_groupCallbacks.participantAudioStateChanged)
m_groupCallbacks.participantAudioStateChanged(this, userID, s->enabled);
found = true;
break;
}
}
if (!found)
{
LOGW("Tried to add stream %d for user %d but adding/removing streams is not supported", ns->id, userID);
}
}
break;
}
}
std::size_t VoIPGroupController::GetInitialStreams(std::uint8_t* buf, std::size_t size)
{
BufferOutputStream s(buf, size);
s.WriteUInt8(1); // streams count
s.WriteInt16(12); // this object length
s.WriteUInt8(1); // stream id
s.WriteUInt8(static_cast<std::uint8_t>(StreamType::AUDIO));
s.WriteUInt32(CODEC_OPUS);
s.WriteInt32(STREAM_FLAG_ENABLED | STREAM_FLAG_DTX); // flags
s.WriteInt16(60); // frame duration
return s.GetLength();
}
void VoIPGroupController::SendInit()
{
SendRecentPacketsRequest();
}
void VoIPGroupController::ProcessIncomingPacket(NetworkPacket& packet, Endpoint& srcEndpoint)
{
}
void VoIPGroupController::SendUdpPing(Endpoint& endpoint)
{
}
void VoIPGroupController::SetNetworkType(NetType type)
{
m_networkType = type;
UpdateDataSavingState();
UpdateAudioBitrateLimit();
std::string itfName = m_udpSocket->GetLocalInterfaceInfo(nullptr, nullptr);
if (itfName != m_activeNetItfName)
{
m_udpSocket->OnActiveInterfaceChanged();
LOGI("Active network interface changed: %s -> %s", m_activeNetItfName.c_str(), itfName.c_str());
bool isFirstChange = m_activeNetItfName.length() == 0;
m_activeNetItfName = itfName;
if (isFirstChange)
return;
m_udpConnectivityState = UdpState::UNKNOWN;
m_udpPingCount = 0;
m_lastUdpPingTime = 0;
if (m_proxyProtocol == Proxy::SOCKS5)
InitUDPProxy();
m_selectCanceller->CancelSelect();
}
}
void VoIPGroupController::SendRecentPacketsRequest()
{
BufferOutputStream out(1024);
out.WriteInt32(TLID_UDP_REFLECTOR_REQUEST_PACKETS_INFO); // TL function
out.WriteInt32(GetCurrentUnixtime()); // date:int
out.WriteInt64(0); // query_id:long
out.WriteInt32(64); // recv_num:int
out.WriteInt32(0); // sent_num:int
SendSpecialReflectorRequest(out.GetBuffer(), out.GetLength());
}
void VoIPGroupController::SendSpecialReflectorRequest(std::uint8_t* data, std::size_t len)
{
}
void VoIPGroupController::SendRelayPings()
{
double currentTime = GetCurrentTime();
if (currentTime - m_groupReflector.m_lastPingTime >= 0.25)
{
SendRecentPacketsRequest();
m_groupReflector.m_lastPingTime = currentTime;
}
}
void VoIPGroupController::OnAudioOutputReady()
{
m_encoder->SetDTX(true);
m_audioMixer->SetOutput(m_audioOutput);
m_audioMixer->SetEchoCanceller(m_echoCanceller);
m_audioMixer->Start();
m_audioOutput->Start();
m_audioOutStarted = true;
m_encoder->SetLevelMeter(&m_selfLevelMeter);
}
void VoIPGroupController::WritePacketHeader(std::uint32_t seq, BufferOutputStream* s, PktType type, std::uint32_t length, PacketSender* source)
{
s->WriteUInt32(TLID_DECRYPTED_AUDIO_BLOCK);
std::int64_t randomID;
crypto.rand_bytes(reinterpret_cast<std::uint8_t*>(&randomID), 8);
s->WriteInt64(randomID);
std::uint8_t randBytes[7];
crypto.rand_bytes(randBytes, 7);
s->WriteUInt8(std::uint8_t{7});
s->WriteBytes(randBytes, 7);
std::uint32_t pflags = PFLAG_HAS_SEQ | PFLAG_HAS_SENDER_TAG_HASH;
if (length > 0)
pflags |= PFLAG_HAS_DATA;
pflags |= static_cast<std::uint32_t>(type) << 24;
s->WriteUInt32(pflags);
if (type == PktType::STREAM_DATA || type == PktType::STREAM_DATA_X2 || type == PktType::STREAM_DATA_X3)
{
m_congestionControl->PacketSent(seq, length);
}
s->WriteUInt32(seq);
s->WriteBytes(m_reflectorSelfTagHash, 16);
if (length > 0)
{
if (length <= 253)
{
s->WriteUInt8(static_cast<std::uint8_t>(length));
}
else
{
s->WriteUInt8(254);
s->WriteUInt8(static_cast<std::uint8_t>(length & 0xFF));
s->WriteUInt8(static_cast<std::uint8_t>((length >> 8) & 0xFF));
s->WriteUInt8(static_cast<std::uint8_t>((length >> 16) & 0xFF));
}
}
}
void VoIPGroupController::SendPacket(std::uint8_t* data, std::size_t len, Endpoint& ep, PendingOutgoingPacket& srcPacket)
{
if (m_stopping)
return;
if (ep.type == Endpoint::Type::TCP_RELAY && !m_useTCP)
return;
BufferOutputStream out(len + 128);
out.WriteBytes(m_reflectorSelfTag, 16);
if (len > 0)
{
BufferOutputStream inner(len + 128);
inner.WriteUInt32(static_cast<std::uint32_t>(len));
inner.WriteBytes(data, len);
std::size_t padLen = 16 - inner.GetLength() % 16;
if (padLen < 12)
padLen += 16;
std::uint8_t padding[28];
crypto.rand_bytes(padding, padLen);
inner.WriteBytes(padding, padLen);
assert(inner.GetLength() % 16 == 0);
std::uint8_t key[32], iv[32], msgKey[16];
out.WriteBytes(m_keyFingerprint, 8);
BufferOutputStream buf(len + 32);
std::size_t x = 0;
buf.WriteBytes(m_encryptionKey + 88 + x, 32);
buf.WriteBytes(inner.GetBuffer() + 4, inner.GetLength() - 4);
std::uint8_t msgKeyLarge[32];
crypto.sha256(buf.GetBuffer(), buf.GetLength(), msgKeyLarge);
std::memcpy(msgKey, msgKeyLarge + 8, 16);
KDF2(msgKey, 0, key, iv);
out.WriteBytes(msgKey, 16);
std::vector<std::uint8_t> aesOut(MSC_STACK_FALLBACK(inner.GetLength(), 1500));
crypto.aes_ige_encrypt(inner.GetBuffer(), aesOut.data(), inner.GetLength(), key, iv);
out.WriteBytes(aesOut.data(), inner.GetLength());
}
// relay signature
out.WriteBytes(m_reflectorSelfSecret, 16);
std::uint8_t sig[32];
crypto.sha256(out.GetBuffer(), out.GetLength(), sig);
out.Rewind(16);
out.WriteBytes(sig, 16);
if (srcPacket.type == PktType::STREAM_DATA || srcPacket.type == PktType::STREAM_DATA_X2 || srcPacket.type == PktType::STREAM_DATA_X3)
{
PacketIdMapping mapping =
{
.ackTime = 0.0,
.seq = srcPacket.seq,
.id = *reinterpret_cast<std::uint16_t*>(sig + 14),
};
MutexGuard m(m_sentPacketsMutex);
m_recentSentPackets.emplace_back(mapping);
while (m_recentSentPackets.size() > 64)
m_recentSentPackets.erase(m_recentSentPackets.begin());
}
m_lastSentSeq = srcPacket.seq;
if (IS_MOBILE_NETWORK(m_networkType))
m_stats.bytesSentMobile += static_cast<std::uint64_t>(out.GetLength());
else
m_stats.bytesSentWifi += static_cast<std::uint64_t>(out.GetLength());
}
void VoIPGroupController::SetCallbacks(VoIPGroupController::Callbacks callbacks)
{
VoIPController::SetCallbacks(static_cast<VoIPController::Callbacks&>(callbacks));
this->m_groupCallbacks = callbacks;
}
std::int32_t VoIPGroupController::GetCurrentUnixtime()
{
return static_cast<std::int32_t>(time(nullptr)) + m_timeDifference;
}
float VoIPGroupController::GetParticipantAudioLevel(std::int32_t userID)
{
if (userID == m_userSelfID)
return m_selfLevelMeter.GetLevel();
MutexGuard m(m_participantsMutex);
for (const GroupCallParticipant& participant : m_participants)
{
if (participant.userID == userID)
{
return participant.levelMeter->GetLevel();
}
}
return 0;
}
void VoIPGroupController::SetMicMute(bool mute)
{
m_micMuted = mute;
if (m_audioInput)
{
if (mute)
m_audioInput->Stop();
else
m_audioInput->Start();
if (!m_audioInput->IsInitialized())
{
m_lastError = Error::AUDIO_IO;
SetState(State::FAILED);
return;
}
}
m_outgoingStreams[0]->enabled = !mute;
SerializeAndUpdateOutgoingStreams();
}
void VoIPGroupController::SetParticipantVolume(std::int32_t userID, float volume)
{
MutexGuard m(m_participantsMutex);
for (const GroupCallParticipant& participant : m_participants)
{
if (participant.userID != userID)
continue;
for (const std::shared_ptr<Stream>& stream : participant.streams)
{
if (stream->type == StreamType::AUDIO)
{
if (stream->decoder != nullptr)
{
float db;
if (volume == 0.0f)
db = -INFINITY;
else if (volume < 1.0f)
db = -50.0f * (1.0f - volume);
else if (volume > 1.0f && volume <= 2.0f)
db = 10.0f * (volume - 1.0f);
else
db = 0.0f;
m_audioMixer->SetInputVolume(stream->callbackWrapper, db);
}
break;
}
}
break;
}
}
void VoIPGroupController::SerializeAndUpdateOutgoingStreams()
{
BufferOutputStream out(1024);
out.WriteUInt8(static_cast<std::uint8_t>(m_outgoingStreams.size()));
for (const std::shared_ptr<Stream>& stream : m_outgoingStreams)
{
BufferOutputStream o(128);
o.WriteUInt8(stream->id);
o.WriteUInt8(static_cast<std::uint8_t>(stream->type));
o.WriteUInt32(stream->codec);
o.WriteInt32(static_cast<std::uint8_t>((stream->enabled ? STREAM_FLAG_ENABLED : 0) | STREAM_FLAG_DTX));
o.WriteUInt16(stream->frameDuration);
out.WriteUInt16(static_cast<std::uint16_t>(o.GetLength()));
out.WriteBytes(o.GetBuffer(), o.GetLength());
}
if (m_groupCallbacks.updateStreams)
m_groupCallbacks.updateStreams(this, out.GetBuffer(), out.GetLength());
}
std::string VoIPGroupController::GetDebugString()
{
std::string result = "Remote endpoints: \n";
char buffer[2048];
for (const auto& [_, endpoint] : m_endpoints)
{
const char* type;
switch (endpoint.type)
{
case Endpoint::Type::UDP_P2P_INET:
type = "UDP_P2P_INET";
break;
case Endpoint::Type::UDP_P2P_LAN:
type = "UDP_P2P_LAN";
break;
case Endpoint::Type::UDP_RELAY:
type = "UDP_RELAY";
break;
case Endpoint::Type::TCP_RELAY:
type = "TCP_RELAY";
break;
// default:
// type = "UNKNOWN";
// break;
}
std::snprintf(buffer, sizeof(buffer), "%s:%u %dms [%s%s]\n", endpoint.address.ToString().c_str(), endpoint.port,
static_cast<int>(endpoint.m_averageRTT * 1000), type, m_currentEndpoint == endpoint.id ? ", IN_USE" : "");
result += buffer;
}
double avgLate[3];
std::shared_ptr<JitterBuffer> jitterBuffer = m_incomingStreams.size() == 1 ? m_incomingStreams[0]->jitterBuffer : nullptr;
if (jitterBuffer != nullptr)
jitterBuffer->GetAverageLateCount(avgLate);
else
std::fill(std::begin(avgLate), std::end(avgLate), 0);
std::snprintf(
buffer,
sizeof(buffer),
"RTT avg/min: %d/%d\n"
"Congestion window: %d/%d bytes\n"
"Key fingerprint: %02hhX%02hhX%02hhX%02hhX%02hhX%02hhX%02hhX%02hhX\n"
"Last sent/ack'd seq: %u/%u\n"
"Send/recv losses: %u/%u (%d%%)\n"
"Audio bitrate: %d kbit\n"
"Bytes sent/recvd: %llu/%llu\n\n",
static_cast<int>(m_congestionControl->GetAverageRTT() * 1000),
static_cast<int>(m_congestionControl->GetMinimumRTT() * 1000),
static_cast<int>(m_congestionControl->GetInflightDataSize()),
static_cast<int>(m_congestionControl->GetCongestionWindow()),
m_keyFingerprint[0],
m_keyFingerprint[1],
m_keyFingerprint[2],
m_keyFingerprint[3],
m_keyFingerprint[4],
m_keyFingerprint[5],
m_keyFingerprint[6],
m_keyFingerprint[7],
m_lastSentSeq,
m_lastRemoteAckSeq,
m_congestionControl->GetSendLossCount(),
m_recvLossCount,
m_encoder ? m_encoder->GetPacketLoss() : 0,
m_encoder ? (m_encoder->GetBitrate() / 1000) : 0,
static_cast<unsigned long long>(m_stats.bytesSentMobile + m_stats.bytesSentWifi),
static_cast<unsigned long long>(m_stats.bytesRecvdMobile + m_stats.bytesRecvdWifi));
MutexGuard m(m_participantsMutex);
for (const GroupCallParticipant& participant : m_participants)
{
std::snprintf(buffer, sizeof(buffer), "Participant id: %d\n", participant.userID);
result += buffer;
for (const std::shared_ptr<Stream>& stream : participant.streams)
{
char* codec = reinterpret_cast<char*>(&stream->codec);
std::snprintf(buffer, sizeof(buffer), "Stream %d (type %u, codec '%c%c%c%c', %sabled)\n",
stream->id, static_cast<std::uint8_t>(stream->type), codec[3], codec[2], codec[1], codec[0], stream->enabled ? "en" : "dis");
result += buffer;
if (stream->enabled)
{
if (stream->jitterBuffer != nullptr)
{
std::snprintf(buffer, sizeof(buffer), "Jitter buffer: %d/%.2f\n",
stream->jitterBuffer->GetMinPacketCount(), stream->jitterBuffer->GetAverageDelay());
result += buffer;
}
}
}
result += "\n";
}
return result;
}