I want to estimate speech-stream quality in a VoIP call. In ITU-T G.107 I found a formula (equation (1), the E-model) that is unfamiliar to me as a developer, and I can't work out how to fill in its variables from the data available in the RTP/UDP/IP/Ethernet stack:
- R = Ro – Is – Id – Ie-eff + A

where, as far as I understand, Ro is the basic signal-to-noise ratio, Is the simultaneous impairment factor, Id the delay impairment factor, Ie-eff the effective equipment impairment factor (codec plus packet loss), and A the advantage factor.
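As far as I can tell, G.107 also defines how the resulting R factor maps to an estimated MOS; this appears to be what the clamped polynomial at the end of the code below implements:

$$\mathrm{MOS} = \begin{cases} 1, & R < 0 \\ 1 + 0.035\,R + 7\cdot 10^{-6}\,R\,(R-60)\,(100-R), & 0 \le R \le 100 \\ 4.5, & R > 100 \end{cases}$$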
I found an implementation in the MicroSIP codebase that uses several fixed values in place of the variables and estimations, but there is no explanation, either on the internet or in the codebase comments, of how formula (1) was turned into that code.

I need a reliable academic or standards reference (or method) that clearly explains how RTCP data can be converted into a MOS score.

The implementation is in global.cpp around line 404 (method pasted below):
bool msip_call_statistics(call_user_data *user_data, float *MOS)
{
    if (pjsua_var.state != PJSUA_STATE_RUNNING) {
        return false;
    }
    if (!pjsua_call_has_media(user_data->call_id)) {
        return false;
    }
    pjsua_stream_stat stat;
    pj_status_t status = pjsua_call_get_stream_stat(user_data->call_id, 0, &stat);
    if (status != PJ_SUCCESS) {
        return false;
    }
    int LOCAL_DELAY = 30; // fixed allowance for local processing delay, in ms
    float R;
    // fixed curve-fit coefficients used below in Ie = a + b * ln(1 + c * P)
    float a = 0.0f;
    float b = 19.8f;
    float c = 29.7f;
    float rx_loss = 0.0;
    float rx_jit = 0.0;
    float avg_latency = 0.0;
    // loss fraction over the interval since the previous call, from the
    // cumulative RTCP receive counters (treated as total loss when no
    // packets arrived in the interval)
    int pkt_last = stat.rtcp.rx.pkt - user_data->rx_pkt_prev;
    int loss_last = stat.rtcp.rx.loss - user_data->rx_loss_prev;
    rx_loss = (pkt_last == 0) ? 1.0f : ((float)loss_last / (float)(pkt_last + loss_last));
    user_data->rx_pkt_prev = stat.rtcp.rx.pkt;
    user_data->rx_loss_prev = stat.rtcp.rx.loss;
    // last reported interarrival jitter, converted from usec to ms
    rx_jit = (float)stat.rtcp.rx.jitter.last / 1000;
    // one-way delay estimate: half the RTT (usec -> ms) plus the fixed
    // local, playback and capture latencies, plus jitter as a crude
    // stand-in for jitter-buffer delay
    avg_latency = (stat.rtcp.rtt.last / 2000.0f) + LOCAL_DELAY + PJMEDIA_SND_DEFAULT_PLAY_LATENCY +
        PJMEDIA_SND_DEFAULT_REC_LATENCY + rx_jit;
    {
        float d = avg_latency;
        float d2 = d - 177.3f;
        // delay impairment Id: linear in d, with an extra penalty above 177.3 ms
        float Id = 0.024f * d + 0.11f * (d - 177.3f) * (d2 < 0 ? 0 : 1);
        float P = rx_loss;
        // equipment/loss impairment Ie as a logarithmic function of loss
        float Ie = a + b * (float)log(1 + c * P);
        // R factor; everything other than Id and Ie is folded into the 94.2 constant
        R = 94.2f - Id - Ie;
    }
    // R-to-MOS conversion, clamped to [1, 4.5]
    if (R < 0) {
        *MOS = 1;
    }
    else if (R > 100) {
        *MOS = 4.5;
    }
    else {
        *MOS = 1 + 0.035f * R + 7.10f / 1000000 * R * (R - 60) * (100 - R);
    }
    return true;
}
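To make the question concrete, here is my own stand-alone restatement of what the code above seems to compute, with the PJSIP plumbing stripped out. The function name and parameters are hypothetical (mine, not MicroSIP's or G.107's), and fixed_delay_ms stands for the 30 ms LOCAL_DELAY plus the PJMEDIA_SND_DEFAULT_PLAY_LATENCY and PJMEDIA_SND_DEFAULT_REC_LATENCY constants:

#include <cmath>

// Hypothetical stand-alone restatement of the MicroSIP computation;
// names and parameters are mine, not from MicroSIP or G.107.
float estimate_mos(float loss_fraction,   // RTCP interval loss ratio, 0..1
                   float jitter_ms,       // RTCP interarrival jitter, ms
                   float rtt_ms,          // RTCP round-trip time, ms
                   float fixed_delay_ms)  // local processing + sound-device latency, ms
{
    // one-way delay estimate: half the RTT plus fixed local delays,
    // with jitter added as a crude stand-in for jitter-buffer delay
    float d = rtt_ms / 2.0f + fixed_delay_ms + jitter_ms;

    // delay impairment: linear in d, extra penalty above 177.3 ms
    float Id = 0.024f * d + (d > 177.3f ? 0.11f * (d - 177.3f) : 0.0f);

    // equipment/loss impairment: logarithmic fit with fixed coefficients
    float Ie = 19.8f * std::log(1.0f + 29.7f * loss_fraction);

    // R factor; everything except Id and Ie is folded into the 94.2 constant
    float R = 94.2f - Id - Ie;

    // R-to-MOS conversion, clamped to [1, 4.5]
    if (R < 0.0f)   return 1.0f;
    if (R > 100.0f) return 4.5f;
    return 1.0f + 0.035f * R + 7e-6f * R * (R - 60.0f) * (100.0f - R);
}

By my arithmetic, estimate_mos(0.01f, 10.0f, 80.0f, 150.0f) gives d = 200 ms, R ≈ 81.8 and MOS ≈ 4.09, which looks plausible for 1 % loss. What I am missing is a citable source that justifies the 94.2, 177.3, 19.8 and 29.7 constants and this particular way of feeding RTCP measurements into the E-model.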