Currently, tc_calc_xmittime and tc_calc_xmitsize round from double to int three times: once when they call tc_core_time2tick / tc_core_tick2time (whose argument is int), once when those functions return (their return value is int), and finally when the tc_calc_* functions themselves return. This makes the conversions extremely coarse and inaccurate.

For example, on my test system (where tick_in_usec=15.625, clock_factor=1, and hz=1000000000), for a bitrate of 1Gbps all tc htb burst values between 0 and 999 bytes get encoded as 0 ticks; all values between 1000 and 1999 bytes get encoded as 15 ticks (equivalent to 960 bytes); all values between 2000 and 2999 bytes as 31 ticks (1984 bytes); and so on.

The patch changes the code so these calculations are done internally in floating point, and only rounded to an integer value when the value is returned. It also changes tc_calc_xmittime to round its calculated value up, rather than down, to ensure that the calculated time is actually sufficient for the requested size.

Signed-off-by: Jonathan Lennox <jonathan.lennox@8x8.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
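A sketch of the before/after arithmetic (illustrative only, using the test-system value tick_in_usec = 15.625 above, and assuming TIME_UNITS_PER_SEC is iproute2's microsecond-based constant, 1000000):

	/* encoding a 1500-byte burst at rate 1e9 bytes/s */
	double time = 1000000.0 * 1500 / 1e9;          /* 1.5 time units */
	int old_ticks = (int)((int)time * 15.625);     /* arg truncated to 1 -> 15 ticks */
	unsigned int new_ticks = ceil(time * 15.625);  /* 23.4375 rounded up -> 24 ticks */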
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * tc_core.c		TC core library.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <math.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>

#include "utils.h"
#include "tc_core.h"
#include <linux/atm.h>

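/* Conversion factors, filled in by tc_core_init() below from
 * /proc/net/psched; the defaults of 1 leave the conversions as identity
 * operations until initialization has run.
 */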
static double tick_in_usec = 1;
static double clock_factor = 1;

static double tc_core_time2tick(double time)
{
	return time * tick_in_usec;
}

double tc_core_tick2time(double tick)
{
	return tick / tick_in_usec;
}

unsigned int tc_core_time2ktime(unsigned int time)
{
	return time * clock_factor;
}

unsigned int tc_core_ktime2time(unsigned int ktime)
{
	return ktime / clock_factor;
}

unsigned int tc_calc_xmittime(__u64 rate, unsigned int size)
{
	return ceil(tc_core_time2tick(TIME_UNITS_PER_SEC*((double)size/(double)rate)));
}
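/* Illustration (values from the commit message's test system, where
 * tick_in_usec = 15.625, and assuming TIME_UNITS_PER_SEC is 1000000):
 * for rate 1e9 bytes/s and size 1500, the time is 1e6*1500/1e9 = 1.5
 * units, so this returns ceil(1.5 * 15.625) = 24 ticks; the old integer
 * arithmetic truncated 1.5 to 1 and returned 15 ticks instead.
 */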

unsigned int tc_calc_xmitsize(__u64 rate, unsigned int ticks)
{
	return ((double)rate*tc_core_tick2time(ticks))/TIME_UNITS_PER_SEC;
}
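/* The inverse mapping, under the same assumptions: tc_calc_xmitsize(1e9, 24)
 * gives 1e9 * (24 / 15.625) / 1e6 = 1536 bytes, so a 1500-byte burst encoded
 * by tc_calc_xmittime() decodes to at least the requested size, as intended
 * by rounding the time up.
 */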

/*
 * The align to ATM cells is used for determining the (ATM) SAR
 * alignment overhead at the ATM layer. (SAR = Segmentation And
 * Reassembly). This is for example needed when scheduling packets on
 * an ADSL connection. Note that the extra ATM-AAL overhead is _not_
 * included in this calculation. This overhead is added in the kernel
 * before doing the rate table lookup, as this gives better precision
 * (as the table will always be aligned to 48 bytes).
 * --Hawk, d.7/11-2004. <hawk@diku.dk>
 */
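/* For example, with ATM_CELL_PAYLOAD = 48 and ATM_CELL_SIZE = 53 (from
 * <linux/atm.h>), a 100-byte packet needs 3 cells and is accounted as
 * 3 * 53 = 159 bytes on the wire.
 */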
static unsigned int tc_align_to_atm(unsigned int size)
{
	int linksize, cells;

	cells = size / ATM_CELL_PAYLOAD;
	if ((size % ATM_CELL_PAYLOAD) > 0)
		cells++;

	linksize = cells * ATM_CELL_SIZE; /* Use full cell size to add ATM tax */
	return linksize;
}

static unsigned int tc_adjust_size(unsigned int sz, unsigned int mpu, enum link_layer linklayer)
{
	if (sz < mpu)
		sz = mpu;

	switch (linklayer) {
	case LINKLAYER_ATM:
		return tc_align_to_atm(sz);
	case LINKLAYER_ETHERNET:
	default:
		/* No size adjustments on Ethernet */
		return sz;
	}
}

/* Notice: the rate table calculated here has been replaced in the
 * kernel and is no longer used for lookups.
 *
 * This happened in kernel release v3.8, caused by kernel
 * - commit 56b765b79 ("htb: improved accuracy at high rates").
 * That change unfortunately broke the tc overhead and
 * linklayer parameters.
 *
 * Kernel overhead handling was fixed in kernel v3.10 by
 * - commit 01cb71d2d47 (net_sched: restore "overhead xxx" handling)
 *
 * Kernel linklayer handling was fixed in kernel v3.11 by
 * - commit 8a8e3d84b17 (net_sched: restore "linklayer atm" handling)
 */

/*
 * rtab[pkt_len >> cell_log] = pkt_xmit_time
 */
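/* The cell_log computed below is chosen so that the largest packet (mtu)
 * still indexes within the 256-entry table: e.g. for the default mtu of
 * 2047, cell_log becomes 3, since 2047 >> 3 = 255.
 */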

int tc_calc_rtable(struct tc_ratespec *r, __u32 *rtab,
		   int cell_log, unsigned int mtu,
		   enum link_layer linklayer)
{
	int i;
	unsigned int sz;
	unsigned int bps = r->rate;
	unsigned int mpu = r->mpu;

	if (mtu == 0)
		mtu = 2047;

	if (cell_log < 0) {
		cell_log = 0;
		while ((mtu >> cell_log) > 255)
			cell_log++;
	}

	for (i = 0; i < 256; i++) {
		sz = tc_adjust_size((i + 1) << cell_log, mpu, linklayer);
		rtab[i] = tc_calc_xmittime(bps, sz);
	}

	r->cell_align = -1;
	r->cell_log = cell_log;
	r->linklayer = (linklayer & TC_LINKLAYER_MASK);
	return cell_log;
}

int tc_calc_rtable_64(struct tc_ratespec *r, __u32 *rtab,
		      int cell_log, unsigned int mtu,
		      enum link_layer linklayer, __u64 rate)
{
	int i;
	unsigned int sz;
	__u64 bps = rate;
	unsigned int mpu = r->mpu;

	if (mtu == 0)
		mtu = 2047;

	if (cell_log < 0) {
		cell_log = 0;
		while ((mtu >> cell_log) > 255)
			cell_log++;
	}

	for (i = 0; i < 256; i++) {
		sz = tc_adjust_size((i + 1) << cell_log, mpu, linklayer);
		rtab[i] = tc_calc_xmittime(bps, sz);
	}

	r->cell_align = -1;
	r->cell_log = cell_log;
	r->linklayer = (linklayer & TC_LINKLAYER_MASK);
	return cell_log;
}

/*
 * stab[pkt_len >> cell_log] = pkt_xmit_size >> size_log
 */
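/* In other words: with cell shift cell_log and size shift size_log, a
 * packet of length len is accounted as roughly stab[len >> cell_log] <<
 * size_log bytes; size_log grows below whenever an adjusted size would
 * not fit in a __u16 table entry.
 */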

int tc_calc_size_table(struct tc_sizespec *s, __u16 **stab)
{
	int i;
	enum link_layer linklayer = s->linklayer;
	unsigned int sz;

	if (linklayer <= LINKLAYER_ETHERNET && s->mpu == 0) {
		/* don't need data table in this case (only overhead set) */
		s->mtu = 0;
		s->tsize = 0;
		s->cell_log = 0;
		s->cell_align = 0;
		*stab = NULL;
		return 0;
	}

	if (s->mtu == 0)
		s->mtu = 2047;
	if (s->tsize == 0)
		s->tsize = 512;

	s->cell_log = 0;
	while ((s->mtu >> s->cell_log) > s->tsize - 1)
		s->cell_log++;

	*stab = malloc(s->tsize * sizeof(__u16));
	if (!*stab)
		return -1;

again:
	for (i = s->tsize - 1; i >= 0; i--) {
		sz = tc_adjust_size((i + 1) << s->cell_log, s->mpu, linklayer);
		if ((sz >> s->size_log) > UINT16_MAX) {
			s->size_log++;
			goto again;
		}
		(*stab)[i] = sz >> s->size_log;
	}

	s->cell_align = -1; /* Due to the sz calc */
	return 0;
}
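/* tc_core_init() reads the kernel's time-to-tick ratio, tick-to-time
 * ratio, and clock resolution from /proc/net/psched (three hex fields)
 * and derives tick_in_usec and clock_factor from them.
 */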

int tc_core_init(void)
{
	FILE *fp;
	__u32 clock_res;
	__u32 t2us;
	__u32 us2t;

	fp = fopen("/proc/net/psched", "r");
	if (fp == NULL)
		return -1;

	if (fscanf(fp, "%08x%08x%08x", &t2us, &us2t, &clock_res) != 3) {
		fclose(fp);
		return -1;
	}
	fclose(fp);

	/* compatibility hack: for old iproute binaries (ignoring
	 * the kernel clock resolution) the kernel advertises a
	 * tick multiplier of 1000 in case of nano-second resolution,
	 * which really is 1. */
	if (clock_res == 1000000000)
		t2us = us2t;

	clock_factor = (double)clock_res / TIME_UNITS_PER_SEC;
	tick_in_usec = (double)t2us / us2t * clock_factor;
	return 0;
}