/* vq.c */
#include "vq.h"
/*
 * Old per-module globals, kept here for reference.  The code below uses
 * their per-context equivalents in struct iso_rx_context (see vq.h):
 *
 *	s64 vq_total_tokens;
 *	ktime_t vq_last_update_time;
 *	ktime_t vq_last_check_time;
 *	spinlock_t vq_spinlock;
 *	struct list_head vq_list;
 *	struct hlist_head vq_bucket[ISO_MAX_VQ_BUCKETS];
 *	atomic_t vq_active_rate;
 */
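/*
 * From the way this file touches rxctx, struct iso_rx_context (declared in
 * vq.h, not shown here) carries at least the fields below.  This is a
 * sketch inferred from usage, not the authoritative definition; the types
 * are assumptions.
 *
 *	struct iso_rx_context {
 *		struct list_head vq_list;
 *		struct hlist_head vq_bucket[ISO_MAX_VQ_BUCKETS];
 *		spinlock_t vq_spinlock;
 *		ktime_t vq_last_update_time;
 *		atomic_t vq_active_rate;
 *		u64 rcp_rate;	/* used by iso_vq_drain() */
 *	};
 */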
/* Initialise the per-RX-context VQ state: the VQ list, the class hash
 * buckets, the spinlock and the active-rate counter. */
void iso_vqs_init(struct iso_rx_context *ctx) {
	int i;

	INIT_LIST_HEAD(&ctx->vq_list);
	ctx->vq_last_update_time = ktime_get();
	spin_lock_init(&ctx->vq_spinlock);
	atomic_set(&ctx->vq_active_rate, 0);

	for(i = 0; i < ISO_MAX_VQ_BUCKETS; i++) {
		INIT_HLIST_HEAD(&ctx->vq_bucket[i]);
	}
}
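/*
 * for_each_vq() is defined in vq.h, not here.  Every user below declares a
 * companion "vq_next" pointer, so it is presumably a deletion-safe list
 * iterator; a plausible sketch (an assumption, not the real definition):
 *
 *	#define for_each_vq(vq, ctx) \
 *		list_for_each_entry_safe(vq, vq_next, &(ctx)->vq_list, list)
 */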
/* Tear down and free every VQ in this RX context. */
void iso_vqs_exit(struct iso_rx_context *ctx) {
	struct iso_vq *vq, *vq_next;

	for_each_vq(vq, ctx) {
		iso_vq_free(vq);
	}
}
/* Allocate a VQ for traffic class @klass, initialise it and publish it on
 * both the per-context VQ list and the class hash table. */
struct iso_vq *iso_vq_alloc(iso_class_t klass, struct iso_rx_context *rxctx) {
	struct iso_vq *vq = kmalloc(sizeof(struct iso_vq), GFP_KERNEL);
	u32 hash;
	struct hlist_head *head;

	if(vq) {
		vq->rxctx = rxctx;
		if(iso_vq_init(vq)) {
			/* Per-cpu stats allocation failed; don't publish a
			 * half-initialised VQ. */
			kfree(vq);
			return NULL;
		}

		rcu_read_lock();
		vq->klass = klass;
		hash = iso_class_hash(klass);
		head = &rxctx->vq_bucket[hash & (ISO_MAX_VQ_BUCKETS - 1)];

		/* Publish: readers find the VQ either by walking vq_list or
		 * by hashing the class into vq_bucket[]. */
		list_add_tail_rcu(&vq->list, &rxctx->vq_list);
		hlist_add_head_rcu(&vq->hash_node, head);

		/* Redistribute the drain rate across all VQs. */
		iso_vq_calculate_rates(rxctx);
		rcu_read_unlock();
	}
	return vq;
}
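/*
 * A fast-path lookup would hash the class and walk the same bucket under
 * RCU.  A minimal sketch, assuming a hypothetical iso_class_cmp() helper
 * and the three-argument hlist_for_each_entry_rcu() of newer kernels (the
 * real lookup lives outside this file):
 *
 *	struct iso_vq *vq;
 *	u32 hash = iso_class_hash(klass);
 *	struct hlist_head *head = &rxctx->vq_bucket[hash & (ISO_MAX_VQ_BUCKETS - 1)];
 *
 *	rcu_read_lock();
 *	hlist_for_each_entry_rcu(vq, head, hash_node) {
 *		if (iso_class_cmp(vq->klass, klass) == 0)
 *			break;
 *	}
 *	rcu_read_unlock();
 */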
/*
 * Called on the slow path when (re)configuring each VQ's rate.  This could
 * be made much faster (i.e., removed completely), but it is not worth the
 * trouble right now.
 *
 * Each non-static VQ gets a weighted share of ISO_VQ_DRAIN_RATE_MBPS;
 * static VQs keep their configured rate, but their weight still counts
 * toward the total.
 */
void iso_vq_calculate_rates(struct iso_rx_context *rxctx) {
	u32 total_weight = 0;
	struct iso_vq *vq, *vq_next;

	for_each_vq(vq, rxctx) {
		total_weight += vq->weight;
	}

	if(total_weight > 0) {
		for_each_vq(vq, rxctx) {
			if (vq->is_static)
				continue;
			vq->rate = ISO_VQ_DRAIN_RATE_MBPS * vq->weight / total_weight;
		}
	}
}
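/*
 * Worked example (numbers are illustrative): if ISO_VQ_DRAIN_RATE_MBPS
 * were 9000 and the context held two non-static VQs with weights 2 and 1,
 * total_weight would be 3 and the loop would assign 9000*2/3 = 6000 and
 * 9000*1/3 = 3000 Mb/s respectively.
 */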
/* Initialise a freshly allocated VQ: default rates, weight, EWMA state and
 * the per-cpu statistics block.  Returns 0 on success or -ENOMEM if the
 * per-cpu allocation fails. */
int iso_vq_init(struct iso_vq *vq) {
	int i;

	vq->enabled = 1;
	vq->is_static = 0;
	vq->rate = ISO_MIN_RFAIR;
	vq->total_bytes_queued = 0;
	vq->feedback_rate = ISO_MIN_RFAIR;
	vq->last_rx_bytes = 0;
	vq->rx_rate = 0;
	vq->weight = 1;
	vq->alpha = 0;
	vq->last_update_time = vq->last_borrow_time = ktime_get();

	vq->percpu_stats = alloc_percpu(struct iso_vq_stats);
	if(vq->percpu_stats == NULL)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		struct iso_vq_stats *stats = per_cpu_ptr(vq->percpu_stats, i);
		stats->bytes_queued = 0;
		stats->network_marked = 0;
		stats->rx_bytes = 0;
		stats->rx_packets = 0;
		stats->rx_since_last_feedback = 0;
		stats->rx_marked_since_last_feedback = 0;
	}

	spin_lock_init(&vq->spinlock);
	INIT_LIST_HEAD(&vq->list);
	INIT_HLIST_NODE(&vq->hash_node);
	atomic_set(&vq->refcnt, 0);
	return 0;
}
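/*
 * struct iso_vq_stats is declared in vq.h; from the fields touched in this
 * file it looks roughly like the sketch below (types are an assumption).
 * Keeping the counters per-cpu lets iso_vq_enqueue() account packets
 * without taking vq->spinlock on every packet; iso_vq_drain() folds the
 * per-cpu values back together.
 *
 *	struct iso_vq_stats {
 *		u64 bytes_queued;
 *		u64 network_marked;
 *		u64 rx_bytes;
 *		u64 rx_packets;
 *		u64 rx_since_last_feedback;
 *		u64 rx_marked_since_last_feedback;
 *	};
 */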
/* Release a VQ.  Bail out if someone still holds a reference. */
void iso_vq_free(struct iso_vq *vq) {
	if(atomic_read(&vq->refcnt) > 0)
		return;

	synchronize_rcu();
	list_del(&vq->list);
	free_percpu(vq->percpu_stats);
	kfree(vq);
}
/* Per-packet RX accounting: count the packet and its ECN mark in the
 * per-cpu stats, and kick iso_vq_drain() if the update interval elapsed. */
void iso_vq_enqueue(struct iso_vq *vq, struct sk_buff *pkt) {
	ktime_t now;
	u64 dt;
	unsigned long flags;
	int cpu = smp_processor_id();
	struct iso_vq_stats *stats = per_cpu_ptr(vq->percpu_stats, cpu);
	u32 len = skb_size(pkt);
	struct ethhdr *eth;
	struct iphdr *iph;

	eth = eth_hdr(pkt);

	stats->rx_since_last_feedback++;
	stats->rx_packets++;

	if(likely(eth->h_proto == __constant_htons(ETH_P_IP))) {
		iph = ip_hdr(pkt);
		/* ECN field == 11b: Congestion Experienced. */
		if((iph->tos & 0x3) == 0x3) {
			stats->network_marked++;
			stats->rx_marked_since_last_feedback++;
		}
	}

	now = ktime_get();
	dt = ktime_us_delta(now, vq->last_update_time);

	if(unlikely(dt > ISO_VQ_UPDATE_INTERVAL_US)) {
		/* Trylock: at most one CPU pays the drain cost per interval,
		 * the rest simply skip it. */
		if(spin_trylock_irqsave(&vq->spinlock, flags)) {
			iso_vq_drain(vq, dt);
			spin_unlock_irqrestore(&vq->spinlock, flags);
		}
	}

	stats->bytes_queued += len;
	stats->rx_bytes += len;
}
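/*
 * iso_vq_enqueue() is the per-packet fast path.  A caller on the receive
 * side would look roughly like the sketch below; the classifier lookup and
 * RX-context plumbing live elsewhere in the module, so the names here are
 * illustrative only:
 *
 *	struct iso_vq *vq = <look up the VQ for skb's class>;
 *	if (vq)
 *		iso_vq_enqueue(vq, skb);
 */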
/* Fold the per-cpu counters into the VQ, measure the receive rate and run
 * the RCP/ECN control law to compute the feedback rate advertised to
 * senders.  Called with the VQ's spinlock held. */
void iso_vq_drain(struct iso_vq *vq, u64 dt) {
	u64 rx_bytes, dt2, rate;
	u32 rx_pkts, rx_marked;
	int i, factor;
	ktime_t now = ktime_get();
	struct iso_rx_context *rxctx = vq->rxctx;

	dt2 = ktime_us_delta(now, vq->last_update_time);
	if(dt2 < ISO_VQ_UPDATE_INTERVAL_US)
		return;

	vq->last_update_time = now;
	rx_bytes = 0;
	factor = 0;
	rx_pkts = 0;
	rx_marked = 0;

	/* Assimilate and reset the per-cpu counters. */
	for_each_online_cpu(i) {
		struct iso_vq_stats *stats = per_cpu_ptr(vq->percpu_stats, i);
		rx_bytes += stats->rx_bytes;
		stats->bytes_queued = 0;
		rx_pkts += stats->rx_since_last_feedback;
		rx_marked += stats->rx_marked_since_last_feedback;
		stats->rx_since_last_feedback = 0;
		stats->rx_marked_since_last_feedback = 0;
	}

	if (unlikely(rx_pkts == 0)) {
		if (net_ratelimit())
			printk(KERN_INFO "EyeQ: BUG: rx_pkts is 0, but it shouldn't be.\n");
		rx_pkts = 1;
	}

	/* The VQ's allotted rate: its weighted share of the context's RCP rate. */
	rate = vq->weight * rxctx->rcp_rate;

	/* If we want to cap a VQ's rate, do it now. */
	if (vq->is_static) {
		rate = min_t(u64, rate, vq->rate);
	}

	/* The control algorithms. */
	{
		/* RCP calculation. */
		{
			u64 diff = rx_bytes - vq->last_rx_bytes;
			int rx_rate = (diff << 3) / dt;

#define ECN1
#ifdef ECN1
			/* ECN1 is the preferred way of incorporating ECN
			 * feedback.  It is better than ECN2 because it
			 * explicitly accounts for the rate mismatch at the
			 * bottleneck queue. */
			u32 frac = (rx_marked << ECN_ALPHA_FRAC_SHIFT) / rx_pkts;
			u32 den = (1 << ECN_ALPHA_FRAC_SHIFT);

			/* Safeguard against races. */
			frac = min_t(u32, den, frac);
			vq->alpha = EWMA_G16(vq->alpha, frac);

			if (frac) {
				rx_rate += (ISO_ECN_MARK_THRESH_BYTES << 3) * (den + frac) / den / dt;
				rx_rate = min_t(int, rx_rate, 3 * rate);
			}
#endif

			if (ISO_VQ_DRAIN_RATE_MBPS <= ISO_MAX_TX_RATE) {
				u32 rate2 = (rate << 1);
				vq->feedback_rate = vq->feedback_rate * (rate2 + rate - rx_rate) / rate2;
				vq->feedback_rate = min_t(u64, rate, vq->feedback_rate);
				vq->feedback_rate = max_t(u64, ISO_MIN_RFAIR, vq->feedback_rate);
			} else {
				vq->feedback_rate = ISO_MAX_TX_RATE;
			}

			vq->rx_rate = rx_rate;
			vq->last_rx_bytes = rx_bytes;
		}
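		/*
		 * Worked example of the RCP step above (numbers illustrative):
		 * with rate = 1000 Mb/s and a measured rx_rate of 1200 Mb/s,
		 * the multiplier is (2000 + 1000 - 1200) / 2000 = 0.9, so the
		 * advertised feedback_rate shrinks by 10%.  An rx_rate of
		 * 800 Mb/s would grow it by 10% instead, always clamped to
		 * [ISO_MIN_RFAIR, rate].
		 */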
#ifdef ECN2
		/* ECN2: multiplicative decrease driven by the marked fraction,
		 * i.e. feedback_rate *= (1 - f/2) with f = rx_marked/rx_pkts. */
		{
			u32 frac = (rx_marked << ECN_ALPHA_FRAC_SHIFT) / rx_pkts;
			u32 mult = 1 << (ECN_ALPHA_FRAC_SHIFT + 1);

			/* Safeguard against races. */
			frac = min_t(u32, (1 << ECN_ALPHA_FRAC_SHIFT), frac);
			vq->alpha = EWMA_G16(vq->alpha, frac);

			vq->feedback_rate = (vq->feedback_rate * (mult - frac)) >> (ECN_ALPHA_FRAC_SHIFT + 1);
			vq->feedback_rate = min_t(u64, ISO_VQ_DRAIN_RATE_MBPS, vq->feedback_rate);
			vq->feedback_rate = max_t(u64, ISO_MIN_RFAIR, vq->feedback_rate);
		}
#endif
	}
}
/* Dump one VQ's state through the seq_file interface, followed by a
 * per-cpu breakdown of the counters that are currently non-zero. */
void iso_vq_show(struct iso_vq *vq, struct seq_file *s) {
	char buff[128];
	int first = 1, i;
	struct iso_vq_stats *stats;

	iso_class_show(vq->klass, buff);
	seq_printf(s, "vq class %s flags %d,%d rate %llu rx_rate %llu fb_rate %llu alpha %u/%u "
		   " backlog - weight %llu refcnt %d\n",
		   buff, vq->enabled, vq->is_static,
		   vq->rate, vq->rx_rate, vq->feedback_rate, vq->alpha, (1 << 10),
		   vq->weight, atomic_read(&vq->refcnt));

	for_each_online_cpu(i) {
		if(first) {
			first = 0;
			seq_printf(s, "\t cpu enqueued network-mark rx\n");
		}

		stats = per_cpu_ptr(vq->percpu_stats, i);
		if(stats->bytes_queued > 0 || stats->network_marked > 0) {
			seq_printf(s, "\t %3d %8llu %12llu %llu\n",
				   i, stats->bytes_queued, stats->network_marked, stats->rx_bytes);
		}
	}
}
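/*
 * Illustrative output of iso_vq_show() (values made up; <klass> depends on
 * the classifier's iso_class_show() format):
 *
 *	vq class <klass> flags 1,0 rate 3000 rx_rate 2810 fb_rate 2950 alpha 37/1024  backlog - weight 1 refcnt 0
 *		 cpu enqueued network-mark rx
 *		   2    45000           12 981273445
 */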
/* Local Variables: */
/* indent-tabs-mode:t */
/* End: */