/*
 * eBPF steering program (section "tun_rss_steering").
 *
 * Parses the Ethernet / IPv4 / IPv6 / TCP / UDP headers of a packet, hashes
 * its address 2-tuple or address+port 4-tuple with crc32_hash(), and returns
 * the resulting queue index.
 */
#include <linux/in.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_ether.h>
#include "libendian.h"
#include "crc32_hash.h"
#include "bpf_config_kernel.h"
/* Upper bound on how many chained IPv6 extension headers parse_ipv6_extension() walks before giving up. */
#define IP6_EXTENSIONS_COUNT 11
/*
 * Per-packet scratch state: the fields parse_packet() extracts from the frame
 * plus the intermediate and final hash values written by select_rss_queue().
 */
struct packet
{
/* Set to 1 when the EtherType is ETH_P_IP. */
__u8 is_ipv4;
/* Set to 1 when the EtherType is ETH_P_IPV6. */
__u8 is_ipv6;
/* Set to 1 when the L4 protocol is IPPROTO_UDP. */
__u8 is_udp;
/* Set to 1 when the L4 protocol is IPPROTO_TCP. */
__u8 is_tcp;
/* Non-zero for an IPv4 fragment (MF set or offset != 0) or when an IPv6 Fragment header was seen. */
__u8 is_fragmented;
/* L4 ports copied verbatim from the TCP/UDP header (the dump helpers convert them with bpf_ntohs). */
__u16 src_port;
__u16 dst_port;
/* IPv4 addresses copied verbatim from iphdr.saddr / iphdr.daddr. */
__u32 in4_src;
__u32 in4_dst;
/* IPv6 addresses copied from ipv6hdr.saddr / ipv6hdr.daddr. */
struct in6_addr in6_src;
struct in6_addr in6_dst;
/* crc32_hash() of the source / destination address. */
__u32 src_addr_hash;
__u32 dst_addr_hash;
/* crc32_hash() of each port, seeded with the combined address hash (only computed in TUPLE4 mode). */
__u32 src_port_hash;
__u32 dst_port_hash;
/* Final combined hash that is reduced modulo the queue count. */
__u32 last_hash;
/* Chosen queue index; select_rss_queue() resets it to -1 before computing it. */
int select_queue;
/* The socket buffer being steered. */
struct __sk_buff *skb;
};
/*
 * Debug helper: trace the IPv4 source/destination addresses stored in
 * `packet`, followed by a byte-by-byte hex dump of the fixed IPv4 header.
 *
 * packet: parsed packet state (in4_src, in4_dst and skb are read).
 * ip4:    copy of the IPv4 header to dump; sizeof(*ip4) bytes are printed.
 */
static inline void dump_ipv4_header(struct packet *packet, struct iphdr *ip4)
{
    bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF);
    bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF);
    bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF);
    bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF);

    /*
     * Read the raw bytes as unsigned char: with a plain (possibly signed)
     * char, any byte >= 0x80 is sign-extended on promotion and would be
     * printed as ffffffXX instead of XX.
     */
    const unsigned char *bytes = (const unsigned char *)ip4;
    int len = sizeof(*ip4);
    for (int i = 0; i < len; i++)
    {
        bpf_printk("tun_rss_steering: ipv4 %p header hex[%d]: %0x", packet->skb, i, bytes[i]);
    }
}
/*
 * Debug helper: trace the IPv6 source/destination addresses stored in
 * `packet` (as eight 16-bit groups each), followed by a byte-by-byte hex dump
 * of the fixed IPv6 header.
 *
 * packet: parsed packet state (in6_src, in6_dst and skb are read).
 * ip6:    copy of the IPv6 header to dump; sizeof(*ip6) bytes are printed.
 */
static inline void dump_ipv6_packet(struct packet *packet, struct ipv6hdr *ip6)
{
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1]));
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3]));
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5]));
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7]));

    /*
     * Read the raw bytes as unsigned char: with a plain (possibly signed)
     * char, any byte >= 0x80 is sign-extended on promotion and would be
     * printed as ffffffXX instead of XX.
     */
    const unsigned char *bytes = (const unsigned char *)ip6;
    int len = sizeof(*ip6);
    for (int i = 0; i < len; i++)
    {
        bpf_printk("tun_rss_steering: ipv6 %p header hex[%d]: %0x", packet->skb, i, bytes[i]);
    }
}
/*
 * Debug helper: trace everything known about a parsed packet -- addresses,
 * ports (converted to host order), the intermediate hashes and the selected
 * queue. The IPv4 and IPv6 sections are printed independently according to
 * packet->is_ipv4 / packet->is_ipv6.
 *
 * packet: fully parsed and hashed packet state.
 * config: currently unused; kept so the signature stays stable for callers.
 */
static inline void dump_packet_info(struct packet *packet, struct bpf_config *config)
{
    (void)config;

    if (packet->is_ipv4)
    {
        bpf_printk("tun_rss_steering ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port));
        /* The hashes are __u32: print them unsigned so values >= 2^31 do not show up as negative numbers. */
        bpf_printk("tun_rss_steering ipv4 %p src_addr_hash: %u dst_addr_hash: %u", packet->skb, packet->src_addr_hash, packet->dst_addr_hash);
        bpf_printk("tun_rss_steering ipv4 %p src_port_hash: %u dst_port_hash: %u", packet->skb, packet->src_port_hash, packet->dst_port_hash);
        bpf_printk("tun_rss_steering ipv4 %p last_hash: %u select_queue: %d", packet->skb, packet->last_hash, packet->select_queue);
    }

    if (packet->is_ipv6)
    {
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1]));
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3]));
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5]));
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7]));
        bpf_printk("tun_rss_steering ipv6 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port));
        /* The hashes are __u32: print them unsigned so values >= 2^31 do not show up as negative numbers. */
        bpf_printk("tun_rss_steering ipv6 %p src_addr_hash: %u dst_addr_hash: %u", packet->skb, packet->src_addr_hash, packet->dst_addr_hash);
        bpf_printk("tun_rss_steering ipv6 %p src_port_hash: %u dst_port_hash: %u", packet->skb, packet->src_port_hash, packet->dst_port_hash);
        bpf_printk("tun_rss_steering ipv6 %p last_hash: %u select_queue: %d", packet->skb, packet->last_hash, packet->select_queue);
    }
}
/*
 * Debug helper: when debug logging is enabled in `config`, trace the three
 * configuration values (debug flag, queue count, hash mode). Prints nothing
 * otherwise.
 */
static inline void dump_config_info(struct bpf_config *config)
{
    /* Nothing to report unless debug logging is switched on. */
    if (!bpf_config_get_debug_log(config))
    {
        return;
    }

    bpf_printk("tun_rss_steering: config->bpf_debug_log %d", bpf_config_get_debug_log(config));
    bpf_printk("tun_rss_steering: config->bpf_queue_num %d", bpf_config_get_queue_num(config));
    bpf_printk("tun_rss_steering: config->bpf_hash_mode %d", bpf_config_get_hash_mode(config));
}
/*
 * Decide whether `hdr_type` is an IPv6 extension header this program walks.
 *
 * return 1: not a handled extension header -- stop walking
 * return 0: a handled extension header -- it must be parsed and stepped over
 */
static inline int ipv6_extension_need_skip(__u8 hdr_type)
{
    /*
     * TODO
     * kni_ipv6_header_parse() only handles the following 4 IPv6 extension
     * headers:
     *   IPPROTO_AH
     *   IPPROTO_HOPOPTS
     *   IPPROTO_ROUTING
     *   IPPROTO_DSTOPTS
     *
     * In other words, the IPv6 traffic KNI hands back to TFE only supports
     * those 4 extension headers, and the IPv6 traffic TFE re-injects into KNI
     * will not contain any other extension header, so BPF only handles those
     * 4 here.
     *
     * Because BPF must support 4-tuple steering it also has to know whether
     * an IPv6 packet is fragmented, hence IPPROTO_FRAGMENT is handled too.
     */
    int is_handled_extension = hdr_type == IPPROTO_AH ||
                               hdr_type == IPPROTO_HOPOPTS ||
                               hdr_type == IPPROTO_ROUTING ||
                               hdr_type == IPPROTO_DSTOPTS ||
                               hdr_type == IPPROTO_FRAGMENT;

    return is_handled_extension ? 0 : 1;
}
/*
 * Walk the chain of IPv6 extension headers that follows the fixed IPv6
 * header until the first header type that ipv6_extension_need_skip() does
 * not recognise as a handled extension (normally the L4 protocol).
 *
 * packet:      packet state; skb is read, is_fragmented is set to 1 when a
 *              Fragment header is encountered.
 * l4_protocol: in: the "next header" value of the fixed IPv6 header;
 *              out: the first non-extension protocol found.
 * l4_offset:   in: offset (from the network header) of the first extension
 *              header; out: offset of the header identified by *l4_protocol.
 *
 * Returns 0 on success, -1 if an extension header cannot be loaded or the
 * chain is longer than IP6_EXTENSIONS_COUNT headers.
 */
static inline int parse_ipv6_extension(struct packet *packet, __u8 *l4_protocol, int *l4_offset)
{
    if (ipv6_extension_need_skip(*l4_protocol))
    {
        return 0;
    }

    struct ipv6_opt_hdr ext_hdr = {0};
    for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i)
    {
        if (bpf_skb_load_bytes_relative(packet->skb, *l4_offset, &ext_hdr, sizeof(ext_hdr), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get ipv6 ext header");
            return -1;
        }

        /*
         * The length encoding depends on the header being stepped over
         * (*l4_protocol still names the header just loaded):
         *  - Fragment: fixed 8 bytes; its second byte is reserved, not a length.
         *  - AH:       length field counts 32-bit words minus 2 (RFC 4302).
         *  - others:   length field counts 8-byte units minus 1 (RFC 8200).
         */
        int ext_len;
        if (*l4_protocol == IPPROTO_FRAGMENT)
        {
            packet->is_fragmented = 1;
            ext_len = 8;
        }
        else if (*l4_protocol == IPPROTO_AH)
        {
            ext_len = (ext_hdr.hdrlen + 2) * 4;
        }
        else
        {
            ext_len = (ext_hdr.hdrlen + 1) * 8;
        }

        *l4_protocol = ext_hdr.nexthdr;
        *l4_offset += ext_len;

        if (ipv6_extension_need_skip(ext_hdr.nexthdr))
        {
            return 0;
        }
    }

    /* More chained extension headers than we are willing to walk. */
    return -1;
}
/*
 * Parse the L3 and L4 headers of packet->skb and fill in `packet`.
 *
 * On success the address family flag (is_ipv4 / is_ipv6), the addresses, the
 * L4 flag (is_tcp / is_udp) and the ports are populated. Addresses and ports
 * are stored exactly as they appear in the headers.
 *
 * packet: state to fill; packet->skb must already point at the buffer.
 * config: consulted only to decide whether to hex-dump fragmented packets.
 *
 * Returns 0 on success. Returns -1 when the arguments are NULL, a header
 * cannot be loaded, the IPv4 header length is invalid, the packet is an
 * IPv4/IPv6 fragment, or the L3/L4 protocol is not IPv4/IPv6 + TCP/UDP.
 */
static inline int parse_packet(struct packet *packet, struct bpf_config *config)
{
    /* The EtherType follows the two 6-byte MAC addresses. */
    int l3_offset = 12;
    int l4_offset = 0;
    __u8 l4_protocol = 0;
    __u16 l3_protocol = 0;

    if (!packet || !packet->skb)
    {
        bpf_printk("tun_rss_steering: invalid __sk_buff pointer");
        return -1;
    }

    if (bpf_skb_load_bytes_relative(packet->skb, l3_offset, &l3_protocol, sizeof(l3_protocol), BPF_HDR_START_MAC))
    {
        bpf_printk("tun_rss_steering: unable get l3 protocol");
        return -1;
    }

    if (bpf_ntohs(l3_protocol) == ETH_P_IP)
    {
        packet->is_ipv4 = 1;

        struct iphdr ip = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip, sizeof(ip), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get ipv4 header");
            return -1;
        }

        /*
         * IHL counts 32-bit words and the fixed header alone is 5 words.
         * A smaller value is malformed and would make the L4 offset point
         * inside the IP header, so the "ports" would be IP header bytes.
         */
        if (ip.ihl < 5)
        {
            bpf_printk("tun_rss_steering: invalid ipv4 header length %d", ip.ihl);
            return -1;
        }

        packet->in4_src = ip.saddr;
        packet->in4_dst = ip.daddr;

        /*
         * The frag_off portion of the header consists of:
         * +----+----+----+----------------------------------+
         * | RS | DF | MF | ...13 bits of fragment offset... |
         * +----+----+----+----------------------------------+
         * If "More fragments" or the offset is nonzero, then this is an IP fragment (RFC791).
         */
        packet->is_fragmented = !!(bpf_ntohs(ip.frag_off) & (0x2000 | 0x1fff));
        l4_protocol = ip.protocol;
        l4_offset = ip.ihl * 4;

        /* Fragments are rejected: the caller treats -1 as "do not hash". */
        if (packet->is_fragmented)
        {
            bpf_printk("tun_rss_steering: ipv4 %p is fragmented", packet->skb);
            if (bpf_config_get_debug_log(config))
            {
                dump_ipv4_header(packet, &ip);
            }
            return -1;
        }
    }
    else if (bpf_ntohs(l3_protocol) == ETH_P_IPV6)
    {
        packet->is_ipv6 = 1;

        struct ipv6hdr ip6 = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip6, sizeof(ip6), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get ipv6 header");
            return -1;
        }

        packet->in6_src = ip6.saddr;
        packet->in6_dst = ip6.daddr;
        l4_protocol = ip6.nexthdr;
        l4_offset = sizeof(ip6);

        /* Step over any extension headers to reach the real L4 header. */
        if (parse_ipv6_extension(packet, &l4_protocol, &l4_offset) == -1)
        {
            return -1;
        }

        /* Fragments are rejected: the caller treats -1 as "do not hash". */
        if (packet->is_fragmented)
        {
            bpf_printk("tun_rss_steering: ipv6 %p is fragmented", packet->skb);
            if (bpf_config_get_debug_log(config))
            {
                dump_ipv6_packet(packet, &ip6);
            }
            return -1;
        }
    }
    else
    {
        bpf_printk("tun_rss_steering: invalid l3 protocol %d", bpf_ntohs(l3_protocol));
        return -1;
    }

    if (l4_protocol == IPPROTO_TCP)
    {
        packet->is_tcp = 1;

        struct tcphdr tcp = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &tcp, sizeof(tcp), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get tcp header");
            return -1;
        }

        packet->src_port = tcp.source;
        packet->dst_port = tcp.dest;
    }
    else if (l4_protocol == IPPROTO_UDP)
    {
        packet->is_udp = 1;

        struct udphdr udp = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &udp, sizeof(udp), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get udp header");
            return -1;
        }

        packet->src_port = udp.source;
        packet->dst_port = udp.dest;
    }
    else
    {
        bpf_printk("tun_rss_steering: invalid l4 protocol %d", l4_protocol);
        return -1;
    }

    return 0;
}
/*
 * Private helper for select_rss_queue(): combine the address hashes already
 * stored in `packet` (optionally folding in both ports) and reduce the
 * result modulo the configured queue count.
 *
 * with_ports: non-zero to hash the ports as well (4-tuple mode).
 */
static inline void rss_combine_and_pick_queue(struct packet *packet, struct bpf_config *config, int with_ports)
{
    /* Adding the two hashes keeps the result independent of direction. */
    packet->last_hash = packet->src_addr_hash + packet->dst_addr_hash;

    if (with_ports)
    {
        /* Each port is hashed with the combined address hash as its seed. */
        packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash);
        packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash);
        packet->last_hash = packet->src_port_hash + packet->dst_port_hash;
    }

    packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
}

/*
 * Compute packet->select_queue from the parsed addresses (and, in
 * BPF_HASH_MODE_TUPLE4, the ports).
 *
 * select_queue is left at -1 when the packet is neither IPv4 nor IPv6 or the
 * configured hash mode is neither TUPLE2 nor TUPLE4. The intermediate hashes
 * are stored in `packet` so they can be traced afterwards.
 */
static inline void select_rss_queue(struct packet *packet, struct bpf_config *config)
{
    packet->select_queue = -1;

    if (packet->is_ipv4)
    {
        int use_ports = (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4);

        if (use_ports || bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2)
        {
            packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0);
            packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0);
            rss_combine_and_pick_queue(packet, config, use_ports);
        }
    }

    if (packet->is_ipv6)
    {
        int use_ports = (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4);

        if (use_ports || bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2)
        {
            packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0);
            packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0);
            rss_combine_and_pick_queue(packet, config, use_ports);
        }
    }
}
/*
 * Program entry point: pick a queue index for `skb`.
 *
 * Loads the configuration, validates it, parses the packet and hashes it.
 * Returns the selected queue index, or 0 whenever the configuration is
 * invalid or the packet cannot be parsed (see parse_packet()).
 */
SEC("tun_rss_steering")
int bpf_tun_rss_steering(struct __sk_buff *skb)
{
    struct bpf_config config = {0};
    struct packet packet = {0}; /* every flag starts out cleared */

    bpf_config_lookup_map(&config);
    dump_config_info(&config);

    /* A non-positive queue count cannot be used as a modulus. */
    if (bpf_config_get_queue_num(&config) <= 0)
    {
        bpf_printk("tun_rss_steering: invalid queue num %d", bpf_config_get_queue_num(&config));
        return 0;
    }

    /* Only the 2-tuple and 4-tuple hash modes are implemented. */
    if (!(bpf_config_get_hash_mode(&config) == BPF_HASH_MODE_TUPLE2 || bpf_config_get_hash_mode(&config) == BPF_HASH_MODE_TUPLE4))
    {
        bpf_printk("tun_rss_steering: invalid hash mode %d", bpf_config_get_hash_mode(&config));
        return 0;
    }

    packet.skb = skb;

    /* Anything unparseable falls back to queue 0. */
    if (parse_packet(&packet, &config) == -1)
    {
        return 0;
    }

    select_rss_queue(&packet, &config);

    if (bpf_config_get_debug_log(&config))
    {
        dump_packet_info(&packet, &config);
    }

    return packet.select_queue;
}
/* License string placed in the object's "license" section. */
char _license[] SEC("license") = "GPL";