-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathdebloat
executable file
·2030 lines (1628 loc) · 59.7 KB
/
debloat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/lua
-- Use of various qdiscs for ethernet and wireless
-- This script expects to be run in /etc/network/if-pre-up.d To run it
-- manually, do a IFACE=yournetworkcard ./debloat
-- For NATTED interfaces, use a NAT=y for a better filter
-- To select QFQ use QMODEL=qfq.
-- Some options currently require a new version of tc. Build a version
-- of iproute2 and stick it somewhere and change the TC variable to
-- suit.
-- Names of the settings that can be overridden from the environment.
-- Each entry is looked up as an environment variable of the same name.
params = {
  "MDISC", "BIGDISC", "NORMDISC", "BINS",
  "MAX_HWQ_BYTES", "QMODEL", "FORCE_SPEED", "FORCE_RING",
  "QDEBUG", "VERBOSE", "UPLINK", "DOWNLINK",
  "IFACE", "SPEED", "DEPTH", "DEBLOATLOG",
  "NAT", "NAT64", "CLAMP_MSS", "SYN_FLOOD",
  "MTU", "INGRESS", "TC", "TCARG",
  "ETHTOOL", "INSMOD", "LSMOD", "IPTABLES",
  "IP6TABLES", "PINGOPT", "FLOWS", "TSO",
  "BURST", "IPV6", "OVERHEAD", "LINKLAYER",
  "MPU", "PPPOE", "ADSL", "NOPRIOHOSTSRC",
  "NOPRIOHOSTDST", "NOPRIOPORTSRC", "NOPRIOPORTDST", "EST_MIN",
  "EST_MAX", "HEADDROP", "ECNMASK", "qlen_vo",
  "qlen_vi", "qlen_be", "qlen_bk", "CODEL_LL_QUANTUM",
}
-- Default settings. Entries named in `params` may be replaced later by
-- values taken from the environment.
env = {
  TC = "/sbin/tc",
  TCARG = "-b",
  INSMOD = "/sbin/modprobe",
  ETHTOOL = "/sbin/ethtool",
  LSMOD = "/sbin/lsmod",
  IPTABLES = "/sbin/iptables",
  IP6TABLES = "/sbin/ip6tables",
  MDISC = "codel",
  BIGDISC = "codel",
  NORMDISC = "codel",
  BINS = 2048,
  DEPTH = 24,
  QMODEL = "fq_codel_ll",
  MAX_HWQ_BYTES = 3000,
  ECNMASK = 0xfc,
  EST_MIN = 1,
  EST_MAX = 4,
  IPV6 = true,
  LINKLAYER = "ethernet",
  DEBLOATLOG = "/dev/null",
  qlen_vo = 2,
  qlen_vi = 4,
  qlen_be = 12,
  qlen_bk = 12,
  MTU = 1500,
  CODEL_LL_QUANTUM = 1000,
}
-- Base path used for wireless debug files
wireless_debug = "/sys/kernel/debug/ieee80211/"
-- Names of the per-queue length settings (these are also keys of env)
qlens = { "qlen_vo", "qlen_vi", "qlen_bk", "qlen_be" }
-- Short local aliases for frequently used library functions
-- FIXME, override these to redirect stderr
local sf, exec, popen, open = string.format, os.execute, io.popen, io.open
-- Queue identifiers (globals) and the order in which they are listed
VO, VI, BE, BK = 0x10, 0x20, 0x30, 0x40
local WQUEUES = { BE, VO, VI, BK }
-- Print the built-in help text, then the message `s`, and terminate the
-- process with os.exit(-1). This function does not return.
--   s: message printed after the help text (callers pass an error string)
local function usage(s)
  -- Keep the help text in a local; the original assigned it to an
  -- undeclared name and so created a stray global `o`.
  local o = [[
The debloat tool aims for minimal latency (particularly under load) on
the network, for hosts, servers, wireless devices, and routers.
There are various forms of traffic shapers and tools in here because
this is an unsolved problem! Most of the known techniques are in here,
however, and the results can be quite remarkable. At tested rates of
100Mbit and 4Mbit, we see interstream latencies drop by over two
orders of magnitude.
This script expects to be run in /etc/network/if-pre-up.d
To run it manually, do a:
IFACE=yournetworkcard ./this_script
For NATTED interfaces, use a NAT=y for a better filter.
There are many environment variables and at some point will be a conf
file. The one of greatest importance is "QMODEL" to which I keep
adding various models for various circumstances. See the end of this
file for more details.
This script can be run on both debian and openwrt.
Usage of QFQ and the advanced SFQ and SFQRED options currently
requires a patched version of iproute2 and a Linux 3.3 kernel.
Build a version and stick it somewhere and change TC to suit.
Also, if you are interested in seeing the rules being generated,
rather than reconfiguring your system
export QDEBUG=1
is helpful.
* Some general overall design notes:
This started out life as a shell script to exercise qfq,
Now it does a lot more than that and is getting crufty.
FQ_CODEL is now the default. SFQ has been improved significantly
in Linux 3.3 (eliminating a head of line problem), and in this case
no new TC utility is required. Also a bug in red was fixed, and no
new tc utility is required there either. So if you were using either
or both of these qdiscs, you should automagically see your life
improve...
QFQ is too buggy prior to 3.3 to use.
More advanced SFQ options and REDSFQ and QFQ all require a patched
version of TC. Also, most builds for the linux kernel do not
enable QFQ by default. QFQ and SFQ are behaving competitively now
in most circumstances, however.
* Byte Queue Limits is supposed to have a rate limiter that works.
It is not very effective at less than 100Mbit. I get ~32k peak there
and with GSO on, at 100Mbit, I have seen latency spikes of up to 70ms.
(Not recently tested, however)
A per queue limit of 2 large packets appears to be the best
compromise at 100Mbit and below. So typically I hammer down BQL to
3k at < 100Mbit, and turn GSO/TSO off, and as a result see
ping against load latencies in the 1 to 2ms range, which is about
what you would expect. I have tried 1500 bytes, which limited the top
end performance to about 84Mbit. At 10Mbit, 1514 works on most OSes.
For comparison, you will see PFIFO_FAST doing 130+ms, pre BQL, no
SFQ at 100Mbit.
* A BQL enabled ethernet device driver is helpful
But there is currently no good way to detect if you have one at run
time. 10 of the most major drivers have been convered to BQL, more
remain.
* Wireless still has problems
This stuff helps on wireless stations, desktops, etc, and on P2P
wireless links.
** caveat 1
There remains so much device buffering and retries below the qdisc
layer as to defeat both FQ and and AQM to a large extent. Also packets
tend to be held 'forever' (ping rtts of over 10 seconds have been
observed)
A time in queue optimization at the qdisc layer for the latter problem
has been proposed, but not implemented, and much further work on the
wireless driver portion of the stack remains to be designed and agreed
upon.
BQL has not (and cannot, to a large extent) be implemented on the
wireless portion of the stack as it currently stands.
** caveat 2
There is not a particularly good way to apply much of this to the
wireless interface on an AP as yet. FQ messes with wireless-n packet
aggregation. That said, under home use with a limited number of user,
SFQ+RED does seem to work pretty good.
* Some QFQ related notes:
** QFQ can handle up to 32k bins
Whether you are willing to wait for them to be generated is a better
question. How this interacts with bittorrent etc is also a good
question. 512 is 4x as many bins as the old SFQ implementation.
I have tested as many as 2048 bins, problems ensue with kernel
memory allocation at various levels higher than that.
The 'bin creation' problem is why this code uses tc in batch mode. It
used to take minutes to create the bins. Now, a split second. (there
was also a patch that helped this in 3.3)
** Various sub-qdiscs in QFQ
I have tried pfifo_drop_head, SFB, and RED here. All had bugs until
3.3. And linux RED & SFB, being byte oriented, was often not good.
pfifo_drop_head generates interesting results.
The very new combination of REDSFQ which compensates for both bytes
and packets was very interesting, as it combines everything we have
learned in the past year into one single qdisc which can be brought up
as a shaper in three lines of code.
FQ_Codel is better.
In other news:
I have not tried the new 'adaptive red' implementation as a stand
alone qdisc, nor revisited SFB in light about what I now know about
GSO behavior.
I would like to try QFQ and SFQ in combination to attempt to defeat
the bittorrent problem at some point.
** Calculating a sane per-queue packet limit is an issue, too.
iw10 requires a minimum of 10, and more likely 12 (fin, close) so...
In places we arbitrarily double that, and wave hands. I almost never
see packet drop with 24, which is far, far better than 1000. Might
need to be larger on gigE+. Might be wrong headed entirely.
** Multicast
We try to maltreat multicast especially in the QFQ implementation.
When handed to a load balancing filter based on IPs, multicast
addresses are all over the map. It would be trivial to do a DOS with
this multi-bin setup. So we toss all multicast into a single bin
whenever possible. This is suboptimal, also. It would be good
to get multicast into the VO queue on wireless but bugs exist.
Multicast concerns me also when using SFQ on general purpose ethernet.
** Default Bins
You can do tricks with the DEFAULTB concept, creating a filter to
optimize for ping, for example, which makes tests reproducable. (this
is done for wshaper and QFQ) Another example would be to set aside
bins for voip or dns, etc. Still, it is saner to just let the filter
do all the work of finding a decent bin.
The only sane purpose for DEFAULTB at the moment is to have a safe
place to put QFQ packets until all the filters and bins are setup.
* Other important debloat options
There are many environment variables that can be set. Most
notably - the QMODEL var has various forms of AQM/FQ/shaper available.
Available QMODELS are qfq, sfq, sfqred, efq and various combinations
thereof, as well as a hard coded 4mbit htb_sfq_red model, and emulations
of the original wondershaper and a mildly improved one. See the
tail end of the code for what is available.
Most work on either ethernet or wireless and try to deal with
the problems of each.
Usage of QFQ and the advanced SFQ options currently requires a new
version of iproute2 and a Linux 3.3 kernel and some patches.
A byte Queue limit enabled device driver is required for ethernet,
except for when the HTB rate limiter is used.
In all cases a Linux 3.3 or later kernel is required for best results.
]]
  print(o)
  -- print("Available Shaper Models Are:")
  -- for i,v in pairs (ECALLBACKS) do
  -- print(i)
  -- end
  print(s)
  os.exit(-1)
end
-- Placeholder hooks: both accept any arguments and return nil.
function pingopt(...)
  return nil
end

function warn(...)
  return nil
end
-- Return true when `name` can be opened for reading, false otherwise.
-- The handle is closed again immediately.
function file_exists(name)
  local handle = open(name, "r")
  if handle == nil then
    return false
  end
  handle:close()
  return true
end
-- Report whether this looks like an OpenWrt system. The only check made
-- is whether /etc/uci-defaults can be opened. Always returns a boolean.
local function is_openwrt()
  return not not file_exists("/etc/uci-defaults")
end
-- On OpenWrt the helper binaries live elsewhere, so replace the default
-- tool paths before environment overrides are applied.
if is_openwrt() then
  local openwrt_paths = {
    INSMOD = "/sbin/insmod",
    ETHTOOL = "/usr/sbin/ethtool",
    TC = "/usr/sbin/tc",
  }
  for key, path in pairs(openwrt_paths) do
    env[key] = path
  end
end
-- pull params from conf file
-- NOTE: not implemented; the body is empty, so a call does nothing and
-- returns no value.
local function getconf()
end
-- Read environment variable `v` and convert it to a Lua value.
-- os.getenv only yields strings, so the conversion is done here:
--   variable not set                      -> nil
--   begins with a digit and tonumber()
--   accepts the whole string              -> number
--   "true" / "false"                      -> boolean
--   anything else                         -> the string, unchanged
-- A value that begins with a digit but is not a number (for example
-- "192.168.1.1" or "10mbit") is returned as a string. Previously the
-- failed tonumber() result (nil) was returned, which made the caller
-- silently ignore the variable.
local function fromenv(v)
  local s = os.getenv(v)
  if s == nil then return nil end
  if string.match(s, "^%d") ~= nil then
    local n = tonumber(s)
    if n ~= nil then return n end
  end
  if s == "true" then return true end
  if s == "false" then return false end
  return s
end
-- Overlay environment values onto a settings table.
--   t: table whose values are the variable names to look up
--   o: table updated in place, keyed by variable name
-- Names for which fromenv() yields nil leave `o` untouched.
-- Returns `o`.
local function getenvs(t,o)
  for _, name in pairs(t) do
    local value = fromenv(name)
    if value ~= nil then
      o[name] = value
    end
  end
  return o
end
-- Apply environment overrides to the defaults. IFACE has no default, so
-- its absence ends the run through usage().
env = getenvs(params, env)
if env.IFACE == nil then
  usage("Error: The IFACE environment variable must be set")
end
-- Publish the most frequently used settings as globals
IFACE, QMODEL, BINS = env.IFACE, env.QMODEL, env.BINS
-- Two extra bin numbers placed directly after BINS
MULTICAST, DEFAULTB = BINS + 1, BINS + 2
-- Kernel module names used as prerequisites
PREREQS = {
  "sch_qfq", "sch_codel", "sch_fq_codel",
  "cls_u32", "cls_flow", "sch_sfq",
  "sch_red", "sch_htb", "cls_fw",
  "sch_efq_codel", "sch_ns2_codel", "sch_nfq_codel",
}
-- Module lists keyed by model name
-- we can get more complex later
PREREQS2 = {
  qfq = { "sch_qfq", "cls_u32", "cls_flow" },
  sfq = { "sch_sfq", "cls_u32", "cls_flow" },
  red = { "sch_qfq", "sch_red", "cls_u32", "cls_flow" },
  ared = { "sch_qfq", "sch_red", "cls_u32", "cls_flow" },
}
-- Build a set (value -> true) from the values of one or more tables.
-- Two calling forms are accepted:
--   merge(a, b, ...)     several list tables
--   merge({a, b, ...})   one table of list tables (the only form the
--                        original handled)
-- A value that is itself a table is expanded one level; any other value
-- becomes a key of the result. Non-table arguments are ignored.
-- The original looped over pairs(...), which only ever looks at the
-- first argument, so merge(a, b) failed.
function merge(...)
  local set = { }
  for n = 1, select("#", ...) do
    local list = select(n, ...)
    if type(list) == "table" then
      for _, v in pairs(list) do
        if type(v) == "table" then
          for _, inner in pairs(v) do
            set[inner] = true
          end
        else
          set[v] = true
        end
      end
    end
  end
  return set
end
-- Read a whole file.
-- Returns the contents as one string, or nil if the file cannot be opened.
function slurpf(file)
  local handle = open(file,"r")
  if handle == nil then
    return nil
  end
  local contents = handle:read("*all")
  handle:close()
  return contents
end
-- Start `command` and write `s` to its standard input.
-- Returns the result of the write, or nil if the command could not be
-- started.
function spewc(command,s)
  local pipe = popen(command,"w")
  if pipe == nil then
    return nil
  end
  local result = pipe:write(s)
  pipe:close()
  return result
end
-- Open `file` for writing and write `s` to it.
-- Returns the result of the write, or nil if the file cannot be opened
-- (callers use the nil to detect a missing path).
function spewf(file,s)
  local handle = open(file,"w")
  if handle == nil then
    return nil
  end
  local result = handle:write(s)
  handle:close()
  return result
end
-- Read a file and split it on newlines.
-- Returns a table of lines, or nil if the file cannot be read.
function tslurpf(file)
  local text = slurpf(file)
  if text == nil then
    return nil
  end
  return text:split("\n")
end
-- Run `command` and collect everything it writes to standard output.
-- Returns the output as one string, or nil if the command could not be
-- started.
function slurpc(command)
  local pipe = popen(command,"r")
  if pipe == nil then
    return nil
  end
  local output = pipe:read("*all")
  pipe:close()
  return output
end
-- Run `command` and split its output on newlines.
-- Returns a table of lines, or nil if slurpc() returned nil.
function tslurpc(command)
  local output = slurpc(command)
  if output == nil then
    return nil
  end
  return output:split("\n")
end
-- Some utility functions
-- Classify an interface by name, falling back to sysfs for wireless.
-- Names cannot be relied on to look like 'wlan*' or 'eth*', so wireless
-- is detected through /sys/class/net/<iface>/phy80211/name.
-- Checks are made in this order, first match wins:
--   'lo' -> 'localhost', 'ifb*' -> 'ifb', any '.' -> 'vlan',
--   'gre*' -> 'tunnel', 'br*' -> 'bridge', phy80211 present -> 'wireless',
--   otherwise 'ethernet'.
-- FIXME: This needs to be made smarter and detect other forms
-- of tunnel.
function interface_type(iface)
  local first_two, first_three = iface:sub(1,2), iface:sub(1,3)
  if iface == 'lo' then
    return 'localhost'
  elseif first_three == 'ifb' then
    return 'ifb'
  elseif iface:find('%.') ~= nil then
    return 'vlan'
  elseif first_three == 'gre' then
    return 'tunnel'
  elseif first_two == 'br' then
    return 'bridge'
  end
  local phy_name = sf("/sys/class/net/%s/phy80211/name",iface)
  if file_exists(phy_name) then
    return 'wireless'
  end
  return 'ethernet'
end
-- Start ethtool and return a read pipe on its output.
-- The arguments are a string.format() format plus values that together
-- form the ethtool argument string; stderr is appended to env.DEBLOATLOG.
-- Returns whatever popen returns.
local function ethtool_popen(...)
  local arguments = sf(...)
  local cmdline = sf("%s %s 2>> %s",env.ETHTOOL,arguments,env.DEBLOATLOG)
  return popen(cmdline,"r")
end
-- Run ethtool. The arguments are a string.format() format plus values
-- that together form the ethtool argument string; stderr is appended to
-- env.DEBLOATLOG. Nothing is returned.
local function ethtool(...)
  local arguments = sf(...)
  local cmdline = sf("%s %s 2>> %s",env.ETHTOOL,arguments,env.DEBLOATLOG)
  exec(cmdline)
end
-- Lua has no built-in split, so add one as a string method.
-- Splits the string on any character in `sep` (default ":") and returns
-- the non-empty pieces as a list; runs of separators yield no empty
-- entries. `sep` is inserted into a pattern character class unescaped.
function string:split(sep)
  local delimiter = sep or ":"
  local pieces = {}
  for piece in self:gmatch(string.format("([^%s]+)", delimiter)) do
    pieces[#pieces+1] = piece
  end
  return pieces
end
-- List the modules reported by the env.LSMOD command.
-- Takes the first space-separated word of every output line, skipping
-- the line whose first word is "Module". Returns a list of names.
local function lsmod()
  local names = { }
  for _, line in pairs(tslurpc(env.LSMOD)) do
    local first_word = line:split(" ")[1]
    if first_word ~= "Module" then
      table.insert(names,first_word)
    end
  end
  return names
end
-- Run "<env.INSMOD> <name>" for every value in `modules`.
-- The commands' results are not checked; always returns true.
local function insmod(modules)
  for _, name in pairs(modules) do
    local cmdline = sf("%s %s",env.INSMOD,name)
    exec(cmdline)
  end
  return true
end
-- Return true if any value stored in table `t` equals `s`, else false.
-- (Linear search over all keys; there may be a more Lua-specific way.)
function exists(t,s)
  local found = false
  for _, candidate in pairs(t) do
    if candidate == s then
      found = true
      break
    end
  end
  return found
end
-- Return the entries of `prereqs` that lsmod() does not report.
--   prereqs: table whose values are module names
-- Returns a list of the names that are not present.
function prereq_check(prereqs)
  -- Local on purpose: the original assigned this to an undeclared `s`,
  -- overwriting a global of that name on every call.
  local loaded = lsmod()
  local missing = { }
  for _, name in pairs(prereqs) do
    if exists(loaded,name) == false then
      table.insert(missing,name)
    end
  end
  return missing
end
-- Load every module in `prereqs` that prereq_check() reports as missing.
-- Returns the result of insmod().
function kernel_prereqs(prereqs)
  local missing = prereq_check(prereqs)
  return insmod(missing)
end
-- Round to the nearest integer, computed as ceil(v - 0.5).
-- Exact halves therefore round down: round(1.5) is 1, round(-1.5) is -2.
function round(v)
  local shifted = v - .5
  return math.ceil(shifted)
end
-- Return round(sqrt(flows) * bandwidth * delay).
function kleinrock(bandwidth, delay, flows)
  local product = math.sqrt(flows)*bandwidth*delay
  return round(product)
end
-- Clamp `v` to the closed range spanned by `min` and `max`.
-- The two limits may be passed in either order.
function bound(v,min,max)
  local lo, hi = min, max
  if hi < lo then
    lo, hi = hi, lo
  end
  if v > hi then
    return hi
  end
  if v < lo then
    return lo
  end
  return v
end
-- Clamp `v` with bound(), then round the result with round().
function rbound(v,min,max)
  local clamped = bound(v,min,max)
  return round(clamped)
end
-- Return true when `rate` is below 15000, false otherwise.
local function lowrate(rate)
  return rate < 15000
end
-- Derive implied settings and store them in env. Works purely through
-- side effects (not ideal) and returns nothing:
--   env.PPPOE set                -> env.OVERHEAD = 40
--   env.ADSL set                 -> env.LINKLAYER = "adsl"
--   env.UPLINK is a low rate     -> env.R2Q = 1
-- Changes from the original: it no longer assigns a stray global `s`,
-- and a missing or non-numeric UPLINK is skipped instead of raising an
-- error inside lowrate().
local function htb_est()
  if env.PPPOE then env.OVERHEAD=40 end
  if env.ADSL then env.LINKLAYER="adsl" end
  local uplink = env.UPLINK
  if type(uplink) == "number" and lowrate(uplink) then env.R2Q=1 end
end
-- Build the overhead-related option string for htb (ADSL and similar).
--   rate: link rate, passed to lowrate()
-- Returns a string that starts with a space, followed by
-- "OVERHEAD=<n> " when env.OVERHEAD is set, "LINKLAYER=<name> " when
-- env.LINKLAYER is set, and "R2Q=1 " when the rate is low.
local function overhead(rate)
  local s = " "
  -- The original concatenated the undefined global OVERHEAD here, which
  -- raises an error whenever env.OVERHEAD is set.
  if env.OVERHEAD then s = s .. "OVERHEAD=" .. env.OVERHEAD .. " " end
  if env.LINKLAYER then s = s .. "LINKLAYER=" .. env.LINKLAYER .. " " end
  if lowrate(rate) then s = s .. "R2Q=1 " end
  return(s)
end
-- Attempt at finding useful values for sfqred
-- FIXME: re-read ared paper
-- Yes, you want a depth=10 (or 15) limit, to cope with the increase
-- of CWND done by Google.
-- Best thing would be to use SFQRED and headdrop, so that there is no
-- assumption on packet lengths.
-- tc qdisc add ... sfq headdrop limit 200 depth 15 redflowlimit 50000
-- min 5000 max 10000 probability 0.15 ecn
-- Another often hidden assumption is that RTTs are 100ms
-- Global multiplier read by redflowlimit() and chokelimit() when they
-- size their limits (presumably a delay target in ms -- TODO confirm units)
target = 50
-- The problem with byte oriented red is that it will never
-- kick in or mark acks.
-- Compute sizing values for sfqred from the uplink rate.
--   up:   uplink rate (presumably kbit, as the tc helpers use -- TODO confirm)
--   down: accepted but not used
-- Reads env.MTU, env.OVERHEAD (0 when unset) and the global `target`.
-- Returns four values: limit, rlimit, depth, pkt_per_ms_worst
--   limit            round(best-case packets per ms * target)
--   rlimit           target * bytes_per_ms, clamped to 1500..15000, rounded
--   depth            always 10
--   pkt_per_ms_worst packets per ms for packets of MTU + overhead bytes
-- All intermediates are locals; the original leaked bytes_per_ms,
-- pkt_per_ms_worst and pkt_per_ms_best as globals.
local function redflowlimit(up,down)
  local OVERHEAD = env.OVERHEAD or 0
  local bytes_per_ms = up / 12.5
  local pkt_per_ms_worst = bytes_per_ms/(env.MTU + OVERHEAD)
  -- best case assumes 64-byte packets
  local pkt_per_ms_best = bytes_per_ms/(64 + OVERHEAD)
  local limit = round(pkt_per_ms_best * target)
  local rlimit = rbound(target * bytes_per_ms,1500,15000)
  local depth = 10
  return limit,rlimit,depth,pkt_per_ms_worst
end
--print(redflowlimit(4000,20000))
--print(redflowlimit(2000,20000))
--print(redflowlimit(400,2000))
-- Choke is packet oriented at the UI, but bytes underlie it...
-- Compute sizing values for choke from the uplink rate. The arithmetic
-- is the same as in redflowlimit().
--   up:   uplink rate (presumably kbit, as the tc helpers use -- TODO confirm)
--   down: accepted but not used
-- Reads env.MTU, env.OVERHEAD (0 when unset) and the global `target`.
-- Returns four values: limit, rlimit, depth, pkt_per_ms_worst
-- All intermediates are locals; the original leaked bytes_per_ms,
-- pkt_per_ms_worst and pkt_per_ms_best as globals.
local function chokelimit(up,down)
  local OVERHEAD = env.OVERHEAD or 0
  local bytes_per_ms = up / 12.5
  local pkt_per_ms_worst = bytes_per_ms/(env.MTU + OVERHEAD)
  -- best case assumes 64-byte packets
  local pkt_per_ms_best = bytes_per_ms/(64 + OVERHEAD)
  local limit = round(pkt_per_ms_best * target)
  local rlimit = rbound(target * bytes_per_ms,1500,15000)
  local depth = 10
  return limit,rlimit,depth,pkt_per_ms_worst
end
-- Some htb info that is probably hopelessly out of date
-- Counting packets with quantum can be strange. If we have a low rate
-- class (rate = 5kbit), default quantum = 5000 / 10 = 500 bytes. But
-- most packets are larger than 500 bytes. Htb versions 1 and 2 use DRR,
-- so a packet larger than 1000 bytes will be sent, and the class will
-- remember how much it sent and wait until the packet is paid back
-- before another packet is sent. So if you send 1000 bytes, next time
-- the class is polled, you will not be allowed to send.
-- Htb3 uses the WRR scheduler. When a packet with size > quantum is
-- sent, it will be sent and an error that the quantum is too small
-- will be logged. But there is no pay back. The WRR scheduler is
-- faster than the DRR scheduler. So make sure quantum is bigger than
-- the default packet size. For 15 kbyte/s and default r2q, quantum is
-- 1500 and this is exactly the maximum packet size. If you want to
-- tune htb for rates smaller than 15 kbyte/s, you can manually set
-- the r2q and/or quantum.
-- FIXME:
-- We want to capture the characteristics of
-- the interface in a table.
-- So we need to parse the output of ethtool better
-- ["tx-ring"] = X
-- ["speed"] = X
-- etc
-- return a hash of the properties of the interface
-- terrific, this is hard to parse.
-- ethtool -g eth0
-- Ring parameters for eth0:
-- Pre-set maximums:
-- RX: 4096
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 4096
-- Current hardware settings:
-- RX: 256
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 64
-- ethtool -g wlan0
-- Ring parameters for wlan0:
-- Pre-set maximums:
-- RX: 0
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 0
-- Current hardware settings:
-- RX: 0
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 0
-- -k is easier
-- ethtool -k eth0
-- Offload parameters for eth0:
-- rx-checksumming: on
-- tx-checksumming: on
-- scatter-gather: on
-- tcp-segmentation-offload: off
-- udp-fragmentation-offload: off
-- generic-segmentation-offload: off
-- generic-receive-offload: on
-- large-receive-offload: off
-- rx-vlan-offload: on
-- tx-vlan-offload: on
-- ntuple-filters: off
-- receive-hashing: off
-- String method: return the string with leading and trailing whitespace
-- removed.
function string:trim ()
  local stripped = string.match(self, "^%s*(.-)%s*$")
  return stripped
end
-- Parse the output of "<env.ETHTOOL> -k <iface>" into a table.
-- Each recorded line "label: value" becomes t[label] = value, both
-- trimmed; values stay strings such as 'on' / 'off'.
-- Returns nil if any line is exactly "no offload info available", and an
-- empty table if the command produced no output table at all.
-- NOTE(review): a line is only recorded when the text before its first
-- ':' has more than one space-separated word, so single-word labels such
-- as "rx-checksumming" are skipped -- confirm this is intended.
-- FIXME: should probably change 'off' and 'on' to false and true
function offloads(iface)
  local result = { }
  local lines = tslurpc(sf("%s -k %s",env.ETHTOOL,iface))
  if lines == nil then
    return result
  end
  for _, line in ipairs(lines) do
    if line == "no offload info available" then
      return nil
    end
  end
  for _, line in ipairs(lines) do
    local parts = line:split(":")
    local words = parts[1]:split(" ")
    -- skip the "Offload parameters for ..." header and lines with no value
    if # words > 1 and words[1] ~= "Offload" and # parts > 1 then
      result[parts[1]:trim()] = parts[2]:trim()
    end
  end
  return result
end
-- Manual check for offloads(): print every "key value" pair it returns
-- for `iface`. Prints nothing when offloads() returns nil.
-- There is still a wide range of possible ethtool outputs to test against.
function test_offloads(iface)
  local parsed = offloads(iface)
  if parsed == nil then
    return
  end
  for key, value in pairs(parsed) do
    print(sf("%s %s",key,value))
  end
end
-- test_offloads("eth1")
-- Parse the output of "<env.ETHTOOL> -g <iface>" into a table.
-- Lines before the "Current hardware settings" header are stored under
-- 'max_<label>', lines after it under 'cur_<label>' (for example
-- 'max_TX' and 'cur_TX'). Values are kept as trimmed strings.
-- Header lines (first word "Ring", "Pre-set" or "Current") are not stored.
-- Unlike the original, this returns an empty table when the command
-- gives no output table, and skips lines that have no "label: value"
-- shape instead of failing on a nil index.
-- FIXME - could use a little more thought on creating the hash
function ring_params(iface)
  local t = { }
  local in_current = false
  local lines = tslurpc(sf("%s -g %s",env.ETHTOOL,iface)) or { }
  for _, line in ipairs(lines) do
    local h = line:split(":")
    local label, value = h[1], h[2]
    if label ~= nil then
      local first = label:split(" ")[1]
      if first == "Ring" or first == "Pre-set" then
        -- section headers carry no data
      elseif first == "Current" then
        in_current = true
      elseif value ~= nil then
        local prefix = in_current and 'cur_' or 'max_'
        t[prefix .. label:trim()] = value:trim()
      end
    end
  end
  return t
end
-- Manual check for ring_params(): print every "key value" pair it
-- returns for `iface`.
function test_ring_params(iface)
  local parsed = ring_params(iface)
  for key, value in pairs(parsed) do
    local line = sf("%s %s",key,value)
    print(line)
  end
end
-- test_offloads(IFACE)
-- test_ring_params(IFACE)
-- NOTE: not implemented; the body is empty, so a call does nothing and
-- returns no value. `iface` is currently unused.
function iface_get(iface)
end
-- Write env.MAX_HWQ_BYTES to byte_queue_limits/limit_max of tx-0, tx-1,
-- ... under /sys/class/net/<iface>/queues, stopping at the first queue
-- for which spewf() returns nil.
-- Returns the number of queues written, taken as the number of hardware
-- queues found.
local function bql_setup(iface)
  local queue = 0
  while true do
    local path = sf("/sys/class/net/%s/queues/tx-%d/byte_queue_limits/limit_max",iface,queue)
    if spewf(path,env.MAX_HWQ_BYTES) == nil then
      break
    end
    queue = queue + 1
  end
  return queue
end
-- Write `speed` to /sys/class/net/<iface>/speed and return what spewf()
-- returns. (Maybe better done with ethtool.)
local function speed_set(iface,speed)
  local path = sf("/sys/class/net/%s/speed",iface)
  return spewf(path,speed)
end
-- Return the contents of /sys/class/net/<iface>/speed as a string, or
-- nil when slurpf() cannot read it.
local function speed_get(iface)
  local path = sf("/sys/class/net/%s/speed",iface)
  return slurpf(path)
end
-- Return the quantum to use for `rate`. Both sides of the 10000
-- threshold currently give 1500.
local function rate_quantum(rate)
  local quantum = 1500
  if rate < 10000 then
    return quantum
  end
  return quantum
end
-- Map a link speed to the hexadecimal mask string handed to
-- "ethtool -s <iface> advertise":
--   0            -> "0x000"
--   below 11     -> "0x002"
--   below 101    -> "0x008"
--   below 1001   -> "0x020"
--   below 10001  -> "0x1000"
--   anything else -> "0x000"
-- (Doing this as a lookup table hurt lua.)
-- FIXME: Not clear how to reset to advertising all
-- Not clear how to reset this parameter from
-- userspace to autonegotiate
-- What to do with non-sensical values that you
-- get before an interface is live?
-- Maybe use ethtool speed option?
local function advertise_speed(s)
  if s == 0 then
    return "0x000"
  elseif s < 11 then
    return "0x002"
  elseif s < 101 then
    return "0x008"
  elseif s < 1001 then
    return "0x020"
  elseif s < 10001 then
    return "0x1000"
  end
  return "0x000"
end
-- TSO does terrible things to the scheduler
-- GSO does as well
-- UFO is not a feature of most devices
-- In the long run I think we want to disable
-- TSO and GSO entirely below 100Mbit. I'd
-- argue for same for gigE, too, for desktops
--
-- Prepare an ethernet interface:
--   * env.FORCE_SPEED set: restrict the advertised speed (for testing)
--   * env.FORCE_RING set: shrink the TX ring when it is currently larger
--   * apply the byte queue limit to every hardware queue
--   * unless env.TSO is set: turn gso, tso, ufo and gro off
-- Returns the number of hardware queues reported by bql_setup().
local function ethernet_setup(iface)
  local tx = ring_params(iface)
  -- for testing, limit ethernet to SPEED
  if env.FORCE_SPEED then
    ethtool("-s %s advertise %s",iface,advertise_speed(env.FORCE_SPEED))
  end
  if env.FORCE_RING then
    -- ring_params() stores values as strings, and comparing a number
    -- with a string raises an error, so convert both sides first. The
    -- resize is skipped when either side is not numeric.
    local wanted = tonumber(env.FORCE_RING)
    local current = tonumber(tx['cur_TX'])
    if wanted ~= nil and current ~= nil and wanted < current then
      ethtool("-G %s tx %d",iface,wanted)
    end
  end
  local queues = bql_setup(iface)
  if env.TSO == nil then
    ethtool("-K %s gso off",iface)
    ethtool("-K %s tso off",iface)
    ethtool("-K %s ufo off",iface)
    ethtool("-K %s gro off",iface)
  end
  return queues
end
-- Some TC helpers
-- Format a rate given in kilobits as a tc rate string.
-- The rate is rounded and given the suffix "kbit" below 1000, "mbit"
-- below 1000 after dividing by 1000, and "gbit" after dividing by 1000
-- again.
-- does this need to be integer?
-- FIXME the effect of rounding really isn't what we want
local function r2s(rate)
  local kbits = round(rate)
  if kbits < 1000 then
    return kbits .. "kbit"
  end
  local mbits = round(rate/1000)
  if mbits < 1000 then
    return mbits .. "mbit"
  end
  return round(rate/1000/1000) .. "gbit"
end
-- print(r2s(10))
-- print(r2s(1001))
-- print(r2s(1050)) -- yea, rounding bad
-- print(r2s(999999.4))
-- print(r2s(10000000.6))
-- print(r2s(1000000))
-- TC tends to be repetitive and hard to read
-- So these function shorten things considerably by doing
-- the "{class,qdisc,filter} add dev %s" for us
-- It also means lua keeps less unique strings around.
-- Constructing something that was ** reversible **
-- and cleaner to express would be better than this
-- Command prefixes "{class,filter,qdisc} add dev <IFACE> " used by the
-- ca / fa / qa helpers
local castring = sf("class add dev %s ", env.IFACE)
local fastring = sf("filter add dev %s ", env.IFACE)
local qastring = sf("qdisc add dev %s ", env.IFACE)
-- The same prefixes followed by the often used 'parent ' keyword, for
-- the cap / fap / qap helpers
local capstring = castring .. "parent "
local fapstring = fastring .. "parent "
local qapstring = qastring .. "parent "
-- Write "class add dev <IFACE> <args>" plus a newline to the global `tc`
-- handle (assigned elsewhere; presumably the pipe from opentc()).
-- The arguments are passed to string.format. Returns tc:write's result.
local function ca(...)
  local args = sf(...)
  return tc:write(castring,args,"\n")
end
-- Write "class add dev <IFACE> parent <args>" plus a newline to the
-- global `tc` handle (assigned elsewhere; presumably the pipe from
-- opentc()). The arguments are passed to string.format.
-- Returns tc:write's result.
local function cap(...)
  local args = sf(...)
  return tc:write(capstring,args,"\n")
end
-- Write "filter add dev <IFACE> <args>" plus a newline to the global
-- `tc` handle (assigned elsewhere; presumably the pipe from opentc()).
-- The arguments are passed to string.format. Returns tc:write's result.
local function fa(...)
  local args = sf(...)
  return tc:write(fastring,args,"\n")
end
-- Write "filter add dev <IFACE> parent <args>" plus a newline to the
-- global `tc` handle (assigned elsewhere; presumably the pipe from
-- opentc()). The arguments are passed to string.format.
-- Returns tc:write's result.
local function fap(...)
  local args = sf(...)
  return tc:write(fapstring,args,"\n")
end
-- Write "qdisc add dev <IFACE> <args>" plus a newline to the global `tc`
-- handle (assigned elsewhere; presumably the pipe from opentc()).
-- The arguments are passed to string.format. Returns tc:write's result.
local function qa(...)
  local args = sf(...)
  return tc:write(qastring,args,"\n")
end
-- Write "qdisc add dev <IFACE> parent <args>" plus a newline to the
-- global `tc` handle (assigned elsewhere; presumably the pipe from
-- opentc()). The arguments are passed to string.format.
-- Returns tc:write's result.
local function qap(...)
  local args = sf(...)
  return tc:write(qapstring,args,"\n")
end
-- Add an htb class `parent:child` under `parent:` at the given rate.
--   parent, child: numbers, printed in hex
--   rate: inserted before the "kibit" suffix
--   str:  extra text appended to the command
-- NOTE(review): `est` and `quantum` are neither parameters nor locals
-- here; they resolve to names that must be defined elsewhere -- confirm.
-- FIXME sanely calculate htb rate, overhead, etc, etc
local function hap(parent,child,rate,str)
  local class_fmt = "%x: classid %x:%x %s htb rate %skibit mtu %d mpu 64 quantum %d %s"
  cap(class_fmt,parent,parent,child,est,rate,env.MTU,quantum,str)
end
-- Start "<env.TC> <env.TCARG>" and return a write pipe to it.
local function opentc()
  local cmdline = sf("%s %s",env.TC, env.TCARG)
  return popen(cmdline,'w')
end
-- When env.INGRESS is set: load sch_ingress and delete the existing
-- ingress qdisc on IFACE through a short-lived tc pipe. Otherwise do
-- nothing. Returns nothing.
function ingress()
  if not env.INGRESS then
    return
  end
  kernel_prereqs({"sch_ingress"})
  local pipe = opentc()
  pipe:write(sf("qdisc del dev %s ingress\n",IFACE))
  pipe:close()
end
-- Delete the root qdisc on IFACE through a short-lived tc pipe, then
-- open and return a fresh tc pipe for the commands that follow.
function resettc()
  local pipe = opentc()
  local command = sf("qdisc del dev %s root\n",IFACE)
  pipe:write(command)
  pipe:close()
  return opentc()
end
-- QFQ: Create a bin attached to the parent class
--   base: handle of the parent qdisc (printed in hex)
--   bin:  minor number of the new class (printed in hex)
--   disc: qdisc text attached beneath the new class
-- The parent is written as "<base>:", the same form q_bins() uses; the
-- original left the colon out of the class command only.
local function cb(base,bin,disc)
  cap("%x: classid %x:%x qfq",base,base,bin)
  qap("%x:%x %s",base,bin,disc)
end
-- Add three u32 filters under `parent:`, one each for ip, ipv6 and arp
-- (prio 5, 6 and 7), that send packets matching "u8 0x01 0x01 at -14"
-- to the MULTICAST bin.
-- FIXME: It would be nice to have a cleaner way to match all multicast
local function fa_mcast(parent)
  local rules = {
    "%x: protocol ip prio 5 u32 match u8 0x01 0x01 at -14 flowid %x:%x",
    "%x: protocol ipv6 prio 6 u32 match u8 0x01 0x01 at -14 flowid %x:%x",
    "%x: protocol arp prio 7 u32 match u8 0x01 0x01 at -14 flowid %x:%x",
  }
  for _, rule in ipairs(rules) do
    fap(rule,parent,parent,MULTICAST)
  end
end
-- Add a prio 999 u32 filter under `parent:` with the match
-- "ip protocol 0 0x00" that sends packets to the DEFAULTB bin.
local function fa_defb(parent)
  local rule = "%x: protocol all prio 999 u32 match ip protocol 0 0x00 flowid %x:%x"
  fap(rule,parent,parent,DEFAULTB)
end
-- Add the flow-hash filters that spread traffic over env.BINS bins under
-- `parent:`, hashing on proto-dst plus rxhash. With env.NAT set, ipv6
-- keeps rxhash and all other traffic hashes on nfct-src instead.
-- Finishes by calling the pingopt() hook with "<parent>:" and
-- "<parent>:<DEFAULTB>".
-- FIXME: This needs a correct hash for natted sources when NAT=y and ipv6
-- handle 3 repeated bad?
local function fa_bins(parent)
  if not env.NAT then
    fap("%x: handle 3 protocol all prio 97 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS)
  else
    fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS)
    fap("%x: handle 4 protocol all prio 97 flow hash keys proto-dst,nfct-src divisor %d",parent,env.BINS)
  end
  local parent_handle = sf("%x:",parent)
  local default_bin = sf("%x:%x",parent,DEFAULTB)
  pingopt(parent_handle,default_bin)
  -- At one point I was trying to handle ipv6 separately
  -- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS)
end
-- Like fa_bins(), but hashes on the source only: key "src", or with
-- env.NAT set "src" for ipv6 and "nfct-src" for all other traffic.
-- Finishes by calling the pingopt() hook with "<parent>:" and
-- "<parent>:<DEFAULTB>".
local function faip_bins(parent)
  if not env.NAT then
    fap("%x: handle 3 protocol all prio 97 flow hash keys src divisor %d",parent,env.BINS)
  else
    fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys src divisor %d",parent,env.BINS)
    fap("%x: handle 4 protocol all prio 97 flow hash keys nfct-src divisor %d",parent,env.BINS)
  end
  local parent_handle = sf("%x:",parent)
  local default_bin = sf("%x:%x",parent,DEFAULTB)
  pingopt(parent_handle,default_bin)
  -- At one point I was trying to handle ipv6 separately
  -- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS)
end
-- Create qfq classes `parent:0` through `parent:<env.BINS>` (inclusive)
-- and attach the env.BIGDISC qdisc beneath each one.
local function q_bins(parent)
  local last_bin = env.BINS
  for bin = 0, last_bin do
    cap("%x: classid %x:%x qfq",parent,parent,bin)
    qap("%x:%x %s",parent,bin,env.BIGDISC)
  end
end
-- We can do simple per-stream load balancing across multiple hardware
-- queues thusly. This assumes your IPv6 isn't natted....
-- Adds the same flow-hash filters as fa_bins() under `parent:`, but with
-- `queues` as the divisor and without the pingopt() call.
local function mqprio_bins(parent,queues)
  if not env.NAT then
    fap("%x: handle 3 protocol all prio 97 flow hash keys proto-dst,rxhash divisor %d",parent,queues)
  else
    fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys proto-dst,rxhash divisor %d",parent,queues)
    fap("%x: handle 4 protocol all prio 97 flow hash keys proto-dst,nfct-src divisor %d",parent,queues)
  end
  -- At one point I was trying to handle ipv6 separately
  -- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,BINS)
end