Index: usr.sbin/pfctl/parse.y =================================================================== RCS file: /cvs/src/usr.sbin/pfctl/parse.y,v retrieving revision 1.2 diff -u -p -r1.2 parse.y --- usr.sbin/pfctl/parse.y 11 Feb 2005 22:31:45 -0000 1.2 +++ usr.sbin/pfctl/parse.y 4 Apr 2008 00:31:58 -0000 @@ -42,6 +42,7 @@ #include #include #include #include +#include #include #include @@ -239,6 +240,7 @@ } pool_opts; struct node_hfsc_opts hfsc_opts; +struct node_fairq_opts fairq_opts; int yyerror(const char *, ...); int disallow_table(struct node_host *, const char *); @@ -368,6 +370,7 @@ struct scrub_opts scrub_opts; struct table_opts table_opts; struct pool_opts pool_opts; struct node_hfsc_opts hfsc_opts; + struct node_fairq_opts fairq_opts; } v; int lineno; } YYSTYPE; @@ -402,8 +405,8 @@ %token SET OPTIMIZATION TIMEOUT LIMIT LO %token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG HOSTID %token ANTISPOOF FOR %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT -%token ALTQ CBQ PRIQ HFSC BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT -%token QUEUE PRIORITY QLIMIT +%token ALTQ CBQ PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT +%token QUEUE PRIORITY QLIMIT HOGS BUCKETS %token LOAD %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE %token TAGGED TAG IFBOUND GRBOUND FLOATING STATEPOLICY @@ -443,6 +446,7 @@ %type scheduler %type cbqflags_list cbqflags_item %type priqflags_list priqflags_item %type hfscopts_list hfscopts_item hfsc_opts +%type fairqopts_list fairqopts_item fairq_opts %type bandwidth %type filter_opts filter_opt filter_opts_l %type antispoof_opts antispoof_opt antispoof_opts_l @@ -1280,6 +1284,15 @@ | HFSC '(' hfsc_opts ')' { $$.qtype = ALTQT_HFSC; $$.data.hfsc_opts = $3; } + | FAIRQ { + $$.qtype = ALTQT_FAIRQ; + bzero(&$$.data.fairq_opts, + sizeof(struct node_fairq_opts)); + } + | FAIRQ '(' fairq_opts ')' { + $$.qtype = ALTQT_FAIRQ; + $$.data.fairq_opts = $3; + } ; cbqflags_list : cbqflags_item { $$ |= $1; } @@ 
-1413,6 +1426,61 @@ free($1); } ; +fairq_opts : { + bzero(&fairq_opts, + sizeof(struct node_fairq_opts)); + } + fairqopts_list { + $$ = fairq_opts; + } + ; + +fairqopts_list : fairqopts_item + | fairqopts_list comma fairqopts_item + ; + +fairqopts_item : LINKSHARE bandwidth { + if (fairq_opts.linkshare.used) { + yyerror("linkshare already specified"); + YYERROR; + } + fairq_opts.linkshare.m2 = $2; + fairq_opts.linkshare.used = 1; + } + | LINKSHARE '(' bandwidth number bandwidth ')' { + if (fairq_opts.linkshare.used) { + yyerror("linkshare already specified"); + YYERROR; + } + fairq_opts.linkshare.m1 = $3; + fairq_opts.linkshare.d = $4; + fairq_opts.linkshare.m2 = $5; + fairq_opts.linkshare.used = 1; + } + | HOGS bandwidth { + fairq_opts.hogs_bw = $2; + } + | BUCKETS number { + fairq_opts.nbuckets = $2; + } + | STRING { + if (!strcmp($1, "default")) + fairq_opts.flags |= FARF_DEFAULTCLASS; + else if (!strcmp($1, "red")) + fairq_opts.flags |= FARF_RED; + else if (!strcmp($1, "ecn")) + fairq_opts.flags |= FARF_RED|FARF_ECN; + else if (!strcmp($1, "rio")) + fairq_opts.flags |= FARF_RIO; + else { + yyerror("unknown fairq flag \"%s\"", $1); + free($1); + YYERROR; + } + free($1); + } + ; + qassign : /* empty */ { $$ = NULL; } | qassign_item { $$ = $1; } | '{' qassign_list '}' { $$ = $2; } @@ -3921,7 +3989,8 @@ n = calloc(1, sizeof(struct node_que if (n == NULL) err(1, "expand_altq: calloc"); if (pa.scheduler == ALTQT_CBQ || - pa.scheduler == ALTQT_HFSC) + pa.scheduler == ALTQT_HFSC /*|| + pa.scheduler == ALTQT_FAIRQ*/) if (strlcpy(n->parent, qname, sizeof(n->parent)) >= sizeof(n->parent)) @@ -4310,6 +4379,7 @@ { "binat-anchor", BINATANCHOR}, { "bitmask", BITMASK}, { "block", BLOCK}, { "block-policy", BLOCKPOLICY}, + { "buckets", BUCKETS}, { "cbq", CBQ}, { "code", CODE}, { "crop", FRAGCROP}, @@ -4317,6 +4387,7 @@ { "debug", DEBUG}, { "drop", DROP}, { "drop-ovl", FRAGDROP}, { "dup-to", DUPTO}, + { "fairq", FAIRQ}, { "fastroute", FASTROUTE}, { "file", FILENAME}, { 
"fingerprints", FINGERPRINTS}, @@ -4329,6 +4400,7 @@ { "global", GLOBAL}, { "group", GROUP}, { "group-bound", GRBOUND}, { "hfsc", HFSC}, + { "hogs", HOGS}, { "hostid", HOSTID}, { "icmp-type", ICMPTYPE}, { "icmp6-type", ICMP6TYPE}, Index: usr.sbin/pfctl/pf.conf.5 =================================================================== RCS file: /cvs/src/usr.sbin/pfctl/pf.conf.5,v retrieving revision 1.9 diff -u -p -r1.9 pf.conf.5 --- usr.sbin/pfctl/pf.conf.5 2 Sep 2007 19:30:48 -0000 1.9 +++ usr.sbin/pfctl/pf.conf.5 5 Apr 2008 22:04:18 -0000 @@ -698,6 +698,52 @@ .Ar Priority mainly controls the time packets take to get sent out, while .Ar bandwidth has primarily effects on throughput. +.It Ar fairq +Fair Queue. +.Ar Queues +are flat attached to the interface, thus, +.Ar queues +cannot have further child +.Ar queues . +Each queue must be given a unique priority and one must be marked +as the default queue. +Each queue implements a number of buckets (default 256) which sorts the +traffic based on a hash key generated by the +.Ar keep state +facility in your pass rules. Each bucket contains a list of packets +controlled by +.Ar qlimit . +In order for +.Ar fairq +to function properly, +.Ar keep state +must be enabled on most of the rule sets that route packets to the queue. +.Pp +Packet selection operates as follows: +The queues are scanned from highest priority to lowest priority. If a +queue has pending packets and has not reached its bandwidth limit the +scan stops and a packet is selected from that queue. +If a queue has reached its bandwidth limit the scan continues searching for +other, lower priority queues which have not. If no queue is found to be +suitable then the highest priority queue with pending packets is used +regardless of whether it has reached its bandwidth limit or not. +.Pp +A +.Ar fairq +round robins between its buckets, extracting one packet from each bucket. 
+This essentially prevents large backlogs of packets from high volume +connections from destroying the interactive response of other connections. +.Pp +The +.Ar bandwidth +parameter for a +.Ar fairq +is a guaranteed minimum and more will be used if no higher priority traffic is +present. Creating a queue with one bucket as a catch-all for pass rules +not characterized by +.Ar keep state +is supported. Such a queue serves as a basic priority queue with a bandwidth +specification. .El .Pp The interfaces on which queueing should be activated are declared using @@ -716,9 +762,11 @@ .It Ar .Ar cbq for Class Based Queueing, .Ar priq -for Priority Queueing and +for Priority Queueing, .Ar hfsc -for the Hierarchical Fair Service Curve scheduler. +for the Hierarchical Fair Service Curve scheduler, and +.Ar fairq +for Fair Queueing. .It Ar bandwidth The maximum bitrate for all queues on an interface may be specified using the @@ -738,6 +786,11 @@ .Ar Gb If .Ar bandwidth is not specified, the interface bandwidth is used. +.Pp +When used with +.Ar fairq , +.Ar bandwidth +specifies a guaranteed minimum but the fairq is allowed to exceed it. .It Ar qlimit The maximum number of packets held in the queue. The default is 50. @@ -786,18 +839,26 @@ .Ar queue The .Ar priq scheduler does not support bandwidth specification. +The +.Ar fairq +scheduler uses the bandwidth specification as a guaranteed minimum and +may exceed it. .It Ar priority Between queues a priority level can be set. For -.Ar cbq -and +.Ar cbq , .Ar hfsc , +and +.Ar fairq the range is 0 to 7 and for .Ar priq , the range is 0 to 15. The default for all is 1. .Ar Priq queues with a higher priority are always served first. +.Ar Fairq +queues with a higher priority are served first unless they exceed their +bandwidth specification. .Ar Cbq and .Ar Hfsc @@ -805,6 +866,9 @@ .Ar Hfsc .It Ar qlimit The maximum number of packets held in the queue. The default is 50.
+When used with a +.Ar fairq +this specifies the maximum number of packets held per bucket. .El .Pp The @@ -831,6 +895,27 @@ .It Ar ecn .El .Pp The +.Ar fairq +.Ar scheduler +supports the following additional options: +.Bl -tag -width Fl +.It Ar buckets +Specify the number of buckets, from 1 to 2048 in powers of 2. A bucket +count of 1 causes a +.Ar fairq +to essentially degenerate into a priority queue. +.It Ar linkshare +The bandwidth share of a backlogged queue. This option is parsed but not +yet supported. +.It Ar hogs +This option allows low bandwidth connections to burst up to the specified +bandwidth by not advancing the round robin when taking packets out of +the related queue. +When using this option a small value no greater than 1/20 of the available interface +bandwidth is recommended. +.El +.Pp +The .Ar cbq .Ar scheduler supports an additional option: Index: usr.sbin/pfctl/pfctl_altq.c =================================================================== RCS file: /cvs/src/usr.sbin/pfctl/pfctl_altq.c,v retrieving revision 1.2 diff -u -p -r1.2 pfctl_altq.c --- usr.sbin/pfctl/pfctl_altq.c 11 Feb 2005 22:31:45 -0000 1.2 +++ usr.sbin/pfctl/pfctl_altq.c 4 Apr 2008 00:34:09 -0000 @@ -42,6 +42,7 @@ #include #include #include #include +#include #include "pfctl_parser.h" #include "pfctl.h" @@ -68,6 +69,11 @@ static int print_hfsc_opts(const struct pf_altq *, const struct node_queue_opt *); +static int eval_pfqueue_fairq(struct pfctl *, struct pf_altq *); +static int print_fairq_opts(const struct pf_altq *, + const struct node_queue_opt *); +static int check_commit_fairq(int, int, struct pf_altq *); + static void gsc_add_sc(struct gen_sc *, struct service_curve *); static int is_gsc_under_sc(struct gen_sc *, struct service_curve *); @@ -84,6 +90,8 @@ u_int32_t); u_int32_t eval_bwspec(struct node_queue_bw *, u_int32_t); void print_hfsc_sc(const char *, u_int, u_int, u_int, const struct node_hfsc_sc *); +void print_fairq_sc(const char *, u_int, u_int, u_int, + const struct
node_fairq_sc *); void pfaltq_store(struct pf_altq *a) @@ -179,6 +187,10 @@ case ALTQT_HFSC: if (!print_hfsc_opts(a, qopts)) printf("hfsc "); break; + case ALTQT_FAIRQ: + if (!print_fairq_opts(a, qopts)) + printf("fairq "); /* no options printed */ + break; } if (bw != NULL && bw->bw_percent > 0) { @@ -204,7 +216,8 @@ printf(" "); printf("%s ", a->qname); if (print_interface) printf("on %s ", a->ifname); - if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) { + if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC || + a->scheduler == ALTQT_FAIRQ) { if (bw != NULL && bw->bw_percent > 0) { if (bw->bw_percent < 100) printf("bandwidth %u%% ", bw->bw_percent); @@ -225,6 +238,9 @@ break; case ALTQT_HFSC: print_hfsc_opts(a, qopts); break; + case ALTQT_FAIRQ: + print_fairq_opts(a, qopts); + break; } } @@ -291,6 +307,9 @@ break; case ALTQT_HFSC: error = check_commit_hfsc(dev, opts, altq); break; + case ALTQT_FAIRQ: + error = check_commit_fairq(dev, opts, altq); + break; default: break; } @@ -338,7 +357,8 @@ } if (pa->qlimit == 0) pa->qlimit = DEFAULT_QLIMIT; - if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) { + if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC || + pa->scheduler == ALTQT_FAIRQ) { if ((pa->bandwidth = eval_bwspec(bw, parent == NULL ?
0 : parent->bandwidth)) == 0) { fprintf(stderr, "bandwidth for %s invalid (%d / %d)\n", @@ -371,6 +391,9 @@ break; case ALTQT_HFSC: error = eval_pfqueue_hfsc(pf, pa); break; + case ALTQT_FAIRQ: + error = eval_pfqueue_fairq(pf, pa); + break; default: break; } @@ -778,6 +801,85 @@ gsc_destroy(&lssc); return (-1); } +/* + * FAIRQ support functions + */ +static int +eval_pfqueue_fairq(struct pfctl *pf __unused, struct pf_altq *pa) +{ + struct pf_altq *altq, *parent; + struct fairq_opts *opts; + struct service_curve sc; + + opts = &pa->pq_u.fairq_opts; + + if (pa->parent[0] == 0) { + /* root queue */ + opts->lssc_m1 = pa->ifbandwidth; + opts->lssc_m2 = pa->ifbandwidth; + opts->lssc_d = 0; + return (0); + } + + LIST_INIT(&lssc); + + /* if link_share is not specified, use bandwidth */ + if (opts->lssc_m2 == 0) + opts->lssc_m2 = pa->bandwidth; + + /* + * admission control: + * for the real-time service curve, the sum of the service curves + * should not exceed 80% of the interface bandwidth. 20% is reserved + * not to over-commit the actual interface bandwidth. + * for the link-sharing service curve, the sum of the child service + * curve should not exceed the parent service curve. + * for the upper-limit service curve, the assigned bandwidth should + * be smaller than the interface bandwidth, and the upper-limit should + * be larger than the real-time service curve when both are defined. + */ + parent = qname_to_pfaltq(pa->parent, pa->ifname); + if (parent == NULL) + errx(1, "parent %s not found for %s", pa->parent, pa->qname); + + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + + if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0) + continue; + + /* if the class has a link-sharing service curve, add it. 
*/ + if (opts->lssc_m2 != 0 && altq->pq_u.fairq_opts.lssc_m2 != 0) { + sc.m1 = altq->pq_u.fairq_opts.lssc_m1; + sc.d = altq->pq_u.fairq_opts.lssc_d; + sc.m2 = altq->pq_u.fairq_opts.lssc_m2; + gsc_add_sc(&lssc, &sc); + } + } + + /* check the link-sharing service curve. */ + if (opts->lssc_m2 != 0) { + sc.m1 = parent->pq_u.fairq_opts.lssc_m1; + sc.d = parent->pq_u.fairq_opts.lssc_d; + sc.m2 = parent->pq_u.fairq_opts.lssc_m2; + if (!is_gsc_under_sc(&lssc, &sc)) { + warnx("link-sharing sc exceeds parent's sc"); + goto err_ret; + } + } + + gsc_destroy(&lssc); + + return (0); + +err_ret: + gsc_destroy(&lssc); + return (-1); +} + static int check_commit_hfsc(int dev __unused, int opts __unused, struct pf_altq *pa) { @@ -818,6 +920,43 @@ return (error); } static int +check_commit_fairq(int dev __unused, int opts __unused, struct pf_altq *pa) +{ + struct pf_altq *altq, *def = NULL; + int default_class; + int error = 0; + + /* check if fairq has one default queue for this interface */ + default_class = 0; + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + if (altq->pq_u.fairq_opts.flags & FARF_DEFAULTCLASS) { + default_class++; + def = altq; + } + } + if (default_class != 1) { + warnx("should have one default queue on %s", pa->ifname); + return (1); + } + /* make sure the default queue is a leaf */ + TAILQ_FOREACH(altq, &altqs, entries) { + if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0) + continue; + if (altq->qname[0] == 0) /* this is for interface */ + continue; + if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) { + warnx("default queue is not a leaf"); + error++; + } + } + return (error); +} + +static int print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts) { const struct hfsc_opts *opts; @@ -863,6 +1002,43 @@ } else return (0); } +static int +print_fairq_opts(const struct pf_altq *a, const struct 
node_queue_opt *qopts) +{ + const struct fairq_opts *opts; + const struct node_fairq_sc *loc_lssc; + + opts = &a->pq_u.fairq_opts; + if (qopts == NULL) + loc_lssc = NULL; + else + loc_lssc = &qopts->data.fairq_opts.linkshare; + + if (opts->flags || + (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || + opts->lssc_d != 0))) { + printf("fairq("); + if (opts->flags & FARF_RED) + printf(" red"); + if (opts->flags & FARF_ECN) + printf(" ecn"); + if (opts->flags & FARF_RIO) + printf(" rio"); + if (opts->flags & FARF_CLEARDSCP) + printf(" cleardscp"); + if (opts->flags & FARF_DEFAULTCLASS) + printf(" default"); + if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth || + opts->lssc_d != 0)) + print_fairq_sc("linkshare", opts->lssc_m1, opts->lssc_d, + opts->lssc_m2, loc_lssc); + printf(" ) "); + + return (1); + } else + return (0); +} + /* * admission control using generalized service curve */ @@ -1164,6 +1340,23 @@ pa->pq_u.hfsc_opts.ulsc_d = opts->data.hfsc_opts.upperlimit.d; } break; + case ALTQT_FAIRQ: + pa->pq_u.fairq_opts.flags = opts->data.fairq_opts.flags; + pa->pq_u.fairq_opts.nbuckets = opts->data.fairq_opts.nbuckets; + pa->pq_u.fairq_opts.hogs_m1 = + eval_bwspec(&opts->data.fairq_opts.hogs_bw, ref_bw); + + if (opts->data.fairq_opts.linkshare.used) { + pa->pq_u.fairq_opts.lssc_m1 = + eval_bwspec(&opts->data.fairq_opts.linkshare.m1, + ref_bw); + pa->pq_u.fairq_opts.lssc_m2 = + eval_bwspec(&opts->data.fairq_opts.linkshare.m2, + ref_bw); + pa->pq_u.fairq_opts.lssc_d = + opts->data.fairq_opts.linkshare.d; + } + break; default: warnx("eval_queue_opts: unknown scheduler type %u", opts->qtype); @@ -1209,3 +1402,28 @@ if (d != 0) printf(")"); } + +void +print_fairq_sc(const char *scname, u_int m1, u_int d, u_int m2, + const struct node_fairq_sc *sc) +{ + printf(" %s", scname); + + if (d != 0) { + printf("("); + if (sc != NULL && sc->m1.bw_percent > 0) + printf("%u%%", sc->m1.bw_percent); + else + printf("%s", rate2str((double)m1)); + printf(" %u", d); + } + + if 
(sc != NULL && sc->m2.bw_percent > 0) + printf(" %u%%", sc->m2.bw_percent); + else + printf(" %s", rate2str((double)m2)); + + if (d != 0) + printf(")"); +} + Index: usr.sbin/pfctl/pfctl_parser.h =================================================================== RCS file: /cvs/src/usr.sbin/pfctl/pfctl_parser.h,v retrieving revision 1.1 diff -u -p -r1.1 pfctl_parser.h --- usr.sbin/pfctl/pfctl_parser.h 21 Sep 2004 21:25:28 -0000 1.1 +++ usr.sbin/pfctl/pfctl_parser.h 3 Apr 2008 22:55:47 -0000 @@ -126,12 +126,27 @@ struct node_hfsc_sc upperlimit; int flags; }; +struct node_fairq_sc { + struct node_queue_bw m1; /* slope of 1st segment; bps */ + u_int d; /* x-projection of m1; msec */ + struct node_queue_bw m2; /* slope of 2nd segment; bps */ + u_int8_t used; +}; + +struct node_fairq_opts { + struct node_fairq_sc linkshare; + struct node_queue_bw hogs_bw; + u_int nbuckets; + int flags; +}; + struct node_queue_opt { int qtype; union { struct cbq_opts cbq_opts; struct priq_opts priq_opts; struct node_hfsc_opts hfsc_opts; + struct node_fairq_opts fairq_opts; } data; }; Index: usr.sbin/pfctl/pfctl_qstats.c =================================================================== RCS file: /cvs/src/usr.sbin/pfctl/pfctl_qstats.c,v retrieving revision 1.2 diff -u -p -r1.2 pfctl_qstats.c --- usr.sbin/pfctl/pfctl_qstats.c 11 Feb 2005 22:31:45 -0000 1.2 +++ usr.sbin/pfctl/pfctl_qstats.c 3 Apr 2008 22:22:46 -0000 @@ -36,6 +36,7 @@ #include #include #include #include +#include #include "pfctl.h" #include "pfctl_parser.h" @@ -44,6 +45,7 @@ class_stats_t cbq_stats; struct priq_classstats priq_stats; struct hfsc_classstats hfsc_stats; + struct fairq_classstats fairq_stats; }; #define AVGN_MAX 8 @@ -75,6 +77,7 @@ unsigned, int); void print_cbqstats(struct queue_stats); void print_priqstats(struct queue_stats); void print_hfscstats(struct queue_stats); +void print_fairqstats(struct queue_stats); void pfctl_free_altq_node(struct pf_altq_node *); void pfctl_print_altq_nodestat(int, const struct 
pf_altq_node *); @@ -283,6 +286,9 @@ break; case ALTQT_HFSC: print_hfscstats(a->qstats); break; + case ALTQT_FAIRQ: + print_fairqstats(a->qstats); + break; } } @@ -348,6 +354,27 @@ rate2str((8 * cur.avg_bytes) / STAT } void +print_fairqstats(struct queue_stats cur) +{ + printf(" [ pkts: %10llu bytes: %10llu " + "dropped pkts: %6llu bytes: %6llu ]\n", + (unsigned long long)cur.data.fairq_stats.xmit_cnt.packets, + (unsigned long long)cur.data.fairq_stats.xmit_cnt.bytes, + (unsigned long long)cur.data.fairq_stats.drop_cnt.packets, + (unsigned long long)cur.data.fairq_stats.drop_cnt.bytes); + printf(" [ qlength: %3d/%3d ]\n", + cur.data.fairq_stats.qlength, cur.data.fairq_stats.qlimit); + + if (cur.avgn < 2) + return; + + printf(" [ measured: %7.1f packets/s, %s/s ]\n", + cur.avg_packets / STAT_INTERVAL, + rate2str((8 * cur.avg_bytes) / STAT_INTERVAL)); +} + + +void pfctl_free_altq_node(struct pf_altq_node *node) { while (node != NULL) { @@ -387,6 +414,10 @@ case ALTQT_HFSC: b = qs->data.hfsc_stats.xmit_cnt.bytes; p = qs->data.hfsc_stats.xmit_cnt.packets; break; + case ALTQT_FAIRQ: + b = qs->data.fairq_stats.xmit_cnt.bytes; + p = qs->data.fairq_stats.xmit_cnt.packets; + break; default: b = 0; p = 0; Index: sys/net/altq/altq.h =================================================================== RCS file: /cvs/src/sys/net/altq/altq.h,v retrieving revision 1.1 diff -u -p -r1.1 altq.h --- sys/net/altq/altq.h 11 Feb 2005 22:25:57 -0000 1.1 +++ sys/net/altq/altq.h 3 Apr 2008 18:23:54 -0000 @@ -36,7 +36,8 @@ #define ALTQT_RED 2 /* red */ #define ALTQT_RIO 3 /* rio */ #define ALTQT_HFSC 4 /* hfsc */ #define ALTQT_PRIQ 5 /* priority queue */ -#define ALTQT_MAX 6 /* should be max discipline type + 1 */ +#define ALTQT_FAIRQ 6 /* fair queue (requires keep state) */ +#define ALTQT_MAX 7 /* should be max discipline type + 1 */ /* simple token packet meter profile */ struct tb_profile { Index: sys/net/altq/altq_fairq.c 
=================================================================== RCS file: sys/net/altq/altq_fairq.c diff -N sys/net/altq/altq_fairq.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/altq/altq_fairq.c 5 Apr 2008 21:59:29 -0000 @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly$ + */ +/* + * Matt: I gutted altq_priq.c and used it as a skeleton on which to build + * fairq. The fairq algorithm is completely different than priq, of course, + * but because I used priq's skeleton I believe I should include priq's + * copyright. + * + * Copyright (C) 2000-2003 + * Sony Computer Science Laboratories Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * FAIRQ - take traffic classified by keep state (hashed into + * mbuf->m_pkthdr.altq_state_hash) and bucketize it. Fairly extract + * the first packet from each bucket in a round-robin fashion. + * + * TODO - better overall qlimit support (right now it is per-bucket). + * - NOTE: red etc is per bucket, not overall.
+ * - better service curve support. + * + * EXAMPLE: + * + * altq on em0 fairq bandwidth 650Kb queue { std, bulk } + * queue std priority 3 bandwidth 400Kb \ + * fairq (buckets 64, default, hogs 1Kb) qlimit 50 + * queue bulk priority 2 bandwidth 100Kb \ + * fairq (buckets 64, hogs 1Kb) qlimit 50 + * + * pass out on em0 from any to any keep state queue std + * pass out on em0 inet proto tcp ..... port ... keep state queue bulk + */ +#include "opt_altq.h" +#include "opt_inet.h" +#include "opt_inet6.h" + +#ifdef ALTQ_FAIRQ /* fairq is enabled in the kernel conf */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +/* + * function prototypes + */ +static int fairq_clear_interface(struct fairq_if *); +static int fairq_request(struct ifaltq *, int, void *); +static void fairq_purge(struct fairq_if *); +static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int); +static int fairq_class_destroy(struct fairq_class *); +static int fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); +static struct mbuf *fairq_dequeue(struct ifaltq *, struct mbuf *, int); + +static int fairq_addq(struct fairq_class *, struct mbuf *); +static struct mbuf *fairq_getq(struct fairq_class *, uint64_t); +static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *); +static fairq_bucket_t *fairq_selectq(struct fairq_class *, int); +static void fairq_purgeq(struct fairq_class *); + +static void get_class_stats(struct fairq_classstats *, struct fairq_class *); +static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t); + +int +fairq_pfattach(struct pf_altq *a) +{ + struct ifnet *ifp; + int error; + + if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) + return (EINVAL); + crit_enter(); + error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc, + fairq_enqueue, 
fairq_dequeue, fairq_request, NULL, NULL); + crit_exit(); + return (error); +} + +int +fairq_add_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + struct ifnet *ifp; + + if ((ifp = ifunit(a->ifname)) == NULL) + return (EINVAL); + if (!ifq_is_ready(&ifp->if_snd)) + return (ENODEV); + + pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO); + pif->pif_bandwidth = a->ifbandwidth; + pif->pif_maxpri = -1; + pif->pif_ifq = &ifp->if_snd; + ifq_purge(&ifp->if_snd); + + /* keep the state in pf_altq */ + a->altq_disc = pif; + + return (0); +} + +int +fairq_remove_altq(struct pf_altq *a) +{ + struct fairq_if *pif; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + a->altq_disc = NULL; + + fairq_clear_interface(pif); + + kfree(pif, M_ALTQ); + return (0); +} + +int +fairq_add_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + /* check parameters */ + if (a->priority >= FAIRQ_MAXPRI) + return (EINVAL); + if (a->qid == 0) + return (EINVAL); + if (pif->pif_classes[a->priority] != NULL) + return (EBUSY); + if (clh_to_clp(pif, a->qid) != NULL) + return (EBUSY); + + cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth, + &a->pq_u.fairq_opts, a->qid); + if (cl == NULL) + return (ENOMEM); + + return (0); +} + +int +fairq_remove_queue(struct pf_altq *a) +{ + struct fairq_if *pif; + struct fairq_class *cl; + + if ((pif = a->altq_disc) == NULL) + return (EINVAL); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + return (fairq_class_destroy(cl)); +} + +int +fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +{ + struct fairq_if *pif; + struct fairq_class *cl; + struct fairq_classstats stats; + int error = 0; + + if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL) + return (EBADF); + + if ((cl = clh_to_clp(pif, a->qid)) == NULL) + return (EINVAL); + + if (*nbytes < sizeof(stats)) + return (EINVAL); + + get_class_stats(&stats, cl); + + if ((error 
= copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+		return (error);
+	*nbytes = sizeof(stats);
+	return (0);
+}
+
+/*
+ * Bring the interface back to the initial state by destroying every
+ * class registered in pif->pif_classes[].  Always returns 0.
+ */
+static int
+fairq_clear_interface(struct fairq_if *pif)
+{
+	struct fairq_class *cl;
+	int pri;
+
+	/*
+	 * clear out the classes
+	 *
+	 * fairq_class_destroy() lowers pif_maxpri itself once the top
+	 * class goes away; every lower slot has been visited by then.
+	 */
+	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+		if ((cl = pif->pif_classes[pri]) != NULL)
+			fairq_class_destroy(cl);
+	}
+
+	return (0);
+}
+
+/*
+ * Request handler of the discipline.  Only ALTRQ_PURGE is acted upon
+ * (all queued packets are discarded inside a critical section); any
+ * other request is ignored.  'arg' is unused.  Always returns 0.
+ */
+static int
+fairq_request(struct ifaltq *ifq, int req, void *arg)
+{
+	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+
+	crit_enter();
+	switch (req) {
+	case ALTRQ_PURGE:
+		fairq_purge(pif);
+		break;
+	}
+	crit_exit();
+	return (0);
+}
+
+/*
+ * Discard all the queued packets on the interface.
+ *
+ * Every class that has an active bucket list is drained through
+ * fairq_purgeq().  The cached poll result is dropped as well: the packet
+ * it referred to is gone, and leaving it set would make the next
+ * ALTDQ_REMOVE in fairq_dequeue() consult the purged class instead of
+ * scanning for the best class.
+ */
+static void
+fairq_purge(struct fairq_if *pif)
+{
+	struct fairq_class *cl;
+	int pri;
+
+	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
+			fairq_purgeq(cl);
+	}
+	pif->pif_poll_cache = NULL;
+	if (ifq_is_enabled(pif->pif_ifq))
+		pif->pif_ifq->ifq_len = 0;
+}
+
+/*
+ * Create the class for priority slot 'pri', or reconfigure the class
+ * that already occupies that slot.
+ *
+ *	pif		interface state owning the class table
+ *	pri		index into pif->pif_classes[] (not range-checked here)
+ *	qlimit		per-bucket queue limit; 0 selects the default of 50
+ *	bandwidth	class bandwidth limit; stored divided by 8
+ *	opts		scheduler options (flags, nbuckets, hogs_m1, lssc_m1)
+ *	qid		handle stored in cl_handle
+ *
+ * Returns the class, or NULL when RED is requested but the kernel was
+ * built without ALTQ_RED.
+ *
+ * NOTE: when an existing class is modified its bucket array is kept, so
+ * opts->nbuckets only takes effect for newly created classes.
+ */
+static struct fairq_class *
+fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
+		   u_int bandwidth, struct fairq_opts *opts, int qid)
+{
+	struct fairq_class *cl;
+	int flags = opts->flags;
+	u_int nbuckets = opts->nbuckets;
+	int i;
+
+#ifndef ALTQ_RED
+	if (flags & FARF_RED) {
+#ifdef ALTQ_DEBUG
+		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
+#endif
+		return (NULL);
+	}
+#endif
+	if (nbuckets == 0)
+		nbuckets = 256;
+	if (nbuckets > FAIRQ_MAX_BUCKETS)
+		nbuckets = FAIRQ_MAX_BUCKETS;
+	/*
+	 * enforce power-of-2 size so that (nbuckets - 1) can serve as the
+	 * mask applied to the state hash in fairq_addq()
+	 */
+	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
+		++nbuckets;
+
+	if ((cl = pif->pif_classes[pri]) != NULL) {
+		/* modify the class instead of creating a new one */
+		crit_enter();
+		if (cl->cl_head)
+			fairq_purgeq(cl);
+		/* a poll cached against this class refers to a purged packet */
+		if (pif->pif_poll_cache == cl)
+			pif->pif_poll_cache = NULL;
+		crit_exit();
+#ifdef ALTQ_RIO
+		if (cl->cl_qtype == Q_RIO)
+			rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+		if (cl->cl_qtype == Q_RED)
+			red_destroy(cl->cl_red);
+#endif
+		/* the RED/RIO state is gone, do not keep a stale pointer */
+		cl->cl_red = NULL;
+	} else {
+		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
+		cl->cl_nbuckets = nbuckets;
+		cl->cl_nbucket_mask = nbuckets - 1;
+
+		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
+					 cl->cl_nbuckets,
+					 M_ALTQ, M_WAITOK | M_ZERO);
+		cl->cl_head = NULL;
+	}
+
+	pif->pif_classes[pri] = cl;
+	if (flags & FARF_DEFAULTCLASS)
+		pif->pif_default = cl;
+	if (qlimit == 0)
+		qlimit = 50;  /* use default */
+	cl->cl_qlimit = qlimit;
+	/* the limit applies to each bucket queue individually */
+	for (i = 0; i < cl->cl_nbuckets; ++i) {
+		qlimit(&cl->cl_buckets[i].queue) = qlimit;
+	}
+	cl->cl_bandwidth = bandwidth / 8;
+	cl->cl_qtype = Q_DROPTAIL;
+	cl->cl_flags = flags & FARF_USERFLAGS;
+	cl->cl_pri = pri;
+	if (pri > pif->pif_maxpri)
+		pif->pif_maxpri = pri;
+	cl->cl_pif = pif;
+	cl->cl_handle = qid;
+	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
+	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
+
+#ifdef ALTQ_RED
+	if (flags & (FARF_RED|FARF_RIO)) {
+		int red_flags, red_pkttime;
+
+		red_flags = 0;
+		if (flags & FARF_ECN)
+			red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+		if (flags & FARF_CLEARDSCP)
+			red_flags |= RIOF_CLEARDSCP;
+#endif
+		if (pif->pif_bandwidth < 8)
+			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+		else
+			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
+#ifdef ALTQ_RIO
+		if (flags & FARF_RIO) {
+			cl->cl_red = (red_t *)rio_alloc(0, NULL,
+						red_flags, red_pkttime);
+			if (cl->cl_red != NULL)
+				cl->cl_qtype = Q_RIO;
+		} else
+#endif
+		if (flags & FARF_RED) {
+			cl->cl_red = red_alloc(0, 0,
+			    cl->cl_qlimit * 10/100,
+			    cl->cl_qlimit * 30/100,
+			    red_flags, red_pkttime);
+			if (cl->cl_red != NULL)
+				cl->cl_qtype = Q_RED;
+		}
+	}
+#endif /* ALTQ_RED */
+
+	return (cl);
+}
+
+/*
+ * Tear down a class: drop its queued packets, unhook it from the
+ * interface state (class table, poll cache, default class, maxpri),
+ * release its RED/RIO state and free the bucket array and the class
+ * itself.  Always returns 0.
+ */
+static int
+fairq_class_destroy(struct fairq_class *cl)
+{
+	struct fairq_if *pif;
+	int pri;
+
+	crit_enter();
+
+	if (cl->cl_head)
+		fairq_purgeq(cl);
+
+	pif = cl->cl_pif;
+	pif->pif_classes[cl->cl_pri] = NULL;
+	if (pif->pif_poll_cache == cl)
+		pif->pif_poll_cache = NULL;
+	/*
+	 * fairq_enqueue() falls back to pif_default for unclassified
+	 * packets, so it must never be left pointing at freed memory.
+	 */
+	if (pif->pif_default == cl)
+		pif->pif_default = NULL;
+	if (pif->pif_maxpri == cl->cl_pri) {
+		/* find the highest priority slot that is still populated */
+		for (pri = cl->cl_pri; pri >= 0; pri--)
+			if (pif->pif_classes[pri] != NULL) {
+				pif->pif_maxpri = pri;
+				break;
+			}
+		if (pri < 0)
+			pif->pif_maxpri = -1;
+	}
+	crit_exit();
+
+	if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+		if (cl->cl_qtype == Q_RIO)
+			rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+		if (cl->cl_qtype == Q_RED)
+			red_destroy(cl->cl_red);
+#endif
+	}
+	kfree(cl->cl_buckets, M_ALTQ);
+	cl->cl_head = NULL;	/* sanity */
+	cl->cl_polled = NULL;	/* sanity */
+	cl->cl_buckets = NULL;	/* sanity */
+	kfree(cl, M_ALTQ);
+
+	return (0);
+}
+
+/*
+ * fairq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ *
+ * The packet goes to the class named by its altq_qid tag when it is
+ * tagged and the handle resolves, otherwise to the default class.
+ * Returns 0 when the packet was queued and ENOBUFS when it was dropped
+ * (no packet header, no usable class, or fairq_addq() refused it); a
+ * dropped mbuf has been freed by the time this returns.
+ *
+ * NOTE(review): 'pktattr' is not saved, cl_pktattr is cleared instead;
+ * confirm that the RED/RIO paths in fairq_addq() cope with a NULL
+ * packet attribute.
+ */
+static int
+fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+	struct fairq_class *cl;
+	int error;
+	int len;
+
+	crit_enter();
+
+	/* grab class set by classifier */
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		/* should not happen */
+		if_printf(ifq->altq_ifp, "altq: packet does not have pkthdr\n");
+		m_freem(m);
+		error = ENOBUFS;
+		goto done;
+	}
+
+	if (m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED)
+		cl = clh_to_clp(pif, m->m_pkthdr.altq_qid);
+	else
+		cl = NULL;
+	if (cl == NULL) {
+		cl = pif->pif_default;
+		if (cl == NULL) {
+			m_freem(m);
+			error = ENOBUFS;
+			goto done;
+		}
+	}
+	cl->cl_flags |= FARF_HAS_PACKETS;
+	cl->cl_pktattr = NULL;
+	/* sample the length first, fairq_addq() may free the mbuf */
+	len = m_pktlen(m);
+	if (fairq_addq(cl, m) != 0) {
+		/* drop occurred.  mbuf was freed in fairq_addq. */
+		PKTCNTR_ADD(&cl->cl_dropcnt, len);
+		error = ENOBUFS;
+		goto done;
+	}
+	ifq->ifq_len++;
+	error = 0;
+done:
+	crit_exit();
+	return (error);
+}
+
+/*
+ * fairq_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *	from the queue.
ALTDQ_REMOVE is a normal dequeue operation.
+ *	ALTDQ_REMOVE must return the same packet if called immediately
+ *	after ALTDQ_POLL.
+ *
+ * Classes are scanned from pif_maxpri downward.  The first class whose
+ * head packet is under the class bandwidth limit wins; if every class
+ * holding packets is over its limit the highest priority one is used.
+ * A poll remembers the chosen class in pif_poll_cache so that the
+ * following ALTDQ_REMOVE can skip the scan.
+ */
+static struct mbuf *
+fairq_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
+{
+	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
+	struct fairq_class *cl;
+	struct fairq_class *best_cl;
+	struct mbuf *best_m;
+	struct mbuf *m;
+	uint64_t cur_time = read_machclk();
+	int pri;
+	int hit_limit;
+
+	if (ifq_is_empty(ifq)) {
+		/* no packet in the queue */
+		KKASSERT(mpolled == NULL);
+		return (NULL);
+	}
+
+	crit_enter();
+	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
+		/* a preceding poll already picked the class, take its packet */
+		best_cl = pif->pif_poll_cache;
+		m = fairq_getq(best_cl, cur_time);
+		pif->pif_poll_cache = NULL;
+		if (m) {
+			ifq->ifq_len--;
+			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+		}
+	} else {
+		best_cl = NULL;
+		best_m = NULL;
+
+		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
+			if ((cl = pif->pif_classes[pri]) == NULL)
+				continue;
+			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
+				continue;
+			m = fairq_pollq(cl, cur_time, &hit_limit);
+			if (m == NULL) {
+				/* the hint was stale, skip this class next time */
+				cl->cl_flags &= ~FARF_HAS_PACKETS;
+				continue;
+			}
+
+			/*
+			 * Only override the best choice if we are under
+			 * the BW limit.
+			 */
+			if (hit_limit == 0 || best_cl == NULL) {
+				best_cl = cl;
+				best_m = m;
+			}
+
+			/*
+			 * Remember the highest priority mbuf in case we
+			 * do not find any lower priority mbufs.
+			 */
+			if (hit_limit)
+				continue;
+			break;
+		}
+		if (op == ALTDQ_POLL) {
+			pif->pif_poll_cache = best_cl;
+			m = best_m;
+		} else if (best_cl) {
+			/* fairq_getq() serves the bucket fairq_pollq() selected */
+			m = fairq_getq(best_cl, cur_time);
+			KKASSERT(best_m == m);
+			ifq->ifq_len--;
+			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
+		} else {
+			m = NULL;
+		}
+	}
+	crit_exit();
+	KKASSERT(mpolled == NULL || mpolled == m);
+	return (m);
+}
+
+/*
+ * Queue mbuf m on one of the buckets of class cl.
+ *
+ * Returns 0 if the packet was queued.  On the drop-tail path -1 is
+ * returned, and the mbuf freed, when the bucket is already at its
+ * limit.  With RED/RIO the result of red_addq()/rio_addq() is returned
+ * as is.
+ */
+static int
+fairq_addq(struct fairq_class *cl, struct mbuf *m)
+{
+	fairq_bucket_t *b;
+	u_int hindex;
+	uint64_t bw;
+
+	/*
+	 * If the packet doesn't have any keep state put it on the end of
+	 * our queue.  XXX this can result in out of order delivery.
+	 */
+	if ((m->m_pkthdr.fw_flags & ALTQ_MBUF_STATE_HASHED) == 0) {
+		if (cl->cl_head)
+			b = cl->cl_head->prev;
+		else
+			b = &cl->cl_buckets[0];
+	} else {
+		/* cl_nbuckets is a power of 2, so the mask is a cheap modulo */
+		hindex = m->m_pkthdr.altq_state_hash & cl->cl_nbucket_mask;
+		b = &cl->cl_buckets[hindex];
+	}
+
+	/*
+	 * Add the bucket to the end of the circular list of active buckets.
+	 *
+	 * As a special case we add the bucket to the beginning of the list
+	 * instead of the end if it was not previously on the list and if
+	 * its traffic is less than the hog level.
+	 */
+	if (b->in_use == 0) {
+		b->in_use = 1;
+		if (cl->cl_head == NULL) {
+			cl->cl_head = b;
+			b->next = b;
+			b->prev = b;
+		} else {
+			/* link b just before the head, i.e. at the tail */
+			b->next = cl->cl_head;
+			b->prev = cl->cl_head->prev;
+			b->prev->next = b;
+			b->next->prev = b;
+
+			if (b->bw_delta && cl->cl_hogs_m1) {
+				bw = b->bw_bytes * machclk_freq / b->bw_delta;
+				/* moving the head onto b puts it first in line */
+				if (bw < cl->cl_hogs_m1)
+					cl->cl_head = b;
+			}
+		}
+	}
+
+#ifdef ALTQ_RIO
+	if (cl->cl_qtype == Q_RIO)
+		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+	if (cl->cl_qtype == Q_RED)
+		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
+#endif
+	if (qlen(&b->queue) >= qlimit(&b->queue)) {
+		m_freem(m);
+		return (-1);
+	}
+
+	if (cl->cl_flags & FARF_CLEARDSCP)
+		write_dsfield(m, cl->cl_pktattr, 0);
+
+	_addq(&b->queue, m);
+
+	return (0);
+}
+
+/*
+ * Remove and return the next packet of class cl, or NULL if the class
+ * has no bucket to serve.  When a packet is returned the per-class and
+ * per-bucket byte/time accumulators used for the bandwidth estimates
+ * are updated; each update also sheds 1/8 of the accumulated value.
+ */
+static struct mbuf *
+fairq_getq(struct fairq_class *cl, uint64_t cur_time)
+{
+	fairq_bucket_t *b;
+	struct mbuf *m;
+
+	b = fairq_selectq(cl, 0);
+	if (b == NULL)
+		m = NULL;
+#ifdef ALTQ_RIO
+	else if (cl->cl_qtype == Q_RIO)
+		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
+#endif
+#ifdef ALTQ_RED
+	else if (cl->cl_qtype == Q_RED)
+		m = red_getq(cl->cl_red, &b->queue);
+#endif
+	else
+		m = _getq(&b->queue);
+
+	/*
+	 * Calculate the BW change
+	 */
+	if (m != NULL) {
+		uint64_t delta;
+
+		/*
+		 * Per-class bandwidth calculation
+		 */
+		delta = (cur_time - cl->cl_last_time);
+		/* cap the idle time that is charged at 8 * machclk_freq */
+		if (delta > machclk_freq * 8)
+			delta = machclk_freq * 8;
+		cl->cl_bw_delta += delta;
+		cl->cl_bw_bytes += m->m_pkthdr.len;
+		cl->cl_last_time = cur_time;
+		cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
+		cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
+
+		/*
+		 * Per-bucket bandwidth calculation
+		 */
+		delta = (cur_time - b->last_time);
+		if (delta > machclk_freq * 8)
+			delta = machclk_freq * 8;
+		b->bw_delta += delta;
+		b->bw_bytes += m->m_pkthdr.len;
+		b->last_time = cur_time;
+		b->bw_delta -= b->bw_delta >> 3;
+		b->bw_bytes -= b->bw_bytes >> 3;
+	}
+	return(m);
+}
+
+/*
+ * Figure out what the next packet would be if there were no limits.  If
+ * this class hits its bandwidth limit *hit_limit is set to non-zero,
+ * otherwise it is set to 0.  The head packet of the selected bucket is
+ * returned either way; NULL is returned only when the class has no
+ * non-empty bucket.
+ *
+ * The packet is left on its queue.  The bucket chosen here is recorded
+ * in cl_polled by fairq_selectq() so that the next fairq_getq() on this
+ * class serves the same bucket.  Polling also advances the per-class
+ * time accumulator (cl_bw_delta, cl_last_time).
+ */
+static struct mbuf *
+fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
+{
+	fairq_bucket_t *b;
+	struct mbuf *m;
+	uint64_t delta;
+	uint64_t bw;
+
+	*hit_limit = 0;
+	b = fairq_selectq(cl, 1);
+	if (b == NULL)
+		return(NULL);
+	m = qhead(&b->queue);
+
+	/*
+	 * Did this packet exceed the class bandwidth?  Calculate the
+	 * bandwidth component of the packet.
+	 *
+	 * - Calculate bytes per second
+	 */
+	delta = cur_time - cl->cl_last_time;
+	if (delta > machclk_freq * 8)
+		delta = machclk_freq * 8;
+	cl->cl_bw_delta += delta;
+	cl->cl_last_time = cur_time;
+	/* guard the division below against a zero time base */
+	if (cl->cl_bw_delta) {
+		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
+
+		if (bw > cl->cl_bandwidth)
+			*hit_limit = 1;
+#if 0
+		kprintf("BW %6lld relative to %6u %d queue %p\n",
+			bw, cl->cl_bandwidth, *hit_limit, b);
+#endif
+	}
+	return(m);
+}
+
+/*
+ * Locate the next queue we want to pull a packet out of.  This code
+ * is also responsible for removing empty buckets from the circular list.
+ *
+ * With ispoll == 0 a bucket remembered by a preceding poll (cl_polled)
+ * is handed back first, so the packet removed is the packet that was
+ * polled.  With ispoll != 0 the selected bucket is stored in cl_polled.
+ * Returns NULL when the class has no non-empty bucket.
+ */
+static
+fairq_bucket_t *
+fairq_selectq(struct fairq_class *cl, int ispoll)
+{
+	fairq_bucket_t *b;
+	uint64_t bw;
+
+	if (ispoll == 0 && cl->cl_polled) {
+		b = cl->cl_polled;
+		cl->cl_polled = NULL;
+		return(b);
+	}
+
+	while ((b = cl->cl_head) != NULL) {
+		/*
+		 * Remove empty queues from consideration
+		 */
+		if (qempty(&b->queue)) {
+			b->in_use = 0;
+			cl->cl_head = b->next;
+			if (cl->cl_head == b) {
+				/* b was the only bucket on the list */
+				cl->cl_head = NULL;
+			} else {
+				b->next->prev = b->prev;
+				b->prev->next = b->next;
+			}
+			continue;
+		}
+
+		/*
+		 * Advance the round robin.  Queues with bandwidths less
+		 * than the hog bandwidth are allowed to burst.
+		 */
+		if (cl->cl_hogs_m1 == 0) {
+			cl->cl_head = b->next;
+		} else if (b->bw_delta) {
+			bw = b->bw_bytes * machclk_freq / b->bw_delta;
+			if (bw >= cl->cl_hogs_m1) {
+				cl->cl_head = b->next;
+			}
+			/*
+			 * XXX TODO - 
+			 */
+		}
+
+		/*
+		 * Return bucket b.
+		 */
+		break;
+	}
+	if (ispoll)
+		cl->cl_polled = b;
+	return(b);
+}
+
+/*
+ * Free every packet queued on class cl, charging each one to the class
+ * drop counter.  The buckets are located through fairq_selectq(), which
+ * also unlinks them from the active list once they are empty.
+ */
+static void
+fairq_purgeq(struct fairq_class *cl)
+{
+	fairq_bucket_t *b;
+	struct mbuf *m;
+
+	while ((b = fairq_selectq(cl, 0)) != NULL) {
+		while ((m = _getq(&b->queue)) != NULL) {
+			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+			m_freem(m);
+		}
+		KKASSERT(qlen(&b->queue) == 0);
+	}
+}
+
+/*
+ * Fill in *sp with a snapshot of the statistics of class cl.  The queue
+ * length is the sum over all buckets on the active list; RED/RIO
+ * statistics are added when the class uses one of those queue types.
+ */
+static void
+get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
+{
+	fairq_bucket_t *b;
+
+	sp->class_handle = cl->cl_handle;
+	sp->qlimit = cl->cl_qlimit;
+	sp->xmit_cnt = cl->cl_xmitcnt;
+	sp->drop_cnt = cl->cl_dropcnt;
+	sp->qtype = cl->cl_qtype;
+	sp->qlength = 0;
+
+	if (cl->cl_head) {
+		/* walk the circular list exactly once */
+		b = cl->cl_head;
+		do {
+			sp->qlength += qlen(&b->queue);
+			b = b->next;
+		} while (b != cl->cl_head);
+	}
+
+#ifdef ALTQ_RED
+	if (cl->cl_qtype == Q_RED)
+		red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+	if (cl->cl_qtype == Q_RIO)
+		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/*
+ * convert a class handle to the corresponding class pointer
+ *
+ * Handle 0 never matches.  Returns NULL when no class registered on the
+ * interface carries the handle.
+ */
+static struct fairq_class *
+clh_to_clp(struct fairq_if *pif, uint32_t chandle)
+{
+	struct fairq_class *cl;
+	int idx;
+
+	if (chandle == 0)
+		return (NULL);
+
+	for (idx = pif->pif_maxpri; idx >= 0; idx--)
+		if ((cl = pif->pif_classes[idx]) != NULL &&
+		    cl->cl_handle == chandle)
+			return (cl);
+
+	return (NULL);
+}
+
+#endif /* ALTQ_FAIRQ */
Index: sys/net/altq/altq_fairq.h
===================================================================
RCS file: sys/net/altq/altq_fairq.h
diff -N sys/net/altq/altq_fairq.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/net/altq/altq_fairq.h	5 Apr 2008 21:33:48 -0000
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project. All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly$
+ */
+
+#ifndef _ALTQ_ALTQ_FAIRQ_H_
+#define _ALTQ_ALTQ_FAIRQ_H_
+
+#include	/* NOTE(review): the five header names were lost from this copy of the patch; restore them before applying */
+#include
+#include
+#include
+#include
+
+#define	FAIRQ_MAX_BUCKETS	2048	/* maximum number of sorting buckets */
+#define	FAIRQ_MAXPRI		RM_MAXPRIO	/* size of pif_classes[] */
+#define FAIRQ_BITMAP_WIDTH	(sizeof(fairq_bitmap_t)*8)
+#define FAIRQ_BITMAP_MASK	(FAIRQ_BITMAP_WIDTH - 1)
+
+/* fairq class flags */
+#define	FARF_RED		0x0001	/* use RED */
+#define	FARF_ECN		0x0002  /* use RED/ECN */
+#define	FARF_RIO		0x0004  /* use RIO */
+#define	FARF_CLEARDSCP		0x0010  /* clear diffserv codepoint */
+#define	FARF_DEFAULTCLASS	0x1000	/* default class */
+
+#define FARF_HAS_PACKETS	0x2000	/* might have queued packets */
+
+#define	FARF_USERFLAGS		(FARF_RED|FARF_ECN|FARF_RIO|FARF_CLEARDSCP| \
+				 FARF_DEFAULTCLASS)	/* flags kept from the options at class creation */
+
+/* special class handles */
+#define	FAIRQ_NULLCLASS_HANDLE	0
+
+typedef u_int	fairq_bitmap_t;
+
+struct fairq_classstats {
+	uint32_t		class_handle;	/* copy of the class handle */
+
+	u_int			qlength;	/* packets queued, summed over active buckets */
+	u_int			qlimit;		/* copy of the class queue limit */
+	struct pktcntr		xmit_cnt;  /* transmitted packet counter */
+	struct pktcntr		drop_cnt;  /* dropped packet counter */
+
+	/* red and rio related info */
+	int			qtype;		/* queue type of the class (cl_qtype) */
+	struct redstats		red[3];	/* rio has 3 red stats */
+};
+
+#ifdef _KERNEL
+
+typedef struct fairq_bucket {
+	struct fairq_bucket *next;	/* circular list */
+	struct fairq_bucket *prev;	/* circular list */
+	class_queue_t	queue;		/* the actual queue */
+	uint64_t	bw_bytes;	/* statistics used to calculate bw */
+	uint64_t	bw_delta;	/* statistics used to calculate bw */
+	uint64_t	last_time;	/* time of the last dequeue from this bucket */
+	int		in_use;		/* non-zero while linked on the active list */
+} fairq_bucket_t;
+
+struct fairq_class {
+	uint32_t	cl_handle;	/* class handle */
+	u_int		cl_nbuckets;	/* (power of 2) */
+	u_int		cl_nbucket_mask; /* bucket mask */
+	fairq_bucket_t	*cl_buckets;	/* bucket array, cl_nbuckets entries */
+	fairq_bucket_t	*cl_head;	/* head of circular bucket list */
+	fairq_bucket_t	*cl_polled;	/* bucket picked by the last poll, consumed by the next dequeue */
+	struct red	*cl_red;	/* RED state */
+	u_int		cl_hogs_m1;	/* hog threshold (options value / 8), 0 = no hog handling */
+	u_int		cl_lssc_m1;	/* link-share m1 (options value / 8), not yet used */
+	u_int		cl_bandwidth;	/* class bandwidth limit (configured value / 8) */
+	uint64_t	cl_bw_bytes;	/* bytes accumulated for the class bw estimate */
+	uint64_t	cl_bw_delta;	/* clock ticks accumulated for the class bw estimate */
+	uint64_t	cl_last_time;	/* time of the last poll or dequeue */
+	int		cl_qtype;	/* rollup */
+	int		cl_qlimit;	/* queue limit applied to each bucket */
+	int		cl_pri;		/* priority */
+	int		cl_flags;	/* class flags */
+	struct fairq_if	*cl_pif;	/* back pointer to pif */
+	struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+	/* round robin index */
+
+	/* statistics */
+	struct pktcntr  cl_xmitcnt;	/* transmitted packet counter */
+	struct pktcntr  cl_dropcnt;	/* dropped packet counter */
+};
+
+/*
+ * fairq interface state
+ */
+struct fairq_if {
+	struct fairq_if		*pif_next;	/* interface state list */
+	struct ifaltq		*pif_ifq;	/* backpointer to ifaltq */
+	u_int			pif_bandwidth;	/* link bandwidth in bps */
+	int			pif_maxpri;	/* max priority in use */
+	struct fairq_class	*pif_poll_cache;/* class chosen by the last poll */
+	struct fairq_class	*pif_default;	/* default class */
+	struct fairq_class	*pif_classes[FAIRQ_MAXPRI]; /* classes, indexed by priority */
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_FAIRQ_H_ */
Index: sys/net/altq/altq_subr.c
===================================================================
RCS file: /cvs/src/sys/net/altq/altq_subr.c,v
retrieving revision 1.9
diff -u -p -r1.9 altq_subr.c
--- sys/net/altq/altq_subr.c	22 Dec 2006 23:44:55 -0000	1.9
+++ sys/net/altq/altq_subr.c	3 Apr 2008 21:18:08 -0000
@@ -360,6 +360,11 @@ case ALTQT_HFSC: error = hfsc_pfattach(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_pfattach(a); + break; +#endif default: error = ENXIO; } @@ -444,6 +449,11 @@ case ALTQT_HFSC: error =
hfsc_add_altq(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_altq(a); + break; +#endif default: error = ENXIO; } @@ -478,6 +488,11 @@ case ALTQT_HFSC: error = hfsc_remove_altq(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_altq(a); + break; +#endif default: error = ENXIO; } @@ -509,6 +524,11 @@ case ALTQT_HFSC: error = hfsc_add_queue(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_add_queue(a); + break; +#endif default: error = ENXIO; } @@ -540,6 +560,11 @@ case ALTQT_HFSC: error = hfsc_remove_queue(a); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_remove_queue(a); + break; +#endif default: error = ENXIO; } @@ -571,6 +596,11 @@ case ALTQT_HFSC: error = hfsc_getqstats(a, ubuf, nbytes); break; #endif +#ifdef ALTQ_FAIRQ + case ALTQT_FAIRQ: + error = fairq_getqstats(a, ubuf, nbytes); + break; +#endif default: error = ENXIO; } Index: sys/net/altq/altq_var.h =================================================================== RCS file: /cvs/src/sys/net/altq/altq_var.h,v retrieving revision 1.2 diff -u -p -r1.2 altq_var.h --- sys/net/altq/altq_var.h 21 May 2006 03:43:46 -0000 1.2 +++ sys/net/altq/altq_var.h 3 Apr 2008 19:44:29 -0000 @@ -100,5 +100,12 @@ int hfsc_remove_queue(struct pf_altq *); int hfsc_getqstats(struct pf_altq *, void *, int *); +int fairq_pfattach(struct pf_altq *); +int fairq_add_altq(struct pf_altq *); +int fairq_remove_altq(struct pf_altq *); +int fairq_add_queue(struct pf_altq *); +int fairq_remove_queue(struct pf_altq *); +int fairq_getqstats(struct pf_altq *, void *, int *); + #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ Index: sys/net/pf/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf/pf.c,v retrieving revision 1.14 diff -u -p -r1.14 pf.c --- sys/net/pf/pf.c 23 May 2007 08:57:10 -0000 1.14 +++ sys/net/pf/pf.c 3 Apr 2008 21:45:12 -0000 @@ -322,6 +322,18 @@ } return (0); } +static 
+int +pf_state_hash(struct pf_state *s) +{ + int hv = (intptr_t)s / sizeof(*s); + + hv ^= crc32(&s->lan, sizeof(s->lan)); + hv ^= crc32(&s->gwy, sizeof(s->gwy)); + hv ^= crc32(&s->ext, sizeof(s->ext)); + return(hv); +} + static int pf_state_compare_lan_ext(struct pf_state *a, struct pf_state *b) { @@ -5461,6 +5473,10 @@ if (pd.tos == IPTOS_LOWDELAY) m->m_pkthdr.altq_qid = r->pqid; else m->m_pkthdr.altq_qid = r->qid; + if (s) { + m->m_pkthdr.fw_flags |= ALTQ_MBUF_STATE_HASHED; + m->m_pkthdr.altq_state_hash = pf_state_hash(s); + } m->m_pkthdr.ecn_af = AF_INET; m->m_pkthdr.header = h; } @@ -5771,6 +5787,10 @@ if (pd.tos == IPTOS_LOWDELAY) m->m_pkthdr.altq_qid = r->pqid; else m->m_pkthdr.altq_qid = r->qid; + if (s) { + m->m_pkthdr.fw_flags |= ALTQ_MBUF_STATE_HASHED; + m->m_pkthdr.altq_state_hash = pf_state_hash(s); + } m->m_pkthdr.ecn_af = AF_INET6; m->m_pkthdr.header = h; } Index: sys/net/pf/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pf/pfvar.h,v retrieving revision 1.5 diff -u -p -r1.5 pfvar.h --- sys/net/pf/pfvar.h 21 Jan 2008 21:16:59 -0000 1.5 +++ sys/net/pf/pfvar.h 3 Apr 2008 23:56:27 -0000 @@ -1037,6 +1037,20 @@ u_int ulsc_m2; int flags; }; +/* + * XXX this needs some work + */ +struct fairq_opts { + u_int nbuckets; /* hash buckets */ + u_int hogs_m1; /* hog detection bandwidth */ + int flags; + + /* link-sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; +}; + struct pf_altq { char ifname[IFNAMSIZ]; @@ -1060,6 +1074,7 @@ union { struct cbq_opts cbq_opts; struct priq_opts priq_opts; struct hfsc_opts hfsc_opts; + struct fairq_opts fairq_opts; } pq_u; u_int32_t qid; /* return value */ Index: sys/sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.46 diff -u -p -r1.46 mbuf.h --- sys/sys/mbuf.h 10 Mar 2008 10:47:57 -0000 1.46 +++ sys/sys/mbuf.h 3 Apr 2008 20:48:41 -0000 @@ -129,6 +129,7 @@ /* 
variables for ALTQ processing */ uint8_t ecn_af; /* address family for ECN */ uint32_t altq_qid; /* queue id */ + uint32_t altq_state_hash; /* identifies 'connections' */ uint16_t ether_vlantag; /* ethernet 802.1p+q vlan tag */ uint16_t pad; /* explicit padding */ @@ -246,6 +247,7 @@ #define ALTQ_MBUF_TAGGED 0x00000020 /* a #define PF_MBUF_GENERATED FW_MBUF_GENERATED #define IPFW_MBUF_GENERATED FW_MBUF_GENERATED #define DUMMYNET_MBUF_TAGGED 0x00000080 +#define ALTQ_MBUF_STATE_HASHED 0x00000100 /* * mbuf types. Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.211 diff -u -p -r1.211 files --- sys/conf/files 22 Mar 2008 21:24:44 -0000 1.211 +++ sys/conf/files 3 Apr 2008 19:39:54 -0000 @@ -686,6 +686,7 @@ # XXX drhodus net/altq/altq_cbq.c optional altq net/altq/altq_hfsc.c optional altq net/altq/altq_priq.c optional altq +net/altq/altq_fairq.c optional altq net/altq/altq_red.c optional altq net/altq/altq_rio.c optional altq net/altq/altq_rmclass.c optional altq Index: sys/conf/options =================================================================== RCS file: /cvs/src/sys/conf/options,v retrieving revision 1.79 diff -u -p -r1.79 options --- sys/conf/options 2 Apr 2008 13:11:48 -0000 1.79 +++ sys/conf/options 3 Apr 2008 18:09:14 -0000 @@ -259,6 +259,7 @@ # altq stuff ALTQ_RIO opt_altq.h ALTQ_HFSC opt_altq.h ALTQ_PRIQ opt_altq.h +ALTQ_FAIRQ opt_altq.h ALTQ_NOPCC opt_altq.h ALTQ_DEBUG opt_altq.h