// dsl_qos_queue - user space queue to maximize QoS over ADSL
//
// Version 0.4 (see README for version history)
//
// Written by Dan Singletary - dvsing@sonicspike.net - 8/26/2002
// 
// This program implements a user space QUEUE processor which
// controls outbound traffic over a DSL modem using a pseudo-
// token-bucket-filter style queue with starvation protection.
// 
// The main feature of this outbound rate limiter is its ability
// to rate limit based on the calculated raw bandwidth used
// rather than just the IP bandwidth used.  This provides a MUCH
// more accurate way to prevent packet queueing in the network
// device (in this case, ADSL modem).
//
#include <linux/netfilter.h>
#include <libipq.h>
#include <stdio.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <sys/time.h>
#include <time.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <syslog.h>
#include <string.h>

#include "daemon.h"

////////////////////////////////
//
// ADJUST HERE FOR YOUR RATE!!!
//
// NOTE: These are now used as the default rates as rates can be altered with
//       command line options. 
//
// The setting of 15900 works for me, I have an SBC ADSL line
// that gives 1500/128.  128kbits/sec = 16000 raw bytes/sec,
// so setting this to 15900 allows a tiny bit of slack to
// let the queue empty if it's already loaded when we
// install the queue.
//
//
//
// Other config stuff that you shouldn't have to change probably.
// 
// Version string printed in the startup banner.
#define VERSION_STR "v0.9.2"
#define BUFSIZE 2048				// Largest packet buffer. Leave at 2048.
#define DEBUG_TIME_LIMIT 0  // Set time limit for queue to exist if testing remotely. 0=disable.
#define IPT_OPTS ""					// Additional iptables options
#define NUM_BANDS 7					// Default number of bands (-n overrides)
#define START_FWMARK 20			// Default fwmark for highest priority band (-m overrides)
#define MAX_QLEN 50 				// Default total packets to queue (-l overrides)
#define TX_RATE_CAP 15900   // Default rate cap in bytes/sec (-r overrides; set for your outbound bandwidth)
#define DEFAULT_QDEV "eth0" // Default queueing device (-i overrides)
//
///////////////////////////////
///////////////////////////////

// Guaranteed minimum rate per band: the rate cap split evenly across the
// bands.  Evaluated at run time from the (possibly overridden) globals.
#define TX_MIN_BAND_RATE (_tx_rate_cap/_num_bands)
// Boolean values used for flags and int "success" returns in this file.
// Note that TRUE is -1, not 1.
#define TRUE -1
#define FALSE 0

// Values for the global 'transport', selecting the overhead model used
// by dsl_len().
#define TRANSPORT_RFC1483  1000
#define TRANSPORT_PPPoE    2000

// Structures

// One packet buffer from the preallocated pile (ipq_buff_pile).  While a
// packet is queued the buffer also serves as a node in a band's
// doubly-linked list.
struct ipq_buff
{
  unsigned int id;	// index of this buffer in ipq_buff_pile, set by ipq_buff_get()
	struct ipq_buff *prev; // neighbour towards the list head (NULL at the head)
	struct ipq_buff *next; // neighbour towards the list tail (NULL at the tail)
	unsigned char buf[BUFSIZE]; // raw message storage filled by ipq_read()
	ipq_packet_msg_t *m; // result of ipq_get_packet(buf); NULL until a packet is read
};

// One priority band: a FIFO of queued packet buffers plus the time after
// which the band counts as starving in dequeue_tx().
struct ipq_buff_list
{
	struct ipq_buff *head; // oldest queued buffer (next to be dequeued), NULL when empty
	struct ipq_buff *tail; // newest queued buffer (dropped first by ibl_drop()), NULL when empty
	int len; // number of buffers currently in the list
	struct timeval next_minrate_dequeue; // set whenever a packet of this band is sent; see dequeue_tx()
};

// Functions
//
// Forward declarations for every function defined in this file, so that
// definitions may appear in any order.

// iptables / lifecycle
int hook_iptables(char *dev);
int unhook_iptables(char *dev);
void sig_handler(int sig);
void setup();
void cleanup();

// packet inspection and debugging
void debug_pm(ipq_packet_msg_t *m);
int is_ip(ipq_packet_msg_t *m);
unsigned char* ip_data(ipq_packet_msg_t *m);
int is_tcp(ipq_packet_msg_t *m);
unsigned char* tcp_data(ipq_packet_msg_t *m);
void debug_ip(ipq_packet_msg_t *m);
void debug_tcp(ipq_packet_msg_t *m);
void set_tcp_win(ipq_packet_msg_t *m, unsigned short int win);
unsigned short int get_tcp_win(ipq_packet_msg_t *m);
void set_tcp_csum(ipq_packet_msg_t *m, unsigned short int csum);
unsigned short int get_tcp_csum(ipq_packet_msg_t *m);

// struct timeval arithmetic
struct timeval tv_add(struct timeval a, struct timeval b);
struct timeval tv_sub(struct timeval a, struct timeval b);
long int tv_usec(struct timeval t);
int tv_cmp(struct timeval a, struct timeval b);

// packet processing and rate limiting
int process_pkt(struct ipq_buff *ib);
int process_tx_pkt(struct ipq_buff *ib);
int process_rx_pkt(struct ipq_buff *ib);
unsigned short int dsl_len(unsigned short int ip_len);
struct timeval tv_txtime(unsigned short int bytes, unsigned int bytes_per_sec);
void dequeue_tx();
void enqueue_tx(struct ipq_buff *ib);
void ipq_die();

// band lists and buffer pool
void ibl_enqueue(struct ipq_buff_list *ibl, struct ipq_buff *ib);
struct ipq_buff *ibl_dequeue(struct ipq_buff_list *ibl);
// ibl_drop() is called by enqueue_tx() before its definition; without this
// prototype the call is an implicit declaration.
int ibl_drop(struct ipq_buff_list *ibl);
struct ipq_buff *ipq_buff_get();
void ipq_buff_release(struct ipq_buff *ib);

// some global variables
struct timezone null_tz; // zeroed in setup(); passed to every gettimeofday() call
struct ipq_handle *_qh=NULL; // libipq handle, created in setup(), destroyed in cleanup()
char *_qdev=DEFAULT_QDEV; // device whose traffic is QUEUE'd (-i option)
unsigned long int _tx_rate_cap=TX_RATE_CAP; // outbound rate cap in bytes/sec (-r option)
unsigned long int _num_bands=NUM_BANDS; // number of priority bands (-n option)
unsigned long int _max_qlen=MAX_QLEN; // max queued packets; also the buffer pool size (-l option)
unsigned long int _start_fwmark=START_FWMARK; // fwmark mapped to band 0 (-m option)
int transport = TRANSPORT_RFC1483; // overhead model used by dsl_len() (-t option)

// global accounting
// Byte counters accumulate dsl_len() of each packet, not the raw IP length.
unsigned long long total_tx_bytes=0;
unsigned long long total_tx_pkts=0;
unsigned long long total_rx_bytes=0;
unsigned long long total_rx_pkts=0;

// outbound traffic queues
struct ipq_buff_list *tx_queue; // array of _num_bands lists, allocated in main()
struct timeval dequeue_tx_next; // earliest time the next packet may be released; 0/0 means idle
unsigned int tx_qlen=0; // total packets queued across all bands

// usable buffers
struct ipq_buff *ipq_buff_pile; // array of _max_qlen buffers, allocated in main()
int *ipq_buff_recycle_list; // ring of free buffer indices; -1 marks a slot with no free buffer
unsigned int ipq_buff_get_next; // ring position ipq_buff_get() takes from next
unsigned int ipq_buff_release_next; // ring position ipq_buff_release() writes to next
struct ipq_buff *ib; // buffer the main loop reads into; NULL when a new one must be fetched

// parse_uint_option()
//
// Converts the text of a numeric command line option into an unsigned
// value.  Exits with an error message when the text is not a complete,
// non-negative decimal integer.  (atoi() silently turned "-5" into a huge
// unsigned value that slipped past the bounds checks.)
static unsigned long int parse_uint_option(const char *opt, const char *text)
{
	char *end;
	long int value;

	value=strtol(text, &end, 10);
	if (end==text || *end!='\0' || value<0)
	{
		printf("Invalid value for %s: %s\n", opt, text);
		exit(1);
	}

	return (unsigned long int)value;
}

// main()
//
// Parses the command line, validates the settings, allocates the band
// lists and the buffer pool, installs the signal handlers and calls
// setup().  It then loops forever: QUEUE'd packets are read into user
// space (here) and handed to process_pkt().  Packets for which
// process_pkt() returns true are accepted immediately; the others are now
// owned by the tx queue and are released later by dequeue_tx().
//
int main(int argc, char **argv)
{
	int status;
	int i;
	struct timeval now;
	int timeout;
	int run_as_daemon=FALSE;

	printf("-- dsl_qos_queue (%s) --\n", VERSION_STR);

	// Parse command line arguments
	for (i=1; i<argc; i++)
	{
		if (strcmp(argv[i],"-h")==0)
		{
			// Show help
			printf("Usage:\n");
			printf("     dsl_qos_queue <options>\n");
			printf("\n");
			printf("Options:\n");
			printf("     -h             Display this help message.\n");
			printf("     -d             Run as daemon.\n");
			printf("     -i <device>    Specify DSL device (ie eth0, ppp0) Default: %s\n", _qdev);
			printf("     -r <caprate>   Specify outbound rate cap in bytes per second.  Default: %lu\n", _tx_rate_cap);
			printf("     -n <numbands>  Specify number of bands.  Default: %lu\n", _num_bands);
			printf("     -m <fwmark>    Specify fwmark of first band.  Default: %lu\n", _start_fwmark);
			printf("     -l <qlen>      Specify max total queue length in packets.  Default: %lu\n", _max_qlen);
			printf("     -t <transport> Specify transport mode, RFC1483 or PPPoE. Default: RFC1483\n");
			exit(1);
		}
		else if (strcmp(argv[i], "-d")==0)
		{
			run_as_daemon=TRUE;
		}
		else if (strcmp(argv[i], "-i")==0 && i+1<argc)
		{
			i++;
			_qdev=argv[i];
		}
		else if (strcmp(argv[i], "-r")==0 && i+1<argc)
		{
			i++;
			_tx_rate_cap=parse_uint_option("-r", argv[i]);
		}
		else if (strcmp(argv[i], "-n")==0 && i+1<argc)
		{
			i++;
			_num_bands=parse_uint_option("-n", argv[i]);
		}
		else if (strcmp(argv[i], "-m")==0 && i+1<argc)
		{
			i++;
			_start_fwmark=parse_uint_option("-m", argv[i]);
		}
		else if (strcmp(argv[i], "-l")==0 && i+1<argc)
		{
			i++;
			_max_qlen=parse_uint_option("-l", argv[i]);
		}
		else if (strcmp(argv[i], "-t")==0 && i+1<argc)
		{
			i++;
			if (strcasecmp(argv[i], "RFC1483")==0) transport=TRANSPORT_RFC1483;
			else if (strcasecmp(argv[i], "PPPoE")==0) transport=TRANSPORT_PPPoE;
			else
			{
				printf("Unrecognized transport: %s\n", argv[i]);
				exit(1);
			}
		}
		else
		{
			// unknown argument (or option missing its value) - exit w/ error
			printf("Unrecognized argument: %s\n", argv[i]);
			exit(1);
		}
	}

	// Check bounds.  The values are unsigned; negative input has already
	// been rejected by parse_uint_option().
	if (_tx_rate_cap==0)
	{
		printf("<caprate> must be a positive integer.\n");
		exit(1);
	}

	if (_num_bands<2 || _num_bands>50)
	{
		printf("<numbands> must be between 2 and 50.\n");
		exit(1);
	}

	if (_start_fwmark > 100)
	{
		printf("<fwmark> must be a positive integer less than or equal to 100\n");
		exit(1);
	}

	if (_max_qlen < _num_bands*2 || _max_qlen > 1000)
	{
		printf("<qlen> must be >= <numbands>*2 and <= 1000\n");
		exit(1);
	}

	// Allocate memory according to command line options
	tx_queue=malloc(sizeof(struct ipq_buff_list)*_num_bands);
	ipq_buff_pile=malloc(sizeof(struct ipq_buff)*_max_qlen);
	ipq_buff_recycle_list=malloc(sizeof(int)*_max_qlen);
	if (!tx_queue || !ipq_buff_pile || !ipq_buff_recycle_list)
	{
		printf("Unable to allocate queue memory.\n");
		exit(1);
	}

	// Check permission
	if (getuid()!=0)
	{
		printf("You must be root to use this program.\n");
		exit(1);
	}

	// Show running options
	printf("Queue Parameters:\n");
	printf("  Device:          %s\n", _qdev);
	printf("  Rate:            %lu\n", _tx_rate_cap);
	printf("  Bands:           %lu\n", _num_bands);
	printf("  Initial fwmark:  %lu\n", _start_fwmark);
	printf("  Queue length:    %lu\n", _max_qlen);
	printf("  Transport:       %s\n", (transport==TRANSPORT_RFC1483)?"RFC1483":
	                                  (transport==TRANSPORT_PPPoE)?"PPPoE":"Unknown");
	printf("\n");
	printf("Initialized %lu-band tx queue using fwmarks %lu to %lu.\n", _num_bands, _start_fwmark, _start_fwmark+_num_bands-1);
	printf("TX rate cap: %lu\n", _tx_rate_cap);
	printf("TX band minimum rate: %lu\n", TX_MIN_BAND_RATE);

	// Register cleanup functions
	signal(SIGTERM, &sig_handler);
	signal(SIGINT, &sig_handler);
	signal(SIGSEGV, &sig_handler);
	signal(SIGALRM, &sig_handler);

	// Set program time limit (so if we screw something up we
	// won't get locked out!!)
	if (DEBUG_TIME_LIMIT) alarm(DEBUG_TIME_LIMIT);

	setup();

	// launch as daemon, if that's how we're gonna do it
	if (run_as_daemon) daemon_init();

	syslog(LOG_INFO, "Queue started.");

	do
	{
		// Fetch a fresh buffer if the previous one was handed to the queue
		if (!ib)
		{
			ib=ipq_buff_get();
		}

		if (dequeue_tx_next.tv_sec==0 && dequeue_tx_next.tv_usec==0)
		{
			// Idle: nothing scheduled, so pass a zero timeout to ipq_read()
			timeout=0;
		}
		else
		{
			gettimeofday(&now, &null_tz);
			if (tv_cmp(now, dequeue_tx_next) >= 0)
			{
				// The scheduled release time has passed
				dequeue_tx();
				continue;
			}
			else
			{
				// Wait for a packet, but no longer than the next release time
				timeout=tv_usec(tv_sub(dequeue_tx_next, now));
			}
		}

		status = ipq_read(_qh, ib->buf, BUFSIZE, timeout);
		if (status < 0)
		{
			syslog(LOG_INFO, "Warning: mainloop: ipq_read() error: %s", ipq_errstr());
			syslog(LOG_INFO, "Resetting queue in an attempt to prevent crash...");

			cleanup();
			setup();

			syslog(LOG_INFO, "Queue has been reset.");

			continue;
		}

		if (status==0)
		{
			// Timed out waiting for a packet: time to release a queued one
			dequeue_tx();
			continue;
		}

		switch (ipq_message_type(ib->buf))
		{

		case NLMSG_ERROR:
			syslog(LOG_INFO, "Warning: Received ipq error message: %d\n",
				ipq_get_msgerr(ib->buf));
			break;

		case IPQM_PACKET:
		{
			ib->m = ipq_get_packet(ib->buf);

			if (process_pkt(ib))
			{
				// Packet should be immediately accepted
				// Accept packet w/ any modifications we may have made.
				status = ipq_set_verdict(_qh, ib->m->packet_id,
					NF_ACCEPT, ib->m->data_len, ib->m->payload);

				if (status < 0)
				{
					syslog(LOG_INFO, "Warning: mainloop: ipq_set_verdict returned error.");
					continue;
				}

			}
			else
			{
				// Packet was queued or dropped; the buffer now belongs to
				// the queue.  Set ib to NULL so a new buffer is fetched.
				ib=NULL;
			}

			break;
		}
		default:
			fprintf(stderr, "Unknown message type!\n");
			break;
		}
	} while (1);

	// Not reached: the loop above never terminates.
	syslog(LOG_INFO, "Error: Packet loop broken.");
	cleanup();
	return 0;
}

// debug_pm() - shows debugging information for an ipq message
//
// Prints the message timestamp, the in/out device names and the payload
// length, followed by the IP header (debug_ip) and, for TCP packets, the
// TCP header (debug_tcp).
void debug_pm(ipq_packet_msg_t *m)
{
	int ip=is_ip(m);

	printf("debug_pm()\n");
	// Cast to the exact types the conversions expect so the output is
	// correct whatever integer types the message struct declares.
	printf("  t=%ld.%06ld\n", (long int)m->timestamp_sec, (long int)m->timestamp_usec);
	printf("  in=%s\n", m->indev_name);
	printf("  out=%s\n", m->outdev_name);
	printf("  data_len=%lu\n", (unsigned long int)m->data_len);
	printf("  ip=%s\n", ip ? "yes" : "no");
	if (ip)
	{
		int tcp=is_tcp(m);

		debug_ip(m);
		printf("  tcp=%s\n", tcp ? "yes" : "no");
		if (tcp)
		{
			debug_tcp(m);
		}
	}
	printf("\n");
}

// is_ip()
//
// Returns 1 when the message carries an IPv4 packet: hardware protocol
// 0x0800, at least 20 bytes of payload (a minimal IP header) and an IP
// version field of 4.  Returns 0 otherwise.
int is_ip(ipq_packet_msg_t *m)
{
	if (ntohs(m->hw_protocol)!=0x0800) return 0;
	if (m->data_len<20) return 0;

	// the version lives in the high nibble of the first header byte
	return (m->payload[0]>>4)==4;
}

// ip_data()
//
// Returns a pointer to the first byte after the IP header.  The header
// length is taken from the low nibble of the first byte, counted in
// 32-bit words.
unsigned char* ip_data(ipq_packet_msg_t *m)
{
	unsigned int header_words=m->payload[0] & 0x0F;

	return &m->payload[header_words*4];
}

// is_tcp()
//
// Returns 1 when the message is an IPv4 packet whose protocol field
// (header byte 9) is 6, i.e. TCP; 0 otherwise.
int is_tcp(ipq_packet_msg_t *m)
{
	if (!is_ip(m)) return 0;

	return m->payload[9]==6;
}

// tcp_data()
//
// Returns a pointer to the TCP payload (the first byte after the TCP
// header), or NULL when the packet is not TCP or is too short to hold the
// IP and TCP headers it announces.  This used to be a stub that always
// returned NULL.
unsigned char* tcp_data(ipq_packet_msg_t *m)
{
	unsigned char *tcp;
	unsigned int ip_hlen;
	unsigned int tcp_hlen;

	if (!is_tcp(m)) return NULL;

	// IP header length: low nibble of byte 0, in 32-bit words
	ip_hlen=(m->payload[0] & 0x0F)*4;
	if (ip_hlen<20 || m->data_len<ip_hlen+20) return NULL;

	// TCP data offset: high nibble of TCP header byte 12, in 32-bit words
	tcp=m->payload+ip_hlen;
	tcp_hlen=(tcp[12]>>4)*4;
	if (tcp_hlen<20 || m->data_len<ip_hlen+tcp_hlen) return NULL;

	return tcp+tcp_hlen;
}

// shows debugging information for IP packets
//
// Prints each field of the IPv4 header on its own line.  Does nothing
// when the message is not IPv4.
void debug_ip(ipq_packet_msg_t *m)
{
	if (!is_ip(m)) return;
	printf("    ver=%u\n", m->payload[0]>>4);
	printf("    ihl=%u(*4)\n", m->payload[0]&0x0F);
	printf("    tos=0x%02X\n", m->payload[1]);
	printf("    len=%u\n", ntohs(*(unsigned short int*)&m->payload[2]));
	printf("    idf=0x%04X\n", ntohs(*(unsigned short int*)&m->payload[4]));
	// flags are the top three bits of byte 6 (shown unshifted)
	printf("    flg=0x%X\n", m->payload[6]&0xE0);
	// The fragment offset is the low 13 bits of the 16-bit field at bytes
	// 6-7 in host order, so swap first and mask afterwards.  Masking the
	// raw value with 0x1F before the swap gave the wrong number.
	printf("    fos=%hu\n", ntohs(*(unsigned short int*)&m->payload[6])&0x1FFF);
	printf("    ttl=%hhu\n", m->payload[8]);
	printf("    prt=%hhu\n", m->payload[9]);
	printf("    hcs=0x%04X\n", ntohs(*(unsigned short int*)&m->payload[10]));
	printf("    src=%hhu.%hhu.%hhu.%hhu\n", m->payload[12], m->payload[13], m->payload[14], m->payload[15]);
	printf("    dst=%hhu.%hhu.%hhu.%hhu\n", m->payload[16], m->payload[17], m->payload[18], m->payload[19]);
}

// shows debugging information for TCP packets
//
// Prints each fixed field of the TCP header on its own line.  Does
// nothing when the message is not TCP.
void debug_tcp(ipq_packet_msg_t *m)
{
	unsigned char *tcp;
	uint32_t seq;
	uint32_t ack;

	if (!is_tcp(m)) return;

	tcp=ip_data(m);

	// Sequence and acknowledgement numbers are exactly 32 bits wide.
	// Reading them through an 'unsigned long *' fetched 8 bytes on LP64
	// systems, so copy exactly four bytes instead.
	memcpy(&seq, &tcp[4], sizeof(seq));
	memcpy(&ack, &tcp[8], sizeof(ack));

	printf("      spt=%u\n", ntohs(*(unsigned short int*)&tcp[0]));
	printf("      dpt=%u\n", ntohs(*(unsigned short int*)&tcp[2]));
	printf("      sqn=%u\n", ntohl(seq));
	printf("      ack=%u\n", ntohl(ack));
	printf("      dof=%u(*4)\n", tcp[12]>>4);
	printf("      flg=0x%02X\n", tcp[13]);
	printf("      win=%u\n", ntohs(*(unsigned short int*)&tcp[14]));
	printf("      chk=0x%04X\n", ntohs(*(unsigned short int*)&tcp[16]));
	printf("      urg=0x%04X\n", ntohs(*(unsigned short int*)&tcp[18]));
}

// set_tcp_win()
//
// Changes the TCP advertised window of the packet to 'win' and adjusts
// the TCP checksum to match.  The checksum is updated incrementally from
// the old and new window values rather than recomputed over the whole
// segment.  Does nothing when the message is not TCP.
void set_tcp_win(ipq_packet_msg_t *m, unsigned short int win)
{
	long int cs;

	if (!is_tcp(m)) return;

	// Recalculate the checksum
	// Start from the complement of the stored checksum ...
	cs=get_tcp_csum(m);
	cs=~cs & 0xffff;
	// ... take the old window value out of it.  A result of zero or less
	// is decremented and wrapped back into 16 bits (the borrow of the
	// 16-bit wrap-around arithmetic; zero becomes 0xffff).
	cs-=get_tcp_win(m);
	if (cs<=0)
	{
		cs--;
		cs&=0xffff;
 	}

  // ... add the new window value, folding a carry out of bit 16 back in
  cs+=win & 0xffff;
	if (cs & 0x10000)
	{
		cs++;
		cs&=0xffff;
	}
	// ... and complement again to obtain the checksum to store.
	cs=~cs&0xffff;
	set_tcp_csum(m, cs);

        // Apply new window size to packet (must happen after the old value
        // has been read above)
	(*(unsigned short int*)&ip_data(m)[14])=htons(win);
}

// get_tcp_win()
//
// Returns the advertised window (TCP header bytes 14-15) in host byte
// order, or 0 when the message is not TCP.
unsigned short int get_tcp_win(ipq_packet_msg_t *m)
{
	unsigned short int *field;

	if (!is_tcp(m)) return 0;

	field=(unsigned short int*)&ip_data(m)[14];
	return ntohs(*field);
}

// set_tcp_csum()
//
// Stores 'csum' (host byte order) into the TCP checksum field (TCP header
// bytes 16-17).  Does nothing when the message is not TCP, matching
// get_tcp_csum() and get_tcp_win(); without that guard a non-TCP packet
// would have two bytes of its payload overwritten.
void set_tcp_csum(ipq_packet_msg_t *m, unsigned short int csum)
{
	if (!is_tcp(m)) return;

	(*(unsigned short int*)&ip_data(m)[16])=htons(csum);
}

// get_tcp_csum()
//
// Returns the TCP checksum field (TCP header bytes 16-17) in host byte
// order, or 0 when the message is not TCP.
unsigned short int get_tcp_csum(ipq_packet_msg_t *m)
{
	unsigned short int *field;

	if (!is_tcp(m)) return 0;

	field=(unsigned short int*)&ip_data(m)[16];
	return ntohs(*field);
}

// sig_handler()
//
// Common handler for the signals registered in main(): SIGTERM, SIGINT,
// SIGSEGV and SIGALRM.  Reports the signal on stdout and in syslog,
// removes the iptables hooks and the ipq handle via cleanup(), then
// exits with status 1.
void sig_handler(int sig)
{
	// report on the console and in the system log
	printf("Received signal %d...\n", sig);
	syslog(LOG_INFO,"Received signal %d.", sig);

	// back to the default action, so a repeat of this signal during
	// cleanup cannot re-enter the handler endlessly
	signal(sig, SIG_DFL);

	cleanup();

	exit(1);
}

// hooks iptables, sets up queue
void setup()
{
	struct timeval now;
	int status;
  unsigned int i;
	int rval;
	
  syslog(LOG_INFO, "setup()");
	
  rval=system("modprobe ip_queue");
	if (WEXITSTATUS(rval)!=0)
	{
		printf("Unable to install ip_queue module.\n");
		exit(1);
	}
	
	hook_iptables(_qdev);

	_qh = ipq_create_handle(0, PF_INET);
	if (!_qh) 
	{
		printf("Unable to create ipq handle.\n");
		ipq_die();
	}
	
	status = ipq_set_mode(_qh, IPQ_COPY_PACKET, BUFSIZE);
	if (status < 0)
	{
		printf("Unable to set ipq mode.\n");
		ipq_die();
	}

  // Clear null timezone and initialize random seed
	memset (&null_tz, 0, sizeof (null_tz));
  gettimeofday (&now, &null_tz);
	srand(now.tv_usec);

	// initialize buffer pile
	for (i=0; i<_max_qlen; i++) ipq_buff_recycle_list[i]=i;

	// initialize tx queues
  for (i=0; i<NUM_BANDS; i++)
	{
		tx_queue[i].head=NULL;
		tx_queue[i].tail=NULL;
		tx_queue[i].len=0;
    tx_queue[i].next_minrate_dequeue.tv_sec=0;
		tx_queue[i].next_minrate_dequeue.tv_usec=0;
	}	

	tx_qlen=0;
	dequeue_tx_next.tv_sec=0;
	dequeue_tx_next.tv_usec=0;

	ipq_buff_get_next=0;
	ipq_buff_release_next=0;

	ib=NULL;
}

// cleanup()
//
// Destroys the ipq handle (if any) and removes the QUEUE rules from
// iptables.  Safe to call more than once: the handle is cleared after it
// is destroyed, so a second call (for example from sig_handler() while
// main() is resetting the queue) does not destroy it twice.
void cleanup()
{
	syslog(LOG_INFO, "cleanup()");

	// release handle
	if (_qh)
	{
		ipq_destroy_handle(_qh);
		_qh=NULL;
	}

	// clean up iptables
	if (_qdev) unhook_iptables(_qdev);
}

// hook_iptables()
//
// Appends two rules to the iptables mangle table that send traffic of
// 'dev' to the QUEUE target: PREROUTING for packets arriving on the
// device and POSTROUTING for packets leaving through it.
//
// Returns -1 (true) when both rules were installed, 0 as soon as one of
// the iptables commands reports a non-zero exit status.
int hook_iptables(char *dev)
{
	char cmd[BUFSIZE];
	int wait_status;

	printf("Installing QUEUE targets for device %s in iptables...\n", dev);

	// inbound rule first; the outbound rule is only tried if it succeeds
	snprintf(cmd, BUFSIZE, "iptables -t mangle -A PREROUTING %s -i %s -j QUEUE", IPT_OPTS, dev);
	wait_status=system(cmd);
	if (WEXITSTATUS(wait_status)==0)
	{
		snprintf(cmd, BUFSIZE, "iptables -t mangle -A POSTROUTING %s -o %s -j QUEUE", IPT_OPTS, dev);
		wait_status=system(cmd);
		if (WEXITSTATUS(wait_status)==0)
		{
			return -1;
		}
	}

	// either command failed
	printf("Unable to install queue target in iptables.\n");
	return 0;
}

// unhook_iptables()
//
// Deletes the two QUEUE rules that hook_iptables() added for 'dev'
// (mangle PREROUTING and POSTROUTING).
//
// Both deletions are always attempted: a failure to remove the inbound
// rule must not leave the outbound rule installed, since packets sent to
// QUEUE after this program has gone are no longer handled by anyone.
//
// Returns -1 (true) when both rules were removed, 0 when either command
// could not be run, ended abnormally or exited with a non-zero status.
int unhook_iptables(char *dev)
{
	int rval;
	int ok=-1;
	char command[BUFSIZE];

	printf("Removing QUEUE targets for device %s in iptables...\n", dev);

	// remove the inbound rule
	snprintf(command, BUFSIZE, "iptables -t mangle -D PREROUTING %s -i %s -j QUEUE", IPT_OPTS, dev);
	rval=system(command);
	if (rval==-1 || !WIFEXITED(rval) || WEXITSTATUS(rval)!=0)
	{
		printf("Unable to remove queue targets in iptables.\n");
		ok=0;
	}

	// remove the outbound rule, whether or not the first removal worked
	snprintf(command, BUFSIZE, "iptables -t mangle -D POSTROUTING %s -o %s -j QUEUE", IPT_OPTS, dev);
	rval=system(command);
	if (rval==-1 || !WIFEXITED(rval) || WEXITSTATUS(rval)!=0)
	{
		printf("Unable to remove queue targets in iptables.\n");
		ok=0;
	}

	return ok;
}

// tv_add()
//
// Returns a+b.  Both arguments are expected to be normalized
// (0 <= tv_usec < 1000000), so at most one second carries over from the
// microsecond sum.
struct timeval tv_add(struct timeval a, struct timeval b)
{
	struct timeval sum;

	sum.tv_sec=a.tv_sec+b.tv_sec;
	sum.tv_usec=a.tv_usec+b.tv_usec;

	// carry a full second out of the microsecond field
	if (sum.tv_usec>=1000000)
	{
		sum.tv_usec-=1000000;
		sum.tv_sec++;
	}

	return sum;
}

// tv_sub()
//
// Returns a-b.  Both arguments are expected to be normalized
// (0 <= tv_usec < 1000000); a negative microsecond difference borrows
// one second.
struct timeval tv_sub(struct timeval a, struct timeval b)
{
	struct timeval diff;

	diff.tv_sec=a.tv_sec-b.tv_sec;
	diff.tv_usec=a.tv_usec-b.tv_usec;

	// borrow a second when the microsecond field went negative
	if (diff.tv_usec<0)
	{
		diff.tv_usec+=1000000;
		diff.tv_sec--;
	}

	return diff;
}

// tv_cmp()
//
// Three-way comparison of two times: returns 1 when a is later than b,
// -1 when a is earlier, 0 when both are equal.  Seconds decide first,
// microseconds break the tie.
int tv_cmp(struct timeval a, struct timeval b)
{
	if (a.tv_sec!=b.tv_sec)
	{
		return (a.tv_sec>b.tv_sec) ? 1 : -1;
	}

	if (a.tv_usec!=b.tv_usec)
	{
		return (a.tv_usec>b.tv_usec) ? 1 : -1;
	}

	return 0;
}

// process_pkt()
//
// Packet dispatcher.  Decides the direction of a packet from which
// device names are set in its message and passes it to the matching
// handler.
//
// Returns true (non-zero) if the packet should be immediately accepted
// by the caller, or false (0) if the packet was taken over by the queue.
// Non-IP packets and packets whose direction cannot be determined are
// accepted immediately.
int process_pkt(struct ipq_buff *ib)
{
	ipq_packet_msg_t *msg=ib->m;
	int has_indev;
	int has_outdev;

	if (!is_ip(msg)) return -1;

	has_indev=(msg->indev_name[0]!=0);
	has_outdev=(msg->outdev_name[0]!=0);

	// only an output device: TX packet
	if (!has_indev && has_outdev) return process_tx_pkt(ib);

	// only an input device: RX packet
	if (has_indev && !has_outdev) return process_rx_pkt(ib);

	// both or neither set ...?
	printf("Unable to determine packet direction for:\n");
	debug_pm(msg);
	return -1;
}


// process_tx_pkt()
//
// Handles every outbound packet: adds it to the TX accounting (using its
// size on the DSL line, not its IP length) and hands it to enqueue_tx(),
// which either sends it at once or queues it.
//
// Always returns 0 (false): the verdict is issued by the queue code, so
// the caller must not accept the packet itself.
int process_tx_pkt(struct ipq_buff *ib)
{
	unsigned short int line_bytes=dsl_len(ib->m->data_len);

	total_tx_pkts++;
	total_tx_bytes+=line_bytes;

	// (any packet mangling would go here, before the packet is sent)

	// send/queue packet for transmission
	enqueue_tx(ib);

	return 0;
}

// process_rx_pkt()
//
// Handles every inbound packet: adds it to the RX accounting (using its
// size on the DSL line) and returns -1 (true) so that the caller accepts
// it immediately.  Inbound traffic is never queued.
int process_rx_pkt(struct ipq_buff *ib)
{
	unsigned short int line_bytes=dsl_len(ib->m->data_len);

	total_rx_pkts++;
	total_rx_bytes+=line_bytes;

	return -1;
}

// dsl_len()
//
// This function returns the bytes of bandwidth an IP packet of
// ip_pkt_len bytes uses on the DSL line, according to the global
// 'transport' setting.  You may have to adjust it to match your
// transport/encapsulation.
//
// TRANSPORT_RFC1483: RFC 1483 Bridged Ethernet, LLC/SNAP Method, MAC FCS
//   not preserved (default bridged ethernet).
// TRANSPORT_PPPoE: the same, with two additional layers; the PPP layer
//   adds 2 bytes of overhead per IP packet and the PPPoE layer adds 6.
// Any other value: no overhead is calculated, ip_pkt_len is returned.
//
// How the line length is built up:
//   Ethernet payload - the IP packet (plus 2+6 bytes for PPPoE), with a
//                      minimum of 46 bytes
//   Ethernet packet  - dst[6] + src[6] + type[2] + payload (FCS dropped)
//   LLC packet       - llc(3) + oui(3) + pid(2) + pad(2) + Ethernet packet
//   AAL5 packet      - LLC packet + cpcs_uu(1) + cpi(1) + len(2) + crc(4)
//   ATM cells        - AAL5 frames are padded to a multiple of 48 bytes
//                      (the ATM payload size), so the cell count is the
//                      number of 48-byte buckets needed for the AAL5
//                      packet; each cell takes 53 bytes on the wire.
//
unsigned short int dsl_len(unsigned short int ip_pkt_len)
{
	int enet_payload;
	int aal5_len;
	int cells;

	if (transport == TRANSPORT_RFC1483)
	{
		enet_payload=ip_pkt_len;
	}
	else if (transport == TRANSPORT_PPPoE)
	{
		enet_payload=ip_pkt_len+2+6;
	}
	else
	{
		// Don't calculate anything extra.
		return ip_pkt_len;
	}

	// Ethernet pads short payloads up to 46 bytes
	if (enet_payload<46) enet_payload=46;

	// LLC/SNAP header + Ethernet header + payload + AAL5 trailer
	aal5_len=(3+3+2+2)+(6+6+2)+enet_payload+(1+1+2+4);

	// round up to whole 48-byte cell payloads
	cells=((aal5_len-1)/48)+1;

	return cells*53;
}

// tv_txtime()
//
// returns a timeval with the duration required to send the
// specified amount of bytes at a specified bytes per second rate
//
// A rate of 0 is treated as 1 byte per second, the slowest rate that can
// be expressed, instead of dividing by zero.  TX_MIN_BAND_RATE evaluates
// to 0 whenever the rate cap is smaller than the number of bands.
struct timeval tv_txtime(unsigned short int bytes, unsigned int bytes_per_sec)
{
	struct timeval r;
	long long int us;

	if (bytes_per_sec==0) bytes_per_sec=1;

	// total duration in microseconds, computed in 64 bits
	us=(long long)bytes*1000000/bytes_per_sec;
	r.tv_sec=us/1000000;
	r.tv_usec=us%1000000;
	return r;
}

// tv_usec()
//
// Returns the whole duration held in t as a number of microseconds.
long int tv_usec(struct timeval t)
{
	long int total=t.tv_usec;

	total+=1000000*t.tv_sec;
	return total;
}

// dequeue_tx()
//
// Releases one queued packet, if there is one, and schedules the time at
// which the next packet may follow.
//
// With an empty queue the schedule is cleared (dequeue_tx_next = 0/0),
// which marks the link idle so that enqueue_tx() sends the next packet
// without queueing.  Otherwise the packet comes from the first non-empty
// band, unless another non-empty band is starving (its
// next_minrate_dequeue time is not later than dequeue_tx_next), in which
// case the first such band is served instead.
void dequeue_tx()
{
	struct ipq_buff *ib;
	int status;
	int band, sband;

	if (tx_qlen==0)
	{
		// Last packet transmitted and no more packets to transmit --> IDLE
		dequeue_tx_next.tv_sec=0;
		dequeue_tx_next.tv_usec=0;
		return;
	}

	// Find a packet to dequeue

	// Pick priority band
	for (band=0; band<_num_bands; band++) if (tx_queue[band].len) break;

	// Override priority band if there is a band starving
	for (sband=0; sband<_num_bands; sband++)
	{
		if (sband!=band &&
				tx_queue[sband].len &&
				tv_cmp(dequeue_tx_next,tx_queue[sband].next_minrate_dequeue)>=0)
		{
			band=sband;
			break;
		}
	}

	// tx_qlen claims there are packets, but if no band holds one the
	// counter is out of step with the lists.  Go idle instead of indexing
	// past the band array or dereferencing a NULL buffer.
	ib=(band<_num_bands) ? ibl_dequeue(&tx_queue[band]) : NULL;
	if (!ib)
	{
		syslog(LOG_INFO, "Warning: dequeue_tx: queue length out of sync, resetting.");
		tx_qlen=0;
		dequeue_tx_next.tv_sec=0;
		dequeue_tx_next.tv_usec=0;
		return;
	}

	// NF_ACCEPT packet
	status = ipq_set_verdict(_qh, ib->m->packet_id, NF_ACCEPT, ib->m->data_len, ib->m->payload);
	if (status < 0)
	{
		syslog(LOG_INFO, "Warning: dequeue_tx: ipq_set_verdict returned error.");
	}

	// Update next packet dequeue time: the previous slot plus the time
	// this packet occupies the line at the rate cap
	dequeue_tx_next=tv_add(dequeue_tx_next, tv_txtime(dsl_len(ib->m->data_len), _tx_rate_cap));

	// set next minrate dequeue time for the band the packet's fwmark maps
	// to (unmarked or out-of-range packets count towards the last band)
	if (ib->m->mark >= _start_fwmark && ib->m->mark < _start_fwmark+_num_bands)
	{
		tx_queue[ib->m->mark-_start_fwmark].next_minrate_dequeue=tv_add(dequeue_tx_next,
		                       tv_txtime(dsl_len(ib->m->data_len), TX_MIN_BAND_RATE));
	}
	else
	{
		tx_queue[_num_bands-1].next_minrate_dequeue=tv_add(dequeue_tx_next,
		                       tv_txtime(dsl_len(ib->m->data_len), TX_MIN_BAND_RATE));
	}

	// the verdict has been issued, so the buffer can be reused
	ipq_buff_release(ib);

	tx_qlen--;
}

void enqueue_tx(struct ipq_buff *ib)
{
	struct timeval now;
	int status;


	gettimeofday(&now, &null_tz);
	
	if (dequeue_tx_next.tv_sec==0 && dequeue_tx_next.tv_usec==0)
	{
		// Send packet w/o queue, update dequeue_tx_next

	  // NF_ACCEPT packet	
 		status = ipq_set_verdict(_qh, ib->m->packet_id, NF_ACCEPT, ib->m->data_len, ib->m->payload);
		if (status < 0) 
		{
			syslog(LOG_INFO, "Warning: enqueue_tx: ipq_set_verdict returned error.");
		}

		// schedule next allowed packet dequeue
		dequeue_tx_next=tv_add(now, tv_txtime(dsl_len(ib->m->data_len), _tx_rate_cap));

	  // set next minrate dequeue time
		if (ib->m->mark >= _start_fwmark && ib->m->mark < _start_fwmark+_num_bands)
		{
			tx_queue[ib->m->mark-_start_fwmark].next_minrate_dequeue=tv_add(now, 
		                       tv_txtime(dsl_len(ib->m->data_len), TX_MIN_BAND_RATE));
		}
		else
		{
      tx_queue[_num_bands-1].next_minrate_dequeue=tv_add(now, 
                           tv_txtime(dsl_len(ib->m->data_len), TX_MIN_BAND_RATE));
		}

		ipq_buff_release(ib);
	}
	else
	{
		// Queue packet
		
		if (ib->m->mark >= _start_fwmark && ib->m->mark < _start_fwmark+_num_bands)
		{
			ibl_enqueue(&tx_queue[ib->m->mark-_start_fwmark], ib);
		}
		else
		{
			ibl_enqueue(&tx_queue[_num_bands-1], ib);
		}

		tx_qlen++;

		if (tx_qlen>=_max_qlen)
		{
			// Drop packet
			int band;

			for (band=_num_bands-1; band>=0; band--)
			{
				// Only drop from bands with more than their 'fair share' of packets
				if (tx_queue[band].len >= (_max_qlen/_num_bands))
				{
					if (ibl_drop(&tx_queue[band]))
			  	{
			  		tx_qlen--;
			  		break;
				  }	
				}
			}
		}
	}
}

// ipq_die()
//
// Fatal libipq error: prints the pending ipq error with the program name
// as prefix, logs it, undoes the iptables hooks and the handle via
// cleanup(), and exits with status 1.
void ipq_die()
{
	// console first, then the system log
	ipq_perror("dsl_qos_queue");
	syslog(LOG_INFO, "Error: ipq died.");

	cleanup();

	exit(1);
}


// ibl_enqueue()
//
// Appends buffer ib to the tail of list ibl and increments the list
// length.  An empty list gets ib as both head and tail.
void ibl_enqueue(struct ipq_buff_list *ibl, struct ipq_buff *ib)
{
	struct ipq_buff *old_tail=ibl->tail;

	if (old_tail)
	{
		// link behind the current tail
		old_tail->next=ib;
	}
	else
	{
		// list was empty: ib is also the head
		ibl->head=ib;
	}

	ib->prev=old_tail;
	ib->next=NULL;
	ibl->tail=ib;
	ibl->len++;
}

// ibl_dequeue()
//
// Removes and returns the buffer at the head of list ibl (the oldest
// one), or returns NULL when the list is empty.  The returned buffer's
// 'next' link is cleared.
struct ipq_buff *ibl_dequeue(struct ipq_buff_list *ibl)
{
	struct ipq_buff *first=ibl->head;

	if (!first) return NULL;

	ibl->len--;

	// advance the head; an emptied list loses its tail as well
	ibl->head=first->next;
	if (ibl->head)
	{
		ibl->head->prev=NULL;
	}
	else
	{
		ibl->tail=NULL;
	}

	first->next=NULL;
	return first;
}

// ibl_drop()
//
// Drops the buffer at the tail of list ibl (the most recently queued
// packet): unlinks it, issues an NF_DROP verdict for the packet and
// returns the buffer to the pool.
//
// Returns TRUE when a packet was dropped, FALSE when the list was empty.
int ibl_drop(struct ipq_buff_list *ibl)
{
	struct ipq_buff *victim=ibl->tail;
	int status;

	if (!victim) return FALSE;

	// unlink the tail; an emptied list loses its head as well
	ibl->len--;
	ibl->tail=victim->prev;
	if (ibl->tail)
	{
		ibl->tail->next=NULL;
	}
	else
	{
		ibl->head=NULL;
	}

	// tell ip_queue we dropped this packet
	status = ipq_set_verdict(_qh, victim->m->packet_id, NF_DROP, victim->m->data_len, victim->m->payload);
	if (status < 0)
	{
		syslog(LOG_INFO,"Warning: ibl_drop: ipq_set_verdict returned error.");
	}

	ipq_buff_release(victim);
	return TRUE;
}

struct ipq_buff *ipq_buff_get()
{
	unsigned int x;


	x=ipq_buff_recycle_list[ipq_buff_get_next];
	if (x==-1)
	{
		syslog(LOG_INFO, "Error: Ran out of buffers!");
		cleanup();
		exit(2);
	}
	ipq_buff_recycle_list[ipq_buff_get_next]=-1;
  ipq_buff_get_next=(ipq_buff_get_next+1)%_max_qlen;
	
	ipq_buff_pile[x].id=x;

	ipq_buff_pile[x].next=NULL;
	ipq_buff_pile[x].prev=NULL;
	ipq_buff_pile[x].m=NULL;

	return &ipq_buff_pile[x];
}

// ipq_buff_release()
//
// Returns buffer ib to the pool by storing its id in the recycle ring at
// ipq_buff_release_next and advancing that position.
//
// The slot written to must be empty (-1).  If it is not, more buffers
// have been released than were issued: the error is reported, cleanup()
// runs and the program exits with status 2.
void ipq_buff_release(struct ipq_buff *ib)
{
	int *slot=&ipq_buff_recycle_list[ipq_buff_release_next];

	if (*slot!=-1)
	{
		printf("Released non-issued buffer.\n");
		syslog(LOG_INFO, "Error: Released non-issued buffer.\n");
		cleanup();
		exit(2);
	}

	*slot=ib->id;
	ipq_buff_release_next=(ipq_buff_release_next+1)%_max_qlen;
}

