/*
 *	Copyright (c) 1994 The CAD lab of the
 *	Novosibirsk Institute of Broadcasting and Telecommunication
 *
 *	TNSDrive $Id$
 *
 *	$Log$
 *
 * Redistribution and use in source forms, with and without modification,
 * are permitted provided that this entire comment appears intact.
 *
 * THIS SOURCE CODE IS PROVIDED ``AS IS'' WITHOUT ANY WARRANTIES OF ANY KIND.
 */

#include <stdio.h>
#include <ctype.h>
#include <signal.h>
#include <string.h>
#include "drive.h"
#include "usenet.h"

/* Size of the line buffer load_index() uses when reading an index file */
#define	BIGLEN	2048

/*
 * Defined elsewhere.  Called here with a header string; the returned
 * pointer is what gets stored in the subject/from fields of struct header.
 */
extern char *hash_str();

/* Path of the current group's index file; written by find_local_index() */
char index_file[LEN];
/* Sort-key letters consulted by comparenews(); NULL makes every pair compare equal */
char *newssort;

/*
 * Article number most recently taken in by read_group() or
 * read_xover_file(); reset to 0 by load_index() and read_xover_file().
 */
static long last_read_article;

/*
 *  Put the descriptive fields of *art into their initial state:
 *  no subject, no sender, zero date, flagged unread and not inside
 *  a thread.  The artnum and thread fields are not touched.
 */
void
set_article(art)
	struct header *art;
{
	/* read / threading state */
	art->inthread = FALSE;
	art->unread = 1;

	/* data taken from the article headers */
	art->date = 0L;
	art->from = NULL;
	art->subject = NULL;
}

/*
 *  Rebuild base[] so that it holds, in arts[] order, the index of the
 *  first article of every thread.  An entry heads a thread when its
 *  inthread flag is clear; entries whose thread field is -2 (expired)
 *  are left out.  top_base is reset first and ends up as the number
 *  of threads recorded.
 */

int
find_base()
{
	int n;

	top_base = 0;

	for (n = 0; n < top; n++) {
		if (arts[n].inthread || arts[n].thread == -2)
			continue;		/* follow-up or expired */

		if (top_base >= max_art)	/* at capacity */
			expand_art();
		base[top_base] = n;
		top_base++;
	}
}


/*
 *  Return how many entries of arts[] are not marked expired
 *  (thread field different from -2).
 */

int
num_arts()
{
	int live = 0;
	int n;

	for (n = 0; n < top; n++)
		live += (arts[n].thread != -2);

	return live;
}


/*
 *  Look for article number art in arts[].  Returns the index of the
 *  first entry carrying that number, or -1 if there is none.
 */

int
valid_artnum(art)
long art;
{
	int slot = 0;

	while (slot < top) {
		if (arts[slot].artnum == art)
			return slot;
		slot++;
	}

	return -1;
}


/*
 *  Return TRUE if at least one entry of arts[] is still marked
 *  expired (thread == -2), FALSE otherwise.  load_index() and
 *  read_xover_file() give every entry that mark; read_group()
 *  clears it for the article numbers it finds in base[].
 */
int
purge_needed()
{
	register int n = 0;

	/* stop at the first expired entry, if any */
	while (n < top && arts[n].thread != -2)
		n++;

	return (n < top) ? TRUE : FALSE;
}


/*
 *  Top-level indexing entry point for one newsgroup.  group is the
 *  dotted name ("comp.unix.amiga"), group_path the same name with the
 *  dots replaced by slashes ("comp/unix/amiga").
 *
 *  Steps, in order:
 *    - announce the group and call hash_reclaim()
 *    - work out the index file name (find_local_index)
 *    - fill arts[] from the overview data, or from the index file
 *      when read_xover_file() reports failure
 *    - pick up articles not yet known (read_group)
 *    - rewrite the index file if read_group() reported a change or
 *      expired entries remain
 *    - thread the articles and collect the thread heads
 */
int
index_group(group, group_path)
	char *group;
	char *group_path;
{
	int rewrite;

	putstr("Group %s... ", group);
	hash_reclaim();
	find_local_index(group);

	if (!read_xover_file(group, group_path))
		load_index();

	/* purge_needed() is consulted only when read_group() saw no change */
	rewrite = read_group(group, group_path);
	if (!rewrite)
		rewrite = purge_needed();
	if (rewrite)
		dump_index(group);

	make_threads();
	find_base();
}


/*
 *  Index a group.  Assumes any existing index has already been
 *  loaded.
 */
read_group(group, group_path)
	char *group;
	char *group_path;
{
	register i;
	FILE *fp;
	long art;
	int count = 0, total = 0;
	int modified = FALSE;
	int respnum;
	extern FILE *open_header_fp();

	setup_base(group, group_path);	  /* load article numbers into base[] */

	/* Count number of articles to index */
	for (i = 0; i < top_base; i++)
		if (base[i] > last_read_article && valid_artnum(base[i]) < 0)
			total++;

	for (i = 0; i < top_base; i++) {	/* for each article # */
		art = base[i];

		/*
		 * Do we already have this article in our index?
		 * Change thread from -2 to -1 if so and skip the header eating.
		 */
		if ((respnum = valid_artnum(art)) >= 0 || art <= last_read_article) {
			if (respnum >= 0) {
				arts[respnum].thread = -1;
				arts[respnum].unread = 1;
			}
			continue;
		}

		if (!modified)
			modified = TRUE;   /* we've modified the index */
					   /* it will need to be re-written */

		if ((fp = open_header_fp(group_path, art)) == NULL) continue;

		/*
		 * Add article to arts[]
		 */
		if (top >= max_art) expand_art();

		arts[top].artnum = art;
		arts[top].thread = -1;
		set_article(&arts[top]);

		if (!parse_headers(fp, &arts[top])) {
			fclose(fp);
			continue;
		}
		fclose(fp);
		last_read_article = arts[top].artnum;
		top++;

		if (++count % 10 == 0) {
			if (count == 10) putstr("(%d) %-4d", total, count);
			else putstr("\b\b\b\b%-4d", count);
		}
	}
	putchr('\n');
	return modified;
}


/* Three-way comparison of two scalar values: yields 1, 0 or -1 */
#define	int_cmp(a, b)		(((a) > (b)) ? 1 : (((a) == (b)) ? 0 : -1))

/*
 *  Order two articles by article number: 1 if a1's number is the
 *  larger, 0 if both are equal, -1 otherwise.
 */
artnum_comp(a1, a2)
	register struct header *a1, *a2;
{
	if (a1->artnum > a2->artnum)
		return 1;

	return (a1->artnum == a2->artnum) ? 0 : -1;
}

/*
 * Compare news (c) by unknown hero Uwka
 *
 * Comparison routine handed to qsort() by make_threads().  The global
 * newssort names up to four sort keys, most significant first:
 *
 *	A/a  article number	D/d  date
 *	F/f  sender		S/s  subject
 *
 * An upper-case letter keeps the direction of the key comparison, a
 * lower-case letter reverses it; any other letter is ignored.  Each
 * key contributes -1, 0 or 1 multiplied by its weight (8, 4, 2, 1),
 * so an earlier key always outweighs all later ones together.
 *
 * strcomp() is defined elsewhere and may return any positive or
 * negative value, so only the direction of its result is used; a raw
 * value could otherwise swamp the weight of a more significant key.
 *
 * Returns 0 when newssort is NULL.
 */
int
comparenews(a1, a2)
	register struct header *a1, *a2;
{
	static int k[] = { 8, 4, 2, 1 };
	register int i, sum = 0, sign, cmp;

	if (newssort == NULL) return sum;

	for (i = 0; i < 4 && newssort[i]; i++) {
		sign = 1;
		cmp = 0;
		switch (newssort[i]) {
			case 'a': sign = -1;
				/* FALLTHROUGH */
			case 'A': cmp = int_cmp(a1->artnum, a2->artnum);
				break;
			case 'd': sign = -1;
				/* FALLTHROUGH */
			case 'D': cmp = int_cmp(a1->date, a2->date);
				break;
			case 'f': sign = -1;
				/* FALLTHROUGH */
			case 'F': cmp = strcomp(a1->from, a2->from);
				break;
			case 's': sign = -1;
				/* FALLTHROUGH */
			case 'S': cmp = strcomp(a1->subject, a2->subject);
				break;
			default:	/* unknown key letter: no contribution */
				break;
		}

		/* reduce to -1/0/1 so the weights keep their meaning */
		cmp = int_cmp(cmp, 0);
		sum += sign * k[i] * cmp;
	}
	return sum;
}

#undef	int_cmp

/*
 *  Sort arts[] with comparenews() and then chain articles that share
 *  a subject into threads through the .thread field.
 *
 *  Only entries whose thread field is -1 take part.  Each such entry
 *  is linked to the next later entry that is also -1 and has the same
 *  subject pointer; the later entry gets inthread set to TRUE, so the
 *  first article of a chain is the only one left with inthread FALSE.
 *
 *  Subjects are compared by pointer, not by content; this presumably
 *  relies on hash_str() returning one shared pointer for equal
 *  strings -- confirm against hash_str().
 */

int
make_threads()
{
	int head;
	int next;

	qsort(arts, top, sizeof(struct header), comparenews);

	for (head = 0; head < top; head++) {
		if (arts[head].thread != -1)
			continue;

		/* find the nearest unlinked follow-up, if there is one */
		for (next = head + 1; next < top; next++) {
			if (arts[next].thread != -1)
				continue;
			if (arts[next].subject != arts[head].subject)
				continue;

			arts[head].thread = next;
			arts[next].inthread = TRUE;
			break;
		}
	}
}


/*
 *  Return a pointer into s that skips leading blanks and any number
 *  of leading reply markers.  Example:
 *
 *	  Re: Reorganization of misc.jobs
 *	  ^   ^
 *
 *  A reply marker is "Re:" in any letter case, or the counted form
 *  "Re^N:" where N is a single digit.  Blanks following a marker are
 *  skipped as well.  s itself is not modified; the result points into
 *  the caller's string.
 */

char *
eat_re(s)
char *s;
{

	while (*s == ' ')
		s++;

	while ((s[0] == 'r' || s[0] == 'R') && (s[1] == 'e' || s[1] == 'E')) {
		if (s[2] == ':')
			s += 3;
		/*
		 * isdigit() is only defined for unsigned char values and
		 * EOF, so a byte with the top bit set must not be passed
		 * as a (possibly negative) plain char.
		 */
		else if (s[2] == '^' && isdigit((unsigned char) s[3]) && s[4] == ':')
			s += 5;			/* hurray nn */
		else
			break;

		while (*s == ' ')
			s++;
	}
	return s;
}

/*
 *  Pull the From:, Subject: and Date: headers out of the start of an
 *  article and store them in *h.
 *
 *  At most sizeof(buf)-1 bytes are fetched, with one read() on the
 *  descriptor underlying fp; headers beyond that point are not seen.
 *  Scanning stops at the first empty line or at the end of the data.
 *  Header names are matched exactly as spelled below, including the
 *  blank after the colon.
 *
 *  subject and from receive what hash_str() returns, date what
 *  parsedate() returns; each keeps its default ("" or 0) when the
 *  header is missing.  Values are cut to LEN-1 characters and the
 *  subject loses its leading reply markers (see eat_re()).
 *
 *  Returns FALSE if nothing could be read, TRUE otherwise.
 */
int
parse_headers(fp, h)
	FILE *fp;
	struct header *h;
{
	char buf[1024];
	char value[LEN];	/* holds exactly the LEN-1 chars we keep */
	char *p, *line;
	char term;
	int n;

	/* leave room for the terminator so no byte read is thrown away */
	n = read(fileno(fp), buf, sizeof(buf) - 1);
	if (n <= 0)
		return FALSE;

	buf[n] = '\0';

	h->subject = "";
	h->from = "";
	h->date = 0;

	p = buf;
	while (1) {
		/*
		 * Walk to the end of the line.  Bytes whose low seven bits
		 * are below 32 (control characters, and the same range
		 * with the top bit set) are turned into blanks.
		 */
		for (line = p; *p && *p != '\n'; p++)
			if (((*p) & 0x7F) < 32)
				*p = ' ';
		term = *p;		/* '\n', or '\0' at end of data */
		*p++ = '\0';

		if (strncmp(line, "From: ", 6) == 0) {
			strncpy(value, &line[6], LEN-1);
			value[LEN-1] = '\0';
			h->from = hash_str(value);
		} else if (strncmp(line, "Subject: ", 9) == 0) {
			strncpy(value, &line[9], LEN-1);
			value[LEN-1] = '\0';
			h->subject = hash_str(eat_re(value));
		} else if (strncmp(line, "Date: ", 6) == 0) {
			strncpy(value, &line[6], LEN-1);
			value[LEN-1] = '\0';
			h->date = parsedate(value, NULL);
		}

		/* out of data, or the empty line that ends the headers */
		if (!term || *p == '\n') break;
	}
	return TRUE;
}


/*
 *  Write one subject/from line of an index record.
 *
 *  The int stored directly in front of a non-empty string is used as
 *  a "first written in record N" slot: a negative value means the
 *  text has not been written during this dump, so the text goes out
 *  (prefixed by a blank) and the slot is set to recno; otherwise only
 *  a "%N" back-reference is written.  An empty string is always
 *  written literally and the memory in front of it is never read.
 *  Who initialises the slot is not visible here -- presumably
 *  hash_str()/hash_reclaim(); confirm.
 */
static void
put_hashed(fp, str, recno)
	FILE *fp;
	char *str;
	int recno;
{
	int *slot;

	if (str[0] == '\0') {
		fprintf(fp, " %s\n", str);
		return;
	}

	slot = ((int *) str) - 1;
	if (*slot < 0) {
		fprintf(fp, " %s\n", str);
		*slot = recno;
	} else
		fprintf(fp, "%%%d\n", *slot);
}

/*
 *  Write out the index file for group.  The group name goes on the
 *  first line so that, with local indexing, group-name hash
 *  collisions can be told apart by looking at the file.  The second
 *  line holds the number of records, followed by four lines (article
 *  number, subject, sender, date) for every non-expired article.
 *
 *  The data is written to "<index_file>.<pid>", which then replaces
 *  index_file via unlink/link/unlink.  Nothing happens if the
 *  temporary file cannot be opened.
 */

int
dump_index(group)
char *group;
{
	char tmpname[LEN];
	FILE *out;
	int n;
	int recno;

	sprintf(tmpname, "%s.%d", index_file, (int)getpid());
	out = sfopen(tmpname, "w");

	if (out != NULL) {
		fprintf(out, "%s\n", group);
		fprintf(out, "%d\n", num_arts());

		recno = 0;	/* counts records written, not arts[] slots */
		for (n = 0; n < top; n++) {
			if (arts[n].thread == -2)
				continue;

			fprintf(out, "%ld\n", arts[n].artnum);
			put_hashed(out, arts[n].subject, recno);
			put_hashed(out, arts[n].from, recno);
			fprintf(out, "%ld\n", arts[n].date);

			recno++;
		}

		sfclose(out);
		chmod(tmpname, 0640);
		unlink(index_file);
		link(tmpname, index_file);
		unlink(tmpname);
	}
}

/*
 *  Read the records of an already opened index file into arts[],
 *  advancing top for every complete record.
 *
 *  Layout expected: the group name line (skipped, not checked here),
 *  a line with the record count, then per record four lines --
 *  article number, subject, sender, date.  A subject or sender line
 *  is either a blank followed by the text, or "%N", meaning "same
 *  string as record N", where N must be an earlier record.
 *
 *  Every record is given thread -2 and inthread FALSE.
 *
 *  Returns NULL on success, otherwise a short tag naming the check
 *  that failed (used in the log message).
 */
static char *
read_index_records(fp)
	FILE *fp;
{
	char buf[BIGLEN];
	char *p;
	int want;
	int n;

	if (fgets(buf, sizeof(buf), fp) == NULL || fgets(buf, sizeof(buf), fp) == NULL)
		return "one";
	want = atol(buf);

	while (top < want) {
		if (top >= max_art)
			expand_art();

		arts[top].thread = -2;
		arts[top].inthread = FALSE;

		/* article number */
		if (fgets(buf, sizeof(buf), fp) == NULL)
			return "two";
		arts[top].artnum = atol(buf);

		/* subject */
		if (fgets(buf, sizeof(buf), fp) == NULL)
			return "three";
		if (buf[0] == '%') {
			n = atoi(&buf[1]);
			if (n >= top || n < 0)
				return "eight";
			arts[top].subject = arts[n].subject;
		} else if (buf[0] == ' ') {
			for (p = &buf[1]; *p && *p != '\n'; p++) ;
			*p = '\0';
			arts[top].subject = hash_str(&buf[1]);
		} else
			return "six";

		/* sender */
		if (fgets(buf, sizeof(buf), fp) == NULL)
			return "four";
		if (buf[0] == '%') {
			n = atoi(&buf[1]);
			if (n >= top || n < 0)
				return "nine";
			arts[top].from = arts[n].from;
		} else if (buf[0] == ' ') {
			for (p = &buf[1]; *p && *p != '\n'; p++) ;
			*p = '\0';
			arts[top].from = hash_str(&buf[1]);
		} else
			return "seven";

		/* date; tagged "five" so it is distinct from the "eight" above */
		if (fgets(buf, sizeof(buf), fp) == NULL)
			return "five";
		arts[top].date = atol(buf);

		top++;
	}

	return NULL;
}

/*
 *  Read in the index file named by index_file.
 *
 *  top and last_read_article are reset first.  If the file cannot be
 *  opened arts[] is simply left empty.  If the file turns out to be
 *  damaged, the problem is logged, the file is removed and top is set
 *  back to 0.  The file is closed on every path.
 */
int
load_index()
{
	FILE *fp;
	char *errorstr;

	top = 0;
	last_read_article = 0L;

	fp = sfopen(index_file, "r");
	if (fp != NULL) {
		errorstr = read_index_records(fp);
		sfclose(fp);

		if (errorstr != NULL) {
			LOGIT(LOG_ERR, "%s: index file %s corrupt, top=%d",
			    errorstr, index_file, top);
			unlink(index_file);
			top = 0;
		}
	}
}


/*
 *  Work out which file in indexdir holds (or should hold) the index
 *  of the given group and leave its path in index_file.
 *
 *  Hashing the group name gives a number h.  The candidates are
 *  h.1, h.2, ... in that order.  The search stops at the first
 *  candidate that
 *    - cannot be opened or has no first line, or
 *    - has the group name as its first line.
 *
 *  The first line must be the group name and nothing more: with a
 *  plain prefix test a file written for "comp.unix.amiga" would also
 *  be accepted for "comp.unix" when the two names hash alike.
 */
int
find_local_index(group)
	char *group;
{
	unsigned long h;
	char buf[LEN];
	int i, len;
	FILE *fp;

	h = hash_groupname(group);
	len = strlen(group);

	for (i = 1; ; i++) {
		sprintf(index_file, "%s/%lu.%d", indexdir, h, i);

		fp = sfopen(index_file, "r");
		if (fp == NULL)
			break;

		if (fgets(buf, LEN, fp) == NULL) {
			sfclose(fp);
			break;
		}
		sfclose(fp);

		/*
		 * After len matching characters buf[len] is still inside
		 * the string read by fgets (at worst its terminator).
		 */
		if (strncmp(buf, group, len) == 0
		    && (buf[len] == '\n' || buf[len] == '\0'))
			break;
	}
}


/*
 *  Cut the TAB-separated field starting at p: the TAB that ends it is
 *  replaced by a NUL.  Returns the start of the following field, or
 *  NULL when the line holds no further TAB.
 */
static char *
xover_next_field(p)
	char *p;
{
	char *tab = strchr(p, '\t');

	if (tab == NULL)
		return NULL;
	*tab = '\0';
	return tab + 1;
}

/*
 *  Read in an XOVER index file. Fields are separated by TAB.
 *
 *  index file record
 *    1.  article number (ie. 183)               [mandatory]
 *    2.  Subject: line  (ie. Which newsreader?) [mandatory]
 *    3.  From: line     (ie. iain@norisc)       [mandatory]
 *    4.  Date: line     (ie. rfc822 format)     [mandatory]
 *
 *  The message-id, references and byte-count fields must be present
 *  too (each followed by a TAB) but their contents are not used.  A
 *  line with too few fields is not added to arts[], although its
 *  leading number still updates last_read_article.  Accepted records
 *  get thread -2.
 *
 *  top and last_read_article are reset and base[] is reloaded with
 *  setup_base() before anything is read.
 *
 *  Returns TRUE when the overview data was opened and read, FALSE
 *  when base[] is empty or the overview could not be opened.
 */
int
read_xover_file(group_name, group_path)
	char *group_name, *group_path;
{
	char	*p, *q;
	char	buf[1024];
	int	c;
	FILE	*fp;
	extern FILE *open_xover_fp();

	top = 0;
	last_read_article = 0L;

	setup_base(group_name, group_path);

	/*
	 * With no articles there is no first/last number to ask for, and
	 * base[top_base-1] would lie in front of the array.
	 */
	if (top_base <= 0)
		return FALSE;

	/*
	 * setup the overview file (whether it be local or via nntp)
	 */
	if ((fp = open_xover_fp(group_name, group_path,
				base[0], base[top_base-1])) == NULL)
		return FALSE;

	while (fgets(buf, sizeof(buf), fp) != NULL) {
		/*
		 * Line longer than the buffer: drop the remainder so that
		 * it is not mistaken for a record of its own.
		 */
		if (strchr(buf, '\n') == NULL)
			while ((c = getc(fp)) != EOF && c != '\n')
				;

		/*
		 * A line holding a lone "." ends the data.  fgets keeps the
		 * line terminator, so it has to be allowed for here.
		 */
		if (buf[0] == '.') {
			p = &buf[1];
			if (*p == '\r') p++;
			if (*p == '\n' || *p == '\0') break;
		}

		if (top >= max_art) expand_art();
		arts[top].thread = -2;
		set_article(&arts[top]);
		p = buf;

		/*
		 * read the article number
		 */
		last_read_article = arts[top].artnum = atol(p);
		if ((q = xover_next_field(p)) == NULL) continue;
		p = q;

		/*
		 * read the subject
		 */
		if ((q = xover_next_field(p)) == NULL) continue;
		arts[top].subject = hash_str(eat_re(p));
		p = q;

		/*
		 * read the author
		 */
		if ((q = xover_next_field(p)) == NULL) continue;
		arts[top].from = hash_str(p);
		p = q;

		/*
		 * read the article date
		 */
		if ((q = xover_next_field(p)) == NULL) continue;
		arts[top].date = parsedate(p, NULL);
		p = q;

		/*
		 * read past the article messageid
		 */
		if ((q = xover_next_field(p)) == NULL) continue;
		p = q;

		/*
		 * read past the article references
		 */
		if ((q = xover_next_field(p)) == NULL) continue;
		p = q;

		/*
		 * read past the article bytes; whatever follows (the line
		 * count) is not looked at
		 */
		if ((q = xover_next_field(p)) == NULL) continue;

		/*
		 * end of overview line processing: keep the record
		 */
		top++;
	}
	fclose(fp);
	return TRUE;
}
