/*****************************************************************************

	unsort - reorder files semi-randomly
	Copyright (C) 2007, 2008  Wessel Dankers <wsl@fruit.je>

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.

*****************************************************************************/

#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include <limits.h>
#include <fcntl.h>
#include <sys/uio.h>

#include "error.h"
#include "filebuf.h"
#include "iovec.h"
#include "shuffle.h"
#include "lsort.h"
#include "merge.h"
#include "msort.h"
#include "unfind.h"
#include "mt19937ar.h"
#include "mt19937ar_init.h"

#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

#ifndef VERSION
#define VERSION "git"
#endif

/*
 * Table of command line options, shared by getopt_long(), usage() and
 * get_short_options().
 *
 * Each name string carries a second, hidden string: the embedded \0 ends
 * the option name as far as getopt_long() is concerned, and the text that
 * follows it is the help text that usage() prints right after the name.
 * The val field doubles as the short option character.
 */
static const struct option long_options[] = {
	{"help\0               Print this message to stdout", no_argument, NULL, 'h'},
	{"version\0            Print the program version", no_argument, NULL, 'v'},
	{"random\0             Use a random permutation", no_argument, NULL, 'r'},
	{"filenames\0[=<sub>]  Permute as if the input were filenames", optional_argument, NULL, 'f'},
	{"separator\0 <sep>    Filename separator for -f", required_argument, NULL, 'S'},
	{"heuristic\0          Use a heuristic permutation (default)", no_argument, NULL, 'p'},
	{"identity\0           Do not change the order of lines", no_argument, NULL, 'n'},
	{"concatenate\0        Concatenate input before shuffling", no_argument, NULL, 'c'},
	{"merge\0              Merge input after shuffling in given order", no_argument, NULL, 'm'},
	{"merge-random\0       Merge input after shuffling (default)", no_argument, NULL, 'M'},
	{"seed\0 <integer>     Seed the permutation", required_argument, NULL, 's'},
	{"zero-terminated\0    Use \\0 line endings", no_argument, NULL, 'z'},
	{"null\0               Use \\0 line endings", no_argument, NULL, '0'},
	{"linefeed\0           Use \\n line endings (default)", no_argument, NULL, 'l'},
	/* all-zero entry terminates the table */
	{NULL, 0, NULL, 0}
};

/*
 * Write a usage summary to fh.
 *
 * The first line is a synopsis that lists, after "[-", every short option
 * from long_options that takes no argument. It is followed by one line per
 * option showing the short form, the long form and the help text stored
 * behind the \0 embedded in the option's name string.
 *
 * fh        stream to print to
 * progname  program name shown in the synopsis line
 */
static void usage(FILE *fh, const char *progname) {
	const struct option *opt;

	fprintf(fh, "Usage: %s [-", progname);
	for(opt = long_options; opt->name; opt++) {
		/* only flag-style options belong in the bundled list */
		if(!opt->val || opt->has_arg)
			continue;
		fputc(opt->val, fh);
	}
	fputs("] [-s <integer>] [-f [strategy]] [file...]\n", fh);

	for(opt = long_options; opt->name; opt++) {
		/* the help text starts right after the name's terminator */
		const char *help = opt->name + strlen(opt->name) + 1;

		fprintf(fh, "\t-%c, --%s%s\n", opt->val, opt->name, help);
	}
}

/*
 * Consistency check for a translation table.
 *
 * Copies tlb, passes the copy to msort32() (presumably an ascending sort
 * of 32-bit values) and then requires that slot n holds the value n for
 * every n below count, i.e. that tlb contains each index in 0..count-1
 * exactly once. On a mismatch exit_error() is called with ERROR_INTERNAL.
 *
 * tlb    table to verify; it is not modified
 * count  number of entries in tlb
 */
static void fsck(const uint32_t *tlb, uint32_t count) {
	size_t bytes = count * sizeof *tlb;
	uint32_t *sorted = xalloc(bytes);
	uint32_t pos = 0;

	/* work on a scratch copy so the caller's table stays untouched */
	memcpy(sorted, tlb, bytes);
	msort32(sorted, count);

	while(pos < count) {
		if(sorted[pos] != pos)
			exit_error(ERROR_INTERNAL, "Bad tlb");
		pos++;
	}

	free(sorted);
}

/*
 * Build a getopt() short option string from a long option table.
 *
 * The result starts with ':' and then contains, for every entry with a
 * non-zero val, that character followed by has_arg colons (none, one or
 * two). Entries whose val is zero are skipped.
 *
 * lo   option table, terminated by an entry whose name is NULL
 * buf  destination; the caller must provide enough room for the result
 *      including its terminating \0 (no bounds checking is done here)
 */
static void get_short_options(const struct option *lo, char *buf) {
	char *out = buf;
	int colons;

	*out++ = ':';
	for(; lo->name; lo++) {
		if(!lo->val)
			continue;
		*out++ = (char)lo->val;
		/* has_arg is the number of colons getopt expects */
		for(colons = lo->has_arg; colons; colons--)
			*out++ = ':';
	}
	*out = '\0';
}

/*
 * Program entry point.
 *
 * Parses the command line, hands every named input file (or stdin when
 * none is named, or for "-") to filebuf_init(), splits the input into
 * records on the selected separator byte, builds a translation table that
 * describes the new order of the records and writes the records to stdout
 * in that order. A record that does not end in the separator gets one
 * appended on output.
 *
 * Returns 0 when finished (also when there is no input at all). Usage and
 * system errors are reported through exit_error()/exit_perror(), which are
 * expected not to return.
 */
int main(int argc, char **argv) {
	int i, fd, option_index;
	char short_options[32];
	struct iovec *iov, *final_iov, *iov_dst;
	struct iovec newline_iov;
	uint32_t u, numfiles, count, chunk_count, chunk_start;
	uint32_t *tlb, *tmp, *bounce;
	filebuf_t *fb, *ds;
	merge_t *mg, *ms;
	unsigned long ul;

	uint32_t seed = 0;
	bool manual_seed = false;
	bool multi = true;
	shuffle_algo_t shuffle_algo = shuffle_heuristic;
	/* recorded by -m/-M but not consulted anywhere in this function */
	__attribute__((unused))
	bool shuffle_files = true;
	bool shuffle_unfind = false;
	char *end;
	unsigned char sep = '\n';

	get_short_options(long_options, short_options);

	/* getopt's own diagnostics are disabled; errors are reported below */
	opterr = 0;
	while((i = getopt_long(argc, argv, short_options, long_options, &option_index)) != -1) {
		switch(i) {
			case 'h':
				puts("unsort - reorder files semi-randomly");
				usage(stdout, *argv);
				exit(ERROR_NONE);
			case 'v':
				printf("unsort %s\ncopyright 2007, 2008 Wessel Dankers <wsl@fruit.je>\n", VERSION);
				exit(ERROR_NONE);
			case 'r':
				shuffle_algo = shuffle_random;
				break;
			case 'p':
				shuffle_algo = shuffle_heuristic;
				break;
			case 'n':
				shuffle_algo = shuffle_none;
				break;
			case 'c':
				multi = false;
				break;
			case 'm':
				multi = true;
				shuffle_files = false;
				break;
			case 'M':
				multi = true;
				shuffle_files = true;
				break;
			case 'f':
				shuffle_unfind = true;
				if(optarg && *optarg) {
					/* the strategy may only consist of '1' and 'n' */
					for(u = 0; optarg[u]; u++)
						if(!strchr("1n", optarg[u]))
							/* optopt is only set on getopt errors, so
							 * name the option through i instead */
							exit_error(ERROR_USER, "Invalid value '%s' for -%c", optarg, i);
					unfind_strategy = optarg;
				}
				break;
			case 'S':
				if(!optarg || !*optarg)
					exit_error(ERROR_USER, "Missing or invalid value for -%c", i);
				if(optarg[1]) {
					/* longer values must be a backslash followed by a
					 * number in any base strtoul() detects (\0, \057,
					 * \47, \0x2f) */
					if(*optarg != '\\')
						exit_error(ERROR_USER, "Invalid value '%s' for -%c", optarg, i);
					/* skip the backslash, otherwise nothing would ever
					 * be converted and every escape would be rejected */
					ul = strtoul(optarg + 1, &end, 0);
					if(*end || ul > UINT8_MAX)
						exit_error(ERROR_USER, "Invalid value '%s' for -%c", optarg, i);
					unfind_sep = (uint8_t)ul;
				} else {
					/* a single character is taken literally */
					unfind_sep = *(uint8_t *)optarg;
				}
				break;
			case 's':
				if(optarg && *optarg) {
					errno = 0;
					seed = (uint32_t)strtoul(optarg, &end, 0);
					if(errno)
						exit_perror(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
					if(end && *end)
						exit_error(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
					manual_seed = true;
				} else {
					/* an empty seed means: pick one automatically */
					seed = UINT32_C(0);
					manual_seed = false;
				}
				break;
			case '0':
			case 'z':
				sep = '\0';
				break;
			case 'l':
				sep = '\n';
				break;
			case '?':
				usage(stderr, *argv);
				exit_error(ERROR_USER, "Unknown option: -%c", optopt);
			case ':':
				/* returned for a missing argument because the short
				 * option string starts with ':' */
				usage(stderr, *argv);
				exit_error(ERROR_USER, "Option -%c requires an argument", optopt);
			default:
				usage(stderr, *argv);
				exit_error(ERROR_INTERNAL, "Unknown option: -%c", i);
		}
	}

	/* upper bound on the number of inputs; without file arguments stdin
	 * is the single input */
	if(argc > optind)
		numfiles = (uint32_t)(argc - optind);
	else
		numfiles = 1;

	if(manual_seed) {
		mt_seed(seed);
	} else {
		if(!mt_init_urandom())
			exit_perror(ERROR_SYSTEM, "Can't read from /dev/urandom");
		seed = mt_genrand32();
	}
	shuffle_seed(seed);
	merge_seed(seed);

	/* one merge descriptor and one file buffer per input */
	ms = xalloc(numfiles * sizeof *ms);
	ds = xalloc(numfiles * sizeof *ds);

	if(argc > optind) {
		/* recount: inputs that cannot be opened are skipped with a
		 * warning and do not occupy a slot */
		numfiles = 0;
		for(i = optind; i < argc; i++) {
			fb = ds + numfiles;
			*fb = filebuf_0;
			if(strcmp(argv[i], "-")) {
				fd = open(argv[i], O_RDONLY | O_LARGEFILE);
				if(fd == -1) {
					warn_perror("Can't open %s", argv[i]);
					continue;
				}
				filebuf_init(fb, fd);
				close(fd);
				fb->name = argv[i];
			} else {
				/* "-" stands for standard input */
				filebuf_init(fb, STDIN_FILENO);
			}
			numfiles++;
		}
	} else {
		numfiles = 1;
		filebuf_init(ds, STDIN_FILENO);
	}

	/* first pass: no destination is given, only the number of records
	 * per input is collected, together with each input's offset */
	count = 0;
	for(u = 0; u < numfiles; u++) {
		fb = ds + u;
		mg = ms + u;
		*mg = merge_0;
		chunk_count = iovec_parse(fb, sep, NULL);
		mg->start = count;
		mg->count = chunk_count;
		mg->ratio = 1; /* chunk_count */
		count += chunk_count;
	}

	if(!count) {
		/* nothing to output */
		free(ms);
		free(ds);
		return 0;
	}

	/* numfiles spare slots are kept in front of the records: the output
	 * pass below needs room for the separators it inserts. The sum is
	 * widened first so it cannot wrap in 32-bit arithmetic. */
	final_iov = xalloc(((size_t)count + numfiles) * sizeof *iov);
	iov = final_iov + numfiles;

	/* second pass: store the records themselves */
	count = 0;
	for(u = 0; u < numfiles; u++)
		count += iovec_parse(ds + u, sep, iov + count);

	tlb = xalloc((size_t)count * sizeof *tlb);

	bounce = xalloc((size_t)count * sizeof *bounce);
	tmp = xalloc((size_t)count * sizeof *tmp);
	/* lend the shuffle code a scratch buffer for the duration */
	shuffle_tmp(tmp);

	if(multi && numfiles > 1) {
		/* several inputs: merge() lays out the inputs, after which each
		 * input's own range is permuted separately */
		if(shuffle_unfind) {
			merge(ms, numfiles, NULL, tlb, shuffle_algo != shuffle_none);
			for(u = 0; u < numfiles; u++) {
				mg = ms + u;
				chunk_start = mg->start;
				chunk_count = mg->count;
				lsort(iov + chunk_start, chunk_count);
				unfind(iov + chunk_start, chunk_count, tlb + chunk_start, bounce, shuffle_algo);
			}
		} else {
			merge(ms, numfiles, NULL, bounce, shuffle_algo != shuffle_none);
			for(u = 0; u < numfiles; u++) {
				mg = ms + u;
				chunk_start = mg->start;
				chunk_count = mg->count;
				shuffle_algo(bounce + chunk_start, tlb + chunk_start, chunk_count);
			}
		}
	} else {
		/* single input, or -c: treat all records as one sequence */
		if(shuffle_unfind) {
			shuffle_init(tlb, count);
			lsort(iov, count);
			unfind(iov, count, tlb, bounce, shuffle_algo);
		} else {
			shuffle_algo(NULL, tlb, count);
		}
	}
	shuffle_tmp(NULL);
	free(tmp);
	free(bounce);

	/* optional self-check, enabled through the environment */
	if(getenv("UNSORT_FSCK"))
		fsck(tlb, count);

	iovec_shuffle(iov, tlb, count);

	newline_iov.iov_base = &sep;
	newline_iov.iov_len = sizeof sep;

	/* Compact the records towards the front of final_iov, adding a
	 * separator entry after every record that does not end in one. */
	iov_dst = final_iov;
	for(u = 0; u < count; u++) {
		if(iov_dst == iov) {
			/* the spare slots are used up, so the remaining records
			 * are already where they belong */
			iov_dst += count - u;
			break;
		}
		*iov_dst++ = *iov;
		if(((const unsigned char *)iov->iov_base)[iov->iov_len - 1] != sep)
			*iov_dst++ = newline_iov;
		iov++;
	}

	iovec_write(STDOUT_FILENO, final_iov, iov_dst - final_iov);

	free(ms);
	free(ds);
	free(final_iov);
	free(tlb);

	return 0;
}
