#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
#include "utils.h"

// extremely stupid utility program - lsdif
// lsdif src.ls-lp dst.ls-lp
// lsdif takes as arguments two files which are the outputs of two directories
// 	and outputs the file names which are in src.ls and either aren't in dst.ls OR
//	are in dst.ls but are older.
// justification: test x -nt y is great, but extraordinarily slow over
// 	rimported directories. it's much faster to rcpu -c 'ls' > dst.ls then do this.
// this is needed to selectively update /only/ the modified files.
// yes, you are probably using git to do this. I can't be bothered to create a repo
// containing files from all over my fs, the majority of which are synthetically
// generated. synthetically generating them on the dst would be slower than copying
// the resulting files in full every time. we are going for something a little less
// terribly slow here.
//
// assumptions : { ls -lp } output is alphabetically ordered with the following format 
// d-rwxrwxr-x M 121 sys sys     0 May 20 15:53 1a
// --rw-r--r-- M 373 jalae jalae 55670 Mar 15  2011 main.ps
// that is to say 10 total whitespace-separated fields per line, with the final field being the
// file name (not path) and the 7th-9th being the month, day and time/year

// counts occurrences of the -f command line flag (incremented in main's
// argument loop). NOTE(review): nothing visible in this file reads it yet.
int filenameonly;

/*
Patterns for one line of ls -lp output.

lstmrx and lsyrrx are identical except for capture group 3: lstmrx accepts
digits and ':' (a time such as 15:53) while lsyrrx accepts digits only (a
year such as 2011). ls -lp prints the time for recently modified files and
the year otherwise. Because the year pattern is the stricter of the two it
has to be tried first.

Both patterns skip 13 leading characters, then a numeric field, two
alphabetic fields and another numeric field, and then capture:
	1 - month name (letters)
	2 - day of month (digits)
	3 - time (lstmrx) or year (lsyrrx)
	4 - file name (first run of characters containing no space or tab)

NOTE(review): the two alphabetic fields are matched with [a-zA-Z]+, so a
line whose owner or group contains digits or punctuation will not match, and
a file name containing a space is captured only up to that space.

hrminrx splits a captured time into hour (group 1) and minute (group 2).
*/
char * lstmrx =".............[	 ]+[0-9]+[ 	]+[a-zA-Z]+[ 	]+[a-zA-Z]+[	 ]+[0-9]+ ([a-zA-Z]+)[ ]+([0-9]+)[ 	]+([0-9:]+)[	 ]+([^ 	]+)";
char * lsyrrx =".............[	 ]+[0-9]+[ 	]+[a-zA-Z]+[ 	]+[a-zA-Z]+[	 ]+[0-9]+ ([a-zA-Z]+)[ ]+([0-9]+)[ 	]+([0-9]+)[	 ]+([^ 	]+)";
char * hrminrx ="([0-9]+):([0-9]+)";

// capacity of each listing table below.
// currently only using about 300 entries; if this needs to grow, switch to an avl tree
// or something else dynamically sized. for now, a fixed limit is good enough.
// since ls -lp generates the list in alphabetical order, we don't need any deep
// searches or sorting where avl might be useful.
#define MAXFILES 1000

// one parsed line of a listing
typedef struct blob{
	char * fname;	// file name: the string captured by group 4 of the line pattern
	Tm moddate;	// modification date assembled from the month, day and year/time fields

} blob;

// fixed-capacity tables of parsed listing entries, MAXFILES slots each.
// being file-scope objects they start out zero-initialised.
blob srcfiles[MAXFILES];
blob dstfiles[MAXFILES];

//these will store indexes into srcfiles for the first file with that letter
// this will let us jump to specific points while searching and stop when getting out of scope.
// NOTE(review): nothing visible in this file assigns to any of these, so the
// jump table described above does not appear to be implemented and they all
// keep their initial value of 0. check main for remaining uses (and for local
// variables that shadow these names) before removing them.
int b, c, d, e, f, g, i, j, l, m, n, o, q, t, w;

// month abbreviations as they appear in the listing; the position of a name
// in this table is the value stored in Tm.mon (0 = Jan ... 11 = Dec).
static char month[12][4] = 
		{"Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};

void
main(int argc, char *argv[])
{
	Reprog * lstmrp;
	Reprog * lsyrrp;
	Reprog * hrminrp;
	Biobuf *src;
	Biobuf *dst;
	int filesread;
	int dstread;
	lstmrp = regcomp(lstmrx);
	lsyrrp = regcomp(lsyrrx);
	hrminrp = regcomp(hrminrx);
	char * line;

	ARGBEGIN {
	case 'f': filenameonly++; break;
	default:
		fprint(2, "usage: %s [-f] src.ls dst.ls \n", argv0);
		exits("usage");
	} ARGEND

	if(argc != 2){
		fprint(2, "usage: %s [-f] src.ls dst.ls \n", argv0);
		exits("usage");
	}
	Tm now;
	tmnow(&now, nil);
	
	src=Bopen(argv[1], OREAD);
	dst=Bopen(argv[0], OREAD);
	if(src<nil){
		perror(argv[i]);
		exits("can't open");
	} else {
		filesread=0;
		while( line = Brdstr(src, '\n', 1)){
			match[0].sp = match[0].ep = 0;
			//check for year type match first. 
			if(regexec(lsyrrp, line, match, MSZ)){
				char * monthtmp = copymatch(1);
				char * daytmp = copymatch(2);
				char * yeartmp = copymatch(3);

				srcfiles[filesread].moddate.mday = atoi(daytmp);
				srcfiles[filesread].moddate.year = atoi(yeartmp);

				srcfiles[filesread].fname = copymatch(4);
				for(int j=0; j < 12; ++j){
					if(strcmp(monthtmp, month[j]) == 0){
						srcfiles[filesread].moddate.mon=j;
						j = 12;
					}
				}
				tmnorm(&(srcfiles[filesread].moddate));
				free(daytmp);
				free(yeartmp);
				free(monthtmp);
				++filesread;
			} else {
				//if it didn't match year type check time type
				match[0].sp = match[0].ep = 0;
				if(regexec(lstmrp, line, match, MSZ)){
					char * monthtmp = copymatch(1);
					char * daytmp = copymatch(2);
					char * timetmp = copymatch(3);
					srcfiles[filesread].fname = copymatch(4);
					for(int j=0; j < 12; ++j){
						if(strcmp(monthtmp, month[j]) == 0){
							srcfiles[filesread].moddate.mon=j;
							j=12;
						}
					}
					srcfiles[filesread].moddate.mday = atoi(daytmp);
					
					match[0].sp = match[0].ep = 0;
					if(regexec(hrminrp, timetmp, match, MSZ)){
						char * hr = copymatch(1);
						char * min = copymatch(2);
						srcfiles[filesread].moddate.hour = atoi(hr);
						srcfiles[filesread].moddate.min = atoi(min);
						free(hr);
						free(min);
					}
	
					srcfiles[filesread].moddate.year = now.year;
					tmnorm(&(srcfiles[filesread].moddate));		
	
			
					free(monthtmp);
					free(daytmp);
					free(timetmp);
					++filesread;
				}
	
			}
		
		}
	}
		
	
	if(dst<nil){
		perror(argv[i]);
		exits("can't open");
	} else {
		dstread=0;
		int oldds = dstread;
		while( line = Brdstr(dst, '\n', 1)){
			match[0].sp = match[0].ep = 0;
			//check for year type match first. 
			if(regexec(lsyrrp, line, match, MSZ)){
				char * monthtmp = copymatch(1);
				char * daytmp = copymatch(2);
				char * yeartmp = copymatch(3);

				dstfiles[dstread].moddate.mday = atoi(daytmp);
				dstfiles[dstread].moddate.year = atoi(yeartmp);

				dstfiles[dstread].fname = copymatch(4);
				for(int j=0; j < 12; ++j){
					if( strcmp(monthtmp, month[j]) == 0 ){
						dstfiles[dstread].moddate.mon=j;
						j = 12;
					}
				}
				tmnorm(&(dstfiles[dstread].moddate));

				free(monthtmp);
				free(daytmp);
				free(yeartmp);
				dstread++;
			} else {
				//if it didn't match year type check time type
				match[0].sp = match[0].ep = 0;
				if(regexec(lstmrp, line, match, MSZ)){
					char * monthtmp = copymatch(1);
					char * daytmp = copymatch(2);
					char * timetmp = copymatch(3);
					dstfiles[dstread].fname = copymatch(4);
					for(int j=0; j < 12; ++j){
						if( strcmp(monthtmp, month[j]) == 0 ){
							dstfiles[dstread].moddate.mon=j;
							j=12;
						}
					}
					dstfiles[dstread].moddate.mday = atoi(daytmp);
					
					match[0].sp = match[0].ep = 0;
					if(regexec(hrminrp, timetmp, match, MSZ)){
						char * hr = copymatch(1);
						char * min = copymatch(2);
						dstfiles[dstread].moddate.hour = atoi(hr);
						dstfiles[dstread].moddate.min = atoi(min);
						free(hr);
						free(min);
					}
	
					dstfiles[dstread].moddate.year = now.year;
					tmnorm(&(dstfiles[dstread].moddate));
			
					free(monthtmp);
					free(daytmp);
					free(timetmp);
					++dstread;
				}
			}
			//we should have dtmp filled now
			if(dstread > oldds){
			int placed = 0;
				for(int dot = 0; dot < filesread; ++dot){

					int c = strcmp(srcfiles[dot].fname, dstfiles[oldds].fname);
					if( c == 0 ){
						//we have filename match. check moddate
						
						//tmfmtinstall();
						//print(" comparing (%s : %s ) - %τ : %τ \n", srcfiles[dot].fname, dstfiles[oldds].fname,  tmfmt(&(srcfiles[dot].moddate), nil), tmfmt(&(dstfiles[oldds].moddate), nil) );

						if( tm2sec(&(srcfiles[dot].moddate)) < tm2sec(&(dstfiles[oldds].moddate)) ) {
							print("%s ", dstfiles[oldds].fname); 
						}
						placed = 1;
						dot = filesread;
					}
					if( c > 0 ){
						//we didn't find it and are now past it alphabetically
						//we don't need to compare dates, just add to list and move on
						print("%s ",dstfiles[oldds].fname);
						placed = 1;
						dot = filesread;
					}
				}
				if (placed == 0) {
					//we didn't place it, either because the right arg doesn't contain anything, or the current left side file would be after the whole right side list.
					print("%s ",dstfiles[oldds].fname);
				}
				oldds = dstread;
			}
		}
	}

}