/***** minrec3.c     ************************************************
*      (modified from Hudson's ms_ss.c and minrec.c)
*
Generates samples of gametes with given numbers of segsites. Usage:

minrec3 npop sample[1] ... sample[npop] nsites segsites r (mig_rate if npop>1)  howmany seed1 

     where these arguments are :

     npop:  Number of subpopulations which make up the total population
     sample[i]: the sample size from the i th subpopulation (can be zero).
     nsites: number of sites between which recombination can occur ( # of bp's)
     segsites: Number of polymorphic sites in the total sample
     r: recombination rate between ends of segment times 4N
     mig_rate: migration rate: the fraction of each subpop made up of migrants times 4N. (This argument is left off if npop is one.) 
     howmany: howmany samples to generate.
     seed1: seed for random number generator

	Output consists of the command line arguments followed by the 
	following arrays:

	B'[a] = # of trials with B' = a
	QS[a] = # of trials with QS = a
*	
***************************************************************************/

#include "my.h"
#include "max.h"
#include <time.h>

struct node{
	int abv;
	int ndes;
	float time;
	};

struct segl {
	int beg;
	struct node *ptree;
	int next;
	};

int **gamete; /* Array of 4-gamete test results */

/* New function declarations */
void Fu1 (int a, int b, double *c);
double Fu2 (int d, int e, int *f, double *g);
int numcmp (double *h, double *i);
void printG (double j, int k, double *l);
int Min_rec (char **, int, int, int);

main(argc,argv)
	int argc;
	char *argv[];
{
	int nsam ,nsites, i, ns, howmany, npop, *config, pop, count, print_flag, narg ;
	double  theta, r, *posit, mig_rate, estimator, gst, alpha, within, between ;
	char **list, **cmatrix();
	FILE *pf, *fopen();
	struct runsum  est_m, gst_m, within_m, between_m ;
	double ttot ;
	int histest[HISTLIM+1], histgst[HISTLIM+1], segsites ;

	/* New variables, different from Hudson's program */
	int a, b, c, ndes, *spectrum, run, lbun, run_total, blist, hlist;
	int runtemp, singtemp, *num_bunch, *sing_bunch, issame, ctr = 0;
	int *diff_bunch, *num_hap, *composite, ctime, *spect_temp, *sing;
	int p, q, *pi, *spect_actual, gtest, *RM, *max_run, maxtemp;
	double *var_eta, *G_eta, Gdummy, Gactual;
	char **bunchlist, **haplist;
	time_t start, finish;
	clock_t cstart, cfinish;
	long seed1;
	int counter = 0;

	time (&start); cstart = clock();

THINK_C_CL ;
        if (argc==1) {
	printf("Usage: \nminrec3 npop samplesize[1] ... samplesize[npop] nsites segsites r");
	printf(" (migrate if npop>1) howmany seed1\n");
	exit(0);
	}
	npop = atoi( argv[1] );
	config = (int *)malloc((unsigned)((npop+1)*sizeof(int)));
	for( pop=0, narg=1 ; pop<npop; pop++) {
	   config[pop] = atoi( argv[++narg] );
		if( config[pop] < 0 ) break;
	   }
	nsites = atoi( argv[++narg]);
	segsites = atoi( argv[++narg]);
	r = atof( argv[++narg]);
	if( npop > 1 )  mig_rate = atof( argv[++narg]);
	else mig_rate = 0.0 ;
	howmany = atoi( argv[++narg]);
	seed1 = atol (argv[++narg]);

	srand48 (seed1);  /* Initialize random number generator */

	lbun = 2;
	while( pop<npop ){ config[pop++]=0 ;  }
	pf = stdout ;
	alpha = npop/(npop-1.);
	alpha *= alpha ;
	nsam = 0 ;
	for(i=0;i<npop; i++) nsam += config[i] ;
	if(nsam > MAXCHR) perror("nsam too big.\n");

	/* Initialize new variables */
	bunchlist = (char **) malloc ((unsigned) (nsam * sizeof (char *)));
	haplist = (char **) malloc ((unsigned) (nsam * sizeof (char *)));
	for (a=0; a<nsam; ++a) {
	  bunchlist[a] = (char *)malloc ((unsigned) ((segsites+1)*sizeof(char)));
	  haplist[a] = (char *) malloc ((unsigned) ((segsites+1)*sizeof (char)));
	}
	spectrum = (int *) malloc (nsam * sizeof (int));
	for (a=0 ; a<nsam ; ++a)
	  spectrum[a] = 0;
	num_bunch = (int *) malloc (segsites * sizeof (int));
	sing_bunch = (int *) malloc (segsites * sizeof (int));
	diff_bunch = (int *) malloc (segsites * sizeof (int));
	num_hap = (int *) malloc ((nsam+1) * sizeof (int));
	sing = (int *) malloc ((segsites + 1) * sizeof (int));
	composite = (int *) malloc ((segsites + 2) * sizeof (int));
	var_eta = (double *) malloc (nsam * sizeof (double));
	G_eta = (double *) malloc ((howmany+1) * sizeof (double));
	spect_temp = (int *) malloc (nsam * sizeof (int));
	spect_actual = (int *) malloc (nsam * sizeof (int));
	p = (segsites * nsam * nsam) / 4 + 1;
	pi = (int *) malloc (p * sizeof (int));
	RM = (int *) malloc (segsites * sizeof (int));
	gamete = (int **) malloc (segsites * sizeof (int *));
	max_run = (int *) malloc (segsites * sizeof (int *));
	for (a=0; a<segsites; ++a)
	  gamete[a] = (int *) malloc (segsites * sizeof (int));
	run_total = blist = hlist = 0;
	for (a=0; a<segsites; ++a)
	  num_bunch[a] = sing_bunch[a] = diff_bunch[a] = RM[a] = max_run[a] = 0;
	for (a=0; a<=nsam; ++a)
	  num_hap[a] = 0;
	for (a=0; a<(segsites+2); ++a)
	  composite[a] = 0;
	for (a=0; a<=segsites; ++a)
	  sing[a] = 0;
	for (a=0; a<p; ++a)
	  pi[a] = 0;

	for (a=0; a<nsam; ++a)
	  spect_actual[a] = 0;

	/* Print command line arguments in output */
        printf("\nminrec3");
	printf("\n%d  ",npop);
	for(pop=0; pop<npop; pop++){ printf("%d ",config[pop]); }
	printf("%d ",nsites);
	printf("%d ",segsites);
	printf("%f ",r);
	if( npop > 1) {	printf("%f  ",mig_rate);}
	printf("%d %d\n\n",howmany, seed1);
	list = cmatrix(nsam,segsites+1);
	posit = (double *)malloc((unsigned)(segsites*sizeof(double)));
	if(posit==NULL) perror("malloc error. main");

	Fu1 (nsam, segsites, var_eta);
	Gactual = Fu2 (nsam, segsites, spect_actual, var_eta);


	for (count=0; count<howmany; ++count)  {  /* Loop is for each trial */
	   gensam_ss(npop,nsam,config,nsites,segsites,r,mig_rate,posit,list);
	   /*
printf("\n");
for (i=0; i<nsam; i++)
printf("%s\n", list[i]);
*/
	   for (a=1; a<nsam; ++a)  /* Initialize trial specific variables */
	     spect_temp[a] = 0;
	   q = 0;
	   maxtemp = 0;


	   	   ++RM[Min_rec(list, segsites, 0, nsam)];

	   /* Count up number of haplotypes */
	           blist = hlist = 0;
         	   for (b=0; b<nsam; ++b) {
		     for (c=0; c<hlist; ++c) {
		       for (a=0; a<segsites && list[b][a] == haplist[c][a]; \
			      ++a)
			 ;
		       if (a==segsites)
			 break;
		     }
		     if (c==hlist) {
		       for (a=0; a<segsites; ++a)
			 haplist[c][a] = list[b][a];
		       hlist++;
		     }
		   }
		       
		   run = 1;
		   runtemp = singtemp = 0;
		   for (a=0; a<(segsites-1); ++a) {
		     ndes = 0;
		     for (b=0; b<nsam; ++b)
		       if (list[b][a] == '1')
			 ++ndes;
		     ++spectrum[ndes];
		     ++spect_temp[ndes];
		     q += ndes * (nsam - ndes);
		     issame = 0;
		     for (b=0; b<nsam && list[b][a] == list[b][a+1]; ++b)
		       ;
		     if (b==nsam)
		       ++issame;
		     else if (b==0) {
		       for (b=1; b<nsam && list[b][a] != list[b][a+1]; ++b)
			 ;
		       if (b==nsam)
			 ++issame;
		     }

		     if (issame) {/* Current seg site is same as previous */
		       ++run;
		       if (run > maxtemp)
			 maxtemp = run;
		       if (run >= lbun) {  /* New bunch found */
			 ++run_total;
			 ++runtemp;
			 if (ndes == 1 || ndes == (nsam - 1))
			   ++singtemp;
			 /* Determine if partition type is new */
			 for (b=0; b < blist; ++b) {
			   for (c=0; c<nsam && list[c][a] == bunchlist[c][b]; \
				  ++c)
			     ;
			   if (c==nsam)
			     break;
			   else if (c==0) {
			     for (c=1; c<nsam && list[c][a] != bunchlist[c][b];                                    ++c)
			       ;
			     if (c==nsam)
			       break;
			   }
			 }
			 if (b==blist) {
			   for (c=0; c<nsam; ++c)
			     bunchlist[c][b] = list[c][a];
			   blist++;
			 }
		       }
		     }
		     else
		       run = 1;
		   }
		   ndes = 0;
		   for (b=0; b<nsam; ++b)
		     if (list[b][segsites-1] == '1')
		       ++ndes;
		   ++spectrum[ndes];
		   ++spect_temp[ndes];
		   q += ndes * (nsam - ndes);
		   ++num_bunch[runtemp];
		   ++sing_bunch[singtemp];
		   ++diff_bunch[blist];
		   ++num_hap[hlist];
		   ++composite[runtemp + blist];
		   ++max_run[maxtemp];
		   ++pi[q];
		   ++sing[spect_temp[1] + spect_temp[nsam-1]];
		   G_eta[count] = Fu2 (nsam, segsites, spect_temp, var_eta);
if (G_eta[count] < Gactual)
++counter;
	}

	qsort ((void *) G_eta, howmany, sizeof (double), (int (*)(const void *, const void *)) numcmp);

	/*	for (a=0 ; a<(segsites-1) ; ++a) {
	  num_bunch[a+1] += num_bunch[a];
	  sing_bunch[a+1] += sing_bunch[a];
	  diff_bunch[a+1] += diff_bunch[a];
	  RM[a+1] += RM[a];
	  max_run[a+1] += max_run[a];
	}
	for (a=0; a<nsam; ++a)
	  num_hap[a+1] += num_hap[a];
	for (a=0; a<segsites; ++a) { 
	  composite[a+1] += composite[a];
	  sing[a+1] += sing[a];
	}
	for (a=0; a<(p-1); ++a)
	  pi[a+1] += pi[a];
	

	time (&finish); cfinish = clock();
	ctime = (cfinish - cstart) / CLOCKS_PER_SEC;

	printf("\n");
	for (a=1; a<nsam; ++a)
	  printf("spec[%d] = %d\tn_hap[%d] = %d\n", a, spectrum[a], \
		 a, num_hap[a]);
	printf("\t\t\tnum_hap[%d] = %d\n\n", nsam, num_hap[nsam]);
	*/
	for (a=0; a<segsites; ++a)
	  printf("B'[%d] = %d\n", a, num_bunch[a]);
	printf("\n");
	for (a=0; a<=segsites; ++a)
	  printf("QS[%d] = %d\n", a, composite[a]);
	/*
	printf("composite[%d] = %d\tsing[%d] = %d\n", segsites, \
	       composite[segsites], segsites, sing[segsites]);
	for (a=0; a<segsites && max_run[a] != howmany; ++a)
	  printf("max_run[%d] = %d\n", a, max_run[a]);
	printf("\nTotal # of runs of length %d = %d\t", lbun, run_total);
	printf("Runtime, ctime = %d %d\n\n", (finish - start), ctime);

	for (a=0; a<(howmany-1); ++a) 
	  if (G_eta[a] < G_eta[a+1]) {
	    ++ctr;
	    printf("G[%d] = %f", a, G_eta[a]);
	    printf("%s", (ctr % 3) == 0 ? "\n" : "   ");
	  }
	if (G_eta[a-1] != G_eta[a])
	  printf("G_eta[%d] = %.5f", a, G_eta[a]);
	printG (.025, howmany, G_eta);
	printG (.05, howmany, G_eta);
	printG (.90, howmany, G_eta);
	printG (.95, howmany, G_eta);
	printG (.975, howmany, G_eta);
	printG (.99, howmany, G_eta);
	printG (.999, howmany, G_eta);
	printG (.9999, howmany, G_eta);

       	printf("\nGactual = %f;  counter = %d\n", Gactual, counter);

	ctr = 0; printf("\n");
	for (a=0; a<(p-1); ++a)  
	  if (pi[a] < pi[a+1])  {
	    printf("pi[%d] = %d  ", a+1, pi[a+1]);
	    ++ctr;
	    if ((ctr % 4) == 0)
	      printf("\n");
	  }
	*/
}

void printG (double s, int howmany, double *G_eta)
{
  int a, b, c;

  a = s * howmany - 1;
  c = (s<.5) ? a + 1 : a;
  if (G_eta[a] != G_eta[a+1])
    printf("G_eta[%d] = %f\n", c, G_eta[c]);
  else {
    for (b=a; b>0 && (G_eta[b] == G_eta[b-1]); --b)
      ;
    printf("G_eta[%d] = %f\t", b, G_eta[b]);
    for (b=a+1; (G_eta[b] == G_eta[b+1]) && b<(howmany-2); ++b)
      ;
    printf("G_eta[%d] = %f\n", b+1, G_eta[b+1]);
  }
}

int Min_rec (char **list, int x, int size, int nsam)
{  /* Calculate min # rec. events */
  int a, b, c, e, gtest, flag = 0;

  if (size<2 || x >= (size-1))
    return (0);
  for (a=x+1; a<size; ++a) {
    for (b=x; b<a; ++b) {
      gtest = 0;
      for (e=0; e<nsam; ++e)
        if (list[e][b] == '0' && list[e][a] == '0') {
          ++gtest;
          break;
        }
      for (e=0; e<nsam; ++e)
        if (list[e][b] == '0' && list[e][a] == '1') {
          ++gtest;
          break;
        }
      for (e=0; e<nsam; ++e)
        if (list[e][b] == '1' && list[e][a] == '0') {
          ++gtest;
          break;
        }
      for (e=0; e<nsam; ++e)
        if (list[e][b] == '1' && list[e][a] == '1') {
          ++gtest;
          break;
        }       
      if (gtest == 4) {
        flag = 1;
        break;
      }
    }
    if (flag == 1)
      break;
  }
  if (a==size)
    return (0);
  else {
    c = Min_rec (list, a, size, nsam);
    return (1+c);
  }
}    

int numcmp (double *a, double *b)
{
  if ((*a-*b) < 0)
    return (-1);
  else if ((*a-*b) > 0)
    return (1);
  else
    return (0);
}


void Fu1 (int nsam, int segsites, double *var_eta)
{
  double *var_xi, *beta, *sigma, *cov, temp, temp2, theta;
  int i, b;

  var_xi = (double *) malloc (nsam * sizeof (double));
  beta = (double *) malloc (nsam * sizeof (double));
  sigma = (double *) malloc (nsam * sizeof (double));
  cov = (double *) malloc (nsam * sizeof (double));

  temp = 0.0;
  for (i=1; i<nsam; ++i)
    temp += (double) 1/i;
  theta = (double) segsites / temp;

  for (i=2; i<nsam; ++i) {
    temp = 0.0;
    for (b=i; b<=nsam; ++b)
      temp += (double) 1/b;
    beta[i] = (double) (2 * nsam * temp) / ((nsam - i + 1) * (nsam - i));
    beta[i] -= (double) 2. / (nsam - i);
  }

  for (i=1; i < (nsam/2); ++i)
    sigma[i] = beta[i+1];
  if (i == (nsam/2)) {
    temp = 0.0;
    for (b=i; b<nsam; ++b)
      temp += (double) 1/b;
    sigma[i] = (double) (2. * temp) / (nsam - i) - (double) 1. / (i * i);
    i++;
  }
  for ( ; i<nsam ; ++i)
    sigma[i] = beta[i] - (double) 1. / (i * i);

  for (i=1; i<nsam; ++i)
    var_xi[i] = (double) theta / i + (double) sigma[i] * theta * theta;

  for (i=1; i<(nsam/2); ++i) {
    temp = temp2 = 0.0;
    for (b=i; b<nsam; ++b)
      temp += (double) 1./b;
    for (b=(nsam-i); b<nsam; ++b) 
      temp2 += (double) 1./b;
    cov[i] = (double) temp / (nsam - i) + (double) temp2 / i;
    cov[i] -= (double) (beta[i+1] + beta[nsam-i]) / 2.;
    cov[i] -= (double) 1. / (i * (nsam - i));
    cov[i] *= (double) theta * theta;
  }

  for (i=1; i<(nsam/2); ++i)
    var_eta[i] = var_xi[i] + var_xi[nsam-i] + 2. * cov[i];
  if (i == (nsam/2))
    var_eta[i] = var_xi[i];

  free (var_xi);
  free (sigma);
  free (beta);
  free (cov);
}


double Fu2 (int nsam, int segsites, int *spect_temp, double *var_eta)
{
  int a, i;
  double alpha, dummy, theta, G;

  G = dummy = 0.0;
  for (i=1; i<nsam; ++i)
    dummy += (double) 1./i;
  theta = (double) segsites / dummy;

  for (i=1; i<(nsam/2); ++i) {
    spect_temp[i] += spect_temp[nsam-i];
    alpha = (double) 1/i + (double) 1/(nsam-i);
    dummy = (double) spect_temp[i] - (double) theta * alpha;
    dummy *= dummy;
    dummy /= var_eta[i];
    G += dummy;
  }

  if (i == (nsam/2)) {
    dummy = (double) spect_temp[i] - (double) theta / i;
    dummy *= dummy;
    dummy /= var_eta[i];
    G += dummy;
    i++;
  }

  --i;
  G = (double) G / i;

  return (G);
}


	int 
gensam_ss( npop,nsam,inconfig, nsites, segsites, r,mig_rate,posit,list)
	int npop,nsam,inconfig[], nsites, segsites ;
	char **list;
	double  r, mig_rate, posit[] ;
{
	int nsegs, k,  seg, ns, start, end, len, i ;
	int c, *ss ;
	struct segl *seglst, *segtre_mig() ; 
	double nsinv,  tseg, tt , ttime(), *pk ;


	nsinv = 1./nsites;
	seglst = segtre_mig(npop,nsam,inconfig,nsites,r,mig_rate, &nsegs) ;
	pk = (double *)malloc((unsigned)(nsegs*sizeof(double)));
	ss = (int *)malloc((unsigned)(nsegs*sizeof(int)));
	if( (pk==NULL) || (ss==NULL) ) perror("malloc error. gensam.2");
	
	tt = 0.;
	for( seg=0, k=0; k<nsegs; seg=seglst[seg].next, k++) { 
		end = ( k<nsegs-1 ? seglst[seglst[seg].next].beg -1 : nsites-1 );
		start = seglst[seg].beg ;
		len = end - start + 1 ;
		tseg = len/(double)nsites;
		pk[k] = ttime(seglst[seg].ptree,nsam)*tseg ;
		tt += pk[k] ;
		}
	for (k=0;k<nsegs;k++) pk[k] /= tt ;
	mnmial(segsites,nsegs,pk,ss);
		
	ns = 0 ;
	for( seg=0, k=0; k<nsegs; seg=seglst[seg].next, k++) { 
		end = ( k<nsegs-1 ? seglst[seglst[seg].next].beg -1 : nsites-1 );
		start = seglst[seg].beg ;
		len = end - start + 1 ;
		tseg = len/(double)nsites;
		make_gametes(nsam,seglst[seg].ptree,tt*pk[k]/tseg, ss[k], ns, list ); 
/*		gam_ss(nsam, seglst[seg].ptree, ns, ss[k], list );*/
		free(seglst[seg].ptree) ;
/*		locate(ss[k],start*nsinv, len*nsinv,posit+ns);  */
		ns += ss[k];
		}
	for(i=0;i<nsam;i++) list[i][ns] = '\0' ;
	free(pk);
	free(ss);
}


	

/* allocates space for gametes (character strings) */
	char **
cmatrix(nsam,len)
	int nsam, len;
{
	int i;
	char **m;

	if( ! ( m = (char **) malloc( (unsigned)( nsam*sizeof( char* )) ) ) )
	   perror("alloc error in cmatrix") ;
	for( i=0; i<nsam; i++) {
		if( ! ( m[i] = (char *) malloc( (unsigned) (len*sizeof( char )) )))
			perror("alloc error in cmatric. 2");
		}
	return( m );
}



	int
locate(n,beg,len,ptr)
	int n;
	double beg, len, *ptr;
{
	int i;

	ordran(n,ptr);
	for(i=0; i<n; i++)
		ptr[i] = beg + ptr[i]*len ;

}


/**** ordran.c  ***/
/*
	int
ordran(n,pbuf)
	int n;
	double pbuf[];
{
	ranvec(n,pbuf);
	order(n,pbuf);
	return;
}


	int
ranvec(n,pbuf)
	int n;
	double pbuf[];
{
	int i;
	double ran1();

	for(i=0; i<n; i++)
		pbuf[i] = ran1();

	return;
}

	int
order(n,pbuf)
	int n;
	double pbuf[];
{
	int gap, i, j;
	double temp;

	for( gap= n/2; gap>0; gap /= 2)
	   for( i=gap; i<n; i++)
		for( j=i-gap; j>=0 && pbuf[j]>pbuf[j+gap]; j -=gap) {
		   temp = pbuf[j];
		   pbuf[j] = pbuf[j+gap];
		   pbuf[j+gap] = temp;
		   }
}
*/

