/*   psubs.c  Estimates the probability of finding crit or fewer polymorphisms
*                in a subsample of size nsubs using samples that are supplied
		by the user.
*   Usage:  psubs nsubs crit

*   where nsubs is the  subsample size and crit is the number of polymorphic
*   sites in the subsample.  The input comes typically from a file (
*   redirected stdin) which should start with the parameters under which the
*   samples were generated followed by the samples.  
*  Example using ms:     ms 20 1000 -s 19 | psubs 15 2  
*   This example will produce an estimate of the probability that a sample of size 20 with 19 segregating
*      sites has a subset of size 15 with two or fewer polymorphic sites.

*/

#include <stdio.h> 
#include <stdlib.h>
#include <math.h>

main(argc,argv)
	int argc;
	char *argv[];
{
	int nsam ,nsites, i, ns, howmany, npop, *config, pop, count, print_flag ;
	double   r, mig_rate ;
	char **list, **cmatrix(), str[100] ;
	FILE *pf, *fopen(), *pfin ;
	double ttot ;
	int  segsites, numlesseqcrit, crit, flag, sflag, nsubs, *subslist, seq, npoly ;

if( argc < 3) {
	fprintf(stderr," Usage: psubs nsubs  crit\n");
	exit(0);
	}
nsubs = atoi(argv[1]);
crit = atoi( argv[2]);
pfin = stdin ;

fscanf(pfin," %*s %d %d", &nsam, &howmany);
printf(" %d %d ",nsam, howmany);
while( 1){
	fscanf(pfin," %s", str);
	printf(" %s",str);
	if( str[1] == 's' ) { fscanf(pfin," %d", &segsites); printf(" %d\n",segsites);  break;}
	}
   if( segsites == 0 ) { fprintf(stderr," segsites == 0\n"); exit(0); }


	list = cmatrix(nsam,segsites+1);
	subslist = (int *)malloc((unsigned)(nsubs*sizeof(int)));
	if(subslist==NULL) perror("malloc error. main");


	count=0;
	numlesseqcrit = 0 ;
	while( howmany-count++ ) {
/*	   readsamples */
	while( 1){
	  fscanf(pfin," %s", str);
	  if( (str[0] == '/' ) && ( str[1] == '/' ) ) { 
		fscanf(pfin," segsites: %*d");
		fscanf(pfin," positions:" );
		for( i=0; i<segsites; i++) fscanf(pfin," %*lf" );
	        break;
		}
	  }
	for( i=0; i<nsam;i++) fscanf(pfin," %s", list[i] );
		sflag = 1 ;
		flag = 0 ;
		for (i=0;i<nsubs; i++) subslist[i] = i ;
		while( sflag ){
			npoly = poly(subslist,nsubs, list, nsam, segsites,crit,&seq);
			if( npoly >crit )
				sflag = nextsample( subslist,nsubs,nsam, seq);
			else { 
			flag = 1;
/*			fprintf(stdout," %d \n", npoly);
			for( i=0;i<nsubs; i++)
				fprintf(stdout,"%s\n",list[subslist[i]] );
			fprintf(stdout,"\n");
			for( i=0;i<nsam; i++)
				fprintf(stdout,"%s\n",list[i] );  */  /* uncomment these lines to output samples
															with crit or less seg sites. */
			 break;}
			}
		if( flag ) numlesseqcrit++;
		
	   	
		  
	   }
   fprintf(stdout," Subsample size: %d   Number of polymorphic sites in subsample: %d\n",
   	nsubs, crit);
   fprintf(stdout,"Estimated p-value: %lf\n",  numlesseqcrit/((double)howmany)  );
}

	int
poly(subslist,nsubs, list, nsam, segsites,crit,seq)
	int subslist[],nsubs, nsam, segsites,crit,*seq ;
	char **list ;
	{
	int *polyvector, npoly, i, j;
	
	polyvector = ( int *)malloc( (size_t)(segsites*sizeof( int) ))  ;
	for( i=0; i<segsites; i++) polyvector[i] = 0 ;
	npoly = 0 ;
	for( i = 1 ; i<nsubs; i++){
		for( j= 0 ; j<segsites; j++) {
			if( list[subslist[i]][j] != list[subslist[0]][j] ){
				if( polyvector[j] == 0 ) {
					polyvector[j] = 1;
					npoly++;
					if( npoly == crit+1 ) *seq = i ;
					}
				} 
			}
		}
	free(polyvector);
	return( npoly );	
	}
	
	int
nextsample(  subslist,nsubs,nsam, seq)
	int  subslist[],nsubs,nsam, seq;
	{
	int i ;
	
		subslist[seq]++;
		if( subslist[seq] > nsam - nsubs + seq ) {
			seq--;
			if( seq < 0 ) return( 0 );
			 return( nextsample( subslist,nsubs,nsam, seq) ) ;
			 }
		for(i=seq+1; i<nsubs; i++) subslist[i] = subslist[i-1]+1 ;
		return(1); 
	
	}

/* allocates space for gametes (character strings) */
	char **
cmatrix(nsam,len)
	int nsam, len;
{
	int i;
	char **m;

	if( ! ( m = (char **) malloc( (unsigned)( nsam*sizeof( char* )) ) ) )
	   perror("alloc error in cmatrix") ;
	for( i=0; i<nsam; i++) {
		if( ! ( m[i] = (char *) malloc( (unsigned) (len*sizeof( char )) )))
			perror("alloc error in cmatric. 2");
		}
	return( m );
}


