#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "clustalw.h"


/*
 *   Prototypes
 */
static sint read_dialign_list(FILE *fd,MOTIF *motifs);
static sint read_ballast_list(FILE *fd,MOTIF *motifs,sint nseqs,SEQ *seqs,sint max,Boolean propogate);
static sint start_pos(char *seq,sint p);
static sint next_pos(char *seq,sint len,sint p);

static sint debug=1;

sint read_motif_list(char *filename,MOTIF *motifs,sint nseqs,SEQ *seqs,Boolean propogate)
{
	sint nmotifs=0;
	float max;
	FILE *fd;
	char inline1[1024];

	if ((fd=fopen(filename,"r"))==NULL) {
        	error("cannot open motif file %s", filename);
        	return((sint)0);
	}

	if (fgets(inline1,1024,fd) != NULL) {
		if(keyword(inline1,"Ballast")) {
			sscanf(&inline1[7],"%f",&max);
			nmotifs=read_ballast_list(fd,motifs,nseqs,seqs,(sint)max,propogate);
		}
		else {
			rewind(fd);
			nmotifs=read_dialign_list(fd,motifs);
		}
	}
	fclose(fd);
	return nmotifs;
}

static sint read_ballast_list(FILE *fd,MOTIF *motifs,sint nseqs,SEQ *seqs,sint max,Boolean propogate)
{
	sint i,j,n,nmotifs=0;
	sint seq1,seq2,pos1,pos2,len;
	char name1[MAXNAMES],name2[MAXNAMES];
	char s1[1024],s2[1024],s3[1024];
	float weight;
	char inline1[1024];
	char *tmp;

	while (fgets(inline1,1024,fd) != NULL) {
		if((tmp=strstr(inline1,"seq:"))!=NULL) {
			tmp+=4;
			if((n=sscanf(tmp,"%s %s %s %d %d %s %d %s %f\n",
				name1,name2,s1,&pos1,&pos2,s2,&len,s3,&weight))!=9) {
				continue;
			}
			seq1=-1;
			for(i=0;i<nseqs;i++) {
				if(strcmp(name1,seqs[i].name)==0) {
					seq1=i;
					break;
				}
			}
/*
			if (seq1==-1) {
				error("sequence %s not found in sequence file\n",name1);
			}
*/
			seq2=-1;
			for(i=0;i<nseqs;i++) {
				if(strcmp(name2,seqs[i].name)==0) {
					seq2=i;
					break;
				}
			}
/*
			if (seq2==-1) {
				error("sequence %s not found in sequence file\n",name2);
			}
*/
			if(seq1 == -1 || seq2 == -1) continue;
			motifs[nmotifs].seq1=seq1+1;
			motifs[nmotifs].seq2=seq2+1;
			motifs[nmotifs].pos1=pos1;
			motifs[nmotifs].pos2=pos2;
			motifs[nmotifs].len=len;
			if(len>3) {
				weight*=(float)max/(float)nseqs;
				if(weight>100) {
printf("%s %s %s %d %d %s %d %s %f\n",name1,name2,s1,pos1,pos2,s2,len,s3,weight);
					motifs[nmotifs].weight=(weight/30.0)*INT_SCALE_FACTOR;
					if(nmotifs>=MAXMOTIFS-1) {
						warning("Superceded max motifs (= %d)\n",MAXMOTIFS);
						return nmotifs;
					}
					nmotifs++;
				}
			}
		}
	} 

fprintf(stdout,"nmotifs %d\n",nmotifs);
/* extrapolate the motifs through the other sequences */
	if(propogate) {
		n=0;
		for(i=0;i<nmotifs;i++) {
			for(j=i+1;j<nmotifs;j++) {
				if(motifs[i].pos1==motifs[j].pos1) {
					motifs[nmotifs+n].seq1=motifs[i].seq2;
					motifs[nmotifs+n].seq2=motifs[j].seq2;
					motifs[nmotifs+n].pos1=motifs[i].pos2;
					motifs[nmotifs+n].pos2=motifs[j].pos2;
					motifs[nmotifs+n].weight=motifs[i].weight;
					motifs[nmotifs+n].len=len;
printf("%d %d %d %d %d %d f\n",nmotifs+n,motifs[i].seq2,motifs[j].seq2,motifs[i].pos2,motifs[j].pos2,motifs[nmotifs+n].weight);
					n++;
					if(nmotifs>=MAXMOTIFS-1) {
						warning("Superceded max motifs (= %d)\n",MAXMOTIFS);
						return nmotifs+n;
					}
				}
			}
		}
	}
	return nmotifs+n;
}

static sint read_dialign_list(FILE *fd,MOTIF *motifs)
{
	sint n,nmotifs=0;
	sint seq1,seq2,pos1,pos2,len;
	char s1[1024],s2[1024],s3[1024];
	float weight;
	char inline1[1024];
	char *tmp;

	while (fgets(inline1,1024,fd) != NULL) {
		if((tmp=strstr(inline1,"seq:"))!=NULL) {
			tmp+=4;
			if((n=sscanf(tmp,"%d %d %s %d %d %s %d %s %f\n",
				&seq1,&seq2,s1,&pos1,&pos2,s2,&len,s3,&weight))!=9) {
				error("Wrong format in motif list file\n");
				fclose(fd);
				return((sint)0);
			}
			motifs[nmotifs].seq1=seq1;
			motifs[nmotifs].seq2=seq2;
			motifs[nmotifs].pos1=pos1;
			motifs[nmotifs].pos2=pos2;
			motifs[nmotifs].len=len;
			/*if(weight>10 && len>5) {*/
			if(weight>0 && len>5) {
			motifs[nmotifs].weight=10.0*weight*INT_SCALE_FACTOR;
			if(nmotifs>=MAXMOTIFS-1) {
				warning("Superceded max motifs (= %d)\n",MAXMOTIFS);
				return nmotifs;
			}
			nmotifs++;
			}
		}
	} 

	return nmotifs;
}

void calc_motif_scores(SEQ *seqs, sint nseqs, sint *group, MOTIF *motifs, sint nmotifs, sint **motif_score, sint prf_length1, sint prf_length2)
{

	char c;
	sint i,j, m,n1,n2;
	sint seq1, seq2, pos1, pos2, len, weight;
	sint ix1,ix2;
   
	for (i=0; i<prf_length1; i++)
		for (j=0; j<prf_length2; j++)
			motif_score[i][j]=0;

/*
   count number of sequences in each profile 
*/
	n1=n2=0;
	for(i=0;i<nseqs;i++)
		if(group[i]==1) n1++;
		else if(group[i]==2) n2++;

/*
   Check for motifs
*/
	if(nmotifs==0) return;

if(debug>=1) fprintf(stdout,"nmotifs %d\n",nmotifs);
/*     check the list of motifs */
	for(m=0;m<nmotifs;m++) {
		seq1=motifs[m].seq1-1;
		seq2=motifs[m].seq2-1;
		pos1=motifs[m].pos1;
		pos2=motifs[m].pos2;
		len=motifs[m].len;
		if(group[seq1]==1 && group[seq2]==2) {
			ix1=start_pos(seqs[seq1].data,pos1);
			ix2=start_pos(seqs[seq2].data,pos2);
			weight=motifs[m].weight/n1;
if(debug>=1) fprintf(stdout,"1. %d %d %d %d %d %d weight %d\n",seq1,seq2,pos1,pos2,ix1,ix2,weight);
			for(i=0;i<len;i++) {
				if(ix1>=prf_length1) break;
				if(ix2>=prf_length2) break;
				motif_score[ix1][ix2]+=weight;
				ix1=next_pos(seqs[seq1].data,prf_length1,ix1);
				ix2=next_pos(seqs[seq2].data,prf_length2,ix2);
			}
		}
		else if(group[seq2]==1 && group[seq1]==2) {
			ix1=start_pos(seqs[seq1].data,pos1);
			ix2=start_pos(seqs[seq2].data,pos2);
			weight=motifs[m].weight/n2;
if(debug>=1) fprintf(stdout,"2. %d %d %d %d %d %d weight %d\n",seq1,seq2,pos1,pos2,ix1,ix2,weight);
			for(i=0;i<len;i++) {
				if(ix1>=prf_length2) break;
				if(ix2>=prf_length1) break;
				motif_score[ix2][ix1]+=weight;
				ix1=next_pos(seqs[seq1].data,prf_length2,ix1);
				ix2=next_pos(seqs[seq2].data,prf_length1,ix2);
			}
		}
	}
if(debug>=2) {
fprintf(stdout,"\n");
for(i=0;i<prf_length1;i++) {
for(j=0;j<prf_length2;j++)
fprintf(stdout,"%d ",motif_score[i][j]);
fprintf(stdout,"\n");
}
}
}

/* index into the sequence including gaps of the pth residue */
static sint start_pos(char *seq,sint p)
{
	sint i,ix=0;

	for(i=0;seq[i]!='\0';i++) {
		if(isalpha(seq[i])) ix++;	
		if(ix>=p) break;
	}

	return i;
}

static sint next_pos(char *seq,sint len,sint p)
{
	p++;
	if(p>=len) return (len-1);
	while(seq[p]=='.' && p<len) p++;
	return p;
}

void calc_pw_motif_scores(SEQ *seqs,sint s1, sint s2, sint len1, sint len2, MOTIF *motifs, sint nmotifs, sint **motif_score)
{
	char c;
	sint i,j, m;
	sint pos1, pos2, len;
	sint ix1,ix2;
   
	for (i=0; i<len1; i++)
		for (j=0; j<len2; j++)
			motif_score[i][j]=0;

/*
   Check for motifs
*/
	if(nmotifs==0) return;

if(debug>=1) fprintf(stdout,"nmotifs %d\n",nmotifs);
/*     check the list of motifs */
	for(m=0;m<nmotifs;m++) {
		pos1=motifs[m].pos1;
		pos2=motifs[m].pos2;
		len=motifs[m].len;
		if(motifs[m].seq1-1 == s1 && motifs[m].seq2-1 == s2) {
			ix1=start_pos(seqs[s1].data,pos1);
			ix2=start_pos(seqs[s2].data,pos2);
if(debug>=1) fprintf(stdout,"1. %d %d %d %d %d %d weight %d\n",s1,s2,pos1,pos2,ix1,ix2,motifs[m].weight);
			for(i=0;i<len;i++) {
				if(ix1>=len1) break;
				if(ix2>=len2) break;
/*
				if (motifs[m].weight>motif_score[ix1][ix2]) 
*/
					motif_score[ix1][ix2]+=motifs[m].weight;
				ix1=next_pos(seqs[s1].data,len1,ix1);
				ix2=next_pos(seqs[s2].data,len2,ix2);
			}
		}
		else if(motifs[m].seq1-1 == s2 && motifs[m].seq2-1 == s1) {
			ix1=start_pos(seqs[s1].data,pos1);
			ix2=start_pos(seqs[s2].data,pos2);
if(debug>=1) fprintf(stdout,"2. %d %d %d %d %d %d weight %d\n",s1,s2,pos1,pos2,ix1,ix2,motifs[m].weight);
			for(i=0;i<len;i++) {
				if(ix1>=len2) break;
				if(ix2>=len1) break;
/*
				if (motifs[m].weight>motif_score[ix2][ix1]) 
*/
					motif_score[ix2][ix1]+=motifs[m].weight;
				ix1=next_pos(seqs[s1].data,len2,ix1);
				ix2=next_pos(seqs[s2].data,len1,ix2);
			}
		}
	}
if(debug>=2) {
fprintf(stdout,"\n");
for(i=0;i<len1;i++) {
for(j=0;j<len2;j++)
fprintf(stdout,"%d ",motif_score[i][j]);
fprintf(stdout,"\n");
}
}
}

