/*----------------------------------------------------------------------------o
 Copyright 1995-8 Fred Hutchinson Cancer Research Center
 www/src/matrix_logob.c
 Version of matrix_logo.c for use with the getblock function 
 of the blocks WWW server.
	Use:  matrix_logob <blocksdb> <blockac> <suffix>
		blocksdb = name of blocks db, "-" => read stdin
		blockac = block AC number in blocksdb or "-" for all blocks
		suffix = suffix for output files
 Transform a block's matrix (PSSM) into the format read by Tom Schneider's
 makelogo program (file symvec, usually made by program alpro), create and copy 
 all files needed by the makelogo program and create a sequence-logo with that
 program. 
 
 The names of the variables for calculating the information are taken from
 Tom Schneider's alpro program.

* set_defaults from config.c by Bill Alford

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

TO DO:

Aug '98 - Change suffix2. This variable has to be unique for each processed
          block. Currently suffix2 is made up of the suffix variable and the 
          block name. This is not robust since we have no control over the
          block name. It may contain special unix characters that can cause
          problems when file names including suffix2 are passed to the system
          (problem addressed in Aug 98). The block names themselves might not
          be unique and hence suffix2 won't be unique.
          A possible solution is to make suffix2 a running variable, perhaps,
          the nblock variable.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


Jan. 1995 Written by Shmuel Pietrokovski
Feb. 1995 Modified by Jorja Henikoff for blocks WWW server (JGH)
2/14/95  Write files named makelogop.arg[1] & symvec.argv[1].
         Read blocks from stdin instead of bdbname.
	 If only Prosite group name in argv[2], takes the first block
	 found for that group (eg BL00094 => BL00094A).
         Don't execute viewer.
         Same character size for all blocks, only rotate those > 45 wide.
2/16/95  Center logo on the page.
2/17/95  If db file name = "-" read stdin. Suffix is arg 3.
3/31/95  Changed to execute makelogob for each block in the input file.
12/3/95  Changed to use load_frequencies() & block_to_matrix() 
         from blimps-3.0.0
May 20 96 Modified procedure write_makelogop to position the logos at the top 
          of the page and to place the logo caption closer to the logo. This
          will make it easier to join logos together. SP

 7/24/96 Modified for Blimps 3.1; matrix->weights are now type double
 3/24/97 Modified name of logo.* file to be logo.suffixAC 
 3/26/97 Detect whether input is BLOCK or MATRIX
 7/30/98 version 1.3 Procedure write_symvec:
         Small sample correction (variable e) and variance (variable avarhnb)
         are calculated separately for each column using the actual number of
         specified residues: gaps, unspecified residues, termination codons etc.
         are not counted, only the 20 aa, B (Asp or Asn) and Z (Glu or Gln).
         A number of print commands for debug mode added, use debug value (4th
         argument) > 2 to see each block column, actual number of residues and
         small sample correction calculations of each column. 

         When the input is of a matrix type warning messages are generated by 
         the program when it first tries to read the input as a block.

 8/17/98 version 1.35 fixing problem with use of suffix2. 
         Added single quotes ('...') around strings with the suffix2 variable
         that are passed to the system. Suffix2 is made up of the suffix
         parameter and the block name. The block name can potentially include 
         unix-system special characters (&, $ etc.) which without the quotes
         would be interpreted as unix directives. SP.

11/27/98 Don't rotate wide blocks; reduce size so 55 wide fits on 8.5" page
12/ 4/98 Check for "; MATRIX" instead of just "MATRIX" on ID line (see
	 PR00138, PR00234) to determine input type.
         
-----------------------------------------------------------------------------*/

/* EXTERN is defined (empty) before the project header is included.
   NOTE(review): presumably blocksprogs.h uses it to turn its global
   declarations into definitions in this translation unit - confirm there. */
#define EXTERN
/* Defined here, ahead of the includes, so fseek() in main has an origin
   value even if no header supplies one. */
#define SEEK_SET 0 	/* for fseek() */
#define MAXNAME 80	/* Maximum file name length */
/* OK/ERROR: getargs() return codes; ERROR is also the failure exit status.
   YES/NO: values of the "all" and "done" flags in main. */
#define OK    0
#define YES  1
#define NO   0
#define ERROR 1

/* Number of symbols written per column of the symvec file; write_symvec
   loops over weight-matrix rows 1..AAs. */
#define AAs 20
/* Field widths used by write_symvec for the per-position summary line. */
#define infofield 8        /* size of field for printing information in bits */
#define infodecim 5     /* number of decimal places for printing information */
#define nfield 4        /* size of field for printing n, the number of sites */

/* Value given to SearchType by set_defaults(). */
#define SEARCH_TYPE_UNSET  -1

#include <sys/types.h>
#include <sys/stat.h>
#include <blocksprogs.h>

/* variables set by the configuration file in blimps program */
/* This program reads no configuration file; set_defaults() below assigns
   most of these directly.  SiteSpecificScoringMatrixType is not assigned
   anywhere in this file. */

int StrandsToSearch;
int NumberToReport;
int SearchType;
int GeneticCodeInitializer;
int SiteSpecificScoringMatrixType;
int BlockToMatrixConversionMethod; /* default method is two */
int SequenceMatrixScoringMethod;   /* default method is zero */

/* Converts a Block to a Matrix; main calls it as block_to_matrix(block, 2).
   Declared without a parameter list, so arguments are not type-checked. */
extern
Matrix *block_to_matrix() ;


/*
 * Local variables and data structures
 */

/* Forward declarations of the functions defined in this file (old-style,
   without parameter lists). */
int    getargs() ;
void   set_defaults() ;
void   write_symvec();
void   write_makelogop();
void   view_logo();
/* Debugging level: reset to 0 in main, optionally set from the last command
   line argument in getargs, read by write_symvec (> 1 and > 2). */
int    dbg_lvl ;
/* Set by main for each processed entry: 1 when the input is a block, 0 when
   it is a matrix.  write_symvec reads block residues only when it is 1. */
int    isblock ;

/*=======================================================================*/
/*
 * main 
 *   controls flow of program
 *   Parameters: argc, argv
 *   Error codes:
 */

int main(argc, argv)
     int argc;
     char *argv[];
{
   FILE       *bfp, *out ;
   Block      *block ;
   Matrix     *matrix ;
   int	      itemp, nblock, done, all;
   long	      bfp_pos;
   char       bdbname[MAXNAME], suffix[MAXNAME], outname[MAXNAME] ;
   char       blockAC[MAXNAME], blockID[MAXNAME], version[32], line[160] ;
   char       logodir[MAXNAME], suffix2[MAXNAME], blockAC2[MAXNAME];
   char       symvec[MAXNAME], makelogop[MAXNAME], colors[MAXNAME];

   struct stat status ;

   strcpy(version,"1.35") ;					/*JGH*/
   strcpy(logodir, "./") ;					/*JGH*/

   dbg_lvl = 0 ;		/* debugging level */

                                    /* getting input and output file names */
   if (getargs(argc,argv,bdbname,suffix,&bfp,blockAC) != OK) exit(ERROR) ; 
   strcpy(suffix2, suffix);   /* may be different if more than one block */

   set_defaults();                      /*set the blimps default values */
                 /* load the frequencies for converting blocks to matrices */
   load_frequencies("default.amino.frq");

   /* create and check the presence of files needed by program makelogo */
   /* If file colors.suffix doesn't exist copy file colors to that name SP */

   sprintf(colors, "colors.%s", suffix);
   if (stat(colors, &status) < 0) 
      {
         sprintf(line, "cp %s/colors %s", logodir, colors) ; 
         system(line) ;

      }

                         /* create empty wave and marks files 
                            if they do not exist in the present directory */
   if (stat("wave", &status) < 0) system("touch wave") ;
   if (stat("marks", &status) < 0) system("touch marks") ;



   /* If partial name is given, take all blocks with partial name    JGH*/
   /* If the name is "-" process all the blocks                         */
   nblock = 0;
   done = all = NO;
   if ( strcmp(blockAC, "-") == 0 ) { all = YES; blockAC[0] = '\0'; }
   strcpy(blockAC2, blockAC);
   bfp_pos = ftell(bfp);
   while ((block = read_a_block(bfp)) != NULL && (all || !done))
   {
      if ( all ||
           (strcmp(block->number,blockAC) == 0) ||
            ( strlen(blockAC) < strlen(block->number) &&
              strncmp(blockAC, block->number, (int) strlen(blockAC)) == 0) )
      {
         nblock++;
         strcpy(suffix2, suffix);
         itemp = (int) strlen(block->number) - strlen(blockAC);
         if ( itemp > 0 )
         {
             strcpy(blockAC2, block->number);
             strncat(suffix2, block->number + strlen(blockAC), itemp);
             suffix2[ (int) strlen(suffix) + itemp ] = '\0';

/* Single-quote the suffix strings to avoid problems from the possible
   occurrence of unix-system special characters (&, $ etc.) in them 
   SP */
             sprintf(line, "cp \'colors.%s\' \'colors.%s\'", suffix, suffix2);
             system(line);
         }
         else if (!all) done = YES;
		 /* just get the ID, not the BLOCK key word*/
         sscanf(block->id, "%s %*s", blockID) ;

         /*   Have we been looking at a matrix?   */
         if (strstr(block->id, "; MATRIX"))
         {
         isblock = 0 ; /* input data is NOT a block */
printf("MATRIX input\n");
            fseek(bfp, bfp_pos, SEEK_SET);	/* rewind input */
            matrix = read_a_matrix(bfp);
         }
         else
         {
         isblock = 1 ; /* input data is a block */
            /*   Convert a block to a matrix  */
            /*  Use method #2; values b/w 0 and 100 that add to 100 in each
                column of the alignment */
            matrix = block_to_matrix(block, 2);
         }

         /*   Open symvec output file   */
         strcpy(symvec, "symvec");
         if (strlen(suffix2))
         {  strcat(symvec, "."); strcat(symvec, suffix2); }
         if ( (out=fopen(symvec, "w")) == NULL)
         {
            printf("\nCannot open file %s\n", symvec);
            exit(ERROR);
         }

         fprintf(out, "* %s %s\n", argv[0], version) ;
         fprintf(out, "* PSSM of block %s (%s) from file %s.\n",
	      blockAC2, blockID, bdbname) ;	
         write_symvec(out, matrix);
         fclose(out) ;                                 /* close file symvec */


                  /* create file makelogop - the makelogo parameter file */
         strcpy(makelogop, "makelogop");
         if (strlen(suffix2))
         {   strcat(makelogop, "."); strcat(makelogop, suffix2);  }
         if ( (out=fopen(makelogop, "w")) == NULL) 
         {
            printf("\nCannot open file %s\n", makelogop) ;
            exit(ERROR) ;
         }

         write_makelogop(out, matrix);
         fprintf(out, "PSSM of %s (%s) %d sequences.\n", 
	      blockAC2, blockID, matrix->num_sequences) ;   
         fclose(out) ;                             /* close file makelogop */


                /* run program makelogo to generate file logo 
                   containing the postscript represenation of the matrix */
/* Single-quote the suffix2 string to avoid problems from the possible
   occurrence of unix-system special characters (&, $ etc.) in that string 
   SP */
         sprintf(line, "%s/makelogob \'%s\' > /dev/null", logodir, suffix2) ; 
         system(line) ;
         sprintf(outname, "logo.%s", suffix2);
/*
         view_logo(outname); 
*/
         free_block(block); free_matrix(matrix);

         /*   Remove all the intermediate files   */
/* Single-quote the suffix2 string to avoid problems from the possible
   occurrence of unix-system special characters (&, $ etc.) in that string 
   SP */
/* Disable intermediate file removal on WWW server version of program.
   Shell running the program removes these files.

         sprintf(line, "rm \'colors.%s\' \'symvec.%s\' \'makelogop.%s\'",
                        suffix2, suffix2, suffix2);

         system(line);
*/
      }   /* end of if it's a block we want to process */
      bfp_pos = ftell(bfp);
   }  /* end of input file */

   fclose(bfp) ;                             /* close blocks database file */

   if (nblock == 0)                  /* No block with name blockAC found */
      {
      printf("No block with name %s found in file %s !\n", blockAC, bdbname) ; 
      exit(ERROR) ;
      }

   exit(0);

}  /* end of main */

/****************************************************************************
 * copy_arg
 *   Copies one command line argument into a MAXNAME-byte buffer.
 *   Returns OK, or ERROR (after printing a message) when the argument does
 *   not fit; nothing is copied in that case.
 ****************************************************************************/
static int copy_arg(char *dst, const char *src, const char *what)
{
   if (strlen(src) >= MAXNAME)
   {
      printf("\n%s is too long (at most %d characters)\n", what, MAXNAME - 1);
      return(ERROR);
   }
   strcpy(dst, src);
   return(OK);
}

/****************************************************************************
 * read_reply
 *   Reads one line from stdin into a MAXNAME-byte buffer, without the
 *   trailing newline.  A longer line is cut to MAXNAME-1 characters and
 *   its remainder is discarded so it cannot answer the next prompt.
 *   At end of input the buffer is set to the empty string.
 ****************************************************************************/
static void read_reply(char *dst)
{
   size_t len;
   int    ch;

   if (fgets(dst, MAXNAME, stdin) == NULL)
   {
      dst[0] = '\0';
      return;
   }
   len = strlen(dst);
   if (len > 0 && dst[len-1] == '\n') dst[len-1] = '\0';
   else
      while ((ch = getchar()) != EOF && ch != '\n')
         ;                                    /* skip the rest of the line */
}

/****************************************************************************
 * get input file names and other program parameters interactively
 * or from command line. 
 *
 *   argc, argv - the program arguments:
 *                  argv[1] blocks/matrix file, "-" (or empty) => stdin
 *                  argv[2] block AC, or "-" for all
 *                  argv[3] file name suffix
 *                  last argument (only when argc > 4) debug level
 *                Each missing argument is prompted for on stdin.
 *   bdbname, suffix, blockAC - output buffers; each must hold at least
 *                MAXNAME bytes (main passes MAXNAME-byte arrays).
 *   bfp        - receives the opened input stream.
 *   Returns OK, or ERROR when the input file cannot be opened or an
 *   argument is too long for its buffer.  Sets the global dbg_lvl when a
 *   debug level is given.
 ****************************************************************************/
int getargs(argc,argv,bdbname,suffix,bfp,blockAC)

int   argc;
char  *argv[];
char  bdbname[MAXNAME], suffix[MAXNAME], blockAC[];
FILE  **bfp ;
{

   if (argc < 4)
   {
      printf("MATRIX_LOGOB: Copyright 1995-8 Fred Hutchinson Cancer ");
      printf("Research Center\n");
      printf("USAGE: matrix_logob blocks|pssms AC|- suffix\n");
      printf("   pssms must be in blimps format and consist of values ");
      printf("betwen 0 and 100\n    which sum to 100 in each column.\n");
      printf("These files must be in the current directory:\n");
      printf("   makelogob, colors, default.amino.frq\n");
   }

/* ------------1st arg = block file 1 ------------------------------------*/
   if (argc > 1)
      {
      if (copy_arg(bdbname, argv[1], "BLOCK or MATRIX file name") != OK)
         return(ERROR);
      }
   else                               
      {
      printf("\nEnter name of BLOCK or MATRIX file: ");
      read_reply(bdbname);
      }

   /*   Read stdin if the filename is empty or starts with '-'   */
   if (bdbname[0] == '\0' || bdbname[0] == '-')  *bfp = stdin;
   else if ( (*bfp=fopen(bdbname, "r")) == NULL)
      {
      printf("\nCannot open file \"%s\"\n", bdbname);
      return(ERROR);
      }

/* ------------block accession --------------------------------------------*/
   if (argc > 2)
      {
      if (copy_arg(blockAC, argv[2], "Block AC") != OK) return(ERROR);
      }
   else                                   /* get input file(s) interactively */
      {
      printf("\nEnter AC in %s or - to make logos for all: ", bdbname);
      read_reply(blockAC);
      }

/* ------------3rd arg = file name suffix ------------------------------JGH*/
   suffix[0] = '\0';
   if (argc > 3)
      {
      if (copy_arg(suffix, argv[3], "File name suffix") != OK) return(ERROR);
      }
   else
      {
      printf("\nEnter file name suffix: ");
      read_reply(suffix);
      printf("Logos will be written to logo.%s\n", suffix);
      }

/* ------------last arg = debug level (unprompted) ------------------------*/

   if (argc > 4) dbg_lvl = atoi(argv[argc-1]) ;			/*JGH*/

   return(OK) ;

}  /* end of getargs */


/****************************************************************************
 * set the default values for some of the variables.
 ****************************************************************************/

void set_defaults()
{
  /* Matrix construction and scoring */
  SequenceMatrixScoringMethod   = 0;   /* method zero is the default */
  BlockToMatrixConversionMethod = 2;   /* method two is the Patmat method */

  /* Search settings */
  SearchType      = SEARCH_TYPE_UNSET;
  NumberToReport  = 0;                 /* <0: all, 0: judge, >0: that many */
  StrandsToSearch = 2;                 /* 2 means search both strands */

  /* Translation */
  GeneticCodeInitializer = 0;          /* the standard genetic code */

  /* NOTE(review): presumably the threshold for messages reported by the
     blimps library - confirm in the blimps headers. */
  ErrorLevelReport = WARNING_ERR_LVL;
}

/*=======================================================================*/
/*
 * write_symvec
 *   Writes the body of a symvec file for makelogo: two header lines, then
 *   for every matrix column one summary line (position, sum of the rounded
 *   weights, information in bits, variance) followed by one line per
 *   symbol (rows 1..AAs of the weight matrix) with its rounded weight.
 *
 *   The small sample correction e and the variance avarhnb are computed per
 *   column from the number of specified residues.  That number is counted
 *   from the block only when the global isblock is set; otherwise
 *   matrix->num_sequences is used.
 *
 *   With the global dbg_lvl > 1 the columns are also printed to stdout;
 *   with dbg_lvl > 2 the block column (block input only) and the correction
 *   values are added.
 *
 *   Parameters: out    - open output stream for the symvec file
 *               matrix - the PSSM; matrix->block is read only if isblock
 *
 *   NOTE(review): a column with no specified residues (num_residues == 0),
 *   or one whose rounded weights sum to 0 while some weight is positive,
 *   still leads to a division by zero.  No replacement value is chosen
 *   here because the right output for makelogo is not known.
 */
void write_symvec(out, matrix)
FILE *out;
Matrix *matrix;
{
   double ln2, e, r, hmax, ntrue, avarhnb, dtemp;
   int aa, pos, seq, num_residues;

   fprintf(out, "* position, samples, information, variance\n");
   fprintf(out, "%d number of symbols\n", AAs);

  
   ln2 = log(2.0) ;
   hmax = log((double) AAs) ;

   /* Defined values for the closing debug message when there are no
      columns at all. */
   e = 0.0 ;
   avarhnb = 0.0 ;

/* print debug output headers */
   if (dbg_lvl > 1) 
      {
      printf("col") ;
      for(aa=1; aa < AAs+1; aa++) printf("   %c", aa_btoa[aa]) ;
      printf("   info.  sum") ;
      if (dbg_lvl > 2) 
         printf("     residues   #_of_specified_residues/#_of_seqs   e   var") ;
      printf("\n") ;
      }

   for(pos=0; pos < matrix->width; pos++)
      {
/* if input is a block, count actual number of residues in each column. 
   Including the 20 aa ('residues' matrix values 1-20) 
   and B (Asp or Asn, value 21) and Z (Glu or Gln, value 22) 
   Excluding gaps (-, value 0) unidentified aa (X, value 23) and 
   stop codon (*, value 24). */

      num_residues = matrix->num_sequences ;

      if (isblock)
         for(seq=0; seq < matrix->num_sequences; seq++) 
            if (matrix->block->residues[seq][pos] < 1 ||
                matrix->block->residues[seq][pos] > 22) num_residues-- ; 

                                /* correction factor for small sample size */
      e = (AAs-1) / ((double) 2 * ln2 * num_residues) ; 

      avarhnb = e * e ;

      /* ntrue is the sum of the rounded weights of this column */
      r = hmax ;
      for(aa=1, ntrue=0.; aa < AAs+1; aa++) 
	ntrue += (double) round( matrix->weights[aa][pos] ) ;

      for(aa=1; aa < AAs+1; aa++) 
                    /* start loop at 1 and end at AAs+1 because aa values 
                      at the weight matrix start at 1, 0 is the gap value */
      	 {
         if (matrix->weights[aa][pos] > 0)
            {
            dtemp = (double) matrix->weights[aa][pos] / ntrue;
            r += dtemp * log(dtemp);
            }
	 }

      r /= ln2 ;                                       /* convert to bits */
      r -= e ;                     /* a correction for small sample sizes */

      /* pos+1 is an int: convert it to match the %ld conversion */
      fprintf(out, "%*ld %*.0lf %*.*f % .1E\n",
              nfield, (long) (pos+1), infofield, ntrue, infofield, infodecim,
	      r, avarhnb);

      /* The (int) casts make the arguments match %d whether round() is a
         project macro or the <math.h> function returning double. */
      for(aa=1; aa < AAs+1; aa++) 
         fprintf(out, "%c %4d\n", aa_btoa[aa],
                 (int) round(matrix->weights[aa][pos]) ) ;

      if (dbg_lvl > 1) 
/* print PSSM values, their info value and the values sum for this column */
	 {
         printf("%2d|", pos+1) ;
         for(aa=1; aa < AAs+1; aa++) 
            printf(" %3d", (int) round(matrix->weights[aa][pos])) ;
         printf("  %6.3f  %3.0f", r, ntrue) ;

         if (dbg_lvl > 2) 
/* print block column, number of specified residues and sample correction values */
	    {
            printf("     ") ;
            /* the residues exist only for block input, as in the count
               above */
            if (isblock)
               for(seq=0; seq < matrix->num_sequences; seq++) 
                  printf("%c", aa_btoa[matrix->block->residues[seq][pos]]); 
            printf("  %2d/%d %6.3f %6.3f", 
                   num_residues, matrix->num_sequences, e, avarhnb) ;
            }
         printf("\n") ;
         }

      }

   if (dbg_lvl > 1) 
      printf("\nsmall sample correction (e) : %6.3f, variance (avarhnb) : %6.3f\n(these are are actually the values for the last column and will be different for the other columns if they contain a different number of unspecified residues - gaps, Xs, etc.)\n",
	     e, avarhnb) ;
}   /*  end of write_symvec */

/*=================================================*/
/*
 * write_makelogop
 *   Writes the makelogo parameter lines to out.  Only the first line
 *   depends on the matrix (its width); all other values are fixed.  The
 *   caller appends the text of the single user defined string afterwards.
 *
 *   History kept from the earlier code: wide blocks (> 44 columns) used to
 *   be rotated by -90 degrees with the corner at "12 26", the x corner used
 *   to be computed from the width to centre the logo, and an alternative
 *   larger character size ("0.454", bar "6.82 0.1") existed.  None of these
 *   is in use.
 *
 *   Parameters: out    - open output stream for the makelogop file
 *               matrix - only matrix->width is read
 */
void write_makelogop(out, matrix)
FILE *out;
Matrix *matrix;
{
   const double caption_scale = 1.0 ;   /* relative size of the caption */

   /* FROM and TO positions: the logo shows the whole block */
   fprintf(out, "1 %d\n", matrix->width) ; 

   /* put the vertical bar before the first position */
   fputs("1\n", out) ;

   /* lower left hand corner of the logo (in cm), then the rotation angle;
      the logo is neither centred nor rotated */
   fputs("1 20\n", out) ;
   fputs("0\n", out) ;

   /* width of the logo characters in cm, then height and width of the
      vertical bar */
   fputs("0.365\n", out) ;
   fputs("5.48 0.1\n", out) ;

   /* height of the vertical bar in bits; < 0 means no I-beams */
   fputs("-4\n", out) ;

   /* Ibeamfraction: fraction of the I-beam to draw */
   fputs("1\n", out) ;

   /* a line beginning with 'b' puts bars before and after each line */
   fputs("bars on\n", out) ;

   /* a line beginning with 's' would show a dashed box around each
      character */
   fputs("-\n", out) ;

   /* a line beginning with 'o' would draw each character as an outline */
   fputs("no outline\n", out) ;

   /* a line beginning with 'c' gives sequence letters in upper case */
   fputs("capitals\n", out) ;

   /* maximal number of character stacks per line, number of lines per page,
      and line separation relative to the bar height */
   fputs("55\n", out) ;
   fputs("1\n", out) ;
   fputs("1.5\n", out) ;

   /* a line beginning with 'n' numbers the stacks */
   fputs("numbers\n", out) ;

   /* shrinking factor inside boxes */
   fputs("1.0\n", out) ;

   /* number of user defined strings following */
   fputs("1\n", out) ;

   /* coordinates of string 1 (in cm) and its relative size */
   fprintf(out, "0.0 -1.5 %.2f\n", caption_scale) ; 
}  /* end of write_makelogop */
/*=====================================================================*/
/*
 * view_logo
 *   Interactive helper: repeatedly offers to view (V) or print (P) the
 *   PostScript logo file and runs the matching external program through
 *   system().  The loop repeats after a view and ends after a print, after
 *   any other answer, or at end of input on stdin.
 *
 *   Parameters: outname - name of the logo file.  It is handed to the shell
 *               inside single quotes; a name that itself contains a single
 *               quote is refused.
 */
void view_logo(outname)
char *outname;
{
   const char *PSviewer = "pageview" ;
   const char *PSviewer_opt = "-left" ;
   const char *PSprint = "lpr" ;
   char       ans[32], line[1024] ;
   int        choice, ch, n ;

   if (strchr(outname, '\'') != NULL)
      {
      printf("\nCannot view or print %s: the name contains a quote.\n",
	     outname) ;
      return ;
      }

   choice = 'V' ;
   while (choice == 'V')
      {
      printf("\nThe created sequence-logo file (%s) is a PostScript file.\n", 
	     outname) ;
      printf("  (V to view logo with program %s (%s %s %s))\n",
	     PSviewer, PSviewer, PSviewer_opt, outname) ;
      printf("     after viewing you can also choose to print,\n") ;
      printf("   P to print logo (%s %s).)\n", PSprint, outname) ;

      /* End of input must end the loop; otherwise the previous 'V' would
         be acted on again forever. */
      if (fgets(ans, sizeof(ans), stdin) == NULL) break ;
      if (strchr(ans, '\n') == NULL)
         while ((ch = getchar()) != EOF && ch != '\n')
            ;                                 /* skip the rest of the line */

      choice = toupper((unsigned char) ans[0]) ;

      switch (choice)
	 {
         case 'V' :
            n = snprintf(line, sizeof(line), "%s %s '%s'",
                         PSviewer, PSviewer_opt, outname) ; 
            if (n < 0 || (size_t) n >= sizeof(line))
               {
               printf("\nFile name %s is too long.\n", outname) ;
               return ;
               }
            system(line) ;
            break ;
         case 'P' :
            n = snprintf(line, sizeof(line), "%s '%s'", PSprint, outname) ; 
            if (n < 0 || (size_t) n >= sizeof(line))
               {
               printf("\nFile name %s is too long.\n", outname) ;
               return ;
               }
            system(line) ;
            printf("\nFile %s sent to printer.\n", outname) ;
            break ;
         default :
            break ;
	 }
      }

}   /* end of view_logo  */
