/* This is an example HY-PHY Batch File.



   It reads in two NEXUS nucleotide datasets, data/aspectrin1.nuc

   and data/aspectrin2.nuc and performs

   the relative ratio test on the 4-taxa tree, using F81 model.

   

   Output is printed out as a Newick Style tree with branch lengths

   representing the number of expected substitutions per branch (which

   is the default setting for nucleotide models w/o rate variation).

   Also, the likelihood ratio statistic is evaluated and the P-value

   for the test is reported.

   

   

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Read in the data and store the result in DataSet variables.
      Each of the two input files is loaded into its own DataSet
      (nucleotideSequence1 and nucleotideSequence2) so that the two
      alignments can be filtered and modelled separately further on. */



DataSet 		nucleotideSequence1 = ReadDataFile ("data/aspectrin1.nuc");

DataSet 		nucleotideSequence2 = ReadDataFile ("data/aspectrin2.nuc");

   

/* 2. Filter the data, specifying that all of the data is to be used

	  and that it is to be treated as nucleotides. One filter is built
	  per dataset. NOTE(review): the trailing argument 1 is presumably
	  the unit size (one character per site) -- confirm against the
	  CreateFilter documentation. */

	  

DataSetFilter	filteredData1 = CreateFilter (nucleotideSequence1,1);

DataSetFilter	filteredData2 = CreateFilter (nucleotideSequence2,1);



/* 3. Collect observed nucleotide frequencies from the filtered data.
	  observedFreqs1 and observedFreqs2 receive the frequency vectors
	  for filteredData1 and filteredData2, respectively.
	  NOTE(review): the meaning of the three trailing 1 arguments is not
	  visible in this file -- see the HarvestFrequencies documentation. */



HarvestFrequencies (observedFreqs1, filteredData1, 1, 1, 1);

HarvestFrequencies (observedFreqs2, filteredData2, 1, 1, 1);



/* 4. Define the F81 substitution matrix. '*' is defined to be -(sum of off-diag row elements).
	  Every off-diagonal entry is the same rate parameter mu, so the 4x4
	  matrix has a single free parameter. The same matrix is shared by
	  both models defined in the next step. */



F81RateMatrix = 

		{{*,mu,mu,mu}

		 {mu,*,mu,mu}

		 {mu,mu,*,mu}

		 {mu,mu,mu,*}};

		 

/*5.  Define the F81 models, by combining the substitution matrix with the vector of observed 

	  (equilibrium) frequencies. We define one for each block, since the equilibrium 

	  frequencies will differ: F81Block1 uses observedFreqs1 and F81Block2
	  uses observedFreqs2, while both use the same F81RateMatrix. */



Model 	F81Block1 = (F81RateMatrix, observedFreqs1);

Model 	F81Block2 = (F81RateMatrix, observedFreqs2);



/*6.  Now we can define 2 4-taxa trees - one for each data block. We use appropriate models for each one.
	  Note the use of TRY_NUMERIC_SEQUENCE_MATCH constant to instruct HyPhy to map sequence numbers to 
	  tree leaves (by default, HyPhy expects sequence names and leaf names to match).
	  Both trees have the same topology, ((1,2),3,4); they differ only in
	  the model selected immediately before each Tree statement.
	  NOTE(review): presumably a Tree picks up whichever model was most
	  recently selected with UseModel, which is why the UseModel/Tree
	  statements must stay interleaved in this order -- confirm. */

TRY_NUMERIC_SEQUENCE_MATCH = 1;	  

UseModel (F81Block1);

Tree	fourTaxaTree1 = ((1,2),3,4);

UseModel (F81Block2);

Tree	fourTaxaTree2 = ((1,2),3,4);



/*7.  Since all the likelihood function ingredients (data, tree, equilibrium frequencies)

	  have been defined we are ready to construct the likelihood function. We

	  combine both datasets into one likelihood function. The arguments
	  are listed as (filter, tree) pairs: filteredData1 with fourTaxaTree1,
	  then filteredData2 with fourTaxaTree2. */

	  

LikelihoodFunction  theLnLik = (filteredData1, fourTaxaTree1, filteredData2, fourTaxaTree2);



/*8.  Maximize the likelihood function, storing parameter values in the matrix paramValues. 

	  We also store the resulting ln-lik and the number of model parameters.
	  Both are read from row 1 of paramValues (column 0: ln-lik, column 1:
	  parameter count). They are copied into their own variables here
	  because paramValues is overwritten by the second Optimize call made
	  after the constraint is applied. */



Optimize (paramValues, theLnLik);

unconstrainedLnLik = paramValues[1][0];

paramCount = paramValues[1][1];



/*9.  Print the tree with optimal branch lengths to the console.
	  The likelihood function object itself is passed to fprintf, after a
	  heading that labels this as the unconstrained fit. */

fprintf  (stdout, "\n0). UNCONSTRAINED MODEL:", theLnLik);



/*10. We now constrain the rate of evolution along each branch to be proportional in both trees. 

	  R will represent the ratio. We use ReplicateConstraint to automatically

	  attach the same constraint to all branches of the tree.
	  NOTE(review): presumably "this1" binds to the first tree listed
	  (fourTaxaTree2) and "this2" to the second (fourTaxaTree1), giving
	  tree2.branch.mu := R * tree1.branch.mu for every branch -- confirm
	  against the ReplicateConstraint documentation.
	  The second Optimize call reuses paramValues, so from here on it holds
	  the results of the constrained fit. */

	  

global R;

ReplicateConstraint ("this1.?.mu:=R*this2.?.mu", fourTaxaTree2, fourTaxaTree1);

Optimize (paramValues, theLnLik);



/*11. Now we compute the ln-lik ratio statistic and the P-value, using the
	  Chi^2 distribution with the appropriate number of degrees of freedom.

	  paramValues now holds the results of the constrained fit, so row 1 is
	  read again: column 0 is the constrained ln-lik and column 1 is the
	  constrained parameter count. The statistic is twice the drop in
	  ln-lik caused by the constraint; the degrees of freedom are the
	  number of parameters the constraint removed (unconstrained count
	  minus constrained count).

	  The multiplication by 2 is spelled out with an explicit '*' so the
	  expression does not depend on implicit multiplication by
	  juxtaposition and cannot be misread as a function call. */

constrainedLnLik = paramValues[1][0];

degreesOfFreedom = paramCount - paramValues[1][1];

lnlikDelta = 2 * (unconstrainedLnLik - constrainedLnLik);

pValue = 1 - CChi2 (lnlikDelta, degreesOfFreedom);



/* Report the P-value followed by the constrained likelihood function. */

fprintf (stdout, "\n\n1). Relative ratio constraint; the P-value is:", pValue, "\n", theLnLik);

