adding an extra bit of data to come out of CTT (number of chips with actual data)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2091 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-11-19 17:46:10 +00:00
parent 1fcd28bba9
commit b4babb82eb
3 changed files with 34 additions and 21 deletions

View File

@ -55,7 +55,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException("File "+outputFileString+" could not be opened.", e); throw new StingException("File "+outputFileString+" could not be opened.", e);
} }
output.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Depth","Power","Support","Called"); output.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Num_Chips","Depth","Power","Support","Called");
//System.out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Depth","Power","Support","Called"); //System.out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Depth","Power","Support","Called");
return output; return output;
} }
@ -66,7 +66,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
long pos = loc.getStart(); long pos = loc.getStart();
char refBase = Character.toUpperCase(ref.getBase()); char refBase = Character.toUpperCase(ref.getBase());
List<Pair<Genotype, Genotype>> chips = getChips(sampleNames, tracker); List<Pair<Genotype, Genotype>> chips = getChips(sampleNames, tracker);
Pair<Genotype,Integer> alleleFreqInfo = ctt.getPooledAlleleFrequency(chips,refBase); Pair<Genotype,Pair<Integer,Integer>> alleleFreqInfo = ctt.getPooledAlleleFrequency(chips,refBase);
char alternate; char alternate;
if ( alleleFreqInfo.getFirst() != null && alleleFreqInfo.getFirst().isVariant(refBase)) { if ( alleleFreqInfo.getFirst() != null && alleleFreqInfo.getFirst().isVariant(refBase)) {
//System.out.println(refBase + " " + alleleFreqInfo.getFirst().getBases()); //System.out.println(refBase + " " + alleleFreqInfo.getFirst().getBases());
@ -75,7 +75,8 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
} else { } else {
return null; // early return return null; // early return
} }
int numVariantAllele = alleleFreqInfo.getSecond(); int numVariantAllele = alleleFreqInfo.getSecond().getFirst();
int numChipsObserved = alleleFreqInfo.getSecond().getSecond();
int depth = context.numReads(); int depth = context.numReads();
double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele); double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele);
int called; int called;
@ -98,7 +99,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
} }
} }
return String.format("%s\t%d\t%c\t%c\t%d\t%d\t%f\t%d\t%d",chrom,pos,refBase,alternate,numVariantAllele,depth,power,support,called); return String.format("%s\t%d\t%c\t%c\t%d\t%d\t%d\t%f\t%d\t%d",chrom,pos,refBase,alternate,numVariantAllele,numChipsObserved,depth,power,support,called);
} }

View File

@ -104,7 +104,7 @@ public class ConcordanceTruthTable {
} else { // if we cannot associate tables with individuals, then we are working in a pooled context } else { // if we cannot associate tables with individuals, then we are working in a pooled context
// first we need to expand our tables to include frequency information // first we need to expand our tables to include frequency information
Pair<Genotype,Integer> poolVariant = getPooledAlleleFrequency(chipEvals, ref); Pair<Genotype, Pair<Integer,Integer> > poolVariant = getPooledAlleleFrequency(chipEvals, ref);
int truthType = getGenotype(poolVariant.getFirst(),ref); // convenience method; now to interpret int truthType = getGenotype(poolVariant.getFirst(),ref); // convenience method; now to interpret
@ -114,7 +114,7 @@ public class ConcordanceTruthTable {
int callType = getCallIndex(eval,ref); int callType = getCallIndex(eval,ref);
addFrequencyEntry( truthType,callType,poolVariant.getSecond() ); addFrequencyEntry( truthType,callType,poolVariant.getSecond().getFirst() );
} }
@ -124,16 +124,18 @@ public class ConcordanceTruthTable {
// TODO -- Indexes like TRUE_POSITIVE are defined above for you // TODO -- Indexes like TRUE_POSITIVE are defined above for you
} }
public Pair<Genotype,Integer> getPooledAlleleFrequency( List<Pair<Genotype,Genotype>> chips, char ref) { public Pair< Genotype , Pair<Integer,Integer> > getPooledAlleleFrequency( List<Pair<Genotype,Genotype>> chips, char ref) {
// TODO -- this is actually just a note that I wanted to appear in blue. This method explicitly uses // TODO -- this is actually just a note that I wanted to appear in blue. This method explicitly uses
// TODO --- the assumption that tri-allelic sites do not really exist, and that if they do the // TODO --- the assumption that tri-allelic sites do not really exist, and that if they do the
// TODO --- site will be marked as such by an 'N' in the reference, so we will not get to this point. // TODO --- site will be marked as such by an 'N' in the reference, so we will not get to this point.
Genotype variant = null; Genotype variant = null;
int frequency = 0; int frequency = 0;
int nChips = 0;
if ( chips != null ) { if ( chips != null ) {
for ( Pair<Genotype,Genotype> chip : chips ) { for ( Pair<Genotype,Genotype> chip : chips ) {
Genotype c = chip.getFirst(); Genotype c = chip.getFirst();
if ( c != null ) { if ( c != null ) {
nChips ++;
if ( c.isVariant(ref) ) { if ( c.isVariant(ref) ) {
if ( c.isHet() ) { if ( c.isHet() ) {
frequency ++; frequency ++;
@ -146,7 +148,7 @@ public class ConcordanceTruthTable {
} }
} }
return new Pair<Genotype, Integer>(variant, frequency); return new Pair< Genotype, Pair<Integer,Integer> >(variant, new Pair<Integer,Integer>(frequency,nChips));
} }

View File

@ -36,6 +36,7 @@ public class ConcordanceTruthTableTest extends BaseTest {
List<Pair<Genotype,Genotype>> oneHom = new ArrayList<Pair<Genotype,Genotype>>(4); List<Pair<Genotype,Genotype>> oneHom = new ArrayList<Pair<Genotype,Genotype>>(4);
oneHom.add(new Pair<Genotype,Genotype>(ref1,null)); oneHom.add(new Pair<Genotype,Genotype>(ref1,null));
oneHom.add(new Pair<Genotype,Genotype>(null,null));
oneHom.add(new Pair<Genotype,Genotype>(ref2,null)); oneHom.add(new Pair<Genotype,Genotype>(ref2,null));
oneHom.add(new Pair<Genotype,Genotype>(ref3,null)); oneHom.add(new Pair<Genotype,Genotype>(ref3,null));
oneHom.add(new Pair<Genotype,Genotype>(hom2,null)); oneHom.add(new Pair<Genotype,Genotype>(hom2,null));
@ -44,6 +45,7 @@ public class ConcordanceTruthTableTest extends BaseTest {
oneHet.add(new Pair<Genotype,Genotype>(ref1,null)); oneHet.add(new Pair<Genotype,Genotype>(ref1,null));
oneHet.add(new Pair<Genotype,Genotype>(ref2,null)); oneHet.add(new Pair<Genotype,Genotype>(ref2,null));
oneHet.add(new Pair<Genotype,Genotype>(ref3,null)); oneHet.add(new Pair<Genotype,Genotype>(ref3,null));
oneHet.add(new Pair<Genotype,Genotype>(null,null));
oneHet.add(new Pair<Genotype,Genotype>(het1,null)); oneHet.add(new Pair<Genotype,Genotype>(het1,null));
List<Pair<Genotype,Genotype>> twoHetOneHom = new ArrayList<Pair<Genotype,Genotype>>(5); List<Pair<Genotype,Genotype>> twoHetOneHom = new ArrayList<Pair<Genotype,Genotype>>(5);
@ -57,6 +59,7 @@ public class ConcordanceTruthTableTest extends BaseTest {
List<Pair<Genotype,Genotype>> twoHetTwoHom = new ArrayList<Pair<Genotype,Genotype>>(7); List<Pair<Genotype,Genotype>> twoHetTwoHom = new ArrayList<Pair<Genotype,Genotype>>(7);
twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref1,null)); twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref1,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref2,null)); twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref2,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(null,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref3,null)); twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref3,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(het1,null)); twoHetTwoHom.add(new Pair<Genotype,Genotype>(het1,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(het2,null)); twoHetTwoHom.add(new Pair<Genotype,Genotype>(het2,null));
@ -70,25 +73,32 @@ public class ConcordanceTruthTableTest extends BaseTest {
List<Pair<Genotype,Genotype>> homNoRef = new ArrayList<Pair<Genotype,Genotype>>(1); List<Pair<Genotype,Genotype>> homNoRef = new ArrayList<Pair<Genotype,Genotype>>(1);
homNoRef.add(new Pair<Genotype,Genotype>(hom1,null)); homNoRef.add(new Pair<Genotype,Genotype>(hom1,null));
Pair<Genotype,Integer> countShouldBeOne = ctt.getPooledAlleleFrequency(oneHet,'G'); Pair<Genotype,Pair<Integer,Integer>> countShouldBeOne = ctt.getPooledAlleleFrequency(oneHet,'G');
Pair<Genotype,Integer> countShouldBeTwo = ctt.getPooledAlleleFrequency(oneHom,'G'); Pair<Genotype,Pair<Integer,Integer>> countShouldBeTwo = ctt.getPooledAlleleFrequency(oneHom,'G');
Pair<Genotype,Integer> countShouldBeFour = ctt.getPooledAlleleFrequency(twoHetOneHom,'G'); Pair<Genotype,Pair<Integer,Integer>> countShouldBeFour = ctt.getPooledAlleleFrequency(twoHetOneHom,'G');
Pair<Genotype,Integer> countShouldBeSix = ctt.getPooledAlleleFrequency(twoHetTwoHom,'G'); Pair<Genotype,Pair<Integer,Integer>> countShouldBeSix = ctt.getPooledAlleleFrequency(twoHetTwoHom,'G');
Pair<Genotype,Integer> countShouldBeThree = ctt.getPooledAlleleFrequency(hetHomNoRef,'G'); Pair<Genotype,Pair<Integer,Integer>> countShouldBeThree = ctt.getPooledAlleleFrequency(hetHomNoRef,'G');
Pair<Genotype,Integer> countShouldBeTwoHereToo = ctt.getPooledAlleleFrequency(homNoRef, 'G'); Pair<Genotype,Pair<Integer,Integer>> countShouldBeTwoHereToo = ctt.getPooledAlleleFrequency(homNoRef, 'G');
int expecChips = 4+4+6+7+2+1;
int numChips = countShouldBeOne.getSecond().getSecond() + countShouldBeTwo.getSecond().getSecond() +
countShouldBeFour.getSecond().getSecond() + countShouldBeSix.getSecond().getSecond() +
countShouldBeThree.getSecond().getSecond() + countShouldBeTwoHereToo.getSecond().getSecond();
logger.warn("Testing single het"); logger.warn("Testing single het");
Assert.assertTrue(countShouldBeOne.getSecond() == 1); Assert.assertTrue(countShouldBeOne.getSecond().getFirst() == 1);
logger.warn("Testing single hom"); logger.warn("Testing single hom");
Assert.assertTrue(countShouldBeTwo.getSecond() == 2); Assert.assertTrue(countShouldBeTwo.getSecond().getFirst() == 2);
logger.warn("Testing two hets + hom"); logger.warn("Testing two hets + hom");
Assert.assertTrue(countShouldBeFour.getSecond() == 4); Assert.assertTrue(countShouldBeFour.getSecond().getFirst() == 4);
logger.warn("Testing two hets + two homs"); logger.warn("Testing two hets + two homs");
Assert.assertTrue(countShouldBeSix.getSecond() == 6); Assert.assertTrue(countShouldBeSix.getSecond().getFirst() == 6);
logger.warn("Testing het + hom without ref"); logger.warn("Testing het + hom without ref");
Assert.assertTrue(countShouldBeThree.getSecond() == 3); Assert.assertTrue(countShouldBeThree.getSecond().getFirst() == 3);
logger.warn("Testing hom without ref"); logger.warn("Testing hom without ref");
Assert.assertTrue(countShouldBeTwoHereToo.getSecond() == 2); Assert.assertTrue(countShouldBeTwoHereToo.getSecond().getFirst() == 2);
logger.warn("Testing chip count sum");
Assert.assertTrue( expecChips == numChips);
} }
} }