adding an extra bit of data to come out of CTT (number of chips with actual data)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2091 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-11-19 17:46:10 +00:00
parent 1fcd28bba9
commit b4babb82eb
3 changed files with 34 additions and 21 deletions

View File

@ -55,7 +55,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
} catch (FileNotFoundException e) {
throw new StingException("File "+outputFileString+" could not be opened.", e);
}
output.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Depth","Power","Support","Called");
output.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Num_Chips","Depth","Power","Support","Called");
//System.out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n","Chrom","Pos","Ref","Var","Num_Alleles","Depth","Power","Support","Called");
return output;
}
@ -66,7 +66,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
long pos = loc.getStart();
char refBase = Character.toUpperCase(ref.getBase());
List<Pair<Genotype, Genotype>> chips = getChips(sampleNames, tracker);
Pair<Genotype,Integer> alleleFreqInfo = ctt.getPooledAlleleFrequency(chips,refBase);
Pair<Genotype,Pair<Integer,Integer>> alleleFreqInfo = ctt.getPooledAlleleFrequency(chips,refBase);
char alternate;
if ( alleleFreqInfo.getFirst() != null && alleleFreqInfo.getFirst().isVariant(refBase)) {
//System.out.println(refBase + " " + alleleFreqInfo.getFirst().getBases());
@ -75,7 +75,8 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
} else {
return null; // early return
}
int numVariantAllele = alleleFreqInfo.getSecond();
int numVariantAllele = alleleFreqInfo.getSecond().getFirst();
int numChipsObserved = alleleFreqInfo.getSecond().getSecond();
int depth = context.numReads();
double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele);
int called;
@ -98,7 +99,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
}
}
return String.format("%s\t%d\t%c\t%c\t%d\t%d\t%f\t%d\t%d",chrom,pos,refBase,alternate,numVariantAllele,depth,power,support,called);
return String.format("%s\t%d\t%c\t%c\t%d\t%d\t%d\t%f\t%d\t%d",chrom,pos,refBase,alternate,numVariantAllele,numChipsObserved,depth,power,support,called);
}

View File

@ -104,7 +104,7 @@ public class ConcordanceTruthTable {
} else { // if we cannot associate tables with individuals, then we are working in a pooled context
// first we need to expand our tables to include frequency information
Pair<Genotype,Integer> poolVariant = getPooledAlleleFrequency(chipEvals, ref);
Pair<Genotype, Pair<Integer,Integer> > poolVariant = getPooledAlleleFrequency(chipEvals, ref);
int truthType = getGenotype(poolVariant.getFirst(),ref); // convenience method; now to interpret
@ -114,7 +114,7 @@ public class ConcordanceTruthTable {
int callType = getCallIndex(eval,ref);
addFrequencyEntry( truthType,callType,poolVariant.getSecond() );
addFrequencyEntry( truthType,callType,poolVariant.getSecond().getFirst() );
}
@ -124,16 +124,18 @@ public class ConcordanceTruthTable {
// TODO -- Indexes like TRUE_POSITIVE are defined above for you
}
public Pair<Genotype,Integer> getPooledAlleleFrequency( List<Pair<Genotype,Genotype>> chips, char ref) {
public Pair< Genotype , Pair<Integer,Integer> > getPooledAlleleFrequency( List<Pair<Genotype,Genotype>> chips, char ref) {
// TODO -- this is actually just a note that I wanted to appear in blue. This method explicitly uses
// TODO --- the assumption that tri-allelic sites do not really exist, and that if they do the
// TODO --- site will be marked as such by an 'N' in the reference, so we will not get to this point.
Genotype variant = null;
int frequency = 0;
int nChips = 0;
if ( chips != null ) {
for ( Pair<Genotype,Genotype> chip : chips ) {
Genotype c = chip.getFirst();
if ( c != null ) {
if ( c != null ) {
nChips ++;
if ( c.isVariant(ref) ) {
if ( c.isHet() ) {
frequency ++;
@ -146,7 +148,7 @@ public class ConcordanceTruthTable {
}
}
return new Pair<Genotype, Integer>(variant, frequency);
return new Pair< Genotype, Pair<Integer,Integer> >(variant, new Pair<Integer,Integer>(frequency,nChips));
}

View File

@ -36,6 +36,7 @@ public class ConcordanceTruthTableTest extends BaseTest {
List<Pair<Genotype,Genotype>> oneHom = new ArrayList<Pair<Genotype,Genotype>>(4);
oneHom.add(new Pair<Genotype,Genotype>(ref1,null));
oneHom.add(new Pair<Genotype,Genotype>(null,null));
oneHom.add(new Pair<Genotype,Genotype>(ref2,null));
oneHom.add(new Pair<Genotype,Genotype>(ref3,null));
oneHom.add(new Pair<Genotype,Genotype>(hom2,null));
@ -44,6 +45,7 @@ public class ConcordanceTruthTableTest extends BaseTest {
oneHet.add(new Pair<Genotype,Genotype>(ref1,null));
oneHet.add(new Pair<Genotype,Genotype>(ref2,null));
oneHet.add(new Pair<Genotype,Genotype>(ref3,null));
oneHet.add(new Pair<Genotype,Genotype>(null,null));
oneHet.add(new Pair<Genotype,Genotype>(het1,null));
List<Pair<Genotype,Genotype>> twoHetOneHom = new ArrayList<Pair<Genotype,Genotype>>(5);
@ -57,6 +59,7 @@ public class ConcordanceTruthTableTest extends BaseTest {
List<Pair<Genotype,Genotype>> twoHetTwoHom = new ArrayList<Pair<Genotype,Genotype>>(7);
twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref1,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref2,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(null,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(ref3,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(het1,null));
twoHetTwoHom.add(new Pair<Genotype,Genotype>(het2,null));
@ -70,25 +73,32 @@ public class ConcordanceTruthTableTest extends BaseTest {
List<Pair<Genotype,Genotype>> homNoRef = new ArrayList<Pair<Genotype,Genotype>>(1);
homNoRef.add(new Pair<Genotype,Genotype>(hom1,null));
Pair<Genotype,Integer> countShouldBeOne = ctt.getPooledAlleleFrequency(oneHet,'G');
Pair<Genotype,Integer> countShouldBeTwo = ctt.getPooledAlleleFrequency(oneHom,'G');
Pair<Genotype,Integer> countShouldBeFour = ctt.getPooledAlleleFrequency(twoHetOneHom,'G');
Pair<Genotype,Integer> countShouldBeSix = ctt.getPooledAlleleFrequency(twoHetTwoHom,'G');
Pair<Genotype,Integer> countShouldBeThree = ctt.getPooledAlleleFrequency(hetHomNoRef,'G');
Pair<Genotype,Integer> countShouldBeTwoHereToo = ctt.getPooledAlleleFrequency(homNoRef, 'G');
Pair<Genotype,Pair<Integer,Integer>> countShouldBeOne = ctt.getPooledAlleleFrequency(oneHet,'G');
Pair<Genotype,Pair<Integer,Integer>> countShouldBeTwo = ctt.getPooledAlleleFrequency(oneHom,'G');
Pair<Genotype,Pair<Integer,Integer>> countShouldBeFour = ctt.getPooledAlleleFrequency(twoHetOneHom,'G');
Pair<Genotype,Pair<Integer,Integer>> countShouldBeSix = ctt.getPooledAlleleFrequency(twoHetTwoHom,'G');
Pair<Genotype,Pair<Integer,Integer>> countShouldBeThree = ctt.getPooledAlleleFrequency(hetHomNoRef,'G');
Pair<Genotype,Pair<Integer,Integer>> countShouldBeTwoHereToo = ctt.getPooledAlleleFrequency(homNoRef, 'G');
int expecChips = 4+4+6+7+2+1;
int numChips = countShouldBeOne.getSecond().getSecond() + countShouldBeTwo.getSecond().getSecond() +
countShouldBeFour.getSecond().getSecond() + countShouldBeSix.getSecond().getSecond() +
countShouldBeThree.getSecond().getSecond() + countShouldBeTwoHereToo.getSecond().getSecond();
logger.warn("Testing single het");
Assert.assertTrue(countShouldBeOne.getSecond() == 1);
Assert.assertTrue(countShouldBeOne.getSecond().getFirst() == 1);
logger.warn("Testing single hom");
Assert.assertTrue(countShouldBeTwo.getSecond() == 2);
Assert.assertTrue(countShouldBeTwo.getSecond().getFirst() == 2);
logger.warn("Testing two hets + hom");
Assert.assertTrue(countShouldBeFour.getSecond() == 4);
Assert.assertTrue(countShouldBeFour.getSecond().getFirst() == 4);
logger.warn("Testing two hets + two homs");
Assert.assertTrue(countShouldBeSix.getSecond() == 6);
Assert.assertTrue(countShouldBeSix.getSecond().getFirst() == 6);
logger.warn("Testing het + hom without ref");
Assert.assertTrue(countShouldBeThree.getSecond() == 3);
Assert.assertTrue(countShouldBeThree.getSecond().getFirst() == 3);
logger.warn("Testing hom without ref");
Assert.assertTrue(countShouldBeTwoHereToo.getSecond() == 2);
Assert.assertTrue(countShouldBeTwoHereToo.getSecond().getFirst() == 2);
logger.warn("Testing chip count sum");
Assert.assertTrue( expecChips == numChips);
}
}