Minor changes to oneoff walkers. PlinkRod altered but still commented.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2808 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-02-08 18:49:56 +00:00
parent 21369869b7
commit 935e76daa1
5 changed files with 1010 additions and 937 deletions

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,8 @@ import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import net.sf.samtools.SAMRecord;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Random;
/**
@ -17,7 +19,7 @@ import java.util.Random;
*
* @author Kiran Garimella
*/
public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
public class ReadErrorRateWalker extends ReadWalker<boolean[], ReadErrorRateCollection> {
@Argument(fullName="printVisualHits", shortName="v", doc="print visual hits", required=false) public boolean printVisualHits = false;
@Argument(fullName="useNextBestBase", shortName="nb", doc="use next best base", required=false) public boolean useNextBestBase = false;
@Argument(fullName="useNonNextBestBase",shortName="nnb",doc="use nonnext best base",required=false) public boolean useNonNextBestBase = false;
@ -134,38 +136,80 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
*
* @return null
*/
public int[] reduceInit() {
return null;
public ReadErrorRateCollection reduceInit() {
return new ReadErrorRateCollection();
}
/**
* Summarize the error rate data.
*
* @param value the read mismatch array
* @param sum the summed mismatch array
* @param collection the summed mismatch array
* @return the summed mismatch array with the new read mismatch array added
*/
public int[] reduce(boolean[] value, int[] sum) {
if (sum == null) {
sum = new int[value.length];
}
public ReadErrorRateCollection reduce(boolean[] value, ReadErrorRateCollection collection) {
for (int cycle = 0; cycle < value.length; cycle++) {
sum[cycle] += (value[cycle] ? 1 : 0);
}
collection.update(value);
return sum;
return collection;
}
/**
* We've processed all the reads. Emit the final, normalized error rate data.
*
* @param sum the summed mismatch array
* @param collection the summed mismatch arrays
*/
public void onTraversalDone(int[] sum) {
for (int cycle = 0; cycle < sum.length - 1; cycle++) {
double errorrate = ((double) sum[cycle])/((double) sum[sum.length - 1]);
out.println("[ERROR_RATE] " + cycle + " " + errorrate);
}
public void onTraversalDone(ReadErrorRateCollection collection) {
out.print(collection.toString());
}
}
class ReadErrorRateCollection {
private HashMap<Integer,int[]> readsByReadLength;
public ReadErrorRateCollection() {
readsByReadLength = new HashMap<Integer, int[]>();
}
public void update(boolean[] mismatchArray) {
if ( ! readsByReadLength.containsKey(mismatchArray.length) ) {
readsByReadLength.put(mismatchArray.length, zeroArray(mismatchArray.length));
}
updateErrorCounts(readsByReadLength.get(mismatchArray.length), mismatchArray);
}
public String toString() {
StringBuilder builder = new StringBuilder();
for ( int length : readsByReadLength.keySet() ) {
for ( int cycle = 0; cycle < length-1; cycle++) {
int[] counts = readsByReadLength.get(length);
builder.append(length);
builder.append("\t");
builder.append(cycle);
builder.append("\t");
builder.append( ( ( double ) counts[cycle] / ( (double) counts[length-1])));
builder.append("\n");
}
}
return builder.toString();
}
private static int[] zeroArray( int length ) {
int[] array = new int[length];
for ( int ii = 0; ii < length; ii ++ ) {
array[ii] = 0;
}
return array;
}
public static void updateErrorCounts(int[] sum, boolean[] value) {
for (int cycle = 0; cycle < value.length; cycle++) {
sum[cycle] += (value[cycle] ? 1 : 0);
}
}
}

View File

@ -22,11 +22,6 @@ class MultiSampleConcordanceSet {
private long truthSitesFilteredOut;
private int genotypeQuality;
private int truePositiveLoci;
private int trueNegativeLoci;
private int falsePositiveLoci;
private int falseNegativeLoci;
public MultiSampleConcordanceSet(int minDepth, boolean assumeRef, int genotypeQuality) {
concordanceSet = new HashSet<VCFConcordanceCalculator>();
truthOnlySites = 0l;

View File

@ -58,35 +58,58 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
ReferenceOrderedDatum truthData = tracker.lookup("truth", null);
ReferenceOrderedDatum variantData = tracker.lookup("variants",null);
LocusConcordanceInfo concordance;
if ( truthData == null && variantData == null) {
concordance = null;
} else if ( truthData == null ) {
// not in the truth set
if ( ( (RodVCF) variantData ).isFiltered() ) {
concordance = null;
} else {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
}
} else if ( variantData == null ) {
// not in the variant set
if ( ( (RodVCF) truthData).isFiltered() ) {
concordance = null;
} else {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref);
}
} else {
// in both
// check for filtering
boolean truth_filter = ((RodVCF) truthData).isFiltered();
boolean call_filter = ((RodVCF) variantData).isFiltered();
if ( truth_filter && call_filter ) {
concordance = null;
} else if ( truth_filter ) {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
} else if ( call_filter ) {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref);
} else {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref);
}
}

View File

@ -1,243 +1,247 @@
//package org.broadinstitute.sting.gatk.refdata;
//
//import org.broadinstitute.sting.BaseTest;
//import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
//import org.broadinstitute.sting.utils.StingException;
//import org.broadinstitute.sting.utils.GenomeLocParser;
//import org.broadinstitute.sting.utils.GenomeLoc;
//import org.junit.BeforeClass;
//import org.junit.Test;
//import org.junit.Assert;
//
//import java.io.File;
//import java.io.FileNotFoundException;
//import java.io.BufferedReader;
//import java.io.FileReader;
//import java.util.*;
//
///**
// * Created by IntelliJ IDEA.
// * User: Ghost
// * Date: Jan 22, 2010
// * Time: 11:27:33 PM
// * To change this template use File | Settings | File Templates.
// */
//public class PlinkRodTest extends BaseTest {
// private static IndexedFastaSequenceFile seq;
//
// @BeforeClass
// public static void beforeTests() {
// try {
// seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
// } catch (FileNotFoundException e) {
// throw new StingException("unable to load the sequence dictionary");
// }
// GenomeLocParser.setupRefContigOrdering(seq);
//
// }
//
// public BufferedReader openFile(String filename) {
// try {
// return new BufferedReader(new FileReader(filename));
// } catch (FileNotFoundException e) {
// throw new StingException("Couldn't open file " + filename);
// }
//
// }
//
// @Test
// public void testStandardPedFile() {
// PlinkRod rod = new PlinkRod("test");
// try {
// rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
// } catch ( FileNotFoundException e ) {
// throw new StingException("test file for testStandardPedFile() does not exist",e);
// }
//
// // test that the sample names are correct
//
// List<String> rodNames = rod.getVariantSampleNames();
// List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
//
// Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
//
// boolean namesCorrect = true;
// for ( int i = 0; i < expectedNames.size(); i++ ) {
// namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
// }
//
// Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
//
// // test that rod can be iterated over
//
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
// do {
// genotypesInRod.add(rod.getGenotypes());
// sampleNamesInRod.add(rod.getVariantSampleNames());
// lociInRod.add(rod.getLocation());
// } while ( rod.parseLine(null,null) );
//
// Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
//
// ArrayList<Genotype> snp1 = genotypesInRod.get(0);
// ArrayList<Genotype> snp3 = genotypesInRod.get(2);
//
// Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
// Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
// Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
//
//
// List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
// List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
//
// String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
// String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
//
// Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
// Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
//
// boolean name2isSame = true;
//
// for ( ArrayList<String> names : sampleNamesInRod ) {
// name2isSame = name2isSame && names.get(1).equals("NAMELY");
// }
//
// Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
//
// // test that the loci are correctly parsed and in order
//
// List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
// boolean lociCorrect = true;
// for ( int i = 0; i < 3; i ++ ) {
// lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
// }
// }
//
// @Test
// public void testStandardPedFileWithIndels() {
// PlinkRod rod = new PlinkRod("test");
// try {
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
// } catch ( FileNotFoundException e) {
// throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
// }
//
// // Iterate through the rod
//
// List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
// ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
// do {
// genotypesInRod.add(rod.getGenotypes());
// sampleNamesInRod.add(rod.getVariantSampleNames());
// lociInRod.add(rod.getLocation());
// snpSites.add(rod.variantIsSNP());
// } while ( rod.parseLine(null,null) );
//
// boolean snpOrder = true;
// List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
// for ( int i = 0; i < 4; i ++ ) {
// snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
// }
//
// Assert.assertTrue("That the variant type order is as expected", snpOrder);
// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
// Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
// Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
// Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
// Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
// Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
// }
//
// @Test
// public void testBinaryPedFileNoIndels() {
// PlinkRod rod = new PlinkRod("binaryTest1");
// try {
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
// } catch (FileNotFoundException e) {
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
// }
//
// // iterate through the ROD and get stuff
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
//
// do {
// lociInRod.add(rod.getLocation());
// genotypesInRod.add(rod.getGenotypes());
// samplesInRod.add(rod.getVariantSampleNames());
// } while ( rod.parseLine(null,null) );
//
// List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
//
// for ( int i = 0; i < expecLoc.size(); i ++ ) {
// Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
// }
//
// List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
// "GG","GG","AG","TT","CC","CT","TG","GG","GG");
// List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
// false,false,false,true,false,false,true,true,false,false);
// List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
// "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
// "NA12878","NA12890","NA07000");
// int snpNo = 1;
// int indiv = 1;
// int alleleOffset = 0;
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
// for ( Genotype gen : snp ) {
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
// Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
// "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
// Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
// Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
// indiv++;
// alleleOffset++;
// }
// indiv = 1;
// snpNo++;
// }
// }
//
// @Test
// public void testIndelBinary() {
// PlinkRod rod = new PlinkRod("binaryTest2");
// try {
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
// } catch (FileNotFoundException e) {
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
// }
//
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// do {
// genotypesInRod.add(rod.getGenotypes());
// } while ( rod.parseLine(null,null) );
//
// List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
// "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
// List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
// false,false,false,false,false,false,true,false,true,false,false,false);
// List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
// false,false,false,false,false,false,false,false,false,false,false,false);
//
// int al = 0;
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
// for ( Genotype gen : snp ) {
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
// Allele firstAl = gen.getAlleles().get(0);
// Allele secondAl = gen.getAlleles().get(1);
// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
// Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
// al++;
// }
// }
// }
//}
package org.broadinstitute.sting.gatk.refdata;
/*
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele;
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.Assert;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Jan 22, 2010
* Time: 11:27:33 PM
* To change this template use File | Settings | File Templates.
*
public class PlinkRodTest extends BaseTest {
// todo :: get the isIndel() isInsertion() and isDeletion() tests working again -- this may require new
// todo :: methods in the objects themselves
private static IndexedFastaSequenceFile seq;
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
public BufferedReader openFile(String filename) {
try {
return new BufferedReader(new FileReader(filename));
} catch (FileNotFoundException e) {
throw new StingException("Couldn't open file " + filename);
}
}
@Test
public void testStandardPedFile() {
PlinkRod rod = new PlinkRod("test");
try {
rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
} catch ( FileNotFoundException e ) {
throw new StingException("test file for testStandardPedFile() does not exist",e);
}
// test that the sample names are correct
List<String> rodNames = rod.getVariantSampleNames();
List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
boolean namesCorrect = true;
for ( int i = 0; i < expectedNames.size(); i++ ) {
namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
}
Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
// test that rod can be iterated over
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
do {
genotypesInRod.add(rod.getGenotypes());
sampleNamesInRod.add(rod.getVariantSampleNames());
lociInRod.add(rod.getLocation());
} while ( rod.parseLine(null,null) );
Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
ArrayList<Genotype> snp1 = genotypesInRod.get(0);
ArrayList<Genotype> snp3 = genotypesInRod.get(2);
Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
String alleleStr1 = new String(snp1_individual3_alleles.get(0).getBases())+new String(snp1_individual3_alleles.get(1).getBases());
String alleleStr2 = new String(snp3_individual2_alleles.get(0).getBases())+new String(snp3_individual2_alleles.get(1).getBases());
Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
boolean name2isSame = true;
for ( ArrayList<String> names : sampleNamesInRod ) {
name2isSame = name2isSame && names.get(1).equals("NAMELY");
}
Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
// test that the loci are correctly parsed and in order
List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
boolean lociCorrect = true;
for ( int i = 0; i < 3; i ++ ) {
lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
}
}
@Test
public void testStandardPedFileWithIndels() {
PlinkRod rod = new PlinkRod("test");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
} catch ( FileNotFoundException e) {
throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
}
// Iterate through the rod
List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
do {
genotypesInRod.add(rod.getGenotypes());
sampleNamesInRod.add(rod.getVariantSampleNames());
lociInRod.add(rod.getLocation());
snpSites.add(rod.variantIsSNP());
} while ( rod.parseLine(null,null) );
boolean snpOrder = true;
List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
for ( int i = 0; i < 4; i ++ ) {
snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
}
Assert.assertTrue("That the variant type order is as expected", snpOrder);
// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1). );
// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the snp bases are correct","GC",new String(genotypesInRod.get(2).get(2).getAlleles().get(0).getBases())+new String(genotypesInRod.get(2).get(2).getAlleles().get(1).getBases()));
}
@Test
public void testBinaryPedFileNoIndels() {
PlinkRod rod = new PlinkRod("binaryTest1");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
} catch (FileNotFoundException e) {
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
}
// iterate through the ROD and get stuff
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
do {
lociInRod.add(rod.getLocation());
genotypesInRod.add(rod.getGenotypes());
samplesInRod.add(rod.getVariantSampleNames());
} while ( rod.parseLine(null,null) );
List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
for ( int i = 0; i < expecLoc.size(); i ++ ) {
Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
}
List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
"GG","GG","AG","TT","CC","CT","TG","GG","GG");
List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
false,false,false,true,false,false,true,true,false,false);
List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
"NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
"NA12878","NA12890","NA07000");
int snpNo = 1;
int indiv = 1;
int alleleOffset = 0;
for ( ArrayList<Genotype> snp : genotypesInRod ) {
for ( Genotype gen : snp ) {
String alStr = new String(gen.getAlleles().get(0).getBases())+new String(gen.getAlleles().get(1).getBases());
Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
"(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
indiv++;
alleleOffset++;
}
indiv = 1;
snpNo++;
}
}
@Test
public void testIndelBinary() {
PlinkRod rod = new PlinkRod("binaryTest2");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
} catch (FileNotFoundException e) {
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
}
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
do {
genotypesInRod.add(rod.getGenotypes());
} while ( rod.parseLine(null,null) );
List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
"TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
false,false,false,false,false,false,true,false,true,false,false,false);
List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
false,false,false,false,false,false,false,false,false,false,false,false);
int al = 0;
for ( ArrayList<Genotype> snp : genotypesInRod ) {
for ( Genotype gen : snp ) {
String alStr = new String(gen.getAlleles().get(0).getBases())+new String(gen.getAlleles().get(1).getBases());
Allele firstAl = gen.getAlleles().get(0);
Allele secondAl = gen.getAlleles().get(1);
// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
al++;
}
}
}
}*/