1. Fixed VCFGenotypeRecord bug (it needs to emit fields in the order specified by the GenotypeFormatString)

2. isNoCall() added to Genotype interface so that we can distinguish between ref and no calls (all we had before was isVariant())
3. Added Hardy-Weinberg annotation; still experimental - not working yet so don't use it.
4. Move 'output type' argument out of the UnifiedArgumentCollection and into the UnifiedGenotyper, in preparation for parallelization.
5. Improved some of the UG integration tests.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2398 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-18 04:14:14 +00:00
parent 98839193b7
commit 94f5edb68a
15 changed files with 140 additions and 49 deletions

View File

@ -0,0 +1,52 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.List;
import java.util.Map;
public class HardyWeinberg implements VariantAnnotation {
private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
if ( !(variation instanceof VariantBackedByGenotype) )
return null;
final List<Genotype> genotypes = ((VariantBackedByGenotype)variation).getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
return null;
int refCount = 0;
int hetCount = 0;
int homCount = 0;
for ( Genotype genotype : genotypes ) {
if ( genotype.isNoCall() )
continue;
if ( genotype.getNegLog10PError() < MIN_NEG_LOG10_PERROR )
continue;
if ( !genotype.isVariant(ref.getBase()) )
refCount++;
else if ( genotype.isHet() )
hetCount++;
else
homCount++;
}
double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount);
System.out.println(refCount + " " + hetCount + " " + homCount + " " + pvalue);
return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue));
}
public String getKeyName() { return "HW"; }
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("HW", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value for Hardy-Weinberg violation"); }
}

View File

@ -49,16 +49,18 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @param samples samples in input bam
* @param logger logger
* @param UAC unified arg collection
* @param outputFormat output format
*/
protected void initialize(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC) {
UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) {
this.samples = new TreeSet<String>(samples);
this.logger = logger;
baseModel = UAC.baseModel;
heterozygosity = UAC.heterozygosity;
defaultPlatform = UAC.defaultPlatform;
OUTPUT_FORMAT = UAC.VAR_FORMAT;
OUTPUT_FORMAT = outputFormat;
ALL_BASE_MODE = UAC.ALL_BASES;
GENOTYPE_MODE = UAC.GENOTYPE;
POOL_SIZE = UAC.POOLSIZE;

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.apache.log4j.Logger;
import java.util.Set;
@ -45,14 +46,16 @@ public class GenotypeCalculationModelFactory {
/**
* General and correct way to create GenotypeCalculationModel objects for arbitrary models
*
* @param UAC The unified argument collection
* @param samples samples in bam
* @param logger logger
* @param UAC the unified argument collection
* @param outputFormat the output format
* @return model
*/
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC) {
UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) {
GenotypeCalculationModel gcm;
switch ( UAC.genotypeModel ) {
case EM_POINT_ESTIMATE:
@ -67,7 +70,7 @@ public class GenotypeCalculationModelFactory {
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
}
gcm.initialize(samples, logger, UAC);
gcm.initialize(samples, logger, UAC, outputFormat);
return gcm;
}
}

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
public class UnifiedArgumentCollection {
@ -45,9 +44,6 @@ public class UnifiedArgumentCollection {
public int POOLSIZE = 0;
// control the output
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public boolean GENOTYPE = false;

View File

@ -57,6 +57,9 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
public File VARIANTS_FILE = null;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
// the model used for calculating genotypes
private GenotypeCalculationModel gcm;
@ -124,7 +127,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// for ( String sample : samples )
// logger.debug("SAMPLE: " + sample);
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, VAR_FORMAT);
// *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. ***
@ -143,11 +146,11 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// create the output writer stream
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
samples,
headerInfo);
else
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
samples,
headerInfo);
@ -158,7 +161,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF
if ( UAC.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
if ( VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.VCF )
return headerInfo;
// first, the basic info

View File

@ -91,6 +91,8 @@ public class BasicGenotype implements Genotype {
return true;
}
public boolean isNoCall() { return false; }
/**
* Returns true if observed allele bases differ (regardless of whether they are ref or alt)
*

View File

@ -45,6 +45,13 @@ public interface Genotype {
*/
public boolean isHet();
/**
* Returns true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
*
* @return true if we're het, false otherwise
*/
public boolean isNoCall();
/**
* get the genotype's location
*

View File

@ -205,6 +205,8 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements Genot
return !isHom();
}
public boolean isNoCall() { return false; }
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location

View File

@ -126,6 +126,8 @@ public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBac
return !isHom();
}
public boolean isNoCall() { return false; }
/**
*
* @return return this genotype as a variant

View File

@ -129,6 +129,9 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements Genoty
return !isHom();
}
// You can't make a 'no call' genotype call
public boolean isNoCall() { return false; }
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location

View File

@ -176,6 +176,14 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
return !isHom();
}
public boolean isNoCall() {
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
return false;
}
return true;
}
public int getPloidy() {
return 2;
}
@ -232,18 +240,25 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
* output a string representation of the VCFGenotypeRecord, given the alternate alleles
*
* @param altAlleles the alternate alleles, needed for toGenotypeString()
* @param genotypeFormatStrings genotype format strings
*
* @return a string
*/
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles) {
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
StringBuilder builder = new StringBuilder();
builder.append(toGenotypeString(altAlleles));
for (String field : mFields.keySet()) {
for ( String field : genotypeFormatStrings ) {
String value = mFields.get(field);
if ( value == null && field.equals(OLD_DEPTH_KEY) )
value = mFields.get(DEPTH_KEY);
if ( value == null )
continue;
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
if (mFields.get(field).equals(""))
if (value.equals(""))
builder.append(getMissingFieldValue(field));
else
builder.append(mFields.get(field));
builder.append(value);
}
return builder.toString();
}

View File

@ -114,7 +114,7 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
*/
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
extractFields(columnValues);
mGenotypeFormatString = null;
mGenotypeFormatString = "";
}
/**
@ -553,17 +553,18 @@ public class VCFRecord implements Variation, VariantBackedByGenotype {
* @param header the header object
*/
private void addGenotypeData(StringBuilder builder, VCFHeader header) {
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
builder.append(FIELD_SEPERATOR + mGenotypeFormatString);
if (header.getGenotypeSamples().size() < getGenotypes().size())
throw new RuntimeException("We have more genotype samples than the header specified");
Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getGenotypes());
String[] genotypeFormatStrings = mGenotypeFormatString.split(":");
for (String genotype : header.getGenotypeSamples()) {
builder.append(FIELD_SEPERATOR);
if (gMap.containsKey(genotype)) {
VCFGenotypeRecord rec = gMap.get(genotype);
builder.append(rec.toStringEncoding(this.mAlts));
builder.append(rec.toStringEncoding(this.mAlts, genotypeFormatStrings));
gMap.remove(genotype);
} else {
builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE);

View File

@ -50,7 +50,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("5c73e85a91bf8f8e9c47c445ec77233c"));
Arrays.asList("fecfec68226bbd9b458ede55d48e0762"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("5298c24e956361d209f14ac6138a3bbd"));
Arrays.asList("59845ada9dc5bc66e0042cfefdf8f16f"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2d3e220e96eb00c4c7bb2dbc8353d9bd"));
Arrays.asList("1b9815b89dde8a705b3744527fe1200a"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("d0e70ee36ed1a59b5f086bc30c9b2673"));
Arrays.asList("618056046f1ce83afad9effdebe1a74e"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("acef97201f2d17007dbdee6c639536ea"));
Arrays.asList("c70cc6bb6b83748ec8d968dc3bf879c4"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("aaae89c48fcab615fe4204220ec62859"));
Arrays.asList("e7c8900ff9a18f2c8a033ae741e7143b"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2031599f8554d16342f53cb80fae296a"));
Arrays.asList("85ec66095c67865be9777c7c93ceb263"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("2b47c7a6a7f0ce15fd1d1dd02ecab73b"));
Arrays.asList("97da2e0699faaa17ea85e03ce3cd61fa"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}

View File

@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("b19f85fddd08485c668e7192df79d944"));
Arrays.asList("44942d4e56aec62520903d297cee5fd8"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("a6c8e77d1741f3f6d958a0634fa59e14"));
Arrays.asList("aabea51b271df324eb543bc3417c0a42"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("459b8f6a6265b67718495e9bffe96fa8"));
Arrays.asList("a733878ce9c257148a224aa00c0e63e9"));
executeTest("testPooled1", spec);
}
@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("c132c93cec5fdf02c3235180a7aa7dcc"));
Arrays.asList("b68ad77195f6e926c4fce5f1a91f3bc9"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("09d48c56eae25fb6418e10feeb5b3fc5"));
Arrays.asList("b63c43ba827cfc74bb8ef679ce3d847d"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("3544b1eb97834e2050239c101eaddb2d"));
Arrays.asList("8ae0be3b65c83d973519abad858c0073"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "dc908834d97bde696b6bdccf756c9ab6" );
e.put( "-all_bases", "484d424cbcd646b3c5d823cad43f4f5f" );
e.put( "--min_base_quality_score 10", "45e336766e94f05e6ac44d1e50473e18" );
e.put( "--min_mapping_quality_score 10", "de417c1156483ab87b0f8a25f10d4d87" );
e.put( "--max_mismatches_in_40bp_window 5", "05132003c73e10fbdab2af14af677764" );
e.put( "-genotype", "c984952b91193d9550066f9514434183" );
e.put( "-all_bases", "0062942f2323ea0f2c9d3a3739a28d20" );
e.put( "--min_base_quality_score 10", "8a645a76d85e8a6edda6acdf8071ee6b" );
e.put( "--min_mapping_quality_score 10", "cf6c38de9872bdbf5da1a22d8c962ebe" );
e.put( "--max_mismatches_in_40bp_window 5", "e21a319eb0efbe11c3c1eeea7c9c0144" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -103,7 +103,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
Arrays.asList("ea154d6ab6bbaf40da0dac5162b7e9fd"));
Arrays.asList("6823ce46547cf9693cda040dcbd4c9c1"));
executeTest("testConfidence", spec);
}
@ -117,7 +117,6 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testOtherFormat() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "GLF", "8c72131dfb2b830efb9938a582672a3e" );
e.put( "GELI", "e9e00bdb32ce63420988956c1a9b805f" );
e.put( "GELI_BINARY", "46162567eac3a5004f5f9b4c93d1b8d3" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
@ -128,6 +127,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
}
}
// --------------------------------------------- //
// ALL REMAINING TESTS ARE OUTPUT IN GELI FORMAT //
// --------------------------------------------- //
// --------------------------------------------------------------------------------------------------------------
//
// testing heterozygosity
@ -136,12 +139,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "ae0134840e0c9fa295b23ecf4ba3e768" );
e.put( 1.0 / 1850, "71ddcf22f4c0538d03508b720d01d3fe" );
e.put( 0.01, "700e6426c4142c823f7ac1dde2aa19ea" );
e.put( 1.0 / 1850, "e9e00bdb32ce63420988956c1a9b805f" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 --heterozygosity " + entry.getKey(), 1,
"-T UnifiedGenotyper -vf GELI -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 --heterozygosity " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
}
@ -156,12 +159,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOtherBaseCallModel() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "one_state", "ab71d814e897d7f1440af8f02365b4fa" );
e.put( "three_state", "ce55849c55c58c59773200b2a88db0ee" );
e.put( "one_state", "d69abadc3bf861d621017c0e41b87b0a" );
e.put( "three_state", "ebcc76cc4579393f98aecb59bdc56507" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -gm JOINT_ESTIMATE -confidence 30 -bm " + entry.getKey(), 1,
"-T UnifiedGenotyper -vf GELI -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -gm JOINT_ESTIMATE -confidence 30 -bm " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec);
}

View File

@ -195,8 +195,8 @@ public class VCFReaderTest extends BaseTest {
if (!header.contains(record.getSampleName())) {
Assert.fail("Parsed header doesn't contain field " + record.getSampleName());
}
if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles()))) {
Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles())
if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")))) {
Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":"))
+ " != " + variant.get(header.indexOf(record.getSampleName())));
}
}