- Depth annotation now includes MQ0 reads

- Removed MQ0 annotation
- Updated RMS MQ annotation to use new pileup
- UG now outputs all of its arguments as key/value pairs in the header (for VCF)
- Cleaned up VCFGenotypeWriterAdapter interface a bit



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2288 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-09 02:53:00 +00:00
parent e8822a3fb4
commit 717eb1de96
10 changed files with 57 additions and 82 deletions

View File

@ -15,7 +15,7 @@ public class DepthOfCoverage extends StandardVariantAnnotation {
public String getKeyName() { return VCFRecord.DEPTH_KEY; }
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth\""; }
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; }
public boolean useZeroQualityReads() { return false; }
public boolean useZeroQualityReads() { return true; }
}

View File

@ -1,28 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Variation;
import net.sf.samtools.SAMRecord;
import java.util.List;
public class MappingQualityZero extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
List<SAMRecord> reads = pileup.getReads();
int MQ0Count = 0;
for (int i=0; i < reads.size(); i++) {
if ( reads.get(i).getMappingQuality() == 0 )
MQ0Count++;
}
return String.format("%d", MQ0Count);
}
public String getKeyName() { return "MQ0"; }
public String getDescription() { return "MQ0,1,Integer,\"Total Mapping Quality Zero Reads\""; }
public boolean useZeroQualityReads() { return true; }
}

View File

@ -3,21 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import net.sf.samtools.SAMRecord;
import java.util.List;
public class RMSMappingQuality extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
List<SAMRecord> reads = pileup.getReads();
int[] qualities = new int[reads.size()];
for (int i=0; i < reads.size(); i++)
qualities[i] = reads.get(i).getMappingQuality();
int[] qualities = new int[pileup.size()];
int index = 0;
for (PileupElement p : pileup )
qualities[index++] = p.getRead().getMappingQuality();
double rms = MathUtils.rms(qualities);
return String.format("%.2f", rms);
}

View File

@ -134,31 +134,53 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
samples.clear();
// get the optional header fields
Set<String> headerInfo = getHeaderInfo();
// create the output writer stream
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
samples,
headerInfo);
else
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
samples,
headerInfo);
callsMetrics = new CallMetrics();
}
private Set<String> getHeaderInfo() {
Set<String> headerInfo = new HashSet<String>();
headerInfo.add("source=UnifiedGenotyper");
headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName());
if ( UAC.ALL_ANNOTATIONS )
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
else
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\"");
headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\"");
if ( !UAC.NO_SLOD )
headerInfo.add("INFO=SB,1,Float,\"Strand Bias\"");
// create the output writer stream
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
"UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName(),
samples,
headerInfo);
else
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
"UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName(),
samples,
headerInfo);
headerInfo.add("UG_genotype_model=" + UAC.genotypeModel);
headerInfo.add("UG_base_model=" + UAC.baseModel);
headerInfo.add("UG_heterozygosity=" + UAC.heterozygosity);
headerInfo.add("UG_genotype_mode=" + UAC.GENOTYPE);
headerInfo.add("UG_all_bases_mode=" + UAC.ALL_BASES);
headerInfo.add("UG_min_confidence=" + UAC.CONFIDENCE_THRESHOLD);
headerInfo.add("UG_max_deletion_fraction=" + UAC.MAX_DELETION_FRACTION);
headerInfo.add("UG_max_coverage=" + UAC.MAX_READS_IN_PILEUP);
if ( UAC.POOLSIZE > 0 )
headerInfo.add("UG_poolSize=" + UAC.POOLSIZE);
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
headerInfo.add("UG_single_sample=" + UAC.ASSUME_SINGLE_SAMPLE);
if ( UAC.defaultPlatform != null )
headerInfo.add("UG_default_platform=" + UAC.defaultPlatform);
callsMetrics = new CallMetrics();
return headerInfo;
}
/**

View File

@ -30,8 +30,6 @@ public class GenotypeWriterFactory {
* @param format the format
* @param header the sam file header
* @param destination the destination file
* @param source the source
* @param referenceName the ref name
* @param sampleNames the sample names
* @param headerInfo the optional header info fields
* @return the genotype writer object
@ -39,8 +37,6 @@ public class GenotypeWriterFactory {
public static GenotypeWriter create(GENOTYPE_FORMAT format,
SAMFileHeader header,
File destination,
String source,
String referenceName,
Set<String> sampleNames,
Set<String> headerInfo) {
switch (format) {
@ -51,7 +47,7 @@ public class GenotypeWriterFactory {
case GELI_BINARY:
return new GeliAdapter(destination, header);
case VCF:
return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo);
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo);
default:
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
}
@ -60,8 +56,6 @@ public class GenotypeWriterFactory {
public static GenotypeWriter create(GENOTYPE_FORMAT format,
SAMFileHeader header,
PrintStream destination,
String source,
String referenceName,
Set<String> sampleNames,
Set<String> headerInfo) {
switch (format) {
@ -70,7 +64,7 @@ public class GenotypeWriterFactory {
case GLF:
return new GLFWriter(header.toString(), destination);
case VCF:
return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo);
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo);
default:
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
}

View File

@ -19,17 +19,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
// our VCF objects
private VCFWriter mWriter = null;
private VCFHeader mHeader = null;
private String mSource;
private String mReferenceName;
private final Set<String> mSampleNames = new LinkedHashSet<String>();
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set<String> sampleNames, Set<String> headerInfo) {
mReferenceName = referenceName;
mSource = source;
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<String> headerInfo) {
mSampleNames.addAll(sampleNames);
initializeHeader(headerInfo);
@ -38,9 +34,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter = new VCFWriter(mHeader, writeTo);
}
public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set<String> sampleNames, Set<String> headerInfo) {
mReferenceName = referenceName;
mSource = source;
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<String> headerInfo) {
mSampleNames.addAll(sampleNames);
initializeHeader(headerInfo);
@ -59,8 +53,6 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
// setup the header fields
hInfo.add(VCFHeader.FULL_FORMAT_LINE);
hInfo.add("source=" + mSource);
hInfo.add("reference=" + mReferenceName);
hInfo.addAll(optionalHeaderInfo);
// setup the sample names

View File

@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("31ddecd3118bd6aea00bcd0369a1b32f"));
Arrays.asList("24609ede968a90ebff949791694f70a0"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("553c55f3bcf31a6b1d9a2c1d27fde480"));
Arrays.asList("9f05c78ef9913259c02f8bda6ec97505"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fa1071b292fd2e9db5c36410ffd44fdb"));
Arrays.asList("54811556d642a83fcfa2b359aa275023"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("9d133a91949e477523a039a0a52ac2a8"));
Arrays.asList("b67282a03c92ae3d24a7b588aa1e61df"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("68fcdca40df72b3c703bab846c5f0bbd"));
Arrays.asList("7c27fc1ad1fedab6944d5f93094ef914"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("eb0cd5494ae4c1781bfa00b6c4146993"));
Arrays.asList("cd08c5e4dbd0cf477efd1204c681c7c6"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("ec1aeb69d7d54a7ced1ce625146d1d59"));
Arrays.asList("d7579207d2521f36648b2b43be805bc4"));
executeTest("testPooled1", spec);
}
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("e27552dad05ddf17403aaa7176b9cfe2"));
Arrays.asList("f4e8721a905e42f511967a1cfdb966ec"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("3618711e41b7e37f47b995d39adbc76b"));
Arrays.asList("962838ae188b1c07a35ead671019062e"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("2a32add40319ab2de44951624df2be4b"));
Arrays.asList("a9cf1abecf709de9f158f6ffe57b00e7"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}

View File

@ -44,7 +44,6 @@
<class>org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions</class>
</dependencies>

View File

@ -44,7 +44,6 @@
<class>org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality</class>
<class>org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions</class>
</dependencies>