- Depth annotation now includes MQ0 reads
- Removed MQ0 annotation - Updated RMS MQ annotation to use new pileup - UG now outputs all of its arguments as key/value pairs in the header (for VCF) - Cleaned up VCFGenotypeWriterAdapter interface a bit git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2288 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e8822a3fb4
commit
717eb1de96
|
|
@ -15,7 +15,7 @@ public class DepthOfCoverage extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return VCFRecord.DEPTH_KEY; }
|
||||
|
||||
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth\""; }
|
||||
public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; }
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,28 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class MappingQualityZero extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
int MQ0Count = 0;
|
||||
for (int i=0; i < reads.size(); i++) {
|
||||
if ( reads.get(i).getMappingQuality() == 0 )
|
||||
MQ0Count++;
|
||||
}
|
||||
return String.format("%d", MQ0Count);
|
||||
}
|
||||
|
||||
public String getKeyName() { return "MQ0"; }
|
||||
|
||||
public String getDescription() { return "MQ0,1,Integer,\"Total Mapping Quality Zero Reads\""; }
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -3,21 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class RMSMappingQuality extends StandardVariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
|
||||
List<SAMRecord> reads = pileup.getReads();
|
||||
int[] qualities = new int[reads.size()];
|
||||
for (int i=0; i < reads.size(); i++)
|
||||
qualities[i] = reads.get(i).getMappingQuality();
|
||||
int[] qualities = new int[pileup.size()];
|
||||
int index = 0;
|
||||
for (PileupElement p : pileup )
|
||||
qualities[index++] = p.getRead().getMappingQuality();
|
||||
double rms = MathUtils.rms(qualities);
|
||||
return String.format("%.2f", rms);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,31 +134,53 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
samples.clear();
|
||||
|
||||
// get the optional header fields
|
||||
Set<String> headerInfo = getHeaderInfo();
|
||||
|
||||
// create the output writer stream
|
||||
if ( VARIANTS_FILE != null )
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||
samples,
|
||||
headerInfo);
|
||||
else
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
|
||||
samples,
|
||||
headerInfo);
|
||||
|
||||
callsMetrics = new CallMetrics();
|
||||
}
|
||||
|
||||
private Set<String> getHeaderInfo() {
|
||||
Set<String> headerInfo = new HashSet<String>();
|
||||
|
||||
headerInfo.add("source=UnifiedGenotyper");
|
||||
headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName());
|
||||
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
|
||||
else
|
||||
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
|
||||
|
||||
headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\"");
|
||||
headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\"");
|
||||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add("INFO=SB,1,Float,\"Strand Bias\"");
|
||||
|
||||
// create the output writer stream
|
||||
if ( VARIANTS_FILE != null )
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||
"UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples,
|
||||
headerInfo);
|
||||
else
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out,
|
||||
"UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples,
|
||||
headerInfo);
|
||||
headerInfo.add("UG_genotype_model=" + UAC.genotypeModel);
|
||||
headerInfo.add("UG_base_model=" + UAC.baseModel);
|
||||
headerInfo.add("UG_heterozygosity=" + UAC.heterozygosity);
|
||||
headerInfo.add("UG_genotype_mode=" + UAC.GENOTYPE);
|
||||
headerInfo.add("UG_all_bases_mode=" + UAC.ALL_BASES);
|
||||
headerInfo.add("UG_min_confidence=" + UAC.CONFIDENCE_THRESHOLD);
|
||||
headerInfo.add("UG_max_deletion_fraction=" + UAC.MAX_DELETION_FRACTION);
|
||||
headerInfo.add("UG_max_coverage=" + UAC.MAX_READS_IN_PILEUP);
|
||||
if ( UAC.POOLSIZE > 0 )
|
||||
headerInfo.add("UG_poolSize=" + UAC.POOLSIZE);
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
headerInfo.add("UG_single_sample=" + UAC.ASSUME_SINGLE_SAMPLE);
|
||||
if ( UAC.defaultPlatform != null )
|
||||
headerInfo.add("UG_default_platform=" + UAC.defaultPlatform);
|
||||
|
||||
callsMetrics = new CallMetrics();
|
||||
return headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -30,8 +30,6 @@ public class GenotypeWriterFactory {
|
|||
* @param format the format
|
||||
* @param header the sam file header
|
||||
* @param destination the destination file
|
||||
* @param source the source
|
||||
* @param referenceName the ref name
|
||||
* @param sampleNames the sample names
|
||||
* @param headerInfo the optional header info fields
|
||||
* @return the genotype writer object
|
||||
|
|
@ -39,8 +37,6 @@ public class GenotypeWriterFactory {
|
|||
public static GenotypeWriter create(GENOTYPE_FORMAT format,
|
||||
SAMFileHeader header,
|
||||
File destination,
|
||||
String source,
|
||||
String referenceName,
|
||||
Set<String> sampleNames,
|
||||
Set<String> headerInfo) {
|
||||
switch (format) {
|
||||
|
|
@ -51,7 +47,7 @@ public class GenotypeWriterFactory {
|
|||
case GELI_BINARY:
|
||||
return new GeliAdapter(destination, header);
|
||||
case VCF:
|
||||
return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo);
|
||||
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo);
|
||||
default:
|
||||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||
}
|
||||
|
|
@ -60,8 +56,6 @@ public class GenotypeWriterFactory {
|
|||
public static GenotypeWriter create(GENOTYPE_FORMAT format,
|
||||
SAMFileHeader header,
|
||||
PrintStream destination,
|
||||
String source,
|
||||
String referenceName,
|
||||
Set<String> sampleNames,
|
||||
Set<String> headerInfo) {
|
||||
switch (format) {
|
||||
|
|
@ -70,7 +64,7 @@ public class GenotypeWriterFactory {
|
|||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
case VCF:
|
||||
return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo);
|
||||
return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo);
|
||||
default:
|
||||
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,17 +19,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
// our VCF objects
|
||||
private VCFWriter mWriter = null;
|
||||
private VCFHeader mHeader = null;
|
||||
private String mSource;
|
||||
private String mReferenceName;
|
||||
private final Set<String> mSampleNames = new LinkedHashSet<String>();
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
|
||||
|
||||
|
||||
public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set<String> sampleNames, Set<String> headerInfo) {
|
||||
mReferenceName = referenceName;
|
||||
mSource = source;
|
||||
public VCFGenotypeWriterAdapter(File writeTo, Set<String> sampleNames, Set<String> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
initializeHeader(headerInfo);
|
||||
|
|
@ -38,9 +34,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
mWriter = new VCFWriter(mHeader, writeTo);
|
||||
}
|
||||
|
||||
public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set<String> sampleNames, Set<String> headerInfo) {
|
||||
mReferenceName = referenceName;
|
||||
mSource = source;
|
||||
public VCFGenotypeWriterAdapter(OutputStream writeTo, Set<String> sampleNames, Set<String> headerInfo) {
|
||||
mSampleNames.addAll(sampleNames);
|
||||
|
||||
initializeHeader(headerInfo);
|
||||
|
|
@ -59,8 +53,6 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
|
||||
// setup the header fields
|
||||
hInfo.add(VCFHeader.FULL_FORMAT_LINE);
|
||||
hInfo.add("source=" + mSource);
|
||||
hInfo.add("reference=" + mReferenceName);
|
||||
hInfo.addAll(optionalHeaderInfo);
|
||||
|
||||
// setup the sample names
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("31ddecd3118bd6aea00bcd0369a1b32f"));
|
||||
Arrays.asList("24609ede968a90ebff949791694f70a0"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("553c55f3bcf31a6b1d9a2c1d27fde480"));
|
||||
Arrays.asList("9f05c78ef9913259c02f8bda6ec97505"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("fa1071b292fd2e9db5c36410ffd44fdb"));
|
||||
Arrays.asList("54811556d642a83fcfa2b359aa275023"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("9d133a91949e477523a039a0a52ac2a8"));
|
||||
Arrays.asList("b67282a03c92ae3d24a7b588aa1e61df"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("68fcdca40df72b3c703bab846c5f0bbd"));
|
||||
Arrays.asList("7c27fc1ad1fedab6944d5f93094ef914"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("eb0cd5494ae4c1781bfa00b6c4146993"));
|
||||
Arrays.asList("cd08c5e4dbd0cf477efd1204c681c7c6"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("ec1aeb69d7d54a7ced1ce625146d1d59"));
|
||||
Arrays.asList("d7579207d2521f36648b2b43be805bc4"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("e27552dad05ddf17403aaa7176b9cfe2"));
|
||||
Arrays.asList("f4e8721a905e42f511967a1cfdb966ec"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("3618711e41b7e37f47b995d39adbc76b"));
|
||||
Arrays.asList("962838ae188b1c07a35ead671019062e"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("2a32add40319ab2de44951624df2be4b"));
|
||||
Arrays.asList("a9cf1abecf709de9f158f6ffe57b00e7"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@
|
|||
<class>org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions</class>
|
||||
</dependencies>
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@
|
|||
<class>org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality</class>
|
||||
<class>org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions</class>
|
||||
</dependencies>
|
||||
|
|
|
|||
Loading…
Reference in New Issue