diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 7f9df6644..8f31f7d4e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -26,6 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -114,6 +117,10 @@ public class VariantsToTable extends RodWalker { @Argument(fullName="fields", shortName="F", doc="The name of each field to capture for output in the table", required=true) public List fieldsToTake = new ArrayList(); + @Hidden + @Argument(fullName="genotypeFields", shortName="GF", doc="The name of each field to capture for output in the table", required=true) + public List genotypeFieldsToTake = new ArrayList(); + /** * By default this tool only emits values for fields where the FILTER field is either PASS or . (unfiltered). * Throwing this flag will cause VariantsToTable to emit values regardless of the FILTER field value. @@ -151,9 +158,26 @@ public class VariantsToTable extends RodWalker { public boolean ALLOW_MISSING_DATA = false; private final static String MISSING_DATA = "NA"; + private TreeSet samples = new TreeSet(); + public void initialize() { + + String genotypeHeader = ""; + if (!genotypeFieldsToTake.isEmpty()) { + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), variants); + TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + samples.addAll(vcfSamples); + + StringBuilder sb = new StringBuilder(); + for (final String sample : samples) { + for (final String gf : genotypeFieldsToTake) { + sb.append(sample+"."+gf+"\t"); + } + } + genotypeHeader = sb.toString(); + } // print out the header - out.println(Utils.join("\t", fieldsToTake)); + out.println(Utils.join("\t", fieldsToTake) + "\t"+genotypeHeader); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -162,7 +186,8 @@ public class VariantsToTable extends RodWalker { for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) { if ( showFiltered || vc.isNotFiltered() ) { - for ( final List record : extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA, splitMultiAllelic) ) + for ( final List record : extractFields(vc, fieldsToTake, genotypeFieldsToTake, samples, + ALLOW_MISSING_DATA, splitMultiAllelic) ) out.println(Utils.join("\t", record)); } } @@ -186,16 +211,25 @@ public class VariantsToTable extends RodWalker { * * @param vc the VariantContext whose field values we can to capture * @param fields a non-null list of fields to capture from VC + * @param genotypeFields a (possibly) null) list of fields to capture from each genotype + * @param samples set of samples in vc, can be null in case of sites-only file * @param allowMissingData if false, then throws a UserException if any field isn't found in vc. Otherwise provides a value of NA * @param splitMultiAllelic if true, multiallelic variants are to be split into multiple records * @return List of lists of field values */ - private static List> extractFields(VariantContext vc, List fields, boolean allowMissingData, boolean splitMultiAllelic) { + private static List> extractFields(VariantContext vc, List fields, List genotypeFields, + Set samples, boolean allowMissingData, boolean splitMultiAllelic) { final int numRecordsToProduce = splitMultiAllelic ? vc.getAlternateAlleles().size() : 1; final List> records = new ArrayList>(numRecordsToProduce); + + int numFields = fields.size(); + final boolean addGenotypeFields = (genotypeFields != null && !genotypeFields.isEmpty() && samples != null && !samples.isEmpty()); + if (addGenotypeFields) + numFields += genotypeFields.size()*samples.size(); + for ( int i = 0; i < numRecordsToProduce; i++ ) - records.add(new ArrayList(fields.size())); + records.add(new ArrayList(numFields)); for ( String field : fields ) { @@ -228,6 +262,16 @@ public class VariantsToTable extends RodWalker { } } + if (addGenotypeFields) { + for (final String sample : samples) { + for (final String gf : genotypeFields) { + if (vc.hasGenotype(sample) && vc.getGenotype(sample).hasAttribute(gf)) + addFieldValue(vc.getGenotype(sample).getAttribute(gf),records); + else + addFieldValue(MISSING_DATA, records); + } + } + } return records; } @@ -253,7 +297,7 @@ public class VariantsToTable extends RodWalker { } public static List> extractFields(VariantContext vc, List fields, boolean allowMissingData) { - return extractFields(vc, fields, allowMissingData, false); + return extractFields(vc, fields, null, null, allowMissingData, false); } // // default reduce -- doesn't do anything at all diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index b1720e509..256f93473 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -26,10 +26,13 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; @@ -94,10 +97,14 @@ public class ArtificialReadPileupTestProvider { public GenomeLocParser getGenomeLocParser() { return genomeLocParser; } public Map getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) { + return getAlignmentContextFromAlleles(eventLength, altBases, numReadsPerAllele, false, BASE_QUAL); + } + public Map getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele, + boolean addBaseErrors, int phredScaledBaseErrorRate) { // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - ArrayList vcAlleles = new ArrayList(); + ArrayList vcAlleles = new ArrayList(); Allele refAllele, altAllele; if (eventLength == 0) {// SNP case refAllele =Allele.create(refBases.substring(offset,offset+1),true); @@ -128,7 +135,7 @@ public class ArtificialReadPileupTestProvider { Map contexts = new HashMap(); for (String sample: sampleNames) { - AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample)); + AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); contexts.put(sample,context); } @@ -143,7 +150,7 @@ public class ArtificialReadPileupTestProvider { return rg; } private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases, - int[] numReadsPerAllele, String sample) { + int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) { List pileupElements = new ArrayList(); int readStart = contigStart; int offset = (contigStop-contigStart+1)/2; @@ -158,8 +165,11 @@ public class ArtificialReadPileupTestProvider { for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) { byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); + if (addErrors) + addBaseErrors(readBases, phredScaledErrorRate); + byte[] readQuals = new byte[readBases.length]; - Arrays.fill(readQuals, (byte)BASE_QUAL); + Arrays.fill(readQuals, (byte)phredScaledErrorRate); GATKSAMRecord read = new GATKSAMRecord(header); read.setBaseQualities(readQuals); @@ -208,4 +218,20 @@ public class ArtificialReadPileupTestProvider { } + private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) { + double errorProbability = QualityUtils.qualToErrorProb((byte)phredScaledErrorRate); + + for (int k=0; k < readBases.length; k++) { + if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < errorProbability) { + // random offset + int offset = BaseUtils.simpleBaseToBaseIndex(readBases[k]); //0..3 + offset += (GenomeAnalysisEngine.getRandomGenerator().nextInt(3)+1); // adds 1,2 or 3 + offset %= 4; + readBases[k] = BaseUtils.baseIndexToSimpleBase(offset); + + } + + } + + } }