package org.broadinstitute.sting.gatk.walkers;

import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodSAMPileup;
import org.broadinstitute.sting.gatk.refdata.SAMPileupRecord;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.StingException;

import java.util.Arrays;
import java.util.Locale;

/**
 * Locus walker that compares, at every locus, the pileup obtained from the alignment
 * context with the record bound to the "pileup" reference meta data track
 * (a {@link rodSAMPileup}). A missing record or a mismatch is printed to the walker
 * output and, unless {@code continue_after_error} is set, aborts the traversal.
 *
 * User: mdepristo
 * Date: Feb 22, 2009
 * Time: 3:22:14 PM
 */
// NOTE(review): the type arguments on LocusWalker / TreeReducible are inferred from the
// map/reduce/treeReduce signatures below -- confirm against the declarations of those types.
@Requires(value={DataSource.READS,DataSource.REFERENCE},referenceMetaData=@RMD(name="pileup",type=rodSAMPileup.class))
public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats> implements TreeReducible<ValidationStats> {
    /** When true, problems are only reported on the output stream instead of stopping the run. */
    @Argument(fullName="continue_after_error",doc="Continue after an error",required=false)
    public boolean CONTINUE_AFTER_AN_ERROR = false;

    /**
     * Validates the pileup at one locus against the truth record found in the tracker.
     *
     * @param tracker reference meta data at this locus; the "pileup" binding supplies the truth record
     * @param ref     reference context, used for the reference base when printing pileups
     * @param context alignment context from which the pileup under test is taken
     * @return the size of the pileup at this locus
     * @throws RuntimeException if the pileups differ and {@code CONTINUE_AFTER_AN_ERROR} is false
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        ReadBackedPileup pileup = context.getPileup();
        SAMPileupRecord truePileup = getTruePileup( tracker );

        if ( truePileup == null ) {
            out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBase(), false));
            if ( ! CONTINUE_AFTER_AN_ERROR ) {
                Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
                        context.getLocation(), new String(pileup.getBases())));
            }
        } else {
            String pileupDiff = pileupDiff(pileup, truePileup, true);
            if ( pileupDiff != null ) {
                out.printf("%s vs. %s%n", pileup.getPileupString(ref.getBase(), true), truePileup.getPileupString());
                if ( ! CONTINUE_AFTER_AN_ERROR ) {
                    throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff));
                }
            }
        }

        return pileup.size();
    }

    /**
     * Returns {@code x} with its characters in ascending order when {@code sortMe} is true,
     * otherwise returns {@code x} unchanged.
     */
    private static String maybeSorted( final String x, boolean sortMe ) {
        if ( ! sortMe ) {
            return x;
        }
        // Sort chars rather than bytes so the result does not depend on the platform charset.
        char[] chars = x.toCharArray();
        Arrays.sort(chars);
        return new String(chars);
    }

    /**
     * Compares a pileup with a pileup record and describes the first difference found.
     * Size is checked first, then location, then bases (ignoring case), then quals.
     *
     * @param a              pileup under test
     * @param b              record to compare against
     * @param orderDependent if false, bases and quals are each sorted before being compared,
     *                       so only their contents (not their order) have to agree
     * @return a short description of the first mismatch, or {@code null} if none was found
     */
    public static String pileupDiff(final ReadBackedPileup a, final SAMPileupRecord b, boolean orderDependent) {
        if ( a.size() != b.size() )
            return "Sizes not equal";
        if ( a.getLocation().compareTo(b.getLocation()) != 0 )
            return "Locations not equal";

        // Case must be normalised BEFORE sorting: sorting first places upper- and lower-case
        // letters in different positions ("aC" -> "Ca" but "Ac" -> "Ac"), which made equal
        // base sets compare as different in the order-independent mode.
        String aBases = maybeSorted(new String(a.getBases()).toUpperCase(Locale.ENGLISH), ! orderDependent );
        String bBases = maybeSorted(b.getBasesAsString().toUpperCase(Locale.ENGLISH), ! orderDependent );
        if ( ! aBases.equals(bBases) )
            return "Bases not equal";

        String aQuals = maybeSorted(new String(a.getQuals()), ! orderDependent );
        String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent );
        if ( ! aQuals.equals(bQuals) )
            return "Quals not equal";

        return null;
    }

    /** @return a fresh accumulator with both counters at zero */
    public ValidationStats reduceInit() {
        return new ValidationStats();
    }

    /**
     * Folds the result of one {@code map} call into the running totals.
     *
     * @param value pileup size returned by {@code map} for one locus
     * @param sum   accumulator, updated in place
     * @return the same accumulator instance
     */
    public ValidationStats reduce(Integer value, ValidationStats sum) {
        sum.nLoci++;
        sum.nBases += value;
        return sum;
    }

    /**
     * Merges two partial results into a new accumulator; neither argument is modified.
     *
     * @param lhs first partial result
     * @param rhs second partial result
     * @return a new accumulator holding the summed counters
     */
    public ValidationStats treeReduce( ValidationStats lhs, ValidationStats rhs ) {
        ValidationStats combined = new ValidationStats();
        combined.nLoci = lhs.nLoci + rhs.nLoci;
        combined.nBases = lhs.nBases + rhs.nBases;
        return combined;
    }

    /**
     * Extracts the true pileup data from the given rodSAMPileup.  Note that this implementation
     * assumes that the genotype will only be point or indel.
     * @param tracker ROD tracker from which to extract pileup data.
     * @return True pileup data, or null if the tracker has no "pileup" binding at this locus.
     * @throws StingException if the record has neither a point nor an indel genotype.
     */
    private SAMPileupRecord getTruePileup( RefMetaDataTracker tracker ) {
        rodSAMPileup pileup = (rodSAMPileup)tracker.lookup("pileup", null);

        if( pileup == null )
            return null;

        if( pileup.hasPointGenotype() )
            return (SAMPileupRecord)pileup.getPointGenotype();
        else if( pileup.hasIndelGenotype() )
            return (SAMPileupRecord)pileup.getIndelGenotype();
        else
            throw new StingException("Unsupported pileup type: " + pileup);
    }
}

/** Running totals produced by {@link ValidatingPileupWalker}: loci visited and pileup elements seen. */
class ValidationStats {
    /** Number of loci folded in through reduce. */
    public long nLoci = 0;
    /** Sum of the pileup sizes over those loci. */
    public long nBases = 0;

    public ValidationStats() {
    }

    /** @return a one-line summary of both counters, terminated by a line separator */
    @Override
    public String toString() {
        return String.format("Validated %d sites covered by %d bases%n", nLoci, nBases);
    }
}