2009-04-15 06:13:10 +08:00
|
|
|
package org.broadinstitute.sting.gatk.walkers;
|
|
|
|
|
|
2009-08-05 05:01:37 +08:00
|
|
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
|
|
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
2009-04-15 06:13:10 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
|
|
|
import org.broadinstitute.sting.gatk.refdata.rodSAMPileup;
|
2009-11-25 11:51:41 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.SAMPileupRecord;
|
2009-04-15 06:13:10 +08:00
|
|
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
|
|
|
import org.broadinstitute.sting.utils.Utils;
|
2009-11-25 11:51:41 +08:00
|
|
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
2009-05-23 05:20:24 +08:00
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
2009-04-15 06:13:10 +08:00
|
|
|
|
2009-11-25 11:51:41 +08:00
|
|
|
import java.util.Arrays;
|
|
|
|
|
|
2009-04-15 06:13:10 +08:00
|
|
|
/**
|
|
|
|
|
* Created by IntelliJ IDEA.
|
|
|
|
|
* User: mdepristo
|
|
|
|
|
* Date: Feb 22, 2009
|
|
|
|
|
* Time: 3:22:14 PM
|
|
|
|
|
* To change this template use File | Settings | File Templates.
|
|
|
|
|
*/
|
2009-05-20 07:26:17 +08:00
|
|
|
@Requires(value={DataSource.READS,DataSource.REFERENCE},referenceMetaData=@RMD(name="pileup",type=rodSAMPileup.class))
|
2009-05-23 05:20:24 +08:00
|
|
|
public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats> implements TreeReducible<ValidationStats> {
|
2009-05-07 09:22:01 +08:00
|
|
|
@Argument(fullName="continue_after_error",doc="Continue after an error",required=false)
|
|
|
|
|
public boolean CONTINUE_AFTER_AN_ERROR = false;
|
2009-04-15 06:13:10 +08:00
|
|
|
|
2009-08-05 05:01:37 +08:00
|
|
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
2009-11-26 04:54:44 +08:00
|
|
|
ReadBackedPileup pileup = context.getPileup();
|
2009-11-25 11:51:41 +08:00
|
|
|
SAMPileupRecord truePileup = getTruePileup( tracker );
|
2009-05-23 05:20:24 +08:00
|
|
|
|
2009-04-17 11:13:11 +08:00
|
|
|
if ( truePileup == null ) {
|
2009-11-26 04:54:44 +08:00
|
|
|
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBase(), false));
|
2009-04-17 11:13:11 +08:00
|
|
|
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
|
|
|
|
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
|
2009-11-25 11:51:41 +08:00
|
|
|
context.getLocation(), new String(pileup.getBases())));
|
2009-04-17 11:13:11 +08:00
|
|
|
}
|
|
|
|
|
} else {
|
2009-11-25 11:51:41 +08:00
|
|
|
String pileupDiff = pileupDiff(pileup, truePileup, true);
|
2009-04-17 11:13:11 +08:00
|
|
|
if ( pileupDiff != null ) {
|
2009-11-26 04:54:44 +08:00
|
|
|
out.printf("%s vs. %s%n", pileup.getPileupString(ref.getBase(), true), truePileup.getPileupString());
|
2009-04-17 11:13:11 +08:00
|
|
|
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
|
|
|
|
throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff));
|
|
|
|
|
}
|
|
|
|
|
}
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return pileup.size();
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-25 11:51:41 +08:00
|
|
|
private static String maybeSorted( final String x, boolean sortMe )
|
|
|
|
|
{
|
|
|
|
|
if ( sortMe ) {
|
|
|
|
|
byte[] bytes = x.getBytes();
|
|
|
|
|
Arrays.sort(bytes);
|
|
|
|
|
return new String(bytes);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return x;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String pileupDiff(final ReadBackedPileup a, final SAMPileupRecord b, boolean orderDependent)
|
|
|
|
|
{
|
|
|
|
|
if ( a.size() != b.size() )
|
|
|
|
|
return "Sizes not equal";
|
|
|
|
|
if ( a.getLocation().compareTo(b.getLocation()) != 0 )
|
|
|
|
|
return "Locations not equal";
|
|
|
|
|
|
|
|
|
|
String aBases = maybeSorted(new String(a.getBases()), ! orderDependent );
|
|
|
|
|
String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent );
|
|
|
|
|
if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) )
|
|
|
|
|
return "Bases not equal";
|
|
|
|
|
|
|
|
|
|
String aQuals = maybeSorted(new String(a.getQuals()), ! orderDependent );
|
|
|
|
|
String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent );
|
|
|
|
|
if ( ! aQuals.equals(bQuals) )
|
|
|
|
|
return "Quals not equal";
|
|
|
|
|
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-15 06:13:10 +08:00
|
|
|
// Given result of map function
|
|
|
|
|
public ValidationStats reduceInit() { return new ValidationStats(); }
|
|
|
|
|
public ValidationStats reduce(Integer value, ValidationStats sum) {
|
|
|
|
|
sum.nLoci++;
|
|
|
|
|
sum.nBases += value;
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
2009-05-23 05:20:24 +08:00
|
|
|
|
|
|
|
|
public ValidationStats treeReduce( ValidationStats lhs, ValidationStats rhs ) {
|
|
|
|
|
ValidationStats combined = new ValidationStats();
|
|
|
|
|
combined.nLoci = lhs.nLoci + rhs.nLoci;
|
|
|
|
|
combined.nBases = lhs.nBases + rhs.nBases;
|
|
|
|
|
return combined;
|
|
|
|
|
}
|
2009-05-24 04:50:28 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Extracts the true pileup data from the given rodSAMPileup. Note that this implementation
|
|
|
|
|
* assumes that the genotype will only be point or indel.
|
|
|
|
|
* @param tracker ROD tracker from which to extract pileup data.
|
|
|
|
|
* @return True pileup data.
|
|
|
|
|
*/
|
2009-11-25 11:51:41 +08:00
|
|
|
private SAMPileupRecord getTruePileup( RefMetaDataTracker tracker ) {
|
2009-05-24 04:50:28 +08:00
|
|
|
rodSAMPileup pileup = (rodSAMPileup)tracker.lookup("pileup", null);
|
2009-05-26 23:58:21 +08:00
|
|
|
|
|
|
|
|
if( pileup == null )
|
|
|
|
|
return null;
|
|
|
|
|
|
2009-05-24 04:50:28 +08:00
|
|
|
if( pileup.hasPointGenotype() )
|
2009-11-25 11:51:41 +08:00
|
|
|
return (SAMPileupRecord)pileup.getPointGenotype();
|
2009-05-24 04:50:28 +08:00
|
|
|
else if( pileup.hasIndelGenotype() )
|
2009-11-25 11:51:41 +08:00
|
|
|
return (SAMPileupRecord)pileup.getIndelGenotype();
|
2009-05-24 04:50:28 +08:00
|
|
|
else
|
|
|
|
|
throw new StingException("Unsupported pileup type: " + pileup);
|
|
|
|
|
}
|
2009-04-15 06:13:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Mutable pair of counters: how many loci were seen and how many bases covered them.
 *
 * <p>The fields are public and are updated directly by the code that accumulates them.
 */
class ValidationStats {
    /** Number of loci counted so far. */
    public long nLoci = 0;

    /** Total number of bases counted over those loci. */
    public long nBases = 0;

    /** Creates a statistics object with both counters at zero. */
    public ValidationStats() {
    }

    /**
     * Formats the counters as a one-line summary.
     *
     * @return the summary, terminated by the platform line separator.
     */
    @Override
    public String toString() {
        return String.format("Validated %d sites covered by %d bases%n", nLoci, nBases);
    }
}
|