Fix for instability in output of fasta alternative reference maker when snpmask and snp files are provided and have overlapping records. The order of the records changed due to optimization of the refmetadatatracker, and uncovered this non-determinanism. Now preferrentially masks out includes sites from snps before considering masking out sites in snpmask

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5669 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-04-20 21:54:09 +00:00
parent 8619f49d20
commit 29857f5ba6
1 changed files with 24 additions and 10 deletions

View File

@ -33,6 +33,8 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.Collection;
/**
* Generates an alternative reference sequence over the specified interval. Given variant ROD tracks,
@ -55,19 +57,31 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
String refBase = String.valueOf(ref.getBaseAsChar());
for ( VariantContext vc : tracker.getAllVariantContexts(ref) ) {
// if we have multiple variants at a locus, just take the first one we see
if (!vc.getSource().startsWith("snpmask") && vc.isDeletion()) {
deletionBasesRemaining = vc.getReference().length();
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if (!vc.getSource().startsWith("snpmask") && vc.isInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), (vc.getSource().startsWith("snpmask") ? "N" : vc.getAlternateAllele(0).toString()));
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref);
// Check to see if we have a called snp
for ( VariantContext vc : vcs ) {
if ( !vc.getSource().startsWith("snpmask") ) {
if ( vc.isDeletion()) {
deletionBasesRemaining = vc.getReference().length();
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
}
}
}
// if we don't have a called site, and we have a mask at this site, mask it
for ( VariantContext vc : vcs ) {
if ( vc.getSource().startsWith("snpmask") && vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), "N");
}
}
// if we got here then we're just ref
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
}