Merge branch 'master' of ssh://chartl@ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Christopher Hartl 2012-02-03 13:42:09 -05:00
commit aa3638ecb3
5 changed files with 62 additions and 23 deletions

View File

@@ -443,7 +443,16 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// } // }
// } // }
return eof() ? -1 : length-remaining; // If any data was copied into the buffer, return the amount of data copied.
if(remaining < length)
return length - remaining;
// Otherwise, if at eof(), return -1.
else if(eof())
return -1;
// Otherwise, we must've hit a bug in the system.
throw new ReviewedStingException("BUG: read returned no data, but eof() reports false.");
} }
public void close() { public void close() {

View File

@@ -193,6 +193,9 @@ public class LocusIteratorByState extends LocusIterator {
// we reenter in order to re-check cigarElementCounter against curElement's length // we reenter in order to re-check cigarElementCounter against curElement's length
return stepForwardOnGenome(); return stepForwardOnGenome();
} else { } else {
if (curElement != null && curElement.getOperator() == CigarOperator.D)
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString());
// Reads that contain indels model the genomeOffset as the following base in the reference. Because // Reads that contain indels model the genomeOffset as the following base in the reference. Because
// we fall into this else block only when indels end the read, increment genomeOffset such that the // we fall into this else block only when indels end the read, increment genomeOffset such that the
// current offset of this read is the next ref base after the end of the indel. This position will // current offset of this read is the next ref base after the end of the indel. This position will
@@ -228,7 +231,7 @@ public class LocusIteratorByState extends LocusIterator {
// we see insertions only once, when we step right onto them; the position on the read is scrolled // we see insertions only once, when we step right onto them; the position on the read is scrolled
// past the insertion right after that // past the insertion right after that
if (eventDelayedFlag > 1) if (eventDelayedFlag > 1)
throw new UserException.MalformedBAM(read, "Adjacent I/D events in read " + read.getReadName()); throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString()));
insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + curElement.getLength()); insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + curElement.getLength());
eventLength = curElement.getLength(); eventLength = curElement.getLength();
eventStart = readOffset; eventStart = readOffset;
@@ -247,7 +250,7 @@ public class LocusIteratorByState extends LocusIterator {
// generate an extended event only if we just stepped into the deletion (i.e. don't // generate an extended event only if we just stepped into the deletion (i.e. don't
// generate the event at every deleted position on the ref, that's what cigarElementCounter==1 is for!) // generate the event at every deleted position on the ref, that's what cigarElementCounter==1 is for!)
if (eventDelayedFlag > 1) if (eventDelayedFlag > 1)
throw new UserException.MalformedBAM(read, "Adjacent I/D events in read " + read.getReadName()); throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString()));
eventLength = curElement.getLength(); eventLength = curElement.getLength();
eventDelayedFlag = 2; // deletion on the ref causes an immediate return, so we have to delay by 1 only eventDelayedFlag = 2; // deletion on the ref causes an immediate return, so we have to delay by 1 only
eventStart = readOffset; eventStart = readOffset;

View File

@@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@@ -17,16 +18,13 @@ import java.util.*;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: rpoplin, lfran * User: rpoplin, lfran, ebanks
* Date: 11/14/11 * Date: 11/14/11
*/ */
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
private Set<Sample> trios = null; private Set<Sample> trios = null;
private final static int REF = 0;
private final static int HET = 1;
private final static int HOM = 2;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
@@ -38,10 +36,10 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
} }
} }
final Map<String,Object> toRet = new HashMap<String,Object>(1); final Map<String, Object> toRet = new HashMap<String, Object>(1);
final HashSet<Sample> triosToTest = new HashSet<Sample>(); final HashSet<Sample> triosToTest = new HashSet<Sample>();
for( final Sample child : trios) { for( final Sample child : trios ) {
final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() && final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() && vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() &&
vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods(); vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods();
@@ -65,28 +63,54 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) { private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET); double nABGivenABandBB = 0.0;
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET); double nBBGivenABandBB = 0.0;
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); double nAAGivenABandAB = 0.0;
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); double nBBGivenABandAB = 0.0;
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF); double nAAGivenAAandAB = 0.0;
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF); double nABGivenAAandAB = 0.0;
// for each pair of alleles, add the likelihoods
int numAlleles = vc.getNAlleles();
for ( int allele1 = 0; allele1 < numAlleles; allele1++ ) {
for ( int allele2 = allele1 + 1; allele2 < numAlleles; allele2++ ) {
// TODO -- cache these for better performance
final int HOM1index = determineHomIndex(allele1, numAlleles);
final int HETindex = HOM1index + (allele2 - allele1);
final int HOM2index = determineHomIndex(allele2, numAlleles);
nABGivenABandBB += calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HETindex, HOM2index, HETindex);
nBBGivenABandBB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HOM2index, HOM2index, HETindex);
nAAGivenABandAB += calculateNChildren(vc, triosToTest, HOM1index, HETindex, HETindex);
nBBGivenABandAB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HETindex);
nAAGivenAAandAB += calculateNChildren(vc, triosToTest, HOM1index, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HOM1index, HETindex, HOM1index);
nABGivenAAandAB += calculateNChildren(vc, triosToTest, HETindex, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM1index);
}
}
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
return (numer * numer) / denom; return (numer * numer) / denom;
} }
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) { private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
final double likelihoodVector[] = new double[triosToTest.size()]; final double likelihoodVector[] = new double[triosToTest.size()];
int iii = 0; int iii = 0;
for( final Sample child : triosToTest ) { for( final Sample child : triosToTest ) {
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
} }
return MathUtils.sumLog10(likelihoodVector); return MathUtils.sumLog10(likelihoodVector);
} }
/**
 * Returns the offset of the homozygous genotype for the given allele within a
 * diploid genotype-likelihood vector laid out row-major by first allele
 * (AA, AB, AC, ..., BB, BC, ..., CC, ...).
 *
 * The hom index for allele k is the count of genotypes whose first allele
 * precedes k, i.e. numAlleles + (numAlleles - 1) + ... over alleleIndex terms.
 *
 * @param alleleIndex zero-based index of the allele whose hom genotype is sought
 * @param numAlleles  total number of alleles segregating at the site
 * @return index of the allele's homozygous genotype in the likelihood vector
 */
private static int determineHomIndex(final int alleleIndex, int numAlleles) {
    // Walk the triangular layout: each earlier first-allele "row" shrinks by one.
    int homOffset = 0;
    int rowWidth = numAlleles;
    for ( int row = 0; row < alleleIndex; row++ ) {
        homOffset += rowWidth;
        rowWidth--;
    }
    return homOffset;
}
} }

View File

@@ -357,11 +357,11 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
final GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead(); final GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead();
int offset = p.getOffset(); int offset = p.getOffset();
if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) { if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
continue; continue;
} }
if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) ) if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) )
{ {
gatkRead.setTemporaryAttribute( SEEN_ATTRIBUTE, true ); gatkRead.setTemporaryAttribute( SEEN_ATTRIBUTE, true );
RecalDataManager.parseSAMRecord( gatkRead, RAC ); RecalDataManager.parseSAMRecord( gatkRead, RAC );
@@ -377,7 +377,6 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
RecalDataManager.computeCovariates( gatkRead, requestedCovariates )); RecalDataManager.computeCovariates( gatkRead, requestedCovariates ));
} }
// Skip this position if base quality is zero // Skip this position if base quality is zero
if( gatkRead.getBaseQualities()[offset] > 0 ) { if( gatkRead.getBaseQualities()[offset] > 0 ) {

View File

@@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
@@ -146,9 +147,12 @@ public class PileupElement implements Comparable<PileupElement> {
public int getRepresentativeCount() { public int getRepresentativeCount() {
int representativeCount = 1; int representativeCount = 1;
if (read.isReducedRead() && !isInsertionAtBeginningOfRead()) if (read.isReducedRead() && !isInsertionAtBeginningOfRead()) {
representativeCount = (isDeletion()) ? Math.round((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2) : read.getReducedCount(offset); if (isDeletion() && (offset + 1 >= read.getReadLength()) ) // deletion in the end of the read
throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString()));
representativeCount = (isDeletion()) ? Math.round((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2) : read.getReducedCount(offset);
}
return representativeCount; return representativeCount;
} }