Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
d9641e3d57
|
|
@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
|
|||
|
||||
if ( requestedStrat == "Sample" ) {
|
||||
perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
|
||||
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
|
||||
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
|
||||
} else { # by AlleleCount
|
||||
perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
|
||||
perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.util.List;
|
||||
|
|
@ -147,6 +148,9 @@ public class ArgumentDefinition {
|
|||
this.exclusiveOf = exclusiveOf;
|
||||
this.validation = validation;
|
||||
this.validOptions = validOptions;
|
||||
|
||||
validateName(shortName);
|
||||
validateName(fullName);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -192,6 +196,9 @@ public class ArgumentDefinition {
|
|||
else
|
||||
shortName = null;
|
||||
|
||||
validateName(shortName);
|
||||
validateName(fullName);
|
||||
|
||||
this.ioType = ioType;
|
||||
this.argumentType = argumentType;
|
||||
this.fullName = fullName;
|
||||
|
|
@ -277,4 +284,14 @@ public class ArgumentDefinition {
|
|||
String validation = (String)CommandLineUtils.getValue(annotation, "validation");
|
||||
return validation.trim().length() > 0 ? validation.trim() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure the argument's name is valid
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
private void validateName(final String name) {
|
||||
if ( name != null && name.startsWith("-") )
|
||||
throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,23 +62,24 @@ public class ReferenceDataSource {
|
|||
* @param fastaFile Fasta file to be used as reference
|
||||
*/
|
||||
public ReferenceDataSource(File fastaFile) {
|
||||
|
||||
// does the fasta file exist? check that first...
|
||||
if (!fastaFile.exists())
|
||||
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
|
||||
|
||||
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
||||
File dictFile;
|
||||
if (fastaFile.getAbsolutePath().endsWith("fa")) {
|
||||
dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
|
||||
}
|
||||
else
|
||||
dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
|
||||
final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
|
||||
|
||||
final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
|
||||
|
||||
// determine the name for the dict file
|
||||
final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
|
||||
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
|
||||
|
||||
/*
|
||||
if index file does not exist, create it manually
|
||||
*/
|
||||
* if index file does not exist, create it manually
|
||||
*/
|
||||
if (!indexFile.exists()) {
|
||||
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
|
||||
|
||||
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
|
||||
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
|
||||
try {
|
||||
|
|
@ -95,7 +96,7 @@ public class ReferenceDataSource {
|
|||
}
|
||||
catch(UserException e) {
|
||||
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
|
||||
throw e;
|
||||
throw e;
|
||||
}
|
||||
catch (Exception e) {
|
||||
// If lock creation succeeded, the failure must have been generating the index.
|
||||
|
|
@ -114,6 +115,8 @@ public class ReferenceDataSource {
|
|||
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
|
||||
*/
|
||||
if (!dictFile.exists()) {
|
||||
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
|
||||
|
||||
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
|
||||
|
||||
/*
|
||||
|
|
@ -218,9 +221,9 @@ public class ReferenceDataSource {
|
|||
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
|
||||
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
|
||||
shards.add(new LocusShard(parser,
|
||||
readsDataSource,
|
||||
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
|
||||
null));
|
||||
readsDataSource,
|
||||
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
|
||||
null));
|
||||
}
|
||||
}
|
||||
return shards;
|
||||
|
|
|
|||
|
|
@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Filter out reads with wonky cigar strings.
|
||||
*
|
||||
* - No reads with Hard/Soft clips in the middle of the cigar
|
||||
* - No reads starting with deletions (with or without preceding clips)
|
||||
* - No reads ending in deletions (with or without follow-up clips)
|
||||
* - No reads that are fully hard or soft clipped
|
||||
* - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
|
||||
*
|
||||
* ps: apparently an empty cigar is okay...
|
||||
*
|
||||
* @author ebanks
|
||||
* @version 0.1
|
||||
*/
|
||||
|
|
@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
|
|||
|
||||
public boolean filterOut(final SAMRecord rec) {
|
||||
final Cigar c = rec.getCigar();
|
||||
if( c.isEmpty() ) { return false; } // if there is no Cigar then it can't be bad
|
||||
|
||||
boolean previousElementWasIndel = false;
|
||||
CigarOperator lastOp = c.getCigarElement(0).getOperator();
|
||||
|
||||
if (lastOp == CigarOperator.D) // filter out reads starting with deletion
|
||||
return true;
|
||||
|
||||
for (CigarElement ce : c.getCigarElements()) {
|
||||
CigarOperator op = ce.getOperator();
|
||||
if (op == CigarOperator.D || op == CigarOperator.I) {
|
||||
if (previousElementWasIndel)
|
||||
return true; // filter out reads with adjacent I/D
|
||||
|
||||
previousElementWasIndel = true;
|
||||
}
|
||||
else // this is a regular base (match/mismatch/hard or soft clip)
|
||||
previousElementWasIndel = false; // reset the previous element
|
||||
|
||||
lastOp = op;
|
||||
// if there is no Cigar then it can't be bad
|
||||
if( c.isEmpty() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return lastOp == CigarOperator.D;
|
||||
Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
|
||||
|
||||
CigarOperator firstOp = CigarOperator.H;
|
||||
while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
|
||||
CigarOperator op = elementIterator.next().getOperator();
|
||||
|
||||
// No reads with Hard/Soft clips in the middle of the cigar
|
||||
if (firstOp != CigarOperator.H && op == CigarOperator.H) {
|
||||
return true;
|
||||
}
|
||||
firstOp = op;
|
||||
}
|
||||
|
||||
// No reads starting with deletions (with or without preceding clips)
|
||||
if (firstOp == CigarOperator.D) {
|
||||
return true;
|
||||
}
|
||||
|
||||
boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
|
||||
boolean previousElementWasIndel = firstOp == CigarOperator.I;
|
||||
CigarOperator lastOp = firstOp;
|
||||
CigarOperator previousOp = firstOp;
|
||||
|
||||
while (elementIterator.hasNext()) {
|
||||
CigarOperator op = elementIterator.next().getOperator();
|
||||
|
||||
if (op != CigarOperator.S && op != CigarOperator.H) {
|
||||
|
||||
// No reads with Hard/Soft clips in the middle of the cigar
|
||||
if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
|
||||
return true;
|
||||
|
||||
lastOp = op;
|
||||
|
||||
if (!hasMeaningfulElements && op.consumesReadBases()) {
|
||||
hasMeaningfulElements = true;
|
||||
}
|
||||
|
||||
if (op == CigarOperator.I || op == CigarOperator.D) {
|
||||
|
||||
// No reads that have consecutive indels in the cigar (II, DD, ID or DI)
|
||||
if (previousElementWasIndel) {
|
||||
return true;
|
||||
}
|
||||
previousElementWasIndel = true;
|
||||
}
|
||||
else {
|
||||
previousElementWasIndel = false;
|
||||
}
|
||||
}
|
||||
// No reads with Hard/Soft clips in the middle of the cigar
|
||||
else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
|
||||
return true;
|
||||
}
|
||||
|
||||
previousOp = op;
|
||||
}
|
||||
|
||||
// No reads ending in deletions (with or without follow-up clips)
|
||||
// No reads that are fully hard or soft clipped
|
||||
return lastOp == CigarOperator.D || !hasMeaningfulElements;
|
||||
}
|
||||
}
|
||||
|
|
@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return stepForwardOnGenome();
|
||||
} else {
|
||||
if (curElement != null && curElement.getOperator() == CigarOperator.D)
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
|
||||
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
|
||||
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
|
||||
// we fall into this else block only when indels end the read, increment genomeOffset such that the
|
||||
|
|
@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
break;
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
|
||||
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
|
||||
// should be the same as N case
|
||||
genomeOffset++;
|
||||
done = true;
|
||||
|
|
|
|||
|
|
@ -75,8 +75,6 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
||||
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
|
||||
|
||||
ReadView reads = new ReadView(dataProvider);
|
||||
ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
|
||||
|
|
@ -91,7 +89,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
ReferenceContext refContext = null;
|
||||
|
||||
// get the array of characters for the reference sequence, since we're a mapped read
|
||||
if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
|
||||
if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
|
||||
refContext = reference.getReferenceContext(read);
|
||||
|
||||
// update the number of reads we've seen
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import java.util.List;
|
|||
*/
|
||||
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
@ActiveRegionExtension(extension=50,maxRegion=1500)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
||||
|
|
|
|||
|
|
@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
* Allow user to choose between a number of different data sources.
|
||||
*/
|
||||
public enum DataSource {
|
||||
/**
|
||||
* Does this walker require read (BAM) data to work?
|
||||
*/
|
||||
READS,
|
||||
|
||||
/**
|
||||
* Does this walker require reference data to work?
|
||||
*/
|
||||
REFERENCE,
|
||||
REFERENCE_BASES, // Do I actually need the reference bases passed to the walker?
|
||||
|
||||
/**
|
||||
* Does this walker require reference order data (VCF) to work?
|
||||
*/
|
||||
REFERENCE_ORDERED_DATA
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE})
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
@RemoveProgramRecords
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
* Time: 2:52:28 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
public boolean requiresOrderedReads() { return false; }
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@Requires({DataSource.REFERENCE})
|
||||
@Allows(DataSource.REFERENCE)
|
||||
public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
|
||||
}
|
||||
|
|
@ -107,7 +107,7 @@ import java.util.ArrayList;
|
|||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
|
||||
@ArgumentCollection
|
||||
|
|
|
|||
|
|
@ -474,10 +474,17 @@ public class UnifiedGenotyperEngine {
|
|||
// add the MLE AC and AF annotations
|
||||
if ( alleleCountsofMLE.size() > 0 ) {
|
||||
attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
|
||||
final double AN = (double)builder.make().getCalledChrCount();
|
||||
final int AN = builder.make().getCalledChrCount();
|
||||
|
||||
// let's sanity check that we don't have an invalid MLE value in there
|
||||
for ( int MLEAC : alleleCountsofMLE ) {
|
||||
if ( MLEAC > AN )
|
||||
throw new ReviewedStingException(String.format("MLEAC value (%d) is larger than AN (%d) at position %s:%d", MLEAC, AN, loc.getContig(), loc.getStart()));
|
||||
}
|
||||
|
||||
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
|
||||
for ( int AC : alleleCountsofMLE )
|
||||
MLEfrequencies.add((double)AC / AN);
|
||||
MLEfrequencies.add((double)AC / (double)AN);
|
||||
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -645,7 +645,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
bestChildGenotype.clear();
|
||||
bestChildGenotype.add(childGenotype.getKey());
|
||||
}
|
||||
else if(MathUtils.compareDoubles(configurationLikelihood, bestConfigurationLikelihood) == 0) {
|
||||
else if(configurationLikelihood == bestConfigurationLikelihood) {
|
||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
||||
bestSecondParentGenotype.add(secondParentGenotype.getKey());
|
||||
bestChildGenotype.add(childGenotype.getKey());
|
||||
|
|
|
|||
|
|
@ -118,6 +118,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
|
||||
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
|
||||
|
||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||
decodeSiteLoc(builder);
|
||||
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ public final class BCF2Decoder {
|
|||
public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) {
|
||||
try {
|
||||
final int bytesRead = (int)stream.skip(blockSizeInBytes);
|
||||
validateReadBytes(bytesRead, blockSizeInBytes);
|
||||
validateReadBytes(bytesRead, 1, blockSizeInBytes);
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
|
||||
}
|
||||
|
|
@ -316,17 +316,37 @@ public final class BCF2Decoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Read all bytes for a BCF record block into a byte[], and return it
|
||||
*
|
||||
* @param inputStream
|
||||
* @return
|
||||
* Is smart about reading from the stream multiple times to fill the buffer, if necessary
|
||||
*
|
||||
* @param blockSizeInBytes number of bytes to read
|
||||
* @param inputStream the stream to read from
|
||||
* @return a non-null byte[] containing exactly blockSizeInBytes bytes from the inputStream
|
||||
*/
|
||||
private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
|
||||
@Requires({"blockSizeInBytes >= 0", "inputStream != null"})
|
||||
@Ensures("result != null")
|
||||
private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
|
||||
assert blockSizeInBytes >= 0;
|
||||
|
||||
final byte[] record = new byte[blockSizeInBytes];
|
||||
try {
|
||||
final int bytesRead = inputStream.read(record);
|
||||
validateReadBytes(bytesRead, blockSizeInBytes);
|
||||
int bytesRead = 0;
|
||||
int nReadAttempts = 0; // keep track of how many times we've read
|
||||
|
||||
// because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF
|
||||
while ( bytesRead < blockSizeInBytes ) {
|
||||
final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead);
|
||||
if ( read1 == -1 )
|
||||
validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
|
||||
else
|
||||
bytesRead += read1;
|
||||
}
|
||||
|
||||
if ( nReadAttempts > 1 ) // TODO -- remove me
|
||||
logger.warn("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
|
||||
|
||||
validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
|
||||
}
|
||||
|
|
@ -334,14 +354,20 @@ public final class BCF2Decoder {
|
|||
return record;
|
||||
}
|
||||
|
||||
private final static void validateReadBytes(final int actuallyRead, final int expected) {
|
||||
/**
|
||||
* Make sure we read the right number of bytes, or throw an error
|
||||
*
|
||||
* @param actuallyRead
|
||||
* @param nReadAttempts
|
||||
* @param expected
|
||||
*/
|
||||
private static void validateReadBytes(final int actuallyRead, final int nReadAttempts, final int expected) {
|
||||
assert expected >= 0;
|
||||
|
||||
if ( actuallyRead < expected ) {
|
||||
throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
|
||||
actuallyRead == -1 ?
|
||||
"premature end of input stream" :
|
||||
String.format("expected %d bytes but read only %d", expected, actuallyRead)));
|
||||
throw new UserException.MalformedBCF2(
|
||||
String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations",
|
||||
expected, actuallyRead, nReadAttempts));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -340,6 +340,17 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class CouldNotCreateReferenceFAIorDictForGzippedRef extends UserException {
|
||||
public CouldNotCreateReferenceFAIorDictForGzippedRef(final File f) {
|
||||
super("Although the GATK can process .gz reference sequences, it currently cannot create FAI " +
|
||||
"or DICT files for them. In order to use the GATK with reference.fasta.gz you will need to " +
|
||||
"create .dict and .fai files for reference.fasta.gz and name them reference.fasta.gz.fai and " +
|
||||
"reference.dict. Potentially the easiest way to do this is to uncompress reference.fasta, " +
|
||||
"run the GATK to create the .dict and .fai files, and copy them to the appropriate location. " +
|
||||
"Sorry for the inconvenience.");
|
||||
}
|
||||
}
|
||||
|
||||
public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile {
|
||||
public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
|
||||
super(f, "could not be written because an exclusive file lock could not be obtained. " +
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ import java.util.Arrays;
|
|||
* Thread-safe! Uses a lock object to protect write and access to the cache.
|
||||
*/
|
||||
public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
||||
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
|
||||
|
||||
/** global enable flag */
|
||||
private static final boolean USE_CACHE = true;
|
||||
|
||||
|
|
@ -125,7 +127,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
|
|||
public void printEfficiency() {
|
||||
// comment out to disable tracking
|
||||
if ( (cacheHits + cacheMisses) % PRINT_FREQUENCY == 0 ) {
|
||||
System.out.printf("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency());
|
||||
logger.info(String.format("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import net.sf.samtools.Cigar;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipperTestUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Checks that the Bad Cigar filter works for all kinds of wonky cigars
|
||||
*
|
||||
|
|
@ -14,6 +17,29 @@ import org.testng.annotations.Test;
|
|||
*/
|
||||
public class BadCigarFilterUnitTest {
|
||||
|
||||
public static final String[] BAD_CIGAR_LIST = {
|
||||
"2D4M", // starting with multiple deletions
|
||||
"4M2D", // ending with multiple deletions
|
||||
"3M1I1D", // adjacent indels AND ends in deletion
|
||||
"1M1I1D2M", // adjacent indels I->D
|
||||
"1M1D2I1M", // adjacent indels D->I
|
||||
"1M1I2M1D", // ends in single deletion with insertion in the middle
|
||||
"4M1D", // ends in single deletion
|
||||
"1D4M", // starts with single deletion
|
||||
"2M1D1D2M", // adjacent D's
|
||||
"1M1I1I1M", // adjacent I's
|
||||
"1H1D4M", // starting with deletion after H
|
||||
"1S1D3M", // starting with deletion after S
|
||||
"1H1S1D3M", // starting with deletion after HS
|
||||
"4M1D1H", // ending with deletion before H
|
||||
"3M1D1S", // ending with deletion before S
|
||||
"3M1D1S1H", // ending with deletion before HS
|
||||
"10M2H10M", // H in the middle
|
||||
"10M2S10M", // S in the middle
|
||||
"1H1S10M2S10M1S1H", // deceiving S in the middle
|
||||
"1H1S10M2H10M1S1H" // deceiving H in the middle
|
||||
};
|
||||
|
||||
BadCigarFilter filter;
|
||||
|
||||
@BeforeClass
|
||||
|
|
@ -21,40 +47,20 @@ public class BadCigarFilterUnitTest {
|
|||
filter = new BadCigarFilter();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = true)
|
||||
public void testWonkyCigars () {
|
||||
byte[] bases = {'A', 'A', 'A', 'A'};
|
||||
byte[] quals = {30, 30, 30, 30};
|
||||
GATKSAMRecord read;
|
||||
// starting with multiple deletions
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2D4M");
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
for (String cigarString : BAD_CIGAR_LIST) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString);
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
}
|
||||
}
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M2D"); // ending with multiple deletions
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "3M1I1D"); // adjacent indels AND ends in deletion
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1D2M"); // adjacent indels I->D
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1D2I1M"); // adjacent indels D->I
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I2M1D"); // ends in single deletion with insertion in the middle
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M1D"); // ends in single deletion
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1D4M"); // starts with single deletion
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2M1D1D2M"); // adjacent D's
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
|
||||
read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1I1M"); // adjacent I's
|
||||
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
|
||||
@Test(enabled = true)
|
||||
public void testGoodCigars() {
|
||||
List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(10);
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
Assert.assertFalse(filter.filterOut(read), read.getCigarString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -37,17 +38,22 @@ public class ReadClipperTestUtils {
|
|||
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* This function generates every valid permutation of cigar strings with a given length.
|
||||
*
|
||||
* A valid cigar object obeys the following rules:
|
||||
* - No Hard/Soft clips in the middle of the read
|
||||
* - No deletions in the beginning / end of the read
|
||||
* - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
|
||||
*
|
||||
* @param maximumLength the maximum number of elements in the cigar
|
||||
* @return a list with all valid Cigar objects
|
||||
*/
|
||||
public static GATKSAMRecord makeReadFromCigar(String cigarString) {
|
||||
return makeReadFromCigar(cigarFromString(cigarString));
|
||||
}
|
||||
|
||||
/**
|
||||
* This function generates every valid permutation of cigar strings with a given length.
|
||||
*
|
||||
* A valid cigar object obeys the following rules:
|
||||
* - No Hard/Soft clips in the middle of the read
|
||||
* - No deletions in the beginning / end of the read
|
||||
* - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
|
||||
* - No consecutive I/D elements
|
||||
*
|
||||
* @param maximumLength the maximum number of elements in the cigar
|
||||
* @return a list with all valid Cigar objects
|
||||
*/
|
||||
public static List<Cigar> generateCigarList(int maximumLength) {
|
||||
int numCigarElements = cigarElements.length;
|
||||
LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
|
||||
|
|
@ -137,7 +143,10 @@ public class ReadClipperTestUtils {
|
|||
CigarElement lastElement = null;
|
||||
int lastElementLength = 0;
|
||||
for (CigarElement cigarElement : rawCigar.getCigarElements()) {
|
||||
if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator())
|
||||
if (lastElement != null &&
|
||||
((lastElement.getOperator() == cigarElement.getOperator()) ||
|
||||
(lastElement.getOperator() == CigarOperator.I && cigarElement.getOperator() == CigarOperator.D) ||
|
||||
(lastElement.getOperator() == CigarOperator.D && cigarElement.getOperator() == CigarOperator.I)))
|
||||
lastElementLength += cigarElement.getLength();
|
||||
else
|
||||
{
|
||||
|
|
@ -191,7 +200,7 @@ public class ReadClipperTestUtils {
|
|||
/**
|
||||
* Checks whether or not the read has any cigar element that is not H or S
|
||||
*
|
||||
* @param read
|
||||
* @param read the read
|
||||
* @return true if it has any M, I or D, false otherwise
|
||||
*/
|
||||
public static boolean readHasNonClippedBases(GATKSAMRecord read) {
|
||||
|
|
@ -201,5 +210,79 @@ public class ReadClipperTestUtils {
|
|||
return false;
|
||||
}
|
||||
|
||||
public static Cigar cigarFromString(String cigarString) {
|
||||
Cigar cigar = new Cigar();
|
||||
|
||||
boolean isNumber = false;
|
||||
int number = 0;
|
||||
for (int i = 0; i < cigarString.length(); i++) {
|
||||
char x = cigarString.charAt(i);
|
||||
|
||||
if (x >= '0' && x <='9') {
|
||||
if (isNumber) {
|
||||
number *= 10;
|
||||
}
|
||||
else {
|
||||
isNumber = true;
|
||||
}
|
||||
number += x - '0';
|
||||
}
|
||||
|
||||
else {
|
||||
CigarElement e;
|
||||
switch (x) {
|
||||
case 'M':
|
||||
case 'm':
|
||||
e = new CigarElement(number, CigarOperator.M);
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
case 'i':
|
||||
e = new CigarElement(number, CigarOperator.I);
|
||||
break;
|
||||
|
||||
case 'D':
|
||||
case 'd':
|
||||
e = new CigarElement(number, CigarOperator.D);
|
||||
break;
|
||||
|
||||
case 'S':
|
||||
case 's':
|
||||
e = new CigarElement(number, CigarOperator.S);
|
||||
break;
|
||||
|
||||
case 'N':
|
||||
case 'n':
|
||||
e = new CigarElement(number, CigarOperator.N);
|
||||
break;
|
||||
|
||||
case 'H':
|
||||
case 'h':
|
||||
e = new CigarElement(number, CigarOperator.H);
|
||||
break;
|
||||
|
||||
case 'P':
|
||||
case 'p':
|
||||
e = new CigarElement(number, CigarOperator.P);
|
||||
break;
|
||||
|
||||
case '=':
|
||||
e = new CigarElement(number, CigarOperator.EQ);
|
||||
break;
|
||||
|
||||
case 'X':
|
||||
case 'x':
|
||||
e = new CigarElement(number, CigarOperator.X);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")");
|
||||
}
|
||||
cigar.add(e);
|
||||
}
|
||||
}
|
||||
return cigar;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner
|
|||
import java.util.regex.Pattern
|
||||
import java.lang.StringBuffer
|
||||
import java.util.Date
|
||||
import com.sun.jna.{Structure, StringArray, NativeLong}
|
||||
import com.sun.jna.{Pointer, Structure, StringArray, NativeLong}
|
||||
import com.sun.jna.ptr.IntByReference
|
||||
|
||||
/**
|
||||
|
|
@ -295,9 +295,17 @@ object Lsf706JobRunner extends Logging {
|
|||
// the platform LSF startTimes are in seconds, not milliseconds, so convert to the java convention
|
||||
runner.getRunInfo.startTime = new Date(jobInfo.startTime.longValue * 1000)
|
||||
runner.getRunInfo.doneTime = new Date(jobInfo.endTime.longValue * 1000)
|
||||
val exHostsRaw = jobInfo.exHosts.getStringArray(0)
|
||||
//logger.warn("exHostsRaw = " + exHostsRaw)
|
||||
val exHostsList = exHostsRaw.toSeq
|
||||
|
||||
val exHostsList =
|
||||
if (jobInfo.numExHosts != 1) {
|
||||
// this is necessary because
|
||||
val exHostsString = "multipleHosts_" + jobInfo.numExHosts
|
||||
logger.debug("numExHosts = " + jobInfo.numExHosts + " != 1 for job " + runner.jobId + ", cannot safely get exhosts, setting to " + exHostsString)
|
||||
List(exHostsString)
|
||||
} else {
|
||||
jobInfo.exHosts.getStringArray(0).toSeq
|
||||
}
|
||||
|
||||
//logger.warn("exHostsList = " + exHostsList)
|
||||
val exHosts = exHostsList.reduceLeft(_ + "," + _)
|
||||
//logger.warn("exHosts = " + exHosts)
|
||||
|
|
|
|||
Loading…
Reference in New Issue