diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 7c436ce44..9edf5b5d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -32,16 +32,11 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; -import java.io.File; -import java.io.FileWriter; -import java.io.PrintStream; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 84cb69b07..74cbfa05f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -36,12 +36,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; -import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; -import org.broadinstitute.sting.utils.codecs.refseq.Transcript; -import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; -import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; @@ -51,6 +47,9 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; +import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; +import org.broadinstitute.sting.utils.codecs.refseq.Transcript; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; @@ -265,7 +264,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Set headerInfo = new HashSet(); // first, the basic info - headerInfo.add(new VCFHeaderLine("source", "IndelGenotyperV2")); + headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector")); headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); // FORMAT and INFO fields @@ -283,10 +282,10 @@ public class SomaticIndelDetectorWalker extends ReadWalker { args.addAll(getToolkit().getFilters()); Map commandLineArgs = getToolkit().getApproximateCommandLineArguments(args); for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) - headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue())); + headerInfo.add(new VCFHeaderLine(String.format("SID_%s", commandLineArg.getKey()), commandLineArg.getValue())); // also, the list of input bams for ( String fileName : getToolkit().getArguments().samFiles ) - headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName)); + headerInfo.add(new VCFHeaderLine("SID_bam_file_used", fileName)); return headerInfo; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 174e810c2..e04f5bc4b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -170,9 +170,9 @@ public class TableRecalibrationWalker extends ReadWalker requestedCovariates = new ArrayList(); // List of covariates to be used in this calculation - private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); - private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); - private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); + public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); + public static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); + public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); public static final String EOF_MARKER = "EOF"; private long numReadsWithMalformedColorSpace = 0; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 11a59de10..0106442e0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -4,7 +4,6 @@ import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 43b07476d..c86b91b79 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -36,6 +36,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, // for ParsingUtils.split protected String[] GTValueArray = new String[100]; protected String[] genotypeKeyArray = new String[100]; + protected String[] infoFieldArray = new String[1000]; protected String[] infoValueArray = new String[1000]; // for performance testing purposes @@ -351,23 +352,28 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 ) generateException("The VCF specification does not allow for whitespace in the INFO field"); - int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR); - for (int i = 0; i < infoValueSplitSize; i++) { + int infoFieldSplitSize = ParsingUtils.split(infoField, infoFieldArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR, false); + for (int i = 0; i < infoFieldSplitSize; i++) { String key; Object value; - int eqI = infoValueArray[i].indexOf("="); + int eqI = infoFieldArray[i].indexOf("="); if ( eqI != -1 ) { - key = infoValueArray[i].substring(0, eqI); - String str = infoValueArray[i].substring(eqI+1, infoValueArray[i].length()); + key = infoFieldArray[i].substring(0, eqI); + String str = infoFieldArray[i].substring(eqI+1); - // lets see if the string contains a , separator - if ( str.contains(",") ) - value = Arrays.asList(str.split(",")); - else - value = str; + // split on the INFO field separator + int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false); + if ( infoValueSplitSize == 1 ) { + value = infoValueArray[0]; + } else { + ArrayList valueList = new ArrayList(infoValueSplitSize); + for ( int j = 0; j < infoValueSplitSize; j++ ) + valueList.add(infoValueArray[j]); + value = valueList; + } } else { - key = infoValueArray[i]; + key = infoFieldArray[i]; value = true; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 91cf86c70..8e9d989cc 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -71,6 +71,7 @@ public final class VCFConstants { public static final char FIELD_SEPARATOR_CHAR = '\t'; public static final String FILTER_CODE_SEPARATOR = ";"; public static final String INFO_FIELD_ARRAY_SEPARATOR = ","; + public static final char INFO_FIELD_ARRAY_SEPARATOR_CHAR = ','; public static final String ID_FIELD_SEPARATOR = ";"; public static final String INFO_FIELD_SEPARATOR = ";"; public static final char INFO_FIELD_SEPARATOR_CHAR = ';'; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 25f0d0fe2..d1e9a236f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -888,9 +888,20 @@ public class ReadUtils { if (endsWithinCigar) fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; - // if we end outside the current cigar element, we need to check if the next element is a deletion. + // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion. else { nextCigarElement = cigarElementIterator.next(); + + // if it's an insertion, we need to clip the whole insertion before looking at the next element + if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { + readBases += nextCigarElement.getLength(); + if (!cigarElementIterator.hasNext()) + throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + + nextCigarElement = cigarElementIterator.next(); + } + + // if it's a deletion, we will pass the information on to be handled downstream. fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; } diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java index c14fea24c..cc0007439 100755 --- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java @@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; -import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index d8695cf38..cd0de27f5 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -25,16 +25,14 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import java.util.LinkedList; -import java.util.List; - /** * Created by IntelliJ IDEA. * User: roger