Merge branch 'master' into ped

This commit is contained in:
Mark DePristo 2011-10-05 15:03:09 -07:00
commit 3226d5dc0d
9 changed files with 41 additions and 33 deletions

View File

@ -32,16 +32,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintStream;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;

View File

@ -36,12 +36,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -51,6 +47,9 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.CircularArray;
import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.collections.PrimitivePair;
@ -265,7 +264,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// first, the basic info // first, the basic info
headerInfo.add(new VCFHeaderLine("source", "IndelGenotyperV2")); headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// FORMAT and INFO fields // FORMAT and INFO fields
@ -283,10 +282,10 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
args.addAll(getToolkit().getFilters()); args.addAll(getToolkit().getFilters());
Map<String,String> commandLineArgs = getToolkit().getApproximateCommandLineArguments(args); Map<String,String> commandLineArgs = getToolkit().getApproximateCommandLineArguments(args);
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() ) for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue())); headerInfo.add(new VCFHeaderLine(String.format("SID_%s", commandLineArg.getKey()), commandLineArg.getValue()));
// also, the list of input bams // also, the list of input bams
for ( String fileName : getToolkit().getArguments().samFiles ) for ( String fileName : getToolkit().getArguments().samFiles )
headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName)); headerInfo.add(new VCFHeaderLine("SID_bam_file_used", fileName));
return headerInfo; return headerInfo;
} }

View File

@ -170,9 +170,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
///////////////////////////// /////////////////////////////
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); public static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
public static final String EOF_MARKER = "EOF"; public static final String EOF_MARKER = "EOF";
private long numReadsWithMalformedColorSpace = 0; private long numReadsWithMalformedColorSpace = 0;

View File

@ -4,7 +4,6 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -36,6 +36,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
// for ParsingUtils.split // for ParsingUtils.split
protected String[] GTValueArray = new String[100]; protected String[] GTValueArray = new String[100];
protected String[] genotypeKeyArray = new String[100]; protected String[] genotypeKeyArray = new String[100];
protected String[] infoFieldArray = new String[1000];
protected String[] infoValueArray = new String[1000]; protected String[] infoValueArray = new String[1000];
// for performance testing purposes // for performance testing purposes
@ -351,23 +352,28 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 ) if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
generateException("The VCF specification does not allow for whitespace in the INFO field"); generateException("The VCF specification does not allow for whitespace in the INFO field");
int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR); int infoFieldSplitSize = ParsingUtils.split(infoField, infoFieldArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR, false);
for (int i = 0; i < infoValueSplitSize; i++) { for (int i = 0; i < infoFieldSplitSize; i++) {
String key; String key;
Object value; Object value;
int eqI = infoValueArray[i].indexOf("="); int eqI = infoFieldArray[i].indexOf("=");
if ( eqI != -1 ) { if ( eqI != -1 ) {
key = infoValueArray[i].substring(0, eqI); key = infoFieldArray[i].substring(0, eqI);
String str = infoValueArray[i].substring(eqI+1, infoValueArray[i].length()); String str = infoFieldArray[i].substring(eqI+1);
// lets see if the string contains a , separator // split on the INFO field separator
if ( str.contains(",") ) int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
value = Arrays.asList(str.split(",")); if ( infoValueSplitSize == 1 ) {
else value = infoValueArray[0];
value = str; } else {
ArrayList<String> valueList = new ArrayList<String>(infoValueSplitSize);
for ( int j = 0; j < infoValueSplitSize; j++ )
valueList.add(infoValueArray[j]);
value = valueList;
}
} else { } else {
key = infoValueArray[i]; key = infoFieldArray[i];
value = true; value = true;
} }

View File

@ -71,6 +71,7 @@ public final class VCFConstants {
public static final char FIELD_SEPARATOR_CHAR = '\t'; public static final char FIELD_SEPARATOR_CHAR = '\t';
public static final String FILTER_CODE_SEPARATOR = ";"; public static final String FILTER_CODE_SEPARATOR = ";";
public static final String INFO_FIELD_ARRAY_SEPARATOR = ","; public static final String INFO_FIELD_ARRAY_SEPARATOR = ",";
public static final char INFO_FIELD_ARRAY_SEPARATOR_CHAR = ',';
public static final String ID_FIELD_SEPARATOR = ";"; public static final String ID_FIELD_SEPARATOR = ";";
public static final String INFO_FIELD_SEPARATOR = ";"; public static final String INFO_FIELD_SEPARATOR = ";";
public static final char INFO_FIELD_SEPARATOR_CHAR = ';'; public static final char INFO_FIELD_SEPARATOR_CHAR = ';';

View File

@ -888,9 +888,20 @@ public class ReadUtils {
if (endsWithinCigar) if (endsWithinCigar)
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
// if we end outside the current cigar element, we need to check if the next element is a deletion. // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
else { else {
nextCigarElement = cigarElementIterator.next(); nextCigarElement = cigarElementIterator.next();
// if it's an insertion, we need to clip the whole insertion before looking at the next element
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
readBases += nextCigarElement.getLength();
if (!cigarElementIterator.hasNext())
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
nextCigarElement = cigarElementIterator.next();
}
// if it's a deletion, we will pass the information on to be handled downstream.
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
} }

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeTest; import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;

View File

@ -25,16 +25,14 @@
package org.broadinstitute.sting.utils.clipreads; package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: roger * User: roger