Merge branch 'master' into ped

This commit is contained in:
Mark DePristo 2011-10-05 15:03:09 -07:00
commit 3226d5dc0d
9 changed files with 41 additions and 33 deletions

View File

@ -32,16 +32,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;

View File

@ -36,12 +36,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -51,6 +47,9 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.CircularArray;
import org.broadinstitute.sting.utils.collections.PrimitivePair;
@ -265,7 +264,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// first, the basic info
headerInfo.add(new VCFHeaderLine("source", "IndelGenotyperV2"));
headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// FORMAT and INFO fields
@ -283,10 +282,10 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
args.addAll(getToolkit().getFilters());
Map<String,String> commandLineArgs = getToolkit().getApproximateCommandLineArguments(args);
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
headerInfo.add(new VCFHeaderLine(String.format("IGv2_%s", commandLineArg.getKey()), commandLineArg.getValue()));
headerInfo.add(new VCFHeaderLine(String.format("SID_%s", commandLineArg.getKey()), commandLineArg.getValue()));
// also, the list of input bams
for ( String fileName : getToolkit().getArguments().samFiles )
headerInfo.add(new VCFHeaderLine("IGv2_bam_file_used", fileName));
headerInfo.add(new VCFHeaderLine("SID_bam_file_used", fileName));
return headerInfo;
}

View File

@ -170,9 +170,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
/////////////////////////////
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
public static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
public static final String EOF_MARKER = "EOF";
private long numReadsWithMalformedColorSpace = 0;

View File

@ -4,7 +4,6 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -36,6 +36,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
// for ParsingUtils.split
protected String[] GTValueArray = new String[100];
protected String[] genotypeKeyArray = new String[100];
protected String[] infoFieldArray = new String[1000];
protected String[] infoValueArray = new String[1000];
// for performance testing purposes
@ -351,23 +352,28 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
generateException("The VCF specification does not allow for whitespace in the INFO field");
int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
for (int i = 0; i < infoValueSplitSize; i++) {
int infoFieldSplitSize = ParsingUtils.split(infoField, infoFieldArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR, false);
for (int i = 0; i < infoFieldSplitSize; i++) {
String key;
Object value;
int eqI = infoValueArray[i].indexOf("=");
int eqI = infoFieldArray[i].indexOf("=");
if ( eqI != -1 ) {
key = infoValueArray[i].substring(0, eqI);
String str = infoValueArray[i].substring(eqI+1, infoValueArray[i].length());
key = infoFieldArray[i].substring(0, eqI);
String str = infoFieldArray[i].substring(eqI+1);
// let's see if the string contains a ',' separator
if ( str.contains(",") )
value = Arrays.asList(str.split(","));
else
value = str;
// split the value on the INFO array separator (',') to detect multi-valued entries
int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
if ( infoValueSplitSize == 1 ) {
value = infoValueArray[0];
} else {
ArrayList<String> valueList = new ArrayList<String>(infoValueSplitSize);
for ( int j = 0; j < infoValueSplitSize; j++ )
valueList.add(infoValueArray[j]);
value = valueList;
}
} else {
key = infoValueArray[i];
key = infoFieldArray[i];
value = true;
}

View File

@ -71,6 +71,7 @@ public final class VCFConstants {
public static final char FIELD_SEPARATOR_CHAR = '\t';
public static final String FILTER_CODE_SEPARATOR = ";";
public static final String INFO_FIELD_ARRAY_SEPARATOR = ",";
public static final char INFO_FIELD_ARRAY_SEPARATOR_CHAR = ',';
public static final String ID_FIELD_SEPARATOR = ";";
public static final String INFO_FIELD_SEPARATOR = ";";
public static final char INFO_FIELD_SEPARATOR_CHAR = ';';

View File

@ -888,9 +888,20 @@ public class ReadUtils {
if (endsWithinCigar)
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
// if we end outside the current cigar element, we need to check if the next element is a deletion.
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
else {
nextCigarElement = cigarElementIterator.next();
// if it's an insertion, we need to clip the whole insertion before looking at the next element
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
readBases += nextCigarElement.getLength();
if (!cigarElementIterator.hasNext())
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
nextCigarElement = cigarElementIterator.next();
}
// if it's a deletion, we will pass the information on to be handled downstream.
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
}

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

View File

@ -25,16 +25,14 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger