Merge branch 'master' of ssh://gsa2/humgen/gsa-scr1/chartl/dev/git

This commit is contained in:
Christopher Hartl 2011-09-22 11:01:58 -04:00
commit 4f4a0fc38a
5 changed files with 49 additions and 41 deletions

View File

@ -115,15 +115,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
} }
arrayIndex++; arrayIndex++;
} }
boolean sawFormatTag = false;
if ( arrayIndex < strings.length ) { if ( arrayIndex < strings.length ) {
if ( !strings[arrayIndex].equals("FORMAT") ) if ( !strings[arrayIndex].equals("FORMAT") )
throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'"); throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
sawFormatTag = true;
arrayIndex++; arrayIndex++;
} }
while (arrayIndex < strings.length) while ( arrayIndex < strings.length )
auxTags.add(strings[arrayIndex++]); auxTags.add(strings[arrayIndex++]);
if ( sawFormatTag && auxTags.size() == 0 )
throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
} else { } else {
if ( str.startsWith("##INFO=") ) { if ( str.startsWith("##INFO=") ) {
VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version); VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
@ -200,28 +206,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @return a VariantContext * @return a VariantContext
*/ */
public Feature decode(String line) { public Feature decode(String line) {
return reallyDecode(line); // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
} if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
private Feature reallyDecode(String line) { // our header cannot be null, we need the genotype sample names and counts
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
// our header cannot be null, we need the genotype sample names and counts if (parts == null)
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
if (parts == null) int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true); // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
" tokens, and saw " + nParts + " )", lineNo);
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) return parseVCFLine(parts);
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
" tokens, and saw " + nParts + " )", lineNo);
return parseVCFLine(parts);
} }
protected void generateException(String message) { protected void generateException(String message) {

View File

@ -35,9 +35,6 @@ public class VCFHeader {
// the header string indicator // the header string indicator
public static final String HEADER_INDICATOR = "#"; public static final String HEADER_INDICATOR = "#";
/** do we have genotying data? */
private boolean hasGenotypingData = false;
// were the input samples sorted originally (or are we sorting them)? // were the input samples sorted originally (or are we sorting them)?
private boolean samplesWereAlreadySorted = true; private boolean samplesWereAlreadySorted = true;
@ -57,17 +54,15 @@ public class VCFHeader {
* create a VCF header, given a list of meta data and auxillary tags * create a VCF header, given a list of meta data and auxillary tags
* *
* @param metaData the meta data associated with this header * @param metaData the meta data associated with this header
* @param genotypeSampleNames the genotype format field, and the sample names * @param genotypeSampleNames the sample names
*/ */
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) { public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
mMetaData = new TreeSet<VCFHeaderLine>(); mMetaData = new TreeSet<VCFHeaderLine>();
if ( metaData != null ) if ( metaData != null )
mMetaData.addAll(metaData); mMetaData.addAll(metaData);
for (String col : genotypeSampleNames) {
if (!col.equals("FORMAT")) mGenotypeSampleNames.addAll(genotypeSampleNames);
mGenotypeSampleNames.add(col);
}
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
loadVCFVersion(); loadVCFVersion();
loadMetaDataMaps(); loadMetaDataMaps();
@ -157,7 +152,7 @@ public class VCFHeader {
* @return true if we have genotyping columns, false otherwise * @return true if we have genotyping columns, false otherwise
*/ */
public boolean hasGenotypingData() { public boolean hasGenotypingData() {
return hasGenotypingData; return mGenotypeSampleNames.size() > 0;
} }
/** /**
@ -171,7 +166,7 @@ public class VCFHeader {
/** @return the column count */ /** @return the column count */
public int getColumnCount() { public int getColumnCount() {
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
} }
/** /**

View File

@ -174,6 +174,12 @@ public class UserException extends ReviewedStingException {
} }
} }
public static class MalformedVCFHeader extends UserException {
public MalformedVCFHeader(String message) {
super(String.format("The provided VCF file has a malformed header: %s", message));
}
}
public static class ReadMissingReadGroup extends MalformedBAM { public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) { public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName())); super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));

View File

@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile), baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1, 1,
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
); );
@ -30,7 +30,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant:VCF3 " + testfile, "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1, 1,
Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd") Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd")
); );
@ -43,7 +43,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testfile = validationDataLocation + "test.dup.vcf"; String testfile = validationDataLocation + "test.dup.vcf";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile), baseTestString(" -sn A -sn B -sn C --variant " + testfile),
1, 1,
Arrays.asList("b74038779fe6485dbb8734ae48178356") Arrays.asList("b74038779fe6485dbb8734ae48178356")
); );
@ -56,7 +56,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
1, 1,
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
); );
@ -69,7 +69,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER", "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
1, 1,
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
); );
@ -90,16 +90,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testVariantTypeSelection--" + testFile, spec); executeTest("testVariantTypeSelection--" + testFile, spec);
} }
@Test(enabled=false) @Test
public void testRemovePLs() { public void testUsingDbsnpName() {
String testFile = validationDataLocation + "combine.3.vcf"; String testFile = validationDataLocation + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER", "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s -NO_HEADER",
1, 1,
Arrays.asList("") Arrays.asList("167a1265df820978a74c267df44d5c43")
); );
executeTest("testWithPLs--" + testFile, spec); executeTest("testUsingDbsnpName--" + testFile, spec);
} }
} }

View File

@ -266,13 +266,18 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" ) this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" ) this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS") this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
if(t.nSamples >= 10) { if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
this.use_annotation ++= List("InbreedingCoeff") this.use_annotation ++= List("InbreedingCoeff")
} }
if(!t.isExome) { if(!t.isExome) {
this.use_annotation ++= List("DP") this.use_annotation ++= List("DP")
} else { } else { // exome specific parameters
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
this.mG = 6 this.mG = 6
if(t.nSamples <= 3) { // very few exome samples means very few variants
this.mG = 4
this.percentBad = 0.04
}
} }
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile } this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile } this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }