Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-07-13 22:38:27 -04:00
commit caa3629467
7 changed files with 61 additions and 20 deletions

View File

@ -220,6 +220,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
} }
else { else {
unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly unprocessedList.add(vc); // Finished with the unprocessed variant, and writer can enforce sorting on-the-fly
if (DEBUG)
logger.debug("Unprocessed variant = " + VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc));
} }
int numReads = 0; int numReads = 0;
@ -1105,7 +1108,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
this.alleles = vc.getAlleles(); this.alleles = vc.getAlleles();
this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable this.genotypes = new HashMap<String, Genotype>(vc.getGenotypes()); // since vc.getGenotypes() is unmodifiable
this.negLog10PError = vc.getNegLog10PError(); this.negLog10PError = vc.getNegLog10PError();
this.filters = vc.getFilters(); this.filters = vc.filtersWereApplied() ? vc.getFilters() : null;
this.attributes = new HashMap<String, Object>(vc.getAttributes()); this.attributes = new HashMap<String, Object>(vc.getAttributes());
} }

View File

@ -199,8 +199,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// setup the header fields // setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
for ( VCFHeaderLine field : hInfo ) { for ( VCFHeaderLine field : hInfo ) {

View File

@ -444,9 +444,10 @@ public class StandardVCFWriter implements VCFWriter {
break; break;
} }
for (String s : attrs ) { for (int i = 0; i < attrs.size(); i++) {
mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) )
mWriter.write(s); mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
mWriter.write(attrs.get(i));
} }
} }
} }

View File

@ -20,7 +20,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test @Test
public void testVariantsToVCFUsingGeliInput() { public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("815b82fff92aab41c209eedce2d7e7d9"); md5.add("4accae035d271b35ee2ec58f403c68c6");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
@ -38,7 +38,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test @Test
public void testGenotypesToVCFUsingGeliInput() { public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("22336ee9c12aa222ce29c3c5babca7d0"); md5.add("71e8c98d7c3a73b6287ecc339086fe03");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
@ -56,7 +56,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test @Test
public void testGenotypesToVCFUsingHapMapInput() { public void testGenotypesToVCFUsingHapMapInput() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("9bedaa7670b86a07be5191898c3727cf"); md5.add("f343085305e80c7a2493422e4eaad983");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
@ -73,7 +73,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test @Test
public void testGenotypesToVCFUsingVCFInput() { public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("cc215edec9ca28e5c79ab1b67506f9f7"); md5.add("86f02e2e764ba35854cff2aa05a1fdd8");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.List;
/**
 * Integration test that pushes a VCF file through two walkers in sequence
 * and checks each run's output against a single expected MD5.
 */
public class VCFIntegrationTest extends WalkerTest {

    /**
     * Runs VariantAnnotator on the example VCF, then runs VariantsToVCF on the
     * first file produced by that run. Both specs are given the same expected
     * MD5, so the test passes only if both outputs hash to that value.
     */
    @Test
    public void testReadingAndWritingWitHNoChanges() {
        // Named "md5ofInputVCF" originally -- presumably the MD5 of the input
        // file itself (with the header stripped); confirm against the test data.
        final String expectedMd5 = "a990ba187a69ca44cb9bc2bb44d00447";
        final String inputVcfPath = validationDataLocation + "vcf4.1.example.vcf";
        // Arguments shared by both runs; "%s" is left for the test harness to fill in.
        final String sharedArgs = "-R " + b37KGReference + " -NO_HEADER -o %s ";

        // First pass: VariantAnnotator over the original input file.
        final WalkerTestSpec annotatorSpec = new WalkerTestSpec(
                sharedArgs + "-T VariantAnnotator -BTI variant -B:variant,vcf " + inputVcfPath,
                1,
                Arrays.asList(expectedMd5));
        final List<File> annotatorOutputs =
                executeTest("Test Variant Annotator with no changes", annotatorSpec).getFirst();

        // Second pass: VariantsToVCF over the file written by the first pass.
        final File firstPassOutput = annotatorOutputs.get(0);
        final WalkerTestSpec variantsToVcfSpec = new WalkerTestSpec(
                sharedArgs + "-T VariantsToVCF -B:variant,vcf " + firstPassOutput.getAbsolutePath(),
                1,
                Arrays.asList(expectedMd5));
        executeTest("Test Variants To VCF from new output", variantsToVcfSpec);
    }
}

View File

@ -147,13 +147,22 @@ class DataProcessingPipeline extends QScript {
} }
} }
println("\n\n*** DEBUG ***\n")
// Creating one file for each sample in the dataset // Creating one file for each sample in the dataset
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
for ((sample, flist) <- sampleTable) { for ((sample, flist) <- sampleTable) {
println(sample + ":")
for (f <- flist)
println (f)
println()
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam") val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam")
sampleBamFiles(sample) = sampleFileName sampleBamFiles(sample) = sampleFileName
add(joinBams(flist, sampleFileName)) add(joinBams(flist, sampleFileName))
} }
println("*** DEBUG ***\n\n")
return sampleBamFiles.toMap return sampleBamFiles.toMap
} }
@ -211,8 +220,10 @@ class DataProcessingPipeline extends QScript {
if (in.toString.endsWith("bam")) if (in.toString.endsWith("bam"))
return List(in) return List(in)
var l: List[File] = List() var l: List[File] = List()
for (bam <- fromFile(in).getLines) for (bam <- fromFile(in).getLines) {
l :+= new File(bam) if (!bam.startsWith("#") && !bam.isEmpty)
l :+= new File(bam.trim)
}
return l return l
} }
@ -234,9 +245,6 @@ class DataProcessingPipeline extends QScript {
// Generate a BAM file per sample joining all per lane files if necessary // Generate a BAM file per sample joining all per lane files if necessary
val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams) val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams)
println("nContigs: " + nContigs)
// Final output list of processed bam files // Final output list of processed bam files
var cohortList: List[File] = List() var cohortList: List[File] = List()
@ -244,6 +252,7 @@ class DataProcessingPipeline extends QScript {
println("\nFound the following samples: ") println("\nFound the following samples: ")
for ((sample, file) <- sampleBamFiles) for ((sample, file) <- sampleBamFiles)
println("\t" + sample + " -> " + file) println("\t" + sample + " -> " + file)
println("\n")
// If this is a 'knowns only' indel realignment run, do it only once for all samples. // If this is a 'knowns only' indel realignment run, do it only once for all samples.
val globalIntervals = new File(outputDir + projectName + ".intervals") val globalIntervals = new File(outputDir + projectName + ".intervals")

View File

@ -41,11 +41,11 @@ class RecalibrateBaseQualities extends QScript {
nContigs = getNumberOfContigs(input) nContigs = getNumberOfContigs(input)
val recalFile1: File = swapExt(input, ".bam", "recal1.csv") val recalFile1: File = swapExt(input, ".bam", ".recal1.csv")
val recalFile2: File = swapExt(input, ".bam", "recal2.csv") val recalFile2: File = swapExt(input, ".bam", ".recal2.csv")
val recalBam: File = swapExt(input, ".bam", "recal.bam") val recalBam: File = swapExt(input, ".bam", ".recal.bam")
val path1: String = "before" val path1: String = input + "before"
val path2: String = "after" val path2: String = input + "after"
add(cov(input, recalFile1), add(cov(input, recalFile1),
recal(input, recalFile1, recalBam), recal(input, recalFile1, recalBam),