Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
15b44ac2c3
|
|
@ -29,9 +29,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMRecordIterator;
|
import net.sf.samtools.SAMRecordIterator;
|
||||||
import net.sf.samtools.util.BlockCompressedInputStream;
|
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -102,8 +100,10 @@ public class BAMDiffableReader implements DiffableReader {
|
||||||
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
final byte[] BAM_MAGIC = "BAM\1".getBytes();
|
||||||
final byte[] buffer = new byte[BAM_MAGIC.length];
|
final byte[] buffer = new byte[BAM_MAGIC.length];
|
||||||
try {
|
try {
|
||||||
FileInputStream fstream = new FileInputStream(file);
|
InputStream fstream = new BufferedInputStream(new FileInputStream(file));
|
||||||
new BlockCompressedInputStream(fstream).read(buffer,0,BAM_MAGIC.length);
|
if ( !BlockCompressedInputStream.isValidFile(fstream) )
|
||||||
|
return false;
|
||||||
|
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
|
||||||
return Arrays.equals(buffer, BAM_MAGIC);
|
return Arrays.equals(buffer, BAM_MAGIC);
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.apache.poi.hpsf.Variant;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
|
@ -149,7 +150,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
// get all of the vcf rods at this locus
|
// get all of the vcf rods at this locus
|
||||||
// Need to provide reference bases to simpleMerge starting at current locus
|
// Need to provide reference bases to simpleMerge starting at current locus
|
||||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, null,context.getLocation(), true, false);
|
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false);
|
||||||
|
|
||||||
if ( sitesOnlyVCF ) {
|
if ( sitesOnlyVCF ) {
|
||||||
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
|
vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs);
|
||||||
|
|
@ -172,17 +173,25 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
VariantContext mergedVC;
|
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||||
if ( master ) {
|
if ( master ) {
|
||||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master"));
|
||||||
} else {
|
} else {
|
||||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType,
|
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
// iterate over the types so that it's deterministic
|
||||||
|
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||||
|
if ( VCsByType.containsKey(type) )
|
||||||
|
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||||
|
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||||
|
ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//out.printf(" merged => %s%nannotated => %s%n", mergedVC, annotatedMergedVC);
|
for ( VariantContext mergedVC : mergedVCs ) {
|
||||||
|
// only operate at the start of events
|
||||||
|
if ( mergedVC == null )
|
||||||
|
continue;
|
||||||
|
|
||||||
if ( mergedVC != null ) { // only operate at the start of events
|
|
||||||
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
|
||||||
// re-compute chromosome counts
|
// re-compute chromosome counts
|
||||||
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
|
||||||
|
|
|
||||||
|
|
@ -289,8 +289,8 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a newly allocated VC that is the same as VC, but without genotypes
|
* Returns a newly allocated VC that is the same as VC, but without genotypes
|
||||||
* @param vc
|
* @param vc variant context
|
||||||
* @return
|
* @return new VC without genotypes
|
||||||
*/
|
*/
|
||||||
@Requires("vc != null")
|
@Requires("vc != null")
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
|
|
@ -303,8 +303,8 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes
|
* Returns a newly allocated list of VC, where each VC is the same as the input VCs, but without genotypes
|
||||||
* @param vcs
|
* @param vcs collection of VCs
|
||||||
* @return
|
* @return new VCs without genotypes
|
||||||
*/
|
*/
|
||||||
@Requires("vcs != null")
|
@Requires("vcs != null")
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
|
|
@ -362,9 +362,9 @@ public class VariantContextUtils {
|
||||||
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
* information per genotype. The master merge will add the PQ information from each genotype record, where
|
||||||
* appropriate, to the master VC.
|
* appropriate, to the master VC.
|
||||||
*
|
*
|
||||||
* @param unsortedVCs
|
* @param unsortedVCs collection of VCs
|
||||||
* @param masterName
|
* @param masterName name of master VC
|
||||||
* @return
|
* @return master-merged VC
|
||||||
*/
|
*/
|
||||||
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
public static VariantContext masterMerge(Collection<VariantContext> unsortedVCs, String masterName) {
|
||||||
VariantContext master = findMaster(unsortedVCs, masterName);
|
VariantContext master = findMaster(unsortedVCs, masterName);
|
||||||
|
|
@ -435,11 +435,15 @@ public class VariantContextUtils {
|
||||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||||
* the sample name
|
* the sample name
|
||||||
*
|
*
|
||||||
* @param unsortedVCs
|
* @param genomeLocParser loc parser
|
||||||
* @param priorityListOfVCs
|
* @param unsortedVCs collection of unsorted VCs
|
||||||
* @param filteredRecordMergeType
|
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||||
* @param genotypeMergeOptions
|
* @param filteredRecordMergeType merge type for filtered records
|
||||||
* @return
|
* @param genotypeMergeOptions merge option for genotypes
|
||||||
|
* @param annotateOrigin should we annotate the set it came from?
|
||||||
|
* @param printMessages should we print messages?
|
||||||
|
* @param inputRefBase the ref base
|
||||||
|
* @return new VariantContext
|
||||||
*/
|
*/
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||||
|
|
@ -448,6 +452,24 @@ public class VariantContextUtils {
|
||||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||||
|
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||||
|
* the sample name
|
||||||
|
*
|
||||||
|
* @param genomeLocParser loc parser
|
||||||
|
* @param unsortedVCs collection of unsorted VCs
|
||||||
|
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||||
|
* @param filteredRecordMergeType merge type for filtered records
|
||||||
|
* @param genotypeMergeOptions merge option for genotypes
|
||||||
|
* @param annotateOrigin should we annotate the set it came from?
|
||||||
|
* @param printMessages should we print messages?
|
||||||
|
* @param inputRefBase the ref base
|
||||||
|
* @param setKey the key name of the set
|
||||||
|
* @param filteredAreUncalled are filtered records uncalled?
|
||||||
|
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||||
|
* @return new VariantContext
|
||||||
|
*/
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||||
|
|
@ -470,7 +492,7 @@ public class VariantContextUtils {
|
||||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||||
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false));
|
||||||
}
|
}
|
||||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredareUncalled
|
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// establish the baseline info from the first VC
|
// establish the baseline info from the first VC
|
||||||
|
|
@ -615,6 +637,17 @@ public class VariantContextUtils {
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||||
|
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||||
|
for ( VariantContext vc : VCs ) {
|
||||||
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
|
mappedVCs.get(vc.getType()).add(vc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mappedVCs;
|
||||||
|
}
|
||||||
|
|
||||||
private static class AlleleMapper {
|
private static class AlleleMapper {
|
||||||
private VariantContext vc = null;
|
private VariantContext vc = null;
|
||||||
private Map<Allele, Allele> map = null;
|
private Map<Allele, Allele> map = null;
|
||||||
|
|
@ -834,6 +867,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a genome location, given a variant context
|
* create a genome location, given a variant context
|
||||||
|
* @param genomeLocParser parser
|
||||||
* @param vc the variant context
|
* @param vc the variant context
|
||||||
* @return the genomeLoc
|
* @return the genomeLoc
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -80,9 +80,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
|
||||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
|
||||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5b82f37df1f5ba40f0474d71c94142ec", false); }
|
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
|
||||||
|
|
||||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "c58dca482bf97069eac6d9f1a07a2cba", false); }
|
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); }
|
||||||
|
|
||||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
|
||||||
|
|
||||||
|
|
@ -100,7 +100,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("8b78339ccf7a5a5a837f79e88a3a38e5"));
|
Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e"));
|
||||||
executeTest("threeWayWithRefs", spec);
|
executeTest("threeWayWithRefs", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue