Bug fix to GenotypeConcordance. AC metrics get instantiated based on the number of eval samples; if Comp has more samples, we can see AC indices outside the bounds of the array.
Bug fix to LiftoverVariants - no barfing at reference sites. AlleleFrequencyComparison - local changes added to make sure parsing works properly. Added HammingDistance annotation. Mostly useless. But only mostly. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4622 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3d27defe93
commit
42e9987e69
|
|
@ -261,13 +261,16 @@ public class GenotypeConcordance extends VariantEvaluator implements StandardEva
|
|||
qualityScoreHistograms = new QualityScoreHistograms();
|
||||
}
|
||||
|
||||
if ( alleleCountStats == null && eval != null && validation != null ) {
|
||||
alleleCountStats = new ACStats(eval,validation,Genotype.Type.values().length);
|
||||
alleleCountSummary = new ACSummaryStats(eval, validation);
|
||||
}
|
||||
|
||||
if (sampleStats == null) {
|
||||
if (eval != null) {
|
||||
// initialize the concordance table
|
||||
sampleStats = new SampleStats(eval,Genotype.Type.values().length);
|
||||
alleleCountStats = new ACStats(eval,Genotype.Type.values().length, new CompACNames(veWalker.getLogger()));
|
||||
sampleSummaryStats = new SampleSummaryStats(eval);
|
||||
alleleCountSummary = new ACSummaryStats(eval, new CompACNames(veWalker.getLogger()));
|
||||
for (final VariantContext vc : missedValidationData) {
|
||||
determineStats(null, vc);
|
||||
}
|
||||
|
|
@ -319,10 +322,8 @@ public class GenotypeConcordance extends VariantEvaluator implements StandardEva
|
|||
}
|
||||
|
||||
sampleStats.incrValue(sample, truth, called);
|
||||
if ( evalAC != null ) {
|
||||
if ( evalAC != null && validationAC != null) {
|
||||
alleleCountStats.incrValue(evalAC,truth,called);
|
||||
}
|
||||
if ( validationAC != null ) {
|
||||
alleleCountStats.incrValue(validationAC,truth,called);
|
||||
}
|
||||
}
|
||||
|
|
@ -510,24 +511,21 @@ class SampleStats implements TableType {
|
|||
* Sample stats, but for AC
|
||||
*/
|
||||
class ACStats extends SampleStats {
|
||||
private final CompACNames myComp;
|
||||
private String[] rowKeys;
|
||||
|
||||
public ACStats(VariantContext vc, int nGenotypeTypes, CompACNames comp) {
|
||||
public ACStats(VariantContext evalvc, VariantContext compvc, int nGenotypeTypes) {
|
||||
super(nGenotypeTypes);
|
||||
rowKeys = new String[2+4*vc.getGenotypes().size()];
|
||||
for ( int i = 0; i <= 2*vc.getGenotypes().size(); i++ ) { // todo -- assuming ploidy 2 here...
|
||||
rowKeys = new String[1+2*evalvc.getGenotypes().size()+1+2*compvc.getGenotypes().size()];
|
||||
for ( int i = 0; i <= 2*evalvc.getGenotypes().size(); i++ ) { // todo -- assuming ploidy 2 here...
|
||||
concordanceStats.put(String.format("evalAC%d",i),new long[nGenotypeTypes][nGenotypeTypes]);
|
||||
rowKeys[i] = String.format("evalAC%d",i);
|
||||
|
||||
}
|
||||
|
||||
for ( int i = 0; i <= 2*vc.getGenotypes().size(); i++ ) {
|
||||
for ( int i = 0; i <= 2*compvc.getGenotypes().size(); i++ ) {
|
||||
concordanceStats.put(String.format("compAC%d",i), new long[nGenotypeTypes][nGenotypeTypes]);
|
||||
rowKeys[1+2*vc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||
rowKeys[1+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||
}
|
||||
|
||||
myComp = comp;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
@ -717,23 +715,21 @@ class SampleSummaryStats implements TableType {
|
|||
* SampleSummaryStats .. but for allele counts
|
||||
*/
|
||||
class ACSummaryStats extends SampleSummaryStats {
|
||||
final private CompACNames myComp;
|
||||
private String[] rowKeys;
|
||||
|
||||
public ACSummaryStats (final VariantContext vc, CompACNames comp) {
|
||||
public ACSummaryStats (final VariantContext evalvc, final VariantContext compvc) {
|
||||
concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
|
||||
rowKeys = new String[3+4*vc.getGenotypes().size()];
|
||||
rowKeys = new String[3+2*evalvc.getGenotypes().size() + 2*compvc.getGenotypes().size()];
|
||||
rowKeys[0] = ALL_SAMPLES_KEY;
|
||||
for( int i = 0; i <= 2*vc.getGenotypes().size() ; i ++ ) {
|
||||
for( int i = 0; i <= 2*evalvc.getGenotypes().size() ; i ++ ) {
|
||||
concordanceSummary.put(String.format("evalAC%d",i), new double[COLUMN_KEYS.length]);
|
||||
rowKeys[i+1] = String.format("evalAC%d",i);
|
||||
}
|
||||
for( int i = 0; i <= 2*vc.getGenotypes().size() ; i ++ ) {
|
||||
for( int i = 0; i <= 2*compvc.getGenotypes().size() ; i ++ ) {
|
||||
concordanceSummary.put(String.format("compAC%d",i), new double[COLUMN_KEYS.length]);
|
||||
rowKeys[2+2*vc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||
rowKeys[2+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||
}
|
||||
|
||||
myComp = comp;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
|
||||
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
|
||||
|
||||
if ( VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
if ( originalVC.isVariant() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,111 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Oct 20, 2010
|
||||
* Time: 3:08:06 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class HammingDistance implements ExperimentalAnnotation, InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
VariantContext hamCon = tracker.getVariantContext(ref,"hamming",null,ref.getLocus(),true);
|
||||
if ( hamCon == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Set<String> interSamples = new HashSet<String>(vc.getSampleNames());
|
||||
interSamples.retainAll(hamCon.getSampleNames());
|
||||
|
||||
int dist = 0;
|
||||
int nrd_num = 0;
|
||||
int hamCalls = 0;
|
||||
int vcCallsAtHamCalls = 0;
|
||||
int num_variant = 0;
|
||||
|
||||
for ( String s : interSamples ) {
|
||||
dist += dist(vc.getGenotype(s),hamCon.getGenotype(s),true);
|
||||
nrd_num += dist(vc.getGenotype(s),hamCon.getGenotype(s),false);
|
||||
if ( vc.getGenotype(s).isHet() || vc.getGenotype(s).isHomVar() || hamCon.getGenotype(s).isHet() || hamCon.getGenotype(s).isHomVar() ) {
|
||||
num_variant ++;
|
||||
}
|
||||
if ( hamCon.getGenotype(s).isCalled() ) {
|
||||
hamCalls++;
|
||||
if ( vc.getGenotype(s).isCalled() ) {
|
||||
vcCallsAtHamCalls++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
HashMap<String,Object> map = new HashMap<String,Object>(1);
|
||||
map.put("HMD",dist);
|
||||
map.put("HCR",(0.0+vcCallsAtHamCalls)/(0.0+hamCalls));
|
||||
map.put("NRD",(0.0+nrd_num)/(0.0+num_variant));
|
||||
map.put("OGC",(0.0+nrd_num)/(0.0+interSamples.size()));
|
||||
return map;
|
||||
|
||||
}
|
||||
|
||||
public int dist(Genotype a, Genotype b, boolean weightByAC) {
|
||||
if ( a.isNoCall() || b.isNoCall() ) {
|
||||
return 0;
|
||||
}
|
||||
if ( weightByAC ) {
|
||||
if ( a.isHomRef() ) {
|
||||
if ( b.isHomVar() ) {
|
||||
return 2;
|
||||
} else if ( b.isHet() ) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if ( a.isHet() ) {
|
||||
if ( b.isHom() ) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if ( b.isHomRef() ) {
|
||||
return 2;
|
||||
} else if ( b.isHet() ) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ( ! a.equals(b) ) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("HMD","HCR","NRD","OGC"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HMD",1, VCFHeaderLineType.Integer,"The hamming distance between record in Hamming ROD and this record"),
|
||||
new VCFInfoHeaderLine("HCR",1,VCFHeaderLineType.Float,"The differential call rate between record in Hamming ROD and this record"),
|
||||
new VCFInfoHeaderLine("NRD",1,VCFHeaderLineType.Float,"The Non-reference discrepancy between Hamming ROD and this record"),
|
||||
new VCFInfoHeaderLine("OGC",1,VCFHeaderLineType.Float,"The Overall Genotype Concordance between Hamming ROD and this one")); }
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.utils.report.tags.DataPoint;
|
|||
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -43,27 +44,38 @@ public class AlleleFrequencyComparison extends VariantEvaluator {
|
|||
if ( ! (isValidVC(eval) && isValidVC(comp)) ) {
|
||||
return null;
|
||||
} else {
|
||||
if ( missingField(eval) ) {
|
||||
// todo -- this is a godawful hack. The "right way" isn't working, so do it the unsafe way for now. Note that
|
||||
// todo -- this precludes getting the AC/AF values from the info field because some may not be there...
|
||||
/*if ( missingField(eval) ) {
|
||||
recalculateCounts(eval);
|
||||
}
|
||||
if ( missingField(comp) ) {
|
||||
recalculateCounts(comp);
|
||||
}
|
||||
afTable.update(getAF(eval),getAF(comp));
|
||||
acTable.update(getAC(eval),getAC(comp));
|
||||
}*/
|
||||
HashMap<String,Object> evalCounts = new HashMap<String,Object>(2);
|
||||
HashMap<String,Object> compCounts = new HashMap<String,Object>(2);
|
||||
|
||||
VariantContextUtils.calculateChromosomeCounts(eval,evalCounts,false);
|
||||
VariantContextUtils.calculateChromosomeCounts(comp,compCounts,false);
|
||||
afTable.update(((List<Double>)evalCounts.get("AF")).get(0),((List<Double>)compCounts.get("AF")).get(0));
|
||||
acTable.update(((List<Integer>)evalCounts.get("AC")).get(0),((List<Integer>)compCounts.get("AC")).get(0));
|
||||
}
|
||||
|
||||
return null; // there is nothing interesting
|
||||
}
|
||||
|
||||
private static boolean missingField(final VariantContext vc) {
|
||||
return ! ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
|
||||
return ! ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) );
|
||||
}
|
||||
|
||||
private static void recalculateCounts(VariantContext vc) {
|
||||
Map<String,Object> attributes = vc.getAttributes();
|
||||
private void recalculateCounts(VariantContext vc) {
|
||||
Map<String,Object> attributes = new HashMap<String,Object>();
|
||||
VariantContextUtils.calculateChromosomeCounts(vc,attributes,false);
|
||||
VariantContext.modifyAttributes(vc,attributes);
|
||||
vc = VariantContext.modifyAttributes(vc,attributes);
|
||||
getVEWalker().getLogger().debug(String.format("%s %s | %s %s",attributes.get("AC"),attributes.get("AF"),vc.getAttribute("AC"),vc.getAttribute("AF")));
|
||||
if ( attributes.size() == 2 && missingField(vc) ) {
|
||||
throw new org.broadinstitute.sting.utils.exceptions.StingException("VariantContext should have had attributes modified but did not");
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isValidVC(final VariantContext vc) {
|
||||
|
|
@ -72,7 +84,11 @@ public class AlleleFrequencyComparison extends VariantEvaluator {
|
|||
|
||||
private static double getAF(VariantContext vc) {
|
||||
Object af = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||
if ( List.class.isAssignableFrom(af.getClass())) {
|
||||
if ( af == null ) {
|
||||
//throw new UserException("Variant context "+vc.getName()+" does not have allele frequency entry which is required for this walker");
|
||||
// still none after being re-computed; this is 0.00
|
||||
return 0.00;
|
||||
} else if ( List.class.isAssignableFrom(af.getClass())) {
|
||||
return ( (List<Double>) af ).get(0);
|
||||
} else if ( String.class.isAssignableFrom(af.getClass())) {
|
||||
// two possibilities
|
||||
|
|
@ -84,7 +100,7 @@ public class AlleleFrequencyComparison extends VariantEvaluator {
|
|||
return Double.parseDouble(s);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
throw new UserException("Allele frequency field may be improperly formatted, found AF="+s);
|
||||
throw new UserException("Allele frequency field may be improperly formatted, found AF="+s,e);
|
||||
}
|
||||
} else if ( Double.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY).getClass())) {
|
||||
return (Double) af;
|
||||
|
|
@ -94,8 +110,11 @@ public class AlleleFrequencyComparison extends VariantEvaluator {
|
|||
}
|
||||
|
||||
private static int getAC(VariantContext vc) {
|
||||
Object ac = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||
if ( List.class.isAssignableFrom(ac.getClass())) {
|
||||
Object ac = vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY);
|
||||
if ( ac == null ) {
|
||||
// still none after being re computed; this is 0
|
||||
return 0;
|
||||
} else if ( List.class.isAssignableFrom(ac.getClass())) {
|
||||
return ( (List<Integer>) ac ).get(0);
|
||||
} else if ( String.class.isAssignableFrom(ac.getClass())) {
|
||||
// two possibilities
|
||||
|
|
@ -107,9 +126,9 @@ public class AlleleFrequencyComparison extends VariantEvaluator {
|
|||
return Integer.parseInt(s);
|
||||
}
|
||||
} catch (NumberFormatException e) {
|
||||
throw new UserException("Allele count field may be improperly formatted, found AC="+s);
|
||||
throw new UserException(String.format("Allele count field may be improperly formatted, found AC=%s for record %s:%d",ac,vc.getChr(),vc.getStart()),e);
|
||||
}
|
||||
} else if ( Double.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY).getClass())) {
|
||||
} else if ( Integer.class.isAssignableFrom(ac.getClass())) {
|
||||
return (Integer) ac;
|
||||
} else {
|
||||
throw new UserException(String.format("Class of Allele Frequency does not appear to be formated, had AF=%s, of class %s",ac.toString(),ac.getClass()));
|
||||
|
|
|
|||
|
|
@ -3,10 +3,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
|
|||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFWriter;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -21,6 +18,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -46,7 +44,8 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
headerLines.add(new VCFHeaderLine("source", "FixRefBases"));
|
||||
headerLines.add(new VCFInfoHeaderLine("FRI",1,VCFHeaderLineType.String,"The Fix-Ref-Info (if present, either \"Flipped\" or \"Fixed\")"));
|
||||
out.writeHeader(new VCFHeader(headerLines,vcfSamples));
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +55,13 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker != null && tracker.hasROD("variant") ) {
|
||||
VariantContext vc = tracker.getVariantContext(ref,"variant",null,context.getLocation(),true);
|
||||
VariantContext vc = null;
|
||||
try {
|
||||
vc = tracker.getVariantContext(ref,"variant",null,context.getLocation(),true);
|
||||
} catch ( ReviewedStingException e ) {
|
||||
logger.warn("Multiple variants found, catching exception ",e);
|
||||
return 0;
|
||||
}
|
||||
VariantContext newContext = null;
|
||||
if ( vc.isSNP() && ref.getBase() != vc.getReference().getBases()[0] && vc.getReference().length() == 1 ) {
|
||||
if ( basesAreFlipped(vc,ref) ) {
|
||||
|
|
@ -66,13 +71,19 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
HashSet<Allele> newAlleles = new HashSet<Allele>(vc.getAlternateAlleles());
|
||||
Allele refAllele = Allele.create(ref.getBase(),true);
|
||||
newAlleles.add(refAllele);
|
||||
HashMap<String,Object> newAttributes = new HashMap<String,Object>(vc.getAttributes());
|
||||
newAttributes.put("FRI",String.format("Fixed%s-%s",vc.getReference().toString(),refAllele));
|
||||
newContext = new VariantContext("FixRefBasesVC", ref.getLocus().getContig(),
|
||||
ref.getLocus().getStart(), ref.getLocus().getStop(), newAlleles, fixGenotypes(vc.getGenotypes(),refAllele),
|
||||
vc.hasNegLog10PError() ? 10.0*vc.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE,
|
||||
vc.isFiltered() ? null : vc.getFilters(), vc.getAttributes());
|
||||
vc.isFiltered() ? vc.getFilters() : null, newAttributes);
|
||||
}
|
||||
|
||||
if ( ! newContext.hasAttribute("FRI") ) {
|
||||
throw new StingException("FRI for fixed base not propagated. vc="+vc.toString());
|
||||
}
|
||||
out.add(newContext,ref.getBase());
|
||||
return 1;
|
||||
|
||||
} else {
|
||||
out.add(vc,ref.getBase());
|
||||
|
|
@ -100,9 +111,12 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
private VariantContext flipBases(VariantContext vc, ReferenceContext ref) {
|
||||
logger.info(String.format("Flipping bases at variant position %s:%d",vc.getChr(),vc.getStart()));
|
||||
HashSet<Allele> newAlleles = new HashSet<Allele>(vc.getAlleles().size());
|
||||
newAlleles.add(Allele.create(ref.getBase(),true));
|
||||
newAlleles.add(Allele.create(vc.getReference().getBases()[0],false));
|
||||
Map<String,Object> attribs = new HashMap<String,Object>(vc.getAttributes());
|
||||
attribs.put("FRI",String.format("Flipped%s-%s",vc.getReference().toString(),Allele.create(ref.getBase(),true).toString()));
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
if ( a.getBases()[0] != ref.getBase() ) {
|
||||
newAlleles.add(a);
|
||||
|
|
@ -112,9 +126,8 @@ public class FixRefBases extends RodWalker<Integer,Integer> {
|
|||
VariantContext newVC = new VariantContext("FixRefBasesVC", ref.getLocus().getContig(),
|
||||
ref.getLocus().getStart(), ref.getLocus().getStop(), newAlleles, flipGenotypes(vc.getGenotypes(),newAlleles),
|
||||
vc.hasNegLog10PError() ? 10.0*vc.getNegLog10PError() : VCFConstants.MISSING_QUALITY_v3_DOUBLE,
|
||||
vc.isFiltered() ? null : vc.getFilters(), vc.getAttributes());
|
||||
vc.isFiltered() ? vc.getFilters() : null, attribs);
|
||||
|
||||
Map<String,Object> attribs = new HashMap<String,Object>(newVC.getAttributes());
|
||||
VariantContextUtils.calculateChromosomeCounts(newVC,attribs,false);
|
||||
VariantContext.modifyAttributes(newVC,attribs);
|
||||
return newVC;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class
|
|||
String extraArgs = "-L 1:1-10,000,000";
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
|
||||
1, Arrays.asList("158ac8e6d32eb2ea1bbeebfa512965de"));
|
||||
1, Arrays.asList("8a0203f0533b628ad7f1f230a43f105f"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
//executeTest("testSelect1", spec);
|
||||
}
|
||||
|
|
@ -39,7 +39,7 @@ public class
|
|||
public void testSelect2() {
|
||||
String extraArgs = "-L 1:1-10,000,000";
|
||||
WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
|
||||
1, Arrays.asList("cee96f61ffa1d042fe0c63550c508ec9"));
|
||||
1, Arrays.asList("a55653980b750fdc8396eecb00e3b18c"));
|
||||
executeTestParallel("testSelect2", spec);
|
||||
//executeTest("testSelect2", spec);
|
||||
}
|
||||
|
|
@ -60,8 +60,8 @@ public class
|
|||
@Test
|
||||
public void testVESimple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "ff8c4ba16c7c14b4edbaf440f20641f9");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0 -E MendelianViolationEvaluator", "9b509ce5d31658eb09bb9597799b2908");
|
||||
expectations.put("-L 1:1-10,000,000", "6c9a12fdd62672b69b2ed4f0bc7e8f97");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0 -E MendelianViolationEvaluator", "6a52a6454121974b71d9cfe2dad68c28");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -84,10 +84,10 @@ public class
|
|||
" -B:comp_hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
|
||||
String matchingMD5 = "1e6d6e152c9a90513dd5b6eaa3729388";
|
||||
String matchingMD5 = "7a8183402fe29f9c62c3c3cc4d45b46e";
|
||||
expectations.put("", matchingMD5);
|
||||
expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5);
|
||||
expectations.put(" -known comp_hapmap", "d28dd504017f39a91cde8e6f096879d6");
|
||||
expectations.put(" -known comp_hapmap", "519b590635b7de035f8d5971115b47ba");
|
||||
for (String tests : testsEnumerations) {
|
||||
for (Map.Entry<String, String> entry : expectations.entrySet()) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -124,7 +124,7 @@ public class
|
|||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s -NO_HEADER",
|
||||
2,
|
||||
Arrays.asList("6b97a019402b3984fead9a4e8b7c7c2a", "989bc30dea6c8a4cf771cd1b9fdab488"));
|
||||
Arrays.asList("50321436a65ef7d574286cb0a1c55f7e", "989bc30dea6c8a4cf771cd1b9fdab488"));
|
||||
executeTestParallel("testVEWriteVCF", spec);
|
||||
//executeTest("testVEWriteVCF", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,17 +24,21 @@ class omni_qc extends QScript {
|
|||
var august_calls_EUR = new TaggedFile("/humgen/1kg/processing/release/august/EUR.vcf","vcf")
|
||||
var august_calls_ASN = new TaggedFile("/humgen/1kg/processing/release/august/ASN.vcf","vcf")
|
||||
var august_calls_AFR = new TaggedFile("/humgen/1kg/processing/release/august/AFR.vcf","vcf")
|
||||
var august_calls_EUR_refined = new TaggedFile("/humgen/1kg/processing/release/august/EUR.beagle.vcf.gz","vcf")
|
||||
var august_calls_ASN_refined = new TaggedFile("/humgen/1kg/processing/release/august/ASN.beagle.vcf.gz","vcf")
|
||||
var august_calls_AFR_refined = new TaggedFile("/humgen/1kg/processing/release/august/AFR.beagle.vcf.gz","vcf")
|
||||
var august_calls_EUR_refined = new TaggedFile("/humgen/1kg/processing/release/august/bgzip_for_release/EUR.beagle.vcf.gz","vcf")
|
||||
var august_calls_ASN_refined = new TaggedFile("/humgen/1kg/processing/release/august/bgzip_for_release/ASN.beagle.vcf.gz","vcf")
|
||||
var august_calls_AFR_refined = new TaggedFile("/humgen/1kg/processing/release/august/bgzip_for_release/AFR.beagle.vcf.gz","vcf")
|
||||
var hiseq_calls_vcf = new TaggedFile("/humgen/gsa-scr1/chartl/projects/omni/resources/NA12878.HiSeq.v9.b36.vcf.gz","vcf")
|
||||
var pilot1_with_na12878_vcf = new TaggedFile("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/calls/v2/N60/lowpass.N60.recal.mG6.retranche.vcf","vcf")
|
||||
var pilot1_na12878_beagle = new File("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/calls/beagle/lowpass.N60.recal.CEUTSI.bgl.output.vcf")
|
||||
//var august_calls_other_genotypes = _
|
||||
|
||||
// OMNI VCF FILES
|
||||
var OMNI_b36_vcf = new TaggedFile("/humgen/illumina/1kg_seq_vcfs/Illumina_HapMap_Omni_2.5_764samples.vcf","vcf")
|
||||
var OMNI_b37_vcf = new TaggedFile("/broad/shptmp/chartl/Omni_2.5_764_samples.b37.deduped.vcf","vcf")
|
||||
var OMNI_hapmap_b36_vcf = new TaggedFile("/humgen/gsa-scr1/chartl/projects/omni/resources_oct7/Omni_2_5_pilot.b36.vcf","vcf")
|
||||
var OMNI_b36_panel_vcf = new TaggedFile("/broad/shptmp/chartl/omni/vcfs/Omni_b36_with_panel_sets.vcf","vcf")
|
||||
var OMNI_b37_birdseed = new File("/humgen/gsa-scr1/chartl/projects/omni/resources_oct7/OMNI_birdseed_only.vcf")
|
||||
var OMNI_b37_joint = new File("/humgen/gsa-scr1/chartl/projects/omni/resources_oct7/OMNI_joint_birdseed_lowpass.vcf")
|
||||
|
||||
// INTERVALS
|
||||
var pilot3_interval_list: String = "/humgen/gsa-hpprojects/1kg/1kg_pilot3/documents/CenterSpecificTargetLists/results/p3overlap.targets.b36.interval_list"
|
||||
|
|
@ -52,6 +56,14 @@ class omni_qc extends QScript {
|
|||
val scratch_dir = analysis_dir + "scratch/"
|
||||
val eval_dir = analysis_dir + "eval/"
|
||||
val vcf_dir = analysis_dir + "vcfs/"
|
||||
val p1_ceu_only = scratch_dir+"Pilot1_CEU_only_sites.intervals.list"
|
||||
val p1_chbjpt_only = scratch_dir+"Pilot1_CHBJPT_only_sites.intervals.list"
|
||||
val p1_yri_only = scratch_dir+"Pilot1_YRI_only_sites.intervals.list"
|
||||
|
||||
// OTHER CHIPS
|
||||
|
||||
val OMNI_QUAD_1KG = new File("/humgen/gsa-scr1/chartl/projects/omni/resources_oct7/other_chips/1KG_OMNI.ref_fixed.vcf")
|
||||
val AFFY_6_0 = new File("/humgen/gsa-scr1/chartl/projects/omni/resources_oct7/other_chips/1KG_ARRAY.ref_fixed.vcf")
|
||||
|
||||
trait OmniArgs extends CommandLineGATK {
|
||||
this.jarFile = new File("/humgen/gsa-scr1/chartl/sting/dist/GenomeAnalysisTK.jar")
|
||||
|
|
@ -128,38 +140,42 @@ class omni_qc extends QScript {
|
|||
fix_421.variantVCF = OMNI_hapmap_b36_vcf
|
||||
fix_421.reference_sequence = b36_ref
|
||||
fix_421.out = new File(vcf_dir+swapExt(OMNI_hapmap_b36_vcf.getName,".vcf",".ref_fixed.vcf"))
|
||||
fix_421.bypassException = true
|
||||
fix_764.variantVCF = OMNI_b36_vcf
|
||||
fix_764.reference_sequence = b36_ref
|
||||
fix_764.out = new File(vcf_dir+swapExt(OMNI_b36_vcf.getName,".vcf",".ref_fixed.vcf"))
|
||||
fix_764.bypassException = true
|
||||
fix_764_b37.variantVCF = OMNI_b37_vcf
|
||||
fix_764_b37.reference_sequence = b37_ref
|
||||
fix_764_b37.out = new File(vcf_dir+swapExt(OMNI_b37_vcf.getName,".vcf",".ref_fixed.vcf"))
|
||||
|
||||
fix_764_b37.bypassException = true
|
||||
|
||||
add(fix_421,fix_764,fix_764_b37)
|
||||
|
||||
/** Propagate AC/AN annotations to Omni files via variant annotator **/
|
||||
/** Propagate AC/AN annotations to Omni files via variant annotator **/
|
||||
var annotate_421 = new VariantAnnotator with OmniArgs
|
||||
var annotate_764 = new VariantAnnotator with OmniArgs
|
||||
var annotate_764_b37 = new VariantAnnotator with OmniArgs
|
||||
|
||||
annotate_421.variantVCF = fix_421.out
|
||||
annotate_421.variantVCF = OMNI_hapmap_b36_vcf
|
||||
annotate_421.reference_sequence = b36_ref
|
||||
annotate_421.annotation :+= "ChromosomeCounts"
|
||||
annotate_421.out = new File(vcf_dir+swapExt(fix_421.out.getName,".vcf",".annot.vcf"))
|
||||
annotate_764.variantVCF = fix_764.out
|
||||
annotate_421.out = new File(vcf_dir+swapExt(annotate_421.variantVCF.getName,".vcf",".annot.vcf"))
|
||||
annotate_764.variantVCF = OMNI_b36_vcf
|
||||
annotate_764.reference_sequence = b36_ref
|
||||
annotate_764.annotation :+= "ChromosomeCounts"
|
||||
annotate_764.out = new File(vcf_dir+swapExt(fix_764.out.getName,".vcf",".annot.vcf"))
|
||||
annotate_764_b37.variantVCF = fix_764_b37.out
|
||||
annotate_764.out = new File(vcf_dir+swapExt(annotate_764.variantVCF.getName,".vcf",".annot.vcf"))
|
||||
annotate_764_b37.variantVCF = OMNI_b37_vcf
|
||||
annotate_764_b37.reference_sequence = b37_ref
|
||||
annotate_764_b37.annotation :+= "ChromosomeCounts"
|
||||
annotate_764_b37.out = new File(vcf_dir+swapExt(fix_764_b37.out.getName,".vcf",".annot.vcf"))
|
||||
annotate_764_b37.out = new File(vcf_dir+swapExt(annotate_764_b37.variantVCF.getName,".vcf",".annot.vcf"))
|
||||
|
||||
add(annotate_421,annotate_764,annotate_764_b37)
|
||||
|
||||
/** Eval the omni chip against the various comps **/
|
||||
runEval(annotate_764.out,gunzip_p1_ceu.outFile,"OMNI_764","Pilot1_CEU",pilot1_interval_list, b36_ref)
|
||||
runEval(annotate_421.out,gunzip_p1_ceu.outFile,"OMNI_421","Pilot1_CEU",pilot1_interval_list, b36_ref)
|
||||
runEval(annotate_421.out,gunzip_p1_ceu.outFile,"OMNI_421","Pilot1_CEU",pilot1_interval_list, b36_ref,true)
|
||||
//runEval(OMNI_hapmap_b36_vcf,gunzip_p1_ceu.outFile,"OMNI_421_Unfixed","Pilot1_CEU",pilot1_interval_list,b36_ref)
|
||||
runEval(annotate_764.out,gunzip_p1_chb.outFile,"OMNI_764","Pilot1_CHB",pilot1_interval_list, b36_ref)
|
||||
runEval(annotate_421.out,gunzip_p1_chb.outFile,"OMNI_421","Pilot1_CHB",pilot1_interval_list, b36_ref)
|
||||
runEval(annotate_764.out,gunzip_p1_yri.outFile,"OMNI_764","Pilot1_YRI",pilot1_interval_list, b36_ref)
|
||||
|
|
@ -170,7 +186,13 @@ class omni_qc extends QScript {
|
|||
runEval(annotate_764_b37.out,gunzip_ag_asn.outFile,"OMNI_764","August_ASN",production_interval_list, b37_ref)
|
||||
runEval(annotate_764_b37.out,gunzip_ag_afr.outFile,"OMNI_764","Ausust_AFR",production_interval_list, b37_ref)
|
||||
runEval(annotate_764.out,gunzip_hiseq.outFile,"OMNI_764","HiSeq",hiseq_interval_list, b36_ref)
|
||||
runEval(annotate_764.out,OMNI_hapmap_b36_vcf,"OMNI_764","OMNI_421",pilot1_interval_list,b36_ref)
|
||||
runEval(annotate_764.out,annotate_421.out,"OMNI_764","OMNI_421_FIXED",pilot1_interval_list,b36_ref)
|
||||
runEval(annotate_764.out,OMNI_QUAD_1KG,"OMNI_764","OMNI_QUAD",pilot1_interval_list,b36_ref)
|
||||
runEval(annotate_764.out,AFFY_6_0,"OMNI_764","AFFY_6_0",pilot1_interval_list,b36_ref)
|
||||
runEval(OMNI_b37_birdseed,gunzip_ag_eur.outFile,"OMNI_birdseed","August_EUR",production_interval_list,b37_ref)
|
||||
runEval(OMNI_b37_joint,gunzip_ag_eur.outFile,"OMNI_joint","August_EUR",production_interval_list,b37_ref)
|
||||
runEval(OMNI_QUAD_1KG,gunzip_p1_ceu.outFile,"OMNI_QUAD_1KG","Pilot1_CEU",pilot1_interval_list,b36_ref)
|
||||
runEval(AFFY_6_0,gunzip_p1_ceu.outFile,"AFFY_6_0","Pilot1_CEU",pilot1_interval_list,b36_ref)
|
||||
|
||||
var eval1KG_exclude = new VariantEval with OmniArgs
|
||||
eval1KG_exclude.samples :+= "/broad/shptmp/chartl/omni/scratch/OMNI_764_vs_Pilot3.sample_overlap.exclude.mixups.txt"
|
||||
|
|
@ -187,6 +209,31 @@ class omni_qc extends QScript {
|
|||
|
||||
runAFComparison(annotate_764.out, gunzip_p1_ceu.outFile, gunzip_p1_chb.outFile, gunzip_p1_yri.outFile)
|
||||
|
||||
var subset421: SelectVariants = new SelectVariants with OmniArgs
|
||||
subset421.reference_sequence = b36_ref
|
||||
subset421.sample :+= (new File(scratch_dir+"OMNI_421_vs_Pilot1_CEU.sample_overlap.txt")).getAbsolutePath
|
||||
subset421.variantVCF = annotate_764.out
|
||||
subset421.out = new File(vcf_dir+swapExt(annotate_764.out.getName,".vcf",".subset.pilot1CEU.vcf"))
|
||||
|
||||
add(subset421)// lastly to find things in the three-way pilot venn
|
||||
|
||||
var combine: CombineVariants = new CombineVariants with OmniArgs
|
||||
combine.reference_sequence = b36_ref
|
||||
combine.rodBind :+= new RodBind("CEU","VCF",gunzip_p1_ceu.outFile)
|
||||
combine.rodBind :+= new RodBind("ASN","VCF",gunzip_p1_chb.outFile)
|
||||
combine.rodBind :+= new RodBind("YRI","VCF",gunzip_p1_yri.outFile)
|
||||
combine.genotypeMergeOptions = Some(VariantContextUtils.GenotypeMergeType.UNIQUIFY)
|
||||
combine.priority = "%s,%s,%s".format("CEU","ASN","YRI")
|
||||
combine.out = new File(vcf_dir+"Pilot1_Populations_Combined.vcf")
|
||||
|
||||
add(combine)
|
||||
|
||||
selectSites(OMNI_b36_panel_vcf,p1_ceu_only,"ceu_only_sites")
|
||||
selectSites(OMNI_b36_panel_vcf,p1_chbjpt_only,"chbjpt_only_sites")
|
||||
selectSites(OMNI_b36_panel_vcf,p1_yri_only,"yri_only_sites")
|
||||
|
||||
runBeagleAnalysis(new File(vcf_dir + "Illumina_HapMap_Omni_2.5_764samples.annot.stripped.vcf"))
|
||||
|
||||
}
|
||||
|
||||
def processAuxiliaryChipData(otherChips: File) : List[(String,File)] = {
|
||||
|
|
@ -194,7 +241,7 @@ class omni_qc extends QScript {
|
|||
return Nil
|
||||
}
|
||||
|
||||
def runEval(eval: File, comp: File, eBase: String, cBase: String, intervals: String, reference: File) = {
|
||||
def runEval(eval: File, comp: File, eBase: String, cBase: String, intervals: String, reference: File, interesting: Boolean = false) = {
|
||||
var base = "%s_vs_%s".format(eBase,cBase)
|
||||
var getOverlap = new GetSampleOverlap
|
||||
getOverlap.in_vcfs :+= eval
|
||||
|
|
@ -214,6 +261,11 @@ class omni_qc extends QScript {
|
|||
|
||||
vEval.out = new File(eval_dir+base+".eval.csv")
|
||||
|
||||
if ( interesting ) {
|
||||
vEval.discordantInteresting = true
|
||||
vEval.outputVCF = new File(vcf_dir+"%s_vs_%s.interesting_sites.vcf".format(eBase,cBase))
|
||||
}
|
||||
|
||||
add(vEval)
|
||||
|
||||
}
|
||||
|
|
@ -245,7 +297,7 @@ class omni_qc extends QScript {
|
|||
}
|
||||
|
||||
// step three -- compare the pilot 1 files against all populations and panels
|
||||
|
||||
|
||||
runComps("Pilot1CEU",p1ceu,"CEU",nameToSubset("CEU").out)
|
||||
runComps("Pilot1CEU",p1ceu,"EUR",nameToSubset("EUR").out)
|
||||
runComps("Pilot1CHBJPT",p1asn,"CHBJPT",nameToSubset("CHBJPT").out)
|
||||
|
|
@ -257,6 +309,20 @@ class omni_qc extends QScript {
|
|||
runComps("EUR",nameToSubset("EUR").out,"ASW",nameToSubset("ASW").out)
|
||||
runComps("EUR",nameToSubset("EUR").out,"AMR",nameToSubset("ADM").out)
|
||||
|
||||
|
||||
var panelCombine: CombineVariants = new CombineVariants with OmniArgs
|
||||
panelCombine.reference_sequence = b36_ref
|
||||
panelCombine.priority = ""
|
||||
for ( p <- panels ) {
|
||||
panelCombine.rodBind :+= new RodBind(p,"VCF",nameToSubset(p).out)
|
||||
panelCombine.priority = if ( panelCombine.priority.equals("") ) p else panelCombine.priority + "," + p
|
||||
}
|
||||
panelCombine.out = OMNI_b36_panel_vcf
|
||||
panelCombine.genotypeMergeOptions = Some(VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)
|
||||
panelCombine.variantMergeOptions = Some(VariantContextUtils.VariantMergeType.UNION)
|
||||
panelCombine.setKey = "panel"
|
||||
|
||||
add(panelCombine)
|
||||
return true
|
||||
|
||||
}
|
||||
|
|
@ -282,6 +348,8 @@ class omni_qc extends QScript {
|
|||
eval.rodBind :+= new RodBind("comp%s".format(cBase),"VCF",compVCF)
|
||||
eval.noStandard = true
|
||||
eval.E :+= "AlleleFrequencyComparison"
|
||||
eval.reportType = Some(VE2TemplateType.CSV)
|
||||
eval.out = new File(eval_dir+"%s_vs_%s_allele_frequency.eval".format(eBase,cBase))
|
||||
|
||||
add(eval)
|
||||
|
||||
|
|
@ -293,7 +361,147 @@ class omni_qc extends QScript {
|
|||
combine.genotypeMergeOptions = Some(VariantContextUtils.GenotypeMergeType.UNIQUIFY)
|
||||
combine.priority = "%s,%s".format(eBase,cBase)
|
||||
|
||||
add(combine)
|
||||
//add(combine)
|
||||
|
||||
}
|
||||
|
||||
def selectSites(vcf: File, intervals: String, base: String) {
|
||||
var sv = new SelectVariants with OmniArgs
|
||||
sv.reference_sequence = b36_ref
|
||||
sv.variantVCF = vcf
|
||||
sv.out = swapExt(vcf,".vcf",base+".vcf")
|
||||
sv.intervalsString :+= intervals
|
||||
|
||||
add(sv)
|
||||
}
|
||||
|
||||
def runBeagleAnalysis(omnivcf: File) {
|
||||
var combine : CombineVariants = new CombineVariants with OmniArgs
|
||||
combine.reference_sequence = b36_ref
|
||||
for ( c <- 1 until 23) {
|
||||
combine.rodBind :+= new RodBind("beagle%s".format(c),"VCF",runBeagle(omnivcf,"%s".format(c)))
|
||||
if ( c > 1 ) {
|
||||
combine.priority = combine.priority+",%s%s".format("beagle",c)
|
||||
} else {
|
||||
combine.priority = "%s%s".format("beagle",c)
|
||||
}
|
||||
}
|
||||
combine.genotypeMergeOptions = Some(VariantContextUtils.GenotypeMergeType.PRIORITIZE)
|
||||
combine.variantMergeOptions = Some(VariantContextUtils.VariantMergeType.UNION)
|
||||
|
||||
combine.out = swapExt(pilot1_with_na12878_vcf,".vcf",".beagle_refined_with_omni.vcf")
|
||||
|
||||
add(combine)
|
||||
|
||||
var select : SelectVariants = new SelectVariants with OmniArgs
|
||||
select.reference_sequence = b36_ref
|
||||
select.variantVCF = combine.out
|
||||
select.sample :+= "NA12878"
|
||||
select.out = new File(vcf_dir + "NA12878.lowpass.beagle.refined.with.pilot1.vcf")
|
||||
|
||||
add(select)
|
||||
|
||||
var eval : VariantEval = new VariantEval with OmniArgs
|
||||
eval.reference_sequence = b36_ref
|
||||
eval.rodBind :+= new RodBind("evalNA12878LowPass","VCF",select.out)
|
||||
eval.rodBind :+= new RodBind("compNA12878HiSeq","VCF",hiseq_calls_vcf)
|
||||
eval.E :+= "GenotypeConcordance"
|
||||
eval.out = new File(eval_dir+"NA12878.lowpass.beagle.vs.HiSeq.eval")
|
||||
eval.excludeIntervals :+= new File(pilot1_interval_list)
|
||||
eval.reportType = Some(VE2TemplateType.CSV)
|
||||
|
||||
add(eval)
|
||||
|
||||
var eval2: VariantEval = new VariantEval with OmniArgs
|
||||
eval2.reference_sequence = b36_ref
|
||||
eval2.rodBind :+= new RodBind("evalNA12878Beagle","VCF",pilot1_na12878_beagle)
|
||||
eval2.rodBind :+= new RodBind("compNA12878HiSeq","VCF",hiseq_calls_vcf)
|
||||
eval2.E :+= "GenotypeConcordance"
|
||||
eval2.sample :+= "NA12878"
|
||||
eval2.out = new File(eval_dir+"NA12878.lowpass.nochip.vs.Hiseq.eval")
|
||||
eval2.excludeIntervals :+= new File(pilot1_interval_list)
|
||||
eval2.reportType = Some(VE2TemplateType.CSV)
|
||||
|
||||
add(eval2)
|
||||
|
||||
var eval3: VariantEval = new VariantEval with OmniArgs
|
||||
eval3.reference_sequence = b36_ref
|
||||
eval3.rodBind :+= new RodBind("evalNA12878NoBeagle","VCF",pilot1_with_na12878_vcf)
|
||||
eval3.rodBind :+= new RodBind("compNA12878HiSeq","VCF",hiseq_calls_vcf)
|
||||
eval3.E :+= "GenotypeConcordance"
|
||||
eval3.sample :+= "NA12878"
|
||||
eval3.out = new File(eval_dir+"NA12878.lowpass.nochip.norefined.vs.Hiseq.eval")
|
||||
eval3.excludeIntervals :+= new File(pilot1_interval_list)
|
||||
eval3.reportType = Some(VE2TemplateType.CSV)
|
||||
|
||||
add(eval3)
|
||||
}
|
||||
|
||||
def runBeagle(omnivcf: File, chr: String): File = {
|
||||
var beagleInput = new ProduceBeagleInput with OmniArgs
|
||||
beagleInput.reference_sequence = b36_ref
|
||||
beagleInput.intervalsString :+= chr
|
||||
beagleInput.variantVCF = pilot1_with_na12878_vcf
|
||||
beagleInput.rodBind :+= new RodBind("validation","VCF",omnivcf)
|
||||
beagleInput.validation_genotype_ptrue = Some(0.99)
|
||||
beagleInput.out = new File(scratch_dir+"/"+swapExt(beagleInput.variantVCF.getName,".vcf",".%s.beagle".format(chr)))
|
||||
println (beagleInput.out.getAbsolutePath)
|
||||
|
||||
var runBeagle : BeagleRefinement = new BeagleRefinement
|
||||
runBeagle.beagleInput = beagleInput.out
|
||||
runBeagle.beagleOutputBase = "Pilot1_NA12878_Beagle_with_OMNI_chr%s".format(chr)
|
||||
runBeagle.beagleMemoryGigs = 6
|
||||
runBeagle.memoryLimit = Some(6)
|
||||
runBeagle.beagleOutputDir = ""
|
||||
runBeagle.freezeOutputs
|
||||
|
||||
var gunzipPhased = new GunzipFile
|
||||
gunzipPhased.gunzipMe = runBeagle.beaglePhasedFile
|
||||
gunzipPhased.outFile = new File(scratch_dir+swapExt(runBeagle.beaglePhasedFile.getName,".gz",""))
|
||||
|
||||
var gunzipLike = new GunzipFile
|
||||
gunzipLike.gunzipMe = runBeagle.beagleLikelihoods
|
||||
gunzipLike.outFile = new File(scratch_dir+swapExt(runBeagle.beagleLikelihoods.getName,".gz",""))
|
||||
|
||||
|
||||
var convertBack : BeagleOutputToVCF = new BeagleOutputToVCF with OmniArgs
|
||||
convertBack.reference_sequence = b36_ref
|
||||
convertBack.variantVCF = pilot1_with_na12878_vcf
|
||||
convertBack.intervalsString :+= chr
|
||||
convertBack.rodBind :+= new RodBind("beagleR2","beagle",runBeagle.beagleRSquared)
|
||||
convertBack.rodBind :+= new RodBind("beagleProbs","beagle",gunzipLike.outFile)
|
||||
convertBack.rodBind :+= new RodBind("beaglePhased","beagle",gunzipPhased.outFile)
|
||||
convertBack.out = new File(vcf_dir+swapExt(pilot1_with_na12878_vcf.getName,".vcf",".chr%s.beagle_refined_plus_omni.vcf".format(chr)))
|
||||
|
||||
add(beagleInput,runBeagle,gunzipPhased,gunzipLike,convertBack)
|
||||
|
||||
return convertBack.out
|
||||
}
|
||||
|
||||
class BeagleRefinement extends CommandLineFunction {
|
||||
@Input(doc="The beagle input file") var beagleInput: File = _
|
||||
var beagleOutputBase: String = _
|
||||
var beagleMemoryGigs: Int = 4
|
||||
|
||||
/**
|
||||
* Note: These get set
|
||||
*/
|
||||
@Output(doc="The beagle phased file") var beaglePhasedFile: File = _
|
||||
@Output(doc="The beagle likelihood file") var beagleLikelihoods: File = _
|
||||
@Output(doc="The beagle r2 file") var beagleRSquared: File = _
|
||||
var beagleOutputDir: String = _
|
||||
|
||||
def freezeOutputs = {
|
||||
if ( beagleOutputDir == null && beagleInput.getParent == null ) {
|
||||
beagleOutputDir = ""
|
||||
} else if ( beagleOutputDir == null ) {
|
||||
beagleOutputDir = beagleInput.getParent+"/"
|
||||
}
|
||||
beaglePhasedFile = new File(beagleOutputDir+beagleOutputBase+"."+beagleInput.getName+".phased.gz")
|
||||
beagleLikelihoods = new File(beagleOutputDir+beagleOutputBase+"."+beagleInput.getName+".gprobs.gz")
|
||||
beagleRSquared = new File(beagleOutputDir+beagleOutputBase+"."+beagleInput.getName+".r2")
|
||||
}
|
||||
|
||||
def commandLine = "java -Djava.io.tmpdir=%s -Xmx%dg -jar /humgen/gsa-hpprojects/software/beagle/beagle.jar like=%s out=%s".format(beagleInput.getParent,beagleMemoryGigs,beagleInput.getAbsolutePath,beagleOutputBase)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue