switching the concordance walker over to the new Variation system
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1735 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bce2f0d7cf
commit
39598f1f0a
|
|
@ -13,11 +13,11 @@ import java.util.*;
|
||||||
* CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available).
|
* CallsetConcordanceWalker finds the concordance between multiple callsets (different tests are available).
|
||||||
*/
|
*/
|
||||||
@Requires(value={DataSource.REFERENCE},
|
@Requires(value={DataSource.REFERENCE},
|
||||||
referenceMetaData={@RMD(name="callset1",type=AllelicVariant.class),
|
referenceMetaData={@RMD(name="callset1",type=VariationRod.class),
|
||||||
@RMD(name="callset2",type=AllelicVariant.class)})
|
@RMD(name="callset2",type=VariationRod.class)})
|
||||||
@Reference(window=@Window(start=-20,stop=20))
|
@Reference(window=@Window(start=-20,stop=20))
|
||||||
public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
||||||
@Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=false)
|
@Argument(fullName="concordance_output_path", shortName="O", doc="File path to which split sets should be written", required=true)
|
||||||
private String OUTPUT_PATH = null;
|
private String OUTPUT_PATH = null;
|
||||||
@Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false)
|
@Argument(fullName="concordanceType", shortName="CT", doc="Concordance subset types to apply to given callsets. Syntax: 'type[:key1=arg1,key2=arg2,...]'", required=false)
|
||||||
private String[] TYPES = null;
|
private String[] TYPES = null;
|
||||||
|
|
@ -52,10 +52,6 @@ public class CallsetConcordanceWalker extends RefWalker<Integer, Integer> {
|
||||||
out.println("\t" + types.next());
|
out.println("\t" + types.next());
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( OUTPUT_PATH == null )
|
|
||||||
throw new StingException("--concordance_output_path must be specified");
|
|
||||||
|
|
||||||
requestedTypes = new ArrayList<ConcordanceType>();
|
requestedTypes = new ArrayList<ConcordanceType>();
|
||||||
|
|
||||||
// initialize requested concordance types
|
// initialize requested concordance types
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -54,8 +55,8 @@ public class IndelSubsets implements ConcordanceType {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||||
AllelicVariant indel1 = (AllelicVariant)tracker.lookup("callset1", null);
|
Variation indel1 = (Variation)tracker.lookup("callset1", null);
|
||||||
AllelicVariant indel2 = (AllelicVariant)tracker.lookup("callset2", null);
|
Variation indel2 = (Variation)tracker.lookup("callset2", null);
|
||||||
|
|
||||||
int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 );
|
int set1 = ( indel1 != null && indel1.isIndel() ? 0 : 1 );
|
||||||
int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 );
|
int set2 = ( indel2 != null && indel2.isIndel() ? 0 : 1 );
|
||||||
|
|
@ -65,21 +66,21 @@ public class IndelSubsets implements ConcordanceType {
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// only deal with a valid indel
|
// only deal with a valid indel
|
||||||
AllelicVariant indel = ( indel1 != null ? indel1 : indel2 );
|
Variation indel = ( indel1 != null ? indel1 : indel2 );
|
||||||
|
|
||||||
int size = ( indel.length() <= sizeCutoff ? 0 : 1 );
|
int size = ( indel.getAlternateBases().length() <= sizeCutoff ? 0 : 1 );
|
||||||
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
|
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
|
||||||
|
|
||||||
writers[set1][set2][size][homopol].println(indel.toString());
|
writers[set1][set2][size][homopol].println(indel.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
private int homopolymerRunSize(ReferenceContext ref, AllelicVariant indel) {
|
private int homopolymerRunSize(ReferenceContext ref, Variation indel) {
|
||||||
char[] bases = ref.getBases();
|
char[] bases = ref.getBases();
|
||||||
GenomeLoc window = ref.getWindow();
|
GenomeLoc window = ref.getWindow();
|
||||||
GenomeLoc locus = ref.getLocus();
|
GenomeLoc locus = ref.getLocus();
|
||||||
|
|
||||||
int refBasePos = (int)(locus.getStart() - window.getStart());
|
int refBasePos = (int)(locus.getStart() - window.getStart());
|
||||||
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAltBasesFWD().charAt(0);
|
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateBases().charAt(0);
|
||||||
int leftRun = 0;
|
int leftRun = 0;
|
||||||
for ( int i = refBasePos; i >= 0; i--) {
|
for ( int i = refBasePos; i >= 0; i--) {
|
||||||
if ( bases[i] != indelBase )
|
if ( bases[i] != indelBase )
|
||||||
|
|
@ -87,9 +88,9 @@ public class IndelSubsets implements ConcordanceType {
|
||||||
leftRun++;
|
leftRun++;
|
||||||
}
|
}
|
||||||
|
|
||||||
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.length(),bases.length-1)] : indel.getAltBasesFWD().charAt(indel.getAltBasesFWD().length()-1);
|
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateBases().length(),bases.length-1)] : indel.getAlternateBases().charAt(indel.getAlternateBases().length()-1);
|
||||||
int rightRun = 0;
|
int rightRun = 0;
|
||||||
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.length() : 1); i < bases.length; i++) {
|
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateBases().length() : 1); i < bases.length; i++) {
|
||||||
if ( bases[i] != indelBase )
|
if ( bases[i] != indelBase )
|
||||||
break;
|
break;
|
||||||
rightRun++;
|
rightRun++;
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split up two call sets into their various concordance sets
|
* Split up two call sets into their various concordance sets
|
||||||
|
|
@ -50,24 +52,26 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||||
AllelicVariant call1 = (AllelicVariant)tracker.lookup("callset1", null);
|
Variation call1 = (Variation)tracker.lookup("callset1", null);
|
||||||
AllelicVariant call2 = (AllelicVariant)tracker.lookup("callset2", null);
|
Variation call2 = (Variation)tracker.lookup("callset2", null);
|
||||||
|
|
||||||
// the only reason they would be null is a lack of coverage
|
// the only reason they would be null is a lack of coverage
|
||||||
if ( call1 == null || call2 == null ) {
|
if ( call1 == null || call2 == null ) {
|
||||||
if ( call1 != null && call1.isSNP() && call1.getVariationConfidence() >= LOD )
|
if ( call1 != null && call1.isSNP() && call1.getNegLog10PError() >= LOD )
|
||||||
printVariant(coverageVar1Writer, call1);
|
printVariant(coverageVar1Writer, call1);
|
||||||
else if ( call2 != null && call2.isSNP() && call2.getVariationConfidence() >= LOD )
|
else if ( call2 != null && call2.isSNP() && call2.getNegLog10PError() >= LOD )
|
||||||
printVariant(coverageVar2Writer, call2);
|
printVariant(coverageVar2Writer, call2);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!(call1 instanceof VariantBackedByGenotype) || !(call2 instanceof VariantBackedByGenotype))
|
||||||
|
throw new StingException("Both parents ROD tracks must be backed by genotype data. Ensure that your rod(s) contain genotyping information");
|
||||||
|
|
||||||
double bestVsRef1 = call1.getVariationConfidence();
|
double bestVsRef1 = call1.getNegLog10PError();
|
||||||
double bestVsRef2 = call2.getVariationConfidence();
|
double bestVsRef2 = call2.getNegLog10PError();
|
||||||
//double bestVsNext1 = call1.getConsensusConfidence();
|
//double bestVsNext1 = call1.getConsensusConfidence();
|
||||||
//double bestVsNext2 = call2.getConsensusConfidence();
|
//double bestVsNext2 = call2.getConsensusConfidence();
|
||||||
String genotype1 = call1.getGenotype().get(0);
|
String genotype1 = ((VariantBackedByGenotype)call1).getCalledGenotype().getBases();
|
||||||
String genotype2 = call2.getGenotype().get(0);
|
String genotype2 = ((VariantBackedByGenotype)call2).getCalledGenotype().getBases();
|
||||||
|
|
||||||
// are they both variant SNPs?
|
// are they both variant SNPs?
|
||||||
if ( call1.isSNP() && call2.isSNP() ) {
|
if ( call1.isSNP() && call2.isSNP() ) {
|
||||||
|
|
@ -113,7 +117,7 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
||||||
return altAllele1 == altAllele2;
|
return altAllele1 == altAllele2;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printVariant(PrintWriter writer, AllelicVariant variant) {
|
private static void printVariant(PrintWriter writer, Variation variant) {
|
||||||
writer.println(variant.toString());
|
writer.println(variant.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
package org.broadinstitute.sting.playground.gatk.walkers.concordance;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
|
|
@ -37,8 +38,8 @@ public class SimpleVenn implements ConcordanceType {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
public void computeConcordance(RefMetaDataTracker tracker, ReferenceContext ref) {
|
||||||
AllelicVariant call1 = (AllelicVariant)tracker.lookup("callset1", null);
|
Variation call1 = (Variation)tracker.lookup("callset1", null);
|
||||||
AllelicVariant call2 = (AllelicVariant)tracker.lookup("callset2", null);
|
Variation call2 = (Variation)tracker.lookup("callset2", null);
|
||||||
|
|
||||||
if ( call1 == null && call2 == null )
|
if ( call1 == null && call2 == null )
|
||||||
return;
|
return;
|
||||||
|
|
@ -55,7 +56,7 @@ public class SimpleVenn implements ConcordanceType {
|
||||||
printVariant(set2_writer, call2);
|
printVariant(set2_writer, call2);
|
||||||
|
|
||||||
// intersection (concordant)
|
// intersection (concordant)
|
||||||
else if ( call1.getAltBasesFWD().equalsIgnoreCase(call2.getAltBasesFWD()) )
|
else if ( call1.getAlternativeBaseForSNP() == call2.getAlternativeBaseForSNP())
|
||||||
printVariant(intersect_writer, call1);
|
printVariant(intersect_writer, call1);
|
||||||
|
|
||||||
// intersection (discordant)
|
// intersection (discordant)
|
||||||
|
|
@ -63,7 +64,7 @@ public class SimpleVenn implements ConcordanceType {
|
||||||
printVariant(discord_writer, call1);
|
printVariant(discord_writer, call1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printVariant(PrintWriter writer, AllelicVariant variant) {
|
private static void printVariant(PrintWriter writer, Variation variant) {
|
||||||
writer.println(variant.toString());
|
writer.println(variant.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue