Fixing/cleaning up the vcf merge util
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3047 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cdec84aa8f
commit
c88a2a3027
|
|
@ -90,20 +90,6 @@ public class RodVCF extends BasicReferenceOrderedDatum implements Variation, Ite
|
||||||
return mCurrentRecord.getNonRefAlleleFrequency();
|
return mCurrentRecord.getNonRefAlleleFrequency();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasStrandBias() {
|
|
||||||
assertNotNull();
|
|
||||||
return this.mCurrentRecord.getInfoValues().containsKey(VCFRecord.STRAND_BIAS_KEY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the strand bias of this variant
|
|
||||||
*
|
|
||||||
* @return StrandBias with the stored slod
|
|
||||||
*/
|
|
||||||
public double getStrandBias() {
|
|
||||||
return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get(VCFRecord.STRAND_BIAS_KEY)) : 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @return the VARIANT_TYPE of the current variant */
|
/** @return the VARIANT_TYPE of the current variant */
|
||||||
public Variation.VARIANT_TYPE getType() {
|
public Variation.VARIANT_TYPE getType() {
|
||||||
assertNotNull();
|
assertNotNull();
|
||||||
|
|
|
||||||
|
|
@ -69,14 +69,10 @@ public class VCFUtils {
|
||||||
VCFParameters params = new VCFParameters();
|
VCFParameters params = new VCFParameters();
|
||||||
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY);
|
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY);
|
||||||
|
|
||||||
// keep track of the locus specific data so we can merge them intelligently
|
// keep track of the data so we can merge them intelligently
|
||||||
int totalReadDepth = 0;
|
|
||||||
double maxConfidence = 0.0;
|
double maxConfidence = 0.0;
|
||||||
double totalSLOD = 0.0;
|
|
||||||
int SLODsSeen = 0;
|
|
||||||
double totalFreq = 0.0;
|
|
||||||
int freqsSeen = 0;
|
|
||||||
String id = null;
|
String id = null;
|
||||||
|
Map<String, String> infoFields = new HashMap<String, String>();
|
||||||
List<String> filters = new ArrayList<String>();
|
List<String> filters = new ArrayList<String>();
|
||||||
|
|
||||||
for ( RodVCF rod : rods ) {
|
for ( RodVCF rod : rods ) {
|
||||||
|
|
@ -87,9 +83,6 @@ public class VCFUtils {
|
||||||
if ( params.getPosition() < 1 )
|
if ( params.getPosition() < 1 )
|
||||||
params.setLocations(rod.getLocation(), call.getReference());
|
params.setLocations(rod.getLocation(), call.getReference());
|
||||||
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
|
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
|
||||||
int depth = call.getReadCount();
|
|
||||||
if ( depth > 0 )
|
|
||||||
totalReadDepth += call.getReadCount();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// set the overall confidence to be the max entry we see
|
// set the overall confidence to be the max entry we see
|
||||||
|
|
@ -97,32 +90,16 @@ public class VCFUtils {
|
||||||
if ( confidence > maxConfidence )
|
if ( confidence > maxConfidence )
|
||||||
maxConfidence = confidence;
|
maxConfidence = confidence;
|
||||||
|
|
||||||
if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
|
|
||||||
totalFreq += rod.getNonRefAlleleFrequency();
|
|
||||||
freqsSeen++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( rod.hasStrandBias() ) {
|
|
||||||
totalSLOD += rod.getStrandBias();
|
|
||||||
SLODsSeen++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( rod.getID() != null )
|
if ( rod.getID() != null )
|
||||||
id = rod.getID();
|
id = rod.getID();
|
||||||
|
|
||||||
if ( rod.isFiltered() )
|
if ( rod.isFiltered() )
|
||||||
filters.add(rod.getFilterString());
|
filters.add(rod.getFilterString());
|
||||||
|
|
||||||
|
// just take the last value we see for a given key
|
||||||
|
infoFields.putAll(rod.getInfoValues());
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, String> infoFields = new HashMap<String, String>();
|
|
||||||
infoFields.put(VCFRecord.DEPTH_KEY, String.format("%d", totalReadDepth));
|
|
||||||
|
|
||||||
// set the overall strand bias and allele frequency to be the average of all entries we've seen
|
|
||||||
if ( SLODsSeen > 0 )
|
|
||||||
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
|
|
||||||
if ( freqsSeen > 0 )
|
|
||||||
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
|
||||||
|
|
||||||
return new VCFRecord(params.getReferenceBases(),
|
return new VCFRecord(params.getReferenceBases(),
|
||||||
params.getContig(),
|
params.getContig(),
|
||||||
params.getPosition(),
|
params.getPosition(),
|
||||||
|
|
@ -152,16 +129,10 @@ public class VCFUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
|
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
|
||||||
|
for ( Map.Entry<String, String> entry : gtype.getFields().entrySet() ) {
|
||||||
// calculate the genotype quality and the read depth
|
record.setField(entry.getKey(), entry.getValue());
|
||||||
record.setField(VCFGenotypeRecord.DEPTH_KEY, String.valueOf(gtype.getReadCount()));
|
params.addFormatItem(entry.getKey());
|
||||||
params.addFormatItem(VCFGenotypeRecord.DEPTH_KEY);
|
}
|
||||||
double qual = Math.min(10.0 * gtype.getNegLog10PError(), VCFGenotypeRecord.MAX_QUAL_VALUE);
|
|
||||||
if ( qual >= 0 )
|
|
||||||
record.setField(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%.2f", qual));
|
|
||||||
else
|
|
||||||
record.setField(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%d", VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY));
|
|
||||||
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
|
|
||||||
|
|
||||||
record.setVCFRecord(vcfrecord);
|
record.setVCFRecord(vcfrecord);
|
||||||
return record;
|
return record;
|
||||||
|
|
|
||||||
|
|
@ -10,35 +10,35 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
||||||
return "-T CallsetConcordance -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:1-8000 -CO %s";
|
return "-T CallsetConcordance -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:1-8000 -CO %s";
|
||||||
}
|
}
|
||||||
|
|
||||||
//@Test
|
@Test
|
||||||
public void testSimpleVenn() {
|
public void testSimpleVenn() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1,
|
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1,
|
||||||
Arrays.asList("cdd027a0d7bbfae2ba75480fcaa14356"));
|
Arrays.asList("d9124d2b0fb5bec5bc50c26a16b4e900"));
|
||||||
executeTest("testSimpleVenn", spec);
|
executeTest("testSimpleVenn", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
//@Test
|
@Test
|
||||||
public void testSNPConcordance() {
|
public void testSNPConcordance() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
|
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
|
||||||
Arrays.asList("310bf0824fc407b2e40cbbaea234471e"));
|
Arrays.asList("df1fbc744947f316f65f51a21368b0e4"));
|
||||||
executeTest("testSNPConcordance", spec);
|
executeTest("testSNPConcordance", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
//@Test
|
@Test
|
||||||
public void testNWayVenn() {
|
public void testNWayVenn() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
|
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
|
||||||
Arrays.asList("179adafae6efdc879fc22442ab9e599f"));
|
Arrays.asList("aa835ae5368b35f376b844d7f8ef2976"));
|
||||||
executeTest("testNWayVenn", spec);
|
executeTest("testNWayVenn", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
//@Test
|
@Test
|
||||||
public void testMulti() {
|
public void testMulti() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1,
|
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1,
|
||||||
Arrays.asList("c8fe63633ef6ed2b6068958f06cddfe0"));
|
Arrays.asList("c9ef68cc3b7dc08f1d2b49170e6560ab"));
|
||||||
executeTest("testMulti", spec);
|
executeTest("testMulti", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue