Fixing/cleaning up the vcf merge util

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3047 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-19 15:13:32 +00:00
parent cdec84aa8f
commit c88a2a3027
3 changed files with 17 additions and 60 deletions

View File

@@ -90,20 +90,6 @@ public class RodVCF extends BasicReferenceOrderedDatum implements Variation, Ite
return mCurrentRecord.getNonRefAlleleFrequency();
}
public boolean hasStrandBias() {
assertNotNull();
return this.mCurrentRecord.getInfoValues().containsKey(VCFRecord.STRAND_BIAS_KEY);
}
/**
* get the strand bias of this variant
*
* @return StrandBias with the stored slod
*/
public double getStrandBias() {
return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get(VCFRecord.STRAND_BIAS_KEY)) : 0.0;
}
/** @return the VARIANT_TYPE of the current variant */
public Variation.VARIANT_TYPE getType() {
assertNotNull();

View File

@@ -69,14 +69,10 @@ public class VCFUtils {
VCFParameters params = new VCFParameters();
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY);
// keep track of the locus specific data so we can merge them intelligently
int totalReadDepth = 0;
// keep track of the data so we can merge them intelligently
double maxConfidence = 0.0;
double totalSLOD = 0.0;
int SLODsSeen = 0;
double totalFreq = 0.0;
int freqsSeen = 0;
String id = null;
Map<String, String> infoFields = new HashMap<String, String>();
List<String> filters = new ArrayList<String>();
for ( RodVCF rod : rods ) {
@@ -87,9 +83,6 @@ public class VCFUtils {
if ( params.getPosition() < 1 )
params.setLocations(rod.getLocation(), call.getReference());
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
int depth = call.getReadCount();
if ( depth > 0 )
totalReadDepth += call.getReadCount();
}
// set the overall confidence to be the max entry we see
@@ -97,32 +90,16 @@ public class VCFUtils {
if ( confidence > maxConfidence )
maxConfidence = confidence;
if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
totalFreq += rod.getNonRefAlleleFrequency();
freqsSeen++;
}
if ( rod.hasStrandBias() ) {
totalSLOD += rod.getStrandBias();
SLODsSeen++;
}
if ( rod.getID() != null )
id = rod.getID();
if ( rod.isFiltered() )
filters.add(rod.getFilterString());
// just take the last value we see for a given key
infoFields.putAll(rod.getInfoValues());
}
Map<String, String> infoFields = new HashMap<String, String>();
infoFields.put(VCFRecord.DEPTH_KEY, String.format("%d", totalReadDepth));
// set the overall strand bias and allele frequency to be the average of all entries we've seen
if ( SLODsSeen > 0 )
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
if ( freqsSeen > 0 )
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
return new VCFRecord(params.getReferenceBases(),
params.getContig(),
params.getPosition(),
@@ -152,16 +129,10 @@ public class VCFUtils {
}
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), alleles, VCFGenotypeRecord.PHASE.UNPHASED);
// calculate the genotype quality and the read depth
record.setField(VCFGenotypeRecord.DEPTH_KEY, String.valueOf(gtype.getReadCount()));
params.addFormatItem(VCFGenotypeRecord.DEPTH_KEY);
double qual = Math.min(10.0 * gtype.getNegLog10PError(), VCFGenotypeRecord.MAX_QUAL_VALUE);
if ( qual >= 0 )
record.setField(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%.2f", qual));
else
record.setField(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%d", VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY));
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
for ( Map.Entry<String, String> entry : gtype.getFields().entrySet() ) {
record.setField(entry.getKey(), entry.getValue());
params.addFormatItem(entry.getKey());
}
record.setVCFRecord(vcfrecord);
return record;

View File

@@ -10,35 +10,35 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
return "-T CallsetConcordance -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:1-8000 -CO %s";
}
//@Test
@Test
public void testSimpleVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1,
Arrays.asList("cdd027a0d7bbfae2ba75480fcaa14356"));
Arrays.asList("d9124d2b0fb5bec5bc50c26a16b4e900"));
executeTest("testSimpleVenn", spec);
}
//@Test
@Test
public void testSNPConcordance() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
Arrays.asList("310bf0824fc407b2e40cbbaea234471e"));
Arrays.asList("df1fbc744947f316f65f51a21368b0e4"));
executeTest("testSNPConcordance", spec);
}
//@Test
@Test
public void testNWayVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
Arrays.asList("179adafae6efdc879fc22442ab9e599f"));
Arrays.asList("aa835ae5368b35f376b844d7f8ef2976"));
executeTest("testNWayVenn", spec);
}
//@Test
@Test
public void testMulti() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1,
Arrays.asList("c8fe63633ef6ed2b6068958f06cddfe0"));
Arrays.asList("c9ef68cc3b7dc08f1d2b49170e6560ab"));
executeTest("testMulti", spec);
}