-Misc improvements to VCF code

-Small fix to callset concordance


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2497 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-04 02:28:47 +00:00
parent 29c129aced
commit ed2fff13aa
6 changed files with 44 additions and 17 deletions

View File

@ -25,7 +25,8 @@ public class NWayVenn implements ConcordanceType {
TreeSet<String> concordantSamples = new TreeSet<String>();
for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) {
concordantSamples.add(entry.getKey());
if ( !entry.getValue().isNoCall() )
concordantSamples.add(entry.getKey());
}
StringBuffer tag = new StringBuffer();

View File

@ -34,7 +34,11 @@ public class SNPGenotypeConcordance implements ConcordanceType {
char refBase = ref.getBase();
Genotype call1 = samplesToRecords.get(sample1);
if ( call1 != null && call1.isNoCall() )
call1 = null;
Genotype call2 = samplesToRecords.get(sample2);
if ( call2 != null && call2.isNoCall() )
call2 = null;
if ( call1 == null || call2 == null ) {
if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) {

View File

@ -29,7 +29,11 @@ public class SimpleVenn implements ConcordanceType {
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
Genotype call1 = samplesToRecords.get(sample1);
if ( call1 != null && call1.isNoCall() )
call1 = null;
Genotype call2 = samplesToRecords.get(sample2);
if ( call2 != null && call2.isNoCall() )
call2 = null;
if ( call1 == null && call2 == null )
return null;

View File

@ -247,19 +247,23 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
StringBuilder builder = new StringBuilder();
builder.append(toGenotypeString(altAlleles));
for ( String field : genotypeFormatStrings ) {
String value = mFields.get(field);
if ( value == null && field.equals(OLD_DEPTH_KEY) )
if ( !isEmptyGenotype() ) {
for ( String field : genotypeFormatStrings ) {
String value = mFields.get(field);
if ( value == null && field.equals(OLD_DEPTH_KEY) )
value = mFields.get(DEPTH_KEY);
if ( value == null )
if ( value == null )
continue;
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
if (value.equals(""))
builder.append(getMissingFieldValue(field));
else
builder.append(value);
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
if (value.equals(""))
builder.append(getMissingFieldValue(field));
else
builder.append(value);
}
}
return builder.toString();
}

View File

@ -139,6 +139,8 @@ public class VCFUtils {
int SLODsSeen = 0;
double totalFreq = 0.0;
int freqsSeen = 0;
String id = null;
String filter = null;
for ( RodVCF rod : rods ) {
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
@ -158,7 +160,7 @@ public class VCFUtils {
if ( confidence > maxConfidence )
maxConfidence = confidence;
if ( rod.hasNonRefAlleleFrequency() ) {
if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
totalFreq += rod.getNonRefAlleleFrequency();
freqsSeen++;
}
@ -167,6 +169,12 @@ public class VCFUtils {
totalSLOD += rod.getStrandBias();
SLODsSeen++;
}
if ( rod.getID() != null )
id = rod.getID();
if ( rod.hasFilteringCodes() )
filter = rod.getFilterString();
}
Map<String, String> infoFields = new HashMap<String, String>();
@ -178,16 +186,14 @@ public class VCFUtils {
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
if ( freqsSeen > 0 )
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
// TODO -- "." and "0" are wrong -- need to use values from the records
return new VCFRecord(params.getReferenceBase(),
params.getContig(),
params.getPosition(),
".",
(id != null ? id : "."),
params.getAlternateBases(),
maxConfidence,
"0",
(filter != null ? filter : "."),
infoFields,
params.getFormatString(),
params.getGenotypesRecords());

View File

@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testNWayVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
Arrays.asList("0527ea8ec7de3a144bd0a56db80d62ba"));
Arrays.asList("86d2342fabc8c0916a6d42a29f750ea2"));
executeTest("testNWayVenn", spec);
}
@ -41,4 +41,12 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893"));
executeTest("testMulti", spec);
}
/**
 * Runs the NWayVenn concordance type with the same input file,
 * complexExample.vcf, bound as both set1 and set2, and checks the
 * result against the recorded MD5.
 */
@Test
public void testComplex() {
    // Assemble the command line piece by piece, in the same left-to-right order.
    String commandLine = baseTestString();
    commandLine += " -B set1,VCF," + validationDataLocation + "complexExample.vcf";
    commandLine += " -B set2,VCF," + validationDataLocation + "complexExample.vcf";
    commandLine += " -CT NWayVenn";

    // NOTE(review): the literal 1 is presumably the number of expected output files -- confirm against WalkerTestSpec.
    WalkerTestSpec complexSpec = new WalkerTestSpec(
            commandLine,
            1,
            Arrays.asList("8b72e557c0dd111738eaa69e9003fb3f"));
    executeTest("testComplex", complexSpec);
}
}