- Misc improvements to VCF code
- Small fix to callset concordance

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2497 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
29c129aced
commit
ed2fff13aa
|
|
@ -25,7 +25,8 @@ public class NWayVenn implements ConcordanceType {
|
|||
|
||||
TreeSet<String> concordantSamples = new TreeSet<String>();
|
||||
for ( Entry<String, Genotype> entry : samplesToRecords.entrySet() ) {
|
||||
concordantSamples.add(entry.getKey());
|
||||
if ( !entry.getValue().isNoCall() )
|
||||
concordantSamples.add(entry.getKey());
|
||||
}
|
||||
|
||||
StringBuffer tag = new StringBuffer();
|
||||
|
|
|
|||
|
|
@ -34,7 +34,11 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
|||
char refBase = ref.getBase();
|
||||
|
||||
Genotype call1 = samplesToRecords.get(sample1);
|
||||
if ( call1 != null && call1.isNoCall() )
|
||||
call1 = null;
|
||||
Genotype call2 = samplesToRecords.get(sample2);
|
||||
if ( call2 != null && call2.isNoCall() )
|
||||
call2 = null;
|
||||
|
||||
if ( call1 == null || call2 == null ) {
|
||||
if ( call1 != null && call1.isPointGenotype() && call1.isVariant(refBase) ) {
|
||||
|
|
|
|||
|
|
@ -29,7 +29,11 @@ public class SimpleVenn implements ConcordanceType {
|
|||
public String computeConcordance(Map<String, Genotype> samplesToRecords, ReferenceContext ref) {
|
||||
|
||||
Genotype call1 = samplesToRecords.get(sample1);
|
||||
if ( call1 != null && call1.isNoCall() )
|
||||
call1 = null;
|
||||
Genotype call2 = samplesToRecords.get(sample2);
|
||||
if ( call2 != null && call2.isNoCall() )
|
||||
call2 = null;
|
||||
|
||||
if ( call1 == null && call2 == null )
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -247,19 +247,23 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(toGenotypeString(altAlleles));
|
||||
for ( String field : genotypeFormatStrings ) {
|
||||
String value = mFields.get(field);
|
||||
if ( value == null && field.equals(OLD_DEPTH_KEY) )
|
||||
|
||||
if ( !isEmptyGenotype() ) {
|
||||
for ( String field : genotypeFormatStrings ) {
|
||||
String value = mFields.get(field);
|
||||
if ( value == null && field.equals(OLD_DEPTH_KEY) )
|
||||
value = mFields.get(DEPTH_KEY);
|
||||
if ( value == null )
|
||||
if ( value == null )
|
||||
continue;
|
||||
|
||||
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
||||
if (value.equals(""))
|
||||
builder.append(getMissingFieldValue(field));
|
||||
else
|
||||
builder.append(value);
|
||||
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
||||
if (value.equals(""))
|
||||
builder.append(getMissingFieldValue(field));
|
||||
else
|
||||
builder.append(value);
|
||||
}
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -139,6 +139,8 @@ public class VCFUtils {
|
|||
int SLODsSeen = 0;
|
||||
double totalFreq = 0.0;
|
||||
int freqsSeen = 0;
|
||||
String id = null;
|
||||
String filter = null;
|
||||
|
||||
for ( RodVCF rod : rods ) {
|
||||
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
|
||||
|
|
@ -158,7 +160,7 @@ public class VCFUtils {
|
|||
if ( confidence > maxConfidence )
|
||||
maxConfidence = confidence;
|
||||
|
||||
if ( rod.hasNonRefAlleleFrequency() ) {
|
||||
if ( !rod.isReference() && rod.hasNonRefAlleleFrequency() ) {
|
||||
totalFreq += rod.getNonRefAlleleFrequency();
|
||||
freqsSeen++;
|
||||
}
|
||||
|
|
@ -167,6 +169,12 @@ public class VCFUtils {
|
|||
totalSLOD += rod.getStrandBias();
|
||||
SLODsSeen++;
|
||||
}
|
||||
|
||||
if ( rod.getID() != null )
|
||||
id = rod.getID();
|
||||
|
||||
if ( rod.hasFilteringCodes() )
|
||||
filter = rod.getFilterString();
|
||||
}
|
||||
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
|
|
@ -178,16 +186,14 @@ public class VCFUtils {
|
|||
infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen)));
|
||||
if ( freqsSeen > 0 )
|
||||
infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen)));
|
||||
|
||||
// TODO -- "." and "0" are wrong -- need to use values from the records
|
||||
|
||||
|
||||
return new VCFRecord(params.getReferenceBase(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
".",
|
||||
(id != null ? id : "."),
|
||||
params.getAlternateBases(),
|
||||
maxConfidence,
|
||||
"0",
|
||||
(filter != null ? filter : "."),
|
||||
infoFields,
|
||||
params.getFormatString(),
|
||||
params.getGenotypesRecords());
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
|||
public void testNWayVenn() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
|
||||
Arrays.asList("0527ea8ec7de3a144bd0a56db80d62ba"));
|
||||
Arrays.asList("86d2342fabc8c0916a6d42a29f750ea2"));
|
||||
executeTest("testNWayVenn", spec);
|
||||
}
|
||||
|
||||
|
|
@ -41,4 +41,12 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("6fbe00cb68d2cdc59dfcb79024fd9893"));
|
||||
executeTest("testMulti", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplex() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1,
|
||||
Arrays.asList("8b72e557c0dd111738eaa69e9003fb3f"));
|
||||
executeTest("testComplex", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue