Skip to content

Commit

Permalink
epsitatis01, strange bug in htsjdk samtools/htsjdk#1026
Browse files Browse the repository at this point in the history
  • Loading branch information
lindenb committed Nov 14, 2017
1 parent efe68f7 commit 871a7cc
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 31 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ $(eval $(call compile-htsjdk-cmd,vcfserver,${jvarkit.package}.tools.vcfserver.Vc
$(eval $(call compile-htsjdk-cmd,tviewserver,${jvarkit.package}.tools.tview.TViewServer,${jcommander.jar} ${jetty.jars}))
$(eval $(call compile-htsjdk-cmd,trapindexer,${jvarkit.package}.tools.trap.TrapIndexer,${jcommander.jar}))
$(eval $(call compile-htsjdk-cmd,vcftrap,${jvarkit.package}.tools.trap.VcfTrap,${jcommander.jar}))
$(eval $(call compile-htsjdk-cmd,vcfepistatis01,${jvarkit.package}.tools.epistasis.VcfEpistatis01,${jcommander.jar}))


$(eval $(call compile-htsjdk-cmd,jeter,${jvarkit.package}.tools.burden.VcfBurdenEpistasis,${jcommander.jar}))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ public int compare(GTFGene.Exon o1, GTFGene.Exon o2)
}
}

Interval interval=new Interval(
final Interval interval=new Interval(
g.chrom,
exon.start,
exon.end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@ of this software and associated documentation files (the "Software"), to deal
package com.github.lindenb.jvarkit.tools.epistasis;

import java.io.File;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import com.beust.jcommander.Parameter;
Expand All @@ -42,6 +44,7 @@ of this software and associated documentation files (the "Software"), to deal
import com.github.lindenb.jvarkit.util.Pedigree;
import com.github.lindenb.jvarkit.util.jcommander.Launcher;
import com.github.lindenb.jvarkit.util.log.Logger;
import com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate;

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
Expand All @@ -65,6 +68,10 @@ public class VcfEpistatis01 extends Launcher {
private boolean load_variants_in_memory=false;
@Parameter(names={"-j","--jobs"},description="Number of parallel jobs.")
private int number_of_jobs =1;
@Parameter(names={"-start","--start"},description="Specify start index in variant list. (for parallelisation)")
private int start_index_at=0;
@Parameter(names={"-jexl","--jexl"},description=JexlVariantPredicate.PARAMETER_DESCRIPTION,converter=JexlVariantPredicate.Converter.class)
private Predicate<VariantContext> variantFilter = (CTX)->true;


private static final Function<Long,Integer> CTRLS_nAlt2score=(N)->{switch(N.intValue()){
Expand Down Expand Up @@ -153,6 +160,7 @@ private double score(
public Result call() throws Exception {
final VariantContext ctx1 = this.variants.get(this.startIndex);
final long startup = System.currentTimeMillis();

int i = this.startIndex + 1;

while(i< this.variants.size())
Expand Down Expand Up @@ -209,6 +217,7 @@ public int doWork(final List<String> args) {
final int variantsCount;
final List<VariantContext> inMemoryVariants;
final File vcfFile = new File(oneAndOnlyOneFile(args));
final File tmpIndexFile;

if(vcfFile.equals(this.outputFile))
{
Expand All @@ -218,23 +227,36 @@ public int doWork(final List<String> args) {

VCFFileReader vcfFileReader = new VCFFileReader(vcfFile,false);
if(this.load_variants_in_memory) {
LOG.info("loading variant in memory");
LOG.info("loading variants in memory");
tmpIndexFile = null;
final CloseableIterator<VariantContext> iter2=vcfFileReader.iterator();
inMemoryVariants = iter2.stream().collect(Collectors.toList());
inMemoryVariants = Collections.unmodifiableList(iter2.stream().
filter(this.variantFilter).
filter(V->V.getGenotypes().stream().filter(G->G.isCalled()).count()>0).//should fix https://github.com/samtools/htsjdk/issues/1026 ?
collect(Collectors.toList())
);
variantsCount = inMemoryVariants.size();
iter2.close();
}
else
{
new VcfOffsetsIndexFactory().setLogger(LOG).indexVcfFileIfNeeded(vcfFile);
CloseableIterator<VariantContext> iter2=vcfFileReader.iterator();
variantsCount = (int)iter2.stream().count();
iter2.close();
tmpIndexFile = File.createTempFile("epistatsis",VcfOffsetsIndexFactory.INDEX_EXTENSION);
tmpIndexFile.deleteOnExit();
new VcfOffsetsIndexFactory().
setLogger(LOG).
setPredicate(variantFilter).
indexVcfFile(vcfFile,tmpIndexFile);
VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
variantsCount = tmpList.size();
tmpList.close();
inMemoryVariants = null;
}



final VCFHeader header = vcfFileReader.getFileHeader();
vcfFileReader.close();

LOG.info("Number of variants: "+variantsCount);

final Pedigree pedigree;
if(this.pedigreeFile!=null)
Expand All @@ -259,16 +281,22 @@ public int doWork(final List<String> args) {
}

Result bestResult =null;
int x=0;
int x= this.start_index_at;
while(x+1 < variantsCount)
{
final List<Runner> runners = new Vector<>(this.number_of_jobs);
while(x+1 < variantsCount && runners.size() < this.number_of_jobs)
{


LOG.info("starting "+x+"/"+variantsCount);
runners.add(new Runner(
inMemoryVariants == null? VcfList.fromFile(vcfFile): inMemoryVariants
,x,ctrlSamples,caseSamples));
inMemoryVariants == null?
VcfList.fromFile(vcfFile,tmpIndexFile):
new Vector<>(inMemoryVariants)
,x,ctrlSamples,caseSamples
)
);
++x;
}
final ExecutorService execSvc;
Expand All @@ -287,14 +315,18 @@ public int doWork(final List<String> args) {
}
else
{
execSvc.invokeAll(runners).stream().forEach(F->{try {F.get();}catch(Exception err2) {err2.printStackTrace();}});
execSvc.invokeAll(runners);
}

if(execSvc!=null) {
execSvc.shutdown();
execSvc.awaitTermination(10000L, TimeUnit.DAYS);
execSvc.shutdownNow();
}
runners.stream().mapToLong(R->R.duration).min().ifPresent(D->{
LOG.info("That took "+ (D/1000f)+" seconds.");
});

if(execSvc!=null) execSvc.shutdownNow();

for(final Runner r: runners)
{
Expand All @@ -315,7 +347,7 @@ public int doWork(final List<String> args) {
}
LOG.info("best: "+bestResult);
}

if(tmpIndexFile!=null) tmpIndexFile.delete();

return 0;
}
Expand All @@ -333,7 +365,7 @@ public int doWork(final List<String> args) {

public static void main( String[] args)
{
//args=new String[] {"-j","2","--memory","-o","/home/lindenb/jeter2.vcf","/home/lindenb/jeter.vcf"};
//args=new String[] {"-jexl","vc.getStart()%105==0","-j","2","--memory","-o","/home/lindenb/jeter2.vcf","/home/lindenb/jeter.vcf"};
new VcfEpistatis01().instanceMainWithExit(args);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ of this software and associated documentation files (the "Software"), to deal

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalTreeMap;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringUtil;
Expand Down Expand Up @@ -238,7 +237,7 @@ public void add(final VariantContext ctx) {
final Set<String> annotations=new HashSet<String>();

if(getOwner().intervalTreeMap!=null) {
for(final Set<BedLine> bedLines :getOwner().intervalTreeMap.getOverlapping(new Interval(ctx.getContig(),ctx.getStart(),ctx.getEnd()))) {
for(final Set<BedLine> bedLines :getOwner().intervalTreeMap.getOverlapping(ctx)) {
for(final BedLine bedLine:bedLines) {
final String newannot=getOwner().parsedFormat.toString(bedLine);
found_overlap=true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class DefaultVcfFileList extends AbstractList<VariantContext>
private final RandomAccessFile vcfrandom;
private final VCFCodec codec = new VCFCodec();
private final int _size;
private int last_list_index = -1;

DefaultVcfFileList(final File vcf) throws IOException {
this(vcf,VcfOffsetsIndexFactory.getDefaultIndexFile(vcf));
Expand Down Expand Up @@ -111,18 +112,33 @@ public VCFHeader getHeader() {
public VariantContext get(final int index) {
if(index<0 || index>=this.size()) throw new IndexOutOfBoundsException("0<"+index+"<"+size() +" in "+vcfFile);
try {
this.indexio.seek((long)VcfOffsetsIndexFactory.MAGIC.length+ (long)index*(long)Long.BYTES);
long offset = this.indexio.readLong();
final String line;
if(this.bgzfin!=null) {
this.bgzfin.seek(offset);
line = this.bgzfin.readLine();
if(this.last_list_index==-1 || this.last_list_index+1!=index)
{
this.indexio.seek((long)VcfOffsetsIndexFactory.MAGIC.length+ (long)index*(long)Long.BYTES);
final long offset = this.indexio.readLong();

if(this.bgzfin!=null) {
this.bgzfin.seek(offset);
line = this.bgzfin.readLine();
}
else
{
this.vcfrandom.seek(offset);
line = this.vcfrandom.readLine();
}
}
else
{
this.vcfrandom.seek(offset);
line = this.vcfrandom.readLine();
if(this.bgzfin!=null) {
line = this.bgzfin.readLine();
}
else
{
line = this.vcfrandom.readLine();
}
}
this.last_list_index = index;
return this.codec.decode(line);
}
catch(final IOException err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,13 @@ public static File getDefaultIndexFile(final File vcf) {
return new File(vcf.getParentFile(),vcf.getName()+INDEX_EXTENSION);
}
/** set a predicate for the variant that will be indexed (default is 'all' ) */
public void setPredicate(final Predicate<VariantContext> acceptVariant) {
public VcfOffsetsIndexFactory setPredicate(final Predicate<VariantContext> acceptVariant) {
this.acceptVariant = acceptVariant;
return this;
}
public void setLogger(final Logger logger) {
public VcfOffsetsIndexFactory setLogger(final Logger logger) {
this.logger = logger;
return this;
}
public File indexVcfFileIfNeeded(final File vcfFile) throws IOException
{
Expand Down Expand Up @@ -116,6 +118,7 @@ public File indexVcfFile(final File vcfFile) throws IOException
/** index a vcf file for its variant offsets */
public File indexVcfFile(final File vcfFile,final File indexFile) throws IOException
{
LOG.info("indexing "+vcfFile);
IOUtil.assertFileIsReadable(vcfFile);
DataOutputStream daos = null;
BlockCompressedInputStream bgzin = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ of this software and associated documentation files (the "Software"), to deal
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalTreeMap;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.RuntimeIOException;
Expand Down Expand Up @@ -1608,15 +1607,15 @@ public List<KnownGene> getOverlappingKnownGenes(final VariantContext ctx)
{
if(this.knownGeneTreeMap==null) return Collections.emptyList();
final List<KnownGene> L = new ArrayList<>();
for(final List<KnownGene> lkg:VcfStats.this.knownGeneTreeMap.getOverlapping(new Interval(ctx.getContig(),ctx.getStart(),ctx.getEnd())))
for(final List<KnownGene> lkg:VcfStats.this.knownGeneTreeMap.getOverlapping(ctx))
{
L.addAll(lkg);
}
return L;
}

// https://en.wikipedia.org/wiki/File:Transitions-transversions-v3.png
private static boolean isTransversion(Character a1, Character a2)
private static boolean isTransversion(final Character a1, final Character a2)
{
if(a1==null || a2==null) return false;
if(a1=='A' && a2=='C') return true;
Expand All @@ -1626,7 +1625,7 @@ private static boolean isTransversion(Character a1, Character a2)
return false;
}

private static boolean isTransition(Character a1, Character a2)
private static boolean isTransition(final Character a1, final Character a2)
{
if(a1==null || a2==null) return false;
if(a1=='A' && a2=='G') return true;
Expand Down
Loading

0 comments on commit 871a7cc

Please sign in to comment.