Skip to content

Commit

Permalink
config-compile
Browse files Browse the repository at this point in the history
  • Loading branch information
telatin committed Oct 26, 2021
1 parent d09d2cd commit 24ef107
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 32 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ multiqc/
.DS_Store
local
nimbledeps/
test/
8 changes: 0 additions & 8 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion bamtocov.nimble
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Package

version = "2.0.4"
version = "2.1.0"
author = "Andrea Telatin, Giovanni Birolo"
description = "BAM to Coverage"
license = "MIT"
Expand Down
4 changes: 0 additions & 4 deletions docs/1_install.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,3 @@ You can install _bamtocov_ from BioConda, if you have [_conda_](https://docs.con
conda install -c conda-forge -c bioconda bamtocov
```

## Pre-compiled binaries

We release pre-compiled binaries for Linux, that can be downloaded from the
[GitHub releases page](https://github.com/telatin/bamtocov/releases).
2 changes: 1 addition & 1 deletion docs/2_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ permalink: /usage
# bamtocov

```text
BamToCov 2.0.2
BamToCov
Usage: bamtocov [options] [<BAM>]
Expand Down
17 changes: 17 additions & 0 deletions docs/5_history.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
sort: 5
permalink: /history
---
# History

This project extends [covtobed](https://github.com/telatin/covtobed),
reimplementing the core algorithm in Nim.

* 2.0.4:
* Added new internal classes, like _output\_t_ and _coverage\_t_
* This release is not yet optimized for speed.
* 2.0.2:
* Added target support. Performance improvement.
* 2.0.0:
* Initial release

32 changes: 14 additions & 18 deletions nim.cfg
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
# in htslib: ./configure --with-libdeflate --disable-libcurl
# nim c -a -d:static -d:release
# nim c -a -d:static -d:release mosdepth.nim
--opt:speed
-d:release
@if static:
passC:"-static"
passl:"-static -no-pie -fno-pie"
passl:"./local/libhts.a"
passl:"./local/libdeflate.a"
passl:"./local/libz.a"
passl:"./local/libbz2.a"
passl:"./local/liblzma.a"
passl:"./local/libopenblas.a"
passl:"./local/libcrypto.a"
passl:"./local/libpcre.a"

passl:"/usr/lib/x86_64-linux-gnu/libm.a"
passl:"/usr/lib/x86_64-linux-gnu/libpthread.a"
passl:"/usr/lib/gcc/x86_64-linux-gnu/5/libquadmath.a"
passl:"/usr/lib/x86_64-linux-gnu/libdl.a"
passl:"/usr/lib/x86_64-linux-gnu/libc.a"
passl:"/usr/lib/x86_64-linux-gnu/librt.a"
dynlibOverrideAll
passl:"-static"
passl:"/local/miniconda3/pkgs/htslib-1.11-hd3b49d5_1/lib/libhts.a"
passl:"/local/miniconda3/pkgs/libdeflate-1.2-h516909a_1/lib/libdeflate.a"
passl:"/usr/lib/x86_64-linux-gnu/liblzma.a"
passl:"/usr/lib/x86_64-linux-gnu/libz.a"
passl:"-lbz2"
passl:"-llzma"
passl:"-lpthread"
dynlibOverride:"hts"
dynlibOverride:"bz2"
dynlibOverride:"pthread"
@end
93 changes: 93 additions & 0 deletions scripts/sim-shotgun.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python
"""
Given a list of chromosome names and their lengths, generate a sorted SAM file
of a shotgun.
"""

import os, sys, random
import argparse

def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)

def parseSize(size):
# Size in the format of NUMBER[K|M|G|T]
size = size.upper()
try:
if size.endswith('K'):
return int(size[:-1]) * 1000
elif size.endswith('M'):
return int(size[:-1]) * 1000 * 1000
elif size.endswith('G'):
return int(size[:-1]) * 1000 * 1000 * 1000
elif size.endswith('T'):
return int(size[:-1]) * 1000 * 1000 * 1000 * 1000
else:
return int(size)
except ValueError:
eprint("Invalid size:", size)
sys.exit(1)

def aln(readname, chrname, pos):
qual = 100
flag = 0
cigar = "100M"
insertLen = 0
posN = 0
chrN = "*"
if args.randomstrand and random.random() < 0.5:
flag = 16
return f"{readname}\t{flag}\t{chrname}\t{pos}\t{qual}\t{cigar}\t{chrN}\t{posN}\t{insertLen}\t*\t*"

if __name__ == "__main__":
args = argparse.ArgumentParser()
args.add_argument("chrlen", help="Chromosome names and lengths in the NAME:SIZE format or simply SIZE", nargs="+")
args.add_argument("-s", "--step", help="Step size for the coverage [default: 100]", type=int, default=100)
args.add_argument("--stack", help="Depth in region stacks [default: 10]", default=10, type=int)
args.add_argument("--noise", help="Random error in position [default: 10]", default=10, type=int)
args.add_argument("randomstrand", help="Random strand [default: no]", action="store_true")
args.add_argument("--out", help="Output file in SAM format [default: stdout]")
args = args.parse_args()

if args.out:
eprint("Writing to", args.out)
out = open(args.out, "w")
else:
eprint("Writing to stdout")
out = sys.stdout

# Read chromosome names and lengths
chroms = {}
c = 0
for chromSize in args.chrlen:
c += 1
try:
chrom, size = chromSize.split(":")
chroms[chrom] = parseSize(size)
eprint(f" - {chrom}")
except ValueError:
chroms[f"chr{c}"] = parseSize(chromSize)


# Sort chroms by size (descending)
chroms = sorted(chroms.items(), key=lambda x: x[1], reverse=True)

# Write header
print("@HD\tVN:1.4\tSO:coordinate", file=out)
for chrom, length in chroms:
print(f"@SQ\tSN:{chrom}\tLN:{length}", file=out)

# Write reads
for chrom, length in chroms:
for pos in range(1, length, args.step):
if pos + args.step > length:
continue
readname = f"{chrom}-{pos}"
for i in range(0, args.stack):
# Random number between -10 and 10
pos = pos + random.randint(-1 * args.noise, args.noise)
if pos <= 0:
pos = 1
elif pos >= length:
pos = length - args.step
print( aln(readname, chrom, pos), file=out)
1 change: 1 addition & 0 deletions src/bamtocov.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import tables
import algorithm
import ./covutils
import sets
#import nimprof

# ✅ FIXME total min/max coverage cannot be computed from the two strands! - DONE
# ✅ FIXME per la cronaca ho trovato un mini “bachetto”, se il BED non ha nomi, giustamente, ficca tutto in un mega intervallo immaginario. Ora, potrebbe essere la cosa giusta da fare, l’alternativa è che se il nome non c’è lo creiamo noi tipo “chr2:100-200" e cosi li manteniamo forzatamente separati e se uno vuole il megatarget specifica lo stesso nome in tutto il file
Expand Down

0 comments on commit 24ef107

Please sign in to comment.