Skip to content

Commit

Permalink
Add DSL for fasterq-dump in SRATools BioContainer API
Browse files Browse the repository at this point in the history
  • Loading branch information
kMutagene committed Mar 17, 2020
1 parent 02e33f9 commit 425fbb9
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 7 deletions.
35 changes: 29 additions & 6 deletions src/BioFSharp.BioContainers/BioFSharp.BioContainers.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#load "ClustalO.fs"
#load "HMMER.fs"
#load "LastAlign.fs"
#load "SRAToolkit.fs"

open System.Threading
open System.Threading
Expand Down Expand Up @@ -424,8 +425,6 @@ open BioFSharp.BioContainers.BioContainer
open BioFSharp.BioContainers.BioContainerIO
open Blast

let client = Docker.connect "npipe://./pipe/docker_engine"

let ImageBlast = Docker.DockerId.ImageId "blast"

let blastContext =
Expand All @@ -434,9 +433,9 @@ let blastContext =

let paramz =
[
MakeDbParams.DbType Protein
MakeDbParams.Input @"C:\Users\Kevin\source\repos\CsbScaffold\Docker\data\Chlamy_Cp.fastA"
MakeDbParams.Output@"C:\Users\Kevin\source\repos\CsbScaffold\Docker\data\Chlamy_Cp.fastA"
MakeBlastDbParams.DbType Protein
MakeBlastDbParams.Input @"C:\Users\Kevin\source\repos\CsbScaffold\Docker\data\Chlamy_Cp.fastA"
MakeBlastDbParams.Output@"C:\Users\Kevin\source\repos\CsbScaffold\Docker\data\Chlamy_Cp.fastA"
]

let outputFormat=
Expand Down Expand Up @@ -577,4 +576,28 @@ let alignParams =

runLastAlignAsync lastAlignContext alignParams
|> Async.RunSynchronously
|> fun x -> File.WriteAllLines(@"C:\Users\kevin\Desktop\Microbiology_CrossGenomics\Data\Genomes\GenomeAlignment.maf",x.Split([|"\r\n";"\r";"\n"|],StringSplitOptions.None))
|> fun x -> File.WriteAllLines(@"C:\Users\kevin\Desktop\Microbiology_CrossGenomics\Data\Genomes\GenomeAlignment.maf",x.Split([|"\r\n";"\r";"\n"|],StringSplitOptions.None))


open SRATools

// Example session: run fasterq-dump for a single SRA accession inside an sra-tools BioContainer.

/// Docker image id of the sra-tools BioContainer used below.
let sraImage = Docker.ImageId "quay.io/biocontainers/sra-tools:2.10.3--pl526haddd2b5_0"

/// Container context created for `sraImage` via initBcContextWithMountAsync with the given host directory.
// NOTE(review): presumably this directory becomes the container mount - confirm against BioContainer.
let sraContext =
    let startContext =
        BioContainer.initBcContextWithMountAsync client sraImage @"C:\Users\kevin\Downloads\CsbScaffold-master\MetaIndexing_New\data"
    Async.RunSynchronously startContext

/// fasterq-dump options for the example run: output/temp directories, --split-3, details and progress output.
let FQDOptions =
    [
        FasterQDumpParams.OutDirectory @"C:\Users\kevin\Downloads\CsbScaffold-master\MetaIndexing_New\data\lol"
        FasterQDumpParams.TempDirectory @"C:\Users\kevin\Downloads\CsbScaffold-master\MetaIndexing_New\data\lol\tmp"
        FasterQDumpParams.Split SplitOptions.Split3
        FasterQDumpParams.PrintDetails
        FasterQDumpParams.ShowProgress
    ]

// Blocking run for accession SRR000001 with the options above.
"SRR000001" |> runFasterQDump sraContext FQDOptions

// Dispose the container context once the run is finished.
Async.RunSynchronously (BioContainer.disposeAsync sraContext)
213 changes: 212 additions & 1 deletion src/BioFSharp.BioContainers/SRAToolkit.fs
Original file line number Diff line number Diff line change
@@ -1 +1,212 @@
namespace BioFSharp.BioContainers
namespace BioFSharp.BioContainers
open BioContainer

module SRATools =

//type PrefetchParams =
// |Placeholder

// static member makeCmd = function
// |Placeholder -> [""]

// static member makeCmdWith (m:MountInfo) = function
// |Placeholder -> [""]




//let runPrefetchAsync (bcContext:BioContainer.BcContext) (opt:PrefetchParams list) =

// let cmds = (opt |> List.map (PrefetchParams.makeCmdWith bcContext.Mount))
// let tp = "prefetch"::(cmds |> List.concat)

// printfn "Starting process prefetch\r\nparameters:"
// cmds |> List.iter (fun op -> printfn "\t%s" (String.concat " " op))

// async {
// let! res = BioContainer.execAsync bcContext tp
// return res
// }

///Options that determine how fasterq-dump writes the reads of a spot
type SplitOptions =
    ///Split spots into reads
    |SplitSpot
    ///Write reads into different files
    |SplitFiles
    ///Writes single reads into special file
    |Split3
    ///Writes whole spots into one file
    |ConcatenateReads

    ///Returns the command line flag for the given split option
    static member make = function
        |SplitSpot          -> "--split-spot"
        |SplitFiles         -> "--split-files"
        |Split3             -> "--split-3"
        // fasterq-dump spells this flag in the plural: --concatenate-reads
        |ConcatenateReads   -> "--concatenate-reads"

//type FastQDumpParams =
// |Placeholder

// static member makeCmd = function
// |Placeholder -> [""]

// static member makeCmdWith (m:MountInfo) = function
// |Placeholder -> [""]




//let runFastQDumpAsync (bcContext:BioContainer.BcContext) (opt:FastQDumpParams list) =

// let cmds = (opt |> List.map (FastQDumpParams.makeCmdWith bcContext.Mount))
// let tp = "fastq-dump"::(cmds |> List.concat)

// printfn "Starting process fastq-dump\r\nparameters:"
// cmds |> List.iter (fun op -> printfn "\t%s" (String.concat " " op))

// async {
// let! res = BioContainer.execAsync bcContext tp
// return res
// }


///DSL for command line arguments for the fasterq-dump tool contained in the SRA Toolkit
type FasterQDumpParams =
    ///full path of outputfile (overrides usage of current directory and given accession)
    |OutFile                of string
    ///path for outputfile (overrides usage of current directory, but uses given accession)
    |OutDirectory           of string
    ///path to directory for temp. files (dflt=current dir.)
    |TempDirectory          of string
    ///size of file-buffer (dflt=1MB)
    |BufferSize             of string
    ///Determine how to handle paired reads
    |Split                  of SplitOptions
    ///size of cursor-cache (dflt=10MB, takes number or number and unit)
    |CursorCacheSize        of string
    ///memory limit for sorting (dflt=100MB, takes number or number and unit)
    |SortingMemoryLimit     of string
    ///how many threads to use (dflt=6)
    |Threads                of int
    ///show progress (not possible if stdout used)
    |ShowProgress
    ///print details of all options selected
    |PrintDetails
    ///print output to stdout
    |StdOut
    ///force overwrite of existing file(s)
    |Force
    ///use rowid as name (avoids using the name column)
    |RowIdAsName
    ///skip technical reads
    |SkipTechnical
    ///explicitly include technical reads
    |IncludeTechnical
    ///include read-number in defline
    |PrintReadNumber
    ///filter by sequence-length
    |MinReadLength          of int
    ///which seq-table to use in case of pacbio
    |PacBioTableName        of string
    ///terminate on invalid read
    |Strict
    ///filter output by matching against given bases
    |FilterBases            of string
    ///append to output-file, instead of overwriting it
    |AppendOutput
    ///path to ngc file
    |NGCFilePath            of string
    ///path to permission file
    |PermissionFilePath     of string
    ///location in cloud
    |CloudLocation          of string
    ///path to cart file
    |CartPath               of string
    ///disable multithreading
    |DisableMultiThreading
    ///Display the version of the program
    |Version
    ///Logging level as number or enum string. One of (fatal|sys|int|err|warn|info|debug) or (0-6) Current/default is warn
    |LogLevel               of string
    ///Read more options and parameters from the file.
    |OptionFilePath         of string

    ///Converts a single parameter into its command line tokens (flag followed by its value, if any).
    ///Path values are used exactly as given.
    static member makeCmd = function
        |OutFile            o   -> ["-o"; o]
        |OutDirectory       o   -> ["-O"; o]
        |TempDirectory      t   -> ["-t"; t]
        |NGCFilePath        n   -> ["--ngc"; n]
        |PermissionFilePath p   -> ["--perm"; p]
        |CartPath           c   -> ["--cart"; c]
        // long options need the leading dashes, otherwise the token is read as an accession
        |OptionFilePath     o   -> ["--option-file"; o]
        |Split              so  -> [SplitOptions.make so]
        |Threads            t   -> ["-e"; string t]
        |MinReadLength      l   -> ["-M"; string l]
        |FilterBases        b   -> ["-B"; b]
        |BufferSize         b   -> ["-b"; b]
        |CursorCacheSize    c   -> ["-c"; c]
        |SortingMemoryLimit m   -> ["-m"; m]
        |ShowProgress           -> ["-p"]
        |PrintDetails           -> ["-x"]
        |StdOut                 -> ["-Z"]
        |Force                  -> ["-f"]
        |RowIdAsName            -> ["-N"]
        |PrintReadNumber        -> ["-P"]
        |AppendOutput           -> ["-A"]
        |Version                -> ["-V"]
        |LogLevel           l   -> ["-L"; l]
        |SkipTechnical          -> ["--skip-technical"]
        |IncludeTechnical       -> ["--include-technical"]
        |PacBioTableName    t   -> ["--table"; t]
        |Strict                 -> ["--strict"]
        |CloudLocation      c   -> ["--location"; c]
        |DisableMultiThreading  -> ["--disable-multithreading"]

    ///Converts a single parameter into its command line tokens like makeCmd, but passes every
    ///path-valued parameter through MountInfo.containerPathOf for the given mount.
    static member makeCmdWith (m:MountInfo) = function
        |OutFile            o   -> ["-o"; MountInfo.containerPathOf m o]
        |OutDirectory       o   -> ["-O"; MountInfo.containerPathOf m o]
        |TempDirectory      t   -> ["-t"; MountInfo.containerPathOf m t]
        |NGCFilePath        n   -> ["--ngc"  ; MountInfo.containerPathOf m n]
        |PermissionFilePath p   -> ["--perm" ; MountInfo.containerPathOf m p]
        |CartPath           c   -> ["--cart" ; MountInfo.containerPathOf m c]
        // long options need the leading dashes, otherwise the token is read as an accession
        |OptionFilePath     o   -> ["--option-file"; MountInfo.containerPathOf m o]
        |Split              so  -> [SplitOptions.make so]
        |Threads            t   -> ["-e"; string t]
        |MinReadLength      l   -> ["-M"; string l]
        |FilterBases        b   -> ["-B"; b]
        |BufferSize         b   -> ["-b"; b]
        |CursorCacheSize    c   -> ["-c"; c]
        // named `limit` so the pattern does not shadow the mount parameter `m`
        |SortingMemoryLimit limit -> ["-m"; limit]
        |ShowProgress           -> ["-p"]
        |PrintDetails           -> ["-x"]
        |StdOut                 -> ["-Z"]
        |Force                  -> ["-f"]
        |RowIdAsName            -> ["-N"]
        |PrintReadNumber        -> ["-P"]
        |AppendOutput           -> ["-A"]
        |Version                -> ["-V"]
        |LogLevel           l   -> ["-L"; l]
        |SkipTechnical          -> ["--skip-technical"]
        |IncludeTechnical       -> ["--include-technical"]
        |PacBioTableName    t   -> ["--table"; t]
        |Strict                 -> ["--strict"]
        |CloudLocation      c   -> ["--location"; c]
        |DisableMultiThreading  -> ["--disable-multithreading"]


///Builds the fasterq-dump command line from the given options and accession, prints the option groups,
///and returns the async computation that executes the command via BioContainer.execAsync in the given context.
let runFasterQDumpAsync (bcContext:BioContainer.BcContext) (opt:FasterQDumpParams list) (accession:string) =

    // one token group per option; path values go through the context's mount (see makeCmdWith)
    let argGroups = List.map (FasterQDumpParams.makeCmdWith bcContext.Mount) opt
    // executable first, flattened option tokens next, accession last
    let commandLine = "fasterq-dump" :: (List.concat argGroups @ [accession])

    printfn "Starting process fasterq-dump\r\nparameters:"
    for group in argGroups do
        printfn "\t%s" (String.concat " " group)

    async {
        return! BioContainer.execAsync bcContext commandLine
    }

///Blocking variant of runFasterQDumpAsync: builds the same computation and waits for it with Async.RunSynchronously.
let runFasterQDump (bcContext:BioContainer.BcContext) (opt:FasterQDumpParams list) (accession:string) =
    Async.RunSynchronously (runFasterQDumpAsync bcContext opt accession)

0 comments on commit 425fbb9

Please sign in to comment.