-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add reverseComplement function to all BioCollections.
- Loading branch information
Showing
6 changed files
with
129 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
(*** hide ***) | ||
// This block of code is omitted in the generated HTML documentation. Use | ||
// it to define helpers that you do not want to show in the documentation. | ||
#I @"../../bin/BioFSharp/net47/" | ||
#I @"../../bin/BioFSharp.BioDB/net45/" | ||
#I @"../../bin/BioFSharp.ImgP/net47" | ||
#I @"../../bin/BioFSharp.IO/net47/" | ||
#I @"../../bin/BioFSharp.Parallel/net47/" | ||
#I @"../../bin/BioFSharp.Stats/net47/" | ||
#I @"../../bin/BioFSharp.Vis/net47/" | ||
#r @"../../lib/Formatting/FSharp.Plotly.dll" | ||
#r "BioFSharp.dll" | ||
#r "BioFSharp.IO.dll" | ||
#r "FSharpAux.dll" | ||
#r "FSharpAux.IO.dll" | ||
|
||
(** | ||
*) | ||
open System | ||
open FSharpAux | ||
open FSharpAux.IO | ||
open BioFSharp.IO | ||
|
||
// Folder holding the sample data files: the "data" directory next to this script.
let fileDir = System.String.Concat(__SOURCE_DIRECTORY__, "/data/")
|
||
// http://www.bx.psu.edu/~dcking/man/maf.xhtml | ||
|
||
|
||
/// Groups the lines of a MAF (Multiple Alignment Format) file into alignment blocks.
///
/// Lines that are blank, start with a space, or start with "#" are skipped. The
/// remaining lines are grouped with Seq.groupWhen, using "line starts with 'a'" as the
/// condition that opens a new group. Each group is returned as a tuple of its
/// unmodified first line (the 'a' line) and the list of lines that follow it.
///
/// `converter` is applied to the concatenated characters of the lines following the
/// 'a' line, but its result is discarded; only exceptions it raises are observable.
/// NOTE(review): the parser looks unfinished - presumably the converted value is meant
/// to become part of the result later; confirm before relying on `converter`.
///
/// Raises System.Exception "Incorrect MAF format" when a group does not begin with an
/// 'a' line (for example when data lines precede the first alignment block).
let fromFileEnumerator (converter:seq<char>-> 'a) (fileEnumerator) =

    // Condition of grouping lines: a non-empty line whose first character is 'a'
    let startsBlock (line:string) =
        line.Length > 0 && line.[0] = 'a'

    // Lines that carry no alignment data. Blank lines are skipped as well: MAF uses them
    // to separate blocks, and keeping them would put empty strings into every block's
    // line list and could yield a group that does not begin with an 'a' line.
    let isSkipped (line:string) =
        System.String.IsNullOrWhiteSpace(line) || line.StartsWith " " || line.StartsWith "#"

    // Splits one group of lines into its 'a' line and the lines that follow it
    let record (block:string list) =
        match block with
        | (header:string) :: rest when header.StartsWith "a" ->
            // The converter is still invoked so that a validating converter keeps
            // seeing the block content; the converted value is not returned.
            rest |> Seq.concat |> converter |> ignore
            header, rest
        | _ -> raise (System.Exception "Incorrect MAF format")

    // main
    fileEnumerator
    |> Seq.filter (fun (line:string) -> not (isSkipped line))
    |> Seq.groupWhen startsBlock
    |> Seq.map (fun group -> record (List.ofSeq group))
|
||
|
||
/// Parses the MAF file at `filePath`: whatever `FileIO.readFile filePath` yields is
/// handed to `fromFileEnumerator` together with `converter`.
let fromFile converter (filePath) =
    let lines = FileIO.readFile filePath
    fromFileEnumerator converter lines
|
||
|
||
// Smoke test: parse the sample alignment file and count the resulting records.
Seq.length (fromFile id (fileDir + "alignment.maf"))
|
||
|
||
|
||
|
||
|
||
//let rec parseS src start size strand srcSize text = | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
##maf version=1 scoring=tba.v8 | ||
# tba.v8 (((human chimp) baboon) (mouse rat)) | ||
|
||
a score=23262.0 | ||
s hg18.chr7 27578828 38 + 158545518 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG | ||
s panTro1.chr6 28741140 38 + 161576975 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG | ||
s baboon 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG | ||
s mm4.chr6 53215344 38 + 151104725 -AATGGGAATGTTAAGCAAACGA---ATTGTCTCTCAGTGTG | ||
s rn3.chr4 81344243 40 + 187371129 -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG | ||
|
||
a score=5062.0 | ||
s hg18.chr7 27699739 6 + 158545518 TAAAGA | ||
s panTro1.chr6 28862317 6 + 161576975 TAAAGA | ||
s baboon 241163 6 + 4622798 TAAAGA | ||
s mm4.chr6 53303881 6 + 151104725 TAAAGA | ||
s rn3.chr4 81444246 6 + 187371129 taagga | ||
|
||
a score=6636.0 | ||
s hg18.chr7 27707221 13 + 158545518 gcagctgaaaaca | ||
s panTro1.chr6 28869787 13 + 161576975 gcagctgaaaaca | ||
s baboon 249182 13 + 4622798 gcagctgaaaaca | ||
s mm4.chr6 53310102 13 + 151104725 ACAGCTGAAAATA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters