Skip to content

Commit

Permalink
Add reverseComplement function to all BioCollections.
Browse files Browse the repository at this point in the history
Fix: #65 and #66
  • Loading branch information
muehlhaus committed Aug 29, 2019
1 parent d5527ab commit 27ab68f
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 31 deletions.
71 changes: 71 additions & 0 deletions docsrc/content/MAF.fsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
(*** hide ***)
// This block of code is omitted in the generated HTML documentation. Use
// it to define helpers that you do not want to show in the documentation.
#I @"../../bin/BioFSharp/net47/"
#I @"../../bin/BioFSharp.BioDB/net45/"
#I @"../../bin/BioFSharp.ImgP/net47"
#I @"../../bin/BioFSharp.IO/net47/"
#I @"../../bin/BioFSharp.Parallel/net47/"
#I @"../../bin/BioFSharp.Stats/net47/"
#I @"../../bin/BioFSharp.Vis/net47/"
#r @"../../lib/Formatting/FSharp.Plotly.dll"
#r "BioFSharp.dll"
#r "BioFSharp.IO.dll"
#r "FSharpAux.dll"
#r "FSharpAux.IO.dll"

(**
*)
open System
open FSharpAux
open FSharpAux.IO
open BioFSharp.IO

// Directory of the example data files, built from this script's source directory plus "/data/"
let fileDir = __SOURCE_DIRECTORY__ + "/data/"

// http://www.bx.psu.edu/~dcking/man/maf.xhtml


/// Reads the alignment blocks of a MAF (Multiple Alignment Format) file from a sequence of lines.
/// Blank lines and lines starting with ' ' or '#' (header / comment lines) are skipped.
/// Every remaining line starting with 'a' opens a new block, which collects all lines up to the next 'a' line.
/// Returns one tuple per block: the unmodified 'a' line and the list of lines that followed it.
/// The converter is applied to the concatenated characters of the block's lines; its result is
/// currently not part of the returned value.
/// Raises System.Exception "Incorrect MAF format" if a block does not start with an 'a' line.
let fromFileEnumerator (converter:seq<char>-> 'a) (fileEnumerator) =

    // A line opens a new group when it is non-empty and starts with 'a'
    let same_group l =
        String.length l > 0 && l.[0] = 'a'

    // Splits one grouped block into its 'a' line and the remaining lines
    let record d (converter:seq<char>-> 'a) =
        match d with
        | (h:string) :: l when h.StartsWith "a" ->
            // The converted value is discarded for now; the call is kept so that
            // a failing converter still surfaces as before.
            (Seq.concat l) |> converter |> ignore
            h,l
        | _ -> raise (System.Exception "Incorrect MAF format")

    // main
    fileEnumerator
    // Blank lines must be dropped as well: otherwise a blank line in front of the
    // first 'a' line forms a group of its own that is rejected as malformed, and
    // blank separator lines leak into the line list of each block.
    |> Seq.filter (fun (l:string) ->
        not (System.String.IsNullOrWhiteSpace l || l.StartsWith " " || l.StartsWith "#"))
    |> Seq.groupWhen same_group
    |> Seq.map (fun l -> record (List.ofSeq l) converter)


/// Reads the lines of the MAF file at filePath and parses them with fromFileEnumerator.
/// The converter is passed through to fromFileEnumerator unchanged.
let fromFile converter (filePath) =
    let lines = FileIO.readFile filePath
    fromFileEnumerator converter lines


// Parse the example alignment file (using id as converter) and count the resulting elements
fromFile id (fileDir + "alignment.maf") |> Seq.length





//let rec parseS src start size strand srcSize text =

22 changes: 22 additions & 0 deletions docsrc/content/data/alignment.maf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
##maf version=1 scoring=tba.v8
# tba.v8 (((human chimp) baboon) (mouse rat))

a score=23262.0
s hg18.chr7 27578828 38 + 158545518 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
s panTro1.chr6 28741140 38 + 161576975 AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG
s baboon 116834 38 + 4622798 AAA-GGGAATGTTAACCAAATGA---GTTGTCTCTTATGGTG
s mm4.chr6 53215344 38 + 151104725 -AATGGGAATGTTAAGCAAACGA---ATTGTCTCTCAGTGTG
s rn3.chr4 81344243 40 + 187371129 -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG

a score=5062.0
s hg18.chr7 27699739 6 + 158545518 TAAAGA
s panTro1.chr6 28862317 6 + 161576975 TAAAGA
s baboon 241163 6 + 4622798 TAAAGA
s mm4.chr6 53303881 6 + 151104725 TAAAGA
s rn3.chr4 81444246 6 + 187371129 taagga

a score=6636.0
s hg18.chr7 27707221 13 + 158545518 gcagctgaaaaca
s panTro1.chr6 28869787 13 + 161576975 gcagctgaaaaca
s baboon 249182 13 + 4622798 gcagctgaaaaca
s mm4.chr6 53310102 13 + 151104725 ACAGCTGAAAATA
10 changes: 10 additions & 0 deletions src/BioFSharp/BioArray.fs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,17 @@ module BioArray =
|> Seq.toArray


/// Returns a new array holding the nucleotides in opposite order, e.g. "ATGC" becomes "CGTA".
/// The nucleotides themselves are not changed.
let reverse (nucs:BioArray<Nucleotides.Nucleotide>) =
    Array.rev nucs

/// Returns a new array in which every nucleotide is replaced by the result of Nucleotides.complement,
/// keeping the original order. For example, the sequence "ATGC" is converted to "TACG".
let complement (nucs:BioArray<Nucleotides.Nucleotide>) =
    Array.map Nucleotides.complement nucs

/// Returns the reverse complement: every nucleotide is complemented via Nucleotides.complement
/// and the order is then reversed. For example, the sequence "ATGC" is converted to "GCAT".
let reverseComplement (nucs:BioArray<Nucleotides.Nucleotide>) =
    let complemented = Array.map Nucleotides.complement nucs
    Array.rev complemented


/// Builts a new collection whose elements are the result of applying
Expand Down
12 changes: 12 additions & 0 deletions src/BioFSharp/BioList.fs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ module BioList =
|> Seq.choose OptionConverter.charToOptionNucleotid
|> Seq.toList


/// Returns a new list holding the nucleotides in opposite order, e.g. "ATGC" becomes "CGTA".
/// The nucleotides themselves are not changed.
let reverse (nucs:BioList<Nucleotides.Nucleotide>) =
    List.rev nucs

/// Returns a new list in which every nucleotide is replaced by the result of Nucleotides.complement,
/// keeping the original order. For example, the sequence "ATGC" is converted to "TACG".
let complement (nucs:BioList<Nucleotides.Nucleotide>) =
    List.map Nucleotides.complement nucs

/// Returns the reverse complement: every nucleotide is complemented via Nucleotides.complement
/// and the order is then reversed. For example, the sequence "ATGC" is converted to "GCAT".
let reverseComplement (nucs:BioList<Nucleotides.Nucleotide>) =
    let complemented = List.map Nucleotides.complement nucs
    List.rev complemented

// /// Builts a new collection whose elements are the result of applying
// /// the given function to each triplet of the collection.
Expand Down
15 changes: 13 additions & 2 deletions src/BioFSharp/BioSeq.fs
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,24 @@ module BioSeq =
| _ -> sourceIsEmpty := true
}


/// Returns a sequence yielding the nucleotides in opposite order, e.g. "ATGC" becomes "CGTA".
/// The nucleotides themselves are not changed.
let reverse (nucs:seq<Nucleotides.Nucleotide>) =
    Seq.rev nucs

/// Returns a sequence in which every nucleotide is replaced by the result of Nucleotides.complement,
/// keeping the original order. For example, the sequence "ATGC" is converted to "TACG".
let complement (nucs:seq<Nucleotides.Nucleotide>) =
    Seq.map Nucleotides.complement nucs

/// Returns the reverse complement: every nucleotide is complemented via Nucleotides.complement
/// and the order is then reversed. For example, the sequence "ATGC" is converted to "GCAT".
let reverseComplement (nucs:seq<Nucleotides.Nucleotide>) =
    let complemented = Seq.map Nucleotides.complement nucs
    Seq.rev complemented

// Replace T by U
/// Transcribes a given DNA coding strand (5'-----3') by applying Nucleotides.replaceTbyU to every nucleotide
let transcribeCodeingStrand (nucs:seq<Nucleotides.Nucleotide>) =
    Seq.map Nucleotides.replaceTbyU nucs



//
/// Transcribe a given DNA template strand (3'-----5')
let transcribeTemplateStrand (nucs:seq<Nucleotides.Nucleotide>) =
Expand Down
30 changes: 1 addition & 29 deletions src/BioFSharp/Nucleotides.fs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ module Nucleotides =



/// Create the complement DNA or RNA strand. For example, the sequence "ATGC" is converted to "TACG"
/// Returns the Nucleotide from the complementary strand
let complement (nuc:Nucleotide) =
match nuc with
| A -> T
Expand All @@ -289,34 +289,6 @@ module Nucleotides =
| _ -> nuc


/// Maps a single nucleotide to another nucleotide according to the fixed substitution table below.
/// Any nucleotide not listed is returned unchanged.
/// NOTE(review): this operates on one symbol and does not reverse a sequence; reversing
/// "ATGC" to "CGTA" requires reversing the collection itself.
let inverse (nuc:Nucleotide) =
    match nuc with
    // Standard bases
    | A -> C
    | C -> A
    | G -> T
    | T -> G
    | U -> A
    // Ambiguous nucleotide codes: double base codes
    | K -> M
    | M -> K
    | R -> W
    | W -> R
    | S -> Y
    | Y -> S
    // Ambiguous nucleotide codes: triple base codes
    | B -> V
    | V -> B
    | D -> H
    | H -> D
    // Everything else stays as it is
    | other -> other

/// Applies complement and then inverse to a single nucleotide.
/// NOTE(review): this works on one symbol only; it does not reverse the order of a sequence.
let antiparallel (nuc:Nucleotide) =
    nuc |> complement |> inverse


/// Replace thymidine (T) by uracil (U). For example, the sequence "ATUGC" is converted to "AUUGC".
Expand Down

0 comments on commit 27ab68f

Please sign in to comment.