Skip to content

Commit

Permalink
#84: Add ePost and eSummary Entrez query DSL
Browse files Browse the repository at this point in the history
  • Loading branch information
kMutagene committed Mar 27, 2020
1 parent 07ab9b9 commit d8e0daf
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 23 deletions.
81 changes: 61 additions & 20 deletions src/BioFSharp.BioDB/BioFSharp.BioDB.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,8 @@ let eSearchQuery =
///Job that sends the eSearch query and yields the response body as a string.
let eSearchRequest =

    job {
        // The response is disposed when this job body ends, so it has to be consumed in here.
        // It is requested exactly once: a second `use! response = getResponse ...` binding would
        // send the request again and shadow the first response.
        use! response = getResponse eSearchQuery
        // getResponse does not download the body. HttpFs does not buffer the stream, so the body
        // can only be read once; stash the result if it is needed again.
        let! bodyStr = Response.readBodyAsString response
        return bodyStr
    }

Expand Down Expand Up @@ -105,8 +97,8 @@ let eFetchQuery =
OptionalParameters =
[
HistoryServerParameters [
EntrezFetchHistoryServerParams.QueryKey 1
EntrezFetchHistoryServerParams.WebEnvironment "NCID_1_60315023_130.14.22.76_9001_1585298131_991043432_0MetA0_S_MegaStore"
EntrezFetchHistoryServerParams.QueryKey (queryKey |> int )
EntrezFetchHistoryServerParams.WebEnvironment webenv
]
RetrievalParameters [
EntrezFetchRetrievalParams.RetrievalType "RunInfo"
Expand All @@ -118,16 +110,8 @@ let eFetchQuery =
///Job that sends the eFetch query and yields the response body as a string.
let eFetchRequest =

    job {
        // The response is disposed when this job body ends, so it has to be consumed in here.
        // It is requested exactly once: a second `use! response = getResponse ...` binding would
        // send the request again and shadow the first response.
        use! response = getResponse eFetchQuery
        // getResponse does not download the body. HttpFs does not buffer the stream, so the body
        // can only be read once; stash the result if it is needed again.
        let! bodyStr = Response.readBodyAsString response
        return bodyStr
    }

Expand All @@ -149,3 +133,60 @@ xmlResponse.SelectNodes ("EXPERIMENT_PACKAGE_SET/EXPERIMENT_PACKAGE/RUN_SET/RUN"
fun node ->
node.Attributes.["accession"].Value
)


//=============================== ePost Tests ======================================
open EntrezPost

//ePost request that posts one sra UID to the web environment held in `webenv`
let ePostQuery =
    let postQuery : EntrezPostQuery =
        {
            Db              = "sra"
            UIDs            = ["336327"]
            WebEnvironment  = Some webenv
        }
    EntrezPostQuery.makeRequest postQuery

//Job that sends the ePost query and yields the body of the response as a string
let ePostRequest =
    job {
        use! postResponse = getResponse ePostQuery
        let! body = Response.readBodyAsString postResponse
        return body
    }

//Result of passing the ePost job to `run`
let eIPostResponse = run ePostRequest

//=============================== eSummary Tests ======================================
open EntrezSummary

//eSummary request for two sra UIDs, passing `webenv` as history server parameter
let eSummaryQuery =
    let summaryQuery : EntrezSummaryQuery =
        {
            Db                  = "sra"
            UIDs                = ["336327";"336326"]
            OptionalParameters  =
                [
                    HistoryServerParameters [
                        EntrezSummaryHistoryServerParams.WebEnvironment webenv
                    ]
                ]
        }
    EntrezSummaryQuery.makeRequest summaryQuery

//Job that sends the eSummary query and yields the body of the response as a string
let eSummaryRequest =
    job {
        // The response is disposed once this body ends, so do not fetch it outside of it.
        use! summaryResponse = getResponse eSummaryQuery
        // Getting the response does not download it; the body is read explicitly here.
        // (Response.readBodyAsBytes is the alternative for raw bytes.)
        // HttpFs does not buffer the stream, so the body can only be read once and has to be
        // stashed by the caller if it is needed again.
        let! body = Response.readBodyAsString summaryResponse
        return body
    }

//Result of passing the eSummary job to `run`
let eSummaryResponse = run eSummaryRequest
120 changes: 117 additions & 3 deletions src/BioFSharp.BioDB/Entrez.fs
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ module Entrez =

Request.createUrl Get BaseUrls.eInfo
|> Request.queryStringItems optParams



///DSL for constructing and executing eSearch queries
///
Expand All @@ -99,7 +97,6 @@ module Entrez =
/// - Combines or limits UID datasets stored on the History server
///
/// - Sorts sets of UIDs
module EntrezSearch =

type EntrezSearchRetrievalTypeOptions =
Expand Down Expand Up @@ -328,3 +325,120 @@ module Entrez =
|"" -> r
|_ -> r |> Request.queryStringItem "id" uIDs
|> Request.queryStringItems optParams

///DSL for building ePost queries
///
///Endpoint Functions:
///
/// - Uploads a list of UIDs to the Entrez History server
///
/// - Appends a list of UIDs to an existing set of UID lists attached to a Web Environment
module EntrezPost =

    type EntrezPostQuery =
        {
            ///Name of the Entrez database the posted UIDs belong to. Must be a valid Entrez database name (default = pubmed).
            Db              : string
            ///UIDs to post, sent as one comma-delimited list. Every UID must come from the database given by db. No maximum number of UIDs is enforced, but above roughly 200 UIDs the request should be made with the HTTP POST method.
            ///
            ///For sequence databases (nuccore, nucest, nucgss, popset, protein), GI numbers and accession.version identifiers may be mixed in the list.
            UIDs            : string list
            ///Web Environment that should receive the posted UID list; EPost creates a new query key associated with it. The value is usually taken from the output of a previous ESearch, EPost or ELink call. When omitted, EPost creates a new Web Environment and posts the UID list to query_key 1.
            WebEnvironment  : string option
        }

        ///Builds the GET request for the ePost endpoint from the given query.
        static member makeRequest (q : EntrezPostQuery) =

            //Adds the comma-joined UIDs as "id" item; the request is left untouched when the joined list is empty
            let withUIDs request =
                match q.UIDs |> String.concat "," with
                | ""        -> request
                | joined    -> request |> Request.queryStringItem "id" joined

            //Adds the "WebEnv" item only when a web environment was supplied
            let withWebEnv request =
                match q.WebEnvironment with
                | Some webEnv   -> request |> Request.queryStringItem "WebEnv" webEnv
                | None          -> request

            Request.createUrl Get BaseUrls.ePost
            |> Request.queryStringItem "db" q.Db
            |> withUIDs
            |> withWebEnv

///DSL for constructing and executing eSummary queries
///
///Endpoint Functions:
///
/// - Returns document summaries (DocSums) for a list of input UIDs
///
/// - Returns DocSums for a set of UIDs stored on the Entrez History server
module EntrezSummary =

    type EntrezSummaryHistoryServerParams =
        ///Web Environment. This parameter specifies the Web Environment that contains the UID list to be provided as input to ESummary. Usually this WebEnv value is obtained from the output of a previous ESearch, EPost or ELink call. The WebEnv parameter must be used in conjunction with query_key.
        |WebEnvironment of string
        ///Query key. This integer specifies which of the UID lists attached to the given Web Environment will be used as input to ESummary. Query keys are obtained from the output of previous ESearch, EPost or ELink calls. The query_key parameter must be used in conjunction with WebEnv.
        |QueryKey       of int

        ///Converts a history server parameter into its (key, value) query string item.
        static member makeQuery = function
            |WebEnvironment q   -> ("WebEnv"    , q         )
            |QueryKey       q   -> ("query_key" , string q  )

    type EntrezSummaryRetrievalParams =
        ///Sequential index of the first record to be retrieved (default=0, corresponding to the first record of the entire set). This parameter can be used in conjunction with retmax to download an arbitrary subset of records from the input set.
        |RetrievalStart of int
        ///Total number of records from the input set to be retrieved, up to a maximum of 10,000. Optionally, for a large set the value of retstart can be iterated while holding retmax constant, thereby downloading the entire set in batches of size retmax.
        |RetrievalMax   of int
        ///Retrieval mode. Determines the format of the returned output. The default value is ‘xml’ for ESummary XML, but ‘json’ is also supported to return output in JSON format.
        |RetrievalMode  of RetrievalModeOptions
        ///Used to specify version 2.0 ESummary XML. The only supported value is ‘2.0’. When present, ESummary will return version 2.0 DocSum XML that is unique to each Entrez database and that often contains more data than the default DocSum XML.
        |Version        of string

        ///Converts a retrieval parameter into its (key, value) query string item.
        static member makeQuery = function
            |RetrievalStart q   -> ("retstart"  , q |> string                   )
            |RetrievalMax   q   -> ("retmax"    , q |> string                   )
            |RetrievalMode  q   -> ("retmode"   , q |> RetrievalModeOptions.make)
            |Version        q   -> ("version"   , q                             )

    type EntrezSummaryParameters =
        |HistoryServerParameters    of EntrezSummaryHistoryServerParams list
        |RetrievalParameters        of EntrezSummaryRetrievalParams list

        ///Converts a parameter group into the list of its (key, value) query string items.
        static member makeQuery = function
            |HistoryServerParameters ql -> ql |> List.map EntrezSummaryHistoryServerParams.makeQuery
            |RetrievalParameters     ql -> ql |> List.map EntrezSummaryRetrievalParams.makeQuery

    type EntrezSummaryQuery =
        {
            ///Database from which to retrieve DocSums. The value must be a valid Entrez database name (default = pubmed).
            Db                  : string
            ///UID list. Either a single UID or a comma-delimited list of UIDs may be provided. All of the UIDs must be from the database specified by db. If more than about 200 UIDs are to be provided, the request should be made using the HTTP POST method.
            ///
            ///For sequence databases (nuccore, nucest, nucgss, popset, protein), the UID list may be a mixed list of GI numbers and accession.version identifiers.
            UIDs                : string list
            ///Optional history server and retrieval parameters that are added to the request as query string items.
            OptionalParameters  : EntrezSummaryParameters list
        }

        ///Builds the GET request for the eSummary endpoint from the given query.
        ///
        ///The "id" item is only added when the UID list is non-empty. All optional parameters are appended as query string items.
        static member makeRequest (q : EntrezSummaryQuery) =

            let db = q.Db

            // String.concat yields "" for an empty list, which is the "no ids" marker matched below.
            let uIDs = q.UIDs |> String.concat ","

            let optParams =
                q.OptionalParameters
                |> List.collect EntrezSummaryParameters.makeQuery

            Request.createUrl Get BaseUrls.eSummary
            |> Request.queryStringItem "db" db
            |> fun r ->
                match uIDs with
                |"" -> r
                |_  -> r |> Request.queryStringItem "id" uIDs
            // Previously optParams was computed but never added to the request.
            |> Request.queryStringItems optParams

0 comments on commit d8e0daf

Please sign in to comment.