diff --git a/.github/workflows/R-Git-Check.yml b/.github/workflows/R-Git-Check.yml index eee0733..1a3fd1f 100644 --- a/.github/workflows/R-Git-Check.yml +++ b/.github/workflows/R-Git-Check.yml @@ -26,7 +26,6 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} diff --git a/DESCRIPTION b/DESCRIPTION index b7065cc..6f7c2ca 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ridigbio Title: Interface to the iDigBio Data API Version: 0.3.9 -Date: 2024-7-02 +Date: 2024-08-19 Encoding: UTF-8 Authors@R: c(person("Francois", "Michonneau", comment="Original Author", role=c("aut", "cph"), email="francois.michonneau@gmail.com"), @@ -22,7 +22,7 @@ Description: An interface to iDigBio's search API that allows downloading such as the metadata end points return lists of information. iDigBio is a US project focused on digitizing and serving museum specimen collections on the web. See for information on iDigBio. 
-URL: https://github.com/iDigBio/ridigbio, http://idigbio.github.io/ridigbio/ +URL: https://github.com/iDigBio/ridigbio, https://idigbio.github.io/ridigbio/ BugReports: https://github.com/iDigBio/ridigbio/issues Depends: R (>= 3.0.1) @@ -33,7 +33,8 @@ Imports: jsonlite, leaflet, kableExtra, - tidyverse + tidyverse, + cowplot License: MIT + file LICENSE LazyData: true Suggests: diff --git a/R/base.R b/R/base.R index 64e8dd0..ea35a99 100644 --- a/R/base.R +++ b/R/base.R @@ -7,9 +7,9 @@ ##' @author Francois Michonneau idig_url <- function(dev = FALSE) { if (dev) { - "http://beta-search.idigbio.org" + "https://beta-search.idigbio.org" } else { - "http://search.idigbio.org" + "https://search.idigbio.org" } } @@ -134,4 +134,5 @@ ignore_unused_imports <- function() { leaflet::`%>%`() kableExtra::kable() tidyverse::tidyverse_logo() + cowplot::theme_minimal_grid() } diff --git a/R/idig_search_media.R b/R/idig_search_media.R index 49aa1ea..684d202 100644 --- a/R/idig_search_media.R +++ b/R/idig_search_media.R @@ -29,17 +29,17 @@ ##' \item{dqs: Data quality score assigned by iDigBio.} ##' \item{etag: Tag assigned by iDigBio.} ##' \item{flags: Data quality flag assigned by iDigBio.} -##' \item{[format](http://purl.org/dc/terms/format) } +##' \item{[format](https://purl.org/dc/terms/format) } ##' \item{hasSpecimen: TRUE or FALSE, indicates if there is an associated record for this media.} ##' \item{[licenselogourl](https://ac.tdwg.org/termlist/#ac_licenseLogoURL)} ##' \item{mediatype: Media object type.} -##' \item{[modified](http://purl.org/dc/terms/modified)} +##' \item{[modified](https://purl.org/dc/terms/modified)} ##' \item{recordids: List of UUID for associated records.} ##' \item{records: UUID for the associated record.} ##' \item{recordset: Record set ID assigned by iDigBio.} -##' \item{[rights](http://purl.org/dc/terms/rights)} -##' \item{[tag](http://rs.tdwg.org/ac/terms/tag)} -##' \item{[type](http://purl.org/dc/terms/type)} +##' 
\item{[rights](https://purl.org/dc/terms/rights)} +##' \item{[tag](https://rs.tdwg.org/ac/terms/tag)} +##' \item{[type](https://purl.org/dc/terms/type)} ##' \item{uuid: Unique identifier assigned by iDigBio.} ##' \item{version: Media record version assigned by iDigBio.} ##' \item{[webstatement](https://developer.adobe.com/xmp/docs/XMPNamespaces/xmpRights/)} diff --git a/R/idig_search_records.R b/R/idig_search_records.R index 997cec1..94a426d 100644 --- a/R/idig_search_records.R +++ b/R/idig_search_records.R @@ -92,12 +92,12 @@ ##' \itemize{ ##' \item{UUID: Unique identifier assigned by iDigBio.} ##' \item{[occurrenceID](https://dwc.tdwg.org/list/#dwc_occurrenceID)} -##' \item{[catalognumber](http://rs.tdwg.org/dwc/terms/catalogNumber)} -##' \item{[family](http://rs.tdwg.org/dwc/terms/family) - may be reassigned by iDigBio} +##' \item{[catalognumber](https://rs.tdwg.org/dwc/terms/catalogNumber)} +##' \item{[family](https://rs.tdwg.org/dwc/terms/family) - may be reassigned by iDigBio} ##' \item{[genus](https://dwc.tdwg.org/list/#dwc_genus) - may be reassigned by iDigBio} -##' \item{[scientificname](http://rs.tdwg.org/dwc/terms/scientificName) - may be reassigned by iDigBio} -##' \item{[country](http://rs.tdwg.org/dwc/terms/country) - may be modified by iDigBio} -##' \item{[stateprovince](http://rs.tdwg.org/dwc/terms/stateProvince) } +##' \item{[scientificname](https://rs.tdwg.org/dwc/terms/scientificName) - may be reassigned by iDigBio} +##' \item{[country](https://rs.tdwg.org/dwc/terms/country) - may be modified by iDigBio} +##' \item{[stateprovince](https://rs.tdwg.org/dwc/terms/stateProvince) } ##' \item{geopoint: Assigned by iDigBio.} ##' \item{[data.dwc:eventDate](https://dwc.tdwg.org/list/#dwc_eventDate)} ##' \item{[data.dwc:year](https://dwc.tdwg.org/list/#dwc_year)} diff --git a/README.md b/README.md index 90ce22f..b335e5e 100644 --- a/README.md +++ b/README.md @@ -40,14 +40,14 @@ sudo apt install libcurl4 ## Getting Started There are several articles that 
can help get you started: -* [Introduction to ridigbio](http://idigbio.github.io/ridigbio/articles/BasicUsage.html) -* [Record API Demo](http://idigbio.github.io/ridigbio/articles/RecordAPIDemo.html) -* [Media API Demo](http://idigbio.github.io/ridigbio/articles/MediaAPIDemo.html) -* [Fields in ridigibio](http://idigbio.github.io/ridigbio/articles/Fields.html) -* [Tissue Samples Locator Demo](http://idigbio.github.io/ridigbio/articles/FindTissue.html) -* [Identification of Modified Data](http://idigbio.github.io/ridigbio/articles/ModifiedDataID.html) -* [Identification of Suspicious Coordinates](http://idigbio.github.io/ridigbio/articles/BadCoordinateID.html) -* [Identification of Data Flags](http://idigbio.github.io/ridigbio/articles/IDDataFlags.html) +* [Introduction to ridigbio](https://idigbio.github.io/ridigbio/articles/BasicUsage.html) +* [Record API Demo](https://idigbio.github.io/ridigbio/articles/RecordAPIDemo.html) +* [Media API Demo](https://idigbio.github.io/ridigbio/articles/MediaAPIDemo.html) +* [Fields in ridigbio](https://idigbio.github.io/ridigbio/articles/Fields.html) +* [Tissue Samples Locator Demo](https://idigbio.github.io/ridigbio/articles/FindTissue.html) +* [Identification of Modified Data](https://idigbio.github.io/ridigbio/articles/ModifiedDataID.html) +* [Identification of Suspicious Coordinates](https://idigbio.github.io/ridigbio/articles/BadCoordinateID.html) +* [Identification of Data Flags](https://idigbio.github.io/ridigbio/articles/IDDataFlags.html) Most iDigBio users are interested in downloading occurrence records: diff --git a/_pkgdown.yml b/_pkgdown.yml index f39492e..768ce52 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: http://idigbio.github.io/ridigbio/ +url: https://idigbio.github.io/ridigbio/ template: bootstrap: 5 params: diff --git a/man/idig_search_media.Rd b/man/idig_search_media.Rd index 87bce35..fadff82 100755 --- a/man/idig_search_media.Rd +++ b/man/idig_search_media.Rd @@ -43,17 +43,17 @@ A data 
frame with fields requested or the following default fields: \item{dqs: Data quality score assigned by iDigBio.} \item{etag: Tag assigned by iDigBio.} \item{flags: Data quality flag assigned by iDigBio.} -\item{\href{http://purl.org/dc/terms/format}{format} } +\item{\href{https://purl.org/dc/terms/format}{format} } \item{hasSpecimen: TRUE or FALSE, indicates if there is an associated record for this media.} \item{\href{https://ac.tdwg.org/termlist/#ac_licenseLogoURL}{licenselogourl}} \item{mediatype: Media object type.} -\item{\href{http://purl.org/dc/terms/modified}{modified}} +\item{\href{https://purl.org/dc/terms/modified}{modified}} \item{recordids: List of UUID for associated records.} \item{records: UUID for the associated record.} \item{recordset: Record set ID assigned by iDigBio.} -\item{\href{http://purl.org/dc/terms/rights}{rights}} -\item{\href{http://rs.tdwg.org/ac/terms/tag}{tag}} -\item{\href{http://purl.org/dc/terms/type}{type}} +\item{\href{https://purl.org/dc/terms/rights}{rights}} +\item{\href{https://rs.tdwg.org/ac/terms/tag}{tag}} +\item{\href{https://purl.org/dc/terms/type}{type}} \item{uuid: Unique identifier assigned by iDigBio.} \item{version: Media record version assigned by iDigBio.} \item{\href{https://developer.adobe.com/xmp/docs/XMPNamespaces/xmpRights/}{webstatement}} diff --git a/man/idig_search_records.Rd b/man/idig_search_records.Rd index 617c535..343aec5 100755 --- a/man/idig_search_records.Rd +++ b/man/idig_search_records.Rd @@ -37,12 +37,12 @@ A data frame with fields requested or the following default fields: \itemize{ \item{UUID: Unique identifier assigned by iDigBio.} \item{\href{https://dwc.tdwg.org/list/#dwc_occurrenceID}{occurrenceID}} -\item{\href{http://rs.tdwg.org/dwc/terms/catalogNumber}{catalognumber}} -\item{\href{http://rs.tdwg.org/dwc/terms/family}{family} - may be reassigned by iDigBio} +\item{\href{https://rs.tdwg.org/dwc/terms/catalogNumber}{catalognumber}} 
+\item{\href{https://rs.tdwg.org/dwc/terms/family}{family} - may be reassigned by iDigBio} \item{\href{https://dwc.tdwg.org/list/#dwc_genus}{genus} - may be reassigned by iDigBio} -\item{\href{http://rs.tdwg.org/dwc/terms/scientificName}{scientificname} - may be reassigned by iDigBio} -\item{\href{http://rs.tdwg.org/dwc/terms/country}{country} - may be modified by iDigBio} -\item{\href{http://rs.tdwg.org/dwc/terms/stateProvince}{stateprovince} } +\item{\href{https://rs.tdwg.org/dwc/terms/scientificName}{scientificname} - may be reassigned by iDigBio} +\item{\href{https://rs.tdwg.org/dwc/terms/country}{country} - may be modified by iDigBio} +\item{\href{https://rs.tdwg.org/dwc/terms/stateProvince}{stateprovince} } \item{geopoint: Assigned by iDigBio.} \item{\href{https://dwc.tdwg.org/list/#dwc_eventDate}{data.dwc:eventDate}} \item{\href{https://dwc.tdwg.org/list/#dwc_year}{data.dwc:year}} diff --git a/man/ridigbio-package.Rd b/man/ridigbio-package.Rd index a0fdda2..2260fa7 100644 --- a/man/ridigbio-package.Rd +++ b/man/ridigbio-package.Rd @@ -11,7 +11,7 @@ An interface to iDigBio's search API that allows downloading specimen records. S Useful links: \itemize{ \item \url{https://github.com/iDigBio/ridigbio} - \item \url{http://idigbio.github.io/ridigbio/} + \item \url{https://idigbio.github.io/ridigbio/} \item Report bugs at \url{https://github.com/iDigBio/ridigbio/issues} } diff --git a/vignettes/BadCoordinateID.Rmd b/vignettes/BadCoordinateID.Rmd index 518bf7a..de58c71 100644 --- a/vignettes/BadCoordinateID.Rmd +++ b/vignettes/BadCoordinateID.Rmd @@ -30,11 +30,13 @@ library(kableExtra) # Load libraries for visualizing geographic data library(leaflet) + +library(cowplot) ``` ## Write a query to search for specimen records -First, let's find all the specimen records for the data quality flag we are interested in. Do this using the `idig_search_records` function from the `ridigbio` package. 
You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/web/packages/ridigbio/ridigbio.pdf). In this example, we want to start by searching for specimens flagged with "rev_geocode_corrected." +First, let's find all the specimen records for the data quality flag we are interested in. Do this using the `idig_search_records` function from the `ridigbio` package. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/package=ridigbio/ridigbio.pdf). In this example, we want to start by searching for specimens flagged with "rev_geocode_corrected." ```{r} # Edit the fields (e.g. `flags`) and values (e.g. "rev_geocode_corrected") in @@ -134,7 +136,7 @@ map <- df_rev_geocode_lat_sign[1:10,] %>% We can visualize this data on a map to better understand what the data quality flag is telling us. For example, in the map below you can see the effect of accidentally reversing the latitude on three example georeferenced specimen records. -```{r echo = FALSE} +```{r echo = FALSE, out.width = '100%'} map ``` @@ -178,8 +180,16 @@ percentFlagged <- sum(spmByColl$n)/totalInstSpm*100 For example, we can ask how many specimen records from which collections at the Natural History Museum of Los Angeles (LACM) have been flagged as "rev_geocode_corrected" by iDigBio. 
*As an aside, although this graph highlights the number of specimen records with data quality issues, these represent only `r round(percentFlagged, 2)`% of the total specimen records published by LACM.* -```{r echo = FALSE} -graph_spmByColl +```{r out.width="700px", echo = FALSE} +graph_spmByColl <- graph_spmByColl + + theme_minimal_grid() + + theme( + text = element_text(size = 22), + axis.text = element_text(size = 22), + plot.title = element_text(size = 22, face = "bold") + ) + +knitr::include_graphics(save_plot("plot.png", graph_spmByColl, base_height = 10, base_width = 24)) ``` We can also explore what *other* data quality flags these specimen records have been flagged with. @@ -202,7 +212,7 @@ df_flagAssoc <- df_flagCoord %>% arrange(category, desc(n)) # Visualize associated data quality flags -ggplot(df_flagAssoc, aes(x = reorder(flags, -percent), y = percent, fill = category)) + +graph_spmByColl <- ggplot(df_flagAssoc, aes(x = reorder(flags, -percent), y = percent, fill = category)) + geom_col() + theme(axis.title.x = element_text(face = "bold"), axis.text.x = element_text(angle = 75, hjust = 1), @@ -215,3 +225,15 @@ ggplot(df_flagAssoc, aes(x = reorder(flags, -percent), y = percent, fill = categ title = "LACM records flagged for geo-coordinate issues are also flagged for...", fill = "flag category") ``` +```{r out.width="700px", echo = FALSE} +graph_spmByColl <- graph_spmByColl + + theme_minimal_grid() + + theme( + text = element_text(size = 22), + axis.text = element_text(size = 22), + axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), + plot.title = element_text(size = 22, face = "bold") + ) + +knitr::include_graphics(save_plot("plot2.png", graph_spmByColl, base_height = 10, base_width = 24)) +``` \ No newline at end of file diff --git a/vignettes/BasicUsage.Rmd b/vignettes/BasicUsage.Rmd index d1bed8f..8a41527 100644 --- a/vignettes/BasicUsage.Rmd +++ b/vignettes/BasicUsage.Rmd @@ -19,7 +19,7 @@ In this demo we will cover how to: ## Getting 
Started -First, you must install the ridigbio package. If you are new to R and R studio, please refer to our QUBES module to get started: Introduction to R with Biodiversity Data, [doi:10.25334/84FC-TE88](htpps://www.doi.org/10.25334/84FC-TE88) . +First, you must install the ridigbio package. If you are new to R and RStudio, please refer to our QUBES module to get started: Introduction to R with Biodiversity Data, [doi:10.25334/84FC-TE88](https://www.doi.org/10.25334/84FC-TE88). The latest version of our R package can be installed via CRAN. ```{r eval=FALSE, include=TRUE} @@ -49,21 +49,21 @@ When fields are not specified, default columns include the following: | Column | Description | | ----------- | ----------- | | uuid | Universally Unique IDentifier assigned by iDigBio | -| occurrenceid |identifier for the occurrence, http://rs.tdwg.org/dwc/terms/occurrenceID| -| catalognumber |identifier for the record within the collection, http://rs.tdwg.org/dwc/terms/catalogNumber| -| family | scientific name of the family, http://rs.tdwg.org/dwc/terms/family| -| genus | scientific name of the genus, http://rs.tdwg.org/dwc/terms/genus | -| scientificname | scientific name, http://rs.tdwg.org/dwc/terms/scientificName | -| country | country, http://rs.tdwg.org/dwc/terms/country | -| stateprovince |name of the next smaller administrative region than country, http://rs.tdwg.org/dwc/terms/stateProvince| -| geopoint.lon | equivalent to decimalLongitude, http://rs.tdwg.org/dwc/terms/decimalLongitude| -| geopoint.lat | equivalent to decimalLatitude,http://rs.tdwg.org/dwc/terms/decimalLatitude | +| occurrenceid |identifier for the occurrence, https://rs.tdwg.org/dwc/terms/occurrenceID| +| catalognumber |identifier for the record within the collection, https://rs.tdwg.org/dwc/terms/catalogNumber| +| family | scientific name of the family, https://rs.tdwg.org/dwc/terms/family| +| genus | scientific name of the genus, https://rs.tdwg.org/dwc/terms/genus | +| scientificname | 
scientific name, https://rs.tdwg.org/dwc/terms/scientificName | +| country | country, https://rs.tdwg.org/dwc/terms/country | +| stateprovince |name of the next smaller administrative region than country, https://rs.tdwg.org/dwc/terms/stateProvince| +| geopoint.lon | equivalent to decimalLongitude, https://rs.tdwg.org/dwc/terms/decimalLongitude| +| geopoint.lat | equivalent to decimalLatitude, https://rs.tdwg.org/dwc/terms/decimalLatitude | | datecollected | [Modified field and could lack biological meaning](https://github.com/iDigBio/idb-backend/issues/229) | | data.dwc:eventDate | equivalent to eventDate, https://dwc.tdwg.org/list/#dwc_eventDate | | data.dwc:year | year of collection event, https://dwc.tdwg.org/list/#dwc_year | | data.dwc:month | month of collection event, https://dwc.tdwg.org/list/#dwc_month | | data.dwc:day | day of collection event, https://dwc.tdwg.org/list/#dwc_day | -| collector | equivalent to recordedBy, http://rs.tdwg.org/dwc/terms/recordedBy | +| collector | equivalent to recordedBy, https://rs.tdwg.org/dwc/terms/recordedBy | | recordset | indicates the iDigBio recordset the observation belongs to | @@ -121,17 +121,17 @@ When fields are not specified, default columns include the following: | dqs | data quality score assigned by iDigBio | | etag | tag assigned by iDigBio | | flags | data quality flag assigned by iDigBio | -| format | media format, http://purl.org/dc/terms/format | +| format | media format, https://purl.org/dc/terms/format | | hasSpecimen | TRUE or FALSE, indicates if there is an associated record for this media | | licenselogourl | media license, https://ac.tdwg.org/termlist/#ac_licenseLogoURL | | mediatype | media object type | -| modified | date modified, http://purl.org/dc/terms/modified | +| modified | date modified, https://purl.org/dc/terms/modified | | recordids | list of UUID for associated records | | records | UUID for the associated record. 
Use this field to connect Record downloads with Media downloads | | recordset | indicates the iDigBio recordset the observation belongs to | -| rights | media rights, http://purl.org/dc/terms/rights | -| tag | general keywords or tags, http://rs.tdwg.org/ac/terms/tag | -| type | media type, http://purl.org/dc/terms/type | +| rights | media rights, https://purl.org/dc/terms/rights | +| tag | general keywords or tags, https://rs.tdwg.org/ac/terms/tag | +| type | media type, https://purl.org/dc/terms/type | | uuid | Universally Unique IDentifier assigned by iDigBio | | version | media record version assigned by iDigBio | | webstatement | media rights, https://developer.adobe.com/xmp/docs/XMPNamespaces/xmpRights/ | diff --git a/vignettes/FindTissue.Rmd b/vignettes/FindTissue.Rmd index d6f37ed..147de87 100644 --- a/vignettes/FindTissue.Rmd +++ b/vignettes/FindTissue.Rmd @@ -1,6 +1,9 @@ --- -title: Tissue Samples Locator Demo -output: rmarkdown::html_vignette +title: "Tissue Samples Locator Demo" +output: + rmarkdown::html_vignette: + md_extensions: + - "-autolink_bare_uris" vignette: > %\VignetteIndexEntry{Tissue Samples Locator Demo} %\VignetteEngine{knitr::rmarkdown} @@ -31,7 +34,7 @@ library(kableExtra) ## Write a query to search for specimen records -First, let's find all the specimen records for the species you are interested in. Do this using the `idig_search_records` function from the `ridigbio` package. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/web/packages/ridigbio/ridigbio.pdf). In this example, we want to search for specimens identified as being in one of the following genera: _Manis_, _Rhinolophus_, or _Paguma_. +First, let's find all the specimen records for the species you are interested in. Do this using the `idig_search_records` function from the `ridigbio` package. 
You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/package=ridigbio/ridigbio.pdf). In this example, we want to search for specimens identified as being in one of the following genera: _Manis_, _Rhinolophus_, or _Paguma_. ```{r} # Edit the fields (e.g. `genus`) and values (e.g. "manis") in `list()` @@ -155,7 +158,40 @@ collections <- tibble(collection = attr(recordsfiltered, "attribution")) %>% na.rm = TRUE) %>% # Restructure data frame so that there is one row per recordset group_by(recordset_uuid) %>% - mutate(contact_index = row_number()) %>% + mutate(contact_index = row_number()) %>% + mutate(recordset_url = if_else(grepl("^http://", recordset_url), + gsub("^http://", "https://", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else(grepl("www.amnh.org/our-research/vertebrate-zoology/mammalogy", recordset_url), + gsub("www.amnh.org/our-research/vertebrate-zoology/mammalogy", "www.amnh.org/research/vertebrate-zoology/mammalogy", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else(grepl("www.burkemuseum.org/mammalogy", recordset_url), + gsub("www.burkemuseum.org/mammalogy", "www.burkemuseum.org/collections-and-research/biology/mammalogy", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else(grepl("www.nhm.org", recordset_url), + gsub("www.nhm.org", "nhm.org", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else( + grepl("appl003.lsu.edu/natsci/lmns.nsf/\\$Content/Mammals\\?OpenDocument", recordset_url), + gsub("appl003.lsu.edu/natsci/lmns.nsf/\\$Content/Mammals\\?OpenDocument", "appl103.lsu.edu/natsci/Collections/natscicolsearch.nsf/OpenMainPage?OpenAgent&ID=1042", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else(grepl("www.nsrl.ttu.edu/collections/Mammals/index.htm", recordset_url), + 
gsub("www.nsrl.ttu.edu/collections/Mammals/index.htm", "www.depts.ttu.edu/nsrl/collections/mammal.php", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else(grepl("sites01.lsu.edu/wp/mns/research-collections/genetic-resources/", recordset_url), + gsub("sites01.lsu.edu/wp/mns/research-collections/genetic-resources/", "appl103.lsu.edu/natsci/Collections/natscicolsearch.nsf/OpenMainPage?OpenAgent&ID=1050", recordset_url), + recordset_url + )) %>% + mutate(recordset_url = if_else(grepl("https://www.msb.unm.edu", recordset_url), + gsub("www.msb.unm.edu", "www.msb.unm.edu", recordset_url), + recordset_url + )) %>% pivot_wider(names_from = contact_index, values_from = c(contact_name, contact_role, contact_email)) %>% # Include how many records in the data were contributed by each recordset diff --git a/vignettes/IDDataFlags.Rmd b/vignettes/IDDataFlags.Rmd index 5501678..c3c576c 100644 --- a/vignettes/IDDataFlags.Rmd +++ b/vignettes/IDDataFlags.Rmd @@ -31,7 +31,7 @@ library(kableExtra) ## Write a query to search for specimen records -First, let's find all the specimen records for the data quality flag we are interested in. Do this using the `idig_search_records` function from the `ridigbio` package. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/web/packages/ridigbio/ridigbio.pdf). +First, let's find all the specimen records for the data quality flag we are interested in. Do this using the `idig_search_records` function from the `ridigbio` package. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/package=ridigbio/ridigbio.pdf). 
In this example, we want to start by searching for specimens flagged with "rev_geocode_flip" which means that iDigBio has swapped the values of the latitude and longitude fields in order to place the coordinate point in the country stated by the record. For example, iDigBio ingests a record with the coordinates "-87.646166, 41.89542" that says it was collected in the United States, but the verbatim coordinates actually plot to Antarctica. If the latitude and longitude are flipped, then the coordinates plot to the United States, so iDigBio assumes that this is what the data provider meant. diff --git a/vignettes/MediaAPIDemo.Rmd b/vignettes/MediaAPIDemo.Rmd index 55e630f..527df86 100644 --- a/vignettes/MediaAPIDemo.Rmd +++ b/vignettes/MediaAPIDemo.Rmd @@ -33,7 +33,7 @@ library(kableExtra) ## Write a query to search for specimen records -First, you need to find all the media records for which you are interested in downloading media files. Do this using the `idig_search_media` function from the ridigbio package, which allows you to search for media records based on data contained in linked specimen records, like species or collecting locality. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/web/packages/ridigbio/ridigbio.pdf). In this example, we want to search for images of herbarium specimens of species in the genus _Acer_ that were collected in the United States. +First, you need to find all the media records for which you are interested in downloading media files. Do this using the `idig_search_media` function from the ridigbio package, which allows you to search for media records based on data contained in linked specimen records, like species or collecting locality. 
You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/package=ridigbio/ridigbio.pdf). In this example, we want to search for images of herbarium specimens of species in the genus _Acer_ that were collected in the United States. ```{r} # Edit the fields (e.g. `genus`) and values (e.g. "manis") in `list()` @@ -48,6 +48,19 @@ records <- idig_search_media(rq = list(genus = "acer", "format", "records"), limit = 10) + +records$accessuri <- if_else(grepl("^http://", records$accessuri), + gsub("^http://", "", records$accessuri), + records$accessuri +) +records$accessuri <- if_else(grepl("https://mam.ansp.org", records$accessuri), + gsub("https://mam.ansp.org", "mam.ansp.org", records$accessuri), + records$accessuri +) +records$accessuri <- if_else(grepl("https://ibss-images.calacademy.org", records$accessuri), + gsub("https://ibss-images.calacademy.org", "ibss-images.calacademy.org", records$accessuri), + records$accessuri +) ``` The result of the code above is a data frame called `records`: diff --git a/vignettes/ModifiedDataID.Rmd b/vignettes/ModifiedDataID.Rmd index 1e990f7..3624aae 100644 --- a/vignettes/ModifiedDataID.Rmd +++ b/vignettes/ModifiedDataID.Rmd @@ -34,7 +34,7 @@ library(kableExtra) ## Write a query to search for specimen records -First, let's find all the specimen records from a given recordset, e.g. all of the records published by a single collection. Do this using the `idig_search_records` function from the `ridigbio` package. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/web/packages/ridigbio/ridigbio.pdf). 
In this example, we want to start by searching for specimens from the [Invertebrate Paleontology collection](https://www.idigbio.org/portal/recordsets/5082e6c8-8f5b-4bf6-a930-e3e6de7bf6fb) at the Natural History Museum of Los Angeles. +First, let's find all the specimen records from a given recordset, e.g. all of the records published by a single collection. Do this using the `idig_search_records` function from the `ridigbio` package. You can learn more about this function from the [iDigBio API documentation](https://github.com/iDigBio/idigbio-search-api/wiki) and [ridigbio documentation](https://cran.r-project.org/package=ridigbio/ridigbio.pdf). In this example, we want to start by searching for specimens from the [Invertebrate Paleontology collection](https://www.idigbio.org/portal/recordsets/5082e6c8-8f5b-4bf6-a930-e3e6de7bf6fb) at the Natural History Museum of Los Angeles. ```{r} # Edit the value after `recordset` to search for data from a different collection diff --git a/vignettes/RecordAPIDemo.Rmd b/vignettes/RecordAPIDemo.Rmd index 815fb02..48c3c16 100644 --- a/vignettes/RecordAPIDemo.Rmd +++ b/vignettes/RecordAPIDemo.Rmd @@ -1,18 +1,21 @@ --- title: Record API Demo -output: rmarkdown::html_vignette +output: + rmarkdown::html_vignette: + md_extensions: + - "-autolink_bare_uris" vignette: > %\VignetteIndexEntry{Record API Demo} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- -Code here written by [Erica Krimmel](https://orcid.org/0000-0003-3192-0080) for a workshop at the [2020 Digital Data conference](https://bit.ly/DigiData4). +Code here written by [Erica Krimmel](https://orcid.org/0000-0003-3192-0080) for a workshop at the [2020 Digital Data conference](https://www.idigbio.org/wiki/index.php/4th_Annual_Digital_Data_Conference,_Indiana_University). 
## General Overview -You can use the [iDigBio API](https://github.com/idigbio/idigbio-search-api/wiki) to find specimen records using the same search parameters available in the [iDigBio Portal](https://www.idigbio.org/portal/search). Wrappers like [ridigbio](https://cran.r-project.org/web/packages/ridigbio/index.html), which we are covering in this demo, provide a simple way to use the iDigBio API in the context of your research pipeline. If you already use, or are considering using, R for data exploration or analysis, it makes sense to bring data into R directly from iDigBio via the API. In this demo we will cover a brief overview of fundamental functions in the ridigbio package that you can use to make your research pipeline more reproducible. +You can use the [iDigBio API](https://github.com/idigbio/idigbio-search-api/wiki) to find specimen records using the same search parameters available in the [iDigBio Portal](https://www.idigbio.org/portal/search). Wrappers like [ridigbio](https://cran.r-project.org/package=ridigbio), which we are covering in this demo, provide a simple way to use the iDigBio API in the context of your research pipeline. If you already use, or are considering using, R for data exploration or analysis, it makes sense to bring data into R directly from iDigBio via the API. In this demo we will cover a brief overview of fundamental functions in the ridigbio package that you can use to make your research pipeline more reproducible. In this demo we will cover how to: @@ -33,7 +36,7 @@ library(kableExtra) ## Write a query to search for specimens using `idig_search_records` -When you use an interface like the iDigBio Portal, you are already writing a query to search for specimens. If you are new to coding, it can be helpful to begin by figuring out your query in a user-friendly interface such as the Portal, then translating it to code in R once you understand what you want to search for. 
One of the hardest parts of using ridigbio to search for specimen records is know what the field you want to search is named. The iDigBio API provides a list of field names [here](https://search.idigbio.org/v2/meta/fields/records), but you will need to reference other sources, like [documentation for the Darwin Core standard](https://dwc.tdwg.org/terms/), to understand what kind of information these fields typically contain. +When you use an interface like the iDigBio Portal, you are already writing a query to search for specimens. If you are new to coding, it can be helpful to begin by figuring out your query in a user-friendly interface such as the Portal, then translating it to code in R once you understand what you want to search for. One of the hardest parts of using ridigbio to search for specimen records is knowing what the field you want to search is named. The iDigBio API provides a list of field names [here](https://www.idigbio.org/content/idigbio-search-api-fields), but you will need to reference other sources, like [documentation for the Darwin Core standard](https://dwc.tdwg.org/terms/), to understand what kind of information these fields typically contain. 
```{r} # Let's start with a simple search introducing the primary arguments for the @@ -69,6 +72,26 @@ knitr::kable(records_1A) %>% records_1B <- idig_search_records( rq = list(genus = "shortia")) +records_1B$occurrenceid <- if_else(grepl("^http://", records_1B$occurrenceid), + gsub("^http://", "", records_1B$occurrenceid), + records_1B$occurrenceid +) + +records_1B$occurrenceid <- if_else(grepl("data.biodiversitydata.nl/naturalis", records_1B$occurrenceid), + gsub("data.biodiversitydata.nl/naturalis", "bioportal.naturalis.nl/nl", records_1B$occurrenceid), + records_1B$occurrenceid +) + +records_1B$occurrenceid <- if_else(grepl("https://grbio.org/cool", records_1B$occurrenceid), + gsub("https://grbio.org/cool", "grbio.org/cool", records_1B$occurrenceid), + records_1B$occurrenceid +) + +records_1B$occurrenceid <- if_else(grepl("https://biocol.org", records_1B$occurrenceid), + gsub("https://biocol.org", "biocol.org", records_1B$occurrenceid), + records_1B$occurrenceid +) + # Display the data frame we just created above in a nice pretty table for HTML knitr::kable(records_1B) %>% kable_styling(bootstrap_options = @@ -104,6 +127,11 @@ records_2B <- idig_search_records( fields = "all", limit = 10) +records_2B$institutionid <- if_else(grepl("^http://", records_2B$institutionid), + gsub("^http://", "https://", records_2B$institutionid), + records_2B$institutionid +) + # Display the data frame we just created above in a nice pretty table for HTML knitr::kable(records_2B) %>% kable_styling(bootstrap_options = diff --git a/vignettes/plot.png b/vignettes/plot.png new file mode 100644 index 0000000..97ba28c Binary files /dev/null and b/vignettes/plot.png differ diff --git a/vignettes/plot2.png b/vignettes/plot2.png new file mode 100644 index 0000000..058a948 Binary files /dev/null and b/vignettes/plot2.png differ