Merge pull request #244 from ropenscilabs/develop

Develop to Master
ropensci · Jan 10, 2018 · ee10b78 · ee10b78
2 parents 55f67c8 + 5b0d9dd
commit ee10b78
Show file tree

Hide file tree

Showing 10 changed files with 69 additions and 52 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,7 +16,6 @@ Authors@R: c(
   person("Connor", "Kirkpatrick", email="[email protected]", role = 'ctb'),
   person("Scott","Brenstuhl", email="[email protected]", role = 'ctb')
   )
-Maintainer: Elin Waring <[email protected]>
 Description: A simple to use summary function that can be used with pipes
     and displays nicely in the console. The default summary statistics may be 
     modified by the user as can the default formatting. Support for data frames 
@@ -43,7 +42,8 @@ Suggests:
     extrafont,
     rmarkdown,
     testthat (>= 2.0.0),
-    withr
+    withr,
+    covr
 License: GPL-3
 Encoding: UTF-8
 LazyData: true

diff --git a/NEWS.md b/NEWS.md
@@ -1,14 +1,17 @@
 skimr 1.0.1 (2018-01-xx)
 ========================
 ### NEW FEATURES
-   * Add support for spark plots on Windows
+  * Add support for spark plots on Windows
 
 ### MAJOR CHANGES
-   * spark_line() and spark_bar() are no longer exported
+  * spark_line() and spark_bar() are no longer exported
+  * Default statistics for numeric changed from min(x) and max(x) to 
+    quantile(x, probs = 0) and quantile(x, probs = 1). These changes
+    lead to more predictable behaviors when a column is all NA values.
 
 #### MINOR IMPROVEMENTS
-   * Add minimimum required version for stringr
-   * Improve documentation related to fonts
+  * Add minimum required version for stringr
+  * Improve documentation related to fonts
 
 ### BUG FIXES
   * Fix issue where a histogram for data with all NAs threw an error

diff --git a/R/functions.R b/R/functions.R
@@ -136,11 +136,11 @@ numeric_funs <- list(
   n = length,
   mean = purrr::partial(mean, na.rm = TRUE),
   sd = purrr::partial(sd, na.rm = TRUE),
-  min = purrr::partial(min, na.rm = TRUE),
+  p0 = purrr::partial(quantile, probs = 0, na.rm = TRUE, names = FALSE),
   p25 = purrr::partial(quantile, probs = .25, na.rm = TRUE, names = FALSE),
   median = purrr::partial(median, na.rm = TRUE),
   p75 = purrr::partial(quantile, probs = .75, na.rm = TRUE, names = FALSE),
-  max = purrr::partial(max, na.rm = TRUE),
+  p100 = purrr::partial(quantile, probs = 1, na.rm = TRUE, names = FALSE),
   hist = inline_hist
 )
 

diff --git a/appveyor.yml b/appveyor.yml
@@ -43,3 +43,6 @@ artifacts:
 
   - path: '\*_*.zip'
     name: Bits
+
+on_success:
+  - Rscript -e "covr::codecov()"
diff --git a/tests/testthat/test-functions.R b/tests/testthat/test-functions.R
@@ -166,8 +166,8 @@ test_that("show_skimmers() lets you pick which type you want returned", {
 })
 
 test_that("show_skimmers() lets you pick which many types you want returned", {
-  correct <- list(numeric = c("missing", "complete", "n", "mean",  "sd", "min",
-                              "p25", "median", "p75", "max", "hist"),
+  correct <- list(numeric = c("missing", "complete", "n", "mean",  "sd", "p0",
+                              "p25", "median", "p75", "p100", "hist"),
                   character = c("missing",  "complete", "n", "min", "max",
                                 "empty", "n_unique"))
   skimmers <- show_skimmers(c("numeric", "character"))
@@ -188,8 +188,8 @@ test_that("show_skimmers() returns something if given an unassigned type", {
 
 test_that("Skim functions can be removed by setting them to NULL", {
   skim_with(numeric = list(hist = NULL))
-  correct <- list(numeric = c("missing", "complete", "n", "mean",  "sd", "min",
-                              "p25", "median", "p75", "max"))
+  correct <- list(numeric = c("missing", "complete", "n", "mean",  "sd", "p0",
+                              "p25", "median", "p75", "p100"))
   input <- show_skimmers("numeric")
   expect_identical(correct, input)
   skim_with_defaults()

diff --git a/tests/testthat/test-skim.R b/tests/testthat/test-skim.R
@@ -17,7 +17,7 @@ test_that("Skimming a data frame works as expected", {
   expect_identical(input$type, c(rep("numeric", each = 11), 
                                  rep("factor", each = 12)))
   expect_identical(head(input$stat),
-                   c("missing", "complete", "n", "mean", "sd", "min"))
+                   c("missing", "complete", "n", "mean", "sd", "p0"))
   expect_identical(tail(input$stat), c(rep("top_counts", 5), rep("ordered", 1)))
   expect_identical(head(input$level), rep(".all", 6))
   expect_identical(tail(input$level),
@@ -67,7 +67,7 @@ test_that("Skimming a grouped data frame works as expected", {
                                     "carb"), 8, each = 11))
   expect_identical(input$type, rep("numeric", 792))
   expect_identical(input$stat, rep(c("missing", "complete", "n", "mean", "sd",
-                                     "min", "p25", "median", "p75", "max",
+                                     "p0", "p25", "median", "p75", "p100",
                                      "hist"), 72))
   expect_identical(input$level, rep(".all", 792))
   expect_identical(input$value[1:5], c(0, 1, 1, 21.5, NA))
@@ -95,7 +95,7 @@ test_that("skim_to_list works as expected", {
   expect_equal(dim(input[["numeric"]]), c(1, 12))
   expect_identical(names(input[["numeric"]]), 
                    c("variable", "missing", "complete", "n", "mean",
-                     "sd", "min", "p25", "median", "p75", "max", "hist" ))
+                     "sd", "p0", "p25", "median", "p75", "p100", "hist" ))
 })
 
 test_that("skim_to_list works with grouped data", {
@@ -137,7 +137,7 @@ test_that("Skimming a column of a data frame works as expected", {
   expect_identical(input$variable, c(rep(c("chickwts$weight"), each = 11)))
   expect_identical(input$type, c(rep("numeric", each = 11)))
   expect_identical(head(input$stat),
-                   c("missing", "complete", "n", "mean", "sd", "min"))
+                   c("missing", "complete", "n", "mean", "sd", "p0"))
   expect_identical(head(input$level), rep(".all", 6))
   expect_equal(head(input$value), c(0, 71, 71, 261.3, 78.1, 108), tol = .01)
   expect_identical(head(input$formatted),
@@ -164,7 +164,7 @@ test_that("Skimming a data frame with selected columns works as expected", {
   expect_identical(input$variable, rep("weight", 11))
   expect_identical(input$type, rep("numeric", 11))
   expect_identical(head(input$stat),
-                   c("missing", "complete", "n", "mean", "sd", "min"))
+                   c("missing", "complete", "n", "mean", "sd", "p0"))
   expect_identical(head(input$level), rep(".all", 6))
   expect_equal(head(input$value), c(0, 71, 71, 261.3, 78.1, 108), tol = .01)
   expect_identical(head(input$formatted),
@@ -214,7 +214,7 @@ test_that("Tidyselect helpers work as expected", {
                                        each = 11))
   expect_identical(input$type, rep("numeric", 22))
   expect_identical(head(input$stat),
-                   c("missing", "complete", "n", "mean", "sd", "min"))
+                   c("missing", "complete", "n", "mean", "sd", "p0"))
   expect_identical(head(input$level), rep(".all", 6))
   expect_equal(head(input$value), c(0, 150, 150, 5.84, 0.82, 4.3), 0.1)
   expect_identical(head(input$formatted),
@@ -242,7 +242,7 @@ test_that("Skimming a grouped df works as expected selecting two columns", {
   expect_identical(input$variable, rep(c("mpg", "disp"), 8, each = 11))
   expect_identical(input$type, rep("numeric", 176))
   expect_identical(input$stat, rep(c("missing", "complete", "n", "mean", "sd",
-                                     "min", "p25", "median", "p75", "max",
+                                     "p0", "p25", "median", "p75", "p100",
                                      "hist"), 16))
   expect_identical(input$level, rep(".all", 176))
   expect_identical(input$value[1:5], c(0, 1, 1, 21.5, NA))

diff --git a/tests/testthat/test-skim_print.R b/tests/testthat/test-skim_print.R
@@ -68,26 +68,26 @@ test_that("Skimr kable prints as expected, 64-bit", {
   expect_equal(input[11], "Variable type: numeric")
   expect_equal(input[12], "")  
   expect_equal(input[15], 
-"|Petal.Length |0       |150      |150 |3.76 |1.77 |1   |1.6 |4.35   |5.1 |6.9 |▇▁▁▂▅▅▃▁ |"
+"|Petal.Length |0       |150      |150 |3.76 |1.77 |1   |1.6 |4.35   |5.1 |6.9  |▇▁▁▂▅▅▃▁ |"
    )
   expect_equal(input[16], 
-"|Petal.Width  |0       |150      |150 |1.2  |0.76 |0.1 |0.3 |1.3    |1.8 |2.5 |▇▁▁▅▃▃▂▂ |"
+"|Petal.Width  |0       |150      |150 |1.2  |0.76 |0.1 |0.3 |1.3    |1.8 |2.5  |▇▁▁▅▃▃▂▂ |"
   )
   expect_equal(input[17], 
-"|Sepal.Length |0       |150      |150 |5.84 |0.83 |4.3 |5.1 |5.8    |6.4 |7.9 |▂▇▅▇▆▅▂▂ |"
+"|Sepal.Length |0       |150      |150 |5.84 |0.83 |4.3 |5.1 |5.8    |6.4 |7.9  |▂▇▅▇▆▅▂▂ |"
    )
   expect_equal(input[18], 
-"|Sepal.Width  |0       |150      |150 |3.06 |0.44 |2   |2.8 |3      |3.3 |4.4 |▁▂▅▇▃▂▁▁ |"
+"|Sepal.Width  |0       |150      |150 |3.06 |0.44 |2   |2.8 |3      |3.3 |4.4  |▁▂▅▇▃▂▁▁ |"
    )
 
   # The headers are different on windows
   # Just ignore them
   skip_on_os("windows")
   expect_equal(input[13], 
-"|variable     |missing |complete |n   |mean |sd   |min |p25 |median |p75 |max |hist     |"
+"|variable     |missing |complete |n   |mean |sd   |p0  |p25 |median |p75 |p100 |hist     |"
   )
   expect_equal(input[14], 
-"|:------------|:-------|:--------|:---|:----|:----|:---|:---|:------|:---|:---|:--------|"
+"|:------------|:-------|:--------|:---|:----|:----|:---|:---|:------|:---|:----|:--------|"
   )
 })
 
@@ -99,16 +99,16 @@ test_that("Skimr kable prints as expected, 32-bit windows", {
 
   expect_length(input, 18)
   expect_equal(input[15], 
-"|Petal.Length |0       |150      |150 |3.76 |1.77 |1   |1.6 |4.35   |5.1 |6.9 |▇▁▁▂▅▅▃▁ |"
+"|Petal.Length |0       |150      |150 |3.76 |1.77 |1   |1.6 |4.35   |5.1 |6.9  |▇▁▁▂▅▅▃▁ |"
    )
   expect_equal(input[16], 
-"|Petal.Width  |0       |150      |150 |1.2  |0.76 |0.1 |0.3 |1.3    |1.8 |2.5 |▇▁▁▃▃▃▂▂ |"
+"|Petal.Width  |0       |150      |150 |1.2  |0.76 |0.1 |0.3 |1.3    |1.8 |2.5  |▇▁▁▃▃▃▂▂ |"
   )
   expect_equal(input[17], 
-"|Sepal.Length |0       |150      |150 |5.84 |0.83 |4.3 |5.1 |5.8    |6.4 |7.9 |▂▇▅▇▆▅▂▂ |"
+"|Sepal.Length |0       |150      |150 |5.84 |0.83 |4.3 |5.1 |5.8    |6.4 |7.9  |▂▇▅▇▆▅▂▂ |"
    )
   expect_equal(input[18], 
-"|Sepal.Width  |0       |150      |150 |3.06 |0.44 |2   |2.8 |3      |3.3 |4.4 |▁▂▅▇▃▂▁▁ |"
+"|Sepal.Width  |0       |150      |150 |3.06 |0.44 |2   |2.8 |3      |3.3 |4.4  |▁▂▅▇▃▂▁▁ |"
    )
 })
 
@@ -154,7 +154,7 @@ test_that("skimr::pander prints as expected", {
  "----------------------------------------------------------------------------"
   )
   expect_equal(input[23], 
- " variable   missing   complete   n     mean     sd     min    p25    median "
+ " variable   missing   complete   n     mean     sd     p0     p25    median "
   )
   expect_equal(input[24], 
  "---------- --------- ---------- ---- -------- ------- ----- ------- --------"
@@ -169,11 +169,11 @@ test_that("skimr::pander prints as expected", {
   expect_equal(input[28], "Table: Table continues below")
   expect_equal(input[29], "")
   expect_equal(input[30], " ")
-  expect_equal(input[31], "------------------------")
-  expect_equal(input[32], "  p75    max     hist   ")
-  expect_equal(input[33], "------- ----- ----------")
-  expect_equal(input[34], " 323.5   423   ▃▅▅▇▃▇▂▂ ")
-  expect_equal(input[35], "------------------------")
+  expect_equal(input[31], "-------------------------")
+  expect_equal(input[32], "  p75    p100     hist   ")
+  expect_equal(input[33], "------- ------ ----------")
+  expect_equal(input[34], " 323.5   423    ▃▅▅▇▃▇▂▂ ")
+  expect_equal(input[35], "-------------------------")
   expect_equal(input[36], "")
 })
 

diff --git a/tests/testthat/test-skim_v.R b/tests/testthat/test-skim_v.R
@@ -9,11 +9,11 @@ test_that("skim_v returns expected response for numeric vectors", {
     "numeric",        "n",   ".all", 32,                   "32",
     "numeric",     "mean",   ".all", mean(mtcars$mpg),     "20.09",
     "numeric",       "sd",   ".all", sd(mtcars$mpg),       "6.03", 
-    "numeric",      "min",   ".all", min(mtcars$mpg),      "10.4",
+    "numeric",      "p0",   ".all", min(mtcars$mpg),      "10.4",
     "numeric",      "p25",   ".all", quantiles[1],         "15.43",
     "numeric",   "median",   ".all", median(mtcars$mpg),   "19.2",
     "numeric",      "p75",   ".all", quantiles[2],         "22.8",
-    "numeric",      "max",   ".all", max(mtcars$mpg),      "33.9",
+    "numeric",      "p100",   ".all", max(mtcars$mpg),      "33.9",
     "numeric",     "hist",   ".all", NA,                   "▃▇▇▇▃▂▂▂")
   input <- skimr:::skim_v(mtcars$mpg)
   expect_identical(input, correct)
@@ -63,11 +63,11 @@ test_that("skim_v handles numeric vectors with NAs and extreme numbers", {
     "numeric",        "n", ".all",  3,                              "3",
     "numeric",     "mean", ".all",  0,                              "0", 
     "numeric",       "sd", ".all",  sd(patho, na.rm = TRUE),        "1.3e+16",
-    "numeric",      "min", ".all",  -(2^.Machine$double.digits),    "-9e+15",
+    "numeric",      "p0", ".all",  -(2^.Machine$double.digits),    "-9e+15",
     "numeric",      "p25", ".all",  pqs[1],                     "-4.5e+15",
     "numeric",   "median", ".all",  0,                              "0",
     "numeric",      "p75", ".all",  pqs[2],                     "4.5e+15", 
-    "numeric",      "max", ".all",  +(2^.Machine$double.digits),   "9e+15",
+    "numeric",      "p100", ".all",  +(2^.Machine$double.digits),   "9e+15",
     "numeric",     "hist", ".all", NA, "▇▁▁▁▁▁▁▇")
   input <- skimr:::skim_v(patho)
   expect_identical(input, correct_patho)
@@ -312,3 +312,22 @@ test_that("Skim_v works when a function generates top_count
   )
   expect_identical(skimr:::skim_v(iris$Species)[7:8,], expected)
 })
+
+test_that("numeric skim is calculated correctly when x is all NAs.", {
+  x <- as.numeric(c(NA, NA, NA))
+  input <- skim(x)
+  correct <- tibble::tribble(
+    ~type,          ~stat, ~level,   ~value,              ~ formatted, ~variable,
+    "numeric",  "missing",   ".all", 3,                    "3",         "x",
+    "numeric", "complete",   ".all", 0,                    "0",         "x",
+    "numeric",        "n",   ".all", 3,                    "3",         "x",
+    "numeric",     "mean",   ".all", NaN,                 "NaN",         "x",
+    "numeric",       "sd",   ".all", NA,                   "NA",         "x", 
+    "numeric",      "p0",   ".all", NA,                    "NA",         "x",
+    "numeric",      "p25",   ".all", NA,                   "NA",         "x",
+    "numeric",   "median",   ".all", NA,                   "NA",         "x",
+    "numeric",      "p75",   ".all", NA,                   "NA",         "x",
+    "numeric",      "p100",  ".all", NA,                    "NA",         "x",
+    "numeric",     "hist",   ".all", NA,                   " ",         "x" )
+  expect_identical(input[1:6], correct[1:6])
+})
diff --git a/tests/testthat/test-stats.R b/tests/testthat/test-stats.R
@@ -161,5 +161,3 @@ test_that("sorted count is calculated correctly with a NA." , {
   expect_equal(unname(sorted_count(dat)), c(4, 3, 2, 1))
   expect_equal(names(sorted_count(dat)), c("A", "C", NA, "B"))
 })
-
-
diff --git a/vignettes/Using_skimr.Rmd b/vignettes/Using_skimr.Rmd
@@ -232,17 +232,11 @@ The details of rendering are dependent on the operating system R is running on,
 the locale of the installation, and the fonts installed. Rendering may also
 differ based on whether it occurs in the console or when knitting to specific
 types of documents such as HTML and PDF. The most commonly reported problems
-involve rendering the spark graphs (inline histogram and line graphs). 
-This section will summarize known issues.  
-
-Windows cannot print the spark-histogram characters when printing a data-frame. For example, 
-`"▂▅▇"` is printed as `"<U+2582><U+2585><U+2587>"`. This longstanding problem [originates in 
-the low-level code]
-(http://r.789695.n4.nabble.com/Unicode-display-problem-with-data-frames-under-Windows-td4707639.html) 
-for printing dataframes. One workaround for showing these characters in Windows is to set the
-CTYPE part of your locale to Chinese/Japanese/Korean with 
-`Sys.setlocale("LC_CTYPE", "Chinese")`. These values do show up by default when printing a
-data-frame created by `skim()` as a list (`as.list()`) or as a matrix (`as.matrix()`).
+involve rendering the spark graphs (inline histogram). 
+This section will summarize known issues. 
+
+Currently `pander()` does not support inline histograms on Windows. Also, Windows 
+does not support spark line graphs. 
 
 In order to render the spark graphs in html or PDF histogram you may need to change
 fonts to one that supports blocks or braille (depending on which you need).
Original file line number	Diff line number	Diff line change
Expand Up		@@ -161,5 +161,3 @@ test_that("sorted count is calculated correctly with a NA." , {
		expect_equal(unname(sorted_count(dat)), c(4, 3, 2, 1))
		expect_equal(names(sorted_count(dat)), c("A", "C", NA, "B"))
		})