Merge pull request #349 from vincentarelbundock/update-world_bank_scraper

update world_bank scraper
vincentarelbundock · Sep 27, 2024 · ecf0013
2 parents 6b393c6 + e2dd246
commit ecf0013
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 10 deletions.
diff --git a/dictionary/data_world_bank.csv b/dictionary/data_world_bank.csv
@@ -46,12 +46,12 @@ Comoros,COM
 "Congo, Dem. Rep.",COD
 "Congo, Rep.",COG
 Costa Rica,CRI
-Côte d'Ivoire,CIV
 Croatia,HRV
 Cuba,CUB
 Curaçao,CUW
 Cyprus,CYP
 Czech Republic,CZE
+Côte d’Ivoire,CIV
 Denmark,DNK
 Djibouti,DJI
 Dominica,DMA
@@ -164,7 +164,6 @@ Russian Federation,RUS
 Rwanda,RWA
 Samoa,WSM
 San Marino,SMR
-São Tomé and Principe,STP
 Saudi Arabia,SAU
 Senegal,SEN
 Serbia,SRB
@@ -189,6 +188,7 @@ Suriname,SUR
 Sweden,SWE
 Switzerland,CHE
 Syrian Arab Republic,SYR
+São Tomé and Príncipe,STP
 "Taiwan, China",TWN
 Tajikistan,TJK
 Tanzania,TZA
@@ -198,10 +198,10 @@ Togo,TGO
 Tonga,TON
 Trinidad and Tobago,TTO
 Tunisia,TUN
-Turkey,TUR
 Turkmenistan,TKM
 Turks and Caicos Islands,TCA
 Tuvalu,TUV
+Türkiye,TUR
 Uganda,UGA
 Ukraine,UKR
 United Arab Emirates,ARE

diff --git a/dictionary/get_world_bank.R b/dictionary/get_world_bank.R
@@ -1,17 +1,19 @@
 source(here::here('dictionary/utilities.R'))
 
-url <- 'http://databank.worldbank.org/data/download/site-content/CLASS.xls'
+url <- 'https://databankfiles.worldbank.org/public/ddpext_download/site-content/CLASS.xlsx'
 
-filename <- tempfile(fileext = '.xls')
+filename <- tempfile(fileext = '.xlsx')
 download.file(url, filename, quiet = TRUE)
 
 not_countries <- c("Arab World", "Caribbean small states", "Central Europe and the Baltics", "Early-demographic dividend", "East Asia & Pacific", "East Asia & Pacific (excluding high income)", "East Asia & Pacific (IDA & IBRD)", "Euro area", "Europe & Central Asia", "Europe & Central Asia (excluding high income)", "Europe & Central Asia (IDA & IBRD)", "European Union", "Fragile and conflict affected situations", "Heavily indebted poor countries (HIPC)", "High income", "IBRD only", "IDA & IBRD total", "IDA blend", "IDA only", "IDA total", "Late-demographic dividend", "Latin America & Caribbean", "Latin America & Caribbean (excluding high income)", "Latin America & Caribbean (IDA & IBRD)", "Least developed countries: UN classification", "Low & middle income", "Low income", "Lower middle income", "Middle East & North Africa", "Middle East & North Africa (excluding high income)", "Middle East & North Africa (IDA & IBRD)", "Middle income", "North America", "OECD members", "Other small states", "Pacific island small states", "Post-demographic dividend", "Pre-demographic dividend", "Small states", "South Asia", "South Asia (IDA & IBRD)", "Sub-Saharan Africa", "Sub-Saharan Africa (excluding high income)", "Sub-Saharan Africa (IDA & IBRD)", "Upper middle income", "World")
 
-# weird read_excel call to silence warnings
-wb <- read_excel(filename, skip = 6, col_names = letters[1:9]) %>%
-      select(3:4) %>%
+wb <- read_excel(filename, sheet = "List of economies") %>%
+      select(Economy, Code) %>%
       setNames(c('country', 'wb')) %>%
-      filter(!country %in% not_countries,
-             !is.na(wb)) 
+      filter(
+        !country %in% not_countries,
+        !is.na(wb)
+      ) %>%
+      arrange(country)
 
 wb %>% write_csv('dictionary/data_world_bank.csv', na = "")