Skip to content

Commit

Permalink
Merge pull request #349 from vincentarelbundock/update-world_bank_scraper
Browse files Browse the repository at this point in the history

update world_bank scraper
  • Loading branch information
cjyetman authored Sep 27, 2024
2 parents 6b393c6 + e2dd246 commit ecf0013
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
6 changes: 3 additions & 3 deletions dictionary/data_world_bank.csv
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ Comoros,COM
"Congo, Dem. Rep.",COD
"Congo, Rep.",COG
Costa Rica,CRI
Côte d'Ivoire,CIV
Croatia,HRV
Cuba,CUB
Curaçao,CUW
Cyprus,CYP
Czech Republic,CZE
Côte d’Ivoire,CIV
Denmark,DNK
Djibouti,DJI
Dominica,DMA
Expand Down Expand Up @@ -164,7 +164,6 @@ Russian Federation,RUS
Rwanda,RWA
Samoa,WSM
San Marino,SMR
São Tomé and Principe,STP
Saudi Arabia,SAU
Senegal,SEN
Serbia,SRB
Expand All @@ -189,6 +188,7 @@ Suriname,SUR
Sweden,SWE
Switzerland,CHE
Syrian Arab Republic,SYR
São Tomé and Príncipe,STP
"Taiwan, China",TWN
Tajikistan,TJK
Tanzania,TZA
Expand All @@ -198,10 +198,10 @@ Togo,TGO
Tonga,TON
Trinidad and Tobago,TTO
Tunisia,TUN
Turkey,TUR
Turkmenistan,TKM
Turks and Caicos Islands,TCA
Tuvalu,TUV
Türkiye,TUR
Uganda,UGA
Ukraine,UKR
United Arab Emirates,ARE
Expand Down
16 changes: 9 additions & 7 deletions dictionary/get_world_bank.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# Build dictionary/data_world_bank.csv: download the World Bank CLASS workbook,
# keep each economy's name and code, and write them out as a two-column CSV.
#
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# stripped. Where two near-identical statements appear back to back, the first
# appears to be the removed line and the second its replacement — confirm
# against the repository before running any of this as-is.

# Nothing else is loaded in this script, so read_excel, %>%, the dplyr verbs and
# write_csv presumably come from utilities.R — TODO confirm.
source(here::here('dictionary/utilities.R'))

# Location of the CLASS workbook: first the .xls address, then the .xlsx one.
url <- 'http://databank.worldbank.org/data/download/site-content/CLASS.xls'
url <- 'https://databankfiles.worldbank.org/public/ddpext_download/site-content/CLASS.xlsx'

# The temp file's extension follows the workbook format of the matching URL.
filename <- tempfile(fileext = '.xls')
filename <- tempfile(fileext = '.xlsx')
# Fetch the workbook into the temp file; quiet = TRUE suppresses progress output.
download.file(url, filename, quiet = TRUE)

# Labels for aggregates (regions, income groups, lending categories, "World")
# rather than single economies; rows whose name matches are dropped below.
not_countries <- c("Arab World", "Caribbean small states", "Central Europe and the Baltics", "Early-demographic dividend", "East Asia & Pacific", "East Asia & Pacific (excluding high income)", "East Asia & Pacific (IDA & IBRD)", "Euro area", "Europe & Central Asia", "Europe & Central Asia (excluding high income)", "Europe & Central Asia (IDA & IBRD)", "European Union", "Fragile and conflict affected situations", "Heavily indebted poor countries (HIPC)", "High income", "IBRD only", "IDA & IBRD total", "IDA blend", "IDA only", "IDA total", "Late-demographic dividend", "Latin America & Caribbean", "Latin America & Caribbean (excluding high income)", "Latin America & Caribbean (IDA & IBRD)", "Least developed countries: UN classification", "Low & middle income", "Low income", "Lower middle income", "Middle East & North Africa", "Middle East & North Africa (excluding high income)", "Middle East & North Africa (IDA & IBRD)", "Middle income", "North America", "OECD members", "Other small states", "Pacific island small states", "Post-demographic dividend", "Pre-demographic dividend", "Small states", "South Asia", "South Asia (IDA & IBRD)", "Sub-Saharan Africa", "Sub-Saharan Africa (excluding high income)", "Sub-Saharan Africa (IDA & IBRD)", "Upper middle income", "World")

# Read the workbook into `wb`. Two forms of the call are shown:
#   1. positional: skip the first 6 rows, name the columns a..i, then keep
#      columns 3 and 4 by position. The "weird read_excel call to silence
#      warnings" remark describes this form.
#   2. by name: read the "List of economies" sheet and keep the Economy and
#      Code columns.
wb <- read_excel(filename, skip = 6, col_names = letters[1:9]) %>%
select(3:4) %>%
wb <- read_excel(filename, sheet = "List of economies") %>%
select(Economy, Code) %>%
# Rename the two kept columns to `country` and `wb`.
setNames(c('country', 'wb')) %>%
# Both filter forms apply the same two conditions (drop the aggregate labels,
# drop rows with a missing code); only the layout differs.
filter(!country %in% not_countries,
!is.na(wb))
filter(
!country %in% not_countries,
!is.na(wb)
) %>%
# Sort rows by country name.
# NOTE(review): the accented names that moved in the CSV diff (Côte d’Ivoire,
# São Tomé and Príncipe, Türkiye) look consistent with a byte-wise sort here —
# confirm against arrange()'s locale default.
arrange(country)

# Write the table out; missing values become empty strings.
# NOTE(review): this path is relative to the current working directory, unlike
# the here::here() path used for sourcing at the top of the script.
wb %>% write_csv('dictionary/data_world_bank.csv', na = "")

0 comments on commit ecf0013

Please sign in to comment.