Skip to content

Commit

Permalink
make country.lang() and lang.country() functions work with real count…
Browse files Browse the repository at this point in the history
…ry names (Ekaterina Zalivina)
  • Loading branch information
agricolamz committed Jun 24, 2022
1 parent fe17b76 commit b812bf8
Show file tree
Hide file tree
Showing 13 changed files with 276 additions and 196 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ importFrom(stats,coef)
importFrom(stats,complete.cases)
importFrom(stats,density)
importFrom(stats,glm)
importFrom(stats,na.omit)
importFrom(stats,sd)
importFrom(stringdist,stringdist)
importFrom(utils,download.file)
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ lingtypology 1.1.9
- update the `autotyp.feature()` function after the new release 1.0.0
- make it possible to have just one group in the `control`
- update glottolog to v. 4.6
- make `country.lang()` and `lang.country()` functions work with real country names (Ekaterina Zalivina)

lingtypology 1.1.8 (12.10.2021)
- fix `country.lang()` functions, close #70
Expand Down
15 changes: 15 additions & 0 deletions R/countries.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#' Catalogue of countries
#'
#' Catalogue of countries with their ISO 3166-1 codes, additional names and
#' official languages.
#'
#' @format A data frame with 189 rows and 5 variables:
#' \describe{
#' \item{alpha3}{ISO 3166-1 alpha-3 (three-letter) code of the country}
#' \item{alpha2}{ISO 3166-1 alpha-2 (two-letter) code of the country}
#' \item{country_name}{Country name}
#' \item{additional_names}{Additional name of the country (a country with several additional names occupies several rows)}
#' \item{official_languages}{Official languages, separated by commas}
#' }
#'

"countries"
23 changes: 20 additions & 3 deletions R/country.lang.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,40 @@
#'
#' Takes any vector of languages and returns the countries where those languages are used, either as full country names or as ISO 3166-1 alpha-2 codes.
#' @param x A character vector of the languages (can be written in lower case)
#' @param full_name A logical value: if TRUE (the default), return full country names; if FALSE, return ISO 3166-1 alpha-2 codes.
#' @author George Moroz <[email protected]>
#' @seealso \code{\link{aff.lang}}, \code{\link{area.lang}}, \code{\link{gltc.lang}}, \code{\link{iso.lang}}, \code{\link{lat.lang}}, \code{\link{long.lang}}, \code{\link{subc.lang}}, \code{\link{url.lang}}
#' @examples
#' country.lang('Korean')
#' country.lang(c('Korean', 'Polish'))
#' @export
#'
#'


country.lang <- function(x, full_name = TRUE) {
  # Map language names to the countries where they are used.
  #
  # x         character vector (or list) of language names; matching against
  #           the Glottolog catalogue is case-insensitive.
  # full_name if FALSE, return the ";"-separated ISO 3166-1 alpha-2 codes
  #           stored in glottolog$countries; otherwise translate them into
  #           country names using the `countries` table.
  #
  # Returns a character vector with one element per element of `x`
  # (NA for languages that is.glottolog() does not recognise).
  if (typeof(x) == "list") {
    x <- unlist(x)
  }
  glottolog <- lingtypology::glottolog
  countries <- lingtypology::countries

  codes <- vapply(x, function(y) {
    if (isTRUE(is.glottolog(y, response = TRUE))) {
      # `[1]` keeps exactly one value per language (and yields NA when the
      # lookup is empty), which is what vapply(..., character(1)) requires.
      glottolog[tolower(glottolog$language) %in% tolower(y), ]$countries[1]
    } else {
      NA_character_
    }
  }, character(1))

  # Codes were requested: return them as they are. Without this branch the
  # function would fall off the end and return NULL.
  if (isFALSE(full_name)) {
    return(codes)
  }

  vapply(codes, function(y) {
    if (is.na(y)) {
      return(NA_character_)
    }
    wanted <- unlist(strsplit(y, ";"))
    found <- unique(countries$country_name[countries$alpha2 %in% wanted])
    found <- found[!is.na(found)]
    # None of the codes is present in the `countries` table: report NA
    # rather than an empty string.
    if (length(found) == 0) {
      return(NA_character_)
    }
    paste0(found, collapse = ";")
  }, character(1))
}
41 changes: 13 additions & 28 deletions R/lang.country.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,46 +9,31 @@
#' lang.country('AD')
#' lang.country(c('AD', 'AE'))
#' @export
#' @importFrom stats na.omit
#'

lang.country <- function(x, list = TRUE) {
  # Map countries to the languages spoken in them.
  #
  # x    character vector (or list) of countries, given either as ISO 3166-1
  #      alpha-2 codes (any case) or as country names / additional names from
  #      the `countries` table (matched case-insensitively).
  # list if FALSE, return one flat character vector; otherwise return a list
  #      named by `x` with one character vector of languages per country.
  #
  # Elements of `x` that cannot be resolved to a known code give NA.
  if (typeof(x) == "list") {
    x <- unlist(x)
  }
  glottolog <- lingtypology::glottolog
  countries <- lingtypology::countries

  alpha2 <- as.character(countries$alpha2)
  name_keys <- tolower(trimws(as.character(countries$country_name)))
  alias_keys <- tolower(trimws(as.character(countries$additional_names)))

  # A code is valid when it is listed in the `countries` table or occurs in
  # glottolog$countries; the table alone does not cover every country that
  # Glottolog refers to.
  known_codes <- unique(stats::na.omit(c(
    alpha2,
    unlist(strsplit(as.character(glottolog$countries), ";", fixed = TRUE))
  )))

  result <- lapply(x, function(y) {
    if (is.na(y)) {
      return(NA_character_)
    }
    key <- trimws(y)
    if (nchar(key) > 2) {
      # Longer than a two-letter code: treat it as a country name and look it
      # up among both the primary and the additional names.
      hit <- name_keys %in% tolower(key) | alias_keys %in% tolower(key)
      codes <- unique(alpha2[hit])
    } else {
      codes <- toupper(key)
    }
    codes <- codes[!is.na(codes) & codes %in% known_codes]
    if (length(codes) == 0) {
      return(NA_character_)
    }
    # Codes consist of letters only, so they are safe to use in a regex.
    pattern <- paste(codes, collapse = "|")
    glottolog[grepl(pattern, glottolog$countries), ]$language
  })

  if (isFALSE(list)) {
    unlist(result)
  } else {
    names(result) <- x
    result
  }
}
Binary file added data/countries.RData
Binary file not shown.
190 changes: 190 additions & 0 deletions database_creation/country.data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
alpha3,alpha2,country_name,additional_names,official_languages
ABW,AW,Aruba,Country of Aruba,NA
AFG,AF,Afghanistan,Islamic Republic of Afghanistan,"Dari, Southern Pashto, Central Pashto, Northern Pashto"
AGO,AO,Angola,Republic of Angola,Portuguese
AIA,AI,Anguilla,NA,NA
ALA,AX,Åland,Åland Islands,Swedish
ALB,AL,Albania,Republic of Albania,Northern Tosk Albanian
AND,AD,Andorra,Principality of Andorra,Catalan
AND,AD,Andorra,Principality of the Valleys of Andorra,Catalan
ARE,AE,United Arab Emirates,the Emirates,Standard Arabic
ARE,AE,United Arab Emirates,Emirates,Standard Arabic
ARE,AE,United Arab Emirates,UAE,Standard Arabic
ARG,AR,Argentina,Argentine Republic,Spanish
ARG,AR,Argentina,the Argentine,Spanish
ARM,AM,Armenia,Republic of Armenia,Armenian
ASM,AS,American Samoa,NA,NA
ATG,AG,Antigua and Barbuda,NA,English
AUS,AU,Australia,Commonwealth of Australia,English
AUT,AT,Austria,Republic of Austria,German
AZE,AZ,Azerbaijan,Republic of Azerbaijan,Azerbaijani
BDI,BI,Burundi,Republic of Burundi,"Rundi, Rundi-Kitwa, French, English"
BEL,BE,Belgium,Kingdom of Belgium,"Dutch, French, German"
BEN,BJ,Benin,Republic of Benin,French
BFA,BF,Burkina Faso,NA,French
BGD,BD,Bangladesh,People's Republic of Bangladesh,Bengali
BGR,BG,Bulgaria,Republic of Bulgaria,Bulgarian
BHR,BH,Bahrain,Kingdom of Bahrain,Standard Arabic
BHS,BS,Bahamas,Commonwealth of the Bahamas,English
BIH,BA,Bosnia and Herzegovina,B&H,"Bosnian, Croatian, Serbian"
BLR,BY,Belarus,Republic of Belarus,"Belarusian, Russian"
BLR,BY,Belarus,Byelorussia,"Belarusian, Russian"
BLZ,BZ,Belize,NA,English
BOL,BO,Bolivia,Plurinational State of Bolivia,"Spanish, Central Aymara, Southern Aymara, Araona, Baure, Chiquitano, Canichana, Cavinena, Cayubaba, Chacobo, Moseten-Chimane, Ese Ejja, Eastern Bolivian Guarani, Western Bolivian Guarani, Pauserna, Guarayu, Itonama, Leco, Callawalla, Machinere, Reyesano, Trinitario, Ignaciano, Moseten-Chimane, Itene, Movima, Chacobo, Pacahuara, Karipuna (Pano-Tacanan), Shinabo, Puquina, North Bolivian Quechua, South Bolivian Quechua, Siriono, Jora, Yuqui, Tacana, Toromono, Chipaya, Uru, Zamuco, Ayoreo, Yuracare, Shanenawa, Sharanahua, Yaminahua, Yawanawa, Yora, Wichi Lhamtes Nocten"
BRA,BR,Brazil,Federative Republic of Brazil,Portuguese
BRB,BB,Barbados,NA,English
BRN,BN,Brunei,Nation of Brunei,"English, Standard Malay"
BRN,BN,Brunei,the Abode of Peace,"English, Standard Malay"
BTN,BT,Bhutan,Kingdom of Bhutan,Dzongkha
BWA,BW,Botswana,Republic of Botswana,English
CAF,CF,Central African Republic,CAR,French
CAF,CF,Central African Republic,RCA,French
CAF,CF,Central African Republic,Centrafrique,French
CAN,CA,Canada,NA,"English, French"
CHE,CH,Switzerland,Swiss Confederation,"German, French, Italian, Romansh"
CHL,CL,Chile,Republic of Chile,Spanish
CHN,CN,China,People's Republic of China,Mandarin Chinese
CHN,CN,China,PRC,Mandarin Chinese
CIV,CI,Ivory Coast,Cote d'Ivoire,French
CMR,CM,Cameroon,Republic of Cameroon,"English, French"
COD,CD,Congo-Kinshasa,Democratic Republic of the Congo,French
COD,CD,Congo-Kinshasa,Congo-Kinshasa,French
COD,CD,Congo-Kinshasa,DR Congo,French
COD,CD,Congo-Kinshasa,the DRC,French
COD,CD,Congo-Kinshasa,the DROC,French
COD,CD,Congo-Kinshasa,the Congo,French
COG,CG,Congo-Brazzaville,Republic of the Congo,French
COG,CG,Congo-Brazzaville,Congo-Brazzaville,French
COL,CO,Colombia,Republic of Colombia,Spanish
COM,KM,Comoros,Union of the Comoros,"French, Standard Arabic, Maore Comorian"
CPV,CV,Cape Verde,Cabo Verde,Portuguese
CRI,CR,Costa Rica,Republic of Costa Rica,Spanish
CUB,CU,Cuba,Republic of Cuba,Spanish
CYP,CY,Cyprus,Republic of Cyprus,"Modern Greek, Turkish"
CZE,CZ,Czech,Czech Republic,"Czech, Slovak"
CZE,CZ,Czech,Czechia,"Czech, Slovak"
DEU,DE,Germany,Federal Republic of Germany,German
DJI,DJ,Djibouti,Republic of Djibouti,"Standard Arabic, French"
DMA,DM,Dominica,Commonwealth of Dominica,English
DNK,DK,Denmark,Kingdom of Denmark,Danish
DOM,DO,Dominican Republic,NA,Spanish
DZA,DZ,Algeria,People's Democratic Republic of Algeria,"Standard Arabic, Algerian Arabic, Western Algerian Berber"
ECU,EC,Ecuador,Republic of Ecuador,Spanish
EGY,EG,Egypt,Arab Republic of Egypt,"Egyptian Arabic, Standard Arabic"
ERI,ER,Eritrea,State of Eritrea,"Standard Arabic, English, Tigrinya"
EST,EE,Estonia,Republic of Estonia,Estonian
ETH,ET,Ethiopia,Federal Democratic Republic of Ethiopia,Amharic
FIN,FI,Finland,Republic of Finland,"Finnish, Swedish"
FJI,FJ,Fiji,Republic of Fiji,"English, Fijian, Fiji Hindi"
FRA,FR,France,French Republic,French
GAB,GA,Gabon,Gabonese Republic,French
GBR,GB,United Kingdom,United Kingdom of Great Britain and Northern Ireland,English
GBR,GB,United Kingdom,UK,English
GEO,GE,Georgia,Republic of Georgia,Georgian
GHA,GH,Ghana,Republic of Ghana,English
GIN,GN,Guinea,Republic of Guinea,French
GIN,GN,Guinea,Guinea-Conakry,French
GMB,GM,Gambia,Republic of the Gambia,English
GMB,GM,Gambia,The Gambia,English
GNB,GW,Guinea-Bissau,Republic of Guinea-Bissau,Portuguese
GNQ,GQ,Equatorial Guinea,Republic of Equatorial Guinea,"Spanish, French, Portuguese"
GRC,GR,Greece,Hellenic Republic,Modern Greek
GRC,GR,Greece,the Hellenic Republic,Modern Greek
GRD,GD,Grenada,NA,English
GTM,GT,Guatemala,Republic of Guatemala,Spanish
GUY,GY,Guyana,Co-operative Republic of Guyana,English
HKG,HK,Hong Kong,HKSAR,"Yue Chinese, Mandarin Chinese, English"
HND,HN,Honduras,Republic of Honduras,Spanish
HRV,HR,Croatia,Republic of Croatia,Croatian
HTI,HT,Haiti,Republic of Haiti,"Haitian, French"
HUN,HU,Hungary,NA,Hungarian
IDN,ID,Indonesia,Republic of Indonesia,"Indonesian, Indonesian Sign Language"
IND,IN,India,Republic of India,"English, Hindi"
IRL,IE,Ireland,Republic of Ireland,"Irish, English"
IRN,IR,Iran,Islamic Republic of Iran,Western Farsi
IRN,IR,Iran,Persia,Western Farsi
IRQ,IQ,Iraq,NA,Standard Arabic
ISL,IS,Iceland,Republic of Iceland,"Icelandic, Icelandic Sign Language"
ISR,IL,Israel,State of Israel,"Hebrew, Standard Arabic"
ITA,IT,Italy,Italian Republic,Italian
JAM,JM,Jamaica,NA,English
JOR,JO,Jordan,Hashemite Kingdom of Jordan,Standard Arabic
JPN,JP,Japan,NA,Japanese
KAZ,KZ,Kazakhstan,Republic of Kazakhstan,"Kazakh, Russian"
KEN,KE,Kenya,Republic of Kenya,"English, Swahili"
KGZ,KG,Kyrgyzstan,Kyrgyz Republic,"Kirghiz, Russian"
KHM,KH,Cambodia,Kingdom of Cambodia,"Central Khmer, Northern Khmer"
KIR,KI,Kiribati,Republic of Kiribati,"Gilbertese, English"
KNA,KN,Saint Kitts and Nevis,Federation of Saint Kitts and Nevis,English
KNA,KN,Saint Kitts and Nevis,Federation of Saint Christopher and Nevis,English
KNA,KN,Saint Kitts and Nevis,SKN,English
KNA,KN,Saint Kitts and Nevis,SKAN,English
KOR,KR,South Korea,Republic of Korea,Korean
KOR,KR,South Korea,ROK,Korean
KWT,KW,Kuwait,State of Kuwait,Standard Arabic
LAO,LA,Laos,Lao People's Democratic Republic,Lao
LBN,LB,Lebanon,The Lebanese Republic,Standard Arabic
LBN,LB,Lebanon,Lebanese Republic,Standard Arabic
LBR,LR,Liberia,Republic of Liberia,English
LBY,LY,Libya,NA,"Libyan Arabic, Standard Arabic"
LIE,LI,Liechtenstein,Principality of Liechtenstein,German
LKA,LK,Sri Lanka,Democratic Socialist Republic of Sri Lanka,"Sinhala, Tamil"
LSO,LS,Lesotho,Kingdom of Lesotho,"Southern Sotho, English"
LTU,LT,Lithuania,Republic of Lithuania,Lithuanian
LUX,LU,Luxembourg,Grand Duchy of Luxembourg,"French, German, Luxembourgish"
LVA,LV,Latvia,Republic of Latvia,Latvian
MAR,MA,Morocco,Kingdom of Morocco,"Standard Arabic, Berber"
MDA,MD,Moldova,Republic of Moldova,Romanian
MDG,MG,Madagascar,Republic of Madagascar,"Plateau Malagasy, French"
MDV,MV,Maldives,Republic of the Maldives,Dhivehi
MEX,MX,Mexico,United Mexican States,Spanish
MKD,MK,Macedonia,Republic of Macedonia,Macedonian
MKD,MK,Macedonia,Republic of North Macedonia,Macedonian
MMR,MM,Myanmar,Republic of the Union of Myanmar,Burmese
MRT,MR,Mauritania,Islamic Republic of Mauritania,Standard Arabic
MWI,MW,Malawi,Republic of Malawi,English
MYS,MY,Malaysia,Federation of Malaysia,Standard Malay
NLD,NL,Netherlands,Kingdom of the Netherlands,Dutch
NLD,NL,Netherlands,the Netherlands,Dutch
NLD,NL,Netherlands,Holland,Dutch
NOR,NO,Norway,Kingdom of Norway,Norwegian
NPL,NP,Nepal,Federal Democratic Republic Of Nepal,Nepali
NZL,NZ,New Zealand,Realm of New Zealand,"English, Maori, New Zealand Sign Language"
PAK,PK,Pakistan,Islamic Republic of Pakistan,"Urdu, English"
PER,PE,Peru,Republic of Peru,"Spanish, Quechua, Aymara"
PHL,PH,Philippines,Republic of the Philippines,"Filipino, English"
PLW,PW,Palau,Republic of Palau,"English, Palauan"
POL,PL,Poland,Republic of Poland,Polish
PRI,PR,Puerto Rico,Commonwealth of Puerto Rico,"Spanish, English"
PRK,KP,North Korea,Democratic People's Republic of Korea,Korean
PRK,KP,North Korea,DPRK,Korean
PRT,PT,Portugal,Portuguese Republic,Portuguese
RUS,RU,Russia,Russian Federation,Russian
RWA,RW,Rwanda,Republic of Rwanda,"Kinyarwanda, English, French"
SAU,SA,Saudi Arabia,Kingdom of Saudi Arabia,Standard Arabic
SAU,SA,Saudi Arabia,KSA,Standard Arabic
SDN,SD,Sudan,Republic of the Sudan,"Arabic, English"
SGP,SG,Singapore,Republic of Singapore,"English, Standard Malay, Mandarin Chinese, Tamil"
SLV,SV,El Salvador,Republic of El Salvador,Spanish
SRB,RS,Serbia,Republic of Serbia,Serbian
SUR,SR,Suriname,Surinam,Dutch
SUR,SR,Suriname,Republic of Suriname,Dutch
TCD,TD,Chad,Republic of Chad,"Chadian Arabic, Standard Arabic, French"
TCD,TD,Chad,Tchad,"Chadian Arabic, Standard Arabic, French"
TGO,TG,Togo,Togolese Republic,French
THA,TH,Thailand,Kingdom of Thailand,Thai
TLS,TL,East Timor,Democratic Republic of Timor-Leste,"Portuguese, Tetum"
TTO,TT,Trinidad and Tobago,Republic of Trinidad and Tobago,English
TUR,TR,Turkey,Republic of Turkey,Turkish
TWN,TW,Taiwan,Republic of China,Mandarin Chinese
TWN,TW,Taiwan,ROC,Mandarin Chinese
TZA,TZ,Tanzania,United Republic of Tanzania,Swahili
USA,US,United States,United States of America,English
USA,US,United States,USA,English
VNM,VN,Vietnam,Socialist Republic of Vietnam,Vietnamese
VNM,VN,Vietnam,SRV,Vietnamese
VNM,VN,Vietnam,Viet Nam,Vietnamese
ZMB,ZM,Zambia,Republic of Zambia,English
ZWE,ZW,Zimbabwe,Republic of Zimbabwe,"English, Nyanja, Sena, Kalanga, Tsoa, Nambya, Ndau, Zimbabwean Ndebele, Tsonga, Zimbabwe Sign Language, Southern Sotho, Tonga (Zambia), Tswana, Venda, Xhosa"
NA,NA,Abkhazia,NA,"Abkhaz, Russian"
NA,NA,Kosovo,Republic of Kosovo,"Northern Tosk Albanian, Turkish"
Loading

0 comments on commit b812bf8

Please sign in to comment.