Setup
Show the code
library (tidyverse)
library (ggtext)
library (showtext)
library (ggrepel)
library (rnaturalearth)
library (sf)
# Register the fonts used later in the post: a local Font Awesome brands file
# (referenced by the caption's icon spans) and Syne Mono from Google Fonts.
font_add(
  family = "fa7-brands",
  regular = here::here("fonts/Font Awesome 7 Brands-Regular-400.otf")
)
font_add_google(name = "Syne Mono", family = "syne")

# Turn on showtext rendering so the registered families are available to plots.
showtext_auto(enable = TRUE)
Load the data
Show the code
# Fetch the TidyTuesday 2026 week 2 release, then keep its `africa` table with
# exact duplicate rows removed.
tuesdata <- tidytuesdayR::tt_load(x = 2026, week = 2)
africa_lang <- distinct(tuesdata$africa)
Explore
Show the code
Rows: 762
Columns: 4
$ language <chr> "ǂKxʼaoǁʼae", "ǂKxʼaoǁʼae", "Abon", "Abron", "Abron", …
$ family <chr> "Kxʼa", "Kxʼa", "Niger–Congo", "Niger–Congo", "Niger–C…
$ native_speakers <dbl> 5000, 5000, 800, 1393000, 1393000, 20000, 300000, 2500…
$ country <chr> "Namibia", "Botswana", "Cameroon", "Ghana", "Ivory Coa…
Show the code
# Sum of `native_speakers` over all rows of each language family.
# NOTE(review): the glimpse above shows the same speaker count repeated for a
# language in every country it is listed under, so these totals appear to
# count multi-country languages more than once -- confirm before quoting them.
summarize(
  africa_lang,
  speakers = sum(native_speakers),
  .by = family
)
# A tibble: 17 × 2
family speakers
<chr> <dbl>
1 Kxʼa 107500
2 Niger–Congo 1214254982
3 Afroasiatic 2266358060
4 Indo-European 101665300
5 Nilo-Saharan 111091000
6 Mande 230000
7 Portuguese 871000
8 Khoe–Kwadi 259500
9 Afro-Asiatic 714300
10 Arabic-based 350000
11 Kongo-based 13000000
12 English 866000
13 Austronesian 18000000
14 French 1173000
15 Ubangian 1230000
16 Language 400
17 Tuu 5000
Show the code
# Number of rows (languages) recorded per country, smallest counts first.
arrange(
  count(africa_lang, country),
  n
)
# A tibble: 51 × 2
country n
<chr> <int>
1 Burundi 1
2 Egypt 1
3 Eswatini 1
4 Liberia 1
5 Madagascar 1
6 Seychelles 1
7 Tunisia 1
8 Cape Verde 2
9 Comoros 2
10 Gambia 2
# ℹ 41 more rows
Show the code
# Spot-check one country: every row recorded for Ethiopia.
filter(africa_lang, country == "Ethiopia")
# A tibble: 16 × 4
language family native_speakers country
<chr> <chr> <dbl> <chr>
1 Afar Afroasiatic 2500000 Ethiopia
2 Amharic Afroasiatic 35000000 Ethiopia
3 Bambassi Afroasiatic 2300 Ethiopia
4 Berta Nilo-Saharan 380000 Ethiopia
5 Gumuz Nilo-Saharan 160000 Ethiopia
6 Komo Nilo-Saharan 10000 Ethiopia
7 Kunama Nilo-Saharan 180000 Ethiopia
8 Kwama Nilo-Saharan 15000 Ethiopia
9 Nuer Nilo-Saharan 1700000 Ethiopia
10 Opuo Nilo-Saharan 20000 Ethiopia
11 Oromo Afroasiatic 37071900 Ethiopia
12 Saho Niger–Congo 180000 Ethiopia
13 Shabo Language 400 Ethiopia
14 Somali Afroasiatic 21937940 Ethiopia
15 Tigrinya Afroasiatic 9700000 Ethiopia
16 Tsamai Afroasiatic 18000 Ethiopia
Show the code
# Number of distinct language names in the table (denominator for lang_perc).
lang_count <- n_distinct(africa_lang$language)
One Country Languages
Show the code
# Languages that appear under exactly one country, largest speaker counts
# first. `country_count` is kept because later chunks drop it explicitly.
one_country_lang <- africa_lang |>
  mutate(country_count = n(), .by = language) |>
  filter(country_count == 1) |>
  arrange(desc(native_speakers))

# Share of all distinct languages that are single-country, as a percent label.
lang_perc <- scales::percent(x = nrow(one_country_lang) / lang_count)
Some 69% of the languages from Wikipedia’s table are spoken in a single country.
Quick Plot 1
Show the code
# Scatter of languages listed per country (x) against how many of those are
# spoken only in that country (y), with country labels and a loess trend.
one_country_lang |>
  # One row per country with its number of single-country languages; this is
  # what the former add_count() / select() / distinct() chain produced.
  count(country, name = "unique_languages") |>
  # Join on an explicit key so the join cannot silently pick up extra shared
  # columns and no "Joining with" message is emitted.
  left_join(
    count(africa_lang, country, name = "languages"),
    by = "country"
  ) |>
  arrange(country) |>
  ggplot(aes(x = languages, y = unique_languages)) +
  geom_point() +
  # Seeded so label placement is reproducible, matching the map's seed below.
  geom_text_repel(aes(label = country), max.overlaps = 20, seed = 42) +
  # Formula stated explicitly (it is the default) to silence the message.
  geom_smooth(method = "loess", formula = y ~ x)
Adding population data
Pulling in a different Wikipedia table with country population data reveals some unsurprising oddities. Comparing the number of native speakers with the country-level population, for languages that are spoken in only a single country, turns up a few languages with more native speakers than the listed population of their country.
Show the code
# Scrape the first table on Wikipedia's African population list and reduce it
# to a country name plus a numeric population.
# NOTE(review): the `Population[1]` column name includes the page's footnote
# marker, so this selection presumably breaks if the live page changes.
pop_data <-
  "https://en.wikipedia.org/wiki/List_of_African_countries_by_population" |>
  rvest::read_html() |>
  rvest::html_table() |>
  purrr::pluck(1) |>
  select(country = Country, population = `Population[1]`) |>
  # Strip thousands separators before converting to numeric.
  mutate(population = as.numeric(gsub(",", "", population, fixed = TRUE)))

glimpse(pop_data)
Rows: 57
Columns: 2
$ country <chr> "Nigeria", "Ethiopia", "Egypt", "DR Congo", "Tanzania", "So…
$ population <dbl> 227882945, 128691692, 114535772, 105789731, 66617606, 63212…
Show the code
# Attach country populations to the single-country languages and compute the
# share of the population that speaks each one (rounded to one decimal).
lang_data <- one_country_lang |>
  # NOTE(review): this pattern is an unanchored regex, so it also rewrites
  # names that merely contain "Congo" (an existing "DR Congo" would become
  # "DR DR Congo"). Confirm which spellings occur in `country`.
  mutate(country = str_replace(country, "Congo", "DR Congo")) |>
  # Explicit key: avoids the implicit natural join and its console message.
  left_join(pop_data, by = "country") |>
  mutate(pop_prop = round(native_speakers / population, 1))

# Table of languages whose rounded share is at least 100% of the population.
# NOTE(review): because `pop_prop` is rounded first, shares from 95% upward
# also pass the `>= 1` filter -- confirm that this is intended.
lang_data |>
  filter(pop_prop >= 1) |>
  arrange(desc(pop_prop)) |>
  select(-family, -country_count) |>
  mutate(Population_Prop = scales::percent(pop_prop)) |>
  select(-pop_prop) |>
  gt::gt()
Cape Verdean Creole
871000
Cape Verde
522331
170%
Comorian
1100000
Comoros
850387
130%
Kinyarwanda
15000000
Rwanda
13954471
110%
Show the code
# For every country keep the single-country language with the highest
# population share; ties are broken by taking the first row only.
top_unique_lang <- lang_data |>
  slice_max(pop_prop, n = 1, by = country, with_ties = FALSE) |>
  select(language, country, pop_prop)
Quick Plot 2
Show the code
# Medium-scale Natural Earth polygons for Africa, reduced to the English name
# and geometry. Two names are rewritten to the spellings used as join keys in
# the language data.
africa_map <-
  ne_countries(scale = "medium", continent = "Africa") |>
  select(name_en, geometry) |>
  mutate(
    name_en = case_when(
      name_en == "The Gambia" ~ "Gambia",
      name_en == "Democratic Republic of the Congo" ~ "DR Congo",
      .default = name_en
    )
  )
# Country polygons joined to each country's top single-country language.
# Countries without a match get an empty label, and shares above 1 are capped
# at 1 for the fill scale.
map_data <- africa_map |>
  left_join(top_unique_lang, by = join_by(name_en == country)) |>
  mutate(
    language = coalesce(language, ""),
    pop_prop = pmin(pop_prop, 1)
  )
# Label positions: centroid coordinates (X, Y) of each polygon, bound
# column-wise to the non-spatial attributes of `map_data`.
map_labels <- bind_cols(
  as_tibble(st_coordinates(st_centroid(map_data))),
  st_drop_geometry(map_data)
)
Show the code
# Choropleth of Africa: fill shows `pop_prop` from `map_data`, and each
# country is labelled with its `language` at the centroid from `map_labels`.
af_lang_map <- ggplot() +
  geom_sf(
    data = map_data,
    mapping = aes(fill = pop_prop),
    linewidth = 0.1,
    color = "grey70"
  ) +
  geom_text_repel(
    data = map_labels,
    mapping = aes(x = X, y = Y, label = language),
    seed = 42
  ) +
  scale_fill_viridis_c(alpha = 0.8, na.value = "gray50", option = "C") +
  labs(fill = "Population \n Proportion")

af_lang_map
Final Map
Show the code
# Global theme for the final map: void theme with a light-blue paper colour
# and dark-grey ink.
set_theme(theme_void(paper = "#d4e4f0", ink = "#333333"))

# Title and subtitle strings.
title_text <- str_glue("Most Widely Spoken Languages")
subtitle_text <- str_glue("(Which are only spoken in a single country)")

# Social icons, set in the Font Awesome brands font registered during setup.
# NOTE(review): as seen here the spans contain no glyph; the icon characters
# may have been lost in extraction -- confirm against the original file.
linkedin <- str_glue("<span style='font-family:fa7-brands'></span>")
github <- str_glue("<span style='font-family:fa7-brands'></span>")
bluesky <- str_glue("<span style='font-family:fa7-brands'></span>")
socials <- str_glue(
  "{linkedin} sethkasowitz • {bluesky} skasowitz • {github} skasowitz"
)

# The caption is drawn with element_markdown(), where a bare "\n" does not
# produce a line break, so both breaks use <br>.
tidytuesday_desc <- str_glue(
  "#TidyTuesday: 2026 Week 2<br>Source: Wikipedia 'Languages of Africa'<br>"
)
full_caption <- str_glue("{tidytuesday_desc} {socials}")
Show the code
# Final figure: the quick map plus title, subtitle and caption, all rendered
# as markdown in the Syne Mono family registered as 'syne'.
af_lang_map +
  theme(
    plot.title = element_markdown(
      family = "syne",
      face = "bold",
      size = rel(1.4)
    ),
    plot.subtitle = element_markdown(family = "syne"),
    # Left-aligned, slightly smaller caption.
    plot.caption = element_markdown(
      family = "syne",
      size = rel(0.8),
      hjust = 0
    )
  ) +
  labs(
    title = title_text,
    subtitle = subtitle_text,
    caption = full_caption,
    x = "",
    y = ""
  )
Citation BibTeX citation:
@online{kasowitz2026,
author = {Kasowitz, Seth},
title = {TidyTuesday 2026 - {Week} 2},
date = {2026-01-13},
url = {https://sethkasowitz.com/posts/2026-01-13_TidyTuesday-wk2/},
langid = {en}
}
For attribution, please cite this work as:
Kasowitz, Seth. 2026.
“TidyTuesday 2026 - Week 2.” January
13, 2026.
https://sethkasowitz.com/posts/2026-01-13_TidyTuesday-wk2/ .