TidyTuesday 2026 - Week 2

Visualisation of TidyTuesday data set: Languages of Africa
TidyTuesday
Data visualization
R
ggplot2
Geospatial
Author

Seth Kasowitz

Published

January 13, 2026

1 Setup

Show the code
library(tidyverse)
library(ggtext)
library(showtext)
library(ggrepel)
library(rnaturalearth)
library(sf)


# Register the Font Awesome brands font file kept under the project's fonts/
# directory as the family 'fa7-brands' (used for the social icons in captions).
fa_brands_path <- here::here('fonts/Font Awesome 7 Brands-Regular-400.otf')
font_add(family = 'fa7-brands', regular = fa_brands_path)

# Register the Google font 'Syne Mono' under the short family name 'syne'.
font_add_google(name = 'Syne Mono', family = 'syne')

# Switch on showtext so the fonts registered above are used when plots render.
showtext_auto(enable = TRUE)

2 Load the data

Show the code
# Fetch the TidyTuesday release for 2026, week 2.
tuesdata <- tidytuesdayR::tt_load(2026, week = 2)

# Keep the `africa` table, dropping rows duplicated across every column.
africa_lang <- distinct(tuesdata$africa)

3 Explore

Show the code
# Column names, types and leading values of the language table.
glimpse(africa_lang)
Rows: 762
Columns: 4
$ language        <chr> "ǂKxʼaoǁʼae", "ǂKxʼaoǁʼae", "Abon", "Abron", "Abron", …
$ family          <chr> "Kxʼa", "Kxʼa", "Niger–Congo", "Niger–Congo", "Niger–C…
$ native_speakers <dbl> 5000, 5000, 800, 1393000, 1393000, 20000, 300000, 2500…
$ country         <chr> "Namibia", "Botswana", "Cameroon", "Ghana", "Ivory Coa…
Show the code
# Total native speakers per language family.
#
# `native_speakers` is a per-language figure that is repeated on every country
# row for that language (e.g. Abron carries 1393000 on both its Ghana and
# Ivory Coast rows). Collapse to one row per language before summing so that
# languages listed for several countries are not counted once per country.
africa_lang |>
  distinct(language, family, native_speakers) |>
  summarize(speakers = sum(native_speakers), .by = family)
# A tibble: 17 × 2
   family          speakers
   <chr>              <dbl>
 1 Kxʼa              107500
 2 Niger–Congo   1214254982
 3 Afroasiatic   2266358060
 4 Indo-European  101665300
 5 Nilo-Saharan   111091000
 6 Mande             230000
 7 Portuguese        871000
 8 Khoe–Kwadi        259500
 9 Afro-Asiatic      714300
10 Arabic-based      350000
11 Kongo-based     13000000
12 English           866000
13 Austronesian    18000000
14 French           1173000
15 Ubangian         1230000
16 Language             400
17 Tuu                 5000
Show the code
# Number of language rows recorded for each country, smallest counts first.
arrange(count(africa_lang, country), n)
# A tibble: 51 × 2
   country        n
   <chr>      <int>
 1 Burundi        1
 2 Egypt          1
 3 Eswatini       1
 4 Liberia        1
 5 Madagascar     1
 6 Seychelles     1
 7 Tunisia        1
 8 Cape Verde     2
 9 Comoros        2
10 Gambia         2
# ℹ 41 more rows
Show the code
# Inspect every language row recorded for a single country (Ethiopia).
filter(africa_lang, country == 'Ethiopia')
# A tibble: 16 × 4
   language family       native_speakers country 
   <chr>    <chr>                  <dbl> <chr>   
 1 Afar     Afroasiatic          2500000 Ethiopia
 2 Amharic  Afroasiatic         35000000 Ethiopia
 3 Bambassi Afroasiatic             2300 Ethiopia
 4 Berta    Nilo-Saharan          380000 Ethiopia
 5 Gumuz    Nilo-Saharan          160000 Ethiopia
 6 Komo     Nilo-Saharan           10000 Ethiopia
 7 Kunama   Nilo-Saharan          180000 Ethiopia
 8 Kwama    Nilo-Saharan           15000 Ethiopia
 9 Nuer     Nilo-Saharan         1700000 Ethiopia
10 Opuo     Nilo-Saharan           20000 Ethiopia
11 Oromo    Afroasiatic         37071900 Ethiopia
12 Saho     Niger–Congo           180000 Ethiopia
13 Shabo    Language                 400 Ethiopia
14 Somali   Afroasiatic         21937940 Ethiopia
15 Tigrinya Afroasiatic          9700000 Ethiopia
16 Tsamai   Afroasiatic            18000 Ethiopia
Show the code
# Number of distinct language names in the table.
lang_count <- n_distinct(africa_lang$language)

3.1 One Country Languages

Show the code
# Languages that occur on exactly one row of `africa_lang` (i.e. are listed
# for a single country), ordered from most to fewest native speakers.
# `country_count` holds the number of rows each language has in the table.
one_country_lang <- africa_lang |>
  mutate(country_count = n(), .by = language) |>
  filter(country_count == 1) |>
  arrange(desc(native_speakers))

# Single-country languages as a share of all distinct languages, formatted as
# a percentage string for use in the text.
single_country_share <- nrow(one_country_lang) / lang_count
lang_perc <- scales::percent(single_country_share)

Some 69% of the languages from Wikipedia’s table are spoken in a single country.

country languages
Cameroon 77
Congo 56
Nigeria 48
Sudan 29
Chad 14
Figure 1: Five countries with the most languages spoken nowhere else

4 Quick Plot 1

Show the code
# Scatter plot: languages listed per country (x) against languages listed only
# for that country (y), with country labels and a loess trend line.

# Rows per country in the full table and in the single-country subset.
# `count()` returns one row per country, sorted by country.
languages_by_country <- count(africa_lang, country, name = 'languages')
unique_by_country <- count(one_country_lang, country, name = 'unique_languages')

unique_by_country |>
  # Name the join key explicitly instead of relying on a natural join over
  # whatever columns the two tables happen to share.
  left_join(languages_by_country, by = 'country') |>
  ggplot(aes(x = languages, y = unique_languages)) +
  geom_point() +
  geom_text_repel(aes(label = country), max.overlaps = 20) +
  # Spell out the smoothing formula rather than depending on the default.
  geom_smooth(method = 'loess', formula = y ~ x)
Figure 2: Counts of languages by country in Africa compared with number of languages spoken exclusively in that country

5 Adding population data

Pulling in a second Wikipedia table with country population data reveals some unsurprising oddities. Comparing the number of native speakers with the country-level population, for languages that are spoken in only a single country, turns up a few languages with more listed speakers than the country's listed population.

Show the code
# Scrape Wikipedia's list of African countries by population and keep the
# first table on the page.
population_url <-
  'https://en.wikipedia.org/wiki/List_of_African_countries_by_population'

population_tables <- rvest::html_table(rvest::read_html(population_url))

# Keep the country name and the 'Population[1]' column, stripping the comma
# thousands separators so the population can be stored as a number.
pop_data <- population_tables |>
  purrr::pluck(1) |>
  select(country = Country, population = 'Population[1]') |>
  mutate(population = as.numeric(gsub(',', '', population, fixed = TRUE)))

glimpse(pop_data)
Rows: 57
Columns: 2
$ country    <chr> "Nigeria", "Ethiopia", "Egypt", "DR Congo", "Tanzania", "So…
$ population <dbl> 227882945, 128691692, 114535772, 105789731, 66617606, 63212…
Show the code
# Attach country populations to the single-country languages and express each
# language's native speakers as a proportion of that population.
lang_data <- one_country_lang |>
  # The population table labels this country 'DR Congo'. Match the whole value
  # rather than the substring 'Congo', so any other country name that merely
  # contains 'Congo' is left untouched instead of being rewritten.
  mutate(country = if_else(country == 'Congo', 'DR Congo', country)) |>
  # Explicit key: join on country name only.
  left_join(pop_data, by = 'country') |>
  # Proportion is rounded to one decimal place; everything downstream
  # (the filter below, the map fill) works with this rounded value.
  mutate(pop_prop = round(native_speakers / population, 1))

# Table of languages whose (rounded) speaker count is at least the size of the
# listed country population, largest proportion first.
lang_data |>
  filter(pop_prop >= 1) |>
  arrange(desc(pop_prop)) |>
  mutate(Population_Prop = scales::percent(pop_prop)) |>
  select(-family, -country_count, -pop_prop) |>
  gt::gt()
language native_speakers country population Population_Prop
Cape Verdean Creole 871000 Cape Verde 522331 170%
Comorian 1100000 Comoros 850387 130%
Kinyarwanda 15000000 Rwanda 13954471 110%
Show the code
# For each country, keep the single-country language with the highest
# population proportion; exactly one row per country (ties are not kept).
top_unique_lang <- slice_max(
  select(lang_data, language, country, pop_prop),
  order_by = pop_prop,
  n = 1,
  by = country,
  with_ties = FALSE
)

6 Quick Plot 2

Show the code
# Medium-scale Natural Earth country polygons for Africa, reduced to the
# English name and geometry columns.
africa_countries <- ne_countries(scale = "medium", continent = "Africa")

# Rename two countries so their names line up with those used in the language
# and population tables; all other names pass through unchanged.
africa_map <- africa_countries |>
  select(name_en, geometry) |>
  mutate(
    name_en = case_when(
      name_en == "The Gambia" ~ "Gambia",
      name_en == "Democratic Republic of the Congo" ~ "DR Congo",
      .default = name_en
    )
  )

# Attach each country's top single-country language to its polygon.
# Countries without a match get an empty label, and proportions are capped
# at 1 (missing proportions stay missing).
map_data <- left_join(
  africa_map,
  top_unique_lang,
  by = join_by(name_en == country)
) |>
  mutate(
    language = coalesce(language, ''),
    pop_prop = pmin(pop_prop, 1)
  )

# Label positions: X/Y coordinates of each country's centroid, placed alongside
# that country's attribute columns (geometry dropped).
centroid_xy <- map_data |>
  st_centroid() |>
  st_coordinates() |>
  as_tibble()

map_labels <- bind_cols(centroid_xy, st_drop_geometry(map_data))
Show the code
# Country polygons filled by the (capped) population proportion.
country_layer <- geom_sf(
  data = map_data,
  mapping = aes(fill = pop_prop),
  colour = 'grey70',
  linewidth = 0.1
)

# Viridis option 'C' fill; countries with no value are drawn in grey.
fill_scale <- scale_fill_viridis_c(
  alpha = 0.8,
  option = 'C',
  na.value = 'gray50'
)

# Language names at the country centroids, repelled from one another; the
# fixed seed keeps the label layout reproducible.
language_labels <- geom_text_repel(
  data = map_labels,
  mapping = aes(x = X, y = Y, label = language),
  seed = 42
)

af_lang_map <- ggplot() +
  country_layer +
  fill_scale +
  language_labels +
  labs(fill = 'Population\nProportion')

af_lang_map
Figure 3

7 Final Map

Show the code
# Make a void theme the default for subsequent plots, with a pale blue
# `paper` colour and a dark grey `ink` colour.
set_theme(
  theme_void(
    paper = '#d4e4f0',
    ink = '#333333'
  )
)
# Title and subtitle for the final map.
title_text <- 'Most Widely Spoken Languages'
subtitle_text <- '(Which are only spoken in a single country)'

# Wrap a hexadecimal glyph code in a span that selects the 'fa7-brands' font
# family, producing an HTML entity for that glyph.
fa_brand_icon <- function(codepoint) {
  paste0("<span style='font-family:fa7-brands'>&#x", codepoint, ";</span>")
}

linkedin <- fa_brand_icon('f08c')
github <- fa_brand_icon('f09b')
bluesky <- fa_brand_icon('e671')

# Social handles, each preceded by its icon and separated by bullets.
socials <- paste(
  linkedin, 'sethkasowitz', '&bull;',
  bluesky, 'skasowitz', '&bull;',
  github, 'skasowitz'
)

# The caption is rendered as markdown/HTML, where a bare newline is not a line
# break, so both line breaks are written as <br>.
tidytuesday_desc <-
  "#TidyTuesday: 2026 Week 2<br>Source: Wikipedia 'Languages of Africa'<br>"

# Two description lines followed by the socials line.
full_caption <- paste(tidytuesday_desc, socials)
Show the code
# Markdown-aware text element in the 'syne' family; any further arguments are
# passed through to element_markdown().
syne_markdown <- function(...) {
  element_markdown(family = 'syne', ...)
}

# Title, subtitle and caption text; axis titles are set to empty strings.
final_labels <- labs(
  title = title_text,
  subtitle = subtitle_text,
  caption = full_caption,
  x = '',
  y = ''
)

# Bold, enlarged title; plain subtitle; smaller left-aligned caption.
final_text_theme <- theme(
  plot.title = syne_markdown(size = rel(1.4), face = "bold"),
  plot.subtitle = syne_markdown(),
  plot.caption = syne_markdown(size = rel(0.8), hjust = 0)
)

af_lang_map + final_labels + final_text_theme
Figure 4

Citation

BibTeX citation:
@online{kasowitz2026,
  author = {Kasowitz, Seth},
  title = {TidyTuesday 2026 - {Week} 2},
  date = {2026-01-13},
  url = {https://sethkasowitz.com/posts/2026-01-13_TidyTuesday-wk2/},
  langid = {en}
}
For attribution, please cite this work as:
Kasowitz, Seth. 2026. “TidyTuesday 2026 - Week 2.” January 13, 2026. https://sethkasowitz.com/posts/2026-01-13_TidyTuesday-wk2/.