Metropolitan areas - CITIES

Data - OECD

Info

LAST_DOWNLOAD

Code
# Show the modification date of the cached CITIES.RData file as LAST_DOWNLOAD.
"~/Library/Mobile\ Documents/com~apple~CloudDocs/website/data/oecd/CITIES.RData" %>%
  file.info() %>%
  {tibble(LAST_DOWNLOAD = as.Date(.$mtime))} %>%
  print_table_conditional()
LAST_DOWNLOAD
2022-09-30

LAST_COMPILE

LAST_COMPILE
2024-09-15

Last

obsTime Nobs
2022 5681

Data Structure

Code
# Display the "VAR_DESC" element of CITIES_var: formatted through
# print_table() for HTML output, returned unchanged otherwise.
CITIES_var[["VAR_DESC"]] %>%
  {
    if (is_html_output()) {
      print_table(.)
    } else {
      .
    }
  }
id description
METRO_ID Metropolitan areas
VAR Variables
TIME Year
OBS_VALUE Observation Value
TIME_FORMAT Time Format
OBS_STATUS Observation Status
UNIT Unit
POWERCODE Unit multiplier
REFERENCEPERIOD Reference period

METRO_ID

Code
# Number of observations per METRO_ID, joined to the METRO_ID code list and
# sorted from most to fewest observations.
CITIES_var %>%
  pluck("METRO_ID") %>%
  rename(METRO_ID = id) %>%
  # right_join keeps every METRO_ID present in CITIES, even without a label.
  right_join(count(CITIES, METRO_ID, name = "Nobs"), by = "METRO_ID") %>%
  arrange(-Nobs) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

VAR

Code
# Number of observations per VAR, joined to the VAR code list and sorted
# from most to fewest observations.
CITIES_var %>%
  pluck("VAR") %>%
  rename(VAR = id) %>%
  # right_join keeps every VAR present in CITIES, even without a label.
  right_join(count(CITIES, VAR, name = "Nobs"), by = "VAR") %>%
  arrange(-Nobs) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

World

List

Code
# All metropolitan areas: 2015 values of variable "T_T", largest first.
CITIES %>%
  filter(VAR == "T_T", obsTime == "2015") %>%
  left_join(
    # Code list renamed so the second column becomes `METRO_ID desc`.
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

> 8000000

Code
# Log-log rank-size plot of metropolitan areas whose 2015 "T_T" value is at
# least 8,000,000. Ranks are assigned over all areas before the size filter.
CITIES %>%
  filter(VAR == "T_T", obsTime == "2015") %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 8000000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(
    breaks = 500000 * 2^seq(0, 10, 1),
    labels = comma,
    limits = c(8000000, 40000000)
  ) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 5000000

Code
# Log-log rank-size plot of metropolitan areas whose 2015 "T_T" value is at
# least 5,000,000. Ranks are assigned over all areas before the size filter.
CITIES %>%
  filter(VAR == "T_T", obsTime == "2015") %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 5000000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(
    breaks = 500000 * 2^seq(0, 10, 1),
    labels = comma,
    limits = c(4000000, 40000000)
  ) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 1000000

Code
# Log-log rank-size plot of metropolitan areas whose 2015 "T_T" value is at
# least 1,000,000. Ranks are assigned over all areas before the size filter.
CITIES %>%
  filter(VAR == "T_T", obsTime == "2015") %>%
  left_join(
    CITIES_var$METRO_ID %>%
      rename(METRO_ID = id),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, label, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 1000000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text(aes(label = label), hjust = 0, vjust = 0) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 500000

Code
# Log-log rank-size plot of metropolitan areas whose 2015 "T_T" value is at
# least 500,000, with a dashed linear fit overlaid.
CITIES %>%
  filter(VAR == "T_T", obsTime == "2015") %>%
  left_join(
    CITIES_var$METRO_ID %>%
      rename(METRO_ID = id),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, label, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text(aes(label = label), hjust = 0, vjust = 0) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)") +
  stat_smooth(
    aes(x = obsValue, y = rank),
    linetype = 2,
    method = "lm",
    color = viridis(3)[2],
    # `. %>% filter(...)` is a function of the plot data: the fit only uses
    # rows with obsValue <= 10,000,000.
    data = . %>%
      filter(obsValue <= 10000000)
  )

Japan

List

Code
# Metropolitan areas whose METRO_ID contains "JPN": 2015 values of variable
# "T_T", largest first.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    grepl("JPN", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

> 500000

Code
# Log-log rank-size plot of "JPN" metropolitan areas whose 2015 "T_T" value
# is at least 500,000. Ranks are assigned before the size filter.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    grepl("JPN", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      rename(METRO_ID = id),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, label, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text(aes(label = label), hjust = 0, vjust = 0) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 50000

Code
# Log-log rank-size plot of "JPN" metropolitan areas whose 2015 "T_T" value
# is at least 50,000, with a dashed linear fit overlaid.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    grepl("JPN", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      rename(METRO_ID = id),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, label, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 50000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text(aes(label = label), hjust = 0, vjust = 0) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)") +
  stat_smooth(
    aes(x = obsValue, y = rank),
    linetype = 2,
    method = "lm",
    color = viridis(3)[2],
    # `. %>% filter(...)` is a function of the plot data: the fit only uses
    # rows with obsValue <= 10,000,000.
    data = . %>%
      filter(obsValue <= 10000000)
  )

Code
# Ranked table of "JPN" metropolitan areas (2015, variable "T_T"): one row
# per area with its label, value and rank (1 = largest obsValue).
CITIES_JPN_2015 <- CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    grepl("JPN", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      rename(METRO_ID = id),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, label, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n()))
Code
# Regressions of log(rank) on log(obsValue) over CITIES_JPN_2015, one model
# per minimum-size cutoff.

# Cutoff: obsValue >= 200,000
CITIES_JPN_2015_1 <- CITIES_JPN_2015 %>%
  filter(obsValue >= 2e5) %>%
  lm(log(rank) ~ log(obsValue), data = .)

# Cutoff: obsValue >= 500,000
CITIES_JPN_2015_2 <- CITIES_JPN_2015 %>%
  filter(obsValue >= 5e5) %>%
  lm(log(rank) ~ log(obsValue), data = .)

# Cutoff: obsValue >= 1,000,000
CITIES_JPN_2015_3 <- CITIES_JPN_2015 %>%
  filter(obsValue >= 1e6) %>%
  lm(log(rank) ~ log(obsValue), data = .)

# Cutoff: obsValue >= 2,000,000
CITIES_JPN_2015_4 <- CITIES_JPN_2015 %>%
  filter(obsValue >= 2e6) %>%
  lm(log(rank) ~ log(obsValue), data = .)

> 500000 - repel

Code
# Log-log rank-size plot of "JPN" metropolitan areas whose 2015 "T_T" value
# is at least 500,000, with non-overlapping (repelled) labels.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    grepl("JPN", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      rename(METRO_ID = id),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, label, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  geom_text_repel(aes(label = label)) +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

United States

List

Code
# Metropolitan areas whose METRO_ID starts with "US": 2015 values of
# variable "T_T", largest first.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored at the start: an unanchored "US" also matches any identifier
    # that merely contains it (e.g. one beginning with "AUS").
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^US", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

> 2000000 - repel

Code
# Log-log rank-size plot of "US" metropolitan areas whose 2015 "T_T" value
# is at least 2,000,000, with repelled labels.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored at the start: an unanchored "US" also matches any identifier
    # that merely contains it (e.g. one beginning with "AUS").
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^US", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 2000000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`)) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 1000000 - repel

Code
# Log-log rank-size plot of "US" metropolitan areas whose 2015 "T_T" value
# is at least 1,000,000, with repelled labels.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored at the start: an unanchored "US" also matches any identifier
    # that merely contains it (e.g. one beginning with "AUS").
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^US", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 1000000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`)) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 500000

Code
# Log-log rank-size plot of "US" metropolitan areas whose 2015 "T_T" value
# is at least 500,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored at the start: an unanchored "US" also matches any identifier
    # that merely contains it (e.g. one beginning with "AUS").
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^US", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

France

List

Code
# Metropolitan areas whose METRO_ID starts with "FR": 2015 values of
# variable "T_T", largest first.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "FR" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^FR", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

> 500000

Code
# Log-log rank-size plot of "FR" metropolitan areas whose 2015 "T_T" value
# is at least 500,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "FR" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^FR", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 100000

Code
# Log-log rank-size plot of "FR" metropolitan areas whose 2015 "T_T" value
# is at least 100,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "FR" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^FR", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 100000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

Italy

List

Code
# Metropolitan areas whose METRO_ID starts with "IT": 2015 values of
# variable "T_T", largest first.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "IT" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^IT", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

> 500000

Code
# Log-log rank-size plot of "IT" metropolitan areas whose 2015 "T_T" value
# is at least 500,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "IT" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^IT", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 100000

Code
# Log-log rank-size plot of "IT" metropolitan areas whose 2015 "T_T" value
# is at least 100,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "IT" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^IT", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 100000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

Germany

List

Code
# Metropolitan areas whose METRO_ID starts with "DE": 2015 values of
# variable "T_T", largest first.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "DE" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^DE", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  {if (is_html_output()) datatable(., filter = "top", rownames = FALSE) else .}

> 500000

Code
# Log-log rank-size plot of "DE" metropolitan areas whose 2015 "T_T" value
# is at least 500,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "DE" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^DE", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 500000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")

> 100000

Code
# Log-log rank-size plot of "DE" metropolitan areas whose 2015 "T_T" value
# is at least 100,000.
CITIES %>%
  filter(
    VAR == "T_T",
    obsTime == "2015",
    # Anchored so only identifiers beginning with "DE" match.
    # NOTE(review): assumes METRO_ID begins with the country code - confirm.
    grepl("^DE", METRO_ID)
  ) %>%
  left_join(
    CITIES_var$METRO_ID %>%
      setNames(c("METRO_ID", "METRO_ID desc")),
    by = "METRO_ID"
  ) %>%
  select(METRO_ID, `METRO_ID desc`, obsValue) %>%
  arrange(-obsValue) %>%
  # seq_len() stays valid on an empty table, where 1:n() would give c(1, 0).
  mutate(rank = seq_len(n())) %>%
  filter(obsValue >= 100000) %>%
  ggplot(aes(x = obsValue, y = rank)) +
  geom_point() +
  theme_minimal() +
  scale_y_log10(breaks = 2^(seq(0, 10, 1))) +
  scale_x_log10(breaks = 500000 * 2^seq(0, 10, 1), labels = comma) +
  geom_text_repel(aes(label = `METRO_ID desc`), hjust = 0, vjust = 0) +
  ylab("Rank (City Size)") +
  xlab("City Size (Population)")