Logo

How Bangalore Uses The Metro

October 1, 2025

Show the code
# Load libraries
library(tidyverse)
library(sf)
library(gganimate)
library(viridis)
library(scales)
library(ggraph)
library(igraph)
library(tidyr)
library(knitr)
library(ggnewscale)
library(kableExtra)
library(lubridate)
library(jsonlite)
library(gt)
library(wesanderson)
library(tidylo)
library(tidygraph)
library(broom)
library(terra)
library(exactextractr)

# Global options
options(kable_styling_bootstrap_options = c("striped", "hover", "condensed"))
knitr::opts_chunk$set(
  fig.width = 12,
  fig.height = 8,
  fig.align = "center",
  out.width = "100%",
  warning = FALSE,
  message = FALSE
)

# Colors and Themes
wes_colors <- wes_palette("Darjeeling1", 8, type = "continuous")
primary_color <- "#3182bd"
secondary_color <- "#de77ae"
tertiary_color <- "#2c7fb8"

theme_metro <- function() {
  theme_minimal(base_family = "sans") +
    theme(
      plot.title = element_text(face = "bold", size = 20, margin = margin(b = 8)),
      plot.subtitle = element_text(size = 15, color = "gray40", margin = margin(b = 15)),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      axis.text = element_text(size = 14, color = "gray40"),
      strip.text = element_text(size = 13, face = "bold", color = "gray20"),
      plot.margin = margin(0, 0, 0, 0),
      legend.position = "none"
    )
}

# Helper function to add temporal features (DRY)
add_temporal_features <- function(df) {
  df %>%
    mutate(
      wday = wday(Date, label = TRUE, week_start = 1),
      day_type = if_else(wday %in% c("Sat", "Sun"), "Weekend", "Weekday"),
      peak_category = case_when(
        day_type == "Weekday" & Hour %in% c(8, 9, 10) ~ "Morning Peak",
        day_type == "Weekday" & Hour %in% c(17, 18, 19) ~ "Evening Peak",
        TRUE ~ "Off-Peak"
      )
    )
}

# Load and preprocess data
hourly <- read_delim("data/station-hourly.csv", delim = ";") %>%
  add_temporal_features()

station_pair_hourly <- read_delim("data/stationpair-hourly.csv", delim = ";") %>%
  rename(To_Station = `Destination Station`, From_Station = `Origin Station`) %>%
  mutate(Date = as.Date(Date)) %>%
  add_temporal_features()

metro_geojson <- read_sf("data/metro.geojson")

1 Introduction

This analysis is based on ridership data obtained through a Right to Information (RTI) request to BMRCL. The RTI request sought basic ridership information between stations. However, BMRCL’s response provided station-pairwise hourly ridership between every station pair over 19 days! A treat.

The dataset contains over 1.2 million rows of ridership data, covering a particularly interesting period that includes the opening of the new Yellow Line.

1.1 Questions

  1. How popular is your specific daily commute compared to other routes?
  2. Which stations experience the heaviest traffic during morning and evening rush hours?
  3. How does ridership vary by day of the week, time of day?
  4. What are the most popular travel corridors within the metro network?
  5. Do certain stations show dramatically different usage patterns between weekdays and weekends?
  6. How has the Yellow Line impacted ridership on existing stations in the metro network?
  7. What is the length of an average commute, and how does it vary across stations?
  8. Have their been particular instances of unusual spikes in traffic due to events in the city?

1.2 Dataset Overview

Our analysis uses multiple data sources:

  • Station-level hourly ridership: Passenger counts by station and hour
  • Station-pair hourly data: Origin-destination flows with temporal granularity
  • Spatial data from OpenStreetMap for mapping locations and additional metadata

Let’s begin by examining the structure of our datasets:

Show the code
set.seed(42) # For reproducible sampling
sample_ridership_data <- station_pair_hourly %>%
  filter(Ridership > 0) %>%
  sample_n(15) %>%
  arrange(desc(Ridership)) %>%
  mutate(Date = as.character(Date))

head(hourly) %>%
  kable(caption = "Sample of Hourly Station Ridership Data") %>%
  kable_styling()
Sample of Hourly Station Ridership Data
Date Hour Station Ridership wday day_type peak_category
2025-08-01 0 Attiguppe 0 Fri Weekday Off-Peak
2025-08-01 1 Attiguppe 0 Fri Weekday Off-Peak
2025-08-01 2 Attiguppe 0 Fri Weekday Off-Peak
2025-08-01 3 Attiguppe 0 Fri Weekday Off-Peak
2025-08-01 4 Attiguppe 6 Fri Weekday Off-Peak
2025-08-01 5 Attiguppe 65 Fri Weekday Off-Peak
Show the code
head(station_pair_hourly) %>%
  kable(caption = "Sample of Hourly Station-Pair Ridership Data") %>%
  kable_styling()
Sample of Hourly Station-Pair Ridership Data
Date Hour From_Station To_Station Ridership wday day_type peak_category
2025-08-01 4 Magadi Road Krantivira Sangolli Rayanna Railway Station 1 Fri Weekday Off-Peak
2025-08-01 5 Attiguppe Attiguppe 2 Fri Weekday Off-Peak
2025-08-01 5 Attiguppe Deepanjali Nagar 1 Fri Weekday Off-Peak
2025-08-01 5 Attiguppe Dr. B. R. Ambedkar Station, Vidhana Soudha 2 Fri Weekday Off-Peak
2025-08-01 5 Attiguppe Indiranagar 1 Fri Weekday Off-Peak
2025-08-01 5 Attiguppe Krantivira Sangolli Rayanna Railway Station 21 Fri Weekday Off-Peak

2 Data Preparation

We need to classify each station by its metro line. Namma Metro currently operates three main lines:

  • Purple Line: The east-west corridor
  • Green Line: The north-south corridor
  • Yellow Line: The newest line serving south-eastern regions in the periphery
Show the code
# Extract station data from GeoJSON as the single source of truth
metro_stations <- metro_geojson %>%
  filter(st_geometry_type(.) == "POINT") %>%
  st_drop_geometry() %>%
  select(Station = name, code, line = colour) %>%
  mutate(
    code = if_else(is.na(code),
                   paste0(str_sub(str_to_upper(str_replace_all(Station, "[^A-Za-z]", "")), 1, 4),
                          sprintf("%02d", row_number())),
                   code)
  ) %>%
  arrange(line, Station)

# Create a mapping from long/inconsistent names in CSV to short names in GeoJSON
manual_station_mapping <- tribble(
  ~csv_name, ~geojson_name,
  "Biocon Hebbagodi", "Hebbagodi",
  "Central Silk Board", "Silk Board",
  "Delta Electronics Bommasandra", "Bommasandra",
  "Dr. B. R. Ambedkar Station, Vidhana Soudha", "Vidhana Soudha",
  "Infosys Foundation Konappana Agrahara", "Konappana Agrahara",
  "Jaya Prakash Nagar", "JP Nagar",
  "Krantivira Sangolli Rayanna Railway Station", "KSR Railway Station",
  "Krishna Rajendra Market", "KR Market",
  "Krishnarajapura", "KR Pura",
  "Mahakavi Kuvempu Road", "MKK Road",
  "Mahatma Gandhi Road", "MG Road",
  "Mantri Square Sampige Road", "Mantri Square",
  "Nadaprabhu Kempegowda Station, Majestic", "Majestic",
  "Pantharapalya - Nayandahalli", "Nayandahalli",
  "Rajarajeshwari Nagar", "RR Nagar",
  "Rashtreeya Vidyalaya Road", "RV Road",
  "Sir M. Visvesvaraya Stn., Central College", "Central College",
  "Sri Balagangadharanatha Swamiji Station, Hosahalli", "Hosahalli",
  "Vijayanagar", "Vijayanagara"
)

all_csv_names <- unique(c(hourly$Station, station_pair_hourly$From_Station, station_pair_hourly$To_Station))

identity_mapping <- tibble(csv_name = setdiff(all_csv_names, manual_station_mapping$csv_name)) %>%
  mutate(geojson_name = csv_name)

station_name_mapping <- bind_rows(manual_station_mapping, identity_mapping)

# Helper functions for name conversion
convert_to_short_names <- function(data) {
  if (is.data.frame(data)) {
    station_cols <- intersect(names(data), c("Station", "From_Station", "To_Station"))
    for (col in station_cols) {
      data[[col]] <- station_name_mapping$geojson_name[match(data[[col]], station_name_mapping$csv_name)]
    }
  } else if (is.list(data)) {
    data <- map(data, convert_to_short_names)
  }
  return(data)
}
get_short_name <- function(station_name) {
  short_name <- station_name_mapping$geojson_name[match(station_name, station_name_mapping$csv_name)]
  return(ifelse(is.na(short_name), station_name, short_name))
}

# Create final map for joining and add Line info to hourly data
station_map <- station_name_mapping %>%
  left_join(metro_stations, by = c("geojson_name" = "Station")) %>%
  select(Station = csv_name, Line = line, code) %>%
  mutate(Line = str_to_title(Line)) %>%
  filter(!is.na(Line))

hourly <- hourly %>%
  left_join(station_map, by = "Station")

# Export data for web components
station_codes <- metro_stations %>% select(Station, code, line)
write_json(station_codes, "../src/lib/data/charts/station-codes.json", pretty = TRUE, auto_unbox = TRUE)

station_lines_list <- metro_stations %>%
  group_by(line) %>%
  summarise(stations = list(Station), .groups = 'drop') %>%
  deframe()
write_json(list(lines = station_lines_list), "../src/lib/data/charts/01-line-classification.json", pretty = TRUE, auto_unbox = TRUE)

ridership_sample_export <- list(
  sample_data = convert_to_short_names(sample_ridership_data) %>%
    select(From_Station, To_Station, Ridership, Hour, wday, Date)
)
write_json(ridership_sample_export, "../src/lib/data/charts/ridership-sample.json", pretty = TRUE, auto_unbox = TRUE)

# Display station counts per line
station_map %>%
  count(Line, name = "Stations") %>%
  mutate(Line = if_else(Line == "Black", "Interchange", Line)) %>%
  kable(caption = "Number of stations per metro line") %>%
  kable_styling()
Number of stations per metro line
Line Stations
Interchange 2
Green 30
Purple 36
Yellow 15

3 Exploratory Data Analysis

3.1 Days and Hours

Understanding when people use the metro most heavily.

Show the code
# Busiest days by average ridership
busiest_days <- hourly %>%
  group_by(Date) %>%
  summarise(daily_total = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  mutate(day = wday(Date, label = TRUE)) %>%
  group_by(day) %>%
  summarise(average_daily_ridership = mean(daily_total), .groups = 'drop') %>%
  arrange(desc(average_daily_ridership))

# Busiest hours by average ridership
busiest_hours <- hourly %>%
  group_by(Hour) %>%
  summarise(average_hourly_ridership = mean(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  arrange(desc(average_hourly_ridership))

# Most popular routes by total trips
popular_routes <- station_pair_hourly %>%
  group_by(From_Station, To_Station) %>%
  summarise(total_trips = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  arrange(desc(total_trips)) %>%
  head(15)

# Export for web component
temporal_patterns <- list(
  busiest_days = busiest_days,
  busiest_hours = head(busiest_hours, 10),
  popular_routes = convert_to_short_names(popular_routes)
)
write_json(temporal_patterns, "../src/lib/data/charts/02-temporal-patterns.json", pretty = TRUE, auto_unbox = TRUE)

# Display tables
busiest_days %>%
  mutate(average_daily_ridership = comma(round(average_daily_ridership))) %>%
  kable(col.names = c("Day", "Average Daily Ridership")) %>%
  kable_styling()
Day Average Daily Ridership
Thu 778,964
Wed 766,369
Mon 763,243
Tue 733,002
Fri 628,215
Sat 620,690
Sun 507,833
Show the code
busiest_hours %>%
  head(10) %>%
  mutate(
    Time = paste0(Hour, ":00"),
    average_hourly_ridership = comma(round(average_hourly_ridership))
  ) %>%
  select(Time, average_hourly_ridership) %>%
  kable(col.names = c("Hour", "Average Hourly Ridership")) %>%
  kable_styling()
Hour Average Hourly Ridership
18:00 851
9:00 783
17:00 751
19:00 720
8:00 644
16:00 618
10:00 596
15:00 504
20:00 489
13:00 482
Show the code
convert_to_short_names(popular_routes) %>%
  mutate(
    Route = paste(From_Station, "→", To_Station),
    total_trips = comma(total_trips)
  ) %>%
  select(Route, total_trips) %>%
  kable(col.names = c("Route", "Cumulative Passengers")) %>%
  kable_styling()
Route Cumulative Passengers
Benniganahalli → Majestic 44,477
Kadugodi Tree Park → Sri Sathya Sai Hospital 34,924
Sri Sathya Sai Hospital → Kadugodi Tree Park 34,736
Benniganahalli → Indiranagar 28,838
Yeshwantpur → Majestic 27,539
Banashankari → Majestic 27,130
Indiranagar → Benniganahalli 27,008
Chickpete → Majestic 26,947
MG Road → Indiranagar 24,365
Majestic → Benniganahalli 23,921
MG Road → Benniganahalli 23,641
Benniganahalli → MG Road 23,408
KR Pura → Majestic 23,402
Konanakunte Cross → Majestic 23,108
Majestic → Yeshwantpur 22,513

Weekdays are busier than weekends by a significant amount. The hourly breakdown shows distinct morning (8-10 AM) and evening (5-7 PM) peaks. Routes involving Benniganahalli and Majestic appear frequently among the most popular routes.


6 Peak Hour Stations

Finding out which stations experience the heaviest arrival load during peak hours.

Show the code
busiest_peak_stations <- hourly %>%
  filter(peak_category %in% c("Morning Peak", "Evening Peak")) %>%
  group_by(peak_category, Station, Line) %>%
  summarise(avg_ridership = mean(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  group_by(peak_category) %>%
  slice_max(order_by = avg_ridership, n = 15) %>%
  ungroup() %>%
  arrange(peak_category, desc(avg_ridership))

# Export for web component
peak_hour_export <- list(
  peak_stations = convert_to_short_names(busiest_peak_stations),
  peak_summary = busiest_peak_stations %>% group_by(peak_category)
)
write_json(peak_hour_export, "../src/lib/data/charts/05-peak-hours.json", pretty = TRUE, auto_unbox = TRUE)

# Display table
convert_to_short_names(busiest_peak_stations) %>%
  mutate(avg_ridership = round(avg_ridership, 1)) %>%
  gt() %>%
  tab_header(
    title = "Busiest Stations During Peak Hours",
    subtitle = "Top 15 stations for each peak period (Morning: 8-10 AM, Evening: 5-7 PM)"
  ) %>%
  fmt_number(columns = avg_ridership, decimals = 1) %>%
  tab_style(
    style = list(cell_fill(color = primary_color, alpha = 0.3), cell_text(weight = "bold")),
    locations = cells_column_labels()
  ) %>%
  cols_align(align = "center", columns = avg_ridership) %>%
  tab_options(
    table.font.names = "Atkinson Hyperlegible",
    heading.title.font.size = 16,
    heading.subtitle.font.size = 12,
    table.font.size = 11
  )
Busiest Stations During Peak Hours
Top 15 stations for each peak period (Morning: 8-10 AM, Evening: 5-7 PM)
peak_category Station Line avg_ridership
Evening Peak MG Road Purple 3,050.4
Evening Peak Indiranagar Purple 2,876.4
Evening Peak Trinity Purple 2,664.6
Evening Peak Majestic Black 2,473.4
Evening Peak Cubbon Park Purple 2,439.4
Evening Peak Vidhana Soudha Purple 2,194.9
Evening Peak Pattandur Agrahara Purple 1,992.3
Evening Peak Central College Purple 1,769.5
Evening Peak Benniganahalli Purple 1,749.9
Evening Peak Sri Sathya Sai Hospital Purple 1,694.5
Evening Peak Mantri Square Green 1,677.1
Evening Peak Chickpete Green 1,501.8
Evening Peak Jayanagar Green 1,447.7
Evening Peak KR Pura Purple 1,441.0
Evening Peak Seetharampalya Purple 1,332.8
Morning Peak Benniganahalli Purple 3,205.9
Morning Peak Majestic Black 2,175.9
Morning Peak KR Pura Purple 2,090.6
Morning Peak Baiyappanahalli Purple 2,062.0
Morning Peak Vijayanagara Purple 1,880.5
Morning Peak Kadugodi Tree Park Purple 1,850.9
Morning Peak Hosahalli Purple 1,670.8
Morning Peak Dasarahalli Green 1,545.7
Morning Peak Indiranagar Purple 1,529.2
Morning Peak Attiguppe Purple 1,440.9
Morning Peak Yelachenahalli Green 1,386.9
Morning Peak Whitefield (Kadugodi) Purple 1,364.8
Morning Peak Nagasandra Green 1,356.4
Morning Peak KSR Railway Station Purple 1,335.2
Morning Peak Konanakunte Cross Green 1,289.5

7 Station Activity

Show the code
# Get the average ridership when station is origin station
origin_avg <- station_pair_hourly %>%
  group_by(From_Station, day_type, Hour, Date) %>%
  summarise(total_ridership = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  select(Station = From_Station, day_type, total_ridership, Hour, Date)

# Get the average ridership when station is destination station
destination_avg <- station_pair_hourly %>%
  group_by(To_Station, day_type, Hour, Date) %>%
  summarise(total_ridership = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  select(Station = To_Station, day_type, total_ridership, Hour, Date)

# Get total ridership at each station at each hour by summing the origin and destination ridership
total_ridership <- bind_rows(origin_avg, destination_avg) %>%
  group_by(Station, day_type, Hour, Date) %>%
  summarise(total_ridership = sum(total_ridership, na.rm = TRUE), .groups = 'drop') %>%
  select(Station, day_type, total_ridership, Hour, Date)

# Get median at each hour across all dates
median_ridership <- total_ridership %>%
  group_by(Station, day_type, Hour) %>%
  summarise(median_ridership = median(total_ridership, na.rm = TRUE), .groups = 'drop') %>%
  select(Station, day_type, median_ridership, Hour)

# Get total ridership throughout each day by summing over all median ridership
daily_ridership <- median_ridership %>%
  group_by(Station, day_type) %>%
  summarise(total_ridership = sum(median_ridership, na.rm = TRUE), .groups = 'drop') %>%
  select(Station, day_type, total_ridership)

# Get the ratio of median ridership at each hour to the total ridership throughout each day
hourly_heatmap_data <- median_ridership %>%
  left_join(daily_ridership, by = c("Station", "day_type")) %>%
  mutate(hourly_ratio = median_ridership / total_ridership) %>%
  select(Station, day_type, hourly_ratio, Hour, median_ridership)

# Fill missing hours with 0
hourly_heatmap_data <- hourly_heatmap_data %>%
  complete(Hour = full_seq(c(0, 23), 1), Station, day_type) %>%
  replace_na(list(hourly_ratio = 0, median_ridership = 0))

hourly_heatmap_data <- convert_to_short_names(hourly_heatmap_data)

# Get the peak hours
peak_hours <- c(8:10, 17:19)

heatmap_data <- hourly_heatmap_data %>%
  select(Station, Hour, day_type, hourly_ratio)

# Export for web component
heatmap_export <- list(
  heatmap_data
)
write_json(heatmap_export, "../src/lib/data/charts/07-station-heatmaps.json", pretty = TRUE, auto_unbox = TRUE)

# Station order for graph
station_order <- hourly_heatmap_data %>%
  filter(Hour %in% c(8:10, 17:19)) %>% # Peak hours
  group_by(Station) %>%
  summarise(peak_ridership = mean(median_ridership), .groups = 'drop') %>%
  arrange(desc(peak_ridership)) %>%
  pull(Station)
station_order_display <- purrr::map_chr(station_order, get_short_name)

# Plot heatmap
ggplot(hourly_heatmap_data, aes(x = Hour, y = factor(Station, levels = rev(station_order_display)), fill = hourly_ratio)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis_c(name = "Avg\nRidership", trans = "sqrt", labels = comma, option = "plasma") +
  facet_wrap(~day_type, ncol = 2) +
  scale_x_continuous(breaks = c(6, 9, 12, 15, 18, 21), labels = c("6AM", "9AM", "12PM", "3PM", "6PM", "9PM")) +
  labs(
    title = "Metro Station Activity Throughout the Day",
    subtitle = "Stations ordered by peak hour ridership. Color scale is square-root transformed.",
    x = "Hour of Day", y = "Station"
  ) +
  theme_metro() +
  theme(
    axis.text.y = element_text(size = 6),
    strip.text = element_text(face = "bold", size = 12),
    panel.grid = element_blank(),
    plot.margin = margin(20, 25, 20, 25)
  )


8 Interchange Station Footfall

Analyzing how footfall at interchange stations has changed with the opening of the Yellow Line.

Show the code
# Define key dates
yellow_line_opening <- as.Date("2025-08-11")

# Populate line attribute in station_pair_hourly
station_pair_hourly <- station_pair_hourly %>%
  left_join(station_map, by = c("From_Station" = "Station")) %>%
  left_join(station_map, by = c("To_Station" = "Station"), suffix = c("_from", "_to")) %>%
  mutate(From_line = if_else(Line_from == "Black", "Purple", Line_from)) %>%
  mutate(To_line = if_else(Line_to == "Black", "Green", Line_to)) %>%
  select(-Line_from, -Line_to)

# Calculate average daily number of passengers transferring at Rashtreeya Vidyalaya Road before Yellow Line
rashtreeya_vidyalaya_road_footfall <- station_pair_hourly %>%
  filter((From_line == "Yellow" & To_line != "Yellow") | (From_line != "Yellow" & To_line == "Yellow") | (From_Station == "Rashtreeya Vidyalaya Road") | (To_Station == "Rashtreeya Vidyalaya Road")) %>%
  group_by(Date) %>%
  summarise(daily_footfall = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  filter(Date < yellow_line_opening) %>%
  summarise(average_footfall = round(mean(daily_footfall), 0)) %>%
  mutate(Station = "Rashtreeya Vidyalaya Road", Before = average_footfall) %>%
  select(-average_footfall)

# Calculate average daily number of passengers transferring at Majestic before Yellow Line
majestic_footfall <- station_pair_hourly %>%
  filter((From_line == "Purple" & To_line != "Purple") | (From_line != "Purple" & To_line == "Purple") | (From_Station == "Majestic") | (To_Station == "Majestic")) %>%
  group_by(Date) %>%
  summarise(daily_footfall = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  filter(Date < yellow_line_opening) %>%
  summarise(average_footfall = round(mean(daily_footfall), 0)) %>%
  mutate(Station = "Majestic", Before = average_footfall) %>%
  select(-average_footfall)

# Calculate average daily number of passengers transferring at Rashtreeya Vidyalaya Road after Yellow Line
rashtreeya_vidyalaya_road_footfall_after <- station_pair_hourly %>%
  filter((From_line == "Yellow" & To_line != "Yellow") | (From_line != "Yellow" & To_line == "Yellow") | (From_Station == "Rashtreeya Vidyalaya Road") | (To_Station == "Rashtreeya Vidyalaya Road")) %>%
  group_by(Date) %>%
  summarise(daily_footfall = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  filter(Date >= yellow_line_opening) %>%
  summarise(average_footfall = round(mean(daily_footfall), 0)) %>%
  mutate(Station = "Rashtreeya Vidyalaya Road", After = average_footfall) %>%
  select(-average_footfall)

# Calculate average daily number of passengers transferring at Majestic after Yellow Line
majestic_footfall_after <- station_pair_hourly %>%
  filter((From_line == "Purple" & To_line != "Purple") | (From_line != "Purple" & To_line == "Purple") | (From_Station == "Majestic") | (To_Station == "Majestic")) %>%
  group_by(Date) %>%
  summarise(daily_footfall = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  filter(Date >= yellow_line_opening) %>%
  summarise(average_footfall = round(mean(daily_footfall), 0)) %>%
  mutate(Station = "Majestic", After = average_footfall) %>%
  select(-average_footfall)

# Combine results
interchange_footfall <- bind_rows(rashtreeya_vidyalaya_road_footfall, majestic_footfall, rashtreeya_vidyalaya_road_footfall_after, majestic_footfall_after)

# Export for web component
interchange_footfall_export <- list(
  interchange_footfall = interchange_footfall
)
write_json(interchange_footfall_export, "../src/lib/data/charts/13-interchange-station-footfall.json", pretty = TRUE, auto_unbox = TRUE)

9 The Yellow Line Effect

Analyzing how the opening of the Yellow Line on August 11th, 2025 affected daily boarding totals at existing Green and Purple Line stations. The impact of the Yellow Line was more noticeable at Green Line stations than at Purple Line stations.

Show the code
# Define key dates
yellow_line_opening <- as.Date("2025-08-11")
holidays <- as.Date(c("2025-08-15", "2025-08-16")) # Independence Day, Krishna Janmashtami

# Use a linear model to find stations with statistically significant changes
significant_stations <- hourly %>%
  group_by(Date, Station, Line, wday, day_type) %>%
  summarise(daily_ridership = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  mutate(
    after_yellow_line = if_else(Date >= yellow_line_opening, 1, 0),
    is_event_day = if_else(Date %in% holidays, 1, 0)
  ) %>%
  filter(day_type == "Weekday" & is_event_day == 0 & Line %in% c("Green", "Purple", "Black")) %>%
  group_by(Station) %>%
  filter(n_distinct(after_yellow_line) == 2) %>% # Ensure data for both periods
  ungroup() %>%
  nest_by(Station, Line) %>%
  mutate(model = list(lm(daily_ridership ~ after_yellow_line + wday, data = data))) %>%
  summarise(tidy(model)) %>%
  ungroup() %>%
  filter(term == "after_yellow_line" & p.value < 0.09)

# Calculate median percentage change for context
impact_by_percent_change <- hourly %>%
  group_by(Date, Station, Line, day_type) %>%
  summarise(daily_ridership = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  mutate(
    period = if_else(Date >= yellow_line_opening, "after", "before"),
    is_event_day = if_else(Date %in% holidays, 1, 0)
  ) %>%
  filter(day_type == "Weekday" & is_event_day == 0 & Line %in% c("Green", "Purple", "Black")) %>%
  group_by(Station, Line, period) %>%
  summarise(median_weekday_ridership = median(daily_ridership, na.rm = TRUE), .groups = 'drop') %>%
  pivot_wider(names_from = period, values_from = median_weekday_ridership) %>%
  filter(!is.na(before) & !is.na(after)) %>%
  mutate(percent_change = ((after - before) / before) * 100)

# Combine results into a final summary
final_impact_summary <- significant_stations %>%
  left_join(impact_by_percent_change, by = c("Station", "Line")) %>%
  select(
    Station, Line,
    `Median Change (%)` = percent_change,
    `Abs. Change (est.)` = estimate,
    `p-value` = p.value,
    `Before Median` = before,
    `After Median` = after
  ) %>%
  arrange(desc(`Median Change (%)`))

print(final_impact_summary)
# A tibble: 10 × 7
   Station              Line  `Median Change (%)` `Abs. Change (est.)` `p-value`
   <chr>                <chr>               <dbl>                <dbl>     <dbl>
 1 Lalbagh              Green               38.7                 2486.  0.000916
 2 Manjunathanagara     Green               27.0                  196.  0.0891  
 3 Nadaprabhu Kempegow… Black               26.3                 4985.  0.00457 
 4 Jalahalli            Green               22.6                 1186.  0.00772 
 5 Nagasandra           Green               22.2                 1478.  0.0157  
 6 Rashtreeya Vidyalay… Black               17.8                 1573.  0.0542  
 7 Goraguntepalya       Green               16.3                  893.  0.0502  
 8 Sandal Soap Factory  Green                6.90                 878.  0.0610  
 9 Banashankari         Green              -12.9                -1300.  0.0432  
10 Chickpete            Green              -13.9                -3132.  0.0414  
# ℹ 2 more variables: `Before Median` <dbl>, `After Median` <dbl>
Show the code
# Export for web component
yellow_line_impact_export <- list(
  station_impacts = final_impact_summary %>%
    select(Station, Line, percent_change = `Median Change (%)`) %>%
    convert_to_short_names()
)
write_json(yellow_line_impact_export, "../src/lib/data/charts/10-yellow-line-impact.json", pretty = TRUE, auto_unbox = TRUE)

10 Station Pair Ridership Spikes

During the days for which data is available, there have been unusual spikes in ridership between pairs of stations, compared to their typical ridership.

Show the code
# Calculate typical ridership (hourly mean) for each station pair
station_pair_hourly_means <- station_pair_hourly %>%
  filter(From_Station != To_Station) %>%
  group_by(From_Station, To_Station, Hour) %>%
  summarise(
    typical_ridership = mean(Ridership, na.rm = TRUE),
    total_observations = n(),
    .groups = 'drop'
  ) %>%
  filter(total_observations >= 5) # Ensure stable mean

# Identify spikes by comparing actual ridership to the hourly mean
station_pair_spikes <- station_pair_hourly %>%
  filter(From_Station != To_Station) %>%
  left_join(station_pair_hourly_means, by = c("From_Station", "To_Station", "Hour")) %>%
  filter(!is.na(typical_ridership)) %>%
  mutate(
    deviation_multiple = if_else(typical_ridership > 0, Ridership / typical_ridership, 0),
    spike_category = case_when(
      deviation_multiple >= 5 ~ "Extreme Spike (5x+)",
      deviation_multiple >= 3 ~ "High Spike (3-5x)",
      deviation_multiple >= 2 ~ "Moderate Spike (2-3x)",
      TRUE ~ "Normal"
    )
  ) %>%
  arrange(desc(deviation_multiple))

top_spikes <- station_pair_spikes %>%
  filter(spike_category != "Normal") %>%
  head(50)

# Export data for visualization
spikes_export <- list(
  top_spikes = convert_to_short_names(top_spikes) %>%
    select(Date, Hour, From_Station, To_Station, Ridership, typical_ridership, deviation_multiple, spike_category, peak_category) %>%
    mutate(Date = as.character(Date)),
  spike_summary = station_pair_spikes %>% count(spike_category, name = "count") %>%
    mutate(percentage = round(count / sum(count) * 100, 2)) %>%
    arrange(desc(count))
)
write_json(spikes_export, "../src/lib/data/charts/09-station-pair-spikes.json", pretty = TRUE, auto_unbox = TRUE)

# Display top spikes table
convert_to_short_names(top_spikes) %>%
  head(20) %>%
  mutate(
    Route = paste(From_Station, " & ", To_Station),
    `Date & Time` = paste(Date, paste0(Hour, ":00")),
    `Actual Ridership` = Ridership,
    `Typical Ridership` = round(typical_ridership, 1)
  ) %>%
  select(Route, `Date & Time`, `Actual Ridership`, `Typical Ridership`) %>%
  gt() %>%
  tab_header(
    title = "Top Ridership Spikes Between Station Pairs",
    subtitle = "Instances where ridership was significantly higher than the hourly average"
  ) %>%
  fmt_number(columns = c(`Actual Ridership`, `Typical Ridership`), decimals = 0) %>%
  tab_style(
    style = list(cell_fill(color = primary_color, alpha = 0.3), cell_text(weight = "bold")),
    locations = cells_column_labels()
  ) %>%
  tab_options(
    table.font.names = "Atkinson Hyperlegible",
    heading.title.font.size = 16,
    heading.subtitle.font.size = 12,
    table.font.size = 10
  )
Top Ridership Spikes Between Station Pairs
Instances where ridership was significantly higher than the hourly average
Route Date & Time Actual Ridership Typical Ridership
Central College & Vidhana Soudha 2025-08-15 19:00 130 12
Majestic & Vidhana Soudha 2025-08-15 19:00 301 28
Lalbagh & Vidhana Soudha 2025-08-15 19:00 210 20
Chickpete & Vidhana Soudha 2025-08-15 18:00 128 12
Central College & Garudacharpalya 2025-08-03 10:00 48 5
Baiyappanahalli & Vidhana Soudha 2025-08-15 19:00 47 5
Vidhana Soudha & Central College 2025-08-15 21:00 127 13
Sandal Soap Factory & Central College 2025-08-02 14:00 72 8
Central College & Vidhana Soudha 2025-08-15 20:00 110 12
Halasuru & Vidhana Soudha 2025-08-02 11:00 178 19
Chickpete & Vidhana Soudha 2025-08-15 16:00 159 17
Mahalakshmi & Halasuru 2025-08-16 22:00 49 5
Vidhana Soudha & MG Road 2025-08-15 21:00 118 13
KSR Railway Station & Cubbon Park 2025-08-01 23:00 32 4
Baiyappanahalli & Nayandahalli 2025-08-15 16:00 60 7
KSR Railway Station & Majestic 2025-08-10 11:00 268 31
Sandal Soap Factory & Vidhana Soudha 2025-08-15 18:00 97 11
Chickpete & Vidhana Soudha 2025-08-15 19:00 84 10
Mahalakshmi & Majestic 2025-08-16 23:00 58 7
Vidhana Soudha & Kengeri 2025-08-08 14:00 88 10
Show the code
# Export daily totals for specific stations for a targeted spike visualization
selected_stations <- c("Mahalakshmi", "Lalbagh", "Dr. B. R. Ambedkar Station, Vidhana Soudha")

station_daily_selected <- hourly %>%
  filter(Station %in% selected_stations) %>%
  group_by(Station, Line, Date) %>%
  summarise(Total = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  arrange(Station, Date) %>%
  mutate(Date = as.character(Date)) %>%
  convert_to_short_names() # Use short names for consistency

export_daily_payload <- list(
  window = list(start = "2025-08-15", end = "2025-08-18"),
  stations = convert_to_short_names(tibble(Station = selected_stations))$Station,
  data = station_daily_selected
)

write_json(export_daily_payload, "../src/lib/data/charts/09-station-spike-daily.json", pretty = TRUE, auto_unbox = TRUE)

11 Weekdays vs Weekends - Network

Does Bangalore’s metro serve different functions throughout the week? Weekdays for work commute and a visible difference in the weekends?

Show the code
hourly_avg_wide <- hourly %>%
  group_by(day_type, Hour) %>%
  summarise(Average_Ridership = mean(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  pivot_wider(names_from = day_type, values_from = Average_Ridership, values_fill = 0)

ribbon_data <- hourly_avg_wide %>%
  mutate(
    weekday_ribbon_ymax = if_else(Weekday > Weekend, Weekday, Weekend),
    weekend_ribbon_ymax = if_else(Weekend > Weekday, Weekend, Weekday)
  )

# Export for web component
weekday_weekend_export <- list(
  hourly_averages = pivot_longer(hourly_avg_wide, -Hour, names_to = "day_type", values_to = "Average_Ridership")
)
write_json(weekday_weekend_export, "../src/lib/data/charts/06-weekday-weekend-patterns.json", pretty = TRUE, auto_unbox = TRUE)

# Plot comparison
ggplot(ribbon_data, aes(x = Hour)) +
  geom_ribbon(aes(ymin = Weekend, ymax = weekday_ribbon_ymax), fill = primary_color, alpha = 0.3) +
  geom_ribbon(aes(ymin = Weekday, ymax = weekend_ribbon_ymax), fill = secondary_color, alpha = 0.3) +
  geom_line(aes(y = Weekday), color = primary_color, linewidth = 1.2) +
  geom_line(aes(y = Weekend), color = secondary_color, linewidth = 1.2) +
  annotate("text", x = 8, y = max(ribbon_data$Weekday) * 0.9, label = "Weekday", color = primary_color, size = 4, fontface = "bold") +
  annotate("text", x = 14, y = max(ribbon_data$Weekend) * 0.8, label = "Weekend", color = secondary_color, size = 4, fontface = "bold") +
  scale_x_continuous(breaks = seq(0, 23, 3), labels = paste0(seq(0, 23, 3), ":00")) +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    title = "Ridership Patterns: Weekday vs Weekend",
    subtitle = "Average ridership per station across all hours of the day",
    x = "Hour of Day", y = "Average Ridership per Station"
  ) +
  theme_metro() +
  theme(panel.grid.major.x = element_line(color = "gray90", linetype = "dotted"))


12 Weekdays vs Weekends - Stations at Peak Hour

Some stations serve very different functions on weekdays versus weekends.

Show the code
# Calculate weekday vs weekend peak traffic differences
peak_diff_data <- hourly %>%
  filter(!is.na(Line)) %>%
  group_by(Station, Hour, day_type, Line) %>%
  summarise(avg_ridership = mean(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  filter(Hour %in% c(8:10, 17:19)) %>%
  group_by(Station, day_type, Line) %>%
  summarise(peak_ridership = mean(avg_ridership), .groups = 'drop') %>%
  pivot_wider(names_from = day_type, values_from = peak_ridership, values_fill = 0) %>%
  mutate(
    weekday_weekend_diff = Weekday - Weekend,
    diff_category = case_when(
      weekday_weekend_diff > 500 ~ "Strongly Weekday-Oriented",
      weekday_weekend_diff > 200 ~ "Moderately Weekday-Oriented",
      weekday_weekend_diff < -50 ~ "Weekend-Oriented",
      TRUE ~ "Balanced"
    )
  ) %>%
  arrange(desc(weekday_weekend_diff))

# Export for web component
differential_export <- list(
  station_differences = convert_to_short_names(peak_diff_data),
  category_summary = peak_diff_data %>%
    count(diff_category, name = "station_count") %>%
    mutate(percentage = round(station_count / sum(station_count) * 100, 1))
)
write_json(differential_export, "../src/lib/data/charts/08-weekday-weekend-differential.json", pretty = TRUE, auto_unbox = TRUE)

# Prepare data for diverging bar chart
top_diff_data <- convert_to_short_names(peak_diff_data) %>%
  head(25) %>%
  mutate(Station = fct_reorder(Station, weekday_weekend_diff)) %>%
  select(Station, Weekday, Weekend) %>%
  pivot_longer(cols = c(Weekday, Weekend), names_to = "day_type", values_to = "ridership") %>%
  mutate(
    ridership_direction = if_else(day_type == "Weekend", -ridership, ridership),
    day_type = factor(day_type, levels = c("Weekend", "Weekday"))
  )

# Plot diverging bar chart
ggplot(top_diff_data, aes(x = ridership_direction, y = Station, fill = day_type)) +
  geom_col(alpha = 0.8) +
  geom_vline(xintercept = 0, color = "black", linewidth = 0.5) +
  scale_fill_manual(values = c("Weekday" = primary_color, "Weekend" = secondary_color), name = "Day Type") +
  scale_x_continuous(labels = ~ comma(abs(.x)), breaks = seq(-1000, 2000, 500), expand = expansion(mult = 0.1)) +
  labs(
    title = "Weekday vs Weekend Peak Hour Ridership",
    subtitle = "Top 25 stations with the largest difference between weekday and weekend usage",
    x = "Average Peak Hour Ridership", y = "Station"
  ) +
  theme_metro() +
  theme(
    axis.text.y = element_text(size = 9),
    panel.grid.major.x = element_line(color = "gray90", linetype = "dotted"),
    legend.position = "bottom"
  ) +
  annotate("text", x = 1500, y = 2, label = "Higher Weekday Usage →", color = primary_color, fontface = "bold", size = 3.5) +
  annotate("text", x = -600, y = 2, label = "← Higher Weekend Usage", color = secondary_color, fontface = "bold", size = 3.5)


13 Weekdays vs Weekends - Stations Overall

Which stations and routes show the most consistent ridership patterns between weekdays and weekends? This analysis identifies stations and station pairs that vary the least and most between weekday and weekend usage.

Show the code
# Get number of weekdays and weekends in the dataset
num_weekdays <- n_distinct(hourly$Date[hourly$day_type == "Weekday"])
num_weekends <- n_distinct(hourly$Date[hourly$day_type == "Weekend"])

# Station-level weekday vs weekend variation analysis
station_variation <- hourly %>%
  group_by(Station, Line, day_type) %>%
  summarise(total_ridership = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  mutate(
    days = if_else(day_type == "Weekday", num_weekdays, num_weekends),
    avg_per_day = total_ridership / days
  ) %>%
  select(Station, Line, day_type, avg_per_day) %>%
  pivot_wider(names_from = day_type, values_from = avg_per_day, values_fill = 0)

# Station pair-level (bidirectional) weekday vs weekend variation
station_pair_variation <- station_pair_hourly %>%
  filter(From_Station != To_Station) %>%
  mutate(
    station_pair = ifelse(From_Station < To_Station,
                          paste(From_Station, "↔", To_Station),
                          paste(To_Station, "↔", From_Station)),
    # Ensure consistent ordering for bidirectional pairs
    station_1 = ifelse(From_Station < To_Station, From_Station, To_Station),
    station_2 = ifelse(From_Station < To_Station, To_Station, From_Station)
  ) %>%
  group_by(station_1, station_2, station_pair, day_type) %>%
  summarise(total_ridership = sum(Ridership, na.rm = TRUE), .groups = 'drop') %>%
  mutate(
    days = if_else(day_type == "Weekday", num_weekdays, num_weekends),
    avg_per_day = total_ridership / days
  ) %>%
  select(station_1, station_2, station_pair, day_type, avg_per_day) %>%
  pivot_wider(names_from = day_type, values_from = avg_per_day, values_fill = 0)

# Export station-level analysis
variation_export <- list(stations = convert_to_short_names(station_variation))
write_json(variation_export, "../src/lib/data/charts/09-station-variation-analysis.json", pretty = TRUE, auto_unbox = TRUE)

# Display tables for top variations
station_variation %>%
  mutate(`Difference` = Weekday - Weekend) %>%
  select(Station, Weekday, Weekend, `Difference`) %>%
  arrange(desc(abs(`Difference`))) %>%
  head(15) %>%
  kable(caption = "Top 15 Stations by Weekday-Weekend Ridership Difference") %>%
  kable_styling(full_width = FALSE)
Top 15 Stations by Weekday-Weekend Ridership Difference
Station Weekday Weekend Difference
Pattandur Agrahara 13412.000 3258.667 10153.333
Sri Sathya Sai Hospital 10633.333 1768.167 8865.167
Trinity 13903.000 5313.833 8589.167
Indiranagar 23785.583 16117.000 7668.583
Sir M. Visvesvaraya Stn., Central College 13442.583 6665.167 6777.417
Dr. B. R. Ambedkar Station, Vidhana Soudha 13528.250 6880.667 6647.583
Krishnarajapura 20445.667 14162.167 6283.500
Benniganahalli 27766.500 21907.167 5859.333
Chickpete 14020.500 19728.500 -5708.000
Cubbon Park 14892.417 9512.000 5380.417
Nallurahalli 9332.000 4311.833 5020.167
Kundalahalli 9303.500 4320.667 4982.833
Seetharampalya 9995.917 5103.167 4892.750
Baiyappanahalli 14517.167 9756.500 4760.667
Kadugodi Tree Park 13084.250 8771.167 4313.083
Show the code
station_pair_variation %>%
  filter(Weekday + Weekend > 50) %>% # Exclude low-ridership pairs
  mutate(`Difference` = Weekday - Weekend) %>%
  select(`Station Pair` = station_pair, Weekday, Weekend, `Difference`) %>%
  arrange(desc(abs(`Difference`))) %>%
  head(15) %>%
  kable(caption = "Top 15 Station Pairs by Weekday-Weekend Ridership Difference") %>%
  kable_styling(full_width = FALSE)
Top 15 Station Pairs by Weekday-Weekend Ridership Difference
Station Pair Weekday Weekend Difference
Kadugodi Tree Park ↔︎ Sri Sathya Sai Hospital 5584.833 440.3333 5144.500
Chickpete ↔︎ Nadaprabhu Kempegowda Station, Majestic 1916.167 4229.6667 -2313.500
Kadugodi Tree Park ↔︎ Pattandur Agrahara 2516.917 574.3333 1942.583
Sri Sathya Sai Hospital ↔︎ Whitefield (Kadugodi) 2519.750 922.3333 1597.417
Benniganahalli ↔︎ Indiranagar 3580.667 2146.3333 1434.333
Mahalakshmi ↔︎ Nadaprabhu Kempegowda Station, Majestic 1302.583 2719.3333 -1416.750
Pattandur Agrahara ↔︎ Whitefield (Kadugodi) 2001.583 589.6667 1411.917
Benniganahalli ↔︎ Pattandur Agrahara 1589.000 261.0000 1328.000
Krishnarajapura ↔︎ Pattandur Agrahara 1486.167 272.5000 1213.667
Benniganahalli ↔︎ Kundalahalli 1700.250 501.0000 1199.250
Dr. B. R. Ambedkar Station, Vidhana Soudha ↔︎ Sir M. Visvesvaraya Stn., Central College 1434.250 271.5000 1162.750
Halasuru ↔︎ Mahatma Gandhi Road 2713.417 1638.8333 1074.583
Benniganahalli ↔︎ Nallurahalli 1556.417 488.8333 1067.583
Baiyappanahalli ↔︎ Trinity 1534.750 485.0000 1049.750
Benniganahalli ↔︎ Sri Sathya Sai Hospital 1191.750 160.6667 1031.083

14 Average Travel Distance

Analyzing the average number of stations travelled by commuters at each station. Commuters from city periphery stations travel longer distances on average (and spend more on metro), compared to city center stations.

Show the code
# Define station orders for distance calculation
purple_line_stations <- c("Whitefield (Kadugodi)", "Hopefarm Channasandra", "Kadugodi Tree Park", "Pattandur Agrahara", "Sri Sathya Sai Hospital", "Nallurahalli", "Kundalahalli", "Seetharampalya", "Hoodi", "Garudacharpalya", "Singayyanapalya", "Krishnarajapura", "Benniganahalli", "Baiyappanahalli", "Swami Vivekananda Road", "Indiranagar", "Halasuru", "Trinity", "Mahatma Gandhi Road", "Cubbon Park", "Dr. B. R. Ambedkar Station, Vidhana Soudha", "Sir M. Visvesvaraya Stn., Central College", "Nadaprabhu Kempegowda Station, Majestic", "Krantivira Sangolli Rayanna Railway Station", "Magadi Road", "Sri Balagangadharanatha Swamiji Station, Hosahalli", "Vijayanagar", "Attiguppe", "Deepanjali Nagar", "Mysore Road", "Pantharapalya - Nayandahalli", "Rajarajeshwari Nagar", "Jnanabharathi", "Pattanagere", "Kengeri Bus Terminal", "Kengeri", "Challaghatta")
green_line_stations <- c("Silk Institute", "Thalaghattapura", "Vajarahalli", "Doddakallasandra", "Konanakunte Cross", "Yelachenahalli", "Jaya Prakash Nagar", "Banashankari", "Rashtreeya Vidyalaya Road", "Jayanagar", "South End Circle", "Lalbagh", "National College", "Krishna Rajendra Market", "Chickpete", "Nadaprabhu Kempegowda Station, Majestic", "Mantri Square Sampige Road", "Srirampura", "Mahakavi Kuvempu Road", "Rajajinagar", "Mahalakshmi", "Sandal Soap Factory", "Yeshwantpur", "Goraguntepalya", "Peenya", "Peenya Industry", "Jalahalli", "Dasarahalli", "Nagasandra", "Manjunathanagara", "Chikkabidarakallu", "Madavara")
yellow_line_stations <- c("Rashtreeya Vidyalaya Road", "Ragigudda", "Jayadeva Hospital", "BTM Layout", "Central Silk Board", "Bommanahalli", "Hongasandra", "Kudlu Gate", "Singasandra", "Hosa Road", "Beratena Agrahara", "Electronic City", "Infosys Foundation Konappana Agrahara", "Huskur Road", "Biocon Hebbagodi", "Delta Electronics Bommasandra")

station_positions <- list(
  Purple = setNames(seq_along(purple_line_stations), purple_line_stations),
  Green = setNames(seq_along(green_line_stations), green_line_stations),
  Yellow = setNames(seq_along(yellow_line_stations), yellow_line_stations)
)

# Function to calculate stations travelled (shortest path on the network)
calculate_stations_travelled <- function(origin, destination, origin_line, dest_line) {
  # Handle same station case
  if (origin == destination) {
    return(0)
  }

  # Define interchange stations
  majestic <- "Nadaprabhu Kempegowda Station, Majestic"
  rvr <- "Rashtreeya Vidyalaya Road"

  # Map Black line stations to their physical lines for distance calculation
  map_physical_line <- function(station, line) {
    if (line == "Black") {
      if (station == majestic) return("Purple")
      if (station == rvr) return("Green")
    }
    return(line)
  }

  # Map lines to physical lines
  origin_physical_line <- map_physical_line(origin, origin_line)
  dest_physical_line <- map_physical_line(destination, dest_line)

  # Same line travel (using physical lines)
  if (origin_physical_line == dest_physical_line) {
    if (origin_physical_line == "Purple") {
      return(abs(station_positions$Purple[destination] - station_positions$Purple[origin]))
    } else if (origin_physical_line == "Green") {
      return(abs(station_positions$Green[destination] - station_positions$Green[origin]))
    } else if (origin_physical_line == "Yellow") {
      return(abs(station_positions$Yellow[destination] - station_positions$Yellow[origin]))
    }
  }

  # Cross-line travel - need to find optimal interchange
  min_distance <- Inf

  # Try Majestic interchange (Purple-Green)
  if ((origin_physical_line %in% c("Purple", "Green")) && (dest_physical_line %in% c("Purple", "Green"))) {
    if (origin_physical_line == "Purple") {
      dist_to_majestic <- abs(station_positions$Purple[majestic] - station_positions$Purple[origin])
      dist_from_majestic <- abs(station_positions$Green[destination] - station_positions$Green[majestic])
    } else {
      dist_to_majestic <- abs(station_positions$Green[majestic] - station_positions$Green[origin])
      dist_from_majestic <- abs(station_positions$Purple[destination] - station_positions$Purple[majestic])
    }
    min_distance <- min(min_distance, dist_to_majestic + dist_from_majestic)
  }

  # Try RVR interchange (Green-Yellow)
  if ((origin_physical_line %in% c("Green", "Yellow")) && (dest_physical_line %in% c("Green", "Yellow"))) {
    if (origin_physical_line == "Green") {
      dist_to_rvr <- abs(station_positions$Green[rvr] - station_positions$Green[origin])
      dist_from_rvr <- abs(station_positions$Yellow[destination] - station_positions$Yellow[rvr])
    } else {
      dist_to_rvr <- abs(station_positions$Yellow[rvr] - station_positions$Yellow[origin])
      dist_from_rvr <- abs(station_positions$Green[destination] - station_positions$Green[rvr])
    }
    min_distance <- min(min_distance, dist_to_rvr + dist_from_rvr)
  }

  # Purple to Yellow via Majestic + RVR
  if ((origin_physical_line == "Purple" && dest_physical_line == "Yellow") || (origin_physical_line == "Yellow" && dest_physical_line == "Purple")) {
    if (origin_physical_line == "Purple") {
      dist_to_majestic <- abs(station_positions$Purple[majestic] - station_positions$Purple[origin])
      dist_majestic_to_rvr <- abs(station_positions$Green[rvr] - station_positions$Green[majestic])
      dist_from_rvr <- abs(station_positions$Yellow[destination] - station_positions$Yellow[rvr])
    } else {
      dist_to_rvr <- abs(station_positions$Yellow[rvr] - station_positions$Yellow[origin])
      dist_rvr_to_majestic <- abs(station_positions$Green[majestic] - station_positions$Green[rvr])
      dist_from_majestic <- abs(station_positions$Purple[destination] - station_positions$Purple[majestic])
      dist_to_majestic <- dist_to_rvr + dist_rvr_to_majestic
      dist_majestic_to_rvr <- 0
      dist_from_rvr <- dist_from_majestic
    }
    min_distance <- min(min_distance, dist_to_majestic + dist_majestic_to_rvr + dist_from_rvr)
  }

  return(min_distance)
}

# Calculate weighted average travel distance for each station
station_travel_analysis <- station_pair_hourly %>%
  left_join(distinct(hourly, Station, Line), by = c("From_Station" = "Station")) %>%
  rename(Origin_Line = Line) %>%
  left_join(distinct(hourly, Station, Line), by = c("To_Station" = "Station")) %>%
  rename(Dest_Line = Line) %>%
  filter(!is.na(Origin_Line), !is.na(Dest_Line)) %>%
  rowwise() %>%
  mutate(stations_travelled = calculate_stations_travelled(From_Station, To_Station, Origin_Line, Dest_Line)) %>%
  ungroup() %>%
  summarise(
    avg_stations_travelled = weighted.mean(stations_travelled, Ridership, na.rm = TRUE),
    .by = c(From_Station, Origin_Line)
  ) %>%
  filter(avg_stations_travelled > 0) %>%
  arrange(desc(avg_stations_travelled))

# Export data for web interface
travel_distance_export <- list(station_averages = convert_to_short_names(station_travel_analysis))
write_json(travel_distance_export, "../src/lib/data/charts/11-average-travel-distance.json", pretty = TRUE, auto_unbox = TRUE)

travel_time_selected <- travel_distance_export$station_averages %>%
  filter(From_Station %in% c("Madavara", "Halasuru"))
write_json(list(station_averages = travel_time_selected), "../src/lib/data/charts/11-travel-time-selected.json", pretty = TRUE, auto_unbox = TRUE)

travel_distance_export$station_averages %>%
  select(Station = From_Station, Line = Origin_Line, `Trip Length (stations)` = avg_stations_travelled) %>%
  kable(caption = "Average Trip Length by Origin Station") %>%
  kable_styling(full_width = FALSE)
Average Trip Length by Origin Station
Station Line Trip Length (stations)
Madavara Green 18.318033
Bommasandra Yellow 18.168083
Kengeri Purple 18.133553
Challaghatta Purple 17.219071
Kengeri Bus Terminal Purple 16.206533
Hopefarm Channasandra Purple 16.027048
Hebbagodi Yellow 16.001832
Nagasandra Green 15.965496
Chikkabidarakallu Green 15.814025
Silk Institute Green 15.376101
Konappana Agrahara Yellow 15.336117
Pattandur Agrahara Purple 15.188164
Jnanabharathi Purple 15.017945
Whitefield (Kadugodi) Purple 14.829962
Pattanagere Purple 14.452997
Manjunathanagara Green 14.369237
Dasarahalli Green 14.225490
Mysore Road Purple 14.156109
Huskur Road Yellow 13.947501
RR Nagar Purple 13.933226
Jalahalli Green 13.868533
Thalaghattapura Green 13.516886
Electronic City Yellow 13.410300
Doddakallasandra Green 13.212075
Seetharampalya Purple 13.209974
Nallurahalli Purple 13.180427
Kundalahalli Purple 13.150757
Peenya Industry Green 12.980126
Kadugodi Tree Park Purple 12.961417
Peenya Green 12.675326
Vajarahalli Green 12.613190
Hosa Road Yellow 12.586739
Yelachenahalli Green 12.241598
Konanakunte Cross Green 12.139961
JP Nagar Green 11.926389
Yeshwantpur Green 11.390557
Deepanjali Nagar Purple 11.324541
Singasandra Yellow 11.319291
KR Pura Purple 11.174772
Hoodi Purple 11.107346
Goraguntepalya Green 10.873059
Sri Sathya Sai Hospital Purple 10.849163
RV Road Black 10.847466
Kudlu Gate Yellow 10.815676
Benniganahalli Purple 10.733026
Banashankari Green 10.641094
Sandal Soap Factory Green 10.577654
Bommanahalli Yellow 10.557306
Nayandahalli Purple 10.370858
Mahalakshmi Green 10.326264
Silk Board Yellow 10.274344
BTM Layout Yellow 10.109569
Jayanagar Green 10.065463
Attiguppe Purple 10.027482
Jayadeva Hospital Yellow 10.007712
Swami Vivekananda Road Purple 9.968105
Hongasandra Yellow 9.803566
Baiyappanahalli Purple 9.655835
Lalbagh Green 9.538695
Vijayanagara Purple 9.493056
Garudacharpalya Purple 9.466842
Beratena Agrahara Yellow 9.451065
Rajajinagar Green 9.429973
Hosahalli Purple 9.112417
Indiranagar Purple 9.100586
South End Circle Green 9.085347
Ragigudda Yellow 9.025700
Trinity Purple 8.923230
Singayyanapalya Purple 8.718092
Cubbon Park Purple 8.521418
MG Road Purple 8.488166
National College Green 8.476627
MKK Road Green 8.393956
Majestic Black 8.373717
KSR Railway Station Purple 8.332015
Srirampura Green 8.101633
KR Market Green 7.869279
Magadi Road Purple 7.760953
Mantri Square Green 7.760831
Chickpete Green 7.613030
Vidhana Soudha Purple 7.507872
Central College Purple 7.423647
Halasuru Purple 6.642569

15 Land use

Show the code
input_raster_path <- "built-up-volume/built-up-volume.tif"
output_geojson_path <- "../src/lib/data/charts/12-built_up_volume_hex.json"

# Load raster, set 0 to NA, and create a hex grid
commercial_built_volume <- rast(input_raster_path)
commercial_built_volume[commercial_built_volume == 0] <- NA
hex_grid <- st_make_grid(commercial_built_volume, cellsize = 0.009, square = FALSE) %>% st_as_sf()

# Extract mean raster values for each hex cell
hex_grid$mean_volume <- exact_extract(commercial_built_volume, hex_grid, fun = "mean")

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=======================                                               |  34%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==========================================================            |  84%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
Show the code
hex_summary_filtered <- hex_grid[!is.na(hex_grid$mean_volume), ]

# Write to GeoJSON
st_write(hex_summary_filtered, output_geojson_path, delete_dsn = TRUE, driver = "GeoJSON")
Deleting source `../src/lib/data/charts/12-built_up_volume_hex.json' using driver `GeoJSON'
Writing layer `12-built_up_volume_hex' to data source 
  `../src/lib/data/charts/12-built_up_volume_hex.json' using driver `GeoJSON'
Writing 1732 features with 1 fields and geometry type Polygon.
Show the code
# Create a quick visualization
breaks <- pretty(hex_summary_filtered$mean_volume, n = 10)
pal <- hcl.colors(length(breaks) - 1, "viridis", rev = TRUE)
plot(hex_summary_filtered["mean_volume"], main = "Built-up Volume Aggregated into Hexagons",
     breaks = breaks, pal = pal, key.pos = 4, lwd = 0.1, border = NA)


16 Network Analysis

Show the code
# --- Network Analysis (Corrected Final Version) ---

# 1. Calculate Station Importance (using short names)
station_importance <- hourly %>%
  convert_to_short_names() %>%
  count(Station, wt = Ridership, name = "total_ridership") %>%
  rename(id = Station)

# 2. Calculate Hybrid Top Links using consistent short names
# The `metro_stations` table from your data prep chunk already has short names and line info
line_info <- metro_stations %>%
  select(Station, line)

# Create a base table of all links, converting to short names immediately
all_links_with_lines <- station_pair_hourly %>%
  count(From_Station, To_Station, wt = Ridership, name = "Ridership") %>%
  convert_to_short_names() %>% # This ensures all names are short and consistent
  bind_log_odds(set = From_Station, feature = To_Station, n = Ridership) %>%
  left_join(line_info, by = c("From_Station" = "Station")) %>%
  rename(from_line = line) %>%
  left_join(line_info, by = c("To_Station" = "Station")) %>%
  rename(to_line = line) %>%
  filter(!is.na(from_line), !is.na(to_line)) %>%
  mutate(is_interline = (from_line != to_line))

# Create the hybrid set of top links
top_links <-
  bind_rows(
    # Top 8 intra-line (same-line) links
    all_links_with_lines %>%
      filter(!is_interline, Ridership > 100) %>%
      group_by(From_Station) %>%
      slice_max(order_by = log_odds_weighted, n = 8, with_ties = FALSE),

    # Top 3 inter-line (cross-line) links
    all_links_with_lines %>%
      filter(is_interline, Ridership > 100) %>%
      group_by(From_Station) %>%
      slice_max(order_by = log_odds_weighted, n = 3, with_ties = FALSE)
  ) %>%
  ungroup() %>%
  distinct(From_Station, To_Station, .keep_all = TRUE)

# 3. Prepare Nodes and Links for D3 Export
links_for_graph <- top_links %>%
  select(From_Station, To_Station, value = log_odds_weighted)

nodes_final <- {
  base_nodes <- tibble(id = unique(c(links_for_graph$From_Station, links_for_graph$To_Station))) %>%
    left_join(line_info, by = c("id" = "Station")) %>%
    select(id, group = line) %>%
    mutate(group = str_to_title(coalesce(group, "Unknown")))

  tbl_graph(nodes = base_nodes, edges = links_for_graph, directed = FALSE) %>%
    activate(nodes) %>%
    mutate(betweenness = centrality_betweenness(weights = value - min(value) + 1)) %>%
    as_tibble() %>%
    left_join(station_importance, by = "id") %>%
    mutate(total_ridership = coalesce(total_ridership, 0L))
}

links_final_indexed <- {
  station_indices <- setNames(seq_len(nrow(nodes_final)) - 1, nodes_final$id)
  links_for_graph %>%
    mutate(
      source = station_indices[From_Station],
      target = station_indices[To_Station]
    ) %>%
    filter(!is.na(source), !is.na(target)) %>%
    select(source, target, value) %>%
    purrr::pmap(~c(...)) %>%
    unname()
}

# 4. Export to JSON
d3_network_output <- list(
  nodes = nodes_final,
  links = links_final_indexed
)

write_json(
  d3_network_output,
  "../src/lib/data/charts/metro-network-final.json",
  pretty = FALSE,
  auto_unbox = TRUE
)

cat(
  "Successfully exported refined D3 network data.\n",
  "Exported", nrow(nodes_final), "stations and", length(links_final_indexed), "connections.\n"
)
Successfully exported refined D3 network data.
 Exported 83 stations and 900 connections.