Summary of Luke’s Spotify listening

Author

Luke Pilling

Published

July 18, 2025

I’ve had a Spotify account since 23rd March 2012, and have been using it pretty much every day since then. I thought it would be interesting to summarise my listening habits over the years, so here I present some plots of my listening data.

The code and data are available here: https://github.com/lcpilling/spotify

Read and tidy data

# Read the gzipped, tab-separated file data/derived/music.tsv.gz
# (path resolved relative to the project root by here::here)
dat_music <- readr::read_tsv(
    file = here::here("data/derived/music.tsv.gz")
)
Rows: 52372 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (10): platform, conn_country, ip_addr, master_metadata_track_name, mast...
dbl   (5): ms_played, offline_timestamp, duration, year, day_week
lgl  (11): episode_name, episode_show_name, spotify_episode_uri, audiobook_t...
dttm  (3): ts, start_time, end_time
date  (1): date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# remove the birdsong I used as an alarm for a while
# Rows with a missing artist name are kept on purpose: `NA != "Nature Sounds"`
# evaluates to NA, and filter() drops rows whose condition is NA, so a bare
# `!=` test would silently discard them along with the birdsong.
dat_music <- dat_music |>
    dplyr::filter(
        is.na(master_metadata_album_artist_name) |
            master_metadata_album_artist_name != "Nature Sounds"
    )

Play time

Time by year

Determine total hours and average per day to display.

Code
# One row per value of `year`; n = summed `duration` divided by 60 twice
# (hours, per the plot labels), rounded to a whole number
dat_time <- dplyr::summarise(
    dplyr::group_by(dat_music, year),
    n = round(sum(duration) / 60 / 60)
)

# summary: grand total of the (already rounded) yearly values
sum(dat_time[["n"]])
[1] 3245
Code
# total hours divided by the number of days between the two fixed dates
sum(dat_time$n) / as.numeric(
    difftime(
        lubridate::dmy("11-07-2025"),
        lubridate::dmy("23-03-2012"),
        units = "days"
    )
)
[1] 0.6679704
Code
# plot: one horizontal bar per year, coloured and labelled by hours played
dat_time |>
    dplyr::arrange(dplyr::desc(n)) |>
    ggplot2::ggplot(
        mapping = ggplot2::aes(x = as.character(year), y = n, fill = n)
    ) +
    ggplot2::geom_col(alpha = 0.9) +
    # value label just past the end of each bar
    ggplot2::geom_text(
        mapping = ggplot2::aes(label = n),
        hjust = -0.2, size = 5, color = "black"
    ) +
    ggplot2::coord_flip() +
    ggplot2::scale_y_continuous(limits = c(0, 440)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::labs(
        title = "Luke's Spotify music play time",
        subtitle = "23rd March 2012 to 11th July 2025",
        caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)",
        x = "",
        y = "Hours played"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        legend.position = "none",
        axis.text.x = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/music_time_byyear.png"),
    width = 14, height = 14, units = "cm", dpi = 150, bg = "white"
)

Time by month

Code
# Summed `duration` per month label (all years pooled), divided by 60 twice
# and rounded
dat_time <- dplyr::summarise(
    dplyr::group_by(
        dplyr::mutate(dat_music, month = lubridate::month(date, label = TRUE)),
        month
    ),
    n = round(sum(duration) / 60 / 60)
)

# Horizontal bars, one per month; factor levels are reversed before the
# axes are flipped
dat_time |>
    dplyr::arrange(dplyr::desc(n)) |>
    ggplot2::ggplot(
        mapping = ggplot2::aes(x = forcats::fct_rev(month), y = n, fill = n)
    ) +
    ggplot2::geom_col(alpha = 0.9) +
    ggplot2::geom_text(
        mapping = ggplot2::aes(label = n),
        hjust = -0.2, size = 5, color = "black"
    ) +
    ggplot2::coord_flip() +
    ggplot2::scale_y_continuous(limits = c(0, 400)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::labs(
        title = "Luke's Spotify music play time",
        subtitle = "23rd March 2012 to 11th July 2025",
        caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)",
        x = "",
        y = "Hours played"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        legend.position = "none",
        axis.text.x = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/music_time_bymonth.png"),
    width = 14, height = 12, units = "cm", dpi = 150, bg = "white"
)

Time by month - average

Code
# Average hours played per calendar month.
#
# The listening window (23 March 2012 to 11 July 2025, the same dates used
# for the per-day average) does not contain every month the same number of
# times: April-June occur 14 times, most other months 13, and March 2012 and
# July 2025 are only partly covered. Dividing every month by a fixed 13
# overstates some averages, so each month's total is divided by the
# (fractional) number of times that month occurs in the window.
window_days <- seq(
    lubridate::dmy("23-03-2012"),
    lubridate::dmy("11-07-2025"),
    by = "day"
)
month_counts <- dplyr::tibble(date = window_days) |>
    dplyr::mutate(month = lubridate::month(date, label = TRUE)) |>
    dplyr::group_by(month) |>
    # each day contributes 1 / (days in its month): a fully covered month
    # adds exactly 1, a partly covered month adds the covered fraction
    dplyr::summarise(n_months = sum(1 / lubridate::days_in_month(date)))

dat_time <- dat_music |>
    dplyr::mutate(month = lubridate::month(date, label = TRUE)) |>
    dplyr::group_by(month) |>
    dplyr::summarise(hours = sum(duration) / 60 / 60) |>
    dplyr::left_join(month_counts, by = "month") |>
    dplyr::mutate(n = round(hours / n_months)) |>
    dplyr::select(month, n)

# Horizontal bars, one per month, labelled with the rounded average
dat_time |>
    dplyr::arrange(-n) |>
    ggplot2::ggplot(ggplot2::aes(forcats::fct_rev(month), n, fill = n)) +
    ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
    ggplot2::geom_text(
        ggplot2::aes(y = n, label = n),
        hjust = -0.2, size = 5, color = "black"
    ) +
    ggplot2::scale_y_continuous(limits = c(0, 30)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::coord_flip() +
    ggplot2::labs(
        x = "", y = "Average Hours played",
        title = "Luke's Spotify music play time",
        subtitle = "23rd March 2012 to 11th July 2025",
        caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        legend.position = "none",
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/music_time_bymonth_average.png"),
    width = 14, height = 12, units = "cm", dpi = 150, bg = "white"
)

Time by day

Code
# Summed `duration` per weekday label (weeks start on Monday), divided by 60
# twice and rounded
dat_time <- dplyr::summarise(
    dplyr::group_by(
        dplyr::mutate(
            dat_music,
            day = lubridate::wday(date, label = TRUE, week_start = 1)
        ),
        day
    ),
    n = round(sum(duration) / 60 / 60)
)

# Horizontal bars, one per weekday; factor levels are reversed before the
# axes are flipped
dat_time |>
    dplyr::arrange(dplyr::desc(n)) |>
    ggplot2::ggplot(
        mapping = ggplot2::aes(x = forcats::fct_rev(day), y = n, fill = n)
    ) +
    ggplot2::geom_col(alpha = 0.9) +
    ggplot2::geom_text(
        mapping = ggplot2::aes(label = n),
        hjust = -0.2, size = 5, color = "black"
    ) +
    ggplot2::coord_flip() +
    ggplot2::scale_y_continuous(limits = c(0, 700)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::labs(
        title = "Luke's Spotify music play time",
        subtitle = "23rd March 2012 to 11th July 2025",
        caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)",
        x = "",
        y = "Hours played"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        legend.position = "none",
        axis.text.x = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/music_time_byday.png"),
    width = 14, height = 12, units = "cm", dpi = 150, bg = "white"
)

Time by day - average

Code
# Average minutes played per weekday.
#
# The denominator is the actual number of Mondays, Tuesdays, ... between
# 23 March 2012 and 11 July 2025 (about 694 each). The previous fixed
# 52 * 13 = 676 undercounted the occurrences, inflating every average and
# disagreeing with the 4,858-day span quoted in the caption.
window_days <- seq(
    lubridate::dmy("23-03-2012"),
    lubridate::dmy("11-07-2025"),
    by = "day"
)
day_counts <- dplyr::tibble(
    day = lubridate::wday(window_days, label = TRUE, week_start = 1)
) |>
    dplyr::count(day, name = "n_days")

dat_time <- dat_music |>
    dplyr::mutate(day = lubridate::wday(date, label = TRUE, week_start = 1)) |>
    dplyr::group_by(day) |>
    dplyr::summarise(mins = sum(duration) / 60) |>
    dplyr::left_join(day_counts, by = "day") |>
    dplyr::mutate(n = round(mins / n_days)) |>
    dplyr::select(day, n)

# Horizontal bars, one per weekday, labelled with the rounded average
dat_time |>
    dplyr::arrange(-n) |>
    ggplot2::ggplot(ggplot2::aes(forcats::fct_rev(day), n, fill = n)) +
    ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
    ggplot2::geom_text(
        ggplot2::aes(y = n, label = n),
        hjust = -0.2, size = 5, color = "black"
    ) +
    ggplot2::scale_y_continuous(limits = c(0, 70)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::coord_flip() +
    ggplot2::labs(
        x = "", y = "Average minutes played",
        title = "Luke's Spotify music play time",
        subtitle = "23rd March 2012 to 11th July 2025",
        caption = "Total = 194,700 mins (avg. 40 mins per day over the 4,858 days)"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        legend.position = "none",
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/music_time_byday_average.png"),
    width = 14, height = 12, units = "cm", dpi = 150, bg = "white"
)

Summary stats by day

Code
# Minutes listened on each date that appears in dat_music
# (dates with no rows in dat_music are not represented at all)
dat_time <- dat_music |>
    dplyr::group_by(date) |>
    dplyr::summarise(n = sum(duration) / 60)

# get median and IQRs
dat_time_stats <- data.frame(
    median = stats::median(dat_time$n, na.rm = TRUE),
    lower = stats::quantile(dat_time$n, 0.25, na.rm = TRUE),
    upper = stats::quantile(dat_time$n, 0.75, na.rm = TRUE)
)

# Caption is built from the statistics computed above, so it cannot drift
# out of sync with the data the way a hand-typed number would
stats_caption <- sprintf(
    "Median %.1f mins (IQR %.1f to %.1f)",
    dat_time_stats$median, dat_time_stats$lower, dat_time_stats$upper
)

# histogram
dat_time |>
    # cap very long days at 500 minutes so they land in the last bin
    dplyr::mutate(n = pmin(n, 500)) |>
    ggplot2::ggplot(ggplot2::aes(n)) +
    ggplot2::geom_histogram(
        binwidth = 10, fill = "darkblue", color = "black", alpha = 0.7
    ) +
    ggplot2::labs(
        x = "Minutes listened", y = "Number of days",
        title = "Luke's Spotify music play time by day",
        subtitle = "23rd March 2012 to 11th July 2025",
        caption = stats_caption
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/mins_by_day.png"),
    width = 14, height = 14, units = "cm", dpi = 150, bg = "white"
)

Several days over 500 mins!

Code
# list the dates whose total exceeds 500 minutes
dplyr::filter(dat_time, n > 500)
# A tibble: 5 × 2
  date           n
  <date>     <dbl>
1 2013-07-11  569.
2 2017-01-02  524.
3 2024-12-30 1439.
4 2024-12-31  780.
5 2025-01-12  708.

These look a lot like I just left it running at NYE parties or similar.

Songs

Ever

Code
# Number of rows (plays) per track name / artist pair, most played first
dat_songs <- dplyr::arrange(
    dplyr::count(
        dat_music,
        master_metadata_track_name,
        master_metadata_album_artist_name
    ),
    dplyr::desc(n)
)


# Horizontal bar chart of the first 15 rows, bars ordered by play count
dat_songs |>
    dplyr::slice_head(n = 15) |>
    ggplot2::ggplot(
        mapping = ggplot2::aes(
            x = forcats::fct_reorder(master_metadata_track_name, n),
            y = n,
            fill = n
        )
    ) +
    ggplot2::geom_col(alpha = 0.9) +
    ggplot2::geom_text(
        mapping = ggplot2::aes(label = n),
        hjust = -0.2, size = 5, color = "black"
    ) +
    ggplot2::coord_flip() +
    ggplot2::scale_y_continuous(limits = c(0, 140)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::labs(
        title = "Luke's top 15 Spotify songs",
        subtitle = "23rd March 2012 to 11th July 2025",
        x = "",
        y = "Number of plays"
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        text = ggplot2::element_text(size = 15, color = "black"),
        plot.title.position = "plot",
        legend.position = "none",
        axis.text.x = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/songs_top15.png"),
    width = 14, height = 14, units = "cm", dpi = 150, bg = "white"
)

By year

Code
# Play counts per track name / artist pair within each year, most played first
dat_songs_year <- dat_music |>
    dplyr::group_by(year) |>
    dplyr::count(master_metadata_track_name, master_metadata_album_artist_name) |>
    dplyr::arrange(-n)

# for this I want to shorten some names by removing brackets...
dat_songs_year <- dat_songs_year |>
    dplyr::mutate(
        master_metadata_track_name = stringr::str_replace_all(
            master_metadata_track_name,
            "\\[.*?\\]|\\(.*?\\)", # Regular expression to match text within brackets
            "..."
        )
    )

# plot
dat_songs_year |>
    # group explicitly so "first five rows" means first five rows per year,
    # rather than relying on grouping left over from the steps above
    dplyr::group_by(year) |>
    dplyr::filter(dplyr::row_number() <= 5) |>
    ggplot2::ggplot(ggplot2::aes(tidytext::reorder_within(master_metadata_track_name, n, year), fill = n)) +
    ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
    ggplot2::geom_text(ggplot2::aes(y = n, label = n), hjust = -0.2, size = 3, color = "black") +
    ggplot2::scale_y_continuous(limits = c(0, 55)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::coord_flip() +
    ggplot2::labs(x = "", y = "Number of plays",
                                title = "Luke's top Spotify songs",
                                subtitle = "23rd March 2012 to 11th July 2025") +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        legend.position = "none",
        text = ggplot2::element_text(size = 11, color = "black"),
        plot.title.position = "plot",
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    ) +
    tidytext::scale_x_reordered() +
    # one facet row per year, newest first; the argument is spelled `scales`
    # in full instead of relying on partial matching of `scale`
    ggplot2::facet_grid(factor(year, levels = c(2025:2012)) ~ ., scales = "free", space = "free")

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/songs_top5_byyear.png"),
    width = 18, height = 25, units = "cm", dpi = 150, bg = "white"
)

Artists

Ever

Code
# Summed `duration` per artist, divided by 60 twice (hours) and rounded,
# sorted from most to least
dat_artists <- dat_music |>
    dplyr::group_by(master_metadata_album_artist_name) |>
    dplyr::summarise(n = round(sum(duration) / 60 / 60)) |>
    dplyr::arrange(-n)

# Horizontal bar chart of the first 15 artists, bars ordered by hours
dat_artists |>
    head(n = 15) |>
    ggplot2::ggplot(ggplot2::aes(forcats::fct_reorder(master_metadata_album_artist_name, n), fill = n)) +
    ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
    ggplot2::geom_text(ggplot2::aes(y = n, label = n), hjust = -0.2, size = 4, color = "black") +
    ggplot2::scale_y_continuous(limits = c(0, 65)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::coord_flip() +
    # axis label typo fixed ("listend"), matching the per-year artists plot
    ggplot2::labs(x = "", y = "Hours listened",
                                title = "Luke's top 15 Spotify artists",
                                subtitle = "23rd March 2012 to 11th July 2025") +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        legend.position = "none",
        text = ggplot2::element_text(size = 13, color = "black"),
        plot.title.position = "plot",
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    )

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/artists_top15.png"),
    width = 16, height = 13, units = "cm", dpi = 150, bg = "white"
)

By year

Code
# Summed `duration` per year / artist pair, divided by 60 twice (hours) and
# rounded, then sorted from most to least
dat_artists_year <- dplyr::arrange(
    dplyr::summarise(
        dplyr::group_by(dat_music, year, master_metadata_album_artist_name),
        n = round(sum(duration) / 60 / 60)
    ),
    dplyr::desc(n)
)
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
Code
# plot: first five artists per year, one facet row per year

dat_artists_year |>
    # group explicitly so "first five rows" means first five rows per year,
    # rather than relying on grouping left over from summarise()
    dplyr::group_by(year) |>
    dplyr::filter(dplyr::row_number() <= 5) |>
    ggplot2::ggplot(ggplot2::aes(tidytext::reorder_within(master_metadata_album_artist_name, n, year), fill = n)) +
    ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
    ggplot2::geom_text(ggplot2::aes(y = n, label = n), hjust = -0.2, size = 3, color = "black") +
    ggplot2::scale_y_continuous(limits = c(0, 39)) +
    viridis::scale_fill_viridis(option = "H") +
    ggplot2::coord_flip() +
    ggplot2::labs(x = "", y = "Hours listened",
                                title = "Luke's top Spotify artists",
                                subtitle = "23rd March 2012 to 11th July 2025") +
    ggplot2::theme_minimal() +
    ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),
        legend.position = "none",
        text = ggplot2::element_text(size = 11, color = "black"),
        plot.title.position = "plot",
        panel.grid.major.x = ggplot2::element_blank(),
        panel.grid.minor.x = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.minor.y = ggplot2::element_blank()
    ) +
    tidytext::scale_x_reordered() +
    # newest year first; the argument is spelled `scales` in full instead of
    # relying on partial matching of `scale`
    ggplot2::facet_grid(factor(year, levels = c(2025:2012)) ~ ., scales = "free", space = "free")

Code
# Save as PNG; no plot is passed, so ggsave() falls back to the last plot
ggplot2::ggsave(
    filename = here::here("outputs/artists_top5_byyear.png"),
    width = 20, height = 25, units = "cm", dpi = 150, bg = "white"
)