I’ve had a Spotify account since 23rd March 2012, and have been using it pretty much every day since then. I thought it would be interesting to summarise my listening habits over the years, so here I present some plots of my listening data.
The code and data are available here: https://github.com/lcpilling/spotify
Read and tidy data
# Load the derived streaming-history table (gzipped TSV). No `col_types` are
# supplied, so readr guesses them and prints the column specification.
dat_music <- here::here("data/derived/music.tsv.gz") |>
  readr::read_tsv()
Rows: 52372 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (10): platform, conn_country, ip_addr, master_metadata_track_name, mast...
dbl (5): ms_played, offline_timestamp, duration, year, day_week
lgl (11): episode_name, episode_show_name, spotify_episode_uri, audiobook_t...
dttm (3): ts, start_time, end_time
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop every play credited to "Nature Sounds" (birdsong once used as an alarm).
# NOTE(review): filter() also discards rows whose artist name is NA, because
# the comparison evaluates to NA there - confirm that is acceptable.
dat_music <- dplyr::filter(
  dat_music,
  master_metadata_album_artist_name != "Nature Sounds"
)
Play time
Time by year
Determine total hours and average per day to display.
Code
# Total play time per year, rounded to whole hours.
# NOTE(review): `duration` is presumably in seconds (it is divided by 60 twice
# to reach hours) - confirm against the data preparation step.
dat_time <- dplyr::summarise(
  dplyr::group_by(dat_music, year),
  n = round(sum(duration) / 60 / 60)
)

# Grand total: the sum of the (already rounded) yearly figures.
sum(dat_time[["n"]])
Code
# Average hours per day over the whole period: total hours divided by the
# number of days between the two fixed dates.
sum(dat_time[["n"]]) / as.numeric(
  difftime(
    lubridate::dmy("11-07-2025"),
    lubridate::dmy("23-03-2012"),
    units = "days"
  )
)
Code
# Horizontal bar chart: one bar per year, labelled with its hour count.
# The y mapping is declared once in the top-level aes() and inherited by both
# layers.
dat_time |>
  dplyr::arrange(-n) |>
  ggplot2::ggplot(
    ggplot2::aes(x = as.character(year), y = n, fill = n)
  ) +
  ggplot2::geom_col(alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(label = n),
    hjust = -0.2,
    size = 5,
    color = "black"
  ) +
  # Fixed upper limit leaves room to the right of the bars for the labels.
  ggplot2::scale_y_continuous(limits = c(0, 440)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Luke's Spotify music play time",
    subtitle = "23rd March 2012 to 11th July 2025",
    caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)",
    x = "",
    y = "Hours played"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    legend.position = "none",
    axis.text.x = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 14 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/music_time_byyear.png"),
  width = 14,
  height = 14,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Time by month
Code
# Total play time per calendar month (all years pooled), in whole hours.
dat_time <- dplyr::summarise(
  dplyr::group_by(
    dplyr::mutate(dat_music, month = lubridate::month(date, label = TRUE)),
    month
  ),
  n = round(sum(duration) / 60 / 60)
)

# Horizontal bars; fct_rev() reverses the month factor before coord_flip()
# turns the axis.
dat_time |>
  dplyr::arrange(-n) |>
  ggplot2::ggplot(
    ggplot2::aes(x = forcats::fct_rev(month), y = n, fill = n)
  ) +
  ggplot2::geom_col(alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(label = n),
    hjust = -0.2,
    size = 5,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 400)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Luke's Spotify music play time",
    subtitle = "23rd March 2012 to 11th July 2025",
    caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)",
    x = "",
    y = "Hours played"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    legend.position = "none",
    axis.text.x = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 12 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/music_time_bymonth.png"),
  width = 14,
  height = 12,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Time by month - average
Code
# Average hours played per calendar month.
#
# The period in the subtitle (23 March 2012 to 11 July 2025) contains 14
# occurrences of March-July but only 13 of every other month, so each month's
# total is divided by its own number of occurrences instead of a flat 13.
# Partial months at either end of the period are counted as whole months.
period_months <- seq(
  lubridate::dmy("01-03-2012"), # first day of the month the period starts in
  lubridate::dmy("11-07-2025"),
  by = "month"
)
month_counts <- table(lubridate::month(period_months, label = TRUE))

dat_time <- dat_music |>
  dplyr::mutate(month = lubridate::month(date, label = TRUE)) |>
  dplyr::group_by(month) |>
  dplyr::summarise(n = sum(duration) / 60 / 60) |>
  # Look up each month's occurrence count by its label, then round.
  dplyr::mutate(n = round(n / as.numeric(month_counts[as.character(month)])))

# Horizontal bars, one per month, labelled with the average hour count.
dat_time |>
  dplyr::arrange(-n) |>
  ggplot2::ggplot(ggplot2::aes(forcats::fct_rev(month), n, fill = n)) +
  ggplot2::geom_col(alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(label = n),
    hjust = -0.2,
    size = 5,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 30)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    x = "",
    y = "Average Hours played",
    title = "Luke's Spotify music play time",
    subtitle = "23rd March 2012 to 11th July 2025",
    caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_blank(),
    legend.position = "none",
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 12 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/music_time_bymonth_average.png"),
  width = 14,
  height = 12,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Time by day
Code
# Total play time per weekday (weeks start on Monday), in whole hours.
dat_time <- dplyr::summarise(
  dplyr::group_by(
    dplyr::mutate(
      dat_music,
      day = lubridate::wday(date, label = TRUE, week_start = 1)
    ),
    day
  ),
  n = round(sum(duration) / 60 / 60)
)

# Horizontal bars; fct_rev() reverses the weekday factor before coord_flip()
# turns the axis.
dat_time |>
  dplyr::arrange(-n) |>
  ggplot2::ggplot(
    ggplot2::aes(x = forcats::fct_rev(day), y = n, fill = n)
  ) +
  ggplot2::geom_col(alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(label = n),
    hjust = -0.2,
    size = 5,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 700)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Luke's Spotify music play time",
    subtitle = "23rd March 2012 to 11th July 2025",
    caption = "Total = 3,245 hours (avg. 0.67 hours per day over the 4,858 days)",
    x = "",
    y = "Hours played"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    legend.position = "none",
    axis.text.x = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 12 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/music_time_byday.png"),
  width = 14,
  height = 12,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Time by day - average
Code
# Average minutes played per weekday (weeks start on Monday).
#
# The denominator is the number of times each weekday actually occurs in the
# period stated in the subtitle (about 694 each), not the 52 * 13 = 676 that a
# flat "52 weeks x 13 years" assumption gives.
period_days <- seq(
  lubridate::dmy("23-03-2012"),
  lubridate::dmy("11-07-2025"),
  by = "day"
)
weekday_counts <- table(
  lubridate::wday(period_days, label = TRUE, week_start = 1)
)

dat_time <- dat_music |>
  dplyr::mutate(day = lubridate::wday(date, label = TRUE, week_start = 1)) |>
  dplyr::group_by(day) |>
  dplyr::summarise(n = sum(duration) / 60) |>
  # Look up each weekday's occurrence count by its label, then round.
  dplyr::mutate(n = round(n / as.numeric(weekday_counts[as.character(day)])))

# Horizontal bars, one per weekday, labelled with the average minute count.
dat_time |>
  dplyr::arrange(-n) |>
  ggplot2::ggplot(ggplot2::aes(forcats::fct_rev(day), n, fill = n)) +
  ggplot2::geom_col(alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(label = n),
    hjust = -0.2,
    size = 5,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 70)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    x = "",
    y = "Average minutes played",
    title = "Luke's Spotify music play time",
    subtitle = "23rd March 2012 to 11th July 2025",
    caption = "Total = 194,700 mins (avg. 40 mins per day over the 4,858 days)"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_blank(),
    legend.position = "none",
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 12 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/music_time_byday_average.png"),
  width = 14,
  height = 12,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Summary stats by day
Code
# Minutes listened on each date that has at least one play.
dat_time <- dat_music |>
  dplyr::group_by(date) |>
  dplyr::summarise(n = sum(duration) / 60)

# Median and inter-quartile range of the daily minutes. unname() strips the
# "25%" / "75%" labels that quantile() attaches, so they do not leak into the
# data frame's row names.
dat_time_stats <- data.frame(
  median = stats::median(dat_time$n, na.rm = TRUE),
  lower = unname(stats::quantile(dat_time$n, 0.25, na.rm = TRUE)),
  upper = unname(stats::quantile(dat_time$n, 0.75, na.rm = TRUE))
)

# Build the caption from the computed statistics so it cannot drift out of
# sync with the data (it was previously typed in by hand).
stats_caption <- sprintf(
  "Median %.1f mins (IQR %.1f to %.1f)",
  dat_time_stats$median,
  dat_time_stats$lower,
  dat_time_stats$upper
)

# Histogram of daily minutes; days above 500 minutes are capped at 500 so the
# few extreme days do not stretch the x axis.
dat_time |>
  dplyr::mutate(n = pmin(n, 500)) |>
  ggplot2::ggplot(ggplot2::aes(n)) +
  ggplot2::geom_histogram(
    binwidth = 10,
    fill = "darkblue",
    color = "black",
    alpha = 0.7
  ) +
  ggplot2::labs(
    x = "Minutes listened",
    y = "Number of days",
    title = "Luke's Spotify music play time by day",
    subtitle = "23rd March 2012 to 11th July 2025",
    caption = stats_caption
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 14 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/mins_by_day.png"),
  width = 14,
  height = 14,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Several days over 500 mins!
Code
# List the dates on which more than 500 minutes were played.
dplyr::filter(dat_time, n > 500)
# A tibble: 5 × 2
date n
<date> <dbl>
1 2013-07-11 569.
2 2017-01-02 524.
3 2024-12-30 1439.
4 2024-12-31 780.
5 2025-01-12 708.
These look a lot like days when I simply left Spotify running, at NYE parties or similar.
Songs
Ever
Code
# Play count per (track, artist) pair, most played first.
dat_songs <- dplyr::arrange(
  dplyr::count(
    dat_music,
    master_metadata_track_name,
    master_metadata_album_artist_name
  ),
  -n
)

# Bar chart of the 15 most played tracks, ordered by play count.
dat_songs |>
  head(n = 15) |>
  ggplot2::ggplot(
    ggplot2::aes(
      x = forcats::fct_reorder(master_metadata_track_name, n),
      y = n,
      fill = n
    )
  ) +
  ggplot2::geom_col(alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(label = n),
    hjust = -0.2,
    size = 5,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 140)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Luke's top 15 Spotify songs",
    subtitle = "23rd March 2012 to 11th July 2025",
    x = "",
    y = "Number of plays"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    text = ggplot2::element_text(size = 15, color = "black"),
    plot.title.position = "plot",
    legend.position = "none",
    axis.text.x = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 14 x 14 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/songs_top15.png"),
  width = 14,
  height = 14,
  units = "cm",
  dpi = 150,
  bg = "white"
)
By year
Code
# Play count per (track, artist) pair within each year, most played first.
# count() keeps the `year` grouping, which the top-5 filter below relies on.
dat_songs_year <- dat_music |>
  dplyr::group_by(year) |>
  dplyr::count(master_metadata_track_name, master_metadata_album_artist_name) |>
  dplyr::arrange(-n)

# Shorten track names for the plot by collapsing any "[...]" or "(...)" part
# into "...". The pattern must not contain spaces around the escapes, otherwise
# it no longer matches bracketed text.
dat_songs_year <- dat_songs_year |>
  dplyr::mutate(
    master_metadata_track_name = stringr::str_replace_all(
      master_metadata_track_name,
      "\\[.*?\\]|\\(.*?\\)", # lazily match text within [] or ()
      "..."
    )
  )

# Top 5 tracks per year, one facet per year (most recent first).
# row_number() is evaluated per `year` group, and the rows are already sorted
# by descending play count, so `<= 5` keeps each year's five most played.
dat_songs_year |>
  dplyr::filter(dplyr::row_number() <= 5) |>
  ggplot2::ggplot(
    ggplot2::aes(
      tidytext::reorder_within(master_metadata_track_name, n, year),
      fill = n
    )
  ) +
  ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(y = n, label = n),
    hjust = -0.2,
    size = 3,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 55)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    x = "",
    y = "Number of plays",
    title = "Luke's top Spotify songs",
    subtitle = "23rd March 2012 to 11th July 2025"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_blank(),
    legend.position = "none",
    text = ggplot2::element_text(size = 11, color = "black"),
    plot.title.position = "plot",
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  ) +
  # Strip the suffix reorder_within() appends to keep labels unique per facet.
  tidytext::scale_x_reordered() +
  # `scales` spelled out in full rather than relying on partial matching.
  ggplot2::facet_grid(
    factor(year, levels = c(2025:2012)) ~ .,
    scales = "free",
    space = "free"
  )
Code
# Save the plot to outputs/ as an 18 x 25 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/songs_top5_byyear.png"),
  width = 18,
  height = 25,
  units = "cm",
  dpi = 150,
  bg = "white"
)
Artists
Ever
Code
# Total listening time per artist in whole hours, most listened first.
dat_artists <- dat_music |>
  dplyr::group_by(master_metadata_album_artist_name) |>
  dplyr::summarise(n = round(sum(duration) / 60 / 60)) |>
  dplyr::arrange(-n)

# Bar chart of the 15 most listened-to artists, ordered by hours.
dat_artists |>
  head(n = 15) |>
  ggplot2::ggplot(
    ggplot2::aes(
      forcats::fct_reorder(master_metadata_album_artist_name, n),
      fill = n
    )
  ) +
  ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(y = n, label = n),
    hjust = -0.2,
    size = 4,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 65)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    x = "",
    y = "Hours listened", # was misspelled "listend"
    title = "Luke's top 15 Spotify artists",
    subtitle = "23rd March 2012 to 11th July 2025"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_blank(),
    legend.position = "none",
    text = ggplot2::element_text(size = 13, color = "black"),
    plot.title.position = "plot",
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  )
Code
# Save the plot to outputs/ as a 16 x 13 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/artists_top15.png"),
  width = 16,
  height = 13,
  units = "cm",
  dpi = 150,
  bg = "white"
)
By year
Code
# Hours listened per artist within each year, most listened first.
# `.groups = "drop_last"` states explicitly that the result stays grouped by
# `year` (the per-year top-5 filter downstream depends on it) and silences the
# "summarise() has grouped output" message.
dat_artists_year <- dat_music |>
  dplyr::group_by(year, master_metadata_album_artist_name) |>
  dplyr::summarise(
    n = round(sum(duration) / 60 / 60),
    .groups = "drop_last"
  ) |>
  dplyr::arrange(-n)
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
Code
# plot
# Top 5 artists per year, one facet per year (most recent first).
# row_number() is evaluated per `year` group, and the rows are already sorted
# by descending hours, so `<= 5` keeps each year's five most listened artists.
dat_artists_year |>
  dplyr::filter(dplyr::row_number() <= 5) |>
  ggplot2::ggplot(
    ggplot2::aes(
      tidytext::reorder_within(master_metadata_album_artist_name, n, year),
      fill = n
    )
  ) +
  ggplot2::geom_col(ggplot2::aes(y = n), alpha = 0.9) +
  ggplot2::geom_text(
    ggplot2::aes(y = n, label = n),
    hjust = -0.2,
    size = 3,
    color = "black"
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 39)) +
  viridis::scale_fill_viridis(option = "H") +
  ggplot2::coord_flip() +
  ggplot2::labs(
    x = "",
    y = "Hours listened",
    title = "Luke's top Spotify artists",
    subtitle = "23rd March 2012 to 11th July 2025"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_blank(),
    legend.position = "none",
    text = ggplot2::element_text(size = 11, color = "black"),
    plot.title.position = "plot",
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_blank()
  ) +
  # Strip the suffix reorder_within() appends to keep labels unique per facet.
  tidytext::scale_x_reordered() +
  # `scales` spelled out in full rather than relying on partial matching.
  ggplot2::facet_grid(
    factor(year, levels = c(2025:2012)) ~ .,
    scales = "free",
    space = "free"
  )
Code
# Save the plot to outputs/ as a 20 x 25 cm PNG on a white background.
# No `plot` is passed, so ggsave() falls back to its default (the last plot).
ggplot2::ggsave(
  filename = here::here("outputs/artists_top5_byyear.png"),
  width = 20,
  height = 25,
  units = "cm",
  dpi = 150,
  bg = "white"
)