2 In Class 1

Author

Sezer Turkmen

Published

January 9, 2024

2.0.1 Most Streamed Spotify Songs 2023

The ‘Most Streamed Spotify Songs 2023’ dataset is a fantastic choice for class demos. It offers insights into the music industry by showcasing popular songs and their attributes. Students can explore the science of hit songs, understand their impact through social media analysis, and learn about cross-platform music data. It’s engaging and up-to-date, making it an excellent teaching resource.

# Load the dataset straight from Google Drive. No Drive client package is
# required: read.csv() is handed the file's direct-download URL, which is
# built from the shared file id below.
id <- "1nM0Cr-gVFF9UjvxSZXHNfFnrDWIUoTHi"
total_list <- read.csv(
  file = sprintf(fmt = "https://docs.google.com/uc?id=%s&export=download", id)
)

# Keep only the rows whose 'streams' entry contains no ASCII letters, i.e.
# discard records where the stream count is not a plain number.
filtered_spotify_data <- filter(
  total_list,
  !grepl(pattern = "[a-zA-Z]", x = streams)
)

# Sort the filtered dataset by stream count, largest first.
# 'streams' is still a character column at this point, so it must be
# converted for the ordering: sorting the raw strings would be lexicographic
# (e.g. "999" would rank above "3703895074"). The stored column itself is
# left untouched; only the sort key is numeric.
sorted_filtered_spotify_data <- filtered_spotify_data %>%
  arrange(desc(as.numeric(streams)))

# Render the first rows of the sorted data as an interactive HTML table.
# Editing is row-wise, with columns 1, 3 and 4 excluded from editing.
sorted_filtered_spotify_data %>%
  head() %>%
  DT::datatable(
    editable = list(
      target = "row",
      disable = list(columns = c(1, 3, 4))
    )
  )

2.0.2 Exercises

# Inspect the storage type of the 'streams' column
class(filtered_spotify_data[["streams"]])

[1] "character"

# Convert 'streams' from character to numeric (base R, no dplyr)
filtered_spotify_data[["streams"]] <-
  as.numeric(filtered_spotify_data[["streams"]])

# 1. Overall mean of 'streams' across the dataset (missing values ignored).
#    The result is a one-row, one-column data frame.
mean_streams <- summarise(
  filtered_spotify_data,
  mean_streams = mean(streams, na.rm = TRUE)
)

print(mean_streams)

  mean_streams
1    514137425

# 2. Mean of 'streams' per release month
month_names <- month.name

# Step 1: aggregate by the month number (missing stream values ignored).
mean_streams_by_month <- filtered_spotify_data %>%
  group_by(released_month) %>%
  summarise(mean_streams = mean(streams, na.rm = TRUE))

# Step 2: swap the month number for its English name by indexing into
# month_names.
mean_streams_by_month$released_month <-
  month_names[as.numeric(mean_streams_by_month$released_month)]

print(mean_streams_by_month)

# A tibble: 12 × 2
   released_month mean_streams
   <chr>                 <dbl>
 1 January          727506137.
 2 February         353153502.
 3 March            477052609.
 4 April            404145980.
 5 May              415669054.
 6 June             410044671.
 7 July             482176478.
 8 August           631265701.
 9 September        734644353.
10 October          588902402.
11 November         552592271.
12 December         369573392.

# 3. mutate() / transmute() example:
#    mutate() adds the stream count expressed in millions, transmute() then
#    keeps only the listed columns, and the rows are ordered from the most
#    to the least streamed track.
spotify_data_millions <- filtered_spotify_data %>%
  mutate(streams_in_millions = streams / 1e6) %>%
  transmute(track_name, artist.s._name, released_year, streams_in_millions) %>%
  arrange(-streams_in_millions)

print(head(spotify_data_millions, n = 6))

                                     track_name        artist.s._name
1                               Blinding Lights            The Weeknd
2                                  Shape of You            Ed Sheeran
3                             Someone You Loved         Lewis Capaldi
4                                  Dance Monkey           Tones and I
5 Sunflower - Spider-Man: Into the Spider-Verse Post Malone, Swae Lee
6                                     One Dance   Drake, WizKid, Kyla
  released_year streams_in_millions
1          2019            3703.895
2          2017            3562.544
3          2018            2887.242
4          2019            2864.792
5          2018            2808.097
6          2016            2713.922