# Download the dataset from Google Drive through its direct-download URL
# (plain read.csv(); no Drive client package is involved).
# NOTE(review): the pipelines below rely on dplyr (%>%, filter, arrange)
# being attached earlier in the document -- confirm.
id <- "1nM0Cr-gVFF9UjvxSZXHNfFnrDWIUoTHi"
total_list <- read.csv(
  sprintf("https://docs.google.com/uc?id=%s&export=download", id)
)

# Filter out rows with non-numeric values in the 'streams' column
filtered_spotify_data <- total_list %>%
  filter(!grepl("[a-zA-Z]", streams))

# Sort the filtered dataset by the 'streams' column in descending order.
# 'streams' is still a character column at this point, so it is compared as
# a number; sorting the raw strings would order them lexicographically
# (e.g. "99" ahead of "1000").
sorted_filtered_spotify_data <- filtered_spotify_data %>%
  arrange(desc(as.numeric(streams)))

# Create HTML view of Dataset (first rows only; rows are editable except
# for the columns listed under `disable`)
DT::datatable(head(sorted_filtered_spotify_data), editable = list(
  target = "row", disable = list(columns = c(1, 3, 4))
))
2 In Class 1
2.0.1 Most Streamed Spotify Songs 2023
The ‘Most Streamed Spotify Songs 2023’ dataset is a fantastic choice for class demos. It offers insights into the music industry by showcasing popular songs and their attributes. Students can explore the science of hit songs, understand their impact through social media analysis, and learn about cross-platform music data. It’s engaging and up-to-date, making it an excellent teaching resource.
2.0.2 Exercises
# Show the class of the 'streams' column after filtering
class(filtered_spotify_data[["streams"]])
[1] "character"
# Convert 'streams' from character to numeric using base R
filtered_spotify_data$streams <- as.numeric(filtered_spotify_data$streams)
# 1. The mean of all streams in the dataset
# summarise() collapses the data to a single row; na.rm = TRUE skips any
# missing stream counts.
mean_streams <- filtered_spotify_data %>%
  summarise(mean_streams = mean(streams, na.rm = TRUE))
print(mean_streams)
mean_streams
1 514137425
# 2. The mean of streams for each month
# month.name is base R's built-in vector of English month names; it is used
# as a lookup table indexed by the month number.
month_names <- month.name

mean_streams_by_month <- filtered_spotify_data %>%
  group_by(released_month) %>%
  summarise(mean_streams = mean(streams, na.rm = TRUE)) %>%
  # Replace the month number with its name
  mutate(released_month = month_names[as.numeric(released_month)])
print(mean_streams_by_month)
# A tibble: 12 × 2
released_month mean_streams
<chr> <dbl>
1 January 727506137.
2 February 353153502.
3 March 477052609.
4 April 404145980.
5 May 415669054.
6 June 410044671.
7 July 482176478.
8 August 631265701.
9 September 734644353.
10 October 588902402.
11 November 552592271.
12 December 369573392.
# 3. Transmute mutate example
# mutate() adds the derived column; transmute() then keeps only the listed
# columns; the result is ordered from most to least streamed.
spotify_data_millions <- filtered_spotify_data %>%
  mutate(streams_in_millions = streams / 1000000) %>%
  transmute(track_name, artist.s._name, released_year, streams_in_millions) %>%
  arrange(desc(streams_in_millions))
print(head(spotify_data_millions))
track_name artist.s._name
1 Blinding Lights The Weeknd
2 Shape of You Ed Sheeran
3 Someone You Loved Lewis Capaldi
4 Dance Monkey Tones and I
5 Sunflower - Spider-Man: Into the Spider-Verse Post Malone, Swae Lee
6 One Dance Drake, WizKid, Kyla
released_year streams_in_millions
1 2019 3703.895
2 2017 3562.544
3 2018 2887.242
4 2019 2864.792
5 2018 2808.097
6 2016 2713.922