Spotify Project: Exploratory Data Analysis

1. Data Explanation

Our data was obtained directly from the Spotify Web API. To connect to the API, we created a “Client ID” and a “Client Secret” on the Spotify for Developers website. The “spotifyr” package was used to make the connection.

2. Accessing Spotify Web API

library(httpuv)
library(spotifyr)
library(tidyverse)
library(knitr)
library(lubridate)
library(ggalt)
library(plotly)
library(scales)

# Raise the limit on how many entries R prints to the console
options(max.print = 1000000)

When the connection is made successfully, we can access many different types of data, such as artists, albums, tracks, user profiles, etc. See the Spotify API Reference for details. In our project, we will mostly use playlist, artist and track data.

3. Gathering Turkey, USA, Japan and Brazil Top 50 Playlists

# Playlist ids of the four country "Top 50" charts
turkey_top_50_id <- "37i9dQZEVXbIVYVBNw9D5K"
usa_top_50_id <- "37i9dQZEVXbLRQDuF5jeBp"
japan_top_50_id <- "37i9dQZEVXbKXQ4mDTEBXq"
brazil_top_50_id <- "37i9dQZEVXbMXbN3EUUhlg"

# Fetch the audio features of each chart from the "spotifycharts" account
turkey_top_50_audio_features <- get_playlist_audio_features(
  "spotifycharts", turkey_top_50_id
)
usa_top_50_audio_features <- get_playlist_audio_features(
  "spotifycharts", usa_top_50_id
)
japan_top_50_audio_features <- get_playlist_audio_features(
  "spotifycharts", japan_top_50_id
)
brazil_top_50_audio_features <- get_playlist_audio_features(
  "spotifycharts", brazil_top_50_id
)

# Stack the Turkey, USA, Japan and Brazil charts into a single data frame
combined_lists <- bind_rows(
  turkey_top_50_audio_features,
  usa_top_50_audio_features,
  japan_top_50_audio_features,
  brazil_top_50_audio_features
)
glimpse(combined_lists)

## Observations: 200
## Variables: 61
## $ playlist_id                        <chr> "37i9dQZEVXbIVYVBNw9D5K", "...
## $ playlist_name                      <chr> "Turkey Top 50", "Turkey To...
## $ playlist_img                       <chr> "https://charts-images.scdn...
## $ playlist_owner_name                <chr> "spotifycharts", "spotifych...
## $ playlist_owner_id                  <chr> "spotifycharts", "spotifych...
## $ danceability                       <dbl> 0.628, 0.801, 0.810, 0.743,...
## $ energy                             <dbl> 0.725, 0.688, 0.631, 0.680,...
## $ key                                <int> 7, 9, 4, 5, 6, 7, 11, 9, 9,...
## $ loudness                           <dbl> -7.387, -6.620, -7.855, -4....
## $ mode                               <int> 1, 1, 0, 0, 0, 0, 0, 0, 0, ...
## $ speechiness                        <dbl> 0.1100, 0.1130, 0.3270, 0.1...
## $ acousticness                       <dbl> 0.0266, 0.2780, 0.1170, 0.1...
## $ instrumentalness                   <dbl> 0.00e+00, 8.88e-03, 0.00e+0...
## $ liveness                           <dbl> 0.0549, 0.1500, 0.1190, 0.1...
## $ valence                            <dbl> 0.458, 0.410, 0.407, 0.694,...
## $ tempo                              <dbl> 173.952, 158.003, 144.978, ...
## $ track.id                           <chr> "36ulbeGLdspdIYSFKXIlmN", "...
## $ analysis_url                       <chr> "https://api.spotify.com/v1...
## $ time_signature                     <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, ...
## $ added_at                           <chr> "1970-01-01T00:00:00Z", "19...
## $ is_local                           <lgl> FALSE, FALSE, FALSE, FALSE,...
## $ primary_color                      <lgl> NA, NA, NA, NA, NA, NA, NA,...
## $ added_by.href                      <chr> "https://api.spotify.com/v1...
## $ added_by.id                        <chr> "", "", "", "", "", "", "",...
## $ added_by.type                      <chr> "user", "user", "user", "us...
## $ added_by.uri                       <chr> "spotify:user:", "spotify:u...
## $ added_by.external_urls.spotify     <chr> "https://open.spotify.com/u...
## $ track.artists                      <list> [<data.frame[1 x 6]>, <dat...
## $ track.available_markets            <list> [<"AD", "AE", "AR", "AT", ...
## $ track.disc_number                  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ track.duration_ms                  <int> 269002, 154251, 185566, 196...
## $ track.episode                      <lgl> FALSE, FALSE, FALSE, FALSE,...
## $ track.explicit                     <lgl> FALSE, TRUE, TRUE, FALSE, F...
## $ track.href                         <chr> "https://api.spotify.com/v1...
## $ track.is_local                     <lgl> FALSE, FALSE, FALSE, FALSE,...
## $ track.name                         <chr> "Toz Taneleri", "Wir sind K...
## $ track.popularity                   <int> 66, 60, 78, 70, 81, 78, 62,...
## $ track.preview_url                  <chr> "https://p.scdn.co/mp3-prev...
## $ track.track                        <lgl> TRUE, TRUE, TRUE, TRUE, TRU...
## $ track.track_number                 <int> 1, 11, 1, 1, 1, 1, 2, 3, 6,...
## $ track.type                         <chr> "track", "track", "track", ...
## $ track.uri                          <chr> "spotify:track:36ulbeGLdspd...
## $ track.album.album_type             <chr> "single", "album", "single"...
## $ track.album.artists                <list> [<data.frame[1 x 6]>, <dat...
## $ track.album.available_markets      <list> [<"AD", "AE", "AR", "AT", ...
## $ track.album.href                   <chr> "https://api.spotify.com/v1...
## $ track.album.id                     <chr> "4abQKohVs72OlJci0C94pl", "...
## $ track.album.images                 <list> [<data.frame[3 x 3]>, <dat...
## $ track.album.name                   <chr> "Sarkastik", "Lights Out", ...
## $ track.album.release_date           <chr> "2019-11-29", "2019-11-14",...
## $ track.album.release_date_precision <chr> "day", "day", "day", "day",...
## $ track.album.total_tracks           <int> 5, 12, 1, 1, 1, 1, 5, 5, 12...
## $ track.album.type                   <chr> "album", "album", "album", ...
## $ track.album.uri                    <chr> "spotify:album:4abQKohVs72O...
## $ track.album.external_urls.spotify  <chr> "https://open.spotify.com/a...
## $ track.external_ids.isrc            <chr> "TR0681900389", "DECE719007...
## $ track.external_urls.spotify        <chr> "https://open.spotify.com/t...
## $ video_thumbnail.url                <lgl> NA, NA, NA, NA, NA, NA, NA,...
## $ key_name                           <chr> "G", "A", "E", "F", "F#", "...
## $ mode_name                          <chr> "major", "major", "minor", ...
## $ key_mode                           <chr> "G major", "A major", "E mi...

4. Adding Sentiments in Each Track

The “classify_track_sentiment” function reveals the mood of individual songs and, through them, of whole playlists. Energy and valence are two important factors in interpreting emotion in music. Both take values between 0 and 1, and their combination determines whether a song is classified as turbulent/angry, happy/joyful, sad/depressing or chill/peaceful.

According to Get Audio Features for a Track, explanations of the corresponding factors are as follows.

4.1. Energy

Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy.

4.2. Valence

A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry).

# Classify the mood of tracks from their valence and energy scores.
#
# The valence/energy plane is split into four quadrants at 0.5:
#   valence >= 0.5, energy >= 0.5  -> "Happy/Joyful"
#   valence >= 0.5, energy <  0.5  -> "Chill/Peaceful"
#   valence <  0.5, energy >= 0.5  -> "Turbulent/Angry"
#   valence <  0.5, energy <  0.5  -> "Sad/Depressing"
#
# Arguments:
#   valence  numeric vector of valence scores (a single value also works).
#   energy   numeric vector of energy scores, same length as `valence`.
#
# Returns a character vector of mood labels, with NA wherever valence or
# energy is missing.
classify_track_sentiment <- function(valence, energy) {
  high_valence <- valence >= 0.5
  high_energy <- energy >= 0.5

  # ifelse() works element-wise and propagates NA from either comparison,
  # so no explicit is.na() branch is needed.
  sentiment <- ifelse(
    high_valence,
    ifelse(high_energy, "Happy/Joyful", "Chill/Peaceful"),
    ifelse(high_energy, "Turbulent/Angry", "Sad/Depressing")
  )

  # An all-NA (or empty) input would otherwise come back as a logical vector.
  as.character(sentiment)
}
# Classify every track. Columns are addressed by name and the row count is
# taken from the data, so the step does not depend on the column order of
# combined_lists or on there being exactly 200 tracks.
track_sentiment <- vapply(
  seq_len(nrow(combined_lists)),
  function(row) {
    as.character(
      classify_track_sentiment(
        combined_lists$valence[row],
        combined_lists$energy[row]
      )
    )
  },
  character(1)
)
#Adding sentiment column to Combined of four countries
combined_lists <- cbind(combined_lists, track_sentiment)
# Keep only the track-level columns used in the analysis
track_audio_combined <- combined_lists %>%
  select(
    track.name, track.id, track.artists, track.album.release_date,
    track.popularity, danceability:tempo, track_sentiment, track.duration_ms
  )
head(track_audio_combined)

##      track.name               track.id
## 1  Toz Taneleri 36ulbeGLdspdIYSFKXIlmN
## 2 Wir sind Kral 2KlbLTnQ5Wch2oOelW0Y2k
## 3       Arkadas 6bBnnrknLbDoOCUdKMkmnq
## 4           AYA 4IJEw3fDvS6XF4sDc3bvjK
## 5  Dance Monkey 1rgnBhdG2JDFTbYkYRZAku
## 6         Nalan 1LNUxWJifZNEPpd273N2le
##                                                                                                                                                                                                                                                                                                                                                                                       track.artists
## 1                                                                                                                                                                                           https://api.spotify.com/v1/artists/1KXTegXtnCPKXjRaX1llcD, 1KXTegXtnCPKXjRaX1llcD, Sagopa Kajmer, artist, spotify:artist:1KXTegXtnCPKXjRaX1llcD, https://open.spotify.com/artist/1KXTegXtnCPKXjRaX1llcD
## 2 https://api.spotify.com/v1/artists/5pVRwX5ZQR7hfJ18w8ZYkl, https://api.spotify.com/v1/artists/6LnJKrtFnTEGdbWQ2riWCL, 5pVRwX5ZQR7hfJ18w8ZYkl, 6LnJKrtFnTEGdbWQ2riWCL, Ufo361, Ezhel, artist, artist, spotify:artist:5pVRwX5ZQR7hfJ18w8ZYkl, spotify:artist:6LnJKrtFnTEGdbWQ2riWCL, https://open.spotify.com/artist/5pVRwX5ZQR7hfJ18w8ZYkl, https://open.spotify.com/artist/6LnJKrtFnTEGdbWQ2riWCL
## 3                                                                                                                                                                                                https://api.spotify.com/v1/artists/2kS0jWMkkFBL0mrl0VotD0, 2kS0jWMkkFBL0mrl0VotD0, Ben Fero, artist, spotify:artist:2kS0jWMkkFBL0mrl0VotD0, https://open.spotify.com/artist/2kS0jWMkkFBL0mrl0VotD0
## 4  https://api.spotify.com/v1/artists/2y1VzMKAa5nmfXKtJL9jnj, https://api.spotify.com/v1/artists/6LnJKrtFnTEGdbWQ2riWCL, 2y1VzMKAa5nmfXKtJL9jnj, 6LnJKrtFnTEGdbWQ2riWCL, Murda, Ezhel, artist, artist, spotify:artist:2y1VzMKAa5nmfXKtJL9jnj, spotify:artist:6LnJKrtFnTEGdbWQ2riWCL, https://open.spotify.com/artist/2y1VzMKAa5nmfXKtJL9jnj, https://open.spotify.com/artist/6LnJKrtFnTEGdbWQ2riWCL
## 5                                                                                                                                                                                             https://api.spotify.com/v1/artists/2NjfBq1NflQcKSeiDooVjY, 2NjfBq1NflQcKSeiDooVjY, Tones and I, artist, spotify:artist:2NjfBq1NflQcKSeiDooVjY, https://open.spotify.com/artist/2NjfBq1NflQcKSeiDooVjY
## 6                                                                                                                                                                                          https://api.spotify.com/v1/artists/4XP7cGw4t8BqZ8Du5q3bHg, 4XP7cGw4t8BqZ8Du5q3bHg, Emir Can Igrek, artist, spotify:artist:4XP7cGw4t8BqZ8Du5q3bHg, https://open.spotify.com/artist/4XP7cGw4t8BqZ8Du5q3bHg
##   track.album.release_date track.popularity danceability energy key
## 1               2019-11-29               66        0.628  0.725   7
## 2               2019-11-14               60        0.801  0.688   9
## 3               2019-11-08               78        0.810  0.631   4
## 4               2019-09-20               70        0.743  0.680   5
## 5               2019-05-10               81        0.825  0.593   6
## 6               2019-09-06               78        0.540  0.418   7
##   loudness mode speechiness acousticness instrumentalness liveness valence
## 1   -7.387    1      0.1100       0.0266         0.000000   0.0549   0.458
## 2   -6.620    1      0.1130       0.2780         0.008880   0.1500   0.410
## 3   -7.855    0      0.3270       0.1170         0.000000   0.1190   0.407
## 4   -4.344    0      0.1030       0.1130         0.123000   0.1830   0.694
## 5   -6.401    0      0.0988       0.6880         0.000161   0.1700   0.540
## 6  -15.570    0      0.0761       0.1900         0.685000   0.1980   0.189
##     tempo track_sentiment track.duration_ms
## 1 173.952 Turbulent/Angry            269002
## 2 158.003 Turbulent/Angry            154251
## 3 144.978 Turbulent/Angry            185566
## 4 180.059    Happy/Joyful            196583
## 5  98.078    Happy/Joyful            209754
## 6  91.042  Sad/Depressing            199958

# Name of the first listed artist of every track.
# Each element of track.artists is a data frame with one row per artist, so
# tracks with several artists are reduced explicitly to the first one instead
# of relying on a length-mismatch assignment (which warns).
artist_names <- vapply(
  track_audio_combined$track.artists,
  function(artists) {
    first_artist <- artists$name[1]
    # No artist information at all -> missing value rather than an error
    if (is.null(first_artist)) NA_character_ else as.character(first_artist)
  },
  character(1),
  USE.NAMES = FALSE
)
# Adding the artist name column to the combined list of four countries
combined_lists <- cbind(combined_lists, artist_names)
glimpse(combined_lists)

## Observations: 200
## Variables: 63
## $ playlist_id                        <chr> "37i9dQZEVXbIVYVBNw9D5K", "...
## $ playlist_name                      <chr> "Turkey Top 50", "Turkey To...
## $ playlist_img                       <chr> "https://charts-images.scdn...
## $ playlist_owner_name                <chr> "spotifycharts", "spotifych...
## $ playlist_owner_id                  <chr> "spotifycharts", "spotifych...
## $ danceability                       <dbl> 0.628, 0.801, 0.810, 0.743,...
## $ energy                             <dbl> 0.725, 0.688, 0.631, 0.680,...
## $ key                                <int> 7, 9, 4, 5, 6, 7, 11, 9, 9,...
## $ loudness                           <dbl> -7.387, -6.620, -7.855, -4....
## $ mode                               <int> 1, 1, 0, 0, 0, 0, 0, 0, 0, ...
## $ speechiness                        <dbl> 0.1100, 0.1130, 0.3270, 0.1...
## $ acousticness                       <dbl> 0.0266, 0.2780, 0.1170, 0.1...
## $ instrumentalness                   <dbl> 0.00e+00, 8.88e-03, 0.00e+0...
## $ liveness                           <dbl> 0.0549, 0.1500, 0.1190, 0.1...
## $ valence                            <dbl> 0.458, 0.410, 0.407, 0.694,...
## $ tempo                              <dbl> 173.952, 158.003, 144.978, ...
## $ track.id                           <chr> "36ulbeGLdspdIYSFKXIlmN", "...
## $ analysis_url                       <chr> "https://api.spotify.com/v1...
## $ time_signature                     <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, ...
## $ added_at                           <chr> "1970-01-01T00:00:00Z", "19...
## $ is_local                           <lgl> FALSE, FALSE, FALSE, FALSE,...
## $ primary_color                      <lgl> NA, NA, NA, NA, NA, NA, NA,...
## $ added_by.href                      <chr> "https://api.spotify.com/v1...
## $ added_by.id                        <chr> "", "", "", "", "", "", "",...
## $ added_by.type                      <chr> "user", "user", "user", "us...
## $ added_by.uri                       <chr> "spotify:user:", "spotify:u...
## $ added_by.external_urls.spotify     <chr> "https://open.spotify.com/u...
## $ track.artists                      <list> [<data.frame[1 x 6]>, <dat...
## $ track.available_markets            <list> [<"AD", "AE", "AR", "AT", ...
## $ track.disc_number                  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ track.duration_ms                  <int> 269002, 154251, 185566, 196...
## $ track.episode                      <lgl> FALSE, FALSE, FALSE, FALSE,...
## $ track.explicit                     <lgl> FALSE, TRUE, TRUE, FALSE, F...
## $ track.href                         <chr> "https://api.spotify.com/v1...
## $ track.is_local                     <lgl> FALSE, FALSE, FALSE, FALSE,...
## $ track.name                         <chr> "Toz Taneleri", "Wir sind K...
## $ track.popularity                   <int> 66, 60, 78, 70, 81, 78, 62,...
## $ track.preview_url                  <chr> "https://p.scdn.co/mp3-prev...
## $ track.track                        <lgl> TRUE, TRUE, TRUE, TRUE, TRU...
## $ track.track_number                 <int> 1, 11, 1, 1, 1, 1, 2, 3, 6,...
## $ track.type                         <chr> "track", "track", "track", ...
## $ track.uri                          <chr> "spotify:track:36ulbeGLdspd...
## $ track.album.album_type             <chr> "single", "album", "single"...
## $ track.album.artists                <list> [<data.frame[1 x 6]>, <dat...
## $ track.album.available_markets      <list> [<"AD", "AE", "AR", "AT", ...
## $ track.album.href                   <chr> "https://api.spotify.com/v1...
## $ track.album.id                     <chr> "4abQKohVs72OlJci0C94pl", "...
## $ track.album.images                 <list> [<data.frame[3 x 3]>, <dat...
## $ track.album.name                   <chr> "Sarkastik", "Lights Out", ...
## $ track.album.release_date           <chr> "2019-11-29", "2019-11-14",...
## $ track.album.release_date_precision <chr> "day", "day", "day", "day",...
## $ track.album.total_tracks           <int> 5, 12, 1, 1, 1, 1, 5, 5, 12...
## $ track.album.type                   <chr> "album", "album", "album", ...
## $ track.album.uri                    <chr> "spotify:album:4abQKohVs72O...
## $ track.album.external_urls.spotify  <chr> "https://open.spotify.com/a...
## $ track.external_ids.isrc            <chr> "TR0681900389", "DECE719007...
## $ track.external_urls.spotify        <chr> "https://open.spotify.com/t...
## $ video_thumbnail.url                <lgl> NA, NA, NA, NA, NA, NA, NA,...
## $ key_name                           <chr> "G", "A", "E", "F", "F#", "...
## $ mode_name                          <chr> "major", "major", "minor", ...
## $ key_mode                           <chr> "G major", "A major", "E mi...
## $ track_sentiment                    <fct> Turbulent/Angry, Turbulent/...
## $ artist_names                       <fct> Sagopa Kajmer, Ufo361, Ben ...

5. Plot Analysis

5.1. Country Playlists by Key

# Number of tracks in each musical key, counted within every playlist and
# sorted from the most to the least frequent combination
country_by_key <- combined_lists %>%
  select(playlist_name, key_name, track.name) %>%
  group_by(playlist_name) %>%
  count(key_name, sort = TRUE)

country_by_key

## # A tibble: 47 x 3
## # Groups:   playlist_name [4]
##    playlist_name        key_name     n
##    <chr>                <chr>    <int>
##  1 United States Top 50 G           11
##  2 Japan Top 50         C#           8
##  3 Turkey Top 50        A            8
##  4 Brazil Top 50        B            7
##  5 Brazil Top 50        F#           7
##  6 Japan Top 50         G            7
##  7 United States Top 50 C#           7
##  8 Brazil Top 50        D            6
##  9 Turkey Top 50        F            6
## 10 Turkey Top 50        F#           6
## # ... with 37 more rows

# Stacked bars: one bar per key, filled by the playlist the tracks belong to
ggplot(
  country_by_key,
  aes(x = key_name, y = n, fill = playlist_name)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Playlists by Key Name",
    x = "Key Name",
    y = "Total Number of Keys"
  ) +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )

5.2. Common Songs in Playlists

# Tracks (identified by name and artist) that occur in at least two rows of
# the combined charts, most frequent first
common_songs <- combined_lists %>%
  group_by(track.name, artist_names) %>%
  summarise(n_songs = n()) %>%
  filter(n_songs >= 2) %>%
  arrange(desc(n_songs))
common_songs

## # A tibble: 7 x 3
## # Groups:   track.name [7]
##   track.name          artist_names  n_songs
##   <chr>               <fct>           <int>
## 1 bad guy             Billie Eilish       3
## 2 Dance Monkey        Tones and I         3
## 3 everything i wanted Billie Eilish       3
## 4 Don't Start Now     Dua Lipa            2
## 5 Heartless           The Weeknd          2
## 6 Memories            Maroon 5            2
## 7 Señorita            Shawn Mendes        2

# Horizontal bars ordered by how often each song occurs, coloured by artist
ggplot(
  common_songs,
  aes(x = reorder(track.name, n_songs), y = n_songs, fill = artist_names)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Common Songs on Playlists",
    x = "Song Name",
    y = "Number of Songs"
  ) +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  ) +
  coord_flip()

5.3. Danceability Density of Playlists

# Overlaid, semi-transparent density curves of danceability, one per playlist
ggplot(combined_lists, aes(x = danceability, fill = playlist_name)) +
  geom_density(alpha = 0.7, color = NA) +
  labs(
    title = "Distribution of Danceability Data",
    x = "Danceability",
    y = "Density"
  ) +
  guides(fill = guide_legend(title = "Playlist")) +
  theme_minimal() +
  # Applied after theme_minimal() so these settings are not overwritten by it
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )

5.4. Energy and Valence Range of Playlists

# Lowest and highest energy and valence found in each playlist.
# The per-playlist extremes are attached to every track row first; unique()
# then collapses the result to one row per playlist.
playlist_feature_range <- combined_lists %>%
  group_by(playlist_name) %>%
  mutate(
    max_energy = max(energy),
    max_valence = max(valence),
    min_energy = min(energy),
    min_valence = min(valence)
  ) %>%
  select(playlist_name, min_energy, max_energy, min_valence, max_valence) %>%
  unique()
playlist_feature_range

## # A tibble: 4 x 5
## # Groups:   playlist_name [4]
##   playlist_name        min_energy max_energy min_valence max_valence
##   <chr>                     <dbl>      <dbl>       <dbl>       <dbl>
## 1 Turkey Top 50             0.191      0.931      0.105        0.928
## 2 United States Top 50      0.158      0.816      0.0605       0.947
## 3 Japan Top 50              0.225      0.979      0.184        0.962
## 4 Brazil Top 50             0.426      0.958      0.152        0.964

5.4.1. Energy and Valence Range of Playlists with Dumbbell Plot

# Dumbbell chart: for every playlist one segment spans min..max energy and a
# second one spans min..max valence; markers highlight the four end points.
energy_range_plot <- plot_ly(
  playlist_feature_range,
  color = I("gray80"),
  hoverinfo = "text"
) %>%
  add_segments(
    x = ~max_energy, xend = ~min_energy,
    y = ~playlist_name, yend = ~playlist_name,
    showlegend = FALSE
  ) %>%
  add_segments(
    x = ~max_valence, xend = ~min_valence,
    y = ~playlist_name, yend = ~playlist_name,
    showlegend = FALSE
  ) %>%
  add_markers(
    x = ~max_energy, y = ~playlist_name,
    name = "Maximum Energy Value",
    color = I("red"), size = 2.5,
    text = ~paste("Max Energy: ", max_energy)
  ) %>%
  add_markers(
    x = ~min_energy, y = ~playlist_name,
    name = "Minimum Energy Value",
    color = I("blue"), size = 2.5,
    text = ~paste("Min Energy: ", min_energy)
  ) %>%
  add_markers(
    x = ~max_valence, y = ~playlist_name,
    name = "Maximum Valence Value",
    color = I("#395B74"), size = 2.5,
    text = ~paste("Max Valence: ", max_valence)
  ) %>%
  add_markers(
    x = ~min_valence, y = ~playlist_name,
    name = "Minimum Valence Value",
    color = I("#F7BC08"), size = 2.5,
    text = ~paste("Min Valence: ", min_valence)
  ) %>%
  layout(
    title = "Playlist Energy and Valence Range",
    xaxis = list(title = "Energy and Valence"),
    yaxis = list(title = "Country Lists")
  )
# Display the chart
ggplotly(energy_range_plot)

5.5. Excitement of Playlists

# Excitement score of every track plus the mean score of its playlist.
# excitement = loudness + tempo + 100 * (energy + danceability + valence),
# written out term by term below.
excitement_of_playlist <- combined_lists %>%
  group_by(playlist_name) %>%
  select(
    playlist_name, track.name, valence, energy, loudness, danceability, tempo
  ) %>%
  mutate(
    excitement = loudness + tempo + (energy * 100) + (danceability * 100) +
      (valence * 100),
    # Still grouped by playlist, so this is the per-playlist mean
    excitement_mean = mean(excitement)
  )

# Overlaid histograms of the track scores; dashed lines mark playlist means
ggplot(
  excitement_of_playlist,
  aes(x = excitement, fill = playlist_name, color = playlist_name)
) +
  geom_histogram(binwidth = 30, position = "identity", alpha = 0.7) +
  geom_vline(
    data = excitement_of_playlist,
    aes(xintercept = excitement_mean, color = playlist_name),
    linetype = "dashed"
  ) +
  labs(
    title = "Excitement Distribution of Playlists",
    y = "Count",
    x = "Excitement Scale"
  ) +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )

5.6. Mean Excitement of Playlists

# One row per playlist with its mean excitement score.
# playlist_name is selected explicitly instead of relying on select() to
# re-add the grouping column on its own (which emits a message).
excitement_mean <- excitement_of_playlist %>%
  group_by(playlist_name) %>%
  select(playlist_name, excitement_mean) %>%
  unique()
excitement_mean

## # A tibble: 4 x 2
## # Groups:   playlist_name [4]
##   playlist_name        excitement_mean
##   <chr>                          <dbl>
## 1 Turkey Top 50                   300.
## 2 United States Top 50            289.
## 3 Japan Top 50                    306.
## 4 Brazil Top 50                   344.

# Bar per playlist, ordered from the lowest to the highest mean excitement
ggplot(
  excitement_mean,
  aes(
    x = reorder(playlist_name, excitement_mean),
    y = excitement_mean,
    fill = playlist_name
  )
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Excitement Comparison of Playlists",
    x = "Country Playlist Names",
    y = "Means of Excitement",
    fill = "Country Charts",
    caption = "The low score shows that the list is boring. \n Excitement Formula: (loudness + tempo + (energy*100) + (danceability*100) + (valence*100))"
  ) +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )

5.7. Sentiment Analysis of Country Playlists with Gradient Chart

# Per-track sentiment together with the two scores it is derived from,
# grouped by country playlist
sentiment_by_countries <- combined_lists %>%
  group_by(playlist_name) %>%
  select(
    playlist_name, track.name, artist_names,
    valence, energy, track_sentiment
  )

sentiment_by_countries

## # A tibble: 200 x 6
## # Groups:   playlist_name [4]
##    playlist_name track.name    artist_names  valence energy track_sentiment
##    <chr>         <chr>         <fct>           <dbl>  <dbl> <fct>          
##  1 Turkey Top 50 Toz Taneleri  Sagopa Kajmer   0.458  0.725 Turbulent/Angry
##  2 Turkey Top 50 Wir sind Kral Ufo361          0.41   0.688 Turbulent/Angry
##  3 Turkey Top 50 Arkadas       Ben Fero        0.407  0.631 Turbulent/Angry
##  4 Turkey Top 50 AYA           Murda           0.694  0.68  Happy/Joyful   
##  5 Turkey Top 50 Dance Monkey  Tones and I     0.54   0.593 Happy/Joyful   
##  6 Turkey Top 50 Nalan         Emir Can Igr~   0.189  0.418 Sad/Depressing 
##  7 Turkey Top 50 Neyse         Sagopa Kajmer   0.317  0.776 Turbulent/Angry
##  8 Turkey Top 50 Vazgeçtim In~ Sagopa Kajmer   0.507  0.638 Happy/Joyful   
##  9 Turkey Top 50 Yemin Olsun   Ufo361          0.31   0.582 Turbulent/Angry
## 10 Turkey Top 50 Neresi?       BEGE            0.652  0.562 Happy/Joyful   
## # ... with 190 more rows

5.7.1. Sentiment counts by country

# Tracks per sentiment within each playlist, largest counts first
count(sentiment_by_countries, track_sentiment, sort = TRUE)

## # A tibble: 16 x 3
## # Groups:   playlist_name [4]
##    playlist_name        track_sentiment     n
##    <chr>                <fct>           <int>
##  1 Brazil Top 50        Happy/Joyful       45
##  2 Japan Top 50         Happy/Joyful       26
##  3 Turkey Top 50        Turbulent/Angry    23
##  4 Japan Top 50         Turbulent/Angry    19
##  5 Turkey Top 50        Happy/Joyful       19
##  6 United States Top 50 Happy/Joyful       16
##  7 United States Top 50 Turbulent/Angry    14
##  8 United States Top 50 Sad/Depressing     11
##  9 United States Top 50 Chill/Peaceful      9
## 10 Turkey Top 50        Sad/Depressing      6
## 11 Japan Top 50         Sad/Depressing      3
## 12 Brazil Top 50        Sad/Depressing      2
## 13 Brazil Top 50        Turbulent/Angry     2
## 14 Japan Top 50         Chill/Peaceful      2
## 15 Turkey Top 50        Chill/Peaceful      2
## 16 Brazil Top 50        Chill/Peaceful      1

# Valence/energy scatter plot per playlist, split into the four sentiment
# quadrants: corner labels name the quadrants, segments draw the frame and
# the 0.5 dividing lines.
ggplot(
  sentiment_by_countries,
  aes(x = valence, y = energy, color = track_sentiment)
) +
  geom_point() +
  labs(color = "", title = "Sentiment Analysis by Each Country") +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  ) +
  # Both axes cover exactly [0, 1] with no padding
  scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  # Quadrant labels in the four corners
  geom_label(
    aes(x = 0.12, y = 0.98, label = "Turbulent/Angry"),
    label.padding = unit(1, "mm"), fill = "grey", color = "white"
  ) +
  geom_label(
    aes(x = 0.90, y = 0.98, label = "Happy/Joyful"),
    label.padding = unit(1, "mm"), fill = "grey", color = "white"
  ) +
  geom_label(
    aes(x = 0.12, y = 0.025, label = "Sad/Depressing"),
    label.padding = unit(1, "mm"), fill = "grey", color = "white"
  ) +
  geom_label(
    aes(x = 0.895, y = 0.025, label = "Chill/Peaceful"),
    label.padding = unit(1, "mm"), fill = "grey", color = "white"
  ) +
  # Outer frame and the horizontal / vertical dividers at 0.5
  geom_segment(aes(x = 1, y = 0, xend = 1, yend = 1)) +
  geom_segment(aes(x = 0, y = 0, xend = 0, yend = 1)) +
  geom_segment(aes(x = 0, y = 0, xend = 1, yend = 0)) +
  geom_segment(aes(x = 0, y = 0.5, xend = 1, yend = 0.5)) +
  geom_segment(aes(x = 0.5, y = 0, xend = 0.5, yend = 1)) +
  geom_segment(aes(x = 0, y = 1, xend = 1, yend = 1)) +
  facet_wrap(~ playlist_name)

6. Turkey Top 200 Daily Data Between 2017-2019

Data obtained from Spotify Charts.

# Daily Turkey Top 200 chart data, read from the project's GitHub repository
topturkey200 <- readRDS(
  url("https://github.com/pjournal/mef03g-spo-R-ify/blob/master/turkeytop200.rds?raw=true")
)
glimpse(topturkey200)

## Observations: 211,400
## Variables: 6
## $ Position   <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ...
## $ Track.Name <chr> "Gece Gölgenin Rahatina Bak", "Starboy", "Affet", "...
## $ Artist     <chr> "Çagatay Akman", "The Weeknd", "Müslüm Gürses", "Cl...
## $ Streams    <chr> "80607", "44427", "34889", "28400", "25425", "23032...
## $ URL        <chr> "https://open.spotify.com/track/3P31rcl0ym5paqRdwSi...
## $ Date       <date> 2017-01-01, 2017-01-01, 2017-01-01, 2017-01-01, 20...

6.1. Monthly Change in Total Streams

# Exploratory check kept for reference (disabled): chart rows per artist
#topturkey200 %>% group_by(Artist)%>% summarise(Total_number=n()) %>% arrange(desc(Total_number))

# Total streams per calendar month; Year_Month is a "YYYY/MM" label
change <- topturkey200 %>%
  mutate(Year_Month = format(Date, "%Y/%m")) %>%
  group_by(Year_Month) %>%
  summarise(Total_Stream = sum(as.numeric(Streams)))


# Monthly totals as points with a smoothed trend; group = 1 treats all months
# as one series even though the x axis is discrete
ggplot(change, aes(x = Year_Month, y = Total_Stream, group = 1)) +
  geom_point() +
  geom_smooth() +
  theme(
    axis.text.x = element_text(angle = 90),
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(x = "Month", y = "Total Streams", title = "Total Stream Change") +
  scale_y_continuous(labels = comma)

6.2. Most Streamed 20 Tracks

# Total streams of every artist/track pair, largest total first
rank <- topturkey200 %>%
  group_by(Artist, Track.Name) %>%
  summarise(Total_Stream = sum(as.numeric(Streams))) %>%
  arrange(desc(Total_Stream))

# The 20 most streamed tracks
head(rank, n = 20L)

## # A tibble: 20 x 3
## # Groups:   Artist [12]
##    Artist                Track.Name                    Total_Stream
##    <chr>                 <chr>                                <dbl>
##  1 Ezhel                 Geceler                           44258676
##  2 Ezhel                 Felaket                           43699884
##  3 Ufuk Beydemir         Ay Tenli Kadin                    35480606
##  4 Norm Ender            Mekanin Sahibi                    34730473
##  5 Ben Fero              Biladerim Için                    33648205
##  6 Ezhel                 Imkansizim                        32607042
##  7 Ben Fero              3 2 1                             32086343
##  8 Ben Fero              Demet Akalin                      28807007
##  9 Anil Piyanci          KAFA10                            28704551
## 10 Reynmen               Ela                               28246604
## 11 Ezhel                 Kazidik Tirnaklarla               27783484
## 12 Yüzyüzeyken Konusuruz Ne Farkeder                       27540693
## 13 Ceza                  Neyim Var Ki (feat. Sagopa K)     27067016
## 14 Murda                 AYA                               26111866
## 15 Yüzyüzeyken Konusuruz Dinle Beni Bi'                    25431753
## 16 Feride Hilal Akin     Yok Yok                           24360307
## 17 MERO                  Olabilir                          23990740
## 18 Ed Sheeran            Shape of You                      23958838
## 19 Anil Piyanci          Birakman Dogru Mu                 23643712
## 20 Reynmen               Derdim Olsun                      23545514

6.3. Sentiment Analysis of Tracks

6.3.1. Data Preparation

# The track id is the part of URL after the 31-character prefix
# "https://open.spotify.com/track/", i.e. everything from character 32 on.
# URL is referenced as a column of the piped data (not as topturkey200$URL),
# so the step stays correct if rows are filtered or reordered beforehand.
top_200_audio_features <- topturkey200 %>%
  mutate(id = substring(URL, 32))

# Keep only the first chart row of every track id
top_200_audio_features <- top_200_audio_features[
  !duplicated(top_200_audio_features$id),
]

glimpse(top_200_audio_features)

## Observations: 2,874
## Variables: 7
## $ Position   <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", ...
## $ Track.Name <chr> "Gece Gölgenin Rahatina Bak", "Starboy", "Affet", "...
## $ Artist     <chr> "Çagatay Akman", "The Weeknd", "Müslüm Gürses", "Cl...
## $ Streams    <chr> "80607", "44427", "34889", "28400", "25425", "23032...
## $ URL        <chr> "https://open.spotify.com/track/3P31rcl0ym5paqRdwSi...
## $ Date       <date> 2017-01-01, 2017-01-01, 2017-01-01, 2017-01-01, 20...
## $ id         <chr> "3P31rcl0ym5paqRdwSiZps", "5aAx2yezTd8zXrkmtKl66Z",...

# Ids of the tracks whose audio features are needed
Id_list <- top_200_audio_features$id

# The commented-out code below is how the track features were obtained.
# Because of its long processing time, the resulting data frame is downloaded
# from the GitHub repository instead.
#a<-unique(Id_list)
#tracks_features=get_track_audio_features(a[1])
#for (x in 2:length(a)){
#  tracks_features <- rbind(tracks_features,get_track_audio_features(a[x]))
#}
#tracks_features<-tracks_features%>%slice(-1) 
 
tracks_features <- readRDS(
  url("https://github.com/pjournal/mef03g-spo-R-ify/blob/master/top200_tracks_features.rds?raw=true")
)
glimpse(tracks_features)

## Observations: 2,874
## Variables: 18
## $ danceability     <dbl> 0.769, 0.681, 0.424, 0.720, 0.476, 0.748, 0.4...
## $ energy           <dbl> 0.837, 0.594, 0.666, 0.763, 0.718, 0.524, 0.3...
## $ key              <int> 6, 7, 7, 9, 8, 8, 4, 4, 1, 5, 6, 6, 0, 8, 7, ...
## $ loudness         <dbl> -4.057, -7.028, -6.683, -4.068, -5.309, -5.59...
## $ mode             <int> 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, ...
## $ speechiness      <dbl> 0.1400, 0.2820, 0.0473, 0.0523, 0.0576, 0.033...
## $ acousticness     <dbl> 0.65200, 0.16500, 0.39600, 0.40600, 0.07840, ...
## $ instrumentalness <dbl> 0.00e+00, 3.49e-06, 1.69e-04, 0.00e+00, 1.02e...
## $ liveness         <dbl> 0.0986, 0.1340, 0.1200, 0.1800, 0.1220, 0.111...
## $ valence          <dbl> 0.8190, 0.5350, 0.2750, 0.7420, 0.1420, 0.661...
## $ tempo            <dbl> 100.962, 186.054, 160.079, 101.965, 199.864, ...
## $ type             <chr> "audio_features", "audio_features", "audio_fe...
## $ id               <chr> "3P31rcl0ym5paqRdwSiZps", "5aAx2yezTd8zXrkmtK...
## $ uri              <chr> "spotify:track:3P31rcl0ym5paqRdwSiZps", "spot...
## $ track_href       <chr> "https://api.spotify.com/v1/tracks/3P31rcl0ym...
## $ analysis_url     <chr> "https://api.spotify.com/v1/audio-analysis/3P...
## $ duration_ms      <int> 163960, 230453, 279475, 251088, 205947, 24496...
## $ time_signature   <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...

# Attach the audio features to every chart track; both tables share "id".
top_200_audio_features <- inner_join(
  top_200_audio_features,
  tracks_features,
  by = "id"
)

# Label every track from its valence and energy. classify_track_sentiment() is
# still called once per row, but the result is built with vapply() over
# seq_len() so that
#   * the vector is not grown one element at a time, and
#   * a table with zero rows yields an empty vector instead of looping over
#     1:0 and indexing rows that do not exist.
# as.character() keeps a missing (NA) label acceptable to vapply().
Sentiment <- vapply(
  seq_len(nrow(top_200_audio_features)),
  function(i) {
    as.character(
      classify_track_sentiment(
        valence = top_200_audio_features$valence[i],
        energy = top_200_audio_features$energy[i]
      )
    )
  },
  character(1)
)

# cbind() names the new column after the vector, i.e. "Sentiment".
top_200_audio_features <- cbind(top_200_audio_features, Sentiment)

glimpse(top_200_audio_features)

## Observations: 2,874
## Variables: 25
## $ Position         <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", ...
## $ Track.Name       <chr> "Gece Gölgenin Rahatina Bak", "Starboy", "Aff...
## $ Artist           <chr> "Çagatay Akman", "The Weeknd", "Müslüm Gürses...
## $ Streams          <chr> "80607", "44427", "34889", "28400", "25425", ...
## $ URL              <chr> "https://open.spotify.com/track/3P31rcl0ym5pa...
## $ Date             <date> 2017-01-01, 2017-01-01, 2017-01-01, 2017-01-...
## $ id               <chr> "3P31rcl0ym5paqRdwSiZps", "5aAx2yezTd8zXrkmtK...
## $ danceability     <dbl> 0.769, 0.681, 0.424, 0.720, 0.476, 0.748, 0.4...
## $ energy           <dbl> 0.837, 0.594, 0.666, 0.763, 0.718, 0.524, 0.3...
## $ key              <int> 6, 7, 7, 9, 8, 8, 4, 4, 1, 5, 6, 6, 0, 8, 7, ...
## $ loudness         <dbl> -4.057, -7.028, -6.683, -4.068, -5.309, -5.59...
## $ mode             <int> 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, ...
## $ speechiness      <dbl> 0.1400, 0.2820, 0.0473, 0.0523, 0.0576, 0.033...
## $ acousticness     <dbl> 0.65200, 0.16500, 0.39600, 0.40600, 0.07840, ...
## $ instrumentalness <dbl> 0.00e+00, 3.49e-06, 1.69e-04, 0.00e+00, 1.02e...
## $ liveness         <dbl> 0.0986, 0.1340, 0.1200, 0.1800, 0.1220, 0.111...
## $ valence          <dbl> 0.8190, 0.5350, 0.2750, 0.7420, 0.1420, 0.661...
## $ tempo            <dbl> 100.962, 186.054, 160.079, 101.965, 199.864, ...
## $ type             <chr> "audio_features", "audio_features", "audio_fe...
## $ uri              <chr> "spotify:track:3P31rcl0ym5paqRdwSiZps", "spot...
## $ track_href       <chr> "https://api.spotify.com/v1/tracks/3P31rcl0ym...
## $ analysis_url     <chr> "https://api.spotify.com/v1/audio-analysis/3P...
## $ duration_ms      <int> 163960, 230453, 279475, 251088, 205947, 24496...
## $ time_signature   <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...
## $ Sentiment        <fct> Happy/Joyful, Happy/Joyful, Turbulent/Angry, ...

6.3.2. Sentiment Bar Graph

# Number of tracks in each sentiment class (columns: Sentiment, n). Passing
# the column straight to count() returns an ungrouped table, whereas
# group_by() %>% count() would leave it grouped by Sentiment.
sent_count <- top_200_audio_features %>%
  count(Sentiment)

# One bar per sentiment class; geom_col() uses the pre-computed n as the bar
# height, which is what geom_bar(stat = "identity") does.
ggplot(sent_count, aes(x = Sentiment, y = n, fill = Sentiment)) +
  geom_col() +
  labs(
    title = "Sentiment Count",
    x = "Sentiment Distribution",
    y = "Count of Sentiments"
  ) +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )

6.3.3. Sentiment Gradient Chart

# Scatter plot of every track in the valence/energy plane, coloured by its
# sentiment class, with the four quadrants framed and labelled.
#
# The quadrant labels and the frame are fixed decorations, so they are added
# with annotate(). Writing them as geom_label(aes(...)) / geom_segment(aes(...))
# makes each layer inherit the plot data: every label and line is then drawn
# once per track, and the lines also pick up the color = Sentiment mapping.
ggplot(
  top_200_audio_features,
  aes(x = valence, y = energy, color = Sentiment)
) +
  geom_point() +
  labs(
    color = "",
    title = "Sentiment Analysis of Turkey Top 200 Chart Between 2017 and 2019"
  ) +
  theme(
    title = element_text(size = 16, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  ) +
  # Both axes cover exactly [0, 1] with no padding around the frame.
  scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  # Quadrant names: top-left, top-right, bottom-left, bottom-right.
  annotate(
    "label",
    x = c(0.25, 0.75, 0.25, 0.75),
    y = c(0.97, 0.97, 0.03, 0.03),
    label = c(
      "Turbulent/Angry", "Happy/Joyful",
      "Sad/Depressing", "Chill/Peaceful"
    ),
    label.padding = unit(2, "mm"),
    fill = "darkgrey",
    color = "white"
  ) +
  # Frame and quadrant dividers, in order: right edge, left edge, bottom edge,
  # horizontal midline, vertical midline, top edge.
  annotate(
    "segment",
    x    = c(1, 0, 0, 0,   0.5, 0),
    y    = c(0, 0, 0, 0.5, 0,   1),
    xend = c(1, 0, 1, 1,   0.5, 1),
    yend = c(1, 1, 0, 0.5, 1,   1)
  )