1. Loading Data

library(tidyverse)
library(knitr)

url<-url("https://github.com/pjournal/mef03-KutayAkalin/blob/master/atp_tennis_data_2017.RData?raw=true")
load(url)

2. Analysis

2.1. Rank countries (Flag codes) by the singles champions

df_1<-inner_join(tourney_df,player_df,by= c("singles_winner_player_id" = "player_id"))

winner_countries<-df_1 %>% group_by(flag_code) %>% summarize(Count=n()) %>% arrange(desc(Count))
kable(head(winner_countries,n=15L))
flag_code Count
ESP 11
USA 9
SUI 8
FRA 7
GER 7
BUL 4
ARG 2
BEL 2
BIH 2
CRO 2
LUX 2
SRB 2
AUT 1
DOM 1
GBR 1

2.2. Rank countries which did not get any singles champions by the games won when they win the match

df_2<-anti_join(player_df,winner_countries,by= "flag_code")
df_3<-inner_join(score_df,df_2, by=c("winner_player_id"="player_id"))

won_games_by_countries<-df_3 %>% group_by(flag_code) %>% summarise(Total_Games_Won=sum(winner_games_won)) %>%arrange(desc(Total_Games_Won))
kable(won_games_by_countries)
flag_code Total_Games_Won
AUS 1989
CZE 1209
CAN 1190
SVK 889
BRA 873
POR 621
RSA 566
KAZ 495
KOR 438
GEO 377
GRE 359
NED 344
COL 308
IND 273
MDA 272
CYP 257
ROU 247
HUN 239
ISR 238
TUN 237
TPE 186
LAT 165
POL 165
CHI 154
BAR 152
NOR 131
CHN 103
SWE 99
EGY 95
LTU 90
IRL 70
BLR 64
ECU 57
SLO 41
NZL 36
EST 30
MAR 20
ESA 12
TUR 0

2.3. Ranks of players and Points Per Tourney Calculation for 20/11/2017

#4) Ranks of players and Point Per Tourney Calculation for 20/11/2017

joined<-left_join(rank_df,player_df,by="player_id") %>% mutate(Points_Per_Tourney=ranking_points/tourneys_played) 

df5<-joined %>% select(week_title,first_name,last_name,flag_code, rank_number, ranking_points,tourneys_played,Points_Per_Tourney)%>%  filter(week_title=="2017-11-20") %>% arrange(desc(Points_Per_Tourney))

kable(head(df5,n=30L))
week_title first_name last_name flag_code rank_number ranking_points tourneys_played Points_Per_Tourney
2017-11-20 Rafael Nadal ESP 1 10645 18 591.38889
2017-11-20 Roger Federer SUI 2 9605 17 565.00000
2017-11-20 Grigor Dimitrov BUL 3 5150 23 223.91304
2017-11-20 Stan Wawrinka SUI 9 3150 15 210.00000
2017-11-20 Alexander Zverev GER 4 4610 25 184.40000
2017-11-20 Marin Cilic CRO 6 3805 22 172.95455
2017-11-20 Novak Djokovic SRB 12 2585 16 161.56250
2017-11-20 Dominic Thiem AUT 5 4015 27 148.70370
2017-11-20 David Goffin BEL 7 3775 26 145.19231
2017-11-20 Jack Sock USA 8 3165 22 143.86364
2017-11-20 Andy Murray GBR 16 2290 16 143.12500
2017-11-20 Juan Martin del Potro ARG 11 2595 19 136.57895
2017-11-20 Jo-Wilfried Tsonga FRA 15 2320 20 116.00000
2017-11-20 Kevin Anderson RSA 14 2480 22 112.72727
2017-11-20 Tomas Berdych CZE 19 2095 19 110.26316
2017-11-20 Sam Querrey USA 13 2535 23 110.21739
2017-11-20 Nick Kyrgios AUS 21 2010 19 105.78947
2017-11-20 Pablo Carreno Busta ESP 10 2615 25 104.60000
2017-11-20 Kei Nishikori JPN 22 1885 19 99.21053
2017-11-20 John Isner USA 17 2265 24 94.37500
2017-11-20 Lucas Pouille FRA 18 2235 24 93.12500
2017-11-20 Milos Raonic CAN 24 1795 20 89.75000
2017-11-20 Roberto Bautista Agut ESP 20 2015 24 83.95833
2017-11-20 Gilles Muller LUX 25 1695 22 77.04545
2017-11-20 Fabio Fognini ITA 27 1670 24 69.58333
2017-11-20 Richard Gasquet FRA 31 1375 21 65.47619
2017-11-20 Philipp Kohlschreiber GER 29 1440 22 65.45455
2017-11-20 Diego Schwartzman ARG 26 1675 27 62.03704
2017-11-20 Albert Ramos-Vinolas ESP 23 1845 30 61.50000
2017-11-20 Gael Monfils FRA 46 1015 17 59.70588

2.4. Ranks of Countries in Points Per Tourney for 20/11/2017

#4) Ranks of players and Point Per Tourney Calculation for 20/11/2017


country<-joined %>%filter(week_title=="2017-11-20") %>% group_by(flag_code) %>% summarise(Avg_Points_Per_Tourney=mean(Points_Per_Tourney),Player_Count=n()) %>% arrange(desc(Avg_Points_Per_Tourney))

kable(head(country,n=15L))
flag_code Avg_Points_Per_Tourney Player_Count
LUX 77.04545 1
SUI 47.91940 17
BUL 47.37566 5
RSA 32.04015 4
ESP 27.74361 46
GEO 27.53333 1
CYP 26.85714 1
SRB 23.37369 16
LAT 21.50000 1
BIH 21.47851 4
URU 20.99674 3
MDA 20.53333 1
GRE 19.83871 1
UZB 19.26087 2
HUN 18.09375 2

3. Plots

3.1. Rafael Nadal’s Ranking Points

df6 <- rank_df %>% filter(player_id=="n409") %>% arrange(week_title)
df6<-df6[!duplicated(df6$ranking_points), ]

ggplot(df6,aes(week_title,ranking_points)) + geom_point()+geom_smooth() +  labs(title = "Rafael Nadal's Ranking Points ", x = "Months", y = "Points") +
  theme(title = element_text(size = 16, face = "bold"), plot.title = element_text(hjust = 0.5), 
        axis.title.x = element_text(size = 14, face = "bold"), 
        axis.title.y = element_text(size = 14, face = "bold"), legend.title = element_blank()) 

3.2. Roger Federer’s Ranking Points

df7 <- rank_df %>% filter(player_id=="f324") %>% arrange(week_title)
df7<-df7[!duplicated(df7$ranking_points), ]

ggplot(df7,aes(week_title,ranking_points)) + geom_point()+geom_smooth() +  labs(title = "Roger Federer's Ranking Points ", x = "Months", y = "Points") +
  theme(title = element_text(size = 16, face = "bold"), plot.title = element_text(hjust = 0.5), 
        axis.title.x = element_text(size = 14, face = "bold"), 
        axis.title.y = element_text(size = 14, face = "bold"), legend.title = element_blank()) 

3.3. Top 5 Rank Point Change Graph

player_ids=c(rank_df[[2]][[1]],rank_df[[2]][[2]],rank_df[[2]][[3]],rank_df[[2]][[4]],rank_df[[2]][[5]])

df8 <- joined %>% filter(player_id==player_ids[1] | player_id==player_ids[2] | player_id==player_ids[3] | player_id==player_ids[4] | player_id==player_ids[5])%>% arrange(week_title)
df8
## # A tibble: 210 x 19
##    week_title player_id rank_number ranking_points tourneys_played
##    <date>     <chr>           <dbl>          <dbl>           <dbl>
##  1 2017-01-02 tb69                8           3415              28
##  2 2017-01-02 n409                9           3300              16
##  3 2017-01-02 f324               16           2130              15
##  4 2017-01-02 d875               17           2035              25
##  5 2017-01-02 z355               24           1655              24
##  6 2017-01-09 tb69                8           3415              28
##  7 2017-01-09 n409                9           3195              16
##  8 2017-01-09 d875               15           2195              25
##  9 2017-01-09 f324               17           1980              14
## 10 2017-01-09 z355               24           1655              24
## # ... with 200 more rows, and 14 more variables: player_age <dbl>,
## #   player_slug <chr>, first_name <chr>, last_name <chr>, flag_code <chr>,
## #   residence <chr>, birth_place <chr>, birth_date <date>,
## #   turned_pro <dbl>, weight_kg <dbl>, height_cm <dbl>, handedness <chr>,
## #   backhand <chr>, Points_Per_Tourney <dbl>
ggplot(df8,aes(week_title,ranking_points,group=player_slug,color=player_slug)) + geom_point() + geom_line(aes( color=player_slug)) +  labs(title = "Top 5 Line Grap", x = "Months", y = "Points") +
  theme(title = element_text(size = 16, face = "bold"), plot.title = element_text(hjust = 0.5), 
        axis.title.x = element_text(size = 14, face = "bold"), 
        axis.title.y = element_text(size = 14, face = "bold"), legend.title = element_blank())