library(tidyverse)
library(knitr)
url<-url("https://github.com/pjournal/mef03-KutayAkalin/blob/master/atp_tennis_data_2017.RData?raw=true")
load(url)
df_1<-inner_join(tourney_df,player_df,by= c("singles_winner_player_id" = "player_id"))
winner_countries<-df_1 %>% group_by(flag_code) %>% summarize(Count=n()) %>% arrange(desc(Count))
kable(head(winner_countries,n=15L))
flag_code | Count |
---|---|
ESP | 11 |
USA | 9 |
SUI | 8 |
FRA | 7 |
GER | 7 |
BUL | 4 |
ARG | 2 |
BEL | 2 |
BIH | 2 |
CRO | 2 |
LUX | 2 |
SRB | 2 |
AUT | 1 |
DOM | 1 |
GBR | 1 |
df_2<-anti_join(player_df,winner_countries,by= "flag_code")
df_3<-inner_join(score_df,df_2, by=c("winner_player_id"="player_id"))
won_games_by_countries<-df_3 %>% group_by(flag_code) %>% summarise(Total_Games_Won=sum(winner_games_won)) %>%arrange(desc(Total_Games_Won))
kable(won_games_by_countries)
flag_code | Total_Games_Won |
---|---|
AUS | 1989 |
CZE | 1209 |
CAN | 1190 |
SVK | 889 |
BRA | 873 |
POR | 621 |
RSA | 566 |
KAZ | 495 |
KOR | 438 |
GEO | 377 |
GRE | 359 |
NED | 344 |
COL | 308 |
IND | 273 |
MDA | 272 |
CYP | 257 |
ROU | 247 |
HUN | 239 |
ISR | 238 |
TUN | 237 |
TPE | 186 |
LAT | 165 |
POL | 165 |
CHI | 154 |
BAR | 152 |
NOR | 131 |
CHN | 103 |
SWE | 99 |
EGY | 95 |
LTU | 90 |
IRL | 70 |
BLR | 64 |
ECU | 57 |
SLO | 41 |
NZL | 36 |
EST | 30 |
MAR | 20 |
ESA | 12 |
TUR | 0 |
#4) Ranks of players and Point Per Tourney Calculation for 20/11/2017
joined<-left_join(rank_df,player_df,by="player_id") %>% mutate(Points_Per_Tourney=ranking_points/tourneys_played)
df5<-joined %>% select(week_title,first_name,last_name,flag_code, rank_number, ranking_points,tourneys_played,Points_Per_Tourney)%>% filter(week_title=="2017-11-20") %>% arrange(desc(Points_Per_Tourney))
kable(head(df5,n=30L))
week_title | first_name | last_name | flag_code | rank_number | ranking_points | tourneys_played | Points_Per_Tourney |
---|---|---|---|---|---|---|---|
2017-11-20 | Rafael | Nadal | ESP | 1 | 10645 | 18 | 591.38889 |
2017-11-20 | Roger | Federer | SUI | 2 | 9605 | 17 | 565.00000 |
2017-11-20 | Grigor | Dimitrov | BUL | 3 | 5150 | 23 | 223.91304 |
2017-11-20 | Stan | Wawrinka | SUI | 9 | 3150 | 15 | 210.00000 |
2017-11-20 | Alexander | Zverev | GER | 4 | 4610 | 25 | 184.40000 |
2017-11-20 | Marin | Cilic | CRO | 6 | 3805 | 22 | 172.95455 |
2017-11-20 | Novak | Djokovic | SRB | 12 | 2585 | 16 | 161.56250 |
2017-11-20 | Dominic | Thiem | AUT | 5 | 4015 | 27 | 148.70370 |
2017-11-20 | David | Goffin | BEL | 7 | 3775 | 26 | 145.19231 |
2017-11-20 | Jack | Sock | USA | 8 | 3165 | 22 | 143.86364 |
2017-11-20 | Andy | Murray | GBR | 16 | 2290 | 16 | 143.12500 |
2017-11-20 | Juan Martin | del Potro | ARG | 11 | 2595 | 19 | 136.57895 |
2017-11-20 | Jo-Wilfried | Tsonga | FRA | 15 | 2320 | 20 | 116.00000 |
2017-11-20 | Kevin | Anderson | RSA | 14 | 2480 | 22 | 112.72727 |
2017-11-20 | Tomas | Berdych | CZE | 19 | 2095 | 19 | 110.26316 |
2017-11-20 | Sam | Querrey | USA | 13 | 2535 | 23 | 110.21739 |
2017-11-20 | Nick | Kyrgios | AUS | 21 | 2010 | 19 | 105.78947 |
2017-11-20 | Pablo | Carreno Busta | ESP | 10 | 2615 | 25 | 104.60000 |
2017-11-20 | Kei | Nishikori | JPN | 22 | 1885 | 19 | 99.21053 |
2017-11-20 | John | Isner | USA | 17 | 2265 | 24 | 94.37500 |
2017-11-20 | Lucas | Pouille | FRA | 18 | 2235 | 24 | 93.12500 |
2017-11-20 | Milos | Raonic | CAN | 24 | 1795 | 20 | 89.75000 |
2017-11-20 | Roberto | Bautista Agut | ESP | 20 | 2015 | 24 | 83.95833 |
2017-11-20 | Gilles | Muller | LUX | 25 | 1695 | 22 | 77.04545 |
2017-11-20 | Fabio | Fognini | ITA | 27 | 1670 | 24 | 69.58333 |
2017-11-20 | Richard | Gasquet | FRA | 31 | 1375 | 21 | 65.47619 |
2017-11-20 | Philipp | Kohlschreiber | GER | 29 | 1440 | 22 | 65.45455 |
2017-11-20 | Diego | Schwartzman | ARG | 26 | 1675 | 27 | 62.03704 |
2017-11-20 | Albert | Ramos-Vinolas | ESP | 23 | 1845 | 30 | 61.50000 |
2017-11-20 | Gael | Monfils | FRA | 46 | 1015 | 17 | 59.70588 |
#4) Ranks of players and Point Per Tourney Calculation for 20/11/2017
country<-joined %>%filter(week_title=="2017-11-20") %>% group_by(flag_code) %>% summarise(Avg_Points_Per_Tourney=mean(Points_Per_Tourney),Player_Count=n()) %>% arrange(desc(Avg_Points_Per_Tourney))
kable(head(country,n=15L))
flag_code | Avg_Points_Per_Tourney | Player_Count |
---|---|---|
LUX | 77.04545 | 1 |
SUI | 47.91940 | 17 |
BUL | 47.37566 | 5 |
RSA | 32.04015 | 4 |
ESP | 27.74361 | 46 |
GEO | 27.53333 | 1 |
CYP | 26.85714 | 1 |
SRB | 23.37369 | 16 |
LAT | 21.50000 | 1 |
BIH | 21.47851 | 4 |
URU | 20.99674 | 3 |
MDA | 20.53333 | 1 |
GRE | 19.83871 | 1 |
UZB | 19.26087 | 2 |
HUN | 18.09375 | 2 |
df6 <- rank_df %>% filter(player_id=="n409") %>% arrange(week_title)
df6<-df6[!duplicated(df6$ranking_points), ]
ggplot(df6,aes(week_title,ranking_points)) + geom_point()+geom_smooth() + labs(title = "Rafael Nadal's Ranking Points ", x = "Months", y = "Points") +
theme(title = element_text(size = 16, face = "bold"), plot.title = element_text(hjust = 0.5),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"), legend.title = element_blank())
df7 <- rank_df %>% filter(player_id=="f324") %>% arrange(week_title)
df7<-df7[!duplicated(df7$ranking_points), ]
ggplot(df7,aes(week_title,ranking_points)) + geom_point()+geom_smooth() + labs(title = "Roger Federer's Ranking Points ", x = "Months", y = "Points") +
theme(title = element_text(size = 16, face = "bold"), plot.title = element_text(hjust = 0.5),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"), legend.title = element_blank())
player_ids=c(rank_df[[2]][[1]],rank_df[[2]][[2]],rank_df[[2]][[3]],rank_df[[2]][[4]],rank_df[[2]][[5]])
df8 <- joined %>% filter(player_id==player_ids[1] | player_id==player_ids[2] | player_id==player_ids[3] | player_id==player_ids[4] | player_id==player_ids[5])%>% arrange(week_title)
df8
## # A tibble: 210 x 19
## week_title player_id rank_number ranking_points tourneys_played
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2017-01-02 tb69 8 3415 28
## 2 2017-01-02 n409 9 3300 16
## 3 2017-01-02 f324 16 2130 15
## 4 2017-01-02 d875 17 2035 25
## 5 2017-01-02 z355 24 1655 24
## 6 2017-01-09 tb69 8 3415 28
## 7 2017-01-09 n409 9 3195 16
## 8 2017-01-09 d875 15 2195 25
## 9 2017-01-09 f324 17 1980 14
## 10 2017-01-09 z355 24 1655 24
## # ... with 200 more rows, and 14 more variables: player_age <dbl>,
## # player_slug <chr>, first_name <chr>, last_name <chr>, flag_code <chr>,
## # residence <chr>, birth_place <chr>, birth_date <date>,
## # turned_pro <dbl>, weight_kg <dbl>, height_cm <dbl>, handedness <chr>,
## # backhand <chr>, Points_Per_Tourney <dbl>
ggplot(df8,aes(week_title,ranking_points,group=player_slug,color=player_slug)) + geom_point() + geom_line(aes( color=player_slug)) + labs(title = "Top 5 Line Grap", x = "Months", y = "Points") +
theme(title = element_text(size = 16, face = "bold"), plot.title = element_text(hjust = 0.5),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"), legend.title = element_blank())