url<-url("https://github.com/pjournal/mef03-karaahmetlid/blob/master/atp_tennis_data_2017.RData?raw=true")
load(url)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(viridis)
library(knitr)
Task1: Rank countries by the singles champions.
player_tourney_df <- left_join(tourney_df, player_df, by = c("singles_winner_player_id" = "player_id"))
player_tourney_df %>% select(flag_code) %>% group_by(flag_code) %>% summarise(Count=n()) %>% arrange(desc(Count))
## # A tibble: 21 x 2
## flag_code Count
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # ... with 11 more rows
Task 2: Rank countries which did not get any singles championships by the games won when they win the match.
non_ch_pl <- player_df %>% select(player_id, flag_code) %>% anti_join(player_tourney_df)
country_non_ch_pl <- non_ch_pl %>% left_join(score_df, by = c("player_id" = "winner_player_id")) %>%
group_by(flag_code) %>% summarise(totalWon = sum(winner_games_won, na.rm = TRUE)) %>% arrange(desc(totalWon))
country_non_ch_pl
## # A tibble: 93 x 2
## flag_code totalWon
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # ... with 83 more rows
country_non_ch_pl %>% top_n(20) %>%
ggplot(aes(x = flag_code, y = totalWon, fill = flag_code)) +
geom_col()
Time spent on court of the players who had reached at least Quarter Finals in Grand Slam Tournements
match_dur_df <- left_join(score_df, stats_df) %>%
select("match_id", "tourney_id", "winner_player_id", "loser_player_id", "tourney_round_name", "match_duration")
match_dur_df$tourney_id <- as.numeric(match_dur_df$tourney_id)
match_dur_df
## # A tibble: 3,834 x 6
## match_id tourney_id winner_player_id loser_player_id tourney_round_n~
## <chr> <dbl> <chr> <chr> <chr>
## 1 2017-33~ 339 d875 n552 Finals
## 2 2017-33~ 339 d875 r975 Semi-Finals
## 3 2017-33~ 339 n552 w367 Semi-Finals
## 4 2017-33~ 339 r975 n409 Quarter-Finals
## 5 2017-33~ 339 w367 e831 Quarter-Finals
## 6 2017-33~ 339 n552 tc61 Quarter-Finals
## 7 2017-33~ 339 d875 tb69 Quarter-Finals
## 8 2017-33~ 339 r975 sm37 Round of 16
## 9 2017-33~ 339 w367 t840 Round of 16
## 10 2017-33~ 339 n552 dc58 Round of 16
## # ... with 3,824 more rows, and 1 more variable: match_duration <dbl>
tourney_GSlam <- tourney_df %>%
filter(tourney_singles_draw == 128) %>% select("tourney_id", "tourney_name")
tourney_GSlam
## # A tibble: 4 x 2
## tourney_id tourney_name
## <dbl> <chr>
## 1 580 Australian Open
## 2 520 Roland Garros
## 3 540 Wimbledon
## 4 560 US Open
GSlam_Mdur <- left_join(tourney_GSlam, match_dur_df)
kable(head(GSlam_Mdur, n = 20L))
580 |
Australian Open |
2017-580-f324-n409 |
f324 |
n409 |
Finals |
218 |
580 |
Australian Open |
2017-580-f324-w367 |
f324 |
w367 |
Semi-Finals |
185 |
580 |
Australian Open |
2017-580-n409-d875 |
n409 |
d875 |
Semi-Finals |
296 |
580 |
Australian Open |
2017-580-n409-r975 |
n409 |
r975 |
Quarter-Finals |
164 |
580 |
Australian Open |
2017-580-w367-t786 |
w367 |
t786 |
Quarter-Finals |
135 |
580 |
Australian Open |
2017-580-d875-gb88 |
d875 |
gb88 |
Quarter-Finals |
133 |
580 |
Australian Open |
2017-580-f324-z168 |
f324 |
z168 |
Quarter-Finals |
92 |
580 |
Australian Open |
2017-580-z168-mc10 |
z168 |
mc10 |
Round of 16 |
214 |
580 |
Australian Open |
2017-580-r975-bd06 |
r975 |
bd06 |
Round of 16 |
169 |
580 |
Australian Open |
2017-580-w367-sa93 |
w367 |
sa93 |
Round of 16 |
164 |
580 |
Australian Open |
2017-580-f324-n552 |
f324 |
n552 |
Round of 16 |
204 |
580 |
Australian Open |
2017-580-n409-mc65 |
n409 |
mc65 |
Round of 16 |
176 |
580 |
Australian Open |
2017-580-gb88-tb69 |
gb88 |
tb69 |
Round of 16 |
164 |
580 |
Australian Open |
2017-580-t786-e687 |
t786 |
e687 |
Round of 16 |
173 |
580 |
Australian Open |
2017-580-d875-i165 |
d875 |
i165 |
Round of 16 |
145 |
580 |
Australian Open |
2017-580-mc10-q927 |
mc10 |
q927 |
Round of 32 |
119 |
580 |
Australian Open |
2017-580-r975-sd32 |
r975 |
sd32 |
Round of 32 |
155 |
580 |
Australian Open |
2017-580-w367-t840 |
w367 |
t840 |
Round of 32 |
152 |
580 |
Australian Open |
2017-580-n552-l797 |
n552 |
l797 |
Round of 32 |
131 |
580 |
Australian Open |
2017-580-mc65-k435 |
mc65 |
k435 |
Round of 32 |
116 |
GSlam_QF_Players <- GSlam_Mdur %>% filter(tourney_round_name == "Round of 16") %>%
select("winner_player_id") %>% distinct(winner_player_id) %>%
left_join(player_df, by = c("winner_player_id" = "player_id")) %>%
select(1, 2, 3, 4)
kable(GSlam_QF_Players)
z168 |
mischa-zverev |
Mischa |
Zverev |
r975 |
milos-raonic |
Milos |
Raonic |
w367 |
stan-wawrinka |
Stan |
Wawrinka |
f324 |
roger-federer |
Roger |
Federer |
n409 |
rafael-nadal |
Rafael |
Nadal |
gb88 |
david-goffin |
David |
Goffin |
t786 |
jo-wilfried-tsonga |
Jo-Wilfried |
Tsonga |
d875 |
grigor-dimitrov |
Grigor |
Dimitrov |
mc10 |
andy-murray |
Andy |
Murray |
d643 |
novak-djokovic |
Novak |
Djokovic |
cd85 |
pablo-carreno-busta |
Pablo |
Carreno Busta |
tb69 |
dominic-thiem |
Dominic |
Thiem |
c977 |
marin-cilic |
Marin |
Cilic |
n552 |
kei-nishikori |
Kei |
Nishikori |
ma30 |
gilles-muller |
Gilles |
Muller |
ba47 |
tomas-berdych |
Tomas |
Berdych |
q927 |
sam-querrey |
Sam |
Querrey |
d683 |
juan-martin-del-potro |
Juan Martin |
del Potro |
re44 |
andrey-rublev |
Andrey |
Rublev |
sm37 |
diego-schwartzman |
Diego |
Schwartzman |
a678 |
kevin-anderson |
Kevin |
Anderson |
min_on_court <- GSlam_Mdur %>%
group_by(winner_player_id) %>% summarise(player_on_court = sum(match_duration))
min_on_court <- left_join(GSlam_QF_Players, min_on_court) %>% select(4, 5) %>% na.omit() %>%
arrange(desc(player_on_court))
kable(min_on_court)
Nadal |
3235 |
Federer |
2295 |
Thiem |
1885 |
Anderson |
1856 |
Rublev |
1762 |
Murray |
1706 |
Wawrinka |
1686 |
Cilic |
1675 |
Carreno Busta |
1530 |
Zverev |
1520 |
Raonic |
1440 |
Nishikori |
1422 |
Dimitrov |
1302 |
Goffin |
1270 |
del Potro |
1182 |
Djokovic |
1162 |
Berdych |
1085 |
Muller |
1055 |
Schwartzman |
1020 |
Tsonga |
944 |
min_on_court %>%
ggplot(aes(x = last_name, y = player_on_court, fill = last_name)) +
geom_col() +
scale_fill_viridis(discrete=TRUE) +
theme(legend.position = "bottom")
Point win/loss ratios of the players ranked top 5 by year end 2017
top5_players <- rank_df %>% filter(week_title == max(week_title)) %>% top_n(5, ranking_points) %>%
select(2, 3) %>% left_join(select(player_df, player_id, first_name, last_name))
kable(top5_players)
n409 |
1 |
Rafael |
Nadal |
f324 |
2 |
Roger |
Federer |
d875 |
3 |
Grigor |
Dimitrov |
z355 |
4 |
Alexander |
Zverev |
tb69 |
5 |
Dominic |
Thiem |
w_pts_win_loss <- stats_df %>%
select("winner_player_id", "winner_total_points_won", "loser_total_points_won") %>% na.omit() %>%
group_by(winner_player_id) %>%
summarise(w_tot_pts_won = sum(winner_total_points_won), w_tot_pts_lost = sum(loser_total_points_won))
l_pts_win_loss <- stats_df %>%
select("loser_player_id", "loser_total_points_won", "winner_total_points_won") %>% na.omit() %>%
group_by(loser_player_id) %>%
summarise(l_tot_pts_won = sum(loser_total_points_won), l_tot_pts_lost = sum(winner_total_points_won))
winloss_ratio <- left_join(w_pts_win_loss, l_pts_win_loss, by = c("winner_player_id" = "loser_player_id")) %>%
rename(player_id = winner_player_id) %>%
mutate(w_l_ratio = (w_tot_pts_won + l_tot_pts_won) / (w_tot_pts_lost + l_tot_pts_lost))
kable(head(winloss_ratio, n = 12L))
a479 |
1015 |
869 |
712 |
842 |
1.0093513 |
a622 |
193 |
160 |
199 |
252 |
0.9514563 |
a678 |
3210 |
2602 |
1590 |
1754 |
1.1019284 |
a829 |
1657 |
1461 |
1805 |
2126 |
0.9651519 |
a853 |
62 |
30 |
334 |
436 |
0.8497854 |
a887 |
538 |
460 |
366 |
427 |
1.0191657 |
ae14 |
402 |
361 |
309 |
404 |
0.9294118 |
af32 |
70 |
64 |
49 |
67 |
0.9083969 |
b747 |
1746 |
1467 |
1283 |
1490 |
1.0243490 |
b837 |
1450 |
1204 |
1026 |
1284 |
0.9951768 |
b884 |
1275 |
1052 |
1333 |
1620 |
0.9760479 |
b896 |
514 |
441 |
800 |
906 |
0.9755011 |
top5_wlR <- left_join(top5_players, winloss_ratio)
kable(top5_wlR)
n409 |
1 |
Rafael |
Nadal |
5572 |
4158 |
947 |
1066 |
1.247894 |
f324 |
2 |
Roger |
Federer |
4686 |
3754 |
467 |
476 |
1.218203 |
d875 |
3 |
Grigor |
Dimitrov |
4110 |
3277 |
1685 |
1862 |
1.127651 |
z355 |
4 |
Alexander |
Zverev |
4418 |
3701 |
1677 |
1939 |
1.080674 |
tb69 |
5 |
Dominic |
Thiem |
4053 |
3286 |
2119 |
2468 |
1.072645 |