library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(knitr)
load("/Users/nilaykamar/Downloads/atp_tennis_data_2017.RData")
Task 1: Rank countries(Flag codes) by the single champions
library(knitr)
single_champions_countries <- tourney_df %>% left_join(., player_df, by = c("singles_winner_player_id" = "player_id")) %>%
count(flag_code, sort = TRUE)
kable(head(single_champions_countries))
ESP |
11 |
USA |
9 |
SUI |
8 |
FRA |
7 |
GER |
7 |
BUL |
4 |
Task 2: Rank countries which did not get only singles champions by the sets won whom they win the match.
library(knitr)
not_champions_players <- player_df %>% select(player_id, flag_code) %>% anti_join(., single_champions_countries)
## Joining, by = "flag_code"
not_champions_players %>% left_join(., score_df, by=c("player_id"="winner_player_id")) %>%
group_by(flag_code) %>%
summarise(total_won = sum(winner_games_won, na.rm = TRUE)) %>%
arrange(desc(total_won))
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # … with 83 more rows
kable(head(not_champions_players))
a002 |
CHI |
a001 |
NGR |
a005 |
BRA |
a012 |
NED |
a014 |
AUS |
a017 |
AUS |
Task 3: Rank countries of players who have made no fault
library(knitr)
no_fault <- stats_df %>% select(match_id, winner_double_faults) %>% filter(winner_double_faults == 0)
player_score <- score_df %>% select(match_id, winner_player_id)
no_fault_players <- no_fault %>% inner_join(., player_score, by = c("match_id"))
no_fault_countries <- no_fault_players %>% left_join(., player_df, by = c("winner_player_id" = "player_id")) %>%
count(flag_code, sort = TRUE)
kable(head(no_fault_countries))
ESP |
64 |
USA |
52 |
FRA |
42 |
ARG |
40 |
GER |
28 |
ITA |
25 |
Task 4: Rank players who have never break in matches by match duration, adding country
library(knitr)
no_break_match <- stats_df %>% select(match_id, winner_break_points_saved, match_duration) %>% filter(winner_break_points_saved != 0)
countries_player <- player_df %>% select(player_id, player_slug, flag_code)
no_break_players <- no_break_match %>% left_join(., player_score, by = c("match_id")) %>%
group_by(winner_player_id) %>%
summarise(total_duration = sum(match_duration, na.rm = TRUE)) %>%
left_join(., countries_player, by = c("winner_player_id" = "player_id")) %>%
arrange(desc(total_duration))
kable(head(no_break_players))
n409 |
6259 |
rafael-nadal |
ESP |
gb88 |
5056 |
david-goffin |
BEL |
z355 |
4232 |
alexander-zverev |
GER |
r772 |
4230 |
albert-ramos-vinolas |
ESP |
bd06 |
4038 |
roberto-bautista-agut |
ESP |
tb69 |
3990 |
dominic-thiem |
AUT |