library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(knitr)

load("/Users/nilaykamar/Downloads/atp_tennis_data_2017.RData")

Task 1: Rank countries(Flag codes) by the single champions

library(knitr)
single_champions_countries <- tourney_df %>% left_join(., player_df, by = c("singles_winner_player_id" = "player_id")) %>%
  count(flag_code, sort = TRUE)

kable(head(single_champions_countries))
flag_code n
ESP 11
USA 9
SUI 8
FRA 7
GER 7
BUL 4

Task 2: Rank countries which did not get only singles champions by the sets won whom they win the match.

library(knitr)
not_champions_players <- player_df %>% select(player_id, flag_code) %>% anti_join(., single_champions_countries) 
## Joining, by = "flag_code"
not_champions_players %>% left_join(., score_df, by=c("player_id"="winner_player_id")) %>%
  group_by(flag_code) %>%
  summarise(total_won = sum(winner_games_won, na.rm = TRUE)) %>%
  arrange(desc(total_won))
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # … with 83 more rows
kable(head(not_champions_players))
player_id flag_code
a002 CHI
a001 NGR
a005 BRA
a012 NED
a014 AUS
a017 AUS

Task 3: Rank countries of players who have made no fault

library(knitr)
no_fault <- stats_df %>% select(match_id, winner_double_faults) %>% filter(winner_double_faults == 0)

player_score <- score_df %>% select(match_id, winner_player_id)

no_fault_players <- no_fault %>% inner_join(., player_score, by = c("match_id"))

no_fault_countries <- no_fault_players %>% left_join(., player_df, by = c("winner_player_id" = "player_id")) %>%
  count(flag_code, sort = TRUE)

kable(head(no_fault_countries))
flag_code n
ESP 64
USA 52
FRA 42
ARG 40
GER 28
ITA 25

Task 4: Rank players who have never break in matches by match duration, adding country

library(knitr)
no_break_match <- stats_df %>% select(match_id, winner_break_points_saved, match_duration) %>% filter(winner_break_points_saved != 0)
countries_player <- player_df %>% select(player_id, player_slug, flag_code)

no_break_players <- no_break_match %>% left_join(., player_score, by = c("match_id")) %>%
  group_by(winner_player_id) %>%
  summarise(total_duration = sum(match_duration, na.rm = TRUE)) %>%
  left_join(., countries_player, by = c("winner_player_id" = "player_id")) %>%
  arrange(desc(total_duration))

kable(head(no_break_players))
winner_player_id total_duration player_slug flag_code
n409 6259 rafael-nadal ESP
gb88 5056 david-goffin BEL
z355 4232 alexander-zverev GER
r772 4230 albert-ramos-vinolas ESP
bd06 4038 roberto-bautista-agut ESP
tb69 3990 dominic-thiem AUT