library(dplyr)
library(tidyverse)
load(file = "/Users/mehtapanalyticahouse/Desktop/atp_tennis_data_2017.RData")

Task 1:

task1 <- inner_join (tourney_df, player_df, by = c("singles_winner_player_id"="player_id"))
task1 %>%
group_by(flag_code) %>% count(flag_code, sort=TRUE)
## # A tibble: 21 x 2
## # Groups:   flag_code [21]
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # … with 11 more rows

Task 2:

nonchamp_players <-
player_df %>% select(player_id, flag_code) %>% anti_join (.,task1)
## Joining, by = "flag_code"
nonchamp_players %>% left_join(.,score_df, by=c("player_id"="winner_player_id")) %>% 
  group_by(flag_code) %>% summarise(total_won=sum(winner_games_won, na.rm=TRUE)) %>% arrange(desc(total_won)) 
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # … with 83 more rows

Task 3:

task3 <- stats_df %>% select(match_id, match_duration)
task3_1 <- inner_join(task3, score_df) %>%
group_by(winner_player_id) %>% filter(match_duration < 25) %>% select(match_duration)
## Joining, by = "match_id"
## Adding missing grouping variables: `winner_player_id`
task3_2 <- player_df %>% select(player_id, first_name, last_name) %>% inner_join(.,task3_1, by = c("player_id"="winner_player_id")) %>% arrange(desc(match_duration))


print(task3_2)
## # A tibble: 21 x 4
##    player_id first_name last_name  match_duration
##    <chr>     <chr>      <chr>               <dbl>
##  1 i165      Denis      Istomin                24
##  2 n409      Rafael     Nadal                  24
##  3 ca12      Jeremy     Chardy                 23
##  4 d985      Joao       Domingues              23
##  5 j551      Nicolas    Jarry                  23
##  6 pd31      Benoit     Paire                  22
##  7 se73      Yuichi     Sugita                 22
##  8 mj01      Roberto    Marcora                21
##  9 t742      Janko      Tipsarevic             21
## 10 pc78      Ante       Pavic                  20
## # … with 11 more rows

Task 4:

task4 <- inner_join(tourney_df, player_df, by = c("singles_winner_player_id"="player_id"))
task4 %>%
group_by(handedness) %>% count(handedness, sort=TRUE)
## # A tibble: 2 x 2
## # Groups:   handedness [2]
##   handedness       n
##   <chr>        <int>
## 1 Right-Handed    58
## 2 Left-Handed      9