library(tidyverse)
library(dplyr)
load(file = "/Users/m2lmacbook1/Desktop/Courses/BDA 503 | Data Analytics Essentials/atp_tennis_data_2017.RData")

Task 1

task1 <- inner_join(tourney_df, player_df, by = c("singles_winner_player_id"="player_id"))
task1 %>%
group_by(flag_code) %>% count(flag_code, sort=TRUE)
## # A tibble: 21 x 2
## # Groups:   flag_code [21]
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # … with 11 more rows

Task 2

nonchamp_players <-
player_df %>% select(player_id, flag_code) %>% anti_join (.,task1)
## Joining, by = "flag_code"
nonchamp_players %>% left_join(.,score_df, by=c("player_id"="winner_player_id")) %>% 
  group_by(flag_code) %>% summarise(total_won=sum(winner_games_won, na.rm=TRUE)) %>% arrange(desc(total_won)) 
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # … with 83 more rows

Task 3

task3_a <- rank_df %>% distinct(player_id, .keep_all = TRUE) 
task3 <- inner_join(tourney_df, task3_a, by = c("singles_winner_player_id"="player_id"))
task3 %>%
group_by(player_age) %>% count(player_age, sort=TRUE)
## # A tibble: 16 x 2
## # Groups:   player_age [16]
##    player_age     n
##         <dbl> <int>
##  1         31     8
##  2         36     8
##  3         26     7
##  4         30     7
##  5         32     7
##  6         20     6
##  7         25     6
##  8         29     6
##  9         23     3
## 10         34     3
## 11         21     1
## 12         24     1
## 13         27     1
## 14         28     1
## 15         35     1
## 16         37     1

Task 4

task4_a <- task3_a %>% select(player_id, player_age) %>% filter(player_age > 30) %>% anti_join (.,task3)
## Joining, by = "player_age"
task4 <- player_df %>% select(player_id, first_name, last_name) %>% inner_join(.,task4_a) %>% arrange(desc(player_age))
## Joining, by = "player_id"
head(task4, 10)
## # A tibble: 10 x 4
##    player_id first_name last_name       player_age
##    <chr>     <chr>      <chr>                <dbl>
##  1 e121      Younes     El Aynaoui              46
##  2 s550      Takao      Suzuki                  41
##  3 m747      Frank      Moser                   40
##  4 s535      Razvan     Sabau                   40
##  5 h355      Tommy      Haas                    39
##  6 k418      Oh-Hee     Kwon                    39
##  7 m815      Toshihide  Matsui                  39
##  8 r383      Ruben      Ramirez Hidalgo         39
##  9 h442      Jan        Hernych                 38
## 10 k336      Ivo        Karlovic                38