library(dplyr)
library(tidyverse)

url<-url("https://github.com/pjournal/mef03-balkanburcu/blob/master/atp_tennis_data_2017.RData?raw=true")
atp_tennis<- load(url)
head(tourney_df)
## # A tibble: 6 x 12
##   tourney_id tourney_name tourney_location tourney_date tourney_singles~
##        <dbl> <chr>        <chr>            <date>                  <dbl>
## 1        339 Brisbane In~ Brisbane, Austr~ 2017-01-01                 28
## 2        891 Aircel Chen~ Chennai, India   2017-01-02                 28
## 3        451 Qatar Exxon~ Doha, Qatar      2017-01-02                 32
## 4        338 Sydney Inte~ Sydney, Austral~ 2017-01-08                 28
## 5        301 ASB Classic  Auckland, New Z~ 2017-01-09                 28
## 6        580 Australian ~ Melbourne, Aust~ 2017-01-16                128
## # ... with 7 more variables: tourney_doubles_draw <dbl>,
## #   tourney_conditions <chr>, tourney_surface <chr>,
## #   tourney_fin_commit <chr>, singles_winner_player_id <chr>,
## #   doubles_winner_1_player_id <chr>, doubles_winner_2_player_id <chr>
head(player_df)
## # A tibble: 6 x 13
##   player_id player_slug first_name last_name flag_code residence
##   <chr>     <chr>       <chr>      <chr>     <chr>     <chr>    
## 1 a002      ricardo-ac~ Ricardo    Acuna     CHI       Jupiter,~
## 2 a001      sadiq-abdu~ Sadiq      Abdullahi NGR       <NA>     
## 3 a005      nelson-aer~ Nelson     Aerts     BRA       <NA>     
## 4 a004      egan-adams  Egan       Adams     USA       Palmetto~
## 5 a006      ronald-age~ Ronald     Agenor    USA       Beverly ~
## 6 a007      juan-aguil~ Juan       Aguilera  ESP       <NA>     
## # ... with 7 more variables: birth_place <chr>, birth_date <date>,
## #   turned_pro <dbl>, weight_kg <dbl>, height_cm <dbl>, handedness <chr>,
## #   backhand <chr>
head(stats_df)
## # A tibble: 6 x 54
##   match_id match_time match_duration winner_aces winner_double_f~
##   <chr>    <time>              <dbl>       <dbl>            <dbl>
## 1 2017-33~ 02:21                 141          23                3
## 2 2017-33~ 01:49                 109           7                2
## 3 2017-33~ 01:28                  88           4                1
## 4 2017-33~ 01:42                 102           1                1
## 5 2017-33~ 02:05                 125           3                3
## 6 2017-33~ 02:36                 156          11                3
## # ... with 49 more variables: winner_first_serves_in <dbl>,
## #   winner_first_serves_total <dbl>, winner_first_serve_points_won <dbl>,
## #   winner_first_serve_points_total <dbl>,
## #   winner_second_serve_points_won <dbl>,
## #   winner_second_serve_points_total <dbl>,
## #   winner_break_points_saved <dbl>,
## #   winner_break_points_serve_total <dbl>,
## #   winner_service_points_won <dbl>, winner_service_points_total <dbl>,
## #   winner_first_serve_return_won <dbl>,
## #   winner_first_serve_return_total <dbl>,
## #   winner_second_serve_return_won <dbl>,
## #   winner_second_serve_return_total <dbl>,
## #   winner_break_points_converted <dbl>,
## #   winner_break_points_return_total <dbl>,
## #   winner_service_games_played <dbl>, winner_return_games_played <dbl>,
## #   winner_return_points_won <dbl>, winner_return_points_total <dbl>,
## #   winner_total_points_won <dbl>, winner_total_points_total <dbl>,
## #   loser_aces <dbl>, loser_double_faults <dbl>,
## #   loser_first_serves_in <dbl>, loser_first_serves_total <dbl>,
## #   loser_first_serve_points_won <dbl>,
## #   loser_first_serve_points_total <dbl>,
## #   loser_second_serve_points_won <dbl>,
## #   loser_second_serve_points_total <dbl>, loser_break_points_saved <dbl>,
## #   loser_break_points_serve_total <dbl>, loser_service_points_won <dbl>,
## #   loser_service_points_total <dbl>, loser_first_serve_return_won <dbl>,
## #   loser_first_serve_return_total <dbl>,
## #   loser_second_serve_return_won <dbl>,
## #   loser_second_serve_return_total <dbl>,
## #   loser_break_points_converted <dbl>,
## #   loser_break_points_return_total <dbl>,
## #   loser_service_games_played <dbl>, loser_return_games_played <dbl>,
## #   loser_return_points_won <dbl>, loser_return_points_total <dbl>,
## #   loser_total_points_won <dbl>, loser_total_points_total <dbl>,
## #   tourney_id <chr>, winner_player_id <chr>, loser_player_id <chr>
head(rank_df)
## # A tibble: 6 x 6
##   week_title player_id rank_number ranking_points tourneys_played
##   <date>     <chr>           <dbl>          <dbl>           <dbl>
## 1 2017-11-20 n409                1          10645              18
## 2 2017-11-20 f324                2           9605              17
## 3 2017-11-20 d875                3           5150              23
## 4 2017-11-20 z355                4           4610              25
## 5 2017-11-20 tb69                5           4015              27
## 6 2017-11-20 c977                6           3805              22
## # ... with 1 more variable: player_age <dbl>
head(score_df)
## # A tibble: 6 x 14
##   match_id tourney_id tourney_round_n~ winner_player_id loser_player_id
##   <chr>    <chr>      <chr>            <chr>            <chr>          
## 1 2017-33~ 339        Finals           d875             n552           
## 2 2017-33~ 339        Semi-Finals      d875             r975           
## 3 2017-33~ 339        Semi-Finals      n552             w367           
## 4 2017-33~ 339        Quarter-Finals   r975             n409           
## 5 2017-33~ 339        Quarter-Finals   w367             e831           
## 6 2017-33~ 339        Quarter-Finals   n552             tc61           
## # ... with 9 more variables: winner_seed <chr>, loser_seed <chr>,
## #   match_score_tiebreaks <chr>, winner_sets_won <dbl>,
## #   loser_sets_won <dbl>, winner_games_won <dbl>, loser_games_won <dbl>,
## #   winner_tiebreaks_won <dbl>, loser_tiebreaks_won <dbl>

1 - Rank countries (flag codes) by the number of singles champions)

left_join(player_df, tourney_df, by=c("player_id" = "singles_winner_player_id")) %>% 
  drop_na() %>%
  select(player_id,flag_code) %>% 
  mutate(winner_count = 1) %>% select(flag_code, winner_count) %>% 
  group_by(flag_code) %>%
  transmute(winner_count = sum(winner_count)) %>% arrange(desc(winner_count)) %>% 
  distinct()
## # A tibble: 21 x 2
## # Groups:   flag_code [21]
##    flag_code winner_count
##    <chr>            <dbl>
##  1 ESP                 11
##  2 USA                  9
##  3 SUI                  8
##  4 GER                  7
##  5 FRA                  7
##  6 BUL                  4
##  7 CRO                  2
##  8 SRB                  2
##  9 ARG                  2
## 10 BIH                  2
## # ... with 11 more rows

2 - Rank countries which did not get any singles championships by the games won when they win the match

winner_countries <- left_join(player_df, tourney_df, by=c("player_id" = "singles_winner_player_id")) %>% 
  drop_na() %>%
  select(flag_code) %>% distinct()

loser_countries <- anti_join(player_df, winner_countries) %>% select(flag_code) %>% distinct()
## Joining, by = "flag_code"
inner_join(player_df, loser_countries) %>% select(player_id, flag_code) %>% distinct() %>%
  inner_join(score_df, by = c("player_id" = "winner_player_id")) %>% select(flag_code, winner_games_won) %>%
  group_by(flag_code) %>% transmute(winner_games_won = sum(winner_games_won)) %>%distinct() %>%
  arrange(desc(winner_games_won))
## Joining, by = "flag_code"
## # A tibble: 39 x 2
## # Groups:   flag_code [39]
##    flag_code winner_games_won
##    <chr>                <dbl>
##  1 AUS                   1989
##  2 CZE                   1209
##  3 CAN                   1190
##  4 SVK                    889
##  5 BRA                    873
##  6 POR                    621
##  7 RSA                    566
##  8 KAZ                    495
##  9 KOR                    438
## 10 GEO                    377
## # ... with 29 more rows

3 - Players which won tourneys more than one and their number of championship

left_join(player_df, tourney_df, by=c("player_id" = "singles_winner_player_id")) %>% 
  drop_na() %>%
  transmute(player_id,flag_code,tourney_name,first_name,last_name,winner_count = 1) %>% 
  group_by(player_id) %>% 
  transmute(winner_count = sum(winner_count),flag_code,first_name,last_name) %>% 
  arrange(desc(winner_count)) %>% 
  distinct() %>%
  ungroup() %>% 
  select(first_name,last_name,winner_count)
## # A tibble: 35 x 3
##    first_name  last_name     winner_count
##    <chr>       <chr>                <dbl>
##  1 Roger       Federer                  7
##  2 Rafael      Nadal                    6
##  3 Alexander   Zverev                   5
##  4 Grigor      Dimitrov                 4
##  5 Jo-Wilfried Tsonga                   4
##  6 Lucas       Pouille                  3
##  7 Jack        Sock                     3
##  8 Roberto     Bautista Agut            2
##  9 Novak       Djokovic                 2
## 10 Damir       Dzumhur                  2
## # ... with 25 more rows

4 - Players which won the tourney at minimum duration

match_df <-left_join(stats_df,score_df, by=c("match_id" = "match_id")) %>%  
  select(match_id,match_duration,winner_player_id.x)

winners_match <-  left_join(player_df,match_df,by=c("player_id" = "winner_player_id.x"))

  left_join(tourney_df,winners_match,by=c("singles_winner_player_id" = "player_id")) %>% 
  drop_na() %>%
  select(singles_winner_player_id,first_name,last_name,match_duration) %>% 
  group_by(singles_winner_player_id) %>% 
  transmute(first_name,last_name,min_match_duration = min(match_duration)) %>% distinct() %>%
  arrange(min_match_duration)
## # A tibble: 35 x 4
## # Groups:   singles_winner_player_id [35]
##    singles_winner_player_id first_name last_name min_match_duration
##    <chr>                    <chr>      <chr>                  <dbl>
##  1 se73                     Yuichi     Sugita                     8
##  2 pf39                     Lucas      Pouille                   11
##  3 z355                     Alexander  Zverev                    13
##  4 f401                     David      Ferrer                    18
##  5 n409                     Rafael     Nadal                     24
##  6 i165                     Denis      Istomin                   24
##  7 g967                     Peter      Gojowczyk                 29
##  8 d923                     Damir      Dzumhur                   30
##  9 c977                     Marin      Cilic                     31
## 10 sm25                     Jack       Sock                      32
## # ... with 25 more rows