Tibet Erdoğan 01/12/2019

library(tidyverse) 
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.2.1     <U+221A> purrr   0.3.3
## <U+221A> tibble  2.1.3     <U+221A> dplyr   0.8.3
## <U+221A> tidyr   1.0.0     <U+221A> stringr 1.4.0
## <U+221A> readr   1.3.1     <U+221A> forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr) 
load("C:/Users/Dell/Downloads/atp_tennis_data_2017.RData")

Rank Countries (Flag codes) by the singles champions

country <- player_df %>% inner_join(tourney_df,by = c("player_id" ="singles_winner_player_id")) %>% group_by(flag_code)%>% summarize(total_wins = n()) %>% arrange(desc(total_wins))
country
## # A tibble: 21 x 2
##    flag_code total_wins
##    <chr>          <int>
##  1 ESP               11
##  2 USA                9
##  3 SUI                8
##  4 FRA                7
##  5 GER                7
##  6 BUL                4
##  7 ARG                2
##  8 BEL                2
##  9 BIH                2
## 10 CRO                2
## # ... with 11 more rows

Rank countries wihch did not get any single championships by the games won when they win the match

non_champ <- player_df %>% anti_join(country)
## Joining, by = "flag_code"
non_champ_win <- non_champ %>% left_join(score_df,by = c("player_id" = "winner_player_id")) %>% group_by(flag_code) %>% summarise(total_won = sum(winner_games_won, na.rm = TRUE)) %>% arrange(desc(total_won))
non_champ_win
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # ... with 83 more rows

Players who win most in finals

players_best <- score_df %>% inner_join(player_df,by = c("winner_player_id" = "player_id")) %>% select(tourney_round_name,first_name,last_name,flag_code,winner_player_id) %>% group_by(tourney_round_name,first_name,last_name) %>% filter(tourney_round_name == "Finals") %>% count(winner_player_id, sort = TRUE ) %>% filter(n > 2)
players_best
## # A tibble: 7 x 5
## # Groups:   tourney_round_name, first_name, last_name [7]
##   tourney_round_name first_name  last_name winner_player_id     n
##   <chr>              <chr>       <chr>     <chr>            <int>
## 1 Finals             Roger       Federer   f324                 7
## 2 Finals             Rafael      Nadal     n409                 6
## 3 Finals             Alexander   Zverev    z355                 5
## 4 Finals             Grigor      Dimitrov  d875                 4
## 5 Finals             Jo-Wilfried Tsonga    t786                 4
## 6 Finals             Jack        Sock      sm25                 3
## 7 Finals             Lucas       Pouille   pf39                 3

Countries of the fault winners in matches

no_fault <- stats_df %>% select(match_id, winner_double_faults) %>% filter(winner_double_faults != 0)
player_score <- score_df %>% select(match_id, winner_player_id)
no_fault_players <- no_fault %>% inner_join(., player_score, by = c("match_id"))
no_fault_countries <- no_fault_players %>% left_join(., player_df, by = c("winner_player_id" = "player_id")) %>% count(flag_code, sort = TRUE)
no_fault_countries
## # A tibble: 60 x 2
##    flag_code     n
##    <chr>     <int>
##  1 USA         352
##  2 FRA         316
##  3 ESP         256
##  4 GER         253
##  5 ARG         190
##  6 ITA         146
##  7 RUS         133
##  8 AUS         119
##  9 GBR         104
## 10 SRB         103
## # ... with 50 more rows

Rank players who have never break in matches by match duration, adding country

no_break_match <- stats_df %>% select(match_id, winner_break_points_saved, match_duration) %>% filter(winner_break_points_saved != 0)
countries_player <- player_df %>% select(player_id, player_slug, flag_code)
no_break_players <- no_break_match %>% left_join(., player_score, by = c("match_id")) %>% group_by(winner_player_id) %>% summarise(total_duration = sum(match_duration, na.rm = TRUE)) %>% left_join(., countries_player, 
by = c("winner_player_id" = "player_id")) %>% arrange(desc(total_duration))
no_break_players
## # A tibble: 325 x 4
##    winner_player_id total_duration player_slug           flag_code
##    <chr>                     <dbl> <chr>                 <chr>    
##  1 n409                       6259 rafael-nadal          ESP      
##  2 gb88                       5056 david-goffin          BEL      
##  3 z355                       4232 alexander-zverev      GER      
##  4 r772                       4230 albert-ramos-vinolas  ESP      
##  5 bd06                       4038 roberto-bautista-agut ESP      
##  6 tb69                       3990 dominic-thiem         AUT      
##  7 me82                       3932 adrian-mannarino      FRA      
##  8 f324                       3863 roger-federer         SUI      
##  9 d875                       3784 grigor-dimitrov       BUL      
## 10 n552                       3638 kei-nishikori         JPN      
## # ... with 315 more rows