ATP Tennis Analysis

Emre Usta 12/1/2019

Countries by the number of singles champions

champs_country <-
  tourney_df %>%
  left_join(player_df, by = c("singles_winner_player_id" = "player_id")) %>%
  count(flag_code,sort=TRUE,name = "championships")

champs_country
## # A tibble: 21 x 2
##    flag_code championships
##    <chr>             <int>
##  1 ESP                  11
##  2 USA                   9
##  3 SUI                   8
##  4 FRA                   7
##  5 GER                   7
##  6 BUL                   4
##  7 ARG                   2
##  8 BEL                   2
##  9 BIH                   2
## 10 CRO                   2
## # ... with 11 more rows

Countries which did not get any singles championships by the games won when they win the match

#Listing player didn't get any singles championships
nonchamps <- 
  player_df %>%
  select(player_id,flag_code) %>%
  anti_join(.,champs_country)
## Joining, by = "flag_code"
nonchamps %>%
  left_join(.,score_df,by = c("player_id"="winner_player_id")) %>%
  group_by(flag_code) %>%
  summarise(total_won = sum(winner_games_won,na.rm = TRUE)) %>%
  arrange(desc(total_won))
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # ... with 83 more rows

Players with most week at number 1

rank_df %>%
  left_join(.,player_df) %>%
  filter(rank_number == 1) %>%
  group_by(first_name,last_name) %>%
  summarise(weeks_at_one = sum(rank_number))
## Joining, by = "player_id"

## # A tibble: 2 x 3
## # Groups:   first_name [2]
##   first_name last_name weeks_at_one
##   <chr>      <chr>            <dbl>
## 1 Andy       Murray              29
## 2 Rafael     Nadal               13

Players with highest ace percentage in the matches they won

#Listing player didn't get any singles championships
stats_df %>%
  inner_join(.,player_df, by = c("winner_player_id"="player_id")) %>%
  filter(first_name != '' & last_name != '') %>%
  group_by(first_name,last_name) %>%
  summarise(won_points = sum(winner_total_points_won), won_aces = sum(winner_aces), ace_percentage = (won_aces/won_points)*100) %>%
  arrange(desc(ace_percentage))
## # A tibble: 312 x 5
## # Groups:   first_name [256]
##    first_name last_name won_points won_aces ace_percentage
##    <chr>      <chr>          <dbl>    <dbl>          <dbl>
##  1 Marsel     Ilhan             46       14           30.4
##  2 Eduardo    Struvay           78       20           25.6
##  3 John       Isner           3074      746           24.3
##  4 Ivo        Karlovic        1437      310           21.6
##  5 Reilly     Opelka           666      135           20.3
##  6 Sam        Groth            331       64           19.3
##  7 Matthew    Barton           314       59           18.8
##  8 Gilles     Muller          2807      526           18.7
##  9 Milos      Raonic          2538      454           17.9
## 10 Sam        Querrey         3132      518           16.5
## # ... with 302 more rows

Players with highest double fault per match ratio in the matches they lost at least 10 matches

stats_df %>%
  inner_join(.,player_df, by = c("winner_player_id"="player_id")) %>%
  group_by(first_name,last_name) %>%
  summarise(double_faults = sum(loser_double_faults), lost_match = n(), double_faults_per_lost_match = double_faults/lost_match) %>%
  filter(lost_match >= 10) %>%
  arrange(desc(double_faults_per_lost_match))
## # A tibble: 141 x 5
## # Groups:   first_name [127]
##    first_name   last_name  double_faults lost_match double_faults_per_lost~
##    <chr>        <chr>              <dbl>      <int>                   <dbl>
##  1 Ernests      Gulbis                55         10                    5.5 
##  2 Nicolas      Mahut                 82         16                    5.12
##  3 Ruben        Bemelmans             50         10                    5   
##  4 Stefan       Kozlov                52         11                    4.73
##  5 Denis        Shapovalov            51         11                    4.64
##  6 Cedrik-Marc~ Stebe                 55         12                    4.58
##  7 Thomas       Fabbiano              64         14                    4.57
##  8 Marius       Copil                 77         17                    4.53
##  9 Jared        Donaldson            123         28                    4.39
## 10 Rogerio      Dutra Sil~            70         16                    4.38
## # ... with 131 more rows