raw_df <- load(url("https://github.com/pjournal/mef04-nerigrsy/blob/gh-pages/atp_tennis_data_2017.RData?raw=true"))

glimpse(raw_df)
##  chr [1:6] "tourney_df" "player_df" "stats_df" "score_df" "rank_df" ...
head(raw_df)
## [1] "tourney_df"      "player_df"       "stats_df"        "score_df"       
## [5] "rank_df"         "data_source_url"
tourney_df %>% print(n=10)
## # A tibble: 67 x 12
##    tourney_id tourney_name tourney_location tourney_date tourney_singles~
##         <dbl> <chr>        <chr>            <date>                  <dbl>
##  1        339 Brisbane In~ Brisbane, Austr~ 2017-01-01                 28
##  2        891 Aircel Chen~ Chennai, India   2017-01-02                 28
##  3        451 Qatar Exxon~ Doha, Qatar      2017-01-02                 32
##  4        338 Sydney Inte~ Sydney, Austral~ 2017-01-08                 28
##  5        301 ASB Classic  Auckland, New Z~ 2017-01-09                 28
##  6        580 Australian ~ Melbourne, Aust~ 2017-01-16                128
##  7        375 Open Sud de~ Montpellier, Fr~ 2017-02-06                 28
##  8       7161 Ecuador Open Quito, Ecuador   2017-02-06                 28
##  9       7434 Garanti Koz~ Sofia, Bulgaria  2017-02-06                 28
## 10        506 Argentina O~ Buenos Aires, A~ 2017-02-13                 28
## # ... with 57 more rows, and 7 more variables: tourney_doubles_draw <dbl>,
## #   tourney_conditions <chr>, tourney_surface <chr>, tourney_fin_commit <chr>,
## #   singles_winner_player_id <chr>, doubles_winner_1_player_id <chr>,
## #   doubles_winner_2_player_id <chr>
player_df %>% print(n=5)
## # A tibble: 10,912 x 13
##   player_id player_slug first_name last_name flag_code residence birth_place
##   <chr>     <chr>       <chr>      <chr>     <chr>     <chr>     <chr>      
## 1 a002      ricardo-ac~ Ricardo    Acuna     CHI       Jupiter,~ Santiago, ~
## 2 a001      sadiq-abdu~ Sadiq      Abdullahi NGR       <NA>      <NA>       
## 3 a005      nelson-aer~ Nelson     Aerts     BRA       <NA>      Cachoeira ~
## 4 a004      egan-adams  Egan       Adams     USA       Palmetto~ Miami Beac~
## 5 a006      ronald-age~ Ronald     Agenor    USA       Beverly ~ Rabat, Mor~
## # ... with 10,907 more rows, and 6 more variables: birth_date <date>,
## #   turned_pro <dbl>, weight_kg <dbl>, height_cm <dbl>, handedness <chr>,
## #   backhand <chr>
score_df %>% print(n=10)
## # A tibble: 3,830 x 14
##    match_id tourney_id tourney_round_n~ winner_player_id loser_player_id
##    <chr>    <chr>      <chr>            <chr>            <chr>          
##  1 2017-33~ 339        Finals           d875             n552           
##  2 2017-33~ 339        Semi-Finals      d875             r975           
##  3 2017-33~ 339        Semi-Finals      n552             w367           
##  4 2017-33~ 339        Quarter-Finals   r975             n409           
##  5 2017-33~ 339        Quarter-Finals   w367             e831           
##  6 2017-33~ 339        Quarter-Finals   n552             tc61           
##  7 2017-33~ 339        Quarter-Finals   d875             tb69           
##  8 2017-33~ 339        Round of 16      r975             sm37           
##  9 2017-33~ 339        Round of 16      w367             t840           
## 10 2017-33~ 339        Round of 16      n552             dc58           
## # ... with 3,820 more rows, and 9 more variables: winner_seed <chr>,
## #   loser_seed <chr>, match_score_tiebreaks <chr>, winner_sets_won <dbl>,
## #   loser_sets_won <dbl>, winner_games_won <dbl>, loser_games_won <dbl>,
## #   winner_tiebreaks_won <dbl>, loser_tiebreaks_won <dbl>
rank_df %>% print(n=5)
## # A tibble: 87,740 x 6
##   week_title player_id rank_number ranking_points tourneys_played player_age
##   <date>     <chr>           <dbl>          <dbl>           <dbl>      <dbl>
## 1 2017-11-20 n409                1          10645              18         31
## 2 2017-11-20 f324                2           9605              17         36
## 3 2017-11-20 d875                3           5150              23         26
## 4 2017-11-20 z355                4           4610              25         20
## 5 2017-11-20 tb69                5           4015              27         24
## # ... with 87,735 more rows
stats_df %>% print(n=5)
## # A tibble: 3,811 x 54
##   match_id match_time match_duration winner_aces winner_double_f~
##   <chr>    <time>              <dbl>       <dbl>            <dbl>
## 1 2017-33~ 02:21                 141          23                3
## 2 2017-33~ 01:49                 109           7                2
## 3 2017-33~ 01:28                  88           4                1
## 4 2017-33~ 01:42                 102           1                1
## 5 2017-33~ 02:05                 125           3                3
## # ... with 3,806 more rows, and 49 more variables:
## #   winner_first_serves_in <dbl>, winner_first_serves_total <dbl>,
## #   winner_first_serve_points_won <dbl>, winner_first_serve_points_total <dbl>,
## #   winner_second_serve_points_won <dbl>,
## #   winner_second_serve_points_total <dbl>, winner_break_points_saved <dbl>,
## #   winner_break_points_serve_total <dbl>, winner_service_points_won <dbl>,
## #   winner_service_points_total <dbl>, winner_first_serve_return_won <dbl>,
## #   winner_first_serve_return_total <dbl>,
## #   winner_second_serve_return_won <dbl>,
## #   winner_second_serve_return_total <dbl>,
## #   winner_break_points_converted <dbl>,
## #   winner_break_points_return_total <dbl>, winner_service_games_played <dbl>,
## #   winner_return_games_played <dbl>, winner_return_points_won <dbl>,
## #   winner_return_points_total <dbl>, winner_total_points_won <dbl>,
## #   winner_total_points_total <dbl>, loser_aces <dbl>,
## #   loser_double_faults <dbl>, loser_first_serves_in <dbl>,
## #   loser_first_serves_total <dbl>, loser_first_serve_points_won <dbl>,
## #   loser_first_serve_points_total <dbl>, loser_second_serve_points_won <dbl>,
## #   loser_second_serve_points_total <dbl>, loser_break_points_saved <dbl>,
## #   loser_break_points_serve_total <dbl>, loser_service_points_won <dbl>,
## #   loser_service_points_total <dbl>, loser_first_serve_return_won <dbl>,
## #   loser_first_serve_return_total <dbl>, loser_second_serve_return_won <dbl>,
## #   loser_second_serve_return_total <dbl>, loser_break_points_converted <dbl>,
## #   loser_break_points_return_total <dbl>, loser_service_games_played <dbl>,
## #   loser_return_games_played <dbl>, loser_return_points_won <dbl>,
## #   loser_return_points_total <dbl>, loser_total_points_won <dbl>,
## #   loser_total_points_total <dbl>, tourney_id <chr>, winner_player_id <chr>,
## #   loser_player_id <chr>

Top 20 players with the number matches (top winners by quantity)

top_20 <- score_df %>% 
  count(winner_player_id, sort = TRUE) %>%
  slice(1:20) %>%
  inner_join(player_df, by=c("winner_player_id" = "player_id")) %>%
  select(first_name, last_name, n) %>%
  rename(., number_of_win = n)

kable(top_20, col.names = c("First Name", "Last Name", "Number of Win"))
First Name Last Name Number of Win
Rafael Nadal 67
Alexander Zverev 54
Roger Federer 53
David Goffin 53
Grigor Dimitrov 49
Dominic Thiem 47
Roberto Bautista Agut 46
Marin Cilic 43
Adrian Mannarino 39
Juan Martin del Potro 38
Jack Sock 38
Diego Schwartzman 38
Damir Dzumhur 37
Pablo Carreno Busta 36
John Isner 36
Tomas Berdych 35
Fabio Fognini 35
Sam Querrey 35
Jo-Wilfried Tsonga 35
Kevin Anderson 34

Players which won tourneys more than one and their number of championship

  left_join(player_df, tourney_df, by=c("player_id" = "singles_winner_player_id")) %>% 
  drop_na() %>%
  transmute(player_id,flag_code,tourney_name,first_name,last_name,winner_count = 1) %>%
  group_by(player_id) %>% 
  transmute(winner_count = sum(winner_count),flag_code,first_name,last_name) %>% 
  arrange(desc(winner_count)) %>%
  distinct() %>%
  ungroup() %>% 
  select(first_name,last_name,winner_count) %>%
  #head(10)
kable()
first_name last_name winner_count
Roger Federer 7
Rafael Nadal 6
Alexander Zverev 5
Grigor Dimitrov 4
Jo-Wilfried Tsonga 4
Lucas Pouille 3
Jack Sock 3
Roberto Bautista Agut 2
Novak Djokovic 2
Damir Dzumhur 2
David Goffin 2
John Isner 2
Gilles Muller 2
Sam Querrey 2
Pablo Cuevas 1
Marin Cilic 1
Pablo Carreno Busta 1
Borna Coric 1
Juan Martin del Potro 1
Alexandr Dolgopolov 1
Victor Estrella Burgos 1
David Ferrer 1
Fabio Fognini 1
Peter Gojowczyk 1
Ryan Harrison 1
Denis Istomin 1
Steve Johnson 1
Philipp Kohlschreiber 1
Feliciano Lopez 1
Andy Murray 1
Leonardo Mayer 1
Andrey Rublev 1
Yuichi Sugita 1
Dominic Thiem 1
Stan Wawrinka 1