Emre Usta 12/1/2019
champs_country <-
tourney_df %>%
left_join(player_df, by = c("singles_winner_player_id" = "player_id")) %>%
count(flag_code,sort=TRUE,name = "championships")
champs_country
## # A tibble: 21 x 2
## flag_code championships
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # ... with 11 more rows
#Listing player didn't get any singles championships
nonchamps <-
player_df %>%
select(player_id,flag_code) %>%
anti_join(.,champs_country)
## Joining, by = "flag_code"
nonchamps %>%
left_join(.,score_df,by = c("player_id"="winner_player_id")) %>%
group_by(flag_code) %>%
summarise(total_won = sum(winner_games_won,na.rm = TRUE)) %>%
arrange(desc(total_won))
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # ... with 83 more rows
rank_df %>%
left_join(.,player_df) %>%
filter(rank_number == 1) %>%
group_by(first_name,last_name) %>%
summarise(weeks_at_one = sum(rank_number))
## Joining, by = "player_id"
## # A tibble: 2 x 3
## # Groups: first_name [2]
## first_name last_name weeks_at_one
## <chr> <chr> <dbl>
## 1 Andy Murray 29
## 2 Rafael Nadal 13
#Listing player didn't get any singles championships
stats_df %>%
inner_join(.,player_df, by = c("winner_player_id"="player_id")) %>%
filter(first_name != '' & last_name != '') %>%
group_by(first_name,last_name) %>%
summarise(won_points = sum(winner_total_points_won), won_aces = sum(winner_aces), ace_percentage = (won_aces/won_points)*100) %>%
arrange(desc(ace_percentage))
## # A tibble: 312 x 5
## # Groups: first_name [256]
## first_name last_name won_points won_aces ace_percentage
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Marsel Ilhan 46 14 30.4
## 2 Eduardo Struvay 78 20 25.6
## 3 John Isner 3074 746 24.3
## 4 Ivo Karlovic 1437 310 21.6
## 5 Reilly Opelka 666 135 20.3
## 6 Sam Groth 331 64 19.3
## 7 Matthew Barton 314 59 18.8
## 8 Gilles Muller 2807 526 18.7
## 9 Milos Raonic 2538 454 17.9
## 10 Sam Querrey 3132 518 16.5
## # ... with 302 more rows
stats_df %>%
inner_join(.,player_df, by = c("winner_player_id"="player_id")) %>%
group_by(first_name,last_name) %>%
summarise(double_faults = sum(loser_double_faults), lost_match = n(), double_faults_per_lost_match = double_faults/lost_match) %>%
filter(lost_match >= 10) %>%
arrange(desc(double_faults_per_lost_match))
## # A tibble: 141 x 5
## # Groups: first_name [127]
## first_name last_name double_faults lost_match double_faults_per_lost~
## <chr> <chr> <dbl> <int> <dbl>
## 1 Ernests Gulbis 55 10 5.5
## 2 Nicolas Mahut 82 16 5.12
## 3 Ruben Bemelmans 50 10 5
## 4 Stefan Kozlov 52 11 4.73
## 5 Denis Shapovalov 51 11 4.64
## 6 Cedrik-Marc~ Stebe 55 12 4.58
## 7 Thomas Fabbiano 64 14 4.57
## 8 Marius Copil 77 17 4.53
## 9 Jared Donaldson 123 28 4.39
## 10 Rogerio Dutra Sil~ 70 16 4.38
## # ... with 131 more rows