Tibet Erdoğan 01/12/2019
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.2.1 <U+221A> purrr 0.3.3
## <U+221A> tibble 2.1.3 <U+221A> dplyr 0.8.3
## <U+221A> tidyr 1.0.0 <U+221A> stringr 1.4.0
## <U+221A> readr 1.3.1 <U+221A> forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
load("C:/Users/Dell/Downloads/atp_tennis_data_2017.RData")
country <- player_df %>% inner_join(tourney_df,by = c("player_id" ="singles_winner_player_id")) %>% group_by(flag_code)%>% summarize(total_wins = n()) %>% arrange(desc(total_wins))
country
## # A tibble: 21 x 2
## flag_code total_wins
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # ... with 11 more rows
non_champ <- player_df %>% anti_join(country)
## Joining, by = "flag_code"
non_champ_win <- non_champ %>% left_join(score_df,by = c("player_id" = "winner_player_id")) %>% group_by(flag_code) %>% summarise(total_won = sum(winner_games_won, na.rm = TRUE)) %>% arrange(desc(total_won))
non_champ_win
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # ... with 83 more rows
players_best <- score_df %>% inner_join(player_df,by = c("winner_player_id" = "player_id")) %>% select(tourney_round_name,first_name,last_name,flag_code,winner_player_id) %>% group_by(tourney_round_name,first_name,last_name) %>% filter(tourney_round_name == "Finals") %>% count(winner_player_id, sort = TRUE ) %>% filter(n > 2)
players_best
## # A tibble: 7 x 5
## # Groups: tourney_round_name, first_name, last_name [7]
## tourney_round_name first_name last_name winner_player_id n
## <chr> <chr> <chr> <chr> <int>
## 1 Finals Roger Federer f324 7
## 2 Finals Rafael Nadal n409 6
## 3 Finals Alexander Zverev z355 5
## 4 Finals Grigor Dimitrov d875 4
## 5 Finals Jo-Wilfried Tsonga t786 4
## 6 Finals Jack Sock sm25 3
## 7 Finals Lucas Pouille pf39 3
no_fault <- stats_df %>% select(match_id, winner_double_faults) %>% filter(winner_double_faults != 0)
player_score <- score_df %>% select(match_id, winner_player_id)
no_fault_players <- no_fault %>% inner_join(., player_score, by = c("match_id"))
no_fault_countries <- no_fault_players %>% left_join(., player_df, by = c("winner_player_id" = "player_id")) %>% count(flag_code, sort = TRUE)
no_fault_countries
## # A tibble: 60 x 2
## flag_code n
## <chr> <int>
## 1 USA 352
## 2 FRA 316
## 3 ESP 256
## 4 GER 253
## 5 ARG 190
## 6 ITA 146
## 7 RUS 133
## 8 AUS 119
## 9 GBR 104
## 10 SRB 103
## # ... with 50 more rows
no_break_match <- stats_df %>% select(match_id, winner_break_points_saved, match_duration) %>% filter(winner_break_points_saved != 0)
countries_player <- player_df %>% select(player_id, player_slug, flag_code)
no_break_players <- no_break_match %>% left_join(., player_score, by = c("match_id")) %>% group_by(winner_player_id) %>% summarise(total_duration = sum(match_duration, na.rm = TRUE)) %>% left_join(., countries_player,
by = c("winner_player_id" = "player_id")) %>% arrange(desc(total_duration))
no_break_players
## # A tibble: 325 x 4
## winner_player_id total_duration player_slug flag_code
## <chr> <dbl> <chr> <chr>
## 1 n409 6259 rafael-nadal ESP
## 2 gb88 5056 david-goffin BEL
## 3 z355 4232 alexander-zverev GER
## 4 r772 4230 albert-ramos-vinolas ESP
## 5 bd06 4038 roberto-bautista-agut ESP
## 6 tb69 3990 dominic-thiem AUT
## 7 me82 3932 adrian-mannarino FRA
## 8 f324 3863 roger-federer SUI
## 9 d875 3784 grigor-dimitrov BUL
## 10 n552 3638 kei-nishikori JPN
## # ... with 315 more rows