library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
url<-url("https://github.com/pjournal/mef03-scivan/blob/master/atp_tennis_data_2017.RData?raw=TRUE")
atp_tennis<-load(url)
singles_winners<-left_join(tourney_df,player_df,by=c("singles_winner_player_id"="player_id"))
champ_flags_df<-singles_winners%>%select(singles_winner_player_id,flag_code)%>%count(flag_code,sort=T)
champ_flags_df
## # A tibble: 21 x 2
## flag_code n
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # … with 11 more rows
no_champ_players<- player_df %>%select(player_id, flag_code) %>%anti_join(., champ_flags_df,by="flag_code")
no_champ_players %>% left_join(.,score_df, by= c("player_id"="winner_player_id")) %>%group_by(flag_code) %>%summarise(total_won= sum(winner_games_won, na.rm=TRUE)) %>%arrange(desc(total_won))
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # … with 83 more rows
score_df$tourney_id <- as.numeric(score_df$tourney_id)
match_num_tourney <- left_join(score_df,tourney_df) %>% select("tourney_name","match_id") %>% count(tourney_name) %>% arrange(desc(n))
## Joining, by = "tourney_id"
match_num_tourney
## # A tibble: 67 x 2
## tourney_name n
## <chr> <int>
## 1 Australian Open 239
## 2 Roland Garros 239
## 3 Wimbledon 239
## 4 BNP Paribas Open 131
## 5 Miami Open presented by Itaú 131
## 6 US Open 127
## 7 Coupe Rogers 76
## 8 Internazionali BNL d’Italia 76
## 9 Mutua Madrid Open 76
## 10 Rolex Monte-Carlo Masters 76
## # … with 57 more rows
players_tourneys <- rank_df %>% group_by(player_id) %>% summarise(tourneys_played = sum(tourneys_played)) %>% select(player_id,tourneys_played)
players_tourneys1 <- left_join(players_tourneys,player_df) %>% select("player_id","player_slug","tourneys_played")%>% na.omit() %>% arrange(desc(tourneys_played))
## Joining, by = "player_id"
players_tourneys1
## # A tibble: 977 x 3
## player_id player_slug tourneys_played
## <chr> <chr> <dbl>
## 1 o482 goncalo-oliveira 1771
## 2 n561 ivan-nedelko 1477
## 3 pg13 jaume-pla-malfeito 1434
## 4 z184 horacio-zeballos 1381
## 5 pd31 benoit-paire 1373
## 6 sq80 akira-santillan 1369
## 7 c813 ti-chen 1359
## 8 l503 paolo-lorenzi 1352
## 9 u120 kaichi-uchida 1352
## 10 ga79 lorenzo-giustino 1329
## # … with 967 more rows
players_tourneys <- left_join(player_df,rank_df) %>% select(“player_slug”,“tourneys_played”)