Join Assignment

Furkan Oktay Sevimli

library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.2.1     <U+221A> purrr   0.3.2
## <U+221A> tibble  2.1.3     <U+221A> dplyr   0.8.3
## <U+221A> tidyr   1.0.0     <U+221A> stringr 1.4.0
## <U+221A> readr   1.3.1     <U+221A> forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
url<-url("https://github.com/pjournal/mef03-FurkanSevimli/raw/master/atp_tennis_data_2017.RData?raw=TRUE")
atp_tennis<-load(url)

Rank countries (Flag codes) by the single champions.

singles_winners<-left_join(tourney_df,player_df,by=c("singles_winner_player_id"="player_id"))
champ_flags_df<-singles_winners%>%select(singles_winner_player_id,flag_code)%>%count(flag_code,sort=T)


kable(champ_flags_df) %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "30%", height = "400px")
flag_code n
ESP 11
USA 9
SUI 8
FRA 7
GER 7
BUL 4
ARG 2
BEL 2
BIH 2
CRO 2
LUX 2
SRB 2
AUT 1
DOM 1
GBR 1
ITA 1
JPN 1
RUS 1
UKR 1
URU 1
UZB 1

Rank countries which did not get any singles championships by the games won when they win the match.

nonchamp_players<- player_df %>%select(player_id, flag_code) %>%anti_join(., champ_flags_df,by="flag_code")
nonchamp_players %>% left_join(.,score_df, by= c("player_id"="winner_player_id")) %>%group_by(flag_code) %>%summarise(total_won= sum(winner_games_won, na.rm=TRUE)) %>%arrange(desc(total_won))
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # ... with 83 more rows

Best country according to tourney surface at single player

Champflag_tourney_surface <- inner_join(player_df,tourney_df, by= c("player_id"="singles_winner_player_id")) %>% group_by(tourney_surface) %>% count(flag_code) %>% top_n(1,n) 

kable(Champflag_tourney_surface) %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "50%", height = "200px")
tourney_surface flag_code n
Clay ESP 6
Grass SUI 2
Hard USA 7

Total Match Durations of Winners

match_dur <- inner_join(player_df,score_df, by=c("player_id"="winner_player_id"))
match_dur <- inner_join(stats_df,match_dur, by = c("match_id"="match_id")) %>% group_by(player_slug)%>% summarise(total_match_duration=sum(match_duration, na.rm=TRUE)) %>% top_n(3,total_match_duration) %>% arrange(desc(total_match_duration))

kable(match_dur) %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "50%", height = "200px")
player_slug total_match_duration
rafael-nadal 7445
david-goffin 5483
alexander-zverev 5218