This assingment contains data analyzing on ATP World Tour Tennis Data
 

Data Source

Data Extraction

url<-url("https://github.com/pjournal/mef03-tugceaydin/blob/master/atp_tennis_data_2017.RData?raw=true")
atp_tennis<- load(url)
 

view(tourney_df)
view(player_df)   
view(stats_df )       
view(score_df)     
view(rank_df)         
view(data_source_url)

Rank countries (flag codes) by the number of singles champions

join_country<-left_join(tourney_df, player_df, by = c("singles_winner_player_id" = "player_id"))

rank_country<-join_country%>%  count(flag_code,sort=TRUE)

rank_country
## # A tibble: 21 x 2
##    flag_code     n
##    <chr>     <int>
##  1 ESP          11
##  2 USA           9
##  3 SUI           8
##  4 FRA           7
##  5 GER           7
##  6 BUL           4
##  7 ARG           2
##  8 BEL           2
##  9 BIH           2
## 10 CRO           2
## # ... with 11 more rows

Rank countries which did not get any singles championships by the games won when they win the match

join_country<-left_join(tourney_df, player_df, by = c("singles_winner_player_id" = "player_id"))

no_championships <- player_df %>% select(player_id,flag_code)%>% anti_join(.,rank_country)
## Joining, by = "flag_code"
no_championships<-no_championships%>% left_join(.,score_df,  by = c("player_id" = "winner_player_id"))%>%group_by(flag_code)%>%  summarise(total_won=sum(winner_games_won,na.rm=TRUE))%>%arrange(desc(total_won))


no_championships
## # A tibble: 93 x 2
##    flag_code total_won
##    <chr>         <dbl>
##  1 AUS            1989
##  2 CZE            1209
##  3 CAN            1190
##  4 SVK             889
##  5 BRA             873
##  6 POR             621
##  7 RSA             566
##  8 KAZ             495
##  9 KOR             438
## 10 GEO             377
## # ... with 83 more rows

5 winner player which had maximum match duration

join_match<-score_df%>%select(winner_player_id,match_id) %>% inner_join(stats_df, score_df, by = c("match_id" = "match_id"))

winner_player <-  inner_join(join_match, player_df, by = c("winner_player_id.x" = "player_id"))%>% select(first_name,last_name,match_duration)%>%arrange(desc(match_duration))

by_raw_data <- winner_player %>%mutate(PlayerName=paste(first_name," ",last_name),rank =row_number(),vars_group = 'match_duration' ) %>% filter(rank<6) %>%select(PlayerName,match_duration) 

by_raw_data
## # A tibble: 5 x 2
##   PlayerName             match_duration
##   <chr>                           <dbl>
## 1 Ivo   Karlovic                    315
## 2 Rafael   Nadal                    296
## 3 Albert   Ramos-Vinolas            295
## 4 Denis   Istomin                   289
## 5 Gilles   Muller                   288

The most 5 youngest single winner player

join_winner<-inner_join(player_df, tourney_df, by = c("player_id" = "singles_winner_player_id"))%>%select(player_id,first_name,last_name)


join_age<-inner_join(join_winner, rank_df, by = c("player_id" = "player_id"))

join_age <- join_age%>%arrange(player_age)%>%distinct(first_name,last_name,player_age)

by_age_data <- join_age %>%mutate(PlayerName=paste(first_name," ",last_name),rank =row_number(),vars_group = 'player_age' ) %>% filter(rank<6) %>%select(PlayerName,player_age) 

by_age_data
## # A tibble: 5 x 2
##   PlayerName         player_age
##   <chr>                   <dbl>
## 1 Andrey   Rublev            19
## 2 Alexander   Zverev         19
## 3 Borna   Coric              20
## 4 Andrey   Rublev            20
## 5 Alexander   Zverev         20