This assingment contains data analyzing on ATP World Tour Tennis Data
url<-url("https://github.com/pjournal/mef03-tugceaydin/blob/master/atp_tennis_data_2017.RData?raw=true")
atp_tennis<- load(url)
view(tourney_df)
view(player_df)
view(stats_df )
view(score_df)
view(rank_df)
view(data_source_url)
join_country<-left_join(tourney_df, player_df, by = c("singles_winner_player_id" = "player_id"))
rank_country<-join_country%>% count(flag_code,sort=TRUE)
rank_country
## # A tibble: 21 x 2
## flag_code n
## <chr> <int>
## 1 ESP 11
## 2 USA 9
## 3 SUI 8
## 4 FRA 7
## 5 GER 7
## 6 BUL 4
## 7 ARG 2
## 8 BEL 2
## 9 BIH 2
## 10 CRO 2
## # ... with 11 more rows
join_country<-left_join(tourney_df, player_df, by = c("singles_winner_player_id" = "player_id"))
no_championships <- player_df %>% select(player_id,flag_code)%>% anti_join(.,rank_country)
## Joining, by = "flag_code"
no_championships<-no_championships%>% left_join(.,score_df, by = c("player_id" = "winner_player_id"))%>%group_by(flag_code)%>% summarise(total_won=sum(winner_games_won,na.rm=TRUE))%>%arrange(desc(total_won))
no_championships
## # A tibble: 93 x 2
## flag_code total_won
## <chr> <dbl>
## 1 AUS 1989
## 2 CZE 1209
## 3 CAN 1190
## 4 SVK 889
## 5 BRA 873
## 6 POR 621
## 7 RSA 566
## 8 KAZ 495
## 9 KOR 438
## 10 GEO 377
## # ... with 83 more rows
join_match<-score_df%>%select(winner_player_id,match_id) %>% inner_join(stats_df, score_df, by = c("match_id" = "match_id"))
winner_player <- inner_join(join_match, player_df, by = c("winner_player_id.x" = "player_id"))%>% select(first_name,last_name,match_duration)%>%arrange(desc(match_duration))
by_raw_data <- winner_player %>%mutate(PlayerName=paste(first_name," ",last_name),rank =row_number(),vars_group = 'match_duration' ) %>% filter(rank<6) %>%select(PlayerName,match_duration)
by_raw_data
## # A tibble: 5 x 2
## PlayerName match_duration
## <chr> <dbl>
## 1 Ivo Karlovic 315
## 2 Rafael Nadal 296
## 3 Albert Ramos-Vinolas 295
## 4 Denis Istomin 289
## 5 Gilles Muller 288
join_winner<-inner_join(player_df, tourney_df, by = c("player_id" = "singles_winner_player_id"))%>%select(player_id,first_name,last_name)
join_age<-inner_join(join_winner, rank_df, by = c("player_id" = "player_id"))
join_age <- join_age%>%arrange(player_age)%>%distinct(first_name,last_name,player_age)
by_age_data <- join_age %>%mutate(PlayerName=paste(first_name," ",last_name),rank =row_number(),vars_group = 'player_age' ) %>% filter(rank<6) %>%select(PlayerName,player_age)
by_age_data
## # A tibble: 5 x 2
## PlayerName player_age
## <chr> <dbl>
## 1 Andrey Rublev 19
## 2 Alexander Zverev 19
## 3 Borna Coric 20
## 4 Andrey Rublev 20
## 5 Alexander Zverev 20