Importing data

library(dplyr)
library(tidyverse)
library(tidyr)
library(stringr)
library(knitr)

load("C:/Users/OF2646/Desktop/atp_tennis_data_2017.RData")

Task1

Rank countries (flag codes) by the singles champions.

champ_flags_df_n <- left_join(tourney_df,player_df,by=c("singles_winner_player_id"="player_id")) %>% count(flag_code,sort=TRUE)

kable(head(champ_flags_df_n))
flag_code n
ESP 11
USA 9
SUI 8
FRA 7
GER 7
BUL 4

Task2

Rank countries which did not get only singles champions by the sets won whom they win the match.

champ_flags_df <- left_join(tourney_df,player_df,by=c("singles_winner_player_id"="player_id")) 
no_single_winner <- anti_join(player_df,champ_flags_df,by=c("player_id"="singles_winner_player_id"))
no_single_winner <- no_single_winner %>% select(player_id,flag_code)
no_single_winner_games <- inner_join(no_single_winner,score_df,by=c("player_id"="winner_player_id")) %>%
  select(flag_code,winner_sets_won) %>% group_by(flag_code) %>% summarize(toplam=sum(winner_sets_won)) %>%
  arrange(desc(toplam))

kable(head(no_single_winner_games))
flag_code toplam
FRA 602
USA 501
ARG 358
GER 341
ITA 289
AUS 285

Task3

Rank the matches that French players won by duration.

stats_df_match <- stats_df %>% select(match_id,match_duration,winner_player_id)
france <- left_join(stats_df_match,player_df,by=c("winner_player_id"="player_id")) %>%
  filter(flag_code=="FRA") %>% select(match_id,match_duration)%>%arrange(desc(match_duration))

kable(head(france))
match_id match_duration
2017-580-t786-sm25 213
2017-540-me82-se73 213
2017-540-me82-mc65 210
2017-560-pf39-dc58 210
2017-580-pd31-f510 209
2017-560-m873-r772 207

Task4

Players who won single championship in their birth place.

location <- separate(tourney_df,tourney_location,into=c("City","Country"),sep = ",") %>%
  select(Country, singles_winner_player_id)
player_birth <- separate(player_df,birth_place,into=c("City","Country"),sep = ",") %>%
  select(Country, player_id)
## Warning: Expected 2 pieces. Additional pieces discarded in 512 rows [4, 9,
## 10, 18, 20, 37, 39, 41, 44, 46, 54, 66, 79, 91, 114, 141, 188, 408, 411,
## 415, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 180 rows
## [50, 58, 120, 129, 220, 319, 344, 771, 788, 817, 863, 880, 1021, 1052,
## 1396, 1496, 1589, 1600, 1616, 1623, ...].
winner_in_birth_place <- inner_join(location,player_birth,
                                    by=c("Country","singles_winner_player_id"="player_id"))
kable(head(winner_in_birth_place))
Country singles_winner_player_id
Bulgaria d875
France t786
Germany z355
Switzerland w367
France t786
Switzerland f324