url = url("https://mef-bda503.github.io/files/atp_tennis_data_2017.RData")
load(url)
#match_and_players_df = tourney_df %>% select(tourney_id,singles_winner_player_id,doubles_winner_1_player_id,doubles_winner_2_player_id) %>% pivot_longer(cols=c(-tourney_id), names_to="match_type", values_to="player_id")
#player_wins_df = match_and_players_df %>% count(player_id, sort = TRUE, name="win_count")
win_df = score_df %>%
transmute(player_id = winner_player_id) %>%
count(player_id, sort = TRUE, name = "win_count")
loss_df = score_df %>%
transmute(player_id = loser_player_id) %>%
count(player_id, sort = TRUE, name = "loss_count")
player_win_loss = win_df %>%
full_join(loss_df, by="player_id")
player_win_loss[is.na(player_win_loss)] = 0
player_win_loss = player_win_loss %>%
mutate(total_match_count = win_count + loss_count) %>%
mutate(win_rate = round(win_count/total_match_count,2)) %>%
inner_join(player_df, by="player_id") %>%
select("player_id","player_slug","flag_code","win_count","loss_count","total_match_count","win_rate")
clean_names = c("Player ID","Player","Country","Win Quantity","Loss Quantity","Total Matches","Win Rate")
countries_win_df = score_df %>%
transmute(player_id = winner_player_id) %>%
inner_join(player_df, by="player_id") %>%
count(flag_code, sort = TRUE, name = "win_count")
countries_loss_df = score_df %>%
transmute(player_id = loser_player_id) %>%
inner_join(player_df, by="player_id") %>%
count(flag_code, sort = TRUE, name = "loss_count")
countries_win_loss = countries_win_df %>%
full_join(countries_loss_df, by="flag_code")
countries_win_loss[is.na(countries_win_loss)] = 0
countries_win_loss = countries_win_loss %>%
mutate(total_match_count = win_count + loss_count) %>%
mutate(win_rate = round(win_count/total_match_count,2))
players_top_20_by_quantity = player_win_loss%>%top_n(.,20,win_count)
players_top_20_by_rate = player_win_loss%>%top_n(.,20,win_rate)
Pulling data from web with url and load functions and cleaning, preparing them. We used full join because there might be players with no wins or no losses and we don’t want to lose any players in our analysis.
knitr::kable(players_top_20_by_quantity%>%setNames(., clean_names), format="markdown", align='cc')
Player ID | Player | Country | Win Quantity | Loss Quantity | Total Matches | Win Rate |
---|---|---|---|---|---|---|
n409 | rafael-nadal | ESP | 67 | 12 | 79 | 0.85 |
z355 | alexander-zverev | GER | 54 | 21 | 75 | 0.72 |
f324 | roger-federer | SUI | 53 | 5 | 58 | 0.91 |
gb88 | david-goffin | BEL | 53 | 24 | 77 | 0.69 |
d875 | grigor-dimitrov | BUL | 49 | 19 | 68 | 0.72 |
tb69 | dominic-thiem | AUT | 47 | 27 | 74 | 0.64 |
bd06 | roberto-bautista-agut | ESP | 46 | 22 | 68 | 0.68 |
c977 | marin-cilic | CRO | 43 | 22 | 65 | 0.66 |
me82 | adrian-mannarino | FRA | 39 | 27 | 66 | 0.59 |
d683 | juan-martin-del-potro | ARG | 38 | 17 | 55 | 0.69 |
sm25 | jack-sock | USA | 38 | 20 | 58 | 0.66 |
sm37 | diego-schwartzman | ARG | 38 | 27 | 65 | 0.58 |
d923 | damir-dzumhur | BIH | 37 | 25 | 62 | 0.60 |
cd85 | pablo-carreno-busta | ESP | 36 | 24 | 60 | 0.60 |
i186 | john-isner | USA | 36 | 21 | 57 | 0.63 |
ba47 | tomas-berdych | CZE | 35 | 18 | 53 | 0.66 |
f510 | fabio-fognini | ITA | 35 | 23 | 58 | 0.60 |
q927 | sam-querrey | USA | 35 | 20 | 55 | 0.64 |
t786 | jo-wilfried-tsonga | FRA | 35 | 15 | 50 | 0.70 |
a678 | kevin-anderson | RSA | 34 | 21 | 55 | 0.62 |
pf39 | lucas-pouille | FRA | 34 | 20 | 54 | 0.63 |
sl28 | jan-lennard-struff | GER | 34 | 29 | 63 | 0.54 |
Lets see top 20 players by win quantity
knitr::kable(players_top_20_by_rate%>%setNames(., clean_names), format="markdown", align='cc')
Player ID | Player | Country | Win Quantity | Loss Quantity | Total Matches | Win Rate |
---|---|---|---|---|---|---|
n409 | rafael-nadal | ESP | 67 | 12 | 79 | 0.85 |
z355 | alexander-zverev | GER | 54 | 21 | 75 | 0.72 |
f324 | roger-federer | SUI | 53 | 5 | 58 | 0.91 |
gb88 | david-goffin | BEL | 53 | 24 | 77 | 0.69 |
d875 | grigor-dimitrov | BUL | 49 | 19 | 68 | 0.72 |
bd06 | roberto-bautista-agut | ESP | 46 | 22 | 68 | 0.68 |
d683 | juan-martin-del-potro | ARG | 38 | 17 | 55 | 0.69 |
t786 | jo-wilfried-tsonga | FRA | 35 | 15 | 50 | 0.70 |
d643 | novak-djokovic | SRB | 31 | 8 | 39 | 0.79 |
n552 | kei-nishikori | JPN | 30 | 14 | 44 | 0.68 |
r975 | milos-raonic | CAN | 29 | 14 | 43 | 0.67 |
mc10 | andy-murray | GBR | 26 | 10 | 36 | 0.72 |
w367 | stan-wawrinka | SUI | 26 | 11 | 37 | 0.70 |
ba98 | simone-bolelli | ITA | 14 | 7 | 21 | 0.67 |
bf55 | yuki-bhambri | IND | 12 | 5 | 17 | 0.71 |
kb05 | filip-krajinovic | SRB | 12 | 5 | 17 | 0.71 |
o513 | sebastian-ofner | AUT | 9 | 4 | 13 | 0.69 |
s694 | radek-stepanek | CZE | 8 | 2 | 10 | 0.80 |
rc96 | noah-rubin | USA | 6 | 3 | 9 | 0.67 |
bh98 | matthew-barton | AUS | 4 | 2 | 6 | 0.67 |
mm79 | blake-mott | AUS | 4 | 2 | 6 | 0.67 |
db59 | dennis-novak | AUT | 3 | 1 | 4 | 0.75 |
ci14 | jay-clarke | GBR | 2 | 1 | 3 | 0.67 |
ha47 | alexios-halebian | USA | 2 | 1 | 3 | 0.67 |
j620 | maxime-janvier | FRA | 2 | 1 | 3 | 0.67 |
mj01 | roberto-marcora | ITA | 2 | 1 | 3 | 0.67 |
pf00 | mate-pavic | CRO | 2 | 1 | 3 | 0.67 |
ta04 | finn-tearney | NZL | 2 | 1 | 3 | 0.67 |
w521 | marcus-willis | GBR | 2 | 1 | 3 | 0.67 |
Now we see top 20 players by win rate
knitr::kable(countries_win_loss%>%top_n(.,20,win_rate), format="markdown", align='cc')
flag_code | win_count | loss_count | total_match_count | win_rate |
---|---|---|---|---|
USA | 413 | 382 | 795 | 0.52 |
ESP | 322 | 287 | 609 | 0.53 |
GER | 283 | 261 | 544 | 0.52 |
ARG | 230 | 213 | 443 | 0.52 |
GBR | 122 | 98 | 220 | 0.55 |
SRB | 121 | 106 | 227 | 0.53 |
BEL | 103 | 83 | 186 | 0.55 |
JPN | 99 | 88 | 187 | 0.53 |
SUI | 95 | 43 | 138 | 0.69 |
CZE | 80 | 71 | 151 | 0.53 |
AUT | 75 | 54 | 129 | 0.58 |
UKR | 61 | 54 | 115 | 0.53 |
BUL | 49 | 23 | 72 | 0.68 |
BIH | 47 | 41 | 88 | 0.53 |
RSA | 34 | 23 | 57 | 0.60 |
LUX | 32 | 18 | 50 | 0.64 |
KOR | 31 | 26 | 57 | 0.54 |
GRE | 25 | 14 | 39 | 0.64 |
BAR | 13 | 9 | 22 | 0.59 |
CHI | 10 | 9 | 19 | 0.53 |
Top 20 countries by win rate
ggplot(analytical_df, aes(x=as.yearmon(TARIH), group=1)) +
geom_line(aes(y = YABANCI_SATIS_ORAN, color = "Non-TUR Sales") , size = 1) +
geom_line(aes(y = TC_SATIS_ORAN, color = "TUR Sales"), size = 1) +
theme_minimal() +
labs(x = 'Housing Sales Period', y = 'Number of houses sold', color = "Acquired By", title = "Turkish vs Non-Turkish Buyers", subtitle = "Sales numbers between January 2013 and August 2020")
While this graph doesn’t show huge analysis opportunity it supports our first estimate, since purchasing power is going down in the same dates we can see that Non-Turkish buyers are significantly increased.
We can see that since late-2018’s there is an economic crisis going on in Turkey. This pushes people to buy second-hand properties. This also encourages foreigners to spend money here because it’s cheaper.