Airbnb is an online lodging platform with over 150 million users worldwide. It enables individuals to rent out their own home, in full or in part, as accommodation. The site provides a forum for searching and booking that connects the person offering their accommodation with the vacationer wishing to rent it.
Using the New York City Airbnb Open Data set from Kaggle, this report analyzes listing behavior and metrics for 2019.
# Load the listings CSV into `data`; column types are guessed by read_csv().
data <- read_csv(file = "AB_NYC_2019.csv")
## Parsed with column specification:
## cols(
## id = col_double(),
## name = col_character(),
## host_id = col_double(),
## host_name = col_character(),
## neighbourhood_group = col_character(),
## neighbourhood = col_character(),
## latitude = col_double(),
## longitude = col_double(),
## room_type = col_character(),
## price = col_double(),
## minimum_nights = col_double(),
## number_of_reviews = col_double(),
## last_review = col_date(format = ""),
## reviews_per_month = col_double(),
## calculated_host_listings_count = col_double(),
## availability_365 = col_double()
## )
# Print a compact, column-wise overview (name, type, first values) of `data`.
data %>% glimpse()
## Rows: 48,895
## Columns: 16
## $ id <dbl> 2539, 2595, 3647, 3831, 5022, 5099, ...
## $ name <chr> "Clean & quiet apt home by the park"...
## $ host_id <dbl> 2787, 2845, 4632, 4869, 7192, 7322, ...
## $ host_name <chr> "John", "Jennifer", "Elisabeth", "Li...
## $ neighbourhood_group <chr> "Brooklyn", "Manhattan", "Manhattan"...
## $ neighbourhood <chr> "Kensington", "Midtown", "Harlem", "...
## $ latitude <dbl> 40.64749, 40.75362, 40.80902, 40.685...
## $ longitude <dbl> -73.97237, -73.98377, -73.94190, -73...
## $ room_type <chr> "Private room", "Entire home/apt", "...
## $ price <dbl> 149, 225, 150, 89, 80, 200, 60, 79, ...
## $ minimum_nights <dbl> 1, 1, 3, 1, 10, 3, 45, 2, 2, 1, 5, 2...
## $ number_of_reviews <dbl> 9, 45, 0, 270, 9, 74, 49, 430, 118, ...
## $ last_review <date> 2018-10-19, 2019-05-21, NA, 2019-07...
## $ reviews_per_month <dbl> 0.21, 0.38, NA, 4.64, 0.10, 0.59, 0....
## $ calculated_host_listings_count <dbl> 6, 2, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, ...
## $ availability_365 <dbl> 365, 355, 365, 194, 0, 129, 0, 220, ...
# Re-type last_review (parsed by read_csv() as a Date) as POSIXct, in place
# on `data`.
data[["last_review"]] <- as.POSIXct(data[["last_review"]], format = "%Y-%m-%d")

# data1 is `data` with every missing reviews_per_month replaced by 0.
data1 <- mutate(
  data,
  reviews_per_month = ifelse(is.na(reviews_per_month), 0, reviews_per_month)
)

# Auto-print the cleaned tibble.
data1
## # A tibble: 48,895 x 16
## id name host_id host_name neighbourhood_g~ neighbourhood latitude
## <dbl> <chr> <dbl> <chr> <chr> <chr> <dbl>
## 1 2539 Clea~ 2787 John Brooklyn Kensington 40.6
## 2 2595 Skyl~ 2845 Jennifer Manhattan Midtown 40.8
## 3 3647 THE ~ 4632 Elisabeth Manhattan Harlem 40.8
## 4 3831 Cozy~ 4869 LisaRoxa~ Brooklyn Clinton Hill 40.7
## 5 5022 Enti~ 7192 Laura Manhattan East Harlem 40.8
## 6 5099 Larg~ 7322 Chris Manhattan Murray Hill 40.7
## 7 5121 Blis~ 7356 Garon Brooklyn Bedford-Stuy~ 40.7
## 8 5178 Larg~ 8967 Shunichi Manhattan Hell's Kitch~ 40.8
## 9 5203 Cozy~ 7490 MaryEllen Manhattan Upper West S~ 40.8
## 10 5238 Cute~ 7549 Ben Manhattan Chinatown 40.7
## # ... with 48,885 more rows, and 9 more variables: longitude <dbl>,
## # room_type <chr>, price <dbl>, minimum_nights <dbl>,
## # number_of_reviews <dbl>, last_review <dttm>, reviews_per_month <dbl>,
## # calculated_host_listings_count <dbl>, availability_365 <dbl>
# Mean listing price per room type, one row per room type.
typeandprice <- summarise(
  group_by(data1, room_type),
  mean_price = mean(price),
  .groups = "drop"
)

# Bar chart of the means; stat = "identity" plots mean_price as given.
ggplot(typeandprice, aes(x = room_type, y = mean_price, fill = room_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Room Types",
    y = "Avg. Price ",
    title = "Avg. Prices for Room Type",
    fill = "Room Types"
  ) +
  theme_minimal()
# Box plot of availability_365 for each room type, one box per type.
ggplot(data1, aes(x = room_type, y = availability_365, fill = room_type)) +
  geom_boxplot() +
  labs(
    title = "Availability for Room Types",
    x = "Room Type",
    y = "Availability"
  )
# Mean number of reviews per neighbourhood group, one row per group.
neighandrev <- data1 %>%
  group_by(neighbourhood_group) %>%
  summarise(mean_number_of_reviews = mean(number_of_reviews), .groups = "drop")

# Points joined by a line. group = 1 places all categories in a single group
# so geom_line() connects them across the discrete x axis.
# No fill mapping is used: neither geom_line() nor the default point shape
# draws a fill, so mapping it could only add an uninformative legend.
ggplot(
  neighandrev,
  aes(x = neighbourhood_group, y = mean_number_of_reviews, group = 1)
) +
  geom_line() +
  geom_point() +
  ggtitle("Reviews for Neighbourhood Groups") +
  labs(x = "Neighbourhood Group", y = "Avg. Number of Reviews")
# Mean price for every neighbourhood group / room type combination.
# Only the aggregate goes inside summarise(): also passing the room_type
# column makes summarise() return one row per listing instead of one row per
# group, so every mean would be plotted many times over.
room_prices_for_neigh <- data1 %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(mean_price = mean(price), .groups = "drop")

# Mean price by room type, one panel per neighbourhood group.
# facet_wrap() must be chained with `+`; evaluated as a separate statement it
# only prints the facet object and the plot is left unfaceted.
ggplot(
  room_prices_for_neigh,
  aes(x = room_type, y = mean_price, color = neighbourhood_group)
) +
  geom_point() +
  labs(
    title = "Average Prices for Neighbourhood Groups",
    x = "Room Type",
    y = "Avg. Price",
    color = "Neighbourhood Group"
  ) +
  theme(plot.title = element_text(size = 15)) +
  facet_wrap(~ neighbourhood_group)
# Count listings priced at or above the overall mean price, per neighbourhood
# group and room type, and draw them as stacked bars.
data1 %>%
  filter(price >= mean(price)) %>%
  group_by(neighbourhood_group, room_type) %>%
  tally() %>%
  ggplot(
    aes(
      # Order the bars by their total height, largest first. reorder()
      # defaults to FUN = mean, which would rank each group by its average
      # count per room type rather than by the stacked total.
      reorder(neighbourhood_group, desc(n), FUN = sum),
      n,
      fill = room_type
    )
  ) +
  geom_bar(stat = "identity") +
  xlab(NULL) +
  ylab("Number of Rooms") +
  ggtitle("Number of Better than Average Price")