1 Introduction

Airbnb is an online lodging platform with over 150 million users worldwide. It enables individuals to rent out their own home, in full or in part, as accommodation. The site provides a marketplace for searching and booking that connects hosts offering accommodation with travelers wishing to rent it.

Using the New York City Airbnb Open Data set from Kaggle, this report analyzes listing behavior and metrics for 2019.

# Load the raw listings table from the CSV file in the working directory.
data <- read_csv(file = "AB_NYC_2019.csv")
## Parsed with column specification:
## cols(
##   id = col_double(),
##   name = col_character(),
##   host_id = col_double(),
##   host_name = col_character(),
##   neighbourhood_group = col_character(),
##   neighbourhood = col_character(),
##   latitude = col_double(),
##   longitude = col_double(),
##   room_type = col_character(),
##   price = col_double(),
##   minimum_nights = col_double(),
##   number_of_reviews = col_double(),
##   last_review = col_date(format = ""),
##   reviews_per_month = col_double(),
##   calculated_host_listings_count = col_double(),
##   availability_365 = col_double()
## )
# Compact column-by-column preview of the loaded table.
data %>% glimpse()
## Rows: 48,895
## Columns: 16
## $ id                             <dbl> 2539, 2595, 3647, 3831, 5022, 5099, ...
## $ name                           <chr> "Clean & quiet apt home by the park"...
## $ host_id                        <dbl> 2787, 2845, 4632, 4869, 7192, 7322, ...
## $ host_name                      <chr> "John", "Jennifer", "Elisabeth", "Li...
## $ neighbourhood_group            <chr> "Brooklyn", "Manhattan", "Manhattan"...
## $ neighbourhood                  <chr> "Kensington", "Midtown", "Harlem", "...
## $ latitude                       <dbl> 40.64749, 40.75362, 40.80902, 40.685...
## $ longitude                      <dbl> -73.97237, -73.98377, -73.94190, -73...
## $ room_type                      <chr> "Private room", "Entire home/apt", "...
## $ price                          <dbl> 149, 225, 150, 89, 80, 200, 60, 79, ...
## $ minimum_nights                 <dbl> 1, 1, 3, 1, 10, 3, 45, 2, 2, 1, 5, 2...
## $ number_of_reviews              <dbl> 9, 45, 0, 270, 9, 74, 49, 430, 118, ...
## $ last_review                    <date> 2018-10-19, 2019-05-21, NA, 2019-07...
## $ reviews_per_month              <dbl> 0.21, 0.38, NA, 4.64, 0.10, 0.59, 0....
## $ calculated_host_listings_count <dbl> 6, 2, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, ...
## $ availability_365               <dbl> 365, 355, 365, 194, 0, 129, 0, 220, ...

2 Overview and Manipulation of Data

2.1 Variables

  • id : Listing ID
  • name : Name of the Listing
  • host_id : Host ID
  • host_name : Name of the Host
  • neighbourhood_group : Location
  • neighbourhood : Area
  • latitude : Latitude Coordinates
  • longitude : Longitude Coordinates
  • room_type : Listing Space Type
  • price : Price in Dollars
  • minimum_nights : Minimum Nights
  • number_of_reviews : Number of Reviews
  • last_review : Latest Review
  • reviews_per_month : Number of Reviews per Month
  • calculated_host_listings_count : Number of Listings per Host
  • availability_365 : Number of Days when Listing is Available for Booking
# Store the last review date as a date-time (POSIXct) column on `data`.
data <- data %>%
  mutate(last_review = as.POSIXct(last_review, format = "%Y-%m-%d"))

# Listings with a missing reviews_per_month get a rate of 0.
data1 <- data %>%
  mutate(
    reviews_per_month = replace(reviews_per_month, is.na(reviews_per_month), 0)
  )
data1
## # A tibble: 48,895 x 16
##       id name  host_id host_name neighbourhood_g~ neighbourhood latitude
##    <dbl> <chr>   <dbl> <chr>     <chr>            <chr>            <dbl>
##  1  2539 Clea~    2787 John      Brooklyn         Kensington        40.6
##  2  2595 Skyl~    2845 Jennifer  Manhattan        Midtown           40.8
##  3  3647 THE ~    4632 Elisabeth Manhattan        Harlem            40.8
##  4  3831 Cozy~    4869 LisaRoxa~ Brooklyn         Clinton Hill      40.7
##  5  5022 Enti~    7192 Laura     Manhattan        East Harlem       40.8
##  6  5099 Larg~    7322 Chris     Manhattan        Murray Hill       40.7
##  7  5121 Blis~    7356 Garon     Brooklyn         Bedford-Stuy~     40.7
##  8  5178 Larg~    8967 Shunichi  Manhattan        Hell's Kitch~     40.8
##  9  5203 Cozy~    7490 MaryEllen Manhattan        Upper West S~     40.8
## 10  5238 Cute~    7549 Ben       Manhattan        Chinatown         40.7
## # ... with 48,885 more rows, and 9 more variables: longitude <dbl>,
## #   room_type <chr>, price <dbl>, minimum_nights <dbl>,
## #   number_of_reviews <dbl>, last_review <dttm>, reviews_per_month <dbl>,
## #   calculated_host_listings_count <dbl>, availability_365 <dbl>

3 Data Visualization

3.1 Room Types and Prices

# Average price for each room type.
typeandprice <- data1 %>%
  group_by(room_type) %>%
  summarise(mean_price = mean(price), .groups = "drop")

# One bar per room type; bar height is the precomputed mean, so geom_col()
# draws the values as they are.
ggplot(typeandprice, aes(x = room_type, y = mean_price, fill = room_type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Avg. Prices for Room Type",
    x = "Room Types",
    y = "Avg. Price ",
    fill = "Room Types"
  ) +
  theme_minimal()

3.2 Room Type and Room Availability

# Distribution of availability_365 for each room type, one box per type.
data1 %>%
  ggplot(aes(x = room_type, y = availability_365, fill = room_type)) +
  geom_boxplot() +
  labs(
    title = "Availability for Room Types",
    x = "Room Type",
    y = "Availability"
  )

3.3 Neighbourhood Group and Number of Reviews

# Average number of reviews per listing in each neighbourhood group.
neighandrev <- data1 %>%
  group_by(neighbourhood_group) %>%
  summarise(mean_number_of_reviews = mean(number_of_reviews), .groups = "drop")

# group = 1 puts all categories in one group so geom_line() connects them.
# No fill mapping is used: lines have no fill, and the default point shape is
# drawn with colour only, so a fill aesthetic would add nothing to the plot.
ggplot(
  neighandrev,
  aes(x = neighbourhood_group, y = mean_number_of_reviews, group = 1)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Reviews for Neighbourhood Groups",
    x = "Neighbourhood Group",
    y = "Avg. Number of Reviews"
  )

3.4 Relation between Neighbourhood Groups and Average Prices

# Mean price for every neighbourhood group / room type combination.
# Only the aggregate goes inside summarise(): also passing the bare `room_type`
# column makes the result keep one row per listing (each repeating its group's
# mean) and leaves it grouped. `.groups = "drop"` returns a plain, ungrouped
# table with one row per combination.
room_prices_for_neigh <- data1 %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(mean_price = mean(price), .groups = "drop")
## `summarise()` regrouping output by 'neighbourhood_group', 'room_type' (override with `.groups` argument)
# Mean price by room type, one panel per neighbourhood group.
# Every component, including facet_wrap(), must be joined with `+`; a facet
# call left on its own line is evaluated separately and never reaches the plot.
ggplot(
  room_prices_for_neigh,
  aes(x = room_type, y = mean_price, color = neighbourhood_group)
) +
  geom_point() +
  labs(
    title = "Average  Prices for Neighbourhood Groups",
    x = "Room Type",  # the x axis shows room_type, not the neighbourhood group
    y = "Avg. Price",
    color = "Neighbourhood Group"
  ) +
  theme(plot.title = element_text(size = 15)) +
  facet_wrap(~ neighbourhood_group)
## <ggproto object: Class FacetWrap, Facet, gg>
##     compute_layout: function
##     draw_back: function
##     draw_front: function
##     draw_labels: function
##     draw_panels: function
##     finish_data: function
##     init_scales: function
##     map_data: function
##     params: list
##     setup_data: function
##     setup_params: function
##     shrink: TRUE
##     train_scales: function
##     vars: function
##     super:  <ggproto object: Class FacetWrap, Facet, gg>

3.5 Rooms That Are More Expensive Than Average

# Count the listings priced at or above the overall mean price for each
# neighbourhood group / room type pair, then stack the counts per group.
data1 %>%
  filter(price >= mean(price)) %>%
  group_by(neighbourhood_group, room_type) %>%
  tally() %>%
  ggplot(aes(reorder(neighbourhood_group, desc(n)), n, fill = room_type)) +
  geom_col() +
  labs(
    x = NULL,
    y = "Number of Rooms",
    title = "Number of Better than Average Price"
  )