In-class Exercise 9

Author

Harith Oh Yee Choon

Published

March 13, 2023

Modified

March 13, 2023

pacman::p_load(sf, spdep, GWmodel, SpatialML, tidyverse, tmap, ggpubr, olsrr, devtools,
              tidymodels, rsample)

Preparing Data

Reading the data file from rds

# Load the prepared modelling data set from its RDS file. Later chunks call
# st_coordinates() on it, so it is expected to carry a geometry column.
mdata <- readr::read_rds("data/Aspatial/mdata.rds")

Data Sampling

# Fix the RNG seed so the random train/test partition is reproducible.
set.seed(1234)

# Randomly assign 65% of the rows to training and the remaining 35% to testing.
# Only `data` and `prop` are supplied; no stratification is requested.
resale_split <- initial_split(mdata, prop = 0.65)
train_data <- training(resale_split)
test_data <- testing(resale_split)

# Baseline multiple linear regression of resale_price on the 14 predictors
# listed in the formula, fitted on the training partition only.
price_mlr <- lm(
  resale_price ~ floor_area_sqm +
    storey_order + remaining_lease_mths +
    PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT +
    PROX_PARK + PROX_MALL + PROX_SUPERMARKET +
    WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE +
    WITHIN_350M_BUS + WITHIN_1KM_PRISCH,
  data = train_data
)


# Print the coefficient table, residual quantiles and overall fit statistics
# of the linear model.
price_mlr %>%
  summary()

Call:
lm(formula = resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths + 
    PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + 
    PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + 
    WITHIN_350M_CHILDCARE + WITHIN_350M_BUS + WITHIN_1KM_PRISCH, 
    data = train_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-205193  -39120   -1930   36545  472355 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)              107601.073  10601.261  10.150  < 2e-16 ***
floor_area_sqm             2780.698     90.579  30.699  < 2e-16 ***
storey_order              14299.298    339.115  42.167  < 2e-16 ***
remaining_lease_mths        344.490      4.592  75.027  < 2e-16 ***
PROX_CBD                 -16930.196    201.254 -84.124  < 2e-16 ***
PROX_ELDERLYCARE         -14441.025    994.867 -14.516  < 2e-16 ***
PROX_HAWKER              -19265.648   1273.597 -15.127  < 2e-16 ***
PROX_MRT                 -32564.272   1744.232 -18.670  < 2e-16 ***
PROX_PARK                 -5712.625   1483.885  -3.850 0.000119 ***
PROX_MALL                -14717.388   2007.818  -7.330 2.47e-13 ***
PROX_SUPERMARKET         -26881.938   4189.624  -6.416 1.46e-10 ***
WITHIN_350M_KINDERGARTEN   8520.472    632.812  13.464  < 2e-16 ***
WITHIN_350M_CHILDCARE     -4510.650    354.015 -12.741  < 2e-16 ***
WITHIN_350M_BUS             813.493    222.574   3.655 0.000259 ***
WITHIN_1KM_PRISCH         -8010.834    491.512 -16.298  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 61650 on 10320 degrees of freedom
Multiple R-squared:  0.7373,    Adjusted R-squared:  0.737 
F-statistic:  2069 on 14 and 10320 DF,  p-value: < 2.2e-16
# Persist the fitted linear model so it can be reloaded without refitting.
readr::write_rds(price_mlr, "data/rds/price_mlr.rds")

GWR Predictive Model

# Convert the sf training set into its sp (Spatial*) equivalent, then print
# the converted object to inspect its extent, CRS and attribute columns.
train_data_sp <- train_data %>%
  as_Spatial()
train_data_sp
class       : SpatialPointsDataFrame 
features    : 10335 
extent      : 11597.31, 42623.63, 28217.39, 48741.06  (xmin, xmax, ymin, ymax)
crs         : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs 
variables   : 17
names       : resale_price, floor_area_sqm, storey_order, remaining_lease_mths,          PROX_CBD,     PROX_ELDERLYCARE,        PROX_HAWKER,           PROX_MRT,          PROX_PARK,   PROX_GOOD_PRISCH,        PROX_MALL,            PROX_CHAS,     PROX_SUPERMARKET, WITHIN_350M_KINDERGARTEN, WITHIN_350M_CHILDCARE, ... 
min values  :       218000,             74,            1,                  555, 0.999393538715878, 1.98943787433087e-08, 0.0333358643817954, 0.0220407324774434, 0.0441643212802781, 0.0652540365486641,                0, 6.20621206270077e-09, 1.21715176356525e-07,                        0,                     0, ... 
max values  :      1186888,            133,           17,                 1164,  19.6500691667807,     3.30163731686804,   2.86763031236184,   2.13060636038504,   2.41313695915468,   10.6223726149914, 2.27100643784442,    0.808332738794272,     1.57131703651196,                        7,                    20, ... 

Preparing coordinate data

Extracting coordinates data

The code chunks below extract the x,y coordinates of the full, training and test data sets.

# Extract the coordinate matrices of the full, training and test data sets.
# This has to run while train_data still has its geometry column.
coords <- st_coordinates(mdata)
coords_train <- st_coordinates(train_data)
coords_test <- st_coordinates(test_data)

# Save the training and test coordinates for later reuse. write_rds() is
# called purely for its side effect, so the in-memory objects are not
# reassigned from its return value.
write_rds(coords_train, "data/rds/coords_train.rds")
write_rds(coords_test, "data/rds/coords_test.rds")

Dropping geometry field

First, we will drop the geometry column of the sf data frame by using st_drop_geometry() of the sf package.

# Remove the geometry column so that train_data becomes a plain attribute
# table; the coordinates are supplied to the models separately.
train_data <- st_drop_geometry(train_data)

Calibrating Random Forest

Performing random forest calibration by using the ranger package.

# Fix the RNG seed so the fitted forest is reproducible.
set.seed(1234)

# Non-spatial random forest on the same 14 predictors as the linear model,
# using ranger's default hyper-parameters. The call is namespace-qualified
# because ranger is not in the p_load() list at the top of the file, so an
# unqualified ranger() only resolves if another package attaches it.
rf <- ranger::ranger(
  resale_price ~ floor_area_sqm +
    storey_order + remaining_lease_mths +
    PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT +
    PROX_PARK + PROX_MALL + PROX_SUPERMARKET +
    WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE +
    WITHIN_350M_BUS + WITHIN_1KM_PRISCH,
  data = train_data
)

Calibrating Geographically Weighted Random Forest Model

In this section, you will learn how to calibrate a predictive model by using the geographically weighted random forest method.

The code chunk below calibrates a geographically weighted random forest model by using ‘grf()’ of the SpatialML package.

# Seed the RNG so the local forests are reproducible.
set.seed(1234)

# Calibrate the geographically weighted random forest on the training table.
# coords_train supplies the point locations, which are kept separate from the
# attribute data passed as dframe.
# NOTE(review): with kernel = "adaptive", bw = 55 is presumably the number of
# nearest neighbours used for each local model; confirm against the SpatialML
# documentation.
gwRF_adaptive <- grf(
  formula = resale_price ~ floor_area_sqm +
    storey_order + remaining_lease_mths +
    PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT +
    PROX_PARK + PROX_MALL + PROX_SUPERMARKET +
    WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE +
    WITHIN_350M_BUS + WITHIN_1KM_PRISCH,
  dframe = train_data,
  coords = coords_train,
  kernel = "adaptive",
  bw = 55
)
Ranger result

Call:
 ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths +      PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK +      PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN +      WITHIN_350M_CHILDCARE + WITHIN_350M_BUS + WITHIN_1KM_PRISCH,      data = train_data, num.trees = 500, mtry = 4, importance = "impurity",      num.threads = NULL) 

Type:                             Regression 
Number of trees:                  500 
Sample size:                      10335 
Number of independent variables:  14 
Mtry:                             4 
Target node size:                 5 
Variable importance mode:         impurity 
Splitrule:                        variance 
OOB prediction error (MSE):       700081018 
R squared (OOB):                  0.9515468 
          floor_area_sqm             storey_order     remaining_lease_mths 
            7.376510e+12             1.413229e+13             2.991844e+13 
                PROX_CBD         PROX_ELDERLYCARE              PROX_HAWKER 
            5.312697e+13             7.017513e+12             5.506719e+12 
                PROX_MRT                PROX_PARK                PROX_MALL 
            7.446857e+12             4.825986e+12             4.173165e+12 
        PROX_SUPERMARKET WITHIN_350M_KINDERGARTEN    WITHIN_350M_CHILDCARE 
            2.879598e+12             1.028775e+12             1.701318e+12 
         WITHIN_350M_BUS        WITHIN_1KM_PRISCH 
            1.564038e+12             7.214027e+12 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-236112.0  -13033.7     444.4     593.8   14831.5  358041.7 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-79279.83  -3510.70     54.56     50.98   3909.85  83074.08 
                               Min          Max        Mean         StD
floor_area_sqm                   0 401562922035 18210850992 41426270899
storey_order             302736445 243728744368 16368419468 23620589843
remaining_lease_mths     696564138 546463600727 34119912443 70328183398
PROX_CBD                  55173040 382484896335 12154563393 29293290548
PROX_ELDERLYCARE          45182031 344081962746 10597657883 24546405941
PROX_HAWKER               43516026 342597797419 10551807020 23408387903
PROX_MRT                  54234551 299075025906  9873129985 21055852211
PROX_PARK                 49919822 322633843469  9353956995 19517077658
PROX_MALL                 43296133 433263607933 11247374493 27537334970
PROX_SUPERMARKET          52665827 417310417234 10802122271 26572460731
WITHIN_350M_KINDERGARTEN         0 186468064682  2848177740 12928886968
WITHIN_350M_CHILDCARE            0 255236737234  5526292324 18109971102
WITHIN_350M_BUS                  0 193828795378  4747552546 11886064288
WITHIN_1KM_PRISCH                0 178360608427  1778262602  7163381668