Data visualization and transformation
Rows: 98
Columns: 13
$ address <chr> "1 Learned Pl, Durham, NC 27705", "1616…
$ price <dbl> 1520000, 1030000, 420000, 680000, 42850…
$ bed <dbl> 3, 5, 2, 4, 4, 3, 5, 4, 4, 3, 4, 4, 3, …
$ bath <dbl> 4.0, 4.0, 3.0, 3.0, 3.0, 3.0, 5.0, 3.0,…
$ area <dbl> 6040, 4475, 1745, 2091, 1772, 1950, 390…
$ type <chr> "Single Family", "Single Family", "Sing…
$ year_built <dbl> 1972, 1969, 1959, 1961, 2020, 2014, 196…
$ heating <chr> "Other, Gas", "Forced air, Gas", "Force…
$ cooling <fct> central, central, central, central, cen…
$ parking <chr> "0 spaces", "Carport, Covered", "Garage…
$ lot <dbl> 0.97, 1.38, 0.51, 0.84, 0.16, 0.45, 0.9…
$ hoa <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ url <chr> "https://www.zillow.com/homedetails/1-L…
Relationship between two numerical variables
Direction: Positive
Strength: Moderately strong
Form: Linear
# Price summary for each cooling type: one row per level of `cooling`, with the
# minimum, median, interquartile range, and maximum of `price`.
duke_forest |>
  group_by(cooling) |>
  summarise(
    across(
      price,
      list(min = min, median = median, iqr = IQR, max = max),
      # Name each output column <statistic>_<column>, e.g. `min_price`.
      .names = "{.fn}_{.col}"
    )
  )
# A tibble: 2 × 5
cooling min_price median_price iqr_price max_price
<fct> <dbl> <dbl> <dbl> <dbl>
1 other 95000 525000 156500 1020000
2 central 265000 550000 221500 1520000
Create a new variable called year_built_cat
that takes on the value "1970 or later"
if the house was built in 1970 or later, and "1969 or earlier"
otherwise.
# A tibble: 98 × 2
year_built year_built_cat
<dbl> <chr>
1 1972 1970 or later
2 1969 1969 or earlier
3 1959 1969 or earlier
4 1961 1969 or earlier
5 2020 1970 or later
6 2014 1970 or later
7 1968 1969 or earlier
8 1973 1970 or later
9 1972 1970 or later
10 1964 1969 or earlier
# ℹ 88 more rows
# Share of each cooling type within each `year_built_cat` category.
# NOTE: relies on `year_built_cat` already being a column of `duke_forest`;
# the step that creates it is not shown next to this pipeline.
duke_forest |>
  count(year_built_cat, cooling) |>
  group_by(year_built_cat) |>
  # Within each category, divide every count by the category total.
  mutate(prop = proportions(n))
# A tibble: 4 × 4
# Groups: year_built_cat [2]
year_built_cat cooling n prop
<chr> <fct> <int> <dbl>
1 1969 or earlier other 39 0.591
2 1969 or earlier central 27 0.409
3 1970 or later other 14 0.438
4 1970 or later central 18 0.562
# Scatterplot of price against area; cooling type is mapped to both the color
# and the shape of the points.
ggplot(duke_forest, aes(area, price, color = cooling, shape = cooling)) +
  # Large, semi-transparent points.
  geom_point(size = 4, alpha = 0.7) +
  labs(
    x = "Area (square feet)",
    y = "Price (USD)",
    color = "Cooling",
    shape = "Cooling",
    title = "Houses in Duke Forest",
    subtitle = "Durham, NC",
    caption = "Source: Zillow, November 2020"
  )
# Scatterplot of price against area, by cooling type, with hand-picked colors
# for the two cooling levels.
ggplot(duke_forest, aes(area, price, color = cooling, shape = cooling)) +
  # Large, semi-transparent points.
  geom_point(size = 4, alpha = 0.7) +
  # Assign a fixed color to each cooling level by name.
  scale_color_manual(
    values = c(central = "darkblue", other = "lightsalmon3")
  ) +
  labs(
    x = "Area (square feet)",
    y = "Price (USD)",
    color = "Cooling",
    shape = "Cooling",
    title = "Houses in Duke Forest",
    subtitle = "Durham, NC",
    caption = "Source: Zillow, November 2020"
  )
# Scatterplot of price against area, by cooling type, with manual colors and
# comma-separated thousands on the x axis.
ggplot(duke_forest, aes(area, price, color = cooling, shape = cooling)) +
  # Large, semi-transparent points.
  geom_point(size = 4, alpha = 0.7) +
  # Assign a fixed color to each cooling level by name.
  scale_color_manual(
    values = c(central = "darkblue", other = "lightsalmon3")
  ) +
  # Format x-axis tick labels with "," as the thousands separator.
  scale_x_continuous(labels = label_number(big.mark = ",")) +
  labs(
    x = "Area (square feet)",
    y = "Price (USD)",
    color = "Cooling",
    shape = "Cooling",
    title = "Houses in Duke Forest",
    subtitle = "Durham, NC",
    caption = "Source: Zillow, November 2020"
  )
# Scatterplot of price against area, by cooling type, with manual colors and
# formatted tick labels on both axes.
ggplot(duke_forest, aes(area, price, color = cooling, shape = cooling)) +
  # Large, semi-transparent points.
  geom_point(size = 4, alpha = 0.7) +
  # Assign a fixed color to each cooling level by name.
  scale_color_manual(
    values = c(central = "darkblue", other = "lightsalmon3")
  ) +
  # Format x-axis tick labels with "," as the thousands separator.
  scale_x_continuous(labels = label_number(big.mark = ",")) +
  # Format y-axis tick labels as dollar amounts.
  scale_y_continuous(labels = label_dollar()) +
  labs(
    x = "Area (square feet)",
    y = "Price (USD)",
    color = "Cooling",
    shape = "Cooling",
    title = "Houses in Duke Forest",
    subtitle = "Durham, NC",
    caption = "Source: Zillow, November 2020"
  )
# Scatterplot of price against area, split into one panel per cooling type
# (panels stacked in a single column), with manual colors and formatted axes.
ggplot(duke_forest, aes(area, price, color = cooling, shape = cooling)) +
  # Large, semi-transparent points.
  geom_point(size = 4, alpha = 0.7) +
  # One panel per cooling level, arranged in one column.
  facet_wrap(vars(cooling), ncol = 1) +
  # Assign a fixed color to each cooling level by name.
  scale_color_manual(
    values = c(central = "darkblue", other = "lightsalmon3")
  ) +
  # Format x-axis tick labels with "," as the thousands separator.
  scale_x_continuous(labels = label_number(big.mark = ",")) +
  # Format y-axis tick labels as dollar amounts.
  scale_y_continuous(labels = label_dollar()) +
  labs(
    x = "Area (square feet)",
    y = "Price (USD)",
    color = "Cooling",
    shape = "Cooling",
    title = "Houses in Duke Forest",
    subtitle = "Durham, NC",
    caption = "Source: Zillow, November 2020"
  )
# Scatterplot of price against area, split into one panel per cooling type.
# The point layer's legend is turned off, and the title names the faceting
# variable instead.
ggplot(duke_forest, aes(area, price, color = cooling, shape = cooling)) +
  # Large, semi-transparent points; no legend is drawn for this layer.
  geom_point(size = 4, alpha = 0.7, show.legend = FALSE) +
  # One panel per cooling level, arranged in one column.
  facet_wrap(vars(cooling), ncol = 1) +
  # Assign a fixed color to each cooling level by name.
  scale_color_manual(
    values = c(central = "darkblue", other = "lightsalmon3")
  ) +
  # Format x-axis tick labels with "," as the thousands separator.
  scale_x_continuous(labels = label_number(big.mark = ",")) +
  # Format y-axis tick labels as dollar amounts.
  scale_y_continuous(labels = label_dollar()) +
  labs(
    x = "Area (square feet)",
    y = "Price (USD)",
    color = "Cooling",
    shape = "Cooling",
    title = "Houses in Duke Forest, by cooling type",
    subtitle = "Durham, NC",
    caption = "Source: Zillow, November 2020"
  )