using DataFrames, CSV, Printf, Plots, Dates
df = DataFrame(CSV.File("housing_in_london.csv"));
df = filter(:borough_flag => !=(0), df)
df = select(df, [:date, :area, :average_price])
first(df, 5)
| Row | date | area | average_price |
|---|---|---|---|
| Date | String31 | Int64 | |
| 1 | 1995-01-01 | city of london | 91449 |
| 2 | 1995-02-01 | city of london | 82203 |
| 3 | 1995-03-01 | city of london | 79121 |
| 4 | 1995-04-01 | city of london | 77101 |
| 5 | 1995-05-01 | city of london | 84409 |
function to_proper_case(s)
s = split(s)
for i in 1:length(s)
s[i] = uppercase(s[i][1]) * lowercase(s[i][2:end])
end
join(s, " ")
end
df[!, :area] = map(to_proper_case, df[!, :area])
first(df, 5)
| Row | date | area | average_price |
|---|---|---|---|
| Date | String | Int64 | |
| 1 | 1995-01-01 | City Of London | 91449 |
| 2 | 1995-02-01 | City Of London | 82203 |
| 3 | 1995-03-01 | City Of London | 79121 |
| 4 | 1995-04-01 | City Of London | 77101 |
| 5 | 1995-05-01 | City Of London | 84409 |
df_tower_hamlets = filter(:area => ==("Tower Hamlets"), df)
df_tower_hamlets = filter(row -> month(row.date) == 1, df_tower_hamlets)
df_tower_hamlets[!, :average_price_lag] = circshift(df_tower_hamlets[!, :average_price], (+1))
df_tower_hamlets[1, :average_price_lag] = 0
df_tower_hamlets[!, :price_dif] = df_tower_hamlets[:, :average_price] .- df_tower_hamlets[:, :average_price_lag]
df_tower_hamlets = df_tower_hamlets[2:end, :] # Remove first row (which has no data in lag column)
first(df_tower_hamlets, 5)
| Row | date | area | average_price | average_price_lag | price_dif |
|---|---|---|---|---|---|
| Date | String | Int64 | Int64 | Int64 | |
| 1 | 1996-01-01 | Tower Hamlets | 67552 | 59865 | 7687 |
| 2 | 1997-01-01 | Tower Hamlets | 74439 | 67552 | 6887 |
| 3 | 1998-01-01 | Tower Hamlets | 90192 | 74439 | 15753 |
| 4 | 1999-01-01 | Tower Hamlets | 107464 | 90192 | 17272 |
| 5 | 2000-01-01 | Tower Hamlets | 128200 | 107464 | 20736 |
df_Jan_1996 = filter(row -> row[:date] == Date("1996-01-01"), df);
df_Jan_2020 = filter(row -> row[:date] == Date("2020-01-01"), df);
df_1996_2020 = leftjoin(df_Jan_1996, df_Jan_2020, on = [:area => :area], makeunique=true);
df_1996_2020 = rename!(df_1996_2020,
"average_price" => "average_price_1996",
"average_price_1" => "average_price_2020");
df_1996_2020[!, :price_increase_abs_24y] = df_1996_2020[:, :average_price_2020] .- df_1996_2020[:, :average_price_1996]
df_1996_2020[!, :price_increase_percent_24y] = (df_1996_2020[:, :average_price_2020] .- df_1996_2020[:, :average_price_1996]) ./ df_1996_2020[:, :average_price_1996] * 100
df_1996_2020 = select(df_1996_2020, [:area, :average_price_1996, :average_price_2020, :price_increase_abs_24y, :price_increase_percent_24y])
df_1996_2020 = sort(df_1996_2020, :price_increase_percent_24y, rev = true);
first(df_1996_2020, 5)
| Row | area | average_price_1996 | average_price_2020 | price_increase_abs_24y | price_increase_percent_24y |
|---|---|---|---|---|---|
| String | Int64 | Int64? | Int64 | Float64 | |
| 1 | Hackney | 66327 | 571882 | 505555 | 762.216 |
| 2 | Lambeth | 68780 | 526690 | 457910 | 665.76 |
| 3 | Southwark | 65753 | 502612 | 436859 | 664.394 |
| 4 | Westminster | 133843 | 1019028 | 885185 | 661.361 |
| 5 | Islington | 89406 | 673418 | 584012 | 653.213 |
df_1996_2020 = sort(df_1996_2020, :average_price_2020, rev = true);
df_1996_2020_top_10 = first(df_1996_2020, 10);
x = df_1996_2020_top_10[!, :area]
y = df_1996_2020_top_10[!, :average_price_2020]/1000
data_label = [(@sprintf("£%.1fk", yi),9) for yi in y]
(ymin,ymax) = extrema(y) # Compute both min and max, return as a 2-tuple
label_vertical_offset = 0.02*ymax
top_10_price= Plots.bar(x,y,
ylabel="Price (£1,000s)",
title="Top 10 Highest Average House Price (2020)",
legend=false,
xrotation=45,
fill="green",
size=(900,805))
annotate!(x,y.+label_vertical_offset, data_label, ylim = (0,ymax+label_vertical_offset))
df_1996_2020 = sort(df_1996_2020, :price_increase_abs_24y, rev = true);
df_1996_2020_top_10 = first(df_1996_2020, 10);
x = df_1996_2020_top_10[!, :area]
y = df_1996_2020_top_10[!, :price_increase_abs_24y]/1000
data_label = [(@sprintf("£%.1fk", yi),9) for yi in y]
(ymin,ymax) = extrema(y)
label_vertical_offset = 0.02*ymax
top_10_increase_abs = Plots.bar(x,y,
ylabel="Price Increase (£1,000s)",
title="Top 10 Highest Increases in Average House Price (1996-2020)",
legend=false,
xrotation=45,
size=(900,805))
annotate!(x,y.+label_vertical_offset, data_label, ylim = (0,ymax+label_vertical_offset))
df_1996_2020 = sort(df_1996_2020, :price_increase_percent_24y, rev = true);
df_1996_2020_top_10 = first(df_1996_2020, 10);
x = df_1996_2020_top_10[!, :area]
y = df_1996_2020_top_10[!, :price_increase_percent_24y]
data_label = [(@sprintf("+%.1f%%", yi),9) for yi in y]
(ymin,ymax) = extrema(y)
label_vertical_offset = 0.02*ymax
top_10_increase_percent = Plots.bar(x,y,
ylabel="Percentage Increase",
title="Top 10 Highest Percentage Increases in Average House Price (1996-2020)",
legend=false,
xrotation=45,
size=(900,805))
annotate!(x,y.+label_vertical_offset, data_label, ylim = (0,ymax+label_vertical_offset))
y_max = maximum(df_tower_hamlets.price_dif/1000)
y_min = minimum(df_tower_hamlets.price_dif/1000)
tower_hamlets_trend = Plots.plot(df_tower_hamlets.date, df_tower_hamlets.price_dif/1000,
xlabel="Date", ylabel="Price Change (£1,000s)", title="Change in Average Tower Hamlets House Price (1996-2020)",
marker=:circle, markersize=8,
legend=false,
color=:purple,
size=(900,725),
ylims=(y_min*1.1, y_max*1.1),
#xrotation=90
)
hline!(tower_hamlets_trend, [0], line=(:dash, :red))