# Scraped from a GitHub file view: linear_reg_house.py (53 lines, 43 loc, 1.36 KB).
# linear_reg_house.py
from sklearn import datasets ## imports datasets from scikit-learn
import numpy as np
import pandas as pd# define the data/predictors as the pre-set feature names
import statsmodels.api as sm
# Load the Boston housing data.
# `datasets.load_boston` was removed in scikit-learn 1.2 (accessing it raises
# ImportError there), so fall back to rebuilding the same arrays from the
# original StatLib file, as the scikit-learn removal notice recommends.
try:
    data = datasets.load_boston()  # bundled copy; scikit-learn < 1.2 only
except (ImportError, AttributeError):
    from sklearn.utils import Bunch

    # After a 22-line header, each record in the raw file is split across two
    # physical lines: 11 values on the first, 3 on the second (the last of
    # which is the MEDV target).
    _raw = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    ).values
    # Expose the same attributes the rest of the script reads from `data`.
    data = Bunch(
        data=np.hstack([_raw[::2, :], _raw[1::2, :2]]),
        target=_raw[1::2, 2],
        feature_names=np.array(
            [
                "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
            ]
        ),
    )

# Define the predictors as a DataFrame labelled with the pre-set feature names
df = pd.DataFrame(data.data, columns=data.feature_names)
# Put the target (housing value -- MEDV) in another DataFrame
target = pd.DataFrame(data.target, columns=["MEDV"])
# === reg with no intercept ===
# Regress MEDV on RM alone. No constant column is added to X, so the fitted
# line is forced through the origin.
X = df["RM"]
y = target["MEDV"]
# Note the argument order: statsmodels' OLS takes the response first (y, X).
model = sm.OLS(y, X).fit()
predictions = model.predict(X)  # in-sample predictions from the fitted model
# Print out the statistics. summary() only returns the table; without print()
# nothing is shown when this file is run as a script.
print(model.summary())
# === reg with intercept ===
# Regress MEDV on RM plus an explicit constant column.
X = df["RM"]
y = target["MEDV"]
X = sm.add_constant(X)  # add an intercept (beta_0) to the model
# Note the argument order: statsmodels' OLS takes the response first (y, X).
model = sm.OLS(y, X).fit()
predictions = model.predict(X)  # in-sample predictions from the fitted model
# Print out the statistics. summary() only returns the table; without print()
# nothing is shown when this file is run as a script.
print(model.summary())
# === reg with multiple x ===
# Regress MEDV on two predictors, RM and LSTAT. No constant column is added
# here, so this fit has no intercept term.
X = df[["RM", "LSTAT"]]
y = target["MEDV"]
model = sm.OLS(y, X).fit()
predictions = model.predict(X)  # in-sample predictions from the fitted model
# summary() only returns the table; print it so a script run shows it.
print(model.summary())
# === use sklearn ===
# Fit the same kind of model with scikit-learn, using every feature column.
from sklearn import linear_model
X = df
y = target["MEDV"]
lm = linear_model.LinearRegression()
# Note the argument order: scikit-learn's fit takes the predictors first (X, y).
model = lm.fit(X, y)
predictions = lm.predict(X)  # in-sample predictions from the fitted model
print(predictions)
# These were bare expressions whose values were discarded when run as a
# script; print them so the fit quality and parameters are actually shown.
print("R^2:", lm.score(X, y))
print("coefficients:", lm.coef_)
print("intercept:", lm.intercept_)