本文参考《Google 机器学习速成课程》中的“使用 TensorFlow 的基本步骤”一节。
尝试合成特征
# Synthetic feature: total_rooms divided by population, row by row.
california_housing_dataframe["rooms_per_person"] = california_housing_dataframe[
    "total_rooms"
].div(california_housing_dataframe["population"])

# Train using only the new feature and keep whatever train_model returns
# under the name calibration_data.
calibration_data = train_model(
    input_feature="rooms_per_person",
    learning_rate=5e-5,
    steps=500,
    batch_size=5,
)
# Recorded output: period 09 : 237.29
# NOTE(review): presumably the last-period loss printed by train_model — confirm.
处理离群值
# Identify outliers.
# Left panel: scatter of calibration predictions against targets.
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.scatter(
    x=calibration_data["predictions"],
    y=calibration_data["targets"],
)

# Right panel: histogram of rooms_per_person; it reveals a small number
# of outliers.
plt.subplot(1, 2, 2)
_ = california_housing_dataframe["rooms_per_person"].hist()
# Clip outliers: cap rooms_per_person at 5.
# Series.clip applies the cap in a single vectorized call instead of
# invoking a Python function per row; values at or below 5 (and NaN)
# pass through unchanged.
california_housing_dataframe["rooms_per_person"] = (
    california_housing_dataframe["rooms_per_person"].clip(upper=5)
)
# Draw the histogram again to inspect the capped distribution.
_ = california_housing_dataframe["rooms_per_person"].hist()
# Recorded output: period 09 : 108.23
# NOTE(review): presumably the last-period loss after retraining on the
# clipped feature; the retraining call is not shown in this excerpt — confirm.
已阅