# Machine learning lab workflow: data cleaning (机器学习之实验过程-数据清理)
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
# Path to the feedback CSV that the rest of this script cleans.
data_path = '/home/py/Work/机器学习/labs/data/Feedback.csv'
df = pd.read_csv(data_path)

# Preview both ends of the table. A bare `df.head()` is only rendered when it
# is the last expression of an interactive cell, so its result was silently
# discarded here; print it explicitly like the tail.
print(df.head())
print(df.tail())
# Maps each original CSV column header (the survey question text) to the short
# column name used from here on.
rename_pairs = {
    '你是否完成了实验一的Bonus部分?': 'Bonus',
    '你是否认为额外实验的工作是对能力有提升的?': 'Enhancement',
    '你对于这种安排的满意度如何?': 'Satisfaction',
    '填写时长': 'Time',
}

# The first two entries of rename_pairs are the yes/no questions. Take that
# slice once so the encoding loop and the drop below cannot drift apart.
yes_no_questions = list(rename_pairs)[:2]

# Encode the answers as 1 ('是') / 0 ('否') in new, short-named columns.
# Series.map yields NaN for any answer that is not a key of the mapping.
for question in yes_no_questions:
    df[rename_pairs[question]] = df[question].map({'是': 1, '否': 0})

# Drop the original question columns only after the loop has read both of
# them; dropping inside the loop would remove the second column too early.
df = df.drop(columns=yes_no_questions)

# Data preprocessing: rename the remaining columns. Keys that no longer exist
# (the two columns dropped above) are ignored by DataFrame.rename by default.
df = df.rename(columns=rename_pairs)
def convert_to_seconds(time_str):
total_seconds = 0
parts = ti