import random
import string
from datetime import datetime
def generate_random_string(length: int = 3) -> str:
    """Return a random string of uppercase ASCII letters.

    Args:
        length: Number of characters to generate. Defaults to 3; a value
            of 0 or less yields an empty string.

    Returns:
        A string of ``length`` characters, each drawn independently from
        ``string.ascii_uppercase``.
    """
    characters = string.ascii_uppercase
    return ''.join(random.choice(characters) for _ in range(length))
def generate_timestamped_string(separator: str = '_') -> str:
    """Return a random three-letter tag joined to the current local date.

    Args:
        separator: Text placed between the random part and the date.
            Defaults to ``'_'``.

    Returns:
        A string of the form ``<random><separator><yymmdd>``, where the
        random part comes from ``generate_random_string(length=3)`` and
        the date is ``datetime.now()`` formatted with ``%y%m%d``.
    """
    # Date only. The original left "%H%M%S" as a trailing comment,
    # presumably an optional suffix for time-of-day resolution.
    timestamp = datetime.now().strftime('%y%m%d')
    random_part = generate_random_string(length=3)
    return random_part + separator + timestamp
# Generate one timestamped tag and print it wrapped in 【】 brackets.
timestamped_string = generate_timestamped_string()
print('【{0}】'.format(timestamped_string))
【Talk is cheap】
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Plot configuration applied in a single update call.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # display Chinese characters
    'axes.unicode_minus': False,  # display the minus sign
})
# Install a global filter that ignores all warnings from here on.
warnings.filterwarnings("ignore")
%matplotlib inline
# Inspect the column labels of df (df is defined outside the lines shown here).
df.columns
Index(['Product_ID', '产品名称', '有用人数', 'Unnamed: 3', '权威性', 'Unnamed: 5', '评论者',
'评论者打分', 'Unnamed: 8', 'Unnamed: 9', '一致性', '客观性', '标题', '发布时间',
'Unnamed: 14', '发布时间距今', '时效性', '价格', '具体性', '产品评论数量', 'Unnamed: 20',
'产品平均得分', '评价正文', '评论字数', '完整性1', '评价正文.1', '情感性', 'Unnamed: 27',
'发布时长', '完整性2', '分母', '有用性', 'Unnamed: 32', 'Unnamed: 33', '800-1200',
'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38'],
dtype='object')
...
from sklearn import tree
from sklearn.metrics import classification_report

# Build a depth-3 decision tree with the Gini criterion and fit it on the
# training data; fit() returns the estimator itself, so it can be chained.
tree_clf = tree.DecisionTreeClassifier(
    criterion="gini",
    max_depth=3,
).fit(train_x, train_y)

# Use the fitted model to predict on the training data.
train_y_pred = tree_clf.predict(train_x)

# Generate and print the classification report.
# NOTE(review): the report is computed on the same data the model was
# fitted on, so it describes training performance only.
report = classification_report(y_true=train_y, y_pred=train_y_pred)
print(report)
precision recall f1-score support
0 1.00 0.96 0.98 117108
1 0.85 1.00 0.92 23569
accuracy 0.97 140677
macro avg 0.92 0.98 0.95 140677
weighted avg 0.97 0.97 0.97 140677