Python Prophet生产力预测教程:时间序列预测工作表现
概述
生产力波动是现代工作者的常见挑战。有些日子高效如神,有些日子则困顿低效。通过时间序列分析,我们可以:
- 识别生产力模式:每周/每月周期
- 预测未来表现:提前规划工作量
- 检测异常因素:找出影响效率的关键变量
Facebook Prophet是一个强大的时间序列预测库,特别适合处理具有季节性和趋势的日常数据。
数据收集
1. 定义生产力指标
code
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# 生产力数据结构
def create_productivity_dataframe():
    """Return an empty DataFrame laid out for productivity tracking.

    The frame has no rows; it only fixes the column names and their order:

    - ds: date/time stamp
    - y: primary productivity metric (tasks completed, focus time,
      code commits, ...)
    - tasks_completed, focus_hours, meeting_hours: daily activity measures
    - energy_level: self-reported energy (1-10)
    - sleep_hours: hours slept
    - is_weekend, day_of_week: calendar features
    """
    column_names = (
        'ds',               # timestamp
        'y',                # primary metric
        'tasks_completed',  # number of tasks completed
        'focus_hours',      # hours of focused work
        'meeting_hours',    # hours spent in meetings
        'energy_level',     # energy level (1-10)
        'sleep_hours',      # hours of sleep
        'is_weekend',       # weekend flag
        'day_of_week',      # day of the week
    )
    empty_columns = {name: [] for name in column_names}
    return pd.DataFrame(empty_columns)
Code collapsed
2. 从真实数据源导入
code
# 从Todoist导入任务数据
import todoist
def fetch_todoist_tasks(api_token, days=90):
    """Fetch completed Todoist tasks and aggregate them into daily counts.

    Args:
        api_token: Token handed to ``todoist.TodoistAPI``.
        days: Size of the look-back window, counted back from now.

    Returns:
        DataFrame with columns ``ds`` (calendar date, UTC) and ``y`` (number
        of tasks completed on that date). When no task falls inside the
        window the frame is empty but still has both columns.
    """
    from datetime import timezone

    api = todoist.TodoistAPI(api_token)
    start_date = datetime.now(timezone.utc) - timedelta(days=days)

    completed_dates = []
    for item in api.completed.get_all()['items']:
        # pd.to_datetime(..., utc=True) accepts ISO strings with or without
        # an offset/"Z" suffix and always returns a timezone-aware value, so
        # the comparison below never mixes naive and aware datetimes.
        # Naive strings are interpreted as UTC.
        completed_at = pd.to_datetime(item['completed_at'], utc=True)
        if completed_at >= start_date:
            completed_dates.append(completed_at.date())

    if not completed_dates:
        # Nothing in the window: return the documented schema instead of
        # failing inside groupby on a column-less frame.
        return pd.DataFrame({'ds': [], 'y': []})

    df = pd.DataFrame({'date': completed_dates, 'completed': 1})
    daily_tasks = df.groupby('date').sum().reset_index()
    daily_tasks.columns = ['ds', 'y']
    return daily_tasks
# 从RescueTime导入专注时间数据
def fetch_rescuetime_data(api_key, days=90):
    """Fetch daily productive time (in hours) from the RescueTime data API.

    Args:
        api_key: RescueTime API key.
        days: Size of the look-back window, ending today.

    Returns:
        DataFrame with columns ``ds`` (timestamp of the interval) and ``y``
        (hours). Empty, but with both columns, when the API returns no rows.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    import requests

    end_date = datetime.now().date()
    start_date = end_date - timedelta(days=days)

    url = "https://www.rescuetime.com/anapi/data"
    # NOTE(review): parameter names follow the RescueTime Analytic Data API
    # as I understand it (restrict_kind / restrict_begin / restrict_end /
    # resolution_time) -- confirm against the current API documentation.
    params = {
        'key': api_key,
        'format': 'json',
        'perspective': 'interval',
        'resolution_time': 'day',
        'restrict_kind': 'productivity',
        'restrict_begin': start_date.isoformat(),
        'restrict_end': end_date.isoformat(),
    }
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # When the response labels a "Productivity" column, keep only rows with a
    # positive score so that only productive time is counted. If the column
    # is absent, every row is kept.
    headers = data.get('row_headers', [])
    productivity_col = (
        headers.index('Productivity') if 'Productivity' in headers else None
    )

    rows = []
    for row in data.get('rows', []):
        if productivity_col is not None and row[productivity_col] <= 0:
            continue
        rows.append({
            'ds': pd.to_datetime(row[0]),
            # presumably row[1] is "Time Spent (seconds)" -- convert to hours
            'y': row[1] / 3600,
        })

    if not rows:
        return pd.DataFrame({'ds': [], 'y': []})
    return pd.DataFrame(rows).groupby('ds').sum().reset_index()
Code collapsed
3. 模拟数据生成
code
def generate_productivity_data(days=180, start='2025-09-01', seed=42):
    """Generate simulated daily productivity data.

    The series is the sum of a 6-hour baseline, a linear upward trend, a
    weekly pattern (with weekends lowered), a 30-day cycle, Gaussian noise
    and a handful of unusually bad days, clipped at zero.

    Args:
        days: Number of consecutive days to generate.
        start: First date of the series (anything ``pd.date_range`` accepts).
        seed: Value passed to ``np.random.seed``; note that this reseeds
            NumPy's global random state.

    Returns:
        DataFrame with columns ``ds``, ``y``, ``is_weekend``, ``day_of_week``.
    """
    dates = pd.date_range(start=start, periods=days, freq='D')
    np.random.seed(seed)

    day_index = np.arange(days)
    is_weekend = dates.dayofweek >= 5

    # Baseline + trend + seasonality + noise
    base_productivity = 6  # baseline of 6 hours/day
    trend = np.linspace(0, 1.5, days)  # gradual improvement

    # Weekly pattern (weekdays vs. weekends)
    weekly_pattern = 2 * np.sin(2 * np.pi * day_index / 7)
    weekly_pattern[is_weekend] -= 3  # weekends are lower

    # 30-day cycle
    monthly_pattern = 0.5 * np.sin(2 * np.pi * day_index / 30)

    # Random fluctuation
    noise = np.random.normal(0, 0.5, days)

    # Occasional bad days (illness, fatigue, ...). Sampling without
    # replacement needs at most `days` picks, so cap the count for short
    # series instead of letting np.random.choice raise.
    n_outliers = min(5, days)
    if n_outliers > 0:
        outliers = np.random.choice(days, size=n_outliers, replace=False)
        noise[outliers] -= 3

    productivity = (base_productivity + trend +
                    weekly_pattern + monthly_pattern + noise)
    productivity = np.maximum(productivity, 0)  # keep values non-negative

    df = pd.DataFrame({
        'ds': dates,
        'y': productivity,
        'is_weekend': is_weekend,
        'day_of_week': dates.dayofweek
    })
    return df


# Generate the data set used by the rest of the tutorial
productivity_data = generate_productivity_data(180)
Code collapsed
Prophet模型训练
基础模型
code
from prophet import Prophet
import matplotlib.pyplot as plt
# Chronological 80/20 split: the most recent 20% of days form the test set
train_size = int(len(productivity_data) * 0.8)
train_data = productivity_data[:train_size]
test_data = productivity_data[train_size:]

# Configure the Prophet model
model = Prophet(
    # Trend
    changepoint_prior_scale=0.05,  # trend flexibility (larger = more flexible)
    changepoint_range=0.9,         # fraction of history that may hold changepoints
    # Seasonality
    # NOTE(review): yearly seasonality is forced on although the training
    # data spans less than a year, so this term is weakly identified --
    # consider 'auto' once more history is available.
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,       # not needed for one observation per day
    # Seasonality strength
    seasonality_prior_scale=10.0,
    seasonality_mode='additive',   # 'additive' or 'multiplicative'
    # Holiday effects
    holidays=None,                 # no custom holidays in the basic model
    # Uncertainty
    interval_width=0.8,            # width of the prediction interval
    uncertainty_samples=1000       # number of Monte Carlo samples
)

# Extra regressor
model.add_regressor('is_weekend', mode='additive')

# Fit on the training portion only
model.fit(train_data)

# Forecast the training period plus the 30 days that follow it
future_dates = model.make_future_dataframe(periods=30, freq='D')
future_dates['is_weekend'] = future_dates['ds'].dt.dayofweek >= 5
forecast = model.predict(future_dates)

# Plot the forecast. Annotate the axes Prophet already drew on:
# fig1.add_subplot(111) would stack a second, empty Axes over the plot.
fig1 = model.plot(forecast)
ax = fig1.gca()
ax.set_title('生产力预测:未来30天')
ax.set_xlabel('日期')
ax.set_ylabel('专注时间(小时)')
ax.axvline(x=test_data['ds'].iloc[0], color='red', linestyle='--',
           label='训练/测试分界')
ax.legend()
plt.show()
Code collapsed
模型解释
1. 组件分解
code
# Plot each component of the forecast in its own panel
fig2 = model.plot_components(forecast)
plt.suptitle('生产力预测组件分解', y=1.02)
plt.show()

# The same components are available as columns of the forecast frame
component_columns = ['ds', 'trend', 'weekly', 'yearly']
components = forecast[component_columns]
print(components.head(10))
Code collapsed
2. 趋势变化点
code
# Mark the dates where the fitted trend changes slope
from prophet.plot import add_changepoints_to_plot

fig3 = model.plot(forecast)
a = add_changepoints_to_plot(fig3.gca(), model, forecast)
plt.title('生产力趋势变化点')
plt.show()

# Inspect the changepoints in detail
changepoints = model.changepoints
deltas = model.params['delta'].mean(0)  # average over the first axis of the fitted deltas

# Rank by the magnitude of the rate change so that sharp declines are listed
# alongside sharp improvements; sorting by the signed value would push every
# negative shift to the bottom of the table.
changes_df = pd.DataFrame({
    'changepoint': changepoints,
    'delta': deltas
}).sort_values('delta', key=np.abs, ascending=False)

print("显著生产力变化点:")
print(changes_df.head(10))
Code collapsed
高级功能
自定义季节性
code
# Build the advanced model
advanced_model = Prophet(
    changepoint_prior_scale=0.05,
    seasonality_mode='multiplicative'
)

# Custom seasonalities
# 1. Monthly seasonality (one cycle per ~30.5 days, roughly a calendar month)
advanced_model.add_seasonality(
    name='monthly',
    period=30.5,
    fourier_order=5,
    prior_scale=10.0
)

# 2. Work-week seasonality (intended Monday-Friday pattern)
# NOTE(review): the data contains one row per calendar day, weekends
# included, so a 5-day period drifts against the real week -- confirm this
# is intended or fit on weekday-only data.
advanced_model.add_seasonality(
    name='work_week',
    period=5,
    fourier_order=3,
    prior_scale=15.0
)

# 3. Extra regressors (simulated values for the purpose of the tutorial)
# Sleep
productivity_data['sleep_hours'] = np.random.uniform(6, 9, len(productivity_data))
advanced_model.add_regressor('sleep_hours', mode='additive')

# Energy level
productivity_data['energy_level'] = np.random.uniform(1, 10, len(productivity_data))
advanced_model.add_regressor('energy_level', mode='additive')

# Fit
advanced_model.fit(productivity_data)

# Predict. The future frame also contains every historical date, so keep the
# recorded regressor values for those rows and use the assumed values only
# for dates that have no recorded value (the days being forecast).
future = advanced_model.make_future_dataframe(periods=30)
future = future.merge(
    productivity_data[['ds', 'sleep_hours', 'energy_level']],
    on='ds',
    how='left'
)
future['sleep_hours'] = future['sleep_hours'].fillna(7.5)   # assumed value
future['energy_level'] = future['energy_level'].fillna(7)   # assumed value

advanced_forecast = advanced_model.predict(future)
Code collapsed
添加假日效应
code
# 创建自定义假日
import pandas as pd
# Public holidays. Dates inside the training history let the model estimate
# the effect; dates after it tell the forecast where to apply that effect.
holidays = pd.DataFrame({
    'holiday': 'public_holiday',
    'ds': pd.to_datetime([
        '2025-10-01',                # National Day (falls inside the history)
        '2026-01-01',                # New Year's Day
        '2026-02-17', '2026-02-18',  # Spring Festival (Lunar New Year 2026)
        '2026-05-01',                # Labour Day
        '2026-10-01',                # National Day
    ]),
    'lower_window': -1,  # also cover the day before the holiday
    'upper_window': 1,   # also cover the day after the holiday
})

# Personal time off
personal_holidays = pd.DataFrame({
    'holiday': 'vacation',
    'ds': pd.to_datetime([
        '2026-03-15', '2026-03-16', '2026-03-17',  # personal leave
    ]),
    'lower_window': 0,
    'upper_window': 0
})

# ignore_index avoids duplicate row labels in the combined table
all_holidays = pd.concat([holidays, personal_holidays], ignore_index=True)

# Fit a model that knows about the holidays
holiday_model = Prophet(
    holidays=all_holidays,
    changepoint_prior_scale=0.05
)
holiday_model.fit(productivity_data)

holiday_forecast = holiday_model.predict(
    holiday_model.make_future_dataframe(periods=30)
)

# Show the estimated holiday effects on the days where one applies
holiday_effects = holiday_forecast[
    ['ds', 'holidays', 'public_holiday', 'vacation']
]
print(holiday_effects[holiday_effects['public_holiday'] != 0])
Code collapsed
异常检测
识别异常日
code
def detect_anomalies(forecast, threshold=2.0):
    """Flag days whose actual value deviates strongly from the prediction.

    A day is anomalous when the absolute residual ``y - yhat`` exceeds
    ``threshold`` times the standard deviation of all residuals. Rows with a
    missing ``y`` or ``yhat`` have a NaN residual and are never flagged.

    Args:
        forecast: DataFrame with at least the columns ``ds``, ``y`` and
            ``yhat``. It is not modified.
        threshold: Multiple of the residual standard deviation used as the
            cut-off.

    Returns:
        DataFrame of the anomalous rows with columns ``ds``, ``y``, ``yhat``,
        ``residual`` and ``anomaly_type`` (``'exceptional'`` for unusually
        high days, ``'low_productivity'`` for unusually low ones).

    Raises:
        KeyError: If a required column is missing.
    """
    missing = [col for col in ('ds', 'y', 'yhat') if col not in forecast.columns]
    if missing:
        raise KeyError(f"forecast is missing required columns: {missing}")

    # Work on a copy so the caller's frame does not gain helper columns.
    scored = forecast.copy()

    # Prediction error
    scored['residual'] = scored['y'] - scored['yhat']
    scored['abs_residual'] = scored['residual'].abs()

    cutoff = threshold * scored['residual'].std()

    # Flag anomalies
    scored['is_anomaly'] = scored['abs_residual'] > cutoff

    # Classify by direction
    scored['anomaly_type'] = 'normal'
    scored.loc[scored['residual'] > cutoff, 'anomaly_type'] = 'exceptional'
    scored.loc[scored['residual'] < -cutoff, 'anomaly_type'] = 'low_productivity'

    return scored.loc[
        scored['is_anomaly'],
        ['ds', 'y', 'yhat', 'residual', 'anomaly_type']
    ]
# Attach the observed values to the forecast, then flag the unusual days.
# The left join keeps forecast dates that have no observation; their `y`
# is NaN.
observed_values = productivity_data[['ds', 'y']]
forecast_with_actuals = forecast.merge(observed_values, on='ds', how='left')
anomalies = detect_anomalies(forecast_with_actuals)

print("异常生产力日:")
print(anomalies.sort_values('residual'))
Code collapsed
异常原因分析
code
def analyze_anomaly_causes(data, anomalies_df):
    """Collect the recorded context factors for every anomalous day.

    Args:
        data: DataFrame with columns ``ds``, ``is_weekend``,
            ``energy_level`` and ``sleep_hours``.
        anomalies_df: DataFrame with columns ``ds`` and ``anomaly_type``.

    Returns:
        DataFrame with one row per anomaly whose date occurs in ``data``.
        Anomalies without a matching date are skipped; when several rows of
        ``data`` share the date, the first one is used.
    """
    factor_columns = ('is_weekend', 'energy_level', 'sleep_hours')
    records = []
    for _, anomaly in anomalies_df.iterrows():
        day = anomaly['ds']
        same_day = data[data['ds'] == day]
        if same_day.empty:
            continue
        record = {
            'date': day,
            'anomaly_type': anomaly['anomaly_type'],
            'day_of_week': day.dayofweek,
        }
        for column in factor_columns:
            record[column] = same_day[column].values[0]
        records.append(record)
    return pd.DataFrame(records)
# Look up the recorded context (weekend flag, energy, sleep) of each anomaly
anomaly_analysis = analyze_anomaly_causes(
    data=productivity_data,
    anomalies_df=anomalies,
)
print(anomaly_analysis)
Code collapsed
预测评估
模型性能指标
code
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
def evaluate_forecast(actual, predicted):
    """Compute error metrics for a forecast.

    Args:
        actual: Observed values (any array-like of numbers).
        predicted: Predicted values, same length and order as ``actual``.

    Returns:
        Dict with the keys ``'MAE'``, ``'RMSE'``, ``'R²'`` and ``'MAPE'``
        (in percent). MAPE is computed over the points whose actual value is
        non-zero, because the percentage error is undefined at zero; it is
        NaN when every actual value is zero.
    """
    # Convert up front so plain lists work with the arithmetic below.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    r2 = r2_score(actual, predicted)

    nonzero = actual != 0
    if nonzero.any():
        relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = float('nan')

    metrics = {
        'MAE': mae,
        'RMSE': rmse,
        'R²': r2,
        'MAPE': mape
    }
    return metrics
# Evaluate on the hold-out period.
# The forecast does not necessarily cover every test date, so pair actual and
# predicted values by date instead of assuming both sides have the same
# number of rows.
test_forecast = forecast[forecast['ds'].isin(test_data['ds'])]
aligned = test_data[['ds', 'y']].merge(
    test_forecast[['ds', 'yhat']],
    on='ds',
    how='inner'
)
metrics = evaluate_forecast(
    aligned['y'].values,
    aligned['yhat'].values
)

print("模型性能指标:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.3f}")
Code collapsed
交叉验证
code
from prophet.diagnostics import cross_validation, performance_metrics
# Run cross-validation
df_cv = cross_validation(
    model,
    initial='90 days',   # initial training period
    period='30 days',    # spacing between cutoff dates
    horizon='30 days'    # forecast horizon
)

# Performance metrics.
# NOTE(review): as far as I know Prophet leaves out MAPE when the series
# contains values at or near zero, so print only the metrics that are
# actually present -- confirm against the Prophet diagnostics docs.
df_p = performance_metrics(df_cv)
wanted_metrics = ['horizon', 'mae', 'rmse', 'mape']
print(df_p[[col for col in wanted_metrics if col in df_p.columns]])

# Plot the error against the horizon; fall back to MAE if MAPE is unavailable
from prophet.plot import plot_cross_validation_metric
plot_metric = 'mape' if 'mape' in df_p.columns else 'mae'
fig = plot_cross_validation_metric(df_cv, metric=plot_metric)
plt.title(f'交叉验证:{plot_metric.upper()}随预测范围变化')
plt.show()
Code collapsed
实际应用
生产力记分卡
code
def create_productivity_scorecard(forecast, days_back=7, days_forward=7,
                                  today=None):
    """Summarise recent and upcoming predicted productivity.

    Args:
        forecast: Prophet forecast with the columns ``ds``, ``yhat``,
            ``yhat_lower`` and ``yhat_upper``.
        days_back: Length of the look-back window, ending the day before
            ``today``.
        days_forward: Length of the look-ahead window, starting at ``today``.
        today: Anchor date separating the two windows. Defaults to the
            current date; pass a date explicitly when the forecast does not
            cover the present.

    Returns:
        Dict with the keys ``period``, ``current_level``,
        ``predicted_level``, ``trend``, ``confidence`` and
        ``recommendation``. Values that cannot be computed (a window with no
        forecast rows, or a zero baseline) are reported as ``'N/A'`` rather
        than as ``nan``.
    """
    anchor = pd.Timestamp.now() if today is None else pd.Timestamp(today)
    today = anchor.normalize()

    # Look-back window: [today - days_back, today)
    historical = forecast[
        (forecast['ds'] >= today - pd.Timedelta(days=days_back)) &
        (forecast['ds'] < today)
    ]
    # Look-ahead window: [today, today + days_forward)
    prediction = forecast[
        (forecast['ds'] >= today) &
        (forecast['ds'] < today + pd.Timedelta(days=days_forward))
    ]

    period = f"{days_back}天历史 / {days_forward}天预测"
    not_available = 'N/A'

    if historical.empty or prediction.empty:
        # The forecast does not cover one of the windows.
        return {
            'period': period,
            'current_level': not_available,
            'predicted_level': not_available,
            'trend': not_available,
            'confidence': not_available,
            'recommendation': get_productivity_recommendation(float('nan'))
        }

    avg_historical = historical['yhat'].mean()
    avg_prediction = prediction['yhat'].mean()

    # Relative change of the predicted level; undefined for a zero baseline.
    if avg_historical == 0:
        trend = float('nan')
        trend_text = not_available
    else:
        trend = (avg_prediction - avg_historical) / avg_historical * 100
        arrow = '↑' if trend > 0 else ('↓' if trend < 0 else '→')
        trend_text = f"{arrow} {abs(trend):.1f}%"

    # Heuristic confidence: one minus the mean interval width relative to
    # the predicted level, floored at 0%.
    uncertainty = prediction['yhat_upper'].mean() - prediction['yhat_lower'].mean()
    if avg_prediction == 0:
        confidence_text = not_available
    else:
        confidence = max(0.0, (1 - uncertainty / avg_prediction) * 100)
        confidence_text = f"{confidence:.0f}%"

    scorecard = {
        'period': period,
        'current_level': f"{avg_historical:.1f} 小时/天",
        'predicted_level': f"{avg_prediction:.1f} 小时/天",
        'trend': trend_text,
        'confidence': confidence_text,
        'recommendation': get_productivity_recommendation(trend)
    }
    return scorecard


def get_productivity_recommendation(trend):
    """Map a trend percentage to a short recommendation.

    Args:
        trend: Relative change in percent; NaN when it could not be computed.

    Returns:
        The recommendation text for the band the trend falls into, or a
        notice that there is not enough data when ``trend`` is NaN.
    """
    if pd.isna(trend):
        return "数据不足,无法给出建议"
    if trend > 10:
        return "表现优异,保持当前节奏"
    elif trend > 0:
        return "稳步提升,继续加油"
    elif trend > -10:
        return "注意调整,避免过度疲劳"
    else:
        return "建议重新评估工作安排"
# Build the scorecard from the basic model's forecast. The windows are
# anchored on the current date, so the forecast has to cover today.
scorecard = create_productivity_scorecard(forecast=forecast)
for key, value in scorecard.items():
    line = f"{key}: {value}"
    print(line)
Code collapsed
关键要点
- Prophet简化时间序列建模:自动处理趋势和季节性
- 多变量提升预测精度:添加睡眠、能量等回归变量
- 异常检测帮助优化:找出影响效率的关键因素
- 交叉验证验证模型:确保预测可靠性
- 定期重新训练:随着数据积累更新模型
常见问题
需要多少历史数据?
建议至少90天的每日数据。6个月到1年的数据能让每周和每月的模式更稳定;如果希望Prophet拟合年度季节性,通常需要两年以上的历史数据。
如何处理缺失数据?
Prophet能自动处理缺失值,但建议:
- 零星缺失:保持原样
- 大段缺失:用均值填充或标记为特殊事件
预测不准怎么办?
- 检查数据质量
- 调整 `changepoint_prior_scale` 参数
- 添加更多相关回归变量
- 考虑使用 `seasonality_mode='multiplicative'`
参考资料
- Prophet官方文档: https://facebook.github.io/prophet/
- 时间序列预测最佳实践
- RescueTime API文档
- Todoist API文档
发布日期:2026年3月8日 最后更新:2026年3月8日