Python职业倦怠检测器:Git提交分析预警系统
概述
**职业倦怠(Burnout)**是工作相关的心理综合征,表现为:
- 情绪耗竭
- 去个性化
- 个人成就感降低
对于开发者,Git提交历史可以客观反映工作状态:
- 提交频率 → 工作强度
- 代码质量 → 精神状态
- 工作时间 → 工作生活平衡
检测维度
Git提交指标
| 指标 | 健康状态 | 倦怠信号 |
|---|---|---|
| 提交频率 | 稳定、有节奏 | 不规律、频繁加班 |
| 提交时段 | 正常工作时间 | 深夜/凌晨提交 |
| 代码质量 | 稳定 | bug增加、质量下降 |
| commit message | 清晰描述 | 简短、敷衍 |
环境设置
安装依赖
code
# Install every third-party package used by the examples in one invocation.
pip install \
    gitpython \
    pandas \
    numpy \
    matplotlib \
    seaborn \
    scikit-learn
Code collapsed
Git数据提取
1. Git提交历史分析
code
# git_analyzer.py
import git
from datetime import datetime, timedelta
from typing import List, Dict
import pandas as pd
import numpy as np
class GitAnalyzer:
    """Read commit history from a Git repository and derive work-pattern metrics.

    ``get_commits`` pulls raw commit records from the wrapped ``git.Repo``,
    ``get_commit_patterns`` turns them into a DataFrame with time features,
    and the ``analyze_*`` methods summarise that DataFrame. All ``analyze_*``
    methods accept an empty DataFrame and return zeroed metrics for it.
    """

    # Commits at or after this hour count as late-night work.
    LATE_NIGHT_START_HOUR = 22
    # Commits at or before this hour count as early-morning work.
    EARLY_MORNING_END_HOUR = 6
    # Commit messages shorter than this many characters count as "short".
    SHORT_MESSAGE_LENGTH = 10

    # Columns of the frame returned by ``get_commit_patterns``; used to build
    # a well-formed empty frame when there are no commits.
    _PATTERN_COLUMNS = (
        'hash', 'author', 'email', 'date', 'message',
        'files_changed', 'insertions', 'deletions',
        'hour', 'day_of_week', 'is_weekend',
        'is_late_night', 'is_early_morning', 'time_since_last',
    )

    def __init__(self, repo_path: str):
        """Open the Git repository located at ``repo_path``."""
        self.repo = git.Repo(repo_path)
        self.repo_path = repo_path

    def get_commits(self, days: int = 90) -> List[Dict]:
        """Return one record per commit made within the last ``days`` days.

        Args:
            days: Size of the look-back window, counted back from now.

        Returns:
            A list of dicts with the keys ``hash``, ``author``, ``email``,
            ``date``, ``message``, ``files_changed``, ``insertions`` and
            ``deletions``. Commits from every author are included.
        """
        since = datetime.now() - timedelta(days=days)
        # Hand git a whole-second timestamp; the default string form of a
        # datetime also carries microseconds, which the date filter does not need.
        since_arg = since.strftime('%Y-%m-%d %H:%M:%S')

        commits = []
        # Iterate lazily instead of materialising the whole history first.
        for commit in self.repo.iter_commits(since=since_arg):
            # Read the stats object once and reuse it (presumably it is
            # recomputed on every attribute access -- confirm in GitPython).
            stats = commit.stats
            commits.append({
                'hash': commit.hexsha,
                'author': commit.author.name,
                'email': commit.author.email,
                # fromtimestamp() yields the local time of the machine
                # running the analysis, not the committer's own timezone.
                'date': datetime.fromtimestamp(commit.committed_date),
                'message': commit.message.strip(),
                'files_changed': len(stats.files),
                'insertions': stats.total['insertions'],
                'deletions': stats.total['deletions'],
            })
        return commits

    def get_commit_patterns(self, commits: List[Dict]) -> pd.DataFrame:
        """Convert commit records into a DataFrame with derived time features.

        Args:
            commits: Records shaped like the output of ``get_commits``; each
                must at least carry a ``date`` entry.

        Returns:
            The records sorted by ``date`` with the extra columns ``hour``,
            ``day_of_week``, ``is_weekend``, ``is_late_night``,
            ``is_early_morning`` and ``time_since_last`` (hours since the
            previous commit). An empty input yields an empty frame that still
            has all expected columns.
        """
        if not commits:
            # Without rows there is no 'date' column to derive features from.
            return pd.DataFrame(columns=list(self._PATTERN_COLUMNS))

        df = pd.DataFrame(commits)
        df['date'] = pd.to_datetime(df['date'])

        # Time-of-day and day-of-week features.
        df['hour'] = df['date'].dt.hour
        df['day_of_week'] = df['date'].dt.dayofweek
        df['is_weekend'] = df['day_of_week'] >= 5  # 5 = Saturday, 6 = Sunday
        df['is_late_night'] = df['hour'] >= self.LATE_NIGHT_START_HOUR
        df['is_early_morning'] = df['hour'] <= self.EARLY_MORNING_END_HOUR

        # Gap to the previous commit, in hours (NaN for the first commit).
        df = df.sort_values('date')
        df['time_since_last'] = df['date'].diff().dt.total_seconds() / 3600
        return df

    def analyze_work_hours(self, df: pd.DataFrame) -> Dict:
        """Summarise when commits happen.

        Args:
            df: Frame produced by ``get_commit_patterns``.

        Returns:
            A dict with ``hour_distribution`` (hour -> commit count),
            ``weekday_commits``, ``weekend_commits``, ``late_night_commits``,
            ``early_morning_commits`` and ``overtime_ratio`` (percentage of
            commits made late at night or early in the morning).
        """
        total = len(df)
        if total == 0:
            # Avoid dividing by zero; report an all-zero summary instead.
            return {
                'hour_distribution': {},
                'weekday_commits': 0,
                'weekend_commits': 0,
                'late_night_commits': 0,
                'early_morning_commits': 0,
                'overtime_ratio': 0.0,
            }

        hour_distribution = df['hour'].value_counts().sort_index()
        weekend_commits = int(df['is_weekend'].sum())
        late_night = int(df['is_late_night'].sum())
        early_morning = int(df['is_early_morning'].sum())

        return {
            'hour_distribution': hour_distribution.to_dict(),
            'weekday_commits': total - weekend_commits,
            'weekend_commits': weekend_commits,
            'late_night_commits': late_night,
            'early_morning_commits': early_morning,
            'overtime_ratio': (late_night + early_morning) / total * 100,
        }

    def analyze_commit_quality(self, df: pd.DataFrame) -> Dict:
        """Summarise commit size and commit-message length.

        The input frame is not modified.

        Args:
            df: Frame with ``files_changed`` and ``message`` columns.

        Returns:
            A dict with ``avg_files_per_commit``, ``avg_message_length`` and
            ``short_message_ratio`` (percentage of messages shorter than
            ``SHORT_MESSAGE_LENGTH`` characters).
        """
        total = len(df)
        if total == 0:
            return {
                'avg_files_per_commit': 0.0,
                'avg_message_length': 0.0,
                'short_message_ratio': 0.0,
            }

        # Keep the lengths in a local Series rather than writing a new column
        # into the caller's DataFrame.
        message_lengths = df['message'].str.len()
        short_count = (message_lengths < self.SHORT_MESSAGE_LENGTH).sum()

        return {
            'avg_files_per_commit': df['files_changed'].mean(),
            'avg_message_length': message_lengths.mean(),
            'short_message_ratio': short_count / total * 100,
        }
Code collapsed
2. 工作负荷分析
code
# burnout_detector.py
class BurnoutDetector:
    """Score burnout risk from the commit metrics produced by a ``GitAnalyzer``.

    The score is the sum of five independent checks (overtime, weekend work,
    lack of rest days, short commit messages, irregular rhythm) worth at most
    30 + 20 + 20 + 15 + 15 = 100 points.
    """

    # Score thresholds for the risk levels, checked from highest to lowest.
    HIGH_RISK_SCORE = 70
    MEDIUM_RISK_SCORE = 40
    LOW_RISK_SCORE = 20

    def __init__(self, git_analyzer: "GitAnalyzer"):
        """Store the analyzer that supplies commit data.

        ``burnout_score`` and ``risk_factors`` hold the outcome of the most
        recent successful ``analyze`` call.
        """
        self.analyzer = git_analyzer
        self.burnout_score = 0
        self.risk_factors = []

    def analyze(self, days: int = 90) -> Dict:
        """Run the full analysis and build the report.

        Args:
            days: Number of days of history to analyse.

        Returns:
            A report dict with the keys ``summary``, ``work_hours``,
            ``commit_quality``, ``frequency``, ``risk_factors`` and
            ``recommendations``; or ``{"error": ...}`` when the period
            contains no commits.
        """
        commits = self.analyzer.get_commits(days)
        # Check for an empty history before building the DataFrame: with no
        # rows there are no columns to derive the time features from.
        if not commits:
            return {"error": "没有提交记录"}

        df = self.analyzer.get_commit_patterns(commits)

        # Per-dimension metrics.
        work_hours = self.analyzer.analyze_work_hours(df)
        commit_quality = self.analyzer.analyze_commit_quality(df)
        frequency_analysis = self._analyze_commit_frequency(df)

        # Combined assessment.
        burnout_assessment = self._assess_burnout_risk(
            work_hours,
            commit_quality,
            frequency_analysis,
        )
        self.burnout_score = burnout_assessment['score']
        self.risk_factors = burnout_assessment['factors']

        return {
            "summary": {
                "total_commits": len(commits),
                "analysis_period_days": days,
                "burnout_risk_level": burnout_assessment['level'],
                "burnout_score": burnout_assessment['score'],
            },
            "work_hours": work_hours,
            "commit_quality": commit_quality,
            "frequency": frequency_analysis,
            "risk_factors": burnout_assessment['factors'],
            "recommendations": self._generate_recommendations(burnout_assessment),
        }

    def _analyze_commit_frequency(self, df: pd.DataFrame) -> Dict:
        """Compute per-day commit statistics over the calendar span of ``df``.

        Every calendar day between the first and the last commit is counted,
        with days that have no commits contributing zero. The mean, variance
        and rest-day ratio are all computed over that full calendar series.

        Args:
            df: Frame with a ``date`` column (one row per commit).

        Returns:
            A dict with ``avg_daily_commits``, ``commit_variance``,
            ``rest_day_ratio`` (percentage of days without commits),
            ``total_days_analyzed`` and ``active_days``.
        """
        if len(df) == 0:
            return {
                'avg_daily_commits': 0.0,
                'commit_variance': 0.0,
                'rest_day_ratio': 0.0,
                'total_days_analyzed': 0,
                'active_days': 0,
            }

        commit_days = pd.to_datetime(df['date']).dt.normalize()
        per_active_day = df.groupby(commit_days).size()

        # groupby only yields days that HAVE commits; re-index onto the full
        # calendar so that commit-free days appear as explicit zeros.
        calendar = pd.date_range(per_active_day.index.min(),
                                 per_active_day.index.max(), freq='D')
        daily_commits = per_active_day.reindex(calendar, fill_value=0)

        total_days = len(daily_commits)
        no_commit_days = int((daily_commits == 0).sum())

        # Sample variance is undefined (NaN) for a single day; report 0 there.
        commit_variance = float(daily_commits.var()) if total_days > 1 else 0.0

        return {
            'avg_daily_commits': float(daily_commits.mean()),
            'commit_variance': commit_variance,
            'rest_day_ratio': 100.0 * no_commit_days / total_days,
            'total_days_analyzed': total_days,
            'active_days': total_days - no_commit_days,
        }

    @classmethod
    def _risk_level(cls, score) -> str:
        """Map a numeric score to 'high', 'medium', 'low' or 'healthy'."""
        if score >= cls.HIGH_RISK_SCORE:
            return 'high'
        if score >= cls.MEDIUM_RISK_SCORE:
            return 'medium'
        if score >= cls.LOW_RISK_SCORE:
            return 'low'
        return 'healthy'

    def _assess_burnout_risk(self, work_hours: Dict,
                             commit_quality: Dict,
                             frequency: Dict) -> Dict:
        """Turn the per-dimension metrics into a score, level and factor list.

        Args:
            work_hours: Needs ``overtime_ratio``, ``weekday_commits`` and
                ``weekend_commits``.
            commit_quality: Needs ``short_message_ratio``.
            frequency: Needs ``rest_day_ratio`` and ``commit_variance``.

        Returns:
            ``{'score': ..., 'level': ..., 'factors': [...]}`` where each
            factor is a dict with ``factor``, ``severity`` and ``value``.
        """
        score = 0
        factors = []

        def add(points, name, severity, value):
            nonlocal score
            score += points
            factors.append({'factor': name, 'severity': severity, 'value': value})

        # 1. Overtime (up to 30 points).
        overtime_ratio = work_hours['overtime_ratio']
        overtime_text = f"{overtime_ratio:.1f}%的提交在非正常工作时间"
        if overtime_ratio > 30:
            add(30, '频繁加班', 'high', overtime_text)
        elif overtime_ratio > 15:
            add(20, '经常加班', 'medium', overtime_text)
        elif overtime_ratio > 5:
            add(10, '偶尔加班', 'low', overtime_text)

        # 2. Weekend work (up to 20 points).
        all_commits = work_hours['weekday_commits'] + work_hours['weekend_commits']
        # No commits at all means no weekend work; avoids dividing by zero.
        weekend_ratio = (
            work_hours['weekend_commits'] / all_commits * 100 if all_commits else 0.0
        )
        weekend_text = f"{weekend_ratio:.1f}%的提交在周末"
        if weekend_ratio > 30:
            add(20, '周末工作过多', 'high', weekend_text)
        elif weekend_ratio > 15:
            add(10, '经常周末工作', 'medium', weekend_text)

        # 3. Too few rest days (up to 20 points).
        rest_ratio = frequency['rest_day_ratio']
        rest_text = f"只有{rest_ratio:.1f}%的日子没有提交"
        if rest_ratio < 10:
            add(20, '休息不足', 'high', rest_text)
        elif rest_ratio < 20:
            add(10, '休息日较少', 'medium', rest_text)

        # 4. Terse commit messages (15 points).
        short_msg_ratio = commit_quality['short_message_ratio']
        if short_msg_ratio > 40:
            add(15, '提交质量下降', 'medium',
                f"{short_msg_ratio:.1f}%的提交描述简短")

        # 5. Irregular rhythm (15 points).
        if frequency['commit_variance'] > 50:
            add(15, '工作节奏不规律', 'medium', '提交量波动较大')

        return {
            'score': score,
            'level': self._risk_level(score),
            'factors': factors,
        }

    def _generate_recommendations(self, assessment: Dict) -> List[str]:
        """Build up to ten recommendation strings for an assessment.

        Args:
            assessment: Dict with ``score`` and ``factors`` as returned by
                ``_assess_burnout_risk``.

        Returns:
            General advice for the score band followed by advice targeted at
            the individual risk factors, truncated to ten entries.
        """
        score = assessment['score']
        factors = assessment['factors']

        if score >= self.HIGH_RISK_SCORE:
            recommendations = [
                "⚠️ 高倦怠风险:强烈建议休假调整",
                "📅 安排完全断联的休息日",
                "🧘 尝试冥想或其他放松活动",
                "👥 考虑与主管讨论工作负荷",
                "🏃 建立规律的锻炼习惯",
            ]
        elif score >= self.MEDIUM_RISK_SCORE:
            recommendations = [
                "⚡ 中度倦怠风险:注意调整工作方式",
                "📅 设定明确的下班时间",
                "🚫 周末尽量避免查看工作消息",
                "😴 保证充足睡眠(7-8小时)",
                "🎯 学会拒绝额外任务",
            ]
        elif score >= self.LOW_RISK_SCORE:
            recommendations = [
                "✅ 低倦怠风险:保持良好习惯",
                "📊 定期自我评估工作状态",
                "💪 继续维持工作生活平衡",
                "🌟 分享经验帮助同事",
            ]
        else:
            recommendations = [
                "🌟 工作状态良好:继续保持",
                "📈 可以承担更多挑战性工作",
                "👥 帮助支持压力较大的同事",
            ]

        # Factor-specific advice, matched on the factor's display name.
        for factor in factors:
            name = factor['factor']
            if '加班' in name:
                recommendations.append("🌙 减少夜间工作,设定工作边界")
            if '周末' in name:
                recommendations.append("🏖️ 保护周末休息时间")
            if '休息不足' in name:
                recommendations.append("😴 强制安排休息日")
            if '质量下降' in name:
                recommendations.append("📝 认真写commit message,提升代码质量")

        return recommendations[:10]  # cap the list at ten entries
Code collapsed
可视化报告
生成图表
code
# visualizer.py
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime
class BurnoutReportVisualizer:
    """Render a burnout analysis report as a 2x2 grid of matplotlib charts."""

    # Bar/wedge colours.
    _ALERT_COLOR = '#e74c3c'
    _NORMAL_COLOR = '#3498db'
    _LEVEL_COLORS = {
        'high': '#e74c3c',
        'medium': '#f39c12',
        'low': '#f1c40f',
    }
    _HEALTHY_COLOR = '#2ecc71'

    def create_comprehensive_report(self, analysis_result: Dict, output_path: str):
        """Draw all four charts and save the figure to ``output_path``.

        Args:
            analysis_result: Report dict containing at least ``work_hours``
                and ``summary`` sections.
            output_path: File the figure is written to.
        """
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        try:
            fig.suptitle('Git提交健康分析报告', fontsize=16, fontweight='bold')
            # 1. Commits per hour of day.
            self._plot_hour_distribution(analysis_result, axes[0, 0])
            # 2. Weekday vs. weekend commits.
            self._plot_weekday_weekend(analysis_result, axes[0, 1])
            # 3. Daily commit trend (placeholder).
            self._plot_daily_trend(analysis_result, axes[1, 0])
            # 4. Burnout risk gauge.
            self._plot_burnout_gauge(analysis_result, axes[1, 1])
            fig.tight_layout()
            fig.savefig(output_path, dpi=300, bbox_inches='tight')
        finally:
            # Close this specific figure even if drawing or saving failed.
            plt.close(fig)

    def _plot_hour_distribution(self, result: Dict, ax):
        """Draw a bar chart of commit counts per hour of day.

        Hours from 22 onwards and up to 6 are drawn in the alert colour and
        sit inside a shaded band.
        """
        hour_dist = result['work_hours']['hour_distribution']
        hours = list(hour_dist.keys())
        counts = list(hour_dist.values())

        colors = [
            self._ALERT_COLOR if h >= 22 or h <= 6 else self._NORMAL_COLOR
            for h in hours
        ]
        ax.bar(hours, counts, color=colors)
        ax.set_xlabel('小时')
        ax.set_ylabel('提交数')
        ax.set_title('提交时段分布')
        # Bars are centred on the integer hour, so shift the bands by half an
        # hour to cover exactly the bars that are coloured as off-hours.
        ax.axvspan(21.5, 23.5, alpha=0.2, color='red', label='深夜')
        ax.axvspan(-0.5, 6.5, alpha=0.2, color='red')
        ax.legend()

    def _plot_weekday_weekend(self, result: Dict, ax):
        """Draw a two-bar chart comparing weekday and weekend commit counts."""
        work_hours = result['work_hours']
        categories = ['工作日', '周末']
        counts = [
            work_hours['weekday_commits'],
            work_hours['weekend_commits'],
        ]
        bars = ax.bar(categories, counts,
                      color=[self._NORMAL_COLOR, self._ALERT_COLOR])
        ax.set_ylabel('提交数')
        ax.set_title('工作日 vs 周末提交')

        # Print each bar's value just above it.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height,
                    f'{int(height)}',
                    ha='center', va='bottom')

    def _plot_daily_trend(self, result: Dict, ax):
        """Draw a placeholder panel; the report dict carries no per-day data."""
        ax.text(0.5, 0.5, '每日提交趋势图\n(需要原始数据)',
                ha='center', va='center', fontsize=12)
        ax.set_title('每日提交趋势')

    def _plot_burnout_gauge(self, result: Dict, ax):
        """Draw the burnout score as a ring chart coloured by risk level."""
        score = result['summary']['burnout_score']
        level = result['summary']['burnout_risk_level']

        color = self._LEVEL_COLORS.get(level, self._HEALTHY_COLOR)

        # Keep both wedge sizes non-negative even for an out-of-range score.
        filled = min(max(score, 0), 100)
        # Without ``autopct`` the call returns only (patches, texts), so the
        # return value is deliberately not unpacked.
        ax.pie(
            [filled, 100 - filled],
            colors=[color, '#ecf0f1'],
            startangle=90,
            counterclock=True,
            wedgeprops={'width': 0.5},
        )
        ax.text(0, 0, f'{int(score)}',
                ha='center', va='center',
                fontsize=36, fontweight='bold')
        ax.text(0, 0.2, '倦怠风险评分',
                ha='center', va='center',
                fontsize=10)

        level_text = {
            'high': '高风险',
            'medium': '中风险',
            'low': '低风险',
            'healthy': '健康',
        }
        # Fall back to the raw level name for a level not listed above.
        ax.set_title(f'倦怠风险: {level_text.get(level, level)}')
Code collapsed
使用示例
完整工作流
code
# main.py
from git_analyzer import GitAnalyzer
from burnout_detector import BurnoutDetector
from visualizer import BurnoutReportVisualizer
def analyze_repository(repo_path: str, days: int = 90,
                       output_path: str = 'burnout_analysis_report.png'):
    """Analyse a Git repository, print the findings and save a chart report.

    Args:
        repo_path: Path of the repository to analyse.
        days: Number of days of history to analyse.
        output_path: File the chart report is written to.

    Returns:
        The report dict produced by ``BurnoutDetector.analyze``. When the
        detector reports an error (e.g. no commits in the period) the error
        dict is returned and no summary or chart is produced.
    """
    print(f"分析仓库: {repo_path}")
    print(f"分析周期: {days}天\n")

    # 1. Set up the analyzer and detector.
    git_analyzer = GitAnalyzer(repo_path)
    detector = BurnoutDetector(git_analyzer)

    # 2. Run the analysis.
    print("正在分析Git提交历史...")
    analysis_result = detector.analyze(days=days)

    # The detector returns {"error": ...} instead of a full report when there
    # is nothing to analyse; stop here rather than fail on missing sections.
    if 'error' in analysis_result:
        print(f"\n分析中止: {analysis_result['error']}")
        return analysis_result

    # 3. Print the summary.
    print("\n=== 分析摘要 ===")
    summary = analysis_result['summary']
    print(f"总提交数: {summary['total_commits']}")
    print(f"倦怠风险等级: {summary['burnout_risk_level']}")
    print(f"倦怠风险评分: {summary['burnout_score']}/100")

    # 4. Print the risk factors.
    print("\n=== 风险因素 ===")
    for factor in analysis_result['risk_factors']:
        print(f"⚠️ {factor['factor']} ({factor['severity']})")
        print(f" {factor['value']}")

    # 5. Print the recommendations.
    print("\n=== 改进建议 ===")
    for i, rec in enumerate(analysis_result['recommendations'], 1):
        print(f"{i}. {rec}")

    # 6. Render the chart report.
    print("\n正在生成可视化报告...")
    visualizer = BurnoutReportVisualizer()
    visualizer.create_comprehensive_report(analysis_result, output_path)
    print(f"报告已保存: {output_path}")

    return analysis_result
# Usage example: runs only when the file is executed as a script.
if __name__ == "__main__":
    # Analyse the repository in the current working directory.
    result = analyze_repository(repo_path='.', days=90)
    # To analyse a different repository instead:
    # result = analyze_repository('/path/to/your/repo', days=60)
Code collapsed
关键要点
- Git提交是工作状态的客观指标:不依赖主观报告
- 多维度综合评估:时间、频率、质量
- 早期预警很重要:在严重倦怠前干预
- 可视化帮助理解:图表直观展示风险
- 建议需可执行:提供具体改进方案
常见问题
准确性如何?
Git分析客观但有限:
- ✅ 客观记录提交时间(可近似反映工作时段,但不等同于实际工作时长)
- ⚠️ 无法区分工作提交和个人项目
- ⚠️ 无法衡量工作难度
隐私考虑?
注意:
- 只分析自己的仓库
- 敏感信息(作者姓名、邮箱等)需脱敏
- 不适合强制分析团队
如何改善?
根据建议调整:
- 设定工作边界时间
- 保护周末休息
- 改善commit质量
- 定期休假调整
参考资料
- WHO职业倦怠指南
- 软件工程师心理健康研究
- GitPython文档
- 职业健康安全标准
发布日期:2026年3月8日 最后更新:2026年3月8日