Auto-expire stale upgrade jobs

This commit is contained in:
FengLee
2026-05-10 09:29:14 +08:00
parent e06fc806f1
commit a68c00ff93
2 changed files with 54 additions and 1 deletion

View File

@@ -40,6 +40,8 @@ type UpgradeJobState = {
finishedAt?: string;
logs: string[];
dryRun?: boolean;
stale?: boolean;
staleAt?: string;
};
type RuntimeStatus = {
@@ -53,6 +55,11 @@ type RuntimeStatus = {
const MAX_PACKAGE_BYTES = 300 * 1024 * 1024;
const RUNNING_STATUSES = new Set<UpgradeStatus>(['queued', 'running', 'rolling_back']);
/**
 * Reads a millisecond timeout override from the environment.
 *
 * Returns `fallbackMs` when the variable is unset, blank, or does not parse to
 * a finite positive number (for example "2h" or "-1"). Without this check a
 * typo would produce NaN, and every `elapsed < NaN` comparison is false, so
 * any in-flight job would be treated as stale immediately.
 *
 * @param envName Name of the environment variable to read.
 * @param fallbackMs Default timeout in milliseconds.
 * @returns The validated override, or `fallbackMs`.
 */
function readTimeoutMs(envName: string, fallbackMs: number): number {
  const raw = process.env[envName];
  if (raw === undefined || raw.trim() === '') return fallbackMs;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}

/**
 * Maximum time, in milliseconds, a job may stay in each in-flight status
 * without a state update before it is considered stale. Each entry can be
 * overridden through the corresponding `UPGRADE_STALE_*_MS` environment
 * variable; invalid overrides fall back to the default.
 */
const STALE_TIMEOUTS_MS: Record<string, number> = {
  queued: readTimeoutMs('UPGRADE_STALE_QUEUED_MS', 10 * 60 * 1000),
  running: readTimeoutMs('UPGRADE_STALE_RUNNING_MS', 2 * 60 * 60 * 1000),
  rolling_back: readTimeoutMs('UPGRADE_STALE_ROLLBACK_MS', 30 * 60 * 1000),
};
export async function GET(request: NextRequest) {
const authError = await requireAdmin(request);
@@ -237,7 +244,7 @@ async function readStates(): Promise<UpgradeJobState[]> {
try {
const statePath = path.join(jobsRoot, jobName, 'state.json');
const raw = await fs.readFile(statePath, 'utf8');
return JSON.parse(raw) as UpgradeJobState;
return await normalizeStaleState(JSON.parse(raw) as UpgradeJobState, statePath);
} catch {
return null;
}
@@ -249,6 +256,48 @@ async function readStates(): Promise<UpgradeJobState[]> {
.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
}
/**
 * Turns a job that has been stuck in an in-flight status for too long into a
 * terminal failure and persists the result.
 *
 * A state is returned unchanged when its status is not in `RUNNING_STATUSES`,
 * when neither `updatedAt` nor `startedAt` parses to a valid date, when no
 * usable timeout is configured, or when the last update is still within the
 * timeout for its status. Otherwise the job becomes `failed` (or
 * `rollback_failed` if it was rolling back), is flagged `stale`, gets an
 * explanatory log line appended, and is written back to `statePath`.
 *
 * @param state Job state as parsed from disk.
 * @param statePath Path of the `state.json` file the state was read from.
 * @returns The original state, or the rewritten stale state.
 * @throws Propagates any error from `fs.writeFile`.
 */
async function normalizeStaleState(state: UpgradeJobState, statePath: string): Promise<UpgradeJobState> {
  if (!RUNNING_STATUSES.has(state.status)) return state;

  const updatedAtMs = new Date(state.updatedAt || state.startedAt).getTime();
  if (!Number.isFinite(updatedAtMs)) return state;

  // Statuses without their own entry share the `running` timeout.
  const timeoutMs = STALE_TIMEOUTS_MS[state.status] || STALE_TIMEOUTS_MS.running;
  // A NaN or non-positive timeout (e.g. from a malformed environment override)
  // would make the comparison below false for every job and force-fail a live
  // upgrade on the first read. Leave the job untouched instead.
  if (typeof timeoutMs !== 'number' || !Number.isFinite(timeoutMs) || timeoutMs <= 0) return state;
  if (Date.now() - updatedAtMs < timeoutMs) return state;

  const now = new Date().toISOString();
  const isRollback = state.status === 'rolling_back';
  const error = isRollback
    ? `升级任务在回滚阶段超过 ${formatDuration(timeoutMs)} 没有状态更新,可能 runner 已退出或服务器曾重启,请人工检查备份与运行状态`
    : `升级任务超过 ${formatDuration(timeoutMs)} 没有状态更新,可能 runner 已退出或服务器曾重启,已自动解除升级锁`;

  const next: UpgradeJobState = {
    ...state,
    // NOTE(review): presumably leaving RUNNING_STATUSES is what releases the
    // upgrade lock mentioned in the message — confirm against the lock check.
    status: isRollback ? 'rollback_failed' : 'failed',
    step: isRollback ? 'rollback_stale' : 'stale',
    progress: 100,
    message: isRollback ? '升级回滚长时间无更新,请人工检查' : '升级任务长时间无更新,已解除升级锁',
    error,
    stale: true,
    staleAt: now,
    finishedAt: now,
    updatedAt: now,
    // Keep only the most recent 1000 log lines, including the new one.
    logs: [
      ...(state.logs || []),
      `[${now}] ${error}`,
    ].slice(-1000),
  };

  await fs.writeFile(statePath, `${JSON.stringify(next, null, 2)}\n`, { mode: 0o600 });
  return next;
}
/**
 * Formats a millisecond duration as a short Chinese label for user-facing
 * messages.
 *
 * Durations under one minute are shown in seconds, durations under one hour
 * in minutes, and longer durations as whole hours plus any remaining minutes
 * (for example 90 minutes becomes "1 小时 30 分钟" rather than being rounded
 * up to "2 小时"). Negative input is clamped to zero.
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted label.
 */
function formatDuration(ms: number): string {
  const totalSeconds = Math.max(0, Math.round(ms / 1000));
  if (totalSeconds < 60) return `${totalSeconds} 秒`;

  const totalMinutes = Math.round(totalSeconds / 60);
  if (totalMinutes < 60) return `${totalMinutes} 分钟`;

  // Split instead of rounding to whole hours so the label matches the
  // configured timeout.
  const hours = Math.floor(totalMinutes / 60);
  const minutes = totalMinutes % 60;
  return minutes === 0 ? `${hours} 小时` : `${hours} 小时 ${minutes} 分钟`;
}
async function writeState(jobDir: string, state: UpgradeJobState): Promise<void> {
await fs.mkdir(jobDir, { recursive: true, mode: 0o700 });
await fs.writeFile(path.join(jobDir, 'state.json'), `${JSON.stringify(state, null, 2)}\n`, { mode: 0o600 });

View File

@@ -61,6 +61,8 @@ type UpgradeJob = {
finishedAt?: string;
logs: string[];
dryRun?: boolean;
stale?: boolean;
staleAt?: string;
};
type UpgradeResponse = {
@@ -492,6 +494,7 @@ function UpgradeStatusPanel({
<StatusIcon status={job.status} />
<Badge variant="secondary">{job.dryRun ? '预检' : job.mode === 'hot' ? '热更新' : '冷更新'}</Badge>
{job.dryRun && <Badge className="bg-sky-500/15 text-sky-600 hover:bg-sky-500/15"></Badge>}
{job.stale && <Badge className="bg-amber-500/15 text-amber-600 hover:bg-amber-500/15"></Badge>}
<Badge className={statusBadgeClass(job.status)}>{statusLabel(job.status)}</Badge>
</div>
<div className="text-xs text-muted-foreground">{formatDate(job.updatedAt)}</div>
@@ -511,6 +514,7 @@ function UpgradeStatusPanel({
<InfoRow label="当前步骤" value={job.step} />
<InfoRow label="文件数量" value={`${changedFiles.length} 个文件`} />
<InfoRow label="需要重启" value={job.restartRequired ? '是' : '否'} />
{job.staleAt && <InfoRow label="超时标记" value={formatDate(job.staleAt)} />}
{job.backupFile && <InfoRow label="数据备份" value={job.backupFile} />}
{job.backupHash && <InfoRow label="备份校验" value={job.backupHash} />}
{job.sourceBackupFile && <InfoRow label="源码快照" value={job.sourceBackupFile} />}