Auto-expire stale upgrade jobs
This commit is contained in:
@@ -40,6 +40,8 @@ type UpgradeJobState = {
|
||||
finishedAt?: string;
|
||||
logs: string[];
|
||||
dryRun?: boolean;
|
||||
stale?: boolean;
|
||||
staleAt?: string;
|
||||
};
|
||||
|
||||
type RuntimeStatus = {
|
||||
@@ -53,6 +55,11 @@ type RuntimeStatus = {
|
||||
|
||||
const MAX_PACKAGE_BYTES = 300 * 1024 * 1024;
|
||||
const RUNNING_STATUSES = new Set<UpgradeStatus>(['queued', 'running', 'rolling_back']);
|
||||
/**
 * Parses a stale-timeout override (milliseconds) taken from an environment
 * variable.
 *
 * @param raw - Raw environment value; may be undefined or empty.
 * @param fallbackMs - Default used when the override is missing or invalid.
 * @returns The override when it is a finite number greater than zero,
 *   otherwise `fallbackMs`.
 */
function parseStaleTimeoutMs(raw: string | undefined, fallbackMs: number): number {
  if (raw === undefined || raw.trim() === '') return fallbackMs;
  const parsed = Number(raw);
  // A non-numeric value parses to NaN; comparing elapsed time against NaN is
  // always false, which would make every job look stale immediately.
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}

/**
 * Maximum time (ms) a job may stay in each in-flight status without a state
 * update before it is treated as stale. Each entry can be overridden through
 * the matching UPGRADE_STALE_*_MS environment variable.
 */
const STALE_TIMEOUTS_MS: Record<string, number> = {
  queued: parseStaleTimeoutMs(process.env.UPGRADE_STALE_QUEUED_MS, 10 * 60 * 1000),
  running: parseStaleTimeoutMs(process.env.UPGRADE_STALE_RUNNING_MS, 2 * 60 * 60 * 1000),
  rolling_back: parseStaleTimeoutMs(process.env.UPGRADE_STALE_ROLLBACK_MS, 30 * 60 * 1000),
};
|
||||
|
||||
export async function GET(request: NextRequest) {
|
||||
const authError = await requireAdmin(request);
|
||||
@@ -237,7 +244,7 @@ async function readStates(): Promise<UpgradeJobState[]> {
|
||||
try {
|
||||
const statePath = path.join(jobsRoot, jobName, 'state.json');
|
||||
const raw = await fs.readFile(statePath, 'utf8');
|
||||
return JSON.parse(raw) as UpgradeJobState;
|
||||
return await normalizeStaleState(JSON.parse(raw) as UpgradeJobState, statePath);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
@@ -249,6 +256,48 @@ async function readStates(): Promise<UpgradeJobState[]> {
|
||||
.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
|
||||
}
|
||||
|
||||
/**
 * Converts an in-flight upgrade job into a terminal failure when its state
 * has not been updated within the timeout configured for its status.
 *
 * Jobs whose status is not in RUNNING_STATUSES, jobs without a parseable
 * timestamp, and jobs still inside their timeout window are returned
 * unchanged. A stale `rolling_back` job becomes `rollback_failed`; any other
 * stale job becomes `failed`. The updated state is written back to
 * `statePath`.
 *
 * @param state - Job state as parsed from the state file.
 * @param statePath - Path of the state file to overwrite when the job is stale.
 * @returns The original state, or the stale-marked copy.
 */
async function normalizeStaleState(state: UpgradeJobState, statePath: string): Promise<UpgradeJobState> {
  if (!RUNNING_STATUSES.has(state.status)) return state;

  const updatedAtMs = new Date(state.updatedAt || state.startedAt).getTime();
  if (!Number.isFinite(updatedAtMs)) return state;

  const timeoutMs = STALE_TIMEOUTS_MS[state.status] || STALE_TIMEOUTS_MS.running;
  // A NaN or non-positive timeout (e.g. from a malformed override) would make
  // the comparison below false for every job and expire live upgrades at once,
  // so leave the job untouched instead.
  if (typeof timeoutMs !== 'number' || !Number.isFinite(timeoutMs) || timeoutMs <= 0) return state;
  if (Date.now() - updatedAtMs < timeoutMs) return state;

  const now = new Date().toISOString();
  const isRollback = state.status === 'rolling_back';
  const error = isRollback
    ? `升级任务在回滚阶段超过 ${formatDuration(timeoutMs)} 没有状态更新,可能 runner 已退出或服务器曾重启,请人工检查备份与运行状态`
    : `升级任务超过 ${formatDuration(timeoutMs)} 没有状态更新,可能 runner 已退出或服务器曾重启,已自动解除升级锁`;

  const next: UpgradeJobState = {
    ...state,
    status: isRollback ? 'rollback_failed' : 'failed',
    step: isRollback ? 'rollback_stale' : 'stale',
    progress: 100,
    message: isRollback ? '升级回滚长时间无更新,请人工检查' : '升级任务长时间无更新,已解除升级锁',
    error,
    stale: true,
    staleAt: now,
    finishedAt: now,
    updatedAt: now,
    // Keep only the most recent 1000 log lines, including the stale notice.
    logs: [...(state.logs || []), `[${now}] ${error}`].slice(-1000),
  };

  try {
    await fs.writeFile(statePath, `${JSON.stringify(next, null, 2)}\n`, { mode: 0o600 });
  } catch (err: unknown) {
    // Persisting the marker is best-effort: if the write fails, still report
    // the job as stale rather than throwing and losing the job for the caller.
    console.error(`Failed to persist stale upgrade state to ${statePath}:`, err);
  }
  return next;
}
|
||||
|
||||
/**
 * Formats a millisecond duration as a short Chinese label for user-facing
 * messages.
 *
 * Whole hours render as "N 小时", durations with leftover minutes as
 * "N 小时 M 分钟", sub-hour durations as "M 分钟", and sub-minute durations
 * as "S 秒". Non-finite or non-positive input renders as "0 分钟".
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms: number): string {
  if (!Number.isFinite(ms) || ms <= 0) return '0 分钟';

  // Show seconds for sub-minute values instead of rounding them to "0 分钟".
  const totalSeconds = Math.round(ms / 1000);
  if (totalSeconds < 60) return `${Math.max(1, totalSeconds)} 秒`;

  const totalMinutes = Math.round(ms / 60000);
  if (totalMinutes < 60) return `${totalMinutes} 分钟`;

  // Keep the minute remainder so e.g. 90 minutes is not reported as 2 hours.
  const hours = Math.floor(totalMinutes / 60);
  const minutes = totalMinutes % 60;
  return minutes === 0 ? `${hours} 小时` : `${hours} 小时 ${minutes} 分钟`;
}
|
||||
|
||||
async function writeState(jobDir: string, state: UpgradeJobState): Promise<void> {
|
||||
await fs.mkdir(jobDir, { recursive: true, mode: 0o700 });
|
||||
await fs.writeFile(path.join(jobDir, 'state.json'), `${JSON.stringify(state, null, 2)}\n`, { mode: 0o600 });
|
||||
|
||||
@@ -61,6 +61,8 @@ type UpgradeJob = {
|
||||
finishedAt?: string;
|
||||
logs: string[];
|
||||
dryRun?: boolean;
|
||||
stale?: boolean;
|
||||
staleAt?: string;
|
||||
};
|
||||
|
||||
type UpgradeResponse = {
|
||||
@@ -492,6 +494,7 @@ function UpgradeStatusPanel({
|
||||
<StatusIcon status={job.status} />
|
||||
<Badge variant="secondary">{job.dryRun ? '预检' : job.mode === 'hot' ? '热更新' : '冷更新'}</Badge>
|
||||
{job.dryRun && <Badge className="bg-sky-500/15 text-sky-600 hover:bg-sky-500/15">未覆盖文件</Badge>}
|
||||
{job.stale && <Badge className="bg-amber-500/15 text-amber-600 hover:bg-amber-500/15">超时解锁</Badge>}
|
||||
<Badge className={statusBadgeClass(job.status)}>{statusLabel(job.status)}</Badge>
|
||||
</div>
|
||||
<div className="text-xs text-muted-foreground">{formatDate(job.updatedAt)}</div>
|
||||
@@ -511,6 +514,7 @@ function UpgradeStatusPanel({
|
||||
<InfoRow label="当前步骤" value={job.step} />
|
||||
<InfoRow label="文件数量" value={`${changedFiles.length} 个文件`} />
|
||||
<InfoRow label="需要重启" value={job.restartRequired ? '是' : '否'} />
|
||||
{job.staleAt && <InfoRow label="超时标记" value={formatDate(job.staleAt)} />}
|
||||
{job.backupFile && <InfoRow label="数据备份" value={job.backupFile} />}
|
||||
{job.backupHash && <InfoRow label="备份校验" value={job.backupHash} />}
|
||||
{job.sourceBackupFile && <InfoRow label="源码快照" value={job.sourceBackupFile} />}
|
||||
|
||||
Reference in New Issue
Block a user