chore: 过滤可视化

2026-06-14 03:01:09 +08:00 · 2026-05-27 22:29:44 +08:00 · 2026-05-27 22:29:44 +08:00 · 32227d2c44
commit 32227d2c44
parent 4b63de743a
6 changed files with 627 additions and 70 deletions
--- a/.gitignore
+++ b/.gitignore
@ -11,3 +11,4 @@ app/model/*
 !app/model/.gitkeep
 map_images
 data/result.json
 data/result.filtered.json
--- a/data/src/auto.ts
+++ b/data/src/auto.ts
@ -1,6 +1,6 @@
 import { writeFile } from 'fs/promises';
 import { autoLabelTowers } from './auto/auto';
-import { IAutoLabelConfig, TowerColor } from './auto/types';
+import { IAutoLabelConfig, IConvertedMapInfo, TowerColor } from './auto/types';
 import { GinkaDataset, GinkaTrainData } from './types';
 import { normalizeHeatmap } from './auto/heatmap';
@ -321,63 +321,93 @@ const labelConfig: IAutoLabelConfig = {
    }
 };
 /**
 * Builds the training record for a single converted floor.
 *
 * The record carries the tile matrix, its `[width, height]` size, the scalar
 * statistics vector, the symmetry / outer-wall flags encoded as 1 or 0, and
 * the room and high-degree branch counts taken from `floor.info`.
 *
 * @param floor Converted floor whose tile matrix and statistics are exported.
 * @param filterReasons Optional review-filter reasons; attached to the record
 *     only when the list is non-empty.
 * @returns The training record for this floor.
 */
 function buildTrainData(
    floor: IConvertedMapInfo,
    filterReasons?: string[]
 ): GinkaTrainData {
    const { info } = floor;
    const grid = floor.data.map;
    const height = grid.length;
    // Width is read from the first row of the tile matrix.
    const width = grid[0].length;
    // Encodes a boolean-like flag as 1 / 0.
    const asBit = (flag: unknown) => (flag ? 1 : 0);

    const record: GinkaTrainData = {
        map: grid,
        size: [width, height],
        // Heatmap channels are currently disabled:
        // heatmap: [
        //     normalizeHeatmap(info.wallHeatmap),
        //     normalizeHeatmap(info.enemyHeatmap),
        //     normalizeHeatmap(info.resourceHeatmap),
        //     normalizeHeatmap(info.potionHeatmap),
        //     normalizeHeatmap(info.gemHeatmap),
        //     normalizeHeatmap(info.keyHeatmap),
        //     normalizeHeatmap(info.itemHeatmap),
        //     normalizeHeatmap(info.entryHeatmap),
        //     normalizeHeatmap(info.doorHeatmap)
        // ],
        val: [
            info.globalDensity,
            info.wallDensity,
            info.doorDensity,
            info.enemyDensity,
            info.resourceDensity,
            info.gemDensity,
            info.potionDensity,
            info.keyDensity,
            info.itemDensity,
            // Entry count divided by width, then by height.
            info.entryCount / width / height,
            // The last six slots are written as constant zeros.
            0,
            0,
            0,
            0,
            0,
            0
        ],
        symmetry: [
            asBit(info.symmetryH),
            asBit(info.symmetryV),
            asBit(info.symmetryC)
        ],
        outerWall: asBit(info.outerWall),
        roomCount: info.roomCount,
        highDegBranchCount: info.highDegBranchCount
    };

    // Only records that actually hit a filter rule carry the reasons field.
    if (filterReasons?.length) {
        record.filterReasons = filterReasons;
    }
    return record;
 }
 /**
 * Derives the path of the review ("filtered") dataset from the main output path.
 *
 * A trailing `.json` extension (matched case-insensitively) is replaced by
 * `.filtered.json`; any other path simply gets `.filtered.json` appended.
 *
 * @param output Path of the main dataset file.
 * @returns Path of the sibling filtered dataset file.
 */
 function buildFilteredOutputPath(output: string) {
    const jsonSuffix = /\.json$/i;
    // Strip the extension when present so both cases share one return.
    const stem = jsonSuffix.test(output)
        ? output.replace(jsonSuffix, '')
        : output;
    return `${stem}.filtered.json`;
 }
 // Script entry point: labels all towers, then writes the accepted floors to
 // `output` and the floors set aside for review to the derived
 // `.filtered.json` path.
 (async () => {
    const filteredOutput = buildFilteredOutputPath(output);
    const result = await autoLabelTowers(towerInfo, folders, labelConfig);
    // Convert the result into the dataset format and write it to disk
    const dataset: GinkaDataset = {
        datasetId: Math.floor(Math.random() * 1e12),
        data: {}
    };
-    result.forEach(tower => {
+    const filteredDataset: GinkaDataset = {
        datasetId: Math.floor(Math.random() * 1e12),
        data: {}
    };
    result.accepted.forEach(tower => {
        tower.maps.forEach(floor => {
            const id = `${tower.tower.name}::${floor.mapId}`;
-            const width = floor.data.map[0].length;
+            dataset.data[id] = buildTrainData(floor);
            const height = floor.data.map.length;
            const info = floor.info;
            const data: GinkaTrainData = {
                map: floor.data.map,
                size: [width, height],
                // heatmap: [
                //     normalizeHeatmap(info.wallHeatmap),
                //     normalizeHeatmap(info.enemyHeatmap),
                //     normalizeHeatmap(info.resourceHeatmap),
                //     normalizeHeatmap(info.potionHeatmap),
                //     normalizeHeatmap(info.gemHeatmap),
                //     normalizeHeatmap(info.keyHeatmap),
                //     normalizeHeatmap(info.itemHeatmap),
                //     normalizeHeatmap(info.entryHeatmap),
                //     normalizeHeatmap(info.doorHeatmap)
                // ],
                val: [
                    info.globalDensity,
                    info.wallDensity,
                    info.doorDensity,
                    info.enemyDensity,
                    info.resourceDensity,
                    info.gemDensity,
                    info.potionDensity,
                    info.keyDensity,
                    info.itemDensity,
                    info.entryCount / width / height,
                    0,
                    0,
                    0,
                    0,
                    0,
                    0
                ],
                symmetry: [
                    info.symmetryH ? 1 : 0,
                    info.symmetryV ? 1 : 0,
                    info.symmetryC ? 1 : 0
                ],
                outerWall: info.outerWall ? 1 : 0,
                roomCount: info.roomCount,
                highDegBranchCount: info.highDegBranchCount
            };
            dataset.data[id] = data;
        });
    });
-    await writeFile(output, JSON.stringify(dataset), 'utf-8');
+    result.filtered.forEach(floor => {
        // Filtered floors keep their filter reasons in the exported record
        const id = `${floor.tower.name}::${floor.mapId}`;
        filteredDataset.data[id] = buildTrainData(floor, floor.filterReasons);
    });
    // Both dataset files are written concurrently
    await Promise.all([
        writeFile(output, JSON.stringify(dataset), 'utf-8'),
        writeFile(filteredOutput, JSON.stringify(filteredDataset), 'utf-8')
    ]);
    console.log(`结果已写入 ${output}`);
    console.log(`过滤复核数据集已写入 ${filteredOutput}`);
 })();
--- a/data/src/auto/auto.ts
+++ b/data/src/auto/auto.ts
@ -13,10 +13,67 @@ export interface ILabelResult {
    readonly maps: IConvertedMapInfo[];
 }
 /**
 * A converted floor that was set aside for review, together with the
 * review-filter reasons it matched.
 */
 export interface IFilteredMapInfo extends IConvertedMapInfo {
    /** Review-filter reasons this floor matched */
    readonly filterReasons: string[];
 }
 /**
 * Result of an auto-labelling run: the floors that passed filtering plus the
 * floors that matched a review rule.
 */
 export interface IAutoLabelResult {
    /** Floors that passed filtering */
    readonly accepted: ILabelResult[];
    /** Floors that matched a review rule */
    readonly filtered: IFilteredMapInfo[];
 }
 /**
 * Prefixes an issue message with its path, padding after the colon so that
 * messages of paths up to `maxLength` characters line up in one column.
 *
 * @param maxLength Length of the longest path in the group being printed.
 * @param path Path shown in front of the message.
 * @param content Message text.
 * @returns `"<path>: <padding><content>"`.
 */
 function addIssuePrefix(maxLength: number, path: string, content: string) {
    // Clamp at zero: String.prototype.repeat throws a RangeError for a
    // negative count, which happened whenever `path` exceeded `maxLength`.
    const padding = ' '.repeat(Math.max(0, maxLength - path.length));
    return `${path}: ${padding}${content}`;
 }
 /**
 * Lists the review-filter rules that a floor hits under the given config.
 *
 * Each rule is skipped when its `allow*` switch in `config` is truthy.
 * Reasons are returned in a fixed order: door cluster, enemy cluster, idle
 * door branch, repeated-guard door branch, idle enemy branch, repeated-guard
 * enemy branch, useless branch.
 *
 * @param floorInfo Computed statistics of the floor.
 * @param config Auto-label configuration holding the `allow*` switches.
 * @returns Reason tags for every rule hit; empty when none is hit.
 */
 function collectReviewFilterReasons(
    floorInfo: IConvertedMapInfo['info'],
    config: IAutoLabelConfig
 ) {
    const idleBanned = !config.allowIdleBranch;
    const repeatedGuardBanned = !config.allowRepeatedGuardIdleBranch;

    // Ordered rule table: [rule is hit, lazily built reason tag].
    const rules: Array<[boolean, () => string]> = [
        [
            !config.allowLargeDoorCluster && floorInfo.hasLargeDoorCluster,
            () => `large_door_cluster(max=${floorInfo.maxDoorClusterSize})`
        ],
        [
            !config.allowLargeEnemyCluster && floorInfo.hasLargeEnemyCluster,
            () => `large_enemy_cluster(max=${floorInfo.maxEnemyClusterSize})`
        ],
        [
            idleBanned && floorInfo.idleDoorBranchCount > 0,
            () => `idle_door_branch(count=${floorInfo.idleDoorBranchCount})`
        ],
        [
            repeatedGuardBanned && floorInfo.repeatedGuardDoorBranchCount > 0,
            () =>
                `repeated_guard_door_branch(count=${floorInfo.repeatedGuardDoorBranchCount})`
        ],
        [
            idleBanned && floorInfo.idleEnemyBranchCount > 0,
            () => `idle_enemy_branch(count=${floorInfo.idleEnemyBranchCount})`
        ],
        [
            repeatedGuardBanned && floorInfo.repeatedGuardEnemyBranchCount > 0,
            () =>
                `repeated_guard_enemy_branch(count=${floorInfo.repeatedGuardEnemyBranchCount})`
        ],
        [
            !config.allowUselessBranch && floorInfo.hasUselessBranch,
            () => 'useless_branch'
        ]
    ];

    return rules.filter(([hit]) => hit).map(([, describe]) => describe());
 }
 /**
 * 自动标注塔地图
 * @param towerInfo 所有塔的信息路径，文件包括颜色、标签等
@ -27,8 +84,9 @@ export async function autoLabelTowers(
    towerInfo: string,
    pathList: string[],
    config: IAutoLabelConfig
-) {
+): Promise<IAutoLabelResult> {
    const labelResult: ILabelResult[] = [];
    const filteredMaps: IFilteredMapInfo[] = [];
    // 统计被不同规则过滤掉的楼层
    let ignoredFloorsSize = 0;
@ -132,6 +190,10 @@ export async function autoLabelTowers(
                mapId: name,
                info: floorInfo
            };
            const reviewFilterReasons = collectReviewFilterReasons(
                floorInfo,
                config
            );
            // 配置过滤楼层
            if (floorInfo.maxEmptyArea > config.maxEmptyArea) {
                ignoredMaxEmptyArea++;
@ -189,6 +251,10 @@ export async function autoLabelTowers(
            }
            if (filteredByLargeDoorCluster || filteredByLargeEnemyCluster) {
                ignoredFloorsContinuous++;
                filteredMaps.push({
                    ...floorData,
                    filterReasons: reviewFilterReasons
                });
                continue;
            }
            const filteredByIdleDoorBranch =
@ -216,10 +282,18 @@ export async function autoLabelTowers(
                    floorInfo.hasRepeatedGuardIdleBranch)
            ) {
                ignoredFloorsIdle++;
                filteredMaps.push({
                    ...floorData,
                    filterReasons: reviewFilterReasons
                });
                continue;
            }
            if (!config.allowUselessBranch && floorInfo.hasUselessBranch) {
                ignoredFloorsUseless++;
                filteredMaps.push({
                    ...floorData,
                    filterReasons: reviewFilterReasons
                });
                continue;
            }
            // 自定义过滤楼层
@ -275,6 +349,10 @@ export async function autoLabelTowers(
    console.log(`闲置节点过滤：${ignoredFloorsIdle} 层`);
    console.log(`无用节点过滤：${ignoredFloorsUseless} 层`);
    console.log(`自定义过滤：${ignoredFloorsCustom} 层`);
    console.log(`复核数据集：${filteredMaps.length} 层`);
-    return labelResult;
+    return {
        accepted: labelResult,
        filtered: filteredMaps
    };
 }
--- a/data/src/types.ts
+++ b/data/src/types.ts
@ -57,6 +57,8 @@ export interface GinkaTrainData {
    roomCount: number;
    /** 高连接度分支节点数量原始值 */
    highDegBranchCount: number;
    /** 被过滤样本的原因标签，供人工复核使用 */
    filterReasons?: string[];
 }
 export interface GinkaDataset {
--- a/docs/special-door-clean-design.md
+++ b/docs/special-door-clean-design.md
@ -0,0 +1,359 @@
 # 机关门语义与数据集清洗修正文档
 ## 背景
 当前数据集清洗已经能处理连续门团、连续怪团、无用分支和一部分闲置分支，但还有一类常见且重要的结构没有被正确表达：**机关门驱动的战斗房间**。
 这类结构的共同点是：
 1. 地图中存在机关门。
 2. 门前或门附近会摆放一组怪物。
 3. 这些怪物在局部上未必直接守护资源，也未必在拓扑上形成“打掉以后暴露新区域”的普通分支。
 4. 但它们仍然有明确设计意义，因为它们承担了“开机关门”的推进语义。
 一个典型例子可以概括为：上方是一扇机关门，下方是一个小房间，房间里放若干怪物和空地。若只按当前“邻居数”或“后侧资源收益”来判断，这些怪物很容易被视为闲置怪；但从实际玩法上看，它们显然不是噪声结构。
 因此，这个问题本质上不是“过滤阈值太松或太紧”，而是**当前标签空间里没有把机关门作为独立语义类保留下来**，导致后续清洗算法看不到这层结构信息。
 ---
 ## 问题概述
 ### 当前异常现象
 在当前规则下，存在一批地图会因为“闲置怪”而被过滤，但人工复核时会发现这些怪物实际上属于机关门房间的一部分。它们看起来像是：
 1. 站在一片空地边缘。
 2. 不直接守护资源。
 3. 拓扑上可能只连接到一个邻接节点。
 4. 但所在区域同时存在机关门，击败这些怪物本身就是推进条件的一部分。
 也就是说，这些怪物虽然满足了“局部无影响分支”的几何特征，却**不满足“没有设计意义”这一真正的清洗目标**。
 ### 误判根因
 这个问题目前至少有三层原因：
 1. **标签层压扁**：机关门在原始塔数据中可被识别，但在当前主配置里没有变成独立 tile，而是与普通门共用同一标签。
 2. **拓扑层压扁**：拓扑图只区分 `Door` 和 `Enemy` 两种分支，不区分普通门和机关门。
 3. **过滤层失语义**：闲置怪、重复守卫、无用分支等规则只能看到“门/怪/空地/资源”的局部拓扑，看不到“这个怪物与机关门联动”的语义。
 只要这三层没有拆开，即使继续调整 `allowIdleBranch` 一类开关或相关阈值，也很难稳定解决问题。
 ---
 ## 当前实现现状
 ### 转换层已经能识别机关门，但没有保留独立标签
 当前实现中，原始塔数据里的 `specialDoor` 已经能在转换阶段被识别出来；这一点说明问题不是“完全不知道机关门存在”，而是**识别到了，但在标签化输出时被折叠掉了**。
 现状可以概括为：
 1. 转换阶段会把原始 `specialDoor` 映射到 `tiles.specialDoors[0]`。
 2. 但当前主配置里，`specialDoors` 与 `commonDoors` 实际共用了同一个标签值。
 3. 共享常量里，`specialDoorTiles` 也与普通门使用同一个集合值。
 这带来两个直接后果：
 1. 训练数据里的地图矩阵无法区分“普通门”和“机关门”。
 2. 当前的 `specialDoorCount` 统计并不是真正意义上的“机关门数量”，而只是复用了普通门的标签统计，观测值没有独立语义。
 换句话说，当前代码里虽然存在 `specialDoors` 这一字段，但它还没有真正成为**独立的数据集类别**。
 ### 拓扑层只有 Door/Enemy 两类分支
 当前拓扑图中的分支类型只有两种：
 1. `Door`
 2. `Enemy`
 这意味着：
 1. 连续门检测会把普通门和机关门都看成同一类门分支。
 2. 闲置门检测也无法区分普通门和机关门。
 3. 更重要的是，怪物侧根本无法知道“我所在的这个房间是否与机关门有关”。
 对于本问题来说，真正缺失的并不是“门是不是门”，而是“这个门是不是机关门”。
 ### 当前闲置筛选为什么会误杀
 现有闲置规则里，最敏感的是这两类：
 1. `neighbors.size === 1` 的闲置分支规则。
 2. 基于后侧收益缺失的无用分支规则。
 它们的问题并不是逻辑错误，而是默认假设了：
 1. 分支的价值主要来自局部守护资源。
 2. 分支的价值主要来自是否暴露新区域。
 但机关门关联怪的价值来自第三种来源：**全局或半全局的开门触发语义**。如果标签空间里没有 `Special Door`，这类语义就无法进入筛选逻辑，于是就会被误判成“看起来没用的怪”。
 ---
 ## 目标
 本轮文档希望固定以下目标：
 1. 在数据集标签层中，为机关门新增一个独立 tile 类别 `Special Door`。
 2. 在拓扑和清洗阶段，能够识别“机关门关联怪”，避免其被简单当作闲置怪过滤。
 3. 在不大幅重写现有拓扑框架的前提下，尽量复用已有的区域合并、邻接和入口连通逻辑。
 4. 让过滤结果继续保持可解释，至少能回答“为什么这个怪没有再被判成闲置怪”。
 5. 第一版实现优先保守，宁可少杀一些，也不要把常见机关门房间结构大量误删。
 ---
 ## 非目标与边界
 本轮文档暂时不做以下承诺：
 1. **不要求第一版就精确恢复脚本级联动关系**。也就是说，暂不要求从原始事件脚本中严格求出“哪一只怪控制哪一扇机关门”。
 2. **不要求立即重写所有门/怪规则**。本次重点是修正与机关门相关的误杀，而不是重构整套清洗体系。
 3. **不要求在本轮文档里完成模型改造细节**。但需要明确指出：新增 tile 会影响训练数据词表和 mask token 编号。
 4. **不改变机关门在大类上的门属性**。例如门密度、门类分支连通块等统计，默认仍然把机关门视为门的一种。
 ---
 ## 设计一：在标签层新增 `Special Door`
 ### 设计原则
 这里需要强调一个重要约束：当前训练数据里 `MASK_ID = 6`，因此**不能直接让 `Special Door` 占用编号 6，而不同步调整 mask token 的编号**。
 第一版比较稳妥的编号方案是：
 | 语义         | 当前主要编号 | 建议编号 |
 | ------------ | ------------ | -------- |
 | Empty        | 0            | 0        |
 | Wall         | 1            | 1        |
 | Common Door  | 2            | 2        |
 | Resource     | 3            | 3        |
 | Enemy        | 4            | 4        |
 | Entry        | 5            | 5        |
 | Special Door | 无独立编号   | 6        |
 | Mask         | 6            | 7        |
 这个方案的优点是：
 1. 现有 0 到 5 的主语义尽量不动。
 2. 只为 `Special Door` 新增一个实际 tile。
 3. `MASK_ID` 顺延到 7，语义清晰，不与真实地图 tile 混用。
 ### 对数据预处理侧的影响
 新增独立标签后，数据预处理侧至少需要满足：
 1. `commonDoorTiles` 与 `specialDoorTiles` 不再共享同一标签值。
 2. `doorTiles` 仍然是二者并集，用于总门密度等大类统计。
 3. `specialDoorCount` 才能真正表示机关门数量，而不是“普通门标签的重复统计”。
 4. 主配置中的 `specialDoors` 需要改成独立编号，而不是继续与普通门共用标签。
 ### 对训练侧的影响
 虽然本轮不直接修改训练代码，但文档必须明确：新增 `Special Door` 之后，训练侧至少要同步处理以下问题：
 1. 数据集的 tile 词表大小会加 1。
 2. `MASK_ID` 需要后移。
 3. Stage 1 / Stage 2 / Stage 3 的退化逻辑需要把 `Special Door` 视作门类的一部分处理。
 4. 统计目标里的门密度若仍按“大类门”处理，则需要把普通门和机关门一起计入。
 也就是说，`Special Door` 虽然是一个新 tile，但它在训练目标上仍然应属于“门超类”中的细分子类，而不是完全独立的结构域。
 ---
 ## 设计二：在拓扑层保留机关门子语义
 ### 为什么不能只改 tile，不改拓扑
 如果只是把地图矩阵里的机关门编号拆出来，但拓扑层仍然把它和普通门都压成同一个 `Door` 分支，那么清洗算法依然无法利用这条信息。
 因此，除了标签层扩展以外，拓扑层也需要补上“门子类型”这一维语义。
 ### 建议的数据结构方向
 当前更合适的方向，不是把 `BranchType` 直接从两类扩成三类，而是保留现有大类，再补一个门子类型字段。原因是：
 1. 现有很多逻辑只关心“门 vs 怪”，并不希望被额外分支复杂化。
 2. 连续门团、门密度等统计，仍然需要把普通门和机关门归到门大类里。
 3. 但闲置修正又确实需要知道“这个门是不是机关门”。
 因此更推荐的方向是：
 1. `branch` 仍然保持 `Door | Enemy`。
 2. 对门分支额外补一个 `doorKind = Common | Special`。
 这样可以同时满足：
 1. 老规则不需要大改。
 2. 新规则在需要的时候可以单独读取机关门语义。
 ### 机关门在拓扑统计中的默认归属
 当前版本建议固定以下口径：
 1. **大类统计**：机关门计入门密度、门连通块、门热力图。
 2. **子类统计**：另外单独记录机关门数量，必要时可追加机关门密度。
 3. **异常清洗**：仅在与机关门关联怪的识别相关时，读取 `doorKind = Special` 这一子语义。
 这意味着：新增 `Special Door` 不是为了把机关门彻底从“门”里面剥出去，而是为了给清洗逻辑一个足够可靠的判别信号。
 ---
 ## 设计三：为“机关门关联怪”增加保守豁免
 ### 问题本质
 需要修正的并不是“所有机关门附近的怪都要保留”，而是：
 > 如果某只怪所在的局部区域明显属于机关门房间的一部分，那么它不能仅仅因为 `neighbors.size === 1` 或“背后无资源”就被直接视为闲置怪或无用怪。
 换句话说，当前需要补的是一条**保守豁免规则**，而不是另一条更强的删除规则。
 ### 为什么第一版不建议直接解析脚本联动
 理论上，最精确的做法是从原始塔事件里直接解析：
 1. 哪扇机关门存在。
 2. 哪些怪物被击败后会触发开门。
 3. 一只怪是否同时参与多个门的触发。
 但这个方案的工程代价很高，而且不同塔版本、脚本写法、自定义事件逻辑都可能造成兼容成本。因此，第一版不建议把问题拉到脚本层求精确真值，而是先使用当前拓扑图已经具备的信息，做一个偏保守的结构性修正。
 ### 建议主规则：基于“机关门关联区域”的豁免
 当前版本更合适的主规则是：
 1. 先沿用现有的非分支区域合并逻辑，把 `Empty` 与 `Resource` 节点合并成更大的可进入区域。
 2. 若某个合并区域与至少一个 `Special Door` 分支相邻，则把这个区域记为“机关门关联区域”。
 3. 若某个怪物分支与某个机关门关联区域相邻，则把该怪记为“机关门关联怪候选”。
 4. 对机关门关联怪候选，不再仅凭 `neighbors.size === 1` 就直接计入 `idleEnemyBranchCount`。
 5. 对机关门关联怪候选，也不再仅凭“局部无资源收益”就直接计入无用分支命中。
 这条规则的核心思想是：
 1. 当前并不试图证明“这只怪一定会开门”。
 2. 当前只要求证明“这只怪所在区域与机关门强相关”。
 3. 一旦存在这种强相关，第一版就保守地不把它当作典型闲置怪删除。
 ### 为什么这个规则适合作为第一版
 这条规则有几个优点：
 1. 它只依赖现有拓扑图与门子类型，不依赖脚本解析。
 2. 它可以复用当前重复守卫规则里已经存在的“合并非分支区域”思路。
 3. 它解释性很强，可以直接回答“这只怪因为与机关门处在同一关联区域，所以没有被判成闲置怪”。
 4. 它天然偏保守，符合当前“宁可少杀，不要误杀重要结构”的工程目标。
 ### 已知代价
 这条豁免规则也有明确代价：
 1. 它可能会保留少量“恰好和机关门在同一区域、但其实也不太重要”的怪物。
 2. 它并不能恢复真正的脚本级触发关系，只是利用局部结构做代理。
 但当前版本更需要避免的是**高频、显眼、结构性明确的误杀**。与之相比，这种偏保守的漏删代价是可以接受的。
 ---
 ## 对现有筛选规则的具体影响
 ### 规则一：`idleEnemyBranchCount`
 这是当前最需要优先修正的规则。
 建议口径：
 1. 怪物若命中 `neighbors.size === 1`，先作为普通闲置怪候选。
 2. 再检查它是否属于机关门关联怪。
 3. 若是，则不计入 `idleEnemyBranchCount`。
 4. 若不是，则维持原判。
 ### 规则二：`hasUselessBranch`
 这条规则也可能误伤机关门关联怪，因为这类怪物往往不守局部资源。
 建议口径：
 1. 对普通分支，仍按现有“删除该分支后，后侧是否失去入口连通且没有资源收益”的规则处理。
 2. 对机关门关联怪，第一版默认不参与这条规则的硬过滤。
 原因是：机关门关联怪的价值本来就未必体现为“守护后侧资源”，继续套用这条规则容易再次把它们误杀。
 ### 规则三：`hasRepeatedGuardIdleBranch`
 这条规则的风险相对次要，但仍需要关注。因为一个机关门房间里可能存在多个同类怪，它们可能被错误理解成“重复守同一连通区域”。
 当前建议是：
 1. 第一版先增加观测统计，不急着强行改判。
 2. 如果抽样发现机关门房间经常命中这条规则，再补“机关门关联怪不参与 repeated guard 统计”这一豁免。
 也就是说，这条规则暂时列为**重点观察项**，不一定与 `idleEnemyBranchCount` 同步加入豁免。
 ### 规则四：连续门团 / 连续怪团
 这两条规则当前不建议因机关门而修改。
 原因是：
 1. 机关门仍然是门的一种，继续计入门类连通块是合理的。
 2. 机关门关联怪即使有特殊语义，也不意味着它们应当不受连续怪团规则约束。
 换句话说，当前修的是“局部价值误判”，不是“所有机关门怪都特殊”。
 ---
 ## 统计与可观测性要求
 为了让后续人工复核和规则调整有抓手，新增机关门语义后，至少应补以下统计：
 1. `specialDoorCount`：真正意义上的机关门数量。
 2. `specialDoorFloorCount`：包含机关门的楼层数量。
 3. `specialDoorLinkedEnemyCount`：被识别为机关门关联怪的数量。
 4. `ignoredIdleEnemyBySpecialDoorCount`：原本会命中闲置怪，但因机关门关联而被豁免的数量。
 5. `ignoredUselessBranchBySpecialDoorCount`：原本会命中无用分支，但因机关门关联而被豁免的数量。
 当前 filtered 数据集与图片标注体系也应支持这一点，至少能在抽样时回答：
 1. 某张图为什么被保留。
 2. 某张图为什么仍被过滤。
 3. 如果某个闲置怪没有被过滤，是不是因为它属于机关门关联怪。
 ---
 ## 验收方向
 本问题后续落地时，至少应满足以下验收方向：
 1. 数据集标签中，机关门拥有独立 tile 编号，不再与普通门共用同一个语义值。
 2. `specialDoorCount` 能真正反映机关门数量，而不是普通门的重复统计。
 3. 典型机关门房间中的怪物，不会再仅因 `neighbors.size === 1` 被直接判成闲置怪。
 4. 对包含机关门的地图进行抽样复核时，能明显看到误杀减少。
 5. 连续门团、连续怪团等其他清洗规则在大方向上保持不变，不因为这次修正而整体失真。
 ---
 ## 当前建议的实施顺序
 从工程风险看，当前更稳妥的顺序是：
 1. 先把 `Special Door` 从标签层独立出来。
 2. 再在拓扑层补上门子类型。
 3. 优先修正 `idleEnemyBranchCount` 的误判。
 4. 然后观察 `hasUselessBranch` 和 `hasRepeatedGuardIdleBranch` 的命中样本，决定是否追加豁免。
 这个顺序的原因是：如果连独立 tile 都没有，后面的筛选修正只能停留在模糊的局部几何启发式上，无法稳定描述“机关门关联怪”这一目标对象。
 ---
 ## 本文档暂不回答的问题
 以下问题在当前版本里先不定稿，留待后续实现与抽样验证后再讨论：
 1. 是否有必要进一步解析原始脚本，恢复精确的“怪 -> 机关门”触发关系。
 2. 机关门关联怪是否应该影响资源密度、怪物密度等统计解释方式。
 3. 是否需要为“机关门房间”本身增加一个独立结构标签，供后续训练条件使用。
 4. 机关门是否需要从普通门的连续门团统计中拆出去单独观测。
 当前文档只固定一件事：**机关门必须先成为独立语义类，闲置筛选才有可能在这个问题上做对。**
--- a/shared/visual.py
+++ b/shared/visual.py
@ -1,3 +1,4 @@
 import argparse
 import os
 import cv2
 import json
@ -24,25 +25,68 @@ def load_tiles(tile_folder):
 # -------------------------
 # 主处理逻辑
 # -------------------------
-def convert_dataset_to_images(
+def normalize_filter_reasons(train_data):
-    json_path,
+    reasons = train_data.get("filterReasons", [])
-    tile_folder,
+    if isinstance(reasons, str):
-    output_folder,
+        return [reasons]
-    tile_size=32
+    if not isinstance(reasons, list):
-):
+        return []
-    # 输出路径
+    return [str(reason) for reason in reasons if reason]
 def draw_filter_reasons(img, reasons):
    """Draw the filter reasons as white text in the top-left corner of ``img``.

    A filled black box sized to the text (clamped to the image bounds) is
    painted on a copy of the image, and that copy is blended back into
    ``img`` with weights 0.72 / 0.28. Each reason is then written on its
    own line. ``img`` is modified in place and also returned; when
    ``reasons`` is empty the image is returned untouched.
    """
    if not reasons:
        return img

    font_face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.45
    stroke = 1
    margin = 8
    spacing = 6

    # Measure every line once; getTextSize returns ((width, height), baseline).
    widths, heights = [], []
    for reason in reasons:
        (text_w, text_h), _ = cv2.getTextSize(reason, font_face, scale, stroke)
        widths.append(text_w)
        heights.append(text_h)
    row_height = max(heights)
    count = len(reasons)

    # Backdrop size: text extent plus margins, never larger than the image.
    backdrop_w = min(max(widths) + 2 * margin, img.shape[1])
    backdrop_h = min(
        2 * margin + count * row_height + max(0, count - 1) * spacing,
        img.shape[0]
    )

    shaded = img.copy()
    cv2.rectangle(shaded, (0, 0), (backdrop_w, backdrop_h), (0, 0, 0), -1)
    cv2.addWeighted(shaded, 0.72, img, 0.28, 0, img)

    for row, reason in enumerate(reasons):
        # Each row sits one line height plus one gap below the previous row.
        baseline_y = margin + row_height + row * (row_height + spacing)
        cv2.putText(
            img,
            reason,
            (margin, baseline_y),
            font_face,
            scale,
            (255, 255, 255),
            stroke,
            cv2.LINE_AA
        )
    return img
 def render_dataset_images(json_path, tile_dict, output_folder, tile_size=32):
    if not json_path:
        return
    if not os.path.exists(json_path):
        print(f"[WARN] 数据集 {json_path} 不存在，跳过")
        return
    os.makedirs(output_folder, exist_ok=True)
    # 加载 tiles
    tile_dict = load_tiles(tile_folder)
    # 读取 json
    with open(json_path, "r", encoding="utf-8") as f:
        dataset = json.load(f)
-    data = dataset["data"]
+    data = dataset.get("data", {})
-    for map_id, train_data in tqdm(data.items()):
+    for map_id, train_data in tqdm(data.items(), desc=os.path.basename(json_path)):
        map_matrix = np.array(train_data["map"])
        try:
@ -51,19 +95,62 @@ def convert_dataset_to_images(
            print(f"[ERROR] 地图 {map_id} 转换失败: {e}")
            continue
        reasons = normalize_filter_reasons(train_data)
        if reasons:
            img = draw_filter_reasons(img, reasons)
        out_path = os.path.join(output_folder, f"{map_id.replace('::', '-')}.png")
        cv2.imwrite(out_path, img)
-    print('地图处理完毕！')
+    print(f"{json_path} 地图处理完毕！")
 def convert_dataset_to_images(
    json_path,
    tile_folder,
    output_folder,
    tile_size=32,
    filtered_json_path=None
 ):
    """Render preview images for the main dataset and, optionally, the filtered one.

    Tiles are loaded once from ``tile_folder`` and shared by both renders.
    The main dataset is rendered into ``output_folder``; when
    ``filtered_json_path`` is truthy, that dataset is rendered into the
    ``filtered`` sub-folder of ``output_folder``.
    """
    tile_dict = load_tiles(tile_folder)

    # (dataset path, target folder) pairs, rendered in order.
    jobs = [(json_path, output_folder)]
    if filtered_json_path:
        jobs.append((filtered_json_path, os.path.join(output_folder, "filtered")))

    for dataset_path, target_folder in jobs:
        render_dataset_images(dataset_path, tile_dict, target_folder, tile_size)
 def parse_args():
    """Parse the command-line options of the preview renderer.

    Returns the ``argparse.Namespace`` with ``json_path``, ``tile_folder``,
    ``output_folder``, ``tile_size`` and ``filtered_json_path``.

    ``filtered_json_path`` defaults to ``None`` rather than a concrete path,
    so the caller can tell "not given" apart from an explicit path. With a
    string default, the ``is None`` fallback in the ``__main__`` block could
    never run.
    """
    parser = argparse.ArgumentParser(description="Convert dataset maps to preview images")
    parser.add_argument(
        "--json-path",
        default="data/result.json",
        help="dataset JSON file to render"
    )
    parser.add_argument(
        "--tile-folder",
        default="tiles",
        help="folder containing the tile images"
    )
    parser.add_argument(
        "--output-folder",
        default="map_images",
        help="folder the preview images are written to"
    )
    parser.add_argument(
        "--tile-size",
        type=int,
        default=32,
        help="tile size passed to the renderer"
    )
    parser.add_argument(
        "--filtered-json-path",
        default=None,
        help="filtered dataset JSON file to render (optional)"
    )
    return parser.parse_args()
 # -------------------------
 # Entry point
 # -------------------------
 if __name__ == "__main__":
    args = parse_args()
    filtered_json_path = args.filtered_json_path
    default_filtered_path = os.path.join("data", "result.filtered.json")
    # If no filtered dataset path is set (None), fall back to the default
    # filtered dataset, but only when that file exists on disk.
    if filtered_json_path is None and os.path.exists(default_filtered_path):
        filtered_json_path = default_filtered_path
    convert_dataset_to_images(
-        json_path="data/result.json",     # dataset file
+        json_path=args.json_path,
-        tile_folder="tiles",          # tile image folder
+        tile_folder=args.tile_folder,
-        output_folder="map_images",  # output folder
+        output_folder=args.output_folder,
-        tile_size=32                  # tile size
+        tile_size=args.tile_size,
        filtered_json_path=filtered_json_path
    )