- 日期: 2025-11-22
- 类型: 架构重构
- 状态: 已完成
`get_frame_state(frame1, frame2)`

两个模块都提供相同的接口模式:
- `get_frame_xxx(frame1, frame2)` - 返回字典(状态或动作)
- `sum_reward(xxx_dict)` - 计算奖励

#### GameState 模块
def get_frame_state(self, frame1, frame2) -> dict:
# 返回: {'in_battle': True, 'dead': False, 'alive': True, 'kill': False, 'assist': False}
def sum_reward(self, state_dict: dict) -> float:
# 累加所有 True 状态的得分
#### Action 模块
def get_frame_action(self, frame1, frame2) -> Dict[str, List[float]]:
# 返回: {'move': [0,0,0,1], 'attack': [0]*10}
def sum_reward(self, action_dict: Dict[str, List[float]]) -> float:
# 计算点积:action_values * weights
enabled_states: 启用的状态列表
['in_battle', 'dead', 'alive', 'kill', 'assist']
enabled_actions: 启用的动作列表
['move', 'attack']
def label(filepath, compute_actions=True, compute_states=True, overwrite=False):
for i in range(num_frames):
img = grp['image'][()]
# 单独处理 kill/assist 累计计数
prev_kill_count = ...
curr_kill_count = compat.detect_kill_count(frame=img)
def label(filepath, overwrite=False):
for i in range(num_frames):
img = grp['image'][()]
next_img = f[frame_list[i + 1]]['image'][()] if i < num_frames - 1 else img
# 统一传两帧
state_dict = self.game_state.get_frame_state(img, next_img)
action_dict = self.action.get_frame_action(img, next_img)
# 保存到 HDF5
for key, value in state_dict.items():
grp.create_dataset(f'state_{key}', data=value)
for key, values in action_dict.items():
grp.create_dataset(f'action_{key}', data=values)
def load(filepath, max_frames=None):
# 调用 compat 进行状态检测
if not self.compat.is_in_battle(state):
continue
if self.compat.is_dead(state):
continue
# 读取预计算的 reward
reward = frames[i].get('reward', 0.0)
reward_extra = frames[i].get('reward_extra', 0.0)
def load(filepath, max_frames=None):
# 从 HDF5 读取状态进行筛选
in_battle = frame.get('state_in_battle', False)
is_dead = frame.get('state_dead', False)
if not in_battle or is_dead:
skipped += 1
continue
# 读取状态并计算奖励
reward = 0.0
if self.game_state:
state_dict = {...} # 从 HDF5 读取 state_*
reward += self.game_state.sum_reward(state_dict)
if self.action:
action_dict = {...} # 从 HDF5 读取 action_*
reward += self.action.sum_reward(action_dict)
python scripts/label_data.py --file xxx.hdf5 --actions --states
python scripts/label_data.py --file xxx.hdf5 --config config.yaml
# enabled_states 和 enabled_actions 从配置文件读取
frame_000000/
├── image (array)
├── timestamp (float)
├── reward (float) # 预计算的奖励
├── reward_extra (float)
├── move (array) # 直接的动作字段
└── attack (array)
frame_000000/
├── image (array)
├── timestamp (float)
├── state_in_battle (bool) # 状态字段
├── state_dead (bool)
├── state_alive (bool)
├── state_kill (bool)
├── state_assist (bool)
├── action_move (array) # 动作字段
└── action_attack (array)
# config/agent.config.yaml
enabled_states:
- in_battle
- dead
- alive
- kill
- assist
enabled_actions:
- move
- attack
# 状态奖励
state_scores = {
'in_battle': 0, # 不参与得分,只用于筛选
'dead': -2,
'alive': 0.01,
'kill': 1,
'assist': 1
}
# 动作奖励
action_weights = {
'move': [0.0, 0.0, 0.0, 0.1], # 上, 下, 左, 右
'attack': [0.0] * 10
}
# 总奖励 = sum(state_scores[state],仅累加值为 True 的状态) + sum(action_values * action_weights)
- environment/reward.py → environment/game_state.py (重命名)
- environment/action_space.py: 添加 action_weights, get_frame_action, sum_reward
- environment/compatibility.py: 更新模块引用
- data/data_manager.py: 重写 label 方法
- data/training_buffer.py: 重写 load 方法
- config/config.py: 添加 enabled_states, enabled_actions
- scripts/label_data.py: 移除命令行参数
- scripts/test_pipeline.py: 更新调用
- environment/reward.py (已被 game_state.py 替代)

kill/assist 需要比较两帧的计数变化,因此 get_frame_state 也接收两帧,与 get_frame_action 保持一致的接口。所有比较逻辑封装在模块内部,label 不需要关心实现细节。
in_battle 只用于筛选数据,不参与得分计算。但仍然作为 enabled_states 的一部分保存,保持接口统一性。筛选逻辑:in_battle=True 且 dead=False。
状态(True/False)是客观事实,reward 是主观策略。分离后可以方便调整奖励权重而不需要重新 label。buffer 加载时动态计算,支持不同的训练策略实验。
这样设计避免了非战斗帧被错误标记为 alive 而获得奖励。
# 收集原始数据
python scripts/collect_from_video.py --video test.mp4 --output test.hdf5
# 运行 label
python scripts/label_data.py --file test.hdf5
# 检查 HDF5 字段
python -c "import h5py; f = h5py.File('test.hdf5', 'r'); print(list(f['frame_000000'].keys()))"
# 应该看到:state_in_battle, state_dead, state_alive, state_kill, state_assist, action_move, action_attack
# 加载数据到 buffer
python scripts/check_buffer.py --files test.hdf5
# 验证 reward 计算正确
# 完整流程
python scripts/test_pipeline.py --video test.mp4