import numpy as np
import torch


def set_seed(seed: int = 42) -> None:
    """Seed the NumPy and PyTorch random number generators.

    Calls ``np.random.seed`` and ``torch.manual_seed`` with the same value
    so that subsequent draws from either library are repeatable.

    Args:
        seed: Value passed to both seeding calls. Defaults to 42.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)


def reward_function(state: object, action: object) -> float:
    """Return the reward for taking ``action``.

    Args:
        state: Accepted for interface compatibility; not read by this
            function.
        action: The action taken; compared for equality with the string
            ``'correct'``.

    Returns:
        ``1.0`` when ``action == 'correct'``, otherwise ``-0.1``.
    """
    return 1.0 if action == 'correct' else -0.1