import random

import numpy as np
import torch

def set_seed(seed: int = 42) -> None:
    """Seed the global random number generators for reproducible runs.

    Seeds Python's built-in ``random`` module, NumPy's legacy global RNG
    (``np.random``), and PyTorch's default generator with the same value.

    Args:
        seed: Value passed to every generator. Defaults to 42.
    """
    # Python's own RNG must be seeded too; otherwise anything drawing from
    # ``random`` (shuffles, sampling) stays non-deterministic across runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

def reward_function(state, action):
    """Return the scalar reward for taking ``action``.

    The reward is 1.0 when ``action`` equals the string ``'correct'`` and
    -0.1 for any other value. ``state`` is accepted but not consulted by
    this implementation.
    """
    if action == 'correct':
        return 1.0
    return -0.1