The Markovian Thinker
Collection
Reformulating the RL of reasoning LLMs through Markovian Thinking paradigm.
•
7 items
•
Updated
•
10
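A fine-tune of deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B, trained with the Delethink RL paradigm. See the paper for full details.
Base model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B. Prompts should ask the model to put its final answer within \boxed{}.
Let:
- $C$ = context size (`context_size`): the maximum number of new tokens generated in the first chunk.
- $m$ = Markovian state size (`markovian_size`): the number of trailing tokens of a chunk carried over into the next prompt.
- $I$ = iteration cap (`iteration_cap`): the maximum number of chunks generated.
Effective thinking budget is: $C + (I - 1)(C - m)$ tokens (the first chunk generates up to $C$ tokens, each later chunk up to $C - m$).
For this checkpoint, we recommend: $C = 8192$, $m = 4096$, $I = 5$, which gives $8192 + 4 \times 4096 = 24576$ tokens (about 24k).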
import asyncio
import sglang as sgl
async def delethink_tracing(llm, query_ids, context_size=8192, markovian_size=4096, iteration_cap=5):
    """Generate a long trace in bounded-size chunks and return all generated token ids.

    The first chunk is generated from ``query_ids`` alone with up to
    ``context_size`` new tokens. Every later chunk is generated from a rebuilt
    prompt -- the query, the first 100 tokens of the first chunk, and the last
    ``markovian_size`` tokens of the previous chunk -- with up to
    ``context_size - markovian_size`` new tokens. Generation ends when a chunk
    finishes with reason type ``"stop"`` or after ``iteration_cap`` chunks.

    Args:
        llm: Object exposing an awaitable
            ``async_generate(input_ids=..., sampling_params=..., return_logprob=...)``.
            The response must be a mapping with ``meta_info.finish_reason.type``
            and either ``output_ids`` or ``meta_info.output_token_logprobs``
            (triples whose second element is the token id).
        query_ids: Token ids of the prompt (a list; it is not mutated).
        context_size: New-token limit for the first chunk.
        markovian_size: Number of trailing tokens of a chunk carried into the
            next prompt. A non-positive value carries nothing.
        iteration_cap: Maximum number of chunks to generate.

    Returns:
        A flat list with the generated token ids of every chunk, in order.
    """
    sampling_params = {"temperature": 0.6}
    head_keep = 100  # leading tokens of the first chunk pinned after the query
    trace_response_ids = []
    iterations = 0
    prompt_ids = query_ids
    while iterations < iteration_cap:
        params = dict(sampling_params)
        # Later chunks leave room for the carried-over tail in the prompt.
        params["max_new_tokens"] = (context_size - markovian_size) if iterations > 0 else context_size
        resp = await llm.async_generate(input_ids=prompt_ids, sampling_params=params, return_logprob=True)
        meta = resp["meta_info"]

        if "output_ids" in resp:
            out_ids = resp["output_ids"]
        else:
            # A comprehension (unlike unpacking ``zip(*triples)``) also handles
            # an empty logprob list without raising.
            out_ids = [tok_id for _, tok_id, _ in meta["output_token_logprobs"]]

        # Extend incrementally instead of ``sum(chunks, [])``, which re-copies
        # the accumulated list for every chunk.
        trace_response_ids.extend(out_ids)

        if iterations == 0:
            query_ids = query_ids + out_ids[:head_keep]

        if meta["finish_reason"]["type"] == "stop":
            break

        # ``out_ids[-0:]`` would be the whole chunk, so guard the zero case.
        carry = out_ids[-markovian_size:] if markovian_size > 0 else []
        prompt_ids = query_ids + carry
        iterations += 1
    return trace_response_ids
def main():
    """Run one Delethink trace on a sample math problem and print the decoded tokens."""
    engine = sgl.Engine(
        model_path="McGill-NLP/delethink-24k-1.5b",
        dtype="bfloat16",
        attention_backend="flashinfer",
        mem_fraction_static=0.8,
        log_level="WARNING",
    )
    tokenizer = engine.tokenizer_manager.tokenizer

    # Sample problem followed by the instruction to box the final answer.
    question = (
        r"There exist real numbers $x$ and $y$, both greater than 1, such that "
        r"$\log_x\left(y^x\right)=\log_y\left(x^{4y}\right)=10$. Find $xy$."
        "\n\nPlease reason step by step, and put your final answer within \\boxed{}."
    )
    conversation = [{"role": "user", "content": question}]
    prompt_token_ids = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
    )

    trace_coro = delethink_tracing(
        engine,
        prompt_token_ids,
        context_size=8192,
        markovian_size=4096,
        iteration_cap=5,
    )
    generated_ids = asyncio.run(trace_coro)

    # Special tokens are kept in the printed text.
    print(tokenizer.decode(generated_ids, skip_special_tokens=False))
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
@misc{Aghajohari2025:TheMarkovianThinker,
title={The Markovian Thinker},
author={Milad Aghajohari and Kamran Chitsaz and Amirhossein Kazemnejad and Sarath Chandar and Alessandro Sordoni and Aaron Courville and Siva Reddy},
year={2025},
eprint={2510.06557},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2510.06557},
}
Base model
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B