Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
Fix logprobs issues. (#5136)
Signed-off-by: Daniel Campora <961215+dcampora@users.noreply.github.com>
parent 53983ad273
commit e46267765f
@@ -721,8 +721,8 @@ class TRTLLMSampler(Sampler):
             if request.py_return_log_probs:
                 # NOTE: Log probs with drafting has not been tested yet.
                 begin_log_probs_offset = request.prompt_len if request.sampling_config.beam_width == 1 else 0
-                current_token = seq_len - request.prompt_len - len(
-                    num_new_tokens[beam]) + step
+                current_token = seq_len - request.prompt_len - num_new_tokens[
+                    beam] + step

                 log_probs.append({
                     new_token.item():
@@ -732,7 +732,7 @@ class TRTLLMSampler(Sampler):
                         rank=1)
                 })

-            if num_new_tokens[beam] > 0 and request.py_return_log_probs:
+            if request.py_return_log_probs:
                 cum_log_probs.append(
                     state.host.cum_log_probs[seq_slot * beam_width +
                                              beam].item())
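A minimal sketch (not the repository code) of the index arithmetic the first hunk touches, assuming num_new_tokens[beam] is a plain int count of tokens accepted for that beam in this iteration; all values below are hypothetical:

    seq_len = 16          # total tokens for this beam after the iteration (hypothetical)
    prompt_len = 10       # stands in for request.prompt_len (hypothetical)
    num_new_tokens = [3]  # tokens accepted for beam 0 in this iteration (hypothetical)
    beam = 0

    for step in range(num_new_tokens[beam]):
        # Offset of the newly accepted token within the generated (non-prompt) part.
        current_token = seq_len - prompt_len - num_new_tokens[beam] + step
        print(current_token)  # -> 3, 4, 5

    # If the per-beam value is an int count, the pre-fix expression
    # len(num_new_tokens[beam]) cannot be evaluated (len() needs a sized
    # object), which is consistent with the patch using the count directly.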