mirror of https://github.com/maderix/ANE.git
Merge pull request #17 from TastyHeadphones/tastyheadphones/short-dataset-underflow-fix
Fix token sampling underflow for short token datasets
This commit is contained in:
commit
3efa27d7a3
|
|
@@ -290,6 +290,12 @@ int main(int argc, char *argv[]) {
|
||||||
uint16_t *token_data = (uint16_t*)mmap(NULL, data_len, PROT_READ, MAP_PRIVATE, data_fd, 0);
|
uint16_t *token_data = (uint16_t*)mmap(NULL, data_len, PROT_READ, MAP_PRIVATE, data_fd, 0);
|
||||||
if (token_data == MAP_FAILED) { printf("mmap failed\n"); return 1; }
|
if (token_data == MAP_FAILED) { printf("mmap failed\n"); return 1; }
|
||||||
size_t n_tokens = data_len / 2;
|
size_t n_tokens = data_len / 2;
|
||||||
|
if (n_tokens <= (size_t)(SEQ + 1)) {
|
||||||
|
printf("Token data too short: need at least %d tokens, got %zu\n", SEQ + 2, n_tokens);
|
||||||
|
munmap(token_data, data_len);
|
||||||
|
close(data_fd);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
printf("Token data: %zu tokens (%.1f MB)\n", n_tokens, data_len/1e6);
|
printf("Token data: %zu tokens (%.1f MB)\n", n_tokens, data_len/1e6);
|
||||||
|
|
||||||
// Gradient buffers shared across layers (reused each step)
|
// Gradient buffers shared across layers (reused each step)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue