Fix token sampling underflow on short datasets

This commit is contained in:
tastyheadphones 2026-03-03 11:42:42 +09:00
parent 1b792fce34
commit 2b3b7ae5cc
1 changed files with 11 additions and 5 deletions

View File

@ -274,11 +274,17 @@ int main(int argc, char *argv[]) {
int data_fd = open(DATA_PATH, O_RDONLY);
if (data_fd < 0) { printf("Cannot open %s\n", DATA_PATH); return 1; }
struct stat st; fstat(data_fd, &st);
size_t data_len = st.st_size;
uint16_t *token_data = (uint16_t*)mmap(NULL, data_len, PROT_READ, MAP_PRIVATE, data_fd, 0);
if (token_data == MAP_FAILED) { printf("mmap failed\n"); return 1; }
size_t n_tokens = data_len / 2;
printf("Token data: %zu tokens (%.1f MB)\n", n_tokens, data_len/1e6);
size_t data_len = st.st_size;
uint16_t *token_data = (uint16_t*)mmap(NULL, data_len, PROT_READ, MAP_PRIVATE, data_fd, 0);
if (token_data == MAP_FAILED) { printf("mmap failed\n"); return 1; }
size_t n_tokens = data_len / 2;
if (n_tokens <= (size_t)(SEQ + 1)) {
printf("Token data too short: need at least %d tokens, got %zu\n", SEQ + 2, n_tokens);
munmap(token_data, data_len);
close(data_fd);
return 1;
}
printf("Token data: %zu tokens (%.1f MB)\n", n_tokens, data_len/1e6);
// Gradient buffers shared across layers (reused each step)
float *dy = (float*)malloc(SEQ*DIM*4); // gradient flowing backward