mirror of https://github.com/maderix/ANE.git
Add --data flag to pass training data path from dashboard to binary
This commit is contained in:
parent
9595b1a499
commit
389ee0dc77
|
|
@ -523,15 +523,19 @@ def draw(term):
|
|||
|
||||
buf = []
|
||||
|
||||
def put(y, x, text, style=''):
|
||||
def put(y, x, text, style='', clear_eol=False):
|
||||
if 0 <= y < h and x < w:
|
||||
text = text[:w - x]
|
||||
suffix = term.clear_eol if clear_eol else ''
|
||||
if style:
|
||||
buf.append(term.move(y, x) + style + text + term.normal)
|
||||
buf.append(term.move(y, x) + style + text + term.normal + suffix)
|
||||
return
|
||||
buf.append(term.move(y, x) + text)
|
||||
buf.append(term.move(y, x) + text + suffix)
|
||||
|
||||
buf.append(term.home + term.clear)
|
||||
buf.append(term.home)
|
||||
# Clear each line individually (avoids full-screen flash from term.clear)
|
||||
for y in range(h):
|
||||
buf.append(term.move(y, 0) + term.clear_eol)
|
||||
|
||||
mid_x = w // 2
|
||||
right_w = w - mid_x - 1
|
||||
|
|
@ -764,7 +768,7 @@ def set_nonblock(fd):
|
|||
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
|
||||
|
||||
def spawn_training(resume=False, steps=10000, dynamic=False, ane=False, scratch=False,
|
||||
lr=None, accum=None, no_ane_extras=False):
|
||||
lr=None, accum=None, no_ane_extras=False, data=None):
|
||||
if dynamic:
|
||||
cmd = 'cd training_dynamic && make 2>&1 && ./train'
|
||||
elif ane:
|
||||
|
|
@ -781,6 +785,8 @@ def spawn_training(resume=False, steps=10000, dynamic=False, ane=False, scratch=
|
|||
cmd += f' --accum {accum}'
|
||||
if no_ane_extras and ane:
|
||||
cmd += ' --no-ane-extras'
|
||||
if data is not None:
|
||||
cmd += f' --data {data}'
|
||||
cmd += f' --steps {steps}'
|
||||
proc = subprocess.Popen(
|
||||
['bash', '-c', cmd],
|
||||
|
|
@ -814,6 +820,7 @@ def main():
|
|||
parser.add_argument('--no-powermetrics', action='store_true')
|
||||
parser.add_argument('--no-generate', action='store_true', help='Disable text generation')
|
||||
parser.add_argument('--steps', type=int, default=10000, help='Total steps (default: 10000)')
|
||||
parser.add_argument('--data', type=str, default=None, help='Path to training data shard (.bin)')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.infinite:
|
||||
|
|
@ -828,7 +835,8 @@ def main():
|
|||
|
||||
train_proc = spawn_training(resume=args.resume, steps=args.steps, dynamic=args.dynamic,
|
||||
scratch=args.scratch, lr=args.lr, accum=args.accum,
|
||||
ane=args.ane, no_ane_extras=args.no_ane_extras)
|
||||
ane=args.ane, no_ane_extras=args.no_ane_extras,
|
||||
data=args.data)
|
||||
S.train_pid = train_proc.pid
|
||||
procs.append(train_proc)
|
||||
|
||||
|
|
@ -970,7 +978,8 @@ def main():
|
|||
train_proc.wait()
|
||||
train_proc = spawn_training(resume=True, steps=args.steps, dynamic=args.dynamic,
|
||||
lr=args.lr, accum=args.accum,
|
||||
ane=args.ane, no_ane_extras=args.no_ane_extras)
|
||||
ane=args.ane, no_ane_extras=args.no_ane_extras,
|
||||
data=args.data)
|
||||
S.train_pid = train_proc.pid
|
||||
procs = [p for p in procs if p.poll() is None]
|
||||
procs.append(train_proc)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#define CKPT_PATH "ane_stories110M_dyn_ckpt.bin"
|
||||
#define MODEL_PATH "../../../assets/models/stories110M.bin"
|
||||
#define DATA_PATH "../tinystories_data00.bin"
|
||||
#define DEFAULT_DATA_PATH "../tinystories_data00.bin"
|
||||
|
||||
// Dynamic kernel set per layer
|
||||
typedef struct {
|
||||
|
|
@ -217,6 +217,7 @@ int main(int argc, char *argv[]) {
|
|||
float min_lr_frac = 0.1f; // min_lr = max_lr * 0.1
|
||||
|
||||
bool do_resume = false, from_scratch = false;
|
||||
const char *data_path = DEFAULT_DATA_PATH;
|
||||
for (int i=1; i<argc; i++) {
|
||||
if (strcmp(argv[i], "--resume") == 0) do_resume = true;
|
||||
else if (strcmp(argv[i], "--scratch") == 0) from_scratch = true;
|
||||
|
|
@ -225,6 +226,7 @@ int main(int argc, char *argv[]) {
|
|||
else if (strcmp(argv[i], "--accum") == 0 && i+1<argc) accum_steps = atoi(argv[++i]);
|
||||
else if (strcmp(argv[i], "--warmup") == 0 && i+1<argc) warmup_steps = atoi(argv[++i]);
|
||||
else if (strcmp(argv[i], "--clip") == 0 && i+1<argc) grad_clip = atof(argv[++i]);
|
||||
else if (strcmp(argv[i], "--data") == 0 && i+1<argc) data_path = argv[++i];
|
||||
}
|
||||
float lr = max_lr;
|
||||
|
||||
|
|
@ -304,8 +306,8 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
|
||||
// mmap token data
|
||||
int data_fd = open(DATA_PATH, O_RDONLY);
|
||||
if (data_fd < 0) { printf("Cannot open %s\n", DATA_PATH); return 1; }
|
||||
int data_fd = open(data_path, O_RDONLY);
|
||||
if (data_fd < 0) { printf("Cannot open %s\n", data_path); return 1; }
|
||||
struct stat st; fstat(data_fd, &st);
|
||||
size_t data_len = st.st_size;
|
||||
uint16_t *token_data = (uint16_t*)mmap(NULL, data_len, PROT_READ, MAP_PRIVATE, data_fd, 0);
|
||||
|
|
|
|||
Loading…
Reference in New Issue