-
Notifications
You must be signed in to change notification settings - Fork 118
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #231 from huggingface/pr/eliebak/220
Pr/eliebak/220
- Loading branch information
Showing
10 changed files
with
750 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
checkpoints: | ||
checkpoint_interval: 10 | ||
checkpoints_path: checkpoints | ||
checkpoints_path_is_shared_file_system: false | ||
resume_checkpoint_path: s3://phuc-experiments/temp/config_tiny_llama_with_s3_upload | ||
save_initial_state: false | ||
data_stages: | ||
- data: | ||
dataset: | ||
dataset_overwrite_cache: false | ||
dataset_processing_num_proc_per_process: 1 | ||
hf_dataset_config_name: null | ||
hf_dataset_or_datasets: stas/openwebtext-10k | ||
hf_dataset_splits: train | ||
text_column_name: text | ||
num_loading_workers: 1 | ||
seed: 42 | ||
name: Stable Training Stage | ||
start_training_step: 1 | ||
- data: | ||
dataset: | ||
dataset_overwrite_cache: false | ||
dataset_processing_num_proc_per_process: 1 | ||
hf_dataset_config_name: null | ||
hf_dataset_or_datasets: stas/openwebtext-10k | ||
hf_dataset_splits: train | ||
text_column_name: text | ||
num_loading_workers: 1 | ||
seed: 42 | ||
name: Annealing Phase | ||
start_training_step: 10 | ||
general: | ||
benchmark_csv_path: null | ||
consumed_train_samples: null | ||
ignore_sanity_checks: true | ||
project: debug | ||
run: tiny_llama_%date_%jobid | ||
seed: 42 | ||
step: null | ||
lighteval: null | ||
logging: | ||
iteration_step_info_interval: 1 | ||
log_level: info | ||
log_level_replica: info | ||
model: | ||
ddp_bucket_cap_mb: 25 | ||
dtype: bfloat16 | ||
init_method: | ||
std: 0.025 | ||
make_vocab_size_divisible_by: 1 | ||
model_config: | ||
bos_token_id: 1 | ||
eos_token_id: 2 | ||
hidden_act: silu | ||
hidden_size: 16 | ||
initializer_range: 0.02 | ||
intermediate_size: 64 | ||
is_llama_config: true | ||
max_position_embeddings: 256 | ||
num_attention_heads: 4 | ||
num_hidden_layers: 2 | ||
num_key_value_heads: 4 | ||
pad_token_id: null | ||
pretraining_tp: 1 | ||
rms_norm_eps: 1.0e-05 | ||
rope_scaling: null | ||
tie_word_embeddings: true | ||
use_cache: true | ||
vocab_size: 256 | ||
optimizer: | ||
accumulate_grad_in_fp32: true | ||
clip_grad: 1.0 | ||
learning_rate_scheduler: | ||
learning_rate: 0.0003 | ||
lr_decay_starting_step: null | ||
lr_decay_steps: 13 | ||
lr_decay_style: cosine | ||
lr_warmup_steps: 2 | ||
lr_warmup_style: linear | ||
min_decay_lr: 1.0e-05 | ||
optimizer_factory: | ||
adam_beta1: 0.9 | ||
adam_beta2: 0.95 | ||
adam_eps: 1.0e-08 | ||
name: adamW | ||
torch_adam_is_fused: true | ||
weight_decay: 0.01 | ||
zero_stage: 0 | ||
parallelism: | ||
dp: 1 | ||
expert_parallel_size: 1 | ||
pp: 1 | ||
pp_engine: 1f1b | ||
tp: 1 | ||
tp_linear_async_communication: true | ||
tp_mode: REDUCE_SCATTER | ||
profiler: null | ||
tokenizer: | ||
tokenizer_max_length: null | ||
tokenizer_name_or_path: robot-test/dummy-tokenizer-wordlevel | ||
tokenizer_revision: null | ||
tokens: | ||
batch_accumulation_per_replica: 1 | ||
limit_test_batches: 0 | ||
limit_val_batches: 0 | ||
micro_batch_size: 2 | ||
sequence_length: 256 | ||
train_steps: 30 | ||
val_check_interval: -1 | ||
s3_upload: | ||
remove_after_upload: true | ||
s5cmd_concurrency: 5 | ||
s5cmd_numworkers: 16 | ||
s5cmd_path: /fsx/nouamane/miniconda/envs/2-1-cu121/bin/s5cmd | ||
upload_s3_path: s3://phuc-experiments/temp/config_tiny_llama_with_s3_upload |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
from .fsspec import check_path_is_local, fs_copy, fs_open | ||
from .s3_mover import S3Mover | ||
|
||
__all__ = ["S3Mover", "fs_open", "fs_copy", "check_path_is_local"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import contextlib | ||
from pathlib import Path | ||
from typing import Tuple, Union | ||
|
||
import fsspec | ||
from fsspec.implementations import local | ||
|
||
|
||
def get_filesystem_and_path(path: Path, storage_options=None) -> Tuple[fsspec.AbstractFileSystem, str]:
    """Resolve ``path`` to an fsspec filesystem and the path within it.

    Args:
        path: Location to resolve; it is converted with ``str()`` before
            being handed to fsspec.
        storage_options: Forwarded unchanged to
            ``fsspec.core.get_fs_token_paths``.

    Returns:
        A ``(filesystem, path)`` pair.

    Note:
        An ``assert`` checks that fsspec resolved exactly one path.
    """
    # Only filesystems known to `fsspec` are supported. If you need another
    # one, please use `fsspec.registry.register_implementation`.
    # DO NOT USE the `mode` argument here: it adds a suffix `0.part` when
    # using `mode="w"`.
    filesystem, _token, resolved_paths = fsspec.core.get_fs_token_paths(
        str(path),
        storage_options=storage_options,
    )
    assert len(resolved_paths) == 1
    return filesystem, resolved_paths[0]
|
||
|
||
@contextlib.contextmanager
def fs_open(
    file: Union[str, Path],
    mode="r",
    storage_options=None,
):
    """Open ``file`` on whichever fsspec filesystem its path resolves to.

    Use as a context manager; the file object is closed when the ``with``
    block exits.

    Args:
        file: Path or URL of the file to open.
        mode: Mode string passed to the filesystem's ``open``.
        storage_options: Optional filesystem options, forwarded to
            ``get_filesystem_and_path``. Defaults to ``None``, which keeps
            the previous behaviour of passing no options.

    Yields:
        The file object returned by the filesystem's ``open``.
    """
    fs, path = get_filesystem_and_path(file, storage_options=storage_options)
    with fs.open(path, mode=mode) as f:
        yield f
|
||
|
||
def fs_copy(
    input_file: Union[str, Path],
    output_file: Union[str, Path],
):
    """Copy file from input to output (possibly on s3/other fs).

    The content is streamed in chunks rather than read in a single call, so
    the whole file never has to be held in memory at once.

    Args:
        input_file: Source path or URL, opened with mode ``"rb"``.
        output_file: Destination path or URL, opened with mode ``"wb"``.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    with fs_open(input_file, mode="rb") as fi, fs_open(output_file, mode="wb") as fo:
        # copyfileobj repeatedly read()s a bounded chunk and write()s it.
        shutil.copyfileobj(fi, fo)
|
||
|
||
def check_path_is_local(path: Path, storage_options=None) -> bool:
    """Return whether ``path`` resolves to fsspec's ``LocalFileSystem``.

    Args:
        path: Location to check.
        storage_options: Forwarded to ``get_filesystem_and_path``.

    Returns:
        ``True`` if the resolved filesystem is an instance of
        ``local.LocalFileSystem``, otherwise ``False``.
    """
    resolved = get_filesystem_and_path(path=path, storage_options=storage_options)
    filesystem = resolved[0]
    return isinstance(filesystem, local.LocalFileSystem)
Oops, something went wrong.