Choose from ('weather' or 'other topic')
It seems date processing will not work (because of the math involved), e.g. today is 30/01/2000, so next week is 07/2/2000 (but this does not work)
- using wizard vicuna sharded is better "hiepnh/Wizard-Vicuna-7B-Uncensored-HF-sharded" (llama1)
- using when : today works
- testing using how many days from today : -1 (yesterday)
Merge lora needs 20GB of memory for just 7B parameter llama -> kaggle merge lora 2, combine base model and lora
{ "instruction":"Sebutkan kata yang berhubungan dengan cuaca", "input":"", "response":"hujan, angin, petir, berawan, lembab, panas, terik" }
Apakah besok hujan, works but context is besok
TrainOutput(global_step=20, training_loss=1.0630455672740937, metrics={'train_runtime': 111.1406, 'train_samples_per_second': 0.72, 'train_steps_per_second': 0.18, 'total_flos': 19714939895808.0, 'train_loss': 1.0630455672740937, 'epoch': 0.8})
!cd /kaggle/working && ls
Tried using TheBloke/wizardLM-7B-HF: it still cannot be loaded. Not enough memory to shard? The checkpoint should be sharded (0001 of xxx.bin; decapoda uses 405MB chunks). Now working in Kaggle also, because the 30GB of memory is not available when using the GPU
- the model needs to be sharded first, otherwise it will crash when loading into memory !git clone %cd text-generation-webui !pip install -r requirements.txt
# Checkpoint to re-shard, the local output directory derived from its id,
# and the maximum size of each shard file.
model_id = "TheBloke/wizardLM-7B-HF"
# May need to edit based on where you're storing your models.
# Built from `model_id` (the original referenced an undefined `model`);
# "/" is replaced so the repo id becomes a single directory name.
dest = "./dest/{}".format(model_id.replace("/", "_"))
shard_size = "1000MB"
from transformers import LlamaTokenizer , LlamaForCausalLM
# Load the LLaMA tokenizer for the checkpoint named by `model_id`.
tokenizer = LlamaTokenizer.from_pretrained(model_id)
# Load the model weights with 8-bit loading enabled and automatic device mapping.
# NOTE(review): `dest` and `shard_size` are never used in the visible code —
# presumably a save step writing shards to `dest` was intended; confirm.
model = LlamaForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map='auto')
import shutil

# Remove the previous training output directory so a new run starts clean.
# A missing directory (fresh runtime, or the cell being re-run) is not an
# error; any other failure (e.g. permissions) still propagates.
try:
    shutil.rmtree('/content/testqlora/outputs')
except FileNotFoundError:
    pass
!pip install -r requirements.txt -q -U
using qlora with test data
git add --all git commit -m "commit1" git push
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,LlamaTokenizer

# Base checkpoint to fine-tune.
model_id = "decapoda-research/llama-7b-hf"

# 4-bit quantization settings: NF4 quant type, double quantization,
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer and quantized model; the device map {"": 0} assigns the whole
# model to device 0.
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"":0},
)
from peft import LoraConfig, get_peft_model
# LoRA adapter settings: rank 8, alpha 32, applied to the q_proj and v_proj
# modules, 5% dropout, no bias terms, causal language-modelling task.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model with the LoRA adapters.
model = get_peft_model(model, config)
# Report trainable vs. total parameters. The original called a free function
# `print_trainable_parameters` that is not defined in this file; the wrapped
# PEFT model exposes the same report as a method.
model.print_trainable_parameters()
!git clone
import os os.chdir('/content/testqlora') !ls
import transformers
# Register '[PAD]' as the tokenizer's padding token.
# NOTE(review): if '[PAD]' is new to the vocabulary, the model's embedding
# table is not resized anywhere in the visible code — confirm this is safe
# for batches that actually contain padding.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Training hyper-parameters: batch size 1 with 4 gradient-accumulation steps,
# 20 optimizer steps in total, fp16, paged 8-bit AdamW.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    max_steps=20,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    output_dir="outputs",
    optim="paged_adamw_8bit",
)

# Causal-LM collation (mlm=False).
collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=training_args,
    data_collator=collator,
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
# Inference smoke test: feed a few-shot pizza-order prompt to the model and
# print the continuation.
# The cache was switched off for training; turn it back on for generation,
# as the training cell's own note asks ("Please re-enable for inference!").
model.config.use_cache = True

# Few-shot prompt; the final HUMAN turn is left open for the model to complete.
text = """HUMAN: Hello MESSAGE: Welcome to pizza john ORDER DETAILS: {} RELEVANCY:unknown ORDER CONFIRMED:no
HUMAN: Who is the president of the US? MESSAGE: I'm sorry, but I only process pizza orders. ORDER DETAILS: {} RELEVANCY: No ORDER CONFIRMED: No
HUMAN: can i order some pizza please """
device = "cuda:0"

# Tokenize to PyTorch tensors on the target device, generate up to 20 new
# tokens, and decode the full sequence without special tokens.
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
import locale locale.getpreferredencoding = lambda: "UTF-8" !python -m pip install huggingface_hub !huggingface-cli login --token #
# Publish `model` to the Hub repository "notzero/testlora2";
# use_auth_token=True asks the library to use the stored login credentials.
model.push_to_hub("notzero/testlora2", use_auth_token=True)
Then the LoRA needs to be merged with the base model (use Kaggle: it has 30GB of memory when the GPU is not used)
!pip install transformers !pip install peft !pip install sentencepiece
!git clone #!git clone !git clone
#!python /content/baize-chatbot/
#--base decapoda-research/llama-7b-hf
#--target ~/model_weights/baize-7b
#--lora notzero/testlora2
!python ./testqlora/
--base decapoda-research/llama-7b-hf
--target /kaggle/working/model_weights/mergeqlora
--lora notzero/testlora2
!cd /kaggle/working/llama.cpp && mkdir qmodel && mv /root/model_weights/mergeqlora /kaggle/working/llama.cpp/qmodel/temp #!mv /kaggle/working/llama.cpp/qmodel/7B/tokenizer.model /kaggle/working/llama.cpp/qmodel/
!python -m pip install huggingface_hub !huggingface-cli login --token ###
!cd /kaggle/working/llama.cpp && mkdir qmodel && mv /root/model_weights/mergeqlora /kaggle/working/llama.cpp/qmodel/temp
from huggingface_hub import HfApi

# Upload the whole merged-model directory to the "notzero/modelcombined"
# repository (registered as a dataset-type repo).
api = HfApi()
api.upload_folder(
    folder_path="/kaggle/working/llama.cpp/qmodel/temp",
    repo_id="notzero/modelcombined",
    repo_type="dataset",
)
mkdir /root/modelcombined
!cd /root/modelcombined && wget !cd /root/modelcombined && wget !cd /root/modelcombined && wget !cd /root/modelcombined && wget !cd /root/modelcombined && wget
!cd llama.cpp && make !cd /kaggle/working/llama.cpp && python /root/modelcombined
!python -m pip install huggingface_hub !huggingface-cli login --token #
from huggingface_hub import HfApi

# Upload the f16 GGML file to the "notzero/modelcombined" dataset-type repo,
# keeping the same file name in the repository.
api = HfApi()
api.upload_file(
    path_or_fileobj="/root/modelcombined/ggml-model-f16.bin",
    path_in_repo="ggml-model-f16.bin",
    repo_id="notzero/modelcombined",
    repo_type="dataset",
)
!cd /root/modelcombined && rm pytorch_model-00001-of-00002.bin !cd /root/modelcombined && rm pytorch_model-00001-of-00002.bin
!cd llama.cpp && ./quantize /root/modelcombined/ggml-model-f16.bin //root/modelcombined/ggml-model-q4_0.bin q4_0
from huggingface_hub import HfApi

# Upload the q4_0-quantized GGML file to the "notzero/modelcombined"
# dataset-type repo, keeping the same file name in the repository.
api = HfApi()
api.upload_file(
    path_or_fileobj="/root/modelcombined/ggml-model-q4_0.bin",
    path_in_repo="ggml-model-q4_0.bin",
    repo_id="notzero/modelcombined",
    repo_type="dataset",
)