Background
Generative AI and ChatGPT have lately been about as popular as anything can get, but in China there are network-access problems as well as payment issues (how do you even pay?), so my use of them has been on and off. Recently I noticed the llama.cpp project, which has become very popular and can run on ordinary hardware. It is not especially efficient, but it covers the basic needs.
Setup
For the server I used CentOS 7, with 48 GB of memory and 24 CPU cores allocated.
The steps are as follows:
- Install GCC
yum install centos-release-scl
yum install devtoolset-11-gcc devtoolset-11-gcc-c++ devtoolset-11-binutils
scl enable devtoolset-11 bash
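Before going further it is worth confirming that the devtoolset compiler is really the one in use, since CentOS 7's stock GCC 4.8 is too old to build llama.cpp. A quick check (scl enable can also run a single command instead of opening a new shell):
# should report GCC 11.x, not the stock 4.8
scl enable devtoolset-11 'gcc --version'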
- Clone the project
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
- Edit the source
First change:
vim ggml.h
# add this as the very first line
#define _POSIX_C_SOURCE 199309L
Second change:
vim Makefile
# change amd64 to x86_64 (uname -m reports x86_64 on CentOS), so the block reads:
ifeq ($(UNAME_M),x86_64)
    CFLAGS += -mavx -mavx2 -mfma -mf16c
endif
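If you would rather script these two edits than make them in vim, here is a sed sketch (run from the llama.cpp checkout; verify the result before building):
# prepend the POSIX feature-test macro to ggml.h
sed -i '1i #define _POSIX_C_SOURCE 199309L' ggml.h
# switch the Makefile architecture check from amd64 to x86_64
sed -i 's/(UNAME_M),amd64/(UNAME_M),x86_64/' Makefile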
- Build
make
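Note that make must run in the shell where devtoolset-11 is enabled (the scl enable devtoolset-11 bash step above); from a fresh terminal you can also wrap the build in a single command:
# build with the devtoolset-11 toolchain without opening a new shell
scl enable devtoolset-11 'make'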
- Download the model
# these links may stop working at any time; any one of the three mirrors is enough, and -C - resumes a partial download
curl -o ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC
curl -o ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC
curl -o ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC
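The run command in the next step looks for the model under ./models/, so move the download there first, and sanity-check the size (a 4-bit 7B file should be on the order of 4 GB; a much smaller file means the transfer was cut off):
mkdir -p models
mv ggml-alpaca-7b-q4.bin models/
ls -lh models/ggml-alpaca-7b-q4.bin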
- Run the program
./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
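About the flags: --color colorizes the output, -f loads the Alpaca prompt template, and -ins starts instruction (chat) mode. llama.cpp's main also takes a -t flag for the thread count; with 24 cores allocated it may be worth setting it explicitly rather than relying on the default, e.g.:
# same invocation, pinned to the 24 allocated cores
./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins -t 24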
Other issues
- If you see the error "too old, regenerate your model files!"
Save the following script as convert.py:
#!/usr/bin/env python3
# Upgrades old-format ggml model files to the current format in place.
import argparse
import glob
import os
import struct
import sys
from sentencepiece import SentencePieceProcessor

HPARAMS = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"]

def parse_args():
    parser = argparse.ArgumentParser(description='Upgrade old ggml model files to the current format')
    parser.add_argument('dir_model', help='directory containing ggml .bin files')
    parser.add_argument('tokenizer_model', help='path to LLaMA tokenizer.model file')
    return parser.parse_args()

def read_header(f_in):
    # the header is 8 little-endian int32s: magic, the 5 hparams, rot, ftype
    struct_fmt = "i" * (3 + len(HPARAMS))
    struct_size = struct.calcsize(struct_fmt)
    buf = f_in.read(struct_size)
    return struct.unpack(struct_fmt, buf)

def write_header(f_out, header):
    (magic, vocab_size, dim, multiple_of, n_heads, n_layers, rot, ftype) = header

    if magic != 0x67676d6c:
        raise Exception('Invalid file magic. Must be an old style ggml file.')

    values = [
        0x67676d66,  # magic: ggmf in hex (the new format)
        1,           # file version
        vocab_size,
        dim,
        multiple_of,
        n_heads,
        n_layers,
        rot,
        ftype
    ]
    f_out.write(struct.pack("i" * len(values), *values))

def write_tokens(fout, tokenizer):
    # write the vocabulary in the new format, including per-token scores
    for i in range(tokenizer.vocab_size()):
        if tokenizer.is_unknown(i):
            text = " \u2047 ".encode("utf-8")
        elif tokenizer.is_control(i):
            text = b""
        elif tokenizer.is_byte(i):
            piece = tokenizer.id_to_piece(i)
            if len(piece) != 6:  # byte tokens look like "<0xAB>"
                print(f"Invalid token: {piece}")
                sys.exit(1)
            byte_value = int(piece[3:-1], 16)
            text = struct.pack("B", byte_value)
        else:
            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
        fout.write(struct.pack("i", len(text)))
        fout.write(text)
        fout.write(struct.pack("f", tokenizer.get_score(i)))

def read_tokens(f_in, tokenizer):
    # skip past the old token table; it is rewritten by write_tokens()
    for i in range(tokenizer.vocab_size()):
        len_b = f_in.read(4)
        (length,) = struct.unpack("i", len_b)
        f_in.read(length)

def copy_all_data(f_out, f_in):
    # copy the remaining tensor data through unchanged, 1 MiB at a time
    while True:
        buf = f_in.read(1024 * 1024)
        if not buf:
            break
        f_out.write(buf)

def convert_one_file(path_in, tokenizer):
    path_tmp = f"{path_in}.tmp"
    print(f"converting {path_in}")
    with open(path_in, "rb") as f_in, open(path_tmp, "wb") as f_out:
        write_header(f_out, read_header(f_in))
        read_tokens(f_in, tokenizer)
        write_tokens(f_out, tokenizer)
        copy_all_data(f_out, f_in)
    os.rename(path_tmp, path_in)  # replace the original file with the converted one

def main():
    args = parse_args()

    files = []
    files.extend(glob.glob(f"{args.dir_model}/*.bin"))
    files.extend(glob.glob(f"{args.dir_model}/*.bin.*"))

    tokenizer = SentencePieceProcessor(args.tokenizer_model)

    for file in files:
        convert_one_file(file, tokenizer)

if __name__ == "__main__":
    main()
Then download the tokenizer file from https://huggingface.co/decapoda-research/llama-7b-hf/blob/main/tokenizer.model
Finally, run: python convert.py your/models/folder/ path/to/tokenizer.model
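Putting those last two steps together, here is a sketch of the whole fix for the layout used above (note that the Hugging Face blob/ link is an HTML page; for curl, replace blob/ with resolve/ to get the raw file):
# fetch the raw tokenizer.model (resolve/ serves the binary, blob/ serves HTML)
curl -L -o tokenizer.model https://huggingface.co/decapoda-research/llama-7b-hf/resolve/main/tokenizer.model
# upgrade every .bin under ./models in place
python convert.py ./models ./tokenizer.model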