# Cargo.toml — manifest for the llm_stream crate.
# Crate identity: name, version, and Rust edition.
[package]
name = "llm_stream"
version = "0.1.1"
edition = "2021"
[dependencies]
# Candle crates, all taken from the upstream git repository on branch `main`.
# `candle` is a local alias for the `candle-core` package.
# NOTE(review): no `rev` is pinned, so a fresh resolve may pick up a newer
# upstream commit — consider pinning a revision for reproducible builds.
candle = { git = "https://github.com/huggingface/candle.git", branch = "main", package = "candle-core" }
candle-nn = { git = "https://github.com/huggingface/candle.git", branch = "main" }
candle-transformers = { git = "https://github.com/huggingface/candle.git", branch = "main" }
# Optional: only pulled in by the `flash-attn` feature.
candle-flash-attn = { git = "https://github.com/huggingface/candle.git", branch = "main", optional = true }
# Hugging Face crates.
hf-hub = { version = "0.3.0", features = ["tokio"] }
tokenizers = { version = "0.15", default-features = false, features = ["onig"] }
#tokenizers = { version = "0.15.0", default-features = false }
# Acceleration backends.
#accelerate-src = { version = "0.3.2" , optional = true}
# Optional: only pulled in by the `mkl` feature.
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"], optional = true }
# for mkl feature
#libc = { version = "0.2.147" }
# Optional: `cudarc` and `half` are both switched on by the `nccl` feature.
cudarc = { version = "0.9.14", features = ["f16"], optional = true }
half = { version = "2.3.1", features = ["num-traits", "use-intrinsics", "rand_distr"], optional = true }
rayon = "1.7.0"
safetensors = "0.3.1"
num-traits = "0.2.15"
# warp layer and the async stack listed alongside it.
warp = "0.3"
hyper = { version = "0.14", features = ["stream", "server", "http1", "http2", "tcp", "client"] }
tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1.1"
tokio-util = { version = "0.7", features = ["io"] }
futures-util = { version = "0.3", default-features = false, features = ["sink"] }
futures-channel = { version = "0.3.17", features = ["sink"] }
# TOML parsing.
toml = "0.8.8"
# Miscellaneous.
anyhow = { version = "1", features = ["backtrace"] }
clap = { version = "4.2.4", features = ["derive"] }
bytes = "1.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1.0"
#log = "0.4"
#pretty_env_logger = "0.5"
# Dependencies used only when building tests, examples, and benchmarks.
[dev-dependencies]
# Same version and features as the `anyhow` entry under [dependencies].
anyhow = { version = "1", features = ["backtrace"] }
# Every entry below is commented out and therefore inactive.
#tokio = "1.29.1"
#byteorder = "1.4.3"
#gemm = { version = "0.16.15", features = ["wasm-simd128-enable"] }
#image = { version = "0.24.7", default-features = false, features = ["jpeg", "png"] }
#imageproc = { version = "0.23.0", default-features = false }
#memmap2 = { version = "0.7.1", features = ["stable_deref_trait"] }
#rand = "0.8.5"
#rusttype = { version = "0.9", default-features = false }
#num_cpus = "1.15.0"
#parquet = { version = "45.0.0" }
#rand_distr = "0.4.3"
#serde_plain = "1.0.2"
#thiserror = "1"
#wav = "1.0.0"
#yoke = { version = "0.7.2", features = ["derive"] }
#zip = { version = "0.6.6", default-features = false }
#metal = { version = "0.27.1", features = ["mps"], package="candle-metal" }
# Dependencies available to the crate's build script.
[build-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
# Optional: only pulled in when the `cuda` feature is enabled ("dep:bindgen_cuda").
bindgen_cuda = { version = "0.1.1", optional = true }
[features]
# Nothing optional is enabled by default.
default = []
# Flags that enable no dependencies or other features in this manifest.
# NOTE(review): presumably read via cfg(feature = "...") in the crate source
# to select a model — confirm against the code.
mistral = []
phi-v2 = []
llama = []
#accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate", "candle-transformers/accelerate"]
# Pulls in intel-mkl-src and turns on `mkl` in each candle crate.
mkl = [
    "dep:intel-mkl-src",
    "candle/mkl",
    "candle-nn/mkl",
    "candle-transformers/mkl",
]
# Turns on `cuda` in each candle crate and pulls in the bindgen_cuda build dependency.
cuda = [
    "candle/cuda",
    "candle-nn/cuda",
    "candle-transformers/cuda",
    "dep:bindgen_cuda",
]
# Forwards to the `cudnn` feature of the candle (candle-core) dependency.
cudnn = ["candle/cudnn"]
# Implies `cuda`; also pulls in candle-flash-attn and enables flash-attn in candle-transformers.
flash-attn = [
    "cuda",
    "candle-transformers/flash-attn",
    "dep:candle-flash-attn",
]
# Implies `cuda`; enables the `nccl` feature of cudarc and pulls in `half`.
nccl = [
    "cuda",
    "cudarc/nccl",
    "dep:half",
]