-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path gpt2-test.py
83 lines (66 loc) · 2.45 KB
/
gpt2-test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import tensorflow as tf
import estimator
from gpt2 import GPT2
import os
import json
# --- Eager-mode model setup --------------------------------------------------
# Load the GPT-2 hyper-parameters and pretrained weights into an eager-mode
# model instance; its weights seed the estimator below and receive the trained
# values back at the end of the script.
tf.enable_eager_execution()  # TF 1.x API: execute ops imperatively
# Model in eager mode
model_path = "model"
with open(os.path.join(model_path, "hparams.json")) as f:
    config = json.load(f)  # architecture hyper-parameters consumed by GPT2
model = GPT2(config, name="gpt2")
x = tf.zeros([0, 0], dtype=tf.int32)  # empty int32 batch used only to build the model
_ = model(x)  # build model — create the variables so load_weights can match them
model.load_weights(os.path.join(model_path, "weights.h5"))
def _data_builder(file_path, batch_size, pad_size):
    """Build an endless tf.data pipeline of padded token batches.

    Each line of *file_path* is expected to hold space-separated integer
    token ids. Yields dicts with "tokens" (int32, right-padded to
    *pad_size*) and "length" (scalar int32, the unpadded token count).
    """
    def _parse_line(line):
        # One text line -> int32 token ids plus their count.
        ids = tf.strings.to_number(
            tf.strings.split(tf.expand_dims(line, 0), " ").values,
            tf.int32,
        )
        return {"tokens": ids, "length": tf.shape(ids)[0]}

    padded_shapes = {
        "tokens": tf.TensorShape([pad_size]),
        "length": tf.TensorShape([]),
    }
    return (
        tf.data.TextLineDataset(file_path)
        .repeat()
        .map(_parse_line)
        .padded_batch(batch_size, padded_shapes)
    )
def data_fn():
    """Return train/dev datasets bundled in an estimator.DataSpec.

    Both splits are read from the "data" directory, batched 8 at a time
    and padded to 1025 tokens (1024 inputs + 1 shifted label).
    """
    data_path = "data"
    return estimator.DataSpec(
        train=_data_builder(os.path.join(data_path, "train.txt"), 8, 1025),
        dev=_data_builder(os.path.join(data_path, "test.txt"), 8, 1025),
    )
def model_fn(data, training):
    """Build the GPT-2 language-model training step for one batch.

    Args:
        data: dict with "tokens" (int32 [batch, pad_size]) and "length"
            (int32 [batch]) as produced by _data_builder.
        training: whether dropout should be active (scaled to 0.05 when on).

    Returns:
        estimator.ModelSpec with the masked mean cross-entropy loss and a
        plain SGD optimizer whose learning rate lives in a tf.Variable.
    """
    model = GPT2(config, name="gpt2")
    # Standard LM shift: predict token t+1 from tokens up to t.
    inputs = data["tokens"][:, :-1]
    labels = data["tokens"][:, 1:]
    # BUG FIX: the original computed the mask maxlen from labels.shape[1]
    # AFTER labels had been flattened to rank 1, which fails at build time.
    # Capture the per-example sequence length before the reshape.
    seq_len = tf.shape(labels)[1]
    dropout = tf.cast(training, tf.float32) * 0.05  # 0.0 at eval, 0.05 at train
    logits = model(inputs, use_2d=True, attention_dropout=dropout, dropout=dropout)
    labels = tf.reshape(labels, [-1])
    loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    # Mask out padding: only the first (length - 1) shifted positions count.
    mask = tf.sequence_mask(data["length"] - 1, maxlen=seq_len)
    mask = tf.reshape(mask, [-1])
    mask = tf.cast(mask, loss.dtype)
    loss = tf.reduce_sum(mask * loss) / tf.reduce_sum(mask)
    lr = tf.Variable(1e-4, name="lr")
    return estimator.ModelSpec(
        loss=loss,
        optimizer=tf.train.GradientDescentOptimizer(lr),
        trainable_variables=model.weights,
        import_variables=model.weights,
    )
# --- Estimator training driver -----------------------------------------------
run_config = estimator.RunConfig(
    train_steps_per_round=200,
    eval_steps_per_round=10,
    model_dir="model",  # NOTE(review): same directory the eager model loads from
)
estm = estimator.Estimator(model_fn, data_fn, run_config)
# Seed the estimator with the eager model's pretrained weights.
values = [v.numpy() for v in model.weights]
estm.import_variables(values)
estm.run(200)  # run the train/eval rounds
# Pull the trained weights back into the eager model and persist them.
values = estm.export_model()
for u, v in zip(values, model.weights):
    v.assign(u)
# NOTE(review): "new_weigths.h5" looks like a typo for "new_weights.h5" —
# confirm nothing downstream reads this exact filename before renaming.
model.save_weights(os.path.join(model_path, "new_weigths.h5"))