-
Notifications
You must be signed in to change notification settings - Fork 95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Train use mlflow #287
base: main
Are you sure you want to change the base?
Train use mlflow #287
Changes from 4 commits
a2a8bd7
59c37be
8efb160
b391d82
676b411
617622c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,4 +20,6 @@ Icon? | |
|
||
# IDEs | ||
*.swp | ||
.env | ||
.env | ||
*.pkl | ||
mlruns |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,32 @@ | ||||||
version: '3.9' | ||||||
services: | ||||||
mlflow_postgres: | ||||||
image: bitnami/postgresql | ||||||
container_name: postgres_db | ||||||
environment: | ||||||
- POSTGRES_USER=postgres | ||||||
- POSTGRES_PASSWORD=postgres | ||||||
- POSTGRES_DB=mlflow_db | ||||||
volumes: | ||||||
- postgres_data:/var/lib/postgresql/data | ||||||
ports: | ||||||
- "5432:5432" | ||||||
|
||||||
mlflow_server: | ||||||
restart: always | ||||||
build: | ||||||
context: ./docker | ||||||
dockerfile: Dockerfile # Specify the Dockerfile explicitly | ||||||
image: mlflow | ||||||
container_name: mlflow_server | ||||||
environment: | ||||||
- BACKEND_STORE_URI=postgresql://postgres:postgres@mlflow_postgres:5432/mlflow_db # Connection string to Postgres | ||||||
- ARTIFACT_STORE_URI=./mlruns # Local directory for storing artifacts | ||||||
ports: | ||||||
- "5002:5000" # Expose MLflow UI | ||||||
volumes: | ||||||
- ./mlruns:/mlruns # Mount local directory for MLflow artifacts | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Add "mlruns:" to the volumes |
||||||
command: mlflow server --backend-store-uri postgresql://postgres:postgres@mlflow_postgres:5432/mlflow_db --default-artifact-root ./mlruns --host 0.0.0.0 --port 5000 | ||||||
|
||||||
volumes: | ||||||
postgres_data: {} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
FROM python:3.11 | ||
|
||
# Install python package | ||
COPY requirements.txt /tmp/ | ||
RUN pip install --no-cache-dir -r /tmp/requirements.txt |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
mlflow==2.16.2 | ||
psycopg2-binary==2.9.10 | ||
boto3==1.35.47 |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,7 +19,9 @@ | |||||
from data_loaders.jisfdl import JISFDL | ||||||
|
||||||
import boilerplate as tfbp | ||||||
|
||||||
import mlflow | ||||||
import time | ||||||
import pickle | ||||||
## | ||||||
# Intent Classification with BERT | ||||||
# This code is based on the paper BERT for Joint Intent Classification and Slot Filling by Chen et al. (2019), | ||||||
|
@@ -34,6 +36,7 @@ | |||||
'fr': "dbmdz/bert-base-french-europeana-cased", | ||||||
} | ||||||
|
||||||
mlflow.set_tracking_uri("http://0.0.0.0:5002") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
This should be the docker hostname |
||||||
|
||||||
@tfbp.default_export | ||||||
class IntentClassifier(tfbp.Model): | ||||||
|
@@ -42,7 +45,7 @@ class IntentClassifier(tfbp.Model): | |||||
"num_epochs": 2, | ||||||
"dropout_prob": 0.1, | ||||||
"intent_num_labels": 7, | ||||||
"gamma": 2, | ||||||
"gamma": 2.0, | ||||||
"k": 3 | ||||||
} | ||||||
data_loader: JISFDL | ||||||
|
@@ -119,35 +122,70 @@ def format_scores(self, scores: Dict[str, dict]): | |||||
@tfbp.runnable | ||||||
def fit(self): | ||||||
"""Training""" | ||||||
encoded_texts, encoded_intents, encoded_slots, intent_names, slot_names = self.data_loader( | ||||||
self.tokenizer) | ||||||
|
||||||
if self.hparams.intent_num_labels != len(intent_names): | ||||||
raise ValueError( | ||||||
f"Hyperparam intent_num_labels mismatch, should be : {len(intent_names)}" | ||||||
) | ||||||
|
||||||
# Hyperparams, Optimizer and Loss function | ||||||
opt = Adam(learning_rate=3e-5, epsilon=1e-08) | ||||||
|
||||||
losses = SparseCategoricalFocalLoss(gamma=self.hparams.gamma) | ||||||
|
||||||
metrics = [SparseCategoricalAccuracy("accuracy")] | ||||||
|
||||||
# Compile model | ||||||
self.compile(optimizer=opt, loss=losses, metrics=metrics) | ||||||
|
||||||
x = {"input_ids": encoded_texts["input_ids"], "token_type_ids": encoded_texts["token_type_ids"], | ||||||
"attention_mask": encoded_texts["attention_mask"]} | ||||||
|
||||||
super().fit( | ||||||
x, encoded_intents, epochs=self.hparams.num_epochs, batch_size=32, shuffle=True) | ||||||
|
||||||
# Persist the model | ||||||
self.extra_params["intent_names"] = intent_names | ||||||
|
||||||
self.save() | ||||||
|
||||||
# Start MLflow run | ||||||
with mlflow.start_run() as run: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't this be implemented in the parent class so that it would work for all models? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ideally it should be implemented as a helper class. Otherwise, we're logging metrics during training. The base class doesn't have a fit method. |
||||||
# Log hyperparameters | ||||||
mlflow.log_param("language", self.hparams.language) | ||||||
mlflow.log_param("num_epochs", self.hparams.num_epochs) | ||||||
mlflow.log_param("dropout_prob", self.hparams.dropout_prob) | ||||||
mlflow.log_param("intent_num_labels", self.hparams.intent_num_labels) | ||||||
|
||||||
encoded_texts, encoded_intents, encoded_slots, intent_names, slot_names = self.data_loader( | ||||||
self.tokenizer) | ||||||
|
||||||
if self.hparams.intent_num_labels != len(intent_names): | ||||||
raise ValueError( | ||||||
f"Hyperparam intent_num_labels mismatch, should be : {len(intent_names)}" | ||||||
) | ||||||
|
||||||
# Hyperparams, Optimizer and Loss function | ||||||
opt = Adam(learning_rate=3e-5, epsilon=1e-08) | ||||||
|
||||||
losses = SparseCategoricalFocalLoss(gamma=self.hparams.gamma) | ||||||
|
||||||
metrics = [SparseCategoricalAccuracy("accuracy")] | ||||||
|
||||||
# Compile model | ||||||
self.compile(optimizer=opt, loss=losses, metrics=metrics) | ||||||
|
||||||
x = {"input_ids": encoded_texts["input_ids"], "token_type_ids": encoded_texts["token_type_ids"], | ||||||
"attention_mask": encoded_texts["attention_mask"]} | ||||||
|
||||||
start_time = time.time() | ||||||
history = super().fit( | ||||||
x, encoded_intents, epochs=self.hparams.num_epochs, batch_size=32, shuffle=True) | ||||||
end_time = time.time() | ||||||
|
||||||
# Log training time | ||||||
mlflow.log_metric("training_time", end_time - start_time) | ||||||
|
||||||
# Log training metrics | ||||||
for epoch in range(len(history.history['loss'])): | ||||||
mlflow.log_metric("loss", history.history["loss"][epoch], step=epoch) | ||||||
mlflow.log_metric("accuracy", history.history["accuracy"][epoch], step=epoch) | ||||||
|
||||||
# Persist the model and log the model in MLflow | ||||||
self.extra_params["intent_names"] = intent_names | ||||||
mlflow.log_params(self.extra_params) | ||||||
model_instance = self.save_model() # Save the model using the internal method | ||||||
# Log the model in MLflow | ||||||
mlflow.keras.log_model(model_instance, "intent_classifier_model") | ||||||
# Register the model in MLflow's Model Registry | ||||||
model_uri = f"runs:/{run.info.run_id}/intent_classifier_model" | ||||||
mlflow.register_model(model_uri, "IntentClassifierModel") | ||||||
|
||||||
def get_model(self): | ||||||
# Define input layers | ||||||
input_ids = tf.keras.Input(shape=(None,), dtype=tf.int32, name='input_ids') | ||||||
attention_mask = tf.keras.Input(shape=(None,), dtype=tf.int32, name='attention_mask') | ||||||
token_type_ids = tf.keras.Input(shape=(None,), dtype=tf.int32, name='token_type_ids') | ||||||
|
||||||
# Call the model on the inputs | ||||||
outputs = self.call( | ||||||
{'input_ids': input_ids, 'attention_mask': attention_mask, 'token_type_ids': token_type_ids}) | ||||||
|
||||||
# Return a Keras Model | ||||||
return tf.keras.Model(inputs=[input_ids, attention_mask, token_type_ids], outputs=outputs) | ||||||
@tfbp.runnable | ||||||
def evaluate(self): | ||||||
encoded_texts, encoded_intents, _, _, _ = self.data_loader( | ||||||
|
@@ -168,9 +206,16 @@ def evaluate(self): | |||||
scores["Overall Scores"] = overall_score | ||||||
scores = self.format_scores(scores) | ||||||
|
||||||
# Log evaluation results to MLflow | ||||||
with mlflow.start_run(): | ||||||
mlflow.log_metrics({ | ||||||
"intent_confidence": overall_score["intent_confidence"], | ||||||
"loss": overall_score["loss"] | ||||||
}) | ||||||
|
||||||
print("\nScores per intent:") | ||||||
for intent, score in scores.items(): | ||||||
print("{}: {}".format(intent, score)) | ||||||
print(f"{intent}: {score}") | ||||||
|
||||||
return scores | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,7 +96,7 @@ | |
if os.path.isfile(os.path.join(model.save_dir, "checkpoint")): | ||
model.restore() | ||
else: | ||
model.save() | ||
model.save_model() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure why we need to rename this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To avoid confusion with Keras' built-in save method. It's been throwing aberrant exceptions. Solved it by renaming the boilerplate method |
||
|
||
# Run the specified model method. | ||
if FLAGS.method not in Model._methods: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add these to ROOT/docker/docker-compose.nlu.yml and ROOT/docker/docker-compose.nlu.dev.yml