Skip to content
Snippets Groups Projects
Commit 862cedb4 authored by Ivan Pavlovich's avatar Ivan Pavlovich
Browse files

Ajout de métriques supplémentaires dans le calcul de test et ajout de la LLM hébergée Cohere

parent 2dbae38d
No related branches found
No related tags found
No related merge requests found
Showing
with 113 additions and 55 deletions
import cohere
import json
def cohere_create_client():
    """Create a Cohere v2 API client.

    SECURITY(review): an API key was previously hard-coded here, committed to
    version control, and is therefore compromised — it must be rotated. The
    key is now read from the COHERE_API_KEY environment variable, falling
    back to the legacy value so existing deployments keep working until the
    rotation is done.
    """
    import os  # local import keeps this security fix self-contained
    api_key = os.environ.get("COHERE_API_KEY", "hNdIrXFRGTzxSK4bnEXONMMIYukcMuPqOLHV6P28")
    return cohere.ClientV2(api_key)
def cohere_classify(client, sequence, labels):
    """Ask the Cohere chat model which of *labels* apply to *sequence*.

    Sends a single user prompt to the "command-r-plus" model and decodes the
    reply with json.loads. Returns the decoded object, expected to look like
    {"labels": [...]}. No fence/prose stripping is done here, so a reply
    wrapped in extra text raises json.JSONDecodeError.
    """
    # NOTE: the trailing backslashes splice the prompt into one long line;
    # re-indenting the continuation lines would inject whitespace into the
    # string literal, so they stay at column 0.
    prompt = f'I need you to give me the labels that could be given to the text (keep in mind that u can put multiple labels and select only the labels that i give you):\
text: {sequence}\
labels: {labels}\
Give the response in json format {{ "labels": [] }}'
    response = client.chat(
        model="command-r-plus",
        messages=[{"role": "user", "content": prompt}]
    )
    responce_json = json.loads(response.message.content[0].text)
    return responce_json
\ No newline at end of file
import cohere
import json
import time
def cohere_create_client():
    """Create a Cohere v2 API client.

    SECURITY(review): an API key was previously hard-coded here, committed to
    version control, and is therefore compromised — it must be rotated. The
    key is now read from the COHERE_API_KEY environment variable, falling
    back to the legacy value so existing deployments keep working until the
    rotation is done.
    """
    import os  # local import keeps this security fix self-contained
    api_key = os.environ.get("COHERE_API_KEY", "hNdIrXFRGTzxSK4bnEXONMMIYukcMuPqOLHV6P28")
    return cohere.ClientV2(api_key)
def cohere_classify(client, sequence, labels):
    """Classify *sequence* against *labels* via the hosted Cohere chat API.

    Builds a prompt asking the "command-r-plus" model to answer with
    {"labels": [...]}, extracts the first {...} object from the reply
    (models often wrap it in ```json fences or prose), and returns a dict
    mapping every candidate label to True/False.

    Sleeps 6 seconds before returning — crude rate limiting for the hosted
    API.

    Raises ValueError if no JSON object is found in the reply, and
    json.JSONDecodeError if the extracted span is not valid JSON.
    """
    prompt = 'I need you to give me the labels that could be given to the text (keep in mind that u can put multiple labels and select only the labels that i give you):\n'
    prompt += 'text: ' + sequence + '\n'
    prompt += 'labels: ' + "[" + ", ".join(labels) + ']\n'
    prompt += 'Give the response in json format: { "labels": [] }. Follow this writing to the letter and don t add text around it. Only put the labels that you select between [].'
    prompt += 'Even if you have no labels give me a response like: { "labels": [] }. And also put labels between "".'
    response = client.chat(
        model="command-r-plus",
        messages=[{"role": "user", "content": prompt}]
    )
    text = response.message.content[0].text
    # BUG FIX: the old parsing used lstrip('```json'), which strips any
    # leading characters from the SET {`, j, s, o, n} rather than the literal
    # prefix, and split("}") which truncates if a label ever contains "}".
    # Extracting the first {...} span is robust to fences and surrounding
    # prose for the flat {"labels": [...]} shape requested above.
    start = text.find("{")
    end = text.find("}", start)
    if start == -1 or end == -1:
        raise ValueError(f"No JSON object found in model reply: {text!r}")
    response_json = json.loads(text[start:end + 1])
    selected = response_json["labels"]
    result = {label: label in selected for label in labels}
    time.sleep(6)  # stay under the hosted API's rate limit
    return result
\ No newline at end of file
def confusion_matrix(wanted, prediction):
def get_confusion_matrix(wanted, prediction):
    """Build a 2x2 confusion matrix comparing wanted labels to predictions.

    Both arguments are dicts mapping label name -> bool; every key of
    *wanted* must also be present in *prediction*. Layout:
    [[TP, FP], [FN, TN]] — row is the predicted value (0 = positive,
    1 = negative), column the actual value, matching the use of [0][0] and
    [0][1] in get_precision elsewhere in this module.

    The diabetes labels are treated hierarchically: a predicted subtype is
    credited when the generic "Diabetes" label was wanted, and a predicted
    "Diabetes" is credited when either subtype was wanted.
    """
    matrix = [[0, 0], [0, 0]]
    for key in wanted:
        actual = wanted[key]
        predicted = prediction[key]
        # Cross-credit the hierarchical diabetes labels. BUG FIX: use
        # .get(..., False) so label sets that lack one of the diabetes keys
        # no longer raise KeyError.
        if key in ("Diabetes type 1", "Diabetes type 2"):
            if predicted and actual is False and wanted.get("Diabetes", False):
                actual = True
        elif key == "Diabetes":
            if predicted and actual is False and (
                wanted.get("Diabetes type 1", False)
                or wanted.get("Diabetes type 2", False)
            ):
                actual = True
        row = 0 if predicted else 1
        col = 0 if actual else 1
        matrix[row][col] += 1
    return matrix
def get_label_confusion_matrix(label, wanted, prediction):
    """Return the 2x2 confusion matrix for a single label.

    Layout: [[TP, FP], [FN, TN]] — row indexed by the predicted value
    (0 = positive, 1 = negative), column by the wanted value. Exactly one
    cell of the returned matrix is 1, the rest are 0.
    """
    matrix = [[0, 0], [0, 0]]
    row = 0 if prediction[label] else 1
    col = 0 if wanted[label] else 1
    matrix[row][col] = 1
    return matrix
def add_confusion_matrices(confusion_matrix, tmp_confusion_matrix):
for i in range(2):
for j in range(2):
......@@ -39,6 +69,12 @@ def get_precision(confusion_matrix):
return 0
return confusion_matrix[0][0] / denominator
def get_error_rate(confusion_matrix):
    """Return the misclassification rate (FP + FN) / total.

    Returns 0 when the matrix is all zeros, avoiding division by zero.
    """
    errors = confusion_matrix[0][1] + confusion_matrix[1][0]
    total = errors + confusion_matrix[0][0] + confusion_matrix[1][1]
    return errors / total if total else 0
def get_f1_score(confusion_matrix):
precision = get_precision(confusion_matrix)
recall = get_tpr(confusion_matrix)
......
......@@ -13,7 +13,7 @@ from variables.huggingface import HUGGINGFACE_MODELS
from variables.articles import LENGTH_CATEGORIES, LENGTH_CATEGORIES_TRESHOLDS
from variables.models import MODELS
from testModel.utils import get_dataset_filename, get_article_data, get_wanted_predictions, save_model_json, save_model_results_json, create_model_data_var, store_results
from testModel.metrics import confusion_matrix, add_confusion_matrices, get_tpr, get_tnr, get_precision
from testModel.metrics import get_confusion_matrix
from parsers.jsonParser import parseJsonFile
from models.ZeroShotClassifier.HuggingFace.zero_shot_classification import create_classifier, classify
......@@ -53,7 +53,7 @@ else:
print(f"File {file_path} does not exist.")
if not os.path.exists(file_path):
initial_data = create_model_data_var()
initial_data = create_model_data_var(NCDS)
print(initial_data)
with open(file_path, "w", encoding="utf-8") as file:
json.dump(initial_data, file, indent=4)
......@@ -77,12 +77,6 @@ print(f"MODEL: {model}")
print(f"TRESHOLD: {data['treshold']}")
print("---------------------------------")
result_matrix = data["results"]["ALL"]["confusion matrix"]
length_matrix = {}
for length_category in LENGTH_CATEGORIES:
length_matrix[length_category] = data["results"][length_category]["confusion matrix"]
for ncd in NCDS:
try:
......@@ -118,7 +112,6 @@ for ncd in NCDS:
end = time.time()
selected_labels = [key for key, value in predictions.items() if value]
matrix = confusion_matrix(wanted, predictions)
article_result["PMID"] = article['PMID']
article_result["Labels"] = article["Predictions"]
......@@ -126,7 +119,7 @@ for ncd in NCDS:
article_result["Wanted"] = wanted
article_result["Predictions"] = predictions
article_result["Selected Labels"] = selected_labels
article_result["confusion matrix"] = matrix
article_result["confusion matrix"] = get_confusion_matrix(wanted, predictions)
data["articles"].append(article_result)
print(f"PMID: {article_result['PMID']}")
......@@ -137,20 +130,17 @@ for ncd in NCDS:
print(f"Selected labels: {article_result['Selected Labels']}")
print(f"Confusion matrix: {article_result['confusion matrix']}")
result_matrix = add_confusion_matrices(result_matrix, matrix)
data["results"]["ALL"] = store_results(result_matrix)
data["results"]["ALL"] = store_results(data["results"]["ALL"], wanted, predictions)
added = False
for id, length_category_treshold in enumerate(LENGTH_CATEGORIES_TRESHOLDS):
if len(title+abstract) < length_category_treshold:
length_matrix[LENGTH_CATEGORIES[id]] = add_confusion_matrices(length_matrix[LENGTH_CATEGORIES[id]], matrix)
data["results"][LENGTH_CATEGORIES[id]] = store_results(length_matrix[LENGTH_CATEGORIES[id]])
data["results"][LENGTH_CATEGORIES[id]] = store_results(data["results"][LENGTH_CATEGORIES[id]], wanted, predictions)
added = True
break
if not added:
length_matrix[LENGTH_CATEGORIES[-1]] = add_confusion_matrices(length_matrix[LENGTH_CATEGORIES[-1]], matrix)
data["results"][LENGTH_CATEGORIES[-1]] = store_results(length_matrix[LENGTH_CATEGORIES[-1]])
data["results"][LENGTH_CATEGORIES[-1]] = store_results(data["results"][LENGTH_CATEGORIES[id]], wanted, predictions)
print("---------------------------------")
......@@ -166,6 +156,7 @@ for length_category in LENGTH_CATEGORIES:
print(f"True Positive Rate (TPR): {data['results'][length_category]['TPR']}")
print(f"True Negative Rate (TNR): {data['results'][length_category]['TNR']}")
print(f"Precision: {data['results'][length_category]['Precision']}")
print(f"Error Rate: {data['results'][length_category]['Error rate']}")
print(f"F1 Score: {data['results'][length_category]['F1']}")
print("---------------------------------")
......@@ -175,5 +166,6 @@ print(f"Result confusion matrix: {data['results']['ALL']['confusion matrix']}")
print(f"True Positive Rate (TPR): {data['results']['ALL']['TPR']}")
print(f"True Negative Rate (TNR): {data['results']['ALL']['TNR']}")
print(f"Precision: {data['results']['ALL']['Precision']}")
print(f"Error Rate: {data['results']['ALL']['Error rate']}")
print(f"F1 Score: {data['results']['ALL']['F1']}")
print()
\ No newline at end of file
......@@ -4,7 +4,7 @@ import json
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../")))
from testModel.metrics import get_tpr, get_tnr, get_precision, get_f1_score
from testModel.metrics import get_confusion_matrix, get_label_confusion_matrix, get_tpr, get_tnr, get_precision, get_error_rate, get_f1_score, add_confusion_matrices
from variables.articles import LENGTH_CATEGORIES
def get_dataset_filename(name):
......@@ -75,7 +75,7 @@ def save_model_results_json(filepath, model, new_entry):
with open(filename, "w") as file:
json.dump(data, file)
def create_model_data_var():
def create_model_data_var(labels):
data = {
'treshold': None,
'articles': [],
......@@ -84,27 +84,39 @@ def create_model_data_var():
}
}
data["results"]["ALL"] = create_results()
data["results"]["ALL"] = create_results(labels)
for length_category in LENGTH_CATEGORIES:
data["results"][length_category] = create_results()
data["results"][length_category] = create_results(labels)
return data
def create_results():
return {
def create_results(labels):
    """Return a fresh, zeroed results record.

    Contains one overall 2x2 confusion matrix, one per-label 2x2 matrix for
    each entry of *labels*, and the derived metrics (TPR, TNR, Precision,
    Error rate, F1) initialised to 0.
    """
    return {
        'confusion matrix': [[0, 0], [0, 0]],
        'labels': {label: [[0, 0], [0, 0]] for label in labels},
        'TPR': 0,
        'TNR': 0,
        'Precision': 0,
        'Error rate': 0,
        'F1': 0,
    }
def store_results(results, wanted, predicted):
    """Accumulate one article's outcome into *results* and refresh metrics.

    Adds the confusion matrix of (wanted, predicted) to the running overall
    matrix, accumulates every per-label matrix, then recomputes the derived
    metrics from the overall matrix. Mutates *results* in place and returns
    it.
    """
    overall = add_confusion_matrices(
        results["confusion matrix"], get_confusion_matrix(wanted, predicted)
    )
    results["confusion matrix"] = overall
    # BUG FIX: the updated per-label matrix must be stored back into the
    # dict — rebinding the loop variable alone discarded the accumulated
    # matrix, leaving results["labels"] permanently zeroed.
    for label in results["labels"]:
        results["labels"][label] = add_confusion_matrices(
            results["labels"][label],
            get_label_confusion_matrix(label, wanted, predicted),
        )
    results["TPR"] = get_tpr(overall)
    results["TNR"] = get_tnr(overall)
    results["Precision"] = get_precision(overall)
    results["Error rate"] = get_error_rate(overall)
    results["F1"] = get_f1_score(overall)
    return results
......@@ -7,6 +7,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
from models.ZeroShotClassifier.HuggingFace.zero_shot_classification import create_classifier, classify
import models.LLM.Ollama.ollama_wrapper as ollama
from models.LLM.Gemini.gemini import gemini_start_chat, gemini_classify
from models.LLM.Cohere.cohere_wrapper import cohere_create_client, cohere_classify
MODELS_CATEGORIES = [
"huggingface",
......@@ -23,5 +24,6 @@ MODELS = {
'llama3.2': {'category': "ollama", 'predict': ollama.classify, 'requiresDependency': False, 'dependency': None},
'mistral-small': {'category': "ollama", 'predict': ollama.classify, 'requiresDependency': False, 'dependency': None},
'deepseek-v2': {'category': "ollama", 'predict': ollama.classify, 'requiresDependency': False, 'dependency': None},
'gemini-hosted': {'category': "hosted", 'predict': gemini_classify, 'requiresDependency': True, 'dependency': gemini_start_chat}
'gemini-hosted': {'category': "hosted", 'predict': gemini_classify, 'requiresDependency': True, 'dependency': gemini_start_chat},
'cohere-hosted': {'category': "hosted", 'predict': cohere_classify, 'requiresDependency': True, 'dependency': cohere_create_client}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment