diff --git a/clear_json.py b/clear_json.py
deleted file mode 100644
index f3a1e83224e7937854ec1a0d1540a724623f4abf..0000000000000000000000000000000000000000
--- a/clear_json.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import re
-
-def pre_clean_json(input_path, output_path):
-    """
-    Cleans MongoDB-style NumberInt(...) annotations and replaces them with valid integers.
-    """
-    pattern = re.compile(r'NumberInt\((\d+)\)')
-
-    with open(input_path, 'r', encoding='utf-8') as fin, open(output_path, 'w', encoding='utf-8') as fout:
-        for line in fin:
-            cleaned_line = pattern.sub(r'\1', line)
-            fout.write(cleaned_line)
-
-    print(f"✅ Fichier nettoyé sauvegardé dans : {output_path}")
-
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--infile", type=str, required=True, help="Fichier JSON d'entrée (brut)")
-    parser.add_argument("--outfile", type=str, default="clean_dblpv13.json", help="Fichier nettoyé de sortie")
-
-    args = parser.parse_args()
-    pre_clean_json(args.infile, args.outfile)
diff --git a/client/Dockerfile b/client/Dockerfile
index fb5a3250a59193554f1f4bb61557e755172c3179..37852632c525ca35f57a87c0271ce62a58731b87 100644
--- a/client/Dockerfile
+++ b/client/Dockerfile
@@ -4,17 +4,15 @@ WORKDIR /app
 
 # Install dependencies
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+COPY clean.py .
+COPY neo4j_import.py .
 
 # Download and extract the file
 RUN apt-get update && apt-get install -y wget tar && \
-    wget -O dblp.tar.gz https://originalfileserver.aminer.cn/misc/dblp_v14.tar.gz && \
-    mkdir /app/dblp_data && \
-    tar -xzf dblp.tar.gz -C /app/dblp_data && \
-    rm dblp.tar.gz && \
-    apt-get remove -y wget && apt-get clean && rm -rf /var/lib/apt/lists/*
+    apt-get install -y libjansson-dev && \
+    mkdir /app/dblp_data
 
-# Copier le script Python
-COPY insert_script.py .
+RUN pip install --no-cache-dir -r requirements.txt
 
-CMD ["python", "insert_script.py"]
+CMD sh -c "wget -O dblp.tar.gz https://originalfileserver.aminer.cn/misc/dblp_v14.tar.gz && tar -xzf dblp.tar.gz && python3 insert_script.py "
\ No newline at end of file
diff --git a/client/clean.py b/client/clean.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad77a50b81691ac161d38a40b5b1dbdf0ae98480
--- /dev/null
+++ b/client/clean.py
@@ -0,0 +1,44 @@
+# clean.py
+
+import re
+import json
+import ijson
+
+__all__ = ["nettoyer_gros_json"]
+
+def pre_clean_json(input_path, output_path):
+    """
+    Cleans MongoDB-style NumberInt(...) annotations and replaces them with valid integers.
+    """
+    print(f"🧹 Nettoyage du fichier : {input_path}")
+    pattern = re.compile(r'NumberInt\((\d+)\)')
+
+    with open(input_path, 'r', encoding='utf-8') as fin, open(output_path, 'w', encoding='utf-8') as fout:
+        for line in fin:
+            cleaned_line = pattern.sub(r'\1', line)
+            fout.write(cleaned_line)
+
+    print(f"✅ Fichier nettoyé sauvegardé dans : {output_path}")
+
+
+def nettoyer_gros_json(input_path, output_path):
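+    """Stream input_path with ijson and write output_path as a JSON array keeping only _id, title, authors and references."""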
+    cleaned_path = "cleaned_" + input_path
+
+    # First pass: rewrite NumberInt(...) wrappers so the file parses as valid JSON
+    pre_clean_json(input_path, cleaned_path)
+
+    print("Begin clear")
+    with open(cleaned_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile:
+        outfile.write('[\n')
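+        # Stream top-level array items one at a time so the multi-GB dump is never fully loaded into memory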
+        parser = ijson.items(infile, 'item')
+        first = True
+        for obj in parser:
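+            # Keep only the fields the Neo4j import uses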
+            filtered = {k: obj[k] for k in ['_id', 'title', 'authors', 'references'] if k in obj}
+            if not first:
+                outfile.write(',\n')
+            json.dump(filtered, outfile, ensure_ascii=False)
+            first = False
+        outfile.write('\n]')
diff --git a/client/insert_script.py b/client/insert_script.py
deleted file mode 100644
index bbc630bd4b6406067cf02aa294e31e3042cd61c4..0000000000000000000000000000000000000000
--- a/client/insert_script.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import os
-import ijson
-import argparse
-import time
-from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from py2neo import Graph
-from tqdm import tqdm
-
-NEO4J_URI = os.getenv("NEO4J_URI")
-NEO4J_USER = os.getenv("NEO4J_USER")
-NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
-BATCH_SIZE = 500
-MAX_WORKERS = 2
-
-def batch_insert_articles(graph, articles):
-    query = """
-    UNWIND $articles AS article
-    MERGE (a:ARTICLE { _id: article._id })
-    SET a.title = article.title
-
-    FOREACH (author IN article.authors |
-        FOREACH (_ IN CASE WHEN author.name IS NOT NULL THEN [1] ELSE [] END |
-            MERGE (p:AUTHOR { _id: coalesce(author._id, author.name) })
-            SET p.name = author.name
-            MERGE (p)-[:AUTHORED]->(a)
-        )
-    )
-
-    FOREACH (ref_id IN article.references |
-        MERGE (r:ARTICLE { _id: ref_id })
-        MERGE (a)-[:CITES]->(r)
-    )
-    """
-    graph.run(query, articles=articles)
-
-def main(json_file, limit):
-    start_time = time.time()
-    print(f"⏱️ Début : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
-
-    graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
-
-    print(f"📄 Lecture optimisée de {json_file} (limite: {limit})")
-
-    with open(json_file, 'r', encoding='utf-8') as f:
-        article_iter = ijson.items(f, 'item')
-        total = 0
-        futures = []
-
-        def flush_batch(batch):
-            if batch:
-                futures.append(executor.submit(batch_insert_articles, graph, list(batch)))
-                batch.clear()
-
-        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-            batch = []
-            for article in tqdm(article_iter):
-                if limit and total >= limit:
-                    break
-                batch.append(article)
-                total += 1
-
-                if len(batch) >= BATCH_SIZE:
-                    flush_batch(batch)
-                    time.sleep(0.1)
-
-            # send the remaining articles
-            flush_batch(batch)
-
-            # wait for all threads to finish
-            for future in tqdm(as_completed(futures), total=len(futures), desc="💾 Finalizing inserts"):
-                future.result()
-
-    end_time = time.time()
-    elapsed_ms = int((end_time - start_time) * 1000)
-
-    print(f"✅ Import terminé")
-    print(f"⏱️ Fin   : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
-    print(f"🕓 Durée totale : {elapsed_ms:,} ms")
-    print(f"⚡ Vitesse moyenne : {int(total / (elapsed_ms / 1000))} it/s")
-
-# --- CLI ---
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--file", type=str, default="clean_dblpv13.json", help="Chemin vers le fichier JSON nettoyé")
-    parser.add_argument("--limit", type=int, default=1000000, help="Nombre maximum d'articles à charger")
-    args = parser.parse_args()
-
-    main(args.file, args.limit)
-
diff --git a/client/neo4j_import.py b/client/neo4j_import.py
index 0adeaea5f12230a15699b9372836c7a6b3ef4490..e9523ae35527bb2b31b650313c952ade60509b19 100644
--- a/client/neo4j_import.py
+++ b/client/neo4j_import.py
@@ -1,18 +1,18 @@
+import os
 import ijson
 import argparse
 import time
 from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from neo4j import GraphDatabase
+from py2neo import Graph
+from clean import nettoyer_gros_json
 from tqdm import tqdm
 
-NEO4J_URI = "bolt://localhost:7687"
-NEO4J_USER = "neo4j"
-NEO4J_PASSWORD = "12345678"
-BATCH_SIZE = 10000
-MAX_WORKERS = 4
+NEO4J_URI = os.getenv("NEO4J_URI")
+NEO4J_USER = os.getenv("NEO4J_USER")
+NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
+BATCH_SIZE = 500  # articles sent per UNWIND transaction
 
-def batch_insert_articles(tx, articles):
+def batch_insert_articles(graph, articles):
     query = """
     UNWIND $articles AS article
     MERGE (a:ARTICLE { _id: article._id })
@@ -31,45 +31,42 @@ def batch_insert_articles(tx, articles):
         MERGE (a)-[:CITES]->(r)
     )
     """
-    tx.run(query, articles=articles)
+    graph.run(query, articles=articles)
 
 def main(json_file, limit):
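+    # Pre-clean the raw dump (NumberInt fixes and field filtering) before streaming it into Neo4j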
+    clear_file = "clear_"+json_file
+    nettoyer_gros_json(json_file, clear_file)
+    json_file = clear_file
+
     start_time = time.time()
     print(f"⏱️ Début : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
 
-    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+    graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+
     print(f"📄 Lecture optimisée de {json_file} (limite: {limit})")
 
     with open(json_file, 'r', encoding='utf-8') as f:
         article_iter = ijson.items(f, 'item')
         total = 0
-        futures = []
-
-        def flush_batch(batch):
-            if batch:
-                futures.append(executor.submit(session.execute_write, batch_insert_articles, batch))
-                #print(f"📤 Batch de {len(batch)} articles envoyé")
-                batch.clear()  # Vider explicitement le batch pour libérer de la mémoire
+        batch = []
 
-        with driver.session() as session, ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-            batch = []
-            for article in tqdm(article_iter):
-                if limit and total >= limit:
-                    break
-                batch.append(article)
-                total += 1
+        for article in tqdm(article_iter):
+            if limit and total >= limit:
+                break
 
-                if len(batch) >= BATCH_SIZE:
-                    flush_batch(batch)
+            batch.append(article)
+            total += 1
 
-            # envoyer les derniers articles
-            flush_batch(batch)
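+            # Flush the full batch as a single parameterized UNWIND query, then reuse the list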
+            if len(batch) >= BATCH_SIZE:
+                batch_insert_articles(graph, batch)
+                batch.clear()
 
-            # attendre la fin de tous les threads
-            for future in tqdm(as_completed(futures), total=len(futures), desc="💾 Finalisation des insertions"):
-                future.result()
+        # insert the remaining items
+        if batch:
+            batch_insert_articles(graph, batch)
 
-    driver.close()
     end_time = time.time()
     elapsed_ms = int((end_time - start_time) * 1000)
 
@@ -78,11 +75,16 @@ def main(json_file, limit):
     print(f"🕓 Durée totale : {elapsed_ms:,} ms")
     print(f"⚡ Vitesse moyenne : {int(total / (elapsed_ms / 1000))} it/s")
 
+    # Wait for user input before closing (keeps the container alive after the import)
+    test = ""
+    while test != "exit":
+        test = input("Tapez 'exit' pour quitter ou appuyez sur Entrée pour continuer...")
+
 # --- CLI ---
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--file", type=str, default="clean_dblpv13.json", help="Chemin vers le fichier JSON nettoyé")
-    parser.add_argument("--limit", type=int, default=1000000, help="Nombre maximum d'articles à charger")
+    parser.add_argument("--file", type=str, default="dblpv13.json", help="Chemin vers le fichier JSON nettoyé")
+    parser.add_argument("--limit", type=int, default=5000000, help="Nombre maximum d'articles à charger")
     args = parser.parse_args()
 
     main(args.file, args.limit)
\ No newline at end of file
diff --git a/client/requirement.txt b/client/requirements.txt
similarity index 100%
rename from client/requirement.txt
rename to client/requirements.txt
diff --git a/docker-compose.yml b/docker-compose.yml
index 66243f35c1a1d98758e75f1c6e13ccede8677ee2..d6484fe1cfb95f061708f480d91b34eb4d957ddf 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,11 +9,15 @@ services:
       - "7474:7474"
       - "7687:7687"
     environment:
-      - NEO4J_AUTH=neo4j/testtest
+      - NEO4J_AUTH=neo4j/12345678
     volumes:
       - ${HOME}/neo4j/logs:/logs
       - ${HOME}/neo4j/data:/data
-      - ${HOME}/neo4j/import:/var/lib/neo4j/import
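+    # Cap the database container's memory (deploy limits are honored by Compose v2 and Swarm)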
+    deploy:
+      resources:
+        limits:
+          memory: 3g
 
   neo4j_client:
     build:
@@ -25,4 +29,11 @@
     environment:
       - NEO4J_URI=bolt://neo4j_db:7687
       - NEO4J_USER=neo4j
-      - NEO4J_PASSWORD=testtest
+      - NEO4J_PASSWORD=12345678
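+    # Mount the host's import directory; note that ${HOME} is interpolated on both sides of the mapping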
+    volumes:
+      - ${HOME}/neo4j/import:${HOME}/import
+    deploy:
+      resources:
+        limits:
+          memory: 3g
diff --git a/neo4j/Dockerfile b/neo4j/Dockerfile
index 30e060b151a5763351cd5edb0ae15f92bb7c0243..38a24b8aedea38efe21e00544646e4153835a022 100644
--- a/neo4j/Dockerfile
+++ b/neo4j/Dockerfile
@@ -3,14 +3,6 @@ FROM neo4j:latest
 
 USER root
 
-RUN apt-get update && \
-    apt-get install -y python3 python3-pip && \
-    pip3 install neo4j tqdm ijson && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+RUN apt-get update
 
 USER neo4j
-
-RUN cd ~ && \
-    git clone https://gitedu.hesge.ch/lucas.landrecy/advdaba_labo2.git
-