This version is based on the ScaDS.AI API #85

Open · wants to merge 8 commits into main
1 change: 1 addition & 0 deletions .env
@@ -0,0 +1 @@
HF_TOKEN=set_your_token_here
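
Note: docker-compose.yml below substitutes ${SCADSAI_API_KEY} from the host environment or this .env, which currently only defines HF_TOKEN. A minimal preflight sketch (the variable names come from this PR; the check itself is not part of it):

import os
import sys

# Preflight sketch: docker-compose.yml reads SCADSAI_API_KEY via ${...}
# substitution, but this .env only adds HF_TOKEN, so verify both are set
# before bringing the stack up.
missing = [var for var in ("SCADSAI_API_KEY", "HF_TOKEN") if not os.getenv(var)]
if missing:
    sys.exit(f"Missing environment variables: {', '.join(missing)}")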
17 changes: 17 additions & 0 deletions docker-compose.yml
@@ -53,6 +53,22 @@ services:
    volumes:
      - ./search_engine/search/backend/wordcloud/static:/app/static

  chatbot_backend:
    build:
      context: ./search_engine/chatbot
      dockerfile: Dockerfile
    container_name: chatbot_backend
    environment:
      - ELASTICSEARCH_HOST=elasticsearch
      - ELASTICSEARCH_PORT=9200
      - SCADSAI_API_KEY=${SCADSAI_API_KEY}
      - MODEL_NAME=meta-llama/Meta-Llama-3.1-70B-Instruct
    depends_on:
      elasticsearch:
        condition: service_healthy
    ports:
      - "5002:5000"

  frontend:
    build:
      context: ./search_engine/search/frontend # Path to the frontend code
@@ -63,6 +79,7 @@ services:
    depends_on:
      - appsubmitter_backend
      - search_backend
      - chatbot_backend
    environment:
      - REACT_APP_BACKEND_URL=http://localhost:5001
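
With the stack up, the chatbot_backend service maps container port 5000 to host port 5002, so the /api/chat endpoint defined in chatbot.py below can be exercised directly. A minimal smoke-test sketch, assuming the default mapping above; the query string is illustrative:

import requests

# Smoke test for the chatbot endpoint exposed on host port 5002
# (compose mapping "5002:5000"); assumes the stack is already running.
resp = requests.post(
    "http://localhost:5002/api/chat",
    json={"query": "Which materials cover image segmentation?"},
    timeout=120,  # the first call may wait on Elasticsearch and the remote LLM
)
resp.raise_for_status()
data = resp.json()
print(data["response"])
for source in data["sources"]:
    print(f"- {source['name']}: {source['url']}")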
8 changes: 7 additions & 1 deletion folder structure.txt
@@ -2,12 +2,17 @@
├── images
│   ├── search_results.png
│   └── submit_materials.png
├── nfdi_search_engine_design
├── search_engine
│   ├── Elasticsearch
│   ├── appsubmitter_backend
│   │   ├── Dockerfile
│   │   ├── requirements_submitter.txt
│   │   └── submitter.py
│   ├── chatbot
│   │   ├── chatbot.py
│   │   ├── llm_utilities.py
│   │   ├── requirements_chatbot.txt
│   │   └── Dockerfile
│   ├── search
│   │   ├── backend
│   │   │   ├── data.json
@@ -77,5 +82,6 @@
│   └── elasticsearch setup
├── LICENSE
├── README
├── .env
├── docker-compose
└── folder structure
24 changes: 24 additions & 0 deletions search_engine/chatbot/Dockerfile
@@ -0,0 +1,24 @@
# Use the official Python image as a base
FROM python:3.12-slim

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file into the container
COPY requirements_chatbot.txt .

# Install the dependencies
RUN pip install --upgrade pip && pip install -r requirements_chatbot.txt

# Copy the rest of the application code into the container
COPY . .

# Expose the port that the Flask app runs on
EXPOSE 5000

# Run the application
CMD ["python", "chatbot.py"]
159 changes: 159 additions & 0 deletions search_engine/chatbot/chatbot.py
@@ -0,0 +1,159 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
from elasticsearch import Elasticsearch, ConnectionError
from llm_utilities import LLMUtilities
import logging
import platform
import time
import os

# Flask app setup
app = Flask(__name__)
CORS(app)

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hardware Information (local laptop or HPC reference; purely informational)
SYSTEM_INFO = {
    "Machine": platform.node(),
    "Processor": platform.processor(),
    "Local GPU": "NVIDIA RTX 500 Ada Generation Laptop GPU",
    "NOTE": "Inference is delegated to the HPC KISSKI LLM service.",
}
logger.info(f"System Info: {SYSTEM_INFO}")

# Function to connect to Elasticsearch with retry logic
def connect_elasticsearch():
    """
    Connects to Elasticsearch with retry logic.
    Returns:
        Elasticsearch instance if connection is successful, otherwise raises an exception.
    """
    es = None
    max_attempts = 120  # Up to 120 attempts at 15-second intervals (30 minutes)
    es_host = os.getenv("ELASTICSEARCH_HOST", "elasticsearch")
    es_port = os.getenv("ELASTICSEARCH_PORT", "9200")

    # Convert es_port to integer
    try:
        es_port = int(es_port)
    except ValueError:
        logger.error(f"ELASTICSEARCH_PORT is not a valid integer: {es_port}")
        raise

    for attempt in range(max_attempts):
        try:
            es = Elasticsearch(
                [{"host": es_host, "port": es_port, "scheme": "http"}],
                request_timeout=30
            )
            if es.ping():
                logger.info("Connected to Elasticsearch")
                return es
            # Ping failed: wait before retrying so this branch doesn't busy-loop
            logger.warning(f"Elasticsearch ping failed, attempt {attempt + 1}/{max_attempts}, retrying in 15 seconds...")
            time.sleep(15)
        except ConnectionError:
            logger.warning(f"Elasticsearch not ready, attempt {attempt + 1}/{max_attempts}, retrying in 15 seconds...")
            time.sleep(15)
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            time.sleep(15)
    raise Exception("Could not connect to Elasticsearch after several attempts")

# Connect to Elasticsearch
es = connect_elasticsearch()

# Initialize LLM Utilities with environment-driven model name
# Default to "meta-llama/Meta-Llama-3.1-70B-Instruct" unless otherwise specified
model_name = os.getenv("MODEL_NAME", "meta-llama/Meta-Llama-3.1-70B-Instruct")

llm_util = LLMUtilities(model_name=model_name)

# Elasticsearch-based document retrieval
def retrieve_documents(query, top_k=3):
    """
    Retrieves relevant documents from Elasticsearch based on the query.
    Args:
        query (str): The search query.
        top_k (int): Number of top documents to retrieve.
    Returns:
        list: A list of retrieved documents.
    """
    try:
        response = es.search(
            index="bioimage-training",
            body={
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": ["name^3", "description", "tags", "authors", "type", "license"],
                        "type": "best_fields",
                    }
                }
            },
            size=top_k,
        )
        documents = []
        for hit in response["hits"]["hits"]:
            source = hit["_source"]
            documents.append({
                "name": source.get("name", "Unnamed"),
                "description": source.get("description", "No description available"),
                "url": source.get("url", ""),
            })
        return documents
    except Exception as e:
        logger.error(f"Error retrieving documents: {e}")
        return []

# RAG-based response generation (with remote HPC LLM call)
def generate_response(query, documents):
    """
    Generates a response using retrieved documents and the query.
    Args:
        query (str): The search query.
        documents (list): List of retrieved documents.
    Returns:
        str: Generated response from the KISSKI LLM service.
    """
    context = "\n".join(
        [f"- {doc['name']}: {doc['description']} (URL: {doc['url']})" for doc in documents]
    )
    prompt = f"""
Based on the following documents, answer the user's question concisely and include relevant links.

## Documents
{context}

## Question
{query}
"""
    return llm_util.generate_response(prompt)

# Chatbot API endpoint
@app.route("/api/chat", methods=["POST"])
def chat():
    """
    Chat endpoint to process user queries and generate responses.
    """
    # Tolerate missing or invalid JSON bodies instead of raising
    payload = request.get_json(silent=True) or {}
    user_query = payload.get("query", "")
    if not user_query:
        return jsonify({"error": "Query cannot be empty"}), 400

    # Retrieve relevant documents
    documents = retrieve_documents(user_query)

    if not documents:
        return jsonify({"response": "No relevant documents found.", "sources": []})

    # Generate chatbot response
    reply = generate_response(user_query, documents)

    return jsonify({"response": reply, "sources": documents})

# Main entry point
if __name__ == "__main__":
    logger.info("Starting chatbot, using HPC KISSKI LLM endpoint for GPU inference...")
    app.run(host="0.0.0.0", port=5000, debug=True)
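
For debugging retrieval in isolation, the same multi_match query can be run against Elasticsearch directly. A sketch assuming Elasticsearch is reachable on localhost:9200 and the bioimage-training index is populated; the query string is illustrative:

from elasticsearch import Elasticsearch

# Standalone retrieval check mirroring retrieve_documents() above.
es = Elasticsearch([{"host": "localhost", "port": 9200, "scheme": "http"}])
response = es.search(
    index="bioimage-training",
    body={
        "query": {
            "multi_match": {
                "query": "deep learning",
                "fields": ["name^3", "description", "tags", "authors", "type", "license"],
                "type": "best_fields",
            }
        }
    },
    size=3,
)
for hit in response["hits"]["hits"]:
    print(hit["_source"].get("name", "Unnamed"))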
44 changes: 44 additions & 0 deletions search_engine/chatbot/llm_utilities.py
@@ -0,0 +1,44 @@
import os
import logging
import openai

logger = logging.getLogger(__name__)

class LLMUtilities:
    def __init__(self, model_name="meta-llama/Meta-Llama-3.1-70B-Instruct"):
        """
        Initialize the utility to communicate with the KISSKI/ScaDS.AI LLM service.
        Args:
            model_name (str): The LLM model name to use.
        """
        self.model_name = model_name
        self.api_key = os.getenv("SCADSAI_API_KEY")  # The environment variable must contain the HPC token
        if not self.api_key:
            logger.error("SCADSAI_API_KEY is not set. Please set it to access the HPC LLM endpoint.")
            raise EnvironmentError("Missing SCADSAI_API_KEY environment variable.")

        # Use an openai>=1.0 client pointed at the KISSKI HPC endpoint
        # (openai.api_base is the pre-1.0 interface and does not exist alongside
        # the chat.completions call style used below).
        self.client = openai.OpenAI(
            api_key=self.api_key,
            base_url="https://llm.scads.ai/v1",
        )

    def generate_response(self, prompt, max_tokens=512):
        """
        Generate a response based on the provided prompt by sending a request
        to the HPC KISSKI LLM endpoint.
        Args:
            prompt (str): The input prompt for the HPC LLM.
            max_tokens (int): The maximum number of tokens to generate.
        Returns:
            str: The generated response from the LLM.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=0.7,
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            logger.error(f"Error during response generation: {e}")
            return f"Sorry, I couldn't generate a response. Error: {e}"
6 changes: 6 additions & 0 deletions search_engine/chatbot/requirements_chatbot.txt
@@ -0,0 +1,6 @@
elasticsearch
flask
flask-cors
pyyaml
requests
openai>=1.0  # llm_utilities.py uses the 1.x chat.completions client interface