# -*- coding: utf-8 -*-
"""Negative_Selection_Algorithm.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DtaVytu3dSqFTIv76-mAoI_gDBcFJU65
"""

import numpy as np
import random

class NegativeSelectionAlgorithm:
    """Real-valued negative selection anomaly detector.

    Candidate detectors are drawn uniformly from the unit hypercube and kept
    only if they lie at least ``matching_threshold`` (Euclidean distance) away
    from every 'self' sample. A data point is later reported as an anomaly
    when it lies strictly within ``matching_threshold`` of any kept detector.
    """

    def __init__(self, self_set, detector_size, matching_threshold):
        """
        Initializes the Negative Selection Algorithm.

        Args:
            self_set (np.ndarray): A 2D array (or array-like) representing the
                                   known 'self' data points. Each row is a data point.
            detector_size (int): The number of detectors to generate.
            matching_threshold (float): The maximum distance for a detector to 'match' a data point.

        Raises:
            ValueError: If self_set is not two-dimensional.
        """
        # Accept any array-like; an existing ndarray is passed through as-is.
        self_set = np.asarray(self_set)
        if self_set.ndim != 2:
            raise ValueError(
                f"self_set must be a 2D array of shape (n_points, n_features), "
                f"got {self_set.ndim} dimension(s)."
            )
        self.self_set = self_set
        self.detector_size = detector_size
        self.matching_threshold = matching_threshold
        self.detectors = []
        self.data_dim = self_set.shape[1]  # Dimension of the data points

    def generate_random_detector(self):
        """
        Generates a random detector within the data's value range.
        Assumes data is normalized or within a known range (e.g., 0 to 1).
        For simplicity, detectors are drawn uniformly from [0, 1) per feature.
        """
        return np.random.rand(self.data_dim)

    def _matches_any(self, points, point):
        """Return True if `point` is strictly within the matching threshold of any row of `points`."""
        if len(points) == 0:
            return False
        # One vectorized distance computation instead of a Python loop per row.
        distances = np.linalg.norm(points - point, axis=1)
        return bool(np.any(distances < self.matching_threshold))

    def tolerize_detectors(self, max_attempts=None):
        """
        Generates detectors and tolerizes them against the self_set.
        Detectors that match any self point are discarded.

        Accepted detectors are appended to ``self.detectors`` until it holds
        ``detector_size`` entries.

        Args:
            max_attempts (int, optional): Upper bound on the number of candidates
                drawn during this call. Defaults to
                ``max(10_000, 1_000 * detector_size)``.

        Raises:
            RuntimeError: If the candidate budget is exhausted before enough
                detectors are accepted (e.g. the matching threshold is so large
                that the self set covers the whole sampling region). Detectors
                accepted so far remain in ``self.detectors``.
        """
        if max_attempts is None:
            # Without a cap the loop never terminates when no candidate can
            # escape the self region; this budget is generous for sane settings.
            max_attempts = max(10_000, 1_000 * self.detector_size)

        print(f"Generating and tolerizing {self.detector_size} detectors...")
        generated_count = 0
        while len(self.detectors) < self.detector_size:
            if generated_count >= max_attempts:
                raise RuntimeError(
                    f"Gave up after {generated_count} candidate detectors with only "
                    f"{len(self.detectors)} of {self.detector_size} accepted; "
                    f"matching_threshold={self.matching_threshold} may be too large "
                    f"for the given self set."
                )

            candidate_detector = self.generate_random_detector()
            if not self._matches_any(self.self_set, candidate_detector):
                self.detectors.append(candidate_detector)

            generated_count += 1
            if generated_count % 10000 == 0:
                print(f"  Generated {generated_count} candidate detectors, found {len(self.detectors)} unique detectors.")

        print(f"Finished generating {len(self.detectors)} detectors.")

    def detect_anomalies(self, data_to_check):
        """
        Checks a new set of data points for anomalies using the generated detectors.

        Args:
            data_to_check (np.ndarray): A 2D numpy array of data points to check.

        Returns:
            list: A list of indices of data points from data_to_check that are considered anomalies.
                  Empty when no detectors have been generated.
        """
        print(f"Checking {len(data_to_check)} data points for anomalies...")
        # Stack the detectors once so each point needs a single vectorized check.
        detector_array = np.asarray(self.detectors)
        if detector_array.size == 0:
            # No detectors means nothing can be flagged.
            return []

        return [
            i
            for i, data_point in enumerate(data_to_check)
            if self._matches_any(detector_array, data_point)
        ]

# --- Example Usage ---
if __name__ == "__main__":
    # Demo: build a 2D 'self' cluster, train detectors on it, then screen a
    # test batch that contains three hand-planted outliers.

    # --- Step 1: normal ('self') samples centred on (0.5, 0.5), kept inside [0, 1] ---
    num_self_points = 200
    self_data = np.clip(
        np.random.normal(0.5, 0.1, (num_self_points, 2)),
        0,
        1,
    )

    # --- Step 2: algorithm parameters ---
    detector_population_size = 1000  # how many detectors to keep
    matching_threshold = 0.08        # match radius around each detector

    # --- Step 3: construct the detector set against the self data ---
    nsa = NegativeSelectionAlgorithm(self_data, detector_population_size, matching_threshold)
    nsa.tolerize_detectors()

    # --- Step 4: test batch, mostly drawn from the same distribution as self ---
    num_test_points = 50
    test_data = np.random.normal(0.5, 0.1, (num_test_points, 2))

    # Overwrite the first three rows with points far from the self cluster.
    test_data[:3] = [
        [0.1, 0.9],    # Anomaly 1
        [0.9, 0.1],    # Anomaly 2
        [0.05, 0.05],  # Anomaly 3
    ]

    # Keep the test batch inside the same [0, 1] range as the self data.
    test_data = np.clip(test_data, 0, 1)

    # --- Step 5: screen the batch and report ---
    detected_anomalies_indices = nsa.detect_anomalies(test_data)

    print("\n--- Detection Results ---")
    if not detected_anomalies_indices:
        print("No anomalies detected.")
    else:
        print(f"Found {len(detected_anomalies_indices)} anomalies at indices: {detected_anomalies_indices}")
        print("Anomalous data points detected:")
        for idx in detected_anomalies_indices:
            print(f"  Point {idx}: {test_data[idx]}")

    # --- Optional plot; skipped gracefully when matplotlib is unavailable ---
    try:
        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 8))

        # Self samples.
        self_x, self_y = self_data[:, 0], self_data[:, 1]
        plt.scatter(self_x, self_y, color='blue', label='Self Data', alpha=0.6)

        # Accepted detectors, stacked into one array for column slicing.
        detector_array = np.array(nsa.detectors)
        plt.scatter(detector_array[:, 0], detector_array[:, 1],
                    color='green', marker='x', s=50, label='Detectors', alpha=0.7)

        # Every test point, flagged or not.
        plt.scatter(test_data[:, 0], test_data[:, 1], color='gray', label='Test Data (All)', alpha=0.5)

        # Overlay the flagged points so they stand out.
        if detected_anomalies_indices:
            anomalous_points = test_data[detected_anomalies_indices]
            plt.scatter(anomalous_points[:, 0], anomalous_points[:, 1],
                        color='red', marker='o', s=100, edgecolors='black', label='Detected Anomalies')

        plt.title('Negative Selection Algorithm for Anomaly Detection')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.legend()
        plt.grid(True)
        plt.show()

    except ImportError:
        print("\nMatplotlib not installed. Skipping visualization.")
        print("To install: pip install matplotlib")