import torch
from torch import nn
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load model and weights.
# Only the architecture is built here: load_state_dict below is strict by
# default, so every parameter and buffer comes from the checkpoint. Requesting
# the ImageNet weights as well would only add a needless download (and a
# network dependency) for values that are overwritten straight away.
# pretrained=False keeps to the API style this file already uses; newer
# torchvision releases spell the same thing weights=None.
model = models.mobilenet_v3_small(pretrained=False)
in_features = model.classifier[3].in_features
# Replace the final classifier layer with a 2-output head.
model.classifier[3] = nn.Linear(in_features, 2)

# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint written from a CUDA run still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load("litter_classifier.pth", map_location=device))
model = model.to(device)
model.eval()  # inference mode for dropout / batch-norm layers

# Get test data.
# NOTE(review): no transforms.Normalize is applied here -- confirm this matches
# the preprocessing that was used when litter_classifier.pth was trained.
test_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

test_ds = datasets.ImageFolder("litter_detection_split/test", transform=test_tf)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=32)

# Display names for the classes, indexed by label id.
class_names = ['no litter', 'litter']

# ImageFolder assigns label ids from the sorted sub-folder names, so the order
# of class_names above is only correct if it matches that mapping. Print the
# mapping so a swapped order is visible, and fail early if the dataset holds
# more classes than there are names for.
# NOTE(review): verify that class_names lines up with the printed mapping.
print("Dataset label mapping:", test_ds.class_to_idx)
if len(test_ds.classes) > len(class_names):
    raise ValueError(
        f"Test set has {len(test_ds.classes)} classes {test_ds.classes}, "
        f"but only {len(class_names)} class names are defined"
    )

# Run the model over the whole test set, collecting the predicted and the
# true class index of every sample in two parallel flat lists.
all_preds, all_labels = [], []

with torch.no_grad():  # evaluation only, so no gradients are tracked
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score along dim 1.
        preds = torch.argmax(outputs, dim=1)

        all_labels.extend(labels.numpy())
        all_preds.extend(preds.cpu().numpy())

# Build the confusion matrix directly: the row index is the predicted class,
# the column index is the actual class. The size follows class_names instead
# of being hard-coded to 2x2, so the two cannot drift apart.
num_classes = len(class_names)
matrix = [[0] * num_classes for _ in range(num_classes)]
for pred, label in zip(all_preds, all_labels):
    matrix[pred][label] += 1

# Display as heatmap with labels (rows: model prediction, columns: actual).
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(matrix, cmap='Blues', aspect='auto')

# Set ticks and labels
tick_positions = np.arange(len(class_names))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names)

# Add text annotations. imshow maps the smallest count to the lightest colour
# and the largest to the darkest, so cells above the midpoint of that range
# get white text; black text is hard to read on the dark end of 'Blues'.
cell_counts = [count for row in matrix for count in row]
contrast_threshold = (max(cell_counts) + min(cell_counts)) / 2
for i, row in enumerate(matrix):
    for j, count in enumerate(row):
        text_color = "white" if count > contrast_threshold else "black"
        ax.text(j, i, str(count), ha="center", va="center", color=text_color, fontsize=14)

ax.set_ylabel('Model Prediction', fontsize=12)
ax.set_xlabel('Actual', fontsize=12)
ax.set_title('Confusion Matrix - Litter Detection', fontsize=14)
plt.tight_layout()
plt.show()

# Calculate accuracy: the share of test samples whose predicted class index
# equals the true label.
num_correct = sum(int(pred == label) for pred, label in zip(all_preds, all_labels))
accuracy = num_correct / len(all_labels)
print(f"\nOverall Accuracy: {accuracy:.4f}")