-
Notifications
You must be signed in to change notification settings - Fork 1
/
app.py
131 lines (109 loc) · 5.13 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
"""sr_fashionProduct.ipynb
# Group members:
## 1. Tia Puspita Sari (22.12.2481)
## 2. Renita Tri Hastuti (22.12.2476)
## 3. Fadhila Asla Shana (22.12.2538)
"""
# Step 1: Import Libraries
import os
import subprocess
# Install required packages if not already installed
def install_requirements():
    """Install the packages from ``requirements.txt`` if an import fails.

    The packages this script depends on are imported as a probe. If any of
    them raises ``ImportError``, ``pip install -r requirements.txt`` is run
    once; ``requirements.txt`` is resolved against the current working
    directory.

    Raises:
        subprocess.CalledProcessError: if the pip command exits with a
            non-zero status.
    """
    # Local import: only the fallback install path needs it.
    import sys

    try:
        import pandas  # noqa: F401
        import numpy  # noqa: F401
        import nltk  # noqa: F401
        import streamlit  # noqa: F401
        from sklearn.preprocessing import LabelEncoder  # noqa: F401
    except ImportError:
        # Run pip through the interpreter executing this script so the
        # packages land in the same environment. A bare "pip" found on PATH
        # may belong to a different Python installation or virtualenv.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
        )
install_requirements()

# Proceed with imports
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from sklearn.preprocessing import LabelEncoder
import streamlit as st

# Fetch the NLTK stopword corpus only when it is not already installed.
# This module-level code runs each time the script is executed, so an
# unconditional nltk.download() would contact the download server every time.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
# Step 2: Load Dataset
def load_dataset():
    """Load the fashion product dataset into a DataFrame.

    The dataset is expected in ``product_fashion.csv`` in the same directory
    as this script. That location is tried first so the app also works when
    it is launched from another working directory. If the file is not there
    (or ``__file__`` is undefined, e.g. in a notebook), the bare file name is
    used, which resolves against the current working directory as before.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file.

    Raises:
        FileNotFoundError: if the file exists in neither location.
    """
    filename = 'product_fashion.csv'
    dataset_path = filename  # fallback: relative to the working directory

    script_file = globals().get('__file__')
    if script_file:
        candidate = os.path.join(
            os.path.dirname(os.path.abspath(script_file)), filename
        )
        if os.path.exists(candidate):
            dataset_path = candidate

    return pd.read_csv(dataset_path)
df = load_dataset()

# Step 3: Preprocessing Data
# Missing primary colours are replaced by the placeholder 'Unknown'.
df['PrimaryColor'] = df['PrimaryColor'].fillna(value='Unknown')

# Any Gender value outside the accepted labels collapses to 'Unisex'.
valid_genders = ['Men', 'Women', 'Unisex']
df['Gender'] = df['Gender'].apply(
    lambda gender: 'Unisex' if gender not in valid_genders else gender
)

# English stopword set consulted by preprocess_text when normalising text.
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    """Normalise ``text`` for word-overlap comparison.

    The value is converted with ``str()``, lower-cased, split on whitespace,
    and every token found in the module-level ``stop_words`` set is dropped.

    Returns:
        str: the remaining tokens joined by single spaces.
    """
    tokens = str(text).lower().split()
    kept = (token for token in tokens if token not in stop_words)
    return ' '.join(kept)
# Normalise every description once, up front, so later comparisons can
# split the stored text directly.
df['Description'] = df['Description'].map(preprocess_text)
# Step 4: Implement Jaccard Similarity
# Function to calculate Jaccard Similarity
def jaccard_similarity(set1, set2):
    """Return the Jaccard similarity ``|A ∩ B| / |A ∪ B|`` of two sets.

    Args:
        set1: a set of tokens.
        set2: a set (or any iterable) of tokens.

    Returns:
        float: a value in ``[0.0, 1.0]``. When both inputs are empty the
        ratio is undefined; ``0.0`` is returned so that "no words on either
        side" never counts as a match and never raises
        ``ZeroDivisionError``.
    """
    union_size = len(set1.union(set2))
    if union_size == 0:
        return 0.0
    return len(set1.intersection(set2)) / union_size
# Calculate similarity scores for one product
def compute_similarity_for_product_name(product_name):
    """Score every row of ``df`` against ``product_name``.

    The product name is normalised with ``preprocess_text`` and split into a
    token set. That set is compared, via ``jaccard_similarity``, with the
    token set of each row's ``Description``.

    Returns:
        list: one similarity score per row of ``df``, in row order.
    """
    query_tokens = set(preprocess_text(product_name).split())
    return [
        jaccard_similarity(query_tokens, set(description.split()))
        for description in df['Description']
    ]
# Step 5: Create Recommendation Function
# Function to recommend products based on Jaccard similarity
def recommend_products_by_name(product_name, top_n=5):
    """Return the ``top_n`` rows of ``df`` most similar to ``product_name``.

    Rows are scored with ``compute_similarity_for_product_name`` and ranked
    by descending score; ties keep their dataset order because the sort is
    stable.

    The rows are selected by position. The previous implementation looked
    the winners up again through ``ProductID`` with ``isin``, which returned
    them in dataset order (losing the ranking) and would return extra rows
    if a ``ProductID`` value occurred more than once.

    Args:
        product_name: text to compare against each product description.
        top_n: maximum number of rows to return.

    Returns:
        pandas.DataFrame: the selected rows of ``df``, best match first,
        keeping their original index labels.
    """
    similarity_scores = compute_similarity_for_product_name(product_name)
    ranked = sorted(
        enumerate(similarity_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_positions = [position for position, _score in ranked[:top_n]]
    return df.iloc[top_positions]
# Step 6: Streamlit Interface
st.title("Product Recommendation System")

# Select product name from dropdown
product_name = st.selectbox("Select a Product Name", df['ProductName'].unique())

# Recommendation Button
if st.button("Get Recommendations"):
    similarity_scores = compute_similarity_for_product_name(product_name)
    recommendations = recommend_products_by_name(product_name, top_n=5)

    # similarity_scores holds one score per row of df, in row order. Pair
    # each score with its row label and look the recommended rows up by
    # label BEFORE the index is reset. After reset_index() the index is just
    # 0..n-1, so indexing the score list with it would report the scores of
    # the first rows of df instead of the recommended rows.
    score_by_label = dict(zip(df.index, similarity_scores))
    recommended_scores = [score_by_label[label] for label in recommendations.index]

    recommendations = recommendations.reset_index()  # original label -> 'index' column
    recommendations['Similarity'] = recommended_scores

    # Save recommendations to session state
    st.session_state.recommendations = recommendations.to_dict('records')

    # A detail selection made for an earlier result list no longer matches
    # the new list, so drop it rather than show another product's details.
    for stale_key in ("selected_index", "selected_product"):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
# Display recommendations
# Render the stored recommendations as a numbered list. Each entry has a
# "Detail" button; the entry whose number is stored as the current selection
# shows its details.
if "recommendations" in st.session_state:
    st.write("### Recommended Products")
    numbered_items = enumerate(st.session_state.recommendations, start=1)
    for rank, item in numbered_items:
        with st.container():  # one container per recommendation
            name_column, action_column = st.columns([3, 1])
            name_column.write(f"**{rank}. {item['ProductName']}**")

            detail_requested = action_column.button("Detail", key=f"detail_{rank}")
            if detail_requested:
                # Remember which entry was opened and its record.
                st.session_state.selected_product = item
                st.session_state.selected_index = rank

            is_selected = (
                "selected_index" in st.session_state
                and st.session_state.selected_index == rank
            )
            if not is_selected:
                continue

            # Details are shown only for the currently selected entry.
            st.write("#### Product Details")
            chosen = st.session_state.selected_product
            st.write(f"**Product ID:** {chosen['ProductID']}")
            st.write(f"**Product Name:** {chosen['ProductName']}")
            st.write(f"**Brand:** {chosen['ProductBrand']}")
            st.write(f"**Gender:** {chosen['Gender']}")
            st.write(f"**Primary Color:** {chosen['PrimaryColor']}")
            st.write(f"**Description:** {chosen['Description']}")
            st.write(f"**Similarity Score:** {chosen['Similarity']:.2f}")