From a2a1642aed2b911fd0bc12df88a34fa4e905fcbc Mon Sep 17 00:00:00 2001 From: taewan2002 Date: Thu, 15 Feb 2024 23:15:12 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20=EA=B8=B0=EB=B3=B8=20=EC=85=8B=ED=8C=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env | 3 +- .env-prod | 3 +- app/core/ai.py | 91 ---------------- app/core/config.py | 1 + app/db/database.py | 10 +- app/db/models.py | 16 ++- app/router/chat.py | 2 +- app/router/house.py | 16 ++- app/service/chat.py | 130 +++++++++++++++++++++-- app/service/house.py | 240 ++++++++++++++++++++++++++++++++++--------- requirements.txt | 3 +- 11 files changed, 354 insertions(+), 161 deletions(-) delete mode 100644 app/core/ai.py diff --git a/.env b/.env index 8620f0f..b37d85b 100644 --- a/.env +++ b/.env @@ -1,3 +1,4 @@ SERVER_TYPE=local ROOT_PATH= -DB_URL=localhost \ No newline at end of file +DB_URL=localhost +HOUSE_REC_URL=https://sarabwayu5.hackathon.sparcs.net/ \ No newline at end of file diff --git a/.env-prod b/.env-prod index d16507a..c69fa93 100644 --- a/.env-prod +++ b/.env-prod @@ -1,3 +1,4 @@ SERVER_TYPE=prod ROOT_PATH=/api -DB_URL=mysql-container \ No newline at end of file +DB_URL=mysql-container +HOUSE_REC_URL=https://sarabwayu5.hackathon.sparcs.net/ \ No newline at end of file diff --git a/app/core/ai.py b/app/core/ai.py deleted file mode 100644 index 7fada9e..0000000 --- a/app/core/ai.py +++ /dev/null @@ -1,91 +0,0 @@ -from sklearn.metrics.pairwise import cosine_similarity -from sklearn.feature_extraction.text import TfidfVectorizer -import numpy as np - - -class HouseRecommender: - def __init__(self, house_info): - self.house_info = house_info - self.tfidf_vectorizer = TfidfVectorizer() - - # 모든 텍스트 데이터를 수집하여 TF-IDF 벡터화기를 학습시킵니다. - all_texts = [ - ' '.join(house['tagList']) + ' ' + - house['articleFeatureDescription'] + - (" " + house['detailDescription'] if house['detailDescription'] != "없음" else "") - for house in self.house_info - ] - - self.tfidf_vectorizer.fit(all_texts) # 여기서 fit을 호출합니다. - - def vectorize_categorical_data(self, persona): - person_count = int(persona['person_count'].replace('명', '').split()[0]) - - if '한달' in persona['period'] or '이상' in persona['period']: - period = 4 - else: - period = int(persona['period'].replace('주', '')) - - identity_vector = [1 if identity in persona['identity'] else 0 for identity in - ['학생', '직장인', '취준생', '기타']] - car = 1 if persona['car'] == '차 있음' else 0 - child = 1 if persona['child'] == '아이 있음' else 0 - - return np.array([person_count, period] + identity_vector + [car, child]) - - def vectorize_text_data(self, text): - return self.tfidf_vectorizer.transform([text]).toarray().flatten() - - def extract_room_count(self, house): - room_tags = [tag for tag in house['tagList'] if '방' in tag] - room_count_map = {"한개": 1, "두개": 2, "세개": 3, "네개": 4, "다섯개": 5} - - if room_tags: - for key, value in room_count_map.items(): - if key in room_tags[0]: - return value - return 1 - - def vectorize_data(self, house, persona): - persona_vector = self.vectorize_categorical_data(persona) - house_text = house['articleFeatureDescription'] + ' ' + ' '.join(house['tagList']) + ( - " " + house['detailDescription'] if house['detailDescription'] != "없음" else "") - house_text_vector = self.vectorize_text_data(house_text) - persona_text_vector = self.vectorize_text_data(persona['significant']) - - # 방 개수와 인원 수의 차이 계산 - room_count = self.extract_room_count(house) - person_count = int(persona['person_count'].replace('명', '').split()[0]) - room_person_diff = room_count - person_count - - # 벡터 길이 일치화 - max_length = max(len(house_text_vector), len(persona_text_vector)) - house_text_vector = np.pad(house_text_vector, (0, max_length - len(house_text_vector)), 'constant') - persona_text_vector = np.pad(persona_text_vector, (0, max_length - len(persona_text_vector)), - 'constant') - - # 벡터 결합 - house_vector = np.concatenate([persona_vector, house_text_vector, [room_person_diff]]) - persona_vector = np.concatenate([persona_vector, persona_text_vector, [0]]) # 여기서 [0] 대신 다른 값이 들어갈 수 있음 - - return house_vector, persona_vector - - def recommend(self, persona, top_n=100): - house_list = [] - selected_apt_names = set() # 선택된 매물의 이름을 추적하는 집합 - - # 필터링된 매물 정보 사용 - filtered_house_info = [house for house in self.house_info if - int(house['walkTime']) <= 10 and float(house['aptParkingCountPerHousehold']) > 0] - - for house in filtered_house_info: - if house['aptName'] not in selected_apt_names: # 매물 이름이 아직 선택되지 않았다면 - house_vector, persona_vector = self.vectorize_data(house, persona) - similarity = cosine_similarity([house_vector], [persona_vector]) - house_list.append((similarity[0][0], house)) - selected_apt_names.add(house['aptName']) # 매물 이름을 선택된 목록에 추가 - - house_list.sort(key=lambda x: x[0], reverse=True) - - return house_list[:top_n] - diff --git a/app/core/config.py b/app/core/config.py index dd3a3d1..fa3b045 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -5,6 +5,7 @@ class Settings(BaseSettings): SERVER_TYPE: str ROOT_PATH: str DB_URL: str + HOUSE_REC_URL: str class Config: env_file = ".env" diff --git a/app/db/database.py b/app/db/database.py index edf0fdf..29d442b 100644 --- a/app/db/database.py +++ b/app/db/database.py @@ -70,7 +70,15 @@ async def get_current_user( headers={"WWW-Authenticate": "Bearer"}, ) - payload = jwt.decode(token, "sarabwayu", algorithms=["HS256"]) + try: + payload = jwt.decode(token, "sarabwayu", algorithms=["HS256"]) + except: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Unauthorized", + headers={"WWW-Authenticate": "Bearer"}, + ) + nickname: str = payload.get("sub") user = db.query(User).filter(User.nickname == nickname).first() diff --git a/app/db/models.py b/app/db/models.py index dc7dbf3..53b12af 100644 --- a/app/db/models.py +++ b/app/db/models.py @@ -1,11 +1,17 @@ from sqlalchemy import Column, Integer, Text, ForeignKey, String, Boolean, DateTime, func, JSON, Date, FLOAT from sqlalchemy.orm import relationship from sqlalchemy.ext.declarative import declarative_base +from datetime import datetime +import pytz + +def get_now(): + return datetime.now(pytz.timezone('Asia/Seoul')) Base = declarative_base() class User(Base): __tablename__ = 'User' + id = Column(Integer, primary_key=True) nickname = Column(String(50), index=True, nullable=False) hashed_password = Column(String(100), nullable=False) @@ -40,14 +46,22 @@ class House(Base): class Recommendation(Base): __tablename__ = 'Recommendation' + id = Column(Integer, primary_key=True) user_id = Column(Integer, ForeignKey('User.id')) house_id = Column(Integer, ForeignKey('House.id')) reason = Column(Text, nullable=False) is_deleted = Column(Boolean, default=False) - create_date = Column(DateTime, default=func.now()) + create_date = Column(DateTime, default=get_now()) +class LikedHouse(Base): + __tablename__ = 'LikedHouse' + id = Column(Integer, primary_key=True) + user_id = Column(Integer, ForeignKey('User.id')) + house_id = Column(Integer, ForeignKey('House.id')) + is_deleted = Column(Boolean, default=False) + create_date = Column(DateTime, default=get_now()) def get_Base(): return Base \ No newline at end of file diff --git a/app/router/chat.py b/app/router/chat.py index e7e4c74..cd55a96 100644 --- a/app/router/chat.py +++ b/app/router/chat.py @@ -6,7 +6,7 @@ router = APIRouter(prefix="/chat") -@router.post("/chat", response_model=ApiResponse, tags=["Chat"]) +@router.post("/", response_model=ApiResponse, tags=["Chat"]) async def post_chat( chat_data: Chat, chat_service: Annotated[ChatService, Depends()] diff --git a/app/router/house.py b/app/router/house.py index 1f1d8c5..a353b4e 100644 --- a/app/router/house.py +++ b/app/router/house.py @@ -22,15 +22,23 @@ async def post_house_create( ): print(house_data.house_info) return ApiResponse() +@router.patch("/like/{house_id}", response_model=ApiResponse, tags=["House"]) +async def patch_house_like( + house_id: int, + house_service: Annotated[HouseService, Depends()] +): + return ApiResponse(data=await house_service.like(house_id)) -@router.get("/recommendation", response_model=ApiResponse, tags=["House"]) +@router.get("/recommendation/list/{page}", response_model=ApiResponse, tags=["House"]) async def get_house_recommendation( + page: int, house_service: Annotated[HouseService, Depends()] ): - return ApiResponse(data=await house_service.recommendation()) + return ApiResponse(data=await house_service.recommendation_list(page)) -@router.get("/list", response_model=ApiResponse, tags=["House"]) +@router.get("/list/{page}", response_model=ApiResponse, tags=["House"]) async def get_house_list( + page: int, house_service: Annotated[HouseService, Depends()] ): - return ApiResponse(data=await house_service.list()) + return ApiResponse(data=await house_service.list(page)) diff --git a/app/service/chat.py b/app/service/chat.py index 45c1535..f2900b5 100644 --- a/app/service/chat.py +++ b/app/service/chat.py @@ -1,9 +1,14 @@ -from fastapi import Depends +import json + +import requests +from fastapi import Depends, HTTPException, status from sqlalchemy.orm import Session -from app.db.database import get_db, get_current_user -from app.db.models import User +from app.core.config import settings +from app.db.database import get_db, get_current_user, save_db +from app.db.models import User, House, Recommendation from app.schemas.request import Chat +from app.service.house import HouseRecommender class ChatService: @@ -12,11 +17,114 @@ def __init__(self, db: Session = Depends(get_db), user: User = Depends(get_curre self.user = user async def chat(self, chat_data: Chat): - # print(chat_data.person_count) - # print(chat_data.period) - # print(chat_data.identity) - # print(chat_data.car) - # print(chat_data.child) - # print(chat_data.significant) - - return chat_data \ No newline at end of file + + async def check_format(data): + if data.person_count not in ["1명", "2명", "3명", "4명 이상"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="person_count가 잘못되었습니다." + ) + if data.period not in ["1주", "2주", "3주", "4주 이상"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="period가 잘못되었습니다." + ) + if data.identity not in ["학생", "직장인", "기타"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="identity가 잘못되었습니다." + ) + if data.car not in ["자차", "대중교통"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="car가 잘못되었습니다." + ) + if data.child not in ["아이 있음", "아이 없음"]: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="child가 잘못되었습니다." + ) + return data + + chat_data = await check_format(chat_data) + + persona = { + "person_count": chat_data.person_count, + "period": chat_data.period, + "identity": chat_data.identity, + "car": chat_data.car, + "child": chat_data.child, + "significant": chat_data.significant + } + + # 모든 집 데이터 가져오기 + all_houses = self.db.query(House).filter(House.is_deleted == False).all() + + # 이미 추천된 데이터 제거 + recommended_houses = self.db.query(Recommendation).filter( + Recommendation.user_id == self.user.id, + Recommendation.is_deleted == False + ).all() + for house in all_houses: + if house.id in [recommended_house.house_id for recommended_house in recommended_houses]: + all_houses.remove(house) + + # 추천 알고리즘 실행 + house_recommender = HouseRecommender([house.__dict__ for house in all_houses]) + recommended_houses = house_recommender.recommend(persona) + + # 추천된 데이터 이름 - id 매핑 + recommended_map = {} + for house in recommended_houses: + recommended_map[house[1]["aptName"]] = house[1]["id"] + + # XAI를 활용한 추천 API 호출 + candidates = [] + for house in recommended_houses: + house_dict = {} + house = house[1] + house_dict['aptName'] = house['aptName'] + house_dict['articleFeatureDescription'] = (house['articleFeatureDescription'] + ' ' + house[ + 'detailDescription'])[:100] + house_dict['tagList'] = house['tagList'] + house_dict['walkTime'] = house['walkTime'] + house_dict['studentCountPerTeacher'] = house['studentCountPerTeacher'] + house_dict['aptParkingCountPerHousehold'] = house['aptParkingCountPerHousehold'] + candidates.append(house_dict) + + request_data = { + "user_info": json.dumps(persona, ensure_ascii=False), + "candidates": json.dumps(candidates, ensure_ascii=False) + } + + retry_count = 3 + + while retry_count > 0: + try: + response = requests.post(settings.HOUSE_REC_URL, json=request_data) + rank_section = response.text.split("rank:")[1] + reason_section = rank_section.split("reason:")[1] + rank_data = rank_section.split("reason:")[0] + rank_data = rank_data[rank_data.find("["):rank_data.find("]") + 1] + reason_section = reason_section[reason_section.find("["):reason_section.find("]") + 1] + rank_data = json.loads(rank_data.replace('\\"', '"')) + return_data = json.loads(reason_section.replace('\\"', '"')) + break + except: + retry_count -= 1 + print(f"API 호출 시도 중... {retry_count}회 남음") + if retry_count == 0: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"{rank_data}, {reason_section}" + ) + + for rank in rank_data: + recommendation = Recommendation( + user_id=self.user.id, + house_id=recommended_map[rank], + reason=return_data[rank_data.index(rank)] + ) + save_db(recommendation, self.db) + + return return_data \ No newline at end of file diff --git a/app/service/house.py b/app/service/house.py index cbcd70c..4549319 100644 --- a/app/service/house.py +++ b/app/service/house.py @@ -2,11 +2,102 @@ import requests from fastapi import Depends, HTTPException, status +from sqlalchemy import select, and_ from sqlalchemy.orm import Session +from sklearn.metrics.pairwise import cosine_similarity +from sklearn.feature_extraction.text import TfidfVectorizer +import numpy as np -from app.core.ai import HouseRecommender +from app.core.config import settings from app.db.database import get_db, get_current_user, save_db -from app.db.models import User, House +from app.db.models import User, House, Recommendation, LikedHouse + + +class HouseRecommender: + def __init__(self, house_info): + self.house_info = house_info + self.tfidf_vectorizer = TfidfVectorizer() + + # 모든 텍스트 데이터를 수집하여 TF-IDF 벡터화기를 학습시킵니다. + all_texts = [ + ' '.join(house['tagList']) + ' ' + + house['articleFeatureDescription'] + + (" " + house['detailDescription'] if house['detailDescription'] != "없음" else "") + for house in self.house_info + ] + + self.tfidf_vectorizer.fit(all_texts) # 여기서 fit을 호출합니다. + + def vectorize_categorical_data(self, persona): + person_count = int(persona['person_count'].replace('명', '').split()[0]) + + if '한달' in persona['period'] or '이상' in persona['period']: + period = 4 + else: + period = int(persona['period'].replace('주', '')) + + identity_vector = [1 if identity in persona['identity'] else 0 for identity in + ['학생', '직장인', '취준생', '기타']] + car = 1 if persona['car'] == '차 있음' else 0 + child = 1 if persona['child'] == '아이 있음' else 0 + + return np.array([person_count, period] + identity_vector + [car, child]) + + def vectorize_text_data(self, text): + return self.tfidf_vectorizer.transform([text]).toarray().flatten() + + def extract_room_count(self, house): + room_tags = [tag for tag in house['tagList'] if '방' in tag] + room_count_map = {"한개": 1, "두개": 2, "세개": 3, "네개": 4, "다섯개": 5} + + if room_tags: + for key, value in room_count_map.items(): + if key in room_tags[0]: + return value + return 1 + + def vectorize_data(self, house, persona): + persona_vector = self.vectorize_categorical_data(persona) + house_text = house['articleFeatureDescription'] + ' ' + ' '.join(house['tagList']) + ( + " " + house['detailDescription'] if house['detailDescription'] != "없음" else "") + house_text_vector = self.vectorize_text_data(house_text) + persona_text_vector = self.vectorize_text_data(persona['significant']) + + # 방 개수와 인원 수의 차이 계산 + room_count = self.extract_room_count(house) + person_count = int(persona['person_count'].replace('명', '').split()[0]) + room_person_diff = room_count - person_count + + # 벡터 길이 일치화 + max_length = max(len(house_text_vector), len(persona_text_vector)) + house_text_vector = np.pad(house_text_vector, (0, max_length - len(house_text_vector)), 'constant') + persona_text_vector = np.pad(persona_text_vector, (0, max_length - len(persona_text_vector)), + 'constant') + + # 벡터 결합 + house_vector = np.concatenate([persona_vector, house_text_vector, [room_person_diff]]) + persona_vector = np.concatenate([persona_vector, persona_text_vector, [0]]) # 여기서 [0] 대신 다른 값이 들어갈 수 있음 + + return house_vector, persona_vector + + def recommend(self, persona, top_n=3): + house_list = [] + selected_apt_names = set() # 선택된 매물의 이름을 추적하는 집합 + + # 필터링된 매물 정보 사용 + filtered_house_info = [house for house in self.house_info if + int(house['walkTime']) <= 10 and float(house['aptParkingCountPerHousehold']) > 0] + + for house in filtered_house_info: + if house['aptName'] not in selected_apt_names: # 매물 이름이 아직 선택되지 않았다면 + house_vector, persona_vector = self.vectorize_data(house, persona) + similarity = cosine_similarity([house_vector], [persona_vector]) + house_list.append((similarity[0][0], house)) + selected_apt_names.add(house['aptName']) # 매물 이름을 선택된 목록에 추가 + + house_list.sort(key=lambda x: x[0], reverse=True) + + return house_list[:top_n] class HouseService: @@ -72,52 +163,103 @@ async def create(self, house_data): return house_data - async def recommendation(self): - all_houses = self.db.query(House).filter(House.is_deleted == False).all() - house_recommender = HouseRecommender([house.__dict__ for house in all_houses]) - persona = { - "person_count": "3명 이상", - "period": "한달 이상", - "identity": "직장인", - "car": "차 없음", - "child": "아이 없음", - "significant": "주변에 공원이 있었으면 좋겠어" - } - recommended_houses = house_recommender.recommend(persona) - - candidates = [] - for house in recommended_houses[:3]: - house_dict = {} - house = house[1] - house_dict['aptName'] = house['aptName'] - house_dict['articleFeatureDescription'] = (house['articleFeatureDescription'] + ' ' + house['detailDescription'])[:100] - house_dict['tagList'] = house['tagList'] - house_dict['walkTime'] = house['walkTime'] - house_dict['studentCountPerTeacher'] = house['studentCountPerTeacher'] - house_dict['aptParkingCountPerHousehold'] = house['aptParkingCountPerHousehold'] - candidates.append(house_dict) - - request_data = { - "user_info": json.dumps(persona, ensure_ascii=False), - "candidates": json.dumps(candidates, ensure_ascii=False) - } - - url = "https://sarabwayu3.hackathon.sparcs.net/" - - response = requests.post(url, json=request_data) - rank_section = response.text.split("rank:")[1] - reason_section = rank_section.split("reason:")[1] - rank_data = rank_section.split("reason:")[0] - - rank_data = rank_data[rank_data.find("["):rank_data.find("]") + 1] - reason_section = reason_section[reason_section.find("["):reason_section.find("]") + 1] - - try: - return_data = [json.loads(rank_data.replace('\\"', '"')), json.loads(reason_section.replace('\\"', '"'))] - except: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"{rank_data}, {reason_section}" + async def like(self, house_id): + + # db에서 좋아요를 누른 이력이 있는지 확인합니다. + like = self.db.query(LikedHouse).filter( + LikedHouse.user_id == self.user.id, + LikedHouse.house_id == house_id + ).first() + + # 이미 좋아요를 누른 이력이 있다면 is_deleted를 True로 변경합니다. + if like and like.is_deleted == False: + like.is_deleted = True + save_db(like, self.db) + + elif like and like.is_deleted == True: + like.is_deleted = False + save_db(like, self.db) + + # 좋아요를 누른 이력이 없다면 새로운 데이터를 생성합니다. + else: + liked_house = LikedHouse( + user_id=self.user.id, + house_id=house_id + ) + save_db(liked_house, self.db) + + async def recommendation_list(self, page): + + # Recommendation 테이블에서 삭제되지 않은 데이터를 페이지네이션 해서 가져옵니다. + # 이 때 house_id를 이용하여 House 테이블에서 데이터를 가져옵니다. + houses = self.db.execute( + select( + Recommendation.house_id, + House.aptName, + House.image_url, + House.exposureAddress, + ).join( + House, + Recommendation.house_id == House.id + ).filter( + Recommendation.user_id == self.user.id, + Recommendation.is_deleted == False, + House.is_deleted == False + ).limit(5).offset((page - 1) * 5) + ).all() + + # 사용자가 '좋아요'한 집의 ID를 세트로 생성 + liked_houses_set = {liked_house.house_id for liked_house in self.db.query(LikedHouse).filter( + LikedHouse.user_id == self.user.id, + LikedHouse.is_deleted == False + )} + + # 가져온 집 정보에 '좋아요' 정보를 추가하여 반환 + return_houses = [{ + "house_id": house[0], + "aptName": house[1], + "image_url": house[2], + "exposureAddress": house[3], + "is_like": house[0] in liked_houses_set # set를 사용하여 빠르게 확인 + } for house in houses] + + return return_houses + + async def list(self, page): + # House 테이블과 Recommendation 테이블을 left join하고, + # Recommendation 테이블의 house_id가 NULL인 경우만 필터링합니다. + houses_query = select( + House.id, + House.aptName, + House.image_url, + House.exposureAddress + ).outerjoin( + Recommendation, and_( + Recommendation.house_id == House.id, + Recommendation.user_id == self.user.id, + Recommendation.is_deleted == False ) + ).filter( + Recommendation.house_id == None, # Recommendation에 없는 House + House.is_deleted == False + ).limit(5).offset((page - 1) * 5) + + houses = self.db.execute(houses_query).all() + + # 사용자가 '좋아요'한 집 목록을 가져옵니다. + liked_houses_query = select(LikedHouse.house_id).filter( + LikedHouse.user_id == self.user.id + ) + liked_houses = {house_id for (house_id,) in self.db.execute(liked_houses_query).all()} + + # 가져온 집 정보에 '좋아요' 정보를 추가하여 반환합니다. + return_houses = [{ + "house_id": house[0], + "aptName": house[1], + "image_url": house[2], + "exposureAddress": house[3], + "is_like": house[0] in liked_houses + } for house in houses] + + return return_houses - return return_data diff --git a/requirements.txt b/requirements.txt index a54dec1..4ad0b0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ scikit-learn==1.4.0 python-dotenv==1.0.0 requests==2.31.0 urllib3==1.26.6 -cryptography==42.0.2 \ No newline at end of file +cryptography==42.0.2 +pytz==2024.1 \ No newline at end of file