import pandas as pd
import openpyxl
from openpyxl.chart import BarChart, Reference, PieChart
from openpyxl.chart.label import DataLabelList
from openpyxl.utils.dataframe import dataframe_to_rows
from datetime import datetime
from io import BytesIO
import matplotlib.pyplot as plt
import gradio as gr
import tempfile
# Read excel data for review analysis
def read_excel_data(file):
df = pd.read_excel(BytesIO(file), usecols="B, C, D, E", skiprows=1, names=["Review Date", "Option", "Review", "ReviewScore"])
df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date
df['Year-Month'] = df['Review Date'].astype(str).str.slice(0, 7)
df['Year'] = df['Review Date'].astype(str).str.slice(0, 4)
df['Month'] = df['Review Date'].astype(str).str.slice(5, 7)
df['Day'] = df['Review Date'].astype(str).str.slice(8, 10)
df['Option1'] = df['Option'].str.split(" / ").str[0] # 1차 옵션만 추출
df['Review Length'] = df['Review'].str.len() # 추가된 부분: 리뷰 길이 계산
return df
# Analyze review data
def analyze_data(df):
monthly_data = df.groupby('Year-Month').size().reset_index(name='Counts')
yearly_data = df.groupby('Year').size().reset_index(name='Counts')
return monthly_data, yearly_data
def analyze_monthly_data_for_year(df, selected_year):
monthly_data_for_year = df[df['Year'] == selected_year].groupby('Month').size().reset_index(name='Counts')
all_months = pd.DataFrame([f"{m:02d}" for m in range(1, 13)], columns=['Month'])
monthly_trend_for_year = pd.merge(all_months, monthly_data_for_year, on='Month', how='left')
monthly_trend_for_year['Counts'] = monthly_trend_for_year['Counts'].fillna(0).astype(int)
return monthly_trend_for_year
def analyze_daily_data(df, selected_year):
start_date = datetime.strptime(f"{selected_year}-01-01", "%Y-%m-%d").date()
end_date = datetime.strptime(f"{selected_year}-12-31", "%Y-%m-%d").date()
date_range = pd.date_range(start=start_date, end=end_date).date
daily_data = df[df['Year'] == selected_year].groupby('Review Date').size().reset_index(name='Counts')
daily_data['Review Date'] = pd.to_datetime(daily_data['Review Date']).dt.date
all_dates_df = pd.DataFrame(date_range, columns=['Review Date'])
all_dates_df['Review Date'] = pd.to_datetime(all_dates_df['Review Date']).dt.date
merged_data = pd.merge(all_dates_df, daily_data, on='Review Date', how='left')
merged_data['Counts'] = merged_data['Counts'].fillna(0).astype(int)
return merged_data
def analyze_option_data(df):
data_counts = df['Option1'].value_counts().reset_index()
data_counts.columns = ['Option', 'Counts']
total_counts = data_counts['Counts'].sum()
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2)
data_counts.sort_values(by='Counts', ascending=False, inplace=True)
return data_counts
def analyze_option_review_data(df):
df["Option1"] = df["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x)
df["Option2"] = df["Option"].apply(lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "")
review_counts = df.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0)
review_counts["Total"] = review_counts.sum(axis=1)
option1_counts = df.groupby("Option1")["Option"].count()
option2_counts = df.groupby(["Option1", "Option2"])["Option"].count()
review_counts["옵션명(1차)건수"] = review_counts.index.get_level_values("Option1").map(option1_counts)
review_counts["옵션명(2차)건수"] = option2_counts
review_counts.sort_values(by=["옵션명(1차)건수", "옵션명(2차)건수"], ascending=[False, False], inplace=True)
return review_counts
def analyze_option_data_for_year(df, selected_year):
df_year = df[df['Year'] == selected_year]
data_counts = df_year['Option1'].value_counts().reset_index()
data_counts.columns = ['Option', 'Counts']
total_counts = data_counts['Counts'].sum()
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2)
data_counts.sort_values(by='Counts', ascending=False, inplace=True)
return data_counts
def analyze_option_review_data_for_year(df, selected_year):
df_year = df[df['Year'] == selected_year]
df_year["Option1"] = df_year["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x)
df_year["Option2"] = df_year["Option"].apply(lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "")
review_counts = df_year.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0)
review_counts["Total"] = review_counts.sum(axis=1)
option1_counts = df_year.groupby("Option1")["Option"].count()
option2_counts = df_year.groupby(["Option1", "Option2"])["Option"].count()
review_counts["옵션명(1차)건수"] = review_counts.index.get_level_values("Option1").map(option1_counts)
review_counts["옵션명(2차)건수"] = option2_counts
review_counts.sort_values(by=["옵션명(1차)건수", "옵션명(2차)건수"], ascending=[False, False], inplace=True)
return review_counts
def extract_longest_reviews(df):
longest_reviews = df.groupby('ReviewScore').apply(lambda x: x.nlargest(100, 'Review Length', keep='all')).reset_index(drop=True)
return longest_reviews.drop(columns=['Review Length', 'Year-Month', 'Year', 'Month', 'Day', 'Option1', 'Option2']) # 삭제된 열들
def save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts, review_counts, selected_option_counts, selected_review_counts, longest_reviews):
wb = openpyxl.Workbook()
# 원본 리뷰 데이터 시트 추가 및 이름 변경
ws_original = wb.active
ws_original.title = "원본리뷰데이터"
for r in dataframe_to_rows(original_data, index=False, header=True):
ws_original.append(r)
ws_original.sheet_properties.tabColor = "000000" # 검은색
# 리뷰분석 추이 시트 추가
ws1 = wb.create_sheet(title="전체월별추이(리뷰분석)")
for r in dataframe_to_rows(monthly_counts, index=False, header=True):
ws1.append(r)
chart1 = BarChart()
chart1.type = "col"
chart1.style = 10
chart1.title = "Monthly Review Trends"
chart1.y_axis.title = 'Review Counts'
chart1.x_axis.title = 'Year-Month'
data1 = Reference(ws1, min_col=2, min_row=1, max_row=ws1.max_row, max_col=2)
cats1 = Reference(ws1, min_col=1, min_row=2, max_row=ws1.max_row)
chart1.add_data(data1, titles_from_data=True)
chart1.set_categories(cats1)
chart1.width = 30
chart1.height = 15
ws1.add_chart(chart1, "C2")
ws1.sheet_properties.tabColor = "FFA500" # 주황색
# 년도별 리뷰 분석 시트 추가
ws2 = wb.create_sheet(title="년도별추이(리뷰분석)")
for r in dataframe_to_rows(yearly_counts, index=False, header=True):
ws2.append(r)
chart2 = BarChart()
chart2.type = "col"
chart2.style = 10
chart2.title = "Yearly Review Trends"
chart2.y_axis.title = 'Review Counts'
chart2.x_axis.title = 'Year'
data2 = Reference(ws2, min_col=2, min_row=1, max_row=ws2.max_row, max_col=2)
cats2 = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row)
chart2.add_data(data2, titles_from_data=True)
chart2.set_categories(cats2)
chart2.width = 30
chart2.height = 15
ws2.add_chart(chart2, "C2")
ws2.sheet_properties.tabColor = "FFA500" # 주황색
# 월별 리뷰 분석 시트 추가
ws3 = wb.create_sheet(title="선택한 년도 월별추이(리뷰분석)")
for r in dataframe_to_rows(monthly_trend, index=False, header=True):
ws3.append(r)
chart3 = BarChart()
chart3.type = "col"
chart3.style = 10
chart3.title = "Monthly Trends for Selected Year"
chart3.y_axis.title = 'Review Counts'
chart3.x_axis.title = 'Month'
data3 = Reference(ws3, min_col=2, min_row=1, max_row=ws3.max_row, max_col=2)
cats3 = Reference(ws3, min_col=1, min_row=2, max_row=ws3.max_row)
chart3.add_data(data3, titles_from_data=True)
chart3.set_categories(cats3)
chart3.width = 30
chart3.height = 15
ws3.add_chart(chart3, "C2")
ws3.sheet_properties.tabColor = "FFA500" # 주황색
# 일별 리뷰 분석 시트 추가
ws4 = wb.create_sheet(title="선택한 년도 일별추이(리뷰분석)")
for r in dataframe_to_rows(daily_counts, index=False, header=True):
ws4.append(r)
chart4 = BarChart()
chart4.type = "col"
chart4.style = 10
chart4.title = "Daily Trends for Selected Year"
chart4.y_axis.title = 'Review Counts'
chart4.x_axis.title = 'Date'
data4 = Reference(ws4, min_col=2, min_row=2, max_row=ws4.max_row + 1, max_col=2)
cats4 = Reference(ws4, min_col=1, min_row=2, max_row=ws4.max_row + 1)
chart4.add_data(data4, titles_from_data=True)
chart4.set_categories(cats4)
chart4.width = 50
chart4.height = 15
ws4.add_chart(chart4, "C2")
ws4.sheet_properties.tabColor = "FFA500" # 주황색
# 옵션분석 결과 시트 추가
ws5 = wb.create_sheet(title="옵션분석 결과(옵션분석)")
for r in dataframe_to_rows(option_counts, index=False, header=True):
ws5.append(r)
bar_chart = BarChart()
data = Reference(ws5, min_col=2, min_row=2, max_row=ws5.max_row, max_col=2)
cats = Reference(ws5, min_col=1, min_row=2, max_row=ws5.max_row, max_col=1)
bar_chart.add_data(data, titles_from_data=False)
bar_chart.set_categories(cats)
bar_chart.title = "Option Analysis (Counts)"
bar_chart.width = 40
bar_chart.height = 20
ws5.add_chart(bar_chart, "G2")
ws5.sheet_properties.tabColor = "0000FF" # 파랑색
# Create pie chart
top_10 = option_counts.head(10)
for idx, row in enumerate(top_10.itertuples(), 1):
ws5.cell(row=idx + 1, column=5, value=row.Option)
ws5.cell(row=idx + 1, column=6, value=row.Counts)
others_sum = option_counts['Counts'][10:].sum()
ws5.cell(row=12, column=5, value='Others')
ws5.cell(row=12, column=6, value=others_sum)
ws5.cell(row=1, column=5, value='Option')
ws5.cell(row=1, column=6, value='Counts')
pie_chart = PieChart()
data = Reference(ws5, min_col=6, min_row=2, max_row=12)
categories = Reference(ws5, min_col=5, min_row=2, max_row=12)
pie_chart.add_data(data, titles_from_data=False)
pie_chart.set_categories(categories)
pie_chart.title = "Top 10 Options (Share)"
pie_chart.dataLabels = DataLabelList()
pie_chart.dataLabels.showPercent = True
pie_chart.width = 30
pie_chart.height = 20
ws5.add_chart(pie_chart, "G40")
# 옵션별평점분석 시트 추가
ws6 = wb.create_sheet(title="옵션별평점분석(옵션분석)")
ws6.append(["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", "Option2 Counts"])
for r in dataframe_to_rows(review_counts, index=True, header=False):
ws6.append(r)
ws6.sheet_properties.tabColor = "0000FF" # 파랑색
# 선택한 년도 옵션분석 결과 시트 추가
ws7 = wb.create_sheet(title="선택한 년도 옵션분석 결과(옵션분석)")
for r in dataframe_to_rows(selected_option_counts, index=False, header=True):
ws7.append(r)
bar_chart_selected = BarChart()
data_selected = Reference(ws7, min_col=2, min_row=2, max_row=ws7.max_row, max_col=2)
cats_selected = Reference(ws7, min_col=1, min_row=2, max_row=ws7.max_row, max_col=1)
bar_chart_selected.add_data(data_selected, titles_from_data=False)
bar_chart_selected.set_categories(cats_selected)
bar_chart_selected.title = "Option Analysis for Selected Year (Counts)"
bar_chart_selected.width = 40
bar_chart_selected.height = 20
ws7.add_chart(bar_chart_selected, "G2")
ws7.sheet_properties.tabColor = "0000FF" # 파랑색
# Create pie chart for selected year
top_10_selected = selected_option_counts.head(10)
for idx, row in enumerate(top_10_selected.itertuples(), 1):
ws7.cell(row=idx + 1, column=5, value=row.Option)
ws7.cell(row=idx + 1, column=6, value=row.Counts)
others_sum_selected = selected_option_counts['Counts'][10:].sum()
ws7.cell(row=12, column=5, value='Others')
ws7.cell(row=12, column=6, value=others_sum_selected)
ws7.cell(row=1, column=5, value='Option')
ws7.cell(row=1, column=6, value='Counts')
pie_chart_selected = PieChart()
data_selected_pie = Reference(ws7, min_col=6, min_row=2, max_row=12)
categories_selected_pie = Reference(ws7, min_col=5, min_row=2, max_row=12)
pie_chart_selected.add_data(data_selected_pie, titles_from_data=False)
pie_chart_selected.set_categories(categories_selected_pie)
pie_chart_selected.title = "Top 10 Options for Selected Year (Share)"
pie_chart_selected.dataLabels = DataLabelList()
pie_chart_selected.dataLabels.showPercent = True
pie_chart_selected.width = 30
pie_chart_selected.height = 20
ws7.add_chart(pie_chart_selected, "G40")
# 선택한 년도 옵션별평점분석 시트 추가
ws8 = wb.create_sheet(title="선택한 년도 옵션별평점분석(옵션분석)")
ws8.append(["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", "Option2 Counts"])
for r in dataframe_to_rows(selected_review_counts, index=True, header=False):
ws8.append(r)
ws8.sheet_properties.tabColor = "0000FF" # 파랑색
# 리뷰 내용이 긴 리뷰 시트 추가
ws9 = wb.create_sheet(title="긴 리뷰 내용")
for r in dataframe_to_rows(longest_reviews, index=False, header=True):
ws9.append(r)
ws9.sheet_properties.tabColor = "00FF00" # 초록색
file_path = "리뷰분석 다운로드.xlsx"
wb.save(file_path)
return file_path
def generate_plots(df, year):
# 최근 3년의 데이터만 사용
start_year = datetime.now().year - 2
recent_data = df[df['Year'].astype(int) >= start_year]
monthly_counts, yearly_counts = analyze_data(df) # Use all data for yearly counts
recent_monthly_counts, _ = analyze_data(recent_data) # Use recent data for monthly counts
monthly_trend = analyze_monthly_data_for_year(recent_data, year)
daily_counts = analyze_daily_data(recent_data, year)
option_counts = analyze_option_data(recent_data)
plot_files = []
# 월별 리뷰 추이 그래프 생성
fig1, ax1 = plt.subplots()
ax1.plot(recent_monthly_counts['Year-Month'], recent_monthly_counts['Counts'], marker='o')
ax1.set_title('Monthly Review Trends (Recent 3 Years)', fontsize=16) # 제목 폰트 크기 설정
ax1.set_ylabel('Review Counts', fontsize=14) # y축 레이블 폰트 크기 설정
# x축 레이블을 90도 회전하여 표시하고 폰트 크기 줄임
ax1.tick_params(axis='x', rotation=90, labelsize=6)
tmp_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig1.savefig(tmp_file1.name)
plot_files.append(tmp_file1.name)
fig2, ax2 = plt.subplots()
ax2.bar(yearly_counts['Year'], yearly_counts['Counts'])
ax2.set_title('Yearly Review Trends')
ax2.set_xlabel('Year')
ax2.set_ylabel('Review Counts')
tmp_file2 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig2.savefig(tmp_file2.name)
plot_files.append(tmp_file2.name)
fig3, ax3 = plt.subplots()
ax3.bar(monthly_trend['Month'], monthly_trend['Counts'])
ax3.set_title('Monthly Trends for Selected Year')
ax3.set_xlabel('Month')
ax3.set_ylabel('Review Counts')
tmp_file3 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig3.savefig(tmp_file3.name)
plot_files.append(tmp_file3.name)
fig4, ax4 = plt.subplots()
ax4.bar(daily_counts['Review Date'], daily_counts['Counts'])
ax4.set_title('Daily Trends for Selected Year')
ax4.set_xlabel('Date')
ax4.set_ylabel('Review Counts')
tmp_file4 = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
fig4.savefig(tmp_file4.name)
plot_files.append(tmp_file4.name)
return plot_files
def process_file(file, year):
df = read_excel_data(file)
monthly_counts, yearly_counts = analyze_data(df)
monthly_trend = analyze_monthly_data_for_year(df, year)
daily_counts = analyze_daily_data(df, year)
option_counts = analyze_option_data(df)
review_counts = analyze_option_review_data(df)
selected_option_counts = analyze_option_data_for_year(df, year)
selected_review_counts = analyze_option_review_data_for_year(df, year)
longest_reviews = extract_longest_reviews(df)
original_data = pd.read_excel(BytesIO(file), sheet_name=0) # 첫 번째 시트만 로드
result_file = save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts, review_counts, selected_option_counts, selected_review_counts, longest_reviews)
return result_file
def process_file_with_plots(file, year):
df = read_excel_data(file)
result_file = process_file(file, year)
plots = generate_plots(df, year)
return [result_file] + plots
years = [str(year) for year in range(datetime.now().year, datetime.now().year - 10, -1)]
def analyze_and_plot(file, year):
return process_file_with_plots(file, year)
# 기존 인터페이스 정의
iface1 = gr.Interface(
fn=analyze_and_plot,
inputs=[gr.File(file_count="single", type="binary"), gr.Radio(years, label="분석년도 선택", value=str(datetime.now().year))],
outputs=[gr.File(label="세부분석 자료를 다운받으세요(Excel파일)")] + [gr.Image(label=label) for label in [
"최근3년간 월별 리뷰추이", "최근 년도별 리뷰추이", "선택년도 월 리뷰추이",
"선택년도 일일 리뷰추이"
]],
title="상품리뷰 분석-ver.1(엑셀파일)",
description="Upload an Excel file to analyze review date data and save the results to an Excel file. Also, provide visualized graphs.",
)
# 기존 인터페이스 정의
iface1 = gr.Interface(
fn=analyze_and_plot,
inputs=[gr.File(file_count="single", type="binary"), gr.Radio(years, label="분석년도 선택", value=str(datetime.now().year))],
outputs=[gr.File(label="세부분석 자료를 다운받으세요(Excel파일)")] + [gr.Image(label=label) for label in [
"최근3년간 월별 리뷰추이", "최근 년도별 리뷰추이", "선택년도 월 리뷰추이",
"선택년도 일일 리뷰추이"
]],
title="상품리뷰 분석-ver.1(엑셀파일)",
description=(
"✨스마트스토어 상품 리뷰를 분석해보세요.
"
"📢사용 방법📢
"
"1. 구글웹스토어 SMART DATA 검색 후 설치
"
"2. 리뷰분석 분석 후 엑셀다운로드
"
"3. 엑셀다운로드 한 파일을 프로그램에서 업로드
"
"4. 분석할 년도 선택 후 실행"
)
)
# 새로운 탭 인터페이스 정의
def chatbot_response(input_text):
# 여기에 챗봇 응답 생성 로직을 추가하세요
response = f"챗봇 응답: {input_text}"
return response
# 긴 글 텍스트 결과를 위한 함수 정의
def long_text_result():
return "AI분석결과 출력됩니다."
# 새로운 탭에 왼쪽 긴 텍스트 결과와 오른쪽 챗봇 인터페이스 구성
iface2 = gr.Blocks()
with iface2:
with gr.Row():
with gr.Column():
gr.Textbox(value=long_text_result(), label="상품의 장단점 10가지를 분석해드립니다.", lines=32, interactive=False)
with gr.Column():
chatbot_input = gr.Textbox(label="챗봇 입력", placeholder="이 상품에 대한 추가적인 자세한 분석내용을 챗봇에게 질문하세요.")
chatbot_examples = gr.Dropdown(
["기능적인 내용 중 만족/불만족 항목을 20개씩 분석해주세요",
"디자인적인 내용 중 만족/불만족 항목을 20개씩 분석해주세요.",
"감성적인 내용 중 만족/불만족 항목을 20개씩 분석해주세요.",
"추가로 20개 더 해주세요."],
label="챗봇 예시항목 선택"
)
chatbot_output = gr.Textbox(label="챗봇 응답", lines=20) # 응답 칸을 길게 설정
with gr.Row():
chatbot_button = gr.Button("챗봇에게 질문하기")
clear_button = gr.Button("모두 지우기")
chatbot_button.click(chatbot_response, inputs=chatbot_input, outputs=chatbot_output)
clear_button.click(fn=lambda: "", inputs=None, outputs=chatbot_output) # 모두 지우기 버튼 클릭 시 응답 내용 초기화
chatbot_examples.change(fn=lambda x: x, inputs=chatbot_examples, outputs=chatbot_input)
# 탭 인터페이스로 결합
tabbed_iface = gr.TabbedInterface(
[iface1, iface2],
tab_names=["리뷰추이 분석", "리뷰 장단점분석(챗봇기능)"]
)
if __name__ == "__main__":
tabbed_iface.launch()