import pandas as pd import openpyxl from openpyxl.chart import BarChart, Reference, PieChart from openpyxl.chart.label import DataLabelList from openpyxl.utils.dataframe import dataframe_to_rows from datetime import datetime from io import BytesIO import matplotlib.pyplot as plt import gradio as gr import tempfile # Read excel data for review analysis def read_excel_data(file): df = pd.read_excel(BytesIO(file), usecols="B, C, D, E", skiprows=1, names=["Review Date", "Option", "Review", "ReviewScore"]) df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date df['Year-Month'] = df['Review Date'].astype(str).str.slice(0, 7) df['Year'] = df['Review Date'].astype(str).str.slice(0, 4) df['Month'] = df['Review Date'].astype(str).str.slice(5, 7) df['Day'] = df['Review Date'].astype(str).str.slice(8, 10) df['Option1'] = df['Option'].str.split(" / ").str[0] # 1차 옵션만 추출 df['Review Length'] = df['Review'].str.len() # 추가된 부분: 리뷰 길이 계산 return df # Analyze review data def analyze_data(df): monthly_data = df.groupby('Year-Month').size().reset_index(name='Counts') yearly_data = df.groupby('Year').size().reset_index(name='Counts') return monthly_data, yearly_data def analyze_monthly_data_for_year(df, selected_year): monthly_data_for_year = df[df['Year'] == selected_year].groupby('Month').size().reset_index(name='Counts') all_months = pd.DataFrame([f"{m:02d}" for m in range(1, 13)], columns=['Month']) monthly_trend_for_year = pd.merge(all_months, monthly_data_for_year, on='Month', how='left') monthly_trend_for_year['Counts'] = monthly_trend_for_year['Counts'].fillna(0).astype(int) return monthly_trend_for_year def analyze_daily_data(df, selected_year): start_date = datetime.strptime(f"{selected_year}-01-01", "%Y-%m-%d").date() end_date = datetime.strptime(f"{selected_year}-12-31", "%Y-%m-%d").date() date_range = pd.date_range(start=start_date, end=end_date).date daily_data = df[df['Year'] == selected_year].groupby('Review Date').size().reset_index(name='Counts') daily_data['Review Date'] = pd.to_datetime(daily_data['Review Date']).dt.date all_dates_df = pd.DataFrame(date_range, columns=['Review Date']) all_dates_df['Review Date'] = pd.to_datetime(all_dates_df['Review Date']).dt.date merged_data = pd.merge(all_dates_df, daily_data, on='Review Date', how='left') merged_data['Counts'] = merged_data['Counts'].fillna(0).astype(int) return merged_data def analyze_option_data(df): data_counts = df['Option1'].value_counts().reset_index() data_counts.columns = ['Option', 'Counts'] total_counts = data_counts['Counts'].sum() data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2) data_counts.sort_values(by='Counts', ascending=False, inplace=True) return data_counts def analyze_option_review_data(df): df["Option1"] = df["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x) df["Option2"] = df["Option"].apply(lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "") review_counts = df.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0) review_counts["Total"] = review_counts.sum(axis=1) option1_counts = df.groupby("Option1")["Option"].count() option2_counts = df.groupby(["Option1", "Option2"])["Option"].count() review_counts["옵션명(1차)건수"] = review_counts.index.get_level_values("Option1").map(option1_counts) review_counts["옵션명(2차)건수"] = option2_counts review_counts.sort_values(by=["옵션명(1차)건수", "옵션명(2차)건수"], ascending=[False, False], inplace=True) return review_counts def analyze_option_data_for_year(df, selected_year): df_year = df[df['Year'] == selected_year] data_counts = df_year['Option1'].value_counts().reset_index() data_counts.columns = ['Option', 'Counts'] total_counts = data_counts['Counts'].sum() data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2) data_counts.sort_values(by='Counts', ascending=False, inplace=True) return data_counts def analyze_option_review_data_for_year(df, selected_year): df_year = df[df['Year'] == selected_year] df_year["Option1"] = df_year["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x) df_year["Option2"] = df_year["Option"].apply(lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "") review_counts = df_year.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0) review_counts["Total"] = review_counts.sum(axis=1) option1_counts = df_year.groupby("Option1")["Option"].count() option2_counts = df_year.groupby(["Option1", "Option2"])["Option"].count() review_counts["옵션명(1차)건수"] = review_counts.index.get_level_values("Option1").map(option1_counts) review_counts["옵션명(2차)건수"] = option2_counts review_counts.sort_values(by=["옵션명(1차)건수", "옵션명(2차)건수"], ascending=[False, False], inplace=True) return review_counts def extract_longest_reviews(df): longest_reviews = df.groupby('ReviewScore').apply(lambda x: x.nlargest(100, 'Review Length', keep='all')).reset_index(drop=True) return longest_reviews.drop(columns=['Review Length', 'Year-Month', 'Year', 'Month', 'Day', 'Option1', 'Option2']) # 삭제된 열들 def save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts, review_counts, selected_option_counts, selected_review_counts, longest_reviews): wb = openpyxl.Workbook() # 원본 리뷰 데이터 시트 추가 및 이름 변경 ws_original = wb.active ws_original.title = "원본리뷰데이터" for r in dataframe_to_rows(original_data, index=False, header=True): ws_original.append(r) ws_original.sheet_properties.tabColor = "000000" # 검은색 # 리뷰분석 추이 시트 추가 ws1 = wb.create_sheet(title="전체월별추이(리뷰분석)") for r in dataframe_to_rows(monthly_counts, index=False, header=True): ws1.append(r) chart1 = BarChart() chart1.type = "col" chart1.style = 10 chart1.title = "Monthly Review Trends" chart1.y_axis.title = 'Review Counts' chart1.x_axis.title = 'Year-Month' data1 = Reference(ws1, min_col=2, min_row=1, max_row=ws1.max_row, max_col=2) cats1 = Reference(ws1, min_col=1, min_row=2, max_row=ws1.max_row) chart1.add_data(data1, titles_from_data=True) chart1.set_categories(cats1) chart1.width = 30 chart1.height = 15 ws1.add_chart(chart1, "C2") ws1.sheet_properties.tabColor = "FFA500" # 주황색 # 년도별 리뷰 분석 시트 추가 ws2 = wb.create_sheet(title="년도별추이(리뷰분석)") for r in dataframe_to_rows(yearly_counts, index=False, header=True): ws2.append(r) chart2 = BarChart() chart2.type = "col" chart2.style = 10 chart2.title = "Yearly Review Trends" chart2.y_axis.title = 'Review Counts' chart2.x_axis.title = 'Year' data2 = Reference(ws2, min_col=2, min_row=1, max_row=ws2.max_row, max_col=2) cats2 = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row) chart2.add_data(data2, titles_from_data=True) chart2.set_categories(cats2) chart2.width = 30 chart2.height = 15 ws2.add_chart(chart2, "C2") ws2.sheet_properties.tabColor = "FFA500" # 주황색 # 월별 리뷰 분석 시트 추가 ws3 = wb.create_sheet(title="선택한 년도 월별추이(리뷰분석)") for r in dataframe_to_rows(monthly_trend, index=False, header=True): ws3.append(r) chart3 = BarChart() chart3.type = "col" chart3.style = 10 chart3.title = "Monthly Trends for Selected Year" chart3.y_axis.title = 'Review Counts' chart3.x_axis.title = 'Month' data3 = Reference(ws3, min_col=2, min_row=1, max_row=ws3.max_row, max_col=2) cats3 = Reference(ws3, min_col=1, min_row=2, max_row=ws3.max_row) chart3.add_data(data3, titles_from_data=True) chart3.set_categories(cats3) chart3.width = 30 chart3.height = 15 ws3.add_chart(chart3, "C2") ws3.sheet_properties.tabColor = "FFA500" # 주황색 # 일별 리뷰 분석 시트 추가 ws4 = wb.create_sheet(title="선택한 년도 일별추이(리뷰분석)") for r in dataframe_to_rows(daily_counts, index=False, header=True): ws4.append(r) chart4 = BarChart() chart4.type = "col" chart4.style = 10 chart4.title = "Daily Trends for Selected Year" chart4.y_axis.title = 'Review Counts' chart4.x_axis.title = 'Date' data4 = Reference(ws4, min_col=2, min_row=2, max_row=ws4.max_row + 1, max_col=2) cats4 = Reference(ws4, min_col=1, min_row=2, max_row=ws4.max_row + 1) chart4.add_data(data4, titles_from_data=True) chart4.set_categories(cats4) chart4.width = 50 chart4.height = 15 ws4.add_chart(chart4, "C2") ws4.sheet_properties.tabColor = "FFA500" # 주황색 # 옵션분석 결과 시트 추가 ws5 = wb.create_sheet(title="옵션분석 결과(옵션분석)") for r in dataframe_to_rows(option_counts, index=False, header=True): ws5.append(r) bar_chart = BarChart() data = Reference(ws5, min_col=2, min_row=2, max_row=ws5.max_row, max_col=2) cats = Reference(ws5, min_col=1, min_row=2, max_row=ws5.max_row, max_col=1) bar_chart.add_data(data, titles_from_data=False) bar_chart.set_categories(cats) bar_chart.title = "Option Analysis (Counts)" bar_chart.width = 40 bar_chart.height = 20 ws5.add_chart(bar_chart, "G2") ws5.sheet_properties.tabColor = "0000FF" # 파랑색 # Create pie chart top_10 = option_counts.head(10) for idx, row in enumerate(top_10.itertuples(), 1): ws5.cell(row=idx + 1, column=5, value=row.Option) ws5.cell(row=idx + 1, column=6, value=row.Counts) others_sum = option_counts['Counts'][10:].sum() ws5.cell(row=12, column=5, value='Others') ws5.cell(row=12, column=6, value=others_sum) ws5.cell(row=1, column=5, value='Option') ws5.cell(row=1, column=6, value='Counts') pie_chart = PieChart() data = Reference(ws5, min_col=6, min_row=2, max_row=12) categories = Reference(ws5, min_col=5, min_row=2, max_row=12) pie_chart.add_data(data, titles_from_data=False) pie_chart.set_categories(categories) pie_chart.title = "Top 10 Options (Share)" pie_chart.dataLabels = DataLabelList() pie_chart.dataLabels.showPercent = True pie_chart.width = 30 pie_chart.height = 20 ws5.add_chart(pie_chart, "G40") # 옵션별평점분석 시트 추가 ws6 = wb.create_sheet(title="옵션별평점분석(옵션분석)") ws6.append(["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", "Option2 Counts"]) for r in dataframe_to_rows(review_counts, index=True, header=False): ws6.append(r) ws6.sheet_properties.tabColor = "0000FF" # 파랑색 # 선택한 년도 옵션분석 결과 시트 추가 ws7 = wb.create_sheet(title="선택한 년도 옵션분석 결과(옵션분석)") for r in dataframe_to_rows(selected_option_counts, index=False, header=True): ws7.append(r) bar_chart_selected = BarChart() data_selected = Reference(ws7, min_col=2, min_row=2, max_row=ws7.max_row, max_col=2) cats_selected = Reference(ws7, min_col=1, min_row=2, max_row=ws7.max_row, max_col=1) bar_chart_selected.add_data(data_selected, titles_from_data=False) bar_chart_selected.set_categories(cats_selected) bar_chart_selected.title = "Option Analysis for Selected Year (Counts)" bar_chart_selected.width = 40 bar_chart_selected.height = 20 ws7.add_chart(bar_chart_selected, "G2") ws7.sheet_properties.tabColor = "0000FF" # 파랑색 # Create pie chart for selected year top_10_selected = selected_option_counts.head(10) for idx, row in enumerate(top_10_selected.itertuples(), 1): ws7.cell(row=idx + 1, column=5, value=row.Option) ws7.cell(row=idx + 1, column=6, value=row.Counts) others_sum_selected = selected_option_counts['Counts'][10:].sum() ws7.cell(row=12, column=5, value='Others') ws7.cell(row=12, column=6, value=others_sum_selected) ws7.cell(row=1, column=5, value='Option') ws7.cell(row=1, column=6, value='Counts') pie_chart_selected = PieChart() data_selected_pie = Reference(ws7, min_col=6, min_row=2, max_row=12) categories_selected_pie = Reference(ws7, min_col=5, min_row=2, max_row=12) pie_chart_selected.add_data(data_selected_pie, titles_from_data=False) pie_chart_selected.set_categories(categories_selected_pie) pie_chart_selected.title = "Top 10 Options for Selected Year (Share)" pie_chart_selected.dataLabels = DataLabelList() pie_chart_selected.dataLabels.showPercent = True pie_chart_selected.width = 30 pie_chart_selected.height = 20 ws7.add_chart(pie_chart_selected, "G40") # 선택한 년도 옵션별평점분석 시트 추가 ws8 = wb.create_sheet(title="선택한 년도 옵션별평점분석(옵션분석)") ws8.append(["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", "Option2 Counts"]) for r in dataframe_to_rows(selected_review_counts, index=True, header=False): ws8.append(r) ws8.sheet_properties.tabColor = "0000FF" # 파랑색 # 리뷰 내용이 긴 리뷰 시트 추가 ws9 = wb.create_sheet(title="긴 리뷰 내용") for r in dataframe_to_rows(longest_reviews, index=False, header=True): ws9.append(r) ws9.sheet_properties.tabColor = "00FF00" # 초록색 file_path = "리뷰분석 다운로드.xlsx" wb.save(file_path) return file_path def generate_plots(df, year): # 최근 3년의 데이터만 사용 start_year = datetime.now().year - 2 recent_data = df[df['Year'].astype(int) >= start_year] monthly_counts, yearly_counts = analyze_data(df) # Use all data for yearly counts recent_monthly_counts, _ = analyze_data(recent_data) # Use recent data for monthly counts monthly_trend = analyze_monthly_data_for_year(recent_data, year) daily_counts = analyze_daily_data(recent_data, year) option_counts = analyze_option_data(recent_data) plot_files = [] # 월별 리뷰 추이 그래프 생성 fig1, ax1 = plt.subplots() ax1.plot(recent_monthly_counts['Year-Month'], recent_monthly_counts['Counts'], marker='o') ax1.set_title('Monthly Review Trends (Recent 3 Years)', fontsize=16) # 제목 폰트 크기 설정 ax1.set_ylabel('Review Counts', fontsize=14) # y축 레이블 폰트 크기 설정 # x축 레이블을 90도 회전하여 표시하고 폰트 크기 줄임 ax1.tick_params(axis='x', rotation=90, labelsize=6) tmp_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") fig1.savefig(tmp_file1.name) plot_files.append(tmp_file1.name) fig2, ax2 = plt.subplots() ax2.bar(yearly_counts['Year'], yearly_counts['Counts']) ax2.set_title('Yearly Review Trends') ax2.set_xlabel('Year') ax2.set_ylabel('Review Counts') tmp_file2 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") fig2.savefig(tmp_file2.name) plot_files.append(tmp_file2.name) fig3, ax3 = plt.subplots() ax3.bar(monthly_trend['Month'], monthly_trend['Counts']) ax3.set_title('Monthly Trends for Selected Year') ax3.set_xlabel('Month') ax3.set_ylabel('Review Counts') tmp_file3 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") fig3.savefig(tmp_file3.name) plot_files.append(tmp_file3.name) fig4, ax4 = plt.subplots() ax4.bar(daily_counts['Review Date'], daily_counts['Counts']) ax4.set_title('Daily Trends for Selected Year') ax4.set_xlabel('Date') ax4.set_ylabel('Review Counts') tmp_file4 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") fig4.savefig(tmp_file4.name) plot_files.append(tmp_file4.name) return plot_files def process_file(file, year): df = read_excel_data(file) monthly_counts, yearly_counts = analyze_data(df) monthly_trend = analyze_monthly_data_for_year(df, year) daily_counts = analyze_daily_data(df, year) option_counts = analyze_option_data(df) review_counts = analyze_option_review_data(df) selected_option_counts = analyze_option_data_for_year(df, year) selected_review_counts = analyze_option_review_data_for_year(df, year) longest_reviews = extract_longest_reviews(df) original_data = pd.read_excel(BytesIO(file), sheet_name=0) # 첫 번째 시트만 로드 result_file = save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts, review_counts, selected_option_counts, selected_review_counts, longest_reviews) return result_file def process_file_with_plots(file, year): df = read_excel_data(file) result_file = process_file(file, year) plots = generate_plots(df, year) return [result_file] + plots years = [str(year) for year in range(datetime.now().year, datetime.now().year - 10, -1)] def analyze_and_plot(file, year): return process_file_with_plots(file, year) # 기존 인터페이스 정의 iface1 = gr.Interface( fn=analyze_and_plot, inputs=[gr.File(file_count="single", type="binary"), gr.Radio(years, label="분석년도 선택", value=str(datetime.now().year))], outputs=[gr.File(label="세부분석 자료를 다운받으세요(Excel파일)")] + [gr.Image(label=label) for label in [ "최근3년간 월별 리뷰추이", "최근 년도별 리뷰추이", "선택년도 월 리뷰추이", "선택년도 일일 리뷰추이" ]], title="상품리뷰 분석-ver.1(엑셀파일)", description="Upload an Excel file to analyze review date data and save the results to an Excel file. Also, provide visualized graphs.", ) # 기존 인터페이스 정의 iface1 = gr.Interface( fn=analyze_and_plot, inputs=[gr.File(file_count="single", type="binary"), gr.Radio(years, label="분석년도 선택", value=str(datetime.now().year))], outputs=[gr.File(label="세부분석 자료를 다운받으세요(Excel파일)")] + [gr.Image(label=label) for label in [ "최근3년간 월별 리뷰추이", "최근 년도별 리뷰추이", "선택년도 월 리뷰추이", "선택년도 일일 리뷰추이" ]], title="상품리뷰 분석-ver.1(엑셀파일)", description=( "✨스마트스토어 상품 리뷰를 분석해보세요.

" "📢사용 방법📢
" "1. 구글웹스토어 SMART DATA 검색 후 설치
" "2. 리뷰분석 분석 후 엑셀다운로드
" "3. 엑셀다운로드 한 파일을 프로그램에서 업로드
" "4. 분석할 년도 선택 후 실행" ) ) # 새로운 탭 인터페이스 정의 def chatbot_response(input_text): # 여기에 챗봇 응답 생성 로직을 추가하세요 response = f"챗봇 응답: {input_text}" return response # 긴 글 텍스트 결과를 위한 함수 정의 def long_text_result(): return "AI분석결과 출력됩니다." # 새로운 탭에 왼쪽 긴 텍스트 결과와 오른쪽 챗봇 인터페이스 구성 iface2 = gr.Blocks() with iface2: with gr.Row(): with gr.Column(): gr.Textbox(value=long_text_result(), label="상품의 장단점 10가지를 분석해드립니다.", lines=32, interactive=False) with gr.Column(): chatbot_input = gr.Textbox(label="챗봇 입력", placeholder="이 상품에 대한 추가적인 자세한 분석내용을 챗봇에게 질문하세요.") chatbot_examples = gr.Dropdown( ["기능적인 내용 중 만족/불만족 항목을 20개씩 분석해주세요", "디자인적인 내용 중 만족/불만족 항목을 20개씩 분석해주세요.", "감성적인 내용 중 만족/불만족 항목을 20개씩 분석해주세요.", "추가로 20개 더 해주세요."], label="챗봇 예시항목 선택" ) chatbot_output = gr.Textbox(label="챗봇 응답", lines=20) # 응답 칸을 길게 설정 with gr.Row(): chatbot_button = gr.Button("챗봇에게 질문하기") clear_button = gr.Button("모두 지우기") chatbot_button.click(chatbot_response, inputs=chatbot_input, outputs=chatbot_output) clear_button.click(fn=lambda: "", inputs=None, outputs=chatbot_output) # 모두 지우기 버튼 클릭 시 응답 내용 초기화 chatbot_examples.change(fn=lambda x: x, inputs=chatbot_examples, outputs=chatbot_input) # 탭 인터페이스로 결합 tabbed_iface = gr.TabbedInterface( [iface1, iface2], tab_names=["리뷰추이 분석", "리뷰 장단점분석(챗봇기능)"] ) if __name__ == "__main__": tabbed_iface.launch()