์‹œํ๋ฆฌํ‹ฐ์ง€ํ˜ธ 2025. 4. 29. 10:18

์ฃผ์ œ : csv ํŒŒ์ผ์— ์ ์žฌ๊ฐ€ ๋˜๋Š” ๋ฐ์ดํ„ฐ๋“ค์˜ ํ†ต๊ณ„๋ฅผ ๋‚ด์„œ ํ”ผํ”ผํ‹ฐ๋ฅผ ๋งŒ๋“ค์–ด์ฃผ๋Š” application

 

๋ฐฐ๊ฒฝ ๋ฐ ๋ชฉํ‘œ : csv์— ์ˆ˜๋งŽ์€ ๋ฐ์ดํ„ฐ๋“ค์ด ์žˆ๋Š”๋ฐ ์ˆ˜์ž‘์—…์œผ๋กœ ํ†ต๊ณ„๋‚ด๋Š” ๋ฐ ์‹œ๊ฐ„์ด ๋งŽ์ด ๊ฑธ๋ ธ๋‹ค. ์ด๋ฅผ ์ž๋™ํ™” ํ•ด์ฃผ๊ธฐ๋กœ ํ•˜๊ณ  ๊ฐœ๋ฐœ ํ–ˆ๋‹ค.

 

์‚ฌ์šฉ ์–ธ์–ด : python

 

[์ฝ”๋“œ]

import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches
import platform
from pptx.util import Pt

# ์šด์˜ ์ฒด์ œ ํ™•์ธ
if platform.system() == 'Darwin':  # macOS
    plt.rcParams['font.family'] = 'AppleGothic'  # macOS์—์„œ ์‚ฌ์šฉํ•˜๋Š” ํฐํŠธ
elif platform.system() == 'Windows':  # Windows
    plt.rcParams['font.family'] = 'Malgun Gothic'  # Windows์—์„œ ์‚ฌ์šฉํ•˜๋Š” ํฐํŠธ
else:  # Linux ๋“ฑ์˜ ๋‹ค๋ฅธ ์šด์˜ ์ฒด์ œ
    plt.rcParams['font.family'] = 'NanumGothic'  # Linux์—์„œ ์‚ฌ์šฉํ•˜๋Š” ํฐํŠธ

# ์œ ๋‹ˆ์ฝ”๋“œ ๋งˆ์ด๋„ˆ์Šค ๊ธฐํ˜ธ ๊นจ์ง ๋ฐฉ์ง€
plt.rcParams['axes.unicode_minus'] = False


class CSVAnalyzerApp:
    def __init__(self, root):
        self.root = root
        self.root.title("CSV Analyzer & PPT Generator")
        
        # CSV ํŒŒ์ผ ์„ ํƒ ๋ฒ„ํŠผ
        self.file_label = tk.Label(root, text="CSV ํŒŒ์ผ์„ ์„ ํƒํ•˜์„ธ์š”.")
        self.file_label.pack(pady=10)
        
        self.select_button = tk.Button(root, text="ํŒŒ์ผ ์„ ํƒ", command=self.load_csv)
        self.select_button.pack(pady=5)
        
        # ๋ถ„์„ ๊ฒฐ๊ณผ ์ƒํƒœ
        self.status_label = tk.Label(root, text="์ƒํƒœ: ๋Œ€๊ธฐ ์ค‘", fg="blue")
        self.status_label.pack(pady=10)
        
        # PPT ์ƒ์„ฑ ๋ฒ„ํŠผ
        self.generate_button = tk.Button(root, text="PPT ์ƒ์„ฑ", command=self.create_ppt)
        self.generate_button.pack(pady=5)
        self.generate_button.config(state=tk.DISABLED)
        
        self.data = None  # CSV ๋ฐ์ดํ„ฐ ์ €์žฅ ๋ณ€์ˆ˜
    
    def load_csv(self):
        file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if file_path:
            try:
                self.data = pd.read_csv(file_path, encoding='euc-kr')
                self.status_label.config(text="์ƒํƒœ: ํŒŒ์ผ ๋กœ๋“œ ์™„๋ฃŒ", fg="green")
                messagebox.showinfo("์„ฑ๊ณต", f"{file_path} ํŒŒ์ผ์ด ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
                self.generate_button.config(state=tk.NORMAL)
                self.display_basic_analysis()
            except Exception as e:
                messagebox.showerror("์˜ค๋ฅ˜", f"CSV ํŒŒ์ผ ๋กœ๋“œ ์‹คํŒจ: {e}")
                self.status_label.config(text="์ƒํƒœ: ์˜ค๋ฅ˜ ๋ฐœ์ƒ", fg="red")
    
    def display_basic_analysis(self):
        if self.data is not None:
            # ๊ฐ„๋‹จํ•œ ๋ฐ์ดํ„ฐ ํ†ต๊ณ„ ํ‘œ์‹œ
            summary = self.data.describe(include='all')
        
            # Tkinter ์ฐฝ์— ๋ฐ์ดํ„ฐ ์š”์•ฝ ํ‘œ์‹œ
            summary_label = tk.Label(self.root, text=summary, justify="left")
            summary_label.pack(pady=10)

    def create_ppt(self):
        if self.data is not None:
            try:
                ppt = Presentation()

                # ๋‚ ์งœ ์—ด ์ƒ์„ฑ
                if 'Time' in self.data.columns and 'Identifier' in self.data.columns:
                    self.data['Date'] = self.data['Time'].apply(
                        lambda x: str(x)[:10] if isinstance(x, str) else None
                    )
                    

                    # Identifier ๊ฐ’ ์ฒ˜๋ฆฌ
                    def extract_category(identifier):
                        desired_categories = {'AD', 'DU', 'SC', 'AB', 'AAB', 'PR', 'ASC', 'RB', 'ZF', 'RF', 'AE', 'SE'}
                        parts = identifier.split('-')

                        if identifier.startswith('EE'):  # EE๋กœ ์‹œ์ž‘ํ•˜๋ฉด ๋„ค ๋ฒˆ์งธ ๋ฐฐ์—ด ์š”์†Œ
                            category = parts[3] if len(parts) > 3 else "Unknown"
                        else:  # ๊ทธ ์™ธ์˜ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ๋ฐฐ์—ด ์š”์†Œ
                            category = parts[0] if len(parts) > 0 else "Unknown"
                        
                        # ์นดํ…Œ๊ณ ๋ฆฌ๊ฐ€ ์›ํ•˜๋Š” ๋ชฉ๋ก์— ํฌํ•จ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ
                        return category if category in desired_categories else "Unknown"

                    self.data['Identifier_Category'] = self.data['Identifier'].apply(
                        lambda x: extract_category(str(x))
                    )

                    # `Unknown` ์ œ์™ธ
                    filtered_data = self.data[self.data['Identifier_Category'] != 'Unknown']
                    

                    # **์นดํ…Œ๊ณ ๋ฆฌ๋ณ„ ์ผ๋ณ„ ํ†ต๊ณ„ ๊ณ„์‚ฐ (์ „์ฒด ์นดํ…Œ๊ณ ๋ฆฌ ํฌํ•จ)**
                    desired_categories = ['AD', 'DU', 'SC', 'AB', 'AAB', 'PR', 'ASC', 'RB', 'ZF', 'RF', 'AE', 'SE']

                 
                    # ๊ทธ๋ž˜ํ”„์šฉ ๋ฐ์ดํ„ฐ (Identifier_Category์™€ Date ๊ธฐ์ค€ ๊ทธ๋ฃนํ™”, ์ผ๋ณ„ ๊ทธ๋ž˜ํ”„) 
                    daily_stats_for_graph = ( 
                        filtered_data.groupby(['Identifier_Category', 'Date']) 
                        .size() 
                        .unstack(fill_value=0) 
                        # unstack์€ Date๋ฅผ ์—ด๋กœ ๋ฐ”๊พธ๊ณ  Identifier_Category๋Š” ํ–‰์œผ๋กœ ๋งŒ๋“ ๋‹ค.
                        .reindex(desired_categories, fill_value=0)
                        # reindex๋Š” ์ •ํ•ด๋‘” ์ˆœ์„œ๋Œ€๋กœ ์žฌ์ •๋ ฌ ํ•จ
                    )
                    
                    # ๊ทธ๋ž˜ํ”„์šฉ ๋ฐ์ดํ„ฐ (Identifiler_Category์™€ Month ๊ธฐ์ค€ ๊ทธ๋ฃนํ™”, ์›”๋ณ„ ๊ทธ๋ž˜ํ”„)
                    filtered_data['Month'] = filtered_data['Date'].apply(lambda x: x[:7])
                    monthly_stats = (filtered_data.groupby(['Identifier_Category', 'Month'])
                                     .size()
                                     .unstack(fill_value=0)
                                     .reindex(desired_categories, fill_value=0))
                    

                    if daily_stats_for_graph.empty:
                        raise ValueError("ํ†ต๊ณ„ ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. ํ™•์ธ ํ›„ ๋‹ค์‹œ ์‹œ๋„ํ•˜์„ธ์š”.")

                    # ์ฒซ ๋ฒˆ์งธ ์Šฌ๋ผ์ด๋“œ: ํŒŒ์ด ์ฐจํŠธ
                    slide = ppt.slides.add_slide(ppt.slide_layouts[5])  # Title-Only layout
                    slide.shapes.title.text = "Identifier ํ†ต๊ณ„ ํŒŒ์ด ์ฐจํŠธ"

                    # ํŒŒ์ด ์ฐจํŠธ ์ƒ์„ฑ (์ „์ฒด ๋ฐ์ดํ„ฐ)
                    fig, ax = plt.subplots(figsize=(6, 6))
                    values = daily_stats_for_graph.sum(axis=1)
                    # sum(axis=0) : ์œ„์—์„œ ์•„๋ž˜๋กœ ํ•ฉ์‚ฐ sum(axis=1) : ์™ผ์ชฝ์—์„œ ์˜ค๋ฅธ์ชฝ์œผ๋กœ ํ•ฉ์‚ฐ

                    # 0%์ธ ํ•ญ๋ชฉ ์ œ๊ฑฐ
                    filtered_values = values[values > 0]
                    
                    # piechart๋ฅผ ์ƒ์„ฑํ•˜๊ณ  3๊ฐœ์˜ ๊ฐ’์„ ๋ฐ˜ํ™˜ํ•œ๋‹ค. 
                    patches, wedges, texts = ax.pie(
                        filtered_values,
                        labels=filtered_values.index,
                        autopct='%1.1f%%',
                        startangle=90,
                        wedgeprops={'linewidth' : 1, 'edgecolor': 'black'}, # ์Šฌ๋ผ์ด์Šค ์Šคํƒ€์ผ
                        textprops={'fontsize' : 10}, # ํ…์ŠคํŠธ ์Šคํƒ€์ผ
                        pctdistance=0.7, # ํผ์„ผํŠธ ํ…์ŠคํŠธ ๊ฑฐ๋ฆฌ
                        labeldistance=1.2, # ๋ ˆ์ด๋ธ” ๊ฑฐ๋ฆฌ
                    )


                    
                    # ๋ฒ”๋ก€ ํ•ญ๋ชฉ์— ํผ์„ผํŠธ ๋น„์œจ ์ถ”๊ฐ€
                    # ๊ฐ ํ•ญ๋ชฉ์˜ ํผ์„ผํŠธ ๋น„์œจ์„ ๊ฐ€์ ธ์™€ ๋ฒ”๋ก€์— ์ถ”๊ฐ€
                    legend_labels = [f'{label}: {text.get_text()}' for label, text in zip(filtered_values.index, texts)]                    
                    # ๋ฒ”๋ก€ ์ถ”๊ฐ€
                    ax.legend(patches, legend_labels, loc="best")
                    # ์ œ๋ชฉ ์„ค์ •
                    ax.set_title(f"Identifier ํ†ต๊ณ„ (๋ฒ”์ฃผ๋ณ„ ๋ถ„ํฌ)\n ์ด ํƒ์ง€ ๊ฑด์ˆ˜: {filtered_values.sum(axis=0)} ๊ฐœ", fontsize=14)
                    # ์ฐจํŠธ ์ด๋ฏธ์ง€ ์ €์žฅ
                    chart_image_path = f"./daily_identifier_pie_chart.png"
                    plt.tight_layout()
                    plt.savefig(chart_image_path)
                    plt.close(fig)
                    # ์ฐจํŠธ ์ด๋ฏธ์ง€ ์‚ฝ์ž…
                    slide.shapes.add_picture(chart_image_path, Inches(1.5), Inches(2), Inches(7), Inches(5))                   

                    # ์ผ๋ณ„ ํ†ต๊ณ„ ํ‘œ ์ถ”๊ฐ€ (5์ผ ๋‹จ์œ„)
                    dates = daily_stats_for_graph.columns
                    max_days_per_slide = 5  # ํ•œ ์Šฌ๋ผ์ด๋“œ๋‹น 10์ผ
                    for i in range(0, len(dates), max_days_per_slide):
                        date_range = dates[i:i + max_days_per_slide]
                        subset_stats = daily_stats_for_graph.loc[:, date_range] # loc ์€ ์ˆซ์ž๋กœ ์Šฌ๋ผ์ด์‹ฑ ํ•˜์ง€ ์•Š๊ณ  ์ •ํ™•ํ•œ ๋ช…์นญ์œผ๋กœ ์ง„ํ–‰ํ•œ๋‹ค. ์ผ๋ฐ˜์ ์œผ๋กœ ํ–‰๊ธฐ์ค€์ด๊ณ  ์—ด๊ธฐ์ค€์€ [:, ์—ด] ์ด๋ ‡๊ฒŒ ์ง„ํ–‰ํ•ด์•ผํ•œ๋‹ค.

                        if not subset_stats.empty and len(date_range) > 0:
                            slide = ppt.slides.add_slide(ppt.slide_layouts[5])
                            slide.shapes.title.text = f"{date_range.min()} - {date_range.max()} ์ผ๋ณ„ ํ†ต๊ณ„"                    
                            rows, cols = subset_stats.shape # subset_stats๋Š” groupby๋กœ ๋งŒ๋“ค์–ด์ง„ ํ˜•ํƒœ์ธ๋ฐ ์ด๋Ÿฐ ๊ฒฝ์šฐ ์ธ๋ฑ์Šค๋Š” groupby์—์„œ ํ–‰์œผ๋กœ ์ •ํ•ด์ค€ ๋ถ€๋ถ„์ด๋‹ค.
                            slide_table = slide.shapes.add_table(rows + 2, 6, Inches(0.5), Inches(1.5), Inches(9), Inches(4)).table # rows + 2 ๋Š” ํ‘œ์— ๋“ค์–ด๊ฐˆ ๋‚ด์šฉ ๋•Œ๋ฌธ์— ์ด๋ ‡๊ฒŒ ํ•ด์ค€ ๊ฒƒ์ด๋‹ค.
                            slide_table.cell(0, 0).text = "Identifier" # ์—ฌ๊ธฐ์„œ ๋งํ•˜๋Š” cell์€ ํ‘œ ์•ˆ์— ์žˆ๋Š” ํ•˜๋‚˜์˜ ์นธ์„ ๋งํ•œ๋‹ค.                            
                            for col_idx, date in enumerate(subset_stats.columns, start=1):
                                slide_table.cell(0, col_idx).text = str(date)
                            for row_idx, (category, row_data) in enumerate(subset_stats.iterrows(), start=1):
                                # iterrows() -> pandas ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„์˜ ๊ฐํ–‰์„ (์ธ๋ฑ์Šค, ๋ฐ์ดํ„ฐ) ํ˜•ํƒœ๋กœ ๋ฐ˜ํ™˜ํ•˜๋Š” ๋ฉ”์„œ๋“œ์ด๋‹ค.
                                slide_table.cell(row_idx, 0).text = category
                                for col_idx, value in enumerate(row_data, start=1):
                                    slide_table.cell(row_idx, col_idx).text = str(value)
                            # ์ดํ•ฉ ์ถ”๊ฐ€
                            slide_table.cell(rows + 1, 0).text ="์ดํ•ฉ"                    
                            for col_idx, category in enumerate(subset_stats.columns, start=1):
                                slide_table.cell(rows + 1, col_idx).text = str(subset_stats[category].sum())



                    # ์›”๋ณ„ ํ†ต๊ณ„ ์ถ”๊ฐ€ (5๋‹ฌ ์”ฉ)        
                    months = monthly_stats.columns
                    max_months_per_slide = 5
                    for i in range(0, len(months), max_months_per_slide):
                        month_range = months[i:i + max_months_per_slide]
                        subset_monthly_stats = monthly_stats.loc[:, month_range]
                        if not subset_monthly_stats.empty:
                            slide = ppt.slides.add_slide(ppt.slide_layouts[5])
                            slide.shapes.title.text = f"{month_range.min()} - {month_range.max()} ์›”๋ณ„ ํ†ต๊ณ„" 
                            rows, cols = subset_monthly_stats.shape
                            slide_table = slide.shapes.add_table(rows + 2, cols + 1, Inches(0.5), Inches(1.5), Inches(9), Inches(4)).table
                            # ์œ„์— slide_table ์ด๊ฑฐ ์ถ”๊ฐ€ ์•ˆ ํ•˜๋ฉด ๋ˆ„๋ฝ ๋จ
                            slide_table.cell(0,0).text = "Identifier"
                            for col_idx, month in enumerate(subset_monthly_stats.columns, start=1):
                                slide_table.cell(0, col_idx).text = str(month)
                            for row_idx, (category, row_data) in enumerate(subset_monthly_stats.iterrows(), start=1):
                                slide_table.cell(row_idx, 0).text = category
                                for col_idx, value in enumerate(row_data, start=1):
                                    slide_table.cell(row_idx, col_idx).text = str(value)
                            slide_table.cell(rows + 1, 0).text = "์ดํ•ฉ"
                            for col_idx, month in enumerate(subset_monthly_stats.columns, start=1):
                                slide_table.cell(rows + 1, col_idx).text = str(subset_monthly_stats[month].sum())

                
            

                # PowerPoint ์ €์žฅ
                save_path = filedialog.asksaveasfilename(defaultextension=".pptx", filetypes=[("PowerPoint files", "*.pptx")])
                if save_path:
                    ppt.save(save_path)
                    messagebox.showinfo("์„ฑ๊ณต", f"PPT๊ฐ€ ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {save_path}")
            except Exception as e:
                messagebox.showerror("์˜ค๋ฅ˜", f"PPT ์ƒ์„ฑ ์‹คํŒจ: {e}")




# ์‹คํ–‰
if __name__ == "__main__":
    root = tk.Tk()
    app = CSVAnalyzerApp(root)
    root.mainloop()

 

[์‹คํ–‰ํ™”๋ฉด]

์‹คํ–‰ ์‹œํ‚ค๋ฉด ๋‚˜ํƒ€๋‚˜๋Š” ์ตœ์ดˆ ํ™”๋ฉด
๊ทœ๊ฒฉ์— ๋งž๋Š” csv ํŒŒ์ผ ์—…๋กœ๋“œ
์ž๋™์œผ๋กœ ํ†ต๊ณ„ ๋‚ด๊ณ  ppt๋กœ ์ œ์ž‘

 

 

์ด๋ฅผ ํ™œ์šฉํ•ด์„œ ์ˆ˜์ž‘์—…์œผ๋กœ ํ•˜๋˜ ํ†ต๊ณ„ ๋ฐ ๋ฐ์ดํ„ฐ ๋ถ„์„ ์‹œ๊ฐ„์„ ํฌ๊ฒŒ ๋‹จ์ถ•์‹œ์ผฐ๋‹ค.