๊ฐ๋ฐ ์ง์, ์คํฌ๋ฆฝํธ/์คํฌ๋ฆฝํธ
csv ๋ฐ์ดํฐ ๋ถ์ ํด
์ํ๋ฆฌํฐ์งํธ
2025. 4. 29. 10:18
์ฃผ์ : csv ํ์ผ์ ์ ์ฌ๊ฐ ๋๋ ๋ฐ์ดํฐ๋ค์ ํต๊ณ๋ฅผ ๋ด์ ํผํผํฐ๋ฅผ ๋ง๋ค์ด์ฃผ๋ application
๋ฐฐ๊ฒฝ ๋ฐ ๋ชฉํ : csv์ ์๋ง์ ๋ฐ์ดํฐ๋ค์ด ์๋๋ฐ ์์์ ์ผ๋ก ํต๊ณ๋ด๋ ๋ฐ ์๊ฐ์ด ๋ง์ด ๊ฑธ๋ ธ๋ค. ์ด๋ฅผ ์๋ํ ํด์ฃผ๊ธฐ๋ก ํ๊ณ ๊ฐ๋ฐ ํ๋ค.
์ฌ์ฉ ์ธ์ด : python
[์ฝ๋]
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches
import platform
from pptx.util import Pt
# ์ด์ ์ฒด์ ํ์ธ
if platform.system() == 'Darwin': # macOS
plt.rcParams['font.family'] = 'AppleGothic' # macOS์์ ์ฌ์ฉํ๋ ํฐํธ
elif platform.system() == 'Windows': # Windows
plt.rcParams['font.family'] = 'Malgun Gothic' # Windows์์ ์ฌ์ฉํ๋ ํฐํธ
else: # Linux ๋ฑ์ ๋ค๋ฅธ ์ด์ ์ฒด์
plt.rcParams['font.family'] = 'NanumGothic' # Linux์์ ์ฌ์ฉํ๋ ํฐํธ
# ์ ๋์ฝ๋ ๋ง์ด๋์ค ๊ธฐํธ ๊นจ์ง ๋ฐฉ์ง
plt.rcParams['axes.unicode_minus'] = False
class CSVAnalyzerApp:
def __init__(self, root):
self.root = root
self.root.title("CSV Analyzer & PPT Generator")
# CSV ํ์ผ ์ ํ ๋ฒํผ
self.file_label = tk.Label(root, text="CSV ํ์ผ์ ์ ํํ์ธ์.")
self.file_label.pack(pady=10)
self.select_button = tk.Button(root, text="ํ์ผ ์ ํ", command=self.load_csv)
self.select_button.pack(pady=5)
# ๋ถ์ ๊ฒฐ๊ณผ ์ํ
self.status_label = tk.Label(root, text="์ํ: ๋๊ธฐ ์ค", fg="blue")
self.status_label.pack(pady=10)
# PPT ์์ฑ ๋ฒํผ
self.generate_button = tk.Button(root, text="PPT ์์ฑ", command=self.create_ppt)
self.generate_button.pack(pady=5)
self.generate_button.config(state=tk.DISABLED)
self.data = None # CSV ๋ฐ์ดํฐ ์ ์ฅ ๋ณ์
def load_csv(self):
file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
if file_path:
try:
self.data = pd.read_csv(file_path, encoding='euc-kr')
self.status_label.config(text="์ํ: ํ์ผ ๋ก๋ ์๋ฃ", fg="green")
messagebox.showinfo("์ฑ๊ณต", f"{file_path} ํ์ผ์ด ๋ก๋๋์์ต๋๋ค.")
self.generate_button.config(state=tk.NORMAL)
self.display_basic_analysis()
except Exception as e:
messagebox.showerror("์ค๋ฅ", f"CSV ํ์ผ ๋ก๋ ์คํจ: {e}")
self.status_label.config(text="์ํ: ์ค๋ฅ ๋ฐ์", fg="red")
def display_basic_analysis(self):
if self.data is not None:
# ๊ฐ๋จํ ๋ฐ์ดํฐ ํต๊ณ ํ์
summary = self.data.describe(include='all')
# Tkinter ์ฐฝ์ ๋ฐ์ดํฐ ์์ฝ ํ์
summary_label = tk.Label(self.root, text=summary, justify="left")
summary_label.pack(pady=10)
def create_ppt(self):
if self.data is not None:
try:
ppt = Presentation()
# ๋ ์ง ์ด ์์ฑ
if 'Time' in self.data.columns and 'Identifier' in self.data.columns:
self.data['Date'] = self.data['Time'].apply(
lambda x: str(x)[:10] if isinstance(x, str) else None
)
# Identifier ๊ฐ ์ฒ๋ฆฌ
def extract_category(identifier):
desired_categories = {'AD', 'DU', 'SC', 'AB', 'AAB', 'PR', 'ASC', 'RB', 'ZF', 'RF', 'AE', 'SE'}
parts = identifier.split('-')
if identifier.startswith('EE'): # EE๋ก ์์ํ๋ฉด ๋ค ๋ฒ์งธ ๋ฐฐ์ด ์์
category = parts[3] if len(parts) > 3 else "Unknown"
else: # ๊ทธ ์ธ์ ๊ฒฝ์ฐ ์ฒซ ๋ฒ์งธ ๋ฐฐ์ด ์์
category = parts[0] if len(parts) > 0 else "Unknown"
# ์นดํ
๊ณ ๋ฆฌ๊ฐ ์ํ๋ ๋ชฉ๋ก์ ํฌํจ๋์๋์ง ํ์ธ
return category if category in desired_categories else "Unknown"
self.data['Identifier_Category'] = self.data['Identifier'].apply(
lambda x: extract_category(str(x))
)
# `Unknown` ์ ์ธ
filtered_data = self.data[self.data['Identifier_Category'] != 'Unknown']
# **์นดํ
๊ณ ๋ฆฌ๋ณ ์ผ๋ณ ํต๊ณ ๊ณ์ฐ (์ ์ฒด ์นดํ
๊ณ ๋ฆฌ ํฌํจ)**
desired_categories = ['AD', 'DU', 'SC', 'AB', 'AAB', 'PR', 'ASC', 'RB', 'ZF', 'RF', 'AE', 'SE']
# ๊ทธ๋ํ์ฉ ๋ฐ์ดํฐ (Identifier_Category์ Date ๊ธฐ์ค ๊ทธ๋ฃนํ, ์ผ๋ณ ๊ทธ๋ํ)
daily_stats_for_graph = (
filtered_data.groupby(['Identifier_Category', 'Date'])
.size()
.unstack(fill_value=0)
# unstack์ Date๋ฅผ ์ด๋ก ๋ฐ๊พธ๊ณ Identifier_Category๋ ํ์ผ๋ก ๋ง๋ ๋ค.
.reindex(desired_categories, fill_value=0)
# reindex๋ ์ ํด๋ ์์๋๋ก ์ฌ์ ๋ ฌ ํจ
)
# ๊ทธ๋ํ์ฉ ๋ฐ์ดํฐ (Identifiler_Category์ Month ๊ธฐ์ค ๊ทธ๋ฃนํ, ์๋ณ ๊ทธ๋ํ)
filtered_data['Month'] = filtered_data['Date'].apply(lambda x: x[:7])
monthly_stats = (filtered_data.groupby(['Identifier_Category', 'Month'])
.size()
.unstack(fill_value=0)
.reindex(desired_categories, fill_value=0))
if daily_stats_for_graph.empty:
raise ValueError("ํต๊ณ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค. ํ์ธ ํ ๋ค์ ์๋ํ์ธ์.")
# ์ฒซ ๋ฒ์งธ ์ฌ๋ผ์ด๋: ํ์ด ์ฐจํธ
slide = ppt.slides.add_slide(ppt.slide_layouts[5]) # Title-Only layout
slide.shapes.title.text = "Identifier ํต๊ณ ํ์ด ์ฐจํธ"
# ํ์ด ์ฐจํธ ์์ฑ (์ ์ฒด ๋ฐ์ดํฐ)
fig, ax = plt.subplots(figsize=(6, 6))
values = daily_stats_for_graph.sum(axis=1)
# sum(axis=0) : ์์์ ์๋๋ก ํฉ์ฐ sum(axis=1) : ์ผ์ชฝ์์ ์ค๋ฅธ์ชฝ์ผ๋ก ํฉ์ฐ
# 0%์ธ ํญ๋ชฉ ์ ๊ฑฐ
filtered_values = values[values > 0]
# piechart๋ฅผ ์์ฑํ๊ณ 3๊ฐ์ ๊ฐ์ ๋ฐํํ๋ค.
patches, wedges, texts = ax.pie(
filtered_values,
labels=filtered_values.index,
autopct='%1.1f%%',
startangle=90,
wedgeprops={'linewidth' : 1, 'edgecolor': 'black'}, # ์ฌ๋ผ์ด์ค ์คํ์ผ
textprops={'fontsize' : 10}, # ํ
์คํธ ์คํ์ผ
pctdistance=0.7, # ํผ์ผํธ ํ
์คํธ ๊ฑฐ๋ฆฌ
labeldistance=1.2, # ๋ ์ด๋ธ ๊ฑฐ๋ฆฌ
)
# ๋ฒ๋ก ํญ๋ชฉ์ ํผ์ผํธ ๋น์จ ์ถ๊ฐ
# ๊ฐ ํญ๋ชฉ์ ํผ์ผํธ ๋น์จ์ ๊ฐ์ ธ์ ๋ฒ๋ก์ ์ถ๊ฐ
legend_labels = [f'{label}: {text.get_text()}' for label, text in zip(filtered_values.index, texts)]
# ๋ฒ๋ก ์ถ๊ฐ
ax.legend(patches, legend_labels, loc="best")
# ์ ๋ชฉ ์ค์
ax.set_title(f"Identifier ํต๊ณ (๋ฒ์ฃผ๋ณ ๋ถํฌ)\n ์ด ํ์ง ๊ฑด์: {filtered_values.sum(axis=0)} ๊ฐ", fontsize=14)
# ์ฐจํธ ์ด๋ฏธ์ง ์ ์ฅ
chart_image_path = f"./daily_identifier_pie_chart.png"
plt.tight_layout()
plt.savefig(chart_image_path)
plt.close(fig)
# ์ฐจํธ ์ด๋ฏธ์ง ์ฝ์
slide.shapes.add_picture(chart_image_path, Inches(1.5), Inches(2), Inches(7), Inches(5))
# ์ผ๋ณ ํต๊ณ ํ ์ถ๊ฐ (5์ผ ๋จ์)
dates = daily_stats_for_graph.columns
max_days_per_slide = 5 # ํ ์ฌ๋ผ์ด๋๋น 10์ผ
for i in range(0, len(dates), max_days_per_slide):
date_range = dates[i:i + max_days_per_slide]
subset_stats = daily_stats_for_graph.loc[:, date_range] # loc ์ ์ซ์๋ก ์ฌ๋ผ์ด์ฑ ํ์ง ์๊ณ ์ ํํ ๋ช
์นญ์ผ๋ก ์งํํ๋ค. ์ผ๋ฐ์ ์ผ๋ก ํ๊ธฐ์ค์ด๊ณ ์ด๊ธฐ์ค์ [:, ์ด] ์ด๋ ๊ฒ ์งํํด์ผํ๋ค.
if not subset_stats.empty and len(date_range) > 0:
slide = ppt.slides.add_slide(ppt.slide_layouts[5])
slide.shapes.title.text = f"{date_range.min()} - {date_range.max()} ์ผ๋ณ ํต๊ณ"
rows, cols = subset_stats.shape # subset_stats๋ groupby๋ก ๋ง๋ค์ด์ง ํํ์ธ๋ฐ ์ด๋ฐ ๊ฒฝ์ฐ ์ธ๋ฑ์ค๋ groupby์์ ํ์ผ๋ก ์ ํด์ค ๋ถ๋ถ์ด๋ค.
slide_table = slide.shapes.add_table(rows + 2, 6, Inches(0.5), Inches(1.5), Inches(9), Inches(4)).table # rows + 2 ๋ ํ์ ๋ค์ด๊ฐ ๋ด์ฉ ๋๋ฌธ์ ์ด๋ ๊ฒ ํด์ค ๊ฒ์ด๋ค.
slide_table.cell(0, 0).text = "Identifier" # ์ฌ๊ธฐ์ ๋งํ๋ cell์ ํ ์์ ์๋ ํ๋์ ์นธ์ ๋งํ๋ค.
for col_idx, date in enumerate(subset_stats.columns, start=1):
slide_table.cell(0, col_idx).text = str(date)
for row_idx, (category, row_data) in enumerate(subset_stats.iterrows(), start=1):
# iterrows() -> pandas ๋ฐ์ดํฐํ๋ ์์ ๊ฐํ์ (์ธ๋ฑ์ค, ๋ฐ์ดํฐ) ํํ๋ก ๋ฐํํ๋ ๋ฉ์๋์ด๋ค.
slide_table.cell(row_idx, 0).text = category
for col_idx, value in enumerate(row_data, start=1):
slide_table.cell(row_idx, col_idx).text = str(value)
# ์ดํฉ ์ถ๊ฐ
slide_table.cell(rows + 1, 0).text ="์ดํฉ"
for col_idx, category in enumerate(subset_stats.columns, start=1):
slide_table.cell(rows + 1, col_idx).text = str(subset_stats[category].sum())
# ์๋ณ ํต๊ณ ์ถ๊ฐ (5๋ฌ ์ฉ)
months = monthly_stats.columns
max_months_per_slide = 5
for i in range(0, len(months), max_months_per_slide):
month_range = months[i:i + max_months_per_slide]
subset_monthly_stats = monthly_stats.loc[:, month_range]
if not subset_monthly_stats.empty:
slide = ppt.slides.add_slide(ppt.slide_layouts[5])
slide.shapes.title.text = f"{month_range.min()} - {month_range.max()} ์๋ณ ํต๊ณ"
rows, cols = subset_monthly_stats.shape
slide_table = slide.shapes.add_table(rows + 2, cols + 1, Inches(0.5), Inches(1.5), Inches(9), Inches(4)).table
# ์์ slide_table ์ด๊ฑฐ ์ถ๊ฐ ์ ํ๋ฉด ๋๋ฝ ๋จ
slide_table.cell(0,0).text = "Identifier"
for col_idx, month in enumerate(subset_monthly_stats.columns, start=1):
slide_table.cell(0, col_idx).text = str(month)
for row_idx, (category, row_data) in enumerate(subset_monthly_stats.iterrows(), start=1):
slide_table.cell(row_idx, 0).text = category
for col_idx, value in enumerate(row_data, start=1):
slide_table.cell(row_idx, col_idx).text = str(value)
slide_table.cell(rows + 1, 0).text = "์ดํฉ"
for col_idx, month in enumerate(subset_monthly_stats.columns, start=1):
slide_table.cell(rows + 1, col_idx).text = str(subset_monthly_stats[month].sum())
# PowerPoint ์ ์ฅ
save_path = filedialog.asksaveasfilename(defaultextension=".pptx", filetypes=[("PowerPoint files", "*.pptx")])
if save_path:
ppt.save(save_path)
messagebox.showinfo("์ฑ๊ณต", f"PPT๊ฐ ์์ฑ๋์์ต๋๋ค: {save_path}")
except Exception as e:
messagebox.showerror("์ค๋ฅ", f"PPT ์์ฑ ์คํจ: {e}")
# ์คํ
if __name__ == "__main__":
root = tk.Tk()
app = CSVAnalyzerApp(root)
root.mainloop()
[์คํํ๋ฉด]
์ด๋ฅผ ํ์ฉํด์ ์์์ ์ผ๋ก ํ๋ ํต๊ณ ๋ฐ ๋ฐ์ดํฐ ๋ถ์ ์๊ฐ์ ํฌ๊ฒ ๋จ์ถ์์ผฐ๋ค.