Files
pge-raport-generator/visualize_data.py

527 lines
20 KiB
Python

#!/usr/bin/env python3
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import argparse
import os
from pathlib import Path
from typing import Optional, List, Dict
import warnings
# Suppress matplotlib warnings
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
class PGEDataVisualizer:
"""
Class for visualizing PGE energy meter data from converted CSV reports.
Creates various charts for different time aggregations.
"""
def __init__(self, base_filename: str, data_dir: str = ".", output_dir: str = "charts",
buy_price: float = 1.4, sell_price: float = 0.2):
"""
Initialize the visualizer.
Args:
base_filename: Base filename without suffix (e.g., '202512011130_590543580100122940_46f7640c-07d2-4e64-9ba7-20b8c2eeca7d')
data_dir: Directory containing the CSV files
output_dir: Directory to save charts
buy_price: Price per kWh for energy consumed from grid (default: 1.4 PLN)
sell_price: Price per kWh for energy delivered to grid (default: 0.2 PLN)
"""
self.base_filename = base_filename
self.data_dir = Path(data_dir)
self.output_dir = Path(output_dir)
self.output_dir.mkdir(parents=True, exist_ok=True)
self.buy_price = buy_price
self.sell_price = sell_price
# Polish energy type colors (consistent across all charts)
self.colors = {
'Czynna zbilansowana': '#2E86AB', # Blue - Net energy
'Czynna oddana': '#A23B72', # Purple - Energy delivered
'Czynna pobrana': '#F18F01' # Orange - Energy consumed
}
# Set matplotlib style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
# Configure Polish locale for better formatting
self.setup_locale()
def setup_locale(self):
"""Setup matplotlib for better Polish text rendering."""
plt.rcParams['font.size'] = 10
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 10
plt.rcParams['figure.titlesize'] = 16
def load_data(self, suffix: str) -> Optional[pd.DataFrame]:
"""
Load data from CSV file with given suffix.
Args:
suffix: File suffix (e.g., '-hourly', '-daily')
Returns:
DataFrame or None if file doesn't exist
"""
filename = f"{self.base_filename}{suffix}.csv"
filepath = self.data_dir / filename
if not filepath.exists():
print(f"Warning: File not found: {filepath}")
return None
try:
df = pd.read_csv(filepath, sep=';')
print(f"Loaded {suffix} data: {len(df)} rows")
return df
except Exception as e:
print(f"Error loading {filepath}: {e}")
return None
def parse_timestamps(self, df: pd.DataFrame, time_col: str) -> pd.DataFrame:
"""
Parse timestamp column to datetime.
Args:
df: DataFrame with time column
time_col: Name of time column
Returns:
DataFrame with parsed datetime column
"""
df = df.copy()
if time_col == 'Timestamp':
df[time_col] = pd.to_datetime(df[time_col])
elif time_col == 'Date':
df[time_col] = pd.to_datetime(df[time_col])
elif time_col == 'WeekStart':
df[time_col] = pd.to_datetime(df[time_col])
elif time_col == 'Month':
df[time_col] = pd.to_datetime(df[time_col] + '-01')
return df
def calculate_costs(self, df: pd.DataFrame, energy_cols: List[str]) -> pd.DataFrame:
"""
Calculate energy costs based on buy/sell prices.
Args:
df: DataFrame with energy columns
energy_cols: List of energy column names
Returns:
DataFrame with added cost columns
"""
df = df.copy()
if 'Czynna pobrana' in energy_cols:
df['Koszt_pobrana'] = df['Czynna pobrana'] * self.buy_price
if 'Czynna oddana' in energy_cols:
df['Przychód_oddana'] = df['Czynna oddana'] * self.sell_price
# Net cost (positive = cost, negative = profit)
if 'Koszt_pobrana' in df.columns and 'Przychód_oddana' in df.columns:
df['Koszt_netto'] = df['Koszt_pobrana'] - df['Przychód_oddana']
elif 'Koszt_pobrana' in df.columns:
df['Koszt_netto'] = df['Koszt_pobrana']
elif 'Przychód_oddana' in df.columns:
df['Koszt_netto'] = -df['Przychód_oddana']
return df
def plot_time_series(self, df: pd.DataFrame, time_col: str, title: str,
filename: str, figsize: tuple = (15, 8)):
"""
Create time series plot for energy data.
Args:
df: DataFrame with time and energy columns
time_col: Name of time column
title: Plot title
filename: Output filename
figsize: Figure size
"""
if df is None or df.empty:
return
df = self.parse_timestamps(df, time_col)
fig, ax = plt.subplots(figsize=figsize)
energy_cols = [col for col in df.columns if col != time_col]
for col in energy_cols:
if col in self.colors:
color = self.colors[col]
else:
color = None
ax.plot(df[time_col], df[col], label=col, linewidth=2, color=color, marker='o', markersize=3)
ax.set_title(title, fontweight='bold', pad=20)
ax.set_xlabel('Czas', fontweight='bold')
ax.set_ylabel('Energia [kWh]', fontweight='bold')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
# Format x-axis based on data type
if time_col in ['Timestamp', 'Date']:
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
elif time_col == 'WeekStart':
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
plt.tight_layout()
output_path = self.output_dir / f"{filename}.png"
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Saved chart: {output_path}")
def plot_stacked_area(self, df: pd.DataFrame, time_col: str, title: str,
filename: str, figsize: tuple = (15, 8)):
"""
Create stacked area chart for energy data.
"""
if df is None or df.empty:
return
df = self.parse_timestamps(df, time_col)
fig, ax = plt.subplots(figsize=figsize)
energy_cols = [col for col in df.columns if col != time_col]
# Prepare data for stacking (only positive values)
positive_data = df[energy_cols].clip(lower=0)
colors_list = [self.colors.get(col, None) for col in energy_cols]
ax.stackplot(df[time_col], *[positive_data[col] for col in energy_cols],
labels=energy_cols, colors=colors_list, alpha=0.7)
ax.set_title(title, fontweight='bold', pad=20)
ax.set_xlabel('Czas', fontweight='bold')
ax.set_ylabel('Energia [kWh]', fontweight='bold')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
# Format x-axis
if time_col in ['Timestamp', 'Date', 'WeekStart']:
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
plt.tight_layout()
output_path = self.output_dir / f"{filename}.png"
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Saved chart: {output_path}")
def plot_monthly_hourly_averages(self, df_hourly: pd.DataFrame):
"""
Create monthly hourly average charts for each month.
"""
if df_hourly is None or df_hourly.empty:
return
df = self.parse_timestamps(df_hourly, 'Timestamp')
# Add hour and month columns
df['Hour'] = df['Timestamp'].dt.hour
df['Month'] = df['Timestamp'].dt.to_period('M')
energy_cols = [col for col in df.columns if col not in ['Timestamp', 'Hour', 'Month']]
# Get unique months
months = sorted(df['Month'].unique())
# Create subplots for each month
n_months = len(months)
cols = 3 # 3 charts per row
rows = (n_months + cols - 1) // cols
fig, axes = plt.subplots(rows, cols, figsize=(20, 6 * rows))
if rows == 1:
axes = axes.reshape(1, -1)
elif n_months == 1:
axes = axes.reshape(1, 1)
for i, month in enumerate(months):
row = i // cols
col = i % cols
ax = axes[row, col] if rows > 1 else axes[col]
# Filter data for this month
month_data = df[df['Month'] == month]
# Calculate hourly averages
hourly_avg = month_data.groupby('Hour')[energy_cols].mean()
# Plot each energy type
for energy_col in energy_cols:
color = self.colors.get(energy_col, None)
ax.plot(hourly_avg.index, hourly_avg[energy_col],
label=energy_col, linewidth=2.5, color=color, marker='o', markersize=4)
ax.set_title(f'{month} - Średnie godzinowe zużycie energii', fontweight='bold')
ax.set_xlabel('Godzina')
ax.set_ylabel('Energia [kWh]')
ax.grid(True, alpha=0.3)
ax.legend(fontsize=8)
ax.set_xticks(range(0, 24, 2))
# Add zero line for reference
ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
# Hide unused subplots
for i in range(n_months, rows * cols):
row = i // cols
col = i % cols
axes[row, col].set_visible(False)
plt.suptitle('Miesięczne profile godzinowe zużycia energii', fontsize=16, fontweight='bold')
plt.tight_layout()
output_path = self.output_dir / "monthly_hourly_profiles.png"
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Saved chart: {output_path}")
def plot_cost_analysis(self, df: pd.DataFrame, time_col: str, title_suffix: str):
"""
Create cost analysis charts.
Args:
df: DataFrame with energy data
time_col: Time column name
title_suffix: Suffix for the title (e.g., 'miesięczne', 'dzienne')
"""
if df is None or df.empty:
return
df = self.parse_timestamps(df, time_col)
energy_cols = [col for col in df.columns if col != time_col and not col.startswith('Koszt') and not col.startswith('Przychód')]
df_with_costs = self.calculate_costs(df, energy_cols)
# Create cost analysis chart
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
# Energy costs breakdown
if 'Koszt_pobrana' in df_with_costs.columns:
ax1.bar(df_with_costs[time_col], df_with_costs['Koszt_pobrana'],
label=f'Koszt energii pobranej ({self.buy_price} zł/kWh)',
color='#F18F01', alpha=0.7)
if 'Przychód_oddana' in df_with_costs.columns:
ax1.bar(df_with_costs[time_col], -df_with_costs['Przychód_oddana'],
label=f'Przychód z energii oddanej ({self.sell_price} zł/kWh)',
color='#A23B72', alpha=0.7)
ax1.set_title(f'Analiza kosztów energii - {title_suffix}', fontweight='bold', pad=20)
ax1.set_xlabel('Okres', fontweight='bold')
ax1.set_ylabel('Kwota [zł]', fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.axhline(y=0, color='black', linestyle='-', alpha=0.3)
# Net cost trend
if 'Koszt_netto' in df_with_costs.columns:
colors = ['red' if x > 0 else 'green' for x in df_with_costs['Koszt_netto']]
ax2.bar(df_with_costs[time_col], df_with_costs['Koszt_netto'],
color=colors, alpha=0.7)
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.5)
ax2.set_title(f'Koszt netto energii - {title_suffix}', fontweight='bold', pad=20)
ax2.set_xlabel('Okres', fontweight='bold')
ax2.set_ylabel('Koszt netto [zł]', fontweight='bold')
ax2.grid(True, alpha=0.3)
# Add summary text
total_cost = df_with_costs['Koszt_netto'].sum()
color = 'red' if total_cost > 0 else 'green'
ax2.text(0.02, 0.98, f'Łączny koszt: {total_cost:.2f}',
transform=ax2.transAxes, fontsize=12, fontweight='bold',
verticalalignment='top', color=color,
bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
# Format x-axis
if time_col in ['Timestamp', 'Date', 'WeekStart']:
for ax in [ax1, ax2]:
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
if len(df_with_costs) > 10:
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
plt.tight_layout()
filename = f"cost_analysis_{title_suffix.replace(' ', '_')}"
output_path = self.output_dir / f"{filename}.png"
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Saved chart: {output_path}")
def plot_monthly_comparison(self, df: pd.DataFrame):
"""
Create monthly comparison charts.
"""
if df is None or df.empty:
return
df = self.parse_timestamps(df, 'Month')
# Monthly totals comparison
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
energy_cols = [col for col in df.columns if col != 'Month']
# Bar chart
x_pos = range(len(df))
width = 0.25
for i, col in enumerate(energy_cols):
color = self.colors.get(col, None)
ax1.bar([x + i * width for x in x_pos], df[col], width,
label=col, color=color, alpha=0.8)
ax1.set_title('Miesięczne zużycie energii - porównanie', fontweight='bold', pad=20)
ax1.set_xlabel('Miesiąc', fontweight='bold')
ax1.set_ylabel('Energia [kWh]', fontweight='bold')
ax1.set_xticks([x + width for x in x_pos])
ax1.set_xticklabels(df['Month'].dt.strftime('%Y-%m'), rotation=45)
ax1.legend()
ax1.grid(True, alpha=0.3)
# Net energy (zbilansowana) focus
if 'Czynna zbilansowana' in energy_cols:
color = self.colors.get('Czynna zbilansowana', 'blue')
ax2.plot(df['Month'], df['Czynna zbilansowana'],
marker='o', linewidth=3, markersize=8, color=color)
ax2.axhline(y=0, color='red', linestyle='--', alpha=0.7)
ax2.set_title('Energia zbilansowana - trend miesięczny', fontweight='bold', pad=20)
ax2.set_xlabel('Miesiąc', fontweight='bold')
ax2.set_ylabel('Energia zbilansowana [kWh]', fontweight='bold')
ax2.grid(True, alpha=0.3)
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
plt.tight_layout()
output_path = self.output_dir / "monthly_comparison.png"
plt.savefig(output_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Saved chart: {output_path}")
def generate_all_charts(self):
"""
Generate all available charts for the dataset.
"""
print(f"Generating charts for: {self.base_filename}")
print(f"Output directory: {self.output_dir}")
print(f"Energy prices: Buy={self.buy_price} zł/kWh, Sell={self.sell_price} zł/kWh")
# Load all data types
hourly_data = self.load_data('-hourly')
daily_data = self.load_data('-daily')
weekly_data = self.load_data('-weekly')
monthly_data = self.load_data('-monthly')
# Generate time series charts
if hourly_data is not None:
# Sample hourly data for better visualization (first 30 days)
hourly_sample = hourly_data.head(24 * 30) if len(hourly_data) > 720 else hourly_data
self.plot_time_series(hourly_sample, 'Timestamp',
'Zużycie energii - dane godzinowe (30 dni)', 'hourly_timeseries')
# Generate monthly hourly averages
self.plot_monthly_hourly_averages(hourly_data)
if daily_data is not None:
self.plot_time_series(daily_data, 'Date',
'Zużycie energii - dane dzienne', 'daily_timeseries')
self.plot_stacked_area(daily_data, 'Date',
'Zużycie energii - obszar skumulowany (dzienne)', 'daily_stacked')
# Generate daily cost analysis
self.plot_cost_analysis(daily_data, 'Date', 'dzienne')
if weekly_data is not None:
self.plot_time_series(weekly_data, 'WeekStart',
'Zużycie energii - dane tygodniowe', 'weekly_timeseries')
if monthly_data is not None:
self.plot_time_series(monthly_data, 'Month',
'Zużycie energii - dane miesięczne', 'monthly_timeseries')
self.plot_monthly_comparison(monthly_data)
# Generate monthly cost analysis
self.plot_cost_analysis(monthly_data, 'Month', 'miesięczne')
print("\nChart generation completed!")
def main():
parser = argparse.ArgumentParser(
description="Generate visualizations for PGE energy data"
)
parser.add_argument(
"base_filename",
help="Base filename without suffix (e.g., '202512011130_590543580100122940_46f7640c-07d2-4e64-9ba7-20b8c2eeca7d')"
)
parser.add_argument(
"--data-dir", "-d",
default=".",
help="Directory containing CSV files (default: current directory)"
)
parser.add_argument(
"--output-dir", "-o",
default="charts",
help="Output directory for charts (default: 'charts')"
)
parser.add_argument(
"--buy-price", "-b",
type=float,
default=1.4,
help="Price per kWh for energy consumed from grid (default: 1.4 PLN)"
)
parser.add_argument(
"--sell-price", "-s",
type=float,
default=0.2,
help="Price per kWh for energy delivered to grid (default: 0.2 PLN)"
)
args = parser.parse_args()
try:
visualizer = PGEDataVisualizer(
base_filename=args.base_filename,
data_dir=args.data_dir,
output_dir=args.output_dir,
buy_price=args.buy_price,
sell_price=args.sell_price
)
visualizer.generate_all_charts()
except Exception as e:
print(f"Error: {e}")
return 1
return 0
if __name__ == "__main__":
exit(main())