527 lines
20 KiB
Python
527 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.dates as mdates
|
|
import seaborn as sns
|
|
import argparse
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional, List, Dict
|
|
import warnings
|
|
|
|
# Suppress matplotlib warnings
|
|
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
|
|
|
|
class PGEDataVisualizer:
|
|
"""
|
|
Class for visualizing PGE energy meter data from converted CSV reports.
|
|
Creates various charts for different time aggregations.
|
|
"""
|
|
|
|
def __init__(self, base_filename: str, data_dir: str = ".", output_dir: str = "charts",
|
|
buy_price: float = 1.4, sell_price: float = 0.2):
|
|
"""
|
|
Initialize the visualizer.
|
|
|
|
Args:
|
|
base_filename: Base filename without suffix (e.g., '202512011130_590543580100122940_46f7640c-07d2-4e64-9ba7-20b8c2eeca7d')
|
|
data_dir: Directory containing the CSV files
|
|
output_dir: Directory to save charts
|
|
buy_price: Price per kWh for energy consumed from grid (default: 1.4 PLN)
|
|
sell_price: Price per kWh for energy delivered to grid (default: 0.2 PLN)
|
|
"""
|
|
self.base_filename = base_filename
|
|
self.data_dir = Path(data_dir)
|
|
self.output_dir = Path(output_dir)
|
|
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
self.buy_price = buy_price
|
|
self.sell_price = sell_price
|
|
|
|
# Polish energy type colors (consistent across all charts)
|
|
self.colors = {
|
|
'Czynna zbilansowana': '#2E86AB', # Blue - Net energy
|
|
'Czynna oddana': '#A23B72', # Purple - Energy delivered
|
|
'Czynna pobrana': '#F18F01' # Orange - Energy consumed
|
|
}
|
|
|
|
# Set matplotlib style
|
|
plt.style.use('seaborn-v0_8-darkgrid')
|
|
sns.set_palette("husl")
|
|
|
|
# Configure Polish locale for better formatting
|
|
self.setup_locale()
|
|
|
|
def setup_locale(self):
|
|
"""Setup matplotlib for better Polish text rendering."""
|
|
plt.rcParams['font.size'] = 10
|
|
plt.rcParams['axes.titlesize'] = 14
|
|
plt.rcParams['axes.labelsize'] = 12
|
|
plt.rcParams['xtick.labelsize'] = 10
|
|
plt.rcParams['ytick.labelsize'] = 10
|
|
plt.rcParams['legend.fontsize'] = 10
|
|
plt.rcParams['figure.titlesize'] = 16
|
|
|
|
def load_data(self, suffix: str) -> Optional[pd.DataFrame]:
|
|
"""
|
|
Load data from CSV file with given suffix.
|
|
|
|
Args:
|
|
suffix: File suffix (e.g., '-hourly', '-daily')
|
|
|
|
Returns:
|
|
DataFrame or None if file doesn't exist
|
|
"""
|
|
filename = f"{self.base_filename}{suffix}.csv"
|
|
filepath = self.data_dir / filename
|
|
|
|
if not filepath.exists():
|
|
print(f"Warning: File not found: {filepath}")
|
|
return None
|
|
|
|
try:
|
|
df = pd.read_csv(filepath, sep=';')
|
|
print(f"Loaded {suffix} data: {len(df)} rows")
|
|
return df
|
|
except Exception as e:
|
|
print(f"Error loading {filepath}: {e}")
|
|
return None
|
|
|
|
def parse_timestamps(self, df: pd.DataFrame, time_col: str) -> pd.DataFrame:
|
|
"""
|
|
Parse timestamp column to datetime.
|
|
|
|
Args:
|
|
df: DataFrame with time column
|
|
time_col: Name of time column
|
|
|
|
Returns:
|
|
DataFrame with parsed datetime column
|
|
"""
|
|
df = df.copy()
|
|
|
|
if time_col == 'Timestamp':
|
|
df[time_col] = pd.to_datetime(df[time_col])
|
|
elif time_col == 'Date':
|
|
df[time_col] = pd.to_datetime(df[time_col])
|
|
elif time_col == 'WeekStart':
|
|
df[time_col] = pd.to_datetime(df[time_col])
|
|
elif time_col == 'Month':
|
|
df[time_col] = pd.to_datetime(df[time_col] + '-01')
|
|
|
|
return df
|
|
|
|
def calculate_costs(self, df: pd.DataFrame, energy_cols: List[str]) -> pd.DataFrame:
|
|
"""
|
|
Calculate energy costs based on buy/sell prices.
|
|
|
|
Args:
|
|
df: DataFrame with energy columns
|
|
energy_cols: List of energy column names
|
|
|
|
Returns:
|
|
DataFrame with added cost columns
|
|
"""
|
|
df = df.copy()
|
|
|
|
if 'Czynna pobrana' in energy_cols:
|
|
df['Koszt_pobrana'] = df['Czynna pobrana'] * self.buy_price
|
|
|
|
if 'Czynna oddana' in energy_cols:
|
|
df['Przychód_oddana'] = df['Czynna oddana'] * self.sell_price
|
|
|
|
# Net cost (positive = cost, negative = profit)
|
|
if 'Koszt_pobrana' in df.columns and 'Przychód_oddana' in df.columns:
|
|
df['Koszt_netto'] = df['Koszt_pobrana'] - df['Przychód_oddana']
|
|
elif 'Koszt_pobrana' in df.columns:
|
|
df['Koszt_netto'] = df['Koszt_pobrana']
|
|
elif 'Przychód_oddana' in df.columns:
|
|
df['Koszt_netto'] = -df['Przychód_oddana']
|
|
|
|
return df
|
|
|
|
def plot_time_series(self, df: pd.DataFrame, time_col: str, title: str,
|
|
filename: str, figsize: tuple = (15, 8)):
|
|
"""
|
|
Create time series plot for energy data.
|
|
|
|
Args:
|
|
df: DataFrame with time and energy columns
|
|
time_col: Name of time column
|
|
title: Plot title
|
|
filename: Output filename
|
|
figsize: Figure size
|
|
"""
|
|
if df is None or df.empty:
|
|
return
|
|
|
|
df = self.parse_timestamps(df, time_col)
|
|
|
|
fig, ax = plt.subplots(figsize=figsize)
|
|
|
|
energy_cols = [col for col in df.columns if col != time_col]
|
|
|
|
for col in energy_cols:
|
|
if col in self.colors:
|
|
color = self.colors[col]
|
|
else:
|
|
color = None
|
|
ax.plot(df[time_col], df[col], label=col, linewidth=2, color=color, marker='o', markersize=3)
|
|
|
|
ax.set_title(title, fontweight='bold', pad=20)
|
|
ax.set_xlabel('Czas', fontweight='bold')
|
|
ax.set_ylabel('Energia [kWh]', fontweight='bold')
|
|
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
|
|
ax.grid(True, alpha=0.3)
|
|
|
|
# Format x-axis based on data type
|
|
if time_col in ['Timestamp', 'Date']:
|
|
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
|
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
|
|
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
|
|
elif time_col == 'WeekStart':
|
|
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
|
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
|
|
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
|
|
|
|
plt.tight_layout()
|
|
output_path = self.output_dir / f"{filename}.png"
|
|
plt.savefig(output_path, dpi=300, bbox_inches='tight')
|
|
plt.close()
|
|
print(f"Saved chart: {output_path}")
|
|
|
|
def plot_stacked_area(self, df: pd.DataFrame, time_col: str, title: str,
|
|
filename: str, figsize: tuple = (15, 8)):
|
|
"""
|
|
Create stacked area chart for energy data.
|
|
"""
|
|
if df is None or df.empty:
|
|
return
|
|
|
|
df = self.parse_timestamps(df, time_col)
|
|
|
|
fig, ax = plt.subplots(figsize=figsize)
|
|
|
|
energy_cols = [col for col in df.columns if col != time_col]
|
|
|
|
# Prepare data for stacking (only positive values)
|
|
positive_data = df[energy_cols].clip(lower=0)
|
|
|
|
colors_list = [self.colors.get(col, None) for col in energy_cols]
|
|
|
|
ax.stackplot(df[time_col], *[positive_data[col] for col in energy_cols],
|
|
labels=energy_cols, colors=colors_list, alpha=0.7)
|
|
|
|
ax.set_title(title, fontweight='bold', pad=20)
|
|
ax.set_xlabel('Czas', fontweight='bold')
|
|
ax.set_ylabel('Energia [kWh]', fontweight='bold')
|
|
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
|
|
ax.grid(True, alpha=0.3)
|
|
|
|
# Format x-axis
|
|
if time_col in ['Timestamp', 'Date', 'WeekStart']:
|
|
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
|
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
|
|
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
|
|
|
|
plt.tight_layout()
|
|
output_path = self.output_dir / f"{filename}.png"
|
|
plt.savefig(output_path, dpi=300, bbox_inches='tight')
|
|
plt.close()
|
|
print(f"Saved chart: {output_path}")
|
|
|
|
def plot_monthly_hourly_averages(self, df_hourly: pd.DataFrame):
|
|
"""
|
|
Create monthly hourly average charts for each month.
|
|
"""
|
|
if df_hourly is None or df_hourly.empty:
|
|
return
|
|
|
|
df = self.parse_timestamps(df_hourly, 'Timestamp')
|
|
|
|
# Add hour and month columns
|
|
df['Hour'] = df['Timestamp'].dt.hour
|
|
df['Month'] = df['Timestamp'].dt.to_period('M')
|
|
|
|
energy_cols = [col for col in df.columns if col not in ['Timestamp', 'Hour', 'Month']]
|
|
|
|
# Get unique months
|
|
months = sorted(df['Month'].unique())
|
|
|
|
# Create subplots for each month
|
|
n_months = len(months)
|
|
cols = 3 # 3 charts per row
|
|
rows = (n_months + cols - 1) // cols
|
|
|
|
fig, axes = plt.subplots(rows, cols, figsize=(20, 6 * rows))
|
|
if rows == 1:
|
|
axes = axes.reshape(1, -1)
|
|
elif n_months == 1:
|
|
axes = axes.reshape(1, 1)
|
|
|
|
for i, month in enumerate(months):
|
|
row = i // cols
|
|
col = i % cols
|
|
ax = axes[row, col] if rows > 1 else axes[col]
|
|
|
|
# Filter data for this month
|
|
month_data = df[df['Month'] == month]
|
|
|
|
# Calculate hourly averages
|
|
hourly_avg = month_data.groupby('Hour')[energy_cols].mean()
|
|
|
|
# Plot each energy type
|
|
for energy_col in energy_cols:
|
|
color = self.colors.get(energy_col, None)
|
|
ax.plot(hourly_avg.index, hourly_avg[energy_col],
|
|
label=energy_col, linewidth=2.5, color=color, marker='o', markersize=4)
|
|
|
|
ax.set_title(f'{month} - Średnie godzinowe zużycie energii', fontweight='bold')
|
|
ax.set_xlabel('Godzina')
|
|
ax.set_ylabel('Energia [kWh]')
|
|
ax.grid(True, alpha=0.3)
|
|
ax.legend(fontsize=8)
|
|
ax.set_xticks(range(0, 24, 2))
|
|
|
|
# Add zero line for reference
|
|
ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
|
|
|
|
# Hide unused subplots
|
|
for i in range(n_months, rows * cols):
|
|
row = i // cols
|
|
col = i % cols
|
|
axes[row, col].set_visible(False)
|
|
|
|
plt.suptitle('Miesięczne profile godzinowe zużycia energii', fontsize=16, fontweight='bold')
|
|
plt.tight_layout()
|
|
|
|
output_path = self.output_dir / "monthly_hourly_profiles.png"
|
|
plt.savefig(output_path, dpi=300, bbox_inches='tight')
|
|
plt.close()
|
|
print(f"Saved chart: {output_path}")
|
|
|
|
def plot_cost_analysis(self, df: pd.DataFrame, time_col: str, title_suffix: str):
|
|
"""
|
|
Create cost analysis charts.
|
|
|
|
Args:
|
|
df: DataFrame with energy data
|
|
time_col: Time column name
|
|
title_suffix: Suffix for the title (e.g., 'miesięczne', 'dzienne')
|
|
"""
|
|
if df is None or df.empty:
|
|
return
|
|
|
|
df = self.parse_timestamps(df, time_col)
|
|
|
|
energy_cols = [col for col in df.columns if col != time_col and not col.startswith('Koszt') and not col.startswith('Przychód')]
|
|
df_with_costs = self.calculate_costs(df, energy_cols)
|
|
|
|
# Create cost analysis chart
|
|
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
|
|
|
|
# Energy costs breakdown
|
|
if 'Koszt_pobrana' in df_with_costs.columns:
|
|
ax1.bar(df_with_costs[time_col], df_with_costs['Koszt_pobrana'],
|
|
label=f'Koszt energii pobranej ({self.buy_price} zł/kWh)',
|
|
color='#F18F01', alpha=0.7)
|
|
|
|
if 'Przychód_oddana' in df_with_costs.columns:
|
|
ax1.bar(df_with_costs[time_col], -df_with_costs['Przychód_oddana'],
|
|
label=f'Przychód z energii oddanej ({self.sell_price} zł/kWh)',
|
|
color='#A23B72', alpha=0.7)
|
|
|
|
ax1.set_title(f'Analiza kosztów energii - {title_suffix}', fontweight='bold', pad=20)
|
|
ax1.set_xlabel('Okres', fontweight='bold')
|
|
ax1.set_ylabel('Kwota [zł]', fontweight='bold')
|
|
ax1.legend()
|
|
ax1.grid(True, alpha=0.3)
|
|
ax1.axhline(y=0, color='black', linestyle='-', alpha=0.3)
|
|
|
|
# Net cost trend
|
|
if 'Koszt_netto' in df_with_costs.columns:
|
|
colors = ['red' if x > 0 else 'green' for x in df_with_costs['Koszt_netto']]
|
|
ax2.bar(df_with_costs[time_col], df_with_costs['Koszt_netto'],
|
|
color=colors, alpha=0.7)
|
|
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.5)
|
|
ax2.set_title(f'Koszt netto energii - {title_suffix}', fontweight='bold', pad=20)
|
|
ax2.set_xlabel('Okres', fontweight='bold')
|
|
ax2.set_ylabel('Koszt netto [zł]', fontweight='bold')
|
|
ax2.grid(True, alpha=0.3)
|
|
|
|
# Add summary text
|
|
total_cost = df_with_costs['Koszt_netto'].sum()
|
|
color = 'red' if total_cost > 0 else 'green'
|
|
ax2.text(0.02, 0.98, f'Łączny koszt: {total_cost:.2f} zł',
|
|
transform=ax2.transAxes, fontsize=12, fontweight='bold',
|
|
verticalalignment='top', color=color,
|
|
bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
|
|
|
|
# Format x-axis
|
|
if time_col in ['Timestamp', 'Date', 'WeekStart']:
|
|
for ax in [ax1, ax2]:
|
|
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
|
if len(df_with_costs) > 10:
|
|
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
|
|
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
|
|
|
|
plt.tight_layout()
|
|
|
|
filename = f"cost_analysis_{title_suffix.replace(' ', '_')}"
|
|
output_path = self.output_dir / f"{filename}.png"
|
|
plt.savefig(output_path, dpi=300, bbox_inches='tight')
|
|
plt.close()
|
|
print(f"Saved chart: {output_path}")
|
|
|
|
def plot_monthly_comparison(self, df: pd.DataFrame):
|
|
"""
|
|
Create monthly comparison charts.
|
|
"""
|
|
if df is None or df.empty:
|
|
return
|
|
|
|
df = self.parse_timestamps(df, 'Month')
|
|
|
|
# Monthly totals comparison
|
|
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
|
|
|
|
energy_cols = [col for col in df.columns if col != 'Month']
|
|
|
|
# Bar chart
|
|
x_pos = range(len(df))
|
|
width = 0.25
|
|
|
|
for i, col in enumerate(energy_cols):
|
|
color = self.colors.get(col, None)
|
|
ax1.bar([x + i * width for x in x_pos], df[col], width,
|
|
label=col, color=color, alpha=0.8)
|
|
|
|
ax1.set_title('Miesięczne zużycie energii - porównanie', fontweight='bold', pad=20)
|
|
ax1.set_xlabel('Miesiąc', fontweight='bold')
|
|
ax1.set_ylabel('Energia [kWh]', fontweight='bold')
|
|
ax1.set_xticks([x + width for x in x_pos])
|
|
ax1.set_xticklabels(df['Month'].dt.strftime('%Y-%m'), rotation=45)
|
|
ax1.legend()
|
|
ax1.grid(True, alpha=0.3)
|
|
|
|
# Net energy (zbilansowana) focus
|
|
if 'Czynna zbilansowana' in energy_cols:
|
|
color = self.colors.get('Czynna zbilansowana', 'blue')
|
|
ax2.plot(df['Month'], df['Czynna zbilansowana'],
|
|
marker='o', linewidth=3, markersize=8, color=color)
|
|
ax2.axhline(y=0, color='red', linestyle='--', alpha=0.7)
|
|
ax2.set_title('Energia zbilansowana - trend miesięczny', fontweight='bold', pad=20)
|
|
ax2.set_xlabel('Miesiąc', fontweight='bold')
|
|
ax2.set_ylabel('Energia zbilansowana [kWh]', fontweight='bold')
|
|
ax2.grid(True, alpha=0.3)
|
|
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
|
|
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)
|
|
|
|
plt.tight_layout()
|
|
output_path = self.output_dir / "monthly_comparison.png"
|
|
plt.savefig(output_path, dpi=300, bbox_inches='tight')
|
|
plt.close()
|
|
print(f"Saved chart: {output_path}")
|
|
|
|
def generate_all_charts(self):
|
|
"""
|
|
Generate all available charts for the dataset.
|
|
"""
|
|
print(f"Generating charts for: {self.base_filename}")
|
|
print(f"Output directory: {self.output_dir}")
|
|
print(f"Energy prices: Buy={self.buy_price} zł/kWh, Sell={self.sell_price} zł/kWh")
|
|
|
|
# Load all data types
|
|
hourly_data = self.load_data('-hourly')
|
|
daily_data = self.load_data('-daily')
|
|
weekly_data = self.load_data('-weekly')
|
|
monthly_data = self.load_data('-monthly')
|
|
|
|
# Generate time series charts
|
|
if hourly_data is not None:
|
|
# Sample hourly data for better visualization (first 30 days)
|
|
hourly_sample = hourly_data.head(24 * 30) if len(hourly_data) > 720 else hourly_data
|
|
self.plot_time_series(hourly_sample, 'Timestamp',
|
|
'Zużycie energii - dane godzinowe (30 dni)', 'hourly_timeseries')
|
|
|
|
# Generate monthly hourly averages
|
|
self.plot_monthly_hourly_averages(hourly_data)
|
|
|
|
if daily_data is not None:
|
|
self.plot_time_series(daily_data, 'Date',
|
|
'Zużycie energii - dane dzienne', 'daily_timeseries')
|
|
|
|
self.plot_stacked_area(daily_data, 'Date',
|
|
'Zużycie energii - obszar skumulowany (dzienne)', 'daily_stacked')
|
|
|
|
# Generate daily cost analysis
|
|
self.plot_cost_analysis(daily_data, 'Date', 'dzienne')
|
|
|
|
if weekly_data is not None:
|
|
self.plot_time_series(weekly_data, 'WeekStart',
|
|
'Zużycie energii - dane tygodniowe', 'weekly_timeseries')
|
|
|
|
if monthly_data is not None:
|
|
self.plot_time_series(monthly_data, 'Month',
|
|
'Zużycie energii - dane miesięczne', 'monthly_timeseries')
|
|
|
|
self.plot_monthly_comparison(monthly_data)
|
|
|
|
# Generate monthly cost analysis
|
|
self.plot_cost_analysis(monthly_data, 'Month', 'miesięczne')
|
|
|
|
print("\nChart generation completed!")
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Generate visualizations for PGE energy data"
|
|
)
|
|
parser.add_argument(
|
|
"base_filename",
|
|
help="Base filename without suffix (e.g., '202512011130_590543580100122940_46f7640c-07d2-4e64-9ba7-20b8c2eeca7d')"
|
|
)
|
|
parser.add_argument(
|
|
"--data-dir", "-d",
|
|
default=".",
|
|
help="Directory containing CSV files (default: current directory)"
|
|
)
|
|
parser.add_argument(
|
|
"--output-dir", "-o",
|
|
default="charts",
|
|
help="Output directory for charts (default: 'charts')"
|
|
)
|
|
parser.add_argument(
|
|
"--buy-price", "-b",
|
|
type=float,
|
|
default=1.4,
|
|
help="Price per kWh for energy consumed from grid (default: 1.4 PLN)"
|
|
)
|
|
parser.add_argument(
|
|
"--sell-price", "-s",
|
|
type=float,
|
|
default=0.2,
|
|
help="Price per kWh for energy delivered to grid (default: 0.2 PLN)"
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
try:
|
|
visualizer = PGEDataVisualizer(
|
|
base_filename=args.base_filename,
|
|
data_dir=args.data_dir,
|
|
output_dir=args.output_dir,
|
|
buy_price=args.buy_price,
|
|
sell_price=args.sell_price
|
|
)
|
|
|
|
visualizer.generate_all_charts()
|
|
|
|
except Exception as e:
|
|
print(f"Error: {e}")
|
|
return 1
|
|
|
|
return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
exit(main()) |