#!/usr/bin/env python3 import pandas as pd import matplotlib.pyplot as plt import matplotlib.dates as mdates import seaborn as sns import argparse import os from pathlib import Path from typing import Optional, List, Dict import warnings # Suppress matplotlib warnings warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib") class PGEDataVisualizer: """ Class for visualizing PGE energy meter data from converted CSV reports. Creates various charts for different time aggregations. """ def __init__(self, base_filename: str, data_dir: str = ".", output_dir: str = "charts", buy_price: float = 1.4, sell_price: float = 0.2): """ Initialize the visualizer. Args: base_filename: Base filename without suffix (e.g., '202512011130_590543580100122940_46f7640c-07d2-4e64-9ba7-20b8c2eeca7d') data_dir: Directory containing the CSV files output_dir: Directory to save charts buy_price: Price per kWh for energy consumed from grid (default: 1.4 PLN) sell_price: Price per kWh for energy delivered to grid (default: 0.2 PLN) """ self.base_filename = base_filename self.data_dir = Path(data_dir) self.output_dir = Path(output_dir) self.output_dir.mkdir(parents=True, exist_ok=True) self.buy_price = buy_price self.sell_price = sell_price # Polish energy type colors (consistent across all charts) self.colors = { 'Czynna zbilansowana': '#2E86AB', # Blue - Net energy 'Czynna oddana': '#A23B72', # Purple - Energy delivered 'Czynna pobrana': '#F18F01' # Orange - Energy consumed } # Set matplotlib style plt.style.use('seaborn-v0_8-darkgrid') sns.set_palette("husl") # Configure Polish locale for better formatting self.setup_locale() def setup_locale(self): """Setup matplotlib for better Polish text rendering.""" plt.rcParams['font.size'] = 10 plt.rcParams['axes.titlesize'] = 14 plt.rcParams['axes.labelsize'] = 12 plt.rcParams['xtick.labelsize'] = 10 plt.rcParams['ytick.labelsize'] = 10 plt.rcParams['legend.fontsize'] = 10 plt.rcParams['figure.titlesize'] = 16 def load_data(self, suffix: str) -> Optional[pd.DataFrame]: """ Load data from CSV file with given suffix. Args: suffix: File suffix (e.g., '-hourly', '-daily') Returns: DataFrame or None if file doesn't exist """ filename = f"{self.base_filename}{suffix}.csv" filepath = self.data_dir / filename if not filepath.exists(): print(f"Warning: File not found: {filepath}") return None try: df = pd.read_csv(filepath, sep=';') print(f"Loaded {suffix} data: {len(df)} rows") return df except Exception as e: print(f"Error loading {filepath}: {e}") return None def parse_timestamps(self, df: pd.DataFrame, time_col: str) -> pd.DataFrame: """ Parse timestamp column to datetime. Args: df: DataFrame with time column time_col: Name of time column Returns: DataFrame with parsed datetime column """ df = df.copy() if time_col == 'Timestamp': df[time_col] = pd.to_datetime(df[time_col]) elif time_col == 'Date': df[time_col] = pd.to_datetime(df[time_col]) elif time_col == 'WeekStart': df[time_col] = pd.to_datetime(df[time_col]) elif time_col == 'Month': df[time_col] = pd.to_datetime(df[time_col] + '-01') return df def calculate_costs(self, df: pd.DataFrame, energy_cols: List[str]) -> pd.DataFrame: """ Calculate energy costs based on buy/sell prices. Args: df: DataFrame with energy columns energy_cols: List of energy column names Returns: DataFrame with added cost columns """ df = df.copy() if 'Czynna pobrana' in energy_cols: df['Koszt_pobrana'] = df['Czynna pobrana'] * self.buy_price if 'Czynna oddana' in energy_cols: df['Przychód_oddana'] = df['Czynna oddana'] * self.sell_price # Net cost (positive = cost, negative = profit) if 'Koszt_pobrana' in df.columns and 'Przychód_oddana' in df.columns: df['Koszt_netto'] = df['Koszt_pobrana'] - df['Przychód_oddana'] elif 'Koszt_pobrana' in df.columns: df['Koszt_netto'] = df['Koszt_pobrana'] elif 'Przychód_oddana' in df.columns: df['Koszt_netto'] = -df['Przychód_oddana'] return df def plot_time_series(self, df: pd.DataFrame, time_col: str, title: str, filename: str, figsize: tuple = (15, 8)): """ Create time series plot for energy data. Args: df: DataFrame with time and energy columns time_col: Name of time column title: Plot title filename: Output filename figsize: Figure size """ if df is None or df.empty: return df = self.parse_timestamps(df, time_col) fig, ax = plt.subplots(figsize=figsize) energy_cols = [col for col in df.columns if col != time_col] for col in energy_cols: if col in self.colors: color = self.colors[col] else: color = None ax.plot(df[time_col], df[col], label=col, linewidth=2, color=color, marker='o', markersize=3) ax.set_title(title, fontweight='bold', pad=20) ax.set_xlabel('Czas', fontweight='bold') ax.set_ylabel('Energia [kWh]', fontweight='bold') ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') ax.grid(True, alpha=0.3) # Format x-axis based on data type if time_col in ['Timestamp', 'Date']: ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1)) plt.setp(ax.xaxis.get_majorticklabels(), rotation=45) elif time_col == 'WeekStart': ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1)) plt.setp(ax.xaxis.get_majorticklabels(), rotation=45) plt.tight_layout() output_path = self.output_dir / f"{filename}.png" plt.savefig(output_path, dpi=300, bbox_inches='tight') plt.close() print(f"Saved chart: {output_path}") def plot_stacked_area(self, df: pd.DataFrame, time_col: str, title: str, filename: str, figsize: tuple = (15, 8)): """ Create stacked area chart for energy data. """ if df is None or df.empty: return df = self.parse_timestamps(df, time_col) fig, ax = plt.subplots(figsize=figsize) energy_cols = [col for col in df.columns if col != time_col] # Prepare data for stacking (only positive values) positive_data = df[energy_cols].clip(lower=0) colors_list = [self.colors.get(col, None) for col in energy_cols] ax.stackplot(df[time_col], *[positive_data[col] for col in energy_cols], labels=energy_cols, colors=colors_list, alpha=0.7) ax.set_title(title, fontweight='bold', pad=20) ax.set_xlabel('Czas', fontweight='bold') ax.set_ylabel('Energia [kWh]', fontweight='bold') ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') ax.grid(True, alpha=0.3) # Format x-axis if time_col in ['Timestamp', 'Date', 'WeekStart']: ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1)) plt.setp(ax.xaxis.get_majorticklabels(), rotation=45) plt.tight_layout() output_path = self.output_dir / f"{filename}.png" plt.savefig(output_path, dpi=300, bbox_inches='tight') plt.close() print(f"Saved chart: {output_path}") def plot_monthly_hourly_averages(self, df_hourly: pd.DataFrame): """ Create monthly hourly average charts for each month. """ if df_hourly is None or df_hourly.empty: return df = self.parse_timestamps(df_hourly, 'Timestamp') # Add hour and month columns df['Hour'] = df['Timestamp'].dt.hour df['Month'] = df['Timestamp'].dt.to_period('M') energy_cols = [col for col in df.columns if col not in ['Timestamp', 'Hour', 'Month']] # Get unique months months = sorted(df['Month'].unique()) # Create subplots for each month n_months = len(months) cols = 3 # 3 charts per row rows = (n_months + cols - 1) // cols fig, axes = plt.subplots(rows, cols, figsize=(20, 6 * rows)) if rows == 1: axes = axes.reshape(1, -1) elif n_months == 1: axes = axes.reshape(1, 1) for i, month in enumerate(months): row = i // cols col = i % cols ax = axes[row, col] if rows > 1 else axes[col] # Filter data for this month month_data = df[df['Month'] == month] # Calculate hourly averages hourly_avg = month_data.groupby('Hour')[energy_cols].mean() # Plot each energy type for energy_col in energy_cols: color = self.colors.get(energy_col, None) ax.plot(hourly_avg.index, hourly_avg[energy_col], label=energy_col, linewidth=2.5, color=color, marker='o', markersize=4) ax.set_title(f'{month} - Średnie godzinowe zużycie energii', fontweight='bold') ax.set_xlabel('Godzina') ax.set_ylabel('Energia [kWh]') ax.grid(True, alpha=0.3) ax.legend(fontsize=8) ax.set_xticks(range(0, 24, 2)) # Add zero line for reference ax.axhline(y=0, color='red', linestyle='--', alpha=0.5) # Hide unused subplots for i in range(n_months, rows * cols): row = i // cols col = i % cols axes[row, col].set_visible(False) plt.suptitle('Miesięczne profile godzinowe zużycia energii', fontsize=16, fontweight='bold') plt.tight_layout() output_path = self.output_dir / "monthly_hourly_profiles.png" plt.savefig(output_path, dpi=300, bbox_inches='tight') plt.close() print(f"Saved chart: {output_path}") def plot_cost_analysis(self, df: pd.DataFrame, time_col: str, title_suffix: str): """ Create cost analysis charts. Args: df: DataFrame with energy data time_col: Time column name title_suffix: Suffix for the title (e.g., 'miesięczne', 'dzienne') """ if df is None or df.empty: return df = self.parse_timestamps(df, time_col) energy_cols = [col for col in df.columns if col != time_col and not col.startswith('Koszt') and not col.startswith('Przychód')] df_with_costs = self.calculate_costs(df, energy_cols) # Create cost analysis chart fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12)) # Energy costs breakdown if 'Koszt_pobrana' in df_with_costs.columns: ax1.bar(df_with_costs[time_col], df_with_costs['Koszt_pobrana'], label=f'Koszt energii pobranej ({self.buy_price} zł/kWh)', color='#F18F01', alpha=0.7) if 'Przychód_oddana' in df_with_costs.columns: ax1.bar(df_with_costs[time_col], -df_with_costs['Przychód_oddana'], label=f'Przychód z energii oddanej ({self.sell_price} zł/kWh)', color='#A23B72', alpha=0.7) ax1.set_title(f'Analiza kosztów energii - {title_suffix}', fontweight='bold', pad=20) ax1.set_xlabel('Okres', fontweight='bold') ax1.set_ylabel('Kwota [zł]', fontweight='bold') ax1.legend() ax1.grid(True, alpha=0.3) ax1.axhline(y=0, color='black', linestyle='-', alpha=0.3) # Net cost trend if 'Koszt_netto' in df_with_costs.columns: colors = ['red' if x > 0 else 'green' for x in df_with_costs['Koszt_netto']] ax2.bar(df_with_costs[time_col], df_with_costs['Koszt_netto'], color=colors, alpha=0.7) ax2.axhline(y=0, color='black', linestyle='-', alpha=0.5) ax2.set_title(f'Koszt netto energii - {title_suffix}', fontweight='bold', pad=20) ax2.set_xlabel('Okres', fontweight='bold') ax2.set_ylabel('Koszt netto [zł]', fontweight='bold') ax2.grid(True, alpha=0.3) # Add summary text total_cost = df_with_costs['Koszt_netto'].sum() color = 'red' if total_cost > 0 else 'green' ax2.text(0.02, 0.98, f'Łączny koszt: {total_cost:.2f} zł', transform=ax2.transAxes, fontsize=12, fontweight='bold', verticalalignment='top', color=color, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8)) # Format x-axis if time_col in ['Timestamp', 'Date', 'WeekStart']: for ax in [ax1, ax2]: ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d')) if len(df_with_costs) > 10: ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1)) plt.setp(ax.xaxis.get_majorticklabels(), rotation=45) plt.tight_layout() filename = f"cost_analysis_{title_suffix.replace(' ', '_')}" output_path = self.output_dir / f"{filename}.png" plt.savefig(output_path, dpi=300, bbox_inches='tight') plt.close() print(f"Saved chart: {output_path}") def plot_monthly_comparison(self, df: pd.DataFrame): """ Create monthly comparison charts. """ if df is None or df.empty: return df = self.parse_timestamps(df, 'Month') # Monthly totals comparison fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12)) energy_cols = [col for col in df.columns if col != 'Month'] # Bar chart x_pos = range(len(df)) width = 0.25 for i, col in enumerate(energy_cols): color = self.colors.get(col, None) ax1.bar([x + i * width for x in x_pos], df[col], width, label=col, color=color, alpha=0.8) ax1.set_title('Miesięczne zużycie energii - porównanie', fontweight='bold', pad=20) ax1.set_xlabel('Miesiąc', fontweight='bold') ax1.set_ylabel('Energia [kWh]', fontweight='bold') ax1.set_xticks([x + width for x in x_pos]) ax1.set_xticklabels(df['Month'].dt.strftime('%Y-%m'), rotation=45) ax1.legend() ax1.grid(True, alpha=0.3) # Net energy (zbilansowana) focus if 'Czynna zbilansowana' in energy_cols: color = self.colors.get('Czynna zbilansowana', 'blue') ax2.plot(df['Month'], df['Czynna zbilansowana'], marker='o', linewidth=3, markersize=8, color=color) ax2.axhline(y=0, color='red', linestyle='--', alpha=0.7) ax2.set_title('Energia zbilansowana - trend miesięczny', fontweight='bold', pad=20) ax2.set_xlabel('Miesiąc', fontweight='bold') ax2.set_ylabel('Energia zbilansowana [kWh]', fontweight='bold') ax2.grid(True, alpha=0.3) ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45) plt.tight_layout() output_path = self.output_dir / "monthly_comparison.png" plt.savefig(output_path, dpi=300, bbox_inches='tight') plt.close() print(f"Saved chart: {output_path}") def generate_all_charts(self): """ Generate all available charts for the dataset. """ print(f"Generating charts for: {self.base_filename}") print(f"Output directory: {self.output_dir}") print(f"Energy prices: Buy={self.buy_price} zł/kWh, Sell={self.sell_price} zł/kWh") # Load all data types hourly_data = self.load_data('-hourly') daily_data = self.load_data('-daily') weekly_data = self.load_data('-weekly') monthly_data = self.load_data('-monthly') # Generate time series charts if hourly_data is not None: # Sample hourly data for better visualization (first 30 days) hourly_sample = hourly_data.head(24 * 30) if len(hourly_data) > 720 else hourly_data self.plot_time_series(hourly_sample, 'Timestamp', 'Zużycie energii - dane godzinowe (30 dni)', 'hourly_timeseries') # Generate monthly hourly averages self.plot_monthly_hourly_averages(hourly_data) if daily_data is not None: self.plot_time_series(daily_data, 'Date', 'Zużycie energii - dane dzienne', 'daily_timeseries') self.plot_stacked_area(daily_data, 'Date', 'Zużycie energii - obszar skumulowany (dzienne)', 'daily_stacked') # Generate daily cost analysis self.plot_cost_analysis(daily_data, 'Date', 'dzienne') if weekly_data is not None: self.plot_time_series(weekly_data, 'WeekStart', 'Zużycie energii - dane tygodniowe', 'weekly_timeseries') if monthly_data is not None: self.plot_time_series(monthly_data, 'Month', 'Zużycie energii - dane miesięczne', 'monthly_timeseries') self.plot_monthly_comparison(monthly_data) # Generate monthly cost analysis self.plot_cost_analysis(monthly_data, 'Month', 'miesięczne') print("\nChart generation completed!") def main(): parser = argparse.ArgumentParser( description="Generate visualizations for PGE energy data" ) parser.add_argument( "base_filename", help="Base filename without suffix (e.g., '202512011130_590543580100122940_46f7640c-07d2-4e64-9ba7-20b8c2eeca7d')" ) parser.add_argument( "--data-dir", "-d", default=".", help="Directory containing CSV files (default: current directory)" ) parser.add_argument( "--output-dir", "-o", default="charts", help="Output directory for charts (default: 'charts')" ) parser.add_argument( "--buy-price", "-b", type=float, default=1.4, help="Price per kWh for energy consumed from grid (default: 1.4 PLN)" ) parser.add_argument( "--sell-price", "-s", type=float, default=0.2, help="Price per kWh for energy delivered to grid (default: 0.2 PLN)" ) args = parser.parse_args() try: visualizer = PGEDataVisualizer( base_filename=args.base_filename, data_dir=args.data_dir, output_dir=args.output_dir, buy_price=args.buy_price, sell_price=args.sell_price ) visualizer.generate_all_charts() except Exception as e: print(f"Error: {e}") return 1 return 0 if __name__ == "__main__": exit(main())