import pandas as pd
import numpy as np

# Raw weather CSV headers -> short column names used by the rest of the code.
_WEATHER_COLUMN_NAMES = {
    "Date UTC": "date",
    "T° (C)": "temp",
    "P (hPa)": "press",
    "HR (%)": "humidity",
    "P.rosée (°C)": "dew-point",
    "Visi (km)": "visibl",
    "Vt. moy. (km/h)": "wind-avg",
    "Vt. raf. (km/h)": "wind-max",
    "Vt. dir (°)": "wind-dir",
    "RR 3h (mm)": "rain",
    "Neige (cm)": "snow",
    "Nebul. (octats)": "nebul",
}

# Columns whose gaps are filled with the most frequent value (none so far).
_QUALITATIVE_COLUMNS = ()

# Columns whose gaps are filled with the column median.
_QUANTITATIVE_COLUMNS = (
    "temp",
    "press",
    "humidity",
    "dew-point",
    "visibl",
    "wind-avg",
    "wind-max",
    "wind-dir",
    "rain",
    "snow",
    "nebul",
)


def concatenate_data(weather_file: str, cons_file: str) -> pd.DataFrame:
    """Join weather observations with 3-hourly summed power consumption.

    Both inputs are read as semicolon-separated CSV files.

    Weather side: the columns are renamed to short names, missing values in
    the quantitative columns are replaced by the column median, the ``date``
    column is parsed with the ``%d/%m/%y %Hh%M`` format, and ``weekday``,
    ``month``, ``year`` and ``hour`` columns are derived from it.

    Consumption side: ``date`` is parsed, rows with a duplicated ``date`` are
    dropped (first occurrence kept), and ``puissance`` is summed over 3-hour
    windows labelled by their right edge.

    Args:
        weather_file: Path of the weather CSV.
        cons_file: Path of the consumption CSV; must provide ``date`` and
            ``puissance`` columns.

    Returns:
        The weather frame with an added ``puissance`` column, left-joined on
        ``date``. Weather rows without a matching window get NaN. The row
        index labels are strings.
    """
    df_wea = pd.read_csv(weather_file, sep=";")
    df_pow = pd.read_csv(cons_file, sep=";")

    # index=str is kept on purpose: the returned frame has always carried
    # string index labels and callers may rely on that.
    df_wea = df_wea.rename(index=str, columns=_WEATHER_COLUMN_NAMES)

    for col in _QUALITATIVE_COLUMNS:
        modes = df_wea[col].mode()
        # mode() returns a Series; passing it to fillna would align on the
        # index instead of filling every gap, so use its first value.
        if not modes.empty:
            df_wea[col] = df_wea[col].fillna(value=modes.iloc[0])
    for col in _QUANTITATIVE_COLUMNS:
        df_wea[col] = df_wea[col].fillna(value=df_wea[col].median())

    df_wea["date"] = pd.to_datetime(df_wea["date"], format="%d/%m/%y %Hh%M")
    timestamps = df_wea["date"].dt
    df_wea["weekday"] = timestamps.weekday
    df_wea["month"] = timestamps.month
    df_wea["year"] = timestamps.year
    df_wea["hour"] = timestamps.hour

    # infer_datetime_format is deprecated (format inference is the default
    # in pandas 2.x), so it is no longer passed.
    df_pow["date"] = pd.to_datetime(df_pow["date"])
    df_pow = df_pow.drop_duplicates(subset="date").set_index("date")

    # A Timedelta is used instead of the "3H" alias, whose upper-case form is
    # deprecated in recent pandas; the bins are the same 3-hour windows.
    ser_pow = (
        df_pow["puissance"]
        .resample(pd.Timedelta(hours=3), label="right")
        .sum()
    )

    return df_wea.join(other=ser_pow, on="date")


if __name__ == "__main__":
    # Only run on the training files when executed as a script, so that
    # importing this module does not touch the filesystem.
    concatenate_data("data/meteo_train.csv", "data/conso_train.csv")