import GPy
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
# Load the processed readings and convert the "time" column from text to
# POSIX seconds so it can be used as a numeric feature.
data = pd.read_csv("../../beat_stgnp/dataset/bjair/NP/processed_raw.csv")
data["time"] = pd.to_datetime(data["time"], format="%Y-%m-%d %H:%M:%S")
data["time"] = data["time"].apply(lambda ts: ts.timestamp())

# Names of the feature columns (x) and the target column (y).
x = ["latitude", "longitude", "time"]
y = ["PM25_Concentration"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible. The frames are then converted to plain NumPy arrays.
x_train, x_test, y_train, y_test = train_test_split(
    data[x], data[y], test_size=0.2, random_state=42
)
x_train, x_test, y_train, y_test = (
    part.to_numpy() for part in (x_train, x_test, y_train, y_test)
)

# Fit both scalers on the training split only, so no statistics from the
# held-out rows leak into training. y_test is deliberately left unscaled:
# the error below is computed in the target's original units.
x_scaler = MinMaxScaler()
y_scaler = StandardScaler()
x_train = x_scaler.fit_transform(x_train)
y_train = y_scaler.fit_transform(y_train)
x_test = x_scaler.transform(x_test)

model = RandomForestRegressor(n_estimators=1000, random_state=42)
# y_train is a single-column 2-D array; ravel() hands the regressor the
# 1-D target it expects.
model.fit(x_train, y_train.ravel())
y_pred = model.predict(x_test)

# predict() returns a 1-D array, while the scaler was fitted on a 2-D
# column; reshape to (n_samples, 1) before undoing the scaling, then
# flatten again to compare against the unscaled test targets.
y_pred_original = y_scaler.inverse_transform(y_pred.reshape(-1, 1)).ravel()
print("RMSE", np.sqrt(np.mean((y_pred_original - y_test.ravel()) ** 2)))
# Notebook output: /tmp/ipykernel_922642/3470971270.py:18: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().