Basis functions

import GPy
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

import matplotlib.pyplot as plt

# Read the dataset CSV, parse the "time" strings with an explicit format, and
# replace the column with numeric POSIX timestamps (floats) so that time can
# be used as an ordinary numeric feature.
data = pd.read_csv("../../beat_stgnp/dataset/bjair/NP/processed_raw.csv")
parsed_time = pd.to_datetime(data["time"], format="%Y-%m-%d %H:%M:%S")
data["time"] = parsed_time.map(lambda stamp: stamp.timestamp())

# Feature and target column names. Both are lists, so data[x] and data[y] are
# DataFrames and the arrays extracted below are 2-D.
x = ["latitude", "longitude", "time"]
y = ["PM25_Concentration"]

# Hold out 20% of the rows for testing (fixed seed for a reproducible split),
# then swap each resulting frame for its underlying NumPy array.
x_train, x_test, y_train, y_test = (
    part.values
    for part in train_test_split(
        data[x], data[y], test_size=0.2, random_state=42
    )
)

# Min-max scale the inputs and standardize the target. Both scalers are fitted
# on the training portion only; the test inputs reuse the fitted input scaler.
# y_test is not transformed here and stays in its original units.
x_scaler = MinMaxScaler().fit(x_train)
y_scaler = StandardScaler().fit(y_train)
x_train, x_test = x_scaler.transform(x_train), x_scaler.transform(x_test)
y_train = y_scaler.transform(y_train)

# Fit a random-forest regressor on the scaled features and standardized target.
model = RandomForestRegressor(n_estimators=1000, random_state=42)
model.fit(x_train, y_train.ravel())  # ravel: pass the target as a 1-D array

# predict() returns a 1-D array, but y_scaler was fitted on a 2-D column and
# its inverse_transform validates the input as 2-D. Reshape to a single column
# before undoing the standardization, then flatten again for the comparison.
y_pred = model.predict(x_test)
y_pred_original = y_scaler.inverse_transform(y_pred.reshape(-1, 1)).ravel()

# RMSE in the target's original units (y_test was never scaled).
rmse = np.sqrt(np.mean((y_pred_original - y_test.ravel()) ** 2))
print("RMSE", rmse)

 /tmp/ipykernel_922642/3470971270.py:18: DataConversionWarning:A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().