# Find optimal parameters for a given kernel with a randomized coordinate descent scheme
# Sergey V. | 2023

source('gp_util_funcs.R')

# Load necessary libraries
library(quantmod)  # For getSymbols()
library(xts)       # For working with time series data

# Step 1: Fetch forex data
# NOTE: getSymbols.yahoo() documents only "daily", "weekly", and "monthly"
# periodicity, so this 5-minute request may error or fall back; a true
# 5-minute feed may require an intraday-capable source.
usdchf <- getSymbols("USDCHF=X", src = "yahoo",
                     from = Sys.Date() - 7, to = Sys.Date(),
                     periodicity = "5minutes", auto.assign = FALSE)

# Step 2: Create a regular 5-minute time index (as an xts object)
regular_time_index <- seq(from = min(index(usdchf)),
                          to = max(index(usdchf)),
                          by = "5 min")
regular_time_xts <- xts(order.by = regular_time_index)

# Step 3: Merge the forex data with the regular time index to fill in missing times
usdchf_filled <- merge(usdchf, regular_time_xts, all = TRUE)

# Step 4: Fill missing values: interpolate interior gaps first, then carry the
# last observation forward for anything interpolation cannot reach
usdchf_filled <- na.approx(usdchf_filled, na.rm = FALSE)  # Interpolate interior gaps
usdchf_filled <- na.locf(usdchf_filled)                   # Fill remaining values with last known value

# Step 5: Extract 'Close' and 'Time' data
# Save the time index separately
time_saved <- index(usdchf_filled)

# Drop the time index (row names) and keep only the numeric Close column;
# Cl() is quantmod's Close accessor, which avoids hard-coding the
# symbol-derived column name
data_cleaned_no_time <- data.frame(Close = as.numeric(Cl(usdchf_filled)))

# Check the structure of the data to ensure it only contains numeric values
print(head(data_cleaned_no_time))

# -------------------------------------
# Step 6: Use numeric indices for x_train and x_test
train_size <- round(nrow(data_cleaned_no_time) * 0.8)  # Define train size

# Use numeric indices instead of time
x_train <- 1:train_size
x_test  <- (train_size + 1):nrow(data_cleaned_no_time)

# Add a small noise to x_train to break the regularity of the values
x_train <- x_train + rnorm(length(x_train), mean = 0, sd = 0.01)

# Set y_train and y_test using the 'Close' values
y_train <- data_cleaned_no_time$Close[1:train_size]
y_test  <- data_cleaned_no_time$Close[(train_size + 1):nrow(data_cleaned_no_time)]

# Check x_train, y_train to ensure they are numeric
print(head(x_train))
print(head(y_train))

# Define kernels to optimize (kfunc_* come from gp_util_funcs.R)
kernels <- list(gaussian = kfunc_gauss, exponential = kfunc_exp, matern = kfunc_matern)
kernels <- list(exponential = kfunc_exp)  # Restrict this run to the exponential kernel

#set.seed(123)  # For reproducibility

# Objective function: negative log-likelihood of the training data.
# The kernel is passed in so the optimizer tunes the same kernel used by
# gp_solve() below (the original mixed kfunc_gauss here with the exponential
# kernel at prediction time).
objective_fn <- function(a, b, c, kernel = kernels$exponential) {
  Sigma <- kernel(x_train, x_train, a, b, c)

  # Guard against numerically invalid parameter choices
  detV <- det(Sigma)
  if (detV <= 0 || is.na(detV)) {
    return(Inf)  # Return a large value for invalid parameters
  }

  log_likelihood <- get_log_likelihood(x_train, y_train, kernel, sigma2e = 0.001, a, b, c)
  return(-log_likelihood)  # Minimize the negative log-likelihood
}

# Randomized coordinate descent for kernel parameter optimization
randomized_coordinate_descent <- function(init_params, n_iterations, step_size,
                                          bounds = list(a = c(0.1, 100),
                                                        b = c(0.1, 100),
                                                        c = c(0.1, 100))) {
  # Initialize parameters
  a <- init_params[1]
  b <- init_params[2]
  c <- init_params[3]

  # Parameter names to draw from
  params <- c("a", "b", "c")
  current_obj <- objective_fn(a, b, c)

  for (i in 1:n_iterations) {
    # Randomly select a parameter to update
    param_to_update <- sample(params, 1)
    current_val <- get(param_to_update)

    # Propose a new value for the selected parameter, clamped to its bounds
    lower_bound <- bounds[[param_to_update]][1]
    upper_bound <- bounds[[param_to_update]][2]
    new_val <- current_val + runif(1, -step_size, step_size)
    new_val <- max(min(new_val, upper_bound), lower_bound)

    # Evaluate the objective with the proposed value substituted in
    if (param_to_update == "a") {
      new_obj <- objective_fn(new_val, b, c)
    } else if (param_to_update == "b") {
      new_obj <- objective_fn(a, new_val, c)
    } else {
      new_obj <- objective_fn(a, b, new_val)
    }

    # Accept the proposal only if it lowers the negative log-likelihood
    if (new_obj < current_obj) {
      assign(param_to_update, new_val)
      current_obj <- new_obj
    }

    # Output progress
    cat("Iteration:", i, "| a =", a, "b =", b, "c =", c,
        "| Objective value =", current_obj, "\n")
  }

  return(c(a, b, c))  # Return the final parameter values
}
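# Optional cross-check sketch (an addition, not part of the original scheme):
# base R's optim() with method "L-BFGS-B" can minimize the same objective_fn
# under the same box constraints, giving a baseline to compare the coordinate
# descent result against. Assumes objective_fn, x_train, and y_train are
# defined as above; run_optim_check is a hypothetical switch added here.
# Caveat: L-BFGS-B needs finite objective values, so the Inf guard in
# objective_fn must not trigger inside the bounds.
run_optim_check <- 0
if (run_optim_check) {
  fit <- optim(par = c(1, 1, 1),
               fn = function(p) objective_fn(p[1], p[2], p[3]),
               method = "L-BFGS-B",
               lower = c(0.1, 0.1, 0.1),
               upper = c(100, 100, 100))
  cat("optim() check: a =", fit$par[1], "b =", fit$par[2],
      "c =", fit$par[3], "| objective =", fit$value, "\n")
}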
# Initial parameter guesses
init_params <- c(1, 1, 1)

# Previously found parameters; set run_cd_scheme to 1 to re-run the search
optimal_params <- c(0.515314, 1.0, 0.1)
run_cd_scheme <- 0

# Run randomized coordinate descent
if (run_cd_scheme) {
  optimal_params <- randomized_coordinate_descent(init_params, n_iterations = 20, step_size = 0.5)
}

# Print the final optimal parameters
cat("Optimal parameters: a =", optimal_params[1],
    "b =", optimal_params[2], "c =", optimal_params[3], "\n")

# Run the GP with the selected kernel and parameters
a <- optimal_params[1]
b <- optimal_params[2]
c <- optimal_params[3]

gp <- gp_solve(x_train, y_train, x_test, kernels$exponential, sigma2e = 0.001, a, b, c)
gp_high <- gp

mu_vals1  <- gp[["mu"]]
var_vals1 <- abs(diag(gp[["var"]]))

# Pointwise ~95% interval: mean +/- 2 predictive standard deviations
# (the original used sd(var_vals1), a scalar, instead of sqrt of each variance)
cih_vals1 <- mu_vals1 + 2 * sqrt(var_vals1)
cil_vals1 <- mu_vals1 - 2 * sqrt(var_vals1)

png("images/plot_test1.png")
# Plot actual vs. predicted on a shared y-range (the original overlaid two
# plot() calls with par(new = TRUE), which mismatched the axes)
ylim_range <- range(c(y_test, mu_vals1))
plot(x_test, y_test, type = 'p', col = 'blue', ylim = ylim_range,
     xlab = "xtest", ylab = "y true, predicted")
points(x_test, mu_vals1, col = 'red')
legend("topright", legend = c("actual", "predicted"),
       col = c("blue", "red"), pch = 1, cex = 0.8)
dev.off()
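# A small evaluation sketch (an addition, not from the original script):
# simple hold-out metrics for the GP mean, plus empirical coverage of the
# +/- 2 sd band computed above. Assumes y_test, mu_vals1, cil_vals1, and
# cih_vals1 are defined as above and have matching lengths.
rmse <- sqrt(mean((y_test - mu_vals1)^2))
mae  <- mean(abs(y_test - mu_vals1))
coverage <- mean(y_test >= cil_vals1 & y_test <= cih_vals1)
cat("Test RMSE:", rmse, "| MAE:", mae,
    "| 2-sd interval coverage:", coverage, "\n")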