Working with R and Python simultaneously is a mess. Here is a short side-by-side comparison of how to do common financial data analysis tasks in R and in Python.
Reading the data
R
1 2 3 4 5 6 7 8 |
library(RCurl) # getURL() comes from RCurl; the curl package has no getURL()
library(xts)

# Download the raw CSV text (`url` ends up holding the file contents).
url <- getURL("https://raw.githubusercontent.com/DmitryPukhov/stock-predictor/master/data/RI.RTSI_180101_180313.csv")
price.csv <- read.csv(textConnection(url))

# Drop the first two columns (presumably ticker and period -- verify against
# the CSV header), leaving date, time and the OHLCV fields.
price.csv <- price.csv[-c(1:2)]

# Build the time index from columns 1 and 2 and convert the result to xts.
price.ohlcv <- as.xts(
  read.zoo(
    price.csv,
    index.column = c(1, 2),
    tz = "UTC",
    format = "%Y%m%d%H:%M:%S"
  )
)
names(price.ohlcv) <- c("Open", "High", "Low", "Close", "Vol")
Python
1 2 3 4 5 6 7 8 |
import pandas as pd

# pandas reads directly from a URL, so a plain string is all that is needed
# (the original `url <- getURL(...)` was R syntax and is not valid Python).
url = "https://raw.githubusercontent.com/DmitryPukhov/stock-predictor/master/data/RI.RTSI_180101_180313.csv"

# Merge <DATE> and <TIME> into a single 'datetime' index and keep only the
# OHLCV columns.
price_ohlcv = pd.read_csv(
    url,
    parse_dates={'datetime': ['<DATE>', '<TIME>']},
    index_col='datetime',
    usecols=['<DATE>', '<TIME>', '<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>'],
)
price_ohlcv.columns = ['open', 'high', 'low', 'close', 'vol']
Reading financial data
R
1 2 3 |
library(quantmod)

# With the default auto.assign = TRUE, getSymbols() stores the series in a
# variable named after the symbol (`SP500`) and returns only the symbol name.
# Full ISO dates are used so that from/to can be parsed as Date values.
getSymbols("SP500", src = "FRED", from = "2017-05-01", to = "2018-03-31")

# Bind the data itself to `sp500` so both names refer to the series.
sp500 <- SP500

# R equivalent of pandas' .tail(): show the last rows.
tail(sp500)
Python
1 2 3 4 5 |
import numpy as np
import pandas as pd
from pandas_datareader.data import DataReader

# Pull the S&P 500 index series from FRED for the requested window.
sp500 = DataReader(
    name='SP500',
    data_source='fred',
    start='2016-05',
    end='2018',
)

# Peek at the most recent rows.
sp500.tail()
View table
R
1 2 3 4 |
# First rows of the OHLCV series
head(price.ohlcv)

# Last rows of the OHLCV series
tail(price.ohlcv)

# Internal structure of the object
str(price.ohlcv)

# Per-column summary statistics
summary(price.ohlcv)
Python
1 2 3 4 |
# The frame created in the data-reading step is `price_ohlcv`; the bare name
# `price` is never defined and would raise NameError.
price_ohlcv.head()      # first rows
price_ohlcv.tail()      # last rows
price_ohlcv.info()      # index, column dtypes and non-null counts
price_ohlcv.describe()  # per-column summary statistics
Selecting time series
R
1 2 3 |
# December 2017 through February 2018, using xts ISO-8601 range subsetting.
sp500.winter <- SP500["2017-12/2018-02"]

# Collapse the series to monthly bars. The to.period() argument is `period`;
# `on` is an endpoints() argument and is not a to.period() parameter.
sp500.monthly <- to.period(SP500, period = "months")

# Mean value within each calendar month.
sp500.monthly.mean <- apply.monthly(SP500, mean)
Python
1 2 3 |
# The pandas frame is bound to `sp500` (lower case); `SP500` is only the
# auto-assigned name on the R side and is undefined in Python.

# Rows from January through February 2017 (label-based slice on the date index).
sp500_winter = sp500['2017-01':'2017-02']

# Re-index to month-end frequency.
sp500_monthly = sp500.asfreq('M')

# Mean value within each calendar month.
sp500_monthly_mean = sp500.resample('M').mean()
Plotting
R
1 |
# getSymbols() with its default auto.assign stores the downloaded series in
# `SP500`, so plot that object rather than the value getSymbols() returned.
plot(SP500)
Python
1 2 3 |
import matplotlib.pyplot as plt

# The pandas frame is named `sp500`; `SP500` is undefined in Python.
sp500.plot()
plt.show()
Train model
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
library(astsa)

# Split the close-price series by row position: first 70% of rows for
# training, the remainder for testing.
n <- nrow(price.cl)
split.at <- round(0.7 * n)
price.cl.train <- price.cl[1:split.at]
price.cl.test <- price.cl[(split.at + 1):n]

# Forecast 9 steps ahead with a seasonal ARIMA model.
# NOTE(review): the model is fitted on the full `price.cl`, not on
# `price.cl.train`, and `price.cl.test` is not used below -- confirm intent.
price.cl.pred.raw <- sarima.for(
  price.cl,
  n.ahead = 9,
  p = 1, d = 1, q = 1,
  P = 0, D = 0, Q = 3, S = 12
)

# Attach dates to the point forecasts.
# NOTE(review): the date sequence must have as many entries as there are
# forecast values -- verify against n.ahead.
pred.dates <- timeBasedSeq("20180220/20180313")
price.cl.pred <- xts(price.cl.pred.raw$pred, order.by = pred.dates)

# Empty canvas spanning both series, then overlay training data and forecast.
plot(merge(price.cl, price.cl.pred), type = "n")
points(price.cl.train, col = "blue")
points(price.cl.pred, col = "red", lty = 2)
... to be continued