Reinforcement Learning for Stock Trading

Credits — People's Daily

Stock Trading

Bellman Equation
Bellman Equation

Actions

Reward

# Build the Q-value network: three ReLU hidden layers (64, 32, 8 units)
# followed by a linear output layer with one unit per action.
model = Sequential()
model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
model.add(Dense(units=32, activation="relu"))
model.add(Dense(units=8, activation="relu"))
# Linear activation: the outputs are unbounded per-action value estimates.
model.add(Dense(self.action_size, activation="linear"))
# NOTE(review): recent Keras releases spell this argument `learning_rate`;
# `lr` is kept to match the Keras version this code was written for - confirm.
model.compile(loss="mse", optimizer=Adam(lr=0.001))
def act(self, state):
    """Choose an action index for ``state``.

    When ``self.is_eval`` is false, a uniformly random action from
    ``range(self.action_size)`` is returned whenever a fresh
    ``np.random.rand()`` draw is ``<= self.epsilon``. In every other case the
    index of the largest value in the first row of
    ``self.model.predict(state)`` is returned.
    """
    if not self.is_eval and np.random.rand() <= self.epsilon:
        return random.randrange(self.action_size)

    options = self.model.predict(state)
    return np.argmax(options[0])

Experience Replay

def expReplay(self, batch_size):
    """Fit the model on the most recent ``batch_size`` stored transitions.

    Each entry of ``self.memory`` is unpacked as
    ``(state, action, reward, next_state, done)``. For every sampled entry the
    target for the taken action is ``reward`` when ``done`` is true, otherwise
    ``reward + self.gamma * max(self.model.predict(next_state)[0])``. The
    model's current prediction for ``state`` is updated at ``action`` with
    that target and the model is fitted on it for one epoch.

    After the batch, ``self.epsilon`` is multiplied by ``self.epsilon_decay``
    once, provided it is still above ``self.epsilon_min``.

    If fewer than ``batch_size`` transitions are stored, all of them are used.
    """
    memory_len = len(self.memory)
    # Take exactly the last `batch_size` entries (the previous start index of
    # `l - batch_size + 1` produced one sample too few), and clamp at zero so a
    # short memory never wraps around through negative indices.
    first = max(memory_len - batch_size, 0)
    # Integer indexing (rather than slicing) is kept from the original so that
    # containers supporting only `len` and `[i]` still work.
    mini_batch = [self.memory[i] for i in range(first, memory_len)]

    for state, action, reward, next_state, done in mini_batch:
        target = reward
        if not done:
            # Bootstrapped target: immediate reward plus the discounted best
            # predicted value of the next state.
            target = reward + self.gamma * np.amax(
                self.model.predict(next_state)[0]
            )

        # Only the entry for the taken action is overwritten; the remaining
        # entries keep the model's own predictions.
        target_f = self.model.predict(state)
        target_f[0][action] = target

        self.model.fit(state, target_f, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

Training to learn Trading

# Training: run episode_count + 1 passes over the price series `data`,
# replaying stored experience as it accumulates and checkpointing the model
# every 10th episode.
for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)

        # hold: the default outcome when the action is neither a buy nor a
        # sell with something in inventory
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        # buy
        if action == 1:
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))

        # sell (only possible when something was bought earlier)
        elif action == 2 and len(agent.inventory) > 0:
            # Oldest purchase is sold first.
            bought_price = agent.inventory.pop(0)
            # The reward is clipped at zero, while total_profit still records
            # the signed result of the trade.
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) + " | Profit: " +
                  formatPrice(data[t] - bought_price))

        # The episode ends on the last time step.
        done = t == l - 1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")

        # Start replaying once more than one batch of transitions is stored.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)

    if e % 10 == 0:
        agent.model.save("models/model_ep" + str(e))

Evaluation

# Evaluation: a single pass over the price series `data`, reporting every
# trade and the total profit at the end. `state`, `total_profit`, `l`, `agent`
# and `stock_name` are expected to be set up before this loop.
for t in range(l):
    action = agent.act(state)

    # hold: the default outcome when the action is neither a buy nor a sell
    # with something in inventory
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0

    # buy
    if action == 1:
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))

    # sell (only possible when something was bought earlier)
    elif action == 2 and len(agent.inventory) > 0:
        # Oldest purchase is sold first.
        bought_price = agent.inventory.pop(0)
        # The reward is clipped at zero, while total_profit still records the
        # signed result of the trade.
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " +
              formatPrice(data[t] - bought_price))

    # The run ends on the last time step.
    done = t == l - 1
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        print("--------------------------------")
        print(stock_name + " Total Profit: "
              + formatPrice(total_profit))
        print("--------------------------------")

Conclusion

Author

References

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store