% Conference paper from AI 2022 (35th Australasian Joint Conference on Artificial Intelligence).
% Names use the unambiguous "Last, First" form; the DOI is stored bare (no LaTeX escaping).
@inproceedings{5bbd2983973e4d1aaee747e2b1994d33,
  author    = {Hajar, Muhammad Shadi and Kalutarage, Harsha and Al-Kadri, M. Omar},
  title     = {A Robust Exploration Strategy in Reinforcement Learning Based on Temporal Difference Error},
  booktitle = {{AI} 2022},
  editor    = {Aziz, Haris and Corr{\^e}a, D{\'e}bora and French, Tim},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  pages     = {789--799},
  year      = {2022},
  doi       = {10.1007/978-3-031-22695-3_55},
  isbn      = {9783031226946},
  language  = {English},
  keywords  = {Exploitation, Exploration, greedy, k-armed bandit, Q-learning, Reinforcement learning, Softmax},
  note      = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 35th Australasian Joint Conference on Artificial Intelligence, AI 2022 ; Conference date: 05-12-2022 Through 09-12-2022},
}