Display Publications by [Year] [Type] [Topic]
Enabling reinforcement learning to be effective in large-scale multi-agent Markov Decision Problems is a challenging task. To address this problem we propose a multi-agent variant of Q-learning: ``Q Updates with Immediate Counterfactual Rewards-learning'' (QUICR-learning). Given a global reward function over all agents that the large-scale system is trying to maximize, QUICR-learning breaks down the global reward into many agent-specific rewards that have the following two properties: 1) agents maximizing their agent-specific rewards tend to maximize the global reward, 2) an agent's action has a large influence on its agent-specific reward, allowing it to learn quickly. Each agent then uses standard Q-learning type updates to form a policy to maximize the agent-specific rewards. Results on multi-agent grid-world problems over two topologies show that QUICR-learning can be effective with hundreds of agents and can achieve up to 300\% improvements in performance over both conventional and local Q-learning in the largest tested systems.
@inproceedings{tumer-agogino_lsmas05,
  author           = {Agogino, A. and Tumer, K.},
  title            = {Reinforcement Learning in Large Multi-agent Systems},
  booktitle        = {{AAMAS}-05 Workshop on Coordination of Large Scale Multiagent Systems},
  address          = {Utrecht, Netherlands},
  editor           = {Mailler, R. and Scerri, P. and Vincent, R.},
  month            = jul,
  abstract         = {Enabling reinforcement learning to be effective in large-scale multi-agent Markov Decision Problems is a challenging task. To address this problem we propose a multi-agent variant of Q-learning: ``Q Updates with Immediate Counterfactual Rewards-learning'' (QUICR-learning). Given a global reward function over all agents that the large-scale system is trying to maximize, QUICR-learning breaks down the global reward into many agent-specific rewards that have the following two properties: 1) agents maximizing their agent-specific rewards tend to maximize the global reward, 2) an agent's action has a large influence on its agent-specific reward, allowing it to learn quickly. Each agent then uses standard Q-learning type updates to form a policy to maximize the agent-specific rewards. Results on multi-agent grid-world problems over two topologies show that QUICR-learning can be effective with hundreds of agents and can achieve up to 300\% improvements in performance over both conventional and local Q-learning in the largest tested systems.},
  bib2html_pubtype = {Workshop/Symposium Papers},
  bib2html_rescat  = {Multiagent Systems},
  year             = {2005}
}
Generated by bib2html.pl (written by Patrick Riley) on Wed Apr 01, 2020 17:39:43