Display Publications by [Year] [Type] [Topic]
This paper addresses how efficient reward methods can be applied to multiple agents co-evolving in noisy and changing environments, under communication limitations. This problem is approached by ``factoring'' a global reward over all agents into agent-specific rewards that have two key properties: 1) agents maximizing their agent-specific rewards will tend to maximize the global reward, 2) an agent's action has a large influence over its agent-specific reward, allowing it to evolve quickly. Agents using these agent-specific rewards are tested in episodic and non-episodic, continuous-space multi-rover environments where rovers evolve to maximize a global reward function over all rovers. The environments are dynamic (i.e., they change over time), can be noisy, and can restrict communication between agents. We show that a control policy evolved using these agent-specific rewards outperforms global reward methods by up to 400\%. More notably, in the presence of a larger number of rovers or rovers with noisy and communication-limited sensors, the proposed method outperforms global reward by a higher percentage than in noise-free conditions with a small number of rovers.
@inproceedings{tumer-agogino_lamas05,
  author           = {Tumer, K. and Agogino, A.},
  title            = {Efficient Reward Functions for Adaptive Multi-Rover Systems},
  booktitle        = {AAMAS-05 Workshop on Learning and Adaptation in Multiagent Systems},
  editor           = {Tuyls, K. and 't Hoen, P. Jan and Sen, S. and Verbeeck, K.},
  address          = {Utrecht, Netherlands},
  month            = jul,
  year             = {2005},
  abstract         = {This paper addresses how efficient reward methods can be applied to multiple agents co-evolving in noisy and changing environments, under communication limitations. This problem is approached by ``factoring'' a global reward over all agents into agent-specific rewards that have two key properties: 1) agents maximizing their agent-specific rewards will tend to maximize the global reward, 2) an agent's action has a large influence over its agent-specific reward allowing it to evolve quickly. Agents using these agent-specific rewards are tested in episodic and non-episodic, continuous-space multi-rover environment where rovers evolve to maximize a global reward function over all rovers. The environments are dynamic (i.e. changes over time) and can be noisy and can restrict communication between agents . We show that a control policy evolved using these agent-specific rewards outperforms global reward methods by up to 400\%. More notably, in the presence of a larger number of rovers or rovers with noisy and communication limited sensors, the proposed method outperforms global reward by a higher percentage than in noise-free conditions with a small number of rovers.},
  bib2html_pubtype = {Workshop/Symposium Papers},
  bib2html_rescat  = {Multiagent Systems, Robotics},
}
Generated by bib2html.pl (written by Patrick Riley ) on Wed Apr 01, 2020 17:39:43