Publications

DORA the explorer: directed outreaching reinforcement action-selection

Exploration is a fundamental aspect of Reinforcement Learning, typically implemented using stochastic action-selection. Exploration, however, can be more efficient if directed toward gaining new world knowledge. Visit-counters have been proven useful both in practice and in theory for directed exploration. However, a major limitation of counters is their locality. While there are a few model-based solutions to this shortcoming, a model-free approach is still missing. We propose E-values, a generalization of counters that can be used to evaluate the propagating exploratory value over state-action trajectories. We compare our approach to commonly used RL techniques, and show that using E-values improves learning and performance over traditional counters. We also show how our method can be implemented with function approximation to efficiently learn continuous MDPs. We demonstrate this by showing that our approach surpasses state of the art performance in the Freeway Atari 2600 game.

Authors: Fox, L. Choshen, L., and Loewenstein, Y.,
Year of publication: 2018
Journal: Proc. ICLR

Link to publication:

Labs:

You can use the keyboard arrows to navigate between the component buttons
",e=e.removeChild(e.firstChild)):"string"==typeof o.is?e=l.createElement(a,{is:o.is}):(e=l.createElement(a),"select"===a&&(l=e,o.multiple?l.multiple=!0:o.size&&(l.size=o.size))):e=l.createElementNS(e,a),e[Ni]=t,e[Pi]=o,Pl(e,t,!1,!1),t.stateNode=e,l=Ae(a,o),a){case"iframe":case"object":case"embed":Te("load",e),u=o;break;case"video":case"audio":for(u=0;u<$a.length;u++)Te($a[u],e);u=o;break;case"source":Te("error",e),u=o;break;case"img":case"image":case"link":Te("error",e),Te("load",e),u=o;break;case"form":Te("reset",e),Te("submit",e),u=o;break;case"details":Te("toggle",e),u=o;break;case"input":A(e,o),u=M(e,o),Te("invalid",e),Ie(n,"onChange");break;case"option":u=B(e,o);break;case"select":e._wrapperState={wasMultiple:!!o.multiple},u=Uo({},o,{value:void 0}),Te("invalid",e),Ie(n,"onChange");break;case"textarea":V(e,o),u=H(e,o),Te("invalid",e),Ie(n,"onChange");break;default:u=o}Me(a,u);var s=u;for(i in s)if(s.hasOwnProperty(i)){var c=s[i];"style"===i?ze(e,c):"dangerouslySetInnerHTML"===i?(c=c?c.__html:void 0,null!=c&&Aa(e,c)):"children"===i?"string"==typeof c?("textarea"!==a||""!==c)&&X(e,c):"number"==typeof c&&X(e,""+c):"suppressContentEditableWarning"!==i&&"suppressHydrationWarning"!==i&&"autoFocus"!==i&&(ea.hasOwnProperty(i)?null!=c&&Ie(n,i):null!=c&&x(e,i,c,l))}switch(a){case"input":L(e),j(e,o,!1);break;case"textarea":L(e),$(e);break;case"option":null!=o.value&&e.setAttribute("value",""+P(o.value));break;case"select":e.multiple=!!o.multiple,n=o.value,null!=n?q(e,!!o.multiple,n,!1):null!=o.defaultValue&&q(e,!!o.multiple,o.defaultValue,!0);break;default:"function"==typeof u.onClick&&(e.onclick=Fe)}Ve(a,o)&&(t.effectTag|=4)}null!==t.ref&&(t.effectTag|=128)}return null;case 6:if(e&&null!=t.stateNode)Ll(e,t,e.memoizedProps,o);else{if("string"!=typeof o&&null===t.stateNode)throw Error(r(166));n=yn(yu.current),yn(bu.current),Jn(t)?(n=t.stateNode,o=t.memoizedProps,n[Ni]=t,n.nodeValue!==o&&(t.effectTag|=4)):(n=(9===n.nodeType?n:n.ownerDocument).createTextNode(o),n[Ni]=t,t.stateNode=n)}return null;case 13:return zt(vu),o=t.memoizedState,0!==(64&t.effectTag)?(t.expirationTime=n,t):(n=null!==o,o=!1,null===e?void 0!==t.memoizedProps.fallback&&Jn(t):(a=e.memoizedState,o=null!==a,n||null===a||(a=e.child.sibling,null!==a&&(i=t.firstEffect,null!==i?(t.firstEffect=a,a.nextEffect=i):(t.firstEffect=t.lastEffect=a,a.nextEffect=null),a.effectTag=8))),n&&!o&&0!==(2&t.mode)&&(null===e&&!0!==t.memoizedProps.unstable_avoidThisFallback||0!==(1&vu.current)?rs===Qu&&(rs=Yu):(rs!==Qu&&rs!==Yu||(rs=Gu),0!==us&&null!==es&&(To(es,ns),Co(es,us)))),(n||o)&&(t.effectTag|=4),null);case 4:return wn(),Ol(t),null;case 10:return Zt(t),null;case 17:return It(t.type)&&Ft(),null;case 19:if(zt(vu),o=t.memoizedState,null===o)return null;if(a=0!==(64&t.effectTag),i=o.rendering,null===i){if(a)mr(o,!1);else if(rs!==Qu||null!==e&&0!==(64&e.effectTag))for(i=t.child;null!==i;){if(e=_n(i),null!==e){for(t.effectTag|=64,mr(o,!1),a=e.updateQueue,null!==a&&(t.updateQueue=a,t.effectTag|=4),null===o.lastEffect&&(t.firstEffect=null),t.lastEffect=o.lastEffect,o=t.child;null!==o;)a=o,i=n,a.effectTag&=2,a.nextEffect=null,a.firstEffect=null,a.lastEffect=null,e=a.alternate,null===e?(a.childExpirationTime=0,a.expirationTime=i,a.child=null,a.memoizedProps=null,a.memoizedState=null,a.updateQueue=null,a.dependencies=null):(a.childExpirationTime=e.childExpirationTime,a.expirationTime=e.expirationTime,a.child=e.child,a.memoizedProps=e.memoizedProps,a.memoizedState=e.memoizedState,a.updateQueue=e.updateQueue,i=e.dependencies,a.dependencies=null===i?null:{expirationTime:i.expirationTime,firstContext:i.firstContext,responders:i.responders}),o=o.sibling;return Mt(vu,1&vu.current|2),t.child}i=i.sibling}}else{if(!a)if(e=_n(i),null!==e){if(t.effectTag|=64,a=!0,n=e.updateQueue,null!==n&&(t.updateQueue=n,t.effectTag|=4),mr(o,!0),null===o.tail&&"hidden"===o.tailMode&&!i.alternate)return t=t.lastEffect=o.lastEffect,null!==t&&(t.nextEffect=null),null}else 2*ru()-o.renderingStartTime>o.tailExpiration&&1t)&&vs.set(e,t)))}}function Ur(e,t){e.expirationTimee?n:e,2>=e&&t!==e?0:e}function qr(e){if(0!==e.lastExpiredTime)e.callbackExpirationTime=1073741823,e.callbackPriority=99,e.callbackNode=$t(Vr.bind(null,e));else{var t=Br(e),n=e.callbackNode;if(0===t)null!==n&&(e.callbackNode=null,e.callbackExpirationTime=0,e.callbackPriority=90);else{var r=Fr();if(1073741823===t?r=99:1===t||2===t?r=95:(r=10*(1073741821-t)-10*(1073741821-r),r=0>=r?99:250>=r?98:5250>=r?97:95),null!==n){var o=e.callbackPriority;if(e.callbackExpirationTime===t&&o>=r)return;n!==Yl&&Bl(n)}e.callbackExpirationTime=t,e.callbackPriority=r,t=1073741823===t?$t(Vr.bind(null,e)):Wt(r,Hr.bind(null,e),{timeout:10*(1073741821-t)-ru()}),e.callbackNode=t}}}function Hr(e,t){if(ks=0,t)return t=Fr(),No(e,t),qr(e),null;var n=Br(e);if(0!==n){if(t=e.callbackNode,(Ju&(Wu|$u))!==Hu)throw Error(r(327));if(lo(),e===es&&n===ns||Kr(e,n),null!==ts){var o=Ju;Ju|=Wu;for(var a=Yr();;)try{eo();break}catch(t){Xr(e,t)}if(Gt(),Ju=o,Bu.current=a,rs===Ku)throw t=os,Kr(e,n),To(e,n),qr(e),t;if(null===ts)switch(a=e.finishedWork=e.current.alternate,e.finishedExpirationTime=n,o=rs,es=null,o){case Qu:case Ku:throw Error(r(345));case Xu:No(e,2=n){e.lastPingedTime=n,Kr(e,n);break}}if(i=Br(e),0!==i&&i!==n)break;if(0!==o&&o!==n){e.lastPingedTime=o;break}e.timeoutHandle=Si(oo.bind(null,e),a);break}oo(e);break;case Gu:if(To(e,n),o=e.lastSuspendedTime,n===o&&(e.nextKnownPendingLevel=ro(a)),ss&&(a=e.lastPingedTime,0===a||a>=n)){e.lastPingedTime=n,Kr(e,n);break}if(a=Br(e),0!==a&&a!==n)break;if(0!==o&&o!==n){e.lastPingedTime=o;break}if(1073741823!==is?o=10*(1073741821-is)-ru():1073741823===as?o=0:(o=10*(1073741821-as)-5e3,a=ru(),n=10*(1073741821-n)-a,o=a-o,0>o&&(o=0),o=(120>o?120:480>o?480:1080>o?1080:1920>o?1920:3e3>o?3e3:4320>o?4320:1960*Uu(o/1960))-o,n=o?o=0:(a=0|l.busyDelayMs,i=ru()-(10*(1073741821-i)-(0|l.timeoutMs||5e3)),o=i<=a?0:a+o-i),10 component higher in the tree to provide a loading indicator or placeholder to display."+N(i))}rs!==Zu&&(rs=Xu),l=yr(l,i),f=a;do{switch(f.tag){case 3:u=l,f.effectTag|=4096,f.expirationTime=t;var w=Ar(f,u,t);ln(f,w); break e;case 1:u=l;var E=f.type,k=f.stateNode;if(0===(64&f.effectTag)&&("function"==typeof E.getDerivedStateFromError||null!==k&&"function"==typeof k.componentDidCatch&&(null===ms||!ms.has(k)))){f.effectTag|=4096,f.expirationTime=t;var _=Ir(f,u,t);ln(f,_);break e}}f=f.return}while(null!==f)}ts=no(ts)}catch(e){t=e;continue}break}}function Yr(){var e=Bu.current;return Bu.current=Cu,null===e?Cu:e}function Gr(e,t){eus&&(us=e)}function Jr(){for(;null!==ts;)ts=to(ts)}function eo(){for(;null!==ts&&!Gl();)ts=to(ts)}function to(e){var t=Fu(e.alternate,e,ns);return e.memoizedProps=e.pendingProps,null===t&&(t=no(e)),qu.current=null,t}function no(e){ts=e;do{var t=ts.alternate;if(e=ts.return,0===(2048&ts.effectTag)){if(t=br(t,ts,ns),1===ns||1!==ts.childExpirationTime){for(var n=0,r=ts.child;null!==r;){var o=r.expirationTime,a=r.childExpirationTime;o>n&&(n=o),a>n&&(n=a),r=r.sibling}ts.childExpirationTime=n}if(null!==t)return t;null!==e&&0===(2048&e.effectTag)&&(null===e.firstEffect&&(e.firstEffect=ts.firstEffect),null!==ts.lastEffect&&(null!==e.lastEffect&&(e.lastEffect.nextEffect=ts.firstEffect),e.lastEffect=ts.lastEffect),1e?t:e}function oo(e){var t=qt();return Vt(99,ao.bind(null,e,t)),null}function ao(e,t){do lo();while(null!==gs);if((Ju&(Wu|$u))!==Hu)throw Error(r(327));var n=e.finishedWork,o=e.finishedExpirationTime;if(null===n)return null;if(e.finishedWork=null,e.finishedExpirationTime=0,n===e.current)throw Error(r(177));e.callbackNode=null,e.callbackExpirationTime=0,e.callbackPriority=90,e.nextKnownPendingLevel=0;var a=ro(n);if(e.firstPendingTime=a,o<=e.lastSuspendedTime?e.firstSuspendedTime=e.lastSuspendedTime=e.nextKnownPendingLevel=0:o<=e.firstSuspendedTime&&(e.firstSuspendedTime=o-1),o<=e.lastPingedTime&&(e.lastPingedTime=0),o<=e.lastExpiredTime&&(e.lastExpiredTime=0),e===es&&(ts=es=null,ns=0),1u&&(c=u,u=l,l=c),c=Ue(w,l),f=Ue(w,u),c&&f&&(1!==k.rangeCount||k.anchorNode!==c.node||k.anchorOffset!==c.offset||k.focusNode!==f.node||k.focusOffset!==f.offset)&&(E=E.createRange(),E.setStart(c.node,c.offset),k.removeAllRanges(),l>u?(k.addRange(E),k.extend(f.node,f.offset)):(E.setEnd(f.node,f.offset),k.addRange(E)))))),E=[];for(k=w;k=k.parentNode;)1===k.nodeType&&E.push({element:k,left:k.scrollLeft,top:k.scrollTop});for("function"==typeof w.focus&&w.focus(),w=0;w=t&&e<=t}function To(e,t){var n=e.firstSuspendedTime,r=e.lastSuspendedTime;nt||0===n)&&(e.lastSuspendedTime=t),t<=e.lastPingedTime&&(e.lastPingedTime=0),t<=e.lastExpiredTime&&(e.lastExpiredTime=0)}function Co(e,t){t>e.firstPendingTime&&(e.firstPendingTime=t);var n=e.firstSuspendedTime;0!==n&&(t>=n?e.firstSuspendedTime=e.lastSuspendedTime=e.nextKnownPendingLevel=0:t>=e.lastSuspendedTime&&(e.lastSuspendedTime=t+1),t>e.nextKnownPendingLevel&&(e.nextKnownPendingLevel=t))}function No(e,t){var n=e.lastExpiredTime;(0===n||n>t)&&(e.lastExpiredTime=t)}function Po(e,t,n,o){var a=t.current,i=Fr(),l=su.suspense;i=jr(i,a,l);e:if(n){n=n._reactInternalFiber;t:{if(J(n)!==n||1!==n.tag)throw Error(r(170));var u=n;do{switch(u.tag){case 3:u=u.stateNode.context;break t;case 1:if(It(u.type)){u=u.stateNode.__reactInternalMemoizedMergedChildContext;break t}}u=u.return}while(null!==u);throw Error(r(171))}if(1===n.tag){var s=n.type;if(It(s)){n=Dt(n,s,u);break e}}n=u}else n=Al;return null===t.context?t.context=n:t.pendingContext=n,t=on(i,l),t.payload={element:e},o=void 0===o?null:o,null!==o&&(t.callback=o),an(a,t),Dr(a,i),i}function Oo(e){if(e=e.current,!e.child)return null;switch(e.child.tag){case 5:return e.child.stateNode;default:return e.child.stateNode}}function Ro(e,t){e=e.memoizedState,null!==e&&null!==e.dehydrated&&e.retryTime