%PDF-1.3
1 0 obj
<<
/Kids [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R 12 0 R 13 0 R ]
/Type /Pages
/Count 10
>>
endobj
2 0 obj
<<
/Subject (Neural Information Processing Systems http\072\057\057nips\056cc\057)
/Publisher (Curran Associates\054 Inc\056)
/Language (en\055US)
/Created (2019)
/EventType (Poster)
/Description-Abstract (We take initial steps in studying PAC\055MDP algorithms with limited adaptivity\054 that is\054 algorithms that change its exploration policy as infrequently as possible during regret minimization\056 This is motivated by the difficulty of running fully adaptive algorithms in real\055world applications \050such as medical domains\051\054 and we propose to quantify adaptivity using the notion of \134emph\173local switching cost\175\056 Our main contribution\054 Q\055Learning with UCB2 exploration\054 is a model\055free algorithm for \044H\044\055step episodic MDP that achieves sublinear regret whose local switching cost in \044K\044 episodes is \044O\050H\1363SA\134log K\051\044\054 and we provide a lower bound of \044\134Omega\050HSA\051\044 on the local switching cost for any no\055regret algorithm\056 Our algorithm can be naturally adapted to the concurrent setting \134citep\173guo2015concurrent\175\054 which yields nontrivial results that improve upon prior work in certain aspects\056)
/Producer (PyPDF2)
/Title (Provably Efficient Q\055Learning with Low Switching Cost)
/Date (2019)
/ModDate (D\07220200213015021\05508\04700\047)
/Published (2019)
/Type (Conference Proceedings)
/firstpage (8004)
/Book (Advances in Neural Information Processing Systems 32)
/Description (Paper accepted and presented at the Neural Information Processing Systems Conference \050http\072\057\057nips\056cc\057\051)
/Editors (H\056 Wallach and H\056 Larochelle and A\056 Beygelzimer and F\056 d\047Alch\351\055Buc and E\056 Fox and R\056 Garnett)
/Author (Yu Bai\054 Tengyang Xie\054 Nan Jiang\054 Yu\055Xiang Wang)
/lastpage (8013)
>>
endobj
3 0 obj
<<
/Type /Catalog
/Pages 1 0 R
>>
endobj
4 0 obj
<<
/Contents 14 0 R
/Parent 1 0 R
/Resources 15 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 46 0 R 47 0 R 48 0 R 49 0 R 50 0 R 51 0 R 52 0 R 53 0 R 54 0 R 55 0 R 56 0 R 57 0 R 58 0 R 59 0 R ]
/Type /Page
>>
endobj
5 0 obj
<<
/Contents 60 0 R
/Parent 1 0 R
/Resources 61 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 86 0 R 87 0 R 88 0 R 89 0 R 90 0 R 91 0 R 92 0 R 93 0 R 94 0 R 95 0 R 96 0 R 97 0 R 98 0 R 99 0 R 100 0 R 101 0 R 102 0 R 103 0 R 104 0 R 105 0 R 106 0 R 107 0 R 108 0 R 109 0 R 110 0 R 111 0 R ]
/Type /Page
>>
endobj
6 0 obj
<<
/Contents 112 0 R
/Parent 1 0 R
/Resources 113 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 138 0 R 139 0 R 140 0 R ]
/Type /Page
>>
endobj
7 0 obj
<<
/Contents 141 0 R
/Parent 1 0 R
/Resources 142 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 151 0 R 152 0 R 153 0 R 154 0 R 155 0 R ]
/Type /Page
>>
endobj
8 0 obj
<<
/Contents 156 0 R
/Parent 1 0 R
/Resources 157 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R 163 0 R 164 0 R 165 0 R ]
/Type /Page
>>
endobj
9 0 obj
<<
/Contents 166 0 R
/Parent 1 0 R
/Resources 167 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 172 0 R 173 0 R 174 0 R 175 0 R 176 0 R 177 0 R 178 0 R 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R 185 0 R ]
/Type /Page
>>
endobj
10 0 obj
<<
/Contents 186 0 R
/Parent 1 0 R
/Resources 187 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 204 0 R 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R 216 0 R ]
/Type /Page
>>
endobj
11 0 obj
<<
/Contents 217 0 R
/Parent 1 0 R
/Resources 218 0 R
/MediaBox [ 0 0 612 792 ]
/Annots [ 219 0 R 220 0 R 221 0 R 222 0 R 223 0 R 224 0 R 225 0 R 226 0 R 227 0 R ]
/Type /Page
>>
endobj
12 0 obj
<<
/Contents 228 0 R
/Parent 1 0 R
/Type /Page
/Resources 229 0 R
/MediaBox [ 0 0 612 792 ]
>>
endobj
13 0 obj
<<
/Contents 230 0 R
/Parent 1 0 R
/Type /Page
/Resources 231 0 R
/MediaBox [ 0 0 612 792 ]
>>
endobj
14 0 obj
<<
/Length 3879
/Filter /FlateDecode
>>
stream
xڝZے6}#U5b/oIqYoMl?P4Ø"^;'ryEtqeD߯˪*.m$F7F~Y&8/8bu[lyێH'+]tPtf&H0r" v}K\lA*UFs~Y'i 7# ʺ˼ ;NϮvΊjE7k6+j̫ۦ#