Skip to content

Commit e05ed4e

Browse files
Update report (#448)
* update report * update report * update report * update report * update report Co-authored-by: Jun Tian <[email protected]>
1 parent 9731436 commit e05ed4e

File tree

12 files changed

+395
-1
lines changed

12 files changed

+395
-1
lines changed

.cspell/cspell.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@
121121
"gzopen",
122122
"turbulences",
123123
"Decompressor",
124+
"MADDPG",
125+
"Guoyu",
126+
"Yang",
124127
"PLAS",
125128
"CRR",
126129
"reparamaterize",

docs/homepage/blog/index.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,6 @@
2828
- [Chapter10 Mountain Car.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter10_Mountain_Car.jl)
2929
- [Chapter11 Counter Example.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter11_Counter_Example.jl)
3030
- [Chapter12 Random Walk.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter12_Random_Walk.jl)
31-
- [Chapter13 Short Corridor.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter13_Short_Corridor.jl)
31+
- [Chapter13 Short Corridor.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter13_Short_Corridor.jl)
32+
33+
- [Phase 1 Technical Report of Enriching Offline Reinforcement Learning Algorithms in ReinforcementLearning.jl](/blog/offline_reinforcement_learning_algorithm_phase1)
Loading
Loading
Loading
Loading
Loading

docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/index.md

Lines changed: 311 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
% Encoding: UTF-8
2+
3+
% Auto-Encoding Variational Bayes (Kingma and Welling).
% "Bayes" is brace-protected so sentence-casing styles keep its capital, and
% the venue acronym is braced without parentheses, matching the ICML entries.
@InProceedings{DBLP:journals/corr/KingmaW13,
  author    = {Diederik P. Kingma and Max Welling},
  booktitle = {International Conference on Learning Representations, {ICLR}},
  title     = {Auto-Encoding Variational {Bayes}},
  year      = {2014},
}
9+
10+
% Conservative Q-Learning for Offline Reinforcement Learning.
% The "Q" is brace-protected so sentence-casing styles render "Q-learning"
% rather than "q-learning"; the venue acronym is braced to keep its casing.
@InProceedings{DBLP:conf/nips/KumarZTL20,
  author    = {Aviral Kumar and Aurick Zhou and George Tucker and Sergey Levine},
  booktitle = {Advances in Neural Information Processing Systems, {NeurIPS}},
  title     = {Conservative {Q}-Learning for Offline Reinforcement Learning},
  year      = {2020},
}
16+
17+
% Critic Regularized Regression.
% The venue acronym is brace-protected so its mixed casing survives any style
% that recases booktitle, consistent with the braced ICML acronym in this file.
@InProceedings{DBLP:conf/nips/0001NZMSRSSGHF20,
  author    = {Ziyu Wang and Alexander Novikov and Konrad Zolna and Josh Merel and Jost Tobias Springenberg and Scott E. Reed and Bobak Shahriari and Noah Y. Siegel and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Nicolas Heess and Nando de Freitas},
  booktitle = {Advances in Neural Information Processing Systems, {NeurIPS}},
  title     = {Critic Regularized Regression},
  year      = {2020},
}
23+
24+
% PLAS: Latent Action Space for Offline Reinforcement Learning.
% CoRR preprint; volume, archiveprefix and eprint are filled in from the
% arXiv identifier embedded in the citation key, so the entry carries the
% same locator fields as the other CoRR entries in this file.
@Article{DBLP:journals/corr/abs-2011-07213,
  author        = {Wenxuan Zhou and Sujay Bajracharya and David Held},
  journal       = {CoRR},
  title         = {{PLAS:} Latent Action Space for Offline Reinforcement Learning},
  year          = {2020},
  volume        = {abs/2011.07213},
  archiveprefix = {arXiv},
  eprint        = {2011.07213},
}
30+
31+
% Soft Actor-Critic Algorithms and Applications.
% CoRR preprint; archiveprefix alone gives styles nothing to print or link,
% so the matching eprint identifier (taken from the volume field) is added.
@Article{DBLP:journals/corr/abs-1812-05905,
  author        = {Tuomas Haarnoja and Aurick Zhou and Kristian Hartikainen and George Tucker and Sehoon Ha and Jie Tan and Vikash Kumar and Henry Zhu and Abhishek Gupta and Pieter Abbeel and Sergey Levine},
  journal       = {CoRR},
  title         = {Soft Actor-Critic Algorithms and Applications},
  year          = {2018},
  volume        = {abs/1812.05905},
  archiveprefix = {arXiv},
  eprint        = {1812.05905},
}
39+
40+
% Uncertainty Weighted Actor-Critic for Offline Reinforcement Learning.
% The editor field is added to match the other entry in this file from the
% same proceedings (38th ICML, PMLR volume 139), so both render consistently.
@InProceedings{DBLP:conf/icml/0001ZSSZSG21,
  author    = {Yue Wu and Shuangfei Zhai and Nitish Srivastava and Joshua M. Susskind and Jian Zhang and Ruslan Salakhutdinov and Hanlin Goh},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML}},
  title     = {Uncertainty Weighted Actor-Critic for Offline Reinforcement Learning},
  year      = {2021},
  editor    = {Marina Meila and Tong Zhang},
  pages     = {11319--11328},
  publisher = {{PMLR}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
}
50+
51+
% Offline Reinforcement Learning with Fisher Divergence Critic Regularization.
% "Fisher" is a proper noun and is brace-protected so sentence-casing styles
% do not lowercase it.
@InProceedings{DBLP:conf/icml/KostrikovFTN21,
  author    = {Ilya Kostrikov and Rob Fergus and Jonathan Tompson and Ofir Nachum},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML}},
  title     = {Offline Reinforcement Learning with {Fisher} Divergence Critic Regularization},
  year      = {2021},
  editor    = {Marina Meila and Tong Zhang},
  pages     = {5774--5783},
  publisher = {{PMLR}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {139},
}
62+
63+
% Stabilizing Off-Policy Q-Learning via Bootstrapping Error Reduction.
% The "Q" is brace-protected so sentence-casing styles render "Q-learning"
% rather than "q-learning"; the venue acronym is braced to keep its casing.
@InProceedings{DBLP:conf/nips/KumarFSTL19,
  author    = {Aviral Kumar and Justin Fu and Matthew Soh and George Tucker and Sergey Levine},
  booktitle = {Advances in Neural Information Processing Systems, {NeurIPS}},
  title     = {Stabilizing Off-Policy {Q}-Learning via Bootstrapping Error Reduction},
  year      = {2019},
  pages     = {11761--11771},
}
70+
71+
% Behavior Regularized Offline Reinforcement Learning.
% CoRR preprint; archiveprefix alone gives styles nothing to print or link,
% so the matching eprint identifier (taken from the volume field) is added.
@Article{DBLP:journals/corr/abs-1911-11361,
  author        = {Yifan Wu and George Tucker and Ofir Nachum},
  journal       = {CoRR},
  title         = {Behavior Regularized Offline Reinforcement Learning},
  year          = {2019},
  volume        = {abs/1911.11361},
  archiveprefix = {arXiv},
  eprint        = {1911.11361},
}

0 commit comments

Comments
 (0)