JuliaReinforcementLearning
diff --git a/‎.cspell/cspell.json
Lines changed: 3 additions & 0 deletions b/‎.cspell/cspell.json
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/homepage/blog/index.md
Lines changed: 3 additions & 1 deletion b/‎docs/homepage/blog/index.md
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/CRR.png
113 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/CRR.png
113 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_CRR_CartPole.png
14 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_CRR_CartPole.png
14 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_OfflineDQN_CQL_CartPole.png
32.1 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_OfflineDQN_CQL_CartPole.png
32.1 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_OfflineDQN_CartPole.png
31 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_OfflineDQN_CartPole.png
31 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_PLAS_Pendulum.png
22.8 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_PLAS_Pendulum.png
22.8 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_SAC_Pendulum.png
28.5 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/JuliaRL_SAC_Pendulum.png
28.5 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/PLAS1.png
52.9 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/PLAS1.png
52.9 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/PLAS2.png
209 KB b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/PLAS2.png
209 KB
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/index.md
Lines changed: 311 additions & 0 deletions b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/index.md
Lines changed: 311 additions & 0 deletions
diff --git a/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/offlineRL.bib
Lines changed: 78 additions & 0 deletions b/‎docs/homepage/blog/offline_reinforcement_learning_algorithm_phase1/offlineRL.bib
Lines changed: 78 additions & 0 deletions
@@ -121,6 +121,9 @@
         "gzopen",
         "turbulences",
         "Decompressor",
+        "MADDPG",
+        "Guoyu",
+        "Yang",
         "PLAS",
         "CRR",
         "reparamaterize",
 
@@ -28,4 +28,6 @@
   - [Chapter10 Mountain Car.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter10_Mountain_Car.jl)
   - [Chapter11 Counter Example.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter11_Counter_Example.jl)
   - [Chapter12 Random Walk.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter12_Random_Walk.jl)
-  - [Chapter13 Short Corridor.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter13_Short_Corridor.jl)
+  - [Chapter13 Short Corridor.jl](/blog/notebooks_for_reinforcement_learning_an_introduction/Chapter13_Short_Corridor.jl)
+
+- [Phase 1 Technical Report of Enriching Offline Reinforcement Learning Algorithms in ReinforcementLearning.jl](/blog/offline_reinforcement_learning_algorithm_phase1)
@@ -0,0 +1,78 @@
+% Encoding: UTF-8
+
+% Auto-Encoding Variational Bayes, cited as the ICLR 2014 paper. The DBLP-style key is kept unchanged so existing citations still resolve.
+@InProceedings{DBLP:journals/corr/KingmaW13,
+  author    = {Diederik P. Kingma and Max Welling},
+  booktitle = {International Conference on Learning Representations ({ICLR})},
+  title     = {Auto-Encoding Variational {Bayes}},
+  year      = {2014},
+}
+
+% Conservative Q-Learning (CQL), NeurIPS 2020. The Q is brace-protected so sentence-casing styles do not lowercase it.
+@InProceedings{DBLP:conf/nips/KumarZTL20,
+  author    = {Aviral Kumar and Aurick Zhou and George Tucker and Sergey Levine},
+  booktitle = {Advances in Neural Information Processing Systems, {NeurIPS}},
+  title     = {Conservative {Q}-Learning for Offline Reinforcement Learning},
+  year      = {2020},
+}
+
+% Critic Regularized Regression (CRR), NeurIPS 2020. Author names are given in "von Last, First" form.
+@inproceedings{DBLP:conf/nips/0001NZMSRSSGHF20,
+  author    = {Wang, Ziyu and Novikov, Alexander and Zolna, Konrad and Merel, Josh and Springenberg, Jost Tobias and Reed, Scott E. and Shahriari, Bobak and Siegel, Noah Y. and G{\"{u}}l{\c{c}}ehre, {\c{C}}aglar and Heess, Nicolas and de Freitas, Nando},
+  title     = {Critic Regularized Regression},
+  booktitle = {Advances in Neural Information Processing Systems, NeurIPS},
+  year      = {2020},
+}
+
+% PLAS preprint on CoRR/arXiv. The arXiv identifier is derived from the DBLP key (abs-2011-07213).
+@Article{DBLP:journals/corr/abs-2011-07213,
+  author        = {Wenxuan Zhou and Sujay Bajracharya and David Held},
+  journal       = {CoRR},
+  title         = {{PLAS}: Latent Action Space for Offline Reinforcement Learning},
+  year          = {2020},
+  volume        = {abs/2011.07213},
+  archiveprefix = {arXiv},
+  eprint        = {2011.07213},
+}
+
+% Soft Actor-Critic Algorithms and Applications, CoRR/arXiv preprint. The eprint id matches the volume field.
+@Article{DBLP:journals/corr/abs-1812-05905,
+  author        = {Tuomas Haarnoja and Aurick Zhou and Kristian Hartikainen and George Tucker and Sehoon Ha and Jie Tan and Vikash Kumar and Henry Zhu and Abhishek Gupta and Pieter Abbeel and Sergey Levine},
+  journal       = {CoRR},
+  title         = {Soft Actor-Critic Algorithms and Applications},
+  year          = {2018},
+  volume        = {abs/1812.05905},
+  archiveprefix = {arXiv},
+  eprint        = {1812.05905},
+}
+
+% Uncertainty Weighted Actor-Critic, ICML 2021 (PMLR vol. 139). Editors are the ones listed on the other entry from the same proceedings in this file.
+@InProceedings{DBLP:conf/icml/0001ZSSZSG21,
+  author    = {Yue Wu and Shuangfei Zhai and Nitish Srivastava and Joshua M. Susskind and Jian Zhang and Ruslan Salakhutdinov and Hanlin Goh},
+  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML}},
+  title     = {Uncertainty Weighted Actor-Critic for Offline Reinforcement Learning},
+  year      = {2021},
+  editor    = {Marina Meila and Tong Zhang},
+  pages     = {11319--11328},
+  publisher = {{PMLR}},
+  series    = {Proceedings of Machine Learning Research},
+  volume    = {139},
+}
+
+% Fisher divergence critic regularization paper, ICML 2021 (PMLR vol. 139). "Fisher" is brace-protected as a proper noun.
+@InProceedings{DBLP:conf/icml/KostrikovFTN21,
+  author    = {Ilya Kostrikov and Rob Fergus and Jonathan Tompson and Ofir Nachum},
+  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML}},
+  title     = {Offline Reinforcement Learning with {Fisher} Divergence Critic Regularization},
+  year      = {2021},
+  editor    = {Marina Meila and Tong Zhang},
+  pages     = {5774--5783},
+  publisher = {{PMLR}},
+  series    = {Proceedings of Machine Learning Research},
+  volume    = {139},
+}
+
+% Bootstrapping Error Reduction paper, NeurIPS 2019. The Q is brace-protected so sentence-casing styles do not lowercase it.
+@InProceedings{DBLP:conf/nips/KumarFSTL19,
+  author    = {Aviral Kumar and Justin Fu and Matthew Soh and George Tucker and Sergey Levine},
+  booktitle = {Advances in Neural Information Processing Systems, {NeurIPS}},
+  title     = {Stabilizing Off-Policy {Q}-Learning via Bootstrapping Error Reduction},
+  year      = {2019},
+  pages     = {11761--11771},
+}
+
+% Behavior Regularized Offline Reinforcement Learning, CoRR/arXiv preprint. The eprint id matches the volume field.
+@Article{DBLP:journals/corr/abs-1911-11361,
+  author        = {Yifan Wu and George Tucker and Ofir Nachum},
+  journal       = {CoRR},
+  title         = {Behavior Regularized Offline Reinforcement Learning},
+  year          = {2019},
+  volume        = {abs/1911.11361},
+  archiveprefix = {arXiv},
+  eprint        = {1911.11361},
+}