{"created":"2023-07-25T08:11:12.110144+00:00","id":11347,"links":{},"metadata":{"_buckets":{"deposit":"5939639a-b1ed-451b-b419-2aa9856cadc3"},"_deposit":{"created_by":18,"id":"11347","owners":[18],"pid":{"revision_id":0,"type":"depid","value":"11347"},"status":"published"},"_oai":{"id":"oai:ir.kagoshima-u.ac.jp:00011347","sets":["54:55"]},"author_link":["125737"],"item_5_date_6":{"attribute_name":"作成日","attribute_value_mlt":[{"subitem_date_issued_datetime":"2013-03-25","subitem_date_issued_type":"Issued"}]},"item_5_date_granted_54":{"attribute_name":"学位授与年月日 ","attribute_value_mlt":[{"subitem_dategranted":"2013-03-25"}]},"item_5_degree_grantor_53":{"attribute_name":"学位授与機関","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_language":"ja","subitem_degreegrantor_name":"鹿児島大学"}],"subitem_degreegrantor_identifier":[{"subitem_degreegrantor_identifier_name":"17701","subitem_degreegrantor_identifier_scheme":"kakenhi"}]}]},"item_5_degree_name_42":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"博士(工学)","subitem_degreename_language":"ja"},{"subitem_degreename":"Doctor of Philosophy in Engineering","subitem_degreename_language":"en"}]},"item_5_description_17":{"attribute_name":"ファイル(説明)","attribute_value_mlt":[{"subitem_description":"学位論文の要旨, 学位論文本文","subitem_description_language":"ja","subitem_description_type":"Other"}]},"item_5_description_4":{"attribute_name":"要約(Abstract)","attribute_value_mlt":[{"subitem_description":"理工学研究科博士論文(工学) ; 学位取得日: 平成25年3月25日","subitem_description_language":"ja","subitem_description_type":"Other"},{"subitem_description":"\"There are several kinds of learning methods however most of the research tell us that reinforcement learning (RL) [1] is the most suitable method in machine learning that deals with the decision to take an action using an agent at discrete time steps, and it is expected that would be useful anywhere in the future. There are several ways to implement the learning process but Q-learning algorithm due to Watkins [2] is a policy for estimating the optimal state-action value (Q-value), and it is one of the most fundamental methods in RL. Q-learning can apply in many practical applications but it works only state and action are both discrete. It is difficult to treat in continuous state space because of the Curse of dimensionality problem.\nThis dissertation proposes VQE (Voronoi Q-value Element) to be able to apply the Q-learning in continuous state space and to solve the Curse of dimensionality problem by partitioning the state space. As a method of space division, we apply the Voronoi diagram which is a general space division. Nevertheless, Voronoi diagram has a lot of flexibility thus a method of position determination of VQEs becomes a problem. Therefore, we present the addition method of VQEs to decide the position and LBG algorithm is used for adaptive state transition vector grouping. In addition, we propose the integration method of VQEs to reduce the number of states and memory usage and Delaunay tessellation technique is used to find the adjacent VQEs. These proposed methods also aim to show the improvement of a learning efficiency.\nIn order to examine the efficiency of our proposed methods, we constructed the continuous states and discrete actions experimental model. The experiments are carried out compared with lattice of a previous work. 
The results indicate that the proposed methods are greatly improved than the previous method.\"","subitem_description_language":"en","subitem_description_type":"Other"}]},"item_5_dissertation_number_55":{"attribute_name":"学位授与番号","attribute_value_mlt":[{"subitem_dissertationnumber":"甲理工研第378号"}]},"item_5_publisher_23":{"attribute_name":"公開者・出版者","attribute_value_mlt":[{"subitem_publisher":"鹿児島大学","subitem_publisher_language":"ja"},{"subitem_publisher":"Kagoshima University","subitem_publisher_language":"en"}]},"item_5_subject_15":{"attribute_name":"NDC","attribute_value_mlt":[{"subitem_subject":"007","subitem_subject_scheme":"NDC"}]},"item_5_text_52":{"attribute_name":"学位番号","attribute_value_mlt":[{"subitem_text_value":"理工研第378号"}]},"item_5_version_type_14":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_access_right":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kathy Thi Aung","creatorNameLang":"en"},{"creatorName":"ケティ ティ オウン","creatorNameLang":"ja"}],"nameIdentifiers":[{"nameIdentifier":"125737","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-10-31"}],"displaytype":"detail","filename":"rikouken378.pdf","filesize":[{"value":"806.9 kB"}],"format":"application/pdf","mimetype":"application/pdf","url":{"label":"rikouken378.pdf","objectType":"abstract","url":"https://ir.kagoshima-u.ac.jp/record/11347/files/rikouken378.pdf"},"version_id":"b4de89e5-3bca-4166-afcf-1671ae39a3db"},{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2016-10-31"}],"displaytype":"detail","filename":"diss_KATHY-THI-AUNG_201303.pdf","filesize":[{"value":"2.7 MB"}],"format":"application/pdf","mimetype":"application/pdf","url":{"label":"diss_KATHY-THI-AUNG_201303.pdf","objectType":"fulltext","url":"https://ir.kagoshima-u.ac.jp/record/11347/files/diss_KATHY-THI-AUNG_201303.pdf"},"version_id":"1a248dec-aadf-4399-afe8-9acdbf935a6e"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"doctoral thesis","resourceuri":"http://purl.org/coar/resource_type/c_db06"}]},"item_title":"Study on reinforcement learning using Voronoi diagram in continuous state space","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Study on reinforcement learning using Voronoi diagram in continuous state space","subitem_title_language":"en"},{"subitem_title":"連続的な状態空間のボロノイ分割を用いた強化学習に関する研究","subitem_title_language":"ja"}]},"item_type_id":"5","owner":"18","path":["55"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2015-02-18"},"publish_date":"2015-02-18","publish_status":"0","recid":"11347","relation_version_is_last":true,"title":["Study on reinforcement learning using Voronoi diagram in continuous state space"],"weko_creator_id":"18","weko_shared_id":-1},"updated":"2024-06-12T06:58:48.552103+00:00"}