VOTING POWER100.00%
DOWNVOTE POWER100.00%
RESOURCE CREDITS100.00%
REPUTATION PROGRESS0.00%
Net Worth
0.037USD
STEEM
0.000STEEM
SBD
0.000SBD
Effective Power
5.007SP
├── Own SP
0.631SP
└── Incoming DelegationsDeleg
+4.376SP
Detailed Balance
| STEEM | ||
| balance | 0.000STEEM | STEEM |
| market_balance | 0.000STEEM | STEEM |
| savings_balance | 0.000STEEM | STEEM |
| reward_steem_balance | 0.000STEEM | STEEM |
| STEEM POWER | ||
| Own SP | 0.631SP | SP |
| Delegated Out | 0.000SP | SP |
| Delegation In | 4.376SP | SP |
| Effective Power | 5.007SP | SP |
| Reward SP (pending) | 0.000SP | SP |
| SBD | ||
| sbd_balance | 0.000SBD | SBD |
| sbd_conversions | 0.000SBD | SBD |
| sbd_market_balance | 0.000SBD | SBD |
| savings_sbd_balance | 0.000SBD | SBD |
| reward_sbd_balance | 0.000SBD | SBD |
{
"balance": "0.000 STEEM",
"savings_balance": "0.000 STEEM",
"reward_steem_balance": "0.000 STEEM",
"vesting_shares": "1026.318903 VESTS",
"delegated_vesting_shares": "0.000000 VESTS",
"received_vesting_shares": "7117.340903 VESTS",
"sbd_balance": "0.000 SBD",
"savings_sbd_balance": "0.000 SBD",
"reward_sbd_balance": "0.000 SBD",
"conversions": []
}Account Info
| name | fesan81 |
| id | 466352 |
| rank | 1,358,003 |
| reputation | 130516081 |
| created | 2017-11-28T09:43:48 |
| recovery_account | steem |
| proxy | None |
| post_count | 3 |
| comment_count | 0 |
| lifetime_vote_count | 0 |
| witnesses_voted_for | 0 |
| last_post | 2017-11-28T10:29:03 |
| last_root_post | 2017-11-28T10:29:03 |
| last_vote_time | 2017-11-28T10:29:03 |
| proxied_vsf_votes | 0, 0, 0, 0 |
| can_vote | 1 |
| voting_power | 0 |
| delayed_votes | 0 |
| balance | 0.000 STEEM |
| savings_balance | 0.000 STEEM |
| sbd_balance | 0.000 SBD |
| savings_sbd_balance | 0.000 SBD |
| vesting_shares | 1026.318903 VESTS |
| delegated_vesting_shares | 0.000000 VESTS |
| received_vesting_shares | 7117.340903 VESTS |
| reward_vesting_balance | 0.000000 VESTS |
| vesting_balance | 0.000 STEEM |
| vesting_withdraw_rate | 0.000000 VESTS |
| next_vesting_withdrawal | 1969-12-31T23:59:59 |
| withdrawn | 0 |
| to_withdraw | 0 |
| withdraw_routes | 0 |
| savings_withdraw_requests | 0 |
| last_account_recovery | 1970-01-01T00:00:00 |
| reset_account | null |
| last_owner_update | 1970-01-01T00:00:00 |
| last_account_update | 1970-01-01T00:00:00 |
| mined | No |
| sbd_seconds | 0 |
| sbd_last_interest_payment | 1970-01-01T00:00:00 |
| savings_sbd_last_interest_payment | 1970-01-01T00:00:00 |
{
"id": 466352,
"name": "fesan81",
"owner": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
1
]
]
},
"active": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
1
]
]
},
"posting": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
1
]
]
},
"memo_key": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk",
"json_metadata": "",
"posting_json_metadata": "",
"proxy": "",
"last_owner_update": "1970-01-01T00:00:00",
"last_account_update": "1970-01-01T00:00:00",
"created": "2017-11-28T09:43:48",
"mined": false,
"recovery_account": "steem",
"last_account_recovery": "1970-01-01T00:00:00",
"reset_account": "null",
"comment_count": 0,
"lifetime_vote_count": 0,
"post_count": 3,
"can_vote": true,
"voting_manabar": {
"current_mana": "8143659806",
"last_update_time": 1779063441
},
"downvote_manabar": {
"current_mana": 2035914951,
"last_update_time": 1779063441
},
"voting_power": 0,
"balance": "0.000 STEEM",
"savings_balance": "0.000 STEEM",
"sbd_balance": "0.000 SBD",
"sbd_seconds": "0",
"sbd_seconds_last_update": "1970-01-01T00:00:00",
"sbd_last_interest_payment": "1970-01-01T00:00:00",
"savings_sbd_balance": "0.000 SBD",
"savings_sbd_seconds": "0",
"savings_sbd_seconds_last_update": "1970-01-01T00:00:00",
"savings_sbd_last_interest_payment": "1970-01-01T00:00:00",
"savings_withdraw_requests": 0,
"reward_sbd_balance": "0.000 SBD",
"reward_steem_balance": "0.000 STEEM",
"reward_vesting_balance": "0.000000 VESTS",
"reward_vesting_steem": "0.000 STEEM",
"vesting_shares": "1026.318903 VESTS",
"delegated_vesting_shares": "0.000000 VESTS",
"received_vesting_shares": "7117.340903 VESTS",
"vesting_withdraw_rate": "0.000000 VESTS",
"next_vesting_withdrawal": "1969-12-31T23:59:59",
"withdrawn": 0,
"to_withdraw": 0,
"withdraw_routes": 0,
"curation_rewards": 0,
"posting_rewards": 0,
"proxied_vsf_votes": [
0,
0,
0,
0
],
"witnesses_voted_for": 0,
"last_post": "2017-11-28T10:29:03",
"last_root_post": "2017-11-28T10:29:03",
"last_vote_time": "2017-11-28T10:29:03",
"post_bandwidth": 0,
"pending_claimed_accounts": 0,
"vesting_balance": "0.000 STEEM",
"reputation": 130516081,
"transfer_history": [],
"market_history": [],
"post_history": [],
"vote_history": [],
"other_history": [],
"witness_votes": [],
"tags_usage": [],
"guest_bloggers": [],
"rank": 1358003
}Withdraw Routes
| Incoming | Outgoing |
|---|---|
Empty | Empty |
{
"incoming": [],
"outgoing": []
}From Date
To Date
2026/05/18 00:17:21
2026/05/18 00:17:21
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 7117.340903 VESTS |
| Transaction Info | Block #106143492/Trx 4aea0d1b751e3fd7542ddbbdf903b4cbe1a8d516 |
View Raw JSON Data
{
"trx_id": "4aea0d1b751e3fd7542ddbbdf903b4cbe1a8d516",
"block": 106143492,
"trx_in_block": 3,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-05-18T00:17:21",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "7117.340903 VESTS"
}
]
}2026/05/12 03:59:51
2026/05/12 03:59:51
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 4405.130498 VESTS |
| Transaction Info | Block #105975899/Trx 2b1e16f8356838e4d548082239de9faa053b2dc7 |
View Raw JSON Data
{
"trx_id": "2b1e16f8356838e4d548082239de9faa053b2dc7",
"block": 105975899,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-05-12T03:59:51",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "4405.130498 VESTS"
}
]
}2026/04/25 23:38:12
2026/04/25 23:38:12
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 7129.856659 VESTS |
| Transaction Info | Block #105511142/Trx 478228483a50204438f57ee36636053543c29abb |
View Raw JSON Data
{
"trx_id": "478228483a50204438f57ee36636053543c29abb",
"block": 105511142,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-04-25T23:38:12",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "7129.856659 VESTS"
}
]
}2026/01/23 07:54:00
2026/01/23 07:54:00
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 4446.677317 VESTS |
| Transaction Info | Block #102851927/Trx 881c756278aea13ff5bf4877e6a4b75070d6e430 |
View Raw JSON Data
{
"trx_id": "881c756278aea13ff5bf4877e6a4b75070d6e430",
"block": 102851927,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2026-01-23T07:54:00",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "4446.677317 VESTS"
}
]
}curtisjohnsreplied to @fesan81 / srxfha2025/02/19 10:51:12
curtisjohnsreplied to @fesan81 / srxfha
2025/02/19 10:51:12
| parent author | fesan81 |
| parent permlink | from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | curtisjohns |
| permlink | srxfha |
| title | |
| body | The evolution of AI, from classic techniques to the advanced realm of Deep Reinforcement Learning, has been nothing short of revolutionary. These advancements are reshaping industries and creating new opportunities for innovation. For professionals looking to stay ahead, [Artificial Intelligence (AI) Training Programs in USA: Short Courses in AI for Managers in Las Vegas, Nevada, USA](https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/) offer a perfect blend of foundational knowledge and cutting-edge insights. Such programs are essential for managers aiming to harness AI's potential and lead their teams into the future. |
| json metadata | {"links":["https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/"],"app":"steemit/0.2"} |
| Transaction Info | Block #93145911/Trx cbaf9f49b35fdf6f1bf1f9ada50bfbb92584c1e2 |
View Raw JSON Data
{
"trx_id": "cbaf9f49b35fdf6f1bf1f9ada50bfbb92584c1e2",
"block": 93145911,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2025-02-19T10:51:12",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "curtisjohns",
"permlink": "srxfha",
"title": "",
"body": "The evolution of AI, from classic techniques to the advanced realm of Deep Reinforcement Learning, has been nothing short of revolutionary. These advancements are reshaping industries and creating new opportunities for innovation. For professionals looking to stay ahead, [Artificial Intelligence (AI) Training Programs in USA: Short Courses in AI for Managers in Las Vegas, Nevada, USA](https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/) offer a perfect blend of foundational knowledge and cutting-edge insights. Such programs are essential for managers aiming to harness AI's potential and lead their teams into the future.",
"json_metadata": "{\"links\":[\"https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/\"],\"app\":\"steemit/0.2\"}"
}
]
}2024/12/17 03:13:03
2024/12/17 03:13:03
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 4610.896514 VESTS |
| Transaction Info | Block #91298335/Trx 35c8ead72b0fdd36795fbf9255ead640917b5a40 |
View Raw JSON Data
{
"trx_id": "35c8ead72b0fdd36795fbf9255ead640917b5a40",
"block": 91298335,
"trx_in_block": 3,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2024-12-17T03:13:03",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "4610.896514 VESTS"
}
]
}2023/11/13 18:55:48
2023/11/13 18:55:48
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 4780.030046 VESTS |
| Transaction Info | Block #79852533/Trx 037986a84a0673bcab0a9f219e2d6f62fb34bd0d |
View Raw JSON Data
{
"trx_id": "037986a84a0673bcab0a9f219e2d6f62fb34bd0d",
"block": 79852533,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2023-11-13T18:55:48",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "4780.030046 VESTS"
}
]
}2023/09/21 21:52:15
2023/09/21 21:52:15
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 7717.308832 VESTS |
| Transaction Info | Block #78347876/Trx adae44dc781faef55a299f48f95fe0022f1d24f6 |
View Raw JSON Data
{
"trx_id": "adae44dc781faef55a299f48f95fe0022f1d24f6",
"block": 78347876,
"trx_in_block": 2,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2023-09-21T21:52:15",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "7717.308832 VESTS"
}
]
}2022/11/03 11:39:42
2022/11/03 11:39:42
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 7938.990270 VESTS |
| Transaction Info | Block #69113210/Trx 0ff9088204ae0c3dbb8056bc3de7d92631fccd08 |
View Raw JSON Data
{
"trx_id": "0ff9088204ae0c3dbb8056bc3de7d92631fccd08",
"block": 69113210,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2022-11-03T11:39:42",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "7938.990270 VESTS"
}
]
}2022/01/17 10:55:51
2022/01/17 10:55:51
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8159.523501 VESTS |
| Transaction Info | Block #60809381/Trx c9a210a64e4a04873b39b8fb965864b18451001a |
View Raw JSON Data
{
"trx_id": "c9a210a64e4a04873b39b8fb965864b18451001a",
"block": 60809381,
"trx_in_block": 18,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2022-01-17T10:55:51",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8159.523501 VESTS"
}
]
}2021/06/14 00:51:06
2021/06/14 00:51:06
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8343.292159 VESTS |
| Transaction Info | Block #54607769/Trx a8f4abf39b60cb8cf9cf21109e1b6da0cf76291c |
View Raw JSON Data
{
"trx_id": "a8f4abf39b60cb8cf9cf21109e1b6da0cf76291c",
"block": 54607769,
"trx_in_block": 1,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2021-06-14T00:51:06",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8343.292159 VESTS"
}
]
}2020/12/11 11:09:39
2020/12/11 11:09:39
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8530.714133 VESTS |
| Transaction Info | Block #49355222/Trx 940df32535961a20ee428af7e48845661f172607 |
View Raw JSON Data
{
"trx_id": "940df32535961a20ee428af7e48845661f172607",
"block": 49355222,
"trx_in_block": 7,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-12-11T11:09:39",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8530.714133 VESTS"
}
]
}2020/12/06 04:46:57
2020/12/06 04:46:57
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 1912.543513 VESTS |
| Transaction Info | Block #49206788/Trx 12304560b06a1623a9baf9affde5198c7027c8e0 |
View Raw JSON Data
{
"trx_id": "12304560b06a1623a9baf9affde5198c7027c8e0",
"block": 49206788,
"trx_in_block": 2,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-12-06T04:46:57",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "1912.543513 VESTS"
}
]
}2020/12/05 14:47:51
2020/12/05 14:47:51
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8536.921987 VESTS |
| Transaction Info | Block #49190319/Trx a15087d90c575f6bf0e74ea7e896319c3fbf4277 |
View Raw JSON Data
{
"trx_id": "a15087d90c575f6bf0e74ea7e896319c3fbf4277",
"block": 49190319,
"trx_in_block": 6,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-12-05T14:47:51",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8536.921987 VESTS"
}
]
}2020/11/02 15:43:21
2020/11/02 15:43:21
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 1920.017158 VESTS |
| Transaction Info | Block #48257899/Trx 5ea6e20a2a7debb55b3f227c1b8330d589a02b37 |
View Raw JSON Data
{
"trx_id": "5ea6e20a2a7debb55b3f227c1b8330d589a02b37",
"block": 48257899,
"trx_in_block": 0,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-11-02T15:43:21",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "1920.017158 VESTS"
}
]
}2020/05/09 05:44:15
2020/05/09 05:44:15
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8739.727346 VESTS |
| Transaction Info | Block #43217034/Trx aa9fa936b5b0aea4ea795102e47d710e94a01695 |
View Raw JSON Data
{
"trx_id": "aa9fa936b5b0aea4ea795102e47d710e94a01695",
"block": 43217034,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-05-09T05:44:15",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8739.727346 VESTS"
}
]
}2020/05/08 09:20:45
2020/05/08 09:20:45
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 1953.311140 VESTS |
| Transaction Info | Block #43193138/Trx 0154e0a16af12532e1cb31212c847e614a6e66e1 |
View Raw JSON Data
{
"trx_id": "0154e0a16af12532e1cb31212c847e614a6e66e1",
"block": 43193138,
"trx_in_block": 16,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-05-08T09:20:45",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "1953.311140 VESTS"
}
]
}2020/04/15 21:37:12
2020/04/15 21:37:12
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8752.704765 VESTS |
| Transaction Info | Block #42562566/Trx acc89f09e3fb23df53a682f6f9989b3982fd3d31 |
View Raw JSON Data
{
"trx_id": "acc89f09e3fb23df53a682f6f9989b3982fd3d31",
"block": 42562566,
"trx_in_block": 15,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2020-04-15T21:37:12",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8752.704765 VESTS"
}
]
}2019/11/28 10:15:45
2019/11/28 10:15:45
| parent author | fesan81 |
| parent permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | steemitboard |
| permlink | steemitboard-notify-fesan81-20191128t101545000z |
| title | |
| body | Congratulations @fesan81! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table> <sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@fesan81) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=fesan81)_</sub> ###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes! |
| json metadata | {"image":["https://steemitboard.com/img/notify.png"]} |
| Transaction Info | Block #38567210/Trx e1b12920842a4da876ca2e70302909b9e3417dc4 |
View Raw JSON Data
{
"trx_id": "e1b12920842a4da876ca2e70302909b9e3417dc4",
"block": 38567210,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2019-11-28T10:15:45",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "steemitboard",
"permlink": "steemitboard-notify-fesan81-20191128t101545000z",
"title": "",
"body": "Congratulations @fesan81! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table>\n\n<sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@fesan81) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=fesan81)_</sub>\n\n\n###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!",
"json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
}
]
}2019/05/12 14:52:06
2019/05/12 14:52:06
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 8948.327570 VESTS |
| Transaction Info | Block #32845445/Trx fc02bcb8c077d51409bfdf1481c27f81d4d51d84 |
View Raw JSON Data
{
"trx_id": "fc02bcb8c077d51409bfdf1481c27f81d4d51d84",
"block": 32845445,
"trx_in_block": 10,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2019-05-12T14:52:06",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "8948.327570 VESTS"
}
]
}2018/11/28 18:11:57
2018/11/28 18:11:57
| parent author | fesan81 |
| parent permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | steemitboard |
| permlink | steemitboard-notify-fesan81-20181128t181157000z |
| title | |
| body | Congratulations @fesan81! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday1.png</td><td>1 Year on Steemit</td></tr></table> <sub>_[Click here to view your Board of Honor](https://steemitboard.com/@fesan81)_</sub> > Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**! |
| json metadata | {"image":["https://steemitboard.com/img/notify.png"]} |
| Transaction Info | Block #28102358/Trx e25fd613b1c01eb8c06ed8f6ae6ca04eba85675b |
View Raw JSON Data
{
"trx_id": "e25fd613b1c01eb8c06ed8f6ae6ca04eba85675b",
"block": 28102358,
"trx_in_block": 9,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-11-28T18:11:57",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "steemitboard",
"permlink": "steemitboard-notify-fesan81-20181128t181157000z",
"title": "",
"body": "Congratulations @fesan81! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday1.png</td><td>1 Year on Steemit</td></tr></table>\n\n<sub>_[Click here to view your Board of Honor](https://steemitboard.com/@fesan81)_</sub>\n\n\n> Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**!",
"json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
}
]
}2018/05/16 20:17:12
2018/05/16 20:17:12
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 9147.880005 VESTS |
| Transaction Info | Block #22489845/Trx 9c624768b6f900c7305367d53c22f378917890e7 |
View Raw JSON Data
{
"trx_id": "9c624768b6f900c7305367d53c22f378917890e7",
"block": 22489845,
"trx_in_block": 27,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-05-16T20:17:12",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "9147.880005 VESTS"
}
]
}2018/04/21 20:42:54
2018/04/21 20:42:54
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 29535.849045 VESTS |
| Transaction Info | Block #21771146/Trx 3f25b7c23f8d3b576ee414d1b3115663303993af |
View Raw JSON Data
{
"trx_id": "3f25b7c23f8d3b576ee414d1b3115663303993af",
"block": 21771146,
"trx_in_block": 13,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2018-04-21T20:42:54",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "29535.849045 VESTS"
}
]
}2017/12/12 22:19:57
2017/12/12 22:19:57
| delegator | steem |
| delegatee | fesan81 |
| vesting shares | 29739.681097 VESTS |
| Transaction Info | Block #18032582/Trx 871880414549d43d34f7037684c3102ab8d07915 |
View Raw JSON Data
{
"trx_id": "871880414549d43d34f7037684c3102ab8d07915",
"block": 18032582,
"trx_in_block": 16,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-12-12T22:19:57",
"op": [
"delegate_vesting_shares",
{
"delegator": "steem",
"delegatee": "fesan81",
"vesting_shares": "29739.681097 VESTS"
}
]
}2017/11/30 15:06:36
2017/11/30 15:06:36
| parent author | fesan81 |
| parent permlink | 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | neurallearner |
| permlink | re-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z |
| title | |
| body | Hey come and give a shot and see how much you know about deep-learning by explaining the [joke](https://steemit.com/ai/@neurallearner/ai-joke-activities) and at the same time promote learning AI. |
| json metadata | {"tags":["artifitialintelligence"],"links":["https://steemit.com/ai/@neurallearner/ai-joke-activities"],"app":"steemit/0.1"} |
| Transaction Info | Block #17678489/Trx 280064fbc1e023a86819c80863d819b66e84325f |
View Raw JSON Data
{
"trx_id": "280064fbc1e023a86819c80863d819b66e84325f",
"block": 17678489,
"trx_in_block": 15,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-30T15:06:36",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "neurallearner",
"permlink": "re-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z",
"title": "",
"body": "Hey come and give a shot and see how much you know about deep-learning by explaining the [joke](https://steemit.com/ai/@neurallearner/ai-joke-activities) and at the same time promote learning AI.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\"],\"links\":[\"https://steemit.com/ai/@neurallearner/ai-joke-activities\"],\"app\":\"steemit/0.1\"}"
}
]
}2017/11/28 12:15:54
2017/11/28 12:15:54
| parent author | fesan81 |
| parent permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | steemitboard |
| permlink | steemitboard-notify-fesan81-20171128t121556000z |
| title | |
| body | Congratulations @fesan81! You have completed some achievement on Steemit and have been rewarded with new badge(s) : [](http://steemitboard.com/@fesan81) You published your First Post [](http://steemitboard.com/@fesan81) You made your First Vote [](http://steemitboard.com/@fesan81) You got a First Vote Click on any badge to view your own Board of Honor on SteemitBoard. For more information about SteemitBoard, click [here](https://steemit.com/@steemitboard) If you no longer want to receive notifications, reply to this comment with the word `STOP` > By upvoting this notification, you can help all Steemit users. Learn how [here](https://steemit.com/steemitboard/@steemitboard/http-i-cubeupload-com-7ciqeo-png)! |
| json metadata | {"image":["https://steemitboard.com/img/notifications.png"]} |
| Transaction Info | Block #17617488/Trx d80d9da4e0af2ee834592b3838f3be253d89df95 |
View Raw JSON Data
{
"trx_id": "d80d9da4e0af2ee834592b3838f3be253d89df95",
"block": 17617488,
"trx_in_block": 20,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T12:15:54",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "steemitboard",
"permlink": "steemitboard-notify-fesan81-20171128t121556000z",
"title": "",
"body": "Congratulations @fesan81! You have completed some achievement on Steemit and have been rewarded with new badge(s) :\n\n[](http://steemitboard.com/@fesan81) You published your First Post\n[](http://steemitboard.com/@fesan81) You made your First Vote\n[](http://steemitboard.com/@fesan81) You got a First Vote\n\nClick on any badge to view your own Board of Honor on SteemitBoard.\nFor more information about SteemitBoard, click [here](https://steemit.com/@steemitboard)\n\nIf you no longer want to receive notifications, reply to this comment with the word `STOP`\n\n> By upvoting this notification, you can help all Steemit users. Learn how [here](https://steemit.com/steemitboard/@steemitboard/http-i-cubeupload-com-7ciqeo-png)!",
"json_metadata": "{\"image\":[\"https://steemitboard.com/img/notifications.png\"]}"
}
]
}2017/11/28 10:29:24
2017/11/28 10:29:24
| parent author | fesan81 |
| parent permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | cheetah |
| permlink | cheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| title | |
| body | Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578 |
| json metadata | |
| Transaction Info | Block #17615361/Trx f8ad938a0fe5df8e083e06de25a60f0c70b3d833 |
View Raw JSON Data
{
"trx_id": "f8ad938a0fe5df8e083e06de25a60f0c70b3d833",
"block": 17615361,
"trx_in_block": 9,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:29:24",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "cheetah",
"permlink": "cheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "",
"body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578",
"json_metadata": ""
}
]
}2017/11/28 10:29:21
2017/11/28 10:29:21
| voter | cheetah |
| author | fesan81 |
| permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| weight | 8 (0.08%) |
| Transaction Info | Block #17615360/Trx bc2eaef723a0100a5b96c3fcec6ab7fcff1bb367 |
View Raw JSON Data
{
"trx_id": "bc2eaef723a0100a5b96c3fcec6ab7fcff1bb367",
"block": 17615360,
"trx_in_block": 2,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:29:21",
"op": [
"vote",
{
"voter": "cheetah",
"author": "fesan81",
"permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"weight": 8
}
]
}fesan81upvoted (100.00%) @fesan81 / 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 10:29:03
fesan81upvoted (100.00%) @fesan81 / 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 10:29:03
| voter | fesan81 |
| author | fesan81 |
| permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| weight | 10000 (100.00%) |
| Transaction Info | Block #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452 |
View Raw JSON Data
{
"trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
"block": 17615354,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:29:03",
"op": [
"vote",
{
"voter": "fesan81",
"author": "fesan81",
"permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"weight": 10000
}
]
}fesan81updated options for 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 10:29:03
fesan81updated options for 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 10:29:03
| author | fesan81 |
| permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| max accepted payout | 1000000.000 SBD |
| percent steem dollars | 0 |
| allow votes | true |
| allow curation rewards | true |
| extensions | [] |
| Transaction Info | Block #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452 |
View Raw JSON Data
{
"trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
"block": 17615354,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:29:03",
"op": [
"comment_options",
{
"author": "fesan81",
"permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"max_accepted_payout": "1000000.000 SBD",
"percent_steem_dollars": 0,
"allow_votes": true,
"allow_curation_rewards": true,
"extensions": []
}
]
}fesan81published a new post: 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 10:29:03
fesan81published a new post: 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 10:29:03
| parent author | |
| parent permlink | artifitialintelligence |
| author | fesan81 |
| permlink | 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning |
| title | From classic AI techniques to Deep Reinforcement Learning |
| body |  Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data.  
Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. 
This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. 
thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. 
In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. Scientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. 
This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results. Later AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. 
In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometric, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & H ner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University. |
| json metadata | {"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452 |
View Raw JSON Data
{
"trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
"block": 17615354,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:29:03",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "artifitialintelligence",
"author": "fesan81",
"permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "From classic AI techniques to Deep Reinforcement Learning",
"body": "\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. 
\n\n\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. 
A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. 
The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. 
RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. 
Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. 
In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}2017/11/28 10:17:36
2017/11/28 10:17:36
| parent author | fesan81 |
| parent permlink | 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| author | cheetah |
| permlink | cheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| title | |
| body | Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578 |
| json metadata | |
| Transaction Info | Block #17615125/Trx 0b6dc347a3542ade12da9b46a949155aa81000aa |
View Raw JSON Data
{
"trx_id": "0b6dc347a3542ade12da9b46a949155aa81000aa",
"block": 17615125,
"trx_in_block": 7,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:17:36",
"op": [
"comment",
{
"parent_author": "fesan81",
"parent_permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"author": "cheetah",
"permlink": "cheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "",
"body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578",
"json_metadata": ""
}
]
}2017/11/28 10:17:06
2017/11/28 10:17:06
| voter | cheetah |
| author | fesan81 |
| permlink | 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| weight | 8 (0.08%) |
| Transaction Info | Block #17615115/Trx 5ac607e7e3075e102926cbf5942cf3a3e30f06a6 |
View Raw JSON Data
{
"trx_id": "5ac607e7e3075e102926cbf5942cf3a3e30f06a6",
"block": 17615115,
"trx_in_block": 3,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:17:06",
"op": [
"vote",
{
"voter": "cheetah",
"author": "fesan81",
"permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"weight": 8
}
]
}fesan81upvoted (100.00%) @fesan81 / 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 10:16:09
fesan81upvoted (100.00%) @fesan81 / 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 10:16:09
| voter | fesan81 |
| author | fesan81 |
| permlink | 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| weight | 10000 (100.00%) |
| Transaction Info | Block #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586 |
View Raw JSON Data
{
"trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
"block": 17615096,
"trx_in_block": 21,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:16:09",
"op": [
"vote",
{
"voter": "fesan81",
"author": "fesan81",
"permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"weight": 10000
}
]
}fesan81updated options for 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 10:16:09
fesan81updated options for 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 10:16:09
| author | fesan81 |
| permlink | 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| max accepted payout | 1000000.000 SBD |
| percent steem dollars | 0 |
| allow votes | true |
| allow curation rewards | true |
| extensions | [] |
| Transaction Info | Block #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586 |
View Raw JSON Data
{
"trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
"block": 17615096,
"trx_in_block": 21,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:16:09",
"op": [
"comment_options",
{
"author": "fesan81",
"permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"max_accepted_payout": "1000000.000 SBD",
"percent_steem_dollars": 0,
"allow_votes": true,
"allow_curation_rewards": true,
"extensions": []
}
]
}fesan81published a new post: 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 10:16:09
fesan81published a new post: 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 10:16:09
| parent author | |
| parent permlink | artifitialintelligence |
| author | fesan81 |
| permlink | 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning |
| title | From classic AI techniques to Deep Reinforcement Learning |
| body |  Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data.  
Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. 
This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. 
thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. 
In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. Scientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. 
This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results. Later AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. 
In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometric, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & H ner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University. |
| json metadata | {"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586 |
View Raw JSON Data
{
"trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
"block": 17615096,
"trx_in_block": 21,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:16:09",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "artifitialintelligence",
"author": "fesan81",
"permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "From classic AI techniques to Deep Reinforcement Learning",
"body": "\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. 
\n\n\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. 
A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. 
The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. 
RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. 
Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. 
In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}fesan81upvoted (100.00%) @fesan81 / from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 09:50:03
fesan81upvoted (100.00%) @fesan81 / from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 09:50:03
| voter | fesan81 |
| author | fesan81 |
| permlink | from-classic-ai-techniques-to-deep-reinforcement-learning |
| weight | 10000 (100.00%) |
| Transaction Info | Block #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc |
View Raw JSON Data
{
"trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
"block": 17614575,
"trx_in_block": 11,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T09:50:03",
"op": [
"vote",
{
"voter": "fesan81",
"author": "fesan81",
"permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
"weight": 10000
}
]
}fesan81updated options for from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 09:50:03
fesan81updated options for from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 09:50:03
| author | fesan81 |
| permlink | from-classic-ai-techniques-to-deep-reinforcement-learning |
| max accepted payout | 1000000.000 SBD |
| percent steem dollars | 0 |
| allow votes | true |
| allow curation rewards | true |
| extensions | [] |
| Transaction Info | Block #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc |
View Raw JSON Data
{
"trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
"block": 17614575,
"trx_in_block": 11,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T09:50:03",
"op": [
"comment_options",
{
"author": "fesan81",
"permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
"max_accepted_payout": "1000000.000 SBD",
"percent_steem_dollars": 0,
"allow_votes": true,
"allow_curation_rewards": true,
"extensions": []
}
]
}fesan81published a new post: from-classic-ai-techniques-to-deep-reinforcement-learning2017/11/28 09:50:03
fesan81published a new post: from-classic-ai-techniques-to-deep-reinforcement-learning
2017/11/28 09:50:03
| parent author | |
| parent permlink | artifitialintelligence |
| author | fesan81 |
| permlink | from-classic-ai-techniques-to-deep-reinforcement-learning |
| title | From classic AI techniques to Deep Reinforcement Learning |
| body |  Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data.  
Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. 
This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. 
thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. 
In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. Scientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. 
This network is called a convolutional neural network (CNN), and it is characterized by having full connectivity between adjacent layers. It first appears in [LeCun et al 1989] with an application to handwritten zip code recognition. The authors explain how the convolutional and pooling layers in a CNN are directly inspired by the classic notions of cells in visual neuroscience. The CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state uses a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and applied in a famous paper called “Playing Atari with Deep Reinforcement Learning” [Mnih, V. et al 2013], in which they teach a machine to play Atari games directly from pixels, and after training, the machine outputs excellent results. Later the AlphaGo team developed a deep reinforcement learning system, using all technologies cited in this section (LSTM, CNN, RNN), creating an artificial intelligence system capable of learning to play the game of Go, being trained by watching experts, being trained by playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because, due to the complexity of the game, scientists thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. 
In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometrics, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521, 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1, 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of the Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of the Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65, 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Machine Learning, 3, 9–44. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University. |
| json metadata | {"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"} |
| Transaction Info | Block #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc |
View Raw JSON Data
{
"trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
"block": 17614575,
"trx_in_block": 11,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T09:50:03",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "artifitialintelligence",
"author": "fesan81",
"permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "From classic AI techniques to Deep Reinforcement Learning",
"body": "\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. 
\n\n\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. 
A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. 
The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. 
RNNs were very useful in the following years, but they didn’t solve the problems of deep learning.\n\nScientists also developed another type of network that was easier to train, that is, it needs fewer examples to learn the right weights in the links between neurons, and it can also do better classifications. This network is called a convolutional neural network (CNN), and it is characterized by having full connectivity between adjacent layers. It first appears in [LeCun et al 1989] with an application to handwritten zip code recognition. The authors explain how the convolutional and pooling layers in a CNN are directly inspired by the classic notions of cells in visual neuroscience. The CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state uses a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and applied in a famous paper called “Playing Atari with Deep Reinforcement Learning” [Mnih, V. et al 2013], in which they teach a machine to play Atari games directly from pixels, and after training, the machine outputs excellent results.\n\nLater the AlphaGo team developed a deep reinforcement learning system, using all technologies cited in this section (LSTM, CNN, RNN), creating an artificial intelligence system capable of learning to play the game of Go, being trained by watching experts, being trained by playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because, due to the complexity of the game, scientists thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. 
Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometrics, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521, 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1, 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. 
In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of the Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of the Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65, 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Machine Learning, 3, 9–44.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}
2017/11/28 09:43:48
2017/11/28 09:43:48
| fee | 0.500 STEEM |
| delegation | 57000.000000 VESTS |
| creator | steem |
| new account name | fesan81 |
| owner | {"weight_threshold":1,"account_auths":[],"key_auths":[["STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",1]]} |
| active | {"weight_threshold":1,"account_auths":[],"key_auths":[["STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",1]]} |
| posting | {"weight_threshold":1,"account_auths":[],"key_auths":[["STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",1]]} |
| memo key | STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk |
| json metadata | |
| extensions | [] |
| Transaction Info | Block #17614450/Trx 2fb3f31aec55bba631571a17577acc550ea65f36 |
View Raw JSON Data
{
"trx_id": "2fb3f31aec55bba631571a17577acc550ea65f36",
"block": 17614450,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T09:43:48",
"op": [
"account_create_with_delegation",
{
"fee": "0.500 STEEM",
"delegation": "57000.000000 VESTS",
"creator": "steem",
"new_account_name": "fesan81",
"owner": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
1
]
]
},
"active": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
1
]
]
},
"posting": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
1
]
]
},
"memo_key": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk",
"json_metadata": "",
"extensions": []
}
]
}
Manabar
Voting Power100.00%
Downvote Power100.00%
Resource Credits100.00%
Reputation Progress0.00%
{
"voting_manabar": {
"current_mana": "8143659806",
"last_update_time": 1779063441
},
"downvote_manabar": {
"current_mana": 2035914951,
"last_update_time": 1779063441
},
"rc_account": {
"account": "fesan81",
"rc_manabar": {
"current_mana": "10164408779",
"last_update_time": 1779063441
},
"max_rc_creation_adjustment": {
"amount": "2020748973",
"precision": 6,
"nai": "@@000000037"
},
"max_rc": "10164408779"
}
}
Account Metadata
| POSTING JSON METADATA | |
| None | |
| JSON METADATA | |
| None |
{
"posting_json_metadata": {},
"json_metadata": {}
}
Auth Keys
Owner
Single Signature
Public Keys
STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb1/1
Active
Single Signature
Public Keys
STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F1/1
Posting
Single Signature
Public Keys
STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc1/1
Memo
STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk
{
"owner": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
1
]
]
},
"active": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
1
]
]
},
"posting": {
"weight_threshold": 1,
"account_auths": [],
"key_auths": [
[
"STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
1
]
]
},
"memo": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk"
}
Witness Votes
0 / 30
No active witness votes.
[]