Ecoer Logo
VOTING POWER100.00%
DOWNVOTE POWER100.00%
RESOURCE CREDITS100.00%
REPUTATION PROGRESS0.00%
Net Worth
0.037USD
STEEM
0.000STEEM
SBD
0.000SBD
Effective Power
5.007SP
├── Own SP
0.631SP
└── Incoming Deleg
+4.376SP

Detailed Balance

STEEM
balance
0.000STEEM
market_balance
0.000STEEM
savings_balance
0.000STEEM
reward_steem_balance
0.000STEEM
STEEM POWER
Own SP
0.631SP
Delegated Out
0.000SP
Delegation In
4.376SP
Effective Power
5.007SP
Reward SP (pending)
0.000SP
SBD
sbd_balance
0.000SBD
sbd_conversions
0.000SBD
sbd_market_balance
0.000SBD
savings_sbd_balance
0.000SBD
reward_sbd_balance
0.000SBD
{
  "balance": "0.000 STEEM",
  "savings_balance": "0.000 STEEM",
  "reward_steem_balance": "0.000 STEEM",
  "vesting_shares": "1026.318903 VESTS",
  "delegated_vesting_shares": "0.000000 VESTS",
  "received_vesting_shares": "7117.340903 VESTS",
  "sbd_balance": "0.000 SBD",
  "savings_sbd_balance": "0.000 SBD",
  "reward_sbd_balance": "0.000 SBD",
  "conversions": []
}

Account Info

namefesan81
id466352
rank1,358,003
reputation130516081
created2017-11-28T09:43:48
recovery_accountsteem
proxyNone
post_count3
comment_count0
lifetime_vote_count0
witnesses_voted_for0
last_post2017-11-28T10:29:03
last_root_post2017-11-28T10:29:03
last_vote_time2017-11-28T10:29:03
proxied_vsf_votes0, 0, 0, 0
can_vote1
voting_power0
delayed_votes0
balance0.000 STEEM
savings_balance0.000 STEEM
sbd_balance0.000 SBD
savings_sbd_balance0.000 SBD
vesting_shares1026.318903 VESTS
delegated_vesting_shares0.000000 VESTS
received_vesting_shares7117.340903 VESTS
reward_vesting_balance0.000000 VESTS
vesting_balance0.000 STEEM
vesting_withdraw_rate0.000000 VESTS
next_vesting_withdrawal1969-12-31T23:59:59
withdrawn0
to_withdraw0
withdraw_routes0
savings_withdraw_requests0
last_account_recovery1970-01-01T00:00:00
reset_accountnull
last_owner_update1970-01-01T00:00:00
last_account_update1970-01-01T00:00:00
minedNo
sbd_seconds0
sbd_last_interest_payment1970-01-01T00:00:00
savings_sbd_last_interest_payment1970-01-01T00:00:00
{
  "id": 466352,
  "name": "fesan81",
  "owner": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
        1
      ]
    ]
  },
  "active": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
        1
      ]
    ]
  },
  "posting": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
        1
      ]
    ]
  },
  "memo_key": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk",
  "json_metadata": "",
  "posting_json_metadata": "",
  "proxy": "",
  "last_owner_update": "1970-01-01T00:00:00",
  "last_account_update": "1970-01-01T00:00:00",
  "created": "2017-11-28T09:43:48",
  "mined": false,
  "recovery_account": "steem",
  "last_account_recovery": "1970-01-01T00:00:00",
  "reset_account": "null",
  "comment_count": 0,
  "lifetime_vote_count": 0,
  "post_count": 3,
  "can_vote": true,
  "voting_manabar": {
    "current_mana": "8143659806",
    "last_update_time": 1779063441
  },
  "downvote_manabar": {
    "current_mana": 2035914951,
    "last_update_time": 1779063441
  },
  "voting_power": 0,
  "balance": "0.000 STEEM",
  "savings_balance": "0.000 STEEM",
  "sbd_balance": "0.000 SBD",
  "sbd_seconds": "0",
  "sbd_seconds_last_update": "1970-01-01T00:00:00",
  "sbd_last_interest_payment": "1970-01-01T00:00:00",
  "savings_sbd_balance": "0.000 SBD",
  "savings_sbd_seconds": "0",
  "savings_sbd_seconds_last_update": "1970-01-01T00:00:00",
  "savings_sbd_last_interest_payment": "1970-01-01T00:00:00",
  "savings_withdraw_requests": 0,
  "reward_sbd_balance": "0.000 SBD",
  "reward_steem_balance": "0.000 STEEM",
  "reward_vesting_balance": "0.000000 VESTS",
  "reward_vesting_steem": "0.000 STEEM",
  "vesting_shares": "1026.318903 VESTS",
  "delegated_vesting_shares": "0.000000 VESTS",
  "received_vesting_shares": "7117.340903 VESTS",
  "vesting_withdraw_rate": "0.000000 VESTS",
  "next_vesting_withdrawal": "1969-12-31T23:59:59",
  "withdrawn": 0,
  "to_withdraw": 0,
  "withdraw_routes": 0,
  "curation_rewards": 0,
  "posting_rewards": 0,
  "proxied_vsf_votes": [
    0,
    0,
    0,
    0
  ],
  "witnesses_voted_for": 0,
  "last_post": "2017-11-28T10:29:03",
  "last_root_post": "2017-11-28T10:29:03",
  "last_vote_time": "2017-11-28T10:29:03",
  "post_bandwidth": 0,
  "pending_claimed_accounts": 0,
  "vesting_balance": "0.000 STEEM",
  "reputation": 130516081,
  "transfer_history": [],
  "market_history": [],
  "post_history": [],
  "vote_history": [],
  "other_history": [],
  "witness_votes": [],
  "tags_usage": [],
  "guest_bloggers": [],
  "rank": 1358003
}

Withdraw Routes

IncomingOutgoing
Empty
Empty
{
  "incoming": [],
  "outgoing": []
}
From Date
To Date
steemdelegated 4.376 SP to @fesan81
2026/05/18 00:17:21
delegatorsteem
delegateefesan81
vesting shares7117.340903 VESTS
Transaction InfoBlock #106143492/Trx 4aea0d1b751e3fd7542ddbbdf903b4cbe1a8d516
View Raw JSON Data
{
  "trx_id": "4aea0d1b751e3fd7542ddbbdf903b4cbe1a8d516",
  "block": 106143492,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-05-18T00:17:21",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7117.340903 VESTS"
    }
  ]
}
steemdelegated 2.708 SP to @fesan81
2026/05/12 03:59:51
delegatorsteem
delegateefesan81
vesting shares4405.130498 VESTS
Transaction InfoBlock #105975899/Trx 2b1e16f8356838e4d548082239de9faa053b2dc7
View Raw JSON Data
{
  "trx_id": "2b1e16f8356838e4d548082239de9faa053b2dc7",
  "block": 105975899,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-05-12T03:59:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4405.130498 VESTS"
    }
  ]
}
steemdelegated 4.383 SP to @fesan81
2026/04/25 23:38:12
delegatorsteem
delegateefesan81
vesting shares7129.856659 VESTS
Transaction InfoBlock #105511142/Trx 478228483a50204438f57ee36636053543c29abb
View Raw JSON Data
{
  "trx_id": "478228483a50204438f57ee36636053543c29abb",
  "block": 105511142,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-04-25T23:38:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7129.856659 VESTS"
    }
  ]
}
steemdelegated 2.734 SP to @fesan81
2026/01/23 07:54:00
delegatorsteem
delegateefesan81
vesting shares4446.677317 VESTS
Transaction InfoBlock #102851927/Trx 881c756278aea13ff5bf4877e6a4b75070d6e430
View Raw JSON Data
{
  "trx_id": "881c756278aea13ff5bf4877e6a4b75070d6e430",
  "block": 102851927,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-01-23T07:54:00",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4446.677317 VESTS"
    }
  ]
}
2025/02/19 10:51:12
parent authorfesan81
parent permlinkfrom-classic-ai-techniques-to-deep-reinforcement-learning
authorcurtisjohns
permlinksrxfha
title
bodyThe evolution of AI, from classic techniques to the advanced realm of Deep Reinforcement Learning, has been nothing short of revolutionary. These advancements are reshaping industries and creating new opportunities for innovation. For professionals looking to stay ahead, [Artificial Intelligence (AI) Training Programs in USA: Short Courses in AI for Managers in Las Vegas, Nevada, USA](https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/) offer a perfect blend of foundational knowledge and cutting-edge insights. Such programs are essential for managers aiming to harness AI's potential and lead their teams into the future.
json metadata{"links":["https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/"],"app":"steemit/0.2"}
Transaction InfoBlock #93145911/Trx cbaf9f49b35fdf6f1bf1f9ada50bfbb92584c1e2
View Raw JSON Data
{
  "trx_id": "cbaf9f49b35fdf6f1bf1f9ada50bfbb92584c1e2",
  "block": 93145911,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2025-02-19T10:51:12",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "curtisjohns",
      "permlink": "srxfha",
      "title": "",
      "body": "The evolution of AI, from classic techniques to the advanced realm of Deep Reinforcement Learning, has been nothing short of revolutionary. These advancements are reshaping industries and creating new opportunities for innovation. For professionals looking to stay ahead, [Artificial Intelligence (AI) Training Programs in USA: Short Courses in AI for Managers in Las Vegas, Nevada, USA](https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/) offer a perfect blend of foundational knowledge and cutting-edge insights. Such programs are essential for managers aiming to harness AI's potential and lead their teams into the future.",
      "json_metadata": "{\"links\":[\"https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/\"],\"app\":\"steemit/0.2\"}"
    }
  ]
}
steemdelegated 2.835 SP to @fesan81
2024/12/17 03:13:03
delegatorsteem
delegateefesan81
vesting shares4610.896514 VESTS
Transaction InfoBlock #91298335/Trx 35c8ead72b0fdd36795fbf9255ead640917b5a40
View Raw JSON Data
{
  "trx_id": "35c8ead72b0fdd36795fbf9255ead640917b5a40",
  "block": 91298335,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2024-12-17T03:13:03",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4610.896514 VESTS"
    }
  ]
}
steemdelegated 2.939 SP to @fesan81
2023/11/13 18:55:48
delegatorsteem
delegateefesan81
vesting shares4780.030046 VESTS
Transaction InfoBlock #79852533/Trx 037986a84a0673bcab0a9f219e2d6f62fb34bd0d
View Raw JSON Data
{
  "trx_id": "037986a84a0673bcab0a9f219e2d6f62fb34bd0d",
  "block": 79852533,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2023-11-13T18:55:48",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4780.030046 VESTS"
    }
  ]
}
steemdelegated 4.744 SP to @fesan81
2023/09/21 21:52:15
delegatorsteem
delegateefesan81
vesting shares7717.308832 VESTS
Transaction InfoBlock #78347876/Trx adae44dc781faef55a299f48f95fe0022f1d24f6
View Raw JSON Data
{
  "trx_id": "adae44dc781faef55a299f48f95fe0022f1d24f6",
  "block": 78347876,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2023-09-21T21:52:15",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7717.308832 VESTS"
    }
  ]
}
steemdelegated 4.881 SP to @fesan81
2022/11/03 11:39:42
delegatorsteem
delegateefesan81
vesting shares7938.990270 VESTS
Transaction InfoBlock #69113210/Trx 0ff9088204ae0c3dbb8056bc3de7d92631fccd08
View Raw JSON Data
{
  "trx_id": "0ff9088204ae0c3dbb8056bc3de7d92631fccd08",
  "block": 69113210,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2022-11-03T11:39:42",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7938.990270 VESTS"
    }
  ]
}
steemdelegated 5.016 SP to @fesan81
2022/01/17 10:55:51
delegatorsteem
delegateefesan81
vesting shares8159.523501 VESTS
Transaction InfoBlock #60809381/Trx c9a210a64e4a04873b39b8fb965864b18451001a
View Raw JSON Data
{
  "trx_id": "c9a210a64e4a04873b39b8fb965864b18451001a",
  "block": 60809381,
  "trx_in_block": 18,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2022-01-17T10:55:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8159.523501 VESTS"
    }
  ]
}
steemdelegated 5.129 SP to @fesan81
2021/06/14 00:51:06
delegatorsteem
delegateefesan81
vesting shares8343.292159 VESTS
Transaction InfoBlock #54607769/Trx a8f4abf39b60cb8cf9cf21109e1b6da0cf76291c
View Raw JSON Data
{
  "trx_id": "a8f4abf39b60cb8cf9cf21109e1b6da0cf76291c",
  "block": 54607769,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2021-06-14T00:51:06",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8343.292159 VESTS"
    }
  ]
}
steemdelegated 5.245 SP to @fesan81
2020/12/11 11:09:39
delegatorsteem
delegateefesan81
vesting shares8530.714133 VESTS
Transaction InfoBlock #49355222/Trx 940df32535961a20ee428af7e48845661f172607
View Raw JSON Data
{
  "trx_id": "940df32535961a20ee428af7e48845661f172607",
  "block": 49355222,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-11T11:09:39",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8530.714133 VESTS"
    }
  ]
}
steemdelegated 1.176 SP to @fesan81
2020/12/06 04:46:57
delegatorsteem
delegateefesan81
vesting shares1912.543513 VESTS
Transaction InfoBlock #49206788/Trx 12304560b06a1623a9baf9affde5198c7027c8e0
View Raw JSON Data
{
  "trx_id": "12304560b06a1623a9baf9affde5198c7027c8e0",
  "block": 49206788,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-06T04:46:57",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "1912.543513 VESTS"
    }
  ]
}
steemdelegated 5.248 SP to @fesan81
2020/12/05 14:47:51
delegatorsteem
delegateefesan81
vesting shares8536.921987 VESTS
Transaction InfoBlock #49190319/Trx a15087d90c575f6bf0e74ea7e896319c3fbf4277
View Raw JSON Data
{
  "trx_id": "a15087d90c575f6bf0e74ea7e896319c3fbf4277",
  "block": 49190319,
  "trx_in_block": 6,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-05T14:47:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8536.921987 VESTS"
    }
  ]
}
steemdelegated 1.180 SP to @fesan81
2020/11/02 15:43:21
delegatorsteem
delegateefesan81
vesting shares1920.017158 VESTS
Transaction InfoBlock #48257899/Trx 5ea6e20a2a7debb55b3f227c1b8330d589a02b37
View Raw JSON Data
{
  "trx_id": "5ea6e20a2a7debb55b3f227c1b8330d589a02b37",
  "block": 48257899,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-11-02T15:43:21",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "1920.017158 VESTS"
    }
  ]
}
steemdelegated 5.373 SP to @fesan81
2020/05/09 05:44:15
delegatorsteem
delegateefesan81
vesting shares8739.727346 VESTS
Transaction InfoBlock #43217034/Trx aa9fa936b5b0aea4ea795102e47d710e94a01695
View Raw JSON Data
{
  "trx_id": "aa9fa936b5b0aea4ea795102e47d710e94a01695",
  "block": 43217034,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-05-09T05:44:15",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8739.727346 VESTS"
    }
  ]
}
steemdelegated 1.201 SP to @fesan81
2020/05/08 09:20:45
delegatorsteem
delegateefesan81
vesting shares1953.311140 VESTS
Transaction InfoBlock #43193138/Trx 0154e0a16af12532e1cb31212c847e614a6e66e1
View Raw JSON Data
{
  "trx_id": "0154e0a16af12532e1cb31212c847e614a6e66e1",
  "block": 43193138,
  "trx_in_block": 16,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-05-08T09:20:45",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "1953.311140 VESTS"
    }
  ]
}
steemdelegated 5.381 SP to @fesan81
2020/04/15 21:37:12
delegatorsteem
delegateefesan81
vesting shares8752.704765 VESTS
Transaction InfoBlock #42562566/Trx acc89f09e3fb23df53a682f6f9989b3982fd3d31
View Raw JSON Data
{
  "trx_id": "acc89f09e3fb23df53a682f6f9989b3982fd3d31",
  "block": 42562566,
  "trx_in_block": 15,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-04-15T21:37:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8752.704765 VESTS"
    }
  ]
}
2019/11/28 10:15:45
parent authorfesan81
parent permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
authorsteemitboard
permlinksteemitboard-notify-fesan81-20191128t101545000z
title
bodyCongratulations @fesan81! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table> <sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@fesan81) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=fesan81)_</sub> ###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!
json metadata{"image":["https://steemitboard.com/img/notify.png"]}
Transaction InfoBlock #38567210/Trx e1b12920842a4da876ca2e70302909b9e3417dc4
View Raw JSON Data
{
  "trx_id": "e1b12920842a4da876ca2e70302909b9e3417dc4",
  "block": 38567210,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-11-28T10:15:45",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-fesan81-20191128t101545000z",
      "title": "",
      "body": "Congratulations @fesan81! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table>\n\n<sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@fesan81) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=fesan81)_</sub>\n\n\n###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
    }
  ]
}
steemdelegated 5.501 SP to @fesan81
2019/05/12 14:52:06
delegatorsteem
delegateefesan81
vesting shares8948.327570 VESTS
Transaction InfoBlock #32845445/Trx fc02bcb8c077d51409bfdf1481c27f81d4d51d84
View Raw JSON Data
{
  "trx_id": "fc02bcb8c077d51409bfdf1481c27f81d4d51d84",
  "block": 32845445,
  "trx_in_block": 10,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-05-12T14:52:06",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8948.327570 VESTS"
    }
  ]
}
2018/11/28 18:11:57
parent authorfesan81
parent permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
authorsteemitboard
permlinksteemitboard-notify-fesan81-20181128t181157000z
title
bodyCongratulations @fesan81! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday1.png</td><td>1 Year on Steemit</td></tr></table> <sub>_[Click here to view your Board of Honor](https://steemitboard.com/@fesan81)_</sub> > Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**!
json metadata{"image":["https://steemitboard.com/img/notify.png"]}
Transaction InfoBlock #28102358/Trx e25fd613b1c01eb8c06ed8f6ae6ca04eba85675b
View Raw JSON Data
{
  "trx_id": "e25fd613b1c01eb8c06ed8f6ae6ca04eba85675b",
  "block": 28102358,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-11-28T18:11:57",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-fesan81-20181128t181157000z",
      "title": "",
      "body": "Congratulations @fesan81! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday1.png</td><td>1 Year on Steemit</td></tr></table>\n\n<sub>_[Click here to view your Board of Honor](https://steemitboard.com/@fesan81)_</sub>\n\n\n> Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
    }
  ]
}
steemdelegated 5.624 SP to @fesan81
2018/05/16 20:17:12
delegatorsteem
delegateefesan81
vesting shares9147.880005 VESTS
Transaction InfoBlock #22489845/Trx 9c624768b6f900c7305367d53c22f378917890e7
View Raw JSON Data
{
  "trx_id": "9c624768b6f900c7305367d53c22f378917890e7",
  "block": 22489845,
  "trx_in_block": 27,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-05-16T20:17:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "9147.880005 VESTS"
    }
  ]
}
steemdelegated 18.158 SP to @fesan81
2018/04/21 20:42:54
delegatorsteem
delegateefesan81
vesting shares29535.849045 VESTS
Transaction InfoBlock #21771146/Trx 3f25b7c23f8d3b576ee414d1b3115663303993af
View Raw JSON Data
{
  "trx_id": "3f25b7c23f8d3b576ee414d1b3115663303993af",
  "block": 21771146,
  "trx_in_block": 13,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-04-21T20:42:54",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "29535.849045 VESTS"
    }
  ]
}
steemdelegated 18.284 SP to @fesan81
2017/12/12 22:19:57
delegatorsteem
delegateefesan81
vesting shares29739.681097 VESTS
Transaction InfoBlock #18032582/Trx 871880414549d43d34f7037684c3102ab8d07915
View Raw JSON Data
{
  "trx_id": "871880414549d43d34f7037684c3102ab8d07915",
  "block": 18032582,
  "trx_in_block": 16,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-12-12T22:19:57",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "29739.681097 VESTS"
    }
  ]
}
2017/11/30 15:06:36
parent authorfesan81
parent permlink2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
authorneurallearner
permlinkre-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z
title
bodyHey come and give a shot and see how much you know about deep-learning by explaining the [joke](https://steemit.com/ai/@neurallearner/ai-joke-activities) and at the same time promote learning AI.
json metadata{"tags":["artifitialintelligence"],"links":["https://steemit.com/ai/@neurallearner/ai-joke-activities"],"app":"steemit/0.1"}
Transaction InfoBlock #17678489/Trx 280064fbc1e023a86819c80863d819b66e84325f
View Raw JSON Data
{
  "trx_id": "280064fbc1e023a86819c80863d819b66e84325f",
  "block": 17678489,
  "trx_in_block": 15,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-30T15:06:36",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "neurallearner",
      "permlink": "re-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z",
      "title": "",
      "body": "Hey come and give a shot and see how much you know about deep-learning by explaining the [joke](https://steemit.com/ai/@neurallearner/ai-joke-activities) and at the same time promote learning AI.",
      "json_metadata": "{\"tags\":[\"artifitialintelligence\"],\"links\":[\"https://steemit.com/ai/@neurallearner/ai-joke-activities\"],\"app\":\"steemit/0.1\"}"
    }
  ]
}
2017/11/28 12:15:54
parent authorfesan81
parent permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
authorsteemitboard
permlinksteemitboard-notify-fesan81-20171128t121556000z
title
bodyCongratulations @fesan81! You have completed some achievement on Steemit and have been rewarded with new badge(s) : [![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstpost.png)](http://steemitboard.com/@fesan81) You published your First Post [![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvote.png)](http://steemitboard.com/@fesan81) You made your First Vote [![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvoted.png)](http://steemitboard.com/@fesan81) You got a First Vote Click on any badge to view your own Board of Honor on SteemitBoard. For more information about SteemitBoard, click [here](https://steemit.com/@steemitboard) If you no longer want to receive notifications, reply to this comment with the word `STOP` > By upvoting this notification, you can help all Steemit users. Learn how [here](https://steemit.com/steemitboard/@steemitboard/http-i-cubeupload-com-7ciqeo-png)!
json metadata{"image":["https://steemitboard.com/img/notifications.png"]}
Transaction InfoBlock #17617488/Trx d80d9da4e0af2ee834592b3838f3be253d89df95
View Raw JSON Data
{
  "trx_id": "d80d9da4e0af2ee834592b3838f3be253d89df95",
  "block": 17617488,
  "trx_in_block": 20,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T12:15:54",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-fesan81-20171128t121556000z",
      "title": "",
      "body": "Congratulations @fesan81! You have completed some achievement on Steemit and have been rewarded with new badge(s) :\n\n[![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstpost.png)](http://steemitboard.com/@fesan81) You published your First Post\n[![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvote.png)](http://steemitboard.com/@fesan81) You made your First Vote\n[![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvoted.png)](http://steemitboard.com/@fesan81) You got a First Vote\n\nClick on any badge to view your own Board of Honor on SteemitBoard.\nFor more information about SteemitBoard, click [here](https://steemit.com/@steemitboard)\n\nIf you no longer want to receive notifications, reply to this comment with the word `STOP`\n\n> By upvoting this notification, you can help all Steemit users. Learn how [here](https://steemit.com/steemitboard/@steemitboard/http-i-cubeupload-com-7ciqeo-png)!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notifications.png\"]}"
    }
  ]
}
2017/11/28 10:29:24
parent authorfesan81
parent permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
authorcheetah
permlinkcheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
title
bodyHi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578
json metadata
Transaction InfoBlock #17615361/Trx f8ad938a0fe5df8e083e06de25a60f0c70b3d833
View Raw JSON Data
{
  "trx_id": "f8ad938a0fe5df8e083e06de25a60f0c70b3d833",
  "block": 17615361,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:24",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "cheetah",
      "permlink": "cheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "",
      "body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578",
      "json_metadata": ""
    }
  ]
}
2017/11/28 10:29:21
votercheetah
authorfesan81
permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
weight8 (0.08%)
Transaction InfoBlock #17615360/Trx bc2eaef723a0100a5b96c3fcec6ab7fcff1bb367
View Raw JSON Data
{
  "trx_id": "bc2eaef723a0100a5b96c3fcec6ab7fcff1bb367",
  "block": 17615360,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:21",
  "op": [
    "vote",
    {
      "voter": "cheetah",
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 8
    }
  ]
}
2017/11/28 10:29:03
voterfesan81
authorfesan81
permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
weight10000 (100.00%)
Transaction InfoBlock #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452
View Raw JSON Data
{
  "trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
  "block": 17615354,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:03",
  "op": [
    "vote",
    {
      "voter": "fesan81",
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 10000
    }
  ]
}
2017/11/28 10:29:03
authorfesan81
permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
max accepted payout1000000.000 SBD
percent steem dollars0
allow votestrue
allow curation rewardstrue
extensions[]
Transaction InfoBlock #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452
View Raw JSON Data
{
  "trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
  "block": 17615354,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:03",
  "op": [
    "comment_options",
    {
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "max_accepted_payout": "1000000.000 SBD",
      "percent_steem_dollars": 0,
      "allow_votes": true,
      "allow_curation_rewards": true,
      "extensions": []
    }
  ]
}
2017/11/28 10:29:03
parent author
parent permlinkartifitialintelligence
authorfesan81
permlink2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
titleFrom classic AI techniques to Deep Reinforcement Learning
body![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg) Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes an unlabeled dataset and finds patterns or intrinsic structures in the data; it is usually used for clustering data. ![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg) Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, that is, how to map states to actions (or create a policy) in order to maximize reward utility. This type of machine learning does not learn from a training set of labeled data; it learns from interaction with its environment. It tries several paths in order to maximize the long-term accumulated reward, also called utility. Reinforcement learning is characterized by these points: the learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based on some basic algorithms that were developed in the middle of the 20th century. Most of them are used within neural networks; in the next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is the neural network, because it can be used as a complement to different types of machine learning, as shown in the next sections. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pitts proposed a neural network consisting of highly connected networks of functions that map the path from inputs to the desired outputs. It was a first mathematical approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Pitts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by the neurosciences, Rosenblatt [Rosenblatt, F. 1958] developed the perceptron, an algorithm for learning a binary classifier: it maps the output of each neuron to 0 or 1. It takes an input vector x and a weight vector w and evaluates whether their scalar product exceeds the threshold u, that is, $f(x) = 1$ if $w \cdot x - u > 0$, and $f(x) = 0$ otherwise. In single-layer neural networks this algorithm wasn’t very useful because binary classification is limited. On the other side, reinforcement learning algorithms were developed from dynamic programming principles. Dynamic programming is the group of algorithms that can solve all types of Markov Decision Processes (MDPs) [Bellman 1957, Dynamic Programming]. MDPs are mathematical models of decision making in stochastic situations. They are usually represented by a graph, where the nodes are the states and the edges are actions from one state to another; starting from each state, each action has a certain probability of occurring, independent of past states or actions. Each new state gives a reward (positive or negative). Solving the MDP means finding a policy that maximizes the sum of all rewards. If the probability of being in a certain state is not 100%, the problem becomes a Partially Observable MDP (POMDP). Bellman was the first to propose an equation that can solve these problems [Bellman 1957]. This recursive formula provides the utility of following a certain policy expecting the highest reward. 
Solving these equations means finding the optimal policy; this problem was complicated to solve because the equation involves a maximization function, which cannot be differentiated. The problem left the domain of reinforcement learning without any relevant advancement until 1989, when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis]. It solves the problem by calculating a quantity for each state-action pair. (SARSA) Finally, the convergence of the solution is guaranteed by using the Temporal Difference (TD) learning algorithm. It was proposed by R. S. Sutton in 1988 [Sutton et al 1988]. Since it was first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma. Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working with several layers of neurons was only the first step towards deep learning and data mining. In this section we will show how the techniques of the first section were integrated into multi-layer neural networks and how together they developed the foundations of deep learning. Initially the perceptron was conceived for single-layer neural networks; it works as a one-dimensional classifier. This technique was not very useful in, for example, speech or image classification, because such techniques must be insensitive to irrelevant variations of the input such as the orientation of the photo, illumination or zoom, but should be sensitive to the details that differentiate one image from another (a wolf from a dog, for example). The perceptron started being useful only in multi-layer networks, when the Multi-Layer Perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called gradient descent. Backpropagation is the ensemble of algorithms that aim to assign the weights for which the neural network has the lowest error in its learning. 
One of the most important methods within backpropagation is Stochastic Gradient Descent (SGD); this is an algorithm that aims to minimize the error rate using calculus concepts such as the chain rule for partial derivatives [Rumelhart et al 1986]. In this technique the derivative of the objective with respect to the input of a neuron can be calculated by computing backwards from the derivative with respect to the output of that neuron (which is the input of the next neuron). The technique propagates all derivatives, or gradients, starting from the top output and going all the way to the bottom; then it is straightforward to compute the respective weight of each link. After MLP and SGD, there was not much progress in solving neural networks until the proposal of another method of backpropagation called Long Short-Term Memory (LSTM) in 1997 [Hochreiter et al 1997]. It shortens the normal gradient descent method and introduces the concept of recurrent networks to learn long-range dependencies. It learns much faster and solves complex artificial long-time-lag tasks that had never been solved before. In the Deep Learning paper, LeCun explains the importance of its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015]. LSTM networks make it possible to train a type of network called Recurrent Neural Networks (RNNs), which can be trained for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNNs were very useful in the following years, but they didn’t solve the problems of deep learning. 
Scientists also developed another type of network that was easier to train, meaning that it needs fewer examples to obtain the right weights in the links between neurons, and it can also produce better classifications. This network is called the Convolutional Neural Network (CNN) and it is characterized by having full connectivity between adjacent layers. It first appears in [LeCun et al 1989] with an application to handwritten zip code recognition. The authors explain how the convolutional and pooling layers in CNNs are directly inspired by the classic notions of cells in visual neuroscience. The CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state uses a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and applied in a famous paper called Playing Atari with Deep Reinforcement Learning [Mnih, V. et al 2013], in which the authors taught a machine to play Atari games directly from pixels, and after training, the machine produced excellent results. Later the AlphaGo team developed a deep reinforcement learning system, using all the technologies cited in this section (LSTM, CNN, RNN), creating an artificial intelligence system capable of learning to play the game of Go, being trained by watching experts, being trained by playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because, due to the complexity of the game, scientists thought it would take many more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). 
From machine learning to machine reasoning. Machine Learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometrics, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521, 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1, 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323. 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65, 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural Computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 3, 9–44. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT Press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.
json metadata{"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"}
Transaction InfoBlock #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452
View Raw JSON Data
{
  "trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
  "block": 17615354,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:03",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "artifitialintelligence",
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "From classic AI techniques to Deep Reinforcement Learning",
      "body": "![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg)\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. \n\n![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg)\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. 
Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. 
A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. 
et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. 
Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. 
D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
      "json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}
2017/11/28 10:17:36
parent authorfesan81
parent permlink2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
authorcheetah
permlinkcheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
title
bodyHi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578
json metadata
Transaction InfoBlock #17615125/Trx 0b6dc347a3542ade12da9b46a949155aa81000aa
View Raw JSON Data
{
  "trx_id": "0b6dc347a3542ade12da9b46a949155aa81000aa",
  "block": 17615125,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:17:36",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "cheetah",
      "permlink": "cheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "",
      "body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578",
      "json_metadata": ""
    }
  ]
}
2017/11/28 10:17:06
votercheetah
authorfesan81
permlink2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
weight8 (0.08%)
Transaction InfoBlock #17615115/Trx 5ac607e7e3075e102926cbf5942cf3a3e30f06a6
View Raw JSON Data
{
  "trx_id": "5ac607e7e3075e102926cbf5942cf3a3e30f06a6",
  "block": 17615115,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:17:06",
  "op": [
    "vote",
    {
      "voter": "cheetah",
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 8
    }
  ]
}
2017/11/28 10:16:09
voterfesan81
authorfesan81
permlink2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
weight10000 (100.00%)
Transaction InfoBlock #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586
View Raw JSON Data
{
  "trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
  "block": 17615096,
  "trx_in_block": 21,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:16:09",
  "op": [
    "vote",
    {
      "voter": "fesan81",
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 10000
    }
  ]
}
2017/11/28 10:16:09
authorfesan81
permlink2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
max accepted payout1000000.000 SBD
percent steem dollars0
allow votestrue
allow curation rewardstrue
extensions[]
Transaction InfoBlock #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586
View Raw JSON Data
{
  "trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
  "block": 17615096,
  "trx_in_block": 21,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:16:09",
  "op": [
    "comment_options",
    {
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "max_accepted_payout": "1000000.000 SBD",
      "percent_steem_dollars": 0,
      "allow_votes": true,
      "allow_curation_rewards": true,
      "extensions": []
    }
  ]
}
2017/11/28 10:16:09
parent author
parent permlinkartifitialintelligence
authorfesan81
permlink2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
titleFrom classic AI techniques to Deep Reinforcement Learning
body![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg) Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. ![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg) Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. 
Scientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results. Later AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). 
From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometric, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & H ner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.
json metadata{"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"}
Transaction InfoBlock #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586
View Raw JSON Data
{
  "trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
  "block": 17615096,
  "trx_in_block": 21,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:16:09",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "artifitialintelligence",
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "From classic AI techniques to Deep Reinforcement Learning",
      "body": "![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg)\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. \n\n![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg)\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. 
Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. 
A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. 
et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. 
Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. 
D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
      "json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}
2017/11/28 09:50:03
voterfesan81
authorfesan81
permlinkfrom-classic-ai-techniques-to-deep-reinforcement-learning
weight10000 (100.00%)
Transaction InfoBlock #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc
View Raw JSON Data
{
  "trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
  "block": 17614575,
  "trx_in_block": 11,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:50:03",
  "op": [
    "vote",
    {
      "voter": "fesan81",
      "author": "fesan81",
      "permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 10000
    }
  ]
}
2017/11/28 09:50:03
authorfesan81
permlinkfrom-classic-ai-techniques-to-deep-reinforcement-learning
max accepted payout1000000.000 SBD
percent steem dollars0
allow votestrue
allow curation rewardstrue
extensions[]
Transaction InfoBlock #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc
View Raw JSON Data
{
  "trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
  "block": 17614575,
  "trx_in_block": 11,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:50:03",
  "op": [
    "comment_options",
    {
      "author": "fesan81",
      "permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "max_accepted_payout": "1000000.000 SBD",
      "percent_steem_dollars": 0,
      "allow_votes": true,
      "allow_curation_rewards": true,
      "extensions": []
    }
  ]
}
2017/11/28 09:50:03
parent author
parent permlinkartifitialintelligence
authorfesan81
permlinkfrom-classic-ai-techniques-to-deep-reinforcement-learning
titleFrom classic AI techniques to Deep Reinforcement Learning
body![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg) Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. ![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg) Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. 
Scientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results. Later AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). 
From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometric, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & H ner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.
json metadata{"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"}
Transaction InfoBlock #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc
View Raw JSON Data
{
  "trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
  "block": 17614575,
  "trx_in_block": 11,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:50:03",
  "op": [
    "comment",
    {
      "parent_author": "",
      "parent_permlink": "artifitialintelligence",
      "author": "fesan81",
      "permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "From classic AI techniques to Deep Reinforcement Learning",
      "body": "![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg)\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. \n\n![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg)\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. 
Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. 
A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. 
et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. 
Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. 
D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 3, 9–44.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, No. 1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
      "json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
    }
  ]
}
steemcreated a new account: @fesan81
2017/11/28 09:43:48
fee0.500 STEEM
delegation57000.000000 VESTS
creatorsteem
new account namefesan81
owner{"weight_threshold":1,"account_auths":[],"key_auths":[["STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",1]]}
active{"weight_threshold":1,"account_auths":[],"key_auths":[["STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",1]]}
posting{"weight_threshold":1,"account_auths":[],"key_auths":[["STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",1]]}
memo keySTM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk
json metadata
extensions[]
Transaction InfoBlock #17614450/Trx 2fb3f31aec55bba631571a17577acc550ea65f36
View Raw JSON Data
{
  "trx_id": "2fb3f31aec55bba631571a17577acc550ea65f36",
  "block": 17614450,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:43:48",
  "op": [
    "account_create_with_delegation",
    {
      "fee": "0.500 STEEM",
      "delegation": "57000.000000 VESTS",
      "creator": "steem",
      "new_account_name": "fesan81",
      "owner": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
            1
          ]
        ]
      },
      "active": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
            1
          ]
        ]
      },
      "posting": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
            1
          ]
        ]
      },
      "memo_key": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk",
      "json_metadata": "",
      "extensions": []
    }
  ]
}

Account Metadata

POSTING JSON METADATA
None
JSON METADATA
None
{
  "posting_json_metadata": {},
  "json_metadata": {}
}

Auth Keys

Owner
Single Signature
Public Keys
STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb1/1
Active
Single Signature
Public Keys
STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F1/1
Posting
Single Signature
Public Keys
STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc1/1
Memo
STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk
{
  "owner": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
        1
      ]
    ]
  },
  "active": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
        1
      ]
    ]
  },
  "posting": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
        1
      ]
    ]
  },
  "memo": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk"
}

Witness Votes

0 / 30
No active witness votes.
[]