fesan81 Steem Profile | Ecosynthesizer

@fesan81

steemit.com/@fesan81

VOTING POWER100.00%

DOWNVOTE POWER100.00%

RESOURCE CREDITS100.00%

REPUTATION PROGRESS0.00%

Net Worth

0.037USD

STEEM

0.000STEEM

SBD

0.000SBD

Effective Power

5.007SP

├── Own SP

0.631SP

└── Incoming DelegationsDeleg

+4.376SP

Detailed Balance

STEEM
balance	0.000STEEM	STEEM
market_balance	0.000STEEM	STEEM
savings_balance	0.000STEEM	STEEM
reward_steem_balance	0.000STEEM	STEEM
STEEM POWER
Own SP	0.631SP	SP
Delegated Out	0.000SP	SP
Delegation In	4.376SP	SP
Effective Power	5.007SP	SP
Reward SP (pending)	0.000SP	SP
SBD
sbd_balance	0.000SBD	SBD
sbd_conversions	0.000SBD	SBD
sbd_market_balance	0.000SBD	SBD
savings_sbd_balance	0.000SBD	SBD
reward_sbd_balance	0.000SBD	SBD

{
  "balance": "0.000 STEEM",
  "savings_balance": "0.000 STEEM",
  "reward_steem_balance": "0.000 STEEM",
  "vesting_shares": "1026.318903 VESTS",
  "delegated_vesting_shares": "0.000000 VESTS",
  "received_vesting_shares": "7117.340903 VESTS",
  "sbd_balance": "0.000 SBD",
  "savings_sbd_balance": "0.000 SBD",
  "reward_sbd_balance": "0.000 SBD",
  "conversions": []
}

Account Info

name	fesan81
id	466352
rank	1,358,003
reputation	130516081
created	2017-11-28T09:43:48
recovery_account	steem
proxy	None
post_count	3
comment_count	0
lifetime_vote_count	0
witnesses_voted_for	0
last_post	2017-11-28T10:29:03
last_root_post	2017-11-28T10:29:03
last_vote_time	2017-11-28T10:29:03
proxied_vsf_votes	0, 0, 0, 0
can_vote	1
voting_power	0
delayed_votes	0
balance	0.000 STEEM
savings_balance	0.000 STEEM
sbd_balance	0.000 SBD
savings_sbd_balance	0.000 SBD
vesting_shares	1026.318903 VESTS
delegated_vesting_shares	0.000000 VESTS
received_vesting_shares	7117.340903 VESTS
reward_vesting_balance	0.000000 VESTS
vesting_balance	0.000 STEEM
vesting_withdraw_rate	0.000000 VESTS
next_vesting_withdrawal	1969-12-31T23:59:59
withdrawn	0
to_withdraw	0
withdraw_routes	0
savings_withdraw_requests	0
last_account_recovery	1970-01-01T00:00:00
reset_account	null
last_owner_update	1970-01-01T00:00:00
last_account_update	1970-01-01T00:00:00
mined	No
sbd_seconds	0
sbd_last_interest_payment	1970-01-01T00:00:00
savings_sbd_last_interest_payment	1970-01-01T00:00:00

{
  "id": 466352,
  "name": "fesan81",
  "owner": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
        1
      ]
    ]
  },
  "active": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
        1
      ]
    ]
  },
  "posting": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
        1
      ]
    ]
  },
  "memo_key": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk",
  "json_metadata": "",
  "posting_json_metadata": "",
  "proxy": "",
  "last_owner_update": "1970-01-01T00:00:00",
  "last_account_update": "1970-01-01T00:00:00",
  "created": "2017-11-28T09:43:48",
  "mined": false,
  "recovery_account": "steem",
  "last_account_recovery": "1970-01-01T00:00:00",
  "reset_account": "null",
  "comment_count": 0,
  "lifetime_vote_count": 0,
  "post_count": 3,
  "can_vote": true,
  "voting_manabar": {
    "current_mana": "8143659806",
    "last_update_time": 1779063441
  },
  "downvote_manabar": {
    "current_mana": 2035914951,
    "last_update_time": 1779063441
  },
  "voting_power": 0,
  "balance": "0.000 STEEM",
  "savings_balance": "0.000 STEEM",
  "sbd_balance": "0.000 SBD",
  "sbd_seconds": "0",
  "sbd_seconds_last_update": "1970-01-01T00:00:00",
  "sbd_last_interest_payment": "1970-01-01T00:00:00",
  "savings_sbd_balance": "0.000 SBD",
  "savings_sbd_seconds": "0",
  "savings_sbd_seconds_last_update": "1970-01-01T00:00:00",
  "savings_sbd_last_interest_payment": "1970-01-01T00:00:00",
  "savings_withdraw_requests": 0,
  "reward_sbd_balance": "0.000 SBD",
  "reward_steem_balance": "0.000 STEEM",
  "reward_vesting_balance": "0.000000 VESTS",
  "reward_vesting_steem": "0.000 STEEM",
  "vesting_shares": "1026.318903 VESTS",
  "delegated_vesting_shares": "0.000000 VESTS",
  "received_vesting_shares": "7117.340903 VESTS",
  "vesting_withdraw_rate": "0.000000 VESTS",
  "next_vesting_withdrawal": "1969-12-31T23:59:59",
  "withdrawn": 0,
  "to_withdraw": 0,
  "withdraw_routes": 0,
  "curation_rewards": 0,
  "posting_rewards": 0,
  "proxied_vsf_votes": [
    0,
    0,
    0,
    0
  ],
  "witnesses_voted_for": 0,
  "last_post": "2017-11-28T10:29:03",
  "last_root_post": "2017-11-28T10:29:03",
  "last_vote_time": "2017-11-28T10:29:03",
  "post_bandwidth": 0,
  "pending_claimed_accounts": 0,
  "vesting_balance": "0.000 STEEM",
  "reputation": 130516081,
  "transfer_history": [],
  "market_history": [],
  "post_history": [],
  "vote_history": [],
  "other_history": [],
  "witness_votes": [],
  "tags_usage": [],
  "guest_bloggers": [],
  "rank": 1358003
}

Withdraw Routes

Incoming	Outgoing
Empty	Empty

{
  "incoming": [],
  "outgoing": []
}

From Date

To Date

steemdelegated 4.376 SP to @fesan81

2026/05/18 00:17:21 UTC

106,143,492|4aea0d1

delegator	steem
delegatee	fesan81
vesting shares	7117.340903 VESTS
Transaction Info	Block #106143492/Trx 4aea0d1b751e3fd7542ddbbdf903b4cbe1a8d516

View Raw JSON Data

{
  "trx_id": "4aea0d1b751e3fd7542ddbbdf903b4cbe1a8d516",
  "block": 106143492,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-05-18T00:17:21",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7117.340903 VESTS"
    }
  ]
}

steemdelegated 2.708 SP to @fesan81

2026/05/12 03:59:51 UTC

105,975,899|2b1e16f

delegator	steem
delegatee	fesan81
vesting shares	4405.130498 VESTS
Transaction Info	Block #105975899/Trx 2b1e16f8356838e4d548082239de9faa053b2dc7

View Raw JSON Data

{
  "trx_id": "2b1e16f8356838e4d548082239de9faa053b2dc7",
  "block": 105975899,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-05-12T03:59:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4405.130498 VESTS"
    }
  ]
}

steemdelegated 4.383 SP to @fesan81

2026/04/25 23:38:12 UTC

105,511,142|4782284

delegator	steem
delegatee	fesan81
vesting shares	7129.856659 VESTS
Transaction Info	Block #105511142/Trx 478228483a50204438f57ee36636053543c29abb

View Raw JSON Data

{
  "trx_id": "478228483a50204438f57ee36636053543c29abb",
  "block": 105511142,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-04-25T23:38:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7129.856659 VESTS"
    }
  ]
}

steemdelegated 2.734 SP to @fesan81

2026/01/23 07:54:00 UTC

102,851,927|881c756

delegator	steem
delegatee	fesan81
vesting shares	4446.677317 VESTS
Transaction Info	Block #102851927/Trx 881c756278aea13ff5bf4877e6a4b75070d6e430

View Raw JSON Data

{
  "trx_id": "881c756278aea13ff5bf4877e6a4b75070d6e430",
  "block": 102851927,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2026-01-23T07:54:00",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4446.677317 VESTS"
    }
  ]
}

curtisjohnsreplied to @fesan81 / srxfha

2025/02/19 10:51:12 UTC

93,145,911|cbaf9f4

parent author	fesan81
parent permlink	from-classic-ai-techniques-to-deep-reinforcement-learning
author	curtisjohns
permlink	srxfha
title
body	The evolution of AI, from classic techniques to the advanced realm of Deep Reinforcement Learning, has been nothing short of revolutionary. These advancements are reshaping industries and creating new opportunities for innovation. For professionals looking to stay ahead, [Artificial Intelligence (AI) Training Programs in USA: Short Courses in AI for Managers in Las Vegas, Nevada, USA](https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/) offer a perfect blend of foundational knowledge and cutting-edge insights. Such programs are essential for managers aiming to harness AI's potential and lead their teams into the future.
json metadata	{"links":["https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/"],"app":"steemit/0.2"}
Transaction Info	Block #93145911/Trx cbaf9f49b35fdf6f1bf1f9ada50bfbb92584c1e2

View Raw JSON Data

{
  "trx_id": "cbaf9f49b35fdf6f1bf1f9ada50bfbb92584c1e2",
  "block": 93145911,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2025-02-19T10:51:12",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "curtisjohns",
      "permlink": "srxfha",
      "title": "",
      "body": "The evolution of AI, from classic techniques to the advanced realm of Deep Reinforcement Learning, has been nothing short of revolutionary. These advancements are reshaping industries and creating new opportunities for innovation. For professionals looking to stay ahead, [Artificial Intelligence (AI) Training Programs in USA: Short Courses in AI for Managers in Las Vegas, Nevada, USA](https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/) offer a perfect blend of foundational knowledge and cutting-edge insights. Such programs are essential for managers aiming to harness AI's potential and lead their teams into the future.",
      "json_metadata": "{\"links\":[\"https://www.iim-edu.org/managementtrainingcoursesusa/nevada-las-vegas-nv/\"],\"app\":\"steemit/0.2\"}"
    }
  ]
}

steemdelegated 2.835 SP to @fesan81

2024/12/17 03:13:03 UTC

91,298,335|35c8ead

delegator	steem
delegatee	fesan81
vesting shares	4610.896514 VESTS
Transaction Info	Block #91298335/Trx 35c8ead72b0fdd36795fbf9255ead640917b5a40

View Raw JSON Data

{
  "trx_id": "35c8ead72b0fdd36795fbf9255ead640917b5a40",
  "block": 91298335,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2024-12-17T03:13:03",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4610.896514 VESTS"
    }
  ]
}

steemdelegated 2.939 SP to @fesan81

2023/11/13 18:55:48 UTC

79,852,533|037986a

delegator	steem
delegatee	fesan81
vesting shares	4780.030046 VESTS
Transaction Info	Block #79852533/Trx 037986a84a0673bcab0a9f219e2d6f62fb34bd0d

View Raw JSON Data

{
  "trx_id": "037986a84a0673bcab0a9f219e2d6f62fb34bd0d",
  "block": 79852533,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2023-11-13T18:55:48",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "4780.030046 VESTS"
    }
  ]
}

steemdelegated 4.744 SP to @fesan81

2023/09/21 21:52:15 UTC

78,347,876|adae44d

delegator	steem
delegatee	fesan81
vesting shares	7717.308832 VESTS
Transaction Info	Block #78347876/Trx adae44dc781faef55a299f48f95fe0022f1d24f6

View Raw JSON Data

{
  "trx_id": "adae44dc781faef55a299f48f95fe0022f1d24f6",
  "block": 78347876,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2023-09-21T21:52:15",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7717.308832 VESTS"
    }
  ]
}

steemdelegated 4.881 SP to @fesan81

2022/11/03 11:39:42 UTC

69,113,210|0ff9088

delegator	steem
delegatee	fesan81
vesting shares	7938.990270 VESTS
Transaction Info	Block #69113210/Trx 0ff9088204ae0c3dbb8056bc3de7d92631fccd08

View Raw JSON Data

{
  "trx_id": "0ff9088204ae0c3dbb8056bc3de7d92631fccd08",
  "block": 69113210,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2022-11-03T11:39:42",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "7938.990270 VESTS"
    }
  ]
}

steemdelegated 5.016 SP to @fesan81

2022/01/17 10:55:51 UTC

60,809,381|c9a210a

delegator	steem
delegatee	fesan81
vesting shares	8159.523501 VESTS
Transaction Info	Block #60809381/Trx c9a210a64e4a04873b39b8fb965864b18451001a

View Raw JSON Data

{
  "trx_id": "c9a210a64e4a04873b39b8fb965864b18451001a",
  "block": 60809381,
  "trx_in_block": 18,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2022-01-17T10:55:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8159.523501 VESTS"
    }
  ]
}

steemdelegated 5.129 SP to @fesan81

2021/06/14 00:51:06 UTC

54,607,769|a8f4abf

delegator	steem
delegatee	fesan81
vesting shares	8343.292159 VESTS
Transaction Info	Block #54607769/Trx a8f4abf39b60cb8cf9cf21109e1b6da0cf76291c

View Raw JSON Data

{
  "trx_id": "a8f4abf39b60cb8cf9cf21109e1b6da0cf76291c",
  "block": 54607769,
  "trx_in_block": 1,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2021-06-14T00:51:06",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8343.292159 VESTS"
    }
  ]
}

steemdelegated 5.245 SP to @fesan81

2020/12/11 11:09:39 UTC

49,355,222|940df32

delegator	steem
delegatee	fesan81
vesting shares	8530.714133 VESTS
Transaction Info	Block #49355222/Trx 940df32535961a20ee428af7e48845661f172607

View Raw JSON Data

{
  "trx_id": "940df32535961a20ee428af7e48845661f172607",
  "block": 49355222,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-11T11:09:39",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8530.714133 VESTS"
    }
  ]
}

steemdelegated 1.176 SP to @fesan81

2020/12/06 04:46:57 UTC

49,206,788|1230456

delegator	steem
delegatee	fesan81
vesting shares	1912.543513 VESTS
Transaction Info	Block #49206788/Trx 12304560b06a1623a9baf9affde5198c7027c8e0

View Raw JSON Data

{
  "trx_id": "12304560b06a1623a9baf9affde5198c7027c8e0",
  "block": 49206788,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-06T04:46:57",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "1912.543513 VESTS"
    }
  ]
}

steemdelegated 5.248 SP to @fesan81

2020/12/05 14:47:51 UTC

49,190,319|a15087d

delegator	steem
delegatee	fesan81
vesting shares	8536.921987 VESTS
Transaction Info	Block #49190319/Trx a15087d90c575f6bf0e74ea7e896319c3fbf4277

View Raw JSON Data

{
  "trx_id": "a15087d90c575f6bf0e74ea7e896319c3fbf4277",
  "block": 49190319,
  "trx_in_block": 6,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-12-05T14:47:51",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8536.921987 VESTS"
    }
  ]
}

steemdelegated 1.180 SP to @fesan81

2020/11/02 15:43:21 UTC

48,257,899|5ea6e20

delegator	steem
delegatee	fesan81
vesting shares	1920.017158 VESTS
Transaction Info	Block #48257899/Trx 5ea6e20a2a7debb55b3f227c1b8330d589a02b37

View Raw JSON Data

{
  "trx_id": "5ea6e20a2a7debb55b3f227c1b8330d589a02b37",
  "block": 48257899,
  "trx_in_block": 0,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-11-02T15:43:21",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "1920.017158 VESTS"
    }
  ]
}

steemdelegated 5.373 SP to @fesan81

2020/05/09 05:44:15 UTC

43,217,034|aa9fa93

delegator	steem
delegatee	fesan81
vesting shares	8739.727346 VESTS
Transaction Info	Block #43217034/Trx aa9fa936b5b0aea4ea795102e47d710e94a01695

View Raw JSON Data

{
  "trx_id": "aa9fa936b5b0aea4ea795102e47d710e94a01695",
  "block": 43217034,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-05-09T05:44:15",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8739.727346 VESTS"
    }
  ]
}

steemdelegated 1.201 SP to @fesan81

2020/05/08 09:20:45 UTC

43,193,138|0154e0a

delegator	steem
delegatee	fesan81
vesting shares	1953.311140 VESTS
Transaction Info	Block #43193138/Trx 0154e0a16af12532e1cb31212c847e614a6e66e1

View Raw JSON Data

{
  "trx_id": "0154e0a16af12532e1cb31212c847e614a6e66e1",
  "block": 43193138,
  "trx_in_block": 16,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-05-08T09:20:45",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "1953.311140 VESTS"
    }
  ]
}

steemdelegated 5.381 SP to @fesan81

2020/04/15 21:37:12 UTC

42,562,566|acc89f0

delegator	steem
delegatee	fesan81
vesting shares	8752.704765 VESTS
Transaction Info	Block #42562566/Trx acc89f09e3fb23df53a682f6f9989b3982fd3d31

View Raw JSON Data

{
  "trx_id": "acc89f09e3fb23df53a682f6f9989b3982fd3d31",
  "block": 42562566,
  "trx_in_block": 15,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2020-04-15T21:37:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8752.704765 VESTS"
    }
  ]
}

steemitboardreplied to @fesan81 / steemitboard-notify-fesan81-20191128t101545000z

2019/11/28 10:15:45 UTC

38,567,210|e1b1292

parent author	fesan81
parent permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
author	steemitboard
permlink	steemitboard-notify-fesan81-20191128t101545000z
title
body	Congratulations @fesan81! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table> <sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@fesan81) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=fesan81)_</sub> ###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!
json metadata	{"image":["https://steemitboard.com/img/notify.png"]}
Transaction Info	Block #38567210/Trx e1b12920842a4da876ca2e70302909b9e3417dc4

View Raw JSON Data

{
  "trx_id": "e1b12920842a4da876ca2e70302909b9e3417dc4",
  "block": 38567210,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-11-28T10:15:45",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-fesan81-20191128t101545000z",
      "title": "",
      "body": "Congratulations @fesan81! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday2.png</td><td>Happy Birthday! - You are on the Steem blockchain for 2 years!</td></tr></table>\n\n<sub>_You can view [your badges on your Steem Board](https://steemitboard.com/@fesan81) and compare to others on the [Steem Ranking](https://steemitboard.com/ranking/index.php?name=fesan81)_</sub>\n\n\n###### [Vote for @Steemitboard as a witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) to get one more award and increased upvotes!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
    }
  ]
}

steemdelegated 5.501 SP to @fesan81

2019/05/12 14:52:06 UTC

32,845,445|fc02bcb

delegator	steem
delegatee	fesan81
vesting shares	8948.327570 VESTS
Transaction Info	Block #32845445/Trx fc02bcb8c077d51409bfdf1481c27f81d4d51d84

View Raw JSON Data

{
  "trx_id": "fc02bcb8c077d51409bfdf1481c27f81d4d51d84",
  "block": 32845445,
  "trx_in_block": 10,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2019-05-12T14:52:06",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "8948.327570 VESTS"
    }
  ]
}

steemitboardreplied to @fesan81 / steemitboard-notify-fesan81-20181128t181157000z

2018/11/28 18:11:57 UTC

28,102,358|e25fd61

parent author	fesan81
parent permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
author	steemitboard
permlink	steemitboard-notify-fesan81-20181128t181157000z
title
body	Congratulations @fesan81! You received a personal award! <table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday1.png</td><td>1 Year on Steemit</td></tr></table> <sub>_[Click here to view your Board of Honor](https://steemitboard.com/@fesan81)_</sub> > Support [SteemitBoard's project](https://steemit.com/@steemitboard)! [Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1) and get one more award!
json metadata	{"image":["https://steemitboard.com/img/notify.png"]}
Transaction Info	Block #28102358/Trx e25fd613b1c01eb8c06ed8f6ae6ca04eba85675b

View Raw JSON Data

{
  "trx_id": "e25fd613b1c01eb8c06ed8f6ae6ca04eba85675b",
  "block": 28102358,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-11-28T18:11:57",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-fesan81-20181128t181157000z",
      "title": "",
      "body": "Congratulations @fesan81! You received a personal award!\n\n<table><tr><td>https://steemitimages.com/70x70/http://steemitboard.com/@fesan81/birthday1.png</td><td>1 Year on Steemit</td></tr></table>\n\n<sub>_[Click here to view your Board of Honor](https://steemitboard.com/@fesan81)_</sub>\n\n\n> Support [SteemitBoard's project](https://steemit.com/@steemitboard)! **[Vote for its witness](https://v2.steemconnect.com/sign/account-witness-vote?witness=steemitboard&approve=1)** and **get one more award**!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notify.png\"]}"
    }
  ]
}

steemdelegated 5.624 SP to @fesan81

2018/05/16 20:17:12 UTC

22,489,845|9c62476

delegator	steem
delegatee	fesan81
vesting shares	9147.880005 VESTS
Transaction Info	Block #22489845/Trx 9c624768b6f900c7305367d53c22f378917890e7

View Raw JSON Data

{
  "trx_id": "9c624768b6f900c7305367d53c22f378917890e7",
  "block": 22489845,
  "trx_in_block": 27,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-05-16T20:17:12",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "9147.880005 VESTS"
    }
  ]
}

steemdelegated 18.158 SP to @fesan81

2018/04/21 20:42:54 UTC

21,771,146|3f25b7c

delegator	steem
delegatee	fesan81
vesting shares	29535.849045 VESTS
Transaction Info	Block #21771146/Trx 3f25b7c23f8d3b576ee414d1b3115663303993af

View Raw JSON Data

{
  "trx_id": "3f25b7c23f8d3b576ee414d1b3115663303993af",
  "block": 21771146,
  "trx_in_block": 13,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2018-04-21T20:42:54",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "29535.849045 VESTS"
    }
  ]
}

steemdelegated 18.284 SP to @fesan81

2017/12/12 22:19:57 UTC

18,032,582|8718804

delegator	steem
delegatee	fesan81
vesting shares	29739.681097 VESTS
Transaction Info	Block #18032582/Trx 871880414549d43d34f7037684c3102ab8d07915

View Raw JSON Data

{
  "trx_id": "871880414549d43d34f7037684c3102ab8d07915",
  "block": 18032582,
  "trx_in_block": 16,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-12-12T22:19:57",
  "op": [
    "delegate_vesting_shares",
    {
      "delegator": "steem",
      "delegatee": "fesan81",
      "vesting_shares": "29739.681097 VESTS"
    }
  ]
}

neurallearnerreplied to @fesan81 / re-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z

2017/11/30 15:06:36 UTC

17,678,489|280064f

parent author	fesan81
parent permlink	2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
author	neurallearner
permlink	re-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z
title
body	Hey come and give a shot and see how much you know about deep-learning by explaining the [joke](https://steemit.com/ai/@neurallearner/ai-joke-activities) and at the same time promote learning AI.
json metadata	{"tags":["artifitialintelligence"],"links":["https://steemit.com/ai/@neurallearner/ai-joke-activities"],"app":"steemit/0.1"}
Transaction Info	Block #17678489/Trx 280064fbc1e023a86819c80863d819b66e84325f

View Raw JSON Data

{
  "trx_id": "280064fbc1e023a86819c80863d819b66e84325f",
  "block": 17678489,
  "trx_in_block": 15,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-30T15:06:36",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "neurallearner",
      "permlink": "re-fesan81-2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning-20171130t150626925z",
      "title": "",
      "body": "Hey come and give a shot and see how much you know about deep-learning by explaining the [joke](https://steemit.com/ai/@neurallearner/ai-joke-activities) and at the same time promote learning AI.",
      "json_metadata": "{\"tags\":[\"artifitialintelligence\"],\"links\":[\"https://steemit.com/ai/@neurallearner/ai-joke-activities\"],\"app\":\"steemit/0.1\"}"
    }
  ]
}

steemitboardreplied to @fesan81 / steemitboard-notify-fesan81-20171128t121556000z

2017/11/28 12:15:54 UTC

17,617,488|d80d9da

parent author	fesan81
parent permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
author	steemitboard
permlink	steemitboard-notify-fesan81-20171128t121556000z
title
body	Congratulations @fesan81! You have completed some achievement on Steemit and have been rewarded with new badge(s) : [![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstpost.png)](http://steemitboard.com/@fesan81) You published your First Post [![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvote.png)](http://steemitboard.com/@fesan81) You made your First Vote [![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvoted.png)](http://steemitboard.com/@fesan81) You got a First Vote Click on any badge to view your own Board of Honor on SteemitBoard. For more information about SteemitBoard, click [here](https://steemit.com/@steemitboard) If you no longer want to receive notifications, reply to this comment with the word `STOP` > By upvoting this notification, you can help all Steemit users. Learn how [here](https://steemit.com/steemitboard/@steemitboard/http-i-cubeupload-com-7ciqeo-png)!
json metadata	{"image":["https://steemitboard.com/img/notifications.png"]}
Transaction Info	Block #17617488/Trx d80d9da4e0af2ee834592b3838f3be253d89df95

View Raw JSON Data

{
  "trx_id": "d80d9da4e0af2ee834592b3838f3be253d89df95",
  "block": 17617488,
  "trx_in_block": 20,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T12:15:54",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "steemitboard",
      "permlink": "steemitboard-notify-fesan81-20171128t121556000z",
      "title": "",
      "body": "Congratulations @fesan81! You have completed some achievement on Steemit and have been rewarded with new badge(s) :\n\n[![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstpost.png)](http://steemitboard.com/@fesan81) You published your First Post\n[![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvote.png)](http://steemitboard.com/@fesan81) You made your First Vote\n[![](https://steemitimages.com/70x80/http://steemitboard.com/notifications/firstvoted.png)](http://steemitboard.com/@fesan81) You got a First Vote\n\nClick on any badge to view your own Board of Honor on SteemitBoard.\nFor more information about SteemitBoard, click [here](https://steemit.com/@steemitboard)\n\nIf you no longer want to receive notifications, reply to this comment with the word `STOP`\n\n> By upvoting this notification, you can help all Steemit users. Learn how [here](https://steemit.com/steemitboard/@steemitboard/http-i-cubeupload-com-7ciqeo-png)!",
      "json_metadata": "{\"image\":[\"https://steemitboard.com/img/notifications.png\"]}"
    }
  ]
}

cheetahreplied to @fesan81 / cheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:29:24 UTC

17,615,361|f8ad938

parent author	fesan81
parent permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
author	cheetah
permlink	cheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
title
body	Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578
json metadata
Transaction Info	Block #17615361/Trx f8ad938a0fe5df8e083e06de25a60f0c70b3d833

View Raw JSON Data

{
  "trx_id": "f8ad938a0fe5df8e083e06de25a60f0c70b3d833",
  "block": 17615361,
  "trx_in_block": 9,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:24",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "cheetah",
      "permlink": "cheetah-re-fesan812jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "",
      "body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578",
      "json_metadata": ""
    }
  ]
}

cheetahupvoted (0.08%) @fesan81 / 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:29:21 UTC

17,615,360|bc2eaef

voter	cheetah
author	fesan81
permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
weight	8 (0.08%)
Transaction Info	Block #17615360/Trx bc2eaef723a0100a5b96c3fcec6ab7fcff1bb367

View Raw JSON Data

{
  "trx_id": "bc2eaef723a0100a5b96c3fcec6ab7fcff1bb367",
  "block": 17615360,
  "trx_in_block": 2,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:21",
  "op": [
    "vote",
    {
      "voter": "cheetah",
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 8
    }
  ]
}

fesan81upvoted (100.00%) @fesan81 / 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:29:03 UTC

17,615,354|17ef38e

voter	fesan81
author	fesan81
permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
weight	10000 (100.00%)
Transaction Info	Block #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452

View Raw JSON Data

{
  "trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
  "block": 17615354,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:03",
  "op": [
    "vote",
    {
      "voter": "fesan81",
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 10000
    }
  ]
}

fesan81updated options for 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:29:03 UTC

17,615,354|17ef38e

author	fesan81
permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
max accepted payout	1000000.000 SBD
percent steem dollars	0
allow votes	true
allow curation rewards	true
extensions	[]
Transaction Info	Block #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452

View Raw JSON Data

{
  "trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
  "block": 17615354,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:29:03",
  "op": [
    "comment_options",
    {
      "author": "fesan81",
      "permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "max_accepted_payout": "1000000.000 SBD",
      "percent_steem_dollars": 0,
      "allow_votes": true,
      "allow_curation_rewards": true,
      "extensions": []
    }
  ]
}

fesan81 published a new post: 2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:29:03 UTC

17,615,354|17ef38e

parent author
parent permlink	artifitialintelligence
author	fesan81
permlink	2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning
title	From classic AI techniques to Deep Reinforcement Learning
body	![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg) Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. ![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg) Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. 
Scientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results. Later AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). 
From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometrics, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521, 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1, 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65, 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 3, 9–44. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.
json metadata	{"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #17615354/Trx 17ef38ed1d9794d07176db8f7eaee5c72e655452

View Raw JSON Data

{
"trx_id": "17ef38ed1d9794d07176db8f7eaee5c72e655452",
"block": 17615354,
"trx_in_block": 14,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:29:03",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "artifitialintelligence",
"author": "fesan81",
"permlink": "2jyyxt-from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "From classic AI techniques to Deep Reinforcement Learning",
"body": "![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg)\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. \n\n![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg)\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. 
Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. 
A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. 
et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. 
Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. 
D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}

cheetahreplied to @fesan81 / cheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:17:36 UTC

17,615,125|0b6dc34

parent author	fesan81
parent permlink	2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
author	cheetah
permlink	cheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
title
body	Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in: https://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578
json metadata
Transaction Info	Block #17615125/Trx 0b6dc347a3542ade12da9b46a949155aa81000aa

View Raw JSON Data

{
  "trx_id": "0b6dc347a3542ade12da9b46a949155aa81000aa",
  "block": 17615125,
  "trx_in_block": 7,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:17:36",
  "op": [
    "comment",
    {
      "parent_author": "fesan81",
      "parent_permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "author": "cheetah",
      "permlink": "cheetah-re-fesan812fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "title": "",
      "body": "Hi! I am a robot. I just upvoted you! I found similar content that readers might be interested in:\nhttps://towardsdatascience.com/from-classic-ai-techniques-to-deep-learning-753d20cf8578",
      "json_metadata": ""
    }
  ]
}

cheetahupvoted (0.08%) @fesan81 / 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:17:06 UTC

17,615,115|5ac607e

voter	cheetah
author	fesan81
permlink	2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
weight	8 (0.08%)
Transaction Info	Block #17615115/Trx 5ac607e7e3075e102926cbf5942cf3a3e30f06a6

View Raw JSON Data

{
  "trx_id": "5ac607e7e3075e102926cbf5942cf3a3e30f06a6",
  "block": 17615115,
  "trx_in_block": 3,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:17:06",
  "op": [
    "vote",
    {
      "voter": "cheetah",
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 8
    }
  ]
}

fesan81upvoted (100.00%) @fesan81 / 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:16:09 UTC

17,615,096|e66cbdd

voter	fesan81
author	fesan81
permlink	2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
weight	10000 (100.00%)
Transaction Info	Block #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586

View Raw JSON Data

{
  "trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
  "block": 17615096,
  "trx_in_block": 21,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:16:09",
  "op": [
    "vote",
    {
      "voter": "fesan81",
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 10000
    }
  ]
}

fesan81updated options for 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:16:09 UTC

17,615,096|e66cbdd

author	fesan81
permlink	2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
max accepted payout	1000000.000 SBD
percent steem dollars	0
allow votes	true
allow curation rewards	true
extensions	[]
Transaction Info	Block #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586

View Raw JSON Data

{
  "trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
  "block": 17615096,
  "trx_in_block": 21,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T10:16:09",
  "op": [
    "comment_options",
    {
      "author": "fesan81",
      "permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
      "max_accepted_payout": "1000000.000 SBD",
      "percent_steem_dollars": 0,
      "allow_votes": true,
      "allow_curation_rewards": true,
      "extensions": []
    }
  ]
}

fesan81published a new post: 2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 10:16:09 UTC

17,615,096|e66cbdd

parent author
parent permlink	artifitialintelligence
author	fesan81
permlink	2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning
title	From classic AI techniques to Deep Reinforcement Learning
body	![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg) Building machines that can learn from examples, experience, or even from other machines at human level is the main goal of solving AI. That goal, in other words, is to create a machine that passes the Turing test: when a human is interacting with it, it will not be possible for the human to conclude whether he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of the 20th century. Since then the field was developed as a theoretical branch of stochastic operations research and computer science, but without any breakthrough application. But in the last 20 years, thanks to the synergy between big data sets, especially labeled data, and the augmentation of computer power using graphics processing units, those algorithms have been developed into more complex techniques, technologies and reasoning logics that make it possible to achieve several goals, such as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng attributes this success to the capacity of deep NNs to learn complicated functions correctly, and to their performance, which grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided into two types: supervised learning, which trains a model that takes a known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. ![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg) Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link. Since MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before. In Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning. 
Scientists also developed another type of network that was easier to train, that is, it needs fewer examples to gain the right weights in the links between neurons, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The authors explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state uses a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and applied in a famous paper called “Playing Atari with Deep Reinforcement Learning” [Mnih, V. et al 2013] in which they train a machine to play Atari games directly from pixels, and after training, the machine outputs excellent results. Later the AlphaGo team developed a deep reinforcement learning system, using all technologies cited in this section (LSTM, CNN, RNN), creating an artificial intelligence system capable of learning to play the game of Go, being trained by watching experts, being trained by playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because, due to the complexity of the game, scientists thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). 
From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometrics, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521, 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1, 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65, 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.
json metadata	{"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #17615096/Trx e66cbdd76ce82ec27cfd1ef45d24a26ac0638586

View Raw JSON Data

{
"trx_id": "e66cbdd76ce82ec27cfd1ef45d24a26ac0638586",
"block": 17615096,
"trx_in_block": 21,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T10:16:09",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "artifitialintelligence",
"author": "fesan81",
"permlink": "2fbu4q-from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "From classic AI techniques to Deep Reinforcement Learning",
"body": "![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg)\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. \n\n![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg)\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. 
Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. 
A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015],LSTM networks let train some type of networks called Recurrent Neural Networks (RNN) that can be train for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNN where the very useful next years, but it didn’t solve the problems of deep learning.\n\nScientist also developed another type of network that was easier to train, that means, it needs less examples to gain the right weights in the links between neuron, and it also can do better classifications. This network is called Convolutional neural network (CNN) and it is characterized for having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The author explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state use a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and apply in a famous paper call playing Atari with deep reinforcement learning [Mnih, V. 
et al 2013] in which they learn a machine to play Atari games directly from pixels, and after training, the machine output excellent results.\n\nLater AlhpaGo team developed a deep reinforcement learning system, using all technologies cited in this session (LSTM, CNN,RNN) creating an artificial intelligence system capable to learn to play the game of Go, be trained watching experts, be trained playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because due to the complexity of the game, scientist thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classi cation. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The \\wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. 
Technometric, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & H\nner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533{536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. 
D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 39.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}

fesan81 upvoted (100.00%) @fesan81 / from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 09:50:03 UTC

17,614,575|41b4ecf

voter	fesan81
author	fesan81
permlink	from-classic-ai-techniques-to-deep-reinforcement-learning
weight	10000 (100.00%)
Transaction Info	Block #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc

View Raw JSON Data

{
  "trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
  "block": 17614575,
  "trx_in_block": 11,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:50:03",
  "op": [
    "vote",
    {
      "voter": "fesan81",
      "author": "fesan81",
      "permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "weight": 10000
    }
  ]
}

fesan81 updated options for from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 09:50:03 UTC

17,614,575|41b4ecf

author	fesan81
permlink	from-classic-ai-techniques-to-deep-reinforcement-learning
max accepted payout	1000000.000 SBD
percent steem dollars	0
allow votes	true
allow curation rewards	true
extensions	[]
Transaction Info	Block #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc

View Raw JSON Data

{
  "trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
  "block": 17614575,
  "trx_in_block": 11,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:50:03",
  "op": [
    "comment_options",
    {
      "author": "fesan81",
      "permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
      "max_accepted_payout": "1000000.000 SBD",
      "percent_steem_dollars": 0,
      "allow_votes": true,
      "allow_curation_rewards": true,
      "extensions": []
    }
  ]
}

fesan81 published a new post: from-classic-ai-techniques-to-deep-reinforcement-learning

2017/11/28 09:50:03 UTC

17,614,575|41b4ecf

parent author
parent permlink	artifitialintelligence
author	fesan81
permlink	from-classic-ai-techniques-to-deep-reinforcement-learning
title	From classic AI techniques to Deep Reinforcement Learning
body	![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg) Building machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950]. The fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data. Deep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning: Machine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. ![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg) Moreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998]. Knowledge discovery with efficient algorithms for unsupervised or supervised feature learning Deep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning. One of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943] “Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002] Also inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited. In another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP) Bellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA) Finally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.* Integration of reasoning techniques with deep learning Deep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning. Initially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) . Perceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. 
One of the most important methods inside backpropagation is Stochastic Gradient Descent (SGD); this is an algorithm that aims to minimize the error rate using calculus concepts such as the chain rule for partial derivatives [Rumelhart, et al 1986]. In this technique the derivative of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of the next neuron). The technique propagates all derivatives, or gradients, starting from the top output and going all the way to the bottom; then it is straightforward to compute the respective weight of each link. After MLP and SGD, there was not much progress in solving neural networks until the proposal of another method of backpropagation called Long Short Term Memory (LSTM) in 1997 [Hochreiter et al 1997]. It shortens the normal gradient descent method and introduces the concept of a recurrent network to learn long range dependencies. It learns much faster and solves complex artificial long-time-lag tasks that have never been solved before. In the Deep learning paper LeCun explains the importance of its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015]. LSTM networks make it possible to train a type of network called Recurrent Neural Networks (RNN) that can be trained for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNNs were very useful in the following years, but they didn’t solve the problems of deep learning. 
Scientists also developed another type of network that was easier to train, that is, it needs fewer examples to gain the right weights in the links between neurons, and it also can do better classifications. This network is called a Convolutional Neural Network (CNN) and it is characterized by having a full connectivity between adjacent layers. It appears first in [Lecun et al 1989] with an application of handwritten zip code recognition. The authors explain how the convolutional and pooling layers in CNN are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision. Systems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state uses a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and applied in a famous paper called Playing Atari with Deep Reinforcement Learning [Mnih, V. et al 2013] in which they teach a machine to play Atari games directly from pixels, and after training, the machine produced excellent results. Later the AlphaGo team developed a deep reinforcement learning system, using all technologies cited in this section (LSTM, CNN, RNN), creating an artificial intelligence system capable of learning to play the game of Go, being trained by watching experts, being trained by playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because, due to the complexity of the game, scientists thought it would take more years to make a machine win this game. [Silver et al 2016] Bibliography 1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA. 2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press. 3. Bottou, L. (2014). 
From machine learning to machine reasoning. Machine learning, 94 (2), 133–149. 4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649). 5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999 6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009 7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on (pp. 6645–6649). 8. Hebb, D. O. (1949). The organization of behavior. Wiley. 9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61. 10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. Technometrics, 33 (3), 251–272. 11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105). 12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444. 13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551. 14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323 15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163). 16. McCulloch, Warren; Walter Pitts (1943). 
“A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133. 18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002) 19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602. 20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408. 21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536. 22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997. 23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489. 24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 3, 9–44. 25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press. 26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.
json metadata	{"tags":["artifitialintelligence","datascience","computerscience","deeplearning","reinforcementlearning"],"image":["https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg","https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg"],"app":"steemit/0.1","format":"markdown"}
Transaction Info	Block #17614575/Trx 41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc

View Raw JSON Data

{
"trx_id": "41b4ecf4cac6a0147fa130dac95e1e8ebd2b2ffc",
"block": 17614575,
"trx_in_block": 11,
"op_in_trx": 0,
"virtual_op": 0,
"timestamp": "2017-11-28T09:50:03",
"op": [
"comment",
{
"parent_author": "",
"parent_permlink": "artifitialintelligence",
"author": "fesan81",
"permlink": "from-classic-ai-techniques-to-deep-reinforcement-learning",
"title": "From classic AI techniques to Deep Reinforcement Learning",
"body": "![machine learning.jpg](https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg)\nBuilding machines that can learn from examples, experience, or even from another machines at human level are the main goal of solving AI. That goal in other words is to create a machine that pass the Turing test: when a human is interacting with it, for the human it will not possible to conclude if it he is interacting with a human or a machine [Turing, A.M 1950].\n\nThe fundamental algorithms of deep learning were developed in the middle of 20th century. Since them the field was developed as a theory branch of stochastic operations research and computer science, but without any breakthrough application. But, in the last 20 years the synergy between big data sets, specially labeled data, and augmentation of computer power using graphics processor units, those algorithms have been developed in more complex techniques, technologies and reasoning logics enable to achieve several goals as reducing word error rates in speech recognition; cutting the error rate in an image recognition competition [Krizhevsky et al 2012] and beating a human champion at Go [Silver et al 2016]. Andrew Ng, attributes this success because the capacity of deep NN to learn complicate functions correctly, and its performance that grows proportionally to the input data.\n\nDeep learning is a class of machine learning that allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. [Lecun et al 2015] To understand this technology it is important to know the main techniques of machine learning:\n\nMachine learning techniques are divided in two types: supervised learning, which trains a model that takes known data set (a labeled training set) as input and generates a model that can predict future output of new data. 
Unsupervised learning takes a dataset (unlabeled) and find patterns or intrinsic structures in data, it usually works as clustering data. \n\n![ML 1 ENG.jpg](https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg)\nMoreover, reinforcement learning problems involve learning to maximize a numerical reward signal from experience, this is, how to map steps to actions (or create a policy) in order to maximize reward utility. This type of machine learning do not lean from a training set of labeled data, it learns from interaction with its environment. It tries several paths in order to maximize the long term accumulate reward, also call utility. Reinforcement learning is characterized by this points: learning system’s actions influence its later inputs, it doesn’t have direct instructions as to what actions to take, and where the consequences of actions, including reward signals, play out over extended time periods [Sutton & Barto 1998].\n\nKnowledge discovery with efficient algorithms for unsupervised or supervised feature learning\n\nDeep learning is a type of machine learning based in some basic algorithms that were developed in the middle of 20th century. Most of them are used within neural networks, in next section we will show some of the most important algorithms of machine learning.\n\nOne of the most powerful techniques in machine learning is Neural Networks because it can be implemented in complementary of different types of machine learning, as show in next sessions. Inspired by the human brain, the neurophysiologist Warren McCulloch and the logician Walter Pits proposed a neural network consists of highly connected networks of functions that map the path from inputs to the desired outputs. It was a fist mathematic approach. The network is trained by iteratively modifying the weights of the connections. [M. 
Warren, W Piltts 1943]\n\n“Neural networks, with their remarkable ability to derive meaning from complicated or imprecise data, can be used to extract patterns and detect trends that are too complex to be noticed by either humans or other computer techniques. A trained neural network can be thought of as an “expert” in the category of information it has been given to analyze. This expert can then be used to provide projections given new situations of interest and answer what if questions.” [Melin et al 2002]\n\nAlso inspired by neurosciences, Rosenblatt [Rosenblatt et al F. 1958] have developed the perceptron, that is an algorithm for learning a binary classifier, it maps the output of each neuron to an 0 or 1; It takes an input vector x, the weigh vector w and evaluate if the scalar product overcome the threshold u, that is f(x) = 1 if wx — u > 0 or 0 otherwise. In simple layer Neural Networks this algorithm wasn’t very useful because binary classification is limited.\n\nIn another side, reinforcement learning algorithms were developed from dynamic programing principles. Dynamic programming is the group of algorithms that can solve all type of Markov Decision Process. [Bellman 1957 Dynamic Programing] MDP are mathematical model for modeling decision making in stochastic situation. They are usually represented by a graph, where the nodes are the states and the lines are actions from one state to other, starting from each state each actions have a certain probability to occur independent with the past state or action. Each new state gives a reward (positive or negative). Solve the MDP means to found a policy that maximize the sum of all reward. If the probability to be in a certain state is not 100%, the problem becomes a Partial Observable MDP (POMDP)\n\nBellman was first in proposing an equation that can solve this problems. [1957 Bellman] this recursive formula provides the utility of following certain policy expecting the highest reward. 
Solving this equations means finding the optimal policy, this problem was complicate to solve because this equation involves a maximization function, which cannot be derivate. The problem let the domain of reinforcement learning without any relevant advancement until 1989 when Watkins proposed the Q-learning algorithm [Watkins, 1989. Ph.D. thesis] It solve the problem by calculation quantity of state to actions. (SARSA)\n\nFinally the convergence of the solution is warranty by using the Temporal Difference (TD) learning algorithm. It was propose by R. S. Sutton in 1988. [Sutton et al 1988] Since his first used it has become a reference for solving reinforcement learning problems. It ensures the participation of most of the states, helping to solve a challenge born with the exploration-exploitation dilemma.*\n\nIntegration of reasoning techniques with deep learning\n\nDeep learning starts when neural networks develop more than just one layer. Working in several layer of neurons was only the first step towards deep learning and data mining. In this section we will expose how the techniques of the first section are integrated into multi-layer neural networks and they developed together the fundaments of deep learning.\nInitially the perceptron was conceived to solve one layer neural networks, it works as a one dimension classifier. This technique was not very useful for example in speech or image classification because techniques must be insensitive to irrelevant variations of the input such as orientation of the photo, illumination, zoom, but they should be sensitive to details that difference an image from another (a wolf from a dog for example) .\n\nPerceptron start being useful only in multi-layer networks when Multiple Layer perceptron (MLP) was developed in 1986 by Rumelhart, with a backpropagation technique called Gradient descend. 
Backpropagation is the ensemble of algorithms aimed to assign the right weights for which the neural network have lower error in its learning. One of the most import methods inside backpropagation is Stochastic Gradient Descent (SGD) this is an algorithm that aims to minimize the error rate using calculus concepts as chain rules for partial derivatives [Rumelhart, et al 1986]. In this technique the derivate of the objective with respect to the input of a neuron can be calculated by computing backwards with respect to the output of that neuron (that’s the input of next neuron) The technique propagate all derives, or gradients, starting from the top output and go all the way to the bottom, then it straightforward compute the respective weight of each link.\n\nSince MLP and SGD, then there was no so much progress in solving neural networks until the proposition of another method of backpropagation called Long Short Term Memory LSTM in 1997 [Hochreiter et al 1997] It shortens the normal gradient descent method an introduces the concept of recurrent network to learn long range dependencies. It leans much faster and solves complex artificial long-time-lag tasks that have never been solved before.\n\nIn Deep learning paper LeCun explains the importance its use for the formation of deep learning: “Long short-term memory (LSTM) networks that use special hidden units, the natural behaviour of which is to remember inputs for a long time. 
A special unit called the memory cell acts like an accumulator or a gated leaky neuron: it has a connection to itself at the next time step that has a weight of one, so it copies its own real-valued state and accumulates the external signal, but this self-connection is multiplicatively gated by another unit that learns to decide when to clear the content of the memory.” [LeCun et al, Nature 2015]. LSTM networks make it possible to train a type of network called Recurrent Neural Networks (RNN), which can be trained for tasks that involve sequential inputs, such as speech and language [Graves et al 2013]. RNNs were very useful in the following years, but they didn’t solve the problems of deep learning.\n\nScientists also developed another type of network that was easier to train, meaning it needs fewer examples to learn the right weights in the links between neurons, and it can also do better classifications. This network is called a Convolutional Neural Network (CNN) and it is characterized by having a full connectivity between adjacent layers. It first appears in [Lecun et al 1989] with an application to handwritten zip code recognition. The authors explain how the convolutional and pooling layers in CNNs are directly inspired by the classic notions of cells in visual neuroscience. CNN was the first step in developing computer vision.\n\nSystems combining neural networks and reinforcement learning are the basis of Deep Reinforcement Learning (DRL). In this case the agent in a state uses a deep neural network to learn a policy; with this policy the agent takes an action in the environment and gets a reward from the specific state. The reward feeds the neural network and it generates a better policy. This was developed and applied in a famous paper called Playing Atari with deep reinforcement learning [Mnih, V. 
et al 2013] in which they taught a machine to play Atari games directly from pixels, and after training, the machine produced excellent results.\n\nLater the AlphaGo team developed a deep reinforcement learning system, using all technologies cited in this section (LSTM, CNN, RNN), creating an artificial intelligence system capable of learning to play the game of Go, being trained by watching experts, being trained by playing against itself, and finally beating the world champion Lee Sedol. This was a breakthrough in artificial intelligence because, due to the complexity of the game, scientists thought it would take more years to make a machine win this game. [Silver et al 2016]\n\nBibliography\n\n1. Bellman, R. (1957). A Markovian decision process (No. P-1066). RAND CORP SANTA MONICA CA.\n2. Bellman, R. E. (1957). Dynamic programming. Princeton, NJ: Princeton University Press.\n3. Bottou, L. (2014). From machine learning to machine reasoning. Machine learning, 94 (2), 133–149.\n4. Ciresan, D., Meier, U., & Schmidhuber, J. (2012). Multi-column Deep Neural Networks for Image Classification. In Computer Vision and Pattern Recognition (CVPR) (pp. 3642–3649).\n5. F. Vernadat, Techniques de modélisation en entreprise : applications aux processus opérationnels, Collection Gestion, Economica, 1999\n6. GONZALES N. : Contribution à l’amélioration des processus à travers la mesure de la maturité du projet : application à l’automobile, Thèse Doctorale 3 décembre 2009\n7. Graves, A., Mohamed, A.-r., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In Acoustics, speech and signal processing (icassp), 2013 ieee international conference on (pp. 6645–6649).\n8. Hebb, D. O. (1949). The organization of behavior. Wiley.\n9. Hinton, G. E., Dayan, P., Frey, B. J., & Neal, R. M. (1995). The “wake-sleep” algorithm for unsupervised neural networks. Science, 268 (5214), 1158–61.\n10. Juang, B. H., & Rabiner, L. R. (1990). Hidden Markov models for speech recognition. 
Technometrics, 33 (3), 251–272.\n11. Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (pp. 1097–1105).\n12. LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521 , 436–444.\n13. LeCun, Y., Boser, B., Denker, J. S., Henderson, D., Howard, R. E., Hubbard, W., & Jackel, L. D. (1989). Backpropagation applied to handwritten zip code recognition. Neural Computation, 1 , 541–551.\n14. LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86 (11), 2278–2323\n15. Littman, M. L. (1994). Markov games as a framework for multi-agent reinforcement learning. In Proceedings of the eleventh international conference on machine learning (Vol. 157, pp. 157–163).\n16. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n17. McCulloch, Warren; Walter Pitts (1943). “A Logical Calculus of Ideas Immanent in Nervous Activity”. Bulletin of Mathematical Biophysics. 5 (4): 115–133.\n18. Melin, P., Castillo, O.: Modelling, Simulation and Control of Non-Linear Dynamical Systems. Taylor and Francis, London (2002)\n19. Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.\n20. Rosenblatt, F. (1958). The perceptron: a probabilistic model for information storage and organization in the brain. Psychological Review, 65 , 386–408.\n21. Rumelhart, D. E., Hinton, G., & Williams, R. (1986). Learning representations by back-propagating errors. Nature, 323 (9), 533–536.\n22. S. Hochreiter and J. Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.\n23. Silver, D., Huang, A., Maddison, C. J., Guez, A., Sifre, L., Driessche, G. V. 
D., . . . Hassabis, D. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 529 (7585), 484–489.\n24. Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Mach. Learn., 3, 9–44.\n25. Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction (Vol. 1, №1). Cambridge: MIT press.\n26. Watkins, C.J.C.H., (1989), Learning from Delayed Rewards. Ph.D. thesis, Cambridge University.",
"json_metadata": "{\"tags\":[\"artifitialintelligence\",\"datascience\",\"computerscience\",\"deeplearning\",\"reinforcementlearning\"],\"image\":[\"https://steemitimages.com/DQmZD5fHMy4UiWY2qcHB3k5CFzv1n8uuq7AKSbRbAA8AJSi/machine%20learning.jpg\",\"https://steemitimages.com/DQmQF9mvJDWQsq4LjovbTefwBzvmMBJ2pXwoSt6uEyswaSk/ML%201%20ENG.jpg\"],\"app\":\"steemit/0.1\",\"format\":\"markdown\"}"
}
]
}

steemcreated a new account: @fesan81

2017/11/28 09:43:48 UTC

17,614,450|2fb3f31

fee	0.500 STEEM
delegation	57000.000000 VESTS
creator	steem
new account name	fesan81
owner	{"weight_threshold":1,"account_auths":[],"key_auths":[["STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",1]]}
active	{"weight_threshold":1,"account_auths":[],"key_auths":[["STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",1]]}
posting	{"weight_threshold":1,"account_auths":[],"key_auths":[["STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",1]]}
memo key	STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk
json metadata
extensions	[]
Transaction Info	Block #17614450/Trx 2fb3f31aec55bba631571a17577acc550ea65f36

View Raw JSON Data

{
  "trx_id": "2fb3f31aec55bba631571a17577acc550ea65f36",
  "block": 17614450,
  "trx_in_block": 14,
  "op_in_trx": 0,
  "virtual_op": 0,
  "timestamp": "2017-11-28T09:43:48",
  "op": [
    "account_create_with_delegation",
    {
      "fee": "0.500 STEEM",
      "delegation": "57000.000000 VESTS",
      "creator": "steem",
      "new_account_name": "fesan81",
      "owner": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
            1
          ]
        ]
      },
      "active": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
            1
          ]
        ]
      },
      "posting": {
        "weight_threshold": 1,
        "account_auths": [],
        "key_auths": [
          [
            "STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
            1
          ]
        ]
      },
      "memo_key": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk",
      "json_metadata": "",
      "extensions": []
    }
  ]
}

Manabar

Voting Power100.00%

Downvote Power100.00%

Resource Credits100.00%

Reputation Progress0.00%

{
  "voting_manabar": {
    "current_mana": "8143659806",
    "last_update_time": 1779063441
  },
  "downvote_manabar": {
    "current_mana": 2035914951,
    "last_update_time": 1779063441
  },
  "rc_account": {
    "account": "fesan81",
    "rc_manabar": {
      "current_mana": "10164408779",
      "last_update_time": 1779063441
    },
    "max_rc_creation_adjustment": {
      "amount": "2020748973",
      "precision": 6,
      "nai": "@@000000037"
    },
    "max_rc": "10164408779"
  }
}

Account Metadata

POSTING JSON METADATA
None
JSON METADATA
None

{
  "posting_json_metadata": {},
  "json_metadata": {}
}

Auth Keys

Owner

Single Signature

Public Keys

STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb1/1

Active

Single Signature

Public Keys

STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F1/1

Posting

Single Signature

Public Keys

STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc1/1

Memo

STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk

{
  "owner": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM6kL7JYSz3SdUSzw83JGDA3CumVfKCsZC82dVZuyTSNBcadL9Jb",
        1
      ]
    ]
  },
  "active": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM8DjPbqKFGjJK2sp9Qfnt3rn6GVPn2zXJVPZNCRnGYKNauE2n2F",
        1
      ]
    ]
  },
  "posting": {
    "weight_threshold": 1,
    "account_auths": [],
    "key_auths": [
      [
        "STM5weQvsb37BUiMz7STRvMaTDtuiVnxE66hJYaLKSKL3RjDaXmbc",
        1
      ]
    ]
  },
  "memo": "STM5F4LapqumRfCisBCMNJ2SHjKDJEQVgwS8xHXdPSQ4ZuvjiJQYk"
}

Witness Votes

0 / 30

No active witness votes.

[]