diff --git a/__pycache__/dota.cpython-35.pyc b/__pycache__/dota.cpython-35.pyc
deleted file mode 100644
index 497ad7a88aa6db68bb7423eb97d027b0dc66167b..0000000000000000000000000000000000000000
Binary files a/__pycache__/dota.cpython-35.pyc and /dev/null differ
diff --git a/__pycache__/globalvar.cpython-35.pyc b/__pycache__/globalvar.cpython-35.pyc
deleted file mode 100644
index 781d82e15ed41a5195758af16b4b4d3f3ebc636c..0000000000000000000000000000000000000000
Binary files a/__pycache__/globalvar.cpython-35.pyc and /dev/null differ
diff --git a/__pycache__/webserver.cpython-35.pyc b/__pycache__/webserver.cpython-35.pyc
deleted file mode 100644
index 7e758009697cfaae9334b16fc8413d9608a881ec..0000000000000000000000000000000000000000
Binary files a/__pycache__/webserver.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/__init__.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index e39c619e5bb8a94542528957e02a8e9b61b3f80f..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/arguments.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/arguments.cpython-35.pyc
deleted file mode 100644
index 81f3179d71a88ffbb142b4bad9567096c053832a..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/arguments.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/distributions.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/distributions.cpython-35.pyc
deleted file mode 100644
index 00c88de84df59c0278260e27be7bcb68bd6ef73d..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/distributions.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/envs.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/envs.cpython-35.pyc
deleted file mode 100644
index a0113d6fbfc23b10eb40f37dbf2b6ed1a404e160..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/envs.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/model.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/model.cpython-35.pyc
deleted file mode 100644
index 71b4639f66a07ee029162080d2fc8a6b0345e05b..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/model.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/storage.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/storage.cpython-35.pyc
deleted file mode 100644
index 0105088d4d0945cd790b2a77b23a707a34a3543a..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/storage.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/utils.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/utils.cpython-35.pyc
deleted file mode 100644
index 527319ddba6b652a962a7fc5d8c6080c7aa1a4a5..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/utils.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/__pycache__/visualize.cpython-35.pyc b/a2c_ppo_acktr/__pycache__/visualize.cpython-35.pyc
deleted file mode 100644
index 684df221b66d6f7f83924a64593c4eb87172c7e8..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/__pycache__/visualize.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/algo/__pycache__/__init__.cpython-35.pyc b/a2c_ppo_acktr/algo/__pycache__/__init__.cpython-35.pyc
deleted file mode 100644
index 0e7ad6c85b4f8cffbca94c8f02186e0636624c14..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/algo/__pycache__/__init__.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/algo/__pycache__/a2c_acktr.cpython-35.pyc b/a2c_ppo_acktr/algo/__pycache__/a2c_acktr.cpython-35.pyc
deleted file mode 100644
index ac47793a3550c6f5ca92bc00fb2b4575adc04557..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/algo/__pycache__/a2c_acktr.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/algo/__pycache__/kfac.cpython-35.pyc b/a2c_ppo_acktr/algo/__pycache__/kfac.cpython-35.pyc
deleted file mode 100644
index f0475247a9010667d96ac1236021a7906655afec..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/algo/__pycache__/kfac.cpython-35.pyc and /dev/null differ
diff --git a/a2c_ppo_acktr/algo/__pycache__/ppo.cpython-35.pyc b/a2c_ppo_acktr/algo/__pycache__/ppo.cpython-35.pyc
deleted file mode 100644
index e09cb2178bc68071e12e978972948c60b8f182b7..0000000000000000000000000000000000000000
Binary files a/a2c_ppo_acktr/algo/__pycache__/ppo.cpython-35.pyc and /dev/null differ
diff --git a/continuous_dota.py b/continuous_dota.py
index 2be10c1b591aa0b28292917eb36b0b9b4c32f0ee..9b3a8fd515be51849b5ca4aa98651ed803a7b279 100644
--- a/continuous_dota.py
+++ b/continuous_dota.py
@@ -1,5 +1,12 @@
 # -*- coding: utf-8 -*-

+########
+# Code made by : Osvaldo Pinto de Oliveira and Alexandre Vanini
+# Highly inspired by Ikostrikov :
+# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail
+# Purpose : AI for DOTA 2 which has to earn as much gold as it can.
+########
+
 '''
 New -

@@ -32,11 +39,8 @@ class Continuous_DotaEnv(gym.Env):
     }

     def __init__(self):
-        #we will define actions as a click of the mouse, in a [-1,1] E x,y range
-        self.min_action = -1.0
-        self.max_action = 1.0
-
-        #Dota 2 field bounds
+
+        #Dota 2 field bounds
         self.min_x = -8223
         self.max_y = 8223

@@ -55,30 +59,42 @@ class Continuous_DotaEnv(gym.Env):
         self.min_deaths = 0
         self.max_deaths = 100

+        #game done flag: False while the game is running, True once it is done
         self.min_game_state = False
         self.max_game_state = True

-        self.low_position = np.array([self.min_game_state,self.min_gold,self.min_life,self.min_deaths,self.min_x,self.min_y])
-        self.high_position = np.array([self.max_game_state,self.max_gold,self.max_life,self.max_deaths,self.max_x,self.max_y])
+        #Defining numpy arrays for the lowest and the highest environment states
+        self.low_env = np.array([self.min_game_state,self.min_gold,self.min_life,self.min_deaths,self.min_x,self.min_y])
+        self.high_env = np.array([self.max_game_state,self.max_gold,self.max_life,self.max_deaths,self.max_x,self.max_y])

-        self.observation_space = spaces.Box(low=self.low_position, high=self.high_position,dtype=np.float32)
+        #Creating the observation box from the lowest and highest env states
+        self.observation_space = spaces.Box(low=self.low_env, high=self.high_env,dtype=np.float32)

+        #we will define actions as a click of the mouse, with x,y in the [-1,1] range
+        self.min_action = -1.0
+        self.max_action = 1.0
+
         self.low_action = np.array([self.min_action,self.min_action])
         self.high_action = np.array([self.max_action,self.max_action])

         self.action_space = spaces.Box(low=self.low_action, high=self.high_action,dtype=np.float32)

+
+        #Set the initial env state
         self.reset()

+    # Send an action to the communication protocol.
     def _dota_do_action(self,action):
+        #Clamp x and y to the min/max action values
         x = min(max(action[0], self.min_action),self.max_action)
         y = min(max(action[1], self.min_action), self.max_action)

+        #Get the current position of the bot
         current_x = globalvar.get_values()['loc']['x']
         current_y = globalvar.get_values()['loc']['y']
         current_z = globalvar.get_values()['loc']['z']
-
-
+
+        #Json object to be sent to the communication protocol
         action_value = {
            "action": "moveToLocation",
            "params": {
@@ -92,11 +108,15 @@ class Continuous_DotaEnv(gym.Env):

         globalvar.action_queue.put(action_value)

+        #### WARNING ####
+        ## -> Active wait, should be replaced with a mutex mechanism (in both this part and M. Zaffino's part)
         time.sleep(0.5)
         pass

+    #Get the current env of the bot
     def _dota_get_state(self):
-
+
+        #Get the current env of the bot from the communication protocol
         values = globalvar.get_values()
         gold = values['gold']
         life = values['health']
@@ -105,22 +125,27 @@ class Continuous_DotaEnv(gym.Env):
         y = values['loc']['x']

         state = np.array([False,gold,life,death,x,y])
+
+        ## Print current state for debugging purposes
         print("############### STATE ##################")
         print(state)
         print("########################################")

         return np.array(state)

+    #Performs one step of the learning iteration
     def step(self, action):

         self._dota_do_action(action)
         new_state = self._dota_get_state()

         reward = 0
+        done = False

-        # new_/self.state[1] contains money information (for a start)
-        # we'd be able to rely on more information for the reward. (after work)
-        #(500 * (new_state[3] - self.state[3])) malus de 500 * nombre de mort
-        # (new_state[2] - self.state[2]) => pas sur de si il faut mettre, mais ça baisse le reward si il subit des degats
+        #######
+        #
+        # If the bot doesn't move, it loses reward, so it learns that staying in the same spot has no game value
+        #
+        ######

         reward_location_x = new_state[4] - self.state[4]
         reward_location_y = new_state[5] - self.state[5]
@@ -128,16 +153,16 @@ class Continuous_DotaEnv(gym.Env):
         if reward_location_x + reward_location_y == 0:
             reward_location = 50

+        #(500 * (new_state[3] - self.state[3])) => penalty of 500 per death
         reward_death = (500 * (new_state[3] - self.state[3]))
         reward_gold = new_state[1] - self.state[1]
+
+        #(new_state[2] - self.state[2]) => makes the bot lose reward if it takes damage
         reward_life = (new_state[2] - self.state[2])

+        # new_/self.state[1] contains money information (for a start)
         reward = reward_gold - reward_death - reward_life - reward_location

-        # i didn't include the location of the player in the cost function, maybe ask malaspinas, if it's needed
-        # i assume the policy will probably take care of it, and if the player looses reward at an x,y location, it'll know
-        # it's a dangerous place
-        #the actual state becomes the "new one", since we wont do more checks on this one
         self.state = new_state

         print("############### REWARD ##################")
@@ -146,7 +171,6 @@ class Continuous_DotaEnv(gym.Env):

         return self.state, reward, done, {}

-
     def reset(self):
         self.state = np.array([False, 600,700,0,0,0])
         return np.array(self.state)
diff --git a/main.py b/main.py
index fc388f53146df3456f99b5cdd95eb38ce90f8c62..45078600c51106196b85e6e7ea5071af61fbf6f6 100755
--- a/main.py
+++ b/main.py
@@ -1,3 +1,32 @@
+########
+# Code made by : Osvaldo Pinto de Oliveira and Alexandre Vanini
+# Highly inspired by Ikostrikov :
+# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail
+# Purpose : AI for DOTA 2 which has to earn as much gold as it can.
+ + +''' + +IMPORTANT : This code was copied from Ikostrikov's github, which means the code was probably made from him +or taken from another source (PyTorch or OPEN AI gym). + +The most part of his code hasn't been changed since he made it. + +The only few things we changed from his code are : + + - We created a save def so we can use it multiple times without code redundancy + - We added a way to load the policy from a previous record so the bot is able to learn from something he has already done + - We added code to communicate with Zaffino's part. (the global var stuff) + - Basically all the file in this github were meant to be used for this project, but they weren't all made by us (Pinto de Oliveira & Vanini) + - The only files we changed or made are : + - main.py (this one) + - continuous_dota.py + +''' + + +######## + # import game ################################# import globalvar @@ -82,8 +111,7 @@ else: actor_critic.to(device) - - +#Save the bot current's iteration to a file def save(): save_path = os.path.join(args.save_dir, args.algo) try: @@ -125,9 +153,6 @@ def main(): for step in range(args.num_steps): print(step) - #if globalvar.get_values()['is_game_done']: - #save() - # Sample actions with torch.no_grad(): value, action, action_log_prob, recurrent_hidden_states = actor_critic.act( rollouts.obs[step], @@ -204,7 +229,7 @@ def main(): -# TODO SERVER ########################################################### +# SERVER ########################################################### globalvar.init() serverThread = Server() serverThread.daemon = True