Here we show the difference between motor and goal babbling exploration strategies on a idealized two-dimensional arm example.
We consider a two-dimensional arm with 20 segments of the same lenghts, with a total length of one meter. The range of angles the joint can achieve is [-150, 150] degrees.
import environments.envs
# we copy the default configuration of a 2d kinematic arm and set its parameters
arm_cfg = environments.envs.KinematicArm2D.defcfg._deepcopy()
arm_cfg.dim = 20
arm_cfg.lengths = 1.0/arm_cfg.dim
arm_cfg.limits = (-150, 150)
arm = environments.Environment.create(arm_cfg)
# Loading the bokeh plotting library
from bokeh import plotting
plotting.output_notebook()
from arm_vizu import bokeh_kin
plotting.figure(title='arm motor commands examples')
# some sample motor signals
m_signals = [
{'j0': -31.23, 'j1': -44.21, 'j2': -20.18, 'j3': +31.55, 'j4': +35.66, 'j5': +5.19, 'j6': +17.34, 'j7': +24.51, 'j8': -2.69, 'j9': +26.52, 'j10': -34.87, 'j11': +10.72, 'j12': -19.38, 'j13': -33.49, 'j14': +13.78, 'j15': -22.43, 'j16': +33.61, 'j17': -28.95, 'j18': +34.31, 'j19': 45.75},
{'j0': -53.66, 'j1': -56.20, 'j2': -56.67, 'j3': -34.83, 'j4': -20.29, 'j5': +7.51, 'j6': +20.92, 'j7': +25.51, 'j8': -17.59, 'j9': +6.51, 'j10': -9.65, 'j11': +45.70, 'j12': +20.88, 'j13': +24.25, 'j14': +28.65, 'j15': -42.79, 'j16': +34.45, 'j17': -39.90, 'j18': +2.74, 'j19': -11.12},
{'j0': +58.13, 'j1': +45.43, 'j2': -21.01, 'j3': +2.35, 'j4': -38.90, 'j5': -39.23, 'j6': +45.14, 'j7': -57.58, 'j8': +39.49, 'j9': +29.01, 'j10': -0.09, 'j11': -56.19, 'j12': +56.07, 'j13': +5.91, 'j14': +36.61, 'j15': -52.65, 'j16': -58.60, 'j17': +32.45, 'j18': +43.69, 'j19': -120.77},
{'j0': +53.09, 'j1': +55.83, 'j2': -51.08, 'j3': +41.44, 'j4': +44.43, 'j5': +4.67, 'j6': +2.15, 'j7': +37.23, 'j8': -3.77, 'j9': -46.70, 'j10': +56.41, 'j11': -21.08, 'j12': +13.73, 'j13': +47.23, 'j14': +7.94, 'j15': -27.26, 'j16': +56.54, 'j17': -7.77, 'j18': -18.98, 'j19': +149.46}
]
for i, m_signal in enumerate(m_signals):
bokeh_kin(arm, m_signal, alpha=0.2 + i*0.15)
plotting.hold(True)
plotting.show()
To create a random motor babbling explorer, we only need the motor channels.
import explorers
mb_cfg = explorers.RandomMotorExplorer.defcfg._deepcopy()
mb_cfg.m_channels = arm.m_channels
mb_ex = explorers.Explorer.create(mb_cfg)
We run the exploration strategy for 5000 timesteps.
import random
random.seed(0) # comment for different results
N = 5000
mb_s_signals = []
for i in range(N):
# creating the motor command
exploration = mb_ex.explore()
# executing the motor command
feedback = arm.execute(exploration['m_signal'])
# updating the explorer with the feedback
mb_ex.receive(exploration, feedback)
mb_s_signals.append(feedback['s_signal'])
mb_xs = [s_signal['x'] for s_signal in mb_s_signals]
mb_ys = [s_signal['y'] for s_signal in mb_s_signals]
def mb_plot(n=200):
plotting.figure(title='motor babbling, {} steps'.format(n))
plotting.scatter(mb_xs[:n], mb_ys[:n],
x_range=[-1, 1], y_range=[-1, 1],
fill_alpha= 0.5, line_color=None, radius=2.0, radius_units='screen')
plotting.show()
from IPython.html import widgets
widgets.interact(mb_plot, n=(0, 5000, 5))
The motor babblign exploration trajectory remains concentrated in the center of reachable space.
The explorer for goal babbling is a bit more complex, as it combines a motor babbling explorer, used to bootstrap the exploration (here for 50 timesteps), and a pure goal babbling explorer. We use the class MetaExplorer
to combine the two and orchestrate them in time.
STEP_ERA1 = 50 # number of motor babbling steps
P_MB_ERA1 = 1.0 # probability to do motor babbling during era 1
P_MB_ERA2 = 0.0 # probability to do motor babbling during era 2
ex_cfg = explorers.MetaExplorer.defcfg._deepcopy()
ex_cfg.m_channels = arm.m_channels
ex_cfg.s_channels = arm.s_channels
# two sub-explorer, motor babbling and goal babbling.
ex_cfg.ex_0 = explorers.RandomMotorExplorer.defcfg._deepcopy()
ex_cfg.ex_1 = explorers.RandomGoalExplorer.defcfg._deepcopy()
# there are two eras, the first ends at step STEP_ERA1, the second one never ends.
ex_cfg.eras = (STEP_ERA1, None)
# the weights determine how the probability to use ex_0 (motor babbling),
# or ex_1 (goal babbling) during each era.
ex_cfg.weights = ((P_MB_ERA1, 1.0 - P_MB_ERA1),
(P_MB_ERA2, 1.0 - P_MB_ERA2))
The goal babbling explorer needs an inverse model. We use a very simple one. Given a goal (i.e. a sensory signal), we find the nearest sensory signal in recorded observations, and then add a small random perturbation to its corresponding motor command to create a new motor command that we return.
Here the perturbation is drawn between 5% of the legal value range of the motor channels.
import learners
DISTURB = 0.05
learn_cfg = learners.DisturbLearner.defcfg._deepcopy()
learn_cfg.m_disturb = DISTURB
ex_cfg.ex_1.learner = learn_cfg
ex = explorers.Explorer.create(ex_cfg)
We run the exploration policy for 2000 steps
random.seed(0) # comment for different results
N = 5000
gb_s_signals = []
for i in range(N):
exploration = ex.explore()
feedback = arm.execute(exploration['m_signal'])
ex.receive(exploration, feedback)
gb_s_signals.append(feedback['s_signal'])
We show the scatter plot of the effects. Move the slider to increase the numbers of steps displayed.
xs = [s_signal['x'] for s_signal in gb_s_signals]
ys = [s_signal['y'] for s_signal in gb_s_signals]
def gb_plot(n=200):
plotting.figure(title='goal babbling, {} steps'.format(n))
plotting.scatter(xs[:n], ys[:n],
x_range=[-1, 1], y_range=[-1, 1],
fill_alpha= 0.5, line_color=None, radius=2.0, radius_units='screen')
plotting.show()
widgets.interact(gb_plot, n=(0, 5000, 5))
Many parameters impacts the goal babbling strategy. The interactive plot below allows to test the impact of each parameters in real time.
dataset = {}
last_config = {}
def explore(SEED =0, # random seed
ARM_DIM =20, # number of joints of the arm
LIMIT =150, # range of the joints
STEP_ERA1=50, # lenght of the first era
P_MB_ERA1=1.0, # probability to do motor babbling during era 1
P_MB_ERA2=0.0, # probability to do motor babbling during era 2
N =5000 # total number of steps)
):
# Not redoing the exploration unless we have to.
global last_config
random.seed(SEED)
core_keys = ['SEED', 'ARM_DIM', 'LIMIT', 'STEP_ERA1', 'P_MB_ERA1', 'P_MB_ERA2', 'N']
config = {'SEED': SEED, 'ARM_DIM': ARM_DIM, 'LIMIT': LIMIT, 'STEP_ERA1': STEP_ERA1,
'P_MB_ERA1': P_MB_ERA1, 'P_MB_ERA2': P_MB_ERA2, 'N': N}
if (config.keys() == last_config.keys() and
all(config[k] == last_config[k] for k in core_keys) and
config['N'] <= last_config['N']):
return
else:
last_config = config
# Arm environment
arm_cfg = environments.envs.KinematicArm2D.defcfg._deepcopy()
arm_cfg.dim = ARM_DIM
arm_cfg.lengths = 1.0/arm_cfg.dim
arm_cfg.limits = (-LIMIT, LIMIT)
arm = environments.Environment.create(arm_cfg)
# Explorer Config
ex_cfg = explorers.MetaExplorer.defcfg._deepcopy()
ex_cfg.m_channels = arm.m_channels
ex_cfg.s_channels = arm.s_channels
ex_cfg.ex_0 = explorers.RandomMotorExplorer.defcfg._deepcopy()
ex_cfg.ex_1 = explorers.RandomGoalExplorer.defcfg._deepcopy()
ex_cfg.eras = (STEP_ERA1, None)
ex_cfg.weights = ((P_MB_ERA1, 1.0 - P_MB_ERA1),
(P_MB_ERA2, 1.0 - P_MB_ERA2))
# Learner Config
learn_cfg = learners.DisturbLearner.defcfg._deepcopy()
learn_cfg.m_disturb = DISTURB
ex_cfg.ex_1.learner = learn_cfg
ex = explorers.Explorer.create(ex_cfg)
history = []
for i in range(N):
exploration = ex.explore()
feedback = arm.execute(exploration['m_signal'])
ex.receive(exploration, feedback)
history.append((exploration, feedback))
dataset['m_channels'] = ex.m_channels
dataset['s_channels'] = ex.s_channels
dataset['explorations'] = history
def plot_interact(SEED =0, # random seed
ARM_DIM =20, # number of joint of the arm
LIMIT =150, # range of the joints
STEP_ERA1 =50, # lenght of the first era
P_MB_ERA1 =1.0, # probability to do motor babbling during era 1
P_MB_ERA2 =0.0, # probability to do motor babbling during era 2
N =1000, # total number of steps)
n_displayed=1000 # step displayed
):
explore(SEED=SEED, ARM_DIM=ARM_DIM, LIMIT=LIMIT, STEP_ERA1=STEP_ERA1,
P_MB_ERA1=P_MB_ERA1, P_MB_ERA2=P_MB_ERA2, N=N)
xs = [e[1]['s_signal']['x'] for e in dataset['explorations']]
ys = [e[1]['s_signal']['y'] for e in dataset['explorations']]
n = min(n_displayed, N)
plotting.figure(title='{} steps'.format(n))
plotting.scatter(xs[:n], ys[:n],
x_range=[-1, 1], y_range=[-1, 1],
fill_alpha= 0.5, line_color=None, radius=2.0, radius_units='screen')
plotting.show()
widgets.interact(plot_interact, SEED=(0, 100, 1), ARM_DIM=(1, 50), LIMIT=(5, 180, 5), STEP_ERA1=(0, 1000, 10), P_MB_ERA1=(0.0, 1.0, 0.05), P_MB_ERA2=(0.0, 1.0, 0.05), N=(0, 5000, 100), n_displayed=(0, 5000, 5))