無限時間最適フィードバック制御モデル#

モデルの構造#

無限時間最適フィードバック制御モデル (infinite-horizon optimal feedback control model) [Qian et al., 2013]

dx=(Ax+Bu)dt+Yudγ+Gdωdy=Cxdt+Ddξdx^=(Ax^+Bu)dt+K(dyCx^dt)

実装#

ライブラリの読み込みと関数の定義.

using Base: @kwdef
using Parameters: @unpack
using LinearAlgebra, Kronecker, Random, BlockDiagonals, PyPlot
rc("axes.spines", top=false, right=false)
rc("font", family="Arial") 

定数の定義

α1=btateI,α2=1tate+(1ta+1te)bIα3=bI+1ta+1te,bu=1tateI
@kwdef struct SaccadeModelParameter
    n = 4 # number of dims
    i = 0.25 # kgm^2, 
    b = 0.2 # kgm^2/s
    ta = 0.03 # s
    te = 0.04 # s
    L0 = 0.35 # m

    bu = 1 / (ta * te * i)
    α1 = bu * b
    α2 = 1/(ta * te) + (1/ta + 1/te) * b/i
    α3 = b/i + 1/ta + 1/te

    A = [zeros(3) I(3); -[0, α1, α2, α3]']
    B = [zeros(3); bu]
    C = [I(3) zeros(3)]
    D = Diagonal([1e-3, 1e-2, 5e-2])

    Y = 0.02 * B
    G = 0.03 * I(n)

    Q = Diagonal([1.0, 0.01, 0, 0]) 
    R = 0.0001
    U = Diagonal([1.0, 0.1, 0.01, 0])
end
SaccadeModelParameter
X:=[xx~],dω¯:=[dωdξ],A¯:=[ABLBL0AKC]Y¯:=[YLYLYLYL],G¯:=[G0GKD]

とする.元論文ではF,F¯が定義されていたが,F=0とするため,以後の式から削除した.

P:=[P11P12P12P22]:=E[XX]V:=[Q+LRLLRLLRLLRL+U]
K=P22C(DD)1L=(R+Y(S11+S22)Y)1BS11A¯S+SA¯+Y¯SY¯+V=0A¯P+PA¯+Y¯PY¯+G¯G¯=0

A=(aij)m×n 行列,B=(bkl)p×q 行列とすると、それらのクロネッカー積 AB

AB=[a11Ba1nBam1BamnB]

で与えられる mp×nq 区分行列である.

Roth’s column lemma (vec-trick)

(BA)vec(X)=vec(AXB)=vec(C)

によりこれを解くと,

S=vec1((IA¯+A¯I+Y¯Y¯)1vec(V))P=vec1((IA¯+A¯I+Y¯Y¯)1vec(G¯G¯))

となる.ここでI=Iを用いた.

K, L, S, Pの計算#

  1. LとKをランダムに初期化

  2. SとPを計算

  3. LとKを更新

  4. 収束するまで2と3を繰り返す.

収束スピードはかなり速い.

function infinite_horizon_ofc(param::SaccadeModelParameter, maxiter=1000, ϵ=1e-8)
    @unpack n, A, B, C, D, Y, G, Q, R, U = param
    
    # initialize
    L = rand(n)' # Feedback gains
    K = rand(n, 3) # Kalman gains
    I₂ₙ = I(2n)

    for _ in 1:maxiter
         = [A-B*L B*L; zeros(size(A)) (A-K*C)]
         = [-ones(2) ones(2)]  (Y*L) 
         = [G zeros(size(K)); G (-K*D)]
        V = BlockDiagonal([Q, U]) + [1 -1; -1 1]  (L'* R * L)

        # update S, P
        S = -reshape((I₂ₙ  ()' +  ()'  I₂ₙ + ()'  ()') \ vec(V), (2n, 2n))
        P = -reshape((I₂ₙ   +    I₂ₙ +    ) \ vec( * ()'), (2n, 2n))

        # update K, L
        P₂₂ = P[n+1:2n, n+1:2n]
        S₁₁ = S[1:n, 1:n]
        S₂₂ = S[n+1:2n, n+1:2n]

        Kₜ₋₁ = copy(K)
        Lₜ₋₁ = copy(L)

        K = P₂₂ * C' / (D * D')
        L = (R + Y' * (S₁₁ + S₂₂) * Y) \ B' * S₁₁
        if sum(abs.(K - Kₜ₋₁)) < ϵ && sum(abs.(L - Lₜ₋₁)) < ϵ
            break
        end
    end
    return L, K
end
infinite_horizon_ofc (generic function with 3 methods)
param = SaccadeModelParameter()
L, K = infinite_horizon_ofc(param);

シミュレーション#

function simulation(param::SaccadeModelParameter, L, K, dt=0.001, T=2.0, init_pos=-0.5; noisy=true)
    @unpack n, A, B, C, D, Y, G, Q, R, U = param
    nt = round(Int, T/dt)
    X = zeros(n, nt)
    u = zeros(nt)
    X[1, 1] = init_pos # m; initial position (target position is zero)

    if noisy
        sqrtdt = dt
         = zeros(n, nt)
        [1, 1] = X[1, 1]
        for t in 1:nt-1
            u[t] = -L * [:, t]
            X[:, t+1] = X[:,t] + (A * X[:,t] + B * u[t]) * dt + sqrtdt * (Y * u[t] * randn() + G * randn(n))
            dy = C * X[:,t] * dt + D * sqrtdt * randn(n-1)
            [:, t+1] = [:,t] + (A * [:,t] + B * u[t]) * dt + K * (dy - C * [:,t] * dt)
        end
    else
        for t in 1:nt-1
            u[t] = -L * X[:, t]
            X[:, t+1] = X[:, t] + (A * X[:, t] + B * u[t]) * dt
        end
    end
    return X, u
end
simulation (generic function with 4 methods)

理想状況でのシミュレーション

dt = 1e-3
T = 1.0
1.0
Xa, ua = simulation(param, L, K, dt, T, noisy=false);

ノイズを含むシミュレーション#

n = 4
nsim = 10
XSimAll = []
uSimAll = []
for i in 1:nsim
    XSim, u = simulation(param, L, K, dt, T, noisy=true);
    push!(XSimAll, XSim)
    push!(uSimAll, u)
end

結果の描画#

tarray = collect(dt:dt:T)
label = [L"Position ($m$)", L"Velocity ($m/s$)", L"Acceleration ($m/s^2$)", L"Jerk ($m/s^3$)"]

fig, ax = subplots(1, 3, figsize=(10, 3))
for i in 1:2
    for j in 1:nsim
        ax[i].plot(tarray, XSimAll[j][i,:]', "tab:gray", alpha=0.5)
    end
    
    ax[i].plot(tarray, Xa[i,:], "tab:red")
    ax[i].set_ylabel(label[i]); ax[i].set_xlabel(L"Time ($s$)"); ax[i].set_xlim(0, T); ax[i].grid()
end

for j in 1:nsim
    ax[3].plot(tarray, uSimAll[j], "tab:gray", alpha=0.5)
end
ax[3].plot(tarray, ua, "tab:red")
ax[3].set_ylabel(L"Control signal ($N\cdot m$)"); ax[3].set_xlabel(L"Time ($s$)"); ax[3].set_xlim(0, T); ax[3].grid()

tight_layout()
../_images/infinite-horizon-ofc_21_0.png

Target jump#

target jumpする場合の最適制御 [Li et al., 2018]

function target_jump_simulation(param::SaccadeModelParameter, L, K, dt=0.001, T=2.0, 
        Ttj=0.4, tj_dist=0.1, 
        init_pos=-0.5; noisy=true)
    # Ttj : target jumping timing (sec)
    # tj_dist : target jump distance
    @unpack n, A, B, C, D, Y, G, Q, R, U = param
    nt = round(Int, T/dt)
    ntj = round(Int, Ttj/dt)
    X = zeros(n, nt)
    u = zeros(nt)
    X[1, 1] = init_pos # m; initial position (target position is zero)

    if noisy
        sqrtdt = dt
         = zeros(n, nt)
        [1, 1] = X[1, 1]
        for t in 1:nt-1
            if t == ntj
                X[1, t] -= tj_dist # When k == ntj, target jumpさせる(実際には現在の位置をずらす)
                [1, t] -= tj_dist
            end
            u[t] = -L * [:, t]
            X[:, t+1] = X[:,t] + (A * X[:,t] + B * u[t]) * dt + sqrtdt * (Y * u[t] * randn() + G * randn(n))
            dy = C * X[:,t] * dt + D * sqrtdt * randn(n-1)
            [:, t+1] = [:,t] + (A * [:,t] + B * u[t]) * dt + K * (dy - C * [:,t] * dt)
        end
    else
        for t in 1:nt-1
            if t == ntj
                X[1, t] -= tj_dist # When k == ntj, target jumpさせる(実際には現在の位置をずらす)
            end
            u[t] = -L * X[:, t]
            X[:, t+1] = X[:, t] + (A * X[:, t] + B * u[t]) * dt
        end
    end
    X[1, 1:ntj-1] .-= tj_dist;
    return X, u
end
target_jump_simulation (generic function with 6 methods)
Ttj = 0.4
tj_dist = 0.1
nt = round(Int, T/dt)
ntj = round(Int, Ttj/dt);
Xtj, utj = target_jump_simulation(param, L, K, dt, T, noisy=false);
XtjAll = []
utjAll = []
for i in 1:nsim
    XSim, u = target_jump_simulation(param, L, K, dt, T, noisy=true);
    push!(XtjAll, XSim)
    push!(utjAll, u)
end
target_pos = zeros(nt)
target_pos[1:ntj-1] .-= tj_dist; 

fig, ax = subplots(1, 3, figsize=(10, 3))
for i in 1:2
    ax[1].plot(tarray, target_pos, "tab:green")
    for j in 1:nsim
        ax[i].plot(tarray, XtjAll[j][i,:]', "tab:gray", alpha=0.5)
    end
    ax[i].axvline(x=Ttj, color="gray", linestyle="dashed")
    ax[i].plot(tarray, Xtj[i,:], "tab:red")
    ax[i].set_ylabel(label[i]); ax[i].set_xlabel(L"Time ($s$)"); ax[i].set_xlim(0, T); ax[i].grid()
end
for j in 1:nsim
    ax[3].plot(tarray, utjAll[j], "tab:gray", alpha=0.5)
end
ax[3].axvline(x=Ttj, color="gray", linestyle="dashed")
ax[3].plot(tarray, utj, "tab:red")
ax[3].set_ylabel(L"Control signal ($N\cdot m$)"); ax[3].set_xlabel(L"Time ($s$)"); ax[3].set_xlim(0, T); ax[3].grid()

tight_layout()
../_images/infinite-horizon-ofc_27_0.png

参考文献#

LMSQ18

Zhe Li, Pietro Mazzoni, Sen Song, and Ning Qian. A single, continuously applied control policy for modeling reaching movements with and without perturbation. Neural Comput., 30(2):397–427, February 2018.

QJJM13

Ning Qian, Yu Jiang, Zhong-Ping Jiang, and Pietro Mazzoni. Movement duration, fitts's law, and an infinite-horizon optimal feedback control model for biological motor systems. Neural Comput., 25(3):697–724, March 2013.