The basis for two dimensions would be
$$
p(x) =
\left[\begin{array}{ccccc} x_1 & x_2 & x_1^2 & x_1x_2 & x_2^2 \end{array}\right]^T
$$
$f(x) = p(x)^T a$
where $a = [a_1, a_2, \ldots, a_5]^T$. This would be akin to calculating
$$
f(x) = a_1 x_1 + a_2 x_2 + a_3 x_1^2 + a_4 x_1 x_2 + a_5 x_2^2
$$
Then with given data points
$$
\left[\begin{array}{ccccc}
(x_1^1) & (x_2^1) & (x_1^1)^2 & (x_1^1)(x_2^1) & (x_2^1)^2 \\
\vdots & & & & \vdots \\
(x_1^n) & (x_2^n) & (x_1^n)^2 & (x_1^n)(x_2^n) & (x_2^n)^2 \\
\end{array}\right]
\mathbf{a} =
\left[\begin{array}{c}
y^1 \\ \vdots \\ y^n
\end{array}\right]
$$
and solve for $a$.
But I need the
$$
f(x) = x^T A x
$$
form, so I can easily get $\nabla f (x) = 2 A x$ and the Hessian $\nabla^2 f(x) = 2 A$.
It turns out if we start with 3 dimensions, and use $x_3 = 1$ this gives the necessary form in two dimensions,
$$
x^T A x =
\left[\begin{array}{ccc}
x_1 & x_2 & x_3
\end{array}\right]
\left[\begin{array}{ccc}
a_{11} & a_{12} & a_{13} \\
a_{21} & a_{22} & a_{23} \\
a_{31} & a_{32} & a_{33}
\end{array}\right]
\left[\begin{array}{c}
x_1 \\ x_2 \\ x_3
\end{array}\right]
$$
$$
= \left[\begin{array}{c}
x_1 a_{11} + x_2 a_{21} + x_3 a_{31} \\
x_1 a_{12} + x_2 a_{22} + x_3 a_{32} \\
x_1 a_{13} + x_2 a_{23} + x_3 a_{33}
\end{array}\right]^T
\left[\begin{array}{c}
x_1 \\ x_2 \\ x_3
\end{array}\right]
$$
$$
\begin{aligned}
x^T A x = {} & x_1 x_1 a_{11} + x_1 x_2 a_{21} + x_1 x_3 a_{31} + {} \\
& x_1 x_2 a_{12} + x_2 x_2 a_{22} + x_2 x_3 a_{32} + {} \\
& x_1 x_3 a_{13} + x_2 x_3 a_{23} + x_3 x_3 a_{33}
\end{aligned}
$$
Using $x_3 = 1$
$$
x^T A x =
\left[\begin{array}{ccc}
x_1 & x_2 & 1
\end{array}\right]
\left[\begin{array}{ccc}
a_{11} & a_{12} & a_{13} \\
a_{21} & a_{22} & a_{23} \\
a_{31} & a_{32} & a_{33}
\end{array}\right]
\left[\begin{array}{c}
x_1 \\ x_2 \\ 1
\end{array}\right]
$$
$$
\begin{aligned}
= {} & a_{11} x_1^2 + a_{21} x_1 x_2 + a_{31} x_1 + {} \\
& a_{12} x_1 x_2 + a_{22} x_2^2 + a_{32} x_2 + {} \\
& a_{13} x_1 + a_{23} x_2 + a_{33}
\end{aligned}
$$
from scipy.interpolate import Rbf
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import autograd.numpy as anp
import autograd
def random_ball(num_points, dimension, radius=1):
    """Draw `num_points` points uniformly from a `dimension`-dimensional ball.

    Directions come from normalized Gaussian draws; radii use the U**(1/d)
    transform so volume is covered uniformly. Returns shape (num_points, dimension).
    """
    directions = np.random.normal(size=(dimension, num_points))
    directions /= np.linalg.norm(directions, axis=0)
    radii = np.random.random(num_points) ** (1.0 / dimension)
    return radius * (directions * radii).T
# Fix the RNG seed so the sampled ball points below are reproducible.
np.random.seed(0)
# Number of points sampled around the current iterate (read by get_fvals_in_region).
N = 20
def rosenbrock(x):
    """Shifted Rosenbrock function on a length-2 point; zero at (-1, 1)."""
    u, v = x[0], x[1]
    return (1 + u) ** 2 + 100 * (v - u ** 2) ** 2
def Rosenbrock(x, y):
    """Meshgrid-friendly form of the same shifted Rosenbrock surface."""
    return (x + 1) ** 2 + 100 * (y - x * x) ** 2
def get_fvals_in_region(xcurr, f, radius):
    """Sample N points (module-level N) in a ball of `radius` around `xcurr`
    and evaluate `f` at each.

    Parameters
    ----------
    xcurr : length-2 sequence, the center of the sampling ball.
    f : callable taking one length-2 point, returning a scalar.
    radius : float, sampling radius.

    Returns
    -------
    (pts, vals) : pts has shape (N, 2); vals is a 1-D array of f over pts.
    """
    # NOTE(review): the dimension 2 is hard-coded here even though xcurr could
    # have another length — confirm intent.
    pts = xcurr + random_ball(N, 2, radius)  # was computed twice (xcurr+b); hoisted
    vals = np.array([f(p) for p in pts])
    return pts, vals
# Sample the objective around the starting point and stack (x1, x2, f) rows.
x0 = [1.5, 0]
xs, vs = get_fvals_in_region(x0, rosenbrock, 0.5)
res = np.array(
    [(xs[i, 0], xs[i, 1], vs[i]) for i in range(vs.shape[0])]
).reshape(vs.shape[0], 3)
# Dense grid for plotting the true surface.
x = np.linspace(-2, 2, 250)
y = np.linspace(-1, 3, 250)
X, Y = np.meshgrid(x, y)
Z = Rosenbrock(X, Y)
import itertools
import numpy.linalg as lin
def quad_interpolate(xi, yi):
    """Least-squares fit of a quadratic form f(x) = z^T A z with z = (x1, x2, 1).

    Parameters
    ----------
    xi : (n, 2) array of sample points.
    yi : (n, 1) array of function values at those points.

    Returns
    -------
    coef : (9, 1) array; reshape to (3, 3) to recover A. The design matrix
        repeats each symmetric product, so the minimum-norm lstsq solution
        spreads the weight evenly, giving a symmetric A.
    """
    # Append the homogeneous coordinate x3 = 1 to every sample point.
    xi = np.hstack((xi, np.ones((1, len(xi))).T))
    D = xi.shape[1]
    # One column per ordered product x_i * x_j (D*D columns in total).
    X_train = np.array([
        [row[i] * row[j] for i, j in itertools.product(range(D), range(D))]
        for row in xi
    ])
    # rcond=None opts into the current (non-deprecated) cutoff behavior.
    coef, _, _, _ = lin.lstsq(X_train, yi, rcond=None)
    return coef
# Fit the quadratic surrogate to the sampled points.
xi = res[:, [0, 1]]  # sample coordinates
yi = res[:, [2]]     # sampled function values (column vector)
coef = quad_interpolate(xi, yi)
print(coef)  # was `print(coefs)` — NameError: coefs is only defined further below
# Plot the sampled points against the true surface.
x = np.linspace(-2, 2, 250)
y = np.linspace(-1, 3, 250)
X, Y = np.meshgrid(x, y)
Z = Rosenbrock(X, Y)
fig = plt.figure(figsize=(8, 4))
# fig.gca(projection='3d') was removed in Matplotlib 3.6; add_subplot replaces it.
ax = fig.add_subplot(projection='3d')
ax.plot3D(res[:, 0], res[:, 1], res[:, 2], 'r.')
ax.plot_surface(X, Y, Z, rstride=5, cstride=5, cmap='jet', alpha=.4, edgecolor='none')
def q_interp(x1, x2):
    """Evaluate the fitted quadratic surrogate at the point (x1, x2).

    Reads the module-level `coef` produced by quad_interpolate; appends the
    homogeneous coordinate 1 and computes z^T A z as a Python float.
    """
    z = np.array([[x1, x2, 1]])
    A = coef.reshape(3, 3)
    val = np.dot(np.dot(z, A), z.T)
    # np.float was removed in NumPy 1.24; the builtin float is the replacement.
    return float(val)
# Evaluate the surrogate on the grid and overlay it as a wireframe.
Zi = np.array([q_interp(xx, yy) for xx, yy in zip(X.flatten(), Y.flatten())])
Zi = Zi.reshape(X.shape)
ax.plot_wireframe(X, Y, Zi)
coefs = coef.reshape(3, 3)
# Gradient of the quadratic part of the model at x0: 2 * A[:2,:2] @ x0.
# NOTE(review): with the homogeneous coordinate x3 = 1 the model also carries
# linear terms; the exact gradient would add 2 * coefs[:2, 2:] — confirm intent.
g = (2 * np.dot(coefs[:2, :2], np.array(x0).reshape(2, 1)))
# Normalize by the Euclidean norm; dividing by np.sum(g) is not a norm and can
# flip the arrow (negative sum) or blow up (components cancelling to ~0).
gnorm = g / lin.norm(g)
ax.set_zlim(0, 2500)
ax.quiver(x0[0], x0[1], 0, -gnorm[0], -gnorm[1], 0, color='red')
# Hessian of the quadratic part and the resulting Newton step -H^{-1} g.
hess = 2 * coefs[:2, :2]
print(hess)
newton_dir = -np.dot(lin.inv(hess), g)
print(newton_dir)
d = newton_dir
print(d)
ax.quiver(x0[0], x0[1], 0, d[0], d[1], 0, color='green')
ax.plot3D([x0[0]], [x0[1]], [0.0], 'b.')
ax.view_init(21, -133)
Best Answer
You can do multi-variate quadratic regression in the usual way. Let's label the row (and column) indices of the design matrix $A$, and the row index of the value vector $b$, by index $s(\{p_1, p_2, p_3, \cdots\})$ which pertains to the coefficient of $x_1^{p_1}x_2^{p_2}\cdots$. For example, the row labeled $s(\{ 1, 0, 2\})$ will be the row pertaining to the coefficient of $x_1x_3^2$.
Then the elements of $A$ are calculated as $$ A_{s(\{p_1, p_2, p_3, \cdots\}),s(\{q_1, q_2, q_3, \cdots\})} = \sum x_1^{p_1+q_1} x_2^{p_2+q_2} x_3^{p_3+q_3} \cdots $$ and the elements of $b$ are $$ b_{s(\{p_1, p_2, p_3, \cdots\})} = \sum y\,x_1^{p_1} x_2^{p_2} x_3^{p_3} \cdots $$ where of course all the sums are taken over the set of data points.
For example, for a 2-variable quadratic fit $y = a + bu + cv + du^2 + e uv + fv^2$ you need to solve $$ \pmatrix{N &\sum u_i &\sum v_i & \sum u_i^2 & \sum u_iv_i & \sum v_i^2 \\ \sum u_i & \sum u_i^2 & \sum u_i v_i & \sum u_i^3 & \sum u_i^2v_i & \sum u_i v_i^2 \\ \sum v_i & \sum u_iv_i & \sum v_i^2 & \sum u_i^2v_i & \sum u_iv_i^2 & \sum v_i^3 \\ \sum u_i^2 & \sum u_i^3 & \sum u_i^2 v_i & \sum u_i^4 & \sum u_i^3v_i & \sum u_i^2 v_i^2 \\ \sum u_iv_i & \sum u_i^2v_i & \sum u_i v_i^2 & \sum u_i^3v_i & \sum u_i^2v_i^2 & \sum u_i v_i^3 \\ \sum v_i^2 & \sum u_iv_i^2 & \sum v_i^3 & \sum u_i^2v_i^2 & \sum u_iv_i^3 & \sum v_i^4 } \pmatrix{a\\b\\c\\d\\e\\f} =\pmatrix{\sum y_i \\ \sum y_i u_i \\ \sum y_iv_i \\ \sum y_iu_i^2\\ \sum y_iu_iv_i \\ \sum y_iv_i^2} $$