notes.tex

\documentclass[12pt,a4paper]{article}

% ================================================================================

\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
\newcommand{\minimize}[1]{\mathop{\mbox{minimize}}_{#1} \ \ }
\newcommand{\maximize}[1]{\mathop{\mbox{maximize}}_{#1} \ \ }
\newcommand{\st}[0]{\mbox{subject to} \ \ }
\newcommand\cc[1]{\texttt{#1}}
\newcommand{\abs}[1]{\left|#1\right|}

% ================================================================================

\usepackage{array}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{graphicx}
\usepackage[sans]{dsfont}
\usepackage{bbm}
\usepackage{amsmath,bm}
\usepackage{url}
\usepackage{hyperref}
\usepackage{eurosym}
\usepackage{bbold}
\usepackage[toc,page]{appendix}
\usepackage[top=2cm, bottom=2cm, left=2cm, right=2cm]{geometry}

\def\UrlBreaks{\do\/\do-}  % https://tex.stackexchange.com/a/561193

\usepackage[toc,page]{appendix}

\usepackage[breakable]{tcolorbox}
\DeclareRobustCommand{\mybox}[2][gray!20]{
  \begin{tcolorbox}[
      breakable,
      left=0pt,
      right=0pt,
      top=0pt,
      bottom=0pt,
      colback=#1,
      colframe=#1,
      width=\dimexpr\textwidth\relax,
      enlarge left by=0mm,
      boxsep=5pt,
      arc=0pt,outer arc=0pt,
    ]
    #2
  \end{tcolorbox}
}

\newcommand{\floor}[1]{\left\lfloor #1 \right\rfloor}

\begin{document}

\section{Exercise 1.13}

\subsection{Proof 1}

The second-order difference equation $F_k = F_{k-1} + F_{k-2}$ can be represented as two
first-order equations
\begin{align*}
  \underbrace{\begin{bmatrix} F_{k-1} \\ F_k \end{bmatrix}}_{x_{k+1}} =
  \underbrace{\begin{bmatrix} 0 & 1 \\ 1 & 1 \end{bmatrix}}_{A}
  \underbrace{\begin{bmatrix} F_{k-2} \\ F_{k-1} \end{bmatrix}}_{x_k}.
\end{align*}
Hence, $x_{n}$ (where $n \geq 2$) depends on the initial condition $x_0 = (F_0, F_1) = (0, 1)$ as follows
\begin{align} \label{eq.initial-condition}
  x_n = A^nx_0.
\end{align}
The eigenvalues of $A$ are~\footnote{We have to find $\lambda$ such that $\cc{det}(A - \lambda I) = 0$.}
\begin{align*}
  \lambda_1 = \frac{1 + \sqrt{5}}{2}, \quad \lambda_2 = \frac{1 - \sqrt{5}}{2},
\end{align*}
with corresponding eigenvectors~\footnote{Found by solving equations $(A - \lambda_i
I)q_i = 0$ (and normalizing).}
\begin{align*}
  q_1 = \frac{1}{n_1}\begin{bmatrix} 1 \\ \lambda_1 \end{bmatrix}, \quad
  q_2 = \frac{1}{n_2}\begin{bmatrix} 1 \\ \lambda_2 \end{bmatrix},
\end{align*}
where $n_k = \norm{q_k} = \sqrt{1 + \lambda_k^2}$. Using
\begin{align*}
  Q = \begin{bmatrix}q1 & q2\end{bmatrix}, \quad
    \Lambda = \begin{bmatrix} \lambda_1 & 0 \\ 0 & \lambda_2 \end{bmatrix}
\end{align*}
we can perform a similarity transform (note that $Q$ is an orthogonal matrix)
\begin{align*}
  x_{k+1} = \underbrace{Q\Lambda Q^T}_{A} x_k,
\end{align*}
Hence, we can express~\eqref{eq.initial-condition} as (note that $(Q\Lambda Q^T)(Q\Lambda Q^T) = (Q\Lambda^2 Q^T)$)
\begin{align*}
  x_{n}
  = Q\Lambda^n \underbrace{Q^T x_0}_{\tilde{x}_0}
  = \frac{\lambda_1}{n_1}q_1\lambda_1^n + \frac{\lambda_2}{n_2}q_2\lambda_2^n,
\end{align*}
where $\tilde{x}_0 = (\lambda_1/n_1, \lambda_2/n_2)$ can be seen as the initial
conditions after the similarity transform. Therefore, we can express the $n$-th
Fibonacci number as the first component of $x_{n}$
\begin{align*}
  F_n
  &= \frac{\lambda_1}{n_1^2}\lambda_1^n + \frac{\lambda_2}{n_2^2}\lambda_2^n \\
  &= \frac{1 + \sqrt{5}}{5 + \sqrt{5}}\lambda_1^n + \frac{1 - \sqrt{5}}{5 - \sqrt{5}}\lambda_2^n
  = \frac{\sqrt{5}\lambda_1^n - \sqrt{5}\lambda_2^n}{5} \\
  &= \frac{\lambda_1^n - \lambda_2^n}{\sqrt{5}}.
\end{align*}

Note that $|\lambda_2^n| < 1$ for any $n > 0$ and hence, dividing by $\sqrt{5} > 2$
leads to a number smaller than $1/2$. Therefore, $F_n$ is the closest integer to
$\lambda_1^n / \sqrt{5}$.

\subsection{Proof 2 (by induction)}

\begin{itemize}
\item for $n = 0$ we have $F_0 = \frac{\lambda_1^{0} - \lambda_2^{0}}{\sqrt{5}} = 0$
\item for $n = 1$ we have $F_1 = \frac{\lambda_1^{1} - \lambda_2^{1}}{\sqrt{5}} = 1$
\item next, assume that $F_{n-2} = \frac{\lambda_1^{n-2} - \lambda_2^{n-2}}{\sqrt{5}}$
  and $F_{n-1} = \frac{\lambda_1^{n-1} - \lambda_2^{n-1}}{\sqrt{5}}$, we have to prove
  that $F_{n} = F_{n-1} + F_{n-2} = \frac{\lambda_1^{n} - \lambda_2^{n}}{\sqrt{5}}$:
\end{itemize}
\begin{align*}
  \lambda_1^{n-2} - \lambda_2^{n-2} + \lambda_1^{n-1} - \lambda_2^{n-1}
  &= \lambda_1^{n-2} + \lambda_1^{n-1} - (\lambda_2^{n-2} + \lambda_2^{n-1}) \\
  &= \lambda_1^n\lambda_1^{-1}\underbrace{(\lambda_1^{-1} + 1)}_{\lambda_1} - \lambda_2^n\lambda_2^{-1}\underbrace{(\lambda_2^{-1} + 1)}_{\lambda_2} \\
  &= \lambda_1^n - \lambda_2^n,
\end{align*}
where we have used that $\lambda_i - \frac{1}{\lambda_i} = 1$ (for $i=1,2$). This is the
case because $\lambda_1$ and $\lambda_2$ are eigenvalues of $A$
(see~\eqref{eq.initial-condition}) and hence they satisfy its characteristic equation
$\cc{det}(A - \lambda I) = 0$ (\emph{i.e.,} $\lambda^2 - \lambda -1 = 0$).

\section{Exercise 1.19}

Transformation:
\begin{align*}
  a &\leftarrow bq + a(p + q) \\
  b &\leftarrow bp + aq.
\end{align*}
Applying twice:
\begin{align*}
  a
  &\leftarrow (bp + aq)q + (bq + ap + aq)p + (bq + ap + aq)q \\
  &\leftarrow bpq + aq^2 + bpq + ap^2 + apq + bq^2 + apq + aq^2 \\
  &\leftarrow bpq + bpq + bq^2 + aq^2  + ap^2 + apq + apq + aq^2 \\
  &\leftarrow b(q^2 + 2pq) + a(p^2 + 2pq + 2q^2) \\
  %
  b
  &\leftarrow (bp + aq)p + (bq + ap + aq)q \\
  &\leftarrow bp^2 + apq + bq^2 + apq + aq^2 \\
  &\leftarrow b(p^2 + q^2) + a(q^2 + 2pq).
\end{align*}
Hence
\begin{align*}
  p^{'} &= p^2 + q^2 \\
  q^{'} &= q^2 + 2pq.
\end{align*}

\section{Exercise 1.24}

In this exercise we use the first two properties of the $\bmod$ operator (for integers
$x$, $y$ and $n$)
\begin{align}
  (xy)\bmod n &= [(x\bmod n)(y\bmod n)]\bmod n \label{eq.mod.property1} \\
  (xy)\bmod n &= [x(y\bmod n)]\bmod n \label{eq.mod.property2} \\
  (x + y)\bmod n &= [(x\bmod n) + (y\bmod n)]\bmod n \label{eq.mod.property3}
\end{align}
Lets prove them.

\subsection{Proof of property~\eqref{eq.mod.property3}}

We can express
\begin{align} \label{eq.quotient-form}
  \begin{split}
    x &= q_xn + (x \bmod n) \\
    y &= q_yn + (y \bmod n),
  \end{split}
\end{align}
for some integers $q_x$ and $q_y$. Substituting these in $(x + y) \bmod n$ gives
\begin{align*}
  \left[\underbrace{(q_x + q_y)}_{q}n + \underbrace{(x \bmod n) + (y \bmod n)}_z\right] \bmod n = (qn + z) \bmod n.
\end{align*}
By definition
\begin{align*}
  u \bmod n = u - n\floor{\frac{u}{n}},
\end{align*}
hence (note that $q$ is an integer)
\begin{align*}
  (qn + z) \bmod n
  &= (qn + z) - n\floor{\frac{qn + z}{n}} \\
  &= (qn + z) - n\floor{q + \frac{z}{n}} \\
  &= (qn + z) - n\left(q + \floor{\frac{z}{n}}\right) \\
  &= (qn + z) - \left(qn + n\floor{\frac{z}{n}}\right) \\
  &= z - n\floor{\frac{z}{n}} \\
  &= z\bmod n.
\end{align*}
Therefore we have proven~\eqref{eq.mod.property3}.

\subsection{Proof of property~\eqref{eq.mod.property1}}

Substituting~\eqref{eq.quotient-form} in $(xy)\bmod n$ gives
\begin{align*}
  (q_xq_yn^2 + q_xn(y \bmod n) + q_yn(x \bmod n) + (x \bmod n)(y \bmod n))\bmod n
\end{align*}
Using property~\eqref{eq.mod.property3} we see that $(xy)\bmod n = ((x \bmod n)(y \bmod n))\bmod n$ because
\begin{align*}
  q_xq_yn^2\bmod n = q_xn(y \bmod n)\bmod n = q_yn(x \bmod n)\bmod n = 0.
\end{align*}

\subsection{Proof of property~\eqref{eq.mod.property2}}

Using property~\eqref{eq.mod.property1} to expand $[x(y\bmod n)]\bmod n$ we get
\begin{align*}
  [x(y\bmod n)]\bmod n
  &= [(x\bmod n)((y\bmod n)\bmod n)]\bmod n \\
  &= [(x\bmod n)(y\bmod n)]\bmod n \\
  &= (xy)\bmod n.
\end{align*}

\section{Exercise 1.28}

If $x^2 \bmod n = m \bmod n$~\footnote{Which could be written equivalently as $x^2
\equiv m \bmod n$.} we say that $x$ is the square root of $m$ modulo $n$. For example,
\begin{align*}
  1^2 \equiv 3^2 \equiv 5^2 \equiv 7^2 \equiv 1 \bmod 8,
\end{align*}
that is $1, 3, 5$ and $7$ are square roots of $1$ modulo $n=8$~\footnote{\emph{i.e.,}
$x^2 \equiv 1 \bmod n$ implies $x^2 = kn + 1$ (with a positive integer $k$).}. We are
interested in the case when $m = 1$. We consider $1$ and $n-1$ as trivial square roots
of $1$ modulo $n$ because
\begin{itemize}
\item $1^2 \bmod n = 1 \bmod n = 1$
\item $(n-1)^2 \bmod n = (n^2 - 2n + 1) \bmod n = 1$, using property~\eqref{eq.mod.property3}.
\end{itemize}
Of course, we could say the same thing about $n+1$ but in the Miller-Rabin test we care
only about numbers smaller than $n$.

\section{Exercise 2.13, 2.14, 2.15, 2.16}

Let $x$ denotes an interval. We use the following notation:
\begin{itemize}
\item $x_c$: the center of interval $x$
\item $x_w$: the width of interval $x$
\item $x_{\ell} = x_c - x_w$: interval lower-bound
\item $x_{u} = x_c + x_w$: interval upper-bound
\item $x_p = \abs{100\frac{x_w}{x_c}}$: what percent of $x_c$ is $x_w$.
\end{itemize}

\subsection{\cc{mul-interval} (case 1)}

In Exercise 2.13 we have to find a simplified expression for $z_p$ in terms of $x_p$ and
$y_p$, where $x$, $y$ and $z$ are intervals such that $z = x * y$~\footnote{The operator
$*$ denotes multiplication of intervals.}. In this exercise we are allowed to assume
that $x_{\ell}, y_{\ell} \geq 0$ (see ``case 1'' in exercise 2.11 in
\cc{exercises2.rkt}) - this implies that:
\begin{itemize}
\item $z_{\ell} = x_{\ell} y_{\ell} = (x_c - x_w)(y_c - y_w) = x_cy_c - x_cy_w - x_wy_c + x_wy_w$
\item $z_{u} = x_{u} y_{u} = (x_c + x_w)(y_c + y_w) = x_cy_c + x_cy_w + x_wy_c + x_wy_w$
\item $z_w = \frac{1}{2}(z_u - z_{\ell}) = x_cy_w + x_wy_c$
\item $z_c = \frac{1}{2}(z_u + z_{\ell}) = x_cy_c + x_wy_w$
\item $z_p = 100\frac{z_w}{z_c} = 100\frac{x_cy_w + x_wy_c}{x_cy_c + x_wy_w} \approx 100\frac{x_cy_w + x_wy_c}{x_cy_c}$ because $x_w$ and $y_w$ are assumed small.
\end{itemize}
The expression for $z_p$ can be further reduced to $z_p = x_p + y_p$ by substituting
$x_w = \frac{x_cx_p}{100}$ and $y_w = \frac{y_cy_p}{100}$ (note that $x_c, y_c \geq 0$
by assumption).

\subsection{\cc{add-interval}}

Let us consider $z = x + y$:
\begin{itemize}
\item $z_{\ell} = x_{\ell} + y_{\ell} = x_c + y_c - x_w - y_w$
\item $z_{u} = x_{u} + y_{u} = x_c + y_c + x_w + y_w$
\item $z_w = \frac{1}{2}(z_u - z_{\ell}) = x_w + y_w$
\item $z_c = \frac{1}{2}(z_u + z_{\ell}) = x_c + y_c$
\item $z_p = \abs{100\frac{z_w}{z_c}} = \abs{100\frac{x_w + y_w}{x_c + y_c}} =
  \abs{\frac{x_p\abs{x_c} + y_p\abs{y_c}}{x_c + y_c}} = \abs{x_p\frac{\abs{x_c}}{x_c +
      y_c} + y_p\frac{\abs{y_c}}{x_c + y_c}}$.
\end{itemize}
$z_p$ is undefined when $z_c = 0$ and is a weighted average of $x_p$ and $y_p$:
\begin{align*}
  z_p =
  x_p\underbrace{\abs{\frac{x_c}{x_c + y_c}}}_{\alpha} +
  y_p\underbrace{\abs{\frac{y_c}{x_c + y_c}}}_{\beta}.
\end{align*}
The weighting coefficients $\alpha$ and $\beta$ can be greater than 1 as $x_c,
y_c\in\mathbb{R}$, \emph{i.e.,} $(x+y)_p$ could be greater than $\max(x_p, y_p)$. On the
other hand, for any choice of $x_c$ and $y_c$, $\alpha + \beta \geq 1$. Hence $z_p \geq
\min(x_p, y_p)$. That is, as expected, the addition operation cannot reduce the
uncertainty below $\min(x_p, y_p)$.

\subsection{\cc{sub-interval}}

Let us consider $z = x - y$:
\begin{itemize}
\item $z_{\ell} = x_{\ell} - y_{u} = x_c - x_w - (y_c + y_w) = x_c - y_c - x_w - y_w$
\item $z_{u} = x_{u} - y_{\ell} = x_c + x_w - (y_c - y_w) = x_c - y_c + x_w + y_w$
\item $z_w = \frac{1}{2}(z_u - z_{\ell}) = x_w + y_w$
\item $z_c = \frac{1}{2}(z_u + z_{\ell}) = x_c - y_c$
\item $z_p = \abs{100\frac{z_w}{z_c}} = \abs{100\frac{x_w + y_w}{x_c - y_c}} =
  \abs{\frac{x_p\abs{x_c} + y_p\abs{y_c}}{x_c - y_c}} =
  x_p\underbrace{\abs{\frac{x_c}{x_c - y_c}}}_{\alpha} +
  y_p\underbrace{\abs{\frac{y_c}{x_c - y_c}}}_{\beta}$.
\end{itemize}
$z_p$ is undefined when $x_c = y_c$. For any choice of $x_c$ and $y_c$, $\alpha + \beta
\geq 1$. Hence, $z_p \geq \min(x_p, y_p)$. That is, as expected, the subtraction
operation cannot reduce the uncertainty below $\min(x_p, y_p)$.

\subsection{Analysis}

If $f: \mathbb{R} \to \mathbb{R}$ is a continuous function over a closed and bounded
interval $[x_{\ell}, x_u]$, then its image is a closed and bounded interval
\begin{align} \label{eq.range_of_func_of_interval}
  f([x_{\ell}, x_u]) = \left[\min_{v\in[x_{\ell}, x_u]}f(v), \max_{v\in[x_{\ell}, x_u]}f(v)\right].
\end{align}
In the case of our four basic arithmetic operations~\footnote{Assuming that 0 is not
contained in the denominator interval for division.}, $f$ is monotone and computing the
minimum and maximum above becomes trivial~\cite{interval-operators}:
\begin{align*}
  [x_{\ell}, x_u] \star [y_{\ell}, y_u] = [\min(\mathcal{C}), \max(\mathcal{C})],
\end{align*}
where $\star$ denotes one of our four binary arithmetic operations and $\mathcal{C} =
\{x_{\ell}y_{\ell}, x_{\ell}y_{u}, x_{u}y_{\ell}, x_{u}y_{u}\}$. This is equivalent to
\begin{align*}
  [x_{\ell}, x_u] \star [y_{\ell}, y_u] = \{v_1 \star v_2 \, | \, v_1 \in [x_{\ell}, x_u], v_2 \in [y_{\ell}, y_u]\}.
\end{align*}
Let us consider now two algebraically equivalent expressions:
\begin{itemize}
\item $f_1(x) = x^2$ (uses a unary operator $(\cdot)^2$)
\item $f_2(x) = xx$ (uses a binary multiplication operator)
\end{itemize}
When $x$ is a real number, we have $f_1(x) = f_2(x)$. However, when $x$ is an interval
($[x_{\ell}, x_u]$), the unary operator and the binary operator might lead to different
results depending on the way we choose to implement them (\emph{i.e,} depending on the
assumptions we take into account when solving the two optimization problems in
\eqref{eq.range_of_func_of_interval}). Concretely,
\begin{itemize}
\item the unary operator could be defined as $[x_{\ell}, x_u]^2 = \{v^2 \, | \,
  v\in[x_{\ell}, x_u]\}$, \emph{e.g.,} $[-1, 1]^2 = [0, 1]$
\item in the definition of the binary operator we could (for simplicity) ignore the fact
  that the two operands are the same and, in effect, compute $f_3(x, y) = xy$ (instead
  of $f_2(x)$) assuming that we could simply use it with argument $y = x$. This,
  however, is not equivalent because the solutions of the optimization problems in
  \eqref{eq.range_of_func_of_interval} with and without the $y=x$ assumption are
  different. For example, using the multiplication procedure we have defined, $[-1, 1] *
  [-1, 1] = [-1, 1]$.
\end{itemize}
In Exercise 2.15, we are asked whether tighter error bounds can be produced if the
expression can be written in a form such that no variable that represents an uncertain
number is repeated. This is clearly the case for the two examples above, but not true in
general. For example we get exactly the same bounds for $3 * x$ as for $x+x+x$ (for some
interval $x$, where $3$ can be seen as the interval $[3, 3]$), \emph{i.e.,} the bounds
obtained using $3 * x$ are not ``tighter''~\footnote{I guess I am being a bit pedantic
here. Note that the uncertainty of some operations can be much larger than $\max(x_p,
y_p)$ depending on the particular intervals $x$ and $y$.}.

According to~\cite{interval-operators} ``... it can be shown that the exact range of
values can be achieved, if each variable appears only once ...''. Intuitively, this is
the case because:
\begin{itemize}
\item As we have shown in the preceding sections, our four operations cannot reduce
  uncertainty of intervals $x$ and $y$ below $\min(x_p, y_p)$. So in general, performing
  fewer number of operations is likely to lead to smaller uncertainty. For example
  $x+x-x$ would have larger uncertainty compared to $x$. Similarly for $x*x/(x*x)$ and
  $x/x$.
\item Not taking into account the assumption $y=x$ in our binary operations, leads to
  more relaxed bounds. Reformulating the expression to have a single occurrence of
  uncertain variables is an attempt to avoid having to deal with the $y=x$ assumption.
\end{itemize}
The second point above addresses the first part of exercise 2.16 (why equivalent
algebraic expressions may lead to different answers). The second part (namely ``can you
devise an interval-arithmetic package that does not have this shortcoming?'') is
interesting. The answer is ``no'' in general but we can improve things in the following
ways:
\begin{itemize}
\item we can imagine to implement a library of functions (\emph{e.g.,} $(\cdot)^2$)
  where various assumptions (\emph{e.g.,} $y=x$) are handled explicitly. In theory this
  would lead to exact bounds in the situations we can handle but one cannot handle all
  situations of interest exactly as the solutions of the optimization problems
  in~\eqref{eq.range_of_func_of_interval} might be untractable.
\item Implement an automated system for reformulating an expression in terms of an
  existing library of primitives. This might not always be possible (see the
  ``dependency problem'' in~\cite{interval-operators}).
\item In cases where exact form of the range of an expression cannot be computed, we
  could switch to using various approximations (\emph{e.g.,} sampling based methods).
\end{itemize}

\appendix
\appendixpage

\section{Random quotes}

\begin{itemize}
\item ``It is very easy to confuse the essence of what you are doing with the tools that
  you use.''
\item ``One of the things we have to learn how to do is ignore details. The key to
  understand complicated things is to know what not to look at.''
\item ``The way in which you would construct a recursive process is by wishful thinking.
  You have to believe''~\footnote{He is talking about the Hanoi towers problem.}.
\item ``Wishful thinking - essential to good engineering, and certainly essential to good
  computer-science.''
\item ``We divorce the task of building things from the task of implementing the parts.''
\item H. Abelson comments on the axiom ``doing your design before any of your code'':
  ``this is the axiom of a person that hasn't implemented very large computer systems
  ... the real power is when you can pretend that you have made the decision and later
  on decide which is the decision that you should have made (then you have the best of
  both worlds)''~\footnote{He is talking about data abstraction and designing systems in
  layers. Making important decisions is best done as late as possible (to put in a
  different way, we should have a proper parameterization and decide on the parameters
  only when necessary). Of course, it very much depends on what one means by ``design''
  here.}.
\item ``When you look at means of combination, you should ask yourself whether things
  are closed under that means of combination.''
\item ``... the difference between merely implementing something in a language and
  embedding something in a language''~\footnote{He talks about the ``picture language''
  which is designed in such a way that it can leverage LISP naturally.}
\item ``LISP is a lousy language for doing any particular problem. What it is good for
  is figuring out the right language that you want and embedding that in
  LISP''~\footnote{Aim at designing systems as interconnected levels of languages as
  opposed to a strict hierarchy (see end of Lecture 3A).}.
\item ``The design process is not so much implementing programs as implementing
  languages (and that is really the power of LISP).''
\item ``... a small change in the problem should lead to only a small change in the
  solution. There ought to be a continuity ... instead of solving a particular problem
  at every level of decomposition of the problem ... you solve the class of problems,
  which are a neighborhood of the particular problem that you're trying to solve.''
\item They make a distinction between derivative of a function (meaning a procedure) and
  a derivative of an expression. The former is a black-box which gives you just the
  answers. ``The derivative of an expression is the way it is written and therefore it
  is a syntactic phenomenon.''
\item ``I am doing a dispatch on the type of the expression here - absolutely essential
  in building languages.''~\footnote{This is when discussing the \cc{cond} cases for the
  \cc{deriv} procedure (for symbolic differentiation).}
\item ``Thee main difference between the confusion that existed ten years ago and the
  confusion that exists now is that now a variety of inadequate ontological theories
  have been embodied in a plethora of correspondingly inadequate programming
  languages.'', see footnote on page 270.
\end{itemize}

\nocite{*}
\bibliographystyle{ieeetr}
\bibliography{bib}

\end{document}