%%% ====================================================================
%%%  @LaTeX3-report { LaTeX3-L3-007
%%%  filename        = "l3d007.tex",
%%%  archived        = "ctan:/tex-archive/info/ltx3pub/",
%%%  author          = "Justin Ziegler",
%%%  doc-group       = "Project core team",
%%%  title           = "Technical report on Math Font Encoding",
%%%  version         = "2.00",
%%%  date            = "June 1, 1994",
%%%  time            = "18:52:36 GMT",
%%%  status          = "Public, official",
%%%  abstract        = "This is a report of the \LaTeX3
%%%                     Project work on math font encoding.
%%%                     ",
%%%  keywords        = "Math fonts encoding",
%%%  project-address = "LaTeX3 Project            \\
%%%                     c/o Dr. Chris Rowley      \\
%%%                     The Open University       \\
%%%                     Parsifal College          \\
%%%                     Finchley Road             \\
%%%                     London NW3 7BG, England, UK",
%%%  project-tel     = "+44 20 7794 0575",
%%%  project-FAX     = "+44 20 7556 6196",
%%%  project-email   = "latex-team@latex-project.org",
%%%  copyright       = "Copyright (C) 1994 LaTeX3 Project.
%%%                     All rights reserved.
%%%
%%%                     Permission is granted to make and distribute
%%%                     verbatim copies of this publication or of
%%%                     coherent parts from this publication provided
%%%                     this copyright notice and this permission
%%%                     notice are preserved on all copies.
%%%
%%%                     Permission is granted to copy and distribute
%%%                     translations of this publication or of
%%%                     individual items from this publication into
%%%                     another language provided that the translation
%%%                     is approved by the original copyright holders.
%%%
%%%                     No other permissions to copy or distribute this
%%%                     publication in any form are granted and in
%%%                     particular no permission to copy parts of it
%%%                     in such a way as to materially change its
%%%                     meaning.",
%%%  generalinfo     = "To subscribe to the LaTeX3 discussion list:
%%%
%%%                      Send mail to listserv@urz.uni-heidelberg.de
%%%                      with the following line as the body of the
%%%                      message (substituting your own name):
%%%
%%%                        subscribe LaTeX-L First-name Surname
%%%
%%%                     To find out about volunteer work:
%%%
%%%                      look at the document vol-task.tex which can
%%%                      be obtained electronically, see below.
%%%
%%%                     To retrieve project publications electronically:
%%%
%%%                      Project publications are available for
%%%                      retrieval by anonymous ftp from ctan hosts:
%%%                          cam.ctan.org
%%%                          dante.ctan.org
%%%                          tug.ctan.org
%%%                      in the directory /tex-archive/info/ltx3pub.
%%%
%%%                      The file ltx3pub.bib in that directory gives
%%%                      full bibliographical information including
%%%                      abstracts in BibTeX format.  A brief history
%%%                      of the project and a description of its aims
%%%                      is contained in l3d001.tex.
%%%
%%%                     If you only have access to email, and not ftp
%%%                      You may use the ftpmail service.
%%%                      Send a message just containg the word
%%%                          help
%%%                      to ftpmail@dante.ctan.org
%%%                      for more information about this service.
%%%
%%%                     For offers of financial contributions or
%%%                      contributions of computing equipment or
%%%                      software, contact the project at the above
%%%                      address, or the TeX Users Group.
%%%
%%%                     For offers of technical assistance, contact the
%%%                      project at the above address.
%%%
%%%                     For technical enquiries and suggestions, send
%%%                      e-mail to the latex-l list or contact the
%%%                      project at the above address.",
%%%  checksum        = "23322 5582 30455 207129",
%%%  docstring       = "The checksum field above contains a CRC-16
%%%                     checksum as the first value, followed by the
%%%                     equivalent of the standard UNIX wc (word
%%%                     count) utility output of lines, words, and
%%%                     characters.  This is produced by Robert
%%%                     Solovay's checksum utility.",
%%%  }
%%% ====================================================================

\NeedsTeXFormat{LaTeX2e}

\begin{filecontents}{l3d007.sty}
%%% Action macros for this document
%%%

%%% table stuff from testfont whole thing need wrapup!!!!!!

% Registers and switches shared by the font-table macros below
% (table code taken from testfont, see the note above).
% Do not log characters that are missing from the font being charted.
\tracinglostchars=0
%
% \mcnt, \ncnt, \pcnt: scratch/loop counters; \dim: scratch dimension.
\newcount\mcnt
\newcount\ncnt
\newcount\pcnt
\newdimen\dim
%JWZ \maxz is the number of glyphs charted; change it for 128 glyphs or
%JWZ 256 glyphs.
\newcount\maxz \maxz=128
% \maxiz is the highest slot number.  It is derived from \maxz so that the
% two values cannot get out of step when \maxz is edited above.
\newcount\maxiz \maxiz=\maxz \advance\maxiz by -1
% \ifskipping: set by \evenline while it steps over empty rows.
\newif\ifskipping
% \setbaselineskip: put the characters 0..\maxiz of the current font into
% box 0 and set \baselineskip to 6pt plus the height and depth of that box.
% \ncnt is assigned inside the \hbox, so the change is local to the box group.
\def\setbaselineskip{\setbox0=\hbox{\ncnt=0
\loop\char\ncnt \ifnum \ncnt<\maxiz \advance\ncnt 1 \repeat}
\baselineskip=6pt \advance\baselineskip\ht0 \advance\baselineskip\dp0 }
% \hours: print the time of day as four digits (hhmm).
% \time holds minutes since midnight; \ncnt receives the hours and
% \mcnt the remaining minutes (\time - 60*\ncnt).
\def\hours{%
  \ncnt=\time
  \divide\ncnt by 60
  \mcnt=-\ncnt
  \multiply\mcnt by 60
  \advance\mcnt by \time
  \twodigits\ncnt
  \twodigits\mcnt}
% \twodigits<number>: print #1 with a leading zero when it is below 10.
\def\twodigits#1{\ifnum#1<10 0\fi\number#1}
% \startfont{<font name>}: load #1 as \testfont, select it, and derive the
% paragraph parameters from it:
%   - \baselineskip via \setbaselineskip;
%   - \rightskip with 20pt of stretch when \fontdimen6 of the font is below
%     10pt, otherwise with 2em of stretch;
%   - \spaceskip = \fontdimen2 and \xspaceskip = \fontdimen2 + \fontdimen7.
% The two commented-out lines would print a "Test of ..." heading.
\def\startfont#1{\font\testfont=#1
%  \leftline{\scriptsize Test of #1\unskip\ on \today\ at \hours}
%  \medskip
  \testfont \setbaselineskip
  \ifdim\fontdimen6\testfont<10pt \rightskip=0pt plus 20pt
  \else\rightskip=0pt plus 2em \fi
  \spaceskip=\fontdimen2\testfont % space between words (\raggedright)
  \xspaceskip=\fontdimen2\testfont \advance\xspaceskip
  by\fontdimen7\testfont}

% \oct{<digits>}: box holding an acute-accent mark (roman) followed by #1
% in italic, with small kerns around it -- the notation for an octal constant.
% \hex{<digits>}: box holding the \H accent (roman) followed by #1 in
% typewriter -- the notation for a hexadecimal constant.
\def\oct#1{\hbox{\rm\'{}\kern-.2em\it#1\/\kern.05em}} % octal constant
\def\hex#1{\hbox{\rm\H{}\tt#1}} % hexadecimal constant
% \setdigs: called by \evenline as \expandafter\setdigs\meaning\next, where
% \next is a \chardef; #1 is everything up to the double quote in that
% meaning and #2 is the single token after it (the hex digit of the row).
% Globally defines
%   \h = #2,
%   \0 = \ncnt divided by 64,
%   \1 = (\ncnt - 64*\0) divided by 8,
% using \mcnt as scratch.
\def\setdigs#1"#2{\gdef\h{#2}% \h=hex prefix; \0\1=corresponding octal
 \mcnt=\ncnt \divide\mcnt by 64 \xdef\0{\the\mcnt}%
 \multiply\mcnt by-64 \advance\mcnt by\ncnt \divide\mcnt by 8
 \xdef\1{\the\mcnt}}
% \testrow: probe the sixteen slots "\h0 .. "\hF of the current font.
% Box 0 starts with \penalty 1; \\ is locally redefined to \char"\h so that
% \\0 ... \\F request each slot in turn.  \pcnt is then set globally to
% \lastpenalty as seen at the end of the box.
\def\testrow{\setbox0=\hbox{\penalty 1\def\\{\char"\h}%
 \\0\\1\\2\\3\\4\\5\\6\\7\\8\\9\\A\\B\\C\\D\\E\\F%
 \global\pcnt=\lastpenalty}} % \pcnt=1 if none of the characters exist
% \oddline: finish the current alignment row, then emit a separator row:
% a rule filling 19 spanned columns, followed by a cell with the hex label
% "\h x" lowered by 2.3pt and smashed (so it takes no height or depth).
% \nointerlineskip is applied before and after that row.
\def\oddline{\cr
  \noalign{\nointerlineskip}
  \multispan{19}\hrulefill&
  \setbox0=\hbox{\lower 2.3pt\hbox{\hex{\h x}}}\smash{\box0}\cr
  \noalign{\nointerlineskip}}
% \evenline: find the next row of sixteen slots that contains a character.
% While \ncnt is below \maxz: \next is \chardef'd to \ncnt/16 so that
% \meaning\next can be handed to \setdigs, then \testrow probes the row;
% if it found nothing (\pcnt=1) \ncnt is advanced globally by 16 and the
% loop repeats.  Afterwards control passes to \endchart when \ncnt equals
% \maxz, and to \morechart otherwise.
\def\evenline{\loop\skippingfalse
 \ifnum\ncnt<\maxz \mcnt=\ncnt \divide\mcnt 16 \chardef\next=\mcnt
 \expandafter\setdigs\meaning\next \testrow
 \ifnum\pcnt=1 \skippingtrue \fi\fi
 \ifskipping \global\advance\ncnt 16 \repeat
 \ifnum\ncnt=\maxz \let\next=\endchart\else\let\next=\morechart\fi
 \next}
% \morechart: output one group of sixteen characters as two table rows.
% Ends the current row, draws a rule (with \penalty5000), emits a
% \chartline and an \oddline, increases \1 by one, emits a second
% \chartline and hands over to \evenline for the next group.
\def\morechart{\cr\noalign{\hrule\penalty5000}
 \chartline \oddline \mcnt=\1 \advance\mcnt 1 \xdef\1{\the\mcnt}
 \chartline \evenline}
% \chartline: one table row -- the octal label \0\1x followed by eight
% character cells, each produced by \: .
% \chartstrut: an empty \vbox 14pt high, lowered by 4.5pt; it is the first
% column template of the alignment in \dofonttable.
\def\chartline{&\oct{\0\1x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&}
\def\chartstrut{\lower4.5pt\vbox to14pt{}}
% \dofonttable{<font name>}: typeset a chart of the font #1.
% Calls \startfont{#1}, opens a display, resets \ncnt globally to 0 and
% starts an \halign to \hsize whose header row carries the octal digits
% 0..7; \evenline then produces the body.  The alignment and the display
% are closed by \endchart.
\def\dofonttable#1{\startfont{#1}$$\global\ncnt=0
  \halign to\hsize\bgroup
    \chartstrut##\tabskip0pt plus10pt&
    &\hfil##\hfil&\vrule##\cr
    \lower6.5pt\null
    &&&\oct0&&\oct1&&\oct2&&\oct3&&\oct4&&\oct5&&\oct6&&\oct7&\evenline}
% \endchart: finish the chart started by \dofonttable.
% Ends the current row, draws a rule, emits a footer row carrying the hex
% digits 8..F, then closes the alignment (\egroup), the display and the
% paragraph.
\def\endchart{\cr\noalign{\hrule}
  \raise11.5pt\null&&&\hex 8&&\hex 9&&\hex A&&\hex B&
  &\hex C&&\hex D&&\hex E&&\hex F&\cr\egroup$$\par}
% \: : typeset the character in slot \ncnt as one table cell.
% The character is boxed; when the box is taller than 7.5pt or deeper than
% 2.5pt it is adjusted by \reposition.  The box is then output and \ncnt
% is advanced globally by one.
\def\:{\setbox0=\hbox{\char\ncnt}%
  \ifdim\ht0>7.5pt\reposition
  \else\ifdim\dp0>2.5pt\reposition\fi\fi
  \box0\global\advance\ncnt 1 }
% \reposition: re-box an oversized character.  Box 0 is wrapped in a \vbox
% with a 2pt kern above it, and the depth of the result is increased by 2pt
% (via the scratch register \dim).
% \centerlargechars: switch to an alternative \reposition that centres the
% character vertically with \vcenter, with 2pt kerns above and below.
\def\reposition{\setbox0=\vbox{\kern2pt\box0}\dim=\dp0
  \advance\dim 2pt \dp0=\dim}
\def\centerlargechars{
  \def\reposition{\setbox0=\hbox{$\vcenter{\kern2pt\box0\kern2pt}$}}}

%%% use of code example

%\begin{figure}
%\dofonttable{msbm7}
%\caption{Output of the \texprog{nfssfont.tex} program for the font
%         \texttt{msbm7}}
%\label{fig:testfontmsbm}
%\end{figure}


% \textem{<text>}: emphasise <text>; a thin wrapper around \emph.
\newcommand{\textem}[1]{\emph{#1}}

% \note{<text>}: set <text> as a paragraph of its own, introduced by a bold
% "Note:" label, with .25cm of vertical space above and below.
% Each \vskip is closed with \relax so that TeX stops scanning the glue
% specification there; without it, text following \note{...} that begins
% with "plus" or "minus" would be absorbed as stretch or shrink.
\newcommand{\note}[1]{%
  \par \vskip .25cm\relax \textbf{Note:\ }#1 \par \vskip .25cm\relax}

% \bslash: character constant holding the code of the backslash.
\chardef\bslash=`\\
%    Command name: prints the symbol in slot "6E of math family 2
%    (\mathchar"026E; the backslash in the cmsy symbol font), followed
%    by #1 in typewriter type.  Text mode only: it opens its own math.
\newcommand{\cn}[1]{$\mathchar"026E${\tt#1}}
%    File name: prints #1 in typewriter type.
\newcommand{\fn}[1]{{\tt#1}}

% \jzitem{<label>}: start a list item whose text opens with <label> in
% bold, followed by a line break (\\); the bold is confined to the group.
\def\jzitem#1{\item{\bf #1\\}}

% Margin flags used to mark revised passages of the report.
%
% \jzmarginnote{<text>}: put <text> in the margin, bold, slanted and set
% ragged-right.  The \raggedright declaration is used rather than a
% raggedright environment: the environment's group closes before the
% margin paragraph ends, so its settings are discarded before TeX breaks
% the paragraph and the text is not set ragged at all.
\newcommand{\jzmarginnote}[1]{%
  \marginpar{\raggedright\bfseries\slshape #1}}

% \thisisnew: flag a passage that is new.
\def\thisisnew{\jzmarginnote{This part is new.}}

% \changedabit: flag a passage that was changed slightly.
\def\changedabit{\jzmarginnote{Changed this a little bit.}}

% \seeappendix: flag a reference to an appendix.
\def\seeappendix{\jzmarginnote{Here is an appendix reference.}}

% don't warn about bold typewriter: silently substitute (ssub) medium
% cmtt for the bold-extended upright OT1 cmtt shape at all sizes (<->).
\DeclareFontShape{OT1}{cmtt}{bx}{n}{ <-> ssub * cmtt/m/n }{}
% The same substitution for the T1 encoding, currently disabled:
% \DeclareFontShape {T1}{cmtt}{bx}{n}{ <-> ssub * cmtt/m/n }{}
\end{filecontents}


\begin{filecontents}{l3d007a.tex}
\section{Introduction}
  This document aims to put on paper what could be the backbone or the
  skeleton of a new math encoding for \TeX. This is not the complete
  description of an encoding, but a sort of grid, or global picture of
  what things could look like. This document refers to many glyph groups
  defined in another document called ``Towards a list of math glyphs''.
  Same author.


\section{A few definitions}

\begin{description}

\item[An ``encoding table''.] This conveys the
traditional meaning of an encoding. That is to say a set of 256
glyphs in a given order. The expression ``encoding table'' is usually
abbreviated to ``encoding''.

\item[A ``slot''.] It is the usual word used for referring
to a position in an encoding, that can contain a glyph. It is usually
an integer between 0 and 255. A slot is
certainly not a family, nor anything to do with it.

\item[A ``math kernel''.] This terminology is used to
specify the fonts that are necessary for the math facility to work
as it is described in most \TeX\ documentation\footnote{documentation
on LA\TeX, AMS\TeX, etc, also fits in here.}.
In DEK's implementation the math kernel consists of the
families from 0 to 3. On top of the kernel, many other fonts, with
whatever encoding is available, could be optionally loaded and used.

\item[A ``math encoding''.] It is considered here as a whole; not
just one 256-glyph encoding table, but a set of encoding tables.
This concept will be referred to as ``M-encoding''.

\item[The ``core symbols''.] They are made of two
groups. The group of symbols that must live with the default alphabet
for kerning reasons, and the group of symbols that must live with the
default alphabet for design reasons.

\item[The ``default alphabet''.] It is the alphabet that is
used  when a user types \texttt{\$abc\$}. In the present encoding that
produces $abc$.

\item[``Glyph compatibility''.] Two encodings are glyph compatible
when they contain the same glyphs, though not necessarily in the same
positions. As well
as containing the same glyphs, it must also be possible for the
corresponding fonts to contain the same sidebearings and the same
kerning and ligaturing information.

\end{description}


\section{Global policy}
  \subsection{Text in math mode}
   It is generally agreed that for best quality documents, if one wants
   to put text in a math formula, one should switch back into text
   mode, using something like the \cn{text} macro in the AMS
   package. None of the encodings can be expected to support bad
   usage. In a math encoded font, the letters are not kerned in the same
   way as in a text encoded font, and there are no letter ligatures,
   because they are not needed.

  \subsection{A global rule for boldface}
   It has been decided not to mix light face and bold face symbols in
   the same encoding, but to generate a separate boldface version of all
   lightface math encoded fonts when necessary.  This does not make it
   impossible to mix the two: either one can use the
   \texttt{\string\boldsymbol} approach, or one can load an extra bold
   face font in a given family, and have it directly and permanently
   accessible.

  \subsection{Sans serif and typewriter fonts}
  Extra fonts could be designed in sans serif, or in typewriter using
  some of the proposed new encodings. Another solution is to load the
  Cork encoded sans serif fonts (or typewriter fonts) in free
  families. In either case, the new math encoding will not have any
  slots containing specifically sans serif or typewriter glyphs.

  \subsection{Concerning the Euler shapes}
  Euler shapes could be a good example implementation of the new math
  encoding. Thus no Euler glyphs will be included in the new
  encoding. However, it may be useful to use the Euler Fraktur for a
  first implementation example, if the new encoding includes a Fraktur
  or old  German alphabet.


\section{Concerning Cyrillic letters}
  These would be available, but not as part of the math encoding. They
  would be loaded as an extra family, with whatever encoding exists,
  together with suitable \cn{mathchardef}s.

  \subsection{Compatibility with other typesetting systems}
  \subsubsection{Grouping all \TeX\ specific glyphs in one font
  encoding}
  The present \texttt{cmex} font contains glyphs that cannot be used
  by other typesetting systems, because they are set in a strange way.

  The present \texttt{cmsy} font contains one glyph that is set in a
  strange way --- the radical sign, and thus makes that whole font
  unusable for the outer world. It would be a good idea to make sure
  that this does not happen again.

  The \LaTeX 3 project is hoping to set a new standard, that will not
  only be used by \TeX, but by all systems that typeset mathematical
  formulae. If everything goes according to plan, in the next few years
  many math fonts will exist, for many different systems, and they will
  all use the same encoding.
  The fact that they all use the same encoding means that it will be
  very easy to exchange fonts from one system to another. So one day a
  \TeX\ user will be able to take a math font used by Microsoft Word,
  and convert it easily in order to use it with \TeX.

  If \TeX\ specific glyphs are grouped in one font, there will only be
  one problematic font. As it happens, all \TeX\ specific glyphs are
  more or less geometric, so they could be used with more than one
  math font.

  On the other hand, if \TeX\ specific glyphs are spread around in
  many fonts, then many ``imported'' fonts will not be usable by \TeX\
  without major messing about.

  Concerning this problem, the real question is: is the \LaTeX 3 project
  setting a real standard for the next few years, or just
  making another \TeX\ math font encoding?
  If the answer is: ``the \LaTeX 3 project is setting a real standard
  for the next few years'', then \TeX\ specific glyphs must be grouped
  in one font. If that is really not possible,
  then one can maybe consider putting them in two fonts.

  If a font designer designs a math font for Adobe, the work necessary
  for adapting his font to the \TeX\ world should be reduced as much as
  possible.
  Otherwise nobody will provide any fonts for \TeX.

  The \TeX\ specific glyphs that are concerned here could be visually
  compatible with many math fonts.

  Which are the \TeX\ specific glyphs? So far:
  \begin{itemize}
  \item The delimiters.
  \item The large and small `bigops'.
  \item The radicals.
  \end{itemize}


 \subsubsection{The space issue}
  To enable easier font exchange between the \TeX\ world and the rest of
  the world, the new math encoding will have a space in position 32
  (decimal) of every encoding table, if it is possible.


  \subsection{General document compatibility}
  It is not worth being totally compatible. A lot of glyph positions
  will change, thus direct \cn{mathchardef}s will not always work.
        Documented names from AMSLA\TeX\ (this includes names from
        LA\TeX, \TeX, and AMS\TeX, and LAMS\TeX) ought to be kept.


  \subsection{Grouping all Plain and \LaTeX\ glyphs in 4 fonts}
  The main reason for this is compatibility. Let us consider a user
  that has typed a document with the present math encoding and has
  saturated the families for this document. If the new math encoding
  does not guarantee Plain and \LaTeX\ glyph compatibility with a
  minimum of 4 fonts, then that document cannot run with the new math
  encoding: not enough families. Thus one should make the first four
  font encodings of the new math encoding glyph compatible with
  the group made by: cmr, cmmi, cmex, cmsy.

  \subsection{Grouping all AMS\TeX\ and AMS\LaTeX\ glyphs in less than
    6 fonts} The main reason for this is compatibility. Let us consider
  a user that has typed a document with the existing AMS\TeX\ or
  \LaTeX\ package, and has saturated the families for this document.
  If the new math encoding does not guarantee AMS\LaTeX\ and AMS\TeX\
  glyph compatibility with less than 6 fonts, then that document cannot
  run with the new math encoding: not enough families. The first 6
  font encodings must be one way glyph compatible with the fonts
  provided in the AMS package.

  \subsection{Replacing \texttt {cmex}}
  The math font group has decided that the encoding due to replace the
  \texttt {cmex} encoding will be designed in such a way that the
  corresponding font can be loaded in three sizes or in one size.

  Loading such a font in three sizes produces better typesetting. But
  the resulting page and line-breaks will not be the same as
  when the font was loaded in one size only. Some people will not like
  that change, in their old documents. But for new documents loading
  the extensibles font in three sizes will be better.

  \subsection{Accents in maths}
  There seems to be an agreement that math accents should not change
  with the font or style of the letter. But it is not a problem to keep
  the accents that are already in plain \TeX\ (and the \fn{cm} fonts) in
  the same position as in the T1 encoding. That will allow them to be
  variable family; thus any T1 encoding could be loaded, and its
  accents used. Since there will be millions of documents using
  bold hats this possibility must be preserved even if by default
  all math accents are non-variable family.


\section{The base: a Cork encoded text font}

Main use: things like \texttt{\string\log}. This would generally be a
Latin font.

If it is a Latin upright font, it would probably also be used by
physicists (and chemists) for operators, and more generally whenever
upright letters are needed.

\textbf{Separating this set from the rest} enables the user to decide
how `log' and `sin', etc. should be typeset. Thus the multiletter
operators can be compatible with the text font, or with the rest of
the math glyphs, or even set in yet another font.

Math mode should not be used for setting text phrases in mathematical
material.  For example:
 \begin{verbatim}
$$x=y \quad {\rm is\ a\ direct\ consequence\ of} \quad y=x$$
\end{verbatim}
would be better input as:
\begin{verbatim}
$$x=y \quad\hbox{\rm is a direct consequence of}\quad y=x$$
\end{verbatim}
or, better still:
\begin{verbatim}
$$x=y \quad\text{is a direct consequence of}\quad y=x$$
\end{verbatim}
where \verb|\text| is a macro which sets its argument in horizontal
mode.  This avoids unpleasant surprises such as:
\begin{verbatim}
$$X \quad{\rm is\ a\ sub-object\ of}\quad Y$$
\end{verbatim}
However, for upward compatibility with existing plain \TeX\
documents, it will still be possible to set text phrases in math mode,
as long as they only contain $\langle\mbox{character}\rangle$s of type
Variable.


\section{The ``text symbol'' encoding: the TS encoding}
Here one would put the old style numerals, and most of what
is coming out of the present math encoding. Other symbols could
be added. The Text Symbol encoding is definitely \textem{not part of
  the math kernel.} But since it will contain symbols that
previously were accessed via the math fonts, its
encoding must be supplied. This font will not be loaded in a family. It
will just be loaded as a normal text font\footnote{If some users really
feel the need to load it in a math family, they can.}.


\section{The core: the MC encoding (263)}
Counting: 1,10,1 , 54,5 , 124,14 , 12,24,9 ,9= 263 glyphs

The accents are no longer here. They had no real reason to be here.
Most of them are geometrics anyway. But they do have reasons to be
elsewhere. One of the main consequences of taking these accents out
is that the core can be made more coherent, and more complete.  The MC
encoding would contain:
  \begin{itemize}
        \itemsep -0.1cm

  \item The skewchar in position 0: 1
  \item The core digits: 10
  \item The space character in position 32: 1

  \item The core Latin alphabet, which is the default alphabet, in
    uppercase, and lowercase, together with the dotless i and j: 54

  \item The Latin friends: 5

  \item All the Greek material: 124

  \item The Greek friends, next to the Greek: 14

  \item The core symbols for kerning reasons (punctuation and
    delimiters): 12

  \item The core symbols for design reasons: 24

    More for kerning reasons:
  \item The basic geometric delimiters: 9

    Some new glyphs:
  \item New basic delimiters: 9

  \end{itemize}

Sacrifices can be made in the Greek material, and in the core symbols
for design reasons.


\section{The MX encoding: 243}

Count up: 1 1   78 8   10   24 6  16   7 7   26   47 = 231

The usual extensible characters, together with some new ones, would live
here. Here any characters that have strange \TeX\ features, like big
descenders are included, thus grouping glyphs that are not compatible
with the outer world.

For various reasons discussed in ``Replacing \texttt {cmex}?'' (Same
author), the math font group has decided that the MX encoding will be
designed in a way such that when it is loaded in one size (like in the
present \TeX), everything works OK, and the user can still have
access to the new symbols. However, the MX encoding will produce better
quality typesetting when loaded in three sizes.

Detailed contents of MX:
  \begin{itemize}
        \itemsep -0.1cm

  \item Maybe a skewchar: 1

    The space is questionable here, because MX will not be usable by
    other typesetting systems, see comment:
  \item Maybe a space: 1

  \item Big and extensible \TeX\ delimiters from cmex: 78

    Any characters that have strange \TeX\ features like big descenders:
  \item The radicals: 8

    Technically the following can come out, but then they must go in
    MSP.
    To make this possible one could take the bar accent out of MSP.
  \item Horizontal curly braces: 10

  \item All existing big and small ``bigops'' except the integrals: 24

  \item The existing Plain vertical extensible arrows: 6


    One has to limit the number of wide accents, otherwise there is not
    enough place.
  \item The wide tildes, and the wide hats: 16

  New glyphs:

    It is a little bothering that the following will be separated from
    their small versions, but there is nothing much that can be done
                about it:
  \item The big ``big integral'' family: 7
  \item The small ``big integral'' family: 7

  \item The new big and small `bigops': 26
  \item New multisized and extensible delimiters: 47


  \end{itemize}

Note: all integral glyphs must be kerned with themselves, so that two
integrals following each other can be kerned easily with a little
care. Same for various other glyphs.


\section{The math symbol `privilege' font ``MSP'': 250}
 Count up: 1,1, 54 , 18,7,3 , 23 , 8,4,2,4,4 , 20,7, 10 , 8,8,12 ,  14 ,
 6,4,5,4,5 , 8,2  , 8 = 250
  \begin{itemize}
        \itemsep -0.1cm

  \item A skewchar in position 0: 1
  \item A spacechar in position 32: 1

  \item The script/calligraphic Latin letter set: 54

    According to AMS statistics, the script/cal are
    used more often than the Blackboard bold.

    Having the script/calligraphic here also achieves backward
    compatibility with the existing \TeX, without loading MS1 and MS2.

  \item The basic accents: 18   %  all basic accents should stay here
  \item The double accents: 7   %  could go elsewhere
  \item The underaccents: 3     %  could go elsewhere

    The following must stay here:
  \item The ``Basic symbols'' group: 23

    The next 5 are needed for compatibility with plain:
  \item The ``Greater than plain'' group: 8
  \item The ``Subset plain'' group: 4
  \item The ``In / ni plain'' group: 2
  \item The ``Sqsubset plain \& ams'' group: 4
  \item The ``Succ without sim plain'' group: 4

  \item The ``Small binops plain'' group: 20
  \item The small ints: 7
    These should probably live with the other `succ' members for design
    reasons:
  \item The ``Succ without sim ams'' group: 10

    The next three make a homogeneous group, and must live with sim.
    Sim itself must live here because of compatibility with Plain:
  \item The ``Greater than with sim'' group: 8
  \item The ``Succ with sim'' group: 8
  \item The ``Sim'' group: 12

    The arrows, for compatibility, (improved a little though):
  \item ``Plain horizontal arrows'': 14 or 10
  \item ``Plain vertical arrows'' : 6 \\
    Does not include the extensible arrows.
    The latter are in MX as before.
  \item ``Plain oblique arrows'': 4\\
    Also called ``Plain other arrows''.
  \item ``Latex arrows'': 5
  \item Plain miscellaneous geometric symbols: 4

    Extras ---~these are new glyphs~--- to improve a little what \TeX\
    can already do:
  \item Extra arrows for use with plain: 5

    The ``lasy'' triangles are included in the AMS fonts and thus are
                included in the following group:
  \item AMS left-right open triangles: 8

    Should live with the ``Plain oblique arrows'':
  \item ``AMS obliques'': 2

    Some new glyphs: Some of this could come out.
  \item Wide accents bar: 8

\end{itemize}


\section{The MS1, MS2, Math-Symbol encodings}
Each of these encodings will contain a set of Latin letters, like for
instance fraktur or blackboard bold, in uppercase or lowercase or
both. In some cases a place should be reserved for a set of matching
numbers too (i.e. Open). The rest would be filled up with symbols.
An MS$_i$ encoding is needed for:

\begin{itemize}
\item An extra script/calligraphic, (see below comment on script and
  calligraphic) the default calligraphic is in the MSP encoding.
\item Open + (Arrows or relations) + other geometrics.
\item Old German.
\end{itemize}


\textbf{Note:} Barbara Beeton writes ``Regarding script vs.
calligraphic, I do understand the difference; however, at AMS I believe
we only very rarely get a request to use both styles in the same paper.

For that there are two possibilities:
\begin{enumerate}
        \item designing one encoding table where the positions A-Z (and
        probably  a-z and 0-9 even if they are not all filled) are
        supposed to contain a  ``calligraphy/script'' set of characters.
        Then there would be  instances of that encoding that would
        contain script chars and others  that would contain calligraphic
        chars.  Suppose our standard would say that this encoding is to
        be used as family 4.  A designer would then choose  one such
        font with this encoding for family 4 (thereby effectively
        deciding  what \texttt{\string\cal} and a lot of other symbols
        look like (the ones whose \cn {mathchardef} points into family
        4)). For those who in addition would like to use another
        script/cal math alphabet: they can then just allocate one of
        the free families. Access to this would then be trivial.

        \item Having two different encodings; one for cal, and one for
        script. The remaining symbols in both encodings would be
        different too. Thus both encodings would need to be part of the
        standard suite of math encoding tables.

        Which solution is preferable depends a bit on the number of
        symbols that ought to go in the standard.''
\end{enumerate}

Also J\"org Knappen writes: ``I strongly support having two different
encodings, one for cal and one for script. If users have the choice
between cal and script, they prefer script (at least in
Mainz\footnote{Maybe Americans prefer it the other way
  round.}). However, the old calligraphic alphabet still
needs to be supported for compatibility reasons.''


\section{The MS1 encoding: 232}
Count up: 1 1 54 10  32  36 30 12 10 21 10 15= 232
  \begin{enumerate}
        \itemsep -0.1cm

    \item A skewchar in position 0: 1
    \item A spacechar in position 32: 1
    \item The BBB alphabet uppercase and lowercase: 54
    \item The BBB digits: 10
    \item The last WIDE ACCENTS: arc, back-to-front vector, and
    double-sided  vector, normal vector: 32

      For AMS inclusion:
    \item The ``AMS horizontal arrows'' group: 22
    \item The AMS other arrows group: 12
    \item The ``Greater than AMS'' group: 30
    \item The ``Subset AMS'' group: 12
    \item ``AMS Equals friends'': 10
    \item ``AMS Miscellaneous geometric symbols'': 21
    \item ``AMS Vdash group'': 10
    \item ``AMS boxes and friends'': 15

      For fun if there is place to spare:
    \item Alan's arrow construction set: ?

  \end{enumerate}


\section{Other requested typefaces}
  \begin{itemize}

  \item In general, users may want MC fonts in arbitrary styles (bold
    sans serif MC for instance) in order to get the Greek letters in
    their favourite styles.

  \item A ``text-like'' italic or slanted for computer science
    identifier names and the like.  This would be Cork encoded and
    optionally loaded.

  \item A ``bold upright'' for use as variables -- e.g. vectors in
    physics notation rather than the arrow over an italic letter. This
    would be Cork encoded, and optionally loaded or accessed via the
    \texttt{\string\boldsymbol} concept in which case no family would be
    required.

  \item Bold italic for use as variables: either optionally loaded as
    a second font with MC or Cork encoding (using only variable family
    symbols) or accessed via something like
    \texttt{\string\boldsymbol}.

  \item Bold Old German (occasional) suggested
    \texttt{\string\boldsymbol} approach.

  \item Bold script (occasional) suggested \texttt{\string\boldsymbol}
    approach.

  \item Sans serif lightface (occasional): optionally loaded Cork
    encoded font.

  \item Sans serif boldface (occasional): optionally loaded Cork
    encoded font.

  \item Bold symbols: either \texttt{\string\boldsymbol} or optionally
    loaded in remaining slots.

  \item Ultra bold symbols: either \texttt{\string\boldsymbol} or
    optionally loaded in remaining slots.

  \item An MC-encoded bold font containing upright bold Latin glyphs,
    plus bold upright and bold slanted Greek.  This would contain all
    of the most commonly requested bold glyphs in one font (rather
    than many more).


  \item A Cyrillic alphabet. Loaded as an extra family, or in text.

  \end{itemize}


\section{Summarising the families used by the proposed YAASP M-encoding}
\begin{enumerate}
\item Family $0$: A Cork encoded upright text font.

\item Family $1$: An MC encoded font containing the default Latin and
  Greek italic+upright, and core symbols...

\item Family $2$: An MSP encoded font containing cal/script and
  accents...

\item Family $3$: An MX encoded font including all extensible glyphs,
  and double sized operators...

\item Family $y$: An MS$1$ encoded symbol font for Open, and arrows
  or relations.

\item Family $z$: An MS$2$ encoded symbol font for Old German.
\end{enumerate}

\textbf{Comments:}
\begin{itemize}
        \item This leaves 10 families free for anything else, (like bold
        or sans...) and makes many symbols available.

        \item The first four encoding tables put together give total
        \TeX,  \LaTeX\ compatibility.

        \item The first six give total \TeX, \LaTeX, AMS\TeX,
        AMS\LaTeX, LAMS\TeX\ compatibility.

        \item The six put together: do wonders, using no more font
        families than  the present AMS\TeX.

\end{itemize}

\section{Discussion}

\subsection{Advantages}

For MC: A big advantage here, is kerning. In this encoding kerning
is possible between the Latin default alphabet, and both italic and
upright Greek alphabets. This is necessary for compatibility, and for
tidiness. On top of this both letter sets (in actual fact there are
three) can be kerned with the core symbols that are in the MC
encoding. This last point is the most important, and gives new and
better automatic math spacing. (For compatibility reasons,
the Greek italic must be kerned with the period, the comma, and the
slash.)

The bold version of the MC encoding gives the user access to a lot of
bold letter sets in one go. The global family consumption is therefore
largely reduced: 1 bold font instead of 2 or 3.

Taking the accents away from the letters, means that the accents do
not change when the text face changes, i.e. bold letters and medium
letters get the same accents.

One can get more than compatibility with plain \TeX\ only using 4
families (the same number as standard \TeX\ currently uses).

One can get more than compatibility with AMS\TeX\ using 6 families.
This is less than or equal to the number of families used by AMS\TeX.

The calligraphic alphabet is more used than the open, so putting it
with the accents is a step towards grouping most used glyphs together.

This proposal gives a little room in the MC for free spaces, and good
core material.

With the MSP encoding concept, the MSi encodings can really be
considered as (optional) extensions. Thus somebody who knows he
does not need the arrow kit and the Blackboard bold letter set does not
have to load them. Same for Fraktur.

All the \TeX\ specific glyphs are grouped in MX. Thus all the other
fonts could be used by other typesetting systems.

Using the Cork encoded font in family 0 for things like \cn{log} and
\cn{sin}, means that the Greek users can replace it by a Greek font.
(Apparently Greek mathematicians set these function names
using a Greek alphabet).


\subsection{Disadvantages}

If there is not enough space for all the required symbols,
one can make an MS3 encoding that would contain the other
version of script/cal, together with extra symbols.


\subsection{Comments}

In this proposal the core is really made of two fonts, and the kernel is
made of four.

Comments from Alan about the space slot in MX:
\begin{quote}
  ``MX will be used by \TeX, and the dvi drivers may be outputting to a
  device that does not accept anything but a space in position 32.  So
  if you don't include a space here, then the MX-encoded fonts have to
  be split into two device fonts by the drivers.''
\end{quote}

Comment from Alan about the Cork encoded font:
\begin{quote}
  ``I think it would be good to specify that this is family 0, for
  compatibility with current \TeX\ documents containing explicit
 \cn{fam 0}
  (naughty them!) and in order to have filled up slots 0 to 3 rather
  than leaving a gap in family 0.''
\end{quote}
\end{filecontents}

\begin{filecontents}{l3d007b.tex}
\section{Introduction}
  The goal of this document is to try and list the future contents of
  the new math encoding. The glyphs have been divided into groups and
  subgroups.

  \textem {This is a draft and a workbench document.  Some of it is out
  of date, and usually the comments are not for the public.  There are
  lots of spelling mistakes, I know! The reader should be indulgent.}


%something added by bb for euler:
%begin{enumerate}
%\item Cursive (normal) capital letters. not often used.
%\item Cursive (normal) lower-case letters.
%\item Script upper-case.
%\item (Some) script lower-case.
%\item Fraktur (old german) upper-case.
%\item Fraktur (old german) lower-case.
%\item Digits.
%\end{enumerate}
%% bb --
%[bb: I will be happy to make a proposal for this.]


\section{Extra font dimensions}
\begin{itemize}
\itemsep -0.15cm
\item The design size,
\item The default script size,
\item The default scriptscript size,
\item Suggested value for mathsurround (in MC)
\item Math\_axis (in every font)
\item Thin mu skip,
\item Med mu skip,
\item Thick mu skip,
\item Recommended rule weight
\item Baselineskip: leadingheight,
\item Baselineskip: leadingdepth
\item Suggested by JMR: the big and bigg params.
\end{itemize}


\section{Kerning}
Better kerning should be made possible in the Latin math
italic, if it is possible. Normal kerning information is put in the
\fn{.tfm}
file. But in math mode, for things to get kerned as specified in the
\texttt{.tfm} file the left atom must be of ordinary type. If the user
interface redefines everything that must be kerned as being ordinary,
old documents will start looking different, and this is not wanted. To
avoid this, the user interface could define a macro
\texttt{\string\mathkerning\{...\}} that would use the kerning specified
in the \texttt{.tfm} file, without globally making all characters
ordinary.

The following glyphs should be kerned:
\begin{itemize}
\itemsep -0.15cm
\item The spacing of [ and ( and ) and ] followed by letters should
  be adjusted.

\item  The spacing of letters followed by [ ] ( ) should be
  adjusted. This is an  important one.

\item Keep the kerning with\quad . \quad , \quad / \quad for most
  letters !
%\item Keep kerning between = and upright $\Gamma$ ! (I must have
  %dreamt this one) Maybe add kerning for other punctuation characters.
\item At least keep the kerning between d and Y,Z,j,f. Maybe add some
  others: dx, dy, d$\alpha$, d$\theta$, d$\phi$ ...

\item New kerning of the integral with itself. This would only
  be available via a \texttt{\string\mathkerning} macro (see
  previous comment).
\item Kerning of the period with itself,
\item Kerning of the centered period with itself.
\item Jan M.R. is sure that kerning is needed between
  Latin and Greek. More precise information is needed.
\end{itemize}


\section{The following should be taken out of the present math encoding}
\begin{itemize}
\itemsep -0.15cm
\item The old digits: 10.
\item The 2 paragraph signs: \P, \S.
\item The Yen sign: Y.
\item The double dagger sign \ddag.
\item The four card families: $\clubsuit, \heartsuit, \diamondsuit,
  \spadesuit$.
\item The musical signs: $\flat, \natural, \sharp$ ?????????
\item The maltese cross. (AMS)
\item The $\mathchar"017F$ seems not to be needed in maths.
\item The circled R must come out of the math symbols. (AMS)
\item The raised asterisk,
\item The two triangles in cmmi: cmmi'56 '57  ???????
\end{itemize}

All these will be put in to the ``Text symbols'' encoding, that would
come in many faces, and be text dependent.
Other glyphs could be put in the ``Text symbols'' font:
\begin{itemize}
\itemsep -0.15cm
\item More numerals,
\item The perthousand sign.
\item Maybe this is a good place for the `fraction' characters from
  Adobe.
\item $<$florin$>$, $<$ellipsis$>$ etc.
\item The superior and inferior digits, and put in kerning so that
  $<$onesuperior$>$ $<$fraction$>$ $<$twoinferior$>$ produces a 1/2.
\item The single dagger finds a place here although it is in
  maths as well. This makes them two different symbols, and enables both
  to  have more specific shapes.
\item A real copyright symbol, TM (trademark) and SM (service mark).
\item An interrobang (a combination of ? and !) new. (bb)
\end{itemize}


Alan Jeffrey has worked on the `text symbol font'. Actually it is now
called the companion text font. He has written more on this topic.
``alanje@cogs.susx.ac.uk''


\section{The Greek glyphs: 124}
The following shapes must be included:
\begin{itemize}
\itemsep -0.15cm
\item All the Upper-case in upright. 24
\item All the Upper-case in italic. 24
\item All the Lower-case in upright. 24
\item All the Lower-case in italic face. 24. So far: $24\times 4=96$
\item All the variable shapes in upright. 10
\item All the variable shapes in italic. 10
\item All the special numeric letters in upright. 3
\item All the special numeric letters in italic. If lack of space
  prefer the italic shapes to the upright ones. 3.
\item Some control glyphs: 2 (probably useless)
\end{itemize}


\subsection{Variable shapes: 10}
This list comes basically from: J\"org Knappen.  They are all listed
here including the ones that are already in the \fn{cm} fonts:
\begin{enumerate}
\itemsep -0.15cm
\item Lower-case Phi,
\item Lower-case Pi,
\item Lower-case Kappa, (AMS)
\item Lower-case beta (new),
\item Lower-case Rho,
\item Lower-case Epsilon,
\item Lower-case Sigma,
\item Lower-case Theta.
\item Upper case chi (new),
\item Upper case for upsilon.
\end{enumerate}

\subsection{Extra letters for numerals: 3}
Source: J\"org Knappen.  They are all listed here including the ones
that are already in the \fn{cm} and \fn{ams} fonts. Must be given in
lower-case.
Upper-case Greek numerals exist, although extremely rare. For the
  sake of completeness one could fill them in. But they are surely not
  the hottest characters needed. (J\"org)
\begin{enumerate}
\itemsep -0.15cm
\item Qoppa (new),
\item Sampi (Sanpi?) (new) (J\"org Knappen),
\item Digamma (AMS).
\end{enumerate}


\subsection{Control glyphs: 2}
\begin{enumerate}
\itemsep -0.15cm
\item An italic control glyph, i.e. the following Greek letter is not
taken from the upright, but from the italic Greek,
\item A variable shape control glyph, i.e. the following Greek letter
is not taken from the normal set of letters, but from the variant shape
set. This will not work for all letters. Thus may not be a good idea.
\end{enumerate}

\note {From Alan about the control slots for Greek, ``Er, I'm not very
  sure about those, since they'll affect kerning.  I'd prefer to have
  the choice between italic / upright made by the document designer.
  And I'm not sure why anyone would want to get at an upper case $\xi$
  by a macro \texttt{\string\uppercasegreek}\{\texttt{\string\xi}\}!''
  --- ``True they will affect the kerning. But one could use them
  differently from what you suggested.  Although I'm not sure it is
  interesting, the ligature mechanism does not have to be visible for
  the user, i.e. he can still type \texttt{\string\Gamma}, which is
  expanded to \texttt{\string\up \string\gamma}.''}


\section{Extra Greek-like material: 14}
This group of glyphs should not really be separated from the rest of
the Greek material.
\begin{enumerate}
\itemsep -0.15cm
  \item An upright partial sign,
  \item An italic partial sign,
  \item An upright partial sign with a slanted bar, AMS \cn{eth}
  \item A \cn{thorn} WASY'151 but this one is not very good. There is a
          better one in dcmr'136. There is one in wslipa10'102 (J\"org)
  \item A barred upright lambda, ?????? (probably not J\"org)
  \item A barred italic lambda, this is preferred. (J\"org)
  \item An upright mho sign (upside down Omega),
  \item The back to front epsilon: AMS "7F \cn{backepsilon},
  \item Arabic letter dal: looks something like a back to front $c$.
  \item Hebrew letter msbm'151,
  \item Hebrew letter msbm'152,
  \item Hebrew letter msbm'153,
  \item The $\aleph$ or \cn{aleph} in position CMSY'100,
  \item The Nabla, $\nabla$ in CMSY'162
\end{enumerate}

The barred signs may be obtained by ligatures, or could be
constructed with kerning. In any case some slots for ligatures must be
left free if possible.


\section{The Latin letters: One set$=54$ glyphs}

We shall assume here that all lower case alphabets contain a dotless `i'
and a dotless `j', so that they can take accents other than a dot.
\begin{itemize}
\itemsep -0.15cm
\item The usual \fn{cmmi} italic shapes. Upper-case and lower-case.
\item The calligraphic shapes. Upper-case and lower-case.
  The lower-case shapes are presently maybe not available.
\item The script shapes. Upper-case and lower-case.
  The lower-case shapes are presently maybe not available.
\item The black board bold shapes. Upper-case and lower-case.
\item The Fraktur style. Upper-case, and lower-case.
\end{itemize}

  \subsection{The calligraphic and/or script styles}
  BB: ``How are ``calligraphic'' and ``script'' different here?  I've
  never seen what Knuth calls calligraphic and what most
  mathematicians call script (the ``curly'' style) used in the same
  context, so they are presumably not distinct from one another in
  actual usage.''

  The two should be included if there is enough space.
  Otherwise one is enough.

  \subsection{A hyphen char ?} These Latin letters are not meant for
  typesetting words. It is assumed that all multi-letter words should be
  typeset using the text fonts, not the math fonts. Thus
  the hyphen character is not needed in the math encoding.


  \subsection{Computer science and identifiers}
  It looks as though the new math encoding will not contain anything
  specially designed for computer science. Computer scientists will have
  to use \texttt {cmti*} in an extra family for long identifiers.

  \subsection{Chemists and chemical formulae} Considering the fact that
  chemists do use a lot of mathematical expressions, they need the total
  math mode as it is. On top of that they need a special mode for
  writing chemical equations. One of the particularities of this
  chemical mode would be the different placing of sub- and
  superscript. A possible implementation is something like
  \cn{EnterChemicalMode} and \cn{ExitChemicalMode}, which would
  in actual fact load a new set of fonts (or only the font in family 2),
  in order to have different font dimensions in family 2.


\section{Latin-like material: 5}
This group should live next to the Latin letter set.
  \begin{enumerate}
        \itemsep -0.15cm
  \item An upright d. This is needed for standard mathematical
  typesetting.

  \item A horizontally barred italic h, for physicists.

  \item A slanted barred italic h, for physicists.

  \item An italic upper-case Vee with a bar, the bar is meant to be
    horizontal. \texttt{jvpurcel@vela.acs.oakland.edu}

  \item An upright upper-case Vee with a bar, the bar is meant to be
    horizontal, and extends through both sides of the Vee almost like
    a strikeout. \texttt{jvpurcel@vela.acs.oakland.edu}

  \end{enumerate}


\section{The different ways needed to write numbers}
  \begin{itemize}
        \itemsep -0.15cm
  \item The normal set of numbers in \fn{cmmi}: upright lining.

  \item The blackboard bold numbers. (Used in physics and a field of
  maths. See  Alan J. for more details.) [Note:  presently no
  satisfactory bbb numbers  seem to exist.]

  \end{itemize}


\section{Empty slots?}
Some free slots could be included, so that people can put their
ligatures in when they are trying to convert fonts coming from other
worlds.

Alan J. can give good explanations for this.


\section{Arrows}
Arrow construction should be possible. But to make sure it does not fail
when used in different sizes, every single glyph used for this purpose,
will be \textem{specifically} designed for this use.  \textem{All of
them will be in the same font table}. This does not mean that a given
construction block can't be used for different types of arrows.  This
sort of thing has to be thought of, and forecasted. These construction
blocks must not be used for any other purpose --- like for instance the
equal or minus sign.

All arrows from \fn{cm}, and from \fn{msam/msbm}, should be taken if
necessary. Maybe some others too.

  \subsection{The ``Plain horizontal arrows'' group: 14 (Alan:10)}
  The first 6 are in cmmi'050 to '055:
  \begin{enumerate}
                \itemsep -0.15cm
    \item leftharpoonup
    \item leftharpoondown
    \item rightharpoondown
    \item rightharpoonup
    \item lhook
    \item rhook

      From \fn{cmsy}:
    \item leftarrow '40
    \item Leftarrow '50
    \item leftrightarrow '44
    \item Leftrightarrow '54
    \item rightarrow '41
    \item Rightarrow '51
    \item CMSY'67 this is the \cn{mapstochar}
    \item CMSY'66 the negation sign/slash: 1
   \end{enumerate}

  \subsection{Extra arrows for use with plain arrows: 5 (Alan 5)}
    \begin{enumerate}
                \itemsep -0.15cm

    \item It would be reasonable to add a \cn{mapsfromchar} in order to
    build things like: $<\!\!-\!|$: 1

    \item It would be reasonable to add a \cn{Mapstochar} that could go
    with the double arrows to build things like $|\!=\!>$ : 1

    \item It would be reasonable to add a \cn{Mapsfromchar}
      that could go with the double arrows to build things like
      $<\!=\!\!|$ : 1

    \item A - for extending arrows: 1
    \item A = for extending arrows: 1

  \end{enumerate}


  \subsection{The ``Plain vertical arrows'' group: 6 (Alan 6)}
  \begin{itemize}
        \itemsep -0.15cm
  \item updownarrow cmsy'154
  \item Updownarrow cmsy'155
  \item uparrow cmsy'042
  \item downarrow cmsy'043
  \item Uparrow cmsy'052
  \item Downarrow cmsy'053
  \end{itemize}

  \subsection{Plain vertical extensible arrows: 6 (Alan 6)}
  \begin{enumerate}
        \itemsep -0.15cm
  \item Top single arrow: cmex'170
  \item Bottom single arrow cmex'171
  \item Top double arrow cmex'176
  \item Bottom double arrow cmex'177
  \item Middle double arrow cmex '167
  \item Middle single arrow cmex '077
  \end{enumerate}

  \subsection{Plain extra vertical arrows: 0}
  Nothing added here.


  \subsection{The plain other arrows: 8 (Alan)}
  First the oblique arrows:
  \begin{enumerate}
        \itemsep -0.15cm
  \item CMSY'45
  \item CMSY'46
  \item CMSY'55
  \item CMSY'56

    What else: ?
  \end{enumerate}

  \subsection{The ``Ams obliques'' group: 2}
  \begin{enumerate}
        \itemsep -0.15cm
  \item msbm'36
  \item msbm'37
  \end{enumerate}


  \subsection{The ``Latex arrows'' group: 5}
  The four characters in position LASY'50 to '53 from the lasy font
  (These appear in the wasy font as well) must be put with the arrows.
  They are arrow heads. The squig \cn{arrow} in position '73
  of lasy should also be included: 5


  \subsection{The ``Ams other arrows'' group: 4}
  \begin{itemize}
                \itemsep -0.15cm
    \item Circle arrows MSAM: '10 to '11 :2
    \item Horizontal arrows MSAM:'113 '114 :2
  \end{itemize}

  \subsection{AMS horizontal arrows: 22 (Alan 23)}
  This includes all the horizontal arrows and the negated ones,
  that are listed on page 280 of ``The Joy of \TeX''.
  \begin{enumerate}
        \itemsep -0.15cm
  \item leftarrowtail
  \item leftleftarrows
  \item leftrightarrows
  \item leftrightsquigarrow
  \item leftrightharpoons
  \item Lleftarrow
  \item looparrowleft
  \item looparrowright
  \item nleftarrow
  \item nLeftarrow
  \item nLeftrightarrow
  \item nleftrightarrow
  \item nrightarrow
  \item nRightarrow
  \item rightarrowtail
  \item rightleftarrows
  \item rightleftharpoons
  \item rightrightarrows
  \item rightsquigarrow
  \item Rrightarrow
  \item twoheadleftarrow
  \item twoheadrightarrow
  \end{enumerate}

  \subsection{Ams vertical arrows (Alan: 6) (here: 8)}
  \begin{enumerate}
        \itemsep -0.15cm
  \item MSAM:'24 upuparrows
  \item MSAM:'25 downdownarrows
  \item MSAM:'26 upharpoonright
  \item MSAM:'27 downharpoonright
  \item MSAM:'30 upharpoonleft
  \item MSAM:'31 downharpoonleft
  \item MSAM:'36 Lsh
  \item MSAM:'37 Rsh
  \end{enumerate}


  \subsection{Some control glyphs for access to arrows }
  These do not appear in the \fn{.dvi} file, they simply enable the
  construction of some arrows and slashed arrows using the ligature
  mechanism.

% end of arrows


\section{All sorts of accents}

  \subsection{Basic size accents: 18}
  \begin{itemize}
        \itemsep -0.15cm
  \item All those that are created by macros in the Ams package: the 3
    dotted accent, and the 4 dotted accent.\quad 2

  \item The ones in \TeX: e\char'22\ e\char'23\  e\char'24\ e\char'25
    e\char'26\ e\char'27\ e\char'136\ e\char'137\ e\char'177\ e\char'176
    e$\mathchar"017E$. They all come from \fn{cmr} except for
    the last two from \fn{cmmi}. \quad 11

  \item Extra: a back-to-front vector arrow, \quad 1
  \item Extra: a double sided type vector arrow, \quad 1
  \item Extra: a square bracket used as an accent, \quad 1
  \item Extra: The previous one turned upside down, \quad 1
  \item Extra: an arc is requested by AMS, \quad 1
  \end{itemize}

  \note {The e\char'175\ in \fn{cmr} is not needed in maths, it is just
  a Hungarian accent.}

  \note {The $\mathchar"017F$ seems not to be needed in maths. It
    could be put in the text companion font.}


  \subsection{Double accents: 7}
  \begin{enumerate}
                \itemsep -0.15cm
    \item A bar and a dot on top,
    \item A dot and a bar on top,
    \item 2 dots with a bar on top,
    \item A bar with 2 dots on top,
    \item A hat and a tilde on top,
    \item A hat and bar on top,
    \item A double bar,
  \end{enumerate}

  \note {For the double accents, Spivak and Ralf Rey could do some
    archive research at the AMS. Similar research could be done at the
    APS, and the CUP.}


  \subsection{Variable size accents: $7*8=56$}
  Variable size has meant 5 different sizes until today.
  That number could be raised to 8.
  \note {If the accents are in a font
    loaded in three different sizes, the choice mechanism of
    \cn{mathaccent} will only look in the current style size (unlike
    the delimiter choice mechanism). Thus although one could hope to
    multiply the number of available sizes by three, in actual fact in
    a given style the number of automatically available sizes would not
    be multiplied.  All the same this \textem{would} give better
    results in each style,
    but it would also create \textem{compatibility problems}
    i.e.~formulae heights and widths may change.  Even if not done in an
    automatic way, the user would still have a larger range of accents
    to choose from.
    Compatibility problems could be avoided by redefining
    \cn{mathaccent} to a \cn{mathchoice}. Thus the accents could always
    come from text style, and the accented material could come from the
    current style. But this does not work either. In doing so one would
    no longer be able to take the base accents from the current style.
    Although one could make two macros. See paper ``Replacing
    \texttt{cmex}?'', same author.}

  \begin{enumerate}
                \itemsep -0.15cm
    \item e$\mathchar"017E$ the vector. \quad 8
    \item e\char'176\ the tilde. \quad 8
    \item e\char'136\ the hat. \quad 8
    \item e\char'26\ the bar. \quad 8
    \item Some people request a variable size arc. \quad 8
    \item The back to front vector arrow, \quad 8
    \item The double sided vector arrow, \quad 8
  \end{enumerate}


  \subsection{Under accents: 3 so far}
  Requests exist for the following:
  \begin{enumerate}
                \itemsep -0.15cm
    \item A tilde,
    \item A breve (\char'25)
    \item A bar
  \end{enumerate}

  Like for the double accents, research could be done at the AMS...
  ???????


\section{Core symbols}

The symbols that have some reason to live with the default math
material. There are mainly two reasons for them to be there: one is
kerning, and the other is design similarity.

  \subsection{For kerning reasons: 12}
  \begin{enumerate}
        \itemsep -0.15cm
  \item The period . CMMI
  \item The comma , CMMI
  \item The semi colon ; CMR
  \item The colon : CMR
  \item The exclamation mark ! CMR
  \item The (
  \item and the ) respectively opening-class and closing-class, CMR
  \item The [
  \item and the ] respectively opening-class and closing-class, CMR
  \item The $\{$,
  \item And the $\}$ (design similarity reasons also) in positions '146
  and  '147 of CMSY,
  \item The `/' as a delimiter\footnote {This is not accessible via a
      single key. The key `/' produces the sign $/$ taken from
      \fn{cmmi}.}, and  as a fraction sign, CMR

  \end{enumerate}

  \subsection{Basic geometric delimiters: 9}
  Should go in the core, for kerning reasons, like the other ( ) and [
  ].  If they don't fit in the core, they must go with the basics.  The
  ones listed here are all in CMSY, around '142, and '150.
    \begin{enumerate}
                \itemsep -0.15cm
    \item $\rangle$
    \item $\langle$
    \item $|$
    \item $\|$
    \item $\rceil$
    \item $\lceil$
    \item $\rfloor$
    \item $\lfloor$
    \item The $\mathchar"026E$ in position '156
    \end{enumerate}

     Test: $|f|,\|f\|,\lfloor f\rfloor,$$\lceil f\rceil,\langle
     f\rangle$ $f\lceil,f\lfloor,$$f\langle,f\backslash$ strange that
     no kerning seems to be needed here, whereas it is necessary for
     the bracket.

  \subsection{New basic size delimiters: 9}
  Basic size means the same size as the parentheses and brackets in
  \fn{cmr}. The following is a preference order list of desired new
        delimiters:
  \begin{enumerate}
                \itemsep -0.15cm
    \item A $|||$ for use as $|||f|||$ a norm,

      Semantic brackets: $[\![$ and $]\!]$
    \item The opening semantic bracket,
    \item The closing semantic bracket,

    \item Opening multi set bracket $\{|$
    \item Closing multi set bracket $|\}$

      Unicode contains another style of brackets, they call them
      tortoise shell brackets. They look like:
      \begin{verbatim}
         /
        /
       /
       |
       |
       |
       \
        \
         \
      \end{verbatim}
      These are like parentheses, but with straight lines. No curves.
    \item The opening tortoise shell bracket,
    \item The closing tortoise shell bracket,

      Triangle brackets, something like:
      \begin{verbatim}
          /|
         / |
        /  |
        \  |
         \ |
          \|
      \end{verbatim}
    \item The opening triangle bracket,
    \item The closing triangle bracket.

%    \item The opening turtoise shell in Bbb.
%    \item The closing turtoise shell in Bbb.

  \end{enumerate}


  \subsection{For design similarity reasons: 24}
        All this group must live with the default alphabet for design
        similarity reasons.
  \begin{enumerate}
                \itemsep -0.15cm
    \item The question mark ? must live with the ! CMR
    \item The percent sign \% must live with the ! and ? CMR
    \item The at sign @ must live with the \%  CMR
    \item The \$ sign must live with the @ \% ? ! CMR
    \item The \& must live with \$, \% .. CMR
    \item The \# in CMR

    \item The inverted \&: must be found.

    \item The $\ell$ as a rounded `l'. CMMI
    \item The centered dot $\cdot$ for use as a multiplication sign,
    must live with the period. CMSY
    \item The asterisk $\mathchar"0203$ for use as a multiplication
      sign, in position '003 in CMSY.
    \item The $\propto$ sign must live with @, \%, $\ell$. In position
      '057 of CMSY.
    \item The $'$ or prime in position '060 of CMSY, one cannot
      separate the prime from the prime ligature slots. (2
                        ligatures) Kerning of letters with the prime is
                        not possible, because
      the latter is set in superscript. ????????

    \item The backprime from MSAM'070 should live with the prime. ??????

    \item The $\infty$ sign in position '061,

    \item The $\emptyset$ in position '073 of CMSY,

    \item The \cn{check} mark in MSAM'130, ?????
    \item The \cn{between} double parentheses in MSAM'107
      should go with the normal parentheses.
    \item The $\Re$, could come out if necessary,
    \item The $\Im$ in position '074, and '075 of CMSY, could also
      come out if necessary.
    \item The $\dagger$ or dagger in CMSY'171, ?????
    \item The smile, ???????
    \item The frown from CMMI ????????
    \item Could maybe include the circled S from MSAM'163. ?????
    \item The Weierstrass symbol: $\wp$ only in one style, (could come
    out)
    \item The powerset symbol from
    \texttt{Martin.Ward@durham.ac.uk}. (could  come out if necessary)
      It looks something like this:
    \begin{verbatim}
          XXXXXXXXXXX
      XXXXXXXX    XXXX
    XXXXX           XXXX
    XXXXX           XXXX
  XXXXXXXX          XXXX
  XXX  XXX          XX
  XX   XXXXXXX  XXXXXX
  XX    XXXXXXXXXXXX
  XX    XXXX
  XX    XXXX
  XX      XXXX
  XX      XXXX
  XXXX    XXXX
  XXXX    XXXX
  XXXX  XXXX
    XXXXXX
    \end{verbatim}
  \end{enumerate}


\section{Symbols from \fn{lasy} that must be kept: }
The first four \fn{lasy} symbols are in \fn{msam}.

The ones in positions '50 through '53 are arrow heads, and are counted
as such in the corresponding group.

Character '60 is in the \fn{msam}.

'61 is not in the \fn{msam}, and should be kept.

Char'62: same as \fn{msam}'03 ?

Char'63: same as \fn{msam}'06?

Lasy'72: same as \fn{msbm}'163 or \fn{msam}'166 ?

Lasy'73: same as \fn{msam}'40 ?

A list of what should be kept from \fn{lasy}:
\begin{itemize}
        \itemsep -0.15cm
        \item Character '61: \quad 1
        \item ???
\end{itemize} \textbf{This makes a total of 1.}


\section{The ``Subset'' groups}
\note {None of these have anything to do with the \cn{sim} glyph.}

  \subsection{The ``subset plain'' group: 4}
  \begin{itemize}
                \itemsep -0.15cm
    \item The $\subseteq$ in position CMSY'022
    \item The $\supseteq$ in position CMSY'023
    \item The $\subset$ in position CMSY'032,
    \item The $\supset$ in position CMSY'033,
  \end{itemize}

  \subsection{The ``subset Ams'' group: 12}
  \begin{itemize}
                \itemsep -0.15cm
    \item From MSBM'040 to MSBM'053 : 12
  \end{itemize}

  \subsection{The ``In/ni plain'' group: 2}
  \begin{enumerate}
                \itemsep -0.15cm
    \item The $\in$ sign in position CMSY'062,
    \item The $\ni$ sign in position CMSY'063,
  \end{enumerate}

  \subsection{The ``sqsubset plain \& Ams'' group: 4}
  These do not have a place anywhere else:
  \begin{enumerate}
                \itemsep -0.15cm
    \item The $\sqsubseteq$, cmsy'166,
    \item The $\sqsupseteq$, cmsy'167,
    \item The sqsubset from MSAM'100,
    \item The sqsupset from MSAM'101,
  \end{enumerate}


\section{The ``Greater than'' group}

  \subsection{The ``Greater than Plain'' group: 8}
  \begin{itemize}
                \itemsep -0.15cm
    \item The $\leq$ in position CMSY'024,
    \item The $\geq$ in position CMSY'025,
    \item The $\ll$ in position CMSY'34,
    \item The $\gg$ in position CMSY'35
    \item $<$ less than CMMI'074,
    \item $>$ Greater than: CMMI'076,
    \item The alternative leq: msam'66
    \item The alternative geq: msam'76
  \end{itemize}

  \subsection{The ``Greater than AMS'' group: 30}
  \begin{itemize}
                \itemsep -0.15cm
    \item From MSBM'000 to '005: 6
    \item From MSBM'010 to '015: 6
    \item From MSBM'024 to '025: 4
    \item From MSBM'154 to '155: 2
    \item From MSAM'060 to '061: 2
    \item From MSAM'065 and '067: 2
    \item From MSAM'075 and '077: 2
    \item From MSAM'121 to '124: 4
    \item From MSAM'156 to '157: 2
  \end{itemize}

  \subsection{The ``greater than with sim'' group: 8}
  \begin{enumerate}
                \itemsep -0.15cm
    \item MSBM'022,
    \item MSBM'023,
    \item MSBM'032,
    \item MSBM'033.
    \item MSAM'046,
    \item MSAM'047,
    \item MSAM'056,
    \item MSAM'057
  \end{enumerate}

  The `shapee' \cn{sim}, and the geometric \cn{sim} are considered to be
  the same glyph, i.e. the difference that is sometimes visible is
  considered  to be a bug.


\section{The ``Succ'' groups}

  \subsection{The ``Succ without sim plain'' group: 4}
    \begin{enumerate}
                        \itemsep -0.15cm
      \item CMSY'026,
      \item CMSY'027,
      \item CMSY'036,
      \item CMSY'037
    \end{enumerate}

  \subsection{The ``Succ without sim AMS'' group: 10}
    \begin{itemize}
                        \itemsep -0.15cm
      \item MSBM'006, '007: 2
      \item MSBM'016, '017: 2
      \item MSBM'026, '027: 2
      \item MSAM'062 - '064: 3
      \item MSAM'074: 1
    \end{itemize}

  \subsection{The ``Succ with sim Ams'' group: 8}
  \begin{itemize}
                \itemsep -0.15cm
    \item MSBM'020, '021: 2
    \item MSBM'030, '031: 2
    \item MSBM'166, '167: 2
    \item MSAM'45,
    \item MSAM'55,
  \end{itemize}

  The `shapee' \cn{sim}, and the geometric \cn{sim} are considered to be
  the same glyph, i.e. the difference that is sometimes visible is
  considered to be a bug.


\section{The ``Sim'' group: 12}
  \begin{enumerate}
\itemsep -0.15cm
    \item sim CMSY'030
    \item approx CMSY '31
    \item simeq CMSY'047
    \item wr CMSY'157
    \item The bold MSBM'034
    \item MSBM'035
    \item MSBM'150
    \item The bold MSBM'163
    \item The bold MSBM'164
    \item MSBM'165
    \item MSAM'166, backsim
    \item MSAM'167, backsimeq
  \end{enumerate}


\section{Binops}
  \subsection{The ``Small binops plain'' group: 20}
  \begin{enumerate}
                \itemsep -0.15cm
    \item cap  CMSY
    \item cup CMSY
    \item uplus CMSY
    \item sqcap CMSY
    \item sqcup CMSY
    \item big circle CMSY
    \item big triangle up CMSY
    \item big triangle down CMSY
    \item vee CMSY
    \item wedge CMSY
    \item oplus CMSY
    \item ominus CMSY
    \item otimes CMSY
    \item oslash CMSY
    \item odot CMSY
    \item amalg CMSY
    \item bullet CMSY
    \item circ CMSY
    \item diamond CMSY
    \item star (5 branches): CMMI'77
  \end{enumerate}

  \subsection{Small plain left right triangles: 2}
  These should be replaced by the ones in the AMS fonts for math usage.

  They are also used as bullets, and should go in the text symbol
  encoding for such a usage.
  \begin{enumerate}
                \itemsep -0.15cm
    \item triangle left: CMMI'56
    \item triangle right CMMI'57
  \end{enumerate}

  \subsection{AMS left right open triangles: 8}
  These are also in LASY.
  \begin{itemize}
        \itemsep -0.15cm
  \item vartriangle left
  \item vartriangle right
  \item triangle left eq
  \item triangle right eq

    The previous four are in \fn{msam}
  \item Same 4 negated in \fn{msbm}: 4
  \end{itemize}

\section{Basic Symbols: 24}
A group of symbols used for typesetting basic mathematics.
These are mainly geometrics. Some have been added for similarity
reasons:
  \begin{enumerate}
        \itemsep -0.15cm
  \item = The equals sign, CMR'075
  \item - The minus sign, CMSY'00
  \item + The plus sign, CMR'053

  \item The $\times$ multiplication sign CMSY'002,
  \item The \cn{divide} sign $\div$ CMSY'004
  \item The \cn{divideontimes} from msbm'076 should live with divide
    and times.
  \item The rtimes from msbm'157 should live with the times.
  \item The ltimes from msbm'156 should live with the times.

  \item The $\pm$ sign in position CMSY'006,
  \item The $\mp$ sign in position CMSY'007,

  \item The $\equiv$ in position CMSY'021, Difficult to separate from
    other similar relations.

  \item The $\forall$ sign in position '070,
  \item The $\exists$ sign in position '071,
  \item The \cn{nexists} sign from msbm'100
  \item The $\neg$ sign CMSY'072,
  \item The \cn{varpropto} from AMS "5F. ????? or should this be left
    as a geometric?????
  \item The varemptyset from MSBM'77, ????

  \item Could go here: the upside down F: Finv from msbm'140 ???????
  \item And the back to front G: Game from Msbm'141 ???????

  \item Unary minus like en dash, could be CMR'173 but I personally
    think it should be shorter.

  \item The \cn{varnothing} from MSBM'77,
  \item smallsetminus from msbm'162

  \item The $\perp$ perp or bot sign in position '077,
  \item top sign CMSY'076
\end{enumerate}


\section{Radical}
  \subsection{The \TeX\ radicals: 10}
  Currently available in \fn{cmex} are:
  \begin{itemize}
                \itemsep -0.15cm
                \item Five radical signs: 5
                \item The vertical bit needed to construct the big
                radical '165 : 1
                \item The top bit of the constructed radical. '166 : 1
  \end{itemize}

  \fn{cmsy} includes the basic size of the radical. It
  has always been loaded in three sizes, and must remain so.
        If it is taken out of \fn{cmsy}, and put in a \fn{cmex}
        replacement,
        then this point must be taken into consideration.

        If the new \fn{cmex} is loaded in one size, it must contain
        three different sizes of the radical in order to stay compatible
        with plain: 3


  \subsection{New radicals: 2}
  Request made by: \texttt { HITT\% USOUTHAL.BITNET@SHSU.edu }.

  One can overload the little vertical extensible module of the
  radical, in order to produce a left quantum operator. For the right
  quantum operator, the glyphs could be available, but the radical
  macro can't be used.
        A specific macro could be designed and it would need two glyphs:
  the top right hand corner, and the repeatable
  vertical bit: 2


\section{The integrals family: 18}

  \subsection{Big `bigops' size: 7}
  \begin{enumerate}
                \itemsep -0.15cm
    \item The single integral.
    \item The double integral. Could be done with kerning if there is
    not enough space.
    \item The triple integral. Could be done with kerning if there is
    not enough space.
    \item The single O integral.
    \item The double O integral.
%    \item The triple O integral. ??????? J\"org says no.
    \item The sigma integral. For physics: J\"org.
    \item The slash integral. For physics: J\"org.
  \end{enumerate}

  \subsection{Small `bigops' size: 7}
  The same as in big `bigops' size.

  \subsection{Small size: 7 }
  This refers to the size of the \cn{smallint} in CMSY.
  \begin{enumerate}
        \itemsep -0.15cm
  \item The single normal integral.
  \item The single O integral.
  \item Double O integral. ???? (J\"org thinks yes)
  \item Double normal integral ???? (J\"org thinks yes) Could be done
    with kerning if there is not enough space.
  \item Triple normal integral ???? (J\"org thinks yes) Could be done
    with kerning if there is not enough space.
  \item The sigma integral.
  \item The slash integral.
  \end{enumerate}

  Mail from HSS:
  \begin{quote} %\fontsize{7}{7}\selectfont
    \cn{doubleoint} is used by Becker in ``electromagnetic fields and
    interactions'' (Dover). I also saw \cn{tripleoint} used in
    electromagnetic theory books although both are somewhat archaic.

    Concerning the small version (in \fn{cmsy}), I suggested this solely
    for reasons of completeness. The need for it is less now that
    \fn{cmex} will be loaded in three sizes. But the small version of
    \cn{int}
    \& \cn{oint} look a bit large when used in inline formulas. The
    \cn{smallint} \& \cn{smalloint} etc. may be a choice for some
    authors in the latter case.

    YH also pointed out, that the upright versions of integrals are very
    common in textbooks. Since the integral sign is one of the most
    common symbols used in maths, it may not be a bad idea to include
    upright versions of *all* integral signs in \fn{cmex} (with
    corresponding small versions in \fn{cmsy}) again for reasons of
    completeness.

  \end{quote}

\section{AMS Vdash group: 10}
\begin{enumerate}
\itemsep -0.15cm
\item MSBM'054 nparallel
\item MSBM'055 nmid
\item MSBM'056 nshortmid
\item MSBM'057 nshortparallel
\item MSBM'061 nVdash
\item MSBM'062 nvDash
\item MSBM'063 nVDash
\item MSBM'160 shortmid
\item MSBM'161 shortparallel
\item MSAM'015 Vdash
\item MSAM'016 Vvdash
\item MSAM'017 vDash
\end{enumerate}

\section{Plain and \fn{lasy} miscellaneous symbols: 6}
\begin{enumerate}
\itemsep -0.15cm
\item CMSY'20
\item CMSY'140
\item CMSY'141

  Should live with the two previous:
\item MSBM'060 What about the back to front version of this ?????
\item LASY'061 the bow tie,
\item LASY'62 the small box. It is smaller than the one in the AMS
  fonts. ????
\end{enumerate}


\section{AMS equals friends: 10}
  \begin{enumerate}
\itemsep -0.15cm
  \item msam'155
  \item msam'154
  \item msam'120
  \item msam'73
  \item msam'72
  \item msam'54
  \item msam'53
  \item msam'52
  \item msam'51
  \item msam'44
  \end{enumerate}


\section{AMS miscellaneous geometric symbols: 21}
\begin{enumerate}
\itemsep -0.15cm
\item msam'174
\item msam'173
\item msam'171
\item msam'170
\item msam'165
\item msam'164
\item msam'161
\item msam'160
\item msam'151
\item msam'150
\item msam'147
\item msam'146
\item msam'141
\item msam'140
\item msam'136
\item msam'135
\item msam'134
\item msam'133
\item msam'132
\item msam'131
\item msam'050
\item msam'005

\end{enumerate}


\section{AMS boxes and friends: 15}
\begin{enumerate}
\itemsep -0.15cm
\item MSAM'000
\item MSAM'001
\item MSAM'002
\item MSAM'003
\item MSAM'004
\item MSAM'006
\item MSAM'007
\item MSAM'014
\item MSAM'106
\item MSAM'110
\item MSAM'111
\item MSAM'112
\item MSAM'115
\item MSAM'116
\item MSAM'117
\end{enumerate}


\section{The horizontal curly braces: 10}
Their design should be the same as the vertical braces. Add two
horizontal extension modules for them, since if they are drawn with
rules, digitization errors may cause them not to line up with the
horizontal brace glyphs. What's more, this would enable the designer
to choose their boldness.

Plus two extra middle bits. So that the designer is not restricted by
the number of slots. Knuth's design could use that little amount of
glyphs, but maybe others will need more.

Count: for the downwards brace: 2 end bits, 1 middle bit, 2 extensible
modules. That makes a total of 5 per curly brace. One up, and one
down: makes 10.


\section{Big and extensible \TeX\ delimiters from \fn{cmex}: 78}
This group includes delimiters that are in \fn{cmex}. And an extra
little extensible module for the \{ and \}.
  \begin{itemize}
        \itemsep -0.15cm

  \item Four different sizes of ( and ): 8

  \item Extensible versions of ( and ): 6 \\
    Left and right extensible modules are '102 and '103. Top and bottom
    are '060, '061, '100, '101.

  \item Four different sizes of [ ]: 8

  \item Extensible version of [ and ]: 6 \\
    The extensible modules, one for the right bracket, and one for the
    left bracket are: '066, '067. The top and bottom pieces are: '062
    to '065.

  \item Four different sizes of \{ and \}\,: 8

  \item Extensible module for \{ and \}\,: 7 \\
    The extensible module ('76) used for the curly braces is very small,
      because it is added twice: once above the middle piece, and once
      below the middle piece. Its height is half that of the parentheses
      extensible  module. Other pieces are: '070 - '075.


  \item An extra extensible module for the \{ and \}\,: 1\\
    There is only one extensible module for both the left and the
    right curly brace in \fn{cm}. This is because the left-right spread
    of a curly brace is symmetrical in \fn{cm}, unlike the parentheses
    for example.
    This may not be the case for other designs.

  \item Four different sizes of $\langle$ and $\rangle$: 8

  \item Four different sizes of $\backslash$ and $/$: 8

  \item Four different sizes of $\lfloor$ and $\rfloor$: 8 \\
    The extensible version is built with the same pieces as the
    extensible brackets.

  \item Four different sizes of $\lceil$ and $\rceil$: 8 \\
    The extensible version is built with the same pieces as the
    extensible brackets.

  \item Extensible vert and parallel: 2 \\
    Extensible versions of the vertical bar and the double vertical
    bar. They are their own extensible modules: '014 and '015.

  \end{itemize}


\section{Bigops}
  \subsection{Old bigops from \TeX: 28}
  There are two glyphs for each bigop.
  \begin{enumerate}
                \itemsep -0.15cm
    \item The sqcup
    \item The circled integral
    \item The circled dot
    \item The circled plus
    \item The circled times
    \item The sums
    \item The prods
    \item The normal integrals
    \item The bigcups
    \item The bigcaps
    \item The U plus
    \item The wedges
    \item The vees
    \item The coprods
  \end{enumerate}

  \subsection{New double sized `bigops': 26 }
  All these should come in two sizes, in the same font, like the present
  \cn{bigcup}. One for display style, and one for text style. That
  makes two glyphs for each one.

  \begin{enumerate}
                \itemsep -0.15cm

    \item A double sized sqcap $\sqcap$ \cn{bigsqcap} (can be found in
                \fn{cspex})

    \item Two sized $\bigcirc$ with $\vee$ inside.  $\bigcirc
      \!\!\!\!\!\vee$ proposed name: \cn{ovee}, and \cn{bigovee}.
      Can be found in \fn{cspex} and \fn{stmary}.

    \item Two sized $\bigcirc$ with $\wedge$ inside.  $\bigcirc
      \!\!\!\!\!\wedge$ proposed name \cn{owedge}, and
      \cn{bigowedge}. Comment from Alan:
      \begin{quote}
        ``As far as I'm aware nobody has *ever* used these glyphs in a
        paper.  I put them in St Mary's Road because I needed them at
        the time, but I shortly abandoned writing the paper they were
        going to be used in.  Please don't include them!  (If we are
        going to, we need to include <ovee> and <owedge> as well as
        <bigovee> and <bigowedge> which are the ones you described.)''
      \end{quote}

    \item Dijkstra choice: $[\!]$ CSPEX

    \item A wide Dijkstra choice. CSPEX . Comment from Alan:
      \begin{quote}
        If this is the glyph I think it is, it's not quite a wide
        Dijkstra choice in shape (although mathematically it's the
        same thing as Dijkstra choice).  The two glyphs are:

        $<$dijkstrachoice$>$ looks remarkably like [ and ] glued
        together.

        $<$oblong$>$ looks like $<$sqcap$>$ but with the square
        completed.

        $<$oblong$>$ is used in CSP in conjunction with $<$sqcap$>$,
        so it's quite important that they look the same.  In
        particular, they need to be of the same width because if they're
        not, formulae sometimes don't line up properly...
      \end{quote}

    \item Parallel \cn{bigparallel} just a double sized version of
      parallel.

    \item Interleaving $|\!|\!|$ : \cn{biginterleaving}
      \begin{quote}
        `Interleaving' and `parallel' are used in (at least) three
        different ways:
        \begin{itemize}
                                %\itemsep -0.15cm
        \item As delimiters $||$foo$||$ and $|||$foo$|||$.  These
          should come in basic-sized and extensible versions.
        \item As binary operators p $||$ q and p $|||$ q.  These can
          be the same glyphs as for the basic-sized delimiters.
        \item As `big' operators $||_i \;p_i$ and $|||_i \;p_i$
          similar to \cn{bigcup}.  These should come in text style and
          display style versions.
      \end{itemize}
      The big operators are not the same glyphs as the extensible
      delimiters.
    \end{quote}

  \item \cn{bigcupdot}: A `U' with a dot in it. Something like:
    $\bigcup\!\!\!\!\cdot$

  \item \cn{bigcapdot}: an upside down `U' with a dot in it. Something
    like: $\bigcap\!\!\!\!\cdot$

  \item An inverted \& called \cn{dnasrepma}

  \item Large operator symbol based on an asterisk sign.

  \item Large operator symbol based on a times sign.

  \item Large operator symbol based on a hash sign.

  \item Large operator symbol based on an ampersand sign.

  \end{enumerate}


\section{Non classified existing symbols}
Here is a list of some symbols that do not have a place elsewhere:
\begin{itemize}
        \itemsep -0.15cm
  \item The different shapes of \# should find a place, although one is
    already in the core group.
\end{itemize}


\section{A list of new glyphs}

The following symbols should be added in the math fonts. Some have
already been designed by various people, so it should be possible to
find them...


  \subsection{Basic size operators: 2}
  Basic size means the same size as the operators in \fn{cmsy}.
  \begin{enumerate}
                \itemsep -0.15cm
    \item Something like \cn{cupdot} and
    \item Something like \cn{capdot} Frank M. can justify these.
  \end{enumerate}


  \subsection{New multi-sized, and extensible delimiters: 47}
  Count: 8, 6,  1,  8, 8,   8,   8, makes 47 .

  A multi-sized delimiter means: 4 sizes for each side : 8 glyphs.
  Plus an extensible version: top, bottom, extension module for both
  sides: 6 glyphs. Sometimes also a middle: 8 glyphs. Total: 16 or 14.
  \begin{enumerate}
                \itemsep -0.15cm

    \item Four sizes of the semantic brackets $[\![$ and $]\!]$: 8

    \item An extensible version of the semantic brackets: 6 \\
      (Top - Middle - Bottom) * 2 makes 6.

    \item An extensible version of $|||$ for use as $|||f|||$ (a
      norm). Just the extension module: 1

    \item 4 sizes of multiset brackets $\{|$ and $|\}$: 8

    \item An extensible version of multi-set brackets: 8 \\
      (Top - Bottom - Middle - extensible module) *2 makes 8.

    Unicode contains another style of brackets, they are called
      tortoise shell brackets. They look like
        \begin{verbatim}
          /
         /
        /
        |
        |
        |
        |
        |
        \
         \
          \
        \end{verbatim}
    \item Four sizes of tortoise shell brackets: 8 \\
      No extensible version. Could add them in.


    \item Four sizes of triangle brackets: 8 \\
      They look like:
      \begin{verbatim}
          /|
         / |
        /  |
        \  |
         \ |
          \|
      \end{verbatim}

  \end{enumerate}


\subsection{Geometrics: 21}
\begin{enumerate}
        \itemsep -0.15cm

  \item The \fn{ams} smaller or equal and greater or equal must not be
        forgotten.

  \item Linear `is implied by' if o-- and o--o [bb: There are also
  versions of these with filled-in circles.]

  \item From JMR: something like: $\raise .1cm \hbox{$|$}\!\_$
    maybe the same upside down.

  \item From JMR: Something like: $\_\!\raise .1cm \hbox{$|$}$
    maybe the same upside down.

  \item \texttt{Here are some other symbols I once needed: \cn{opm},
      \cn{omp}.  Why?  There is \cn{oplus} and \cn{ominus} and there
      is +, -, \cn{pm} and \cn{mp}, but there's no \cn{opm} and
      \cn{omp}.}

    \texttt {I once needed them in a context, where \cn{oplus} and
      \cn{ominus} were used as subscripts to indicate symmetric and
      antisymmetric wave functions that were normalized. It is easy to
      specify formulae that include both cases using \cn{pm} and
      \cn{mp}, but suddenly there was no \cn{opm} and \cn{omp}. I
      constructed symbols using the circle from the copyright sign,
      but that was not actually the perfect size.}

\end{enumerate}

\subsection{New arrows}
\begin{itemize}
\itemsep -0.15cm
  \item Alan J. wrote: \cn{arrownot} and \cn{Arrownot}, so that for
       example \cn{arrownot}\cn{mapsto} is visually compatible with
      \cn{nrightarrow}.  Describing the same thing he also wrote: Add
      the `building blocks' for the AMS negated relations, for example a
       \cn{arrownot} to build \cn{nlongrightarrow} and
         \cn{nrightarrowfill}.

  \item The building blocks to make \cn{mapsfrom} $<\!\!-\!|$
    \cn{Mapsto} $|\!=\!>$ and \cn{Mapsfrom} $<\!=\!\!|$

  \item Lfloor, Rfloor, Lceil, Rceil like $\lfloor\!\lfloor$

  \item Arrows with triangles on the end.
$$<\!\!\!|\!\!-\!\!-,\ -\!\!-\!\!|\!\!\!>,\ <\!\!\!|\!\!-\!\!-|\!\!\!>$$

  \item Equals like symbol: $<\!--\!>$ with $==$ underneath.
\end{itemize}


\subsection{Non geometrics: 19}

\begin{enumerate}
\itemsep -0.15cm
%  \item The double circled integral, or the surface integral for
%    physicists.
%  \item A single integral with on top a $\Sigma$:
%    $\displaystyle\int\!\!\!\!\!\!\textstyle\Sigma$ (J\"org)
%  \item A single integral with a slanted dash:
%    $\displaystyle\int\!\!\!\!\!\textstyle-$
%  \item A triple, circled integral.

  \item Possibly something like $\bar{}\!\!($ and $\bar{}\!\!)$ if the
  bar was touching the parentheses.

  \item Banana brackets: look (sort of) like $(\!|$ and $|\!)$.  Or
    they look like bananas if you believe Jeremy... Alan: The St. Mary
    Road font includes samples of them, in a line-drawing style. Since
    I have not seen them in real use, I cannot comment, if this style
    or rather the look of \& in \fn{cmr} is appropriate.

  \item lightning (\fn{wasy}) --- probably does not belong in maths.
    Alan: It is actually used though!  It means `interrupt' in process
    theory, c.f.  Communicating Sequential Processes, Hoare, Prentice
    Hall 1985.  I don't know how widely used outside process theory it
    is though...


%  \item Upright partial derivation sign. In the Greek-like stuff.
%  \item Arabic letter dal: looks something like a back to front $c$.
%    This should live with the Hebrew letters.
%  \item Remember that message forwarded by J\"org. The V-bar, and the
%    parenthesis-bar. jvpurcel@vela.acs.oakland.edu. The V-bar is in
%    with the Latin like stuff.

  \item Katakana character that looks like a spiral. (bb)

%  \item Here is a proposal from J\"org: Mylona's and Whitney's
%    ligatures: two extra symbs. These would come in italic, and
%    upright like the rest of the Greek stuff. J\"org himself does not
%    like this.  [bb: But we're not talking about text! Why put these
%    in a math font?]  It now looks as though this is not going to be
%    included in the new math encoding.

  \item A lower-case sigma with a long tail that goes a little bit below
  the baseline.

  \item The two versions of the \# hash sign must not be forgotten. It
                seems that one is geometric, and one is not: the slanted
                hash sign and the upright hash sign.


  \item An \cn{inviota} is sometimes requested on the net. I'll send
    you a reference file for it. (J\"org)

  \item More ?

\end{enumerate}
\end{filecontents}


\begin{filecontents}{l3d007c.tex}
% \frameit{<material>}: puts <material> in an \hbox and draws a frame
% round it: an \hrule above and below, a \vrule on the left and on the
% right, with .1cm of padding between the material and each rule.
\newcommand{\frameit}[1]{\vbox{\hrule\hbox{\vrule
      \hskip .1cm\vbox{\vskip .1cm\hbox{#1}\vskip .1cm}\hskip .1cm
      \vrule}\hrule}}


\begin{quote} \small
  The placing of mathematical accents in \TeX{} is done with
  the following control sequence: {\tt\string
    \mathaccent"xyzz\{box\}}. {\tt"xyzz} is the code that
  identifies the accent, and `{\tt box}' is the entity to come
  under the accent. The box can be any type of box known by \TeX{}: a
  single letter, a vbox, an hbox, etc... The accent code used is a
  usual math code (refer to any book on \TeX\ for more details).  The
  accent is basically centered over the given box, but there are ways
  of influencing the way the centering is done. The vertical placement
  is as we shall see a little more tricky.
\end{quote}


\section{The accent choosing}

Let us call $x$ the box that is due to be accentuated, and $u$ its
width.
We shall first suppose that $x$ is a single character.\\
\vskip .05cm
\centerline{$x:\qquad \rightarrow$\lower .2cm%
\hbox{\frameit{\huge x}}$\leftarrow$u}
\vskip .2cm

If the accent is part of a list of successors\footnote{See document on
  charlist for more details about successors.}, \TeX{} chooses the one
whose total width is {\em as close as possible} to $u$, but still
{\em smaller or equal} to $u$.

\vskip .25cm \textbf{Note.} A list of successors can only take place
in one and only one font, and Knuth reminds us of this fact in rule 12:
``[...] has a successor in its font [...]'' This makes me wonder...
The hat and tilde come in their smallest size in cmr position '136 and
'176. The other sizes are in cmex position '142 - '147. The
following:\\
\verb|   $\mathaccent"005E{e}$, ...  ,$\mathaccent"005E{eeee}$|\\
produces: $\mathaccent"005E{e}$ and
$\mathaccent "005E{ee}$ and $\mathaccent "005E{eee}$ and
$\mathaccent "005E{eeee}$. So as expected there is no automatic sizing,
seeing as the hat in cmr is not part of a charlist. Same test with
tilde: \\
\verb|   $\mathaccent "007E{e}$, ...  ,$\mathaccent"007E{eeee}$|\\
 produces: $\mathaccent"007E{e}$ and $\mathaccent
"007E{ee}$ and $\mathaccent "007E{eee}$ and $\mathaccent "007E{eeee}$.
In order to get a hat that changes size automatically one must
call the smallest hat in \texttt{cmex} directly , like \cn{widehat}
does. \cn{widehat} is defined as: \cn{mathaccent} \texttt{"0362}.
Family three indicates cmex, and 62 is the hex position of the
smallest hat in \texttt{cmex}. Thus \cn{widehat}\verb|{abda}|
produces: $\widehat{abda}$.


\section{The horizontal placing}

The accent followed by its italic correction is put into a box $y$
whose width we shall call $w$.\\
\vskip .05cm
\centerline{$y:\qquad \rightarrow$\lower .2cm\hbox{\frameit{ acc
{\tt \string\/}}}$\leftarrow\ w$}
\vskip .2cm

Let us call $s$ the kern between the character in box $x$ and the {\em
skewchar\/}\footnote{See below for more details about the skewchar.}
---~taken in that order. The box $y$ is first centered on top of the
box $x$ and then shifted rightwards by the amount $s$.

\textsl {If the box $x$ is {\em not a single character}, the amount
  $s$ is taken equal to zero, therefore the skewchar is ignored and
  the box $y$ is normally centered\footnote{But the box $y$
  contains the italic correction, which influences the centering.}
  over the box $x$.
}
%
%
%
\subsection{The `skewchar'}
The \textem {skewchar} is a specific character that D.E.~Knuth decided
to use ---~in the way described above~--- for placing accents. Each font
should have its own skewchar.

For most characters, the {\tt .tfm} file specifies a particular
kerning of each letter with the skewchar. This is true for the
computer modern fonts, but other font designers may not have used this
feature.

Why choose one skewchar rather than another? This is because the
character $\mathchar"017F$ chosen by Knuth does not have any other
kerning that could have been disturbed by the skewchar kerning. This
choice may not always be good for all fonts, because it depends on
what the character in position '177 is. Thus a font designer might
choose another skewchar and put the necessary kernings in the {\tt
  .tfm} file.  Accent glyphs can be used as skewchars, because they
are usually not subject to kerns from other glyphs.

As long as one uses the {\tt cm} fonts, it does not make much sense to
change the skewchar, unless one wants to get strange effects, or
unless one intends to change the {\tt .tfm file}. One should remember
that an assignment to skewchar is not undone at the end of the group,
but is a part of the global font information. A local change
therefore involves saving the original value, and then restoring it.

\subsection{The italic correction}
The presence of the italic correction here is a little mysterious.
Its effect is to shift the accent to the left compared to the position
it would have without it. One should note that it is still added when
the $x$ box is more than one character.
%My personal theory is that
%whereas the skewchar-kerning enables a general positioning of all
%accents on one character, the italic correction enables an adjustment
%of a particular accent over all characters.


\subsection{Conclusions}
It is useless changing the skewchar unless one also modifies the
{\tt .tfm} file(s), which contain(s) the info for the skewchar kerning,
and for the italic correction.

Accents from any font can be positioned on characters from any other
font if there is a skewchar in the character font.

%
%
%
%
%
%
\section{The vertical placing}
This is a little more tricky.  Here as well one can start
by supposing that the character to be put under the accent is single.
All previous notations are kept. $\chi$ is the x-height
of the accent's font i.e. {\tt\string \fontdimen5}. Let us call $h_x$
the height of the $x$ box:\\
%
\vskip .1cm

\centerline{\vbox{\hbox{$\ \downarrow$} \frameit{\huge x} \kern
    -.2cm\hbox{$\ \uparrow$}
\hbox{$h_x$}}}
\noindent
and $h'_x$ the height of a box $x'$ containing the character to be put
under the accent, together with an empty superscript, and an empty
subscript.\\
\centerline{%
$x':\qquad $ \lower 1.5cm \vbox{\hbox{$\ \downarrow$}%
\frameit{$\mathop{\frameit{\huge x}}\limits^{\frameit{\scriptsize%
super}}_{\frameit{\scriptsize sub}}$}%
\kern-.2cm
\hbox{$\ \uparrow$}%
\hbox{$h'_x$}}%
}%end of centerline
%

\noindent
Now set $\delta$ to the minimum of $\chi$ and $h_x$, and increase it
by $h'_x-h_x$.

The end result is a \cn{vbox} $z$ containing box $y$ (the accent
correctly positioned laterally), followed by a (vertical) kern of
$-\delta$, and then box $x'$.  A normal accent char has the following
aspect:
\frameit{\char127}. This explains why the kern of $-\delta$ is needed.
Without it the gap under the accent would be too big. One can now
understand why an `O' used as an accent over an `e' produces the
following: $\mathaccent"714F{e}$.

If the height $h_z$ of $z$ is smaller than $h_x$ then a kern
is added on top of $z$ in order to make the end height that of $x$.
Finally, the width of $z$ is set to the width of $x$.
%

\textsl{If the character to be accentuated is not single, $\delta$ is
  not increased by $h'_x-h_x$. In other words, one can forget about
  the subscript and superscript.
}%end of textsl

\subsection{Conclusions}
The x-height is used for the vertical placing of accents. This means
that one cannot mix in the same font table glyphs designed with
different x-heights.
\end{filecontents}


\begin{filecontents}{l3d007d.tex}
\begin{quote}\small
The re-encoding of the math fonts cannot be thought of without a
closer look at \TeX's mechanisms for dealing with extensible
characters. This includes all characters that come in different sizes,
all characters that are constructed, and the operators that usually
come in two sizes. The ultimate questions are: how
to implement these characters? What sort of kerning can be
done with them?
\end{quote}


\section{Let us start with the easiest: The operators}

A large operator like $\sum$ will be vertically centered with respect
to the axis when it is typeset. Thus, large operators can be used
with different sizes of text. This vertical adjustment is not made for
symbols of other classes. [The \TeX book p.155]

This is a sum in text: $\sum_{i=0}^{i=n} i$ and this is a prod in text:
$\prod_{j=0}^{j=m} j$.
Same test in display maths:$$ \sum_{i=0}^{i=n}
i\quad \prod_{j=0}^{j=m} j$$

The sum is defined as follows:
\begin{verbatim}\mathchardef\sum = "1350 \end{verbatim}
If \TeX\ is in display style, it looks to see if the character in
position "50 of family 3 (The extensible \fn{cmex} family) has a
successor\footnote{See below for explanations on successors.}.  If it
does then the successor is taken. When not in display style, \TeX\
just takes character "50 from family 3.

\subsection{Conclusions for operators}
Both occurrences of a double sized operator must be in the same font
table, because they are linked by the successor mechanism.

%JWZ commented this out: don't understand.
%It would be a good idea to try and make them all similar sizes so that
%they don't require many different sizes. This is due to font
%restrictions.
%\subsection{Going further}
%One wonders if the choosing is recursive, but presumably it isn't,
%because of the ``if we are in display mode'', which does not do any
%size testing.


\section{How characters can be linked}

\subsection{\texttt{charlists}}
They enable several characters of the same font to be linked together.
By simply typing \texttt{charlist oct"000": oct"020": oct"022":
  oct"040": oct"060"} in the metafont source, one links all the
occurrences (in the \fn{cmex} font) of the left parentheses in
increasing size-order.

They are used for:
\begin{itemize}
\item Linking variable-size delimiters,
\item Linking variable-width accents,
\item Making pairs of operators that come in textsize and
  display size.
\end{itemize}

\subsection{The \texttt{extensible} lists p318 metafont book.}

An extensible glyph is identified with one of its pieces. One has
to decide which piece is going to be used for this identification.
For instance, in \texttt{extensible oct"060": oct"060", 0, oct"100",
  oct"102";} the first \texttt{oct"060"} is the identifier of the whole
  glyph.
The next three characters are the top, middle, and bottom pieces. The
last character position is that of the piece to be repeated as many
times as necessary between the top
and middle, and between the bottom and middle pieces. All three pieces
are optional. When they are not needed, they are replaced with `0'.
But if a zero is put in the repeater position, then character `0' will
effectively be used as a repeater.

\subsection{Restrictions compiled from p318 of the Metafont book}
\begin{itemize}
\item An \texttt{extensible} identifier can only appear at the end of a
  \texttt{charlist}.
\item A kerning/ligtable label can only appear at the end of a charlist.
\item One cannot use an extensible identifier as a kerning label.
\item One cannot use an extensible identifier as a ligature label.
%JWZ commented out the next two lines.
%\item this restriction explains why one specifies the kerning and the
%  ligatures in the same table.
\end{itemize}

\subsection{Conclusions}
Kerning with parentheses is going to be very tricky...

A delimiter is made of two sets of characters that can be in two
separate font tables. The glyphs in these two sets (the delimiters),
can be kerned with the characters that accompany them in their font. So
one must place them correctly.

\note {There are no parentheses in the \fn{cmmi} fonts, so this
  possibility has not been used.}

There is a list of all \fn{cm} extensibles, and charlists in ``Computer
modern typefaces'' p66.


\section{The vertical constructables, or ``those that come in pieces''
  --- \textit{Delimiters}}

\subsection{A few notes}
First information about delimiters p.171 of the \TeX book. They are not
all of the same type. Some are (bigl,bigr) Open/close atoms, and
others are (bigm) rel and (big) ord atoms. On the other hand, a
\texttt{\string\left} \texttt{\string\right} grouping is inner.

When a delimiter gets larger, its height and depth both grow by the
same amount.

In the \fn{cmex} font, most of the vertical constructable glyphs are
below the baseline, in the \texttt{.tfm} file.

\subsection{A first description of the choice mechanism}
A delimiter is defined by a ``small character'' and a ``large
character''. \textbf{These characters can be in different families,
and therefore in different font tables.} We shall call the small
character $a$ in family $f_a$, and the large character $b$ in family
$f_b$.

The search first starts by considering the three different sizes of
char a in its family\footnote{i.e. the scriptscriptsize then the
  scriptsize and finally the textsize. This is done in an optimized
  way: if the current size is bigger than scriptscriptsize, no point
  looking in the scriptscriptsize font, same goes for the other two
  sizes.}. When testing a character in a given font table, its
successors are tested before going to the next bigger font. In other
words, for each member of a family, starting with the smallest, and
going to the biggest, \TeX{} first looks at char $a$ and then at its
successors\footnote{See later explanations.} in the same
font table. If nothing suitable is actually found within the family
$f_a$ a similar search is done in the family $f_b$ based on character
$b$.

The search stops when the character being tested has a sufficient
height plus depth, or when it is extensible\footnote{Obviously in this
  case the appropriate delim has been found, seeing as it can be made
  to any given size bigger than the non-extensible characters.}.

If either of the couples $(a,f_a)$ and $(b,f_b)$ is set to
$(0,0)$ then the corresponding part of the search is bypassed.

If none of the characters actually found are suitable, the biggest is
taken, i.e. the one with the greatest height plus depth.


\subsection{A second description of the choice mechanism by Victor E.}
\TeX\ first tries the small variant, and if that is not satisfactory
(or if the left part of the delimiter code is 000) it tries the large
variant. If trying the large variant does not meet with success \TeX\
takes the largest delimiter encountered in this search. If no
delimiter at all is found, (which can happen if the right hand part is
also 000), an empty box of width \texttt{\string\nulldelimiterspace}
is taken.

Investigating a variant means in sequence:
\begin{itemize}
\item If the current style is scriptscript style, the scriptscript font
  of the family is tried.
\item If the current style is script style or smaller the script font of
  the family is tried.
\item Otherwise the text font of the family is tried.
\end{itemize}
Note: The plain format puts the \fn{cmex10} font in all three styles of
family three.

Looking for a delimiter at a certain position in a certain font means:
\begin{itemize}
\item If the character is large enough, accept it.
\item If the character is extensible, accept it.
\item Otherwise, if the character has a successor, (the same but
  bigger), try the successor.
\end{itemize}


\subsection{The final width of the delimiter ?}
If the character chosen ends up to be an \textbf{extensible} one,
\textem{the resulting width is that of the repeatable piece.}
Otherwise (in the case of a normal character) the width is that of the
chosen character \textem{plus its italic correction.}


\subsection{Conclusions for delims}
\begin{itemize}
\item The different sizes of a delimiter can be spread in two font
  tables if it is necessary.
\item A given delimiter does not absolutely have to have two sizes.
\item One can adjust the width of the repeatable piece for
  extensibles.
\item One can adjust the italic correction of the normal ``single
  glyph'' delimiters, in order to get it further away from things like
  $f$, $j$, $g$ and $p$. In the present case of `(' (in text
  size) it comes from the text font \fn{cmr*} so obviously has not got
  any italic correction. The vertical bar has not got any either
  (checked in the \fn{.pl} files). This is quite a global solution and
  the italic correction will be added in all cases: if it is small it
  may not bother anybody and should have the right effect
  in most cases.
\end{itemize}


\section{References}
The \TeX book ``Construction of math symbols'':
151 mathchoice: no good;
152: about delimiters and size choosing;
178: using phantom and vphantom - no good;
358: how large operators are assigned in plain, and
   some horizontal constructables;
359: all the 24 delims that can change size and the big and bigg macros;
360-361: nothing.

The \TeX book ``math symbols'': 127-128: nothing, 289: nothing, 290:
interesting things about delims.

The \TeX book ``Mathcode'':
134: tiny little bit at the bottom about mathcodes,
154: a list of the 8 classes and (3) about variable family and (bottom)
   about mathcode,
155: the mathcode "8000 + mathchardef + mathinner,
156: delcodes and delimiter,
157: radical,
214: nothing,
271: nothing, 289: nothing,
319,326: answer to exercises,
344: where all the mathcodes are set so that `1' comes from fam 0 and
`a' from fam 1,
345: where the delcodes for plain tex are set.

The \TeX book ``mathop'': 155 cf mathcode, 178 cf construction of math
symbs, 291 a bit about mathaccent, radical 324-325  361

The \TeX book ``delimiters'':
156: delcode;
157: radical;
214: nothing; 289: nothing; 271: nothing;
290: in the middle ``A delim...''; 345: see at the top;
359: Plain tex definitions of some delimiters;
146: A list of plain tex delims;
147: the bigs;
148-149: details for use of left right;
150: still more extensible chars (bottom not on growing delims);
171: info on the type of atoms made by big bigr bigl bigm;
437: openings and closings;
442: The search of the appropriate delimiter: good.

About radicals rule 11 page 443 appendix G. \TeX book

\TeX book: About operators rule 13 page 443, successors. Interesting
about italic corrections. The way limits are typeset rule 13a.

About parameter usage p447 \TeX book.
About math spacing p170 \TeX book.

\TeX\ for the impatient: p.126 nothing, p.194-196 a list of operators
and a few explanations.

\TeX\ by topic p.194: Large operators and their limits, the choosing
method: good.
\end{filecontents}


\begin{filecontents}{l3d007e.tex}
\begin{quote}\small
% jz -- a little change in the first sentence
  While working on the new math encoding, the writer realised that
  the fact the \texttt{cmex} font is only loaded in one size, and not in
  three like the other math fonts, was going to create a few problems.
  This paper deals with the following topics:
  \begin{itemize}
    \item What is in \texttt{cmex}?
    \item Which special mechanisms does \TeX\ use to access glyphs from
      \texttt{cmex}?
    \item What could be added to \texttt{cmex}?
    \item What could be taken out of \texttt{cmex}?
  \end{itemize}
  The aim of this paper is to help the MFG\footnote{Math Font Group.}
  design the MX encoding as a replacement and improvement of the
  \texttt{cmex} encoding.  \note{Most of what is written in this paper
    is pure theory, and has not been applied or tested.}

  Acknowledgements: thanks to Alan Jeffrey and Barbara Beeton for
  their constructive comments, and help.
\end{quote}

% bb -- suggest adding an introductory section 0 giving a bit of
%       background on knuth and early history; i would make suggestions
%       and vouch for the accuracy of the statements, or offer to be
%       quoted as the source if something is conjecture.

\section{What is in \texttt{cmex}?}
  \subsection{Delimiters}
  \begin{itemize}
  \item Four different sizes of ( ) and extensible versions. Left and
  right extensible modules are '102 and '103.
  \item Four different sizes of [ ] and extensible versions. The
  extensible  modules, one for the right bracket, and one for the left
    bracket are: '066, '067.
  \item Same for \{ and \}\,; the extensible module is: '076.
    \note{The extensible module here is very small, because it is
      added twice: once above the middle piece, and once below the
      middle piece. Its height is half that of the parentheses'
      extensible module. Interesting to see that there is only one
      extensible module for both the left and the right curly brace.
      This is because the left-right extension of a curly
      brace is symmetrical, unlike the parentheses for example.}
  \item Four different sizes of $\langle$ and $\rangle$. No extensible
  version.
  \item Same for $\backslash$ and $/$. No extensible version.
  \item Four different sizes of $\lfloor$ and $\rfloor$ and an
    extensible version. Extensible modules: '066, and '067.
  \item Same for $\lceil$ and $\rceil$. Same extensible modules as the
    previous one.
  \item Glyphs in positions '014 and '015 are the extensible versions
    of the vertical bar and the double vertical bar. They are their
    own extensible modules.
  \end{itemize}

  \subsection{Large operators}
  Large operators come in pairs:
  \begin{itemize}
    \item The sqcup
% bb -- although the sqcap isn't here, might want to note that it
%       isn't but could be added
    \item The circled integral
    \item The circled dot
    \item The circled plus
    \item The circled times
    \item The sums
    \item The prods
    \item The normal integrals
    \item The bigcups
    \item The bigcaps
    \item The U plus
    \item The wedges
    \item The vees
    \item The coprods
  \end{itemize}

  \subsection{Wide accents}
  \begin{itemize}
    \item 3 sizes for the hat
    \item 3 sizes for the tilde
  \end{itemize}

  \subsection{Radicals}
  \begin{itemize}
    \item Five radical signs
    \item The vertical bit needed to construct the big radical: '165
    \item The top bit of the constructed radical: '166
  \end{itemize}

  \subsection{Arrows}
  \begin{itemize}
  \item The three pieces for the construction of the vertical double
    arrow: '167 '176 '177
  \item The three pieces of the vertical single arrow: '077, '170,
    '171
  \end{itemize}

  \subsection{Horizontal curly braces}
  \begin{itemize}
  \item The four pieces for the construction of horizontal curly
    braces: '172 -- '175
  \end{itemize}


\section{\TeX's behavior with \texttt{cmex} glyphs}
  \subsection{Large operators}
  \begin{itemize}
  \item A large operator is vertically centered with respect to the
    math axis. This means that, whatever the surrounding glyph size,
    things will not look too bad.
  \item With the following definition of a large operator:
    \cn{mathchardef} \cn{sum} \texttt{= "1xyy}, if \TeX\ is in \textem
    {display style}, it looks to see if the character in position
    \texttt{"yy} of family \texttt{x} has a successor. If it does then
    the successor is taken. When not in \textem {display style}, \TeX\
    simply takes character \texttt{"yy} from family \texttt{x}. Thus in
    text style, in script style and in scriptscript style the same
    glyph is used.
  \end{itemize}

  \subsection{Vertical delimiters, and friends}
  Radicals are delimiters, and vertical arrows also, so let us only
  speak about delimiters. Here is a quote from Victor Eijkhout's book:
  \begin{quote}
    A delimiter has two codes: a small variant, and a large variant.
    \TeX\ first tries the small variant, and if that is not
    satisfactory (or if the left part of the delimiter code is 000) it
    tries the large variant. If trying the large variant does not meet
    with success \TeX\ takes the largest delimiter encountered in this
    search. If no delimiter at all is found, (which can happen if the
    right hand part is also 000), an empty box of width
    \texttt{\string\nulldelimiterspace} is taken.

    Investigating a variant means in sequence:
    \begin{itemize}
    \item If the current style is scriptscript style, the scriptscript
      font of the family is tried.
    \item If the current style is script style or smaller the script
      font of the family is tried.
    \item Otherwise the text font of the family is tried.
    \end{itemize}

    Looking for a delimiter at a certain position in a certain font
    means:
    \begin{itemize}
    \item If the character is large enough, accept it.
    \item If the character is extensible accept it.
    \item Otherwise, if the character has a successor (the same but
      bigger), try the successor.
    \end{itemize}
  \end{quote}
  Using the three size mechanism probably did not seem necessary to
  Knuth.  Generally large delimiters are used in display style, and not
  in script or scriptscript style. However, they can also be used in
  the small styles.

  \subsection{Wide accents}
  For the choice of accents, \TeX\ only considers one font, but looks
  to see if the current accent has a successor. Unlike the delimiter
  choice mechanism, the accent choice mechanism does not go through all
  three sizes. \TeX\ chooses the accent in such a way that the accent
  width is as close as possible but smaller than the width of the box
  to cover.

  \subsection{First conclusion}
  For operators, a distinction in size is made between text style and
  display style, whereas with the usual automatic size choosing
  mechanism, glyphs in text style and in display style are taken from
  the text size font, and are therefore the same size.


\section{Consequences of loading \texttt{cmex} in 3 different sizes}

  \subsection{Consequences for operators}
  For operators, let us consider two completely separate policies. In
  the one  case \texttt{cmex} is unchanged, and loaded in three
  sizes. In the other case, an imaginary font derived from \fn{cmex}
  called \texttt{cmex}$'$ is loaded in three sizes.  In \fn{cmex}$'$ the
  operators no longer have two sizes, thus glyphs like \cn{bigcup} do
  not have a successor in their font.

        \textem {All the following supposes that no new macros have been
  written. What would \TeX's automatic behavior be?}

  \begin{description}
  \item[If \texttt{cmex}$'$ is loaded in three sizes.] In
    such a situation \TeX\ has a large version of \cn{bigcup} (and
    other operators) in text size, a small version in script size, and
    yet a smaller version in scriptscript size.

    The operators can be centered: no problem.

% jz -- change:
    In script and scriptscript style the resulting `big operators' would
    be smaller than if they were produced with today's standard \TeX,
    and today's standard \texttt{cmex}.  What is more, one would be
    smaller than the other, which is also not the case with today's
    standard \TeX, and today's standard \texttt{cmex}.
% ----

    In display style one would get big operators from the text size
    font: this is acceptable.  \textem {But one would also get a big
      operator in text style,} and that does not conform with today's
    standard \TeX\ behavior.


% jz -- I added encoding:
  \item[If the existing \texttt{cmex} encoding is loaded in three
  sizes.]
    The operators can be centered: no problem.

    In script style, one would get the smallest version of a large
    operator. But coming from a small size font, that will produce
    something very small. In scriptscript style, same behavior as in
    script style, but the result would be even smaller.
% jz -- Added this:
    Thus in script, and in scriptscript style, the large `big
    operators' would never automatically be used.
    Hence the \texttt{cmex}$'$ encoding.
% -- maybe should have said that earlier :-)

    In text style, \TeX\ would produce the small version taken from
    the text size font. In display style \TeX\ would produce the big
    version of operators taken from the text size font.

    So in text style and in display style, there would be no change
    compared to what today's standard \TeX\ produces.
% jz -- changed the phrasing here:
    But script and scriptscript style would produce different
    results.
% --

  \end{description}

  In both cases, things could be improved if macros were written to
  override the present behavior of \cn{bigsqcup}. One could think of
  things like \cn{mathchoice}, but $\ldots$


  \subsection{For vertical delimiters, radicals, vertical arrows}
  Let us start by supposing \TeX\ is in scriptscript style, and it has
  to typeset a large delimiter. One should consider two cases:
  \begin{description}
  \item[The delimiter has an extensible variant.]  In this case the
    search will start in scriptscript size, and continue until \TeX\
    finds the extensible variant of scriptscript size. Then the search
    will stop, and the extensible will be used.  This extensible will
    come from scriptscript size, and therefore probably not look the
    same as it would in today's setup, where all extensibles come from
    text size.
  \item[The delimiter does not have an extensible variant.]  As
    previously, the search starts in scriptscript size. If nothing
    big enough is found in scriptscript size, the search continues in
    script size. If still nothing is found, the search then continues
    in text size. If necessary the biggest delimiter from text size
    will be used. If the search stops in text size, there is no
    difference with what \TeX\ produces today. But if the search stops
    before reaching text size, the chosen delimiter will be different
    from the one \TeX\ would use in the present configuration. Its
    strokes would be finer, and better adapted for use in script style.
  \end{description}

  If one supposes that \TeX\ is in script style, the previous two
  cases also apply, except that every occurrence of `scriptscript' must
  be replaced with `script'. If one supposes that \TeX\ is in text
  style, the result of loading three different sizes of \texttt{cmex}
  would be the same as it is in \TeX's current configuration.

  \subsection{For horizontal curly braces}
  If they are automatically taken from script size, or from script
  script size when necessary, the spacing changes a little,
  because the dimensions in the \texttt {.tfm} files would be
  different. A consequence of this could be different line and page
  breaks.

  However, it would be nice if curly braces did come out
  of the correctly sized fonts. Then their boldness would match the
  surrounding text. But apparently from a macro programming
  point of view things could be difficult, even if the glyphs are
  available and loaded.

  \subsection{For wide accents}
  See first paragraph of previous section.

% jz --
%  I guess that in actual fact, the wide accents were put in
%  \texttt{cmex} because they could not fit any where else.
% jz -- I don't want to start any discussion on that topic so i'm
% taking it out

  If accents were taken from the current size, things could only look
  better.
  The accent width would be closer to that of the material under the
  accent, and the accent's boldness would be better adjusted.

  \note {Unlike the delimiter choice mechanism, the accent choice
    mechanism is restricted to one font, and one size.  It will thus
    not look in text size when it is in script size for instance. So
    in script style, accents will always come out of the script size
    font, and in scriptscript style, accents will always come out of
    the scriptscript size font, etc.$\ldots$}

  \subsection{Conclusion}
% jz -- Changed the conclusion
  Nearly everything in \texttt{cmex} could have lived in a normal three
  sized math font, and maybe that would have been better. The only
  problems would have come from the specific ``big operator'' behavior
  required by Knuth.

  Also one must not forget that Knuth did not want to leave any empty
  slots.
% jz -- added this:

  The reduced amount of memory that was available on the machines with
  which \TeX\ was first used could have been another reason for loading
  \texttt{cmex} in one size only.


% jz -- This section has been changed a bit, and extended.

\section{What could be added to \texttt{cmex}?}
Let us now consider possible evolutions of \texttt{cmex}.
In spite of the terminology ``adding to \texttt{cmex}'', the font
resulting from these evolutions would have a different name.
  \subsection{If the \texttt{cmex} encoded font is loaded in three
  sizes}
  In this case big operators would not produce the usual results, and
  the rest would be slightly different, as stated above.
    \begin{itemize}

    \item One could add wide accents, but one would get slightly
      different (better) results. Thus wide accents would match the
      script and scriptscript styles. Macros could be made available
      as a style option to keep the old behavior, if necessary.

    \item One could increase the number of different sizes for accents.

    \item One could add big delimiters and their extensible versions,
      without any problem! Things will be slightly
      better adjusted in script and scriptscript style. Macros could
      be made (available as a style option) to keep the old behavior,
      if necessary.

    \item One could increase the number of different sizes for
      delimiters, and one could probably reduce the height of the
      extensible module in order to make the growing of delimiters
      more gradual.

    \item One could add some vertical extensible arrows!  Things will be
      slightly better adjusted in script and scriptscript style.

    \item One can add small and large `big operators' without any
      problem!

    \item Big improvement: one could add loads of other glyphs (symbols,
      etc.$\ldots$) that would come in all three sizes.

    \end{itemize}

  \subsection{If the \texttt{cmex} encoded font is only loaded in one
  size}
    \begin{itemize}
    \item One could add big delimiters and their extensible versions
      without any problem!

    \item One could increase the number of different sizes for
      delimiters, and one could reduce the height of the extensible
      module in order to make the growing of delimiters more gradual.

    \item One could add some vertical extensible arrows!

    \item One could add large operators without any problem!

    \item One could add wide accents without any problem, and the
    present behavior of wide accents would not change. But if wide
    accents are meant to match the script and scriptscript styles, then
    wide accents must go in another font that would be loaded in
    different sizes.

    \item One could increase the number of different sizes for accents.

    \item One could add other stuff, but it would only come in one
      size.

    \end{itemize}

  \subsection {If a \texttt{cmex}$'$ encoded font is loaded in three
      sizes}
        The imaginary \texttt{cmex}$'$ encoded font, previously
        described in this document is considered here.

  One could add the same things as when \texttt{cmex} is loaded in three
  sizes. The only difference is: if no macro programming were done, the
  text style and display style will produce the same `big operators'.
  In script and scriptscript style the `big operators' would be in
  different sizes from one another and smaller than those in text
  style.


\section{Conclusions}
If one loads \texttt{cmex} in three different sizes, many things are
improved, and with a \cn{mathchoice} the initial behaviour of large
operators could be kept, or available as a style option.

If \texttt{cmex} is kept in a single size, one must decide whether to
put wide accents in or not.


\section{The beginning of my \texttt{cmex10.pl} file}

\begin{verbatim}
(FAMILY CMEX)
(FACE O 352)
(CODINGSCHEME TEX MATH EXTENSION)
(DESIGNSIZE R 10.0)
(COMMENT DESIGNSIZE IS IN POINTS)
(COMMENT OTHER SIZES ARE MULTIPLES OF DESIGNSIZE)
(CHECKSUM O 37254272422)
(FONTDIMEN
   (SLANT R 0.0)
   (SPACE R 0.0)
   (STRETCH R 0.0)
   (SHRINK R 0.0)
   (XHEIGHT R 0.430555)
   (QUAD R 1.000003)
   (EXTRASPACE R 0.0)
   (DEFAULTRULETHICKNESS R 0.039999)
   (BIGOPSPACING1 R 0.111112)
   (BIGOPSPACING2 R 0.166667)
   (BIGOPSPACING3 R 0.2)
   (BIGOPSPACING4 R 0.6)
   (BIGOPSPACING5 R 0.1)
   )
(CHARACTER O 0 ...
\end{verbatim}


\subsection{Comments about the \texttt{cmex10.pl} file}
\begin{itemize}
\item The xheight is not equal to zero.
\item The space is equal to zero.

\item With the following:
\begin{verbatim}
(CHARACTER O 100
   (CHARWD R 0.875003)
   (CHARHT R 0.039999)
   (CHARDP R 1.760019)
   (VARCHAR
      (TOP O 70)
      (BOT O 73)
      (REP O 76)
      )
   )
\end{verbatim}
that is in the \texttt{.pl} file, one can produce something that looks
like a growing integral:
$$ \left\lmoustache \frac{3.q}{\frac{3\pi.r^2}{3.q.b.c}} \right.$$

\item The pieces used to construct the horizontal curly braces are not
linked in any way.

\item The bottom pieces of the extensible parentheses are overloaded
  for \cn{rmoustache} and \cn{lmoustache}. One of these could be linked
  (charlisted) with the integrals, so that \cn{left}\cn{bigint} could
  produce a growing integral like the delimiters.

\item The bottom pieces of the curly braces ('072 and '073) are also
  overloaded for \cn{lgroup} and \cn{rgroup}.

\item The middle pieces of the curly braces are overloaded for
  \cn{arrowvert} and \cn{Arrowvert}. Other single and double
  extensible bars with different spacing.

\item The extensible module of the curly braces is overloaded for
  \cn{bracevert}. A fat vertical bar.

\item The extensible modules of the parentheses are overloaded to
  produce more fat vertical bars.

\item More overloading: the construction pieces of the extensible
  brackets are also used for the $\lceil \rceil \lfloor \rfloor$. Thus
  the top left bracket piece ('062) identifies the left bracket; the
  top right bracket piece identifies the right bracket; the
  bottom left bracket piece identifies the $\lfloor$ extensible
  version; the bottom right piece identifies the $\rfloor$; the left
  extensible module identifies the $\lceil$; and the right extensible
  module identifies the $\rceil$. This over-loading may not be desired.

\item For the wide accents and the curly braces the depth is nil.

\item All the delimiter glyphs in \texttt{cmex} are set with a very
  small height and a big depth. This is because the radical
  primitive is also used for delimiters. For radicals, the \texttt
  {.tfm} height of the glyph is used to determine the size of the
  \cn{hrule}.

\item The extension modules do not have any height at all. Same for
  the arrow heads.

\item The four integrals have italic corrections.

\item Small versions of operators have a nil height, whereas
  big versions have a small height and a big depth:
  \begin{verbatim}
  (CHARACTER O 116
   (COMMENT This is the small \bigotimes)
   (CHARWD R 1.1111145)
   (CHARDP R 1.000013)
   (NEXTLARGER O 117)
   )
  (CHARACTER O 117
   (COMMENT This is the big \bigotimes)
   (CHARWD R 1.511116)
   (CHARHT R 0.100001)
   (CHARDP R 1.500012)
   )
  \end{verbatim}
  What is more, in the metafont code,
  both big and small versions of bigops are under the baseline.

\item There are no kerns or ligatures in \texttt{cmex}.

\end{itemize}


\section{Characters under the baseline}

\subsection{Which?}
It is understood and agreed that the radical glyphs need to be
virtually completely under the base line. \TeX\ uses their small
height to measure the thickness of the radical's over line. But
delimiters and `bigops' are also placed under the baseline for no
obvious reasons: both types of glyphs are just centered on the maths
axis.

\subsection{`Bigops' and metafont code}
\begin{description}
\item[The metafont padded operator.]
Although both big and small versions of bigops are placed completely
under the baseline (height=0), the big version ends up ---~in the
\texttt {tfm} file, with a non-nil height. Many people agree that the
reason for this is that the metafont code for the large version of
`bigops' contains the \cn {padded} macro. This last places some extra
space around the glyphs.  This extra space would serve for separating
the `bigops' from the limits they may take.

Large integrals do not have any padded macro, and thus in the \texttt
{tfm} file, they are placed completely under the base line.  The
reason for the difference between integrals, and other `bigops' could
be that the limits of the former are usually placed next to the glyph,
and not on top. Even when limits are placed on top of the integral,
the results do not look too bad, because the integral is very narrow.

The reason for `bigops' being set under the baseline is still unknown.
Whether or not they would be correctly centered on the math axis,
if they were placed over the base line is not sure either.

\item[Changing the metrics in the metafont code.] It is interesting to
  see how \TeX\ would manage if the `bigops' were placed over the base
  line.
  The best way to find out, is to change the metafont code of \texttt
  {cmex}, and see$\ldots$ I first did the test on \cn{bigoplus}
  because it was a good candidate ---~simple metafont code. I have
  never used metafont before. I copied all the necessary files into my
  directory, and changed their names to `\texttt {my*}'. I then did
  the following modifications in \texttt {mybigop.mf}
  (look for \%\%\%\%):
  \begin{verbatim}
    cmchar "\textstyle circle-plus operator";
    beginchar(oct"114",20u#,10/6dh#,0); %%%% was ,0,10/6dh#)
    adjust_fit(0,0); pickup pencircle scaled stem;
    lft x6=hround u; x2=w-x6; top y8=h; bot y4=-d; %%%% was top y8=0
    ...

    cmchar "\displaystyle circle-plus operator";
    beginchar(oct"115",27.2u#,14/6dh#,0); padded 1/6dh#;
         %%%% was ,0,10/6dh#)
    adjust_fit(0,0); pickup pencircle scaled curve;
    lft x6=hround u; x2=w-x6; top y8=h; bot y4=-d; %%%% was top y8=0
    ...

  \end{verbatim}

  When I ran Metafont on it there were no problems. After having put
  all the relevant files where they were meant to go, I gave the
  following to \TeX:

  \begin{verbatim}

     Old cmex in text style: $\bigoplus i \coprod$
     Old cmex in display style: $$-\bigoplus i\coprod \mathchar"034D$$

     % change font:
     \font\myfont=myex10
     \textfont3=\myfont
     \scriptfont3=\myfont
     \scriptscriptfont3=\myfont

     New myex in text style: $\bigoplus i \coprod$
     New myex in display style: $$-\bigoplus i\coprod \mathchar"034D$$
     The minus sign gives the height of the math axis, while the bottom
                 of the word `base' gives that of the baseline.

     \bye

  \end{verbatim}

  Here is the output:

     Old cmex in text style: $-\bigoplus i \coprod$ \\
     Old cmex in display style:
         $$-\bigoplus i\coprod \mathchar"034D base$$

     % change font:
     %\font\myfont=myex10
     %\textfont3=\myfont
     %\scriptfont3=\myfont
     %\scriptscriptfont3=\myfont

     New myex in text style: $-\bigoplus i \coprod$ \\
     New myex in display style:
         $$-\bigoplus i\coprod \mathchar"034D base$$
     The minus sign gives the height of the math axis, while the bottom
                 of the word `base' gives the baseline.

  The results are quite obvious: in both cases the \cn{bigoplus} is
  correctly centered. On the 300 dpi printer I have here there is no
  visible difference. What is more the \cn{mathchar"034D} proves that
  the metrics of the \cn{bigoplus} have changed: in the first case the
  glyph is placed under the baseline, and in the second case it is
  placed over the baseline. The difference is not visible if this
  document is printed with the wrong fonts.
\end{description}
\end{filecontents}


\documentclass{l3ms002}

\usepackage{l3d007}
\usepackage{ifthen}

\setlength{\emergencystretch}{2em}

\hyphenation{pre-sent fa-mi-ly}

\renewcommand{\abstractname}{\Large Acknowledgement}


\typeout{******************************************}
\typeout{* }
\typeout{* This document makes use of three fonts}
\typeout{* which you might not have on your system.}
\typeout{* These are:}
\typeout{* \space\space ecrm1000 \space\space T1 encoded Computer Modern}
\typeout{* \space\space msam10   \space\space AMS symbol font A}
\typeout{* \space\space msbm10   \space\space AMS symbol font B}
\typeout{* }
\typeout{* In that case type <RETURN> to the error message}
\typeout{* The output will not contain the font charts but}
\typeout{* will otherwise be readable.}
\typeout{* }
\typeout{* The document will also produce a number of underfull}
\typeout
       {* and overfull boxes. Please ignore them (or volunteer to help)}
\typeout{* }
\typeout{******************************************}

\newcounter{sleep}
\whiledo{\value{sleep}<1000}{\stepcounter{sleep}}

\begin{document}


\title{Technical Report on Math Font Encoding}
\author{Justin Ziegler}
\date{Started on June 13, 1993\\
                        Last change: June 1, 1994\\
                        Organisational updates: August 23, 2000\\
      Printed: \today \\
      Filename: \fn{l3d007.tex} }

\maketitle


\newcommand{\NFSS}{\textsf{NFSS}}

\chapter*{Foreword}

I'm pleased to present the final report on ``Math Font Encoding''
produced by Justin Ziegler for the \LaTeX3 project to the public.

Justin has worked for three months at the Johannes Gutenberg
University Mainz. His work was generously sponsored by GUTenberg
(The French \TeX{} Users Group) and by the ZDV of the University
of Mainz (Data Processing Center), the latter providing Justin with
office space and taking care of the administrative details.

In the past years a lot of work went into integrating  new fonts into
the \TeX{} system. Only five years ago, typesetting with \TeX{}
basically meant typesetting in Computer Modern. Nowadays many users can
choose  (at least theoretically) from several thousands of fonts.
Today, \NFSS{} is the standard font selection in \LaTeX{} and due to
this mechanism and the fontinst-package by Alan Jeffrey virtually every
PostScript font, in fact, every font for which a \texttt{tfm}-file can
be obtained, can be used, out of the box, with \LaTeX.

But for these thousand text fonts there are only five font families for
use in math formulas
to go with them. Even worse, each of these math font sets is encoded
in a different way making it nearly impossible even for an expert \TeX{}
user to use different fonts for math in different jobs.

The work undertaken by Justin is the first of several steps to solve
the problems at hand, the final goal being the development of a system
that allows the user
to change math fonts as painlessly as it is now possible with text
fonts.

Based on Justin's analysis and his proposal, the \LaTeX3 Project is now
undertaking to provide a prototype implementation for math fonts,
starting with the Computer Modern fonts as well as the Euler Math fonts.
We expect this implementation to be available for public usage
during 1995.


\begin{flushright}
Mainz, December 6, 1994\\[5pt]
Frank Mittelbach\\
Technical Director \LaTeX3 Project
\end{flushright}


\begin{abstract}

\vskip1cm
  I wish to thank the many people without whom my stay in
  Germany would not have been possible, and the work I did would not
  have been done. This includes:

  \begin{description}
  \item[GUTenberg] who financed my stay;

  \item[Ehoud Ahronovitz] for helping me with the administrative side
    of things, for giving me the opportunity of coming here, and
    spending extra time with me to make sure that everything went
    all right;

  \item[Frank Mittelbach] for his friendly welcome, for the
    organization, time and guidance;

        \item[Bernard Gaulle] the past president of GUTenberg, for the
                organization and logistics;

  \item[Klaus Merle] for lending all the material that I used;

  \item[Chris Rowley] for the organization, and help;

  \item[Stefan Steffens] for answering patiently all my stupid
    questions, and helping me integrate into Mainz and the university;

  \item[The \LaTeX3 project] which partially financed my stay in England
    for the Aston conference;

  \begin{sloppypar}
  \item[Barbara Beeton and Alan Jeffrey] who commented on my papers, and
    answered more stupid questions;
  \end{sloppypar}

  \item[J\"org Knappen] who gave me advice on the project, and
    with whom I discovered the Mainz night life;

  \item[All the computer center employees] for making my stay more
    pleasant;

  \begin{sloppypar}
  \item[All the people who took the trouble to answer my mail,] for
  their time  and effort;
  \end{sloppypar}

  \item[Donald E. Knuth] who created \TeX.

  \end{description}

\end{abstract}


\setcounter{tocdepth}{1}
\tableofcontents


%                                This is the introduction


\chapter{Introduction / Overview}

\section{The technical environment}
I worked in the ZDV of the university of Mainz in Germany.  In German
ZDV stands for ``Zentrum f\"ur Datenverarbeitung'', which means Data
Processing Center.
This is where the main --- soft and hardware --- maintenance team
works.

I worked on an X-terminal like a lot of other people in the university.
For writing my documents I used GNU Emacs together with the
AUC\TeX\ package.


\section{A few Definitions}%    ***************** first section
        %jwzinria: one could get rid of this subsection ?
  \subsection{\TeX: a page description language}
  The best definition I can find for \TeX\ is: ``one third compiler,
  one third interpreter, and one third word processor''. It was
  written in $\fam 1 1975$ by D.~E.~Knuth and a group of students.
  One of its main features is its portability. A document written
  on one machine can be used on another machine.  Knuth also
  insisted that \TeX\ would not change. So a document written in $\fam
  1 1980$
  is still usable in $\fam 1 1990$.

  The language defined by \TeX\ is very specific, in so far as it is
  designed to describe a page layout. \TeX\ processes the page like a
  rectangle, or more exactly like a box, that can be filled with
  smaller boxes. These smaller boxes can similarly be filled with
  smaller boxes, and so on\ldots{} The smallest box one can get is a
  vertical~/~horizontal line, or a character (a glyph), or just some
  space. \TeX\ has variables in which one can put boxes, or different
  types of numbers. One can define functions ---~usually called
  macros~--- in a way similar to lisp. The if-then-else statement is
  there, and combined with recursion it can be used to make loops.

  In spite of its limitations due to its specificity, \TeX{} defines
  a Turing machine. The syntax is very disagreeable, but one can get
  used to it: somebody wrote a basic interpreter in \TeX. The only
  difference between \TeX{} and a usual compiler, is that \TeX{} stops
  the compilation when it gets to the pcode, and just puts it into a
  file.  This file, called the device independent file, can then be
  sent to a printer, a screen, or any other printing device.

  Today many people use \TeX. All \TeX\ users have got together and
  created TUG: \TeX\ Users Group.

  \subsection{Plain \TeX} \label{plain-tex}
        %jwzinria: one could get rid of this subsection ?
  Plain \TeX\ is the standard set of macros and definitions that comes
  with \TeX. It is written in \TeX.

  \subsection{\LaTeX{}: a document description language}
        %jwzinria: one could get rid of this subsection ?
  Just as \TeX\ is a language to describe pages, \LaTeX\ is a
  language designed for describing whole documents, and their logical
  structure. The idea is that it lets the user concentrate on the
  contents of the document rather than the formatting commands
  necessary for the document to look good. Thus it uses the
  logical mark-up concept. It was written by Leslie Lamport in 1985.
  Technically, \LaTeX\ is ``only'' a cluster of macros written in \TeX.
  This means that a \LaTeX\ user has still got access to most of the
  \TeX\ language.  \LaTeX\ includes the following facilities:
  \begin{itemize}
       \item Cross referencing.
       \item Automatic construction of a table of contents.
       \item Automatic construction of an index.
       \item Bibliography referencing.
       \item Basically the same math mode as \TeX.
  \end{itemize}


  \subsection{The \LaTeX3 project}
        %jwzinria: one could get rid of this subsection ?
  During the 1989 TUG conference at Stanford, the decision was taken
  to produce an improved and expanded version of \LaTeX, that was
  going to be called \LaTeX3.

  The major difference in the new version will be the addition of a
  good interface through which designers can specify how classes of
  documents should be formatted.

  Frank Mittelbach is the technical director of the project; he and
  Chris Rowley are responsible for the management.


  \subsection{Fonts, glyphs, and slots}
  \TeX\ would not be able to produce any nice documents if it did not
  have any fonts. One cannot get a nice looking `A' or `$\mathcal{A}$',
  or any other letter if nobody has previously designed it.

  All \TeX\ really does, is produce a file that contains a set of
  instructions. Each instruction looks like the following: ``place
  here the picture that is in such and such a file, in position number
  $x$.'' \textem{The files that contain all the pictures (the letters
    and other symbols), are called ``fonts''. All the pictures that are
    in a font are called ``glyphs''.} Every glyph in a given font has
  a specific and known position. \textem{I shall use the word ``slot''
    to refer to a given position in a font.} Some slots can be empty,
  but most of them contain a glyph.


  \subsection{Font encodings} \label {about-encoding}
  When \TeX\ refers to the glyph number $x$, it must know which glyph
  is in position number $x$. This knowledge is contained in the
  encoding.  In some cases one could say that the letters are in the
  ASCII order. But this is not sufficient, because the
  ASCII code does not include all the glyphs that people wish
  to put in their documents. Therefore, one must link every single
  font with a given encoding, and make the encoding known by \TeX.
  Many different encodings exist, sometimes even for the same group
  of glyphs. But there are also many fonts that use the same encoding.

  A mathematical definition of an encoding could be the following:
  \textem{An encoding is a set of glyph names in a given order.}

  \subsection{The ``Computer Modern Fonts''}
  When D.~E.~Knuth created \TeX, he also created a set of fonts called
  the
  \textem{Computer Modern Fonts}. Most of them were based on an encoding
  that is called the Computer Modern Encoding throughout this document.

        All file names of Computer Modern Fonts start with the two
        letters `\texttt{cm}'.


  \subsection{Metafont: a font description language}
  Metafont is a language / program especially designed to describe glyph
  shapes, and more generally whole fonts.  It was used to generate all
  the ``Computer Modern'' fonts. The Metafont user must describe or
  ``program'' the curves for each glyph. Then Metafont produces an array
  of black and white dots for each glyph. The dots can be made as small
  as necessary to fit the precision of the printing device.


  \subsection{\TeX\ version 3}
  In the beginning of $\fam 1 1990$, under a lot of pressure (from the
  \TeX\ User Community), D.~E.~Knuth produced a new version of \TeX.
  \TeX\ version 3 was born. The main improvements were the following:

  \begin{itemize}
  \item Up to 256 glyphs per font. The previous versions of \TeX\ could
    only use the first 128 glyphs of a font.


  \item Virtual fonts. A normal font has all its
    glyphs in a file, and this file is in actual fact the font.
    Virtual fonts enable people to group 256 glyphs taken from many
    different fonts, and make \TeX\ think it is using one normal font.
    For instance, one could make a virtual font with lowercase letters
    in bold, and uppercase letters in italic. The user would work as
    if he was using one font, but the results would in actual fact be
    a combination of two fonts. A very good example implementation of
    virtual fonts is the creation of ``Small Caps'' fonts: the
    uppercase letters could come from a roman upright font at 12
    points, whereas the lowercase ones could come from a roman
    upright at 10 points.

    Virtual fonts enable still more ingenious things, like replacing
    glyphs with a set of \TeX\ macros.  One can then
    consider, for example, automatic raising or lowering of some
    letters.

  \item Better hyphenation. \TeX\ version 3 can have up to 256
    different hyphenation tables, and can produce good automatic
    hyphenation even when a word contains accents. The latter was not
    possible in previous versions. More generally the hyphenation
    mechanisms have been improved.

  \item The new ligature mechanism is more powerful. The result of a
    ligature is no longer only one glyph, but can be a set of
    glyphs...

  \item Special ligatures can be done at the beginning and at the end
    of words. Thus when a given letter is at the end of a word, its
    shape can be different from the shape it would have in the middle
    of a word.

  \item Better automatic adjusting of interword space.

  \item More little details that make everybody happy...
  \end{itemize}


  \subsection{The ``DC Fonts''}
  Although D.~E.~Knuth included a lot of ``European glyphs'' in his
  Computer
  Modern fonts, more were needed. In $\fam 1 1989$ \TeX\ users got
  together in Cork, and designed some new fonts called the \textem{DC
  Fonts}.
  Thanks to the new features  of \TeX\ version 3 (256 glyphs per font
        encoding),
  DC fonts included for example more special letters for Catalan and
  Scandinavian languages.

        The DC Fonts used what is now called the \textem{Cork encoding.}
        All DC  fonts file names start with `\texttt{dc}'.


\section{My work}
%\section{My work}
One of \TeX's nicest features is its ability to typeset mathematical
formulae. There has now been over ten years of experience typesetting
mathematical material with \TeX.  During this time, \TeX's math mode
has been used to set a wide variety of material, including traditional
mathematics, categorical diagrams, chemical reactions, computer
programs and textual material such as `$5\frac12\%$' or `$\fam0M^{lle}$'.

In recent years, with the arrival of the Cork standard for typesetting
European text, and the Virtual Font standard, the fonts available for
use in \TeX\ have radically changed. The current situation is that
there are over 14,000 text fonts available for use in \TeX, but only
five math fonts:
\begin{itemize}
\item Computer Modern
\item Computer Concrete with Euler
\item Lucida Math
\item Lucida New Math
\item Math Time
\end{itemize}
Each of these fonts uses a different encoding, and each comes with its own
selection of \TeX\ macros.

Although the Cork encoding is rapidly being established as the
standard encoding for European Latin text, there is no similar
encoding for mathematics.  The result is:
\begin{itemize}
\item complex macro packages for using each math font.
\item it is difficult to set mathematics with Cork text, since the Cork
   encoding does not include the uppercase Greek.
\item installing PostScript math fonts such as Mathematical Pi is very
   difficult.
\end{itemize}

Furthermore, the present math encoding includes glyphs like old-style
digits, and game card suits $(\spadesuit)$ that just do not belong in
a math encoding. On the other hand, many new glyphs have been
designed and should be included in the math encoding.

To solve these problems, a new math encoding, using all the power of
\TeX\ version 3, is needed. For this reason I have been trying to
re-organize all the glyphs that are needed to typeset mathematical
formulae with \TeX, according to various technical constraints.

The new math encoding that I am helping to produce is hopefully going
to be part of the \LaTeX3 package, and comes as one of the general
improvements of \LaTeX.

First I learnt to use \TeX. In a second stage, I had to study and
understand the technical constraints that apply to the grouping of
mathematical glyphs in a font. Only then could I actually start
thinking about which glyphs should go where. I intensively used \LaTeX\
---~so that I permanently had an up to date record of what had been
done ---~and email, to communicate with the people I
was working with.


\chapter{The \TeX nicalities of math typesetting}


\section{A brief description of \TeX's math facility}
  \begin{description}
  \item[Logical markup like \LaTeX.] For the design of \TeX's user
    interface, one of Knuth's concerns was that in the source code of
    a mathematical document the formulae should be readable in a
    linear manner. Thus when a mathematician thinks, he says to
    himself: ``$n$ over $n-1$'', and when a \TeX nician works, in order
    to produce the result $\frac{n}{n-1}$ he just has to type:
    \verb|n \over {n-1}|.

    The user is no longer bothered by trying to get this bit of text
    higher than this other bit of text.  He just gives \TeX\ the
    logical meaning of what should be typeset, and it is correctly
    placed.

  \item[The two math modes.] There are two ways to enter \TeX's math
    mode, which produce slightly different results with the same
    input. One mode is called the \textem{display mode,} and produces
    \textem{display style,} while the other is called \textem{text
      mode,} and produces \textem{text style.} The following input:\\
    \verb|    $ \int_0^1 \frac{1}{x}\;dx $| \\ produces
    \textem{text style:} $\int_0^1 \frac{1}{x}\;dx$, which can be
    mixed with text, whereas \\
    \verb|    $$ \int_0^1 \frac {1}{x} \;dx $$| \\ produces \textem
    {display style:}
    $$ \int_0^1 \frac {1}{x} \;dx $$ which is automatically
    centered and surrounded by space.

  \item[Automatic size change according to meaning.] When the user
    says to \TeX: ``this letter is a superscript'', or ``this number is
    a subscript'', \TeX\ automatically typesets the letter (or the
    number) in a smaller font size. \TeX\ does that same size adjustment
    for setting limits on glyphs like $\sum$, or $\smallint$.

  \item[Automatic placing for sub/superscript and for limits.] At the
    same time as \TeX\ changes size automatically when the user
    specifies a sub- or superscript, \TeX\ also raises and lowers the
    resulting text. When placing limits over a $\sum$, for example,
    \TeX\ automatically centers them over the sum:
    $$\sum_{i=0}^{i=n} i = \frac {n(n+1)} {2}$$

  \item[Size change for big operators.] One can see in the previous
    example that the two $\sum$ signs (one in the text and one in the
    example), are not set in the same size. \TeX\ changes the size of
    some big operators when they are set in a centered environment
    like that example is. The integral also changes size.

  \item[Automatic spacing and math classes.] As one can see in the
    previous example, \TeX\ also spaces various glyphs in a special
    way. For instance the space around the $+$ sign is quite large,
    whereas the space between the $n$ and the open parentheses is
    comparatively reduced. Turning off the automatic mathematical
    spacing for the $+$ sign would produce the following: $(n \mathord
    + 1)$ versus $(n+1)$.

    From a \TeX nical point of view, the math spacing is done by
    dividing all mathematical glyphs into classes. For each class
    \TeX\ has different spacing rules. Thus a class 1 glyph followed
    by a class 2 glyph would not induce the same spacing as a class
    1 followed by a class 3. There is no point in giving all the spacing
    rules here. The different classes are listed below\footnote{Thanks
      to Victor Eijkhout for the comments.}:
    \begin{enumerate}
    \item \textem{Ordinary:} lowercase Greek characters, and those
      symbols that are just `symbols';
    \item \textem{Large operators:} integral and sum signs, and `big'
      objects such as \cn{bigcap}, or \cn{bigotimes}. Large operators
      are centered vertically, and they may behave differently in text
      style, and in display style\footnote{See below for
        explanations.}.
    \item \textem{Binary operators:} plus, minus, and look
      alikes;
    \item \textem{Binary relations:} equal, less than, subset, and
      friends;
    \item \textem{Opening symbol:} opening brace, bracket,
      parentheses, etc$\ldots$
    \item \textem{Closing symbol:} closing brace, etc$\ldots$
    \item \textem{Punctuation:} most punctuation marks, with an
      exception or two;
    \item \textem{Variable family:} described further on in section
      \ref {var-fam}.
    \end{enumerate}

  \item[More symbols/glyphs.] Last but not least, \TeX's math facility
    gives the user easy access to special symbols: Greek letters,
    $\aleph$\footnote{$\aleph$ is a Hebrew letter, not a Greek one.},
    $\cap$, $\subset$, and many others that are often used in
    mathematical formulae.

  \end{description}


\section{Math styles}
  When Knuth wrote ``The \TeX book'', he extended the `display', and
  `text style' terminology. If \TeX\ is typesetting sub- or superscript
  material, one says that it is in \textem{script style.} Furthermore,
  if \TeX\ is typesetting sub- or superscript when it is already in
  script style, one says that it is in \textem {scriptscript style.} The
  style terminology must not be confused with the size terminology that
  is described further on: text size, script size, and scriptscript
  size.


\section{Font families}
  \subsection{What are font families? / a definition}
  In math mode, \TeX\ does not load fonts in the same way as it does
  in text mode. For maths, Knuth thought best to organize the fonts in
  families, and give each family a number. One font family can contain
  three fonts.

  The normal use is to load in a single family the same font in
  three different sizes. One size for the main text, one size for
  superscript and subscript, and one size for the exceptional
  super-superscript, or super-subscript. A good example ought to make
  things clear: $$\int_0^\infty\;e^{\alpha.x^\alpha}\;dx = \;\;?$$ It is
  clear that the $x$ is smaller than the $e$, and that the second
  $\alpha$ is smaller than the first, which is the same size as the $x$.

  \subsection{The organization of mathematical glyphs}
  In the present version of \TeX\ the mathematically used glyphs are
  organized in 4 families:
  \begin{description}
  \item[Family 0: Computer Modern Roman (\texttt {cmr})] This is a
    normal upright roman text font. It is loaded in a math family
    in order to typeset things like $\log$ or $\sin$. The other reason
    for which it is loaded into a math font family is that it
    contains the uppercase Greek alphabet, so that the user can
    typeset $\Psi$ and $\Gamma$, or even $\Upsilon$. A few other
    symbols are also taken from \texttt{cmr}: `;' `=' `( )' `[ ]' `:'
    `+' $\ldots$ See figure in appendix \ref
    {app-fonts}.

  \item[Family 1: Computer Modern Math Italic (\texttt {cmmi})] The
    \texttt {cmmi} font is one of the special math fonts. For a
    non-expert user, its letters look just like normal \textit {italic}
    letters. But in actual fact they are slightly different in their
    shapes, especially the lowercase. The reason for the letters being
    different is so that the variable $a$ can be easily differentiated
    from the article `a' used in ``a horse'' for example.

    Whereas \texttt {cmit}\footnote {The normal italic Computer
      Modern font.} contains ligatures, \texttt{cmmi} does not, and
    includes instead the Greek lowercase and uppercase alphabets in
    italic.

    A strange feature of \fn{cmmi} is that it contains some
    old style digits. Thus one can write $\fam1 1789$ or $\fam1 1942$
    which are quite different from 1789 and 1942. But these digits are
    never used in maths, so they do not belong in a font that is
    designed for use in maths.

    The \fn{cmmi} font also includes some other useful\footnote
    {Only for scientists though.} symbols / glyphs that one can see on
    the corresponding figure in appendix \ref {app-fonts}.

  \item[Family 2: Computer Modern Symbols (\texttt {cmsy})] One can
    find in this font the calligraphic alphabet that some scientists
    use: $\cal{A B C D E F G H}$ $\cal{ I J K L M N O}$ $\cal{ P Q R S
      T U V W X Y Z}$; plus lots of other symbols that only
    mathematicians could want to use: $\cap \cup \ominus \otimes
    \bigtriangleup \exists \; \forall \subset \le \succ \leftarrow
    \ldots$ See figure in appendix \ref
    {app-fonts}.

  \item[Family 3: Computer Modern Extensibles (\texttt {cmex})] All
    three sizes in this family are the same.  \texttt {cmex} mainly
    contains symbols that change size, automatically.
    One can produce:
    \[ \left\{
    \begin{array}{ll}
    u(x,y,z,t) & = u_{0}(x,y,t) + U(x,y,z,t) \\
    v(x,y,z,t) & = v_{0}(x,y,t) + V(x,y,z,t) \\
    w(x,y,z,t) & = w_{0}(x,y,t) + W(x,y,z,t) \\
    w'(x,y,z,t) & = w'_{0}(x,y,t) + W'(x,y,z,t)
    \end{array}
    \right. \]\label {extens-example}%
    with four or ten lines, and the `\{' will get bigger and bigger of
    its own accord, without the user specifying anything more. \texttt
    {cmex} also contains wide accents, so one can produce: $\widehat{a}$
    $\widehat{ar}$ $\widehat{arg}$. I have previously spoken about the
    automatic size change of some operators, whether in text, or in
    display, style. These double sized `big operators' are in \texttt
    {cmex}:
    $\bigcap$ and $\coprod$ in text style, and in display style:
    $$\bigcup \ \mbox{and}\ \coprod\ \mbox{and} \ldots$$
    The total contents of \fn {cmex} is
    shown in a figure in appendix \ref {app-fonts}.

    Most of the glyphs in \texttt{cmex} have a strange metric
    particularity, that makes them \TeX\ specific. Thus no other
    typesetting system can use those glyphs. Vice versa \TeX\ could not
    use those glyphs if they were made for another typesetting system.
    I spent a certain amount of time trying to understand all the
    tricks hidden in \texttt {cmex}, and wrote a document on the topic
    (see appendix \ref {app-rep-cmex}). The math font group was then
    able to take decisions concerning the replacement of \fn
    {cmex}.

  \item[The AMS symbol fonts: \texttt {msam} and \texttt {msbm}.] Many
    more mathematical glyphs, and an extra blackboard bold alphabet.
    They are not part of the standard \TeX, and are not loaded
    automatically in a family, but they are used on many sites. They
    were designed for the AMS: American Math Society, for use with
    \TeX, and are now very widely spread. Their contents are shown in
    figures in appendix \ref {app-fonts}.

  \end{description}


  \subsection {How does \TeX\ identify glyphs? }
    \begin{description}

    \item[Glyph names.] In Plain (see section \ref {plain-tex}) many
      glyph names are defined. They refer to some of the numerous
      glyphs \TeX\ can typeset.

      The user can also define his own names for glyphs. To a
      glyph name must be associated a family number, and a position
      in the given family. On top of that \TeX\ likes to know which
      class the glyph belongs to. As well as the classes that have
      already been defined, there is an extra one:

    \item[The \textem {`variable family'} class and the \cn{fam}
      variable.] \label{var-fam} This class has nothing to do with
      spacing, and, to my mind, treating it as a class is one of
      Knuth's mistakes. It is used in particular for letters, but it
      could have other uses. If the calligraphic, upright, and italic
      letters all have the same position in their respective fonts,
      one does not want to define a different name for each letter in
      each shape. Instead, \TeX\ has a \cn{fam} variable, that
      contains the number of the current family where glyphs should be
      taken from. So when a glyph is of class \textem {`variable
        family'}, it is taken from the family number \cn{fam}. But
      that is not enough. Sometimes the \cn{fam} variable can be
      equal to $-1$, and there is no family number $-1$.  In such a
      case a default family number is used. So together with the class
      and the position, one can assign the default family number for
      each glyph name. When a glyph is not defined as being variable
      family, it always comes from the same family, and its family
      number is linked to its name in the same way as the class
      number.

      Example: when the user enters math mode, \cn{fam} is equal to
      $-1$, the letters come from the default family. By typing:
      \verb|$abda$| which produces `$abda$', one can see that the
      default family for letters is family number 1 (See family
      descriptions). If the user assigns the family variable to $0$
      then the letters will come from family $0$. Thus
      \verb|$\fam0 abda$| produces `$\fam0 abda$'. (See family
      descriptions).
    \end{description}


\section{Font metric files: The ``\texttt{.tfm}'' files}
  \subsection{A theoretical overview}
  When \TeX\ is typesetting a page, and making all the calculations
  that are necessary for this, it does not need the actual picture of
  the glyphs. All \TeX\ needs at this stage is the dimensions of the
  glyphs, and other numerical data. That information is in the
  ``\texttt{.tfm}''\footnote{\texttt{tfm} stands for ``\TeX\ font metric
    file.''} files, and every font has one.  Without it, the font is
  unusable as far as \TeX\footnote{There are some slight exceptions to
    this rule: in some cases a given font can use another font's
    ``\texttt{.tfm}'' file. But the visual results are not very good.}
    is  concerned.

  For mathematical typesetting \TeX\ uses all the information that a
  ``\texttt {.tfm}'' file can give. One of the first things I had to do
  was to study and understand the machinery hidden in the math fonts
  ``\texttt {.tfm}'' files. From a general point of view a font metrics
  file can contain the following data: \footnote {This is not
    restricted to \TeX.  Although the file formats may be slightly
    different, PostScript type fonts and others use similar
    metric files.  One can find programs to convert the files
    from one format to another.}

  \begin{description}
  \item [Font dimensions.] These are global parameters
    for the whole font. In a normal text font one would find the
    slant (positive on an italic or slanted font), the size of the
    interword space, other interword spacing parameters, more general
    spacing parameters, and the x-height. The latter is the height of
    the `x' glyph, and is used for correct accent positioning.

    The fonts in family 2 and 3 are a little special as far as font
    dimensions are concerned. \TeX\ looks in family 2 and 3 for more
    font dimensions than usual. This extra information is used for
    special math spacing.

  \item [Glyph dimensions.] Each glyph has a height, a width and a
    depth specified in the ``\texttt{.tfm}'' file. The height of the
    box that surrounds a glyph is equal to the height of the glyph
    plus the depth of the glyph, whereas the width is that of the
    glyph. I think it is important to say that \textem{these values
      are theoretical, and can be quite different from the real size
      of the glyph.} Thus some glyphs are bigger than their box. A
    good example of this is the italic `f': \textit{f}. The top right
    end, and the bottom left end stick out of the box. The right hand
    side of a given glyph box is also the left hand side of the
    next\footnote{The box on the right of the first one.} glyph box.

  \item [Kerns.] \label {about-kerning} They are necessary for the
    \textem{automatic adjustment } of the spacing between two glyphs.
    Many non professional electronic typesetting systems have for a
    long time ignored this refinement of traditional typesetting. The
    problem is the following: for visual comfort all the letters of
    the alphabet cannot be spaced in the same manner. For instance
    when an `A' is followed by a `V', the two letters must be brought
    closer together to produce `AV' versus `A{V}'. In other cases
    letters must be separated a little to produce `aj' versus `a{j}',
    or `f!' versus `f{!}'. Otherwise the spacing does not look correct
    compared to the spacing of surrounding letters.  In the
    ``\texttt{.tfm}'' file, for each glyph one can specify kerns with
    every other glyph \textem{of the font}. \textem{When two glyphs
      that are kerned in the ``\texttt{.tfm}'' file are found side by
      side in the right order, \TeX\ automatically brings them closer
      together, or farther away.}

  \item [Ligatures.] \label {about-ligs} Here again, the idea is to
    improve visual comfort, and reading. Some letters when followed by
    other particular letters do not look right. In this case the two
    glyphs side by side must be replaced by another glyph that will
    look much better. This is called a ligature. The best and very
    well known example occurs when an `f' glyph is followed by an `i'
    glyph.  The non-ligatured glyphs look like `f{i}', and
    \textem{\TeX\ automatically replaces} them with the ligature that
    looks like `fi'. In the ``Collection La Pleiade'', one can see many
    other ligatures if one looks hard enough.

    In \TeX\ version 3 the concept of ligatures is more general. It
    can use more than two letters, and has other interesting new
    features.

  \item [Italic corrections.] For this I can only quote Frank
    Mittelbach:
    \begin{quote}
      ``At the points where one switches from slanted or italic to
      upright, the glyphs usually come too close together, especially
      if the last slanted/italic glyph has an ascender\footnote{Here
        is something that has not been defined. The following letters
        have ascenders: l,k,h,f,t,b,d, in lowercase. One can guess
        what descenders are.}. The proper amount of extra
      white space that should be added at this boundary is called the
      `italic correction'. Its value depends on individual glyph
      shape, and is therefore stored in the ``\texttt{.tfm}'' file for
      each glyph. [...]  For an upright font the italic corrections
      are usually null. [...]  In slanted and italic fonts, the italic
      corrections are usually positive...''
    \end{quote}
    Example: in the word {\it dif}ferent, the first f runs into the
    second one. Whereas in the word \textem{dif}ferent, a little space
    is left between the two f's. That space is the f's italic
    correction.

  \item [`Skewchar' kerning.]  The skewchar is a specific character
    that is used for placing mathematical accents. In math mode, when
    an accent is placed on a glyph, the accent is first centered on
    top of the glyph's box, and then shifted rightwards by the amount
    of the kern between the glyph and the skewchar.

    Each font should have its own skewchar. For most characters, the
    ``{\tt .tfm}'' file specifies the kerning of each letter with its
    skewchar. This is true for the computer modern fonts, but other
    font designers may have chosen not to use this feature.

    Why choose one skewchar rather than another? This is because the
    character $\mathchar"017F$ chosen by Knuth does not have any other
    kerning that could have been disturbed by the skewchar kerning.
    This choice may not always be good for all fonts, because it
    depends on what the character in position '177 is.  Thus a font
    designer might choose another skewchar and put the necessary
    kernings in the ``{\tt .tfm}'' file. Accent glyphs can be used as
    skewchars, because they are not usually subject to kerning from
    other glyphs.


  \begin{sloppypar}
  \item[Charlists.]  \label {about-charlists} Charlists enable several
    characters in a font to be linked together.  The \texttt {cmex}
    font uses charlists a lot: by just typing \texttt{charlist oct
      "000": oct "020": oct "022": oct "040": oct "060"} in the
    metafont source code, one links in order of increasing size all
    the left parentheses that are in the font. Thus with this
    information contained in the ``\texttt {.tfm}'' file, \TeX\ can
    find the parenthesis that has the correct size for what is
    currently being typeset.

    Charlists are used for:
    \begin{itemize}
      \item Linking variable-size delimiters,
      \item Linking variable-width accents,
      \item Pairing the ``big operators'' that are typeset in
        different sizes in display style, and text style.
    \end{itemize}
  \end{sloppypar}

  \item[Extensibles.] \label {about-extens} Extensible glyphs can
    change size vertically (not horizontally), according to the
    context. A good example is given in section \ref {extens-example}
    where the \{ grows automatically.

    \begin{sloppypar}
    An extensible glyph is identified with one of its pieces. One simply
    has to decide which piece is going to be used for this
    identification.  In the following example: \texttt {extensible
      oct"060": oct"060", 0, oct"100", oct"102";} --- which appears in
    the metafont code of \texttt {cmex}, the first oct"060" is the
    identifier of the whole extensible glyph.  The next three
    characters are the top, middle, and bottom pieces of the glyph
    whose identifier is oct"060". The last character code is that
    of the piece to be repeated as many times as necessary between the
    top and middle, and between the bottom and middle pieces. All
    pieces are optional except the repeatable piece.
    \end{sloppypar}

    This mechanism is also used for the construction of the radical
    sign. But it only works for glyphs that grow vertically.
    Therefore the horizontal braces and the horizontal extendable
    arrows cannot use this facility.

\end{description}


  \subsection{Example: analysis of `\texttt{cmmi}' metrics}
  %\footnotetext{\texttt{cmmi} stands for Computer Modern Math italic,
  %  and is the default font for typesetting mathematics.}
  I shall use here the usual \TeX\ notation for writing octal numbers.
  Thus all numbers preceded by a little quote sign like '77 are in
  octal.
  \begin{itemize}
  \item Most characters in `\texttt{cmmi}' are kerned with the
    skewchar.

  \item Many Greek uppercase and lowercase letters are kerned to: `.'
    `,' and `/', respectively '72, '73, and '75. This takes us right up
    to position '50.

  \item Characters from '50 to '74 are not kerned at all. This
    includes: funny horizontal half arrows, two hooks for the arrow
    construction set, two triangles, the old style digits, the `.', the
    `,' and the `$<$'.

  \item The `$/$' sign is kerned with 1\footnote {The digit.}, A, M,
    N, Y, Z. Nothing to say about `$<$' and $*$ and $\partial$.

  \item Then come the uppercase Latin letters. They are not kerned
    among each other.  They are not kerned either with the lowercase
    letters. Just like the Greek letters, some of them are kerned with
    `.' `,' `$/$'.

  \item In my \fn{.pl} file, it looks as though N and X have got two
    different kerns with '75.  (Not yet any explanation for this.) The
    3 musical signs are not kerned with anything. The horizontal
    parentheses are not either.

  \item The lowercase Latin letters are not kerned with each other,
    except `$d$' that is kerned with $Y, Z, j, f$. Some of them are
    kerned with `,' `.'  `$/$' in a way similar to that of uppercase
    letters.

  \item The last characters are not kerned at all.
  \end{itemize}

  For compatibility reasons, all these kerns will have to be in the
  new encoding.

  See appendix \ref {app-rep-cmex} for a complete description and
  analyses of \fn{cmex10.tfm}.


%  \subsection{Large operators}
%  \subsection{Delimiters}


\chapter{Dividing all the glyphs into groups}

\section{More vocabulary}\label{MoreVocab}
  \begin{description}
  \item[An ``encoding table''.] This conveys the traditional meaning
    of an encoding (see section \ref {about-encoding}). That is to say
    a set of 256 glyphs in a given order.  The expression ``encoding
    table'' is usually abbreviated: ``encoding''.

  \item[A ``slot'':] the usual word used for referring to a
    position in an encoding. A slot can contain a glyph, or be empty.
    It is represented by an integer between 0 and 255. A slot is
    \textem{not} a family in spite of the usage some people make of this
    word.

  \item[The ``math kernel''.] This terminology is used to specify the
    minimal group of fonts that is necessary for the math facility to
    work, as described in the \TeX\ documentation\footnote{And
      \LaTeX, AMS\TeX, etc., documentation.}.  In D.~E.~K.'s
    package (Plain) the math kernel consists of the families
    numbered from 0 to 3. Together with the kernel, many other fonts can
    optionally be loaded and used.

  \item[A ``math encoding'':] considered here as a
    whole. Not just one 256-glyph encoding table, but a set of $x$
    encoding tables, where $x$ is greater than or equal to the number of
    fonts in the math kernel.  I will sometimes refer to this concept
    with the abbreviation ``M-encoding''.

  \item[The ``default alphabet'':] the alphabet that is used
    when a user types \texttt{\$abc\$}. With Plain \TeX's math
    encoding that produces $abc$.

  \item[``Glyph compatibility'':] two encodings (or M-encodings) are
    glyph compatible, if they contain the same glyphs. The latter do
    not systematically have to be in the same positions. However
    identical glyphs must have the same
    metrics. The kerning and ligaturing information must
    also be identical in both M-encodings.


\end{description}

\section{General approach}
Taking all the glyphs one by one, and putting them in a font encoding
would have been too easy, and above all not satisfactory.  Instead one
must divide all the necessary glyphs into groups and subgroups, and then
try to match groups in individual encoding tables according to all
the constraints.

For instance, a typical group is the Latin alphabet: it includes the
uppercase letters A-Z, and the lowercase letters a-z.
Mathematicians often use accents on letters. For this they
need a dotless `i' (looks like: `$\i$') and a dotless `j'
(looks like: `$\j$') with every Latin alphabet. Thus the Latin
alphabet group contains the uppercase and lowercase letters, the
dotless `i' and the dotless `j'.

The grouping is based on different types of constraints: some
technical, and some based on glyph usage. These are detailed in the next
section.

\section{Grouping constraints}
Before being able to group the glyphs, grouping rules had to be
established.\footnote {I've put in appendices
\ref {app-accents}, \ref {app-rep-cmex}, and \ref {app-delims}, three
of the documents that I wrote for this purpose.}

At first I did not realise the importance of the design similarity
constraint for the person designing the font. Thus the first two
proposals did not really take it into account at all.

  \begin{description}
    \item [Kerning.] See section \ref {about-kerning} for a definition
    of  kerning. The kerning information for a given font can be found
        in its \texttt{tfm} file.  Thus two glyphs from the same
        encoding table can be kerned together, but the letter `f'
        belonging in a given encoding table, cannot be kerned with a
        glyph (the open parenthesis for instance) belonging in another
        encoding table. A group resulting from this constraint is: the
        group of glyphs that must be kerned with the default alphabet.
        This group and the alphabet will have to live in the same
        encoding table. This constraint is considered to be one of the
        most important.

        In fact this type of grouping is not so much grouping together
        all the glyphs that must be kerned, but putting together in one
        group the glyphs that need to be kerned with another group, in
        order to facilitate the counting.

  \item[Ligaturing.] See section \ref {about-ligs} for a definition of
        ligaturing.  In a similar manner to kerning, ligatures require
        that various glyphs live in the same encoding. If the letter
        `f' is to be ligatured with the letter `i' and produce the `fi'
        ligature, then those three glyphs `f',`i', and `fi' must live
        in the same encoding. In actual fact ligatures are not really
        used in math fonts. But they may be necessary one day. So
        empty slots should be left for ligatures where possible.

  \item[Design similarity:] another reason for which the letter A must
  live in the same font encoding as the letter B, and all the other
  letters. All the glyphs in a normal text encoding are designed to be
  visually compatible with each other. This should also be the case in a
  math encoding.  But all the compatible glyphs cannot live in the same
  font. There are simply too many of them. So one has to make a
  choice. Which glyphs must be alike? A lot of groups result from this
  constraint, which even comes into play when putting the groups
  together into encodings. A good example is the sim group.  `Sim' is
  the name given to the glyph: $\sim$. Many mathematical symbols contain
  such a sim. $\approx$ cannot be separated from $\sim$ because they
  must look alike, and for that they must be designed by the same
  person.  Even more, the $\sim$ and the $\approx$ should be produced in
  metafont using the same sub-routine, with the same parameters. This
  also explains why it is important that the letters of a given style
  all live together.


  \item[Charlists.] The reader is advised to re-read section \ref
    {about-charlists} if he no longer remembers what charlists are. The
    information that such and such a glyph is part of a charlist is in
    the \texttt{tfm} file.  Therefore charlists are also restricted to
    one font. Because of this all glyphs that are intended to be
    linked in a charlist must be put in the same font. Concerned
    by this restriction are:
    \begin{itemize}
    \item Wide accents, which are linked with a charlist in order of
      increasing size,
    \item Big delimiters: same as accents,
    \item The two sizes of big operators which are linked,
    \item All the different sized radicals.
    \end{itemize}

  \item[Extensibles.] In case of memory deficiency the reader is
    advised to take another look at the relevant passage in section
    \ref {about-extens}. As for charlists, the extensible
    information is part of the \texttt {tfm} file. The different
    pieces of an extensible glyph must therefore live in the same
    font. Concerned by this restriction are:

    \begin{itemize}
      \item Extensible delimiters (not all delimiters are extensible).
        This constraint is doubled by the fact that an extensible
        delimiter is often the last element of a charlist. Thus many
        glyphs must live together.

      \item Radicals: the last element of the radical charlist is an
                   extensible: it grows as high as necessary. In
                   the same way as delimiters, the glyphs used to
                   build the extensible radical are a subgroup of
                   the radical charlist group, and therefore must
                   live with the other members of the charlist.

      \item Vertical arrows or bars\footnote {But not horizontal
          arrows.}.
    \end{itemize}

  \item[Constructed symbols.] Some glyphs in a font are especially
    designed to be put next to each other. Good examples are the
    horizontal arrows, and the horizontal curly braces. Because of
    their horizontal characteristic, the extensible mechanism cannot
    be used. So the $$\underbrace {\mathrm {horizontal\ curly\ brace}}$$
    is built up with abutting glyphs. These glyphs must be of the same
    weight, and very well adjusted in order to fit together properly.
    They must therefore live in the same font.

\end{description}


\section {Constraint importance}
The design constraint is less important than the kerning constraint.
Whereas keeping empty slots for ligaturing has very little
importance compared to the two former constraints.

Charlists' and extensible lists' members \textbf {must} stay together,
without exception. One could establish the following order of
importance:

  \begin{enumerate}
  \item Extensibles,
  \item Charlists,
  \item Constructed symbols,
  \item Kerning,
  \item Ligatures,
  \item Design similarities,
  \item Empty slots for ligatures.
  \end{enumerate}


\section{A few groups}

\begin{itemize}
\item The Greek letter sets,
\item The Greek-like glyphs,
\item The Latin letter set,
\item The Latin-like material,
\item The digits,
\item The vertical arrows,
\item The horizontal arrows,
\item The accents, wide, double, underaccents,
\item The core symbols: must live with the default alphabet,
\item The subset group,
\item The greater than group,
\item etc ...
\end{itemize}

A lot of the above groups were still divided into smaller groups in
order to make things fit in the encoding tables. Compromises had to be
made, in order to respect the constraints set by compatibility.


\chapter{Making encoding tables}
Similarly to the constraints governing the grouping, the constraints
governing the division into encoding tables listed below were not at
all obvious, and had to be thought of, and fully understood.

The construction of encoding tables largely depends on the main goals of
a new math encoding.

\section{The constraints of group grouping}
\begin{description}

  \item[Glyphs access.] (alphabets, variable family) This is
    another technical constraint due to the way \TeX\ accesses glyphs.
    It is also a user interface constraint, because the idea is to
    make alphabets easily accessible to the user.

  Due to the variable family mechanism (explained in section \ref
  {var-fam}), it is very practical for the user that font
  encodings contain only one alphabet. Thus when the fonts are
  loaded into the families, different letters can be accessed by
  changing the \cn{fam} variable, and typing the usual letters on
  the keyboard. For instance, when \cn{fam} is equal to $-1$, the
  default family is used. When \cn{fam} equals 2 the user can
  get the script alphabet. For this the user need only type
  \verb|$\fam=2 A,B,C$| and the letters $\cal{A,B,C}$ are
  produced.

  The alternative would be to have many alphabets in one encoding.
  In that case, to access script letters A,B,C for example, the user
  would have to type \verb|$\scriptA,\scriptB$|. That would be much more
  difficult to read, and less practical.

  This constraint --~due to glyph access~-- sets the shape of the whole
  M-encoding and has a very high priority.

  \item[Font access.] This only concerns the font that will replace
    \texttt {cmex}. For compatibility reasons, the math font group
    decided that it would be reasonable to try and replace \texttt
    {cmex} by a font that can be loaded in one size, \textem {and} in
    three sizes. Therefore the \texttt{cmex} replacement can only
    take:
    \begin{itemize}
    \item Wide accents,
    \item Big delimiters,
    \item Big Operators,
    \item Radicals (with a small change),
    \item Vertical extensible arrows.
    \end{itemize}
    It would be too long to justify the decision here, but the relevant
    document is in appendix \ref {app-rep-cmex}. One of the consequences
    of this is that one cannot put an alphabet in \texttt {cmex}'s
    replacement encoding. An alphabet must be available in all three
    sizes.
    Other glyphs are also victims of this limitation.

  \item[Kerning.] Obviously, if glyphs in two separate groups must be
                kerned, then those two groups must live together.

  \item[Design similarity.] This is a designer's constraint and
  therefore has low priority. Because of this low priority, it often
  happens that big design similarity groups are subdivided into smaller
  ones. In such a case one must try insofar as is possible to put the
  smaller groups back together.

  \item[Bold face.] Mathematicians and physicists often use boldface
    glyphs. These can either be directly available in some of the
    encodings, whereby the encoding will specify: here should go a
    bold uppercase `A' ---~and that could be next to a non-bold glyph;
    or none of the encoding tables specify whether or not the glyphs
    are bold, and a bold version of the whole M-encoding or of each
    encoding table can be made ---~as with text fonts.

           To reduce the total number of glyphs in the M-encoding, the
           second possibility has been chosen. But this induces another
           constraint on the global M-encoding: the individual encoding
           tables must be designed in such a way that the most commonly
           used bold glyphs are put together.

  \begin{sloppypar}
        \item[Compatibility with other font-using programs.] Since the
           invention of ASCII code, the first 32 slots of fonts were
           often not used for glyphs, but reserved for control
           codes. Today many programs are still not designed to use the
           first 32 slots of a font. Thus fonts should not contain any
           glyphs in those slots.
           But this would be a big waste for \TeX, because it can use
           glyphs in slots below 32.

           However, if the glyphs in the critical slots do not have any
           kerning relationship with other glyphs in the font, then the
           former can be put in another font, and be used with little
           difficulty even in problematic software. This seemed a fairly
           good compromise, so it was decided to fill slots below 32
           with glyphs that do not have any kerning with the others, and
           could thus be separated from them.

    On the same lines: some programs are unable to use fonts that do
    not have a space in position 32. To solve this problem, only one
    slot is concerned, so it was decided to include a space in every
    font. This should not be a problem.

  \item[Grouping \TeX\ specific glyphs: another compatibility
    issue.] \hfil
    The present \texttt{cmex} font/encoding contains glyphs
    that cannot be used by other typesetting systems, because they
    are set in a strange way. Similarly \texttt{cmsy} contains one
    glyph that is set in a strange way: the radical sign.  Therefore
    the whole of \texttt{cmsy} is unusable for other programs. Such a
    mistake must not be reproduced.

        It is hoped that the new \TeX\ math encoding will set a
        standard, that will not only be used by \TeX, but by all
        systems that typeset mathematical formulae. If everything goes
        according to plan, in the next few years many math fonts will
        exist, for many different systems, and they will all use the
        same M-encoding. Thus it will be very easy to use the same
        fonts on different systems. One day a \TeX\ user will be able
        to take a mathematical font from Microsoft Word, and convert it
        easily in order to use it with \TeX.

    If \TeX\ specific glyphs are grouped in one font, there will only
    be one problematic font. As it happens, all \TeX\ specific glyphs
    are more or less geometric, so they could be used next to
    different math fonts. On the other hand, if \TeX\ specific glyphs
    are spread around in many fonts, then many ``imported'' fonts will
    not be usable by \TeX\ without major changes.

    From a commercial point of view, if a font designer creates a math
    font for Adobe, the work necessary for adapting it to \TeX\ must
    be reduced to the minimum.  Otherwise nobody will provide any new
    math fonts for \TeX.

    \TeX\ specific glyphs are the following:
    \begin{itemize}
    \item The delimiters,
    \item The large and small `bigops',
    \item The radicals.
    \end{itemize}


  \item[Compatibility with Plain and \LaTeX.] Let us consider a user
    that has typed a document with the present math encoding, and in
    so doing has saturated the available families. If the new math
    encoding does not guarantee Plain and \LaTeX\ glyph compatibility
    with a \textem {maximum of 4 fonts,} then the document will
    not be able to run with the new math encoding: not enough
    families. Thus one should make the first four encoding tables of
    the global M-encoding glyph compatible with the Computer
    Modern cluster: cmr, cmmi, cmex, and cmsy.


  \item[Compatibility with AMS\TeX, AMS\LaTeX, and LAMS\TeX.]  Let us
    consider this time a user that has typed a document with the
    existing AMS\TeX\ or AMS\LaTeX\ package, and in so doing has
    saturated the available families.  If the new math encoding does
    not give AMS\LaTeX\ and AMS\TeX\ glyph compatibility with no more
    than 6 encoding tables, then that document will not run with the
    new math encoding, for lack of families.

    The first 6 encoding tables must be one way glyph compatible with
    the fonts provided in the AMS packages.
  \end{sloppypar}

  \item[Trying to give the Plain \TeX\ user a logical cluster of new
    glyphs.] No comment.

\end{description}

\section{The Aston-LC math encoding}
This is one of the proposals first thought of, but it is
not the one finally chosen, because it had many problems.

LC stands for Latin core. The main characteristic is the separation of
the Greek letter sets from the Latin ones.  In keeping these two sets
separate, we give the Greek letters an identity of their own, thus
making them quite independent of the rest. The idea goes in the
direction of orthogonal grouping. All the encodings that contain
letters would have them in the Cork encoding positions, thus making
access very simple. In fact this positioning concept will be taken
farther: Cork encoded glyphs that are in the new encoding, will keep
their Cork position.

\subsection{The encoding tables}
  \begin{description}
  \item[The text symbols: the TS encoding.] Here would be included the
  old  style numerals, and most of what is to be taken out of the
  present math encoding, because it does not belong with the rest of the
    math glyphs. Other symbols could be added in this encoding.

    This encoding is not part of the M-encoding, but it will contain
    symbols that previously were accessed via the math fonts. In
    normal usage, this font will not be loaded in a family. It will
    simply be loaded as a normal text font.

  \item[The base: a Cork encoded latin text font.] The main use of
    this font would be to typeset function names like \texttt
    {\string\log}. The idea being that the user can actually choose
    this font among the existing Cork encoded fonts. Thus `sin' can
    actually be typeset in the same style as the text, or in another
    special style to match the rest of the math glyphs.

  \item[The core: the MC encoding.] It would not contain any Greek
    glyphs (unlike \texttt {cmmi}). The basic accents (only one size)
    would be here, next to the default numerals. It would also include
    all the upper and lowercase default latin alphabet, all
    of the symbols that are most commonly used, and glyphs that must
    be kerned with the default alphabet.

  \item[The Greek alphabets: the MG encoding.] This encoding table
    would contain all the upper and lowercase Greek letters in
    upright and italic, plus some variable shape Greek letters, also
    in upright and italic, and some numeric Greek letters. Any other
    Greek related glyphs would also live in MG. If place is still
    available, one could include some symbols. An advantage of
    putting the italic Greek and upright Greek together, is that both
    are often requested in medium and in bold weight.

  \item[The extensibles: the MX encoding.] This encoding would look
    very much like the present \texttt {cmex} encoding: the usual
    extensible characters, together with some new ones. It could
    include any characters that have strange \TeX\ features like big
    descenders. Thus glyphs that are not compatible with the
    outer world would be kept together.

  \begin{sloppypar}
  \item[The math symbols: the MS1, MS2, MS3... encodings.] Each of
    these encodings would contain a set of Latin letters, like for
    instance script or blackboard bold, in upper or lowercase, or
    both, together with a set of matching accents if needed. In some
    cases a place should also be reserved for a set of matching
    numbers.  The rest would be filled up with symbols. There could be
    an  MS$_i$ encoding for:
    \begin{itemize}
    \item Calligraphic,
    \item Script,
    \item Open,
    \item Old german, (Fraktur)
    \end{itemize}
  \end{sloppypar}

  \end{description}


\subsection{Other requested typefaces}
  \begin{itemize}
  \item A ``text-like'' italic or slanted font for computer science
    identifier-names and the like.  This would be Cork encoded.
  \item A ``bold upright'' for use as variables -- e.g. vectors in
    physics notation rather than the arrow over an italic letter. This
    would be Cork encoded.
  \item Bold italic for use as variables: an MC or Cork encoding.
  \item Bold Old german (occasional).
  \item Bold script (occasional).
  \item Sans serif lightface (occasional): Cork encoded font.
  \item Sans serif boldface (occasional): Cork encoded font.
  \item Bold symbols: the same encodings loaded in bold.
  \item Ultra bold symbols: the same encodings loaded in ultra bold.
\end{itemize}


\subsection{Summarizing the family occupation}
The following encodings are needed in the kernel:

\begin{enumerate}
\item A Cork encoded upright text font.
\item An MC encoded font containing the default alphabet, digits,
  accents, and symbols.
\item An MS$_1$ encoded symbol font for calligraphic/script.
\item An MX encoded extensible font.
\item An MG encoded font for Greek italic and upright.
\item An MS$_2$ encoded symbol font for Open and symbols.
\item An MS$_3$ encoded symbol font for Old german and symbols.
\end{enumerate}

This occupies 7 families, and leaves 9 free for anything else, (like
bold or sans...) and makes many symbols available.


\subsection{Pros and cons}
This proposal did not respect the limit of 4 and 6 families
(compatibility with Plain \TeX\ and AMS\TeX), nor did it enable the Latin
and Greek to be kerned together, nor could the Greek be kerned with the
same symbols as the Latin alphabet, unless these were
repeated. Generally, to get the equivalent of Plain \TeX, one would have
had to load 5 families, and to get the functionalities of AMS\TeX, one
would have needed to load 7 families.

One of the advantages was the orthogonality of the individual encoding
tables, i.e. there were no strange mixes like Latin and Greek, or
anything of the sort.

The main reasons for rejecting this proposal are:
\begin{itemize}
\item it is a big family consumer. In particular bold Latin and Greek
  would occupy two extra families, and they are frequently requested.
\item it does not enable kerning between the Greek and punctuation
  which is needed for compatibility ---~the punctuation is in
  a separate encoding table from the Greek.
\end{itemize}

The next proposal is more attractive...


\section{The Aston LGC math encoding}

LGC stands for Latin Greek core.  One of the main features of this
proposal is that the Greek and Latin alphabets have been put
together. In one font they could be upright, and in the other they could
be italic. A good reason for doing things this way is that the font
dimension called slant may give a few unexpected problems if italic and
non italic glyphs are mixed in the way that they would have been in the
Aston LC math encoding.


\subsection{The encoding tables}
\begin{description}

\begin{sloppypar}
\item[The text symbols: the TS encoding.] This would be the same as in
  the previous proposal, and would be used in a similar manner.

\item[The base: a Cork encoded latin text font.] This would be the same
  as in the previous proposal, and it would be used in a similar
  manner.
\end{sloppypar}

\item[The core: the LG encoding.] Instead of the MC encoding (in the
  previous proposal), the core could be duplicated. Once in upright,
  and once in italic.  The LG encoding would contain one instance
  of both Latin and Greek letter sets. So two LG encoded fonts would
  be used (upright, and italic).

  As far as the other slots are concerned, they could be filled in
  with the most used math symbols (similarly to the MC encoding),
  these would then appear once in upright, and once in italic. An
  alternative to such a duplication would be to make an LG1 encoding
  that would contain different symbols from an LG2 encoding, and these
  would always be in upright, whereas the letters would be specified
  as italic in LG1, and upright in LG2.

  \textbf{Note.} The user could choose whether he wants to load both LG1
  and LG2, or only one of the two.


\item[The extensibles: the MX encoding.] It would be the same as in
  the previous proposal, and it would be used in a similar manner.

\item[The math symbols: the MS1, MS2, MS3... encodings.] {\sloppy These
    would be the same as in the previous proposal, and they would be
    used in a similar manner.}

\end{description}

\subsection{And the rest?}
Similarly to the previous proposal, many other fonts could be loaded
in all the free families.


\subsection{Summarising the Family occupation}
\begin{enumerate}
  \item An LG encoded font containing Latin and Greek italic. (This
    could be LG1 if necessary. See explanations above.)
  \item An MS$_1$ encoded symbol font for calligraphic/script.
  \item An MX encoded extensibles font.
  \item An LG encoded font containing latin and Greek upright. (This
    could be LG2 if necessary. See explanations above.)
  \item An MS$_2$ encoded font for Open and symbols.
  \item An MS$_3$ encoded font for Old German and symbols.
\end{enumerate}

Only 6 families are occupied. This leaves 10 families free for
anything else (like bold or sans\dots) and makes many symbols
available.


\subsection{Pros and cons}

One of the advantages of this proposal is that kerning can be done
between Latin and Greek (as long as they are in the same shape), and
between Greek and other symbols present in the encoding such as
punctuation. Also when bold is requested, one gets the bold Latin and
the bold Greek in the same font table, which again consumes fewer
families than having the two separate.

This proposal occupies fewer families than the previous one.

Reasons for abandoning the Aston LGC math encoding:
\begin{itemize}
  \item The user must be able to choose the look of his log, sin, and
    friends.  He may want them to be either text compatible, or
    compatible with the other math alphabets and the rest of the math
    glyphs in general. The choice must be left open, and the math font
    designer must not impose his decision on the user.
  \item A solution to the previous problem is to include another
    font for this purpose, as in the previous proposal. But then the
    family occupation rises up to 7, and three Latin alphabets are
    loaded, of which one (the LG upright) is probably not going to be
    used much. Thus a lot of precious space is wasted.
  \item The `Yaasp' proposal is much more attractive.
\end{itemize}


\chapter{The proposed YAASP encoding}
\label{app-yaasp}
\begin{quote}
   This chapter is the final proposal that was made. It is also the body
   of the official document that was produced. The reader may find some
   similarities with the previous sections, for instance some of the
   definitions can be found in section \ref {MoreVocab}. Also a lot of
   the points discussed in the global policy section have already been
   discussed.

\end{quote}

\input{l3d007a.tex}


\chapter{The glyph groups}

\input{l3d007b.tex}


\chapter{Final conclusions}

The `Yaasp' proposal, which is the final proposal made, is given in
chapter \ref{app-yaasp}.

Working on the \LaTeX3 project in Mainz was very interesting for many
reasons:
\begin{itemize}
\item People next to me were working on net management and system
  maintenance. I used email intensively for communicating
  with other people working on the project. I used tar files and
  other programs to send large amounts of information to other people.

  It was a very good introduction to the network oriented studies I
  will be doing in my last year. I don't think that I really knew before
  what the network was.  Now I have a better idea.

\item I installed a test version of NFSS2. It was a good
  introduction to software installing, and enabled me to discover a
  few more UNIX tools. I hope I will be able to use this knowledge for
  installing various packages for \TeX\ and especially emacs in the
  Ecole des Mines de Saint Etienne.


\item The work I did was to a large extent research work and thus
  involved many topics for which no previous experience was
  available. For this reason the work seemed to go slowly, and we often
  had to go back to the drawing board and re-think points that we
  thought were already finished and done with. All of this, of course,
  was made worse by the fact that a lot of the communication was done
  via email.

   However, the final result was very positive. By the end of the three
   months, a complete proposal for a new math font set-up was produced.
   For Frank Mittelbach, technical director of the \LaTeX3 project,
   this is a good achievement, and a big step forward. The next stage is
   to try and implement the proposal, and start testing it.


\item Last but not least, I greatly improved my English and my German.
  I learnt about another country, about its educational system, and
  about its habits, which one can only grasp by working in the country.
  I learnt how to integrate in a foreign environment, and how to deal
  with a few distressing problems: it was not easy to keep calm when
  my car packed up the week-end before I had planned to go back to
  France.  In one's own country garage mechanics aren't easy people to
  deal with; matters get even worse when it is in a foreign country.

\end{itemize}

The whole experience was very enriching in many domains. The difficulty
I had in speaking German proved to me that the teaching of languages in
the \emph{Ecole des Mines de Saint Etienne} is not good enough, and
must be improved and given more importance.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% appendix starts here.
\appendix


\chapter{Analysing \TeX's positioning of \cn{mathaccent}s}
\label {app-accents}
\input{l3d007c.tex}


\chapter{A close look at extensible characters}
\label {app-delims}
\input{l3d007d.tex}


\chapter{Replacing \texttt{cmex} ?}
\label{app-rep-cmex}
\input{l3d007e.tex}


\chapter{Fonts and font encodings}
\label{app-fonts}

The first 4 figures given on the next few pages are the standard fonts
used in plain \TeX\ for maths.
\begin{itemize}
\item \textbf{Computer Modern Roman:} loaded in family 0 shown on figure
  \ref {fnt-tbl-cmr}.
\item \textbf{Computer Modern Math Italic:} loaded in family 1 shown on
  figure  \ref {fnt-tbl-cmmi}.
\item \textbf{Computer Modern SYmbols:} loaded in family 2 shown on
  figure \ref {fnt-tbl-cmsy}.
\item \textbf{Computer Modern EXtensibles:} loaded in family 3 shown on
   figure \ref {fnt-tbl-cmex}.
\end{itemize}
%
Figure \ref {fnt-tbl-dcr} shows the DC-encoding with which the new
math encoding is designed to live. Unlike the \fn{cmr} encoding, the
Cork encoding does not include any Greek glyphs; this prevents its use
in family 0 for maths. But an upright text font is needed in family 0,
for mixing sub- and super-script in text. This problem has until now
prevented the wide spreading of the DC-fonts.

The next two fonts shown in figures \ref {fnt-tbl-msam} and \ref
{fnt-tbl-msbm} are the AMS fonts, designed especially for use in
maths.
%
\begin{figure}[b]
  \dofonttable{cmr10}
  \caption{The \fn{cmr} encoding: 128 glyphs.}
  \label{fnt-tbl-cmr}
\end{figure}
%
\begin{figure}[b]
  \dofonttable{cmmi10}
  \caption{The \fn{cmmi} encoding: 128 glyphs.}
  \label{fnt-tbl-cmmi}
\end{figure}
%
\begin{figure}[b]
  \dofonttable{cmsy10}
  \caption{The \fn{cmsy} encoding: 128 glyphs.}
  \label{fnt-tbl-cmsy}
\end{figure}
%
\begin{figure}[b]
  \dofonttable{cmex10}
  \caption{The \fn{cmex} encoding: 128 glyphs.}
  \label{fnt-tbl-cmex}
\end{figure}
%
\begin{figure}[b]
  \dofonttable{msam10}
  \caption{The \fn{msam} encoding: 128 glyphs.}
  \label{fnt-tbl-msam}
\end{figure}
%
\begin{figure}[b]
  \dofonttable{msbm10}
  \caption{The \fn{msbm} encoding: 128 glyphs.}
  \label{fnt-tbl-msbm}
\end{figure}
%
\maxz=256 \maxiz=255
\begin{figure}[b]
  \dofonttable{ecrm1000}
  %\dofonttable{cmssdc10}
  \caption{The \fn{dcr} encoding: 256 glyphs.}
  \label{fnt-tbl-dcr}
\end{figure}
%

%\begin{figure}[b]
%  \dofonttable{cspex10}
%  \caption{The \fn{cspex} and Saint Mary Road encodings.}
%  \label{fnt-tbl-mary}
%\end{figure}


%\chapter{Requirements analysis}
%\label{app-require}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% end
\end{document}
\chapter{Daily detail of what I did (for personal record)}

\begin{itemize}

\jzitem{First week}
A lot of reading: Tex for the impatient...
Getting accustomed to the local environment: Emacs Bash Mwm...
Getting Telnet and mail connection with France.
Started my social report.
Looked a little at the code of latex.
Played around with latex. (wrote two letters)
Played around with the Tex boxes.
Read the chapter 7 from Frank's coming book.
Started to read a little more about the math fonts in \TeX.

\jzitem{Sunday 13th June}
Started this report. Got familiar with Frank's integrated documenting
system. Got to letter f in the tex index of commands. (evening)

\jzitem{Monday 14th June}
Trying to install NFSS2 in my environment.
Read a lot of the installation documentation.
Found the punk font, it was here.
Read to letter k of the tex index.(evening)

\jzitem{Tuesday 15th June}
Last details of installing nfss2.
Testing various features of NFSS2.
Playing with nfss2: first page of my report.
Read to letter p of the tex index.

\jzitem{Wednesday 16th June}
Still correcting a few problems with the installation of nfss2.
Installing the punk font in NfSS2:
made file nfpunk.dst based on nfpandor.dst.

\jzitem{Thursday 17th June}
Still installing the punk font in NfSS2.
Read mail about maths.
Went to get my car.
End of afternoon with Frank.
Read to letter t of the tex index.

\jzitem{Friday 18th June : time flies}
Ended the installation of the punk fonts.
Read the article from tugboat about the punk fonts.
Did the documentation for the pk fonts.
Played around with NFSS2 and texlatex: getting my presentation page ok.

\jzitem{Saturday 19th June}
Ended the tex index.
Read more about char in the Tex by topic.

\jzitem{Sunday 20th June}
Read still more about the Tex fonts and math fonts.
Tested char and loops =$>$ loops make problems in latex.
Tried PostScript fonts.

\jzitem{Monday 21st June} Tried to install the dunhill font with
nfss2: OK.  Maybe make a ``\texttt{.sty}'' file like punk. I find it
is a nice font.  Used raise and negative kerns for a joke.  Reading
more on math fonts.

\jzitem{Tuesday 22nd June}
Still more reading on mathfonts.-Families-mathcode-mathchar-greek
and testing. Finally solved all problems due to the loop.

\jzitem{Wednesday 23rd June}
Testing on skewchar, and more reading about maths.

\jzitem{Thursday 24th June}
More testing with skewchar.
Meeting with FMI

\jzitem{Friday 25th June}
All day : writing the paper about accents.

\jzitem{Week end}
nothing about tex or latex
oh yes: reading about fonts. (cahier gutenberg)

\jzitem{Monday 28th June}
Morning more testing with accents.
Afternoon: Meeting with FMI and Jorg K.

\jzitem{Tuesday 29th June}
Writing a summary of yesterday, and trying to figure out how to do the
work.

\jzitem{Wednesday 30th June}
Reinstalling NFSS2. Sending NFSS2 to Estonia. Reading all the mail
printed by Joerg.

\jzitem{Thursday 1st July}
More thinking about maths. Sending mail to people. Reading about the
Tex files.

\jzitem{Friday 2nd July}
More mailing.

\jzitem{Monday 5th July}
More mailing, meeting with Frank.

\jzitem{Tuesday 6th July}
More mailing, reading about the extensible chars.

\jzitem{Wednesday 7th July}
More mailing. Sending stuff to joerg. Dealing with Aston.
Continuing The paper on extensible chars.

\end{itemize}
\end{document}


% Local Variables:
% mode: latex
% TeX-master: t
% TeX-command-default: "LaTeX2+"
% End: