\documentclass[11pt, reqno]{article}
\usepackage[margin = 1.5in]{geometry}
\usepackage{amsmath, amssymb, amsthm, fancyhdr, verbatim, graphicx}
\usepackage{enumerate}
\usepackage[all]{xy}
%\usepackage{MnSymbol}
%\usepackage[utopia]{mathdesign}
%\usepackage{mathpazo}
%\usepackage{euler}
\usepackage[OT2, T1]{fontenc}
%\usepackage{mathabx,epsfig}
\usepackage{framed, hyperref}
\DeclareSymbolFont{cyrletters}{OT2}{wncyr}{m}{n}
\DeclareMathSymbol{\Sha}{\mathalpha}{cyrletters}{"58}
\numberwithin{equation}{subsection}
\pagestyle{fancy}
\lhead{}
%\renewcommand{\headrulewidth}{0.4pt}
%\renewcommand{\footrulewidth}{0.4pt}
%%%%%%%%%%%%%% COLOR COMMENTS! %%%%%%%%%%%%%%%
\usepackage{color}
\newcommand{\tony}[1]{{\color{blue} \sf
$\spadesuit\spadesuit\spadesuit$ TONY: [#1]}}
\newcommand{\reword}[1]{{\color{red} \sf $\spadesuit\spadesuit\spadesuit$ reword: [#1]}}
% changed above definition to make comments disappear
%\newcommand{dzb}[1]{}
%\newcommand{reword}[1]{}
% Color names: http://en.wikibooks.org/wiki/LaTeX/Colors#The_68_standard_colors_known_to_dvips
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mathrsfs}
\newcommand{\note}[1]{{\color{BurntOrange} $\blacktriangle\blacktriangle$\sf Note: [#1]}}
\newcommand{\chk}[1]{#1^{\smash{\scalebox{.7}[1.4]{\rotatebox{90}{\guilsinglleft}}}}}
%\def\acts{\mathrel{\reflectbox{$\righttoleftarrow$}}}
%\def\racts{\mathrel{\reflectbox{$\lefttorightarrow$}}}
\newcommand{\F}{\mathbf{F}}
\newcommand{\RR}{\mathbf{R}}
\newcommand{\CC}{\mathbf{C}}
\newcommand{\G}{\mathbf{G}}
\newcommand{\tr}[0]{\operatorname{tr}}
\newcommand{\wt}[1]{\widetilde{#1}}
\newcommand{\frob}[0]{\operatorname{Frob}}
\newcommand{\card}[0]{\#}
\newcommand{\pmat}[4]{\begin{pmatrix}#1 & #2 \\ #3 & #4\end{pmatrix}}
\newcommand{\pderiv}[2]{\frac{\partial #1}{\partial #2}}
\newcommand{\Q}{\mathbf{Q}}
\newcommand{\Z}{\mathbf{Z}}
\newcommand{\leg}[2]{\left(\frac{#1}{#2}\right)}
\newcommand{\mf}[1]{\mathfrak{#1}}
\newcommand{\nm}{\operatorname{N}_{K/\Q}}
\newcommand{\Cl}{\operatorname{Cl}}
\newcommand{\sgn}{\operatorname{sgn}}
\newcommand{\Gal}{\operatorname{Gal}}
\newcommand{\Mat}{\operatorname{Mat}}
\newcommand{\cl}{\overline}
%\newcommand{\cal}[1]{\mathcal{#1}}
\newcommand{\R}{\mathbf{R}}
\newcommand{\HH}{\mathbf{H}}
\newcommand{\ul}[1]{\underline{#1}}
\newcommand{\ol}[1]{\overline{#1}}
\newcommand{\wh}[1]{\widehat{#1}}
%\newcommand{\scr}[1]{\mathscr{#1}}
\newcommand{\PS}{\mathbf{P}}
\newcommand{\mbb}[1]{\mathbf{#1}}
\newcommand{\newpar}[1]{\noindent\textbf{#1}}
\newcommand{\Cal}[1]{\mathcal{#1}}
\newcommand{\A}{\mathbf{A}}
\newcommand{\et}{\text{\'{e}t}}
\newcommand{\mbf}[1]{\mathbf{#1}}
\newcommand{\rmono}[2]{\underset{#2}{\stackrel{#1}{\hookrightarrow}}}
\newcommand{\lmono}[2]{\underset{#2}{\stackrel{#1}{\hookleftarrow}}}
\newcommand{\rrat}[2]{\underset{#2}{\stackrel{#1}{\dashrightarrow}}}
\newcommand{\lrat}[2]{\underset{#2}{\stackrel{#1}{\dashleftarrow}}}
\DeclareMathOperator{\GL}{GL}
\DeclareMathOperator{\SL}{SL}
\DeclareMathOperator{\Frob}{Frob}
\DeclareMathOperator{\ab}{ab}
\DeclareMathOperator{\coker}{coker}
\DeclareMathOperator{\cyc}{cyc}
\DeclareMathOperator{\N}{\mathbf{N}}
\DeclareMathOperator{\Tr}{Tr}
\DeclareMathOperator{\Enc}{Enc}
\DeclareMathOperator{\Dec}{Dec}
\DeclareMathOperator{\PGL}{PGL}
\DeclareMathOperator{\Sp}{Sp}
\DeclareMathOperator{\SO}{SO}
\DeclareMathOperator{\Hom}{Hom}
\DeclareMathOperator{\Ind}{Ind}
\DeclareMathOperator{\Ima}{Im\,}
\DeclareMathOperator{\rank}{rank}
\DeclareMathOperator{\Jac}{Jac}
\DeclareMathOperator{\ord}{ord}
\DeclareMathOperator{\Aut}{Aut}
\DeclareMathOperator{\Rep}{Rep}
\DeclareMathOperator{\Nm}{Nm}
\DeclareMathOperator{\Spec}{Spec\,}
\DeclareMathOperator{\Art}{Art}
\DeclareMathOperator{\Lie}{Lie}
\DeclareMathOperator{\End}{End}
\DeclareMathOperator{\Isom}{Isom}
\DeclareMathOperator{\Inn}{Inn}
\DeclareMathOperator{\Out}{Out}
\DeclareMathOperator{\ad}{ad}
\DeclareMathOperator{\Ver}{Ver}
\DeclareMathOperator{\Br}{Br}
\DeclareMathOperator{\unr}{unr}
\DeclareMathOperator{\Res}{Res}
\DeclareMathOperator{\Frac}{Frac}
\DeclareMathOperator{\Div}{Div}
\DeclareMathOperator{\cusp}{cusp}
\DeclareMathOperator{\Stab}{Stab}
\DeclareMathOperator{\Bun}{Bun}
\DeclareMathOperator{\Ext}{Ext}
\DeclareMathOperator{\Pic}{Pic}
\DeclareMathOperator{\ch}{char\,}
\DeclareMathOperator{\Id}{Id}
\DeclareMathOperator{\Ad}{Ad}
\DeclareMathOperator{\Gr}{Gr}
\DeclareMathOperator{\Shtuka}{Shtuka}
\DeclareMathOperator{\Quot}{Quot}
\DeclareMathOperator{\Jet}{Jet}
\DeclareMathOperator{\Sym}{Sym}
\DeclareMathOperator{\codim}{codim}
\DeclareMathOperator{\bfet}{\textbf{f\'{e}t}}
\DeclareMathOperator{\Lat}{Lat}
\DeclareMathOperator{\MaxSpec}{MaxSpec}
\DeclareMathOperator{\Span}{Span}
\DeclareMathOperator{\Proj}{Proj\,}
\DeclareMathOperator{\dom}{dom}
\DeclareMathOperator{\pt}{pt}
\DeclareMathOperator{\tors}{tors}
\DeclareMathOperator{\vol}{vol}
\newtheorem{thm}{Theorem}[subsection]
\newtheorem{lemma}[thm]{Lemma}
\newtheorem{prop}[thm]{Proposition}
\newtheorem{cor}[thm]{Corollary}
\newtheorem{crit}[thm]{Criterion}
\newtheorem{conj}[thm]{Conjecture}
\theoremstyle{remark}
\newtheorem{remark}[thm]{Remark}
\newtheorem{defn}[thm]{Definition}
\newtheorem{example}[thm]{Example}
\newtheorem{exercise}[thm]{Exercise}
\makeatletter
\def\imod#1{\allowbreak\mkern5mu({\operator@font mod}\,\,#1)}
\makeatother
\widowpenalty=1000
\clubpenalty=1000
\setcounter{tocdepth}{1}
\title{Overview}
\author{Brian Conrad and Akshay Venkatesh \footnote{Notes taken by Tony Feng} }
\date{September 30, 2015}
\begin{document}
\maketitle
\section{Introduction to the BSD conjecture}
\subsection{Some history}
The BSD conjecture originated from observations made by Birch and Swinnerton-Dyer when studying the asymptotics of
\[
\prod_{p \leq x} \frac{\#E(\F_p)}{p}
\]
as $x$ grows (``the first serious contribution to mathematics by a computer''). Specifically, Birch and Swinnerton-Dyer found that
\[
\boxed{\prod_{p \leq x} \frac{\#E(\F_p)}{p} \sim C_E (\log x)^{\rank(E(\Q))}}
\]
for some (explicit) constant $C_E$. This phenomenon was then reformulated by Tate in terms of an $L$-function $L(E/\Q,s)$
and then generalized to abelian varieties over global fields.
\subsection{Motivation: the class number formula}
Let me give a completely \emph{ahistorical} motivation by going back to the zeta function.
If $K$ is a number field, then it has an associated \emph{Dedekind zeta function} $\zeta_K$. One has that
\[
\ord_{s=0} \zeta_K = r := r_1+r_2-1 = \rank(\Cal{O}_K^{\times}).
\]
Moreover, the leading coefficient $C_K$ is described explicitly by fundamental invariants associated to $K$:
\[
\boxed{C_K = -\frac{h_K R_K}{w_K}}
\]
where
\begin{itemize}
\item $h_K$ is the \emph{class number} of $K$; i.e. the size of the class group $\Pic(\Cal{O}_K) \simeq H_{\et}^1(\Cal{O}_K, \G_m)$ (which classifies
``$\G_m$-torsors over $\Cal{O}_K$''),
\item $w_K = \# (\Cal{O}_K^{\times})_{\tors}$ is the number of roots of unity in $K$,
\item $R_K$ is the \emph{regulator} of $K$, defined as follows. The map $\Cal{L} \colon \Cal{O}_K^{\times} \rightarrow \R^{r_1+r_2}$ is defined by
\[
\Cal{L}(u) = (\log||u||_v)_{v \mid \infty}
\]
(where complex places are taken in pairs, and for such places the norm used here is the square of the usual absolute value)
and via the product formula this factors through the hyperplane $H = \{(t_i)_i \mid \sum t_i = 0 \}$. There is a natural measure on $H$ induced via the short exact sequence
\[
0 \rightarrow H \rightarrow \R^{r_1+r_2} \xrightarrow{\Sigma} \R \rightarrow 0
\]
and the Lebesgue measures on $\R^{r_1+r_2}$ and $\R$. (Explicitly, the measure on $H$ corresponds to a choice of orthonormal basis
via the standard inner product on $\R^{r_1+r_2}$.)
\begin{remark}
It is a theme that comes up over and over again in number theory that when you have a short exact sequence
$$1 \rightarrow H \rightarrow G \rightarrow G/H \rightarrow 1$$
of locally compact Hausdorff topological groups and Haar measures chosen on two out of the three groups, then there is a
unique choice of Haar measure on the third so that an appropriate ``Fubini formula'' holds among integration on the three
spaces (characterized for integration of continuous functions with compact support, but valid and used in an $L^1$-sense too).
This is discussed in Lang's book {\em Real and functional analysis} for the characterization aspect with continuous compactly-supported
functions, and the $L^1$-aspect is then an instructive exercise.
\end{remark}
With respect to this preferred measure on $H$, we set $R_K = \vol(H/\Cal{L}(\Cal{O}_K^{\times}))$.
\end{itemize}
In Tate's reformulation of the BSD conjecture, there are analogous pieces to those appearing here. So the class number formula is a ``$\G_m$-version over $\Cal{O}_K$'' of the BSD conjecture for the ``N\'eron model over $\Cal{O}_K$ for an abeian variety over $K$'' (though the meaning and role and N\'eron models will be
largely postponed until the next lecture, with $\ell$-adic representations serving in place of N\'eron models today).
\subsection{Construction of the $L$-function}
Let $A$ be an abelian variety over a global field $K$. We want to define an $L$-function $L(A/K, s)$.
The $L$-function will be constructed as an Euler product over local factors, which are related to the
``reduction'' of $A$ at the primes of $K$.
There are complications at places of bad reduction; some references on elliptic curves give case-wise formulas in these cases that may seem ad hoc
without a broader context.
We shall give a \emph{uniform} definition of the local factors of $L(A/K, s)$ using the (rational) \emph{Tate module}
\[
V_{\ell}(A) = T_{\ell}(A)[1/\ell],
\]
which is a vector space isomorphic to $\Q_{\ell}^{2g}$, equipped with a continuous linear action of $\Gamma_K := \Gal(K_s/K)$.
\begin{defn}
Let $R$ be a discrete valuation ring and $X$ be a smooth proper scheme over $F := \Frac(R)$. We say that $X$ has \emph{good reduction} with respect to $R$ if $X = \Cal{X} \otimes_R F$ for some smooth proper $R$-scheme $\Cal{X}$.
\end{defn}
\noindent \textbf{Hard facts.} If $A$ is an abelian variety, then $A$ has good reduction at $v$ (i.e. with respect to $\Cal{O}_{K, v}$) if and only if $A_{K_v}$ has good reduction with respect to $\Cal{O}_{K,v}$. Moreover, in such cases we have $A = \Cal{A}_v \otimes_{\Cal{O}_{K,v}} K$ where $\Cal{A}$ is an \emph{abelian scheme} over $\Cal{O}_{K,v}$ (i.e. a smooth proper $\Cal{O}_{K,v}$-group scheme with connected reduction). In particular, the special fiber of $\Cal{A}_v$ is an abelian variety over the residue field. This $\Cal{A}_v$ is in fact \emph{unique} and even \emph{functorial} in $A$ . All of these assertions rests
crucially on the theory of N\'eron models, to be discussed next time.
\begin{remark}
This is really miraculous. For a general $X$ with good reduction there is no preferred $\Cal{X}$, but for \emph{abelian varieties} there \emph{is} is a preferred $\Cal{X}$.
Note that it isn't at all clear for general $X$ that good reduction over the completion $K_v$ should imply the same over $K$ (i.e., relative to the algebraic
localization $\Cal{O}_{K,v}$).
If $\dim X=1$ (and $X$ is geometrically connected over $F$)
with positive genus then there is a ``best possible'' $\Cal{X}$ even when good reduction fails (the so-called minimal regular proper
model, to be discussed later), but this lacks the good functoriality properties of the N\'eron model for abelian varieties.
\end{remark}
If we want to make definitions (e.g. of the local factors of an $L$-function) for $X$ in terms of an integral model $\Cal{X}$, then it's not clear that this
is independent of that choice. Beyond the case of curves and abelian varieties, there is no ``best choice'' of integral model:
\begin{example}\label{surface}
For a discrete valuation ring $R$ with fraction field $K$ and residue field $k$
we shall give a smooth projective surface over $K$ admitting two smooth projective
$R$-models that are not $R$-isomorphic (even ignoring the chosen identification of their generic
fibers).
The following example was suggested by R. Vakil. We will construct a projective smooth $R$-scheme $\Cal{X}$
whose generic fiber is $\PS^1_K \times \PS^1_K$ and whose special fiber is the Hirzebruch surface $F_2 := \PS(\Cal{O} \oplus \Cal{O}(2))$.
(Note that $\PS^1 \times \PS^1$ may be viewed as the Hirzebruch surface $F_0$.) Then $\Cal{X}$ and $\mathbf{P}^1_{R} \times \mathbf{P}^1_{R}$ have isomorphic generic fibers but non-isomorphic special fibers, so they are not $R$-isomorphic.
For the construction of $\Cal{X}$, let $\Cal{Q} \subset \mathbf{P}^3_R$ be the $R$-flat quadric
$xy-z(z-tw) = 0$ where $t \in R$ is a uniformizer, so $\Cal{Q}_K$ is smooth (hence the $R$-flat $\Cal{Q}$ is reduced) and $\Cal{Q}_k$ is a cone
with singularity at $\xi_0 = [0,0,0,1]$. Since $\Cal{Q}_K$ is defined by a $K$-split quadratic form in 4 variables,
namely $xy - zw'$ with $w' = z'-tw$, it is isomorphic to $\PS^1_{K} \times \PS^1_{K}$
(using $\mathbf{P}^1_K \times \mathbf{P}^1_K \hookrightarrow \mathbf{P}^3_K$ via
$([\alpha, \beta], [u, v]) \mapsto [\beta u, \alpha v, \beta v, (\beta v - \alpha u)/t]$).
%e.g., at $[x_0, y_0, z'_0, w'_0] \in \Cal{Q}_K$ where $x_0 \ne 0$ the two lines are
%$[x, m_1 z', z', m_1 x]$ and $[x, m_2 w', m_2 x, w']$ for $m_1 = w'_0/x_0$ and $m_2 = z'_0/x_0$).
Also,
the blowup of the cone $\Cal{Q}_k$ at the cone point $\xi_0$ is isomorphic to $F_2$.
Now we introduce an incidence relation over $R$ that picks out one of the two lines through each point in $\Cal{Q}_K$ by introducing an auxiliary slope parameter.
Consider the closed subscheme $\Cal{X} \subset \mathbf{P}^3 \times \mathbf{P}^1$ defined by
$$\{([x, y, z, w], [\alpha, \beta])\,|\, \alpha x = \beta z, \beta y = \alpha (z-tw)\}$$
where $[\alpha, \beta]$ denotes homogeneous coordinates on $\mathbf{P}^1$. The projection
$p_2: \Cal{X} \rightarrow \mathbf{P}^1$ is a $\mathbf{P}^1$-bundle: over $\{\beta \ne 0\}$ it is $\mathbf{P}^1$
with homogenous coordinates $[x, w]$ and over $\{\alpha \ne 0\}$ it is $\mathbf{P}^1$ with homogeneous coordinates
$[y,w]$. Thus, $\Cal{X}$ is $R$-smooth with geometrically connected fibers.
The projection $\Cal{X} \rightarrow \mathbf{P}^3$ lands inside
the quadric $\Cal{Q}$ (as is sufficient to check over $K$ since $\Cal{X}$ is $R$-flat, or can be checked by working
on fibers over ${\rm{Spec}}(R)$ since $\Cal{Q}$ is reduced). The resulting map $\Cal{X}_K \rightarrow Q_K = \mathbf{P}^1_K \times \mathbf{P}^1_K$ is an isomorphism
since composing it with the first projection $\pi_1:Q_K \rightarrow \mathbf{P}^1_K$ recovers the
composite map $\Cal{X}_K \hookrightarrow \mathbf{P}^3_K \times \mathbf{P}^1_K
\rightarrow \mathbf{P}^1_K$ (as is sufficient to check in the sense of rational maps since $\Cal{X}_K$ is integral);
in other words, the smooth closed subscheme $\Cal{X}_K \subset \Cal{Q}_K \times \mathbf{P}^1_K$ lies inside the graph of $\pi_1$
and so must coincide with that graph.
The projection $\pi_1: \Cal{Q}_K \rightarrow \mathbf{P}^1_K$ extends to a map $\Pi_1:\Cal{Q} - \{\xi_0\} \rightarrow \mathbf{P}^1_R$ by using
the two expressions $[y, z]$ and $[z-tw, x]$, so by the same reasoning as above we see that the inclusion $\Cal{X} \subset \Cal{Q} \times \mathbf{P}^1_R$
coincides with the graph of $\Pi_1$ away from $\{\xi_0\} \times \mathbf{P}^1_R = \mathbf{P}^1_k$. Hence, the projection
$p_1:\Cal{X} \rightarrow \Cal{Q}$ is an isomorphism over $\Cal{Q} - \{\xi_0\}$, whereas $p_1^{-1}(\xi_0) \simeq
\PS^1_k$. Since $\Cal{X}_k$ is a smooth surface, the curve $p_1^{-1}(\xi_0)$ is Cartier in $\Cal{X}_k$. Hence,
$(p_1)_k$ factors uniquely through the blow-up ${\rm{Bl}}_{\xi_0}(Q_k)$.
To prove that the
unique $\Cal{Q}_k$-map $f:\Cal{X}_k \rightarrow {\rm{Bl}}_{\xi_0}(\Cal{Q}_k) = F_2$ between smooth surfaces is an isomorphism, we have to show that the
map $f_{\xi_0}:\mathbf{P}^1_k \rightarrow \mathbf{P}^1_k$ between ${\xi_0}$-fibers is an isomorphism. For this we
shall go back to how a blow-up is built via charts.
Direct computation over $\{w \ne 0\} = \mathbf{A}^3_k \subset \mathbf{P}^3_k$ shows that
${\rm{Bl}}_{\xi_0}(\Cal{Q}_k)$ is covered by the charts $D_{+}(x)$ and $D_{+}(y)$, and the very definition of $\Cal{X}_k = \Cal{X} \bmod t$
identifies $D_{+}(x)$ with $\Cal{X}_k \cap \{\beta \ne 0\}$ and identifies $D_{+}(y)$ with $\Cal{X}_k \cap \{\alpha \ne 0\}$ compatibily with gluing data,
so we are done.
\end{example}
In view of Example \ref{surface} we will make definitions in terms of the Galois representations intrinsically attached
to the ``generic fiber'' $X$ over $K$ instead. This introduces a different problem, namely possible dependence on $\ell$ (and rationality issues
to make sense of evaluating an $\ell$-adic Euler factor on a complex number $q_v^{-s}$,
but there are ways around this when we can link the Euler factor to a geometric object that doesn't involve $\ell$ (such as by using
the N\'eron model in the case of abelian varieties, as we will see next time).
\begin{remark}\label{spread_out}
It is a soft fact that if $R$ is a Dedekind domain and $X$ is smooth and proper over $F = \Frac(R)$, then there exists $r \in R - \{0\}$
and a smooth proper $\Cal{X}$ over $R[1/r]$ such that $X = \Cal{X} \otimes_{R[1/r]} F$, so $X$ has good reduction at all but finitely many maximal ideals of $R$.
The principle is that any reasonable property over the (geometric) generic fiber ``spreads out'' to an open subset of ${\rm{Spec}}(R)$ (and this principle
is valid with $R$ any commutative ring whatsoever). There is an exhaustive discussion of this principle in EGA IV$_3$, \S8--\S9, \S11, etc.
\end{remark}
Let $X$ be smooth and proper over a global field $K$, and $\ell \neq \ch(K)$ be a prime. Then attached to $X$ are \'{e}tale cohomology groups
${\rm{H}}_{\et}^i(X_{K_s}, \Q_{\ell})$, which are finite-dimensional $\Q_{\ell}$-vector spaces,
equipped with a natural continuous $\Gamma_K$-action. (Here $X_{K_s} = X \otimes_K K_s$).
\begin{example}
If $A$ is an abelian variey, then there is a $\Gamma_K$-equivariant isomorphism ${\rm{H}}^1_{\et}(A_{K_s}, \Q_{\ell}) \simeq V_{\ell}(A)^*$. The dual here means
(as usual in representation theory)
that the action of $\gamma$ on ${\rm{H}}^1_{\et}(A_{K_s}, \Q_{\ell})$ is identified with the linear dual of the action of $\gamma^{-1}$ on $V_{\ell}(A)$.
\end{example}
\begin{thm} If $X$ has a smooth proper model $\Cal{X}_v$ over $\Cal{O}_{K,v}$ then
${\rm{H}}^i_{\et}(X_{K_s}, \Q_{\ell})$ is unramified at $v$, and naturally isomorphic to ${\rm{H}}_{\et}^i(\Cal{X}_v \otimes_{\Cal{O}_{K,v}} \ol{\F}_v, \Q_{\ell})$
as Galois modules with respect to the identification $D_v/I_v \simeq \Gamma_{\F_v}$.
\end{thm}
\begin{remark}
For abelian varieties, the \emph{N\'{e}ron-Ogg-Shafarevich criterion} provides a converse result (to be proved next time using N\'eron models).
In general, there is no converse.
\end{remark}
The upshot is that at unramified places, which constitute all but finitely many places,
we have an action of Frobenius, so we can try to define an $L$-function following Artin's formalism for associating Artin $L$-functions to Galois representations:
we consider the definition
\[
L^i(X/K, s) ``=" \prod_v \det(1-q_v^{-s} \phi_v \mid ({\rm{H}}^i)^{I_v})^{-1}
\]
where $q_v = \# \F_v$ and $\phi_v$ is the ``geometric Frobenius'' in $D_v$ (inverse to the ``classical Frobenius'').
Strictly speaking, for the local factor at $v$ we should require $\ell \ne {\rm{char}}(\F_v)$, as is automatic for global function fields
but is a mild nuisance at the finitely many $\ell$-adic places when $K$ is a number field; we will address the resulting ``independence of $\ell$'' issue shorrtly.
\begin{remark}
Why the geometric Frobenius rather than the classical one? The geometric Frobenius on cohomology is dual to the usual Frobenius on the Tate module
in the case of degree-1 cohomology for abelian varieties. But the more serious reason stems from the Grothendieck-Lefschetz cohomological formula
for $L$-functions attached to constructible $\ell$-adic sheaves on separated schemes of finite type over finite fields, according to which the action of geometric rather
arithmetic Frobenius in the Galois group is what appears.
\end{remark}
To make sense of the putative definition of $L^i(X/K, s)$, there are some complications to be overcome:
\begin{enumerate}
\item The characteristic polynomial of $\phi_v$ on $({\rm{H}}^i)^{I_v}$ lies a priori in $\Q_{\ell}[T]$, and thus seems to depend on $\ell$. Moreover,
it isn't clear what is meant by evaluating this $\ell$-adic polynomial in a manner that involves
$q_v^{-s}$ with $s \in \mathbf{C}$. These concerns would be eliminated if the polynomial is actually in $\Q[T]$ and as such is
independent of $\ell$ (with $\ell \ne {\rm{char}}(\F_v)$).
One solution to this in practice, which is what we'll do for the abelian variety case, is to give a geometric re-interpretation which is evidently independent of $\ell$. For abelian varieties, this goes through the N\'{e}ron model (as we will explain next time, even for bad $v$).
In general this is a serious issue, especially at the bad places. At places of good reduction it is settled using the Riemann Hypothesis proved by Deligne.
\item Does the product (absolutely) converge? To handle this we need to ``uniformly'' (in a power of $q_v$) bound the absolute values
of the $\phi_v$-eigenvalues in $\mathbf{C}$. The Riemann Hypothesis provides
the bound, giving that $L(X/K, s)$ is absolutely convergent (uniformly in closed right half-planes) for $\Rep s > 1 + i/2$.
\end{enumerate}
With the above issues settled, we can then consider analytic continuation, poles, leading terms, etc. This analytic continuation remains unsolved in general
even for abelian varieties (though some cases are now known following the work of Wiles, Taylor, etc.).
This defines $L(A/K,s) = L^1(A/K, s)$ for ${\rm{Re}}(s) > 3/2$.
\subsection{The BSD conjecture}
First we discuss the ``weak form''.
\begin{conj}[Weak BSD]
$L(A/K, s)$ has analytic continuation to $\CC$ and
\[
\ord_{s=1} L(A/K, s) = \rank A(K).
\]
\end{conj}
\emph{Evidence.} Very little of the conjecture has been proven; there is progress for $\dim A = 1$ with ``rank'' (algebraic or analytic) $\leq 1$ for some classes of $K$.
The best evidence (in B. Conrad's opinion) is for $K$ a global function field and $A$ the Jacobian of a curve. The point is that here one has an interpretation via surfaces over finite fields, called the \emph{Artin-Tate conjecture}. To a curve over a global function field we can attach canonically a fibered surface over a finite field
via the theory of minimal regular proper models, to be discussed later. The BSD conjecture for the Jacobian of such a curve is then equivalent to a conjecture about this surface which {\em makes sense for any smooth projective surface} over a finite field (in particular, having nothing to do with a fibration structure).
Now we discuss the ``strong form'' of the conjecture, which also predicts the leading coefficient.
\begin{conj} Near $s=1$, we have
\[
L \sim C_A (s-1)^{\rank A(K)}
\]
where
\[
C_A = \frac{ \# \Sha_A \cdot R_A \cdot \Omega_A}{\#A(K)_{\tors} \cdot \# \wh{A}(K)_{\tors}}.
\]
\end{conj}
We shall now briefly describe the various terms (with some details
postponed to next time), and see how this is analogous to the class number formula.
\begin{itemize}
\item Here $\Sha_A$ is the \emph{Tate-Shafarevich group} of $A$. This admits a definition in terms of Galois cohomology, to be discussed
later, and it is also closely related to the \'etale cohomology group ${\rm{H}}^1_{\et}(\Cal{O}_K, \mathcal{A})$ for the N\'eron model $\mathcal{A}$ of $A$
(analogous to the term ${\rm{H}}^1_{\et}(\Cal{O}_K, \G_m)$ appearing in the class number formula). A precise definition
will be given next time. This is the analogue of the class group. It is a major open problem to prove
the finiteness of this group (known in some very special cases related to low-rank elliptic curves).
\item The regulator is a volume term. There is a canonical height pairing
\[
A(K)_{\R} \times \wh{A}(K)_{\R} \rightarrow \R
\]
and the volume is attached to the lattice $A(K) \times \wh{A}(K) \hookrightarrow A(K)_{\R} \times \wh{A}(K)_{\R} $ with respect to this pairing.
\item $\Omega_A$ is a volume term involving the archimedean and bad places (encoding ``Tamagawa factors'').
\end{itemize}
Cassels discovered an interesting structure for elliptic curves, which was then generalized for abelian varieties by Tate, now known as the \emph{Cassels-Tate pairing}:
\[
\langle \cdot, \cdot \rangle_A: \Sha_A \times \Sha_{\wh{A}} \rightarrow \Q/\Z.
\]
(Definitions of this will be discussed later.) The construction shows that this is skew-symmetric with respect to double-duality. If $\phi \colon A \rightarrow \wh{A}$ is a polarization (which, roughly speaking, is a symmetric isogeny to the dual abelian variety satisfying a positivity property) then inserting it into
the second variable yields (by symmetry of $\phi$) a skew-symmetric form on $\Sha_A$.
A serious result concerning the Cassels--Tate pairing is that the kernel on each side of the pairing coincides
with the maximal divisible subgroup $(\Sha_A)_{\rm{div}}$. (Recall that an abelian group $M$ is {\em divisible}
if $n$-multiplication on $M$ is surjective for all $n > 0$; e.g., $\Q_p/\Z_p$ is divisible for any prime $p$, as is $\Q/\Z$.)
Thus, if $\phi$ is a \emph{principal} polarization (i.e. $\deg \phi =1$) then the resulting skew-symmetric form on $\Sha_A/(\Sha_A)_{\rm{div}}$ has trivial kernel
in each variable; hence, the pairing is perfect if $\Sha_A/(\Sha_A)_{\rm{div}}$ is finite.
It is well-known that if $X$ is a smooth projective geometrically connected curve over $K$
then $A = \Jac(X)$ then $A$ has a canonical principal polarization. A basic algebra fact is that given a finite abelian group with a perfect skew-symmetric
form valued in $\Q/\Z$, the size of the odd part has to be a square (this is the analogue of the fact that a symplectic space has to have even dimension). The upshot is that if $A$ admits a principal polarization, then the size of the odd part of $\Sha/\Sha_{\rm{div}}$ -- if this group is finite! -- is either a square or twice a square.
Given an ample line bundle $\Cal{L}$, one get a polarization $\phi_{\Cal{L}}$ in the usual manner (functorially defined by $x \mapsto t_x^{\ast}(\Cal{L})\otimes \Cal{L}^{-1}$). However, for abelian varieties over general fields there are examples (when the ground field is not separably closed)
of polarizations that do not come from this construction (which happens for $A = \Jac(X)$ and some curves $X$ over number fields $K$
with $X(K) = \emptyset$; recall that in such cases the principal polarization is built indirectly via Galois descent).
Tate showed that if $\phi = \phi_{\Cal{L}}$ for some $\Cal{L}$ (as happens for elliptic curves, but generally not for
the principal polarization of higher-genus curves without a rational point) then the associated perfect skew-symmetric form on $\Sha_A$ is
even {\em alternating}, so in such cases $\# \Sha_A $ is a perfect square (even at $2$).
\begin{remark} Possibly due to a general lack of appreciation for the fact that {\em not} every polarization arises from a line bundle on $A$ itself, there arose a
folklore belief (never stated by Tate!) that for a principally polarized $A$, $\#\Sha_A$ is a square. In 1999 Poonen and Stoll gave examples of Tate-Shafarevich groups
of principally polarized Jacobians of higher-genus curves over $\Q$ whose order is not a square.
In fact they went much further: they discovered a
2-torsion cohomological invariant whose vanishing corresponds to the alternating property for a principally polarized abelian variety. Using this, they found
another incredible example: a principally polarized Jacobian for which $\#\Sha$ is a square but the Cassels-Tate form is {\em not} alternating!
\end{remark}
\subsection{Isogeny invariance}
Since we can basically prove nothing, one might ask how to test it - e.g. if there are ways of probing whether or not the formulation is good with respect to
known properties of $L$-functions.
The $L$-function is built out of the $\ell$-adic representations, which are evidently invariant under isogeny. More precisely, if $f \colon A \rightarrow B$ is an isogeny then $V_{\ell}(A) \simeq V_{\ell}(B)$, so $L(A/K, s) = L(B/K, s)$. For the conjecture to be true, we must then have $C_A = C_B$. Recall that
\[
C_A = \frac{ \# \Sha_A \cdot R_A \cdot \Omega_A}{\#A(K)_{\tors} \cdot \# \wh{A}(K)_{\tors}}.
\]
In general torsion isn't isogeny-invariant, and $\# \Sha$ likewise is not (it is essentially an ``integral'' cohomology group). Overall,
none of the pieces going into the definition of $C_A$ are invariant under isogeny. However, it turns out that the combination $C_A$ \emph{is} isogeny invariant. This is a theorem of Tate, which will be discussed later in the seminar for number fields (see Theorem 7.3 in Chapter I of Milne's book {\em Arithmetic
Duality Theorems}, which uses the full force of Tate global duality to be discussed in a couple of weeks, along
with the Cassels--Tate pairing).
A serious issue with the BSD conjecture, noted above, is that it's not even known that $\# \Sha_A < \infty$. What is known is a much more elementary result that
$\Sha_A[m]$ is finite for any $m>0$ not divisible by $\ch(K)$
(as will follow immediately from general finiteness theorems in global Galois cohomology to be discussed in a couple of weeks;
see Remark 6.7 in Chapter I of Milne's book {\em Arithmetic Duality Theorems}); the same holds
if $\ch(K)|m$ but requires deeper methods with group schemes and flat cohomology.
Of course, the $m$-torsion finiteness for all $m>0$ isn't good enough to prove the finiteness of $\Sha_A$, since we haven't
ruled out possibilities such as that perhaps
$\Sha_A \supset \Q_7/\Z_7$: the group $\Q_7/\Z_7$ has finite $m$-torsion for any $m>0$, but is obviously infinite. (This is an instance of a ``divisible group'': an abelian group on which
$n$-multiplication is surjective for every integer $n > 0$.)
However, it is a formal consequence of finiteness of the $\ell$-torsion finiteness for
a prime $\ell$ that the $\ell$-primary part of the torsion abelian group $\Sha_A/(\Sha_A)_{\rm{div}}$ (the quotient by the maximal divisible subgroup) is finite:
\begin{lemma}\label{easyfinite}
Let $M$ be an abelian group that is $\ell$-power torsion for a prime $\ell$. If $M[\ell]$ is finite then $M/M_{\rm{div}}$ is finite.
\end{lemma}
\begin{proof}
By the snake lemma applied to the $\ell$-power endomorphism of the exact sequence
$$0 \rightarrow M_{\rm{div}} \rightarrow M \rightarrow M/M_{\rm{div}} \rightarrow 0$$
we see that $M[\ell] \rightarrow (M/M_{\rm{div}})[\ell]$ is surjective. Hence, we may replace $M$ with $M/M_{\rm{div}}$ to
reduce to the case that $M_{\rm{div}}=0$ (it is a simple exercise that $M/M_{\rm{div}}$ has vanishing
maximal divisible subgroup). In other words, the descending sequence of subgroups $\ell^n M$ has vanishing intersection.
Our aim now is to show that $M$ is finite. By Pontryagin duality, it is equivalent to prove finiteness
for the compact Hausdorff abelian dual $M'$ of $M$. This dual is a pro-$\ell$ group (dual to $M$ being $\ell$-power torsion),
and the maximal subgroup $M_{\rm{div}} \subset M$ on which $\ell$-multiplication is surjective
is dual to the ``smallest'' torsion-free quotient of $M'$; i.e., $M_{\rm{div}}$ is dual to $M'/M'_{\rm{tor}}$.
Hence, $M'$ is torsion. Likewise, $M[\ell]$ is dual to $M'/\ell M'$, so $M'/\ell M'$ is finite. By compactness of $M'$,
if we choose $m'_1, \dots, m'_d \in M'$ representing generators of $M'/\ell M'$ then the natural map
$$\Z_{\ell}^d \rightarrow M'$$
defined by $(a_j) \mapsto \sum a_j m'_j$ is surjective modulo $\ell$ and hence is surjective by successive approximation
with pro-$\ell$ groups. Thus, the commutative pro-$\ell$ group $M'$ is a finitely generated
as a $\Z_{\ell}$-module. But $M'$ is torsion, so it is clearly finite.
\end{proof}
In general we do not know for number fields $K$ how to rule out the possibility that $\Sha_A/(\Sha_A)_{\rm{div}}$ has
nontrivial $\ell$-primary part for infinitely many primes $\ell$.
By contrast, in the function field case the situation is much better, as we'll soon see.
\subsection{Artin-Tate conjecture}
Suppose $K$ is the function field of a smooth proper geometrically connected curve $C$ over a finite field $\kappa$.
Given a smooth proper and geometrically connected curve $X \rightarrow \Spec K$ of positive genus, there is a \emph{minimal regular proper model} $\Cal{X} \rightarrow C$. (Thus, $\Cal{X}$ is a smooth proper and geometrically connected surface over $\F_q$; it is projective
by construction.) There is a lot of interesting geometry associated to this surface (ignoring its fibration structure over $C$):
\begin{itemize}
\item the N\'{e}ron-Severi group $NS(\Cal{X})$, which has an intersection pairing; this is closely related to $J(K)$ equipped with its height pairing.
\item Artin realized that $\Sha_J$ is closely related to $\Br(\Cal{X}) := {\rm{H}}_{\et}^2(\Cal{X}, \G_m)$; e.g., the finiteness of each is equivaent to that of the form,
and the Brauer group admits a pairing analogous to the Cassels--Tate pairing.
\item The zeta function $\zeta_{\Cal{X}, \F_q}$ is closely related to $L(J/K, s)$.
\end{itemize}
\begin{center}
\begin{tabular}{|l|l|}
\hline
$\Cal{X}$ & $X$ \\
\hline
$NS(\Cal{X}) $ & $J(X)$ \\
$\Br(\Cal{X})$ & $\Sha_J$ \\
$\zeta_{\Cal{X}, \F_q}$ & $L(J/K, s)$\\
\hline
\end{tabular}
\end{center}
Tate realized that one could recast the entire BSD conjecture for $X$ in terms of the invariants on the $\Cal{X}$-side. The $L$-function of $J$
(in contrast with more general abelian varieties!) is known to have analytic continuation via the link to the
zeta function of the surface, and Tate proved the following compelling result.
\begin{thm}[Tate]
In the notation above, we have
\[
\rank_{s=1} L \geq \rank J(K).
\]
\end{thm}
Tate analyized a natural pairing built on $\Br(\Cal{X})$ via \'etale cohomology, including perfectness and skew-symmetry properties,
and deduced that if the order were finite then it must be a square or twice a square.
Manin found examples where the size is $\Z/2\Z$. But 30 years later, a mistake was found in Manin's example, and
it was proved that actually the order is \emph{always} a square (when finite), contrary to the Poonen--Stoll examples over number fields.
This is \emph{used} in the proof of the equivalence of the Artin--Tate for $\Cal{X}$ and the BSD Conjectures
for Jacobians $J$ over global function fields, as we will see later.
\newpage
\section{Introduction to the Bloch-Kato conjecture}
\subsection{Siegel's mass formula}
Let's begin with an active learning exercise. Consider the equation
\[
x^2+y^2+z^2 = N
\]
for a squarefree positive integer $N$. How many integral solutions are there when $N = 10001$?
There were several guesses from the audience. The correct answer is 1920 (the closest guess was $901$).
Here is one approach. The question is obviously related to the geometry of the sphere of radius $r=\sqrt{N = 10001} \approx 100$. The lattice points lying in a spherical shell within a distance $1$ of this sphere will necessarily be solutions to the equation. We can approximate the number of such lattice points by the volume of the spherical shell consisting of $(x,y,z)$ with $|x^2+y^2+z^2-10001|<1/2$, which is $4\pi r^2$ times the thickness of the shell. Since $(r+\delta)^2 - r^2 \approx 2 r \delta$, the thickness should be about $\frac{1}{2 \sqrt{N}}$, so the volume of the shell is about $ 2\pi \sqrt{N} \approx 600$.
However, there are some congruence conditions on the possible values of a sum of three squares. For instance:
\begin{example}\label{density2} Consider the prime $2$.
The squares in $\Z_2$ are characterized modulo 8, and $x^2 \bmod 8$ depends only on $x \bmod 4$.
If we assume that $x,y,z$ take values uniformly at random modulo $8$ (or modulo 4), then the distribution of $x^2+y^2+z^2$ is determined modulo $8$, but it is \emph{not} uniform: the values $1, 2, 5, 6$ modulo $8$ are taken $3/16$ of the time each, the value $3$ modulo $8$ is taken $1/8$ of the time, the values $0, 4$ modulo 8
are taken $1/16$ of the time each, and $7$ modulo 8 is never attained.
To account for this disparity, we should multiply the volume estimate by the ratio
$\frac{3/16}{1/8} = \frac{3}{2}$ for $N \equiv 1, 2, 5, 6 \bmod 8$ (such as for $N = 10001$) and $\frac{1/8}{1/8} = 1$ for $N \equiv 3 \bmod 8$
(and 0 for $N \equiv 7 \bmod 8$), leading to an initial correction of $900$ for $N=10001$. Yet this remains far from the correct count.
\end{example}
A systematic perspective on Example \ref{density2}, incorporating congruential information at all primes in a unified manner, goes back to Hardy--Littlewood.
The idea is that after approximating the number of integral solutions with a volume estimate, we should adjust by multiplying against $p$-adic densities
measuring $p$-adic non-uniformities for all primes $p$.
Miraculously, Siegel's ``mass formula'' proves that for positive-definite integral
quadratic forms {\em which are unique in their genus}, this always converges to exactly the correct count!
Fortunately, $x^2 + y^2 + z^2$ is unique in its genus (first proved by Gauss via his reduction theory for ternary quadratic forms in {\em Disquisitiones Arithmeticae}).
Siegel proved a more general result that computed a weighted sum of ``representation counts''
$\#\{\vec{x} \in \Z^m\,|\, q(\vec{x}) = N\}$ across all quadratic lattices $q$
in a fixed positive-definite genus, with the count equal to a product of local densities at all places of $\Q$ (the archimedean
density playing the role of the volume term above).
The $p$-adic density measures non-uniformity
in the distribution of values of the quadratic form $Q := x^2 + y^2 + z^2$ as a function
$\Z_p^3 \rightarrow \Z_p$ for primes $p$.
Consider the ratio of volumes ${\rm{vol}}(Q^{-1}(U))/{\rm{vol}}(U)$ relative to the standard Haar measure on $\Z_p$
(assigning volume 1 to both $\Z_p^3$ and $\Z_p$)
as $U$ varies through ever-smaller open balls centered at $N$. Such volume considerations will provide a precise
language for probabilistic reasoning.
If the values of $Q$ were ``uniformly distributed''
in the $p$-adic sense then such ratios would get close to 1 as $U$ gets small. So we want to study these ratios for
small $U$ centered at $N$. Taking $U = N + p^e \Z_p$ with $e \ge 1$, the volume ratio for $U$ is
\begin{equation}\label{genvol}
\frac{\#\{(x, y, z) \in (\Z/p^e\Z)^3\,|\, Q(x,y,z) \equiv N \bmod p^e\}p^{-3e}}{p^{-e}};
\end{equation}
we want to understand the behavior as $e$ grows.
If $p \nmid 2N$ then the affine quadric $\{Q = N\} \subset \mathbf{A}^3$ is $\Z_p$-smooth, so every solution to $Q(x,y,z) \equiv N \bmod p^e$ lifts in
$p^3/p = p^2$ ways to a solution mod $p^{e+1}$ for all $e \ge 1$. Hence, for such $p$ the ratio is always equal to the value for $e=1$:
\begin{equation}\label{goodp}
\frac{\#\{(x,y,z) \in \mathbf{F}_p^3\,|\,Q(x,y,z) = N \bmod p\}}{p^2}.
\end{equation}
We will show below that this ratio is $1 + \frac{\leg{-N}{p}}{p}$. These quadratic residue symbols for $N = 10001$ are positive for all the small primes $3,5,7,11, 13$,
causing an ``excess'' beyond 1 for such ``correction factors''.
By accounting for these and the appropriate $p$-adic density factors for $p|2N$ as discussed below,
computing with the first 20000 primes yields something like $1919.8$ for the heuristic prediction of the count of solutions; much better than 900, and very close
to the true count of 1920.
Now let's elaborate on the calculation of the $p$-adic density for {\em all} primes $p$. (The impatient reader may skip ahead to Theorem \ref{formula}.)
{\bf Case I.} First we complete the discussion
for $p \nmid 2N$ by showing that (\ref{goodp}) is equal to
$1 + \frac{\leg{-N}{p}}{p}$, or in other words that the number of solutions in $\F_p^3$ to $x^2 + y^2 + z^2 \equiv N \bmod p$ is
$p^2 + p\leg{-N}{p}$. We will establish this count for {\em any} integer $N$ (regardless of whether or not $p$ divides $2N$), assuming $p$ is odd.
Let $\chi = \leg{\cdot}{p}$ denote the Legendre symbol modulo $p$.
Since $a \in \Z/p\Z$ is a square in $1+\chi(a)$ ways, the count of interest is equal to
\begin{align*}
&\sum_{a, b \in \Z/p\Z} (1+\chi(a) )(1+\chi(b))(1+ \chi(N-a-b)) \\
= & \sum_{a, b \in \Z/p\Z} 1 + \sum_{a, b\in \Z/p\Z} \chi(a) \chi(b) \chi(N-a-b)
\end{align*}
because the other character sums cancel. Therefore, it suffices to show that
\begin{equation}\label{three_squares}
\sum_{a, b\in \Z/p\Z} \chi(a) \chi(b) \chi(N-a-b) = p \chi(-N).
\end{equation}
\begin{lemma}\label{two_squares}
We have
\[
S_N := \sum_{a \in \Z/p\Z} \chi(a) \chi(N-a) = \begin{cases} \chi(-1) (p-1) & N \equiv 0 \pmod{p} \\ -\chi(-1) & N \not\equiv 0 \pmod{p}. \end{cases}
\]
\end{lemma}
\begin{proof}
By the same reasoning as used in the preceding paragraph,
\[
\#\{(a,b) \mid a^2+b^2 \equiv N \pmod{p} \} = p + S_N.
\]
The case $N=0$ is now trivial, so we now assume $N \ne 0$. Hence,
the conic $u^2+v^2 = N$ in $\A^2_{\F_p}$ has smooth projective closure (since $p$ is odd). Furthermore, it has a rational point by the pigeonhole principle,
because the two expressions $a^2 $ and $N-b^2$ each take $\frac{p+1}{2}$ values modulo $p$ as $a,b$ range over $\F_p$, and hence must share at least one value. Therefore, the projectivization of the conic is isomorphic over $\F_p$ to $\PS^1_{\F_p}$, hence has exactly $p+1$ solutions over $\F_p$.
To complete the proof, it only remains to count the number of rational points of the conic on the line at infinity.
But this is evidently $0$ if $\chi(-1) = -1$ and $2$ otherwise, which can be written uniformly as $1+\chi(-1)$.
In conclusion, we have found that if $N \not\equiv 0 \pmod{p}$ then
\[
\#\{(a,b) \mid a^2+b^2 \equiv N \pmod{p} \} = p +1 - (1+ \chi(-1)) = p-\chi(-1)
\]
which shows that $S_N = - \chi(-1)$ in this case, as desired.
\end{proof}
Rewriting \eqref{three_squares} as
\[
\sum_{b \in \Z/p\Z} \chi(b) \sum_{a \in \Z/p\Z} \chi(a) \chi(N-a-b) = \sum_{b \in \Z/p\Z} \chi(b) S_{N-b}
\]
we can use Lemma \ref{two_squares} to simplify it to
\begin{align*}
\sum_{b \not\equiv N} - \chi(b) \chi(-1) + \chi(N) \chi(-1)(p-1) &= \sum_{b \in \Z/p\Z} -\chi(b) \chi(-1) + \chi(N) \chi(-1) p \\
&= \chi(-N)p
\end{align*}
as desired. This completes the determination of the $p$-adic density when $p \nmid 2N$.
\medskip
{\bf Case II.} What if $p=2$? In Example \ref{density2} we computed (\ref{genvol}) for $p=2$ and $e = 3$, getting the ratio $3/2$ for $N \equiv 1, 2 \bmod 4$
and $1$ for $N \equiv 3 \bmod 8$ (and the other cases either cannot occur for squarefree $N$ or, as for $7 \bmod 8$, admit no solutions
at all). We claim that (\ref{genvol}) for $p=2$ stabilizes at all $e \ge 3$. If
$x_0^2 + y_0^2 + z_0^2 \equiv N \bmod 2^e$ then for $e \ge 1$ we see that to solve
$$(x_0 + 2^e \epsilon_1)^2 + (y_0 + 2^e \epsilon_2)^2 + (z_0 + 2^e \epsilon_3)^2 \equiv N \bmod 2^{e+1}$$
the $\epsilon_j$'s drop out and so either the original triple is actually a solution modulo $2^{e+1}$ (in which case
all 8 triples mod $2^{e+1}$ with the same reduction as $(x_0, y_0, z_0)$ modulo $2^e$
are solutions modulo $2^{e+1}$) or it does not lift to a solution modulo $2^{e+1}$.
In cases when $(x_0, y_0, z_0)$ is a solution mod $2^{e+1}$ then
exactly half of the 8 classes mod $2^{e+1}$ sharing its reduction modulo $2^e$ will lift to solutions mod $2^{e+2}$. Indeed, the congruence
$$(x_0 + 2^e \epsilon_1)^2 + (y_0 + 2^e \epsilon_2)^2 + (z_0 + 2^e \epsilon_3)^2 \equiv N \bmod 2^{e+2}$$
says exactly
$$x_0 \epsilon_1 + y_0 \epsilon_2 + z_0 \epsilon_3 \equiv (N - x_0^2 - y_0^2 - z_0^2)/2^{e+1} \bmod 2.$$
The left side is a {\em nontrivial} linear form in the $\epsilon_j$'s because if $x_0, y_0, z_0$ all vanish mod 2
then $N$ would be divisible by 4 (as we are assuming the triple is a solution modulo $2^{e+1} \in 4\Z$)
yet $N$ is assumed to be squarefree. Hence, exactly {\em half} of the 8 possibilities for $(\epsilon_1, \epsilon_2, \epsilon_3) \bmod 2$ will work as claimed.
The upshot is that if we consider congruence classes of triples modulo $2^{e'}$ with $e' \ge 2$ (i.e., $e' = e+1$ with $e \ge 1$)
and consider them in 8-fold clumps based on reduction modulo $2^{e'-1}$ then half of each clump (i.e., 4 triples per clump) lifts to a solution modulo $2^{e'+1}$,
with all 8 lifts of a triple providing solutions when any single one does. Hence, among all 64 lifts mod $2^{e'+1}$ of the 8 triples in such a clump of solutions mod $2^{e'}$,
exactly half are solutions mod $2^{e'+1}$.
We have shown that if there are $\nu_{e'}$ solutions mod $2^{e'}$ then among the $8\nu_{e'}$ lifts
of these to triples modulo $2^{e'+1}$ exactly $4\nu_{e'}$ of those lifts are solutions. In other words, $\nu_{e'+1} = 4\nu_{e'}$ for all $e' \ge 2$.
This establishes the asserted stabilization of (\ref{genvol}) for $p=2$ and $e \ge 3$ (and actually for $e \ge 2$ if one does the computation for $e=2$).
\medskip
{\bf Case III.} Finally, what about the $p$-adic density for an odd prime factor $p$ of $N$? We claim that stabilization happens for $e \ge 2$.
The key point is that any triple $(x_0, y_0, z_0)$ that is a solution to the congruence modulo $p^e$
with $e \ge 2$ cannot reduce to $(0,0,0)$ modulo $p$, as otherwise we would
get $p^2|N$ since $e \ge 2$, contradicting that $N$ is squarefree. Hence,
lifting a solution moduloe $p^e$ to a solution modulo $p^{e+1}$ with $e \ge 2$ amounts to the congruence
$$(x_0 + p^e \epsilon_1)^2 + (y_0 + p^e \epsilon_2)^2 + (z_0 + p^e \epsilon_3)^2 \equiv N \bmod p^{e+1}$$
with unknown $\epsilon_j$'s that only matter modulo $p$ and $(x_0, y_0, z_0)$ that does not vanish modulo $p$. Since $p$ is odd, this is exactly
$$x_0 \epsilon_1 + y_0 \epsilon_2 + z_0 \epsilon_3 \equiv (1/2)(N - x_0^2 - y_0^2 - z_0^2) \bmod p$$
which is a {\em nontrivial} linear condition on the $\epsilon_j$'s. Thus, each solution modulo $p^e$ lifts to $p^2$ solutions modulo $p^{e+1}$
when $e \ge 2$, proving the asserted stabilization for odd $p|N$.
It remains to determine (\ref{genvol}) for odd $p|N$ with $e=2$. The preceding calculation for such $p$ works equally well when $e=1$
because we have already shown that any solution modulo $p^2$
must have {\em nonzero} reduction modulo $p$. Hence, if $\nu_e$ denotes the number of solutions modulo $p^e$ then
the absence of a mod-$p^2$ congruential solution lifting the solution $(0,0,0)$ to $x^2 + y^2 + z^2 \equiv N \bmod p$ implies
$\nu_2 = p^2(\nu_1 - 1) = p^2(p^2 - 1)$, so $$\frac{\nu_2}{p^4} = \frac{p^2 - 1}{p^2} = 1 - \frac{1}{p^2}.$$
Putting it all together, Siegel's theorem gives:
\begin{thm}\label{formula} For a squarefree positive integer $N$,
$$\#\{(x,y,z) \in \Z^3\,|\, x^2+y^2+z^2=N\} =
2\pi \sqrt{N} \rho_2 \cdot \prod_{p\nmid 2N} \left( 1 + \frac{\psi(p)}{p} \right) \cdot \prod_{p|N_{\rm{odd}}} (1 - 1/p^2),
$$
where $\psi = \leg{-N}{\bullet}$ is the quadratic character associated to $\Q(\sqrt{-N})$ and the $2$-adic density $\rho_2$ is equal to $3/2$
when $N \equiv 1, 2 \bmod 4$ and is equal to $1$ when $N \equiv 3 \bmod 8$.
\end{thm}
\begin{remark}
If $N$ were permitted to have nontrivial square factors then the computations of the local densities at prime factors of $N$ would change!
\end{remark}
We want to relate this final expression to a Dirichlet $L$-function.
For $p \nmid 2N$, we have
\begin{equation}\label{trick}
\left( 1 + \frac{\psi(p)}{p} \right) = \left( 1- \frac{\psi(p)}{p} \right)^{-1} \left(1 - \frac{1}{p^2} \right)
\end{equation}
as $\psi(p)^2 = 1$. For \emph{odd} $p|N$, we have $\psi(p) = 0$ because $p$ is ramified in $\Q(\sqrt{-N})$, so
the factor $1 - 1/p^2$ at such $p$ on the right side of Theorem \ref{formula} coincides with the right side of (\ref{trick}).
Therefore, the count is equal to
$$2\pi \sqrt{N} \rho_2 \cdot \prod_{p>2} \left( 1- \frac{\psi(p)}{p} \right)^{-1} \left(1 - \frac{1}{p^2} \right)$$
Writing $L^{(\ell)}$ for the $L$-function with the Euler factor at a prime $\ell$ removed, this can be rewritten as
\[
2\pi \sqrt{N} \rho_2 \cdot L^{(2)}(1, \psi)/\zeta^{(2)}(2).
\]
By the analytic class number formula (recalled in Example \ref{classex} below) and Euler's calculation of $\zeta(2)$, this is
\[
2\pi \sqrt{N} \rho_2 \left( 1-\frac{\psi(2)}{2} \right) \frac{2\pi h}{w \sqrt{d_N}} \cdot \frac{6}{\pi^2} \left( 1- \frac{1}{4} \right)^{-1}
\]
where $w$ is the number of integral units in $\Q(\sqrt{-N})$, $h$ is its class number,
and $d_N$ is the absolute discriminant $\Q(\sqrt{-N})$ (so $d_N = N$ if $N \equiv 3 \pmod{4}$ and $d_N = 4N$ otherwise;
likewise, $w = 2$ except when $N = 1, 3$).
Cancelling factors of $\pi^2$, etc., the count is equal to
$$\frac{4}{3} \cdot \rho_2 \left(1- \frac{\psi(2)}{2}\right)\frac{12h}{w/2}\sqrt{\frac{N}{d_N}}.$$
This is still ugly, but we shall now transform it into something very clean, by separately
considering different cases for $N \bmod 8$.
If $N \equiv 1,2 \pmod{4}$ then $\psi(2) = 0$, $\rho_2 = 3/2$, and $\sqrt{N/d_N} = 1/2$, so $\rho_2$ cancels out and we get
the formula $$\frac{12h}{w/2}$$
for the original count (so $w/2 = 1$ when $N > 1$); this applies to $N = 10001$.
As a reality check, this gives the correct count for $N=1$ due to the presence of $w/2$ (which is equal to 2 when $N=1$), as well as for $N=5$ (with $h=2$).
If $N \equiv 3 \pmod{8}$ then $\psi(2) = -1$, $\rho_2 = 1$, and $\sqrt{N/d_N} = 1$, so the count is
$$\frac{24h}{w/2}$$ (with $w/2=1$ when $N > 3$). As a reality check, this gives the correct count for $N=3$ (with $w/2=3$) and $N=11$ (with $h=1$).
\subsection{Hints of a general conjecture}
Birch and Swinnerton-Dyer were inspired by Siegel's mass formula. Their idea was to estimate the size of $E(\Q)$ using
the product of ``local densities'' $\prod_{p \le x} \frac{\#E(\F_p)}{p}$.
\begin{remark}There is something subtle going on here. The Riemann Hypothesis for elliptic curves
over finite fields suggests that it is more natural to compare $\#E(\F_p)$ to $p+1$ instead of $p$, yet such a change has a huge effect
since $\prod_{p\le x} (p+1)/p$ diverges like $\log x$.
\end{remark}
For an elliptic curve $E/\Q$, Birch and Swinnerton-Dyer predict that
\[
\frac{L^{(r)}(E, 1)}{r!} = \frac{\Sha_E \cdot \Omega_E \cdot R_E}{E(\Q)_{\tors}^2}.
\]
The goal of this year's learning seminar is to get to a generalization of this formula. For a variety $X$, you have cohomology groups ${\rm{H}}^i(X)$ and an $L$-function $L^i(X,s)$. We want to give a similar prediction for $L^i(X, q)$ at $q \in \Z$.
It is conjectured that this $L$-function has a functional equation relating the values at $s$ and $i+1-s$. More precisely, a product of the form $L_{\infty}(s) L^i(X, s)$ is symmetric under $s \leftrightarrow i+1-s$. (The $L_{\infty}(s)$ should essentially be comprised of Gamma functions).
Let $\boxed{p=i+1}$ and $\boxed{q^* = p-q}$. Without loss of generality, we may assume that $q \geq p/2$ and $q^* \leq p/2$. We briefly survey some of the evidence for such a conjecture.
\begin{example} (\emph{BSD conjecture.}) If $X = E$, then $i=1$ and $p=2, q=1$. The Tate-Shafarevich group should be interpreted as a ``generalized class number''. The $R_E$ is the size of a ``generalized unit group'' (analogous to the classical regulator). The $\Omega_E$ is, up to Tamagawa factors, $\int_{E(\R)} \omega$ where $\omega$ is a rational form. We'll call it a ``period'' since it's the integral of an algebraic differential form over a cycle.
\end{example}
\begin{example}\label{classex} (\emph{The class number formula.}) Consider $\Q(\sqrt{d})$. There is an associated Dirichlet character $\chi_d$, and
\[
\zeta_{\Q(\sqrt{d})} = \zeta(s) L(s, \chi_d).
\]
Then the class number formula says that
\[
L(1, \chi_d) = \begin{cases} \frac{2\pi h}{w \sqrt{|d|}} & d<0, \\ \frac{2h R}{w\sqrt{d}} & d>0. \end{cases}
\]
Here $h$ is the class number and $2\pi/\sqrt{|d|}$ is like a period (coming from integrating over a circle). For $d<0$ the formula involves a class number and a period, but no units. In the $d>0$ case, there is a class number and units, but no period.
Writing in terms of the value at $s=0$ instead, we get
\[
L(0, \chi_d) = \zeta_{\Q(\sqrt{d})}(0)/\zeta_{\Q}(0) = \frac{2hR}{w}
\]
(where $h$, $R$, and $w$ are the usual invariants for $\Q(\sqrt{d})$).
\end{example}
\begin{example}\label{mod_form}
Another piece of evidence comes from the $L$-functions of modular forms. (Zagier attributes this to Eichler-Shimura-Manin.) The following type of statement was known by the early 70s. Let $\Delta = q \prod (1-q^n)^{24} \in \Cal{S}_{12}(1)$, so
\[
\Delta = \sum \tau(n) q^n
\]
and
\[
L(\Delta, s) = \sum \frac{\tau(n)}{n^s}.
\]
A fact about this is that there exist $\Omega_{even}$ and $\Omega_{odd} \in R$ such that
\[
\frac{L(\Delta, q)}{(2 \pi i)^q \Omega_q} \in \Q^*
\]
for $1 \leq q \leq 11$. (Here $\Omega_q = \Omega_{even}$ if $q$ is even and similarly if $q$ is odd.) Two striking features of this are that there is a restricted range, and that there is a parity consideration.
\end{example}
\begin{example}
There are examples coming from other $L$-functions.
\begin{itemize}
\item Euler knew that $\zeta(2n) \in \Q \pi^{2n}$ and $\zeta(1-2n) \in \Q$.
\item Hurwitz showed that
\[
\sum_{(a,b)} \frac{1}{(a+bi)^{4n}} \in \Q \omega^{4n}, \quad \omega = \int_0^1 \frac{dx}{\sqrt{1-x^4}}.
\]
\end{itemize}
\end{example}
\subsection{Deligne's conjecture}
In the end we'll have a precise conjecture, but for now we work up to $\Q^*$, which means that we can ignore class numbers. Deligne made a conjecture that describes examples with no regulator, i.e. no ``unit group''.
First, what does it mean to have ``no unit group''? Deligne calls $L^i(X, q)$ is a \emph{critical value} if $L_{\infty}(q) \neq \infty$ and $L_{\infty}(q^*) \neq \infty$. Deligne's conjecture is that
\[
\boxed{L^i(X, q^*) \in \Q (2\pi i )^{1-q} \det \langle \omega_i, \gamma_j \rangle}
\]
where $\omega_i$ is a $\Q$-basis for $F^q {\rm{H}}^i_{dR}(X)$ and $\gamma_j$ is a $\Q$-basis for $H_{sing}(X(\CC), \Q)^{\pm}$. What does the $\pm$ mean? In BSD, you integrate $\omega$ against the real points of $E(\R)$, so the complex conjugation comes into play. Here, the space $\pm$ is the $(-1)^{q-1}$ eigenspace for complex conjugation.
The fact that these two things have the same dimension is a consequence of the ``criticality.'' In the case of an elliptic curve, this picks out the \emph{holomorphic} differential and the \emph{real} points of the elliptic curve.
What's going on in Example \ref{mod_form}? In these terms, the point is that the Hodge structure has huge gaps. The $1$ and $11$ correspond to when the Hodge filtration changes. The reason why a parity condition enters is obvious.
\end{document}