%\iffalse 
% file:    linearregression.dtx
% author:  Battista Benciolini
% contact: benciolinibattista at gmail dot com
% 
% process this file with pdflatex to obtain:
%
%  - linearregression.pfd     (full documentation, three pass needed)
%  - mainlinearregression.tex (interactive main-program document)
%  - linearregression.sty     (package)
%  - sampledata.txt           (as the name says)
% 
% The author would strongly appreciate to receive  
% any comment, criticism  and just usage report
%
%\fi
%\iffalse 
%<*ins>
\begingroup
\input docstrip.tex
\keepsilent
\preamble
------------------------------------------------------------------------
[2024-12-14]
This file is part of the (expanded) distribution of linearregression
The author of  linearregression  is  Battista Benciolini 
<benciolinibattista at gmail dot com >
------------------------------------------------------------------------
The author would strongly appreciate to receive  
any comment, criticism  and just usage report   
------------------------------------------------------------------------
This program may be used, distributed and modified under 
the conditions of the LaTeX Project Public License.
(see: http://www.latex-project.org/lppl.txt)
------------------------------------------------------------------------
\endpreamble
\askforoverwritefalse
\generate{\file{linearregression.sty}{\from{linearregression.dtx}{package}}}
\generate{\file{mainlinearregression.tex}{\from{linearregression.dtx}{main}}}
\nopreamble\nopostamble
\generate{\file{sampledata.txt}{\from{linearregression.dtx}{data}}}
\endgroup
%</ins>
%\fi
%\iffalse 
%<*driver>
\documentclass[a4paper,10pt]{ltxdoc}
\usepackage[T1]{fontenc}
\usepackage{lmodern}
\usepackage[lite,nobysame,non-compressed-cites]{amsrefs}
\usepackage{amsmath,amssymb,amsfonts}
\usepackage{multicol}  
\usepackage{linearregression} 
\usepackage{graphics} 
\DeclareRobustCommand*{\Ars}{\textsf{%
\lower -.48ex\hbox{\rotatebox{-20}{A}}\kern -.3em{rs}}%
\discretionary{-}{}{\kern -.05em}\TeX\discretionary{-}{}{%
\kern -.17em}\lower -.357ex\hbox{nica}}% excerpt from some GUIT sty file 
\NewDocumentCommand\vect{m}{\underline{#1}}           % vector
\NewDocumentCommand\barycenter{m}{\overline{#1}}      % barycenter
\NewDocumentCommand\point{}{\vect{y}}                 % point
\NewDocumentCommand\coeff{}{\vect{x}}                 % direction
\NewDocumentCommand\dx{}{\vect\delta}                 % direction variation
\NewDocumentCommand\vv{}{\vect{v}}                    % barycentric coordinates
\NewDocumentCommand\trasp{}{^{\mathsf{T}}}            % traspose
\NewDocumentCommand\Renne{}{\mathbb{R}^n}             % vector space 
\NewDocumentCommand\dor{}{f}                          % distance from origin 
\NewDocumentCommand\ipoint{}{i}                       % index for points
\NewDocumentCommand\pointsum{}{\sum_{\ipoint=1}^m}    % sum over points
\NewDocumentCommand\reff{m}{(\ref{#1})}               % ref  in ( )
\NewDocumentCommand\matr{m}{{#1}}                     % matrix
\NewDocumentCommand\mC{}{\matr{C}}                    % matrix C 
\NewDocumentCommand\mc{}{k}                           % elements of matrix C 
\NewDocumentCommand\mL{}{\matr{\Lambda}}              % matrix lambda
\NewDocumentCommand\mX{}{\matr{X}}                    % matrix X 
\NewDocumentCommand\spm{}{\phantom{-}}                % space for the sign
\NewDocumentCommand\ctext{}{caption}                  % caption (a variable !)
\NewDocumentCommand\matrixtwotwo{mmmm}{               % | 2 x 2 
\begin{pmatrix} #1 & #2 \\ #3 & #4 \end{pmatrix}}     % | matrix
\DeclareMathOperator\tr{tr}                           % trace
\DeclareMathOperator\sgn{sgn}                         % signum 
\title{Linear regression with \LaTeX}
\author{Battista Benciolini}
\NewDocumentCommand\titleauthorfootnote{}{\begingroup% Not an elegant solution
\let\thefootnote\relax                             % but it is ok at the moment
\footnote{Linear regression with LaTeX - available in CTAN}% 
\footnote{Battista Benciolini - contact: benciolinibattista at gmail dot com}%
\endgroup\setcounter{footnote}{0}}% 
\parindent=0pt
\begin{document}
\hypersetup{hidelinks}
\maketitle 
\titleauthorfootnote
\tableofcontents
\vfill
\DocInput{linearregression.dtx}
\end{document}
%</driver>
%\fi
%
% \section{Introduction: first description of the problem\label{intro}}
% I start with a quote from \Ars\ (April 2021, number 31, page 73):
% \begin{quotation}
% The physicist Mario Rossi is investigating a phenomenon, 
% presumably linear, and he performs measurements in his laboratory 
% to verify his hypothesis; he measures the quantity $x$ which generates 
% the phenomenon and he measures also one of the characteristics 
% $y$ showed by the phenomenon under the effect of the stimulation $x$. 
% \\ ... \par
% Subsequently Mario graphs the data of the table to judge if the points 
% reasonably follow a linear trend or not; in this regard he computes the 
% parameters of the regression line and he draws this line on the graph 
% in order to judge the quality of the obtained results.
% \\ ... \par
% Being a \LaTeX\ user, he thinks to kill two birds with one stone: 
% using \LaTeX\ to draw the graph with the experimental data consisting 
% in the $x$, $y$ points and, at the same time, to compute the
% parameter $a$ e $b$ of the regression line $y = ax+b$, 
% and finally to draw also this line on the same graph.
% \end{quotation}
% A summary description of the problem is therefore the following.
% A set of data pairs  is available and each pair is represented as a point 
% in the plane. A straight line is searched that optimally approximates 
% the points. The first step is therefore the choice of an optimality criterion. 
% This choice is the topic of the next section. \par
% From the text we also know that the possible deviation of $y$ 
% with respect to the model is quite larger than the uncertainty of $x$.
% \par
% After reading the description of the problem 
% of Mario Rossi I tried to produce a solution. 
% In this work I will use $y_1$ and $y_2$ 
% instead of $x$ and $y$ for the two measured quantities that 
% will become the first and second coordinate, or abscissa and ordinate, 
% in the Cartesian plane.
% \par
% The problem can be treated as a mere problem of approximation or 
% alternatively as an estimation problem in the frame of a 
% probabilistic description of the uncertainty. The two treatments are 
% conceptually different. The probabilistic treatment produces some more 
% results, but the estimation of the parameters is the same. 
% On the other hand the treatment as an approximation problem is in some sense   
% more immediate and requires a less extended theoretical background.
% For this reason it will be preferred here.  
% I consider the original problem and also a variation 
% of it based on the assumption that the two variables are known with  
% the same uncertainty. The two considered situations will prove 
% to be quite different.
%
% \section{Geometric definition of three optimality criteria}
% \begin{figure}
% \setlength\unitlength{4cm}
% \begin{picture}(1, 0.7)(0.,0.)
% \multiput(0.,0.)(1.1,0){3}{\line(1,0){1}}
% \multiput(0.,0.)(1.1,0){3}{\line(0,1){1}}
% \multiput(1,1)(1.1,0){3}{\line(-1,0){1}}
% \multiput(1,1)(1.1,0){3}{\line(0,-1){1}}
% \thicklines
% \multiput(0.08,0.06)(1.1,0){3}{\line(4,3){0.88}}
% \multiput(0.26,0.09)(1.1,0){3}{\circle{0.03}}
% \multiput(0.45,0.65)(1.1,0){3}{\circle{0.03}}
% \multiput(0.92,0.44)(1.1,0){3}{\circle{0.03}}
% \put(0.26,0.09){\line(0,1){0.1050}}
% \put(0.45,0.65){\line(0,-1){0.3125}}
% \put(0.92,0.44){\line(0,1){0.2500}}
% \put(1.36,0.09){\line(-1,0){0.14}}
% \put(1.55,0.65){\line( 1,0){0.4167}}
% \put(2.02,0.44){\line(-1,0){0.3333}}
% \put(2.46,0.09){\line(-3,4){0.05}}
% \put(2.65,0.65){\line(3,-4){0.15}}
% \put(3.12,0.44){\line(-3,4){0.12}}
% \end{picture}
% \caption{The three kinds of segments 
% used in the definition of the objective function}
% \label{fig:criteria}
% \end{figure}
% For each point given in the plane  we can consider the corresponding point 
% with the same abscissa and belonging to the line. 
% Remember that the line is exactly what has to be determined.
% The distance between the given point and the just defined point on the line
% is a reasonable measure of the discrepancy between the empirical data and the
% corresponding theoretical model. 
% The distances we are speaking about are the lengths of the segments shown 
% in the leftmost scheme of figure (\ref{fig:criteria}).
% To obtain a global discrepancy measure that considers all the points 
% at once we perform the sum of the squares of the lengths 
% of the mentioned segments. It is now clear that the two coordinates 
% of the points are treated quite differently and play a different role in 
% the definition of the optimality criterion. This choice is reasonable when 
% the measuring errors only (or mainly) affect the second coordinate. 
% The optimal line is the line that minimize the just defined 
% global discrepancy. The procedure for the determination of the optimal line 
% is named linear regression. 
% In this work it is named \textit{classical linear regression}.
% We can easily exchange the role of the two quantities, i.e.\ we can   
% imagine that the first quantity is affected by errors. 
% The problem is not conceptually different. The segments plotted in the
% central picture 
% of figure (\ref{fig:criteria}) represent the discrepancy between 
% the empirical data and the model.
% This other procedure  is named \textit{classical linear regression  
% with inverted role of the coordinates}.\par
% The situation is really different if the two coordinates have to be treated 
% symmetrically. 
% In this case the discrepancy between 
% the empirical data and the model must be defined in a purely geometrical way.
% Just the line and the points enter in the definition without any special role 
% for any predefined direction. With these requirements it is quite natural 
% to use the distance of each point from the line. 
% Remember that the distance of a point 
% from a line is intended along the shortest path, i.e.\ measured in the 
% direction orthogonal to the line itself. The rightmost scheme of 
% figure (\ref{fig:criteria}) shows the segments that are considered.
% The global measure of discrepancy is again obtained as the sum 
% of the squares of the lengts of the mentioned orthogonal segments. 
% The procedure that obtain the optimal line that 
% minimize the just defined global discrepancy is named 
% \textit{symmetrical linear regression}.\par
% Some arguments of the present section will be repeated in section
% \ref{package} from the algebraic and computational point of view.
% 
% \section{Information on the realised solution, including limitations}
% The code that implements the solution is recorded in two files, that are 
% a package (sty) file and a main interactive document.
% The file |linearregression.sty| provides several commands 
% that can be used in any document. The file |mainlinearregression.tex|
% provides a simple interactive user interface.
% The package described in the sections \ref{manual} and 
% \ref{package} (user manual and implementation) provides the 
% functions that execute the various needed operations, i.e.\
% data input, computations, printing the numerical results and 
% generating a graphic representation of data and results.
% Some auxiliary functions complete the package.
% The design of the output (tables and plots) includes some arbitrary choices.
% The style of the graphic output is quite minimalist 
% (e.g.:\ no colors, no variations of line styles).\par
%
% \section{Some comments about the programming of the package}
% Large part of the code is written using the |expl3| language. 
% I have tried to be compliant with the various recommendations and 
% prescriptions for a correct use of the language, 
% but I probably only partly succeeded.\par
% Different more elegant and more coherent solutions probably exist 
% both for the general structure of the package and for some specific part 
% of the code, but this is what I have been able to do.
% Some perhaps problematic aspects are mentioned here after.\par
% Several used variables are global and they are accessed by various functions.
% This makes the various parts of the package 
% quite connected to each other and creates strong dependencies. \par
% The layered programming style is only partially applied. 
% The partition between document command and lower level functions is present, 
% but part of the low level code is directly in the document commands. 
% Variants are not used.   
%
% \section{A ready to use simple user interface\label{main}}
% The main file asks the user for the name of a
% file containing the data and generates a one page output.
%\iffalse
%<*main>
%\fi
%    \begin{macrocode}
\documentclass[a4paper]{article}
\usepackage{lmodern}
\usepackage{linearregression}
\begin{document}
\pagestyle{empty}  
\lraskfilename 
\lrcomputation
\lrplotparameters{0.16}{11.0}
\lrplot{11.0}{+}{+}{-}{-}
\lrprint
\end{document}
%    \end{macrocode}
%\iffalse
%</main>
%\fi
% The plot just includes the points and the line obtained with the classical 
% linear regression. Lines 8 and 9  of the code must be modified if a different 
% result is desired. See the next section for details.
%
% \section{A user manual for the package\label{manual}}
% The various analysis of a data set and the representation of the data 
% and of the results is obtained with a sequence of several commands. 
% The main operations are:
% (i) selection of the data file, (ii) data imput and computation, 
% (iii) printing of a table,
% (iv) printing of a picture (that can be repeated with different parameters).
% It is generally convenient to put the table and the picture(s) 
% in a proper floating environment. 
% The commands for the four mentioned operations are described here after.
% The first needed operation is to set the name of the data file.
% This is done with the command \DescribeMacro{\lrfilename} 
% \cs{lrfilename}\marg{file} that has a mandatory argument. 
% The argument is the name of the data file. As an alternative the 
% command \DescribeMacro{\lraskfilename} \cs{lraskfilename} can be used. 
% It asks the user to type the  name of the data file in the terminal.
% \par
% The macro \DescribeMacro{\lrcomputation} 
% \cs{lrcomputation} reads the data 
% and performs all the computations. 
% The results of the computations remain available in internal 
% variables and are then used by the macro that print them 
% or generates a plot.
%\par
% The macro \DescribeMacro{\lrprint} 
% \cs{lrprint} generates a table with all the estimated 
% parameters and some information about the data. 
% The computed numbers are printed with four decimal digit maximum by default, 
% but this number can be set to any desired value with the command 
% \DescribeMacro{\lrnumdigit} \cs{lrnumdigit}\marg{number}. 
% The mandatory argument is the desired maximum number of digits.
% \par 
% The macro \DescribeMacro{\lrplot} 
% \cs{lrplot}\marg{imagewidth}\marg{key1}\marg{key2}\marg{key3}\marg{key4} 
% really generates the plot. The first argument is the
% width of the plot in centimetrs, while the height is computed according 
% to the distribution of the points or it is recorded with the command 
% \cs{lrplotparameters} (see below). 
% The other four arguments are referred 
% to the data points, to the lines determined with classical regression, 
% with classical regression with inverted role of the coordinates and 
% with symmetric regression. 
% The four items, i.e.\ the set of points and the three lines, are drawn 
% or not according to the corresponding string found in |key|$i$. 
% Each item is not plotted if the string is the character |-|, it is plotted 
% in any other case. Furthermore the lines are accompanied by a label made 
% by the corresponding |key|, unless it is just a |+| or a |-|.  
% \par
% The command \DescribeMacro{\lrplotparameters} 
% \cs{lrplotparameters}\marg{diameter}\marg{imageheight} 
% is used to record some more parameters that are related to the generation 
% of the plot.  
% The first argument \marg{diameter} is the diameter in centimeters of the discs
% that represent the points. It must be a real positive number.
% The default value is 0.2.
% The second argument \marg{imageheight} is the height of the plot 
% in centimeters. 
% It must be a real number with a special meaning of any non positive value.
% If the number is positive it is really used as the height and the variables are 
% independently scaled to fill the width and the height of the plot. 
% If the value is not positive or if the command is not used the 
% two variables are scaled with the same factor and the height is computed 
% accordingly. This can generate reasonable results only if the ranges 
% of the two variables are not too much different.
% All the arguments are mandatory. 
% \par
% The use of \cs{lrnumdigit} and \cs{lrplotparameters} is a convenient solution 
% to introduce some flexibility in the output and to completely preserve 
% compatibility with the previous version of the package. 
% Some more elegant solution could be realized with the use of optional 
% arguments and perhaps of key-value pairs, but the implemented solution 
% is preferred because it is extremely simple.
% \par
% Few words are necessary about the format of the data file.
% Each record of the file hold the two values related to a point.
% The two values must be separated by any number (one is needed as a minimum) of 
% space and comma characters. No character different from space 
% can be accepted before the first value and after the  second value. 
%
% \section{An example\label{example}}
% The data  reported here after will be available in |sampledata.txt| 
% and will be used in the examples presented in this section .
%\iffalse
%<*data>
%\fi
% \begin{multicols}{4}
%    \begin{macrocode}
-0.546  0.107
 1.093 -0.510
 1.440  1.995
 1.414  0.991
 0.735  1.585
-1.848 -0.235
-0.203 -0.292
 1.517  0.779
 0.559 -1.341
-0.462 -0.437
-0.785 -0.661
-0.558  0.397
 0.181 -2.616
 0.619  1.859
-0.223 -1.915
 0.629 -0.534
-1.989 -2.300
-0.241  1.098
-0.931 -1.613
-1.070  0.592
 2.341  0.413
 1.993 -0.111
-2.357 -0.312
-1.975  0.140
%    \end{macrocode}
% \end{multicols}
%\iffalse
%</data>
%\fi
%
% The analysis of the sample data and the generation of a numeric table 
% is  operated by a code similar to the following 
% (see table \ref{tab:sampledata}). \\
% |\lrfilename{sampledata.txt}| \\ |\lrcomputation| \\ 
% |\lrnumdigit{5}| \\ |\begin{table}| \\  
% |  \lrprint| \\
% |  \caption{Analysis of ... }| \\  |\label{tab:sampledata}\end{table}| 
% \par
% The generation of some different graphical representation of the data and of 
% the results  is  operated by a code similar to the following 
% (see figures \ref{fig:sampledataB} ).\\
% \RenewDocumentCommand\ctext{}{LEFT The three lines are obtained with the three 
% optimality criteria. (AA) classical linear regression; (BB) classical linear 
% regression with inverted role of the coordinates; (S) symmetric linear 
% regression. RIGHT Data points and line estimated with 
% symmetric linear regression.}
% |\begin{figure}|\\|\lrplot{10.}{-}{AA}{BB}{S}| \\
% |\lrplot{10.}{+}{-}{-}{+}|
% \\ |\caption{|\ctext|}|\\ | \label{fig:sampledataB}  \end{figure}|
%
% \lrfilename{sampledata.txt} \lrcomputation \lrnumdigit{5}
% \begin{table}  \lrprint     \caption{Analysis of the sample data} 
%  \label{tab:sampledata} \end{table}
%  \begin{figure}    \lrplot{6.}{-}{AA}{BB}{S} \hfill \lrplot{6.}{+}{-}{-}{+} 
%    \caption{\ctext} \label{fig:sampledataB} \end{figure}
% The same data are used to show the effect of the command 
% \cs{lrplotparameters}.
%  \begin{figure}    
% \lrplotparameters{0.12}{5.}  \lrplot{4.}{+}{-}{-}{+} \hfill 
% \lrplotparameters{0.10}{3.}  \lrplot{4.}{+}{-}{-}{+} \hfill
% \lrplotparameters{0.11}{-1.} \lrplot{4.}{+}{-}{-}{+} 
%  \caption{Three different version of a plot to demonstrate the use of 
%  the command \cs{lrplotparameters}} \label{fig:sampledataC} \end{figure}
% The commands:\\ |...| \\
% |\lrplotparameters{0.12}{5.}  \lrplot{4.}{+}{-}{-}{+} \hfill | \\
% |\lrplotparameters{0.10}{3.}  \lrplot{4.}{+}{-}{-}{+} \hfill | \\
% |\lrplotparameters{0.11}{-1.} \lrplot{4.}{+}{-}{-}{+} | \\ |...| \\
% generate the figure (\ref{fig:sampledataC}).
%
% \section{A package for linear regression 
% and the theory behind it\label{package}}
%\iffalse
%<*package>
%\fi
%
% \subsection{Math preliminaries and notation \label{prelim}}
% The coordinates of a set of $m$ points on the plane are available. 
% A straight line is searched that optimally approximates the points.\par 
% The coordinates of a generic point are $y_1$ and $y_2$ 
% and they are collected in the vector $\point$.
% Any given point is identified with the index $\ipoint$.
% Explicit indices $(\dots)_1$ or $(\dots)_2$ always refer to the first 
% or second coordinate of a point or to the first or second component 
% of a vector in the plane. 
% Symbolic index $(\dots)\ipoint$ always refers to the different points. Few 
% formulas require both indices $(\dots)_{1\ipoint}$, $(\dots)_{2\ipoint}$.\par
% With more then two points a criterion of best approximation 
% is needed to select the optimal line that describes the data. \par
% Lower case symbols are used for scalars. Lower case underlined
% symbols are used for vectors in the plane. Upper case symbols
% are used for matrices.
% \par
% It is possible that certain data generate an ambiguity or a singularity
% in the computation. 
% The following mathematical treatment of the problem
% do not mention these situations and the code does not deal with them.
%
% \subsection{Package declaration, required package and definition of variables}
% The various macro will be provided in a package file
% that is introduced as usual. Most of the macros require 
% the \LaTeX3 syntax.
%    \begin{macrocode}
\ProvidesPackage{linearregression}[2024-12-14]
\RequirePackage{pict2e}        	
\ExplSyntaxOn
%    \end{macrocode}
% The variables used in the package are defined hereafter.
%    \begin{macrocode}
\ior_new:N  \g_BBLR_file_ior 
\tl_new:N   \g_BBLR_file_name_tl
\int_new:N  \g_BBLR_number_of_points_int
\fp_new:N   \g_BBLR_abscissa_fp 
\fp_new:N   \g_BBLR_ordinate_fp     
\fp_new:N   \g_BBLR_mean_abscissa_fp 
\fp_new:N   \g_BBLR_mean_ordinate_fp 
\fp_new:N   \g_BBLR_abscissa_SecOrdMoment_fp 
\fp_new:N   \g_BBLR_ordinate_SecOrdMoment_fp 
\fp_new:N   \g_BBLR_mixed_SecOrdMoment_fp 
\fp_new:N   \g_BBLR_slope_A_fp 
\fp_new:N   \g_BBLR_slope_B_fp   
\fp_new:N   \g_BBLR_slope_S_fp 
\fp_new:N   \g_BBLR_intercept_A_fp 
\fp_new:N   \g_BBLR_intercept_B_fp 
\fp_new:N   \g_BBLR_intercept_S_fp 
\fp_new:N   \g_BBLR_cos_fp
\fp_new:N   \g_BBLR_sin_fp 
\fp_new:N   \g_BBLR_sig_sin_fp
\fp_new:N   \g_BBLR_eig_diff_fp 
\fp_new:N   \g_BBLR_diag_diff_fp 
\tl_new:N   \g_BBLR_file_line_tl  
\fp_new:N   \g_BBLR_min_abscissa_fp   
\fp_new:N   \g_BBLR_min_ordinate_fp 
\fp_new:N   \g_BBLR_max_abscissa_fp
\fp_new:N   \g_BBLR_max_ordinate_fp 
\fp_new:N   \g_BBLR_min_draw_abscissa_fp 
\fp_new:N   \g_BBLR_max_draw_abscissa_fp  
\bool_new:N \g_BBLR_data_eof_bool
\int_new:N  \g_BBLR_record_length_int
\int_new:N  \g_BBLR_rec_count_int
\int_new:N  \g_BBLR_first_separator_int
\int_new:N  \g_BBLR_last_separator_int      
\str_const:Nn  \c_BBLR_space_str {~}      
\str_const:Nn  \c_BBLR_comma_str {,}    
\str_const:Nn  \c_BBLR_plus_str  {+}   
\str_const:Nn  \c_BBLR_minus_str {-}         
\bool_new:N   \g_BBLR_plot_points_bool        
\bool_new:N   \g_BBLR_plot_lineA_bool      
\bool_new:N   \g_BBLR_plot_lineB_bool       
\bool_new:N   \g_BBLR_plot_lineS_bool     
\bool_new:N        \g_BBLR_two_scale_bool 
\bool_gset_false:N \g_BBLR_two_scale_bool 
\fp_new:N   \g_BBLR_base_fp                
\fp_new:N   \g_BBLR_height_fp             
\fp_new:N   \g_BBLR_Xbase_fp           
\fp_new:N   \g_BBLR_Xheight_fp            
\fp_new:N   \g_BBLR_XXheight_fp   
\fp_new:N   \g_BBLR_Dabscissa_fp         
\fp_new:N   \g_BBLR_Dordinate_fp         
\fp_new:N   \g_BBLR_diameter_fp            
\fp_gset:Nn \g_BBLR_diameter_fp{0.2}    
\fp_new:N   \g_BBLR_line_base_length_fp    
\fp_new:N   \g_BBLR_scale_factor_fp      
\fp_new:N   \g_BBLR_scale_factor_H_fp 
\str_new:N  \g_BBLR_point_code_str
\str_new:N  \g_BBLR_labelA_str
\str_new:N  \g_BBLR_labelB_str         
\str_new:N  \g_BBLR_labelS_str
\int_new:N   \g_BBLR_num_dec_dig_int
\int_gset:Nn \g_BBLR_num_dec_dig_int{4}
%    \end{macrocode}
%
% \subsection{Preparing data input}
% \begin{macro}{\lrfilename}
% The command \cs{lrfilename} records the file name passed as argument.
%    \begin{macrocode}
\NewDocumentCommand{\lrfilename}{m}{
\tl_gset:Nn \g_BBLR_file_name_tl {#1}
}
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\lraskfilename}
% The command \cs{lraskfilename} asks for the data file name from the terminal.
%    \begin{macrocode}
\NewDocumentCommand{\lraskfilename}{}{
\ior_get_term:nN  {filename ? } \g_BBLR_file_name_tl 
\tl_trim_spaces:N \g_BBLR_file_name_tl 
}
%    \end{macrocode}
% \end{macro}
%
% \subsection{Computation}
% \begin{macro}{\lrcomputation}
% The  command \cs{lrcomputation} reads the data file and 
% performs all the relevant computations to solve the 
% proposed problem. 
%    \begin{macrocode}
\NewDocumentCommand{\lrcomputation}{}{%
%    \end{macrocode}
%
% \subsubsection{Computation of first and second order moments}
% In the sequel it will results that the first and second order moments 
% of the data provide everything needed to solve the problem. 
% The barycenter of the data is defined as 
% \begin{equation}
% \barycenter{\point}=\frac{1}{m}\pointsum \point_\ipoint.
% \label{barycenter} \end{equation}
% It is convenient to scan the data to accumulate the sum 
% that appears in \reff{barycenter}.
% The coordinates of each point are read from the file 
% and they are immediately used. The data are not globally recorded.
%    \begin{macrocode}
\bool_gset_false:N \g_BBLR_data_eof_bool             
\int_zero:N \g_BBLR_number_of_points_int                
\fp_zero:N  \g_BBLR_mean_abscissa_fp                     
\fp_zero:N  \g_BBLR_mean_ordinate_fp                   
\ior_open:Nn \g_BBLR_file_ior \g_BBLR_file_name_tl   
\bool_until_do:Nn \g_BBLR_data_eof_bool {            
  \ior_str_get:NN \g_BBLR_file_ior \g_BBLR_file_line_tl
  \if_eof:w \g_BBLR_file_ior                        
    \bool_gset_true:N \g_BBLR_data_eof_bool          
  \else:                                              
    \int_incr:N \g_BBLR_number_of_points_int       
    \BBLR_decode_data:                               
    \fp_gset:Nn \g_BBLR_mean_abscissa_fp               
    {\g_BBLR_mean_abscissa_fp  + \g_BBLR_abscissa_fp}   
    \fp_gset:Nn \g_BBLR_mean_ordinate_fp            
    {\g_BBLR_mean_ordinate_fp + \g_BBLR_ordinate_fp} 
  \fi:                                                 
}                                                    
%    \end{macrocode}
% Loop ended. Now close the file and divide by the number of points.
%    \begin{macrocode}
\ior_close:N \g_BBLR_file_ior                        
\fp_gset:Nn \g_BBLR_mean_abscissa_fp                  
{\g_BBLR_mean_abscissa_fp / \g_BBLR_number_of_points_int}
\fp_gset:Nn \g_BBLR_mean_ordinate_fp                  
{\g_BBLR_mean_ordinate_fp / \g_BBLR_number_of_points_int}
%    \end{macrocode}
%
% The barycentric coordinates are defined for each point
% \begin{equation} \vv_\ipoint= \point_\ipoint - \barycenter{\point}
% \label{residual} \end{equation}
%  and the empirical dispersion matrix is defined as: 
% \begin{equation}  \mC=\frac{1}{m}\pointsum \vv_\ipoint\vv_\ipoint\trasp .
% \label{matrixC} \end{equation}
% Superscript as in $()\trasp$ means transpose. The elements of  $\mC$ are the 
% second order central moments and they are denoted as: 
% \begin{equation}  \mC=\matrixtwotwo{\mc_{11}}{\mc_{12}}{\mc_{12}}{\mc_{22}}.
% \label{matrixCc} \end{equation}
% A second scan of the data is performed to compute the
% sums that appears in \reff{matrixC} and to determine the 
% the extremal values of the coordinates. Record scan can be regulated 
% by a record counter, because the number of points is now known.
%    \begin{macrocode}
\fp_zero:N  \g_BBLR_abscissa_SecOrdMoment_fp                      
\fp_zero:N  \g_BBLR_ordinate_SecOrdMoment_fp                      
\fp_zero:N  \g_BBLR_mixed_SecOrdMoment_fp                         
\fp_gset_eq:NN  \g_BBLR_min_abscissa_fp  \g_BBLR_mean_abscissa_fp    
\fp_gset_eq:NN  \g_BBLR_min_ordinate_fp  \g_BBLR_mean_ordinate_fp        
\fp_gset_eq:NN  \g_BBLR_max_abscissa_fp  \g_BBLR_mean_abscissa_fp       
\fp_gset_eq:NN  \g_BBLR_max_ordinate_fp \g_BBLR_mean_ordinate_fp  
\ior_open:Nn   \g_BBLR_file_ior \g_BBLR_file_name_tl         
\int_zero:N    \g_BBLR_rec_count_int           
\int_do_until:nn                  
{\g_BBLR_rec_count_int = \g_BBLR_number_of_points_int} 
{                                                        
    \ior_str_get:NN \g_BBLR_file_ior \g_BBLR_file_line_tl    
    \int_incr:N \g_BBLR_rec_count_int               
    \BBLR_decode_data:                                    
    \fp_gset:Nn \g_tmpa_fp                          
    {\g_BBLR_abscissa_fp - \g_BBLR_mean_abscissa_fp}
    \fp_gset:Nn \g_tmpb_fp                          
    {\g_BBLR_ordinate_fp - \g_BBLR_mean_ordinate_fp}
    \fp_gset:Nn \g_BBLR_abscissa_SecOrdMoment_fp             
    {\g_BBLR_abscissa_SecOrdMoment_fp + \g_tmpa_fp * \g_tmpa_fp}
    \fp_gset:Nn \g_BBLR_mixed_SecOrdMoment_fp                
    {\g_BBLR_mixed_SecOrdMoment_fp + \g_tmpa_fp * \g_tmpb_fp}     
    \fp_gset:Nn  \g_BBLR_ordinate_SecOrdMoment_fp           
    {\g_BBLR_ordinate_SecOrdMoment_fp + \g_tmpb_fp * \g_tmpb_fp}              
\fp_gset:Nn  \g_BBLR_min_abscissa_fp                               
{min(\g_BBLR_min_abscissa_fp, \g_BBLR_abscissa_fp)}                 
\fp_gset:Nn  \g_BBLR_min_ordinate_fp                              
{min(\g_BBLR_min_ordinate_fp, \g_BBLR_ordinate_fp)}                 
\fp_gset:Nn  \g_BBLR_max_abscissa_fp                              
{max(\g_BBLR_max_abscissa_fp, \g_BBLR_abscissa_fp)}                 
\fp_gset:Nn  \g_BBLR_max_ordinate_fp                              
{max(\g_BBLR_max_ordinate_fp, \g_BBLR_ordinate_fp)}                 
}                                                             
\ior_close:N \g_BBLR_file_ior                                     
\fp_gset:Nn \g_BBLR_abscissa_SecOrdMoment_fp                       
{\g_BBLR_abscissa_SecOrdMoment_fp / \g_BBLR_number_of_points_int}   
\fp_gset:Nn \g_BBLR_mixed_SecOrdMoment_fp                           
{\g_BBLR_mixed_SecOrdMoment_fp / \g_BBLR_number_of_points_int}      
\fp_gset:Nn \g_BBLR_ordinate_SecOrdMoment_fp                        
{\g_BBLR_ordinate_SecOrdMoment_fp / \g_BBLR_number_of_points_int}   
\fp_gset:Nn \g_BBLR_Dabscissa_fp                               
{\g_BBLR_max_abscissa_fp - \g_BBLR_min_abscissa_fp }       
\fp_gset:Nn \g_BBLR_Dordinate_fp                            
{\g_BBLR_max_ordinate_fp - \g_BBLR_min_ordinate_fp }        
%    \end{macrocode}
% A single pass algorithm exists, but it is numerically less stable. 
%
% \subsubsection{Classical linear regression \label{classical}}
% A line in the plane is described by the equation  
% \begin{equation} y_2=ay_1+b \label{eqab} \end{equation}
% that contains the parameters $a$ and $b$. 
% For each point it is possible to define a distance or a discrepancy 
% of the experimental data with respect to the model.
% In the given problem the second coordinate is much more affected by 
% errors than the first coordinate. It is therefore reasonable 
% to define the approximation error of each point as
% \begin{equation} e_\ipoint=y_{2\ipoint}-ay_{1\ipoint}-b
% \label{e}\end{equation}
% i.e.\ the difference between the empirical value $y_{2\ipoint}$ 
% and its model counterpart $ay_{1\ipoint}+b$.
% This is the already mentioned  classical linear regression. In fact the 
% absolute value of $e_\ipoint$ expressed in (\ref{e}) is the lenght 
% of the segments shown in the leftmost scheeme of figure (\ref{fig:criteria}).
% The global discrepancy between the data and the model is measured by the 
% least square objective function defined by:
% \begin{equation} \psi=\pointsum e_\ipoint^2 \label{psiab} \end{equation}
% and the parameters $a$ and $b$ will be determined 
% just by the minimization of the function $\psi$ defined in \reff{psiab}.
% \par
% In the present treatment of the regression problem as a pure 
% approximation problem the definition  of  $\psi$ in \reff{psiab} 
% seams quite arbitrary. It is anyway a convenient choice. 
% \par
% Expression \reff{e}  can be rewritten in the different form
% \begin{equation} 
% e_\ipoint=v_{2\ipoint}-av_{1\ipoint}+\barycenter{y}_2-a\barycenter{y}_1-b
% \label{e2}\end{equation}
% so that the function to be minimized can be expressed 
% as the sum of two quadratic functions:
% \begin{equation} 
% \psi=
% \pointsum (v_{2\ipoint}-av_{1\ipoint})^2+
% m(\barycenter{y}_2-a\barycenter{y}_1-b)^2 
% \label{psiab2} \end{equation}
% and the minimum can be attained considering 
% the two terms one at a time. 
% The second term in the right-hand side of \reff{psiab2} 
% vanishes if the choice of $b$ is: 
% \begin{equation} b=\barycenter{y}_2-a\barycenter{y}_1.
% \label{estb} \end{equation}
% The first term in the right-hand side of \reff{psiab2} becomes:
% \begin{equation}  \psi_{(a)}=m\left(\mc_{22}-2a\mc_{12}+a^2\mc_{11}\right).
% \label{parabola} \end{equation}
% Searching the minimum of $\psi$ w.r.t.\ $a$ is therefore the search 
% of the abscissa of the vertex of a parabola  
% with axis parallel to the second coordinated axis.
% The result is:
% \begin{equation} a=\mc_{12}/\mc_{11}
% \label{esta} \end{equation}
% Now the slope $a$ and the intercept $b$ can be actually computed.
%    \begin{macrocode}
\fp_gset:Nn \g_BBLR_slope_A_fp
{\g_BBLR_mixed_SecOrdMoment_fp / \g_BBLR_abscissa_SecOrdMoment_fp }
\fp_gset:Nn \g_BBLR_intercept_A_fp
{\g_BBLR_mean_ordinate_fp - \g_BBLR_slope_A_fp * \g_BBLR_mean_abscissa_fp}
%    \end{macrocode}
% \par
% The empirical data and the estimated values of $a$ and $b$
% can be used to compute 
% the value actually attained by the residuals $e_\ipoint$ and
% by the function $\psi$. Then the index
% \begin{equation} \hat\sigma_0^2=\psi/(m-2)\end{equation}
% can be used to evaluate the general quality of the data and of the model.
% This claim is clearly quite generic. A complete understanding 
% of this evaluation would require to treat the linear regression 
% problem in the framework of the probabilistic estimation theory. 
% The used notation is derived from that theory.\par
% If the role of the two coordinates is exchanged the result 
% for $a$ becomes (still with reference to \reff{eqab})
% \begin{equation} a=\mc_{22}/\mc_{12}.\end{equation}
% The slope and the intercept can be computed accordingly. 
%    \begin{macrocode}
\fp_gset:Nn \g_BBLR_slope_B_fp
{\g_BBLR_ordinate_SecOrdMoment_fp / \g_BBLR_mixed_SecOrdMoment_fp}
\fp_gset:Nn \g_BBLR_intercept_B_fp
{\g_BBLR_mean_ordinate_fp - \g_BBLR_slope_B_fp * \g_BBLR_mean_abscissa_fp}
%    \end{macrocode}
%
% \subsubsection{Symmetric linear regression \label{symmetric}}
% If both the coordinates of the experimental points are affected 
% by the same uncertainty it is advisable to use a more symmetric 
% optimality criterion and it is convenient to use a different model equation.
% \par
% The same line can be described by a different equation, i.e.\ 
% \begin{equation} x_1y_1+x_2y_2=\dor \end{equation}
% or in vector form:
% \begin{equation} \coeff\trasp\point=\dor. \label{eqvx} \end{equation}
% The parameters in \reff{eqvx} 
% are the scalar $\dor$ and the elements 
% of the vector $\coeff$, i.e.\ $x_1$ and $x_2$. 
% The line described by \reff{eqvx} is obviously
% invariant when the three parameters are simultaneously 
% scaled by a constant. The normalization condition 
% \begin{equation} \coeff\trasp\coeff=1,  \label{norm} \end{equation}
% supplemented by $\dor\ge 0$, 
% is quite convenient because the parameters will assume 
% a significant geometrical meaning:
% $\coeff$ is a unit vector orthogonal to the line and $\dor$ 
% is the distance of the line from the origin. 
% The expression 
% \begin{equation}  d=\dor-\coeff\trasp\point \label{distance} \end{equation}
% is the distance of the generic point $\point$ from the line 
% with a sign that is positive for points on the same side of the origin.
% \par
% The distance of each given point from the desired optimal line 
% is denoted by $d_\ipoint$.
% It has a clear intrinsic geometrical meaning and it does not 
% privileges one coordinate w.r.t.\ the other.
% The function to be minimized by the optimal line is 
% \begin{equation} 
% \phi=\frac{1}{m}\pointsum d_\ipoint^2. \label{phi1} \end{equation}
% The parameters of  \reff{eqvx} are determined by the minimization 
% of the function $\phi$ that can be expressed as:
% \begin{equation}  
% \phi=\frac{1}{m}\pointsum (\coeff\trasp\point_{\ipoint}-\dor)^{2}
% \label{phi2} \end{equation}
% and then, after some algebraic manipulations:
% \begin{equation} 
% \phi=\coeff\trasp\mC\coeff+(\dor-\coeff\trasp\barycenter{\point})^2
% \label{phi3}. \end{equation}
% The function $\phi$ is composed (as it was the function $\psi$) by the sum 
% of two parts. The second term in the right-hand side of \reff{phi3} 
% vanishes if the choice of $\dor$ is: 
% \begin{equation} \dor=\coeff\trasp\barycenter{\point}.
% \label{estd} \end{equation}
% Both (\ref{estb}) and (\ref{estd}) means that the optimal line includes 
% the mean point.
% Then it is necessary to minimize the function
% \begin{equation}   \phi_{(\coeff)} =  \coeff\trasp\mC\coeff
% \label{quadraticfun} \end{equation}
% with the constrain $\coeff\trasp\coeff=1$.
% It can be proved that the function $\phi_{(\coeff)}$ 
% is stationary if $\coeff$ is an eigenvector of \mC. \par
% The function $\phi_{(\coeff)}$ and the constrain must be combined 
% using a Lagrange multiplier:
% \begin{equation} 
% \Phi=  \coeff\trasp\mC\coeff+\lambda(1-\coeff\trasp\coeff).
% \label{Phi} \end{equation}
% Then the stationarity points of $\Phi$ must be determined. 
% Equating to zero the derivatives of $\Phi$ gives
% \begin{equation} 
% \mC\coeff=\lambda\coeff
% \label{auto} \end{equation}
% i.e.\ $\coeff$ is an eigenvector of $\mC$. \par
% The same result is obtained with the following argument.
% The function $\phi_{(\coeff)}$ is stationary if its first variation 
% is zero. The variation of $\coeff$ is named $\dx$ . 
% It must respect the constrain, that becomes $\dx\trasp\coeff=0$.
% The first variation of $\phi_{(\coeff)}$ is $2\dx\trasp\mC\coeff$, 
% and it is zero if and only if the following implication is valid:
% $\dx\trasp\coeff=0 \implies \dx\trasp\mC\coeff=0$,
% and the implication is valid if and only if the vector
% $\mC\coeff$  has the same direction of $\coeff$, i.e.\ if 
% $\coeff$ is an eigenvector of $\mC$.
% \par
% The result on the optimal line 
% can be described geometrically in the following way:
% (i) the optimal line includes the barycenter of the data;  
% (ii) the optimal line is orthogonal to the eigenvector of 
% $\mC$ corresponding to the minimum eigenvalue .\par
% The obtained result can be generalized in $\Renne$.
% A set of points in $\Renne$ must be approximated by an $(n-1)$-dimensional 
% affine subspace. (Other more general situations can be considered.)
% \par
% The trace of the matrix $\mC$, denoted as $\tr(\mC)$, is a measure of the 
% global dispersion of the set of points. 
% The minimum eigenvalue $\lambda_{\textrm{min}}$ of $\mC$ is a measure 
% of the dispersion of the set of points with 
% respect to the optimal affine subspace. Therefore the index 
% \begin{equation}  \frac{n\lambda_{\textrm{min}}}{\tr(\mC)} 
% \end{equation}  
% can be used as an indicator of the relative residual 
% dispersion of the data around the optimal affine subspace.
% The defined index is dimensionless and it is 
% always between $0$ and $1$.
% \par
% For the actual computation of $\coeff$ it is convenient to consider 
% the spectral factorization of the matrix $\mC$, i.e.\ 
% $\mC=\mX\mL\mX\trasp$ where $\mL$ is a diagonal matrix 
% whose diagonal elements are the eigenvalues of $\mC$ 
% and $\mX$ is an orthonormal matrix whose columns are 
% the eigenvectors of $\mC$. The spectral factorization  exists 
% for any symmetric matrix, but it is specially simple for 
% a $2\times 2$ matrix.
% \begin{equation}
% \matrixtwotwo{\mc_{11}}{\mc_{12}}{\mc_{12}}{\mc_{22}}=
% \matrixtwotwo{c}{-s}{s}{\spm c} 
% \matrixtwotwo{\lambda_1}{0}{0}{\lambda_2} 
% \matrixtwotwo{\spm c}{s}{-s}{c}
% \label{spectral}\end{equation}
% The eigenvalues can be easily obtained because 
% their sum is the trace of $\mC$ 
% \begin{equation}
% \lambda_1 + \lambda_2 = \mc_{11}+\mc_{22}
% \label{Sum}\end{equation}
% and their product 
% is the determinant of the same matrix.
% Therefore after some manipulations it results:
% \begin{equation}
% \lambda_1 - \lambda_2 = \sqrt{(\mc_{11}-\mc_{22})^2+4\mc_{12}^2} 
% \label{Difference}\end{equation}
% and the two eigenvalues are then immediately obtained. \par
% It is convenient to compute the difference of the two diagonal elements 
% of the dispersion matrix and the difference of its eigenvalues.
%    \begin{macrocode}
\fp_gset:Nn   \g_BBLR_diag_diff_fp
{\g_BBLR_abscissa_SecOrdMoment_fp - \g_BBLR_ordinate_SecOrdMoment_fp}
\fp_gset:Nn   \g_BBLR_eig_diff_fp
{sqrt(\g_BBLR_diag_diff_fp * \g_BBLR_diag_diff_fp + 
  4 * \g_BBLR_mixed_SecOrdMoment_fp * \g_BBLR_mixed_SecOrdMoment_fp)}
%    \end{macrocode}
% The computation of $c$ and $s$ is obtained from \reff{spectral}
% taking into account that $c^2+s^2=1$.
% From \reff{spectral} it results: 
% \begin{equation} \mc_{11}-\mc_{22}=(\lambda_1-\lambda_2)(c^2-s^2)
% \label{Cos2A}\end{equation}
% and also 
% \begin{equation} \mc_{12}=(\lambda_1-\lambda_2)cs
% \label{Sin2A}\end{equation}
% that is only used to determine the sign of $cs$.
% The expression for the parameters $c$ and $s$ are:
% \begin{equation} 
% c=\sqrt{\frac{1}{2}+\frac{\mc_{11}-\mc_{22}}{2(\lambda_1-\lambda_2)}}
% \label{cos}\end{equation}
% \begin{equation} 
% s=\sgn(\mc_{12})
% \sqrt{\frac{1}{2}-\frac{\mc_{11}-\mc_{22}}{2(\lambda_1-\lambda_2)}}
% \label{sin}\end{equation}
% The parameters $s$ and $c$ are the sine and cosine 
% of the angle between the axis of $y_1$ and the eigenvector 
% corresponding to the maximum eigenvalue. \par
% They are computed using the already defined elements.
%    \begin{macrocode}
\fp_gset:Nn   \g_BBLR_cos_fp%
{sqrt((1 + \g_BBLR_diag_diff_fp / \g_BBLR_eig_diff_fp) / 2)}
\fp_gset:Nn \g_BBLR_sig_sin_fp {\fp_sign:n {\g_BBLR_mixed_SecOrdMoment_fp}}
\fp_gset:Nn   \g_BBLR_sin_fp                
{\g_BBLR_sig_sin_fp*sqrt((1-\g_BBLR_diag_diff_fp / \g_BBLR_eig_diff_fp) / 2)}
%    \end{macrocode}
% The vector $\coeff$ is :
% \begin{equation} 
% \coeff=\sgn(-s\barycenter{y}_1+c\barycenter{y}_2)
% \begin{pmatrix} -s \\ c\end{pmatrix}.
% \label{xhat}\end{equation}
%
%\par
% The parameter $a$ of model \reff{eqab} can be obtained as: 
% \begin{equation} 
% a=s/c
% \end{equation}
% Now the slope and the intercept of the optimal line corresponding to the
% symmetric criterion can be computed.
%
%    \begin{macrocode}
\fp_gset:Nn \g_BBLR_slope_S_fp
{\g_BBLR_sin_fp / \g_BBLR_cos_fp }
\fp_gset:Nn \g_BBLR_intercept_S_fp
{\g_BBLR_mean_ordinate_fp - 
\g_BBLR_slope_S_fp * \g_BBLR_mean_abscissa_fp}
}                    
%    \end{macrocode}
%
% The theoretical treatment of the proposed problem and the 
% implementation of its numerical solution end here.
% \end{macro}
%
% \subsection{Print of table of results}
% \begin{macro}{\lrprint}
% The  command \cs{lrprint} prints  some info on the data 
% and the results of the computations in tabular form.
%    \begin{macrocode}
\NewDocumentCommand{\lrprint}{}{
\begin{center}
\begin{tabular}{| l | r |} \hline
 Data~File                & \g_BBLR_file_name_tl                   \\ 
 \hline
 Number~of~points         & \int_use:N\g_BBLR_number_of_points_int \\ 
 \hline 
 Mean~values~of~the~coordinates              &% 
 $\BBLR_print_fp:N \g_BBLR_mean_abscissa_fp$ \\ &
 $\BBLR_print_fp:N \g_BBLR_mean_ordinate_fp$ \\ \hline
 Minimum~values~of~the~coordinates        &% 
 $\BBLR_print_fp:N \g_BBLR_min_abscissa_fp$ \\ &
 $\BBLR_print_fp:N \g_BBLR_min_ordinate_fp$    \\ \hline
  Maximum~values~of~the~coordinates    &% 
 $\BBLR_print_fp:N \g_BBLR_max_abscissa_fp$ \\ &
 $\BBLR_print_fp:N \g_BBLR_max_ordinate_fp$    \\ \hline 
 {Second~order~moments}\phantom{xxxxxxxxx}{abscissa} &%
 $\BBLR_print_fp:N \g_BBLR_abscissa_SecOrdMoment_fp$ \\
 \multicolumn{1}{|r|}{mixed}   & %
$\BBLR_print_fp:N \g_BBLR_mixed_SecOrdMoment_fp$ ~ \\ 
\multicolumn{1}{|r|}{ordinate}   & %
$\BBLR_print_fp:N \g_BBLR_ordinate_SecOrdMoment_fp$  \\ \hline 
  Slope~and~intercept~of~optimal~line  & $\BBLR_print_fp:N 
  \g_BBLR_slope_A_fp$ \\
 (estimated~with~errors~in~ordinate)&$\BBLR_print_fp:N 
 \g_BBLR_intercept_A_fp$\\ \hline
  Slope~and~intercept~of~optimal~line & $\BBLR_print_fp:N 
  \g_BBLR_slope_B_fp$ \\ 
 (estimated~with~errors~in~abscissa)&$\BBLR_print_fp:N 
 \g_BBLR_intercept_B_fp$\\ \hline
  Unit~vector~along~the~line & $\BBLR_print_fp:N  
  \g_BBLR_cos_fp$ \\
  &  $\BBLR_print_fp:N \g_BBLR_sin_fp$   \\
  Slope~and~intercept~of~optimal~line &$\BBLR_print_fp:N 
  \g_BBLR_slope_S_fp$ \\
(estimated~with~symmetric~regression) & 
 $\BBLR_print_fp:N \g_BBLR_intercept_S_fp$\\ \hline 
\end{tabular}
\end{center}
}
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\lrnumdigit}
% The  command \cs{lrnumdigit} records the maximum number of digits 
% used in the table for computed numbers. The default value is $4$.
%    \begin{macrocode}
\NewDocumentCommand{\lrnumdigit}{m}{
\int_gset:Nn \g_BBLR_num_dec_dig_int{#1}
}
%    \end{macrocode}
% \end{macro}
%
% \subsection{Plot of points and lines}   
% \begin{macro}{\lrplot}
% The  command \cs{lrplot} produce a framed plot of the data 
% and of the regression line(s). The size of the plot and its actual 
% content are determined by the arguments.
%    \begin{macrocode}
\NewDocumentCommand{\lrplot}{mmmmm}{%
%    \end{macrocode}
% The plotting area is divided into a main plotting area for 
% the representation of points and line(s) and a small surrounding free space. 
% The height is computed taking into account the distribution of the points, 
% if it is not explicitelly given..
%    \begin{macrocode}
\fp_gset:Nn \g_BBLR_base_fp {#1}
\fp_gset:Nn \g_BBLR_Xbase_fp {\g_BBLR_base_fp - 0.6}
\fp_gset:Nn \g_BBLR_scale_factor_fp{\g_BBLR_Xbase_fp / 
\g_BBLR_Dabscissa_fp}
\bool_if:NTF \g_BBLR_two_scale_bool 
{\fp_gset:Nn \g_BBLR_height_fp {\g_BBLR_XXheight_fp}
\fp_gset:Nn \g_BBLR_Xheight_fp {\g_BBLR_height_fp - 0.6}
\fp_gset:Nn \g_BBLR_scale_factor_H_fp{\g_BBLR_Xheight_fp / 
\g_BBLR_Dordinate_fp}
}
{\fp_gset:Nn \g_BBLR_Xheight_fp {\g_BBLR_Dordinate_fp * 
\g_BBLR_scale_factor_fp}
\fp_gset:Nn \g_BBLR_height_fp {\g_BBLR_Xheight_fp + 0.6}
\fp_gset:Nn \g_BBLR_scale_factor_H_fp {\g_BBLR_scale_factor_fp }
}
%    \end{macrocode}
% The information about the items to be plotted is in the remaining arguments.
%    \begin{macrocode}
\str_gset:Nn \g_BBLR_point_code_str {#2}
\str_gset:Nn \g_BBLR_labelA_str     {#3}
\str_gset:Nn \g_BBLR_labelB_str     {#4}
\str_gset:Nn \g_BBLR_labelS_str     {#5}
\bool_gset:Nn \g_BBLR_plot_points_bool
{!(\str_if_eq_p:NN \g_BBLR_point_code_str \c_BBLR_minus_str)}
\bool_gset:Nn \g_BBLR_plot_lineA_bool
{!(\str_if_eq_p:NN \g_BBLR_labelA_str \c_BBLR_minus_str)}
\bool_gset:Nn \g_BBLR_plot_lineB_bool
{!(\str_if_eq_p:NN \g_BBLR_labelB_str \c_BBLR_minus_str)}
\bool_gset:Nn \g_BBLR_plot_lineS_bool
{!(\str_if_eq_p:NN \g_BBLR_labelS_str \c_BBLR_minus_str)}
%    \end{macrocode}
% The unit of length is $1$ centimeter. The plotting area is framed.
%    \begin{macrocode}
\setlength{\unitlength}{1.0cm}  
\fp_gset:Nn \g_tmpa_fp {\g_BBLR_Xbase_fp +0.2}
\fp_gset:Nn \g_tmpb_fp {\g_BBLR_Xheight_fp +0.1}
\begin{picture}(\fp_use:N\g_BBLR_base_fp,\fp_use:N\g_BBLR_height_fp)(-0.3,-0.3)
\put(-0.1,-0.1){\line(1,0){\fp_use:N\g_tmpa_fp}}
\put(-0.1,\fp_use:N\g_tmpb_fp){\line(1,0){\fp_use:N\g_tmpa_fp}}
\fp_gset:Nn \g_tmpa_fp {\g_tmpa_fp -0.1}
\fp_gset:Nn \g_tmpb_fp {\g_tmpb_fp +0.1}
\put(-0.1,-0.1){\line(0,1){\fp_use:N\g_tmpb_fp}}
\put(\fp_use:N\g_tmpa_fp,-0.1){\line(0,1){\fp_use:N\g_tmpb_fp}}
%    \end{macrocode}
% The plot of points and line(s) is obtained using auxiliary functions.
%    \begin{macrocode}
\thicklines
\bool_if:nT {\g_BBLR_plot_points_bool}{\BBLR_plot_points:}
\bool_if:nT {\g_BBLR_plot_lineA_bool}{
\BBLR_draw_line:NNN \g_BBLR_slope_A_fp\g_BBLR_intercept_A_fp
\g_BBLR_labelA_str}
\bool_if:nT {\g_BBLR_plot_lineB_bool}{
\BBLR_draw_line:NNN \g_BBLR_slope_B_fp\g_BBLR_intercept_B_fp
\g_BBLR_labelB_str}
\bool_if:nT {\g_BBLR_plot_lineS_bool}{
\BBLR_draw_line:NNN \g_BBLR_slope_S_fp\g_BBLR_intercept_S_fp
\g_BBLR_labelS_str}
\end{picture}
}%                                                          
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\lrplotparameters}
% The  command \cs{lrplotparameters} records two parameters related to 
% the generation of the plot and sets a boolean variable. 
%    \begin{macrocode}
\NewDocumentCommand{\lrplotparameters}{mm}{
\fp_gset:Nn \g_BBLR_diameter_fp{#1}
\fp_gset:Nn \g_BBLR_XXheight_fp{#2}
\bool_gset:Nn \g_BBLR_two_scale_bool 
{\fp_compare_p:nNn {\g_BBLR_XXheight_fp}>{0.}}
}
%    \end{macrocode}
% \end{macro}
%
% \subsection{Functions for internal use}
% The functions listed here after are for internal 
% use and are just minimally documented. \par
% The function |\BBLR_decode_data:|   
% \marginpar{\raggedleft\texttt{
% \textbackslash{}BBLR\textunderscore{}decode\textunderscore{}data:}}
% extract two numeric values from the string read from the file.
% Some clever actions are necessary because 
% a so called csv file sometime do not contains the separating commas. 
%    \begin{macrocode}
\cs_new_protected:Nn \BBLR_decode_data: {             
\tl_trim_spaces:N \g_BBLR_file_line_tl                          
\int_gzero:N \g_tmpa_int                              
\int_gzero:N \g_BBLR_first_separator_int                      
\int_gzero:N \g_BBLR_last_separator_int               
\int_gset:Nn \g_BBLR_record_length_int {           
\str_count:N \g_BBLR_file_line_tl}                         
\str_map_variable:NNn \g_BBLR_file_line_tl \g_tmpa_str {
\int_gincr:N \g_tmpa_int                 
\bool_lazy_or:nnTF                                                       
{\str_if_eq_p:NN \g_tmpa_str \c_BBLR_comma_str}                   
{\str_if_eq_p:NN \g_tmpa_str \c_BBLR_space_str}                    
{\int_gset_eq:NN \g_BBLR_last_separator_int \g_tmpa_int
\int_if_zero:nTF {\g_BBLR_first_separator_int}
{\int_gset_eq:NN \g_BBLR_first_separator_int \g_tmpa_int
}{\prg_do_nothing:}                                              
}{\prg_do_nothing:}                                                
}                                        
\int_gincr:N \g_BBLR_last_separator_int                    
\int_gdecr:N \g_BBLR_first_separator_int                  
\fp_gset:Nn  \g_BBLR_abscissa_fp{                                   
\str_range:Nnn \g_BBLR_file_line_tl{1}{\g_BBLR_first_separator_int}}
\fp_gset:Nn  \g_BBLR_ordinate_fp{                                   
\str_range:Nnn \g_BBLR_file_line_tl                                 
{\g_BBLR_last_separator_int}{\g_BBLR_record_length_int}}            
}                                                
%    \end{macrocode}
% The function |\BBLR_plot_points:|  \marginpar{\raggedleft\texttt{
% \textbackslash{}BBLR\textunderscore{}plot\textunderscore{}points:}}
% scans the data file to read the coordinates and 
% it draws a circle for each point.
% 
%   \begin{macrocode}
\cs_new_protected:Nn  \BBLR_plot_points: {
\ior_open:Nn   \g_BBLR_file_ior \g_BBLR_file_name_tl              
\int_zero:N    \g_BBLR_rec_count_int        
\int_do_until:nn                        
{\g_BBLR_rec_count_int = \g_BBLR_number_of_points_int} 
{                                                          
    \ior_str_get:NN \g_BBLR_file_ior \g_BBLR_file_line_tl    
    \int_incr:N \g_BBLR_rec_count_int               
    \BBLR_decode_data:                                  
    \fp_gset:Nn \g_tmpa_fp{(\g_BBLR_abscissa_fp-\g_BBLR_min_abscissa_fp)* 
    \g_BBLR_scale_factor_fp}
    \fp_gset:Nn \g_tmpb_fp{(\g_BBLR_ordinate_fp-\g_BBLR_min_ordinate_fp)* 
    \g_BBLR_scale_factor_H_fp}
    \put(\fp_use:N\g_tmpa_fp, \fp_use:N\g_tmpb_fp){           
    {\circle*{\fp_use:N\g_BBLR_diameter_fp}}}                  
}                                                            
\ior_close:N \g_BBLR_file_ior                                    
}                                                
%    \end{macrocode}
% The function |\BBLR_draw_line:NNN|  \marginpar{\raggedleft\texttt{
% \textbackslash{}BBLR\textunderscore{}draw\textunderscore{}line:NNN}}
% draws the line. The first two parameters given as arguments 
% are the slope and the intercept. The third parameter is a label.
% \par The next code finds the intersection of the line with the plotting area.
%    \begin{macrocode}
\cs_new_protected:Nn  \BBLR_draw_line:NNN {
\fp_gset:Nn \fp_tmpa_fp {#1 * \g_BBLR_min_abscissa_fp + #2 }
\fp_compare:nTF{\fp_tmpa_fp > \g_BBLR_max_ordinate_fp}{
\fp_gset:Nn \g_BBLR_min_draw_abscissa_fp {(\g_BBLR_max_ordinate_fp -#2) / #1}
}{
\fp_compare:nTF{\fp_tmpa_fp < \g_BBLR_min_ordinate_fp}{
\fp_gset:Nn \g_BBLR_min_draw_abscissa_fp {(\g_BBLR_min_ordinate_fp - #2) / #1}
}{
\fp_gset:Nn \g_BBLR_min_draw_abscissa_fp { \g_BBLR_min_abscissa_fp }
}}
\fp_gset:Nn \fp_tmpa_fp {#1 * \g_BBLR_max_abscissa_fp + #2 }
\fp_compare:nTF{\fp_tmpa_fp > \g_BBLR_max_ordinate_fp}{
\fp_gset:Nn \g_BBLR_max_draw_abscissa_fp {(\g_BBLR_max_ordinate_fp -#2) / #1}
}{
\fp_compare:nTF{\fp_tmpa_fp < \g_BBLR_min_ordinate_fp}{
\fp_gset:Nn \g_BBLR_max_draw_abscissa_fp { (\g_BBLR_min_ordinate_fp - #2) / #1}
}{
\fp_gset:Nn \g_BBLR_max_draw_abscissa_fp {  \g_BBLR_max_abscissa_fp }
}}
%    \end{macrocode}
% Some parameters (coordinates of starting point, base-length, scaled slope) 
% are computed and the line is drawn.
%    \begin{macrocode}
\fp_gset:Nn \fp_tmpa_fp {(\g_BBLR_min_draw_abscissa_fp -              
\g_BBLR_min_abscissa_fp)* \g_BBLR_scale_factor_fp}           
\fp_gset:Nn \fp_tmpb_fp {(#1 * \g_BBLR_min_draw_abscissa_fp + #2 -
\g_BBLR_min_ordinate_fp)* \g_BBLR_scale_factor_H_fp}             
\fp_gset:Nn \fp_BBLR_line_base_length_fp{(\g_BBLR_max_draw_abscissa_fp -
\g_BBLR_min_draw_abscissa_fp) * \g_BBLR_scale_factor_fp}   
\fp_gset:Nn \fp_scaled_slope_fp 
{(#1 * \g_BBLR_scale_factor_H_fp / \g_BBLR_scale_factor_fp)}
\put(\fp_use:N\fp_tmpa_fp, \fp_use:N\fp_tmpb_fp){          
\line(1.,\fp_use:N\fp_scaled_slope_fp){\fp_use:N\fp_BBLR_line_base_length_fp}}
%    \end{macrocode}
%The third parameter is used as a label, if it is not a |+|.
%    \begin{macrocode}
\bool_if:nF {\str_if_eq_p:NN #3 \c_BBLR_plus_str}{       
\fp_gset:Nn \fp_tmpa_fp                                    
{0.08 * \g_BBLR_min_draw_abscissa_fp + 0.92 * \g_BBLR_max_draw_abscissa_fp} 
\fp_gset:Nn \fp_tmpb_fp {#1 * \fp_tmpa_fp + #2 }
\fp_gset:Nn \fp_tmpa_fp 
{(\fp_tmpa_fp-\g_BBLR_min_abscissa_fp)*\g_BBLR_scale_factor_fp 
+ 0.3 * #1 /sqrt(1.+#1*#1)}
\fp_gset:Nn \fp_tmpb_fp 
{(\fp_tmpb_fp-\g_BBLR_min_ordinate_fp)* \g_BBLR_scale_factor_fp
- 0.3 /sqrt(1.+#1*#1)}
\put(\fp_use:N\fp_tmpa_fp, \fp_use:N\fp_tmpb_fp){#3}
}                                                             
}                                                 
%    \end{macrocode}
% The function |\BBLR_print_fp:N |  \marginpar{\raggedleft\texttt{
% \textbackslash{}BBLR\textunderscore{}print\textunderscore{}fp:N }}
% is used to print floating point numbers with a limited number
% of decimal digit.
%
%    \begin{macrocode}
\cs_new_protected:Nn \BBLR_print_fp:N {%
{\fp_eval:n{trunc(#1 ,\g_BBLR_num_dec_dig_int)}}
}
%    \end{macrocode}
%
%    \begin{macrocode}
\ExplSyntaxOff
%    \end{macrocode}
%
%
%\iffalse
%</package>
%\fi
%
% \section{Versions}
% 2024-06-10:  first post on CTAN.\par
% 2024-11-23: small changes in the text, 
% different format of numbers in the table.\par
% 2024-12-14: more flexibe output, some changes in the text.
% 
% \section{Acknowledgments}
% The colleagues Paolo Zatelli, Alfonso Vitti and Giulia Graldi
% read some preliminary version 
% of this text and suggested several improvements. 
% Claudio Beccari usefully commented about the first version 
% when I was preparing the second version. \par
%
% \section{Citations and references}
% \subsection*{Mathematics}
% The books by Lang \cite{Lang} and by Strang \cite{Strang} give 
% all the background on linear algebra.\par
% The texts by Sansò \cites{Sanso1, Sanso2} (in italian) treat the
% teory of probability and its application to metrology. 
% The paper  by Pearson \cite{Pearson} is the oldest text that 
% I have found on the symmetric regression, or total regression.
% \subsection*{Programming}
% The two documents \cites{L3A, L3B} are the fountamental and official guide
% for \LaTeX3 programming. The books by Donald Knuth \cites{Knuth} 
% and Leslie Lamport \cites{Lamport} are still essential references. 
% The papers by Enrico Gregorio \cites{egreg1, egreg2, egreg3, egreg4, egreg5}
% explain some general and some special aspect of \LaTeX3 programming. 
% \subsection*{References}
% \begin{biblist}[\normalsize]   
% \bib{egreg1}{article}{
% author={Gregorio, Enrico}, 
% journal={ArsTeXnica},
% number={14},pages={41\ndash 47}, date={2012},
% title={\LaTeX3: un nuovo gioco per i maghi e per diventarlo},
% }
% \bib{egreg2}{article}{
% author={Gregorio, Enrico}, 
% journal={ArsTeXnica},
% number={22},pages={69\ndash 77}, date={2016},
% title={Liste, cicli, \LaTeX3},
% }
% \bib{egreg3}{article}{
% author={Gregorio, Enrico}, 
% journal={ArsTeXnica},
% number={24},pages={37\ndash 44}, date={2017},
% title={Condizionali in \LaTeX},
% }
% \bib{egreg4}{article}{
% author={Gregorio, Enrico}, 
% journal={ArsTeXnica},
% number={30},pages={36\ndash 45}, date={2020},
% title={Funzioni e |expl3|},
% }
% \bib{egreg5}{article}{
% author={Gregorio, Enrico}, 
% journal={TUGboat},
% volume={41},number={3},pages={299\ndash 307}, date={2020},
% title={Functions and |expl3|},
% }
% \bib{Knuth}{book}{
% author={Knuth, Donald},
% title={The TeXbook},
% date={1986},
% publisher={American Mathematical Society and Addison-Wesley},
% } 
% \bib{Lang}{book}{
% author={Lang, Serge},
% title={Linear Algebra},
% date={1987},
% publisher={Springer-Verlag},
% place={Berlin Heidelberg},
% }
% \bib{Lamport}{book}{
% author={Lamport, Leslie},
% title={LaTeX - A document preparation system (2nd ed.\ )},
% date={1994},
% publisher={Addison-Wesley},
% note={something interesting in the fist edition, too},
% }
% \bib{L3A}{article}{
% title={The |expl3| package and LaTeX3 programming},
% author={The LaTeX project team}, date={2024},
% note={file: |expl3.pdf| available in CTAN in l3kernel}, 
% }
% \bib{L3B}{article}{
% title={The \LaTeX3 interface},
% author={The LaTeX project team}, date={2024},
% note={file: |interface3.pdf| available in CTAN in l3kernel} 
% }
% \bib{Pearson}{article}{
% title={On lines and planes of closest fit to systems of points in space},
% author={Pearson, Karl}, date={1901},
% journal={Philosophical Magazine},
% volume={2},number={11},pages={559\ndash 572}, 
% }
% \bib{Sanso1}{book}{
% author={Sansò, Fernando},
% title={Elementi di teoria della probabilità},
% date={1996},
% publisher={Città-Studi},
% place={Milano},
% note={see: http://www.geolab.polimi.it/text-books/},
% }
% \bib{Sanso2}{book}{
% author={Sansò, Fernando},
% title={La teoria della stima},
% date={1996},
% publisher={Città-Studi},
% place={Milano},
% note={see: http://www.geolab.polimi.it/text-books/},
% }
% \bib{Strang}{book}{
% author={Strang, Gilbert},
% title={Introduction to linear algebra},
% date={2009},
% publisher={Wellesley-Cambridge press,},
% }
% \end{biblist} 
%
% \par\vfill\centerline{\small ***}\vfill
% \end{document} 
% 
%\iffalse
% END OF FILE linearregression.dtx
%\fi