% \iffalse meta-comment
%
%% File: l3text-map.dtx
%
% Copyright (C) 2022-2025 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "l3kernel bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/latex3
%
% for those people who are interested.
%
%<*driver>
\documentclass[full,kernel]{l3doc}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
% \title{^^A
%   The \pkg{l3text-map} module\\ Text processing (mapping)^^A
% }
%
% \author{^^A
%  The \LaTeX{} Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released 2025-07-11}
%
% \maketitle
%
% \begin{documentation}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3text-map} implementation}
%
%    \begin{macrocode}
%<*code>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=text>
%    \end{macrocode}
%
% \subsection{Mapping to text}
%
% \subsubsection{Common code}
%
% \begin{macro}[EXP]{\@@_map_tokens:nnn, \@@_map_tokens:enn}
% \begin{macro}[EXP]{\@@_map_loop:nnnw}
% \begin{macro}[EXP]{\@@_map_group:nnnn}
% \begin{macro}[EXP]{\@@_map_space:nnnw}
% \begin{macro}[EXP]{\@@_map_N_type:nnnN}
% \begin{macro}[EXP]{\@@_map_math_search:nnnnN}
% \begin{macro}[EXP]{\@@_map_math_search:nnnNNN}
% \begin{macro}[EXP]{\@@_map_math_loop:nnnNw}
% \begin{macro}[EXP]{\@@_map_math_N_type:nnnNN}
% \begin{macro}[EXP]{\@@_map_math_group:nnnNn} 
% \begin{macro}[EXP]{\@@_map_math_space:nnnNw}
% \begin{macro}[EXP]{\@@_map_codepoint:nnnn}
% \begin{macro}[EXP]{\@@_map_CR:nnnw}
% \begin{macro}[EXP]{\@@_map_CR:nnnN}
% \begin{macro}[EXP]{\@@_map_class:nnnn}
% \begin{macro}[EXP]{\@@_map_class:nnnnn}
% \begin{macro}[EXP]{\@@_map_lookahead:nnnnnw}
% \begin{macro}[EXP]{\@@_map_lookahead:nnnnnN}
% \begin{macro}[TF,EXP]{\@@_map_if_ignorable:n}
% \begin{macro}[EXP]{\@@_map_output:nn}
% \begin{macro}[EXP]{\text_map_break:}
% \begin{macro}[EXP]{\text_map_break:n}
%   Mapping to text all works the same way: using a standard \enquote{action}
%   loop on expanded text. There are different ways to determine the boundary
%   conditions for breaking: to avoid duplication, the common ideas are covered
%   here with the specifics split out. In all cases, anything which is not a
%   character token is treated as a boundary.
%   The entry point receives the text as |#1|, the name of the break type
%   as |#2| and the code to apply as |#3|. The loop is started with nothing
%   collected, and the end markers plus the break point follow the text.
%    \begin{macrocode}
\cs_new:Npn \@@_map_tokens:nnn #1#2#3
  {
    \@@_map_loop:nnnw {#3} {#2} { } #1
      \q_@@_recursion_tail \q_@@_recursion_stop
    \prg_break_point:Nn \text_map_break: { }
  }
\cs_generate_variant:Nn \@@_map_tokens:nnn { e }
%    \end{macrocode}
%   The main loop: |#1| is the code to apply, |#2| the break type and |#3|
%   the material collected so far. The head of the remaining input decides
%   whether the N-type, group or space handler takes over; the chosen
%   handler is given the same three arguments followed by the input.
%    \begin{macrocode}
\cs_new:Npn \@@_map_loop:nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    \tl_if_head_is_N_type:nTF {#4}
      { \@@_map_N_type:nnnN }
      {
        \tl_if_head_is_group:nTF {#4}
          { \@@_map_group:nnnn }
          { \@@_map_space:nnnw }
      }
    {#1} {#2} {#3} #4 \q_@@_recursion_stop
  }
%    \end{macrocode}
%   A brace group is a boundary. Anything collected (|#3|) is flushed, then
%   the group is handed to the code with its braces retained. As a braced
%   group can never be blank, the code is applied directly; the loop then
%   restarts with an empty collection.
%    \begin{macrocode}
\cs_new:Npn \@@_map_group:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    #1 { {#4} }
    \@@_map_loop:nnnw {#1} {#2} { }
  }
%    \end{macrocode}
%   An explicit space is also a boundary: the collected material is output,
%   then the space on its own, and the loop restarts with nothing collected.
%   The definition is constructed using \cs{use:e} so that the expansion of
%   \cs{c_space_tl} delimits the parameter text.
%    \begin{macrocode}
\use:e
  { \cs_new:Npn \exp_not:N \@@_map_space:nnnw #1#2#3 \c_space_tl }
  {
    \@@_map_output:nn {#1} {#3}
    #1 { ~ }
    \@@_map_loop:nnnw {#1} {#2} { }
  }
%    \end{macrocode}
%   For an N-type token (|#4|), first check for the end marker: if it is
%   found, output whatever is still collected and end the mapping. Otherwise,
%   begin the search through \cs{l_text_math_delims_tl} to see if the token
%   opens math mode.
%    \begin{macrocode}
\cs_new:Npn \@@_map_N_type:nnnN #1#2#3#4
  {
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        \@@_map_output:nn {#1} {#3}
        \text_map_break:
      }
    \exp_args:NV \@@_map_math_search:nnnnN
      \l_text_math_delims_tl {#1} {#2} {#3} #4
  }
%    \end{macrocode}
%   We need to exclude math mode here as quite apart from the conceptual
%   questions, it could contain implicit tokens. Those would cause all sorts
%   of issues, so are best just skipped over here.
%   Here |#1| is the list of math delimiters and |#5| the token under test.
%   The list is placed after the token, followed by two tail markers, so
%   that the entries can be read off in pairs by the next function.
%    \begin{macrocode}
\cs_new:Npn \@@_map_math_search:nnnnN #1#2#3#4#5
  {
    \@@_map_math_search:nnnNNN {#2} {#3} {#4} #5 #1
      \q_@@_recursion_tail \q_@@_recursion_tail
      \q_@@_recursion_stop
  }
%    \end{macrocode}
%   |#4| is the token under test, |#5| and |#6| one pair from the delimiter
%   list. Reaching the tail marker means the token does not open math mode,
%   and it is passed to \cs{@@_map_cs_check:nnnN}. On a match with |#5|, the
%   rest of the list is discarded, the collected material (|#3|) is output
%   and the math loop starts with the opener as the collected material and
%   |#6| as the token that ends it. Otherwise the next pair is tried.
%    \begin{macrocode}
\cs_new:Npn \@@_map_math_search:nnnNNN #1#2#3#4#5#6
  {
    \@@_if_q_recursion_tail_stop_do:Nn #5
      { \@@_map_cs_check:nnnN {#1} {#2} {#3} #4 }
    \token_if_eq_meaning:NNTF #4 #5
      {
        \@@_use_i_delimit_by_q_recursion_stop:nw
          {
            \@@_map_output:nn {#1} {#3}
            \@@_map_math_loop:nnnNw {#1} {#2} {#4} #6
          }
      }
      { \@@_map_math_search:nnnNNN {#1} {#2} {#3} #4 }
  }
%    \end{macrocode}
%   The loop used inside math mode has the same shape as the main one, with
%   the addition of |#4|: the token which closes the math material.
%    \begin{macrocode}
\cs_new:Npn \@@_map_math_loop:nnnNw #1#2#3#4#5 \q_@@_recursion_stop
  {
    \tl_if_head_is_N_type:nTF {#5}
      { \@@_map_math_N_type:nnnNN }
      {
        \tl_if_head_is_group:nTF {#5}
          { \@@_map_math_group:nnnNn }
          { \@@_map_math_space:nnnNw }
      }
    {#1} {#2} {#3} #4 #5 \q_@@_recursion_stop
  }
%    \end{macrocode}
%   |#3| is the math material collected so far, |#4| the closing token and
%   |#5| the current token. If the input ends before the closing token is
%   seen, what has been collected is output and the mapping stops. When the
%   current token matches the closing one, the math material plus |#4| is
%   output as a single item and the main loop resumes; any other token is
%   added to the math material.
%    \begin{macrocode}
\cs_new:Npn \@@_map_math_N_type:nnnNN #1#2#3#4#5
  {
    \@@_if_q_recursion_tail_stop_do:Nn #5
      {
        \@@_map_output:nn {#1} {#3}
        \text_map_break:
      }
    \token_if_eq_meaning:NNTF #5 #4
      {
        \@@_map_output:nn {#1} { #3 #4 }
        \@@_map_loop:nnnw {#1} {#2} { }
      }
      { \@@_map_math_loop:nnnNw {#1} {#2} { #3 #5 } #4 }
  }
%    \end{macrocode}
%   Groups and spaces inside math mode are not boundaries: they are added
%   to the math material (the group with its braces) and the math loop
%   continues.
%    \begin{macrocode}
\cs_new:Npn \@@_map_math_group:nnnNn #1#2#3#4#5
  { \@@_map_math_loop:nnnNw {#1} {#2} { #3 {#5} } #4 }
\use:e
  {
    \cs_new:Npn \exp_not:N \@@_map_math_space:nnnNw #1#2#3#4
      \c_space_tl
  }
  { \@@_map_math_loop:nnnNw {#1} {#2} { #3 ~ } #4 }
%    \end{macrocode}
%   Reached when the token (|#4|) does not open math mode. A control
%   sequence is a boundary: the collected material is output, then the
%   control sequence on its own. A character is passed to
%   \cs{@@_codepoint_process:nN} along with a partial call to
%   \cs{@@_map_codepoint:nnnn}, whose final argument is left to be supplied
%   by that function.
%    \begin{macrocode}
\cs_new:Npn \@@_map_cs_check:nnnN #1#2#3#4
  {
    \token_if_cs:NTF #4
      {
        \@@_map_output:nn {#1} {#3}
        #1 {#4}
        \@@_map_loop:nnnw {#1} {#2} { }
      }
      {
        \@@_codepoint_process:nN
          { \@@_map_codepoint:nnnn {#1} {#2} {#3} } #4
      }
  }
%    \end{macrocode}
%  We pull out a few special cases here. The carriage return case needs a bit of
%  context handling so has an auxiliary. Codepoint U+200D is the zero-width
%  joiner, which has no context to concern us: just don't break. (These special
%  cases apply to all forms of text mapping.)
%  In the code, |#3| is the material collected so far and |#4| the
%  character under examination. For a carriage return the collected material
%  is output before the auxiliary takes over; a zero-width joiner is simply
%  added to the collected material; everything else is dispatched by class.
%    \begin{macrocode}
\cs_new:Npn \@@_map_codepoint:nnnn #1#2#3#4
  {
    \@@_codepoint_compare:nNnTF {#4} = { "000D }
      {
        \@@_map_output:nn {#1} {#3}
        \@@_map_CR:nnnw {#1} {#2} {#4}
      }
      {
        \@@_codepoint_compare:nNnTF {#4} = { "200D }
          { \@@_map_loop:nnnw {#1} {#2} {#3#4} }
          { \@@_map_class:nnnn {#1} {#2} {#3} {#4} }
      }
  }
%    \end{macrocode}
%   A carriage return is a boundary unless it is immediately followed by
%   a line feed, in which case that pair is a boundary.
%   Here |#3| is the carriage return and |#4| the remaining input. Only an
%   N-type token can be a line feed, so if the input starts with a group or
%   a space the carriage return is output at once and the main loop resumes.
%    \begin{macrocode}
\cs_new:Npn \@@_map_CR:nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    \tl_if_head_is_N_type:nTF {#4}
      { \@@_map_CR:nnnN {#1} {#2} {#3} }
      {
        #1 {#3}
        \@@_map_loop:nnnw {#1} {#2} { }
      }
        #4 \q_@@_recursion_stop
  }
%    \end{macrocode}
%   |#3| is the carriage return and |#4| the token which follows it. At the
%   end of the input the carriage return is output and the mapping stops.
%   If |#4| is a line feed character, the pair is output as one item.
%   Otherwise the carriage return is a boundary on its own: it is output,
%   and |#4| is put back into the input so that the main loop can process
%   it in the normal way (it must not be lost, nor must the carriage return
%   be examined a second time).
%    \begin{macrocode}
\cs_new:Npn \@@_map_CR:nnnN #1#2#3#4
  {
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        #1 {#3}
        \text_map_break:
      }
    \bool_lazy_and:nnTF
      { ! \token_if_cs_p:N #4 }
      { \int_compare_p:nNn { `#4 } = { "000A } }
      {
        \@@_map_output:nn {#1} {#3#4}
        \@@_map_loop:nnnw {#1} {#2} { }
      }
      {
        #1 {#3}
        \@@_map_loop:nnnw {#1} {#2} { } #4
      }
  }
%    \end{macrocode}
%   There are various classes of character, and we deal with them all in
%   the same general way. We need to examine the relevant list of codepoints:
%   if we get a hit, then we do whatever the relevant action is. To keep names
%   short and to allow code sharing, we have two ways of naming the functions:
%   most class names are unique, so it's only where we see the same name used
%   in both break classes that we need more control.
%   The class of the character (|#4|) is found using the kernel function
%   for the current break type (|#2|) and passed on as a fifth argument.
%   Only the class look-up is expanded: the code, the collected material
%   and the character itself are protected using \cs{exp_not:n}, so that
%   (for example) active characters are passed on untouched.
%    \begin{macrocode}
\cs_new:Npn \@@_map_class:nnnn #1#2#3#4
  {
    \use:e
      {
        \exp_not:N \@@_map_class:nnnnn
          { \exp_not:n {#1} }
          { \exp_not:n {#2} }
          { \exp_not:n {#3} }
          { \exp_not:n {#4} }
          {
            \use:c { __kernel_codepoint_to_ #2 _class:n }
              { \@@_codepoint_from_chars:Nw #4 }
          }
      }
  }
%    \end{macrocode}
%   With the class name available as |#5|, use the function named after
%   that class if one is defined, falling back to the |Other| handler. In
%   both cases the function receives the code, break type, collected
%   material and character.
%    \begin{macrocode}
\cs_new:Npn \@@_map_class:nnnnn #1#2#3#4#5
  {
    \cs_if_exist_use:cF { @@_map_ #5 :nnnn }
      { \@@_map_Other:nnnn }
        {#1} {#2} {#3} {#4}
  }
%    \end{macrocode}
%   A generic look-ahead setup: we need to handle both the previously collected
%   tokens and any \enquote{conditional} ones. The latter occur when looking
%   ahead for word-breaking: these \emph{may} be combined with the collected
%   tokens, but if we hit the end-of-loop, need to be output separately.
%   The arguments are the code to apply (|#1|), the break type (|#2|), the
%   collected material (|#3|), the conditional material (|#4|) and the
%   function to continue with if a character is found (|#5|). If the input
%   does not start with an N-type token, the conditional material is put
%   back into the input and the main loop resumes with |#3| still collected.
%    \begin{macrocode}
\cs_new:Npn \@@_map_lookahead:nnnnnw #1#2#3#4#5#6 \q_@@_recursion_stop
  {
    \tl_if_head_is_N_type:nTF {#6}
      { \@@_map_lookahead:nnnnnN {#1} {#2} {#3} {#4} {#5} }
      { \@@_map_loop:nnnw {#1} {#2} {#3} #4 }
    #6 \q_@@_recursion_stop
  }
%    \end{macrocode}
%   |#6| is the token found by the look-ahead. At the end of the input the
%   collected material is output, followed (separately) by the conditional
%   material if there is any. A control sequence is a boundary: the
%   collected material is output and the main loop restarts with the
%   conditional material and the control sequence back in the input. For a
%   character, the continuation |#5| is called with the first four arguments
%   and whatever \cs{@@_codepoint_process:nN} appends for the character.
%    \begin{macrocode}
\cs_new:Npn \@@_map_lookahead:nnnnnN #1#2#3#4#5#6
  {
    \@@_if_q_recursion_tail_stop_do:Nn #6
      {
        #1 {#3}
        \tl_if_blank:nF {#4} { #1 {#4} }
      }
    \token_if_cs:NTF #6
      {
        #1 {#3}
        \@@_map_loop:nnnw {#1} {#2} { } #4
      }
      { \@@_codepoint_process:nN { #5 {#1} {#2} {#3} {#4} } }
        #6
  }
%    \end{macrocode}
%   To deal with \enquote{ignored} characters for word break mapping: needed
%   for generic |Regional_Indicator| function, so set up here.
%   The test is true exactly when the class name given is one of |ZWJ|,
%   |Format| or |Extend|.
%    \begin{macrocode}
\prg_new_conditional:Npnn \@@_map_if_ignorable:n #1 { TF }
  {
    \str_case:nnTF {#1}
      {
        { ZWJ }    { }
        { Format } { }
        { Extend } { }
      }
      \prg_return_true:
      \prg_return_false:
  }
%    \end{macrocode}
%   For the end of the process.
%   Apply the code (|#1|) to the material (|#2|), but only if there is
%   something other than spaces to pass on.
%    \begin{macrocode}
\cs_new:Npn \@@_map_output:nn #1#2
  {
    \tl_if_blank:nTF {#2}
      { }
      { #1 {#2} }
  }
%    \end{macrocode}
%   The two break functions both jump to the break point marked with
%   \cs{text_map_break:}; the \texttt{n}-type version passes on the code
%   it is given to \cs{prg_map_break:Nn}.
%    \begin{macrocode}
\cs_new:Npn \text_map_break:
  { \prg_map_break:Nn \text_map_break: { } }
\cs_new:Npn \text_map_break:n
  { \prg_map_break:Nn \text_map_break: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {
%     \@@_map_Control:nnnn            ,
%     \@@_map_Newline:nnnn            ,
%     \@@_map_Extend:nnnn             ,
%     \@@_map_Format:nnnn             ,
%     \@@_map_SpacingMark:nnnn        ,
%     \@@_map_Other:nnnn              ,
%     \@@_map_Regional_Indicator:nnnn
%   }
% \begin{macro}[EXP]{\@@_map_Regional_Indicator_aux:nnnnn}
%   A small number of classes appear in both forms of breaking and have the
%   same behavior. For |Control| and |Newline|, we set up here as they are the
%   same outcome. We have the same story for |Format| and |SpacingMark|,
%   which are functionally the same as |Extend|.
%   A |Control| character breaks on both sides: the collected material
%   (|#3|) and the character (|#4|) are output separately and the loop
%   restarts with nothing collected.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Control:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_output:nn {#1} {#4}
    \@@_map_loop:nnnw {#1} {#2} { }
  }
\cs_new_eq:NN \@@_map_Newline:nnnn \@@_map_Control:nnnn
%    \end{macrocode}
%   An |Extend| character never starts a new item: it is added to the
%   collected material and the loop continues. The |Format| and
%   |SpacingMark| functions are copies of this one.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Extend:nnnn #1#2#3#4
  { \@@_map_loop:nnnw {#1} {#2} {#3#4} }
\cs_new_eq:NN \@@_map_Format:nnnn \@@_map_Extend:nnnn
\cs_new_eq:NN \@@_map_SpacingMark:nnnn \@@_map_Extend:nnnn
%    \end{macrocode}
%   The fallback for any class without a dedicated function: there is a
%   break before the character, so the collected material is output and the
%   character becomes the start of the next collection.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Other:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_loop:nnnw {#1} {#2} {#4}
  }
%    \end{macrocode}
%  The Regional Indicator rule means looking ahead and dealing with the
%  case where there are two in a row. So we use a look ahead to pick them
%  off. As there is only one range the values are hard-coded. For
%  word breaking, we also need to allow for the various extenders.
%  The first indicator (|#4|) always starts a new item, so the collected
%  material is output and the look-ahead begins with the indicator as the
%  new collection and no conditional material.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Regional_Indicator:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_lookahead:nnnnnw {#1} {#2} {#4} { }
      \@@_map_Regional_Indicator_aux:nnnnn
  }
%    \end{macrocode}
%   |#3| is the material collected (starting with the first indicator) and
%   |#5| the character found by the look-ahead. A second indicator, i.e.~a
%   codepoint in the range |"1F1E6| to |"1F1FF|, is added to the collection
%   and the main loop resumes. For anything else, the character is put back
%   into the input, except that when word breaking an ignorable character
%   is collected and the look-ahead repeated. As the ignorable classes
%   (|Extend|, |Format| and |ZWJ|) are word break classes, the test has to
%   use the word break class of the character.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Regional_Indicator_aux:nnnnn #1#2#3#4#5
  {
    \bool_lazy_or:nnTF
      { \@@_codepoint_compare_p:nNn {#5} < { "1F1E6 } }
      { \@@_codepoint_compare_p:nNn {#5} > { "1F1FF } }
      {
        \str_if_eq:nnTF {#2} { wordbreak }
          {
            \exp_args:Ne \@@_map_if_ignorable:nTF
              {
                \__kernel_codepoint_to_wordbreak_class:n
                  { \@@_codepoint_from_chars:Nw #5 }
              }
              {
                \@@_map_lookahead:nnnnnw {#1} {#2} {#3#5} { }
                  \@@_map_Regional_Indicator_aux:nnnnn
              }
              { \@@_map_loop:nnnw {#1} {#2} {#3} #5 }
          }
          { \@@_map_loop:nnnw {#1} {#2} {#3} #5 }
      }
      { \@@_map_loop:nnnw {#1} {#2} {#3#5} }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Grapheme mapping}
%
% \begin{macro}[EXP]{\text_map_function:nN}
% \begin{macro}[EXP]{\text_map_tokens:nn}
% \begin{macro}[EXP]{\@@_map_Prepend:nnnn}
% \begin{macro}[EXP]{\@@_map_Prepend_aux:nnnnn}
% \begin{macro}[EXP]{\@@_map_Prepend:nnn}
% \begin{macro}[EXP]
%   {
%     \@@_map_L:nnnn   ,
%     \@@_map_LV:nnnn  ,
%     \@@_map_V:nnnn   ,
%     \@@_map_LVT:nnnn ,
%     \@@_map_T:nnnn
%   }
% \begin{macro}[EXP]{\@@_map_hangul:nnnw}
% \begin{macro}[EXP]{\@@_map_hangul:nnnN}
% \begin{macro}[EXP]{\@@_map_hangul:nnnn}
% \begin{macro}[EXP]{\@@_map_hangul_aux:nnnnw}
% \begin{macro}[EXP]{\@@_map_hangul:nnnnnw}
% \begin{macro}[EXP]{\@@_map_hangul_next:nnnnn}
% \begin{macro}[EXP]{\@@_map_hangul_end:nw}
% \begin{macro}[EXP]
%   {
%     \@@_map_hangul_L:nnn   ,
%     \@@_map_hangul_LV:nnn  ,
%     \@@_map_hangul_V:nnn   ,
%     \@@_map_hangul_LVT:nnn ,
%     \@@_map_hangul_T:nnn
%   }
%   The standard lead-off for an action loop.
%   Both functions expand the text using \cs{text_expand:n} and then start
%   the common loop with the break type set to |grapheme|; the second
%   argument is the function or code to apply to each item.
%    \begin{macrocode}
\cs_new:Npn \text_map_function:nN #1#2
  {
    \@@_map_tokens:enn { \text_expand:n {#1} }
      { grapheme } {#2}
  }
\cs_new:Npn \text_map_tokens:nn #1#2
  {
    \@@_map_tokens:enn { \text_expand:n {#1} }
      { grapheme } {#2}
  }
%    \end{macrocode}
%   Output anything collected earlier, then combine the |Prepend| character
%   with what follows. The only exclusions are control characters.
%   In the first function, |#3| is the material collected so far and |#4|
%   the |Prepend| character, which becomes the collection for the
%   look-ahead.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Prepend:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_lookahead:nnnnnw {#1} { grapheme } {#4} { }
      \@@_map_Prepend_aux:nnnnn
  }
%    \end{macrocode}
%   |#3| is the |Prepend| character and |#5| the character which follows.
%   A line feed or carriage return never combines: the |Prepend| character
%   is output on its own and the line-ending character is put back into
%   the input, so that the main loop applies the usual handling to it
%   (including pairing a carriage return with a following line feed).
%   Anything else is checked by class in \cs{@@_map_Prepend:nnn}.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Prepend_aux:nnnnn #1#2#3#4#5
  {
    \bool_lazy_or:nnTF
      { \@@_codepoint_compare_p:nNn {#5} = { "000A } }
      { \@@_codepoint_compare_p:nNn {#5} = { "000D } }
      {
        #1 {#3}
        \@@_map_loop:nnnw {#1} { grapheme } { } #5
      }
      { \@@_map_Prepend:nnn {#1} {#3} {#5} }
  }
%    \end{macrocode}
%   |#2| is the |Prepend| character and |#3| the one after it. If the
%   latter has grapheme class |Control| it is put back into the input, with
%   the |Prepend| character left as the collected material; otherwise the
%   two are collected together.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Prepend:nnn #1#2#3
  {
    \str_if_eq:eeTF
      { Control }
      {
        \__kernel_codepoint_to_grapheme_class:n
          { \@@_codepoint_from_chars:Nw #3 }
      }
      { \@@_map_loop:nnnw {#1} { grapheme } {#2} #3 }
      { \@@_map_loop:nnnw {#1} { grapheme } {#2#3} }
  }
%    \end{macrocode}
%   Hangul needs additional treatment. First we have to deal with
%   the start-of-Hangul position: output what we had up to now, then
%   move to the specialist handler. The idea here is to pick off the
%   different codepoint types one at a time, tracking what else can be
%   considered at each stage until we hit the end of the viable types.
%   Other than that, we just keep building up the Hangul codepoints
%   using a dedicated version of the loop from above.
%   Each of the start-of-Hangul functions passes three things to the
%   handler: the code to apply, the Hangul character as the new collection
%   and a semi-colon separated list of the classes which may follow. That
%   list is the only difference between them.
%    \begin{macrocode}
\cs_new:Npn \@@_map_L:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_hangul:nnnw
      {#1} {#4} { L ; V ; LV ; LVT }
  }
\cs_new:Npn \@@_map_LV:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_hangul:nnnw
      {#1} {#4} { V ; T }
  }
\cs_new_eq:NN \@@_map_V:nnnn \@@_map_LV:nnnn
\cs_new:Npn \@@_map_LVT:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_hangul:nnnw
      {#1} {#4} { T }
  }
\cs_new_eq:NN \@@_map_T:nnnn \@@_map_LVT:nnnn
%    \end{macrocode}
%   The Hangul loop: |#1| is the code to apply, |#2| the Hangul material
%   collected and |#3| the list of classes allowed next. A group or space
%   at the head of the input ends the sequence, so the collection is output
%   and the main loop takes over.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul:nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    \tl_if_head_is_N_type:nTF {#4}
      { \@@_map_hangul:nnnN {#1} {#2} {#3} }
      {
        #1 {#2}
        \@@_map_loop:nnnw {#1} { grapheme } { }
      }
    #4 \q_@@_recursion_stop
  }
%    \end{macrocode}
%   |#4| is the token following the Hangul material. At the end of the
%   input the collection is output and the mapping stops. A control
%   sequence ends the Hangul sequence: the collection is output and the
%   control sequence is put back into the input for the main loop to deal
%   with (it has been removed from the input as |#4|, so would otherwise be
%   lost). A character is passed on for classification.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul:nnnN #1#2#3#4
  {
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        #1 {#2}
        \text_map_break:
      }
    \token_if_cs:NTF #4
      {
        #1 {#2}
        \@@_map_loop:nnnw {#1} { grapheme } { } #4
      }
      {
        \@@_codepoint_process:nN
          { \@@_map_hangul:nnnn {#1} {#2} {#3} } #4
      }
  }
%    \end{macrocode}
%   With the character available as |#4|, find its grapheme class and start
%   working through the list of allowed classes (|#3|), which is terminated
%   by the recursion quarks. The code to apply (|#1|) may be several tokens,
%   so it has to be passed on as a braced argument: this is done using the
%   variant \cs{@@_map_hangul_aux:nnnew}, which expands only the class
%   look-up. (The \cs{exp_args_generate:n} line is retained only so that
%   \cs{exp_args:NNnne} remains defined.)
%    \begin{macrocode}
\exp_args_generate:n { Nnne }
\cs_new:Npn \@@_map_hangul:nnnn #1#2#3#4
  {
    \@@_map_hangul_aux:nnnew {#1} {#2} {#4}
      {
        \__kernel_codepoint_to_grapheme_class:n
          { \@@_codepoint_from_chars:Nw #4 }
      }
      #3 ; \q_recursion_tail ; \q_recursion_stop
  }
%    \end{macrocode}
%   |#2| is the Hangul material collected, |#3| the new character, |#4| its
%   class and |#5| the next entry from the semi-colon separated list. If the
%   list is exhausted, the character cannot continue the Hangul sequence:
%   it is put back into the input and the main loop resumes with the
%   collection intact. Otherwise the entry is compared with the class.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul_aux:nnnnw #1#2#3#4#5 ;
  {
    \quark_if_recursion_tail_stop_do:nn {#5}
      { \@@_map_loop:nnnw {#1} { grapheme } {#2} #3 }
    \@@_map_hangul:nnnnnw {#1} {#2} {#3} {#4} {#5}
  }
\cs_generate_variant:Nn \@@_map_hangul_aux:nnnnw { nnne }
%    \end{macrocode}
%   |#4| is the class of the new character, |#5| the list entry under test
%   and |#6| the remainder of the list. On a match, the function named
%   after the class takes the code, collection and character; otherwise
%   the next list entry is examined.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul:nnnnnw #1#2#3#4#5#6 \q_recursion_stop
  {
    \str_if_eq:nnTF {#4} {#5}
      { \use:c { @@_map_hangul_ #5 :nnn } {#1} {#2} {#3} }
      { \@@_map_hangul_next:nnnnn {#1} {#2} {#3} {#4} {#6} }
  }
%    \end{macrocode}
%   To try the next entry, the remainder of the list (|#5|) is put back
%   along with the \cs{q_recursion_stop} which was removed as a delimiter.
%   The |end| function leaves its first argument and discards everything up
%   to \cs{q_@@_recursion_stop}.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul_next:nnnnn #1#2#3#4#5
  { \@@_map_hangul_aux:nnnnw {#1} {#2} {#3} {#4} #5 \q_recursion_stop }
\cs_new:Npn \@@_map_hangul_end:nw #1#2 \q_@@_recursion_stop {#1}
%    \end{macrocode}
%   Once a character has been accepted as a continuation, it is added to
%   the Hangul collection (|#2#3|) and the loop carries on with the list of
%   classes allowed after it. The lists must be in the same semi-colon
%   separated form as is used at the start of a Hangul sequence, as that is
%   what \cs{@@_map_hangul_aux:nnnnw} parses: after |L| we allow |L|, |V|,
%   |LV| or |LVT|, and after |LV| or |V| we allow |V| or |T|.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul_L:nnn #1#2#3
  {
    \@@_map_hangul:nnnw
      {#1} {#2#3} { L ; V ; LV ; LVT }
  }
\cs_new:Npn \@@_map_hangul_LV:nnn #1#2#3
  {
    \@@_map_hangul:nnnw
      {#1} {#2#3} { V ; T }
  }
\cs_new_eq:NN \@@_map_hangul_V:nnn \@@_map_hangul_LV:nnn
%    \end{macrocode}
%   After an |LVT| or |T| character has been added to the collection, only
%   a further |T| is allowed.
%    \begin{macrocode}
\cs_new:Npn \@@_map_hangul_LVT:nnn #1#2#3
  {
    \@@_map_hangul:nnnw
      {#1} {#2#3} { T }
  }
\cs_new_eq:NN \@@_map_hangul_T:nnn \@@_map_hangul_LVT:nnn
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Word break mapping}
%
% \begin{macro}[EXP]{\text_words_map_function:nN}
% \begin{macro}[EXP]{\text_words_map_tokens:nn}
% \begin{macro}[EXP]{\@@_map_collect:nnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxi:nnnnnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxii:nnnnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxiii:n}
% \begin{macro}[EXP]{\@@_map_collect_auxiv:nnnnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxv:nnnnnnn}
% \begin{macro}[EXP]
%   {
%     \@@_map_ALetter:nnnn       ,
%     \@@_map_Hebrew_Letter:nnnn ,
%     \@@_map_Katakana:nnnn      ,
%     \@@_map_Numeric:nnnn       ,
%     \@@_map_WSegSpace:nnnn     ,
%     \@@_map_ExtendNumLet:nnnn
%   }
% \begin{macro}[EXP]{\@@_map_ExtendNumLet_auxi:nnnnn}
% \begin{macro}[EXP]{\@@_map_ExtendNumLet_auxii:nnnn}
%   The standard lead-off for an action loop.
%   Both functions expand the text using \cs{text_expand:n} and then start
%   the common loop with the break type set to |wordbreak|; the second
%   argument is the function or code to apply to each item.
%    \begin{macrocode}
\cs_new:Npn \text_words_map_function:nN #1#2
  {
    \@@_map_tokens:enn { \text_expand:n {#1} }
      { wordbreak } {#2}
  }
\cs_new:Npn \text_words_map_tokens:nn #1#2
  {
    \@@_map_tokens:enn { \text_expand:n {#1} }
      { wordbreak } {#2}
  }
%    \end{macrocode}
%   The main rule for word breaking is that characters bind to following
%   ones, potentially either allowing for \emph{or} totally ignoring
%   intervening ones. For each class, we are passed a list of classes that
%   bind and ones that we should allow in between. In all cases, the classes
%   |Extend|, |Format| and |ZWJ| need to be entirely ignored: they are hard
%   coded and handled separately from the in-between ones. Notice that we use
%   \cs{str_case:nnTF} to make our boolean here: that way, all that needs to be
%   passed internally are lists of classes.
%   The collector takes the code to apply (|#1|), the material collected
%   (|#2|), the list of classes which bind (|#3|), the list of in-between
%   classes (|#4|) and the list of classes which must follow an in-between
%   character (|#5|). It starts a look-ahead with the three lists stored in
%   the continuation.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect:nnnnn #1#2#3#4#5
  {
    \@@_map_lookahead:nnnnnw {#1} { wordbreak } {#2} { }
      { \@@_map_collect_auxi:nnnnnnnn {#3} {#4} {#5} }
  }
%    \end{macrocode}
%   The first three arguments are the lists stored by the collector, the
%   next four come from the look-ahead (code, break type, collected and
%   conditional material) and |#8| is the character found. The word break
%   class of the character is placed first, followed by the code, the
%   collected material, the three lists and the character.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxi:nnnnnnnn #1#2#3#4#5#6#7#8
  {
    \exp_args:Ne \@@_map_collect_auxii:nnnnnnn
      {
        \__kernel_codepoint_to_wordbreak_class:n
          { \@@_codepoint_from_chars:Nw #8 }
      }
	  {#4} {#6} {#1} {#2} {#3} {#8}
  }
%    \end{macrocode}
%   We now need to deal with the three possible positive outcomes of examining
%   the next character. The first is that we have found one of the binding
%   characters that ends the current cycle: we then pass on to the appropriate
%   function.  Second, we have the ignored characters: if we find these, we
%   loop back around. Finally, we look at the \enquote{in-between} characters:
%   if one is found, we need a further look ahead to reach a decision. Rather
%   than have extra complexity in the setup, we have a hard-coded skipping of
%   |ExtendNumLet| for |WSegSpace| (as |ExtendNumLet| only applies to
%   |ALetter|, |Hebrew_Letter|, |Numeric| and |Katakana|).
%   The arguments are the class of the new character (|#1|), the code to
%   apply (|#2|), the collected material (|#3|), the binding, in-between
%   and following class lists (|#4|, |#5| and |#6|) and the new character
%   (|#7|). If none of the three outcomes applies, the collected material is
%   output and the character is put back into the input for the main loop.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxii:nnnnnnn #1#2#3#4#5#6#7
  {
    \str_case:neTF {#1}
      {
        \tl_map_function:eN
          {
            #4
            \str_if_eq:nnF {#4} { { WSegSpace } } { { ExtendNumLet } }
          }
          \@@_map_collect_auxiii:n
      }
      {
        \cs_if_exist_use:cF { @@_map_ #1 :nnnn }
          { \@@_map_Other:nnnn }
            {#2} { wordbreak } { } {#3#7}
      }
      {
        \@@_map_if_ignorable:nTF {#1}
          { \@@_map_collect:nnnnn {#2} {#3#7} {#4} {#5} {#6} }
          {
            \str_case:neTF {#1}
              { \tl_map_function:nN {#5} \@@_map_collect_auxiii:n }
              {
                \@@_map_lookahead:nnnnnw {#2} { wordbreak } {#3} {#7}
                  { \@@_map_collect_auxiv:nnnnnnn {#5} {#6} }
              }
              {
                \@@_map_output:nn {#2} {#3}
                \@@_map_loop:nnnw {#2} { wordbreak } { } #7
              }
          }
      }
  }
%    \end{macrocode}
%   Used when mapping over a list of class names to turn each one into a
%   case for \cs{str_case:nnTF} with no associated code.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxiii:n #1
  { \exp_not:n { {#1} { } } }
%    \end{macrocode}
%   We now have a character which \emph{may} bind to the previous one if
%   the next character is of the correct class also. So we carry forward the
%   collected material and the conditional character, then look ahead again.
%   If successful, combine together and move on using the new class, otherwise
%   output and restart where we were.
%   In the first auxiliary, |#1| and |#2| are the in-between and following
%   class lists stored in the continuation, |#3| to |#6| come from the
%   look-ahead (code, break type, collected and conditional material) and
%   |#7| is the character found, whose word break class is placed first for
%   the second auxiliary.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxiv:nnnnnnn #1#2#3#4#5#6#7
  {
    \exp_args:Ne \@@_map_collect_auxv:nnnnnnn
      {
        \__kernel_codepoint_to_wordbreak_class:n
          { \@@_codepoint_from_chars:Nw #7 }
      }
	  {#3} {#5} {#6} {#1} {#2} {#7}
  }
%    \end{macrocode}
%   The arguments are the class of the new character (|#1|), the code to
%   apply (|#2|), the collected material (|#3|), the conditional material
%   (|#4|), the in-between and following class lists (|#5| and |#6|) and the
%   new character (|#7|). If the class is in the following list, everything
%   is combined and handed to the function for that class. An ignorable
%   character is added to the conditional material and the look-ahead
%   repeated. Otherwise, the collected material is output and both the
%   conditional material and the character go back into the input.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxv:nnnnnnn #1#2#3#4#5#6#7
  {
    \str_case:neTF {#1}
      { \tl_map_function:nN {#6} \@@_map_collect_auxiii:n }
      { \use:c { @@_map_ #1 :nnnn } {#2} { wordbreak } { } {#3#4#7} }
      {
        \@@_map_if_ignorable:nTF {#1}
          {
            \@@_map_lookahead:nnnnnw {#2} { wordbreak } {#3} {#4#7}
              { \@@_map_collect_auxiv:nnnnnnn {#5} {#6} }
          }
          {
            \@@_map_output:nn {#2} {#3}
            \@@_map_loop:nnnw {#2} { wordbreak } { } #4#7
          }
      }
  }
%    \end{macrocode}
%   Use the generic collector.
%   In each case the material collected before the character (|#3|) is
%   output, then the character (|#4|) starts a new collection. The three
%   lists give the classes which bind directly, those which may come in
%   between and those which must then follow.
%    \begin{macrocode}
\cs_new:Npn \@@_map_ALetter:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_collect:nnnnn {#1} {#4}
      { { ALetter } { Hebrew_Letter } { Numeric } }
      { { MidLetter } { MidNumLet } { Single_Quote } }
      { { ALetter } { Hebrew_Letter } }
  }
%    \end{macrocode}
%   For |Hebrew_Letter|, a |Single_Quote| is in the list of classes which
%   bind directly, and |Double_Quote| is one of the in-between classes.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Hebrew_Letter:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_collect:nnnnn {#1} {#4}
      { { ALetter } { Hebrew_Letter } { Numeric } { Single_Quote } }
      { { MidLetter } { MidNumLet } { Double_Quote } }
      { { Hebrew_Letter } }
  }
%    \end{macrocode}
%   |Katakana| binds only to itself in the lists given here, with no
%   in-between classes.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Katakana:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_collect:nnnnn {#1} {#4} { { Katakana } } { } { }
  }
%    \end{macrocode}
%   |Numeric| binds to letters and other numbers; the in-between classes
%   must be followed by a further |Numeric| character.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Numeric:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_collect:nnnnn {#1} {#4}
      { { ALetter } { Hebrew_Letter } { Numeric } }
      { { MidNum } { MidNumLet } { Single_Quote } }
      { { Numeric } }
  }
%    \end{macrocode}
%   |WSegSpace| binds only to itself; this exact list is what the collector
%   checks for when deciding not to add |ExtendNumLet|.
%    \begin{macrocode}
\cs_new:Npn \@@_map_WSegSpace:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_collect:nnnnn {#1} {#4} { { WSegSpace } } { } { }
  }
%    \end{macrocode}
%   We should only get here in the case we have a \enquote{dangling} extender.
%   If so, look ahead for characters to bind to, then for the set of three
%   that we need to skip over.
%   The material collected so far (|#3|) is output and the extender (|#4|)
%   becomes the collection for the look-ahead.
%    \begin{macrocode}
\cs_new:Npn \@@_map_ExtendNumLet:nnnn #1#2#3#4
  {
    \@@_map_output:nn {#1} {#3}
    \@@_map_lookahead:nnnnnw {#1} { wordbreak } {#4} { }
      \@@_map_ExtendNumLet_auxi:nnnnn
  }
%    \end{macrocode}
%   The arguments come from the look-ahead: |#1| is the code to apply, |#3|
%   the collected material and |#5| the character found. The word break
%   class of the character is placed first for the second auxiliary.
%    \begin{macrocode}
\cs_new:Npn \@@_map_ExtendNumLet_auxi:nnnnn #1#2#3#4#5
  {
    \exp_args:Ne \@@_map_ExtendNumLet_auxii:nnnn
      {
        \__kernel_codepoint_to_wordbreak_class:n
          { \@@_codepoint_from_chars:Nw #5 }
      }
      {#1} {#3} {#5}
  }
%    \end{macrocode}
%   |#1| is the class of the new character, |#2| the code to apply, |#3|
%   the collected material and |#4| the new character. For one of the five
%   classes listed, the character is combined with the collection and the
%   function for its class takes over. An ignorable character is collected
%   and the look-ahead repeated. Anything else ends the item: the collection
%   is output and the character goes back into the input.
%    \begin{macrocode}
\cs_new:Npn \@@_map_ExtendNumLet_auxii:nnnn #1#2#3#4
  {
    \str_case:nnTF {#1}
      {
        { ALetter }       { }
        { Hebrew_Letter } { }
        { Numeric }       { }
        { Katakana }      { }
        { ExtendNumLet }  { }
      }
      {
        \cs_if_exist_use:cF { @@_map_ #1 :nnnn }
          { \@@_map_Other:nnnn }
            {#2} { wordbreak } { } {#3#4}
      }
      {
        \@@_map_if_ignorable:nTF {#1}
          {
            \@@_map_lookahead:nnnnnw {#2} { wordbreak } {#3#4} { }
              \@@_map_ExtendNumLet_auxi:nnnnn
          }
          {
            \@@_map_output:nn {#2} {#3}
            \@@_map_loop:nnnw {#2} { wordbreak } { } #4
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Inline mappings}
%
% \begin{macro}{\text_map_inline:nn, \text_words_map_inline:nn}
%   The standard non-expandable inline version.
%   The inline code (|#2|) is stored in a global function whose name
%   includes the current value of \cs{g__kernel_prg_map_int}, which is
%   incremented first. That function is then used with
%   \cs{text_map_function:nN}, and the counter is decremented again at the
%   break point.
%    \begin{macrocode}
\cs_new_protected:Npn \text_map_inline:nn #1#2
  {
    \int_gincr:N \g__kernel_prg_map_int
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    \exp_args:Nnc \text_map_function:nN {#1}
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
    \prg_break_point:Nn \text_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
%    \end{macrocode}
%   The word-based version is set up in the same way, storing the inline
%   code in a numbered global function, but maps using
%   \cs{text_words_map_function:nN}.
%    \begin{macrocode}
\cs_new_protected:Npn \text_words_map_inline:nn #1#2
  {
    \int_gincr:N \g__kernel_prg_map_int
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    \exp_args:Nnc \text_words_map_function:nN {#1}
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
    \prg_break_point:Nn \text_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
%    \end{macrocode}
% \end{macro}
%
%    \begin{macrocode}
%</code>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex