% \iffalse meta-comment
%
%% File: l3text-map.dtx
%
% Copyright (C) 2022-2025 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "l3kernel bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/latex3
%
% for those people who are interested.
%
%<*driver>
\documentclass[full,kernel]{l3doc}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
% \title{^^A
%   The \pkg{l3text-map} module\\ Text processing (mapping)^^A
% }
%
% \author{^^A
%  The \LaTeX{} Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released 2025-04-14}
%
% \maketitle
%
% \begin{documentation}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3text-map} implementation}
%
%    \begin{macrocode}
%<*code>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=text>
%    \end{macrocode}
%
% \subsection{Mapping to text}
%
% \subsubsection{Common code}
%
% \begin{macro}[EXP]{\@@_map_function:nnN, \@@_map_function:enN}
% \begin{macro}[EXP]{\@@_map_loop:Nnnw}
% \begin{macro}[EXP]{\@@_map_group:Nnnn}
% \begin{macro}[EXP]{\@@_map_space:Nnnw}
% \begin{macro}[EXP]{\@@_map_N_type:NnnN}
% \begin{macro}[EXP]{\@@_map_codepoint:Nnnn}
% \begin{macro}[EXP]{\@@_map_CR:Nnnw}
% \begin{macro}[EXP]{\@@_map_CR:NnnN}
% \begin{macro}[EXP]{\@@_map_class:Nnnn}
% \begin{macro}[EXP]{\@@_map_class:Nnnnn}
% \begin{macro}[EXP]{\@@_map_lookahead:Nnnnnw}
% \begin{macro}[EXP]{\@@_map_lookahead:NnnnnN}
% \begin{macro}[TF,EXP]{\@@_map_if_ignorable:n}
% \begin{macro}[EXP]{\@@_map_output:Nn}
% \begin{macro}[EXP]{\text_map_break:}
% \begin{macro}[EXP]{\text_map_break:n}
%   Mapping to text all works the same way: using standard \enquote{action}
%   loop on expanded text. There are different ways to determine the boundary
%   conditions for breaking: to avoid duplication, the common ideas are covered
%   here with the specifics split out. In all cases, anything which is not a
%   character token is treated as a boundary.
%    \begin{macrocode}
\cs_new:Npn \@@_map_function:nnN #1#2#3
  {
    % Common entry point for the text mappings.
    %   #1 = text to walk through
    %   #2 = name of the break class set (used to build the class look-up)
    %   #3 = function applied to each item found
    % The loop starts with nothing collected; the end of the input is
    % marked by the recursion tail/stop quarks.
    \@@_map_loop:Nnnw #3 {#2} { } #1
      \q_@@_recursion_tail \q_@@_recursion_stop
    % Target used by \text_map_break: and \text_map_break:n.
    \prg_break_point:Nn \text_map_break: { }
  }
\cs_generate_variant:Nn \@@_map_function:nnN { e }
\cs_new:Npn \@@_map_loop:Nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    % Main dispatcher.
    %   #1 = function, #2 = break class set, #3 = material collected so far,
    %   #4 = remaining input (up to the stop marker).
    % Select a handler from the kind of item at the head of #4: a single
    % (N-type) token, a brace group, or otherwise a space.
    \tl_if_head_is_N_type:nTF {#4}
      { \@@_map_N_type:NnnN }
      {
        \tl_if_head_is_group:nTF {#4}
          { \@@_map_group:Nnnn }
          { \@@_map_space:Nnnw }
      }
    % The selected handler receives everything unchanged.
    #1 {#2} {#3} #4 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_group:Nnnn #1#2#3#4
  {
    % A brace group (#4) is a boundary: first pass on anything collected.
    \@@_map_output:Nn #1 {#3}
    % The content of the group is mapped by a fresh loop, with its own end
    % markers and break point, placed inside a brace pair.
    {
      \@@_map_loop:Nnnw #1 {#2} { } #4
        \q_@@_recursion_tail \q_@@_recursion_stop
      \prg_break_point:Nn \text_map_break: { }
    }
    % Carry on after the group with nothing collected.
    \@@_map_loop:Nnnw #1 {#2} { }
  }
\use:e
  { \cs_new:Npn \exp_not:N \@@_map_space:Nnnw #1#2#3 \c_space_tl }
  {
    % The \use:e above places the content of \c_space_tl at the end of the
    % parameter text, so the space at the head of the input is removed as
    % a delimiter. A space is a boundary: pass on what was collected (#3),
    % then the space as an item of its own, and continue with nothing
    % collected.
    \@@_map_output:Nn #1 {#3}
    #1 { ~ }
    \@@_map_loop:Nnnw #1 {#2} { }
  }
\cs_new:Npn \@@_map_N_type:NnnN #1#2#3#4
  {
    % #1 = function, #2 = break class set, #3 = collected material,
    % #4 = the N-type token at the head of the input.
    % End marker reached: pass on what is pending and leave the mapping.
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        \@@_map_output:Nn #1 {#3}
        \text_map_break:
      }
    \token_if_cs:NTF #4
      {
        % A control sequence is a boundary and an item of its own.
        \@@_map_output:Nn #1 {#3}
        #1 {#4}
        \@@_map_loop:Nnnw #1 {#2} { }
      }
      {
        % A character: obtain the codepoint, then let
        % \@@_map_codepoint:Nnnn decide what to do with it.
        \@@_codepoint_process:nN
          { \@@_map_codepoint:Nnnn #1 {#2} {#3} } #4
      }
  }
%    \end{macrocode}
%  We pull out a few special cases here. The carriage return case needs a bit of
%  context handling so has an auxiliary. Codepoint U+200D is the zero-width
%  joiner, which has no context to concern us: just don't break. (These special
%  cases apply to all forms of text mapping.)
%    \begin{macrocode}
\cs_new:Npn \@@_map_codepoint:Nnnn #1#2#3#4
  {
    % #1 = function, #2 = break class set, #3 = collected material,
    % #4 = current character.
    \@@_codepoint_compare:nNnTF {#4} = { "000D }
      {
        % Carriage return: pass on what was collected, then check what
        % follows the carriage return.
        \@@_map_output:Nn #1 {#3}
        \@@_map_CR:Nnnw #1 {#2} {#4}
      }
      {
        \@@_codepoint_compare:nNnTF {#4} = { "200D }
          % U+200D: never break, so add to the collected material.
          { \@@_map_loop:Nnnw #1 {#2} {#3#4} }
          % Everything else is handled according to its class.
          { \@@_map_class:Nnnn #1 {#2} {#3} {#4} }
      }
  }
%    \end{macrocode}
%   A carriage return is a boundary unless it is immediately followed by
%   a line feed, in which case that pair is a boundary.
%    \begin{macrocode}
\cs_new:Npn \@@_map_CR:Nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    % #1 = function, #2 = break class set, #3 = the carriage return,
    % #4 = remaining input.
    \tl_if_head_is_N_type:nTF {#4}
      % A single token follows: examine it.
      { \@@_map_CR:NnnN #1 {#2} {#3} }
      {
        % A group or space follows: the carriage return is an item of its
        % own and the main loop restarts on the remaining input.
        #1 {#3}
        \@@_map_loop:Nnnw #1 {#2} { }
      }
        #4 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_CR:NnnN #1#2#3#4
  {
    % #1 = function, #2 = break class set, #3 = the carriage return,
    % #4 = the N-type token which follows it.
    % End of input: the carriage return is an item of its own.
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        #1 {#3}
        \text_map_break:
      }
    % The character code is only examined once #4 is known not to be a
    % control sequence (lazy evaluation).
    \bool_lazy_and:nnTF
      { ! \token_if_cs_p:N #4 }
      { \int_compare_p:nNn { `#4 } = { "000A } }
      {
        % Carriage return plus line feed: the pair is a single item.
        \@@_map_output:Nn #1 {#3#4}
        \@@_map_loop:Nnnw #1 {#2} { }
      }
      {
        % Anything else: the carriage return is passed on alone and the
        % examined token is put back so the main loop processes it. (Feeding
        % back #3 instead would drop #4 and re-process the carriage return.)
        #1 {#3}
        \@@_map_loop:Nnnw #1 {#2} { } #4
      }
  }
%    \end{macrocode}
%   There are various classes of character, and we deal with them all in
%   the same general way. We need to examine the relevant list of codepoints:
%   if we get a hit, then we do whatever the relevant action is. To keep names
%   short and to allow code sharing, we have two ways of naming the functions:
%   most class names are unique, so it's only where we see the same name used
%   in both break classes that we need more control.
%    \begin{macrocode}
\exp_args_generate:n { Nnnne }
\cs_new:Npn \@@_map_class:Nnnn #1#2#3#4
  {
    % #1 = function, #2 = break class set, #3 = collected material,
    % #4 = current character.
    % Five arguments follow the function name, so the expansion function
    % needs five specifiers after its leading N: the first four are passed
    % through untouched and only the class look-up is expanded. (The
    % \exp_args_generate:n line above creates \exp_args:NNnnne.)
    \exp_args:NNnnne \@@_map_class:Nnnnn #1 {#2} {#3} {#4}
      {
        \use:c { __kernel_codepoint_to_ #2 _class:n }
          { \@@_codepoint_from_chars:Nw #4 }
      }
  }
\cs_new:Npn \@@_map_class:Nnnnn #1#2#3#4#5
  {
    % #5 = class name. Use the handler named after the class if one is
    % defined, otherwise fall back to the handler for "Other"; either way
    % it receives the function, class set, collected material and the
    % current character.
    \cs_if_exist_use:cF { @@_map_ #5 :Nnnn }
      { \@@_map_Other:Nnnn }
        #1 {#2} {#3} {#4}
  }
%    \end{macrocode}
%   A generic look-ahead setup: we need to handle both the previously collected
%   tokens and any \enquote{conditional} ones. The latter occur when looking
%   ahead for word-breaking: these \emph{may} be combined with the collected
%   tokens, but if we hit the end-of-loop, need to be output separately.
%    \begin{macrocode}
\cs_new:Npn \@@_map_lookahead:Nnnnnw #1#2#3#4#5#6 \q_@@_recursion_stop
  {
    % #1 = function, #2 = break class set, #3 = collected material,
    % #4 = "conditional" material, #5 = handler for the next character,
    % #6 = remaining input.
    \tl_if_head_is_N_type:nTF {#6}
      % A single token follows: examine it.
      { \@@_map_lookahead:NnnnnN #1 {#2} {#3} {#4} {#5} }
      % A group or space follows: return to the main loop, keeping #3 as
      % collected material and putting #4 back in front of the input.
      { \@@_map_loop:Nnnw #1 {#2} {#3} #4 }
    #6 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_lookahead:NnnnnN #1#2#3#4#5#6
  {
    % #1 = function, #2 = break class set, #3 = collected material,
    % #4 = "conditional" material, #5 = handler, #6 = next token.
    % End of input: the collected material is passed on, then the
    % conditional material separately if there is any.
    \@@_if_q_recursion_tail_stop_do:Nn #6
      {
        #1 {#3}
        \tl_if_blank:nF {#4} { #1 {#4} }
      }
    \token_if_cs:NTF #6
      {
        % A control sequence is a boundary: pass on the collected
        % material and restart the main loop with the conditional
        % material put back in front of the input.
        #1 {#3}
        \@@_map_loop:Nnnw #1 {#2} { } #4
      }
      % A character: the handler #5 is called with #1 {#2} {#3} {#4};
      % the handlers in this file take the character found as a further,
      % final argument.
      { \@@_codepoint_process:nN { #5 #1 {#2} {#3} {#4} } }
        #6
  }
%    \end{macrocode}
%   To deal with \enquote{ignored} characters for word break mapping: needed
%   for generic |Regional_Indicator| function, so set up here.
%    \begin{macrocode}
\prg_new_conditional:Npnn \@@_map_if_ignorable:n #1 { TF }
  {
    % True exactly when the class name #1 is one of the three classes
    % which are skipped over: ZWJ, Format or Extend.
    \str_case:nnTF {#1}
      { { ZWJ } { } { Format } { } { Extend } { } }
      { \prg_return_true: }
      { \prg_return_false: }
  }
%    \end{macrocode}
%   For the end of the process.
%    \begin{macrocode}
\cs_new:Npn \@@_map_output:Nn #1#2
  {
    % Apply the function #1 to #2, but only if #2 is not blank.
    \tl_if_blank:nTF {#2}
      { }
      { #1 {#2} }
  }
\cs_new:Npn \text_map_break:
  % Leave the mapping: jump to the matching
  % \prg_break_point:Nn \text_map_break: with no extra code.
  { \prg_map_break:Nn \text_map_break: { } }
\cs_new:Npn \text_map_break:n
  % As above, but the braced argument which follows in the input is
  % picked up by \prg_map_break:Nn as its second argument.
  { \prg_map_break:Nn \text_map_break: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {
%     \@@_map_Control:Nnnn            ,
%     \@@_map_Newline:Nnnn            ,
%     \@@_map_Extend:Nnnn             ,
%     \@@_map_Format:Nnnn             ,
%     \@@_map_SpacingMark:Nnnn        ,
%     \@@_map_Other:Nnnn              ,
%     \@@_map_Regional_Indicator:Nnnn
%   }
% \begin{macro}[EXP]{\@@_map_Regional_Indicator_aux:Nnnnn}
%   A small number of classes appear in both forms of breaking and have the
%   same behavior. For |Control| and |Newline|, we set up here as they are the
%   same outcome. We have the same story for |Format|, which is functionally
%   the same as |Extend|.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Control:Nnnn #1#2#3#4
  {
    % #1 = function, #2 = break class set, #3 = collected material,
    % #4 = current character. Nothing joins on either side: the collected
    % material and the character are passed on separately (each only if
    % not blank), then the loop restarts with nothing collected.
    \@@_map_output:Nn #1 {#3}
    \@@_map_output:Nn #1 {#4}
    \@@_map_loop:Nnnw #1 {#2} { }
  }
\cs_new_eq:NN \@@_map_Newline:Nnnn \@@_map_Control:Nnnn
\cs_new:Npn \@@_map_Extend:Nnnn #1#2#3#4
  % The character #4 is added to the collected material #3 and the loop
  % continues: no boundary before it.
  { \@@_map_loop:Nnnw #1 {#2} {#3#4} }
\cs_new_eq:NN \@@_map_Format:Nnnn \@@_map_Extend:Nnnn
\cs_new_eq:NN \@@_map_SpacingMark:Nnnn \@@_map_Extend:Nnnn
\cs_new:Npn \@@_map_Other:Nnnn #1#2#3#4
  {
    % Boundary before the character: pass on what was collected (#3),
    % then start a new collection containing the character #4.
    \@@_map_output:Nn #1 {#3}
    \@@_map_loop:Nnnw #1 {#2} {#4}
  }
%    \end{macrocode}
%  The Regional Indicator rule means looking ahead and dealing with the
%  case where there are two in a row. So we use a look ahead to pick them
%  off. As there is only one range the values are hard-coded. For
%  word breaking, we also need to allow for the various extenders.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Regional_Indicator:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then look ahead with the indicator
    % #4 as the collected material and no conditional material; the
    % auxiliary decides what to do with the next character.
    \@@_map_output:Nn #1 {#3}
    \@@_map_lookahead:Nnnnnw #1 {#2} {#4} { }
      \@@_map_Regional_Indicator_aux:Nnnnn
  }
\cs_new:Npn \@@_map_Regional_Indicator_aux:Nnnnn #1#2#3#4#5
  {
    % #1 = function, #2 = break class set, #3 = collected material (starts
    % with a regional indicator), #4 = conditional material (unused here),
    % #5 = next character.
    % The regional indicators form the single range U+1F1E6 to U+1F1FF.
    \bool_lazy_or:nnTF
      { \@@_codepoint_compare_p:nNn {#5} < { "1F1E6 } }
      { \@@_codepoint_compare_p:nNn {#5} > { "1F1FF } }
      {
        % Not a second indicator.
        \str_if_eq:nnTF {#2} { wordbreak }
          {
            % Word breaking: skip over ignorable characters and keep
            % looking. The test is on the word-break class, as for all
            % other word-break look-ups: |Format| is a word-break class
            % and would not be found using the grapheme classes.
            \exp_args:Ne \@@_map_if_ignorable:nTF
              {
                \__kernel_codepoint_to_wordbreak_class:n
                  { \@@_codepoint_from_chars:Nw #5 }
              }
              {
                \@@_map_lookahead:Nnnnnw #1 {#2} {#3#5} { }
                  \@@_map_Regional_Indicator_aux:Nnnnn
              }
              % Not ignorable: put the character back for the main loop.
              { \@@_map_loop:Nnnw #1 {#2} {#3} #5 }
          }
          % Grapheme mapping: put the character back for the main loop.
          { \@@_map_loop:Nnnw #1 {#2} {#3} #5 }
      }
      % A second indicator: it joins the collected material.
      { \@@_map_loop:Nnnw #1 {#2} {#3#5} }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Grapheme mapping}
%
% \begin{macro}[EXP]{\text_map_function:nN}
% \begin{macro}[EXP]{\@@_map_Prepend:Nnnn}
% \begin{macro}[EXP]{\@@_map_Prepend_aux:Nnnnn}
% \begin{macro}[EXP]{\@@_map_Prepend:Nnn}
% \begin{macro}[EXP]
%   {
%     \@@_map_L:Nnnn   ,
%     \@@_map_LV:Nnnn  ,
%     \@@_map_V:Nnnn   ,
%     \@@_map_LVT:Nnnn ,
%     \@@_map_T:Nnnn
%   }
% \begin{macro}[EXP]{\@@_map_hangul:Nnnw}
% \begin{macro}[EXP]{\@@_map_hangul:NnnN}
% \begin{macro}[EXP]{\@@_map_hangul:Nnnn}
% \begin{macro}[EXP]{\@@_map_hangul_aux:Nnnnw}
% \begin{macro}[EXP]{\@@_map_hangul:Nnnnnw}
% \begin{macro}[EXP]{\@@_map_hangul_next:Nnnnn}
% \begin{macro}[EXP]{\@@_map_hangul_end:nw}
% \begin{macro}[EXP]
%   {
%     \@@_map_hangul_L:Nnn   ,
%     \@@_map_hangul_LV:Nnn  ,
%     \@@_map_hangul_V:Nnn   ,
%     \@@_map_hangul_LVT:Nnn ,
%     \@@_map_hangul_T:Nnn
%   }
%   The standard lead-off for an action loop.
%    \begin{macrocode}
\cs_new:Npn \text_map_function:nN #1#2
  {
    % Apply the function #2 to each item of the text #1: the text is
    % first expanded with \text_expand:n, then walked using the
    % "grapheme" class set.
    \@@_map_function:enN { \text_expand:n {#1} }
      { grapheme } #2
  }
%    \end{macrocode}
%   Output anything earlier, then combine with what follows. The only
%   exclusions are control characters.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Prepend:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then look ahead with the current
    % character #4 as the collected material; the auxiliary examines the
    % character which follows.
    \@@_map_output:Nn #1 {#3}
    \@@_map_lookahead:Nnnnnw #1 { grapheme } {#4} { }
      \@@_map_Prepend_aux:Nnnnn
  }
\cs_new:Npn \@@_map_Prepend_aux:Nnnnn #1#2#3#4#5
  {
    % #1 = function, #3 = collected material (the prepending character),
    % #5 = next character; #2 and #4 are not used.
    \bool_lazy_or:nnTF
      { \@@_codepoint_compare_p:nNn {#5} = { "000A } }
      { \@@_codepoint_compare_p:nNn {#5} = { "000D } }
      {
        % A line feed or carriage return never joins: pass on the
        % collected material, then put the character back in the input
        % with nothing collected, so that it goes through the normal
        % handling (including the carriage return/line feed pairing)
        % rather than being treated as already-collected material.
        #1 {#3}
        \@@_map_loop:Nnnw #1 { grapheme } { } #5
      }
      { \@@_map_Prepend:Nnn #1 {#3} {#5} }
  }
\cs_new:Npn \@@_map_Prepend:Nnn #1#2#3
  {
    % #1 = function, #2 = collected material, #3 = next character.
    % Compare the grapheme class of #3 with "Control".
    \str_if_eq:eeTF
      { Control }
      {
        \__kernel_codepoint_to_grapheme_class:n
          { \@@_codepoint_from_chars:Nw #3 }
      }
      % Control: do not join; put #3 back for the main loop.
      { \@@_map_loop:Nnnw #1 { grapheme } {#2} #3 }
      % Anything else joins the collected material.
      { \@@_map_loop:Nnnw #1 { grapheme } {#2#3} }
  }
%    \end{macrocode}
%   Hangul needs additional treatment. First we have to deal with
%   the start-of-Hangul position: output what we had up to now, then
%   move to the specialist handler. The idea here is to pick off the
%   different codepoint types one at a time, tracking what else can be
%   considered at each stage until we hit the end of the viable types.
%   Other than that, we just keep building up the Hangul codepoints
%   using a dedicated version of the loop from above.
%    \begin{macrocode}
\cs_new:Npn \@@_map_L:Nnnn #1#2#3#4
  {
    % Start of a Hangul sequence with a character of class L: pass on
    % what was collected (#3), then hand the character #4 to the Hangul
    % loop with the semicolon-separated list of classes which may follow.
    \@@_map_output:Nn #1 {#3}
    \@@_map_hangul:Nnnw
      #1 {#4} { L ; V ; LV ; LVT }
  }
\cs_new:Npn \@@_map_LV:Nnnn #1#2#3#4
  {
    % As for class L, but after LV (and V, which shares this code) the
    % classes which may follow are V and T.
    \@@_map_output:Nn #1 {#3}
    \@@_map_hangul:Nnnw
      #1 {#4} { V ; T }
  }
\cs_new_eq:NN \@@_map_V:Nnnn \@@_map_LV:Nnnn
\cs_new:Npn \@@_map_LVT:Nnnn #1#2#3#4
  {
    % As for class L, but after LVT (and T, which shares this code) only
    % class T may follow.
    \@@_map_output:Nn #1 {#3}
    \@@_map_hangul:Nnnw
      #1 {#4} { T }
  }
\cs_new_eq:NN \@@_map_T:Nnnn \@@_map_LVT:Nnnn
\cs_new:Npn \@@_map_hangul:Nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    % #1 = function, #2 = Hangul characters collected so far,
    % #3 = semicolon-separated list of classes which may follow,
    % #4 = remaining input.
    \tl_if_head_is_N_type:nTF {#4}
      % A single token follows: examine it.
      { \@@_map_hangul:NnnN #1 {#2} {#3} }
      {
        % A group or space follows: pass on the collected characters and
        % return to the main loop.
        #1 {#2}
        \@@_map_loop:Nnnw #1 { grapheme } { }
      }
    #4 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_hangul:NnnN #1#2#3#4
  {
    % #1 = function, #2 = collected Hangul characters, #3 = list of
    % classes which may follow, #4 = next token.
    % End of input: pass on the collected characters and leave.
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        #1 {#2}
        \text_map_break:
      }
    \token_if_cs:NTF #4
      {
        % A control sequence ends the sequence: pass on the collected
        % characters and return to the main loop, which sees #4 next.
        #1 {#2}
        \@@_map_loop:Nnnw #1 { grapheme } { }
      }
      {
        % A character: obtain it in full, then check its class.
        \@@_codepoint_process:nN
          { \@@_map_hangul:Nnnn #1 {#2} {#3} } #4
      }
  }
\exp_args_generate:n { Nnne }
\cs_new:Npn \@@_map_hangul:Nnnn #1#2#3#4
  {
    % #1 = function, #2 = collected Hangul characters, #3 = list of
    % classes which may follow, #4 = next character.
    % The grapheme class of #4 is worked out (the "e" argument; the
    % expansion function is the one set up by the line above), then the
    % list #3 is laid out after the call, terminated by the recursion
    % quarks, for the auxiliary to step through.
    \exp_args:NNnne \@@_map_hangul_aux:Nnnnw #1 {#2} {#4}
      {
        \__kernel_codepoint_to_grapheme_class:n
          { \@@_codepoint_from_chars:Nw #4 }
      }
      #3 ; \q_recursion_tail ; \q_recursion_stop
  }
\cs_new:Npn \@@_map_hangul_aux:Nnnnw #1#2#3#4#5 ;
  {
    % #1 = function, #2 = collected Hangul characters, #3 = next
    % character, #4 = class of #3, #5 = next entry of the class list
    % (entries are delimited by semicolons).
    % List exhausted without a match: the sequence ends here. Return to
    % the main loop keeping #2 as collected material and putting #3 back
    % in the input.
    \quark_if_recursion_tail_stop_do:nn {#5}
      { \@@_map_loop:Nnnw #1 { grapheme } {#2} #3 }
    \@@_map_hangul:Nnnnnw #1 {#2} {#3} {#4} {#5}
  }
\cs_generate_variant:Nn \@@_map_hangul_aux:Nnnnw { Nnne }
\cs_new:Npn \@@_map_hangul:Nnnnnw #1#2#3#4#5#6 \q_recursion_stop
  {
    % #4 = class of the character #3, #5 = list entry being tested,
    % #6 = rest of the list (up to the stop marker).
    \str_if_eq:nnTF {#4} {#5}
      % Match: the class-specific function adds #3 to the collected
      % characters and sets the list for the following character.
      { \use:c { @@_map_hangul_ #5 :Nnn } #1 {#2} {#3} }
      % No match: try the next entry.
      { \@@_map_hangul_next:Nnnnn #1 {#2} {#3} {#4} {#6} }
  }
\cs_new:Npn \@@_map_hangul_next:Nnnnn #1#2#3#4#5
  % Re-enter the list-stepping auxiliary with the remaining entries #5,
  % restoring the stop marker after them.
  { \@@_map_hangul_aux:Nnnnw #1 {#2} {#3} {#4} #5 \q_recursion_stop }
\cs_new:Npn \@@_map_hangul_end:nw #1#2 \q_@@_recursion_stop {#1} % Leaves #1, discards input up to the stop marker; not used in the code shown here.
\cs_new:Npn \@@_map_hangul_L:Nnn #1#2#3
  {
    % A character of class L was found inside a Hangul sequence: add it
    % (#3) to the collected characters (#2) and continue. The list of
    % classes which may follow is split at semicolons by
    % \@@_map_hangul_aux:Nnnnw, so each entry has to be separated by one:
    % without them the whole list is read as a single entry which never
    % matches.
    \@@_map_hangul:Nnnw
      #1 {#2#3} { L ; V ; LV ; LVT }
  }
\cs_new:Npn \@@_map_hangul_LV:Nnn #1#2#3
  {
    % A character of class LV (or V, which shares this code) was found
    % inside a Hangul sequence: add it (#3) to the collected characters
    % (#2) and continue. Classes V and T may follow; they are two entries
    % of the semicolon-separated list, not the single name "VT".
    \@@_map_hangul:Nnnw
      #1 {#2#3} { V ; T }
  }
\cs_new_eq:NN \@@_map_hangul_V:Nnn \@@_map_hangul_LV:Nnn
\cs_new:Npn \@@_map_hangul_LVT:Nnn #1#2#3
  {
    % A character of class LVT (or T, which shares this code) was found
    % inside a Hangul sequence: add it (#3) to the collected characters
    % (#2) and continue; only class T may follow.
    \@@_map_hangul:Nnnw
      #1 {#2#3} { T }
  }
\cs_new_eq:NN \@@_map_hangul_T:Nnn \@@_map_hangul_LVT:Nnn
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Word break mapping}
%
% \begin{macro}[EXP]{\text_words_map_function:nN}
% \begin{macro}[EXP]{\@@_map_collect:Nnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxi:nnnNnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxii:nNnnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxiii:n}
% \begin{macro}[EXP]{\@@_map_collect_auxiv:nnNnnnn}
% \begin{macro}[EXP]{\@@_map_collect_auxv:nNnnnnn}
% \begin{macro}[EXP]
%   {
%     \@@_map_ALetter:Nnnn       ,
%     \@@_map_Hebrew_Letter:Nnnn ,
%     \@@_map_Katakana:Nnnn      ,
%     \@@_map_Numeric:Nnnn       ,
%     \@@_map_WSegSpace:Nnnn     ,
%     \@@_map_ExtendNumLet:Nnnn
%   }
% \begin{macro}[EXP]{\@@_map_ExtendNumLet_auxi:Nnnnn}
% \begin{macro}[EXP]{\@@_map_ExtendNumLet_auxii:nNnn}
%   The standard lead-off for an action loop.
%    \begin{macrocode}
\cs_new:Npn \text_words_map_function:nN #1#2
  {
    % Apply the function #2 to each item of the text #1: the text is
    % first expanded with \text_expand:n, then walked using the
    % "wordbreak" class set.
    \@@_map_function:enN { \text_expand:n {#1} }
      { wordbreak } #2
  }
%    \end{macrocode}
%   The main rule for word breaking is that characters bind to following
%   ones, potentially either allowing for \emph{or} totally ignoring
%   intervening ones. For each class, we are passed a list of classes that
%   bind and ones that we should allow in between. In all cases, the classes
%   |Extend|, |Format| and |ZWJ| need to be entirely ignored: they are hard
%   coded and handled separately from the in-between ones. Notice that we use
%   \cs{str_case:nnTF} to make our boolean here: that way, all that needs to be
%   passed internally are lists of classes.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect:Nnnnn #1#2#3#4#5
  {
    % Generic collector for word breaking.
    %   #1 = function, #2 = collected material,
    %   #3 = classes which bind directly,
    %   #4 = classes allowed "in between",
    %   #5 = classes which must follow an "in between" character.
    % Look ahead with no conditional material; the three lists travel as
    % leading arguments of the handler.
    \@@_map_lookahead:Nnnnnw #1 { wordbreak } {#2} { }
      { \@@_map_collect_auxi:nnnNnnnn {#3} {#4} {#5} }
  }
\cs_new:Npn \@@_map_collect_auxi:nnnNnnnn #1#2#3#4#5#6#7#8
  {
    % #1, #2, #3 = the three class lists, #4 = function, #6 = collected
    % material, #8 = next character; #5 and #7 are not used here.
    % Work out the word-break class of #8 and pass it on as the first
    % argument of the next auxiliary.
    \exp_args:Ne \@@_map_collect_auxii:nNnnnnn
      {
        \__kernel_codepoint_to_wordbreak_class:n
          { \@@_codepoint_from_chars:Nw #8 }
      }
	  #4 {#6} {#1} {#2} {#3} {#8}
  }
%    \end{macrocode}
%   We now need to deal with the three possible positive outcomes of examining
%   the next character. The first is that we have found one of the binding
%   characters that ends the current cycle: we then pass on to the appropriate
%   function.  Second, we have the ignored characters: if we find these, we
%   loop back around. Finally, we look at the \enquote{in-between} characters:
%   if one is found, we need a further look ahead to reach a decision. Rather
%   than have extra complexity in the setup, we have a hard-coded skipping of
%   |ExtendNumLet| for |WSegSpace| (as |ExtendNumLet| only applies to
%   |ALetter|, |Hebrew_Letter|, |Numeric| and |Katakana|).
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxii:nNnnnnn #1#2#3#4#5#6#7
  {
    % #1 = class of the next character, #2 = function, #3 = collected
    % material, #4 = binding classes, #5 = "in between" classes,
    % #6 = classes required after an "in between" character,
    % #7 = next character.
    \str_case:neTF {#1}
      {
        % The case list is built from the binding classes; ExtendNumLet
        % is added unless the list is exactly { WSegSpace }.
        \tl_map_function:eN
          {
            #4
            \str_if_eq:nnF {#4} { { WSegSpace } } { { ExtendNumLet } }
          }
          \@@_map_collect_auxiii:n
      }
      {
        % Binding class: the character joins the collected material and
        % the handler for its class takes over (the one for "Other" if
        % there is no dedicated handler).
        \cs_if_exist_use:cF { @@_map_ #1 :Nnnn }
          { \@@_map_Other:Nnnn }
            #2 { wordbreak } { } {#3#7}
      }
      {
        \@@_map_if_ignorable:nTF {#1}
          % Ignorable: add to the collected material and look again.
          { \@@_map_collect:Nnnnn #2 {#3#7} {#4} {#5} {#6} }
          {
            \str_case:neTF {#1}
              { \tl_map_function:nN {#5} \@@_map_collect_auxiii:n }
              {
                % "In between" class: keep #7 as conditional material
                % and look one character further ahead.
                \@@_map_lookahead:Nnnnnw #2 { wordbreak } {#3} {#7}
                  { \@@_map_collect_auxiv:nnNnnnn {#5} {#6} }
              }
              {
                % No binding: pass on the collected material and put the
                % character back for the main loop.
                \@@_map_output:Nn #2 {#3}
                \@@_map_loop:Nnnw #2 { wordbreak } { } #7
              }
          }
      }
  }
\cs_new:Npn \@@_map_collect_auxiii:n #1
  % Turn one class name into a \str_case:nn entry with an empty action;
  % \exp_not:n protects the result where the list is built by expansion.
  { \exp_not:n { {#1} { } } }
%    \end{macrocode}
%   We now have a character which \emph{may} bind to the previous one if
%   the next character is of the correct class also. So we carry forward the
%   collected material and the conditional character, then look ahead again.
%   If successful, combine together and move on using the new class, otherwise
%   output and restart where we were.
%    \begin{macrocode}
\cs_new:Npn \@@_map_collect_auxiv:nnNnnnn #1#2#3#4#5#6#7
  {
    % #1 = "in between" classes, #2 = classes required afterwards,
    % #3 = function, #5 = collected material, #6 = conditional material,
    % #7 = next character; #4 is not used here.
    % Work out the word-break class of #7 and pass it on as the first
    % argument of the next auxiliary.
    \exp_args:Ne \@@_map_collect_auxv:nNnnnnn
      {
        \__kernel_codepoint_to_wordbreak_class:n
          { \@@_codepoint_from_chars:Nw #7 }
      }
	  #3 {#5} {#6} {#1} {#2} {#7}
  }
\cs_new:Npn \@@_map_collect_auxv:nNnnnnn #1#2#3#4#5#6#7
  {
    % #1 = class of the next character, #2 = function, #3 = collected
    % material, #4 = conditional material, #5 = "in between" classes,
    % #6 = classes required afterwards, #7 = next character.
    \str_case:neTF {#1}
      { \tl_map_function:nN {#6} \@@_map_collect_auxiii:n }
      % Required class found: collected, conditional and new character
      % are combined and the handler for the new class takes over.
      { \use:c { @@_map_ #1 :Nnnn } #2 { wordbreak } { } {#3#4#7} }
      {
        \@@_map_if_ignorable:nTF {#1}
          {
            % Ignorable: add it to the conditional material and look
            % ahead again.
            \@@_map_lookahead:Nnnnnw #2 { wordbreak } {#3} {#4#7}
              { \@@_map_collect_auxiv:nnNnnnn {#5} {#6} }
          }
          {
            % No binding: pass on the collected material and put the
            % conditional material and the character back in the input.
            \@@_map_output:Nn #2 {#3}
            \@@_map_loop:Nnnw #2 { wordbreak } { } #4#7
          }
      }
  }
%    \end{macrocode}
%   Use the generic collector.
%    \begin{macrocode}
\cs_new:Npn \@@_map_ALetter:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then collect starting from the
    % character #4. The three lists are, in order: classes which bind
    % directly, classes allowed in between, and classes required after an
    % in-between character.
    \@@_map_output:Nn #1 {#3}
    \@@_map_collect:Nnnnn #1 {#4}
      { { ALetter } { Hebrew_Letter } { Numeric } }
      { { MidLetter } { MidNumLet } { Single_Quote } }
      { { ALetter } { Hebrew_Letter } }
  }
\cs_new:Npn \@@_map_Hebrew_Letter:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then collect starting from the
    % character #4. Lists in order: binding classes, in-between classes,
    % classes required after an in-between character.
    \@@_map_output:Nn #1 {#3}
    \@@_map_collect:Nnnnn #1 {#4}
      { { ALetter } { Hebrew_Letter } { Numeric } { Single_Quote } }
      { { MidLetter } { MidNumLet } { Double_Quote } }
      { { Hebrew_Letter } }
  }
\cs_new:Npn \@@_map_Katakana:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then collect starting from #4:
    % only Katakana is given as a binding class, and there are no
    % in-between classes.
    \@@_map_output:Nn #1 {#3}
    \@@_map_collect:Nnnnn #1 {#4} { { Katakana } } { } { }
  }
\cs_new:Npn \@@_map_Numeric:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then collect starting from the
    % character #4. Lists in order: binding classes, in-between classes,
    % classes required after an in-between character.
    \@@_map_output:Nn #1 {#3}
    \@@_map_collect:Nnnnn #1 {#4}
      { { ALetter } { Hebrew_Letter } { Numeric } }
      { { MidNum } { MidNumLet } { Single_Quote } }
      { { Numeric } }
  }
\cs_new:Npn \@@_map_WSegSpace:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then collect starting from #4:
    % only WSegSpace is given as a binding class, and there are no
    % in-between classes. (This exact list is what
    % \@@_map_collect_auxii:nNnnnnn tests for when deciding whether to
    % add ExtendNumLet.)
    \@@_map_output:Nn #1 {#3}
    \@@_map_collect:Nnnnn #1 {#4} { { WSegSpace } } { } { }
  }
%    \end{macrocode}
%   We should only get here in the case we have a \enquote{dangling} extender.
%   If so, look ahead for characters to bind to, then for the set of three
%   that we need to skip over.
%    \begin{macrocode}
\cs_new:Npn \@@_map_ExtendNumLet:Nnnn #1#2#3#4
  {
    % Pass on what was collected (#3), then look ahead with the character
    % #4 as the collected material and no conditional material.
    \@@_map_output:Nn #1 {#3}
    \@@_map_lookahead:Nnnnnw #1 { wordbreak } {#4} { }
      \@@_map_ExtendNumLet_auxi:Nnnnn
  }
\cs_new:Npn \@@_map_ExtendNumLet_auxi:Nnnnn #1#2#3#4#5
  {
    % #1 = function, #3 = collected material, #5 = next character; #2 and
    % #4 are not used. Work out the word-break class of #5 and pass it on
    % as the first argument of the next auxiliary.
    \exp_args:Ne \@@_map_ExtendNumLet_auxii:nNnn
      {
        \__kernel_codepoint_to_wordbreak_class:n
          { \@@_codepoint_from_chars:Nw #5 }
      }
      #1 {#3} {#5}
  }
\cs_new:Npn \@@_map_ExtendNumLet_auxii:nNnn #1#2#3#4
  {
    % #1 = class of the next character, #2 = function, #3 = collected
    % material, #4 = next character.
    \str_case:nnTF {#1}
      {
        { ALetter }       { }
        { Hebrew_Letter } { }
        { Numeric }       { }
        { Katakana }      { }
        { ExtendNumLet }  { }
      }
      {
        % One of the listed classes: the character joins the collected
        % material and the handler for its class takes over (the one for
        % "Other" if there is no dedicated handler).
        \cs_if_exist_use:cF { @@_map_ #1 :Nnnn } % TEMP?
          { \@@_map_Other:Nnnn }
            #2 { wordbreak } { } {#3#4}
      }
      {
        \@@_map_if_ignorable:nTF {#1}
          {
            % Ignorable: add to the collected material and look again.
            \@@_map_lookahead:Nnnnnw #2 { wordbreak } {#3#4} { }
              \@@_map_ExtendNumLet_auxi:Nnnnn
          }
          {
            % No binding: pass on the collected material and put the
            % character back for the main loop.
            \@@_map_output:Nn #2 {#3}
            \@@_map_loop:Nnnw #2 { wordbreak } { } #4
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Inline mappings}
%
% \begin{macro}{\text_map_inline:nn, \text_words_map_inline:nn}
%   The standard non-expandable inline version.
%    \begin{macrocode}
\cs_new_protected:Npn \text_map_inline:nn #1#2
  {
    % Inline mapping over the text #1 with code #2 (which refers to the
    % current item as ##1). The nesting counter is stepped so that each
    % level of mapping gets its own function name.
    \int_gincr:N \g__kernel_prg_map_int
    % Define a one-argument function holding the inline code ...
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    % ... and hand it to the function-based mapping.
    \exp_args:Nnc \text_map_function:nN {#1}
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
    % The counter is restored once the mapping is over.
    \prg_break_point:Nn \text_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
\cs_new_protected:Npn \text_words_map_inline:nn #1#2
  {
    % Inline mapping over the text #1 with code #2 (which refers to the
    % current item as ##1), using the word-break mapping. The nesting
    % counter is stepped so that each level of mapping gets its own
    % function name.
    \int_gincr:N \g__kernel_prg_map_int
    % Define a one-argument function holding the inline code ...
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    % ... and hand it to the function-based mapping.
    \exp_args:Nnc \text_words_map_function:nN {#1}
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
    % The counter is restored once the mapping is over.
    \prg_break_point:Nn \text_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
%    \end{macrocode}
% \end{macro}
%
%    \begin{macrocode}
%</code>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex