001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.ByteArrayOutputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.OutputStream;
023import java.io.UnsupportedEncodingException;
024import java.nio.charset.Charset;
025
026import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
027import org.apache.commons.io.Charsets;
028import org.apache.commons.io.IOUtils;
029import org.apache.commons.io.build.AbstractOrigin;
030import org.apache.commons.io.build.AbstractStreamBuilder;
031import org.apache.commons.io.output.NullOutputStream;
032
033/**
034 * Low-level API for processing file uploads.
035 *
036 * <p>
037 * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC
038 * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
039 * </p>
040 * <p>
041 * The format of the stream is defined in the following way:
042 * </p>
043 * <pre>
044 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
045 *   encapsulation := delimiter body CRLF<br>
046 *   delimiter := "--" boundary CRLF<br>
047 *   close-delimiter := "--" boundary "--"<br>
048 *   preamble := &lt;ignore&gt;<br>
049 *   epilogue := &lt;ignore&gt;<br>
050 *   body := header-part CRLF body-part<br>
051 *   header-part := 1*header CRLF<br>
052 *   header := header-name ":" header-value<br>
053 *   header-name := &lt;printable ASCII characters except ":"&gt;<br>
054 *   header-value := &lt;any ASCII characters except CR &amp; LF&gt;<br>
055 *   body-data := &lt;arbitrary data&gt;<br>
056 * </pre>
057 *
058 * <p>
 * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
060 * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
061 * </p>
062 * <p>
063 * Here is an example of usage of this class:
064 * </p>
065 *
066 * <pre>
067 * try {
068 *     MultipartInput multipartStream = MultipartInput.builder()
069 *             .setBoundary(boundary)
070 *             .setInputStream(input)
071 *             .get();
072 *     boolean nextPart = multipartStream.skipPreamble();
073 *     OutputStream output;
074 *     while (nextPart) {
075 *         String header = multipartStream.readHeaders();
076 *         // process headers
077 *         // create some output stream
078 *         multipartStream.readBodyData(output);
079 *         nextPart = multipartStream.readBoundary();
080 *     }
081 * } catch (MultipartInput.MalformedStreamException e) {
082 *     // the stream failed to follow required syntax
083 * } catch (IOException e) {
084 *     // a read or write error occurred
085 * }
086 * </pre>
087 */
088public final class MultipartInput {
089
090    /**
091     * Builds a new {@link MultipartInput} instance.
092     * <p>
093     * For example:
094     * </p>
095     *
096     * <pre>{@code
097     * MultipartInput factory = MultipartInput.builder().setPath(path).setBufferSize(DEFAULT_THRESHOLD).get();
098     * }
099     * </pre>
100     */
101    public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {
102
103        /**
104         * Boundary.
105         */
106        private byte[] boundary;
107
108        /**
109         * Progress notifier.
110         */
111        private ProgressNotifier progressNotifier;
112
113        /** The  per part size limit for headers.
114         */
115        private int partHeaderSizeMax = DEFAULT_PART_HEADER_SIZE_MAX;
116
117        /**
118         * Constructs a new instance.
119         */
120        public Builder() {
121            setBufferSizeDefault(DEFAULT_BUFSIZE);
122        }
123
124        /**
125         * Constructs a new instance.
126         * <p>
127         * This builder uses the InputStream, buffer size, boundary and progress notifier aspects.
128         * </p>
129         * <p>
130         * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an
131         * {@link UnsupportedOperationException}.
132         * </p>
133         *
134         * @return a new instance.
135         * @throws IOException                   if an I/O error occurs.
136         * @throws UnsupportedOperationException if the origin cannot provide a Path.
137         * @see AbstractOrigin#getReader(Charset)
138         */
139        @Override
140        public MultipartInput get() throws IOException {
141            return new MultipartInput(getInputStream(), boundary, getBufferSize(), getPartHeaderSizeMax(), progressNotifier);
142        }
143
144        /** Returns the per part size limit for headers.
145         * @return The maximum size of the headers in bytes.
146         * @since 2.0.0-M4
147         */
148        public int getPartHeaderSizeMax() {
149            return partHeaderSizeMax;
150        }
151
152        /**
153         * Sets the boundary.
154         *
155         * @param boundary the boundary.
156         * @return {@code this} instance.
157         */
158        public Builder setBoundary(final byte[] boundary) {
159            this.boundary = boundary;
160            return this;
161        }
162
163       /** Sets the per part size limit for headers.
164     * @param partHeaderSizeMax The maximum size of the headers in bytes.
165     * @return This builder.
166     * @since 2.0.0-M4
167     */
168    public Builder setPartHeaderSizeMax(final int partHeaderSizeMax) {
169        this.partHeaderSizeMax = partHeaderSizeMax;
170        return this;
171    }
172
173        /**
174             * Sets the progress notifier.
175             *
176             * @param progressNotifier progress notifier.
177             * @return {@code this} instance.
178             */
179            public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
180                this.progressNotifier = progressNotifier;
181                return this;
182            }
183
184    }
185
186    /**
187     * Signals an attempt to set an invalid boundary token.
188     */
189    public static class FileUploadBoundaryException extends FileUploadException {
190
191        /**
192         * The UID to use when serializing this instance.
193         */
194        private static final long serialVersionUID = 2;
195
196        /**
197         * Constructs an instance with the specified detail message.
198         *
199         * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
200         */
201        public FileUploadBoundaryException(final String message) {
202            super(message);
203        }
204
205    }
206
207    /**
208     * An {@link InputStream} for reading an items contents.
209     */
210    public class ItemInputStream extends InputStream {
211
212        /**
213         * Offset when converting negative bytes to integers.
214         */
215        private static final int BYTE_POSITIVE_OFFSET = 256;
216
217        /**
218         * The number of bytes, which have been read so far.
219         */
220        private long total;
221
222        /**
223         * The number of bytes, which must be hold, because they might be a part of the boundary.
224         */
225        private int pad;
226
227        /**
228         * The current offset in the buffer.
229         */
230        private int pos;
231
232        /**
233         * Whether the stream is already closed.
234         */
235        private boolean closed;
236
237        /**
238         * Creates a new instance.
239         */
240        ItemInputStream() {
241            findSeparator();
242        }
243
244        /**
245         * Returns the number of bytes, which are currently available, without blocking.
246         *
247         * @throws IOException An I/O error occurs.
248         * @return Number of bytes in the buffer.
249         */
250        @Override
251        public int available() throws IOException {
252            if (pos == -1) {
253                return tail - head - pad;
254            }
255            return pos - head;
256        }
257
258        private void checkOpen() throws ItemSkippedException {
259            if (closed) {
260                throw new FileItemInput.ItemSkippedException("checkOpen()");
261            }
262        }
263
264        /**
265         * Closes the input stream.
266         *
267         * @throws IOException An I/O error occurred.
268         */
269        @Override
270        public void close() throws IOException {
271            close(false);
272        }
273
274        /**
275         * Closes the input stream.
276         *
277         * @param closeUnderlying Whether to close the underlying stream (hard close)
278         * @throws IOException An I/O error occurred.
279         */
280        public void close(final boolean closeUnderlying) throws IOException {
281            if (closed) {
282                return;
283            }
284            if (closeUnderlying) {
285                closed = true;
286                input.close();
287            } else {
288                for (;;) {
289                    var avail = available();
290                    if (avail == 0) {
291                        avail = makeAvailable();
292                        if (avail == 0) {
293                            break;
294                        }
295                    }
296                    if (skip(avail) != avail) {
297                        // TODO What to do?
298                    }
299                }
300            }
301            closed = true;
302        }
303
304        /**
305         * Called for finding the separator.
306         */
307        private void findSeparator() {
308            pos = MultipartInput.this.findSeparator();
309            if (pos == -1) {
310                if (tail - head > keepRegion) {
311                    pad = keepRegion;
312                } else {
313                    pad = tail - head;
314                }
315            }
316        }
317
318        /**
319         * Gets the number of bytes, which have been read by the stream.
320         *
321         * @return Number of bytes, which have been read so far.
322         */
323        public long getBytesRead() {
324            return total;
325        }
326
327        /**
328         * Tests whether this instance is closed.
329         *
330         * @return whether this instance is closed.
331         */
332        public boolean isClosed() {
333            return closed;
334        }
335
336        /**
337         * Attempts to read more data.
338         *
339         * @return Number of available bytes
340         * @throws IOException An I/O error occurred.
341         */
342        private int makeAvailable() throws IOException {
343            if (pos != -1) {
344                return 0;
345            }
346
347            // Move the data to the beginning of the buffer.
348            total += tail - head - pad;
349            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
350
351            // Refill buffer with new data.
352            head = 0;
353            tail = pad;
354
355            for (;;) {
356                final var bytesRead = input.read(buffer, tail, bufSize - tail);
357                if (bytesRead == -1) {
358                    // The last pad amount is left in the buffer.
359                    // Boundary can't be in there so signal an error
360                    // condition.
361                    final var msg = "Stream ended unexpectedly";
362                    throw new MalformedStreamException(msg);
363                }
364                if (notifier != null) {
365                    notifier.noteBytesRead(bytesRead);
366                }
367                tail += bytesRead;
368
369                findSeparator();
370                final var av = available();
371
372                if (av > 0 || pos != -1) {
373                    return av;
374                }
375            }
376        }
377
378        /**
379         * Reads the next byte in the stream.
380         *
381         * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
382         * @throws IOException An I/O error occurred.
383         */
384        @Override
385        public int read() throws IOException {
386            checkOpen();
387            if (available() == 0 && makeAvailable() == 0) {
388                return -1;
389            }
390            ++total;
391            final int b = buffer[head++];
392            if (b >= 0) {
393                return b;
394            }
395            return b + BYTE_POSITIVE_OFFSET;
396        }
397
398        /**
399         * Reads bytes into the given buffer.
400         *
401         * @param b   The destination buffer, where to write to.
402         * @param off Offset of the first byte in the buffer.
403         * @param len Maximum number of bytes to read.
404         * @return Number of bytes, which have been actually read, or -1 for EOF.
405         * @throws IOException An I/O error occurred.
406         */
407        @Override
408        public int read(final byte[] b, final int off, final int len) throws IOException {
409            checkOpen();
410            if (len == 0) {
411                return 0;
412            }
413            var res = available();
414            if (res == 0) {
415                res = makeAvailable();
416                if (res == 0) {
417                    return -1;
418                }
419            }
420            res = Math.min(res, len);
421            System.arraycopy(buffer, head, b, off, res);
422            head += res;
423            total += res;
424            return res;
425        }
426
427        /**
428         * Skips the given number of bytes.
429         *
430         * @param bytes Number of bytes to skip.
431         * @return The number of bytes, which have actually been skipped.
432         * @throws IOException An I/O error occurred.
433         */
434        @Override
435        public long skip(final long bytes) throws IOException {
436            checkOpen();
437            var available = available();
438            if (available == 0) {
439                available = makeAvailable();
440                if (available == 0) {
441                    return 0;
442                }
443            }
444            // Fix "Implicit narrowing conversion in compound assignment"
445            // https://github.com/apache/commons-fileupload/security/code-scanning/118
446            // Math.min always returns an int because available is an int.
447            final var res = Math.toIntExact(Math.min(available, bytes));
448            head += res;
449            return res;
450        }
451
452    }
453
454    /**
455     * Signals that the input stream fails to follow the required syntax.
456     */
457    public static class MalformedStreamException extends FileUploadException {
458
459        /**
460         * The UID to use when serializing this instance.
461         */
462        private static final long serialVersionUID = 2;
463
464        /**
465         * Constructs an {@code MalformedStreamException} with the specified detail message.
466         *
467         * @param message The detail message.
468         */
469        public MalformedStreamException(final String message) {
470            super(message);
471        }
472
473        /**
474         * Constructs an {@code MalformedStreamException} with the specified detail message.
475         *
476         * @param message The detail message.
477         * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
478         *                cause is nonexistent or unknown.)
479         */
480        public MalformedStreamException(final String message, final Throwable cause) {
481            super(message, cause);
482        }
483
484    }
485
486    /**
487     * Internal class, which is used to invoke the {@link ProgressListener}.
488     */
489    public static class ProgressNotifier {
490
491        /**
492         * The listener to invoke.
493         */
494        private final ProgressListener progressListener;
495
496        /**
497         * Number of expected bytes, if known, or -1.
498         */
499        private final long contentLength;
500
501        /**
502         * Number of bytes, which have been read so far.
503         */
504        private long bytesRead;
505
506        /**
507         * Number of items, which have been read so far.
508         */
509        private int items;
510
511        /**
512         * Creates a new instance with the given listener and content length.
513         *
514         * @param progressListener The listener to invoke.
515         * @param contentLength    The expected content length.
516         */
517        public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
518            this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
519            this.contentLength = contentLength;
520        }
521
522        /**
523         * Called to indicate that bytes have been read.
524         *
525         * @param byteCount Number of bytes, which have been read.
526         */
527        void noteBytesRead(final int byteCount) {
528            //
529            // Indicates, that the given number of bytes have been read from the input stream.
530            //
531            bytesRead += byteCount;
532            notifyListener();
533        }
534
535        /**
536         * Called to indicate, that a new file item has been detected.
537         */
538        public void noteItem() {
539            ++items;
540            notifyListener();
541        }
542
543        /**
544         * Called for notifying the listener.
545         */
546        private void notifyListener() {
547            progressListener.update(bytesRead, contentLength, items);
548        }
549
550    }
551
552    /**
553     * The Carriage Return ASCII character value.
554     */
555    public static final byte CR = 0x0D;
556
557    /**
558     * The Line Feed ASCII character value.
559     */
560    public static final byte LF = 0x0A;
561
562    /**
563     * The dash (-) ASCII character value.
564     */
565    public static final byte DASH = 0x2D;
566
567    /**
568     * The default length of the buffer used for processing a request.
569     */
570    static final int DEFAULT_BUFSIZE = 4096;
571
572    /**
573     * Default per part header size limit in bytes.
574     * @since 2.0.0-M4
575     */
576    public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512;
577
578    /**
579     * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
580     */
581    static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };
582
583    /**
     * A byte sequence that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
585     */
586    static final byte[] FIELD_SEPARATOR = { CR, LF };
587
588    /**
     * A byte sequence that follows a delimiter of the last encapsulation in the stream ({@code --}).
590     */
591    static final byte[] STREAM_TERMINATOR = { DASH, DASH };
592
593    /**
594     * A byte sequence that precedes a boundary ({@code CRLF--}).
595     */
596    static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };
597
598    /**
599     * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
600     *
601     * @param a     The first array to compare.
602     * @param b     The second array to compare.
603     * @param count How many bytes should be compared.
604     * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
605     */
606    static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
607        for (var i = 0; i < count; i++) {
608            if (a[i] != b[i]) {
609                return false;
610            }
611        }
612        return true;
613    }
614
615    /**
616     * Constructs a new {@link Builder}.
617     *
618     * @return a new {@link Builder}.
619     */
620    public static Builder builder() {
621        return new Builder();
622    }
623
624    /**
625     * The input stream from which data is read.
626     */
627    private final InputStream input;
628
629    /**
630     * The length of the boundary token plus the leading {@code CRLF--}.
631     */
632    private int boundaryLength;
633
634    /**
635     * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably.
636     */
637    private final int keepRegion;
638
639    /**
640     * The byte sequence that partitions the stream.
641     */
642    private final byte[] boundary;
643
644    /**
645     * The table for Knuth-Morris-Pratt search algorithm.
646     */
647    private final int[] boundaryTable;
648
649    /**
650     * The length of the buffer used for processing the request.
651     */
652    private final int bufSize;
653
654    /**
655     * The buffer used for processing the request.
656     */
657    private final byte[] buffer;
658
659    /**
660     * The index of first valid character in the buffer. <br>
661     * 0 <= head < bufSize
662     */
663    private int head;
664
665    /**
666     * The index of last valid character in the buffer + 1. <br>
667     * 0 <= tail <= bufSize
668     */
669    private int tail;
670
671    /**
672     * The content encoding to use when reading headers.
673     */
674    private Charset headerCharset;
675
676    /**
677     * The progress notifier, if any, or null.
678     */
679    private final ProgressNotifier notifier;
680
681    /**
682     * The maximum size of the headers in bytes.
683     */
684    private final int partHeaderSizeMax;
685
686    /**
687     * Constructs a {@code MultipartInput} with a custom size buffer.
688     * <p>
689     * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
690     * data. Too small a buffer size setting will degrade performance.
691     * </p>
692     *
693     * @param input      The {@code InputStream} to serve as a data source.
694     * @param boundary   The token used for dividing the stream into {@code encapsulations}.
695     * @param bufferSize The size of the buffer to be used, in bytes.
696     * @param notifier   The notifier, which is used for calling the progress listener, if any.
697     * @throws IllegalArgumentException If the buffer size is too small.
698     */
699    private MultipartInput(final InputStream input, final byte[] boundary, final int bufferSize, final int partHeaderSizeMax, final ProgressNotifier notifier) {
700        if (boundary == null) {
701            throw new IllegalArgumentException("boundary may not be null");
702        }
703        // We prepend CR/LF to the boundary to chop trailing CR/LF from
704        // body-data tokens.
705        this.boundaryLength = boundary.length + BOUNDARY_PREFIX.length;
706        if (bufferSize < this.boundaryLength + 1) {
707            throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
708        }
709
710        this.input = input;
711        this.bufSize = Math.max(bufferSize, boundaryLength * 2);
712        this.buffer = new byte[this.bufSize];
713        this.notifier = notifier;
714        this.partHeaderSizeMax = partHeaderSizeMax;
715
716        this.boundary = new byte[this.boundaryLength];
717        this.boundaryTable = new int[this.boundaryLength + 1];
718        this.keepRegion = this.boundary.length;
719
720        System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
721        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
722        computeBoundaryTable();
723
724        head = 0;
725        tail = 0;
726    }
727
728    /**
729     * Computes the table used for Knuth-Morris-Pratt search algorithm.
730     */
731    private void computeBoundaryTable() {
732        var position = 2;
733        var candidate = 0;
734
735        boundaryTable[0] = -1;
736        boundaryTable[1] = 0;
737
738        while (position <= boundaryLength) {
739            if (boundary[position - 1] == boundary[candidate]) {
740                boundaryTable[position] = candidate + 1;
741                candidate++;
742                position++;
743            } else if (candidate > 0) {
744                candidate = boundaryTable[candidate];
745            } else {
746                boundaryTable[position] = 0;
747                position++;
748            }
749        }
750    }
751
752    /**
753     * Reads {@code body-data} from the current {@code encapsulation} and discards it.
754     * <p>
755     * Use this method to skip encapsulations you don't need or don't understand.
756     * </p>
757     *
758     * @return The amount of data discarded.
759     * @throws MalformedStreamException if the stream ends unexpectedly.
760     * @throws IOException              if an i/o error occurs.
761     */
762    public long discardBodyData() throws MalformedStreamException, IOException {
763        return readBodyData(NullOutputStream.INSTANCE);
764    }
765
766    /**
767     * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
768     *
769     * @param value The value to find.
770     * @param pos   The starting position for searching.
771     * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
772     */
773    protected int findByte(final byte value, final int pos) {
774        for (var i = pos; i < tail; i++) {
775            if (buffer[i] == value) {
776                return i;
777            }
778        }
779
780        return -1;
781    }
782
783    /**
784     * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
785     *
786     * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
787     */
788    protected int findSeparator() {
789        var bufferPos = this.head;
790        var tablePos = 0;
791        while (bufferPos < this.tail) {
792            while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
793                tablePos = boundaryTable[tablePos];
794            }
795            bufferPos++;
796            tablePos++;
797            if (tablePos == boundaryLength) {
798                return bufferPos - boundaryLength;
799            }
800        }
801        return -1;
802    }
803
804    /**
805     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
806     * used.
807     *
808     * @return The encoding used to read part headers.
809     */
810    public Charset getHeaderCharset() {
811        return headerCharset;
812    }
813
814    /** Returns the per part size limit for headers.
815     *
816     * @return The maximum size of the headers in bytes.
817     * @since 2.0.0-M4
818     */
819    public int getPartHeaderSizeMax() {
820        return partHeaderSizeMax;
821    }
822
823    /**
824     * Creates a new {@link ItemInputStream}.
825     *
826     * @return A new instance of {@link ItemInputStream}.
827     */
828    public ItemInputStream newInputStream() {
829        return new ItemInputStream();
830    }
831
832    /**
833     * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
834     * <p>
835     * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
836     * </p>
837     *
838     * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
839     * @return the amount of data written.
840     * @throws MalformedStreamException if the stream ends unexpectedly.
841     * @throws IOException              if an i/o error occurs.
842     */
843    public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
844        try (var inputStream = newInputStream()) {
845            return IOUtils.copyLarge(inputStream, output);
846        }
847    }
848
849    /**
850     * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
851     *
852     * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
853     * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits
854     * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
855     */
856    public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
857        final var marker = new byte[2];
858        final boolean nextChunk;
859        head += boundaryLength;
860        try {
861            marker[0] = readByte();
862            if (marker[0] == LF) {
863                // Work around IE5 Mac bug with input type=image.
864                // Because the boundary delimiter, not including the trailing
865                // CRLF, must not appear within any file (RFC 2046, section
866                // 5.1.1), we know the missing CR is due to a buggy browser
867                // rather than a file containing something similar to a
868                // boundary.
869                return true;
870            }
871
872            marker[1] = readByte();
873            if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
874                nextChunk = false;
875            } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
876                nextChunk = true;
877            } else {
878                throw new MalformedStreamException("Unexpected characters follow a boundary");
879            }
880        } catch (final FileUploadSizeException e) {
881            throw e;
882        } catch (final IOException e) {
883            throw new MalformedStreamException("Stream ended unexpectedly", e);
884        }
885        return nextChunk;
886    }
887
888    /**
889     * Reads a byte from the {@code buffer}, and refills it as necessary.
890     *
891     * @return The next byte from the input stream.
892     * @throws IOException if there is no more data available.
893     */
894    public byte readByte() throws IOException {
895        // Buffer depleted ?
896        if (head == tail) {
897            head = 0;
898            // Refill.
899            tail = input.read(buffer, head, bufSize);
900            if (tail == -1) {
901                // No more data available.
902                throw new IOException("No more data is available");
903            }
904            if (notifier != null) {
905                notifier.noteBytesRead(tail);
906            }
907        }
908        return buffer[head++];
909    }
910
911    /**
912     * Reads the {@code header-part} of the current {@code encapsulation}.
913     * <p>
914     * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
915     * </p>
916     * <p>
917     * <strong>TODO</strong> allow limiting maximum header size to protect against abuse.
918     * </p>
919     *
920     * @return The {@code header-part} of the current encapsulation.
921     * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits.
922     * @throws MalformedStreamException if the stream ends unexpectedly.
923     */
924    public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
925        var i = 0;
926        byte b;
927        // to support multi-byte characters
928        final var baos = new ByteArrayOutputStream();
929        var size = 0;
930        while (i < HEADER_SEPARATOR.length) {
931            try {
932                b = readByte();
933            } catch (final FileUploadSizeException e) {
934                // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
935                throw e;
936            } catch (final IOException e) {
937                throw new MalformedStreamException("Stream ended unexpectedly", e);
938            }
939            final int phsm = getPartHeaderSizeMax();
940            if (phsm != -1 && ++size > phsm) {
941                throw new FileUploadSizeException(
942                        String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size);
943            }
944            if (b == HEADER_SEPARATOR[i]) {
945                i++;
946            } else {
947                i = 0;
948            }
949            baos.write(b);
950        }
951        try {
952            return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
953        } catch (final UnsupportedEncodingException e) {
954            // not possible
955            throw new IllegalStateException(e);
956        }
957    }
958
959    /**
960     * Changes the boundary token used for partitioning the stream.
961     * <p>
962     * This method allows single pass processing of nested multipart streams.
963     * </p>
964     * <p>
965     * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream.
966     * </p>
967     * <p>
968     * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
969     * </p>
970     *
971     * @param boundary The boundary to be used for parsing of the nested stream.
972     * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
973     */
974    public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
975        if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
976            throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
977        }
978        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
979        computeBoundaryTable();
980    }
981
982    /**
983     * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding
984     * is used.
985     *
986     * @param headerCharset The encoding used to read part headers.
987     */
988    public void setHeaderCharset(final Charset headerCharset) {
989        this.headerCharset = headerCharset;
990    }
991
992    /**
993     * Finds the beginning of the first {@code encapsulation}.
994     *
995     * @return {@code true} if an {@code encapsulation} was found in the stream.
996     * @throws IOException if an i/o error occurs.
997     */
998    public boolean skipPreamble() throws IOException {
999        // First delimiter may be not preceded with a CRLF.
1000        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
1001        boundaryLength = boundary.length - 2;
1002        computeBoundaryTable();
1003        try {
1004            // Discard all data up to the delimiter.
1005            discardBodyData();
1006
1007            // Read boundary - if succeeded, the stream contains an
1008            // encapsulation.
1009            return readBoundary();
1010        } catch (final MalformedStreamException e) {
1011            return false;
1012        } finally {
1013            // Restore delimiter.
1014            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
1015            boundaryLength = boundary.length;
1016            boundary[0] = CR;
1017            boundary[1] = LF;
1018            computeBoundaryTable();
1019        }
1020    }
1021
1022}