Subversion
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
svn_path.h
Go to the documentation of this file.
1 /**
2  * @copyright
3  * ====================================================================
4  * Copyright (c) 2000-2004 CollabNet. All rights reserved.
5  *
6  * This software is licensed as described in the file COPYING, which
7  * you should have received as part of this distribution. The terms
8  * are also available at http://subversion.tigris.org/license-1.html.
9  * If newer versions of this license are posted there, you may use a
10  * newer version instead, at your option.
11  *
12  * This software consists of voluntary contributions made by many
13  * individuals. For exact contribution history, see the revision
14  * history and logs, available at http://subversion.tigris.org/.
15  * ====================================================================
16  * @endcopyright
17  *
18  * @file svn_path.h
19  * @brief A path manipulation library
20  *
21  * All incoming and outgoing paths are non-NULL and in UTF-8, unless
22  * otherwise documented.
23  *
24  * No result path ever ends with a separator, no matter whether the
25  * path is a file or directory, because we always canonicalize() it.
26  *
27  * Nearly all the @c svn_path_xxx functions expect paths passed into
28  * them to be in canonical form as defined by the Subversion path
29  * library itself. The only functions which do *not* have such
30  * expectations are:
31  *
32  * - @c svn_path_canonicalize()
33  * - @c svn_path_is_canonical()
34  * - @c svn_path_internal_style()
35  * - @c svn_path_uri_encode()
36  *
37  * For the most part, we mean what most anyone would mean when talking
38  * about canonical paths, but to be on the safe side, you must run
39  * your paths through @c svn_path_canonicalize() before passing them to
40  * other functions in this API.
41  */
42 
43 #ifndef SVN_PATH_H
44 #define SVN_PATH_H
45 
46 #include <apr.h>
47 #include <apr_pools.h>
48 #include <apr_tables.h>
49 
50 #include "svn_types.h"
51 #include "svn_string.h"
52 
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif /* __cplusplus */
57 
58 
59 
60 /** Convert @a path from the local style to the canonical internal style. */
61 const char *
62 svn_path_internal_style(const char *path, apr_pool_t *pool);
63 
64 /** Convert @a path from the canonical internal style to the local style. */
65 const char *
66 svn_path_local_style(const char *path, apr_pool_t *pool);
67 
68 
69 /** Join a base path (@a base) with a component (@a component), allocating
70  * the result in @a pool. @a component need not be a single component: it
71  * can be any path, absolute or relative to @a base.
72  *
73  * If either @a base or @a component is the empty path, then the other
74  * argument will be copied and returned. If both are the empty path the
75  * empty path is returned.
76  *
77  * If the @a component is an absolute path, then it is copied and returned.
78  * Exactly one slash character ('/') is used to join the components,
79  * accounting for any trailing slash in @a base.
80  *
81  * Note that the contents of @a base are not examined, so it is possible to
82  * use this function for constructing URLs, or for relative URLs or
83  * repository paths.
84  *
85  * This function is NOT appropriate for native (local) file
86  * paths. Only for "internal" canonicalized paths, since it uses '/'
87  * for the separator. Further, an absolute path (for @a component) is
88  * based on a leading '/' character. Thus, an "absolute URI" for the
89  * @a component won't be detected. An absolute URI can only be used
90  * for the base.
91  */
92 char *
93 svn_path_join(const char *base, const char *component, apr_pool_t *pool);
94 
95 /** Join multiple components onto a @a base path, allocated in @a pool. The
96  * components are terminated by a @c NULL.
97  *
98  * If any component is the empty string, it will be ignored.
99  *
100  * If any component is an absolute path, then it resets the base and
101  * further components will be appended to it.
102  *
103  * This function does not support URLs.
104  *
105  * See svn_path_join() for further notes about joining paths.
106  */
107 char *
108 svn_path_join_many(apr_pool_t *pool, const char *base, ...);
109 
110 
111 /** Get the basename of the specified canonicalized @a path. The
112  * basename is defined as the last component of the path (ignoring any
113  * trailing slashes). If the @a path is root ("/"), then that is
114  * returned. Otherwise, the returned value will have no slashes in
115  * it.
116  *
117  * Example: svn_path_basename("/foo/bar") -> "bar"
118  *
119  * The returned basename will be allocated in @a pool.
120  *
121  * @note If an empty string is passed, then an empty string will be returned.
122  */
123 char *
124 svn_path_basename(const char *path, apr_pool_t *pool);
125 
126 /** Get the dirname of the specified canonicalized @a path, defined as
127  * the path with its basename removed. If @a path is root ("/"), it is
128  * returned unchanged.
129  *
130  * The returned dirname will be allocated in @a pool.
131  */
132 char *
133 svn_path_dirname(const char *path, apr_pool_t *pool);
134 
135 /** Split @a path into a root portion and an extension such that
136  * the root + the extension = the original path, and where the
137  * extension contains no period (.) characters. If not @c NULL, set
138  * @a *path_root to the root portion. If not @c NULL, set
139  * @a *path_ext to the extension (or "" if there is no extension
140  * found). Allocate both @a *path_root and @a *path_ext in @a pool.
141  *
142  * @since New in 1.5.
143  */
144 void
145 svn_path_splitext(const char **path_root, const char **path_ext,
146  const char *path, apr_pool_t *pool);
147 
148 /** Return the number of components in the canonicalized @a path.
149  *
150  * @since New in 1.1.
151 */
152 apr_size_t
153 svn_path_component_count(const char *path);
154 
155 /** Add a @a component (a NUL-terminated C-string) to the
156  * canonicalized @a path. @a component is allowed to contain
157  * directory separators.
158  *
159  * If @a path is non-empty, append the appropriate directory separator
160  * character, and then @a component. If @a path is empty, simply set it to
161  * @a component; don't add any separator character.
162  *
163  * If the result ends in a separator character, then remove the separator.
164  */
165 void
166 svn_path_add_component(svn_stringbuf_t *path, const char *component);
167 
168 /** Remove one component off the end of the canonicalized @a path. */
169 void
170 svn_path_remove_component(svn_stringbuf_t *path);
171 
172 /** Remove @a n components off the end of the canonicalized @a path.
173  * Equivalent to calling svn_path_remove_component() @a n times.
174  *
175  * @since New in 1.1.
176  */
177 void
178 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n);
179 
180 /** Divide the canonicalized @a path into @a *dirpath and @a
181  * *base_name, allocated in @a pool.
182  *
183  * If @a dirpath or @a base_name is NULL, then don't set that one.
184  *
185  * Either @a dirpath or @a base_name may be @a path's own address, but they
186  * may not both be the same address, or the results are undefined.
187  *
188  * If @a path has two or more components, the separator between @a dirpath
189  * and @a base_name is not included in either of the new names.
190  *
191  * examples:
192  * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre>
193  * - <pre>"/bar" ==> "/" and "bar"</pre>
194  * - <pre>"/" ==> "/" and "/"</pre>
195  * - <pre>"X:/" ==> "X:/" and "X:/"</pre>
196  * - <pre>"bar" ==> "" and "bar"</pre>
197  * - <pre>"" ==> "" and ""</pre>
198  */
199 void
200 svn_path_split(const char *path,
201  const char **dirpath,
202  const char **base_name,
203  apr_pool_t *pool);
204 
205 
206 /** Return non-zero iff @a path is empty ("") or represents the current
207  * directory -- that is, if prepending it as a component to an existing
208  * path would result in no meaningful change.
209  */
210 int
211 svn_path_is_empty(const char *path);
212 
213 #ifndef SVN_DIRENT_URI_H
214 /* This declaration has been moved to svn_dirent_uri.h, remains here only for
215  compatibility reasons. */
216 svn_boolean_t
217 svn_dirent_is_root(const char *dirent, apr_size_t len);
218 #endif /* SVN_DIRENT_URI_H */
219 
220 /** Return a new path (or URL) like @a path, but transformed such that
221  * some types of path specification redundancies are removed.
222  *
223  * This involves collapsing redundant "/./" elements, removing
224  * multiple adjacent separator characters, removing trailing
225  * separator characters, and possibly other semantically inoperative
226  * transformations.
227  *
228  * Convert the scheme and hostname to lowercase (see issue #2475)
229  *
230  * The returned path may be statically allocated, equal to @a path, or
231  * allocated from @a pool.
232  */
233 const char *
234 svn_path_canonicalize(const char *path, apr_pool_t *pool);
235 
236 /** Return @c TRUE iff @a path is canonical. Use @a pool for temporary
237  * allocations.
238  *
239  * @since New in 1.5.
240  */
241 svn_boolean_t
242 svn_path_is_canonical(const char *path, apr_pool_t *pool);
243 
244 
245 /** Return an integer greater than, equal to, or less than 0, according
246  * as @a path1 is greater than, equal to, or less than @a path2.
247  */
248 int
249 svn_path_compare_paths(const char *path1, const char *path2);
250 
251 
252 /** Return the longest common path shared by two canonicalized paths,
253  * @a path1 and @a path2. If there's no common ancestor, return the
254  * empty path.
255  *
256  * @a path1 and @a path2 may be URLs. In order for two URLs to have
257  * a common ancestor, they must (a) have the same protocol (since two URLs
258  * with the same path but different protocols may point at completely
259  * different resources), and (b) share a common ancestor in their path
260  * component, i.e. 'protocol://' is not a sufficient ancestor.
261  */
262 char *
263 svn_path_get_longest_ancestor(const char *path1,
264  const char *path2,
265  apr_pool_t *pool);
266 
267 /** Convert @a relative canonicalized path to an absolute path and
268  * return the results in @a *pabsolute, allocated in @a pool.
269  *
270  * @a relative may be a URL, in which case no attempt is made to convert it,
271  * and a copy of the URL is returned.
272  */
273 svn_error_t *
274 svn_path_get_absolute(const char **pabsolute,
275  const char *relative,
276  apr_pool_t *pool);
277 
278 /** Return the path part of the canonicalized @a path in @a
279  * *pdirectory, and the file part in @a *pfile. If @a path is a
280  * directory, set @a *pdirectory to @a path, and @a *pfile to the
281  * empty string. If @a path does not exist it is treated as if it is
282  * a file, since directories do not normally vanish.
283  */
284 svn_error_t *
285 svn_path_split_if_file(const char *path,
286  const char **pdirectory,
287  const char **pfile,
288  apr_pool_t *pool);
289 
290 /** Find the common prefix of the canonicalized paths in @a targets
291  * (an array of <tt>const char *</tt>'s), and remove redundant paths if @a
292  * remove_redundancies is TRUE.
293  *
294  * - Set @a *pcommon to the absolute path of the path or URL common to
295  * all of the targets. If the targets have no common prefix, or
296  * are a mix of URLs and local paths, set @a *pcommon to the
297  * empty string.
298  *
299  * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
300  * to an array of targets relative to @a *pcommon, and if
301  * @a remove_redundancies is TRUE, omit any paths/URLs that are
302  * descendants of another path/URL in @a targets. If *pcommon
303  * is empty, @a *pcondensed_targets will contain full URLs and/or
304  * absolute paths; redundancies can still be removed (from both URLs
305  * and paths). If @a pcondensed_targets is NULL, leave it alone.
306  *
307  * Else if there is exactly one target, then
308  *
309  * - Set @a *pcommon to that target, and
310  *
311  * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
312  * to an array containing zero elements. Else if
313  * @a pcondensed_targets is NULL, leave it alone.
314  *
315  * If there are no items in @a targets, set @a *pcommon and (if
316  * applicable) @a *pcondensed_targets to @c NULL.
317  *
318  * @note There is no guarantee that @a *pcommon is within a working
319  * copy. */
320 svn_error_t *
321 svn_path_condense_targets(const char **pcommon,
322  apr_array_header_t **pcondensed_targets,
323  const apr_array_header_t *targets,
324  svn_boolean_t remove_redundancies,
325  apr_pool_t *pool);
326 
327 
328 /** Copy a list of canonicalized @a targets, one at a time, into @a
329  * pcondensed_targets, omitting any targets that are found earlier in
330  * the list, or whose ancestor is found earlier in the list. Ordering
331  * of targets in the original list is preserved in the condensed list
332  * of targets. Use @a pool for any allocations.
333  *
334  * How does this differ in functionality from svn_path_condense_targets()?
335  *
336  * Here's the short version:
337  *
338  * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-)
339  * Order matters for updates because a multi-arg update is not
340  * atomic, and CVS users are used to, when doing 'cvs up targetA
341  * targetB' seeing targetA get updated, then targetB. I think the
342  * idea is that if you're in a time-sensitive or flaky-network
343  * situation, a user can say, "I really *need* to update
344  * wc/A/D/G/tau, but I might as well update my whole working copy if
345  * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if
346  * something dies in the middle of the 'wc' update, at least the
347  * user has 'tau' up-to-date.
348  *
349  * 2. Also, we have this notion of an anchor and a target for updates
350  * (the anchor is where the update editor is rooted, the target is
351  * the actual thing we want to update). I needed a function that
352  * would NOT screw with my input paths so that I could tell the
353  * difference between someone being in A/D and saying 'svn up G' and
354  * being in A/D/G and saying 'svn up .' -- believe it or not, these
355  * two things don't mean the same thing. svn_path_condense_targets()
356  * plays with absolute paths (which is fine, so does
357  * svn_path_remove_redundancies()), but the difference is that it
358  * actually tweaks those targets to be relative to the "grandfather
359  * path" common to all the targets. Updates don't require a
360  * "grandfather path" at all, and even if it did, the whole
361  * conversion to an absolute path drops the crucial difference
362  * between saying "i'm in foo, update bar" and "i'm in foo/bar,
363  * update '.'"
364  */
365 svn_error_t *
366 svn_path_remove_redundancies(apr_array_header_t **pcondensed_targets,
367  const apr_array_header_t *targets,
368  apr_pool_t *pool);
369 
370 
371 /** Decompose the canonicalized @a path into an array of <tt>const
372  * char *</tt> components, allocated in @a pool. If @a path is
373  * absolute, the first component will be a lone dir separator (the
374  * root directory).
375  */
376 apr_array_header_t *
377 svn_path_decompose(const char *path, apr_pool_t *pool);
378 
379 /** Join an array of <tt>const char *</tt> components into a '/'
380  * separated path, allocated in @a pool. The joined path is absolute if
381  * the first component is a lone dir separator.
382  *
383  * Calling svn_path_compose() on the output of svn_path_decompose()
384  * will return the exact same path.
385  *
386  * @since New in 1.5.
387  */
388 const char *
389 svn_path_compose(const apr_array_header_t *components, apr_pool_t *pool);
390 
391 /** Test that @a name is a single path component, that is:
392  * - not @c NULL or empty.
393  * - not a `/'-separated directory path
394  * - not empty or `..'
395  */
396 svn_boolean_t
397 svn_path_is_single_path_component(const char *name);
398 
399 
400 /**
401  * Test to see if a backpath, i.e. '..', is present in @a path.
402  * If not, return @c FALSE.
403  * If so, return @c TRUE.
404  *
405  * @since New in 1.1.
406  */
407 svn_boolean_t
408 svn_path_is_backpath_present(const char *path);
409 
410 
411 /**
412  * Test to see if a dotpath, i.e. '.', is present in @a path.
413  * If not, return @c FALSE.
414  * If so, return @c TRUE.
415  *
416  * @since New in 1.6.
417  */
418 svn_boolean_t
419 svn_path_is_dotpath_present(const char *path);
420 
421 
422 /** Test if @a path2 is a child of @a path1.
423  * If not, return @c NULL.
424  * If so, return a copy of the remainder path, allocated in @a pool.
425  * (The remainder is the component which, added to @a path1, yields
426  * @a path2. The remainder does not begin with a dir separator.)
427  *
428  * Both paths must be in canonical form, and must either be absolute,
429  * or contain no ".." components.
430  *
431  * If @a path2 is the same as @a path1, it is not considered a child, so the
432  * result is @c NULL; an empty string is never returned.
433  *
434  * @note In 1.5 this function has been extended to allow a @c NULL @a pool
435  * in which case a pointer into @a path2 will be returned to
436  * identify the remainder path.
437  *
438  * ### @todo the ".." restriction is unfortunate, and would ideally
439  * be lifted by making the implementation smarter. But this is not
440  * trivial: if the path is "../foo", how do you know whether or not
441  * the current directory is named "foo" in its parent?
442  */
443 const char *
444 svn_path_is_child(const char *path1, const char *path2, apr_pool_t *pool);
445 
446 /** Return TRUE if @a path1 is an ancestor of @a path2 or the paths are equal
447  * and FALSE otherwise.
448  *
449  * @since New in 1.3.
450  */
451 svn_boolean_t
452 svn_path_is_ancestor(const char *path1, const char *path2);
453 
454 /**
455  * Check whether @a path is a valid Subversion path.
456  *
457  * A valid Subversion pathname is a UTF-8 string without control
458  * characters. "Valid" means Subversion can store the pathname in
459  * a repository. There may be other, OS-specific, limitations on
460  * what paths can be represented in a working copy.
461  *
462  * ASSUMPTION: @a path is a valid UTF-8 string. This function does
463  * not check UTF-8 validity.
464  *
465  * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if
466  * invalid.
467  *
468  * @note Despite returning an @c SVN_ERR_FS_* error, this function has
469  * nothing to do with the versioned filesystem's concept of validity.
470  *
471  * @since New in 1.2.
472  */
473 svn_error_t *
474 svn_path_check_valid(const char *path, apr_pool_t *pool);
475 
476 
477 /** URI/URL stuff
478  *
479  * @defgroup svn_path_uri_stuff URI/URL conversion
480  * @{
481  */
482 
483 /** Return TRUE iff @a path looks like a valid absolute URL. */
484 svn_boolean_t
485 svn_path_is_url(const char *path);
486 
487 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */
488 svn_boolean_t
489 svn_path_is_uri_safe(const char *path);
490 
491 /** Return a URI-encoded copy of @a path, allocated in @a pool. (@a
492  path can be an arbitrary UTF-8 string and does not have to be a
493  canonical path.) */
494 const char *
495 svn_path_uri_encode(const char *path, apr_pool_t *pool);
496 
497 /** Return a URI-decoded copy of @a path, allocated in @a pool. */
498 const char *
499 svn_path_uri_decode(const char *path, apr_pool_t *pool);
500 
501 /** Extend @a url by @a component, URI-encoding that @a component
502  * before adding it to the @a url; return the new @a url, allocated in
503  * @a pool. If @a component is @c NULL, just return a copy of @a url,
504  * allocated in @a pool.
505  *
506  * @a component need not be a single path segment, but if it contains
507  * multiple segments, they must be separated by '/'. @a component
508  * should not begin with '/', however; if it does, the behavior is
509  * undefined.
510  *
511  * @a url must be in canonical format; it may not have a trailing '/'.
512  *
513  * @note To add a component that is already URI-encoded, use
514  * <tt>svn_path_join(url, component, pool)</tt> instead.
515  *
516  * @note gstein suggests this for when @a component begins with '/':
517  *
518  * "replace the path entirely
519  * https://example.com:4444/base/path joined with /leading/slash,
520  * should return: https://example.com:4444/leading/slash
521  * per the RFCs on combining URIs"
522  *
523  * We may implement that someday, which is why leading '/' is
524  * merely undefined right now.
525  *
526  * @since New in 1.6.
527  */
528 const char *
529 svn_path_url_add_component2(const char *url,
530  const char *component,
531  apr_pool_t *pool);
532 
533 /** Like svn_path_url_add_component2, but allows path components that
534  * end with a trailing '/'
535  *
536  * @deprecated Provided for backward compatibility with the 1.5 API.
537  */
538 SVN_DEPRECATED
539 const char *
540 svn_path_url_add_component(const char *url,
541  const char *component,
542  apr_pool_t *pool);
543 
544 /**
545  * Convert @a iri (Internationalized URI) to a URI.
546  * The return value may be the same as @a iri if it was already
547  * a URI. Else, allocate the return value in @a pool.
548  *
549  * @since New in 1.1.
550  */
551 const char *
552 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool);
553 
554 /**
555  * URI-encode certain characters in @a uri that are not valid in a URI, but
556  * don't have any special meaning in @a uri at their positions. If no
557  * characters need escaping, just return @a uri.
558  *
559  * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `.
560  * This may be extended in the future to do context-dependent escaping.
561  *
562  * @since New in 1.1.
563  */
564 const char *
565 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool);
566 
567 /** @} */
568 
569 /** Charset conversion stuff
570  *
571  * @defgroup svn_path_charset_stuff Charset conversion
572  * @{
573  */
574 
575 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */
576 svn_error_t *
577 svn_path_cstring_from_utf8(const char **path_apr,
578  const char *path_utf8,
579  apr_pool_t *pool);
580 
581 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */
582 svn_error_t *
583 svn_path_cstring_to_utf8(const char **path_utf8,
584  const char *path_apr,
585  apr_pool_t *pool);
586 
587 
588 /** @} */
589 
590 #ifdef __cplusplus
591 }
592 #endif /* __cplusplus */
593 
594 
595 #endif /* SVN_PATH_H */