# -*- coding: utf-8 -*-
"""The 'filesysobjects.paths' module provides operations on static file
resource paths.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import sre_constants
import re
import posixpath
import ntpath
from filesysobjects import PathError, \
ISSTR, \
gettpf, getspf, \
rte2num, rte_map, \
V3K, RTE, RTE_POSIX, RTE_WIN32, FileSysObjectsError, \
RTE_LOCAL, RTE_CNP, RTE_CNW, \
RTE_FILEURI0, RTE_FILEURI4, RTE_FILEURI5, RTE_FILEURI
__author__ = 'Arno-Can Uestuensoez'
__license__ = "Artistic-License-2.0 + Forced-Fairplay-Constraints"
__copyright__ = "Copyright (C) 2010-2016 Arno-Can Uestuensoez" \
"@Ingenieurbuero Arno-Can Uestuensoez"
__version__ = '0.1.20'
__uuid__ = "4135ab0f-fbb8-45a2-a6b1-80d96c164b72"
__docformat__ = "restructuredtext en"
#
# for test and development
# _mydebug = False
#*
# *** static compiled strings ***
#*
# pathname separator
#: os separator - identical for every runtime platform branch
OSSEP = os.path.sep

# OSSEPCLS:  regex character class matching the separator
# OSSEPCLSN: regex character class matching anything but the separator
if RTE & RTE_WIN32:
    OSSEPCLS, OSSEPCLSN = '[\\\\]', '[^\\\\]'
else:
    OSSEPCLS, OSSEPCLSN = '[/]', '[^/]'
# Base flags for all verbose patterns of this module; re.ASCII is only
# referenced when V3K is set.
rebaseflags = (re.X | re.ASCII) if V3K else re.X  # @UndefinedVariable
#
# prohibited characters for optional validation - see strict options
#
#: characters rejected for windows
INVALIDCHARSWIN = re.compile(
    r'[:<>*?]'
)

#: characters rejected for posix
INVALIDCHARSPOSIX = re.compile(
    r'\0'
)

#: super position of both character sets
INVALIDCHARS = re.compile(
    r'[:<>*?\0]'
)
#: maps the letter of an escape sequence to the escape character itself
UNESC_CHAR_MAP = dict(zip('abfnrtv', '\a\b\f\n\r\t\v'))

#: maps unambiguous escape characters to their two-character escape
#: sequence (backslash + letter); the exact inverse of UNESC_CHAR_MAP
ESC_CHAR_MAP = dict(
    (char, '\\' + letter) for letter, char in UNESC_CHAR_MAP.items()
)

#: character class of the special escape characters
ESC_CHARS = '[\a\b\f\n\r\t\v]'
# pylint: disable-msg=W0105
# [MS-DTYP] - 2.2.57 - UNC definitions
# pchar = %x20-21 / %x23-29 / %x2D-2E / %x30-39 / %x40-5A / %x5E-7B / %x7D-FF
# pchar = r'[\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]'
# pchar="""[^\x00-\x1f\x22\x2a-\x2c\x2f\x3a-\x3f\x5b-\x5d\x7c]"""
#
# *** splits environment variables ***
#
# _ENV_SPLIT:  splits a string into literal parts and references to
#              environment variables for substitution.
# _ENV_SPLITg: entry points (group indexes) into the sub strings of
#              environment variables and literals - one per alternative.
#
# The variable syntax follows the runtime platform: '%NAME%' for win32,
# '${NAME}' and '$NAME' otherwise.
if RTE & RTE_WIN32:
    _ENV_SPLITg = [2, 5, 8]
    _ENV_SPLIT = re.compile(
        r"""
(
(([^%]*?)([%][a-zA-Z0-9_]+[%])) # 2: defined without brace
| (([^%]*?)([%][a-zA-Z0-9_]+[^%]?)) # 5: ERROR:
| ((.*)()) # 8: any
)
""",
        rebaseflags
    )
else:
    _ENV_SPLITg = [2, 5, 8, 11]
    _ENV_SPLIT = re.compile(
        r"""
(
(([^$]*?)([$][{][a-zA-Z0-9_]+[}])) # 2: defined with brace
| (([^$]*?)([$][a-zA-Z0-9_]+[;]?)) # 5: defined without brace
| (([^$]*?)([$][{][a-zA-Z0-9_]+[^}]?)) # 8: ERROR:
| ((.*)()) # 11: any
)
""",
        rebaseflags
    )
# pylint: enable-msg=W0105
# Flags of the path scanners: the base flags plus multi-line mode, and
# ASCII-only matching when V3K is set.
pathflags = rebaseflags | re.M  # @UndefinedVariable
if V3K:
    pathflags |= re.ASCII  # @UndefinedVariable
#: First stage regexpr scanner for 'normpathx', also used in 'escapepathx'.
#:
#: One verbose pattern made of 42 alternatives with exactly one capturing
#: group each. The 'sub_*' callbacks of this module read 'lastindex' of the
#: match and translate it through 'ASCII_SC_CTRL' into a 'SC_*' constant,
#: so the order of the alternatives and the indexes of 'ASCII_SC_CTRL'
#: must be kept in sync.
#:
#: NOTE(review): alternatives 31 and 32 are textually identical, so 32 can
#: never be the matching one - presumably one of them was meant to detect
#: the raw two-character sequence backslash + 'n'; confirm.
#: NOTE(review): alternatives 4 and 5 accept decimal digits only
#: ('[0-9]'), hexadecimal unicode escapes are not covered - confirm
#: whether this is intended.
PATHSCANNER = re.compile(r"""
(["]{3}[\x01-\xFF]*?["]{3}) # 1 quoted string by 3 double quotes(") - similar to Python
|([']{3}[\x01-\xFF]*?[']{3}) # 2 quoted string by 3 single quotes(') - similar to Python
|([\a\b\f\n\r\t\v]) # 3 python escape char - without separate backslash
|(?<=[^\\\\]){0,1}([\\\\][u][0-9]{4})
# 4 unicode-16
|(?<=[^\\\\]){0,1}([\\\\][U][0-9]{8})
# 5 unicode-32
|^(file://[/]{0,1}[/\\\\]{2})(?![/\\\\])
# 6 share/netapp - rfc8089, [MS-DTYP]
|^(file://)(?![/\\\\]) # 7 non-local - rfc8089 / maps to Posix-App
|^(file:)(?=/[^/\\\\]) # 8 min - rfc8089
|^(file://)(?=/) # 9 absolute path - rfc8089 rfc1738
|^(file:)(?=[a-zA-Z]:) # 10 short-form - rfc8089 - DOS drive
|^(//)(?=[^\\\\/"]+[\\\\/][\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]{1,80}[\\\\/]*)
# 11 portable UNC
|^(//)(?=[^/\\\\][^/]*/.+) # 12 pure posix - with the additional constraint first != [/\\]
|^([\\\\][\\\\])(?=[^\\\\/"]+[\\\\/][\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]{1,80}[\\\\/]*)
# 13 UNC
|^([a-zA-Z]:)(?=\\a|\\b|\\f|\\n|\\r|\\t|\\v)
# 14 drive following escaped special escape character
|^([a-zA-Z]:[\\\\]+)(?![\a\b\f\n\r\t\v])
# 15 drive following 1..n * '[\\]'
|^([a-zA-Z]:[/]+) # 16 drive following 1..n * '[/]'
|^([a-zA-Z]:) # 17 drive only, or relative path
|(?<=[;:])([a-zA-Z]:[\\\\]+) # 18 drive following 1..n * '[\\]'
|(?<=[;:])([a-zA-Z]:[/]+) # 19 drive following 1..n * '[/]'
|(?<=[;:])([a-zA-Z]:) # 20 drive only, or relative path
|(?<=[\\\\])([/]+) # 21 os. sep - posix pathname separators
|(/+)(?=/) # 22 n * posix dir-separators
|(/) # 23 1 * posix dir-separators
|(?<=[\\\\/])([.][.][\\\\/]) # 24 'up-dir: /../ '
|^([.][.][\\\\/]+) # 25 'up-dir: ^../ '
|(?<=[\\\\/])([.][.])$ # 26 'up-dir: /..$ '
|(?<=[/\\\\])([.][/\\\\]) # 27 'null-dir: \.\ /./'
|^([.][/\\\\]+) # 28 'null-dir: .\ ./'
|(?<=[/\\\\])([.])$ # 29 'null-dir: \. /.'
|([\\\\][\\\\]) # 30 bs pairs
|([\\\\])(?=\n) # 31 single bs - escape '\n'
|([\\\\])(?=\n) # 32 single bs - escape '\n'
|([\\\\])(?![\\\\]) # 33 single bs - non-escape
|([:]+) # 34 posix path-separators
|([;]+) # 35 win path-separators
|(?<![^\\\\][\\\\])(\[) # 36 start char class
|(?<![^\\\\][\\\\])(\]) # 37 end char class
|(?<![^\\\\][\\\\])(') # 38 escaped '
|(?<![^\\\\][\\\\])(") # 39 escaped "
|([^\\\\/\a\b\f\n\r\t\v:;"'\[\]]+)# 40
|([^\\\\/\a\b\f\n\r\t\v:;]+)(?!.*["'\[\]])
# 41
|(.) # 42 # free char
""", pathflags)
#
# map matches to actual control sequences
#
# --- directory separators and search-path separators ---
SC_BSPAIR = 1000       # pair of backslashes
SC_CRMASK = 1020       # backslash masking a following '\n'
SC_PSEPP = 1230        # ':' - posix search-path separator
SC_PSEPW = 1240        # ';' - win search-path separator
SC_SEPP = 1260         # n * posix directory separator
SC_SEPW = 1270         # 1 * win directory separator
SC_SLASH = 1280        # 1 * '/'
SC_SLASHPREB = 1290    # '/' preceded by a backslash

# --- dot directories ---
SC_NULLDIR = 1200      # null-dir, e.g. '/./'
SC_UPDIR = 1380        # up-dir, e.g. '/../'

# --- quoting, escapes, unicode ---
SC_DQUOTED = 1040      # double-quote quoted string
SC_SQUOTED = 1310      # single-quote quoted string
SC_ESCCHAR = 1100      # escape character, one of [abf...]
SC_ESCAPEDSQUOT = 1430
SC_ESCAPEDDQUOT = 1440
SC_U16 = 1330          # unicode-16
SC_U16R = 1340         # unicode-16 raw
SC_U32 = 1350          # unicode-32
SC_U32R = 1360         # unicode-32 raw

# --- character classes ---
SC_CHRCLSSTART = 1400
SC_CHRCLSEND = 1410
SC_ANYONECHR = 1420

# --- dos drives ---
SC_DRIVE = 1050        # dos drive letter - or a directory on Posix !!!
SC_DRIVENPSEP = 1060   # dos drive letter followed by n * posix sep
SC_DRIVENWSEP = 1070   # dos drive letter followed by n * win sep

# --- uri schemes and network prefixes ---
SC_CIFS = 1010         # cifs:
SC_FABS = 1110         # file:///path - absolute path - rfc8089 rfc1738
SC_FILE = 1120         # file:
SC_FMIN = 1130         # file:/path - min rfc8089 - Appendix B
SC_FNONLOCAL = 1140    # file://host/path non-local - rfc8089 - Appendix B / maps to Posix-App
SC_FSHORT = 1150       # file:<dos-drive>:path - short-form - rfc8089
SC_FUNC = 1160         # file:///// | file://// - share/netapp - rfc8089 - Appendix E.3.2
SC_HTTP = 1170         # http:
SC_PAPP = 1210         # Posix-Net-App
SC_PDOM = 1220         # Posix-Net-App prefix-compliance to SC_WDOM [MS-DTYP]
SC_SMB = 1300          # smb:
SC_UNC = 1370          # unc:
SC_WDOM = 1390         # Win-Domain

# --- processing directives ---
SC_DOIT = 1030         # out of range
SC_DUMMY = 1080        # dummy
SC_EACHOF = 1090       # assure for each
SC_KEEP = 1180         # keep literally
SC_MASKALL = 1190      # original remark: 'keep literally' (same as SC_KEEP) - TODO confirm meaning
SC_REPLACE = 1250      # replace an equal set of chars e.g. '/' or backslash
SC_TOEVEN = 1320       # assure count is even
#: Maps the group index of a PATHSCANNER match onto its SC_* constant.
#: A padded list is used instead of a dictionary in order to avoid hash
#: calculations; the item index equals the group index.
ASCII_SC_CTRL = [
    # 0: the whole match - all by *re*, padding only
    0,
    # 1-5: quoted strings, python escape characters, unicode-16/-32
    SC_DQUOTED, SC_SQUOTED, SC_ESCCHAR, SC_U16, SC_U32,
    # 6-10: file-URI prefixes - rfc8089 / rfc1738
    SC_FUNC, SC_FNONLOCAL, SC_FMIN, SC_FABS, SC_FSHORT,
    # 11-13: '//' UNC-compatible, '//' pure POSIX, leading backslash pair
    SC_PDOM, SC_PAPP, SC_WDOM,
    # 14-17: DOS drive letter at the start of the string
    SC_DRIVE, SC_DRIVENWSEP, SC_DRIVENPSEP, SC_DRIVE,
    # 18-20: DOS drive letter behind a search-path separator
    SC_DRIVENWSEP, SC_DRIVENPSEP, SC_DRIVE,
    # 21-23: '/' behind a backslash, n * '/', 1 * '/'
    SC_SLASHPREB, SC_SEPP, SC_SLASH,
    # 24-26: up-dir variants
    SC_UPDIR, SC_UPDIR, SC_UPDIR,
    # 27-29: null-dir variants
    SC_NULLDIR, SC_NULLDIR, SC_NULLDIR,
    # 30-33: backslash pair, masked newline (2x), single backslash
    SC_BSPAIR, SC_CRMASK, SC_CRMASK, SC_SEPW,
    # 34-35: ':' and ';'
    SC_PSEPP, SC_PSEPW,
    # 36-39: character class start/end, single and double quote
    SC_CHRCLSSTART, SC_CHRCLSEND, SC_ESCAPEDSQUOT, SC_ESCAPEDDQUOT,
    # 40-42: remaining literal text and free characters
    SC_DOIT, SC_DOIT, SC_DOIT,
]
#: Checks for contained dot-directory names in paths, controls parser mode.
_NULLDIRS = re.compile(
    r'.*([/\\\\][.]{1,2}[/\\\\]|[/\\\\][.][.]$|^[.][.][/\\\\])'
)

# Scheme / prefix lookup: keyed by leading separators, by symbolic name
# and - added below - by the numeric RTE_FILEURI* constants.
_file_uri_scheme = {
    '': 'file',
    '/': 'file',
    '\\': 'file',
    '//': 'netapp',
    '\\\\': 'netapp',
    'fileuri': 'file://',
    'fileuri0': 'file:',
    'fileuri4': 'file:///',
    'fileuri5': 'file:////',
}
_file_uri_scheme.update({
    RTE_FILEURI0: _file_uri_scheme['fileuri0'],
    RTE_FILEURI4: _file_uri_scheme['fileuri4'],
    RTE_FILEURI5: _file_uri_scheme['fileuri5'],
    RTE_FILEURI: _file_uri_scheme['fileuri'],
})

# Normalizes a file-URI type given by name or by number to its number.
_file_uri_scheme_num = {
    'fileuri': RTE_FILEURI,
    'fileuri0': RTE_FILEURI0,
    'fileuri4': RTE_FILEURI4,
    'fileuri5': RTE_FILEURI5,
}
_file_uri_scheme_num.update({
    RTE_FILEURI0: RTE_FILEURI0,
    RTE_FILEURI4: RTE_FILEURI4,
    RTE_FILEURI5: RTE_FILEURI5,
    RTE_FILEURI: RTE_FILEURI,
})

# Captures a run of '/' or a run of backslashes.
_get_lead_sep = re.compile(
    r'(/*|[\\\\]*)'
)
#: short scanner for unescape
#:
#: Verbose pattern of 17 alternatives with one capturing group each;
#: 'sub_unesc' dispatches on 'lastindex' of the match, so the group
#: numbers used there depend on the order of the alternatives.
#:
#: NOTE(review): in alternatives 4 and 5 the classes '[\\u]' / '[\\U]'
#: match ONE character - either a backslash or the letter - not the
#: two-character sequence backslash + letter as PATHSCANNER does; looks
#: unintended, confirm before changing.
PATHSCANNER_UNESC = re.compile(r"""
(["]{3}[\x01-\xFF]*?["]{3}) # 1 quoted string by 3 double quotes(") - similar to Python
|([']{3}[\x01-\xFF]*?[']{3}) # 2 quoted string by 3 single quotes(') - similar to Python
|(?<=[\\\\])([\a\b\f\n\r\t\v]) # 3 python escape char with leading backslash
|(?<=[^\\\\])([\\u][0-9]{4}) # 4 unicode-16
|(?<=[^\\\\])([\\U][0-9]{8}) # 5 unicode-32
|^([\\\\][\\\\])(?=[^\\\\/"]+[\\\\/][\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]{1,80}[\\\\/]*)
# 6 UNC
|([\\\\][abfnrtv]) # 7 python escaped char with leading backslash
|([\\\\][\\\\])(?![abfnrtvuU]) # 8 bs pairs - free - not escaping
|([\\\\][\\\\][abfnrtvuU]) # 9 bs pairs - escaping an escape char
|([\\\\])(?![\\\\]) # 10 single bs - non-escape
|([\\\\][']) # 11 special escapes - POSIX + Windows
|([\\\\]["]) # 12 special escapes - POSIX only (+ URI ?)
|(?<![^\\\\][\\\\])(\[) # 13 start char class
|(?<![^\\\\][\\\\])(\]) # 14 end char class
|([^\\\\/\a\b\f\n\r\t\v:;"'\[\]]+)# 15
|([^\\\\/\a\b\f\n\r\t\v:;]+)(?!.*["'\[\]])
# 16
|(.) # 17 # free char
""", pathflags)
def sub_keep(it, spf=RTE, strip=True, pathsep=''):
    """To be used by re.sub() - keeps the matched text as it is.

    Args:
        **it**:
            Match object handed over by *re.sub*.

        **spf**, **strip**, **pathsep**:
            Accepted for a signature parallel to the other
            *sub_** callbacks, not evaluated here.

    Returns:
        The text of the last matched group, *None* when that group
        is empty.

    Raises:
        pass-through
    """
    kept = it.group(it.lastindex)  # PATHSCANNER ASCII_SC_CTRL
    return kept or None
# Default character class tracker of sub_esc, shared by all calls which
# do not pass an own 'state'.
_esc_state_shared = []


def sub_esc(it, spf=RTE, strip=False, pathsep='', state=_esc_state_shared, **kw):
    """To be used by re.sub() - escapes backslashes and non-printable
    characters.

    Args:
        **it**:
            Match object of *PATHSCANNER* handed over by *re.sub*.

        **spf**, **strip**, **pathsep**:
            Not evaluated here.

        **state**:
            List tracking open character classes: one item is
            appended for each start of a class, the list is emptied
            at the end of a class. A non-empty list means
            'inside a character class'.

        kw:
            **charback**:
                Escapes backslashes within character classes.
                Could be combined with *force* and *freeback*.

            **force** (old) or **all**:
                Escapes all backslashes outside character classes,
                else the special characters only.

                default := False

            **freeback**:
                Escapes backslashes outside character classes.
                Could be combined with *charback*.

    Returns:
        The replacement for the matched token, *None* for an empty
        token.

    Raises:
        pass-through
    """
    # NOTE(review): the nesting of the branches was reconstructed from a
    # listing without indentation - in particular the odd-count check of
    # SC_DRIVENWSEP is assumed to apply outside character classes only.

    # 'force' is the old spelling - temporary for migration
    escape_all = kw['force'] if 'force' in kw else kw.get('all', False)
    within_class = kw.get('charback', False)
    free_standing = kw.get('freeback', False)

    idx = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
    token = it.group(idx)
    if not token:
        return None

    ctrl = ASCII_SC_CTRL[idx]
    outside = not state  # no character class is open

    # FIXME: check for null-dir on 'win'
    if ctrl in (SC_BSPAIR, SC_SEPW, SC_WDOM):
        # any flavour of backslash separator: double when requested
        if outside:
            if escape_all or free_standing:
                return 2 * token
        elif within_class:
            return 2 * token

    elif ctrl == SC_CHRCLSSTART:
        state.append('')

    elif ctrl == SC_CHRCLSEND:
        closing = ''.join(state) + token
        del state[:]
        return closing

    elif ctrl == SC_DRIVENWSEP:
        if outside:
            if escape_all:
                # keep 'X:' and double the backslashes behind it
                return token[:2] + 2 * token[2:]
            if (len(token) - 2) % 2:
                # odd number of backslashes - make it even
                return token + '\\'

    elif ctrl == SC_ESCCHAR:
        if outside:
            return ESC_CHAR_MAP[token]

    elif ctrl == SC_CRMASK:
        if outside:
            return '\\\\'

    elif ctrl == SC_NULLDIR:
        # groups 27 and 28 may end with a backslash, which is doubled
        if outside and idx in (27, 28) and token[-1] == '\\':
            return token + '\\'

    return token
# Default state list of sub_unesc, shared by all calls which do not pass
# an own 'state'.
_unesc_state_shared = []


def sub_unesc(it, _t=None, spf=None, state=_unesc_state_shared, **kw):
    """To be used by re.sub() - unescapes backslashes and
    non-printable characters.

    Args:
        **it**:
            Match object of *PATHSCANNER_UNESC* handed over by
            *re.sub*.

        **_t**, **spf**:
            Not evaluated here.

        **state**:
            List of pending replacement prefixes. For a token without
            an own rule the last item is popped and replaces the
            first character of the token. Nothing in this function
            adds items.

        kw:
            **all** or **force** (old):
                Unescapes all backslashes, else the special
                characters only.

                default := False

    Returns:
        The replacement for the matched token, *None* for an empty
        token.

    Raises:
        pass-through
    """
    # NOTE(review): reconstructed from a listing without indentation -
    # the 'state' branch is assumed to be the last member of the
    # dispatch chain on the group index.

    # 'force' is the old spelling - temporary for migration
    unescape_all = kw['force'] if 'force' in kw else kw.get('all', False)

    idx = it.lastindex  # PATHSCANNER_UNESC
    token = it.group(idx)
    if not token:
        return None

    if idx == 3:  # escape character behind a backslash
        if unescape_all:
            return '\\' + token

    elif idx == 6:  # leading backslash pair of an UNC
        if unescape_all:
            return 2 * token

    elif idx == 7:  # escaped special char: backslash + [ab....]
        return UNESC_CHAR_MAP[token[1]]

    elif idx == 8:  # free pair of backslashes - non-escaping
        if unescape_all:
            return '\\'

    elif idx == 9:  # backslash pair in front of an escape letter
        return token[1:]

    elif idx in (11, 12):  # escaped single / double quote
        if unescape_all:
            return token[1]

    elif state:
        if len(token) > 1:
            return state.pop() + token[1:]
        return state.pop()

    return token
def sub_posix(it, spf=RTE, strip=True, pathsep=':', state=None, **kw):
    """To be used by re.sub() - converts to posix.

    Replaces slashes and backslashes with '/'. Posix does not have
    drives, the drive letter is kept as ordinary characters.

    Args:
        **it**:
            Match object of *PATHSCANNER* handed over by *re.sub*.

        **spf**:
            Source platform. Not evaluated by the current code.

        **strip**:
            Strip redundancies.

        **pathsep**:
            Input separator 'pathsep' to be be replaced. ::

                pathsep := ':' ';' ''

            Used as a set for the containment check of ':' tokens.
            The empty string disables the replacement.

        **state**:
            Compile states, a list of two lists: ::

                state[0]: pending separator markers (1 or 2) left
                          by the previous token
                state[1]: non-empty while inside a character class

            When omitted a fresh pair of empty lists is used, which
            means that nothing is carried over between calls.

        kw:
            **apppre**:
                Application prefix, keeps/creates scheme prefixes.

            **keepsep**:
                Keeps separator, in particular the trailing.

            **stripquote**:
                Strips *filesysobjects* triple-quotes.

    Returns:
        The replacement for the matched token.

    Raises:
        pass-through
    """
    # NOTE(review): the nesting of the branches was reconstructed from a
    # listing without indentation; the tails of the branches SC_BSPAIR,
    # SC_SLASH and SC_SEPW should be compared with upstream.
    apppre = kw.get('apppre', False)
    keepsep = kw.get('keepsep', False)
    stripquote = kw.get('stripquote', False)

    # the default None used to fail on state[0]
    if state is None:
        state = [[], []]

    # tracks multiple path separators, when these are of mixed
    # platforms (slash, backslash) to be normalized
    sx = state[0].pop() if state[0] else 0

    # ignores character classes
    inchrclass = bool(state[1])

    #
    # frequently used values
    #
    g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
    _le = it.end(g)
    _isfirst = it.start(g) == 0
    _islast = _le == it.endpos
    _charbefore = it.start(g) - 1

    x = it.group(g)
    if not x:
        return x

    c = ASCII_SC_CTRL[g]

    if c == SC_CHRCLSSTART:  # char class start
        state[1].append('')

    elif c == SC_CHRCLSEND:  # char class end
        del state[1][:]

    elif inchrclass:
        # inside a character class only quotes are handled
        if c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                return x[3:-3]

    elif c in (SC_DQUOTED, SC_SQUOTED):
        if stripquote:
            return x[3:-3]

    elif c == SC_BSPAIR:  # pair of backslashes
        if strip:
            if _isfirst:
                return '/'

            _pre = it.string[_charbefore]
            if _pre in '/\\':  # already done
                if sx:
                    return '/'
                return ''
            elif _pre == ':':
                return '/'
            elif _islast or it.string[_le] == ':':
                if not keepsep:
                    return ''
                return '/'
            elif it.string[_le] in '\\/':
                # defer the output to the following separator token
                if it.string[_charbefore] == ':':
                    state[0].append(2)
                else:
                    state[0].append(1)
                return ''
            return '/'
        else:
            return '//'

    elif c == SC_SLASH:
        if strip:
            if _isfirst and _islast:
                return '/'
            elif _islast:
                if sx == 2:
                    return '/'
                elif not keepsep:
                    return ''
            elif it.string[_le] == ':':
                if _isfirst or it.string[_charbefore] == ':':
                    return '/'
                if not keepsep:
                    return ''
        return '/'

    elif c == SC_SLASHPREB:
        if strip:
            if sx:
                return '/'
            return ''
        return x

    elif c == SC_SEPP:  # n * '/'
        if strip:
            if _isfirst or it.string[_charbefore] == ':':
                state[0].append(2)
            return ''
        else:
            return '/' * len(x)

    elif c == SC_SEPW:  # single backslash
        if _isfirst:  # is app-pre '//'
            return '/'
        if it.string[_charbefore] == ':':
            return '/'

        if strip:
            if _islast or it.string[_le] == ':':
                if sx == 2:
                    return '/'
                return ''
            if sx:
                return '/'
            if it.string[_charbefore] in '/\\':
                return ''
        else:
            return '/' * len(x)

        if it.endpos > _le:
            if it.string[_le] == '\\':
                if it.endpos > _le + 1 and it.string[_le + 1] == '\n':
                    pass
                else:
                    if strip:
                        return ''
        return '/'

    elif c == SC_PSEPP:
        if x[0] not in pathsep:  # 1..n
            return x

        if strip:
            if _islast:  # drop trailing os.pathsep
                return ''
            return ':'
        else:
            return ':' * len(x)

    elif c == SC_PSEPW:
        # native: ';' is an ordinary character.
        # NOTE(review): the original returned here unconditionally, the
        # conversion of ';' written below that return was never
        # executed and is dropped - confirm the 'mixed' mode is not
        # wanted for this target.
        return x

    elif c == SC_FUNC:  # verified by char contents
        if apppre:
            return 'file://///'
        return '//'

    elif c == SC_UPDIR:
        return '../'

    elif c == SC_FNONLOCAL:  # verified by char contents
        if apppre:
            return 'file://'
        return '//'

    elif c in (SC_FMIN, SC_FABS, SC_FSHORT):
        if not apppre:
            return ''

    elif c in (SC_WDOM, SC_PDOM, SC_PAPP):  # verified by char contents
        if strip:
            # test the end first, indexing behind it would fail
            if _islast:
                return ''
            if it.string[_le] == ':':
                return '/'
        return '//'

    elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP):  # posix does not have drives
        if strip:  # keep drive-root
            return x[0] + ':/'
        return x[:2] + '/' * (len(x) - 2)

    elif c == SC_DRIVE:
        return x

    elif c == SC_NULLDIR:
        return ''

    elif c == SC_FILE:
        if apppre:
            return x
        return ''

    elif c == SC_UNC:
        if apppre:
            return x
        return '//'

    return x
def sub_uri(it, spf=RTE, strip=True, pathsep='', state=None, **kw):
    """To be used by re.sub() - converts to uri.

    Args:
        **it**:
            Match object of *PATHSCANNER* handed over by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain. With
            *RTE_POSIX* set a ';' is kept as an ordinary character.

        **strip**:
            Strip redundancies.

        **pathsep**:
            Input separator 'pathsep' to be be replaced. ::

                pathsep := ':' ';' ''

            Used as a set for the containment check of ':' and ';'
            tokens. The empty string - the default - disables the
            replacement.

        **state**:
            Not evaluated here, accepted for a signature parallel to
            the other *sub_** callbacks.

        kw:
            **apppre**:
                Application prefix.

    Returns:
        The replacement for the matched token, *None* for an empty
        token.

    Raises:
        pass-through
    """
    # NOTE(review): the nesting of the branches was reconstructed from a
    # listing without indentation. In SC_BSPAIR the first 'else' is
    # paired with 'if strip:' - as in sub_posix - and in SC_SEPW the
    # inner "return '/'" is placed in the innermost 'else'; compare with
    # upstream before relying on corner cases.
    apppre = kw.get('apppre', False)

    g = it.lastindex  # PATHRULES ASCII_SC_CTRL
    _le = it.end(g)

    if it.group(g):
        x = it.group(g)
        c = ASCII_SC_CTRL[g]

        if c == SC_BSPAIR:  # pair of backslashes
            if strip:
                if it.start(g) == 0:
                    return '/'

                if it.string[it.start(g) - 1] in '/\\':  # already done
                    return ''
                elif len(it.string) > _le:
                    return '/'
            else:
                return '//'

            if strip:  # want to reduce by look-ahead
                if len(it.string) > _le and it.string[_le] == '\\':
                    # FIXME: a.s.a.p.
                    if it.string.find('\\', _le) < len(it.string) - 1:
                        return ''
                elif len(it.string) > _le and it.string[_le] == '/':
                    # FIXME: a.s.a.p.
                    if it.string.find('/', _le) < len(it.string) - 1:
                        return ''

                return '/'
            else:
                return '//'

        elif c == SC_SLASH:
            return '/'

        elif c == SC_SLASHPREB:
            if strip:
                return ''
            return x

        elif c == SC_SEPP:  # n * '/'
            if strip:
                return ''
            else:
                return '/' * len(x)

        elif c == SC_SEPW:  # single backslash
            if it.start(g) == 0:  # is app-pre '//'
                # NOTE(review): the inner test can not be true here
                if it.start(g) > 1:
                    return '//'
                return '/'

            if strip:
                if it.string[it.start(g) - 1] in '/\\':
                    return ''
                # is app-pre '//'
                elif it.start(g) == 1 and it.string[0] in ('/', '\\'):
                    return ''  # 2SEP has own rules

            if len(it.string) == _le and _le > 1:  # do not '/'
                return '/'

            if len(it.string) > _le:
                if it.string[_le] == '\\':
                    if len(it.string) > _le + 1 and it.string[_le + 1] == '\n':
                        pass
                    else:
                        if strip:
                            return ''
                        return '/'

            if strip:
                return '/'
            else:
                return '/' * len(x)

        elif c == SC_PSEPP:
            if x[0] not in pathsep:  # 1..n
                return x

            if strip:
                if len(it.string) == _le:  # drop trailing os.pathsep
                    return ''

                if len(it.string) > _le:
                    if it.string[_le] == ';':
                        return ''

                return ':'
            else:
                return ':' * len(x)

        elif c == SC_PSEPW:
            if x[0] not in pathsep:  # 1..n
                return x

            if spf & RTE_POSIX:  # for posix node names an ordinary character
                return x

            if strip:
                if len(it.string) == _le:  # do not drop trailing '/'
                    return '/'

                if len(it.string) > _le:
                    if it.string[_le] == ':':
                        return ''

                return ':'
            else:
                return ':' * len(x)

        elif c in (SC_PAPP, SC_WDOM):
            return '//'

        elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP):  # posix does not have drives
            if strip:  # keep drive-root
                return x[0] + ':/'
            return x[:2] + '/' * (len(x) - 2)

        elif c == SC_UPDIR:  # was an identity test ('is') on an int
            return '../'

        elif c == SC_DRIVE:
            return x

        elif c == SC_NULLDIR:
            return ''

        elif c == SC_FILE:
            if apppre:
                return x
            return ''

        elif c == SC_UNC:
            if apppre:
                return x
            return '//'

        return x
def sub_rfc8089(it, spf=RTE, strip=True, pathsep='', state=None, **kw):
    """To be used by re.sub() - converts to file uri in accordance to RFC8089.

    This is different from most of common URI, e.g. HTTP(S).

    Args:
        **it**:
            Match object of *PATHSCANNER* handed over by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain. With
            *RTE_POSIX* set a ';' is kept as an ordinary character.

        **strip**:
            Strip redundancies.

        **pathsep**:
            Input separator 'pathsep' to be be replaced. ::

                pathsep := ':' ';' ''

            Used as a set for the containment check of ':' and ';'
            tokens. The empty string - the default - disables the
            replacement.

        **state**:
            Compile states, a list of two lists: ::

                state[0]: pending separator markers (1 or 2) left
                          by the previous token
                state[1]: non-empty while inside a character class

            When omitted a fresh pair of empty lists is used, which
            means that nothing is carried over between calls.

        kw:
            **apppre**:
                Application prefix.

            **keepsep**:
                Keeps the trailing separator. ::

                    keepsep := (
                          True   # keep trailing sep, indicating a directory
                        | False  # drop trailing sep
                    )

                default := True

            **stripquote**:
                Strips *filesysobjects* triple-quotes.

    Returns:
        The replacement for the matched token.

    Raises:
        pass-through
    """
    # NOTE(review): the nesting of the branches was reconstructed from a
    # listing without indentation; the tails of the branches SC_BSPAIR,
    # SC_SLASH and SC_SEPW should be compared with upstream.
    apppre = kw.get('apppre', False)
    keepsep = kw.get('keepsep', True)
    stripquote = kw.get('stripquote', False)

    # the default None used to fail on state[0]
    if state is None:
        state = [[], []]

    # tracks multiple path separators, when these are of mixed
    # platforms (slash, backslash) to be normalized
    sx = state[0].pop() if state[0] else 0

    # ignores character classes
    inchrclass = bool(state[1])

    #
    # frequently used values
    #
    g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
    _le = it.end(g)
    _isfirst = it.start(g) == 0
    _islast = _le == it.endpos
    _charbefore = it.start(g) - 1

    x = it.group(g)
    if not x:
        return x

    c = ASCII_SC_CTRL[g]

    if c == SC_CHRCLSSTART:  # char class start
        state[1].append('')

    elif c == SC_CHRCLSEND:  # char class end
        del state[1][:]

    elif inchrclass:
        # inside a character class only quotes are handled
        # NOTE(review): unlike sub_posix there is no quote branch for
        # tokens outside character classes, so 'stripquote' has no
        # effect there - confirm whether this is intended.
        if c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                return x[3:-3]

    elif c == SC_BSPAIR:  # pair of backslashes
        if strip:
            if _isfirst:
                return '/'

            _pre = it.string[_charbefore]
            if _pre in '/\\':  # already done
                if sx:
                    return '/'
                return ''
            elif _pre == ':':
                return '/'
            elif _islast or it.string[_le] == ':':
                if not keepsep:
                    return ''
                return '/'
            elif it.string[_le] in '\\/':
                # defer the output to the following separator token
                if it.string[_charbefore] == ':':
                    state[0].append(2)
                else:
                    state[0].append(1)
                return ''
            return '/'
        else:
            return '//'

    elif c == SC_SLASH:
        if strip:
            if _isfirst and _islast:
                return '/'
            elif _islast:
                if sx == 2:
                    return '/'
                elif not keepsep:
                    return ''
            elif it.string[_le] == ':':
                if _isfirst or it.string[_charbefore] == ':':
                    return '/'
                if not keepsep:
                    return ''
        return '/'

    elif c == SC_SLASHPREB:
        if strip:
            if sx:
                return '/'
            return ''
        return x

    elif c == SC_SEPP:  # n * '/'
        if strip:
            if _isfirst or it.string[_charbefore] == ':':
                state[0].append(2)
            return ''
        else:
            return '/' * len(x)

    elif c == SC_SEPW:  # single backslash
        if _isfirst:  # is app-pre '//'
            return '/'
        if it.string[_charbefore] == ':':
            return '/'

        if strip:
            if _islast or it.string[_le] == ':':
                if sx == 2:
                    return '/'
                return ''
            if sx:
                return '/'
            if it.string[_charbefore] in '/\\':
                return ''
        else:
            return '/' * len(x)

        if it.endpos > _le:
            if it.string[_le] == '\\':
                if it.endpos > _le + 1 and it.string[_le + 1] == '\n':
                    pass
                else:
                    if strip:
                        return ''
        return '/'

    elif c == SC_PSEPP:
        if x[0] not in pathsep:  # 1..n
            return x

        if strip:
            if _islast:  # drop trailing os.pathsep
                return ''
            return ':'
        else:
            return ':' * len(x)

    elif c == SC_PSEPW:
        # mixed
        if x[0] not in pathsep:  # 1..n
            return x

        if spf & RTE_POSIX:  # for posix node names an ordinary character
            return x

        if strip:
            if _islast:  # trailing os.pathsep
                if keepsep:
                    return x
                else:
                    return ''

            if len(it.string) > _le:
                if it.string[_le] == ':':
                    return ''

            return ':'
        else:
            return ':' * len(x)

    elif c in (SC_PAPP, SC_WDOM):
        return '//'

    elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP):  # posix does not have drives
        if strip:  # keep drive-root
            return x[0] + ':/'
        return x[:2] + '/' * (len(x) - 2)

    elif c == SC_UPDIR:
        return '../'

    elif c == SC_DRIVE:
        return x

    elif c == SC_NULLDIR:
        # explicit empty string - as in the sibling callbacks - instead
        # of the former bare 'return'
        return ''

    elif c == SC_FILE:
        if apppre:
            return x
        return ''

    elif c == SC_UNC:
        if apppre:
            return x
        return '//'

    return x
[docs]def sub_win(it, spf=RTE, strip=True, pathsep=';', state=None, **kw):
"""To be used by re.sub() - converts to windows.
Replaces '[/\\\\]' with '\\\\', and '[;:]' with ';'.
Args:
**it**:
iterator from *re.sub*.
**spf**:
Source platform, defines the input syntax domain.
For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.
For additi0onal details refer to
:ref:`tpf and spf <TPF_AND_SPF>`,
`paths.getspf() <paths.html#getspf>`_,
:ref:`normapppathx() <def_normapppathx>`,
`normpathx() <paths.html#normpathx>`_.
**strip**:
Strip redundancies.
**pathsep**:
Input separator 'pathsep' to be be
replaced. ::
pathsep := ':' ';' ''
One or more are allowed, is used as a set
containment of replacement checks. Empty
string disables the replacement.
**state**:
Compile states.
kw:
**apppre**:
Application prefix.
**keepsep**:
Keeps seprator, in particular the trailing.
**stripquote**:
Strips *filesysobjects* triple-quotes.
Returns:
Converted format win.
E.g.::
C:\\Windows\\system32\\cmd.exe;C:\\Windows\\system32\\notepad.exe
Raises:
pass-through
"""
apppre = kw.get('apppre', False)
keepsep = kw.get('keepsep', False)
stripquote = kw.get('stripquote', False)
if apppre:
# scheme for an URI requested, so slashes only
sep = '/'
sep2 = '//'
else:
# no scheme, so a UNC
sep = '\\'
sep2 = '\\\\'
# tracks multiple path separators
if state[0]:
sx = state[0].pop()
else:
sx = 0
# ignores character classes
if state[1]:
inchrclass = True
else:
inchrclass = False
#
# frequently used values
#
g = it.lastindex # PATHRULES ASCII_SC_CTRL
_le = it.end(g)
_isfirst = it.start(g) == 0
_islast = _le == it.endpos
_isnotlast = it.endpos > _le
_charbefore = it.start(g) - 1
if _isnotlast:
_nextissep = it.string[_le] in '\\/'
else:
_nextispsep = None
if _isnotlast:
_nextispsep = it.string[_le] in pathsep
else:
_nextispsep = None
if it.group(g): # PATHRULES PATHSCANNER ASCII_SC_CTRL
x = it.group(g)
c = ASCII_SC_CTRL[g]
if c in (SC_CHRCLSSTART, ): # char class start
state[1].append('')
elif c in (SC_CHRCLSEND, ): # char class end
if V3K:
state[1].clear()
else:
if state[1]:
for i in range(len(state[1])):
state[1].pop(-1)
elif inchrclass:
if c in (SC_DQUOTED, SC_SQUOTED):
if stripquote:
return x[3:-3]
elif c in (SC_DQUOTED, SC_SQUOTED):
if stripquote:
return x[3:-3]
elif c == SC_BSPAIR: # pairs of '\\'
if strip:
if _isfirst: # TODO: basically not possible as net-app, see SC_PAP
if _isnotlast and _nextissep:
state[0].append(2)
return ''
return sep
elif _islast:
if sx == 2:
return sep
return ''
elif _nextispsep:
if sx:
return sep
elif it.string[_charbefore] in pathsep:
return sep
return ''
elif _nextissep:
if not sx:
sx = 1
state[0].append(sx)
return ''
return sep
else:
return sep2
elif c in (SC_SLASH, ): # 1 * '\' - win treats '/' equal
if strip:
if sx == 3:
return ''
if _isfirst:
if _islast:
return sep
elif _nextissep:
state[0].append(2)
return ''
elif _nextispsep or _islast:
if sx == 2:
return sep
elif it.string[_charbefore] in pathsep:
return sep
return ''
elif _isnotlast and _nextissep:
state[0].append(1)
return ''
return sep
return sep
elif c in (SC_SLASHPREB, ): # '\/'
if strip:
if _islast:
if sx == 2:
return sep
return ''
elif _nextissep:
if not sx:
sx = 1
state[0].append(sx)
return ''
elif sx == 2:
return sep
return sep
return sep * len(x)
elif c in (SC_SEPP, ): # n * '/'
if strip:
if _isfirst:
state[0].append(2)
else:
state[0].append(1)
return ''
return sep * len(x)
elif c in (SC_SEPW, ): # n * '\' - win treats '/' equal
if not strip:
return sep * len(x)
if _isfirst:
if _isnotlast:
if _nextissep:
state[0].append(2)
return ''
if _nextispsep:
return sep
else:
return sep
elif _islast or _nextispsep:
if sx == 2:
return sep
if it.string[_charbefore] in pathsep:
return sep
return ''
elif _isnotlast and _nextissep:
if not sx:
sx = 1
state[0].append(sx)
return ''
if sx > 1:
return sep
return sep
elif c == SC_PSEPP:
# native
if strip:
if x[0] in pathsep and _islast:
return ''
return x
elif c == SC_CRMASK:
if _isfirst:
return x
elif sx:
return sep + x[1:]
return x
elif c in (SC_FUNC,): # verified by char contents
if apppre:
return 'file://///'
if strip and it.string[_le] in ':':
return '/'
elif strip and _islast:
return ''
return sep2
elif c is SC_FABS:
# lookahead for RFC8089 - appendix E.2.1
if len(it.string) > _le + 2 and \
ord(it.string[_le + 1].upper()) in range(65,91) and \
it.string[_le + 2] == ':':
state[0].append(3)
if not apppre:
return ''
elif c in (SC_FMIN, SC_FSHORT,):
if not apppre:
return ''
elif c == SC_PSEPW:
if x not in pathsep: # changed spf/psep
return x
if strip:
_is = it.start(g)
if not _is or (_is + 1) == it.endpos:
return ''
if x[0] in pathsep and _is > 0 and it.string[_is
- 1] in pathsep: # 1..n
return ''
return ';'
return ';' * len(x)
elif c is SC_UPDIR:
return '..\\'
elif c is SC_FNONLOCAL:
if not apppre:
return ''
elif c is SC_PAPP:
if strip and _nextispsep:
return sep
return sep2
elif c in (SC_WDOM, SC_PDOM): # basically sure a UNC or NETAPP
if strip and _nextispsep:
return sep
return sep2
elif c in (SC_DRIVENWSEP, SC_DRIVENPSEP):
if strip: # keep drive-root
return x[0] + ':' + sep
return x[:2] + sep * (len(x) - 2)
elif c == SC_DRIVE:
return x
elif c == SC_NULLDIR:
return ''
elif c == SC_FILE:
if apppre:
return x
return ''
elif c == SC_UNC:
if apppre:
return x
return sep2
return x
#: 're.sub' callbacks for normalization, looked up by the target platform key
sub_path_calls = dict(
    b=sub_win,
    cnp=sub_posix,
    cnw=sub_win,
    file=sub_rfc8089,
    http=sub_uri,
    https=sub_uri,
    k=sub_keep,
    keep=sub_keep,
    posix=sub_posix,
    rfsys=sub_posix,
    s=sub_posix,
    share=sub_win,
    uri=sub_uri,
    win=sub_win,
    win32=sub_win,
)
def escapepathx(spath, tpf=None, **kargs):
    """Escape special characters within path names.

    Supports cross-platform processing and knows the special escape
    characters of Python and *re*. The characters could be masked by
    quoting, and/or by enclosing them in character classes.

    Examples, shown as *input -> esc -> unesc*::

        \\\\abc"\\\\n"  ->  \\\\\\\\abc"\\\\n"    ->  \\\\abc"\\\\n"
        \\\\"abc\\\\n"  ->  \\\\\\\\"abc\\\\n"    ->  \\\\"abc\\\\n"
        \\\\abc\\\\n    ->  \\\\\\\\abc\\\\\\\\n  ->  \\\\abc\\\\n
        \\\\xy" "z      ->  \\\\\\\\xy" "z        ->  \\\\xy" "z
        \\\\"xy z"      ->  \\\\\\\\"xy z"        ->  \\\\"xy z"
        \\\\xy z        ->  \\\\\\\\xy\\\\ z      ->  \\\\xy z

    Args:
        **spath**:
            The path to be escaped. ::

                spath := (
                      <path-string>
                    | <path-array>
                )
                path-string := (str | unicode)
                path-array := (list | tuple)

            * *path-string*
              The string representation of a complete path, which may
              contain literal, *glob*, and *re* expressions. The
              supported character representation is *str* or *unicode*
              for Python2.7 and Python3.5+.

            * *path-array*
              The component representation of a path, which consists of
              its items, either as a *list* or as a *tuple*. Each item
              may contain literal, *glob*, and *re* expressions.

        **tpf**:
            Target platform. A non-empty value is resolved through
            *rte2num*, an empty value selects the runtime platform
            *RTE*. The resolved value is handed to the escape callback.

        kargs:
            **charback**:
                Escapes all backslashes within character classes.
                Could be combined with *force* and *freeback*. ::

                    \\a\\[\\\\]  =>  \\a\\[\\\\\\\\]

            **force**:
                Controls the escaped scope. Excludes quoted strings and
                character classes. Could be combined with *charback*. ::

                    force = (
                          True   # escape characters and any free backslash
                        | False  # defined escape characters only
                    )

                    force == True
                        \\a\\X\\n => \\\\a\\\\X\\\\n

                    force == False
                        \\a\\X\\n => \\\\a\\X\\\\n

                    default := False

            **freeback**:
                Escapes backslashes outside character classes.
                Could be combined with *charback*. ::

                    \\a\\b\\[\\\\]  =>  \\a\\\\b\\\\[\\\\]

            **strip**:
                Removed from *kargs* and passed positionally to the
                escape callback.
                default := False

            All remaining options are forwarded to the escape callback.

    Returns:
        The escaped path with added '*\\\\*' in accordance to the rules
        and chosen options. The return type of the representation is
        the same as the input representation. ::

            str     => str
            unicode => unicode
            list    => list
            tuple   => tuple

    Raises:
        PathError
            The value of *tpf* is not a key of *rte2num*.

        FileSysObjectsError
            *spath* is neither a string nor a *list* / *tuple*.

        pass-through
    """
    if not tpf:
        _ttpf = RTE
    else:
        try:
            _ttpf = rte2num[tpf]
        except KeyError:
            raise PathError("escapepathx:Parameter tpf: " + str(tpf))

    # 'strip' travels as a positional argument, so it must not stay in kargs
    _strip = kargs.pop('strip', False)
    _state = []

    def _escape(item):
        # one call shape for strings and for array items: the empty
        # string is the search-path separator slot of the callback
        return PATHSCANNER.sub(
            lambda x: sub_esc(x, _ttpf, _strip, '', _state, **kargs), item)

    if type(spath) in ISSTR:
        return _escape(spath)
    if type(spath) in (list, tuple,):
        # rebuild the container type of the input: list => list, tuple => tuple
        return type(spath)(_escape(spx) for spx in spath)
    raise FileSysObjectsError("escapepathx:requires (str | list | tuple), got: " + str(spath))
def unescapepathx(spath, **kargs):
    """Unescape a path which has been escaped before.

    The path representation could either be a string/unicode or the
    split components as a *list* or *tuple*.

    .. warning::

        Processes strings accurately which were processed by
        *escapepathx()* before, else the result could be erroneous.
        In particular for windows paths due to the ambiguity of
        the '\\\\'!

    The same masking rules apply as for the *normpathx()* and
    *escapepathx()* calls. Escape sequences could be protected by
    quoting, which keeps the content literally.
    See *pathtools.stripquotes*.

    Args:
        **spath**:
            The path to be unescaped. ::

                spath := (
                      <path-string>
                    | <path-array>
                )
                path-string := (str | unicode)
                path-array := (list | tuple)

            * *path-string*
              The string representation of a complete path, which may
              contain literal, *glob*, and *re* expressions. The
              supported character representation is *str* or *unicode*
              for Python2.7 and Python3.5+.

            * *path-array*
              The component representation of a path, which consists of
              its items, either as a *list* or as a *tuple*. Each item
              may contain literal, *glob*, and *re* expressions. Every
              item is unescaped on its own.

        kargs:
            **tpf**:
                Target platform. Values which are not of type *int* are
                resolved through *rte2num*.
                default := RTE

            **netpath**:
                When *True* considers double prefix separators as share
                and/or network application, else assumes these are the
                result of escaping with force. When a string starts
                with two separator characters, its first character is
                prepended to the unescaped result.
                default := False

            All options are forwarded to the unescape callback too.

    Returns:
        The unescaped path with removed '\\' in accordance to the rules
        and chosen options. The return type of the representation is
        the same as the input representation. ::

            str     => str
            unicode => unicode
            list    => list
            tuple   => tuple

    Raises:
        PathError
            The value of *tpf* is not a key of *rte2num*.

        FileSysObjectsError
            *spath* is neither a string nor a *list* / *tuple*.

        pass-through
    """
    netpath = kargs.get('netpath', False)
    tpf = kargs.get('tpf', RTE)
    if type(tpf) is not int:
        try:
            tpf = rte2num[tpf]
        except KeyError:
            raise PathError("unescapepathx:Parameter tpf: " + str(tpf))
    _tsep, _tpsep, tpf, _tpfn, _apre = gettpf(tpf)
    state = []

    def _unescape(item):
        # the length guard keeps empty and one-character strings from
        # raising IndexError in the prefix test
        _isnet = (
            netpath
            and len(item) > 1
            and item[0] in ('/', '\\')
            and item[1] in ('/', '\\')
        )
        _res = PATHSCANNER_UNESC.sub(
            lambda x: sub_unesc(x, tpf, state, **kargs), item)
        if _isnet:
            return item[0] + _res
        return _res

    if type(spath) in ISSTR:
        return _unescape(spath)
    if type(spath) in (list, tuple,):
        # process every item and rebuild the container type of the input
        return type(spath)(_unescape(spx) for spx in spath)
    raise FileSysObjectsError("unescapepathx:requires (str | list | tuple), got: " + str(spath))
[docs]def splitpathx_win(p, **kw):
"""Split a windows path name into a tuple of its components.

The path may contain 'literal', 'glob', and 're/regexpr' parts.
Serves the source platform windows and alike.
For the call interface see *splitpathx()*.

Args:
    **p**:
        The path name to split.

    kw:
        **apppre**:
            Application prefix. When set, 'file:' scheme parts are
            added for the file-URI tokens, and a leading component
            which does not start with 'file:' is replaced by an entry
            of the file-URI scheme table.
            default := False

        **keepsep**:
            Keeps separator, in particular the trailing.
            default := False

        **strip**:
            Strip separators, in particular the trailing.
            default := False

        **stripquote**:
            Strips *filesysobjects* triple-quotes.
            default := False

        **tpf**:
            Target platform. Defines some fine-tuning,
            e.g. for the file-URI, see *splitpathx*.
            The value is resolved through *rte2num*.
            default := RTE_FILEURI

Returns:
    The split path as a *tuple* of components, an empty tuple when
    no component was collected.

Raises:
    FileSysObjectsError
        The value of *tpf* is not a key of *rte2num*.

    PathError
        *apppre* is set and the scheme lookup yields 'file' while
        the first component is not empty.

    pass-through
"""
# accumulated path components, returned as a tuple at the end
parts = []
# NOTE(review): _cur is never read in this function
_cur = ""
apppre = kw.get('apppre', False)
keepsep = kw.get('keepsep', False)
strip = kw.get('strip', False)
stripquote = kw.get('stripquote', False)
# resolve the tpf option; an unknown key is reported as FileSysObjectsError
try:
tpf = rte2num[kw.get('tpf', RTE_FILEURI)]
except KeyError:
raise FileSysObjectsError("parameter tpf = " + str(kw.get('tpf')))
# controls updir: /../.. != ../..
# 0: no history
# 1: leading chain of RELATIVE up-dirs, keep them all
# 2: has a leading chain of RELATIVE up-dirs
state = 0
# set to 1 between SC_CHRCLSSTART and SC_CHRCLSEND tokens
inclass = 0
# the index of the last matched group selects the token class in ASCII_SC_CTRL
for it in PATHSCANNER.finditer(p):
g = it.lastindex # PATHSCANNER ASCII_SC_CTRL
# end offset of the matched group
_le = it.end(g)
# the token starts at offset 0
_isfirst = it.start(g) == 0
# the token ends at the end position of the scan
_islast = _le == it.endpos
# index of the character preceding the token
_charbefore = it.start(g) - 1
if it.group(g):
x = it.group(g)
c = ASCII_SC_CTRL[g]
if c in (SC_CHRCLSSTART,):
inclass = 1
if not parts:
parts.append(x)
else:
parts[-1] += x
elif c in (SC_CHRCLSEND,):
inclass = 0
if not parts:
parts.append(x)
else:
parts[-1] += x
# inside a character class everything except quoted sections is
# added literally to the current component
elif inclass and c not in (SC_DQUOTED, SC_SQUOTED,):
if parts:
parts[-1] += x
else:
parts.append(x)
#continue
elif c == SC_SLASH: # 1 * '/'
if not parts: # if first - absolute
parts.append('')
parts.append('')
continue
if strip:
if parts and len(parts) > 1 and not parts[-1]:
continue
elif _islast:
continue
elif it.endpos > it.end(g) and re.match(
r'[.][/\\\\]', it.string[it.end(g):]):
if parts[-1]:
parts.append('')
continue
elif re.match(r'^[/\\\\]*$', it.string[_le:]):
continue
parts.append('')
elif c == SC_SEPP: # n * '/' - always followed by a shlash
if strip:
continue
parts.extend(['' for i in range(len(x))]) # @UnusedVariable
elif c == SC_BSPAIR: # pairs of '\\'
if _isfirst:
parts.append('')
parts.append('')
continue
if strip:
if it.string[_charbefore] not in '/\\':
parts.append('')
else:
parts.append('')
parts.append('')
elif c == SC_SLASHPREB:
if strip:
continue
parts.extend(['' for i in range(len(x))]) # @UnusedVariable
elif c == SC_SEPW: # 1 * '\\'
if it.start(g) == 0:
parts.append('')
if strip and _islast:
continue
parts.append('')
continue
# FIXME:
# NOTE(review): it.string[_le] is read without a check of _islast -
# confirm that a trailing separator cannot reach this line with strip set
if not strip or strip and it.string[_le] not in '/\\':
parts.append('')
continue
if strip:
if it.string[_charbefore] in '/\\' or _islast:
continue
parts.append('')
elif c in (SC_PAPP, SC_PDOM, SC_WDOM,
SC_FUNC, SC_FNONLOCAL,): # leading 2 * '' for '\\' or '//'
if apppre:
# NOTE(review): the following conditions test the constants
# SC_FNONLOCAL / SC_FUNC themselves instead of comparing them
# with c - presumably 'c == SC_...' was intended, confirm.
# tpf is a value of rte2num at this point; the string
# alternatives match only if rte2num yields such strings.
if SC_FNONLOCAL and tpf in (RTE_FILEURI0, 'fileuri0',):
parts.append('file://')
continue
elif SC_FNONLOCAL and tpf in (RTE_FILEURI4, 'fileuri4',):
parts.append('file:///')
parts.append('')
continue
elif SC_FNONLOCAL and tpf in (RTE_FILEURI5, 'fileuri5', RTE_FILEURI, 'fileuri',):
parts.append('file:////')
parts.append('')
continue
if SC_FUNC:
parts.append('file:///')
# the file-URI prefix tokens add a scheme part for apppre only
elif c in (SC_FMIN,):
if apppre:
if tpf in (RTE_FILEURI, 'fileuri',):
parts.append('file://')
elif tpf in (RTE_FILEURI0, 'fileuri0',):
parts.append('file:')
else:
parts.append('file://')
elif c in (SC_FABS,):
if apppre:
if tpf in (RTE_FILEURI0, 'fileuri0',):
parts.append('file:')
else:
parts.append('file://')
elif c in (SC_FSHORT,):
if apppre:
parts.append('file://')
# drive with separators: the first two characters of the token are
# kept as the drive part
elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP,): # posix does not have drives
if strip: # keep drive-root
if not parts:
parts.append(x[:2])
else:
parts[-1] += x[:2]
parts.append('')
else:
if not parts:
parts.append(x[:2])
else:
parts[-1] += x[:2]
parts.extend(['' for i in range((len(x) - 2))]) # @UnusedVariable
elif c == SC_DRIVE:
if not parts:
parts.append(x)
else:
parts[-1] += x[:2]
elif c == SC_NULLDIR:
continue
elif c == SC_UPDIR:
# a leading up-dir starts the chain of relative up-dirs (state 1),
# the look-ahead below ends the chain (state 2)
if _isfirst:
parts.append('..')
parts.append('')
state = 1
continue
elif state == 1:
parts[-1] += '..'
parts.append('')
if len(it.string) > _le + 3 and it.string[_le:_le + 3] != '../' or \
len(it.string) > _le + 2 and it.string[_le:_le + 2] != '..':
state = 2
continue
# otherwise the up-dir removes a collected component; the handling
# depends on the count of leading empty entries of parts
# NOTE(review): parts[1] is read without a length check - confirm
# that parts cannot hold a single empty entry here
if parts:
if not parts[0] and not parts[1]: # share/posix-app
if len(parts) > 5:
if not parts[-1]:
parts.pop()
parts.pop()
parts.append('')
elif len(parts) > 4:
parts.pop()
elif not parts[0]: # absolute path
if len(parts) > 2:
if not parts[-1]:
parts.pop(-2)
else:
parts.pop()
parts.append('')
elif len(parts) > 1:
parts.pop()
parts.append('')
else: # relative path
if not parts[-1]:
if len(parts) > 3 and parts[-2] != '..':
parts.pop()
parts.pop()
else:
parts.pop()
parts.append('..')
else:
parts.pop()
parts.append('')
continue
# quoted section: with stripquote the first and the last three
# characters of the token are removed
elif c in (SC_DQUOTED, SC_SQUOTED):
if stripquote:
_x = x[3:-3]
else:
_x = x
if not parts:
parts.append(_x)
else:
parts[-1] += _x
continue
# any other token is added to the current component
elif not parts:
parts.append(x)
else:
parts[-1] += x
# with strip and without keepsep the trailing empty components are dropped
if strip and parts and not keepsep:
while parts and not parts[-1]:
parts.pop()
# apppre: a first component which is not yet a 'file:' scheme is replaced
# by the scheme selected through its leading separators and tpf
if apppre and parts != [] and not parts[0].startswith('file:'):
_x = _file_uri_scheme[_get_lead_sep.match(parts[0]).group(0)]
if _x == 'file':
if parts[0] == '':
if tpf in (RTE_FILEURI4, RTE_FILEURI5,) and parts[2] == '':
parts[0] = _file_uri_scheme[tpf]
elif tpf == RTE_FILEURI0:
parts[0] = _file_uri_scheme[RTE_FILEURI0]
else:
parts[0] = _file_uri_scheme[RTE_FILEURI]
else:
# does not recognize drives
raise PathError("file-uri requires absolute path, got: " + str(p))
elif _x == 'netapp':
if tpf in (RTE_FILEURI4, RTE_FILEURI5,):
parts[0] = _file_uri_scheme[tpf]
else:
parts[0] = _file_uri_scheme[RTE_FILEURI5]
else:
parts[0] = _file_uri_scheme[RTE_FILEURI]
return tuple(parts)
[docs]def splitpathx_posix(p, **kw):
"""Split a POSIX path name into a tuple of its components.

The path may contain 'literal', 'glob', and 're/regexpr' parts.
Serves the source platform POSIX and alike.
For the call interface see *splitpathx()*.

Args:
    **p**:
        The path name to split.

    kw:
        **apppre**:
            Application prefix. When set, 'file:' scheme parts are
            added for the file-URI tokens, and a leading component
            which does not start with 'file:' is replaced by an entry
            of the file-URI scheme table.
            default := False

        **keepsep**:
            Keeps separator, in particular the trailing.
            default := False

        **strip**:
            Strip separators, in particular the trailing.
            default := False

        **stripquote**:
            Strips *filesysobjects* triple-quotes.
            default := False

        **tpf**:
            Target platform. Defines some fine-tuning,
            e.g. for the file-URI, see *splitpathx*.
            The value is resolved through *rte2num*.
            default := RTE_FILEURI

Returns:
    The split path as a *tuple* of components, an empty tuple when
    no component was collected.

Raises:
    FileSysObjectsError
        The value of *tpf* is not a key of *rte2num*.

    PathError
        *apppre* is set and the scheme lookup yields 'file' while
        the first component is not empty.

    pass-through
"""
# accumulated path components, returned as a tuple at the end
parts = []
# NOTE(review): _cur is never read in this function
_cur = ""
apppre = kw.get('apppre', False)
keepsep = kw.get('keepsep', False)
strip = kw.get('strip', False)
stripquote = kw.get('stripquote', False)
# resolve the tpf option; an unknown key is reported as FileSysObjectsError
try:
tpf = rte2num[kw.get('tpf', RTE_FILEURI)]
except KeyError:
raise FileSysObjectsError("parameter tpf = " + str(kw.get('tpf')))
# controls updir: /../.. != ../..
# 0: no history
# 1: leading chain of RELATIVE up-dirs, keep them all
# 2: has a leading chain of RELATIVE up-dirs
state = 0
# set to 1 between SC_CHRCLSSTART and SC_CHRCLSEND tokens
inclass = 0
# the index of the last matched group selects the token class in ASCII_SC_CTRL
for it in PATHSCANNER.finditer(p):
g = it.lastindex # PATHSCANNER ASCII_SC_CTRL
# end offset of the matched group
_le = it.end(g)
# the token starts at offset 0
_isfirst = it.start(g) == 0
# the token ends at the end position of the scan
_islast = _le == it.endpos
# index of the character preceding the token
_charbefore = it.start(g) - 1
if it.group(g):
x = it.group(g)
c = ASCII_SC_CTRL[g]
if c in (SC_CHRCLSSTART,):
inclass = 1
if not parts:
parts.append(x)
else:
parts[-1] += x
elif c in (SC_CHRCLSEND,):
inclass = 0
if not parts:
parts.append(x)
else:
parts[-1] += x
# inside a character class everything except quoted sections is
# added literally to the current component
elif inclass and c not in (SC_DQUOTED, SC_SQUOTED,):
if parts:
parts[-1] += x
else:
parts.append(x)
#continue
elif c == SC_SLASH: # 1 * '/'
if not parts: # if first - absolute
parts.append('')
parts.append('')
continue
if strip:
if parts and len(parts) > 1 and not parts[-1]:
continue
elif _islast:
continue
elif it.endpos > it.end(g) and re.match(
r'[.]/', it.string[it.end(g):]):
if parts[-1]:
parts.append('')
continue
elif re.match(r'^[/\\\\]*$', it.string[_le:]):
continue
parts.append('')
elif c == SC_SEPP: # n * '/' - always followed by a shlash
if strip:
continue
parts.extend(['' for i in range(len(x))]) # @UnusedVariable
elif c == SC_BSPAIR: # pairs of '\\'
if _isfirst:
parts.append('')
parts.append('')
continue
if strip:
if it.string[_charbefore] not in '/\\':
parts.append('')
else:
parts.append('')
parts.append('')
elif c == SC_SLASHPREB:
if strip:
continue
parts.extend(['' for i in range(len(x))]) # @UnusedVariable
elif c == SC_SEPW: # 1 * '\\'
if it.start(g) == 0:
parts.append('')
if strip and _islast:
continue
parts.append('')
continue
# FIXME:
# NOTE(review): it.string[_le] is read without a check of _islast -
# confirm that a trailing separator cannot reach this line with strip set
if not strip or strip and it.string[_le] not in '/\\':
parts.append('')
continue
if strip:
if it.string[_charbefore] in '/\\' or _islast:
continue
parts.append('')
elif c in (SC_PAPP, SC_PDOM, SC_WDOM,
SC_FUNC, SC_FNONLOCAL,): # leading 2 * '' for '\\' or '//'
if apppre:
# NOTE(review): the following conditions test the constants
# SC_FNONLOCAL / SC_FUNC themselves instead of comparing them
# with c - presumably 'c == SC_...' was intended, confirm.
# tpf is a value of rte2num at this point; the string
# alternatives match only if rte2num yields such strings.
if SC_FNONLOCAL and tpf in (RTE_FILEURI0, 'fileuri0',):
parts.append('file://')
continue
elif SC_FNONLOCAL and tpf in (RTE_FILEURI4, 'fileuri4',):
parts.append('file:///')
parts.append('')
continue
elif SC_FNONLOCAL and tpf in (RTE_FILEURI5, 'fileuri5', RTE_FILEURI, 'fileuri',):
parts.append('file:////')
parts.append('')
continue
if SC_FUNC:
parts.append('file://')
else:
parts.append('')
parts.append('')
parts.append('')
# the file-URI prefix tokens add a scheme part for apppre only
elif c in (SC_FMIN,):
if apppre:
if tpf in (RTE_FILEURI, 'fileuri',):
parts.append('file://')
elif tpf in (RTE_FILEURI0, 'fileuri0',):
parts.append('file:')
else:
parts.append('file://')
elif c in (SC_FABS,):
if apppre:
if tpf in (RTE_FILEURI0, 'fileuri0',):
parts.append('file:')
else:
parts.append('file://')
elif c in (SC_FSHORT,):
if apppre:
parts.append('file://')
# drive with separators: the first two characters of the token are
# added to the current component, an empty one is created if required
elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP,): # posix does not have drives
if not parts:
parts.append('')
if strip: # keep drive-root
parts[-1] += x[:2]
parts.append('')
else:
parts[-1] += x[:2]
parts.extend(['' for i in range((len(x) - 2))]) # @UnusedVariable
elif c == SC_DRIVE:
if not parts:
parts.append(x)
else:
parts[-1] += x[:2]
elif c == SC_NULLDIR:
continue
elif c == SC_UPDIR:
# a leading up-dir starts the chain of relative up-dirs (state 1),
# the look-ahead below ends the chain (state 2)
if _isfirst:
parts.append('..')
parts.append('')
state = 1
continue
elif state == 1:
parts[-1] += '..'
parts.append('')
if len(it.string) > _le + 3 and it.string[_le:_le + 3] != '../' or \
len(it.string) > _le + 2 and it.string[_le:_le + 2] != '..':
state = 2
continue
# otherwise the up-dir removes a collected component; the handling
# depends on the count of leading empty entries of parts
# NOTE(review): parts[1] is read without a length check - confirm
# that parts cannot hold a single empty entry here
if parts:
if not parts[0] and not parts[1]: # share/posix-app
if len(parts) > 5:
if not parts[-1]:
parts.pop()
parts.pop()
parts.append('')
elif len(parts) > 4:
parts.pop()
elif not parts[0]: # absolute path
if len(parts) > 2:
if not parts[-1]:
parts.pop(-2)
else:
parts.pop()
parts.append('')
elif len(parts) > 1:
parts.pop()
parts.append('')
else: # relative path
if not parts[-1]:
if len(parts) > 3 and parts[-2] != '..':
parts.pop()
parts.pop()
else:
parts.pop()
parts.append('..')
else:
parts.pop()
parts.append('')
continue
# quoted section: with stripquote the first and the last three
# characters of the token are removed
elif c in (SC_DQUOTED, SC_SQUOTED):
if stripquote:
_x = x[3:-3]
else:
_x = x
if not parts:
parts.append(_x)
else:
parts[-1] += _x
continue
# any other token is added to the current component
elif not parts:
parts.append(x)
else:
parts[-1] += x
# with strip and without keepsep the trailing empty components are dropped
if strip and parts and not keepsep:
while parts and not parts[-1]:
parts.pop()
# apppre: a first component which is not yet a 'file:' scheme is replaced
# by the scheme selected through its leading separators and tpf
if apppre and parts != [] and not parts[0].startswith('file:'):
_x = _file_uri_scheme[_get_lead_sep.match(parts[0]).group(0)]
if _x == 'file':
if parts[0] == '':
if tpf in (RTE_FILEURI4, RTE_FILEURI5,) and parts[2] == '':
parts[0] = _file_uri_scheme[tpf]
elif tpf == RTE_FILEURI0:
parts[0] = _file_uri_scheme[RTE_FILEURI0]
else:
parts[0] = _file_uri_scheme[RTE_FILEURI]
else:
# does not recognize drives
raise PathError("file-uri requires absolute path, got: " + str(p))
elif _x == 'netapp':
if tpf in (RTE_FILEURI4, RTE_FILEURI5,):
parts[0] = _file_uri_scheme[tpf]
else:
parts[0] = _file_uri_scheme[RTE_FILEURI5]
else:
parts[0] = _file_uri_scheme[RTE_FILEURI]
return tuple(parts)
def splitpathx(spath, **kw):
    """Split a path name into a tuple with one item for each directory.

    For example ::

        In [15]: filesysobjects.paths.splitpathx("/a/b/c")
        Out[15]: ('', 'a', 'b', 'c')

        In [16]: filesysobjects.paths.splitpathx("x:/a/b/c")
        Out[16]: ('x:', 'a', 'b', 'c')

        In [17]: filesysobjects.paths.splitpathx("x:\\a\\b\\c")
        Out[17]: ('x:', 'a', 'b', 'c')

    For *URI*s and search paths refer to *splitapppathx*.

    Supports directory name types as 'literal', 'glob', and
    're/regexpr'. Supports the same syntax elements as *normpathx*,
    while the result is prepared for the simple application of the
    built-in *join()* with *os.sep*. Is not aware of application tags
    except Network-Shares, Posix-Applications, and file-URI.

    **REMARK**:
        The intention is to replace the 'str.split()' method for the
        split of the path parts, thus this is different to the method
        'os.path.split()'.

    The call is dispatched by the source platform *spf* either to
    *splitpathx_win()* or to *splitpathx_posix()*, all options are
    passed through unchanged.

    Args:
        **spath**:
            Path to split.

        kw:
            **apppre**:
                Application prefix, when 'True' the scheme is
                included, else dropped. ::

                    apppre=(True|False)

            **keepsep**:
                Modifies the behavior of the 'strip' parameter.
                If 'False', the trailing separator is dropped. ::

                    splitpathx('/a/b', keepsep=False)  => ('', 'a', 'b')
                    splitpathx('/a/b/', keepsep=False) => ('', 'a', 'b')

                For 'True' trailing separators are kept as directory
                marker::

                    splitpathx('/a/b', keepsep=True)  => ('', 'a', 'b')
                    splitpathx('/a/b/', keepsep=True) => ('', 'a', 'b', '')

            **pathsep**:
                Optional search path separator.

                    posix: ':'
                    win32: ';'

                default := os.pathsep

            **sep**:
                Optional path separator.

                    posix: '/'
                    win32: '\\'

                default := os.path.sep

            **strip**:
                Removes null-entries.
                default := False

            **stripquote**:
                Removes paired triple-quotes of protected/masked
                string sections. ::

                    "/a/'''head:'''/c" => "/a/head:/c"

                default := False

            **spf**:
                Source platform, defines the input syntax domain and
                selects the splitter. The value is resolved through
                *rte2num*, when omitted the key *RTE_FILEURI* is used.
                For the syntax refer to the API in the manual at
                :ref:`spf <OPTS_SPF>`.
                For additional details refer to
                :ref:`tpf and spf <TPF_AND_SPF>`,
                `paths.getspf() <paths.html#getspf>`_,
                :ref:`normapppathx() <def_normapppathx>`,
                `normpathx() <paths.html#normpathx>`_.

            **tpf**:
                Target platform. Even though the split form of a
                resource path is basically canonical, some details of
                the specifications for slight variations require the
                granular fine-tuning. Thus defines in case of ambiguity
                the *scheme* for *apppre=True*. Accepts the following
                values only. ::

                    tpf := (
                          RTE_FILEURI0 | 'fileuri0'  # RFC8089 - minimal
                        | RTE_FILEURI4 | 'fileuri4'  # RFC8089 - 4-slash UNC/POSIX-app
                        | RTE_FILEURI5 | 'fileuri5'  # RFC8089 - 5-slash UNC/POSIX-app
                        | RTE_FILEURI  | 'fileuri'   # RFC8089 - canonical
                    )

    Returns:
        A tuple containing the path split into its components. The
        result is prepared to be concatenated by *join()*.

        The interface is aware of the *os.path.sep* character, but a
        present regular expression may span multiple path components,
        which have to be handled dynamically when applying the path
        pattern e.g. by *findpattern*.

    Raises:
        FileSysObjectsError
            The value of *spf* is not a key of *rte2num*.

        pass-through
    """
    try:
        _source = rte2num[kw.get('spf', RTE_FILEURI)]
    except KeyError:
        raise FileSysObjectsError("parameter error: spf =" + str(kw.get('spf')))

    # the windows bit of the source platform selects the splitter
    _splitter = splitpathx_win if _source & RTE_WIN32 else splitpathx_posix
    return _splitter(spath, **kw)
def normpathx(spath, **kargs):
    """Normalize a path, similar to 'os.path.normpath()'.

    Provides optional extensions for paths with basic application
    schemes and search paths, dos-drives, and the split of paths into
    directories. The various representations could be converted
    on-the-fly. ::

        smb, cifs, file, http/https, UNC, POSIX-network apps

    For advanced processing of application schemes refer to
    *normapppathx()* and 'splitapppathx()'. The path could include
    regular expressions *re* and *glob*, literals and masked parts.

    * regular expressions
      The supported regular expressions are native Python regular
      expressions as supported by 're' with support of expressions
      spanning multiple directories.

    * globs
      Standard module *glob*.

    * literals:
      Any literal path.

    Regular expressions and globs could be masked as quoted strings,
    which are kept unchanged.

    The *normpathx* provides the features as a simple interface for
    the normalization across multiple platforms. The companion
    interfaces provide various features, e.g. the *escapepathx* and
    *unescapepathx* of path names including *re* and *glob*.

    Args:
        **spath**:
            A single path entry - no valid 'os.pathsep'. In case of a
            required search path including semantic 'os.pathsep'
            use 'splitapppathx()'.

        kargs:
            **apppre**:
                Application prefix.
                default := False

            **keepsep**:
                Keeps significant separators, in particular the
                trailing path separator 'sep', and the trailing search
                path 'pathseparator'.
                default := False

            **strip**:
                Strips redundancies from path names, ::

                    "a/.//./b/c/../" => "a/b"

                see related 'keepsep' ::

                    "a/.//./b/c/../" => "a/b/"

                default := True

            **stripquote**:
                Removes paired triple-quotes of protected/masked
                string sections. ::

                    "/a/'''head:'''/c" => "/a/head:/c"

                default := False

            **spf**:
                Source platform, defines the input syntax domain.
                For the syntax refer to the API in the manual at
                :ref:`spf <OPTS_SPF>`.
                For additional details refer to
                :ref:`tpf and spf <TPF_AND_SPF>`,
                `paths.getspf() <paths.html#getspf>`_,
                :ref:`normapppathx() <def_normapppathx>`,
                `normpathx() <paths.html#normpathx>`_.

            **tpf**:
                Target platform, defines the output syntax domain.
                The targets 'local', 'cnp' and 'cnw' are served
                directly by 'os.path.normpath()',
                'posixpath.normpath()' and 'ntpath.normpath()'.
                For the syntax refer to the API in the manual at
                :ref:`tpf <OPTS_TPF>`.
                For additional details refer to
                :ref:`tpf and spf <TPF_AND_SPF>`,
                `paths.gettpf() <paths.html#gettpf>`_,
                :ref:`normapppathx() <def_normapppathx>`,
                `normpathx() <paths.html#normpathx>`_.

            **pathsep**:
                Changes path separator for the source platform. ::

                    pathsep := (
                          (: | ;)  # replaces by ':' or ';'
                        | <keyword>
                        | <#enum>
                    )

    Returns:
        Normalized path.

    Raises:
        PathError
            The *pathsep* value is unknown, or no callback is
            registered for the resolved target platform.

        pass-through
    """
    strip = kargs.get('strip', True)
    tpf = kargs.get('tpf', False)
    apppre = kargs.get('apppre', False)

    #
    # target platform
    #
    # canonical targets are served by the system interfaces
    for _aliases, _native in (
            (('local', RTE_LOCAL,), os.path.normpath),
            (('cnp', RTE_CNP,), posixpath.normpath),
            (('cnw', RTE_CNW,), ntpath.normpath),
    ):
        if tpf in _aliases:
            return _native(spath)

    _tsep, _tpsep, tpf, _tpfn, _apre = gettpf(tpf, apppre=apppre)

    #
    # source platform
    #
    _sep, _psep, spf, _spfn = getspf(kargs.get('spf', False))

    # recognized pathsep, empty or False is no replacement
    _requested = kargs.get('pathsep')
    if _requested:
        try:
            _psep = rte_map[_requested][1]
        except KeyError:
            raise PathError("unknown pathseparator: " + str(kargs.get('pathsep')))

    try:
        cb = sub_path_calls[tpf]
    except KeyError:
        raise PathError("Platform callback: " + str(tpf))

    kw = {
        'apppre': apppre,
        'keepsep': kargs.get('keepsep', False),
        'stripquote': kargs.get('stripquote', False),
    }
    state = ([], [],)

    if not strip:
        # without a search path separator the callback gets no pathsep slot
        if _psep is False:
            return PATHSCANNER.sub(
                lambda x: cb(x, _spfn, strip, state, **kw), spath)
        return PATHSCANNER.sub(
            lambda x: cb(x, _spfn, strip, _psep, state, **kw), spath)

    try:
        _nulldirs = _NULLDIRS.match(spath)
    except TypeError:
        _nulldirs = _NULLDIRS.match(escapepathx(spath, force=True))
    # except sre_constants.error:
    #     _nulldirs = _NULLDIRS.match(escapepathx(spath, force=True))

    if not _nulldirs:
        return PATHSCANNER.sub(
            lambda x: cb(x, _spfn, strip, _psep, state, **kw), spath)

    # matched by _NULLDIRS: split into components and join them again
    # with the separator of the target platform
    kw['tpf'] = _tpfn
    kw['pathsep'] = _psep
    kw['sep'] = _sep
    kw['strip'] = strip
    return _tsep.join(splitpathx(spath, **kw))