# Source code for filesysobjects.paths

# -*- coding: utf-8 -*-
"""The 'filesysobjects.paths' module provides operations on static file
resource paths.
"""
from __future__ import absolute_import
from __future__ import print_function

import os
import sre_constants
import re
import posixpath
import ntpath

from filesysobjects import PathError, \
    ISSTR, \
    gettpf, getspf, \
    rte2num, rte_map, \
    V3K, RTE, RTE_POSIX, RTE_WIN32, FileSysObjectsError, \
    RTE_LOCAL, RTE_CNP, RTE_CNW, \
    RTE_FILEURI0, RTE_FILEURI4, RTE_FILEURI5, RTE_FILEURI

__author__ = 'Arno-Can Uestuensoez'
__license__ = "Artistic-License-2.0 + Forced-Fairplay-Constraints"
__copyright__ = "Copyright (C) 2010-2016 Arno-Can Uestuensoez" \
                "@Ingenieurbuero Arno-Can Uestuensoez"
__version__ = '0.1.20'
__uuid__ = "4135ab0f-fbb8-45a2-a6b1-80d96c164b72"

__docformat__ = "restructuredtext en"

#
# for test and development
# _mydebug = False

#*
# *** static compiled strings ***
#*

# pathname separator - the separator itself is taken from os.path on
# every platform, only the regular expression character classes differ
OSSEP = os.path.sep  #: os separator

#: character class os separator
OSSEPCLS = '[\\\\]' if RTE & RTE_WIN32 else '[/]'

#: character class without separator
OSSEPCLSN = '[^\\\\]' if RTE & RTE_WIN32 else '[^/]'

# Base flags for the compiled patterns of this module: verbose syntax
# always, on Python3 additionally ASCII-only matching.
rebaseflags = (re.X | re.ASCII) if V3K else re.X  # @UndefinedVariable

#
# prohibited characters for optional validation - see strict options
#
# Each pattern is a single character class / single character, so one
# hit means the scanned string contains at least one prohibited
# character. r'\0' is the regular expression escape for the NUL
# character.
#
INVALIDCHARSWIN = re.compile(r'[:<>*?]')  #: windows
INVALIDCHARSPOSIX = re.compile(r'\0')  #: posix
INVALIDCHARS = re.compile(r'[:<>*?\0]')  #: super position of both

#: maps unambiguous escape characters to escape sequences,
#: control character -> backslash + letter
ESC_CHAR_MAP = dict((
    ('\a', "\\a"),
    ('\b', "\\b"),
    ('\f', "\\f"),
    ('\n', "\\n"),
    ('\r', "\\r"),
    ('\t', "\\t"),
    ('\v', "\\v"),
))

#: list of special escape characters, as a regular expression
#: character class
ESC_CHARS = '[\a\b\f\n\r\t\v]'

#: maps escape characters for escape sequences to unescape,
#: letter following the backslash -> control character
UNESC_CHAR_MAP = dict((
    ('a', "\a"),
    ('b', "\b"),
    ('f', "\f"),
    ('n', "\n"),
    ('r', "\r"),
    ('t', "\t"),
    ('v', "\v"),
))

# pylint: disable-msg=W0105

# [MS-DTYP] - 2.2.57 - UNC definitions
# pchar = %x20-21 / %x23-29 / %x2D-2E / %x30-39 / %x40-5A / %x5E-7B / %x7D-FF
# pchar = r'[\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]'
# pchar="""[^\x00-\x1f\x22\x2a-\x2c\x2f\x3a-\x3f\x5b-\x5d\x7c]"""

#
# *** splits environment variables ***
#
# The pattern is selected once at import time by the runtime platform:
# the Windows variant recognizes '%NAME%', the other variant recognizes
# '${NAME}' and '$NAME'.
#
# Group layout: group 1 is the outer wrapper; each alternative is built
# as '(whole (literal-prefix) (variable-reference))' and therefore
# occupies three consecutive groups. The group index of each 'whole'
# alternative - the number noted in the pattern comments - is listed in
# _ENV_SPLITg. The last alternative matches any remaining text with an
# empty variable-reference group.
#
if RTE & RTE_WIN32:
    _ENV_SPLIT = re.compile(r"""
       (
           (([^%]*?)([%][a-zA-Z0-9_]+[%]))           # 2: defined without brace
         | (([^%]*?)([%][a-zA-Z0-9_]+[^%]?))         # 5: ERROR:
         | ((.*)())                                  # 8: any
       )
       """, rebaseflags)
    """Split-out environment variables for substitution."""

    # group indexes of the alternatives of the Windows pattern
    _ENV_SPLITg = [
        2,
        5,
        8,
    ]
    """Entry points into sub strings environment variables and literals."""
else:
    _ENV_SPLIT = re.compile(r"""
       (
           (([^$]*?)([$][{][a-zA-Z0-9_]+[}]))        # 2: defined with brace
         | (([^$]*?)([$][a-zA-Z0-9_]+[;]?))          # 5: defined without brace
         | (([^$]*?)([$][{][a-zA-Z0-9_]+[^}]?))      # 8: ERROR:
         | ((.*)())                                  # 11: any
       )
       """, rebaseflags)
    """Split-out environment variables for substitution."""

    # group indexes of the alternatives of the '$'-style pattern
    _ENV_SPLITg = [
        2,
        5,
        8,
        11,
    ]
    """Entry points into sub strings environment variables and literals."""

# pylint: enable-msg=W0105
# Flags of the path scanners: the module base flags plus multi-line
# anchors, on Python3 with ASCII-only matching requested explicitly.
pathflags = rebaseflags | re.M  # @UndefinedVariable
if V3K:
    pathflags |= re.ASCII  # @UndefinedVariable

# The scanner is one large alternation with exactly one capture group
# per alternative. The callbacks read 'match.lastindex' to learn which
# alternative matched and use that number as index into ASCII_SC_CTRL,
# so the order of the alternatives and the order of ASCII_SC_CTRL have
# to be changed together. Alternatives are tried from top to bottom,
# the first one matching at a position wins.
#
# NOTE(review): the alternatives 31 and 32 are textually identical,
#   so 32 can never be the matching one. The table comments suggest a
#   'masked' and a 'raw' variant were intended - confirm.
# NOTE(review): the alternatives 4 and 5 accept decimal digits only
#   ('[0-9]'), escapes containing the hex digits a-f are not matched by
#   them - presumably hex digits were intended, confirm.
#
#: First stage regexpr scanner for 'normpathx', also used in 'escapepathx'.
PATHSCANNER = re.compile(r"""
    (["]{3}[\x01-\xFF]*?["]{3})       # 1  quoted string by 3 double quotes(") - similar to Python
    |([']{3}[\x01-\xFF]*?[']{3})      # 2  quoted string by 3 single quotes(') - similar to Python
    |([\a\b\f\n\r\t\v])               # 3  python escape char - without separate backslash
    |(?<=[^\\\\]){0,1}([\\\\][u][0-9]{4})
                                      # 4  unicode-16
    |(?<=[^\\\\]){0,1}([\\\\][U][0-9]{8})
                                      # 5  unicode-32
    |^(file://[/]{0,1}[/\\\\]{2})(?![/\\\\])
                                      # 6  share/netapp - rfc8089, [MS-DTYP]

    |^(file://)(?![/\\\\])            # 7  non-local - rfc8089 / maps to Posix-App
    |^(file:)(?=/[^/\\\\])            # 8  min - rfc8089
    |^(file://)(?=/)                  # 9  absolute path - rfc8089 rfc1738
    |^(file:)(?=[a-zA-Z]:)            # 10 short-form - rfc8089 - DOS drive

    |^(//)(?=[^\\\\/"]+[\\\\/][\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]{1,80}[\\\\/]*)
                                      # 11 portable UNC

    |^(//)(?=[^/\\\\][^/]*/.+)        # 12 pure posix - with the additional constraint first != [/\\]
    |^([\\\\][\\\\])(?=[^\\\\/"]+[\\\\/][\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]{1,80}[\\\\/]*)
                                      # 13 UNC

    |^([a-zA-Z]:)(?=\\a|\\b|\\f|\\n|\\r|\\t|\\v)
                                      # 14 drive following escaped special escape character

    |^([a-zA-Z]:[\\\\]+)(?![\a\b\f\n\r\t\v])
                                      # 15 drive following 1..n * '[\\]'
    |^([a-zA-Z]:[/]+)                 # 16 drive following 1..n * '[/]'
    |^([a-zA-Z]:)                     # 17 drive only, or relative path
    |(?<=[;:])([a-zA-Z]:[\\\\]+)      # 18 drive following 1..n * '[\\]'
    |(?<=[;:])([a-zA-Z]:[/]+)         # 19 drive following 1..n * '[/]'

    |(?<=[;:])([a-zA-Z]:)             # 20 drive only, or relative path
    |(?<=[\\\\])([/]+)                # 21 os. sep - posix pathname separators
    |(/+)(?=/)                        # 22 n * posix dir-separators
    |(/)                              # 23 1 * posix dir-separators
    |(?<=[\\\\/])([.][.][\\\\/])      # 24 'up-dir: /../ '
    |^([.][.][\\\\/]+)                # 25 'up-dir: ^../ '
    |(?<=[\\\\/])([.][.])$            # 26 'up-dir: /..$ '

    |(?<=[/\\\\])([.][/\\\\])         # 27 'null-dir: \.\ /./'
    |^([.][/\\\\]+)                   # 28 'null-dir: .\ ./'
    |(?<=[/\\\\])([.])$               # 29 'null-dir: \. /.'

    |([\\\\][\\\\])                   # 30 bs pairs
    |([\\\\])(?=\n)                   # 31 single bs - escape '\n'
    |([\\\\])(?=\n)                   # 32 single bs - escape '\n'
    |([\\\\])(?![\\\\])               # 33 single bs - non-escape
    |([:]+)                           # 34 posix path-separators
    |([;]+)                           # 35 win path-separators
    |(?<![^\\\\][\\\\])(\[)           # 36 start char class
    |(?<![^\\\\][\\\\])(\])           # 37 end char class
    |(?<![^\\\\][\\\\])(')            # 38 escaped '
    |(?<![^\\\\][\\\\])(")            # 39 escaped "
    |([^\\\\/\a\b\f\n\r\t\v:;"'\[\]]+)# 40
    |([^\\\\/\a\b\f\n\r\t\v:;]+)(?!.*["'\[\]])
                                      # 41
    |(.)                              # 42 # free char
    """, pathflags)

#
# map matches to actual control sequences
#
# Integer tags for the token classes recognized by the scanners. The
# values are unique and spaced by 10; the callbacks compare the tag
# looked up in ASCII_SC_CTRL against these names.
#
SC_BSPAIR = 1000  # '\' pair
SC_CIFS = 1010  # cifs:
SC_CRMASK = 1020  # masked '\n'
SC_DOIT = 1030  # out of range
SC_DQUOTED = 1040  # "
SC_DRIVE = 1050  # dos drive letter - or a directory on Posix !!!
SC_DRIVENPSEP = 1060  # dos drive letter following n * posix_sep
SC_DRIVENWSEP = 1070  # dos drive letter following n * win_sep
SC_DUMMY = 1080  # dummy
SC_EACHOF = 1090  # assure for each
SC_ESCCHAR = 1100  # \[abf...]
SC_FABS = 1110  # file:///path - absolute path - rfc8089 rfc1738
SC_FILE = 1120  # file:
SC_FMIN = 1130  # file:/path - min rfc8089 - Appendix B
SC_FNONLOCAL = 1140  # file://host/path  non-local - rfc8089 - Appendix B / maps to Posix-App
SC_FSHORT= 1150  # file:<dos-drive>:path - short-form - rfc8089
SC_FUNC = 1160  # file:///// | file://// - share/netapp - rfc8089 - Appendix E.3.2
SC_HTTP = 1170  # http:
SC_KEEP = 1180  # keep literally
SC_MASKALL = 1190  # keep literally
SC_NULLDIR = 1200  # '\.\' '/./'
SC_PAPP = 1210  # Posix-Net-App
SC_PDOM = 1220  # Posix-Net-App prefix-compliance to SC_WDOM [MS-DTY]
SC_PSEPP = 1230  # ':'
SC_PSEPW = 1240  # ';'
SC_REPLACE = 1250  # replace an equal set of chars e.g. '/' or '\'
SC_SEPP = 1260  # n * Posix path.sep
SC_SEPW = 1270  # 1 * win path.sep
SC_SLASH = 1280  # 1 * '/'
SC_SLASHPREB = 1290  # '\' + '/'
SC_SMB = 1300  # smb:
SC_SQUOTED = 1310  # '
SC_TOEVEN = 1320  # assure count is even
SC_U16 = 1330  # unicode-16
SC_U16R = 1340  # unicode-16 raw
SC_U32 = 1350  # unicode-32
SC_U32R = 1360  # unicode-32 raw
SC_UNC = 1370  # unc:
SC_UPDIR = 1380  # '/../'
SC_WDOM = 1390  # Win-Domain
SC_CHRCLSSTART = 1400  # '[' - start of a character class
SC_CHRCLSEND = 1410  # ']' - end of a character class
SC_ANYONECHR = 1420  # NOTE(review): not used by the visible tables - meaning to be confirmed
SC_ESCAPEDSQUOT = 1430  # single quote - scanner group "escaped '"
SC_ESCAPEDDQUOT = 1440  # double quote - scanner group 'escaped "'


#: Context maps of item indexes corresponding to group indexes onto constants.
#: Performance enhancement by padding, in order to avoid hash calculations via a dictionary.
#:
#: The list position is the capture group number of PATHSCANNER, the
#: callbacks index it by 'match.lastindex'. Position 0 is padding only,
#: because group 0 is the whole match. Any change of the scanner
#: alternatives requires the same change here.
ASCII_SC_CTRL = [
    0,  # all by *re*
    SC_DQUOTED,      # 1  string
    SC_SQUOTED,      # 2  string
    SC_ESCCHAR,      # 3  python escape sequences
    SC_U16,          # 4  unicode-16
    SC_U32,          # 5  unicode-32

    SC_FUNC ,        # 6  file:///// | file://// - share/netapp - rfc8089 - Appendix E.3.2
    SC_FNONLOCAL,    # 7  rfc8089 - Appendix B
    SC_FMIN,         # 8  rfc8089 - Appendix B
    SC_FABS,         # 9  absolute path- rfc8089, rfc1738
    SC_FSHORT,       # 10 short-form - rfc8089

    SC_PDOM,         # 11 '//' - UNC-Compatible
    SC_PAPP,         # 12 '//' - pure POSIX compliance
    SC_WDOM,         # 13 '\\\\'

    SC_DRIVE,        # 14 DOS drive letter - following an escaped escape character

    SC_DRIVENWSEP,   # 15 DOS drive letter with n * win_sep
    SC_DRIVENPSEP,   # 16 DOS drive letter with n * posix_sep

    SC_DRIVE,        # 17 DOS drive letter - no following sep

    SC_DRIVENWSEP,   # 18 DOS drive letter with n * win_sep - after a path separator
    SC_DRIVENPSEP,   # 19 DOS drive letter with n * posix_sep - after a path separator

    SC_DRIVE,        # 20 DOS drive letter - no following sep - after a path separator
    SC_SLASHPREB,    # 21 '\/'
    SC_SEPP,         # 22 n * '/'
    SC_SLASH,        # 23 1 * '/'
    SC_UPDIR,        # 24 updir '/../'
    SC_UPDIR,        # 25 updir '^../'
    SC_UPDIR,        # 26 updir '/..$'

    SC_NULLDIR,      # 27 nulldir '/./'
    SC_NULLDIR,      # 28 nulldir '^./'
    SC_NULLDIR,      # 29 nulldir '/.$'

    SC_BSPAIR,       # 30 a '\' pair
    SC_CRMASK,       # 31 a masked <CR> - r'\\n', else difficult to detect by regexpr
    SC_CRMASK,       # 32 same as SC_CRMASK, but raw - '\\n',
    SC_SEPW,         # 33 '\\',
    SC_PSEPP,        # 34 ':',
    SC_PSEPW,        # 35 ';',
    SC_CHRCLSSTART,  # 36 '['
    SC_CHRCLSEND,    # 37 ']'
    SC_ESCAPEDSQUOT, # 38 single quote
    SC_ESCAPEDDQUOT, # 39 double quote
    SC_DOIT,         # 40 plain text without quotes and brackets
    SC_DOIT,         # 41 plain text
    SC_DOIT,         # 42 any single remaining character
]
#: Checks for contained dot-directory names in paths, controls parser mode.
#:
#: Covered forms, with '/' and backslash both accepted as separator:
#: an embedded '/./' or '/../', a trailing '/..' and a leading '../'.
#: A trailing '/.' and a leading './' are not covered by this pattern.
_NULLDIRS = re.compile(
    r'.*([/\\\\][.]{1,2}[/\\\\]|[/\\\\][.][.]$|^[.][.][/\\\\])')

# Lookup of the scheme for a given key: leading separator strings map
# onto the scheme category, the symbolic 'fileuri*' names map onto the
# literal URI prefix.
_file_uri_scheme = {
    '':  'file',
    '/':  'file',
    '\\':  'file',
    '//':  'netapp',
    '\\\\':  'netapp',
    'fileuri':  'file://',
    'fileuri0': 'file:',
    'fileuri4': 'file:///',
    'fileuri5': 'file:////',
}
# the same URI prefixes, keyed by the numeric runtime environment values
_file_uri_scheme.update((
    (RTE_FILEURI0, 'file:'),
    (RTE_FILEURI4, 'file:///'),
    (RTE_FILEURI5, 'file:////'),
    (RTE_FILEURI, 'file://'),
))

# Normalizes a file-URI identifier onto its numeric value, symbolic
# names are translated, numeric values map onto themselves.
_file_uri_scheme_num = {
    'fileuri':  RTE_FILEURI,
    'fileuri0': RTE_FILEURI0,
    'fileuri4': RTE_FILEURI4,
    'fileuri5': RTE_FILEURI5,
}
_file_uri_scheme_num.update(
    (rte_num, rte_num)
    for rte_num in (RTE_FILEURI0, RTE_FILEURI4, RTE_FILEURI5, RTE_FILEURI)
)

# Captures the leading separator run of a path in group 1: n * '/',
# else n * backslash, else the empty string.
#
# The slash alternative has to require at least one character: written
# as '/*' it already succeeds with an empty match, the regular
# expression engine then never tries the backslash alternative and
# leading backslashes are never captured.
_get_lead_sep = re.compile(r'(/+|[\\\\]*)')


# One capture group per alternative, 'sub_unesc' dispatches on the
# group number via 'match.lastindex' - the numbers in the pattern
# comments are these group numbers.
#
# NOTE(review): in the alternatives 4 and 5 '[\\u]' and '[\\U]' are
#   character classes matching ONE character - either a backslash or
#   the letter - followed by digits. A backslash followed by 'u' and
#   digits is therefore not matched by them; PATHSCANNER spells the same
#   tokens as '[\\\\][u]' / '[\\\\][U]'. 'sub_unesc' returns the text of
#   these groups unchanged, so the output is not affected - confirm
#   before aligning the patterns.
#
#: short scanner for unescape
PATHSCANNER_UNESC = re.compile(r"""
    (["]{3}[\x01-\xFF]*?["]{3})       #  1  quoted string by 3 double quotes(") - similar to Python
    |([']{3}[\x01-\xFF]*?[']{3})      #  2  quoted string by 3 single quotes(') - similar to Python

    |(?<=[\\\\])([\a\b\f\n\r\t\v])    #  3  python escape char with leading backslash
    |(?<=[^\\\\])([\\u][0-9]{4})      #  4  unicode-16
    |(?<=[^\\\\])([\\U][0-9]{8})      #  5  unicode-32
    |^([\\\\][\\\\])(?=[^\\\\/"]+[\\\\/][\x20-\x21\x23-\x29\x2D-\x2E\x30-\x39\x40-\x5A\x5E-\x7B\x7D-\xFF]{1,80}[\\\\/]*)
                                      #  6 UNC
    |([\\\\][abfnrtv])                #  7  python escaped char with leading backslash
    |([\\\\][\\\\])(?![abfnrtvuU])    #  8 bs pairs - free - not escaping
    |([\\\\][\\\\][abfnrtvuU])        #  9 bs pairs - escaping an escape char
    |([\\\\])(?![\\\\])               # 10 single bs - non-escape
    |([\\\\]['])                      # 11 special escapes - POSIX + Windows
    |([\\\\]["])                      # 12 special escapes - POSIX only (+ URI ?)
    |(?<![^\\\\][\\\\])(\[)           # 13 start char class
    |(?<![^\\\\][\\\\])(\])           # 14 end char class
    |([^\\\\/\a\b\f\n\r\t\v:;"'\[\]]+)# 15
    |([^\\\\/\a\b\f\n\r\t\v:;]+)(?!.*["'\[\]])
                                      # 16
    |(.)                              # 17 # free char
    """, pathflags)

def sub_keep(it, spf=RTE, strip=True, pathsep=''):
    """To be used by re.sub() - keeps the matched text as it is.

    Args:
        **it**:
            Match object passed by *re.sub*, expected to come from
            a scanner with one capture group per alternative.

        **spf**:
            Not evaluated by this callback.

        **strip**:
            Not evaluated by this callback.

        **pathsep**:
            Not evaluated by this callback.

    Returns:
        The text of the group which matched last, *None* when that
        text is empty.

    Raises:
        pass-through

    """
    kept = it.group(it.lastindex)  # PATHSCANNER ASCII_SC_CTRL
    return kept if kept else None
# Default character class tracker of sub_esc(), shared by all calls
# which do not pass their own 'state'.
_esc_state_shared = []


def sub_esc(it, spf=RTE, strip=False, pathsep='', state=_esc_state_shared, **kw):
    """To be used by re.sub() - escapes backslashes and non-printable
    characters.

    The callback is invoked once per token found by *PATHSCANNER* and
    returns the replacement for this token. Tokens without a matching
    rule are returned unchanged.

    Args:
        **it**:
            Match object passed by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain.
            For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.
            Not evaluated by this callback.

        **strip**:
            Not evaluated by this callback.

        **pathsep**:
            Not evaluated by this callback.

        **state**:
            List used as marker for "inside of a character class":
            an item is appended for each '[' token, the list is
            emptied at the ']' token. The default is one list shared
            by all calls, which is what carries the marker from one
            token to the next within one *re.sub* run.

        kw:
            **charback**:
                Escapes the backslashes within character classes.
                Could be combined with *force* and *freeback*.

            **force**:
                Escapes all back-slashes outside of character classes,
                else the special characters only. Unix processing of
                DOS paths requires all separators to be escaped.
                When present it has priority over **all**.

                default := False

            **all**:
                Same as *force*, evaluated only when *force* is
                not provided.

            **freeback**:
                Escapes backslashes outside character classes.
                Could be combined with *charback*.

    Returns:
        The replacement text for the token, e.g. with *force* ::

            C:\\\\Windows\\\\system32\\\\cmd.exe

        *None* for an empty token.

    Raises:
        pass-through

    """
    try:
        _all = kw['force']  # old - temporary for migration
    except KeyError:
        _all = kw.get('all', False)

    _charback = kw.get('charback', False)
    _freeback = kw.get('freeback', False)

    g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
    x = it.group(g)
    if not x:
        return None
    c = ASCII_SC_CTRL[g]

    # FIXME: check for null-dir on 'win'

    if c in (SC_BSPAIR, SC_SEPW, SC_WDOM):
        # backslash pair, single backslash, leading UNC backslashes:
        # doubled outside of character classes on 'force'/'freeback',
        # inside of character classes on 'charback'
        if not state:
            if _all or _freeback:
                return 2 * x
        elif _charback:
            return 2 * x

    elif c == SC_CHRCLSSTART:
        state.append('')

    elif c == SC_CHRCLSEND:
        r = ''.join(state) + x
        del state[:]  # in-place, the list may be the shared default
        return r

    elif c == SC_DRIVENWSEP:
        if not state:
            if _all:  # escape anything - blindly
                return x[:2] + 2 * x[2:]
            if (len(x) - 2) % 2:
                # odd number of backslashes behind the drive, make it even
                return x + '\\'

    elif c == SC_ESCCHAR:
        if not state:
            return ESC_CHAR_MAP[x]

    elif c == SC_CRMASK:
        if not state:
            return '\\\\'

    elif c == SC_NULLDIR:
        # groups 27 and 28 may end with a backslash which has to be
        # doubled, group 29 is the plain trailing '.'
        if not state and g in (27, 28) and x[-1] == '\\':
            return x + '\\'

    return x
# Default state list of sub_unesc(), shared by all calls which do not
# pass their own 'state'.
_unesc_state_shared = []


def sub_unesc(it, _t=None, spf=None, state=_unesc_state_shared, **kw):
    """To be used by re.sub() - unescapes backslashes and non-printable
    characters.

    The callback is invoked once per token found by
    *PATHSCANNER_UNESC* and dispatches on the number of the matched
    group. Tokens without a matching rule are returned unchanged.

    Args:
        **it**:
            Match object passed by *re.sub*.

        **_t**:
            Not evaluated by this callback.

        **spf**:
            Source platform, defines the input syntax domain.
            For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.
            Not evaluated by this callback.

        **state**:
            List of pending replacement texts. This callback only
            consumes it: when it is not empty, a token without an own
            rule is replaced by the last item, followed by the token
            without its first character. The default is one list
            shared by all calls.

        kw:
            **all** or **force**:
                Unescapes all back/slashes, else the special characters only.
                Unix processing of DOS paths requires all separators to
                be escaped and therefore eventually to be unescaped too.
                When **force** is present it has priority over **all**.

                default := False

    Returns:
        The replacement text for the token, *None* for an empty token.
        E.g. with *all* a pair of backslashes becomes one backslash,
        independent of *all* the two characters backslash + 'n' become
        a newline.

    Raises:
        pass-through

    """
    try:
        _all = kw['force']  # old - temporary for migration
    except KeyError:
        _all = kw.get('all', False)

    g = it.lastindex  # PATHSCANNER_UNESC group number
    x = it.group(g)
    if not x:
        return None

    if g == 3:  # raw control character behind a backslash
        if _all:
            return '\\' + x

    elif g == 6:  # leading UNC backslashes
        if _all:
            return 2 * x

    elif g == 7:  # escaped special char '\\[ab....]'
        return UNESC_CHAR_MAP[x[1]]

    elif g == 8:  # free pairs of '\\' - non/escaping
        if _all:
            return '\\'

    elif g == 9:  # single '\' following escaped char '\[ab....]'
        return x[1:]

    elif g == 11:  # escaped '
        if _all:
            return x[1]

    elif g == 12:  # escaped "
        if _all:
            return x[1]

    elif state:
        # NOTE(review): nothing in this callback fills 'state', the
        # branch is only active for a caller provided list - confirm
        if len(x) > 1:
            return state.pop() + x[1:]
        return state.pop()

    return x
def sub_posix(it, spf=RTE, strip=True, pathsep=':', state=None, **kw):
    """To be used by re.sub() - converts to posix.

    Replaces the directory separators slash and backslash by '/'.
    Posix does not have drives, just ignores the drive-property,
    assumes these are ordinary characters. When drives are required
    as syntax tokens refer to 'Cygwin'.

    The callback is invoked once per token found by *PATHSCANNER* and
    returns the replacement for this token. Tokens without a matching
    rule are returned unchanged, as well as all tokens within a
    character class - except for the optional stripping of quotes.

    Args:
        **it**:
            Match object passed by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain.
            For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.
            For the visible rules not relevant to the result.

        **strip**:
            Strip redundancies, e.g. repeated and trailing separators.

        **pathsep**:
            Input separator 'pathsep' to be be replaced. ::

                pathsep := ':' ';' ''

            One or more are allowed, is used as a set containment
            of replacement checks. Empty string disables the replacement.
            Runs of ';' are currently returned unchanged in any case.

        **state**:
            Compile states, a sequence of two lists which has to be the
            same object for all tokens of one *re.sub* run:

            0. markers for multiple separators of mixed platforms
               ('/', backslash) handed over to the following token,
            1. marker list which is non-empty while the scan is inside
               of a character class.

            When omitted a fresh state is used for each token, so
            neither of both is tracked across tokens.

        kw:
            **apppre**:
                Application prefix, keeps 'file:' style prefixes.

            **keepsep**:
                Keeps separator, in particular the trailing.

            **stripquote**:
                Strips *filesysobjects* triple-quotes.

    Returns:
        Converted format posix for the token. E.g. for a complete
        run::

           c:/Windows/system32/cmd.exe:c:/Windows/system32/notepad.exe

        *None* for an empty token.

    Raises:
        pass-through

    """
    apppre = kw.get('apppre', False)
    keepsep = kw.get('keepsep', False)
    stripquote = kw.get('stripquote', False)

    if state is None:
        # no caller tracking available - behave as at the first token
        state = [[], []]
    seps, chrcls = state[0], state[1]

    # tracks multiple path separators, when these are of mixed
    # platforms('/', '\') to be normalized - the marker is consumed
    # by the token following the one which has set it
    sx = seps.pop() if seps else 0

    # ignores character classes
    inchrclass = bool(chrcls)

    g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
    x = it.group(g)
    if not x:
        return None
    c = ASCII_SC_CTRL[g]

    #
    # frequently used values
    #
    _s = it.string
    _le = it.end(g)
    _isfirst = it.start(g) == 0
    _islast = _le == it.endpos
    _charbefore = it.start(g) - 1

    if c == SC_CHRCLSSTART:  # char class start
        chrcls.append('')

    elif c == SC_CHRCLSEND:  # char class end
        del chrcls[:]  # in-place, the list is owned by the caller

    elif inchrclass:
        if c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                return x[3:-3]

    elif c in (SC_DQUOTED, SC_SQUOTED):
        if stripquote:
            return x[3:-3]

    elif c == SC_BSPAIR:  # pairs of '\\'
        if not strip:
            return '//'
        if _isfirst:
            return '/'
        _pre = _s[_charbefore]
        if _pre in '/\\':  # already done
            return '/' if sx else ''
        if _pre == ':':
            return '/'
        if _islast or _s[_le] == ':':
            return '/' if keepsep else ''
        if _s[_le] in '\\/':
            # more separators follow, drop this one and leave a marker
            seps.append(2 if _s[_charbefore] == ':' else 1)
            return ''
        return '/'

    elif c == SC_SLASH:
        if strip:
            if _isfirst and _islast:
                return '/'
            elif _islast:
                if sx == 2:
                    return '/'
                elif not keepsep:
                    return ''
            elif _s[_le] == ':':
                if _isfirst or _s[_charbefore] == ':':
                    return '/'
                if not keepsep:
                    return ''
        return '/'

    elif c == SC_SLASHPREB:
        if strip:
            return '/' if sx else ''
        return x

    elif c == SC_SEPP:  # n * '/'
        if not strip:
            return '/' * len(x)
        if _isfirst or _s[_charbefore] == ':':
            # separator run at the start of a path, the final '/'
            # of the run has to be kept by the following token
            seps.append(2)
        return ''

    elif c == SC_SEPW:  # 1 * '\'
        if _isfirst:  # is app-pre '//'
            return '/'
        if _s[_charbefore] == ':':
            return '/'
        if not strip:
            return '/' * len(x)
        if _islast or _s[_le] == ':':
            return '/' if sx == 2 else ''
        if sx:
            return '/'
        if _s[_charbefore] in '/\\':
            return ''
        if not _islast and _s[_le] == '\\':
            # a following backslash drops this one, unless it masks
            # a newline
            if not (it.endpos > _le + 1 and _s[_le + 1] == '\n'):
                return ''
        return '/'

    elif c == SC_PSEPP:
        if x[0] not in pathsep:  # 1..n
            return x
        if strip:
            if _islast:  # drop trailing os.pathsep
                return ''
            return ':'
        return ':' * len(x)

    elif c == SC_PSEPW:
        # native
        return x

        # mixed
        # NOTE(review): the rules below are not reachable due to the
        #   return above, kept as found - confirm the intended state
        if x[0] not in pathsep:  # 1..n
            return x

        if spf & RTE_POSIX:
            # for posix node names an ordinary character
            return x

        if strip:
            if _islast:  # drop trailing os.pathsep
                return ''
            if len(it.string) > _le:
                if it.string[_le] == ':':
                    return ''
            return ':'
        else:
            return ':' * len(x)

    elif c == SC_FUNC:  # verified by char contents
        if apppre:
            return 'file://///'
        return '//'

    elif c == SC_UPDIR:
        return '../'

    elif c == SC_FNONLOCAL:  # verified by char contents
        if apppre:
            return 'file://'
        return '//'

    elif c in (SC_FMIN, SC_FABS, SC_FSHORT):
        if not apppre:
            return ''

    elif c in (SC_WDOM, SC_PDOM, SC_PAPP):  # verified by char contents
        if strip and _s[_le] in ':':
            return '/'
        elif strip and _islast:
            return ''
        return '//'

    elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP):
        # posix does not have drives
        if strip:  # keep drive-root
            return x[0] + ':/'
        return x[:2] + '/' * (len(x) - 2)

    elif c == SC_DRIVE:
        return x

    elif c == SC_NULLDIR:
        return ''

    elif c == SC_FILE:
        if apppre:
            return x
        return ''

    elif c == SC_UNC:
        if apppre:
            return x
        return '//'

    return x
def sub_uri(it, spf=RTE, strip=True, pathsep='', state=None, **kw):
    """To be used by re.sub() - converts to uri.

    Args:
        **it**:
            Match object handed over by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain.
            For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.

            For additional details refer to
            :ref:`tpf and spf <TPF_AND_SPF>`,
            `paths.getspf() <paths.html#getspf>`_,
            :ref:`normapppathx() <def_normapppathx>`,
            `normpathx() <paths.html#normpathx>`_.

        **strip**:
            Strip redundancies.

        **pathsep**:
            Input separator 'pathsep' to be replaced. ::

                pathsep := ':' ';' ''

            One or more are allowed, is used as a set
            containment of replacement checks.
            Empty string disables the replacement.

        **state**:
            Accepted for a call signature common to the 'sub_*'
            callbacks, not evaluated here.

        kw:
            **apppre**:
                Application prefix.

    Returns:
        Converted format uri. E.g.::

           http://a/b/c

    Raises:
        pass-through

    """
    apppre = kw.get('apppre', False)

    g = it.lastindex  # PATHRULES ASCII_SC_CTRL
    _le = it.end(g)
    _start = it.start(g)
    _total = len(it.string)

    x = it.group(g)
    if not x:
        # empty match - nothing to convert
        return ''
    c = ASCII_SC_CTRL[g]

    if c == SC_BSPAIR:  # pairs of '\\'
        # NOTE(review): the nesting of this branch was reconstructed from a
        # flattened listing; the former look-ahead block marked
        # 'FIXME: a.s.a.p.' was unreachable in this reading and is dropped -
        # confirm against the original layout.
        if not strip:
            return '//'
        if _start == 0:
            return '/'
        if it.string[_start - 1] in '/\\':  # already done
            return ''
        if _total > _le:
            return '/'
        return '//'

    elif c == SC_SLASH:
        return '/'

    elif c == SC_SLASHPREB:
        if strip:
            return ''
        return x

    elif c == SC_SEPP:  # n * '/'
        if strip:
            return ''
        return '/' * len(x)

    elif c == SC_SEPW:  # n * '/'
        if _start == 0:  # is app-pre '//'
            return '/'

        if strip and it.string[_start - 1] in '/\\':
            # follows a separator which already produced the output
            return ''

        if _total == _le and _le > 1:  # do not '/'
            return '/'

        if _total > _le and it.string[_le] == '\\':
            # a following backslash-newline is left to the common tail below
            if not (_total > _le + 1 and it.string[_le + 1] == '\n'):
                if strip:
                    return ''
                return '/'

        if strip:
            return '/'
        return '/' * len(x)

    elif c == SC_PSEPP:
        if x[0] not in pathsep:  # 1..n
            return x

        if strip:
            if _total == _le:  # drop trailing os.pathsep
                return ''
            if it.string[_le] == ';':
                return ''
            return ':'
        return ':' * len(x)

    elif c == SC_PSEPW:
        if x[0] not in pathsep:  # 1..n
            return x

        if spf & RTE_POSIX:  # for posix node names an ordinary character
            return x

        if strip:
            if _total == _le:  # do not drop trailing '/'
                return '/'
            if it.string[_le] == ':':
                return ''
            return ':'
        return ':' * len(x)

    elif c in (SC_PAPP, SC_WDOM):
        return '//'

    elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP):  # posix does not have drives
        if strip:  # keep drive-root
            return x[0] + ':/'
        return x[:2] + '/' * (len(x) - 2)

    elif c == SC_UPDIR:
        return '../'

    elif c == SC_DRIVE:
        return x

    elif c == SC_NULLDIR:
        return ''

    elif c == SC_FILE:
        if apppre:
            return x
        return ''

    elif c == SC_UNC:
        if apppre:
            return x
        return '//'

    return x


def sub_rfc8089(it, spf=RTE, strip=True, pathsep='', state=None, **kw):
    """To be used by re.sub() - converts to file uri in accordance to RFC8089.
    This is different from most of common URI, e.g. HTTP(S).

    Args:
        **it**:
            Match object handed over by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain.
            For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.

            For additional details refer to
            :ref:`tpf and spf <TPF_AND_SPF>`,
            `paths.getspf() <paths.html#getspf>`_,
            :ref:`normapppathx() <def_normapppathx>`,
            `normpathx() <paths.html#normpathx>`_.

        **strip**:
            Strip redundancies.

        **pathsep**:
            Input separator 'pathsep' to be replaced. ::

                pathsep := ':' ';' ''

            One or more are allowed, is used as a set
            containment of replacement checks.
            Empty string disables the replacement.

        **state**:
            Compile states, required. A sequence of two lists which is
            shared by all callback invocations of one substitution:
            *state[0]* is a stack of separator markers pushed for the
            following match, *state[1]* is non-empty while inside a
            character class.

        kw:
            **apppre**:
                Application prefix.

            **keepsep**:
                Keeps the trailing separator. ::

                    keepsep := (
                          True   # keep trailing sep, indicating a directory
                        | False  # drop trailing sep
                    )

                default := True

            **stripquote**:
                Strips *filesysobjects* triple-quotes.

    Returns:
        Converted format uri. E.g.::

           http://a/b/c

    Raises:
        pass-through

    """
    apppre = kw.get('apppre', False)
    keepsep = kw.get('keepsep', True)
    stripquote = kw.get('stripquote', False)

    # tracks multiple path separators, when these are of mixed
    # platforms('/', '\') to be normalized
    sx = state[0].pop() if state[0] else 0

    # ignores character classes
    inchrclass = bool(state[1])

    #
    # frequently used values
    #
    g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
    _le = it.end(g)
    _isfirst = it.start(g) == 0
    _islast = _le == it.endpos
    _charbefore = it.start(g) - 1

    x = it.group(g)
    if not x:
        # empty match - nothing to convert
        return ''
    c = ASCII_SC_CTRL[g]

    if c in (SC_CHRCLSSTART, ):  # char class start
        state[1].append('')

    elif c in (SC_CHRCLSEND, ):  # char class end
        # in-place reset, the list is shared with the caller
        del state[1][:]

    elif inchrclass:
        if c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                return x[3:-3]

    elif c in (SC_DQUOTED, SC_SQUOTED):
        # same handling as in the sibling callbacks sub_posix and sub_win
        if stripquote:
            return x[3:-3]

    elif c == SC_BSPAIR:  # pairs of '\\'
        if not strip:
            return '//'
        if _isfirst:
            return '/'

        _pre = it.string[_charbefore]
        if _pre in '/\\':  # already done
            if sx:
                return '/'
            return ''
        elif _pre == ':':
            return '/'
        elif _islast or it.string[_le] == ':':
            if not keepsep:
                return ''
            return '/'
        elif it.string[_le] in '\\/':
            # more separators follow - emit nothing now and leave a marker
            # for the next match
            if _pre == ':':
                state[0].append(2)
            else:
                state[0].append(1)
            return ''
        return '/'

    elif c == SC_SLASH:
        if strip:
            if _isfirst and _islast:
                return '/'
            elif _islast:
                if sx == 2:
                    return '/'
                elif not keepsep:
                    return ''
            elif it.string[_le] == ':':
                if _isfirst or it.string[_charbefore] == ':':
                    return '/'
                if not keepsep:
                    return ''
        return '/'

    elif c == SC_SLASHPREB:
        if strip:
            if sx:
                return '/'
            return ''
        return x

    elif c == SC_SEPP:  # n * '/'
        if strip:
            if _isfirst or it.string[_charbefore] == ':':
                state[0].append(2)
            return ''
        return '/' * len(x)

    elif c == SC_SEPW:  # n * '/'
        # NOTE(review): nesting reconstructed from a flattened listing -
        # confirm the position of the final "return '/'".
        if _isfirst:  # is app-pre '//'
            return '/'
        if it.string[_charbefore] == ':':
            return '/'

        if not strip:
            return '/' * len(x)

        if _islast or it.string[_le] == ':':
            if sx == 2:
                return '/'
            return ''
        if sx:
            return '/'
        if it.string[_charbefore] in '/\\':
            return ''

        if it.endpos > _le and it.string[_le] == '\\':
            # a following backslash-newline keeps the separator
            if not (it.endpos > _le + 1 and it.string[_le + 1] == '\n'):
                return ''
        return '/'

    elif c == SC_PSEPP:
        if x[0] not in pathsep:  # 1..n
            return x

        if strip:
            if _islast:  # drop trailing os.pathsep
                return ''
            return ':'
        return ':' * len(x)

    elif c == SC_PSEPW:
        if x[0] not in pathsep:  # 1..n
            return x

        if spf & RTE_POSIX:  # for posix node names an ordinary character
            return x

        if strip:
            if _islast:  # trailing os.pathsep
                if keepsep:
                    return x
                return ''
            if it.string[_le] == ':':
                return ''
            return ':'
        return ':' * len(x)

    elif c in (SC_PAPP, SC_WDOM):
        return '//'

    elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP):  # posix does not have drives
        if strip:  # keep drive-root
            return x[0] + ':/'
        return x[:2] + '/' * (len(x) - 2)

    elif c == SC_UPDIR:
        return '../'

    elif c == SC_DRIVE:
        return x

    elif c == SC_NULLDIR:
        return ''

    elif c == SC_FILE:
        if apppre:
            return x
        return ''

    elif c == SC_UNC:
        if apppre:
            return x
        return '//'

    return x
def sub_win(it, spf=RTE, strip=True, pathsep=';', state=None, **kw):
    """To be used by re.sub() - converts to windows.
    Replaces '[/\\\\]' with '\\\\', and '[;:]' with ';'.

    Args:
        **it**:
            Match object handed over by *re.sub*.

        **spf**:
            Source platform, defines the input syntax domain.
            For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.

            For additional details refer to
            :ref:`tpf and spf <TPF_AND_SPF>`,
            `paths.getspf() <paths.html#getspf>`_,
            :ref:`normapppathx() <def_normapppathx>`,
            `normpathx() <paths.html#normpathx>`_.

        **strip**:
            Strip redundancies.

        **pathsep**:
            Input separator 'pathsep' to be replaced. ::

                pathsep := ':' ';' ''

            One or more are allowed, is used as a set
            containment of replacement checks.
            Empty string disables the replacement.

        **state**:
            Compile states, required. A sequence of two lists which is
            shared by all callback invocations of one substitution:
            *state[0]* is a stack of separator markers pushed for the
            following match, *state[1]* is non-empty while inside a
            character class.

        kw:
            **apppre**:
                Application prefix. When set the separators are
                written as slashes, else as backslashes.

            **keepsep**:
                Keeps separator, in particular the trailing.
                Accepted, but not evaluated by this callback.

            **stripquote**:
                Strips *filesysobjects* triple-quotes.

    Returns:
        Converted format win. E.g.::

           C:\\Windows\\system32\\cmd.exe;C:\\Windows\\system32\\notepad.exe

    Raises:
        pass-through

    """
    apppre = kw.get('apppre', False)
    stripquote = kw.get('stripquote', False)

    if apppre:
        # scheme for an URI requested, so slashes only
        sep = '/'
        sep2 = '//'
    else:
        # no scheme, so a UNC
        sep = '\\'
        sep2 = '\\\\'

    # tracks multiple path separators
    sx = state[0].pop() if state[0] else 0

    # ignores character classes
    inchrclass = bool(state[1])

    #
    # frequently used values
    #
    g = it.lastindex  # PATHRULES ASCII_SC_CTRL
    _le = it.end(g)
    _isfirst = it.start(g) == 0
    _islast = _le == it.endpos
    _isnotlast = it.endpos > _le
    _charbefore = it.start(g) - 1

    if _isnotlast:
        _nextissep = it.string[_le] in '\\/'
        _nextispsep = it.string[_le] in pathsep
    else:
        # no look-ahead character at the end of the input
        _nextissep = None
        _nextispsep = None

    x = it.group(g)
    if not x:
        # empty match - nothing to convert
        return ''
    c = ASCII_SC_CTRL[g]  # PATHRULES PATHSCANNER ASCII_SC_CTRL

    if c in (SC_CHRCLSSTART, ):  # char class start
        state[1].append('')

    elif c in (SC_CHRCLSEND, ):  # char class end
        # in-place reset, the list is shared with the caller
        del state[1][:]

    elif inchrclass:
        if c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                return x[3:-3]

    elif c in (SC_DQUOTED, SC_SQUOTED):
        if stripquote:
            return x[3:-3]

    elif c == SC_BSPAIR:  # pairs of '\\'
        if not strip:
            return sep2

        if _isfirst:
            # TODO: basically not possible as net-app, see SC_PAP
            if _isnotlast and _nextissep:
                state[0].append(2)
                return ''
            return sep
        elif _islast:
            if sx == 2:
                return sep
            return ''
        elif _nextispsep:
            if sx:
                return sep
            elif it.string[_charbefore] in pathsep:
                return sep
            return ''
        elif _nextissep:
            # more separators follow - emit nothing now and pass the
            # marker on to the next match
            if not sx:
                sx = 1
            state[0].append(sx)
            return ''
        return sep

    elif c in (SC_SLASH, ):  # 1 * '\' - win treats '/' equal
        if strip:
            if sx == 3:
                return ''

            if _isfirst:
                if _islast:
                    return sep
                elif _nextissep:
                    state[0].append(2)
                    return ''
            elif _nextispsep or _islast:
                if sx == 2:
                    return sep
                elif it.string[_charbefore] in pathsep:
                    return sep
                return ''
            elif _isnotlast and _nextissep:
                state[0].append(1)
                return ''
        return sep

    elif c in (SC_SLASHPREB, ):  # '\/'
        if not strip:
            return sep * len(x)

        if _islast:
            if sx == 2:
                return sep
            return ''
        elif _nextissep:
            if not sx:
                sx = 1
            state[0].append(sx)
            return ''
        return sep

    elif c in (SC_SEPP, ):  # n * '/'
        if strip:
            if _isfirst:
                state[0].append(2)
            else:
                state[0].append(1)
            return ''
        return sep * len(x)

    elif c in (SC_SEPW, ):  # n * '\' - win treats '/' equal
        if not strip:
            return sep * len(x)

        if _isfirst:
            if _isnotlast and _nextissep:
                state[0].append(2)
                return ''
        elif _islast or _nextispsep:
            if sx == 2:
                return sep
            if it.string[_charbefore] in pathsep:
                return sep
            return ''
        elif _isnotlast and _nextissep:
            if not sx:
                sx = 1
            state[0].append(sx)
            return ''
        return sep

    elif c == SC_PSEPP:
        # native
        if strip:
            if x[0] in pathsep and _islast:
                return ''
        return x

    elif c == SC_CRMASK:
        if _isfirst:
            return x
        elif sx:
            return sep + x[1:]
        return x

    elif c in (SC_FUNC,):  # verified by char contents
        if apppre:
            return 'file://///'
        if strip:
            # test the end first, there is no character to look at
            if _islast:
                return ''
            if it.string[_le] in ':':
                return '/'
        return sep2

    elif c == SC_FABS:
        # lookahead for RFC8089 - appendix E.2.1
        # plain membership test on the raw character, 'ord(ch.upper())'
        # fails for characters with a multi-character upper-case form
        if len(it.string) > _le + 2 \
                and it.string[_le + 2] == ':' \
                and it.string[_le + 1] in (
                    'abcdefghijklmnopqrstuvwxyz'
                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
            state[0].append(3)

        if not apppre:
            return ''

    elif c in (SC_FMIN, SC_FSHORT,):
        if not apppre:
            return ''

    elif c == SC_PSEPW:
        if x not in pathsep:  # changed spf/psep
            return x

        if strip:
            _is = it.start(g)
            if not _is or (_is + 1) == it.endpos:
                return ''

            if x[0] in pathsep and _is > 0 and it.string[_is - 1] in pathsep:  # 1..n
                return ''

            return ';'
        return ';' * len(x)

    elif c == SC_UPDIR:
        return '..\\'

    elif c == SC_FNONLOCAL:
        if not apppre:
            return ''

    elif c == SC_PAPP:
        if strip and _nextispsep:
            return sep
        return sep2

    elif c in (SC_WDOM, SC_PDOM):
        # basically sure a UNC or NETAPP
        if strip and _nextispsep:
            return sep
        return sep2

    elif c in (SC_DRIVENWSEP, SC_DRIVENPSEP):
        if strip:  # keep drive-root
            return x[0] + ':' + sep
        return x[:2] + sep * (len(x) - 2)

    elif c == SC_DRIVE:
        return x

    elif c == SC_NULLDIR:
        return ''

    elif c == SC_FILE:
        if apppre:
            return x
        return ''

    elif c == SC_UNC:
        if apppre:
            return x
        return sep2

    return x
# Dispatch table: maps a format key to the callback to be handed to
# 're.sub()'. Several keys are aliases of the same callback.
# NOTE(review): the keys presumably name the target format of the
# normalization - confirm against the callers.
sub_path_calls = {  #: 're.sub' callbacks for normalization
    'b': sub_win,
    'cnp': sub_posix,
    'cnw': sub_win,
    'file': sub_rfc8089,
    'http': sub_uri,
    'https': sub_uri,
    'k': sub_keep,
    'keep': sub_keep,
    'posix': sub_posix,
    'rfsys': sub_posix,
    's': sub_posix,
    'share': sub_win,
    'uri': sub_uri,
    'win': sub_win,
    'win32': sub_win,
}
def escapepathx(spath, tpf=None, **kargs):
    """Escape special characters within path names, supports
    cross-platform processing, knows the special escape characters
    of Python and *re*. The characters could be masked by quoting,
    and/or enclosing in character classes.

    +----------------+-----------------------+-----------------+
    | input          | -> esc                | -> unesc        |
    +================+=======================+=================+
    | \\\\abc"\\\\n" | \\\\\\\\abc"\\\\n"    | \\\\abc"\\\\n"  |
    +----------------+-----------------------+-----------------+
    | \\\\"abc\\\\n" | \\\\\\\\"abc\\\\n"    | \\\\"abc\\\\n"  |
    +----------------+-----------------------+-----------------+
    | \\\\abc\\\\n   | \\\\\\\\abc\\\\\\\\n  | \\\\abc\\\\n    |
    +----------------+-----------------------+-----------------+
    | \\\\xy" "z     | \\\\\\\\xy" "z        | \\\\xy" "z      |
    +----------------+-----------------------+-----------------+
    | \\\\"xy z"     | \\\\\\\\"xy z"        | \\\\"xy z"      |
    +----------------+-----------------------+-----------------+
    | \\\\xy z       | \\\\\\\\xy\\\\ z      | \\\\xy z        |
    +----------------+-----------------------+-----------------+

    Args:
        **spath**:
            The path to be escaped. ::

               spath := (
                    <path-string>
                  | <path-array>
               )
               path-string := (str | unicode)
               path-array := (list | tuple)

            * *path-string*

                The string representation of a complete path, which
                may contain literal, *glob*, and *re* expressions.
                The supported character representation is *str* or
                *unicode* for Python2.7 and Python3.5+.

            * *path-array*

                The component representation of a path, which
                consists of its items, either as a *list* or as a
                *tuple*. Each item may contain literal, *glob*, and
                *re* expressions.

        **tpf**:
            Target platform, a key of *rte2num*. When not provided
            the current runtime environment *RTE* is used. The value
            is handed to the escape callback.

        kargs:
            Passed through to the escape callback, except *strip*
            which is passed by position.

            **charback**:
                Escapes all backslashes within character
                classes. Could be combined with *force*
                and *freeback*. ::

                    \\a\\[\\\\] => \\a\\[\\\\\\\\]

            **force**:
                Controls the escaped scope. Excludes quoted strings
                and character classes. Could be combined with
                *charback*. ::

                   force = (
                        True   # escape characters and any free backslash
                      | False  # defined escape characters only
                   )

                   force == True
                       \\a\\X\\n  =>  \\\\a\\\\X\\\\n

                   force == False
                       \\a\\X\\n  =>  \\\\a\\X\\\\n

                   default := False

            **freeback**:
                Escapes backslashes outside character
                classes. Could be combined with *charback*. ::

                    \\a\\b\\[\\\\] => \\a\\\\b\\\\[\\\\]

            **strip**:
                Strip flag for the escape callback. ::

                   default := False

    Returns:
        The escaped path with added '*\\\\*' in accordance to the
        rules and chosen options.
        The return type of the representation is the same as the
        input representation. ::

            str     => str
            unicode => unicode
            list    => list
            tuple   => tuple

    Raises:
        PathError
            For an unknown *tpf*.

        FileSysObjectsError
            For an unsupported type of *spath*.

        TypeError

        pass-through

    """
    if not tpf:
        _ttpf = RTE
    else:
        try:
            _ttpf = rte2num[tpf]
        except KeyError:
            raise PathError("escapepathx:Parameter tpf: " + str(tpf))

    _strip = kargs.pop('strip', False)
    _state = []

    def _escape(item):
        # the callback signature is (match, spf, strip, pathsep, state);
        # an empty 'pathsep' must be passed so that the state lands in
        # the right parameter
        return PATHSCANNER.sub(
            lambda m: sub_esc(m, _ttpf, _strip, '', _state, **kargs),
            item)

    if type(spath) in ISSTR:
        return _escape(spath)

    if type(spath) in (list, tuple,):
        # keep the container type of the input
        return type(spath)([_escape(spx) for spx in spath])

    raise FileSysObjectsError(
        "escapepathx:requires (str | list | tuple), got: " + str(spath))
def unescapepathx(spath, **kargs):
    """Unescape path - which has been escaped before.
    The path representation could either be as a string/unicode
    or split components as a *list* or *tuple*.

    .. warning::

       Processes strings accurately which were processed by
       *escapepathx()* before, else the result could be erroneous.
       In particular for windows paths due to the ambiguity
       of the '\\\\'!

    The same masking rules apply as for the *normpathx()*
    and *escapepathx()* calls.
    Escape sequences could be protected by quoting, which
    keeps the content literally.
    See *pathtools.stripquotes*.

    Args:
        **spath**:
            The path to be unescaped. ::

               spath := (
                    <path-string>
                  | <path-array>
               )
               path-string := (str | unicode)
               path-array := (list | tuple)

            * *path-string*

                The string representation of a complete path, which
                may contain literal, *glob*, and *re* expressions.
                The supported character representation is *str* or
                *unicode* for Python2.7 and Python3.5+.

            * *path-array*

                The component representation of a path, which
                consists of its items, either as a *list* or as a
                *tuple*. Each item may contain literal, *glob*, and
                *re* expressions. Each item is processed on its own.

        kargs:
            All options are also passed through to the unescape
            callback.

            **tpf**:
                Target platform, either a numeric value or a key of
                *rte2num*. Resolved by *gettpf()* and handed to the
                unescape callback. ::

                   default := RTE

            **netpath**:
                When *True* considers double prefix separators as
                share and/or network application, else assumes these
                are the result of escaping with force. ::

                   default := False

    Returns:
        The unescaped path with removed '\\' in accordance to the
        rules and chosen options.
        The return type of the representation is the same as the
        input representation. ::

            str     => str
            unicode => unicode
            list    => list
            tuple   => tuple

    Raises:
        PathError
            For an unknown *tpf*.

        FileSysObjectsError
            For an unsupported type of *spath*.

        TypeError

        pass-through

    """
    netpath = kargs.get('netpath', False)
    tpf = kargs.get('tpf', RTE)
    if type(tpf) is not int:
        try:
            tpf = rte2num[tpf]
        except KeyError:
            raise PathError("unescapepathx:Parameter tpf: " + str(tpf))

    # only the resolved platform is required here
    _tsep, _tpsep, tpf, _tpfn, _apre = gettpf(tpf)

    state = []

    def _unescape(item):
        res = PATHSCANNER_UNESC.sub(
            lambda m: sub_unesc(m, tpf, state, **kargs), item)
        # a leading pair of separators marks a share/network application,
        # restore the separator removed by the unescape
        if netpath and len(item) > 1 \
                and item[0] in ('/', '\\') and item[1] in ('/', '\\'):
            return item[0] + res
        return res

    if type(spath) in ISSTR:
        return _unescape(spath)

    if type(spath) in (list, tuple,):
        # process every item and keep the container type of the input
        return type(spath)([_unescape(spx) for spx in spath])

    raise FileSysObjectsError(
        "unescapepathx:requires (str | list | tuple), got: " + str(spath))
def splitpathx_win(p, **kw):
    """Split windows pathnames containing 'literal', 'glob',
    and 're/regexpr'. Serves the source platform windows and
    alike. For the call interface see *splitpathx()*

    Args:
        **p**:
            The path name to split.

        kargs:
            **apppre**:
                Application prefix. ::

                   default := False

            **keepsep**:
                Keeps separator, in particular the trailing. ::

                   default := False

            **strip**:
                Strip separators, in particular the trailing. ::

                   default := False

            **stripquote**:
                Strips *filesysobjects* triple-quotes. ::

                   default := False

            **tpf**:
                Target platform. Defines some fine-tuning, e.g.
                for the file-URI, see *splitpathx*. ::

                   default := RTE_FILEURI

    Returns:
        The split path as a tuple, empty when nothing was found.

    Raises:
        FileSysObjectsError
            For an unknown *tpf*.

        PathError
            When a file-URI scheme is requested for a relative path.

        pass-through

    """
    parts = []

    apppre = kw.get('apppre', False)
    keepsep = kw.get('keepsep', False)
    strip = kw.get('strip', False)
    stripquote = kw.get('stripquote', False)

    try:
        tpf = rte2num[kw.get('tpf', RTE_FILEURI)]
    except KeyError:
        raise FileSysObjectsError("parameter tpf = " + str(kw.get('tpf')))

    def _glue(text):
        # extend the current item, open the first one when required
        if parts:
            parts[-1] += text
        else:
            parts.append(text)

    # controls updir:  /../.. != ../..
    #   0: no history
    #   1: leading chain of RELATIVE up-dirs, keep them all
    #   2: has a leading chain of RELATIVE up-dirs
    state = 0

    inclass = 0

    # NOTE(review): the nesting of the branches below was reconstructed
    # from a flattened listing - confirm against the original layout.
    for it in PATHSCANNER.finditer(p):
        g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
        _le = it.end(g)
        _isfirst = it.start(g) == 0
        _islast = _le == it.endpos
        _charbefore = it.start(g) - 1

        x = it.group(g)
        if not x:
            continue
        c = ASCII_SC_CTRL[g]

        if c in (SC_CHRCLSSTART,):
            inclass = 1
            _glue(x)

        elif c in (SC_CHRCLSEND,):
            inclass = 0
            _glue(x)

        elif inclass and c not in (SC_DQUOTED, SC_SQUOTED,):
            # anything but quotes is literal within a character class
            _glue(x)

        elif c == SC_SLASH:  # 1 * '/'
            if not parts:  # if first - absolute
                parts.append('')
                parts.append('')
                continue

            if strip:
                if len(parts) > 1 and not parts[-1]:
                    continue
                elif _islast:
                    continue
                elif re.match(r'[.][/\\\\]', it.string[_le:]):
                    # a current-dir entry follows
                    if parts[-1]:
                        parts.append('')
                    continue
                elif re.match(r'^[/\\\\]*$', it.string[_le:]):
                    # trailing separators only
                    continue
            parts.append('')

        elif c == SC_SEPP:  # n * '/' - always followed by a slash
            if strip:
                continue
            parts.extend([''] * len(x))

        elif c == SC_BSPAIR:  # pairs of '\\'
            if _isfirst:
                parts.append('')
                parts.append('')
                continue

            if strip:
                if it.string[_charbefore] not in '/\\':
                    parts.append('')
            else:
                parts.append('')
                parts.append('')

        elif c == SC_SLASHPREB:
            if strip:
                continue
            parts.extend([''] * len(x))

        elif c == SC_SEPW:  # 1 * '\\'
            if _isfirst:
                parts.append('')
                if strip and _islast:
                    continue
                parts.append('')
                continue

            # the look-ahead is valid only when a character follows,
            # a trailing separator is handled by the strip rule below
            if not strip \
                    or (not _islast and it.string[_le] not in '/\\'):
                parts.append('')
                continue

            if it.string[_charbefore] in '/\\' or _islast:
                continue
            parts.append('')

        elif c in (SC_PAPP, SC_PDOM, SC_WDOM, SC_FUNC, SC_FNONLOCAL,):
            # leading 2 * '' for '\\' or '//'
            # NOTE(review): 'SC_FNONLOCAL and ...' and 'if SC_FUNC' test
            # the truth value of the constants, not 'c' - kept as is,
            # confirm whether 'c == SC_...' was intended. No items are
            # added here without 'apppre', unlike splitpathx_posix.
            if apppre:
                if SC_FNONLOCAL and tpf in (RTE_FILEURI0, 'fileuri0',):
                    parts.append('file://')
                    continue

                elif SC_FNONLOCAL and tpf in (RTE_FILEURI4, 'fileuri4',):
                    parts.append('file:///')
                    parts.append('')
                    continue

                elif SC_FNONLOCAL and tpf in (
                        RTE_FILEURI5, 'fileuri5', RTE_FILEURI, 'fileuri',):
                    parts.append('file:////')
                    parts.append('')
                    continue

                if SC_FUNC:
                    parts.append('file:///')

        elif c in (SC_FMIN,):
            if apppre:
                if tpf in (RTE_FILEURI, 'fileuri',):
                    parts.append('file://')
                elif tpf in (RTE_FILEURI0, 'fileuri0',):
                    parts.append('file:')
                else:
                    parts.append('file://')

        elif c in (SC_FABS,):
            if apppre:
                if tpf in (RTE_FILEURI0, 'fileuri0',):
                    parts.append('file:')
                else:
                    parts.append('file://')

        elif c in (SC_FSHORT,):
            if apppre:
                parts.append('file://')

        elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP,):  # posix does not have drives
            _glue(x[:2])
            if strip:  # keep drive-root
                parts.append('')
            else:
                parts.extend([''] * (len(x) - 2))

        elif c == SC_DRIVE:
            if not parts:
                parts.append(x)
            else:
                parts[-1] += x[:2]

        elif c == SC_NULLDIR:
            continue

        elif c == SC_UPDIR:
            if _isfirst:
                parts.append('..')
                parts.append('')
                state = 1
                continue

            elif state == 1:
                parts[-1] += '..'
                parts.append('')
                if len(it.string) > _le + 3 and it.string[_le:_le + 3] != '../' or \
                        len(it.string) > _le + 2 and it.string[_le:_le + 2] != '..':
                    state = 2
                continue

            if parts:
                if len(parts) > 1 and not parts[0] and not parts[1]:
                    # share/posix-app
                    if len(parts) > 5:
                        if not parts[-1]:
                            parts.pop()
                        parts.pop()
                        parts.append('')
                    elif len(parts) > 4:
                        parts.pop()

                elif not parts[0]:  # absolute path
                    if len(parts) > 2:
                        if not parts[-1]:
                            parts.pop(-2)
                        else:
                            parts.pop()
                            parts.append('')
                    elif len(parts) > 1:
                        parts.pop()
                        parts.append('')

                else:  # relative path
                    if not parts[-1]:
                        if len(parts) > 3 and parts[-2] != '..':
                            parts.pop()
                            parts.pop()
                        else:
                            parts.pop()
                            parts.append('..')
                    else:
                        parts.pop()
                    parts.append('')
            continue

        elif c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                _glue(x[3:-3])
            else:
                _glue(x)
            continue

        else:
            # literal part of a path item
            _glue(x)

    if strip and parts and not keepsep:
        # drop trailing empty items
        while parts and not parts[-1]:
            parts.pop()

    if apppre and parts and not parts[0].startswith('file:'):
        _x = _file_uri_scheme[_get_lead_sep.match(parts[0]).group(0)]
        if _x == 'file':
            if parts[0] == '':
                if tpf in (RTE_FILEURI4, RTE_FILEURI5,) \
                        and len(parts) > 2 and parts[2] == '':
                    parts[0] = _file_uri_scheme[tpf]
                elif tpf == RTE_FILEURI0:
                    parts[0] = _file_uri_scheme[RTE_FILEURI0]
                else:
                    parts[0] = _file_uri_scheme[RTE_FILEURI]
            else:
                # does not recognize drives
                raise PathError(
                    "file-uri requires absolute path, got: " + str(p))

        elif _x == 'netapp':
            if tpf in (RTE_FILEURI4, RTE_FILEURI5,):
                parts[0] = _file_uri_scheme[tpf]
            else:
                parts[0] = _file_uri_scheme[RTE_FILEURI5]

        else:
            parts[0] = _file_uri_scheme[RTE_FILEURI]

    return tuple(parts)
def splitpathx_posix(p, **kw):
    """Split pathnames containing 'literal', 'glob',
    and 're/regexpr'. Serves the source platform POSIX and
    alike. For the call interface see *splitpathx()*

    Args:
        **p**:
            The path name to split.

        kargs:
            **apppre**:
                Application prefix. ::

                   default := False

            **keepsep**:
                Keeps separator, in particular the trailing. ::

                   default := False

            **strip**:
                Strip separators, in particular the trailing. ::

                   default := False

            **stripquote**:
                Strips *filesysobjects* triple-quotes. ::

                   default := False

            **tpf**:
                Target platform. Defines some fine-tuning, e.g.
                for the file-URI, see *splitpathx*. ::

                   default := RTE_FILEURI

    Returns:
        The split path as a tuple, empty when nothing was found.

    Raises:
        FileSysObjectsError
            For an unknown *tpf*.

        PathError
            When a file-URI scheme is requested for a relative path.

        pass-through

    """
    parts = []

    apppre = kw.get('apppre', False)
    keepsep = kw.get('keepsep', False)
    strip = kw.get('strip', False)
    stripquote = kw.get('stripquote', False)

    try:
        tpf = rte2num[kw.get('tpf', RTE_FILEURI)]
    except KeyError:
        raise FileSysObjectsError("parameter tpf = " + str(kw.get('tpf')))

    def _glue(text):
        # extend the current item, open the first one when required
        if parts:
            parts[-1] += text
        else:
            parts.append(text)

    # controls updir:  /../.. != ../..
    #   0: no history
    #   1: leading chain of RELATIVE up-dirs, keep them all
    #   2: has a leading chain of RELATIVE up-dirs
    state = 0

    inclass = 0

    # NOTE(review): the nesting of the branches below was reconstructed
    # from a flattened listing - confirm against the original layout.
    for it in PATHSCANNER.finditer(p):
        g = it.lastindex  # PATHSCANNER ASCII_SC_CTRL
        _le = it.end(g)
        _isfirst = it.start(g) == 0
        _islast = _le == it.endpos
        _charbefore = it.start(g) - 1

        x = it.group(g)
        if not x:
            continue
        c = ASCII_SC_CTRL[g]

        if c in (SC_CHRCLSSTART,):
            inclass = 1
            _glue(x)

        elif c in (SC_CHRCLSEND,):
            inclass = 0
            _glue(x)

        elif inclass and c not in (SC_DQUOTED, SC_SQUOTED,):
            # anything but quotes is literal within a character class
            _glue(x)

        elif c == SC_SLASH:  # 1 * '/'
            if not parts:  # if first - absolute
                parts.append('')
                parts.append('')
                continue

            if strip:
                if len(parts) > 1 and not parts[-1]:
                    continue
                elif _islast:
                    continue
                elif re.match(r'[.]/', it.string[_le:]):
                    # a current-dir entry follows
                    if parts[-1]:
                        parts.append('')
                    continue
                elif re.match(r'^[/\\\\]*$', it.string[_le:]):
                    # trailing separators only
                    continue
            parts.append('')

        elif c == SC_SEPP:  # n * '/' - always followed by a slash
            if strip:
                continue
            parts.extend([''] * len(x))

        elif c == SC_BSPAIR:  # pairs of '\\'
            if _isfirst:
                parts.append('')
                parts.append('')
                continue

            if strip:
                if it.string[_charbefore] not in '/\\':
                    parts.append('')
            else:
                parts.append('')
                parts.append('')

        elif c == SC_SLASHPREB:
            if strip:
                continue
            parts.extend([''] * len(x))

        elif c == SC_SEPW:  # 1 * '\\'
            if _isfirst:
                parts.append('')
                if strip and _islast:
                    continue
                parts.append('')
                continue

            # the look-ahead is valid only when a character follows,
            # a trailing separator is handled by the strip rule below
            if not strip \
                    or (not _islast and it.string[_le] not in '/\\'):
                parts.append('')
                continue

            if it.string[_charbefore] in '/\\' or _islast:
                continue
            parts.append('')

        elif c in (SC_PAPP, SC_PDOM, SC_WDOM, SC_FUNC, SC_FNONLOCAL,):
            # leading 2 * '' for '\\' or '//'
            # NOTE(review): 'SC_FNONLOCAL and ...' and 'if SC_FUNC' test
            # the truth value of the constants, not 'c' - kept as is,
            # confirm whether 'c == SC_...' was intended.
            if apppre:
                if SC_FNONLOCAL and tpf in (RTE_FILEURI0, 'fileuri0',):
                    parts.append('file://')
                    continue

                elif SC_FNONLOCAL and tpf in (RTE_FILEURI4, 'fileuri4',):
                    parts.append('file:///')
                    parts.append('')
                    continue

                elif SC_FNONLOCAL and tpf in (
                        RTE_FILEURI5, 'fileuri5', RTE_FILEURI, 'fileuri',):
                    parts.append('file:////')
                    parts.append('')
                    continue

                if SC_FUNC:
                    parts.append('file://')
            else:
                # two leading empty items plus the slot for the next name
                parts.append('')
                parts.append('')
                parts.append('')

        elif c in (SC_FMIN,):
            if apppre:
                if tpf in (RTE_FILEURI, 'fileuri',):
                    parts.append('file://')
                elif tpf in (RTE_FILEURI0, 'fileuri0',):
                    parts.append('file:')
                else:
                    parts.append('file://')

        elif c in (SC_FABS,):
            if apppre:
                if tpf in (RTE_FILEURI0, 'fileuri0',):
                    parts.append('file:')
                else:
                    parts.append('file://')

        elif c in (SC_FSHORT,):
            if apppre:
                parts.append('file://')

        elif c in (SC_DRIVENPSEP, SC_DRIVENWSEP,):  # posix does not have drives
            _glue(x[:2])
            if strip:  # keep drive-root
                parts.append('')
            else:
                parts.extend([''] * (len(x) - 2))

        elif c == SC_DRIVE:
            if not parts:
                parts.append(x)
            else:
                parts[-1] += x[:2]

        elif c == SC_NULLDIR:
            continue

        elif c == SC_UPDIR:
            if _isfirst:
                parts.append('..')
                parts.append('')
                state = 1
                continue

            elif state == 1:
                parts[-1] += '..'
                parts.append('')
                if len(it.string) > _le + 3 and it.string[_le:_le + 3] != '../' or \
                        len(it.string) > _le + 2 and it.string[_le:_le + 2] != '..':
                    state = 2
                continue

            if parts:
                if len(parts) > 1 and not parts[0] and not parts[1]:
                    # share/posix-app
                    if len(parts) > 5:
                        if not parts[-1]:
                            parts.pop()
                        parts.pop()
                        parts.append('')
                    elif len(parts) > 4:
                        parts.pop()

                elif not parts[0]:  # absolute path
                    if len(parts) > 2:
                        if not parts[-1]:
                            parts.pop(-2)
                        else:
                            parts.pop()
                            parts.append('')
                    elif len(parts) > 1:
                        parts.pop()
                        parts.append('')

                else:  # relative path
                    if not parts[-1]:
                        if len(parts) > 3 and parts[-2] != '..':
                            parts.pop()
                            parts.pop()
                        else:
                            parts.pop()
                            parts.append('..')
                    else:
                        parts.pop()
                    parts.append('')
            continue

        elif c in (SC_DQUOTED, SC_SQUOTED):
            if stripquote:
                _glue(x[3:-3])
            else:
                _glue(x)
            continue

        else:
            # literal part of a path item
            _glue(x)

    if strip and parts and not keepsep:
        # drop trailing empty items
        while parts and not parts[-1]:
            parts.pop()

    if apppre and parts and not parts[0].startswith('file:'):
        _x = _file_uri_scheme[_get_lead_sep.match(parts[0]).group(0)]
        if _x == 'file':
            if parts[0] == '':
                if tpf in (RTE_FILEURI4, RTE_FILEURI5,) \
                        and len(parts) > 2 and parts[2] == '':
                    parts[0] = _file_uri_scheme[tpf]
                elif tpf == RTE_FILEURI0:
                    parts[0] = _file_uri_scheme[RTE_FILEURI0]
                else:
                    parts[0] = _file_uri_scheme[RTE_FILEURI]
            else:
                # does not recognize drives
                raise PathError(
                    "file-uri requires absolute path, got: " + str(p))

        elif _x == 'netapp':
            if tpf in (RTE_FILEURI4, RTE_FILEURI5,):
                parts[0] = _file_uri_scheme[tpf]
            else:
                parts[0] = _file_uri_scheme[RTE_FILEURI5]

        else:
            parts[0] = _file_uri_scheme[RTE_FILEURI]

    return tuple(parts)
def splitpathx(spath, **kw):
    """Split a pathname into its directory items.

    For example ::

        In [15]: filesysobjects.paths.splitpathx("/a/b/c")
        Out[15]: ('', 'a', 'b', 'c')

        In [16]: filesysobjects.paths.splitpathx("x:/a/b/c")
        Out[16]: ('x:', 'a', 'b', 'c')

        In [17]: filesysobjects.paths.splitpathx("x:\\a\\b\\c")
        Out[17]: ('x:', 'a', 'b', 'c')

    For *URI*s and search paths refer to *splitapppathx*.

    Supports directory name types as 'literal', 'glob', and 're/regexpr'.
    Supports the same syntax elements as *normpathx*, and the result is
    prepared for the simple application of the built-in *join()* with
    *os.sep*. Is not aware of application tags except Network-Shares,
    Posix-Applications, and file-URI.

    This function itself only selects the splitter: the source platform
    'spf' is mapped by *rte2num*, and when the result has the *RTE_WIN32*
    bit set the call is forwarded to *splitpathx_win*, else to
    *splitpathx_posix*. All keyword arguments are passed on unchanged.

    **REMARK**: The intention is to replace the 'str.split()' method for
    the split of the path parts, thus this is different from the method
    'os.path.split()'.

    Args:
        **spath**:
            Path to split.

        kw:
            **apppre**:
                Application prefix, when 'True' the scheme is
                included, else dropped. ::

                    apppre=(True|False)

            **keepsep**:
                Modifies the behavior of the 'strip' parameter.
                If 'False', the trailing separator is dropped. ::

                    splitpathx('/a/b', keepsep=False)   => ('', 'a', 'b')
                    splitpathx('/a/b/', keepsep=False)  => ('', 'a', 'b')

                For 'True' trailing separators are kept as
                directory marker::

                    splitpathx('/a/b', keepsep=True)    => ('', 'a', 'b')
                    splitpathx('/a/b/', keepsep=True)   => ('', 'a', 'b', '')

            **pathsep**:
                Optional search path separator.

                    posix: ':'
                    win32: ';'

                default := os.pathsep

            **sep**:
                Optional path separator.

                    posix: '/'
                    win32: '\\'

                default := os.path.sep

            **strip**:
                Removes null-entries.

                default := False

            **stripquote**:
                Removes paired triple-quotes of protected/masked
                string sections. ::

                    "/a/'''head:'''/c" => "/a/head:/c"

                default := False

            **spf**:
                Source platform, defines the input syntax domain.
                When absent, *RTE_FILEURI* is used for the lookup.
                For the syntax refer to the API in the manual at
                :ref:`spf <OPTS_SPF>`.

                For additional details refer to
                :ref:`tpf and spf <TPF_AND_SPF>`,
                `paths.getspf() <paths.html#getspf>`_,
                :ref:`normapppathx() <def_normapppathx>`,
                `normpathx() <paths.html#normpathx>`_.

            **tpf**:
                Target platform. Even though the split form of a
                resource path is basically canonical, some details
                of the specifications require granular fine-tuning.
                Thus defines in case of ambiguity the *scheme* for
                *apppre=True*. Accepts the following values only. ::

                    tpf := (
                          RTE_FILEURI0 | 'fileuri0'  # RFC8089 - minimal
                        | RTE_FILEURI4 | 'fileuri4'  # RFC8089 - 4-slash UNC/POSIX-app
                        | RTE_FILEURI5 | 'fileuri5'  # RFC8089 - 5-slash UNC/POSIX-app
                        | RTE_FILEURI  | 'fileuri'   # RFC8089 - canonical
                    )

    Returns:
        The path split into its components, as produced by the selected
        platform splitter (the examples above show tuples). The result
        is prepared to be concatenated by *join()*.

        The interface is aware of the *os.path.sep* character, but a
        present regular expression may span multiple path components,
        which have to be handled dynamically when applying the path
        pattern e.g. by *findpattern*.

    Raises:
        FileSysObjectsError:
            The value of 'spf' is not a key of *rte2num*.

        pass-through

    """
    spf_key = kw.get('spf', RTE_FILEURI)
    try:
        spf = rte2num[spf_key]
    except KeyError:
        # the message reports the caller's raw value, 'None' when absent
        raise FileSysObjectsError(
            "parameter error: spf =" + str(kw.get('spf')))

    # only the selected splitter name is resolved
    splitter = splitpathx_win if spf & RTE_WIN32 else splitpathx_posix
    return splitter(spath, **kw)
def normpathx(spath, **kargs):
    """Normalize a path, similar to 'os.path.normpath()'.

    Optional extensions cover paths with basic application schemes and
    search paths, dos-drives, and the split of paths into directories.
    The various representations could be converted on-the-fly. ::

        smb, cifs, file, http/https, UNC, POSIX-network apps

    For advanced processing of application schemes refer to
    *normapppathx()* and 'splitapppathx()'.

    The path could include regular expressions *re* and *glob*,
    literals and masked parts.

    * regular expressions

      The supported regular expressions are native Python regular
      expressions as supported by 're' with support of expressions
      spanning multiple directories.

    * globs

      Standard module *glob*.

    * literals:

      Any literal path. Regular expressions and globs could be masked
      as quoted strings, which are kept unchanged.

    The *normpathx* provides the features as a simple interface for the
    normalization across multiple platforms. The companion interfaces
    provide various features, e.g. the *escapepathx* and *unescapepathx*
    of path names including *re* and *glob*.

    Processing overview:

    * For the targets 'local', 'cnp', and 'cnw' the call is delegated
      directly to 'os.path.normpath()', 'posixpath.normpath()', or
      'ntpath.normpath()'; no other option is evaluated.

    * With 'strip' set and a match of *_NULLDIRS* at the start of
      *spath*, the path is split by *splitpathx()* and joined with the
      separator of the target platform.

    * In all other cases the path is rewritten by *PATHSCANNER.sub()*
      with the callback registered in *sub_path_calls* for the target
      platform.

    Args:
        **spath**:
            A single path entry - no valid 'os.pathsep'. In case of a
            required search path including semantic 'os.pathsep' use
            'splitapppathx()'.

        kargs:
            **apppre**:
                Application prefix.

                default := False

            **keepsep**:
                Keeps significant separators, in particular the
                trailing path separator 'sep', and the trailing
                search path separator 'pathsep'.

                default := False

            **strip**:
                Strips redundancies from path names, ::

                    "a/.//./b/c/../" => "a/b"

                see related 'keepsep' ::

                    "a/.//./b/c/../" => "a/b/"

                default := True

            **stripquote**:
                Removes paired triple-quotes of protected/masked
                string sections. ::

                    "/a/'''head:'''/c" => "/a/head:/c"

                default := False

            **spf**:
                Source platform, defines the input syntax domain.
                For the syntax refer to the API in the manual at
                :ref:`spf <OPTS_SPF>`.

                For additional details refer to
                :ref:`tpf and spf <TPF_AND_SPF>`,
                `paths.getspf() <paths.html#getspf>`_,
                :ref:`normapppathx() <def_normapppathx>`,
                `normpathx() <paths.html#normpathx>`_.

            **tpf**:
                Target platform, defines the output syntax domain.
                For the syntax refer to the API in the manual at
                :ref:`tpf <OPTS_TPF>`.

                For additional details refer to
                :ref:`tpf and spf <TPF_AND_SPF>`,
                `paths.gettpf() <paths.html#gettpf>`_,
                :ref:`normapppathx() <def_normapppathx>`,
                `normpathx() <paths.html#normpathx>`_.

            **pathsep**:
                Changes path separator for the source platform.
                The value is resolved by *rte_map*. ::

                    pathsep := (
                          (: | ;)    # replaces by ':' or ';'
                        | <keyword>
                        | <#enum>
                    )

    Returns:
        Normalized path.

    Raises:
        PathError:
            The value of 'pathsep' is not a key of *rte_map*, or no
            callback is registered in *sub_path_calls* for the
            resolved target platform.

        pass-through

    """
    strip = kargs.get('strip', True)
    apppre = kargs.get('apppre', False)
    tpf = kargs.get('tpf', False)

    #
    # target platform
    #

    # the native targets are served by the system interfaces
    if tpf in ('local', RTE_LOCAL,):
        return os.path.normpath(spath)
    if tpf in ('cnp', RTE_CNP,):
        return posixpath.normpath(spath)
    if tpf in ('cnw', RTE_CNW,):
        return ntpath.normpath(spath)

    # only the separator, the resolved platform and its name are used
    target_sep, _, tpf, target_name, _ = gettpf(tpf, apppre=apppre)

    #
    # source platform
    #
    source_sep, source_pathsep, _, source_name = getspf(
        kargs.get('spf', False))

    # an explicit 'pathsep' overrides the one of the source platform,
    # empty or False means no replacement
    requested_pathsep = kargs.get('pathsep')
    if requested_pathsep:
        try:
            source_pathsep = rte_map[requested_pathsep][1]
        except KeyError:
            raise PathError(
                "unknown pathseparator: " + str(requested_pathsep))

    try:
        callback = sub_path_calls[tpf]
    except KeyError:
        raise PathError("Platform callback: " + str(tpf))

    kw = {
        'apppre': apppre,
        'keepsep': kargs.get('keepsep', False),
        'stripquote': kargs.get('stripquote', False),
    }

    # shared between all callback invocations of one substitution run
    state = ([], [],)

    if strip:
        try:
            nulldirs = _NULLDIRS.match(spath)
        except TypeError:
            nulldirs = _NULLDIRS.match(escapepathx(spath, force=True))

        if nulldirs:
            # split into items and join with the target separator
            kw.update(
                tpf=target_name,
                pathsep=source_pathsep,
                sep=source_sep,
                strip=strip,
            )
            return target_sep.join(splitpathx(spath, **kw))

        return PATHSCANNER.sub(
            lambda m: callback(
                m, source_name, strip, source_pathsep, state, **kw),
            spath)

    if source_pathsep is False:
        # NOTE(review): here the callback gets no 'pathsep' argument,
        # 'state' takes its position - kept as in the original call.
        return PATHSCANNER.sub(
            lambda m: callback(m, source_name, strip, state, **kw),
            spath)

    return PATHSCANNER.sub(
        lambda m: callback(
            m, source_name, strip, source_pathsep, state, **kw),
        spath)