"""The filesysobjects.pathstools module provides operations
for addressing paths of file-system-based resources.

.. note::

   The current version supports locally accessible file systems only.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import re, sre_constants
import glob
import itertools
from filesysobjects.plugins.smb import normapppathx
from filesysobjects import V3K
from os import scandir, walk
except ImportError:
from scandir import scandir, walk # @UnusedImport
from pysourceinfo.helper import getpythonpath_rel
import filesysobjects
from filesysobjects import ISSTR, PathError, \
FileSysObjectsError, PathToolsError, \
rte2num, \
from filesysobjects.paths import normpathx, escapepathx
from filesysobjects.apppaths import splitapppathx
__author__ = 'Arno-Can Uestuensoez'
__license__ = "Artistic-License-2.0 + Forced-Fairplay-Constraints"
__copyright__ = "Copyright (C) 2010-2016 Arno-Can Uestuensoez" \
    "@Ingenieurbuero Arno-Can Uestuensoez"
__version__ = '0.1.20'
__uuid__ = "4135ab0f-fbb8-45a2-a6b1-80d96c164b72"
__docformat__ = "restructuredtext en"

#: Base flags for all scanners of this module: verbose syntax, plus
#: ASCII-only matching where the *re* module offers it (Python 3 only,
#: which is the case the former ``if V3K:`` guard selected).
rebaseflags = re.X | getattr(re, 'ASCII', 0)  # @UndefinedVariable

#: Flags for the path scanners. *rebaseflags* already carries the ASCII
#: flag, so the former Python-3 and Python-2 branches collapse into one
#: expression.
pathflags = rebaseflags | re.M  # @UndefinedVariable

#: First stage regexpr scanner for 'normpathx', also used in 'escapepathx'.
QUOTESCANNER = re.compile(r"""
(["]{3}[\x01-\xFF]*?["]{3}) # 1 quoted string by 3 double quotes(") - similar to Python
|([']{3}[\x01-\xFF]*?[']{3}) # 2 quoted string by 3 single quotes(') - similar to Python
""", pathflags)
[docs]def findrelpath_in_searchpath(spath, plist=None, **kargs):
"""Search for subdirectory trees *spath* of the paths
contained in *plist*. ::

    MATCH : plist[x]/spath

Supports *glob*.

Args:
    spath:
        A path to be hooked into 'plist[]' when present.
        Could be either a literal, or a glob, as a relative
        or absolute path. Valid *spath* wildcard types:

        | literal | X  |
        | re      | -- |
        | glob    | X  |

        See common options for details. ::

            spath := (literal|glob)

    plist:
        List of potential hooks for 'spath'.
        The following formats are provided:

        1. list of single paths - used literally
        2. list of search path strings - each search path is split
        3. string with search path - split into its components
        4. string with a single path - used literally

        The default behavior is:

        * first: (1.)
        * second: (3.), this contains (4.)

        The case (2.) has to be forced by the key-option: 'subsplit',
        or to be prepared by the call 'clearpath(split=True,)'.

        Due to performance the case #1 should be preferred in order
        to save repetitive automatic conversion.

        See common options for further details.

        default := sys.path

    kargs:
        isDir:
            Is a directory.

        isFile:
            Is a file.

        isLink:
            Is a symbolic link.

        isPathByLink:
            Has a symbolic link in path.

        matchidx:
            Ignore matches '< #idx',
            return match '== #idx'. Depends on
            'reverse' ::

                matchidx := #idx

            default := 0 # first match

        noglob:
            Suppress application of 'glob'.

        not:
            Inverts the result to 'does not match' the defined
            criteria.

        raw:
            Suppress normalization by call of 'normpathx'.

        reverse:
            Reversed search order.

        subsplit:
            Splits each item part of a 'plist' option.

Returns:
    When successful returns the absolute pathname,
    else 'None'. For a list refer to iterator.

Raises:
    PathToolsError
    passed through exceptions
"""
# NOTE(review): this block appears to have lost its indentation and a number
# of one-word lines (e.g. 'else:', 'continue', 'pass') during extraction;
# restore them from version control before changing any logic here.
if not spath:
# Normalize *plist*: a list is taken as is, None means sys.path, a string is
# split into its search-path components, anything else is rejected.
if type(plist) is list:
elif plist is None:
plist = sys.path
elif type(plist) in ISSTR:
plist = splitapppathx(plist, apppre=False)
raise PathToolsError("Unknown type:" + str(plist))
# An absolute *spath* is answered here, before any option is parsed, so the
# options (matchidx, isDir, not, ...) are not applied to absolute input.
if os.path.isabs(spath):
if os.path.exists(spath):
return spath
return None
# Option defaults.
raw = False
# NOTE(review): _rgx is not read anywhere in the visible code.
_rgx = False
_rev = False
matchidx = 0
_chkT = False
_isL = False
_isD = False
_isF = False
_isPL = False
_not = False
_ng = False
_ssplit = False
for k, v in kargs.items():
if k == 'matchidx':
# Zero is accepted here although the message below says "int>0".
if not type(v) is int or v < 0:
raise PathToolsError("Requires int>0 matchidx=" + str(v))
matchidx = v
elif k == 'not':
_not = v
elif k == 'raw':
raw = v
elif k == 'reverse':
_rev = v
elif k == 'noglob':
_ng = v
elif k == 'isLink':
_chkT = True
_isL = v
elif k == 'isDir':
_chkT = True
_isD = v
elif k == 'isFile':
_chkT = True
_isF = v
elif k == 'isPathByLink':
# NOTE(review): only _chkT is set; no visible line stores v in _isPL, so
# the _isPL test in _checkit() cannot trigger - TODO confirm intent.
_chkT = True
elif k == 'subsplit':
# The passed value is ignored: the mere presence of the key enables it.
_ssplit = True
raise PathToolsError("Unknown param: " + str(k) + ":" + str(v))
if _ssplit: # split sub paths, but do not alter the callers source
plist = plist[:]
clearpath(plist, split=True)
# use canonical copy
if not raw:
_sp = normpathx(spath, keepsep=True, apppre=False)
_sp = spath[:]
# Type filter: with any is* option given, the candidate *p* must satisfy the
# first requested type test.
def _checkit(p):
_b = True
if _chkT:
if _isF and not os.path.isfile(p):
_b = False
elif _isD and not os.path.isdir(p):
_b = False
elif _isL and not os.path.islink(p):
_b = False
# NOTE(review): the 'isPathByLink' test calls os.path.isfile(), the same
# call as the isFile test - presumably a placeholder; verify.
elif _isPL and not os.path.isfile(p):
_b = False
return _b
# short it up for absolute input of existing paths, thus is a literal too!
if os.path.isabs(_sp) and os.path.exists(_sp): # exists as absolute
_b = _checkit(_sp)
for p in plist:
if not p:
if p.startswith(_sp):
# '&= True' leaves _b unchanged.
_b &= True
# Skip the first *matchidx* hits by counting the index down.
if _b and matchidx != 0:
_b = False
matchidx -= 1
return normpathx(_sp)
# With 'not' set, none of the visible return statements hands out a match.
if _b and not _not:
return _sp
return None
if _rev: #: reverse => bottom up for sorted
_pl = reversed(plist)
_pl = plist
# now look for hooks of relative paths in plist
for p in _pl:
if not p:
_b = True
if os.path.isabs(_sp):
_px = normpathx(_sp)
# Candidate = hook directory joined with the relative sub-path.
_px = normpathx(os.path.abspath(p + os.sep + _sp), stripquote=True)
if os.path.exists(_px):
_b = _checkit(_px)
if _b and matchidx != 0:
_b = False
matchidx -= 1
if _b and not _not:
return _px
if not _ng:
# try a glob
for gm in glob.glob(_px):
_b = _checkit(gm)
if _b and matchidx != 0:
_b = False
matchidx -= 1
if _b and not _not:
return gm
return None
def findrelpath_in_searchpath_iter(spath, plist=None, **kargs):
    """Iterate over all matches of *spath* in *plist*.

    Generator variant of *findrelpath_in_searchpath*: each entry of
    *plist* is queried on its own with an increasing 'matchidx' until
    the entry yields no further match.

    Args:
        spath:
            Relative or absolute path, literal or glob, see
            *findrelpath_in_searchpath*.

        plist:
            List of hook directories, or a single path string.

            default := sys.path

        kargs:
            Passed through to *findrelpath_in_searchpath*. A provided
            'matchidx' is overwritten, the iteration always starts at
            the first match.

    Returns:
        Yields the matched path names. An absolute *spath* is yielded
        at most once.

    Raises:
        passed through exceptions
    """
    if plist is None:
        plist = sys.path
    elif isinstance(plist, (list, tuple)):
        pass  # already a sequence of hooks
    elif type(plist) in ISSTR:
        plist = [plist]

    for pl in plist:
        kargs['matchidx'] = 0
        while True:
            r = findrelpath_in_searchpath(spath, [pl], **kargs)
            if not r:
                break
            yield r
            # findrelpath_in_searchpath() answers an absolute spath before
            # it evaluates 'matchidx', so the same path would be returned
            # for every index - stop instead of looping forever.
            if os.path.isabs(spath):
                return
            kargs['matchidx'] += 1
def findrelpath_in_uppertree(spath, plist=None, **kargs):
    """Placeholder for the upper-tree search, currently not implemented.

    The signature mirrors *findrelpath_in_searchpath*, the arguments
    are accepted but not evaluated.

    Args:
        spath:
            Ignored.

        plist:
            Ignored.

        kargs:
            Ignored.

    Returns:
        Always 'None'.
    """
    return None
def findrelpath_in_uppertree_iter(spath, plist=None, **kargs):
    """Iterate over all matches of *spath* in *plist*.

    The iteration is performed by *findrelpath_in_searchpath*: each
    entry of *plist* is queried on its own with an increasing
    'matchidx' until the entry yields no further match.

    Args:
        spath:
            Relative or absolute path, literal or glob, see
            *findrelpath_in_searchpath*.

        plist:
            List of hook directories, or a single path string.

            default := sys.path

        kargs:
            Passed through to *findrelpath_in_searchpath*. A provided
            'matchidx' is overwritten, the iteration always starts at
            the first match.

    Returns:
        Yields the matched path names. An absolute *spath* is yielded
        at most once.

    Raises:
        passed through exceptions
    """
    if plist is None:
        plist = sys.path
    elif not isinstance(plist, (list, tuple)) and type(plist) in ISSTR:
        # a single path string - same handling as
        # findrelpath_in_searchpath_iter(), else it is iterated by character
        plist = [plist]

    for pl in plist:
        kargs['matchidx'] = 0
        while True:
            r = findrelpath_in_searchpath(spath, [pl], **kargs)
            if not r:
                break
            yield r
            # findrelpath_in_searchpath() answers an absolute spath before
            # it evaluates 'matchidx', so the same path would be returned
            # for every index - stop instead of looping forever.
            if os.path.isabs(spath):
                return
            kargs['matchidx'] += 1
[docs]def findpattern(*srcdirs, **kargs):
"""Executes on each *srcdirs* a tree search with match and drop patterns.
The interface is basically similar to 'find' with enhanced match options.

The search operation relies for Python3.5+ on *os.scandir()* - for
Python2.7 on *scandir.scandir()*. The interface extends the lower layer
API by lists of filters with support for 'literal', 'glob' and/or
'regexpr' pattern.

The search algorithm is performed as:

0. If *srcdirs* is provided use each as starting file path,
   else initialize based on current working directory.

1. Set behavior attributes, see options:

   0. *listorder*
   1. *followlinks*
   2. *matchbehavior*
   3. *matchcnt*

2. If no filter parameters are present return afterwards.
   Else apply filter parameters. Each non-matching condition
   continues with next filter cycle by default. When no match
   the result is accepted, see options:

   0. check *level*
   1. check *blacklist*
   2. check *whitelist*
   3. check *dropdirs*
   4. check *type*

3. Apply string manipulation, see options

   a. check-cut *topcutlist*

The general behavior for the parameters is additive, this means
e.g. when provided *wildcards*, *globs*, *regexpr*, and *filter*
these are applied subsequently. Each match is added by default
to the result list. The drop parameters instantly remove the match
from the result list.

Args:
    srcdirs:
        List of top level paths for search. Supports *literal*,
        *re*, and *glob*.

        default := *[<current-workdir>,]*

    kargs:
        Additional control:

        abs:
            Defines the type of returned path value,
            when present. ::

                abs := (
                    True      # Force absolute paths.
                    | False   # Force relative paths.
                )

            default := "depends on type of input"

        blacklist:
            File system nodes to be dropped from the result.
            Supports Python *re* expressions,
            See `Variants of Pathname Parameters - Literals, RegExpr, and Glob <path_syntax.html#variants-of-pathname-parameters-literals-regexpr-and-glob>`_

            default := None

        whitelist:
            File system nodes to be added to the result.
            Supports Python *re* expressions,
            See `Variants of Pathname Parameters - Literals, RegExpr, and Glob <path_syntax.html#variants-of-pathname-parameters-literals-regexpr-and-glob>`_

            default := *\**

        followlinks:
            Follow symbolic links.

            default := *False*

        gid:
            Group ID.

        level:
            Depth of search. The values are: ::

                None: Search the subtree unlimited.
                0:    Current directory only, basically
                      the same as *os.listdir()*.
                >0:   Sub-directories of given level.

            default := *None*

        matchcnt:
            Defines the selection of matches:

            * *n<0*: the last *n*, last := *-1*.
            * *n=0*: all
            * *n>0*: the first *n*, first := *1*.

            default := 0

        nopostfix:
            Deletes for files the postfix.

        output:
            Output format. Current available formats are: ::

                output := (
                    OF_LIST_STR,  # list of file system path names
                    OF_LIST_OID,  # list of dotted object notation
                    OF_RAW        # list of raw entries *DirEntry*, see scandir
                )

        topcutlist:
            Cut listed topmost path elements from
            found list elements, resulting in relative
            path names.

            default := *['.' + os.sep]*

        listorder:
            Defines the order of the resulting list, same as
            *os.walk*. ::

                topdown := (True | False)

            default := True

        types:
            Search and list selected types only.
            The value is a bit-array. ::

                default := *T_ALL*

        uid:
            User ID.

Returns:
    Results in a list of found entries. When none,
    an empty list.

Raises:
    FileSysObjectsError, PathError, PathToolsError
    passed through exceptions
"""
# NOTE(review): this block appears to have lost its indentation and a number
# of one-word lines (e.g. 'try:', 'else:', 'return', '<list>.append(...)')
# during extraction; restore them from version control before changing logic.
# NOTE(review): M_*, OF_*, L_*, T_* and W_* are not defined in the visible
# part of the file - presumably imported from the package; verify.
ret = [] # resolved result
i0 = 0 # reusable index - handle with care
i1 = 0 # reusable index - handle with care
i2 = 0 # reusable index - handle with care
if not srcdirs:
# ('.') is a plain one-character string, not a tuple; iterating it still
# yields the single item '.'.
srcdirs = ('.')
# Option defaults.
_blacklist = None
_followlinks = False
_gid = []
_lvl = None
_matchbehavior = M_ALL | M_FILTPAR
_matchcnt = 0
_nopostfix = False
_output = OF_LIST_STR
_srcdirs = srcdirs
_topcutlist = ['.' + os.sep]
_topdown = L_TDOWN_WALK
_types = T_ALL
_uid = []
_whitelist = None
# NOTE(review): the documented option 'abs' has no branch in this loop.
for k, v in kargs.items():
if k == 'types':
_types = v
elif k == 'level':
# NOTE(review): an explicit level=None is stored as -1, while fetchnode()
# compares 'lvl > _lvl' - TODO confirm this does not end the search at the
# first level.
if v == None:
_lvl = -1
elif v >= 0:
_lvl = v
raise FileSysObjectsError("supported values for %s: None, x>=0, got: %s" % (str(k), str(v)))
elif k == 'listorder':
_topdown = v
elif k == 'followlinks':
_followlinks = v
elif k == 'matchbehavior':
_matchbehavior = v
elif k == 'matchcnt':
_matchcnt = v
elif k == 'nopostfix':
_nopostfix = v
elif k == 'topcutlist':
# A single string is wrapped into a list; other types are silently ignored.
if type(v) in ISSTR:
_topcutlist = [v]
elif isinstance(v, (list, tuple)):
_topcutlist = v
elif k == 'blacklist':
# A string is kept as a string here; it is compiled as one pattern below.
if type(v) in ISSTR:
_blacklist = v
elif isinstance(v, (list, tuple)):
_blacklist = v
raise FileSysObjectsError("'%s' requires (str|list|tuple), got: %s" % (k, str(v)))
elif k == 'whitelist':
if type(v) in ISSTR:
_whitelist = v
elif isinstance(v, (list, tuple)):
_whitelist = v
raise FileSysObjectsError("'%s' requires (str|list|tuple), got: %s" % (k, str(v)))
elif k == 'gid':
_gid = v
elif k == 'uid':
_uid = v
elif k == 'output':
_output = v
raise PathToolsError("Unknown parameter:" + str(k) + ":" +
# monitor thresholds
_clvl = 0 # current level
_cmatch = 0 # current match count
# cache for resolved source paths
_srccache = []
for i0 in _srcdirs: # workout source dirs
i1 = expandpath(i0, wildcards=W_GLOB | W_RE)
if i1:
# whitelist
# _wcache ends up as one compiled pattern for a string whitelist, or as a
# list for a sequence (the TypeError branch).
_wcache = None
if _whitelist:
# give it a try
_wcache = re.compile(normpathx(escapepathx(_whitelist), stripquote=True))
except TypeError:
_wcache = []
for x in _whitelist:
# pattern needs twice
_pat = normpathx(escapepathx(x), force=True, stripquote=True)
_pat = escapepathx(_pat, force=True)
except IndexError as e:
raise PathError(str(e) + '\n' + "requires (str | list | tuple), got: " + str(_whitelist))
except sre_constants.error as e:
# Extend the compile error with a caret marker at the error position.
if e.pattern is not None and e.pos is not None:
e.msg += '\n"re" compile-error:\n in: %s\n %s^\n => %s' % (
' ' * (e.pos),
raise PathError(str(e) + '\n' + e.msg)
# blacklist
# Same scheme as the whitelist, but normalized by normapppathx().
_bcache = None
if _blacklist:
_bcache = re.compile(normapppathx(escapepathx(_blacklist), stripquote=True))
except TypeError:
_bcache = []
for x in _blacklist:
_bcache.append(re.compile(normapppathx(escapepathx(x), stripquote=True)))
except IndexError as e:
raise PathError(str(e) + '\n' + "requires (str | list | tuple), got: " + str(_blacklist))
except sre_constants.error as e:
if e.pattern is not None and e.pos is not None:
e.msg += '\n"re" compile-error:\n in: %s\n %s^\n => %s' % (
' ' * (e.pos),
raise PathError(str(e) + '\n' + e.msg)
def checknode(nx, **kargs):
"""Check a single node by the prepared filters.

Args:
    nx:
        Directory entry: ::

            Python2.7:  *scandir.DirEntry*.
            Python3.5+: *os.DirEntry*.

Returns:
    MATCH: True
    ELSE:  False
"""
ret = True
# blacklist
# A single compiled pattern is tried first; a list has no 'match' attribute
# and is handled by the AttributeError branch.
if _bcache:
if _bcache.match(nx.path):
return False
except AttributeError:
ret = True
for spx in _bcache:
if spx.match(nx.path):
return False
# whitelist
if _wcache:
if not _wcache.match(nx.path):
ret = False
except AttributeError:
ret = False
for spx in _wcache:
if spx.match(nx.path):
ret = True
if not ret:
return False
# check type
# The XOR is non-zero whenever _types differs from T_ALL.
if _types ^ T_ALL:
ret = False
if _types & T_DIR and nx.is_dir(follow_symlinks=_followlinks):
ret = True
if _types & T_FILE and nx.is_file(follow_symlinks=_followlinks):
ret = True
if _types & T_SYML and nx.is_symlink():
ret = True
if _types & T_MNT and os.path.ismount(nx.path):
ret = True
si = nx.stat(follow_symlinks=_followlinks)
if _types & T_HARDL and si.st_nlink != 1:
ret = True
elif _types & T_DEV and si.st_dev:
ret = True
# elif _types & T_EXP and os.path.isfile(nx):
# res.append(nx)
# elif _types & T_LOCAL and os.path.isfile(nx):
# res.append(nx)
# elif _types & T_NODES and os.path.isfile(nx):
# res.append(nx)
elif _uid or _gid:
si = nx.stat(follow_symlinks=_followlinks)
# '(_uid)' and '(_gid)' are plain parenthesized names, not tuples.
if _uid and si.st_uid in (_uid):
ret = True
if _gid and si.st_gid in (_gid):
ret = True
return ret
# Scans the directory *start*, filters each entry by checknode() and
# descends into sub-directories with lvl+1.
def fetchnode(start, lvl=0):
if _lvl != None and lvl > _lvl:
_opwd = None
# NOTE(review): no assignment to 'abs' is visible in this function, so the
# name appears to resolve to the builtin abs(), which is always true -
# TODO confirm.
if abs:
# force absolute
start = os.path.abspath(start)
elif abs == False and start[0] == os.sep:
# NOTE(review): this rebinds the attribute os.path.curdir, no chdir call
# is made here.
_opwd = os.path.curdir
os.path.curdir = start
for sx in scandir(start):
if checknode(sx):
if _output in (OF_LIST_STR, OF_LIST_OID):
# a. check-cut postfix
# NOTE(review): the postfix is cut for directories here, the docstring
# speaks of files - verify.
if _nopostfix and sx.is_dir():
px = os.path.splitext(sx.path)[0]
px = sx.path
# b. check-cut topcutlist
if _topcutlist:
for i2 in _topcutlist:
if px.startswith(i2):
px = px[len(i2):]
if _output == OF_LIST_OID:
# NOTE(review): the docstring names this format OF_RAW.
elif _output == OF_LIST_RAW:
if sx.is_dir():
fetchnode(sx.path, lvl+1)
if _opwd:
os.path.curdir = _opwd
for s in _srccache: # workout top-root-list
return ret
#: Scanner for mixed path expressions, used by 'split_re_glob' and
#: 'splitre_separator' to classify each piece by the index of the matching
#: alternative (*lastindex*).
#
# Pre-reduce the result by glob for the literal prefix of a re:
# match on the longest sub-part applicable as prefix for a glob.
#
# NOTE(review): the line describing alternative 12 carries no pattern in this
# copy. If it is really absent, every capture index from 12 upwards is one
# lower than the number in its comment and than the numbers tested by the
# callers - TODO confirm against the upstream source.
_glob_prefix = re.compile(r"""
(^[a-zA-Z]:) # 1 - DOS drive
|(\[[\^][^\]]*\][*]*) # 2 - [^...] it is a re
|(\[[^/\]]*\][*]*) # 3 - [...] a char class without separator
|(\[[!][^/\]]*[/][^\]]*\][*]*) # 4 - [!.../...] a non-def char class with separator
|(\[[/]*\][*]*) # 5 - [/] a char class with posix-separator ONLY
|(\[[^/\[\]]*[/][^/\[]*\][*]*) # 6 - [.../...] a char class with posix-separator
|(\[[\\\\]*\][*]*) # 7 - [\\\\] a char class with nt-separator ONLY
|(\[[^\\\\]*[\\\\][^\\\\]*\][*]*) # 8 - [...\\\\...] a char class with nt-separator
|(\([^)]*\)) # 9 - (......) a re-group
|([/]+) # 10 - '/' class-less n * posix-separator
|([\\\\]+) # 11 - '\\' class-less n * nt-separator
# 12 - a string containing '.*' and optional additional chars
|([*]) # 13 - a wildcard character
|([^/\\\\\[\]]+)(?=/|$) # 14 - any character until next shlash - separator for posix
|([^/\\\\\[\]]+)(?=[\\\\]|$) # 15 - any character until next back-slash - separator for win
|(\[) # 16 - likely to be a syntax error, but could be literal too
|(\]) # 17 - likely to be a syntax error, but could be literal too
|(.) # 18 - any literal character
|($) # 19 - end of string
""", re.X) # @UndefinedVariable
def stripquotes(path, scope=Q_ALL_TRIPLE):
    """Strips quotes.

    Args:
        path:
            Path to strip off of quotes.

        scope:
            Scope to be applied, a bit mask which is tested against
            *Q_DOUBLE_TRIPLE* and *Q_SINGLE_TRIPLE*. ::

                scope := (
                    Q_ALL_TRIPLE    # " * 3 and ' * 3
                )

    Returns:
        Path without selected triple-quotes. Quoted sections which are
        not selected by *scope* and all unquoted text are kept as they
        are.
    """
    parts = []
    last = 0  # end of the previous scanner match within path
    for it in QUOTESCANNER.finditer(path):
        g = it.lastindex
        x = it.group(g) if g else ''
        if not x:
            continue

        # Keep the text the scanner stepped over. finditer() reports the
        # matches only, the unquoted text in between was lost before.
        parts.append(path[last:it.start()])
        last = it.end()

        selected = (
            (g == 1 and scope & Q_DOUBLE_TRIPLE)
            or (g == 2 and scope & Q_SINGLE_TRIPLE)
        )
        # cut the three quote characters at both ends when selected
        parts.append(x[3:-3] if selected else x)

    parts.append(path[last:])
    return ''.join(parts)
[docs]def split_re_glob(expr, **kargs):
"""Splits a mixed path expression into the *glob* + *literal*
prefix and the regular expression *re* postfix. The cut for the
expression is done by grouping the expressions at the edge of
paths by matched free characters of path separators. The
detection of the first *re* is done by detecting an unambiguously
non-glob expression. These are considered as *re* expressions: ::

    [^...]      an exclusive character class
    (...)       a group
    \[ or \]    escaped square brackets

Due to the various permitted character ranges,
where e.g. POSIX allows almost any character these
may still be valid characters. In this case they have
to be either escaped, or to be applied within character
classes.

The type resolution could in addition be controlled by
the option *typeprio*, which redefines some of the compilation.

See manual for further details.

Args:
    expr:
        expression to be split

    kargs:
        spf:
            Source platform, defines the separator for path items.

            default := None

        typeprio:
            The priority type in case of ambiguity. ::

                typeprio := (
                    W_GLOB    # interprets as *glob*
                    | W_RE    # interprets as *re*
                )

            default := W_RE

Returns:
    A list of two elements,
    'L[0]- glob' and 'L[1]- re'

Raises:
    FileSysObjectsError
"""
# NOTE(review): this block appears to have lost its indentation and a number
# of one-word lines (e.g. 'try:', 'else:', '):', '<list>.append(...)') during
# extraction; in particular no visible line fills _res[0]. Restore them from
# version control before changing any logic here.
# NOTE(review): the numbers tested against 'g' are capture indexes of
# _glob_prefix - see the note there about alternative 12.
_res = [[],[]]
_buf = ['']
_state = 0 # has changed from *glob* to *re*
# Resolve the source platform: the runtime platform RTE when not given, else
# the numeric value looked up in rte2num.
spf = kargs.get('spf')
if not spf:
spf = RTE
spf = rte2num[spf]
except KeyError:
raise FileSysObjectsError("Unknown source platform: " + str(spf))
typeprio = kargs.get('typeprio', W_RE)
for it in _glob_prefix.finditer(expr):
g = it.lastindex
# glob-prefix
if g in (
3, # class no sep
13, # wildcard char
_buf[-1] += it.group(g)
# This test starts a new 'if' chain rather than continuing the one above.
if g in (
12, # any-wildcard
_buf[-1] += it.group(g)
if typeprio & W_RE:
_state = 1
# any character until next shlash - separator for posix
elif g in (
14, # any char until next shlash
18, # 17 - any literal character
# is a path-item separator in any case - I am likely to ignore that POSIX allows slash as part of node name
_buf[-1] += it.group(g)
# any character before next back-slash - separator for win
elif g in (
15, # any char before next back-slash
# Both visible branches append the match to the current item.
if spf == RTE_WIN32:
# is a path-item separator for win
_buf[-1] += it.group(g)
# is a common character for path-item names
_buf[-1] += it.group(g)
# non-glob
elif g in (
2, # [^...]
4, # [^/]
16, # \[ basically a syntax error, but could be literal
17, # \] basically a syntax error, but could be literal
9, # (...) group
# First unambiguous re piece: keep the current item only and switch to re.
if not _state:
_buf = _buf[-1:]
_state = 1
_buf[-1] += it.group(g)
# free separators
elif g in (
10, # free n*posix-sep
if not _state:
if _buf[-1] != '':
_buf = ['']
elif _buf[-1] != '':
# free separators
elif g in (
11, # free n*nt-sep
# NOTE(review): a bit test here, an equality test for index 15 above.
if spf & RTE_WIN32:
if _buf[-1] != '':
_buf[-1] += it.group(g)
# separators in classes
elif g in (
5, # class with posix-sep [/]
# is a common character for path-item names
_buf[-1] += it.group(g)
# separators in classes
elif g in (
8, # class with nt-sep
# if spf == RTE_WIN32:
# # is a path-item separator for win
# _buf.append('')
# else:
# # is a common character for path-item names
# _buf[-1] += it.group(g)
_buf[-1] += it.group(g)
# dos drive
elif g in (
1, # DOS drive
_buf[-1] = it.group(g)
# Drop a trailing empty item.
if len(_buf) > 1 and _buf[-1] == '':
_buf = _buf[:-1]
if _state:
# has re
_res[1] = _buf
# assumably did not match any re
return _res
def get_subpath_product(dirs, subpaths):
    r"""Creates the textual cartesian product of directories and relative sub-paths.

    Additional checks should be applied by *clearpath()* and *expandpath()*.

    The calls for the specific platforms notations: ::

        cprod_posix = get_subpath_product(('/path/0', '/path/1'), ('suba', 'subb'))
        cprod_win32 = get_subpath_product(('c:\path\0', 'd:\path\1'), ('suba', 'subb'))

    results in: ::

        cprod_posix == [
            '/path/0/suba', '/path/0/subb',
            '/path/1/suba', '/path/1/subb'
        ]

        cprod_win32 == [
            'c:\path\0\suba', 'c:\path\0\subb',
            'd:\path\1\suba', 'd:\path\1\subb'
        ]

    Args:
        dirs:
            Base directories for the provided subdirectories.

        subpaths:
            Sub-paths, each to be located within each of the
            directories *dirs*.

    Returns:
        The list of path names resulting from the literal textual
        cartesian product.

    Raises:
        FileSysObjectsError: a sub-path is absolute
    """
    res = []
    for base, sub in itertools.product(dirs, subpaths):
        if os.path.isabs(sub):
            raise FileSysObjectsError("subpath is absolute:" + str((base, sub)))
        # NOTE(review): the statement that collected the product was lost in
        # this copy; os.path.join() is assumed as the joining operation -
        # confirm against the upstream source.
        res.append(os.path.join(base, sub))
    return res
#: The scanner for the glob to re compiler, used by 'glob_to_re', which
#: selects the conversion by the index of the matching alternative
#: (*lastindex*).
#
# Alternative 4 accepts every '[!...]' class, so the alternatives 5-7 behind
# it cannot match; 'glob_to_re' converts the indexes 3-7 in the same way.
_glob_to_re = re.compile(r"""
(["]{3}[\x01-\xFF]*?["]{3}) # 1 quoted string by 3 double quotes(") - similar to Python
|([']{3}[\x01-\xFF]*?[']{3}) # 2 quoted string by 3 single quotes(') - similar to Python
|(\[[!][^/\]]*[/][^\]]*\]) # 3 - [!.../...]
|(\[[!][^\]]*[^\]]*\]) # 4 - [!...]
|(\[[!][^/\]]*[*][^\]]*\]) # 5 - [!...*...]
|(\[[!][^/\]]*[.][^\]]*\]) # 6 - [!...'.'...]
|(\[[!][^/\]]*[?][^\]]*\]) # 7 - [!...'?'...]
|(\[[^/\]]*[/][^\]]*\]) # 8 - [.../...] - don't touch it
|(\[[^/\]]*[*][^\]]*\]) # 9 - [...*...] - don't touch it
|(\[[^/\]]*[.][^\]]*\]) # 10 - [...'.'...] - don't touch it
|(\[[^/\]]*[?][^\]]*\]) # 11 - [...'?'...] - don't touch it
|([\\\\][\\\\]) # 12 - escaped backslash
|([\\\\][*]) # 13 - escaped wildcard character
|([*]) # 14 - wildcard character
|([.]) # 15 - dot - for glob a literal dot
|([?]) # 16 - question mark - for glob any char
|(.) # 17 - any literal char
|($) # 18 - end of string
""", re.X) # @UndefinedVariable
def glob_to_re(expr, **kargs):
    r"""Compiles a *glob* to the corresponding *re*.

    The following mapping is implemented:

    +------+------+
    | glob | re   |
    +======+======+
    | \*   | .*   |
    +------+------+
    | [!x] | [^x] |
    +------+------+
    | .    | [.]  |
    +------+------+
    | ?    | .    |
    +------+------+

    Triple-quoted sections, non-negated character classes, escaped
    backslashes and all other characters are copied unchanged.

    Args:
        expr:
            A *glob* expression. The function relies on
            the caller and 'blindly' assumes to process
            a glob.

        kargs:
            spf:
                The platform defining the syntax domain
                of *expr*.

                default := RTE

    Returns:
        The converted expression.
    """
    spf = kargs.get('spf', RTE)

    out = []
    for it in _glob_to_re.finditer(expr):
        g = it.lastindex
        tok = it.group(g)

        if g in (3, 4, 5, 6, 7):  # exclusive classes: '[!' => '[^'
            out.append('[^' + tok[2:])
        elif g == 13:  # '\*'
            if spf == RTE_WIN32:
                # NOTE(review): as a regular expression this reads as
                # 'any number of literal dots' - presumably a backslash
                # separator followed by '.*' was intended; verify.
                out.append(r'\.*')
            else:
                # requires escaping when not wanted
                out.append(tok)
        elif g == 14:  # '*'
            out.append('.*')
        elif g == 15:  # '.'
            out.append('[.]')
        elif g == 16:  # '?'
            out.append('.')
        else:
            # quotes (1, 2), classes (8-11), '\\' (12), any other
            # character (17) and the end of string (18) are taken literally
            out.append(tok)

    return ''.join(out)
def splitre_separator(expr):
    """Splits a literal and/or wildcard expression into its
    directory items. This provides for intermediate file sets
    evaluated by directory-wise *glob* and *re*.

    The expression is cut at free path separators only, separators
    inside of character classes stay part of their item.

    Args:
        expr:
            expression to be split

    Returns:
        A list of partial wildcard expressions
    """
    # capture indexes of _glob_prefix
    free_separators = (
        10,  # free n*posix-sep
        11,  # free n*nt-sep
    )
    end_of_string = 19

    res = ['']
    for it in _glob_prefix.finditer(expr):
        g = it.lastindex
        if g in free_separators:
            # NOTE(review): the statement of this branch was lost in this
            # copy; opening a new item is assumed - confirm against the
            # upstream source.
            res.append('')
        elif g is not None and g != end_of_string:
            # drive, classes (with or without contained separators), groups,
            # wildcards and literal characters extend the current item
            res[-1] += it.group(g)

    # a trailing separator leaves an empty last item
    if len(res) > 1 and res[-1] == '':
        return res[:-1]
    return res
[docs]def clearpath(plist=None, **kargs):
"""Clears, splits and joins a list of path variables by various criteria.

Args:
    plist:
        List of paths to be cleared.
        See common options for details.

        default := sys.path

    kargs:
        abs:
            Converts all entries into absolute pathnames.
            This implies the activation of the options
            *shellvars* and *uservars*.

            default := False

        existent:
            Removes all existing items. For test
            and verification.

            default := False

        nonexistent:
            Removes all items which do not exist.

            default := False

        nonredundant:
            Removes all items which are not redundant.
            Results e.g. in multiple incarnations of the same
            file/path type.

            default := False

        normpath:
            Calls 'os.path.normpath' on each result.

            default := False

        redundant:
            Clears all items from redundancies.

            default := True

        rel:
            Converts all entries into relative pathnames.

            default := False

        reverse:
            This reverses the resulting search order
            from bottom-up to top-down. Takes effect on
            'redundant' only.

            default := False

        shellvars:
            Replaces shell variables within pathnames. ::

                $VAR, ${VAR}

            default := False

        uservars:
            Replaces special user variables as
            path-prefix. Currently support: ::

                ~ (tilde)

            default := False

        shrink:
            Drops resulting empty items.

            default := True

        split:
            Forces split of multiple paths items within
            one item into separate item entries.

            default := True

        stripquote:
            Removes paired triple-quotes of protected/masked
            string sections. ::

                "/a/'''head:'''/c" => "/a/head:/c"

            default := True

        withinItemOnly:
            Performs any action for each
            item of 'plist' only.

            default := False

Returns:
    When successful returns 'True', else returns either 'False',
    or raises an exception.
    NOTE(review): no 'return True' is visible in this copy of the
    function - verify.

Raises:
    passed through exceptions
"""
# NOTE(review): this block appears to have lost its indentation and a number
# of one-word lines (e.g. 'return', 'else:', '<list>.append(...)',
# '<list>.reverse()') during extraction; restore them from version control
# before changing any logic here.
if plist == None:
plist = sys.path
# _links = kargs.get('links', False)
_abs = kargs.get('abs', False)
_existent = kargs.get('existent', False)
_ne = kargs.get('nonexistent', False)
_normpath = kargs.get('normpath', False)
# NOTE(review): _nr is not read anywhere in the visible code.
_nr = kargs.get('nonredundant', False)
_redundant = kargs.get('redundant', True)
_rel = kargs.get('rel', False)
_reverse = kargs.get('reverse', False)
_shrink = kargs.get('shrink', True)
_split = kargs.get('split', True)
_user = kargs.get('uservars', False)
_vars = kargs.get('shellvars', False)
_wio = kargs.get('withinItemOnly', False)
_stripquote = kargs.get('stripquote', True)
def clearIt(px, ref=None):
"""The actual workhorse, applies the per-path options.

px: path to process
ref: reference path, not used in the visible code
"""
if _abs:
px = os.path.abspath(px)
if _existent and os.path.exists(px):
if _ne and not os.path.exists(px):
if _normpath:
px = os.path.normpath(px)
if _rel:
px = getpythonpath_rel(px, plist)
return px
def clrred(x):
"""clear redundancies"""
# The items seen so far are kept in the function attribute
# clearpath._clearlst, which is shared by all calls of clearpath().
if x in clearpath._clearlst:
return x
# --------------------------------------------
if not _wio:
clearpath._clearlst = []
pn = plist[:] # input list
if _reverse: # revese input list
# Empties the list object of the caller, the results are written back into
# the same object.
for p in range(len(plist)):
plist.pop() # clear source for new items - in place of caller
# split items into sub items as separate new items
if _split:
_pn = []
for p in pn:
if p:
for px in splitapppathx(p):
pn = _pn
# work out items
for p in pn: # each item
# within item only
if _wio:
clearpath._clearlst = []
# 'pn' is rebound to the result string of the current item; the loop keeps
# iterating the list object it was started on.
pn = ''
# reverse order
if _reverse:
plx = splitapppathx(p, stripquote=_stripquote)
plx = splitapppathx(p, stripquote=_stripquote)
# clear redundancies
for p1 in plx:
# 'abs' implies the expansion of shell and user variables.
if _vars or _abs:
p1 = os.path.expandvars(p1)
if _user or _abs:
p1 = os.path.expanduser(p1)
if _redundant:
px = clrred(clearIt(p1))
px = clearIt(p1)
# Each collected part is prefixed by os.pathsep, the leading one is cut
# below by pn[1:].
if _shrink:
if px:
pn += os.pathsep + px
if px:
pn += os.pathsep + px
pn += os.pathsep
if pn:
pn = pn[1:]
if _reverse and pn:
# plx = pn.split(os.pathsep)
plx = splitapppathx(pn, stripquote=_stripquote)
pn = os.pathsep.join(plx)
# shrink
if _shrink:
if pn:
if _reverse:
[docs]def expandpath(*paths, **kargs):
"""Splits, expands and normalizes a list of paths and search paths.
**REMINDER**: Non-existent path entries are dropped.
The input list '*\*paths*' may contain mixed entries of:
* search paths - combined by *os.pathsep*
* wildcards-paths containing '*literal*', '*glob*', and '*regexpr*' parts
* inserted user directory
* inserted environment variables
* file and directory paths
The resulting normalized *list* contains in case of matches
one path in each entry by the following algorithm.
.. code-block:: text
Expand contained:
1. expand environment variables
2. expand user directory
3. check and use if exists, else continue
If wildcards are selected continue with
4. split and resolve paths and remove quotes - *stripquote*
5. expand *literals* and *globs*
6. expand *literals* and *regexpr*
A list of paths and search paths to be expanded. For
supported pattern of following table refer to parameter
| type | *W_LITERAL* | *W_GLOB* | *W_RE* | *W_FULL* |
| literal | X | X | X | X |
| glob | | X | | X |
| re | | | X | X |
default := *W_RE* # see parameter **wildcards**
Contained file name paths are cut to their *dirname*. ::
expandvars := (True | False)
default := *False*
Expand embedded environment variables. ::
expandvars := (True | False)
default := True
Expand embedded user directories. ::
expandvars := (True | False)
default := True
Returns directories only.
default := all
Returns files only.
default := all
Clear nested subdirectories ::
expandvars := (True | False)
when *True*, e.g. ::
/a/b:/a/b/c::/a/b/c => /a/b
default := *False*
Controls whether regexpr may span multiple
directories, thus handling os.path.sep as
ordinary chars.
This in particular controls the technical
implementation of the iterative path
resolution for paths by intermixed
* True:
The treatment of path seperators within
regexpr has to be assured by the caller.
* False:
The regular experessions are resolved
in chunks seperated by the path seperator.
Thus each chunk is technically wrapped by
the re-module:
'<regexpr>' + os.path.sep+'$'
Reduces by dropping redundancies. The strip parameter
influences the match of regular expressions, which just
do a pattern match, thus hit null-separator directories too.
The strip of these prevents from unwanted matches on separator
characters. ::
strip := (
True # clear null-separators
| False # no strip at all
| all # clear any redundancy
| contain # contained sub directories
| multiple # multiple occurance
default := True
Source platform, defines the input syntax domain.
For the syntax refer to API in the manual at :ref:`spf <OPTS_SPF>`.
For additi0onal details refer to
:ref:`tpf and spf <TPF_AND_SPF>`,
`paths.getspf() <paths.html#getspf>`_,
:ref:`normapppathx() <def_normapppathx>`,
`normpathx() <paths.html#normpathx>`_.
Controls the type of path evaluation.
Supported *paths* values types are: ::
wildcards := (
W_LITERAL # literal existence check
| W_GLOB # globs, contains W_LITERAL
| W_RE # re, contains W_GLOB
| W_FULL # re
Expects mixed path names with *literals*, *globs*, and *regexpr*.
default := *W_RE*
In case of success a list with directory entries with
splitted search paths. An empty list when no results.
res = []
_dironly = kargs.get('dironly')
_escape = kargs.get('escape', False)
_expanduser = kargs.get('expanduser', True)
_expandvars = kargs.get('expandvars', True)
_isdir = kargs.get('isDir', None)
_isfile = kargs.get('isFile', None)
_nestclear = kargs.get('nestclear')
_spf = kargs.get('spf')
_splitglobregexpr = kargs.get('splitglobregexpr')
_tpf = kargs.get('tpf')
_wildcards = kargs.get('wildcards', W_RE)
_tsep, _tpsep, tpf, _tpfn, _apre = filesysobjects.paths.gettpf(_tpf)
_strip = kargs.get('strip', True)
if _strip not in (True, False, 'all', 'contain', 'multiple'):
raise PathToolsError("ERROR:Unknow param: strip=" + str(_strip))
_pl = list(paths)
clearpath(_pl, stripquote=False)
if _escape:
_pl = [escapepathx(x) for x in _pl]
# split search lists of contained multiple paths
_pl = []
for _plx in paths:
_plx = splitapppathx(_plx, stripquote=False, spf=_spf, tpf=_tpf)
clearpath(_pl, stripquote=False)
if _escape:
_pl = [escapepathx(x) for x in _pl]
for pi in _pl: # list of paths
# *** 1. expand environment variables
if _expandvars:
pi = os.path.expandvars(pi)
# *** 2. expand user directory
if _expanduser:
pi = os.path.expanduser(pi)
# 3. *** check and use if exists, else continue
if os.path.exists(pi):
# no need for search
if _wildcards == W_LITERAL_QUOTED: # literal with quotes
lppath = normapppathx(
pi, stripquote=True, strip=False, spf=_spf, tpf=_tpf,
if os.path.exists(lppath):
elif _wildcards: # requires recursive file system evaluation -> performance degradation
# *** 4. split, strip and unquote
lppath = list(splitapppathx(pi, stripquote=True, spf=_spf, tpf=_tpf))
for i in reversed(lppath):
_gr = split_re_glob(i)
_ix = []
if _gr[1]:
# the non-glob-part for '*-globbing' and post-filtering
if len(i) > 1:
_ind = lppath.index(i)
for _ixx in reversed(_ix):
lppath.insert(_ind, _ixx)
elif i == '':
if _strip:
_ind = lppath.index(i)
if _ind:
# first is root-slash
_idx = _lenppath = len(lppath) # abort endless recursion
# ** now we have the non-quoted effective path expression
# *** 5. expand literals and globs
pparts = '' #: largest-glob
gparts = ''
globtore = False
if lppath:
# re present
if _gr[0]:
# the glob part for explicit 'globbing'
if RTE & RTE_WIN32:
pparts = _tsep.join(_gr[0])
pparts = _tsep + _tsep.join(_gr[0])
gparts = pparts
elif _gr[0]:
# glob only
if _gr[0] != '':
if RTE & RTE_WIN32:
lppath = [_tsep.join(_gr[0])]
lppath = [_tsep + _tsep.join(_gr[0])]
lppath = [_tsep.join(_gr[0])]
pparts = glob_to_re(pparts)
_search_set_next = []
for px in lppath: # 3. expand literals and *globs* - walk the path top-down
_idx -= 1
# includes Posix-netapp and MS-shares
if px == '' and _idx > 0 and _strip:
elif pparts == _tsep and px != '':
_p = pparts + px
if RTE & RTE_WIN32:
if px[1] == ':':
_p = px
_p = pparts + _tsep + px
except IndexError:
_p = pparts + _tsep + px
elif pparts:
_p = pparts + _tsep + px
_p = px
if _idx > 0:
# make match criteria a bit stronger
_p += _tsep
if _wildcards & W_RE:
# try to resolve ambiguity between DSL, of e.g. [^x] and [!x]
# priority on W_RE here
_subre = split_re_glob(_p)
if _debug > 3:
print("DBG:subre = " + str(_subre))
if not _subre[1]:
# glob/literal only
# check match by glob for current path-part
_lglob = glob.iglob(_p) # check only for a match
_pi = next(_lglob) # test a match
pparts = _p
gparts = _p
except StopIteration:
gparts += _tsep + '***'
_pi = None # found first segment that did not match
# has *re*
_head = False
_lpre = len(_tsep.join(_subre[0]))
for chk in _subre[1]:
_lpre += len(_tsep + chk)
if chk and chk[0] in ('*',):
if not _head:
_head = True
'WARNING:Leading wildcard without char for re:\n'
' -> %s\n'
sys.stderr.write(' ' * _lpre + '^\n')
# has a non-glob/non-literal postfix, so search and filter
gparts += _tsep + '***'
_pi = None # found first segment that did not match
pparts += _tsep + px
if _pi: # current segment is literal part of the path
if _idx <= 0: # if current set are leafs
res.append(_pi) # add checked leaf
for _lg in _lglob: # collect remaining leafs
try: # check match by glob for current path-part
_lglob = glob.iglob(_p) # check only for a match
_pi = next(_lglob) # test a match
pparts = _p
gparts = _p
except StopIteration:
gparts += _tsep + '***'
_pi = None # found first segment that did not match
if _pi: # current segment is literal part of the path
if _idx <= 0: # if current set are leafs
res.append(_pi) # add checked leaf
for _lg in _lglob: # collect remaining leafs
if _wildcards & W_RE: # current level is possibly matched by expansion
# entering next level of expansion
# *** 6. expand regexpr
_s = _search_set_next[:] # use search set from previous - if there
_sext = glob.glob(gparts) # expand globs for post matching of re
_s.extend(_sext) # current roundexpanded
except StopIteration:
if gparts.endswith('***') and (px.startswith('.') or px.startswith('[.]')): # is shifted glob for re
_sext = glob.glob(os.path.dirname(gparts) + _tsep + '.*')
_s.extend(_sext) # current round expanded
except StopIteration:
_search_set_next = [] # prepare next round
matched = False
for z in _s:
if not _tpfn & RTE:
z = normpathx(z, tpf=_tpfn)
if pparts[-1] != r'$':
a = escapepathx(pparts + r'$', force=True)
a = escapepathx(pparts, force=True)
a = escapepathx(a, force=True)
b = escapepathx(z, force=True)
if not re.match(a, b):
if _debug > 3:
print("DBG:current-raw = " + str(z))
print("DBG:current-esc = " + str(b))
print("DBG:regexpr = " + str(a))
if _debug > 3:
print("DBG:current-raw = " + str(z))
print("DBG:matched by = " + str(pparts))
_search_set = glob.glob(z) # pick up first search subset
while _search_set:
p0 = _search_set.pop(0) # next item from search list
p0d = os.path.dirname(p0) # parent node for appending re
if os.path.isfile(p0): # nothing more to traverse
# it is a potential regexpr
# try to search for paths matching the LHS-part of
# the maximum of the remaining path by regexpr
_pat = escapepathx(p0d + _tsep + px + r'$', force=True)
_pat = escapepathx(_pat, force=True, charback=True)
if not _idx > 0 and re.match(_pat, escapepathx(p0, force=True)):
if _debug > 3:
print("DBG:res[i] = " + str(p0))
matched = True
except sre_constants.error as e:
if e.pattern is not None and e.pos is not None:
e.msg += '\ncompile-error:\n in: %s\n %s^\n => %s' % (
str(escapepathx(p0d + _tsep + px + r'$')),
' ' * (e.pos+1),
str(escapepathx(p0d + _tsep + px + r'$')[:e.pos+1])
raise PathError(str(e) + '\n' + e.msg)
_pat = escapepathx(p0d + _tsep + px + r'$', force=True)
_pat = escapepathx(_pat, force=True, charback=True)
if re.match(_pat, escapepathx(p0, force=True)): # check re match
if _idx <= 0: # it's a leaf
matched = True
else: # intermediate branch, set it for next round
except sre_constants.error as e:
if e.pattern is not None and e.pos is not None:
e.msg += '\ncompile-error:\n in: %s\n %s^\n => %s' % (
str(escapepathx(p0d + _tsep + px + r'$')),
' ' * (e.pos+1),
str(escapepathx(p0d + _tsep + px + r'$')[:e.pos+1])
raise PathError(str(e) + '\n' + e.msg)
if not matched and not _search_set_next:
# literal W_LITERAL
if os.path.exists(pi):
# normalize the result
res = [filesysobjects.apppaths.normpathx(x, tpf=_tpf,) for x in res]
# TODO: add app and glob/re-support
if _dironly:
_tpfn = filesysobjects.paths.gettpf(_tpf)[3]
if _tpfn & RTE_WIN32:
import ntpath
for rx in range(len(res)):
if not _tsep.isdir(res[rx]):
res[rx] = ntpath.dirname(res[rx], _tpf, **kargs)
if not res[rx]:
elif _tpfn & RTE_POSIX:
import posixpath
for rx in range(len(res)):
if not os.path.isdir(res[rx]):
res[rx] = posixpath.dirname(res[rx])
if not res[rx]:
for rx in range(len(res)):
if not os.path.isdir(res[rx]):
res[rx] = posixpath.dirname(res[rx])
res[rx] = os.path.dirname(res[rx])
if not res[rx]:
# TODO: integrate into the algorithm
if _isdir or _isfile:
_res = []
for r in res:
if _isdir and os.path.isdir(r):
elif _isfile and os.path.isfile(r):
res = _res
return res