|
"""
Lib/ctypes.util.find_library() support for AIX
Similar approach as done for Darwin support by using separate files
but unlike Darwin - no extension such as ctypes.macholib.*

dlopen() is an interface to AIX initAndLoad() - primary documentation at:
https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm
https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm

AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix
platforms, but also a BSD style - aka SVR3.

From AIX 5.3 Difference Addendum (December 2004)
2.9 SVR4 linking affinity
Nowadays, there are two major object file formats used by the operating systems:
XCOFF: The COFF enhanced by IBM and others. The original COFF (Common
Object File Format) was the base of SVR3 and BSD 4.2 systems.
ELF: Executable and Linking Format that was developed by AT&T and is a
base for SVR4 UNIX.

While the shared library content is identical on AIX - one is located as a filepath name
(svr4 style) and the other is located as a member of an archive (and the archive
is located as a filepath name).

The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit).
For svr4 either only one ABI is supported, or there are two directories, or there
are different file names. The most common solution for multiple ABI is multiple
directories.

For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient
as multiple shared libraries can be in the archive - even sharing the same name.
In documentation the archive is also referred to as the "base" and the shared
library object is referred to as the "member".

For dlopen() on AIX (read initAndLoad()) the calls are similar.
Default activity occurs when no path information is provided. When path
information is provided dlopen() does not search any other directories.

For SVR4 - the shared library name is the name of the file expected: libFOO.so
For AIX - the shared library is expressed as base(member). The search is for the
base (e.g., libFOO.a) and once the base is found the shared library - identified by
member (e.g., libFOO.so, or shr.o) is located and loaded.

The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3)
naming style.
"""
__author__ = "Michael Felt <[email protected]>"

import re
from os import environ, path
from sys import executable
from ctypes import c_void_p, sizeof
from subprocess import Popen, PIPE, DEVNULL

# Executable bit size - 32 or 64, derived from the size of a C pointer.
# Used to filter the search in an archive by size, e.g., -X64
AIX_ABI = sizeof(c_void_p) * 8
| 58 | + |
| 59 | + |
| 60 | +from sys import maxsize |
| 61 | +def _last_version(libnames, sep): |
| 62 | + def _num_version(libname): |
| 63 | + # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR] |
| 64 | + parts = libname.split(sep) |
| 65 | + nums = [] |
| 66 | + try: |
| 67 | + while parts: |
| 68 | + nums.insert(0, int(parts.pop())) |
| 69 | + except ValueError: |
| 70 | + pass |
| 71 | + return nums or [maxsize] |
| 72 | + return max(reversed(libnames), key=_num_version) |
| 73 | + |
def get_ld_header(p):
    """
    Scan p.stdout for the next loader-section header line.

    A candidate header is any line starting with '/', './' or '../'.
    The most recent candidate is returned (without its trailing newline)
    as soon as a later line containing "INDEX" is read; the stream is left
    positioned just after that "INDEX" line.
    Returns None when the stream ends without such a pair.
    """
    # "nested-function", but placed at module level
    ld_header = None
    for line in p.stdout:
        if line.startswith(('/', './', '../')):
            ld_header = line
        elif "INDEX" in line and ld_header is not None:
            # an "INDEX" line seen before any header is ignored rather
            # than dereferencing None
            return ld_header.rstrip('\n')
    return None
| 83 | + |
def get_ld_header_info(p):
    """
    Collect the info lines that follow a loader-section header.

    Reads p.stdout and returns, as a list, every consecutive line whose
    first character is a digit (lines are kept unmodified, newline
    included). The first line that does not start with a digit is consumed
    and ends the collection.
    """
    # "nested-function", but placed at module level
    # as an ld_header was found, return known paths, archives and members
    # these lines start with a digit
    info = []
    for line in p.stdout:
        if re.match("[0-9]", line):
            info.append(line)
        else:
            # blank line (separator), consume line and end for loop
            break
    return info
| 96 | + |
def get_ld_headers(file):
    """
    Parse the header of the loader section of executable and archives
    This function calls /usr/bin/dump -H as a subprocess
    and returns a list of (ld_header, ld_header_info) tuples.

    file is the path handed to dump; the -X flag is set from AIX_ABI so
    only objects of the running interpreter's bit size are reported.
    stderr of the subprocess is discarded.
    """
    # get_ld_headers parsing:
    # 1. Find a line that starts with /, ./, or ../ - set as ld_header
    # 2. If "INDEX" occurs in a following line - return ld_header
    # 3. get info (lines starting with [0-9])
    ldr_headers = []
    # the context manager closes the pipe and waits for the child on exit,
    # so the subprocess is always reaped
    with Popen(["/usr/bin/dump", "-X%s" % AIX_ABI, "-H", file],
               universal_newlines=True, stdout=PIPE, stderr=DEVNULL) as p:
        # be sure to read to the end-of-file - getting all entries
        while True:
            ld_header = get_ld_header(p)
            if not ld_header:
                break
            ldr_headers.append((ld_header, get_ld_header_info(p)))
    return ldr_headers
| 120 | + |
def get_shared(ld_headers):
    """
    extract the shareable objects from ld_headers
    character "[" is used to strip off the path information.
    Note: the "[" and "]" characters that are part of dump -H output
    are not removed here.

    ld_headers is a list of (header_line, info) tuples; only the header
    line is examined. Returns a list of strings such as "[shr.o]".
    """
    shared = []
    for (line, _) in ld_headers:
        # potential member lines contain "["
        # otherwise, no processing needed
        if "[" not in line:
            continue
        member = line[line.index("["):]
        # Strip off trailing colon (:) - only when it is actually there,
        # so a header without one does not lose its closing bracket
        if member.endswith(":"):
            member = member[:-1]
        shared.append(member)
    return shared
| 136 | + |
def get_one_match(expr, lines):
    """
    Must be only one match, otherwise result is None.
    When there is a match, strip leading "[" and trailing "]"

    expr is a regular expression fragment for the member name; it is
    wrapped in escaped square brackets before searching each entry of
    lines. Returns the text between the brackets when exactly one entry
    matches, otherwise None.
    """
    # member names in the ld_headers output are between square brackets
    pattern = re.compile(r'\[(%s)\]' % expr)
    matches = [m for m in map(pattern.search, lines) if m]
    if len(matches) == 1:
        return matches[0].group(1)
    return None
| 149 | + |
# additional processing to deal with AIX legacy names for 64-bit members
def get_legacy(members):
    """
    This routine provides historical aka legacy naming schemes started
    in AIX4 shared library support for library members names.
    e.g., in /usr/lib/libc.a the member name shr.o for 32-bit binary and
    shr_64.o for 64-bit binary.

    members is a list of bracketed member names. Returns the matching
    legacy member name, or None when there is no unique match.
    """
    if AIX_ABI == 64:
        # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
        return get_one_match(r'shr4?_?64\.o', members)
    # 32-bit legacy names - both shr.o and shr4.o exist.
    # shr.o is the preferred name so we look for shr.o first
    # i.e., shr4.o is returned only when shr.o does not exist
    for name in ('shr.o', 'shr4.o'):
        member = get_one_match(re.escape(name), members)
        if member:
            return member
    return None
| 173 | + |
def get_version(name, members):
    """
    Sort list of members and return highest numbered version - if it exists.
    This function is called when an unversioned libFOO.a(libFOO.so) has
    not been found.

    Versioning for the member name is expected to follow
    GNU LIBTOOL conventions: the highest version (X, then X.Y, then X.Y.Z)
     * find [libFoo.so.X]
     * find [libFoo.so.X.Y]
     * find [libFoo.so.X.Y.Z]

    Before the GNU convention became the standard scheme regardless of
    binary size AIX packagers used GNU convention "as-is" for 32-bit
    archive members but used a "distinguishing" name for 64-bit members.
    This scheme inserted either 64 or _64 between libFOO and .so
    - generally libFOO_64.so, but occasionally libFOO64.so

    name is interpolated into a regular expression as-is. Returns the
    selected versioned member name, or None when no member matches.
    """
    # the expression ending for versions must start as
    # '.so.[0-9]', i.e., *.so.[at least one digit]
    # while multiple, more specific expressions could be specified
    # to search for .so.X, .so.X.Y and .so.X.Y.Z
    # after the first required 'dot' digit
    # any combination of additional 'dot' digits pairs are accepted
    # anything more than libFOO.so.digits.digits.digits
    # should be seen as a member name outside normal expectations
    exprs = [r'lib%s\.so\.[0-9]+[0-9.]*' % name,
             r'lib%s_?64\.so\.[0-9]+[0-9.]*' % name]
    # the plain GNU form is tried first; the 64-tagged form is only
    # consulted when the first expression matched nothing
    for expr in exprs:
        found = (re.search(expr, line) for line in members)
        versions = [m.group(0) for m in found if m]
        if versions:
            return _last_version(versions, '.')
    return None
| 211 | + |
def get_member(name, members):
    """
    Return an archive member matching the request in name.
    Name is the library name without any prefix like lib, suffix like .so,
    or version number.
    Given a list of members find and return the most appropriate result
    Priority is given to generic libXXX.so, then a versioned libXXX.so.a.b.c
    and finally, legacy AIX naming scheme.

    name is interpolated into regular expressions as-is. Returns the
    member name, or None when nothing suitable is found.
    """
    # look first for a generic match - prepend lib and append .so
    member = get_one_match(r'lib%s\.so' % name, members)
    if not member and AIX_ABI == 64:
        # 64-bit builds also accept the libXXX64.so spelling
        member = get_one_match(r'lib%s64\.so' % name, members)
    if member:
        return member
    # since an exact match with .so as suffix was not found
    # look for a versioned name
    # If a versioned name is not found, look for AIX legacy member name
    member = get_version(name, members)
    if member:
        return member
    return get_legacy(members)
| 239 | + |
def get_libpaths():
    """
    On AIX, the buildtime searchpath is stored in the executable.
    as "loader header information".
    The command /usr/bin/dump -H extracts this info.

    Returns a list of directories: first the entries of LD_LIBRARY_PATH
    (preferred), or of LIBPATH when LD_LIBRARY_PATH is not set, followed
    by the search paths recorded in the loader headers of the running
    python executable.
    This mimics AIX dlopen() behavior.
    """
    libpaths = environ.get("LD_LIBRARY_PATH")
    if libpaths is None:
        libpaths = environ.get("LIBPATH")
    libpaths = [] if libpaths is None else libpaths.split(":")
    for (_, lines) in get_ld_headers(executable):
        for line in lines:
            fields = line.split()
            # the second (optional) argument is PATH if it includes a /
            # (a line carrying only the index has no second field)
            if len(fields) > 1 and "/" in fields[1]:
                libpaths.extend(fields[1].split(":"))
    return libpaths
| 265 | + |
def find_shared(paths, name):
    """
    paths is a list of directories to search for an archive.
    name is the abbreviated name given to find_library().
    Process: search "paths" for archive, and if an archive is found
    return the result of get_member().
    If an archive is not found then return None

    Returns a (base, member) tuple, e.g. ("libc.a", "shr.o"). The search
    stops at the first directory that contains the archive: when that
    archive has no suitable member, or no directory has the archive,
    (None, None) is returned.
    """
    # "lib" is prefixed to emulate compiler name resolution,
    # e.g., -lc to libc
    base = 'lib%s.a' % name
    for directory in paths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        archive = path.join(directory, base)
        if not path.exists(archive):
            continue
        members = get_shared(get_ld_headers(archive))
        # name is escaped because get_member() builds regular expressions
        member = get_member(re.escape(name), members)
        if member is not None:
            return (base, member)
        return (None, None)
    return (None, None)
| 290 | + |
def find_library(name):
    """AIX implementation of ctypes.util.find_library()
    Find an archive member that will dlopen(). If not available,
    also search for a file (or link) with a .so suffix.

    AIX supports two types of schemes that can be used with dlopen().
    The so-called SystemV Release4 (svr4) format is commonly suffixed
    with .so while the (default) AIX scheme has the library (archive)
    ending with the suffix .a
    As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
    the argument passed to dlopen must include both the library and
    the member names in a single string.

    find_library() looks first for an archive (.a) with a suitable member.
    If no archive+member pair is found, look for a .so file.

    Returns "libNAME.a(member)" for an archive hit, "libNAME.so" for a
    .so hit, or None when nothing is found.
    """

    libpaths = get_libpaths()
    (base, member) = find_shared(libpaths, name)
    if base is not None:
        return "%s(%s)" % (base, member)

    # To get here, a member in an archive has not been found
    # In other words, either:
    # a) a .a file was not found
    # b) a .a file did not have a suitable member
    # So, look for a .so file
    # Check libpaths for .so file
    # Note, the installation must prepare a link from a .so
    # to a versioned file
    # This is common practice by GNU libtool on other platforms
    soname = "lib%s.so" % name
    for directory in libpaths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        shlib = path.join(directory, soname)
        if path.exists(shlib):
            return soname
    # if we are here, we have not found anything plausible
    return None
0 commit comments