diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 000000000..da604b693 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,44 @@ +# Run unit tests (GitHub action) + +name: unit tests + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + + unit-tests-Ubuntu-jdk-17: + + runs-on: ubuntu-latest + + steps: + - run: echo "Branch ${{ github.ref }} of repository ${{ github.repository }}." + + - uses: actions/checkout@v3 + - uses: gradle/wrapper-validation-action@v1 + + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '17' + + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + # This has to match the language version we're targeting + python-version: '3.11' + + - name: Unit test with Gradle + run: ./gradlew --no-daemon test + + - name: Clean up Gradle cache + # Remove some files from the Gradle cache, so they aren't cached + # by GitHub Actions. Restoring these files from a GitHub Actions + # cache might cause problems for future builds. + #https://docs.github.com/en/actions/guides/building-and-testing-java-with-gradle#caching-dependencies + run: | + rm -f ~/.gradle/caches/modules-2/modules-2.lock + rm -f ~/.gradle/caches/modules-2/gc.properties diff --git a/.gitignore b/.gitignore index 636281998..a875f80a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,43 +1,62 @@ +# .gitignore: things not to put under source control + +# Places and extensions for unmanaged tools and scraps +/local/ +/scraps/ +/temp/ +*.sav* +*.patch + +# Project specific structure +**/bin/ +**/venv/ + +# Generated during build +**/build/ +**/_build/ +**/_static/ +**/_templates/ + +# Droppings of various tools, not for the record +**/.gradle/ +**/__pycache__/ +.AppleDouble +.DS_Store *.class +*.log *.pyc *.pyd *.pyo *.orig *.rej *.swp +*.tmp \#* *~ -.gradle +# +# IDE Files. (We don't check them in: make your own!) 
+# + +# Eclipse +.classpath +.externalToolBuilders/ +.project +.settings/ +.pydevproject # IntelliJ files *.eml *.ipr *.iml *.iws -.idea/* +.idea/ -# Eclipse files -.classpath -.externalToolBuilders/* -.project -.pydevproject - -# Netbeans files -nbproject -nbbuild.xml - -.vagrant - -.AppleDouble -.DS_Store -.settings -__pycache__ -bin - -# Jython specific +# +# Jython 2 specific: retire these when we can (or promote them to 3) +# +.hg* ant.properties -build build2 cachedir .jython_cache @@ -49,3 +68,15 @@ $test_* profile.txt out + +# Stuff dropped by bug tests (just in case that ant target is used) +bugtests/support_config.py +bugtests/test392m.py +bugtests/*.err +bugtests/*.out +bugtests/*.zip +bugtests/**/*.jar +bugtests/test*jar +bugtests/test*cache/ +bugtests/test*javapackage/ + diff --git a/.hgignore b/.hgignore deleted file mode 100644 index d02b1ae6c..000000000 --- a/.hgignore +++ /dev/null @@ -1,45 +0,0 @@ -syntax: glob -*.class -*.pyc -*.pyd -*.pyo -*.orig -*.rej -*.swp -\#* -*~ - -.gradle - -# IntelliJ files -*.eml -*.ipr -*.iml -*.iws -.idea/* - -# Eclipse files -.classpath -.externalToolBuilders/* -.project -.pydevproject - -.AppleDouble -.DS_Store -.settings -__pycache__ -bin - -# Jython specific -ant.properties -build -build2 -cachedir -.jython_cache -dist -publications -reports -jffi*.dll - -profile.txt -out diff --git a/.hgsub b/.hgsub deleted file mode 100644 index e69de29bb..000000000 diff --git a/.hgsubstate b/.hgsubstate deleted file mode 100644 index e69de29bb..000000000 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 792a10bf2..000000000 --- a/.travis.yml +++ /dev/null @@ -1,53 +0,0 @@ -language: java - -addons: - hostname: jyshort - - homebrew: # only active on osx - update: true - packages: - - ant - - -matrix: - include: - - os: linux - dist: xenial - addons: - apt: - packages: - - ant - jdk: openjdk8 - - - os: linux - dist: xenial - addons: - apt: - packages: - - ant - jdk: openjdk11 - - - os: linux - dist: xenial - 
addons: - apt: - packages: - - ant - jdk: openjdk12 - - - os: osx - osx_image: xcode9.3 # Last supporting Java 8 - jdk: oraclejdk8 - - -install: - - ant developer-build - - -script: - - ant regrtest-travis - -notifications: - email: - recipients: - - jython-dev@lists.sourceforge.net diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 36a45e9d4..0c01b30a1 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -409,25 +409,44 @@ def test_universal_newlines_communicate(self): # Interpreter without universal newline support self.assertEqual(stdout, "line1\nline2\rline3\r\nline4\r\nline5\nline6") + @unittest.skipIf(jython, "file descriptor limit not reached on Jython") def test_no_leaking(self): # Make sure we leak no resources - if not hasattr(test_support, "is_resource_enabled") \ - or test_support.is_resource_enabled("subprocess") and not mswindows \ - and not jython: + if not mswindows: max_handles = 1026 # too much for most UNIX systems else: - # Settle for 65 on jython: spawning jython processes takes a - # long time - max_handles = 65 - for i in range(max_handles): - p = subprocess.Popen([sys.executable, "-c", - "import sys;sys.stdout.write(sys.stdin.read())"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - data = p.communicate("lime")[0] - self.assertEqual(data, "lime") - + max_handles = 2050 # too much for (at least some) Windows setups + handles = [] + try: + for i in range(max_handles): + try: + handles.append(os.open(test_support.TESTFN, + os.O_WRONLY | os.O_CREAT)) + except OSError as e: + if e.errno != errno.EMFILE: + raise + break + else: + self.skipTest("failed to reach the file descriptor limit " + "(tried %d)" % max_handles) + # Close a couple of them (should be enough for a subprocess) + for i in range(10): + os.close(handles.pop()) + # Loop creating some subprocesses. If one of them leaks some fds, + # the next loop iteration will fail by reaching the max fd limit. 
+ for i in range(15): + p = subprocess.Popen([sys.executable, "-c", + "import sys;" + "sys.stdout.write(sys.stdin.read())"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + data = p.communicate(b"lime")[0] + self.assertEqual(data, b"lime") + finally: + for h in handles: + os.close(h) + test_support.unlink(test_support.TESTFN) def test_list2cmdline(self): self.assertEqual(subprocess.list2cmdline(['a b c', 'd', 'e']), diff --git a/NEWS b/NEWS index b615b07a7..e15177fef 100644 --- a/NEWS +++ b/NEWS @@ -11,13 +11,15 @@ For more details, three sources are available according to type: Jython 2.7.3a1 Bugs fixed + - [ GH-35 ] Travis CI on JDKs 8, 11, 13 and add Windows to OSes + - [ GH-50 ] (First) migration from Mercurial corrupted project history - [ GH-27 ] -Q new always fails - [ 2892 ] Migrate from hg.python.org to GitHub - [ GH-4 ] Swap from Mercurial to Git as our SCM tool - [ GH-2 ] Transfer closed-fixed issues in NEWS from frozen-mirror New Features - - The project has moved its home to GitHub. + - The project has moved its home to GitHub (twice). Jython 2.7.2 same as 2.7.2rc1 diff --git a/README.md b/README.md index db4ca0cd5..21084ffbe 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,62 @@ -# Jython: Python for the Java Platform -[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.python/jython-standalone/badge.svg)](https://search.maven.org/artifact/org.python/jython-standalone/) -[![Javadocs](https://www.javadoc.io/badge/org.python/jython-standalone.svg)](https://www.javadoc.io/doc/org.python/jython-standalone) +[![APIdia](https://apidia.net/java/Jython/3/badge.svg)](https://apidia.net/java/Jython/3) + +# Jython 3: Python 3 for the Java Platform This is the development repository of Jython, the implementation of Python in Java. -Only version 2.7 of Python can be supported at present -(but watch this space for a 3.x version). -Along with good (not perfect!) 
language -and runtime compatibility with CPython 2.7, -Jython 2.7 provides substantial support of the Python ecosystem. -This includes built-in support of *pip/setuptools* -(you can use `bin/pip` if the targets do not include `C` extensions) -and a native launcher for Windows (`bin/jython.exe`) -that works essentially as the `python` command. +You are looking at the branch intended to support version 3.8 of Python: +it doesn't build anything useful right now. +Jython 3.x is not yet a viable product you can use in applications. +Head over to [the 2.7 branch](https://github.com/jython/jython/tree/master) +to find the current release. + + +## Target -Jim Baker presented a talk at PyCon 2015 about Jython 2.7, -including demos of new features: https://www.youtube.com/watch?v=hLm3garVQFo +Along with good language and runtime compatibility with CPython 3.8, +Jython 3.8 is intended to provide substantial support of the Python ecosystem, +and solid Java integration. + +For more about the target see the +[Jython 3 MVP](https://www.jython.org/jython-3-mvp) +page. See [ACKNOWLEDGMENTS](ACKNOWLEDGMENTS) for details about Jython's copyright, license, contributors, and mailing lists. Consult [NEWS](NEWS) for detailed release notes, including bugs fixed, backwards breaking changes, and new features. -We sincerely thank all who contribute to Jython, by bug reports, patches, +We are sincerely grateful to all who contribute to Jython, by bug reports, patches, pull requests, documentation changes and e-mail discussions. -## How to build Jython -The project uses Git for version-control, -and the master repository is at https://github.com/jython/jython, -You should clone this repository to create a buildable copy of the latest state -of the Jython source. -The previously authoritative repository at https://hg.python.org/jython is not now in use, -remaining frozen at v2.7.2. 
+## Current focus of work -### Build using `ant` for development +The current focus is to establish a foundation for the run-time +that makes good use of the dynamic language features of the JVM. +There are just enough classes here to illustrate the architectural ideas +underpinning the new foundation. -Jython is normally built using `ant`. -It is necessary to have Ant and at least a Java 8 SDK on the path. -To build Jython in development, we generally use the command: -``` -ant -``` -This leaves an executable in `dist/bin` -that you may run from the check-out root with: -``` -dist/bin/jython -``` -Other `ant` targets exist, notably `clean`, and `jar`. +The code of the Jython 2 implementation is also present on this branch, +waiting to be shifted onto the new foundations (or definitively dropped), +file by file. It does not participate in the build. -You can test your build of Jython (by running the regression tests), -with the command: -``` -dist/bin/jython -m test.regrtest -e -m regrtest_memo.txt -``` -### Build an installer using `ant` +## How to build Jython -If you want to install a snapshot build of Jython, use the command: -``` -ant installer -``` -This will leave you with a snapshot installer JAR in `dist`, -that you can run with: -``` -java -jar jython-installer.jar -``` -for the graphical installer, or: +### Build using `Gradle` for development + +Jython may be built using `Gradle`. ``` -java -jar jython-installer.jar --console +$ ./gradlew build ``` -For the console version. (A `--help` option gives you the full story.) +In its present state, no executable is built, although there is a JAR, +that in principle could be used in sample programs. -### Build a JAR using Gradle +Jython is normally built only to run the unit tests (the `core:test` target). +The documentation built by the `core:javadoc` target may also be interesting. 
+Running the unit tests in `core/src/test/java`, +under a debugger in an IDE, +is perhaps the best way to explore how the code works. -Experimentally, we have a Gradle build that results in a family of JARs, -and a POM. -This is intended to provide the Jython core in a form that Gradle and Maven -users can consume as a dependency. -Invoke this with: -``` -PS> .\gradlew publish -``` -and a JAR and POM are delivered to ` .build2\repo` - -Whereas the JARs delivered by the installer are somewhat "fat", -embedding certain dependencies in shaded (renamed) form, -the JAR from the Gradle build is "spare" -and cites its dependencies externally through a POM. -The project would like to know if this is being done suitably -for downstream use. +Watch this space for further developments. diff --git a/b/.idea/libraries/extlibs.xml b/b/.idea/libraries/extlibs.xml deleted file mode 100644 index 2e5e50203..000000000 --- a/b/.idea/libraries/extlibs.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/b/.idea/libraries/jar.xml b/b/.idea/libraries/jar.xml deleted file mode 100644 index 10beca9d8..000000000 --- a/b/.idea/libraries/jar.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/b/.idea/libraries/svnant_jars.xml b/b/.idea/libraries/svnant_jars.xml deleted file mode 100644 index 36cddb560..000000000 --- a/b/.idea/libraries/svnant_jars.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/b/.idea/libraries/test.xml b/b/.idea/libraries/test.xml deleted file mode 100644 index 926b5e97d..000000000 --- a/b/.idea/libraries/test.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/bugtests/README.txt b/bugtests/README.txt deleted file mode 100644 index 7e86be369..000000000 --- a/bugtests/README.txt +++ /dev/null @@ -1,69 +0,0 @@ - -This 
directory contains small tests that attempt to ensure that old fixed -bugs do not reappear. - -These tests are not actively maintained and while many pass, others have been -broken by changes in the main codebase and now fail for what might be spurious -reasons. - -As the regression tests have grown, the need for these has largely ceased. -It is likely these tests will be removed eventually. - - -Running -======= - -Start a command prompt in this ("bugtests") directory. Make sure that -the "classes" subdirectory is included in the CLASSPATH environment -variable and that the "bugtests" directory is *not* included in in CLASSPATH. - -Create a file called "support_config.py" which contains the following three -entries: - ------ -java_home = "" -jython_home = "" -classpath = "" ------ - -This is used to make the tests more platform independent. My file for OS X -looks like: - ------ -java_home = "/Library/Java/Home" -jython_home = "/Users/bzimmer/Development/sourceforge/jython/dist" -classpath = jython_home + "/jython-dev.jar:classes" ------ - -Run the script "driver.py". - -After running the tests the applets should be tested by starting the -appletviewer on all the *.html files. Both MS and javasoft's appletviewer -should be tested. - -Adding new tests -================ - -The tests follow a strict naming scheme that ensure that we always know -which files that belong to each test. The main script of each test is called -"testNNN" where NNN is a 3-digit number. All other files that belong to this -test also starts with testNNN. There are no exceptions to this rule! - -Dependent file normally follow a simple naming - -testNNNp a python package -testNNNm a python module -testNNNj a java class -testNNNi a java interface -testNNNc a python module meant for compilation with jythonc. -testNNNa an applet, compiled with jythonc. -testNNNs modules with deliberate syntax errors. - -The tests should always complete without throwing exceptions or errors. 
Since -these tests also cover bugs which may not have been fixed yet, the test should -instead throw a TestWarning exception. When the bug is fixed the TestWarning -should be removed and replaced with a TestError instead. - - - - diff --git a/bugtests/classes/test292j.java b/bugtests/classes/test292j.java deleted file mode 100644 index 105864796..000000000 --- a/bugtests/classes/test292j.java +++ /dev/null @@ -1,18 +0,0 @@ - -import org.python.core.*; - -public class test292j { - public static void main(String[] args) { - PySystemState.initialize(); - Py.getSystemState().path = new PyList(); - Py.getSystemState().path.append(new PyString(".")); - - try { - __builtin__.__import__("test292j1"); - } catch (PyException exc) { - if (!exc.match(Py.ImportError)) - throw exc; - } - } -} - diff --git a/bugtests/classes/test301p/A.java b/bugtests/classes/test301p/A.java deleted file mode 100644 index 06d679fde..000000000 --- a/bugtests/classes/test301p/A.java +++ /dev/null @@ -1,7 +0,0 @@ - -package test301p; - -public class A { - public B b; //b never used -} - diff --git a/bugtests/classes/test301p/B.java b/bugtests/classes/test301p/B.java deleted file mode 100644 index db39fe600..000000000 --- a/bugtests/classes/test301p/B.java +++ /dev/null @@ -1,6 +0,0 @@ -package test301p; - -public class B { - -} - diff --git a/bugtests/classes/test336p/Data.java b/bugtests/classes/test336p/Data.java deleted file mode 100644 index 9892669f4..000000000 --- a/bugtests/classes/test336p/Data.java +++ /dev/null @@ -1,4 +0,0 @@ -package test336p; - -public class Data { } - diff --git a/bugtests/classes/test336p/data/MyData.java b/bugtests/classes/test336p/data/MyData.java deleted file mode 100644 index 9b0ef90f4..000000000 --- a/bugtests/classes/test336p/data/MyData.java +++ /dev/null @@ -1,4 +0,0 @@ -package test336p.data; - -public class MyData { } - diff --git a/bugtests/driver.py b/bugtests/driver.py deleted file mode 100644 index 8b7eacb68..000000000 --- a/bugtests/driver.py +++ 
/dev/null @@ -1,76 +0,0 @@ - -import sys, string, traceback, getopt, support, os, glob - -failures = {} -warnings = {} -skipped = {} - - -def runTests(seq): - def report(msg, errors_dict, loud=1): - print n, msg - errors_dict[n] = 1 - if loud: - if m and hasattr(m, "__doc__"): - print m.__doc__.strip() - print " ", sys.exc_info()[0] - print " ", sys.exc_info()[1] - traceback.print_tb(sys.exc_info()[2], file=sys.stdout) - - for n in seq: - m = None - try: - stdout = sys.stdout - if os.path.isfile(n + ".py"): - m = __import__(n) - sys.stdout = stdout - print n, "OK" - else: - print n, "Skipped" - except support.TestWarning: - sys.stdout = stdout - report("Warning", warnings, loud=loud_warnings) - except support.TestSkip: - report("Skipped", skipped, loud=0) - except: - sys.stdout = stdout - report("Failed", failures) - - summarize(failures, "failures") - summarize(warnings, "warnings") - summarize(skipped, "skipped") - -def summarize(errors_dict, description): - t = errors_dict.keys() - t.sort() - print "%d %s" % (len(t), description) - print t - -if __name__ == '__main__': - opts, args = getopt.getopt(sys.argv[1:], 'wc', 'runjythonc') - loud_warnings = ('-w',"") in opts - support.test_jythonc = ('--runjythonc', '') in opts - - if loud_warnings: - print "LOUD warnings" - if support.test_jythonc: - print 'Running jythonc tests' - - sys.path[:0] = ['classes'] - - if len(args) > 0: - tests = [int(test) for test in args[0].split(',')] - else: - testfiles = glob.glob('test???.py') - testfiles.sort() - lastTest = testfiles[-1] - tests = range(int(lastTest[4:7]) + 1)# upper bound: last test + 1 - runTests(["test%3.3d" % i for i in tests]) - - if len(failures) + len(warnings) > 0: - rc = 1 - else: - rc = 0 - - sys.exit(rc) - diff --git a/bugtests/jarmaker.py b/bugtests/jarmaker.py deleted file mode 100644 index a6efdd689..000000000 --- a/bugtests/jarmaker.py +++ /dev/null @@ -1,24 +0,0 @@ -import support -import sys -import os - -from java.io import File - -package = 
"javapackage" -clazz = "JavaClass" -jardir = "simplejar" -jarfn = "simple.jar" -clazzfile = File(jardir + '/'+ package, "%s.class" % clazz) # java.io.File - -def mkjar(): - jarfile = File(jardir, jarfn) - # create a .jar file containing a .class file - if not jarfile.exists(): - support.compileJava("%s/%s/%s.java" % (jardir, package, clazz)) - jarPacker = support.JarPacker(jarfile, bufsize=128) - jarPacker.addFile(clazzfile, parentDirName=package) - jarPacker.close() - return jardir + '/' + jarfn, package, clazz - - - diff --git a/bugtests/simplejar/javapackage/JavaClass.java b/bugtests/simplejar/javapackage/JavaClass.java deleted file mode 100644 index c201b93d6..000000000 --- a/bugtests/simplejar/javapackage/JavaClass.java +++ /dev/null @@ -1,3 +0,0 @@ -package javapackage; - -public class JavaClass {} diff --git a/bugtests/stdtest.py b/bugtests/stdtest.py deleted file mode 100644 index 8d223a30f..000000000 --- a/bugtests/stdtest.py +++ /dev/null @@ -1,183 +0,0 @@ - - -import test.regrtest - -import os, sys - - - -skipped = [ - 'test_al', - 'test_asynchat', - 'test_audioop', - 'test_b1', - 'test_b2', - 'test_bastion', - 'test_bsddb', - 'test_capi', - 'test_cd', - 'test_cl', - 'test_cmath', - 'test_commands', - 'test_crypt', - 'test_curses', - 'test_dbm', - 'test_dl', - 'test_email_codecs', # Needs asian codecs. - 'test_fcntl', - 'test_fork1', - 'test_frozen', - 'test_future1', # called as part of test_future - 'test_future2', # called as part of test_future - 'test_future3', # called as part of test_future - 'test_gc', - 'test_gettext', - 'test_getargs', # Test a python bug, this throws a different exc in jy. 
- 'test_gdbm', - 'test_gl', - 'test_grp', - 'test_hotshot', - 'test_imageop', - 'test_imgfile', - 'test_linuxaudiodev', - 'test_locale', - 'test_longexp', - 'test_minidom', - 'test_mmap', - 'test_nis', - 'test_openpty', - 'test_parser', - 'test_poll', - 'test_pty', - 'test_pwd', - 'test_regex', - 'test_rgbimg', - 'test_rotor', - 'test_sax', - 'test_select', - 'test_signal', - 'test_socketserver', - 'test_socket_ssl', - 'test_strop', - 'test_sundry', - 'test_sunaudiodev', - 'test_symtable', - 'test_timing', - 'test_unicodedata', - 'test_wave', - 'test_winreg', - 'test_winsound', -] - -failures = [ - 'test_array', - 'test_binop', - 'test_codeop', - 'test_compare', - 'test_cookie', - 'test_cpickle', - 'test_descr', - 'test_descrtut', - 'test_doctest2', - 'test_email', - 'test_extcall', - 'test_fpformat', - 'test_funcattrs', - 'test_generators', - 'test_getargs', - 'test_hmac', - 'test_inspect', - 'test_iter', - 'test_largefile', - 'test_long', - 'test_long_future', - 'test_mailbox', - 'test_marshal', - 'test_mhlib', - 'test_mutants', - 'test_ntpath', - 'test_os', - 'test_operations', - 'test_pickle', - 'test_pkgimport', - 'test_popen2', - 'test_profile', - 'test_profilehooks', - 'test_pyclbr', - 'test_pyexpat', - 'test_repr', - 'test_richcmp', - 'test_scope', - 'test_socket', - 'test_struct', - 'test_tempfile', - 'test_threaded_import', - 'test_threadedtempfile', - 'test_trace', - 'test_types', - 'test_ucn', - 'test_unary', - 'test_unicode', - 'test_unicode_file', - 'test_urllib2', - 'test_userlist', - 'test_uu', - 'test_weakref', - 'test_zlib', -] - - - - -def usage(): - print "jython stdtest.py [options] [tests]" - print " -h, --help : print this help" - print " -v, --verbose : turn on verbosity" - print " -s, --skipped : Run the tests that is normally skipped" - print " -f, --failures : Run the tests that normally fails" - print " -t, --test : Run the tests listed as arguments" - -def main(): - import getopt - try: - opts, args = getopt.getopt(sys.argv[1:], 
"hvsft:", [ - "help", "verbose", "skipped", "failures", "test="]) - except getopt.GetoptError: - # print help information and exit: - usage() - sys.exit(2) - - - alltests = [ f[:-3] for f in os.listdir("../dist/Lib/test") - if f.startswith("test_") and f.endswith(".py") ] - tests = [s for s in alltests if s not in failures and s not in skipped] - verbose = 0 - - for o, a in opts: - if o in ("-h", "--help"): - usage() - sys.exit() - if o in ("-v", "--verbose"): - verbose = 1 - if o in ("-s", "--skipped"): - tests = skipped - if o in ("-f", "--failures"): - tests = failures - if o in ("-t", "--test"): - tests = a.split(",") - - sys.argv = [] - - if tests.count("test_largefile") > 0: - tests.remove("test_largefile") - - test.regrtest.main(tests, verbose=verbose) - - -if __name__ == "__main__": - main() - - -#test.regrtest.main(tests, verbose=0) -#test.regrtest.main(skipped, verbose=0) -test.regrtest.main(failures, verbose=0) diff --git a/bugtests/support.py b/bugtests/support.py deleted file mode 100644 index 04a2669c2..000000000 --- a/bugtests/support.py +++ /dev/null @@ -1,249 +0,0 @@ -import sys -is_jython = sys.platform[:4] == "java" - -import re, exceptions, thread, os, shutil -import support_config as cfg - -if is_jython: - import jarray - from java.io import FileInputStream - from java.io import FileOutputStream - from java.util.jar import JarEntry - from java.util.jar import JarFile - from java.util.jar import JarInputStream - from java.util.jar import JarOutputStream - from java.util.jar import Manifest - -UNIX = os.pathsep == ":" -WIN = os.pathsep == ";" -test_jythonc = 1 - -if not UNIX ^ WIN: - raise TestError("Unknown platform") - -class TestError(exceptions.Exception): - def __init__(self, args): - exceptions.Exception.__init__(self, args) - -class TestWarning(exceptions.Exception): - def __init__(self, args): - exceptions.Exception.__init__(self, args) - -class TestSkip(exceptions.Exception): - def __init__(self, args): - 
exceptions.Exception.__init__(self, args) - -def compare(s, pattern): - m = re.search(pattern, str(s)) - if m is None: - raise TestError("string compare error\n '" + str(s) + "'\n '" + pattern + "'") - -def StreamReader(instream, outstream): - while 1: - ch = instream.read() - if ch == -1: break - outstream.write(ch) - -def execCmd(cmd, kw): - __doc__ = """execute a command, and wait for its results -returns 0 if everything was ok -raises a TestError if the command did not end normally""" - if kw.has_key("verbose") and kw["verbose"]: - print cmd - import java - r = java.lang.Runtime.getRuntime() - e = getattr(r, "exec") - p = e(cmd) - - if kw.has_key("output"): - outstream = java.io.FileOutputStream(kw['output']) - else: - outstream = java.lang.System.out - if kw.has_key("error"): - errstream = java.io.FileOutputStream(kw['error']) - else: - errstream = java.lang.System.out - - thread.start_new_thread(StreamReader, (p.inputStream, outstream)) - thread.start_new_thread(StreamReader, (p.errorStream, errstream)) - - ret = p.waitFor() - if ret != 0 and not kw.has_key("expectError"): - raise TestError, "%s failed with %d" % (cmd, ret) - - return ret - -def compileJava(src, **kw): - classfile = src.replace('.java', '.class') - if not 'force' in kw and os.path.exists(classfile) and os.stat(src).st_mtime < os.stat(classfile).st_mtime: - return 0 - classpath = cfg.classpath - if "classpath" in kw: - classpath = os.pathsep.join([cfg.classpath, kw["classpath"]]) - if UNIX: - cmd = "%s/bin/javac -classpath %s %s" % (cfg.java_home, classpath, src) - elif WIN: - src = src.replace("/", "\\") - cmd = 'cmd /C "%s/bin/javac.exe" -classpath %s %s' % (cfg.java_home, classpath, src) - return execCmd(cmd, kw) - -def runJava(cls, **kw): - classpath = cfg.classpath - if "classpath" in kw: - classpath = os.pathsep.join([cfg.classpath, kw["classpath"]]) - if kw.get('pass_jython_home', 0): - defs = "-Dpython.home=%s" % cfg.jython_home - else: - defs = '' - if UNIX: - cmd = ['/bin/sh', '-c', 
"%s/bin/java -classpath %s %s %s" % (cfg.java_home, classpath, defs, cls)] - elif WIN: - cmd = 'cmd /C "%s/bin/java.exe" -classpath %s %s %s' % (cfg.java_home, classpath, defs, cls) - return execCmd(cmd, kw) - -def runJavaJar(jar, *args, **kw): - argString = " ".join(args) - if UNIX: - cmd = ['/bin/sh', '-c', "%s/bin/java -jar %s %s" % (cfg.java_home, jar, argString)] - elif WIN: - cmd = 'cmd /C "%s/bin/java.exe" -jar %s %s' % (cfg.java_home, jar, argString) - return execCmd(cmd, kw) - -def runJython(cls, **kw): - javaargs = '' - if 'javaargs' in kw: - javaargs = kw['javaargs'] - classpath = cfg.classpath - if "classpath" in kw: - classpath = os.pathsep.join([cfg.classpath, kw["classpath"]]) - if UNIX: - cmd = "%s/bin/java -classpath %s %s -Dpython.home=%s org.python.util.jython %s" % (cfg.java_home, classpath, javaargs, cfg.jython_home, cls) - elif WIN: - cmd = 'cmd /C "%s/bin/java.exe" -classpath %s %s -Dpython.home=%s org.python.util.jython %s' % (cfg.java_home, classpath, javaargs, cfg.jython_home, cls) - return execCmd(cmd, kw) - -def compileJPythonc(*files, **kw): - if not test_jythonc: - raise TestSkip('Skipping pythonc') - if os.path.isdir("jpywork") and not kw.has_key("keep"): - shutil.rmtree("jpywork", 1) - - cmd = "-i " - if kw.has_key("core"): - cmd = cmd + "--core " - if kw.has_key("deep"): - cmd = cmd + "--deep " - if kw.has_key("all"): - cmd = cmd + "--all " - if kw.has_key("package"): - cmd = cmd + "--package %s " % kw['package'] - if kw.has_key("addpackages"): - cmd = cmd + "--addpackages %s " % kw['addpackages'] - if kw.has_key("jar"): - cmd = cmd + "--jar %s " % kw['jar'] - if os.path.isfile(kw['jar']): - os.remove(kw['jar']) - cmd = cmd + " ".join(files) - - classpath = cfg.classpath - if "classpath" in kw: - classpath = os.pathsep.join([cfg.classpath, kw["classpath"]]) - - jythonc = "%s/Tools/jythonc/jythonc.py %s" % (cfg.jython_home, cmd) - if UNIX: - cmd = "%s/bin/java -classpath %s -Dpython.home=%s org.python.util.jython %s" % 
(cfg.java_home, classpath, cfg.jython_home, jythonc) - elif WIN: - cmd = 'cmd /C "%s/bin/java.exe" -classpath "%s" -Dpython.home=%s org.python.util.jython %s' % (cfg.java_home, classpath, cfg.jython_home, jythonc) - return execCmd(cmd, kw) - -def grep(file, text, count=0): - f = open(file, "r") - lines = f.readlines() - f.close() - - result = [] - for line in lines: - if re.search(text, line): - result.append(line) - - if count: - return len(result) - return result - -class JarPacker: - __doc__ = """helper class to pack stuff into a jar file - - the terms 'file' and 'dir' mean java.io.File here """ - - def __init__(self, jarFile, bufsize=1024): - self._jarFile = jarFile - self._bufsize = bufsize - self._manifest = None - self._jarOutputStream = None - - def close(self): - self.getJarOutputStream().close() - - def addManifestFile(self, manifestFile): - __doc__ = """only one manifest file can be added""" - self.addManifest(Manifest(FileInputStream(manifestFile))) - - def addManifest(self, manifest): - if not self._manifest: - self._manifest = manifest - - def addFile(self, file, parentDirName=None): - buffer = jarray.zeros(self._bufsize, 'b') - inputStream = FileInputStream(file) - jarEntryName = file.getName() - if parentDirName: - jarEntryName = parentDirName + "/" + jarEntryName - self.getJarOutputStream().putNextEntry(JarEntry(jarEntryName)) - read = inputStream.read(buffer) - while read <> -1: - self.getJarOutputStream().write(buffer, 0, read) - read = inputStream.read(buffer) - self.getJarOutputStream().closeEntry() - inputStream.close() - - def addDirectory(self, dir, parentDirName=None): - if not dir.isDirectory(): - return - filesInDir = dir.listFiles() - for currentFile in filesInDir: - if currentFile.isFile(): - if parentDirName: - self.addFile(currentFile, parentDirName + "/" + dir.getName()) - else: - self.addFile(currentFile, dir.getName()) - else: - if parentDirName: - newParentDirName = parentDirName + "/" + dir.getName() - else: - newParentDirName = 
dir.getName() - self.addDirectory(currentFile, newParentDirName) - - def addJarFile(self, jarFile): - __doc__ = """if you want to add a .jar file with a MANIFEST, add it first""" - jarJarFile = JarFile(jarFile) - self.addManifest(jarJarFile.getManifest()) - jarJarFile.close() - - jarInputStream = JarInputStream(FileInputStream(jarFile)) - jarEntry = jarInputStream.getNextJarEntry() - while jarEntry: - self.getJarOutputStream().putNextEntry(jarEntry) - buffer = jarray.zeros(self._bufsize, 'b') - read = jarInputStream.read(buffer) - while read <> -1: - self.getJarOutputStream().write(buffer, 0, read) - read = jarInputStream.read(buffer) - self.getJarOutputStream().closeEntry() - jarEntry = jarInputStream.getNextJarEntry() - - def getJarOutputStream(self): - if not self._jarOutputStream: - if self._manifest: - self._jarOutputStream = JarOutputStream(FileOutputStream(self._jarFile), self._manifest) - else: - self._jarOutputStream = JarOutputStream(FileOutputStream(self._jarFile)) - return self._jarOutputStream diff --git a/bugtests/test238p/__init__.py b/bugtests/test238p/__init__.py deleted file mode 100644 index 139597f9c..000000000 --- a/bugtests/test238p/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/bugtests/test238p/test238j.java b/bugtests/test238p/test238j.java deleted file mode 100644 index efe6b5fa8..000000000 --- a/bugtests/test238p/test238j.java +++ /dev/null @@ -1,6 +0,0 @@ - -package test238p; -public class test238j { - public static String spam() { return "bar"; } - public static void bar() {} -} diff --git a/bugtests/test239j1.java b/bugtests/test239j1.java deleted file mode 100644 index 5b12c94b8..000000000 --- a/bugtests/test239j1.java +++ /dev/null @@ -1,4 +0,0 @@ -public class test239j1 { - public int theInt = 0; - public int theSleepTime = 0; -} diff --git a/bugtests/test239j2.java b/bugtests/test239j2.java deleted file mode 100644 index 12702301c..000000000 --- a/bugtests/test239j2.java +++ /dev/null @@ -1,14 +0,0 @@ - - -public class 
test239j2 implements Runnable { - int myTestInt = 0; - int mySleepTime = 0; - - public test239j2(test239j1 config) { - myTestInt = config.theInt; - mySleepTime = config.theSleepTime; - } - - public void run() { } - -} diff --git a/bugtests/test240p/__init__.py b/bugtests/test240p/__init__.py deleted file mode 100644 index 8b1378917..000000000 --- a/bugtests/test240p/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/bugtests/test240p/test240j1.java b/bugtests/test240p/test240j1.java deleted file mode 100644 index f2cfbc2b8..000000000 --- a/bugtests/test240p/test240j1.java +++ /dev/null @@ -1,5 +0,0 @@ -package test240p; -public class test240j1 { - int theInt = 0; - int theSleepTime = 0; -} diff --git a/bugtests/test240p/test240j2.java b/bugtests/test240p/test240j2.java deleted file mode 100644 index 9405c0baf..000000000 --- a/bugtests/test240p/test240j2.java +++ /dev/null @@ -1,13 +0,0 @@ -package test240p; -public class test240j2 implements Runnable { - int myTestInt = 0; - int mySleepTime = 0; - - public test240j2(test240j1 config) { - myTestInt = config.theInt; - mySleepTime = config.theSleepTime; - } - - public void run() { } - -} diff --git a/bugtests/test241.py b/bugtests/test241.py deleted file mode 100644 index 18aba26ac..000000000 --- a/bugtests/test241.py +++ /dev/null @@ -1,14 +0,0 @@ -import support - -support.compileJava("test241p/test241i.java") - -import test241p -class A(test241p.test241i): - def foo(self, i): - return i - -a = A() -v = a.foo(44) - -if v != 44: - raise support.TestError, "Wrong return value %d" % v diff --git a/bugtests/test241p/__init__.py b/bugtests/test241p/__init__.py deleted file mode 100644 index 8b1378917..000000000 --- a/bugtests/test241p/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/bugtests/test241p/test241i.java b/bugtests/test241p/test241i.java deleted file mode 100644 index 8995e7ccf..000000000 --- a/bugtests/test241p/test241i.java +++ /dev/null @@ -1,5 +0,0 @@ - -package test241p; -public interface 
test241i { - public void foo(int i); -} \ No newline at end of file diff --git a/bugtests/test243.py b/bugtests/test243.py deleted file mode 100644 index 364f6df0a..000000000 --- a/bugtests/test243.py +++ /dev/null @@ -1,12 +0,0 @@ - -import java, support - -import test243p -#print test243p -try: - import test243p.P -except (ImportError, java.lang.NoClassDefFoundError): - pass -else: - raise support.TestError, "Should raise an exception" - diff --git a/bugtests/test243p/__init__.py b/bugtests/test243p/__init__.py deleted file mode 100644 index f3d96f263..000000000 --- a/bugtests/test243p/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__path__[0]=__path__[0]+'/real' diff --git a/bugtests/test243p/real/A.java b/bugtests/test243p/real/A.java deleted file mode 100644 index 98692c558..000000000 --- a/bugtests/test243p/real/A.java +++ /dev/null @@ -1 +0,0 @@ -package test243p; public class A { int v; } \ No newline at end of file diff --git a/bugtests/test243p/real/P.java b/bugtests/test243p/real/P.java deleted file mode 100644 index 80a27f4b7..000000000 --- a/bugtests/test243p/real/P.java +++ /dev/null @@ -1,5 +0,0 @@ -package test243p; - -public class P { - public static void p(A a) { System.out.println(a.v); } -} diff --git a/bugtests/test244.py b/bugtests/test244.py deleted file mode 100644 index 5fb8cef32..000000000 --- a/bugtests/test244.py +++ /dev/null @@ -1,8 +0,0 @@ - -import support - -support.compileJava("test244p/A.java") - -import test244p.A - -a=test244p.A() diff --git a/bugtests/test244p/A.java b/bugtests/test244p/A.java deleted file mode 100644 index e15fa7b4d..000000000 --- a/bugtests/test244p/A.java +++ /dev/null @@ -1,3 +0,0 @@ -package test244p; - -public class A { } diff --git a/bugtests/test244p/__init__.py b/bugtests/test244p/__init__.py deleted file mode 100644 index 8b1378917..000000000 --- a/bugtests/test244p/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/bugtests/test267.py b/bugtests/test267.py deleted file mode 100644 index 
997233c0d..000000000 --- a/bugtests/test267.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Make sure that ImportAll and ExecStmt can modify the locals -""" - -import support - - -def f1(): - from stat import * - v1 = ST_ATIME - assert v1 == 7 - exec "foo=22" - v2 = foo - assert v2 == 22 - -f1() diff --git a/bugtests/test305.py b/bugtests/test305.py deleted file mode 100644 index fb0a46205..000000000 --- a/bugtests/test305.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Test that the case of the module extension does not matter. -As the registry documentation says, this will only work if options.caseok is true. -""" - -import support - -if support.UNIX: - raise support.TestWarning("this will fail on unix platforms") - -from org.python.core import Options -switchedCase = 0 -if not Options.caseok: - switchedCase = 1 - Options.caseok = 1 -try: - import test305m # the file is named test305m.PY -finally: - if switchedCase: - Options.caseok = 0 - diff --git a/bugtests/test305m.PY b/bugtests/test305m.PY deleted file mode 100644 index 236df7905..000000000 --- a/bugtests/test305m.PY +++ /dev/null @@ -1,3 +0,0 @@ - -foo = 1 - diff --git a/bugtests/test306.py b/bugtests/test306.py deleted file mode 100644 index 4a00aacff..000000000 --- a/bugtests/test306.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Test normcase. -""" - -import support -import os - -if os.sep == '\\': #only do this test on windows. 
- p1 = os.path.normpath('e:\\someDir\\packag/modul.py') - if p1 != 'e:\\someDir\\packag\\modul.py': - raise support.TestError('Wrong normpath %s' % p1) - diff --git a/bugtests/test321.py b/bugtests/test321.py deleted file mode 100644 index b1b404b22..000000000 --- a/bugtests/test321.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -[ #475666 ] __nonzero__ exceptions must be ignored -""" - -import support - - -msgs = [] - -class Foo: - def __getattr__(self, key): - msgs.append('getting %s' % key) - raise KeyError, key - -foo = Foo() -if not foo: print 'no foo' - -class Foo: - def __nonzero__(self): - msgs.append("called __nonzero__") - raise KeyError - -foo = Foo() -try: - if not foo: print 'no foo' -except KeyError: - pass -else: - raise support.TestError('Must raise a keyerror') - -support.compare(msgs, "['getting __nonzero__', 'getting __len__', 'called __nonzero__']") - diff --git a/bugtests/test322.py b/bugtests/test322.py deleted file mode 100644 index 8a4ad287c..000000000 --- a/bugtests/test322.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -[ #448398 ] open('test.txt','w').write('test') fails -""" - -import support - -support.runJython("test322m.py") - -import os - -l = os.stat("test322.out")[6] - -if l != 7: - raise support.TestWarning('The file should have been auto flushed') diff --git a/bugtests/test322m.py b/bugtests/test322m.py deleted file mode 100644 index a0a9add68..000000000 --- a/bugtests/test322m.py +++ /dev/null @@ -1,4 +0,0 @@ -f = open('test322.out','w') -f.write("xvavava") -#f.flush() - diff --git a/bugtests/test323.py b/bugtests/test323.py deleted file mode 100644 index 917f07b74..000000000 --- a/bugtests/test323.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Tests using a path inside a zip file for zip imports -""" - -import support -import zipfile, time - -def addZipEntry(zip, name, data): - entry = zipfile.ZipInfo() - entry.filename = name - entry.date_time = time.gmtime(time.time()) - zip.writestr(entry, data) - - -zip = zipfile.ZipFile("test323.zip", "w", 
zipfile.ZIP_DEFLATED) - -addZipEntry(zip, "Lib/test323m.py", """ -assert __name__ == 'test323m', " __name__ should've been test323m but was %s" % __name__ -from java.io import File -expected = "test323.zip%sLib/test323m.py" % (File.separator) -assert expected in __file__, "%s should've been in __file__ but was %s" % (expected, __file__) -""") - -zip.close() - -import sys -sys.path.append("test323.zip/Lib") - -import test323m diff --git a/bugtests/test324.py b/bugtests/test324.py deleted file mode 100644 index 9ef69e9ec..000000000 --- a/bugtests/test324.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -[ #467826 ] SHA digest() method doesn't work -""" - -import support - -import sha -s = sha.sha() -s.update("foo") -r = s.digest() - -support.compare(len(r), "20") - - diff --git a/bugtests/test325.py b/bugtests/test325.py deleted file mode 100644 index b8d88bc76..000000000 --- a/bugtests/test325.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Verify a bug in pickle reported by mailling list. -""" - -import support - -# This program failed with pickle due to the id() problem. 
-import pickle - -pfile=open("test325.out","wb") -p=pickle.Pickler(pfile) -for l in range (1,10000): - row=[str(l),str(l)] - p.dump(row) -pfile.close() - -#print "reading" -n=1 -try: - pfile=open("test325.out","rb") - l=pickle.load(pfile) - while l: - comp = [str(n),str(n)] - if l != comp: - print "Pickle error" - print str(l) + " should be " + str(comp) - raise support.TestError("pickle is not working") - n=n+1 - l=pickle.load(pfile) - pfile.close() - -except EOFError: - #print "End reached, well done" - pfile.close() - diff --git a/bugtests/test326.py b/bugtests/test326.py deleted file mode 100644 index a80bb1f33..000000000 --- a/bugtests/test326.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -[ #473676 ] cStringIO bug -""" - -import support - -import cStringIO - -s = cStringIO.StringIO() -r = s.read(1) - -if len(r) != 0: - raise support.TestError('EOF must be the empty string') - - -s = cStringIO.StringIO("abc") -r = s.read(2) -assert len(r) == 2 -r = s.read(1) -assert len(r) == 1 -r = s.read(1) -if len(r) != 0: - raise support.TestError('EOF must be the empty string #2') - diff --git a/bugtests/test327.py b/bugtests/test327.py deleted file mode 100644 index b3c2b9c05..000000000 --- a/bugtests/test327.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -[ #458945 ] Missing 'lastindex' on match objects -""" - -import support - -import re - -m = re.match(r"(\w*) (\w*) (\w*)", "word1 word2 word3") -if m.lastindex != 3: - raise support.TestError('Wrong lastindex value#1 : %d' % m.lastindex) - -m = re.match(r"((\w*) )+", "word1 word2 word3 ") -if m.lastindex != 2: - raise support.TestError('Wrong lastindex value#2 : %d' % m.lastindex) - -m = re.match(r"abc", "abc") -if m.lastindex != None: - raise support.TestError('Wrong lastindex value#3 : %d' % m.lastindex) - diff --git a/bugtests/test328.py b/bugtests/test328.py deleted file mode 100644 index fc94455aa..000000000 --- a/bugtests/test328.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -[ #462280 ] builtin method as a class variable -""" - 
-import support - -class Foo: - mylistIndex = ['a', 'b', 'c', 'd', 'e'].index - -a = Foo() -if a.mylistIndex.__self__ != ['a', 'b', 'c', 'd', 'e']: - raise support.TestError('Builtin func with wrong self') -assert a.mylistIndex('c') == 2 -assert Foo.mylistIndex('c') == 2 - diff --git a/bugtests/test329.py b/bugtests/test329.py deleted file mode 100644 index 2e987a561..000000000 --- a/bugtests/test329.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -[ #475445 ] incompatibility with python - -Check comment handling when reading source from stdin. -""" - -import support - -support.runJava("org.python.util.jython -S < test329s1.py > test329.out") -support.runJava("org.python.util.jython -S < test329s2.py > test329.out") -support.runJava("org.python.util.jython -S < test329s3.py > test329.out") - diff --git a/bugtests/test329s1.py b/bugtests/test329s1.py deleted file mode 100644 index 853154428..000000000 --- a/bugtests/test329s1.py +++ /dev/null @@ -1,6 +0,0 @@ -import sys -for t in range(10): - # a comment - print t; sys.exit(0) - -sys.exit(22) diff --git a/bugtests/test329s2.py b/bugtests/test329s2.py deleted file mode 100644 index 3c0517886..000000000 --- a/bugtests/test329s2.py +++ /dev/null @@ -1,6 +0,0 @@ -import sys -for t in range(10): -# a comment - print t; sys.exit(0) - -sys.exit(22) diff --git a/bugtests/test329s3.py b/bugtests/test329s3.py deleted file mode 100644 index b107488d8..000000000 --- a/bugtests/test329s3.py +++ /dev/null @@ -1,6 +0,0 @@ -import sys -for t in range(10): - a = 1 # a comment - print t; sys.exit(0) - -sys.exit(22) diff --git a/bugtests/test330.py b/bugtests/test330.py deleted file mode 100644 index dd068a9c4..000000000 --- a/bugtests/test330.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -[ #477793 ] os.utime() is missing -""" - -import support - -import os -f = open("test330.out", "w") -f.close() - -m = os.stat("test330.out")[8] -os.utime("test330.out", (0, 0)) -if os.stat("test330.out")[8] != 0: - raise support.TestWarning("Modification time not 
changed #1") - -os.utime("test330.out", (m, m)) -if os.stat("test330.out")[8] != m: - raise support.TestError("Modification time not changed #2") - diff --git a/bugtests/test331.py b/bugtests/test331.py deleted file mode 100644 index f9416efe8..000000000 --- a/bugtests/test331.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -[ #477608 ] os.path.getmtime() missing -""" - -import support -import os - -s = os.stat("test331.py") - -if s[8] != os.path.getmtime("test331.py"): - raise support.TestWarning("Modification time was wrong") - -if s[7] != os.path.getatime("test331.py"): - raise support.TestWarning("Access time was wrong") - diff --git a/bugtests/test332.py b/bugtests/test332.py deleted file mode 100644 index da2144fd9..000000000 --- a/bugtests/test332.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -[ #438297 ] SimpleHTTPServer does not work -""" - -import support - -import sys -import SimpleHTTPServer -import BaseHTTPServer - -def test(HandlerClass = SimpleHTTPServer.SimpleHTTPRequestHandler, - ServerClass = BaseHTTPServer.HTTPServer): - server_address = ('', 8000) - # Silense the server - HandlerClass.log_message = lambda x, b, *arg: None - httpd = ServerClass(server_address, HandlerClass) - # do just one request. 
- httpd.handle_request() - -import thread -thread.start_new_thread(test, ()) - -import httplib -import time -time.sleep(5) - -h = httplib.HTTP() -h.connect("localhost", 8000) -h.putrequest('GET', "/") -h.endheaders() -status, reason, headers = h.getreply() -if status != 200: - raise support.TestError("Wrong status: %d" % status) -if reason != "OK": - raise support.TestError("Wrong status: %d" % status) -h.getfile().read() - - diff --git a/bugtests/test333.py b/bugtests/test333.py deleted file mode 100644 index 12994fc2e..000000000 --- a/bugtests/test333.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -[ #476772 ] shutdowns in jython / atexit -""" - -import support -import os - -def check(filename, result): - f = open(filename) - l = f.readlines() - f.close() - if l != result: - raise support.TestError("Result was wrong: %s" % l) - -# Different exit situations in the interpreter. - -support.runJython("test333s1.py", output="test333s1.out") -check("test333s1.out", [ "myfunc\n" ]) - -ret = support.runJython("test333s2.py", output="test333s2.out", expectError=1) -if ret != 42: - raise support.TestError("Return code was wrong: %d" % ret) -check("test333s2.out", [ "myfunc\n" ]) - -support.runJython("test333s3.py", - output="test333s3.out", error="test333s3.err", expectError=1) -check("test333s3.out", [ "myfunc\n" ]) -check("test333s3.err", [ - 'Traceback (innermost last):\n', - ' File "test333s3.py", line 8, in ?\n', - 'Exc\n', -]) - -# Different exit situations in compiled applications. 
- -support.compileJPythonc("test333s1.py", output="test333s1.err") -support.runJava("test333s1", classpath="jpywork", output="test333s1.out") -check("test333s1.out", [ "myfunc\n" ]) - -support.compileJPythonc("test333s1.py", output="test333s3.err") -support.runJava("test333s1", classpath="jpywork", output="test333s1.out") -check("test333s1.out", [ "myfunc\n" ]) - -support.compileJPythonc("test333s3.py", output="test333s3.err") -support.runJava("test333s3", classpath="jpywork", output="test333s3.out", - error="test333s3.err", expectError=1) -check("test333s3.out", [ "myfunc\n" ]) -f = open("test333s3.err") -lines = f.readlines(); -f.close() -if "Exc\n" not in lines: - raise support.TestError("Should raise a 'Exc' exception") diff --git a/bugtests/test333s1.py b/bugtests/test333s1.py deleted file mode 100644 index 3f9896c01..000000000 --- a/bugtests/test333s1.py +++ /dev/null @@ -1,7 +0,0 @@ - -import sys - -def myfunc(): - print "myfunc" - -sys.exitfunc = myfunc diff --git a/bugtests/test333s2.py b/bugtests/test333s2.py deleted file mode 100644 index 7fc74bbe5..000000000 --- a/bugtests/test333s2.py +++ /dev/null @@ -1,9 +0,0 @@ - -import sys - -def myfunc(): - print "myfunc" - -sys.exitfunc = myfunc - -sys.exit(42) diff --git a/bugtests/test333s3.py b/bugtests/test333s3.py deleted file mode 100644 index 2757e633a..000000000 --- a/bugtests/test333s3.py +++ /dev/null @@ -1,8 +0,0 @@ - -import sys - -def myfunc(): - print "myfunc" - -sys.exitfunc = myfunc -raise "Exc" diff --git a/bugtests/test334.py b/bugtests/test334.py deleted file mode 100644 index 5300ab8d7..000000000 --- a/bugtests/test334.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -[ #477768 ] ord([123]) 21a3 -""" - -import support - -try: - ord([123]) -except TypeError: - pass - diff --git a/bugtests/test335.py b/bugtests/test335.py deleted file mode 100644 index fbecd145e..000000000 --- a/bugtests/test335.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -[ #476580 ] 'del obj.non_member' : wrong exception -""" - -import 
support -class C : pass - -o = C() -try: - o.foo -except AttributeError: - pass - -try: - del o.foo -except AttributeError: - pass diff --git a/bugtests/test336.py b/bugtests/test336.py deleted file mode 100644 index b05025331..000000000 --- a/bugtests/test336.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -[ #451552 ] case insensitivity on import causes prob -""" - -import support -import java - -support.compileJava("classes/test336p/Data.java") -support.compileJava("classes/test336p/data/MyData.java") - -from test336p.data import MyData diff --git a/bugtests/test338.py b/bugtests/test338.py deleted file mode 100644 index a4f1f5321..000000000 --- a/bugtests/test338.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -[ #480017 ] Proxy supers are loaded from syspath -Running test338j will throw a ClassCastException if a proxy's superclass is loaded -by the syspath classloader. -""" - -import support - -support.compileJava("test338cl.java", classpath=".") -support.compileJava("test338j1.java", classpath=".") -support.compileJava("test338j.java", classpath=".") - -support.runJava("test338j", classpath=".") - diff --git a/bugtests/test338cl.java b/bugtests/test338cl.java deleted file mode 100644 index 95ac2a897..000000000 --- a/bugtests/test338cl.java +++ /dev/null @@ -1,30 +0,0 @@ - -import java.io.*; - -public class test338cl extends ClassLoader { - - protected Class loadClass(String name, boolean resolve) - throws ClassNotFoundException - { -//System.out.println("MyLoadClass " + name); - Class c = findLoadedClass(name); - if (c != null) - return c; - - try { - FileInputStream fis = new FileInputStream(name.replace('.', '/') + ".class"); - int size = fis.available(); - byte[] buf = new byte[size]; - fis.read(buf); - fis.close(); - - c = defineClass(name, buf, 0, buf.length); - if (resolve) - resolveClass(c); - return c; - } catch (IOException exc) { - return super.loadClass(name, resolve); - } - } - -} diff --git a/bugtests/test338j.java b/bugtests/test338j.java deleted file mode 100644 
index 6aab2cd30..000000000 --- a/bugtests/test338j.java +++ /dev/null @@ -1,26 +0,0 @@ - -import org.python.util.*; -import org.python.core.*; - -public class test338j implements Runnable { - public static void main(String[] args) throws Exception { - //new Main().run(); - Runnable r = (Runnable)Class.forName("test338j", true, new test338cl()).newInstance(); - r.run(); - } - - public void run() { - String brob = "test338m"; - PythonInterpreter interp = new PythonInterpreter(); - interp.set("test338j1", test338j1.class); - interp.execfile(brob + ".py"); - interp.exec("cl = " + brob + "()"); - Object newobj = interp.get("cl", Object.class); - //System.out.println(newobj.getClass().getClassLoader()); - //System.out.println(newobj.getClass().getSuperclass().hashCode() + " " + test338j1.class.hashCode()); - //System.out.println(newobj.getClass().getSuperclass().getClassLoader()); - test338j1 boobj = (test338j1) newobj; - } -} - - diff --git a/bugtests/test338j1.java b/bugtests/test338j1.java deleted file mode 100644 index cb45c1bfe..000000000 --- a/bugtests/test338j1.java +++ /dev/null @@ -1,6 +0,0 @@ - -public class test338j1 { - public String getDescription() { - return "a test338j1 description"; - } -} diff --git a/bugtests/test338m.py b/bugtests/test338m.py deleted file mode 100644 index 644db5f3c..000000000 --- a/bugtests/test338m.py +++ /dev/null @@ -1,8 +0,0 @@ -#import test338j1 - -class test338m(test338j1): - def getDescription(self): - desc = test338j1.getDescription(self) # Superclass call - return "Foo_" + desc - - diff --git a/bugtests/test339.py b/bugtests/test339.py deleted file mode 100644 index 0f63687ca..000000000 --- a/bugtests/test339.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ #449956 ] jythonc 2.1a3 --package problem - -""" - -import support - -support.compileJPythonc("test339c.py", package="test339p", output="test339.err") - -#raise support.TestError("" + `x`) diff --git a/bugtests/test339c.py b/bugtests/test339c.py deleted file mode 100644 index 
dddb0e839..000000000 --- a/bugtests/test339c.py +++ /dev/null @@ -1,12 +0,0 @@ -import java -import test339c - -class spam(java.lang.Object): - pass - -class eggs1(spam): - pass - -class eggs2(test339c.spam): - pass - diff --git a/bugtests/test340.py b/bugtests/test340.py deleted file mode 100644 index ba39d622e..000000000 --- a/bugtests/test340.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -[ #480390 ] main() does not throw exceptions -""" - -import support - -support.compileJPythonc("test340c.py", core=1, jar="test340.jar", - output="test340.err") -support.compileJava("test340j.java") - -rc = support.runJava("test340j", classpath=".", expectError=1) -if rc != 42: - support.TestError("Did not catch exception correctly %d" % rc) - - diff --git a/bugtests/test340c.py b/bugtests/test340c.py deleted file mode 100644 index 6d56b45a5..000000000 --- a/bugtests/test340c.py +++ /dev/null @@ -1,5 +0,0 @@ - -import java.lang.Exception -raise java.lang.Exception - - diff --git a/bugtests/test340j.java b/bugtests/test340j.java deleted file mode 100644 index eb962aab2..000000000 --- a/bugtests/test340j.java +++ /dev/null @@ -1,24 +0,0 @@ - -import java.net.*; -import java.lang.reflect.*; - -public class test340j { - public static void main(String[] args) { - try { - String jar = "./test340.jar"; - URLClassLoader theLoader = new URLClassLoader(new URL[] { - new URL("file:" + jar)}); - Object theLoadedClass = Class.forName("test340c", true, theLoader). - newInstance(); - String[] array = new String[] {}; - Method main = theLoadedClass.getClass(). 
- getMethod("main", new Class[] { array.getClass() }); - main.invoke(theLoadedClass, new Object[] {new String[] {}}); - } - catch (Throwable t) { - System.exit(42); - } - System.exit(43); - } -} - diff --git a/bugtests/test341.py b/bugtests/test341.py deleted file mode 100644 index 88765edbf..000000000 --- a/bugtests/test341.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ #451746 ] jythonc --deep jpy$packages problem - -""" - -import support - -support.compileJPythonc("test341c1.py", deep=1 , output="test341.err") - -#raise support.TestError("" + `x`) diff --git a/bugtests/test341c1.py b/bugtests/test341c1.py deleted file mode 100644 index 1efad1a35..000000000 --- a/bugtests/test341c1.py +++ /dev/null @@ -1,7 +0,0 @@ - -from test341c2 import test341c2 - -class bar(test341c2): - pass - - diff --git a/bugtests/test341c2.py b/bugtests/test341c2.py deleted file mode 100644 index 367e7a95e..000000000 --- a/bugtests/test341c2.py +++ /dev/null @@ -1,5 +0,0 @@ -import java - -class test341c2(java.lang.Object): - pass - diff --git a/bugtests/test342.py b/bugtests/test342.py deleted file mode 100644 index b90bf1f44..000000000 --- a/bugtests/test342.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -__import__(_) does an unwanted relative search -""" - -import support - -from test342p import doimp - -#support.compare(doimp.kind,"absolute") -if doimp.kind != "absolute": - raise support.TestError("Should be absolute") diff --git a/bugtests/test342m.py b/bugtests/test342m.py deleted file mode 100644 index 0653c5533..000000000 --- a/bugtests/test342m.py +++ /dev/null @@ -1 +0,0 @@ -kind = "absolute" diff --git a/bugtests/test342p/__init__.py b/bugtests/test342p/__init__.py deleted file mode 100644 index 139597f9c..000000000 --- a/bugtests/test342p/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/bugtests/test342p/doimp.py b/bugtests/test342p/doimp.py deleted file mode 100644 index a2d9f6d68..000000000 --- a/bugtests/test342p/doimp.py +++ /dev/null @@ -1,3 +0,0 @@ -kind = 
__import__('test342m').kind - - diff --git a/bugtests/test342p/test342m.py b/bugtests/test342p/test342m.py deleted file mode 100644 index d8413ddfc..000000000 --- a/bugtests/test342p/test342m.py +++ /dev/null @@ -1 +0,0 @@ -kind = "relative" diff --git a/bugtests/test343.py b/bugtests/test343.py deleted file mode 100644 index 923cdb20f..000000000 --- a/bugtests/test343.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -[ #485558 ] Synchronization bug in sys.initialize -""" - -import support - -support.compileJava("test343j.java") -support.runJava("test343j", classpath=".") - diff --git a/bugtests/test343j.java b/bugtests/test343j.java deleted file mode 100644 index 9684791f9..000000000 --- a/bugtests/test343j.java +++ /dev/null @@ -1,16 +0,0 @@ -import org.python.util.*; - -public class test343j implements Runnable { - public static void main(String[] args) { - new Thread(new test343j()).start(); - new Thread(new test343j()).start(); - new Thread(new test343j()).start(); - new Thread(new test343j()).start(); - new Thread(new test343j()).start(); - } - - public void run() { - new PythonInterpreter(); - new PythonInterpreter(); - } -} diff --git a/bugtests/test344.py b/bugtests/test344.py deleted file mode 100644 index 7cf99cb05..000000000 --- a/bugtests/test344.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -[ #485968 ] cStringIO.softspace is not assignable. 
-""" - -import support - -import sys, cStringIO - -h = cStringIO.StringIO() - -sys.stdout = h -print "line1" -print "line2", -print "line3", -sys.stdout = sys.__stdout__ - -if h.getvalue() != "line1\nline2 line3": - raise support.TestError('Wrong softspace handling in cStringIO"') diff --git a/bugtests/test345.py b/bugtests/test345.py deleted file mode 100644 index ddf47b0e2..000000000 --- a/bugtests/test345.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -[ #489836 ] Private names is not mangled -""" - -import support - -support.compileJPythonc("test345c.py", jar="test345.jar", core=1, - output="test345.out") -support.runJava("-jar test345.jar") diff --git a/bugtests/test345c.py b/bugtests/test345c.py deleted file mode 100644 index fb09e5331..000000000 --- a/bugtests/test345c.py +++ /dev/null @@ -1,28 +0,0 @@ -""" - -""" - -import support - -class A: - def __init__(self): - self.__stop("A") - self.__x = 1 - self.__y = 1 - del self.__x - - def __stop(self, s): - pass - - __c = 1 - -def simpledir(obj): - l = obj.__dict__.keys() - l.sort() - return l - -if simpledir(A) != ['_A__c', '_A__stop', '__doc__', '__init__', '__module__']: - raise support.TestError("bug in private class var mangling %s" % dir(A)) -if simpledir(A()) != ['_A__y']: - raise support.TestError("bug in private var mangling %s" % dir(A())) - diff --git a/bugtests/test346.py b/bugtests/test346.py deleted file mode 100644 index cd4a2e048..000000000 --- a/bugtests/test346.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -[ #488632 ] -c sys.argv diff -""" - -import support - -support.runJython( - """-c "import sys; assert sys.argv == ['-c', '-v', 'args']" -v args""") - diff --git a/bugtests/test347.py b/bugtests/test347.py deleted file mode 100644 index 1ac7d8e91..000000000 --- a/bugtests/test347.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -[ #490157 ] string.splitlines() - incorrectly splits -""" - -import support - -r = 'This is a\n multiline string\n'.splitlines() - -if r != ['This is a', ' multiline string']: - raise 
support.TestError("Wrong splitlines(): %s" % r) - diff --git a/bugtests/test348.py b/bugtests/test348.py deleted file mode 100644 index 3d17c948d..000000000 --- a/bugtests/test348.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -[ #490230 ] NotImplemented not implemented -""" - -import support - -class Z: - def __le__(self,o): - return NotImplemented - -z=Z() -assert z<="a" - diff --git a/bugtests/test349.py b/bugtests/test349.py deleted file mode 100644 index 174954b56..000000000 --- a/bugtests/test349.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -[ #494514 ] Python object not gc()'d -""" - -import support -import java, time, sys, cStringIO - -class A: - def __del__(self): - raise KeyError, "dummy" - -try: - sys.stderr = cStringIO.StringIO() - A() - - java.lang.System.gc() - time.sleep(2) -finally: - v = sys.stderr.getvalue() - sys.stderr = sys.__stderr__ - - support.compare(v, "Exception KeyError: .* ignored") - - diff --git a/bugtests/test350.py b/bugtests/test350.py deleted file mode 100644 index 5026c7dbd..000000000 --- a/bugtests/test350.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -[ #495458 ] multi level import from .zip file -""" - -import support -import zipfile, time - -def addZipEntry(zip, name, data): - entry = zipfile.ZipInfo() - entry.filename = name - entry.date_time = time.gmtime(time.time()) - zip.writestr(entry, data) - -zip = zipfile.ZipFile("test350.zip", "w") - -addZipEntry(zip, "Lib/aaa/__init__.py", "") -addZipEntry(zip, "Lib/aaa/bbb/__init__.py", "") -addZipEntry(zip, "Lib/aaa/bbb/ccc/__init__.py", "") -addZipEntry(zip, "Lib/aaa/bbb/ccc/yyy.py", "") -addZipEntry(zip, "Lib/aaa/bbb/xxx.py", "") - -zip.close() - -import sys -sys.path.append("test350.zip/Lib") - -import aaa -import aaa.bbb -import aaa.bbb.ccc -import aaa.bbb.ccc.yyy -import aaa.bbb.xxx - -sys.path.pop() diff --git a/bugtests/test351.py b/bugtests/test351.py deleted file mode 100644 index f19554aa8..000000000 --- a/bugtests/test351.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -[ #489168 ] Parse error, 
java traceback -""" - -import support - -a = 1 - # indented comment -a = 2 - - diff --git a/bugtests/test352.py b/bugtests/test352.py deleted file mode 100644 index 3a8747802..000000000 --- a/bugtests/test352.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -[ #495602 ] os.path.dirname() can result in an NPE -""" - -import support -import os - -try: - os.path.dirname(None) -except TypeError: - pass - -try: - os.path.basename(None) -except TypeError: - pass - -try: - os.path.exists(None) -except TypeError: - pass - -try: - os.path.isabs(None) -except TypeError: - pass - -try: - os.path.isfile(None) -except TypeError: - pass - -try: - os.path.isdir(None) -except TypeError: - pass - -try: - os.path.join(None) -except TypeError: - pass - -try: - os.path.join(None, None) -except TypeError: - pass - -try: - os.path.normcase(None) -except (TypeError, AttributeError): - pass - -try: - if hasattr(os.path, "samefile"): - os.path.samefile(None, None) -except TypeError: - pass - -try: - os.path.abspath(None) -except TypeError: - pass - -try: - os.path.getsize(None) -except TypeError: - pass - -try: - os.path.getmtime(None) -except TypeError: - pass - -try: - os.path.getatime(None) -except TypeError: - pass - diff --git a/bugtests/test353.py b/bugtests/test353.py deleted file mode 100644 index 3afef6ec3..000000000 --- a/bugtests/test353.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -[ #495604 ] imp.find_module fails when None is 2 arg -""" - -import support - -import imp -imp.find_module("re", None) - diff --git a/bugtests/test354.py b/bugtests/test354.py deleted file mode 100644 index f0a844e06..000000000 --- a/bugtests/test354.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -[ 522423 ] cStringIO has no reset() method -""" - -import support - -import cStringIO -s = cStringIO.StringIO("abcdef") -s.read(3) -s.reset() -support.compare(s.read(3), "abc") -support.compare(s.read(3), "def") - diff --git a/bugtests/test355.py b/bugtests/test355.py deleted file mode 100644 index edc72273f..000000000 --- 
a/bugtests/test355.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -[ 522558 ] list() is broken -""" - -import support - -L = [1, 2, 3] -L2 = list(L) -L2.insert(0, 4) -if L == L2: - raise support.TestError('list() should create a copy') - - diff --git a/bugtests/test356.py b/bugtests/test356.py deleted file mode 100644 index bfd07ba2b..000000000 --- a/bugtests/test356.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ 522828 ] struct.pack('>NNs', v) fails for NN > 20 -""" - -import support - -import struct -a = 'abcd' * 8 -struct.pack('>32s', a) - diff --git a/bugtests/test357.py b/bugtests/test357.py deleted file mode 100644 index 9380c3f3c..000000000 --- a/bugtests/test357.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ 517237 ] Binary ops with int and long fail -""" - -import support - -5&7L -5|7L -5^7L - diff --git a/bugtests/test358.py b/bugtests/test358.py deleted file mode 100644 index c8e56f308..000000000 --- a/bugtests/test358.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ 515894 ] Behaviour of "+=" stm. is different from -""" - -import support - -a = [1, 2, 3] -a += "456" -if a != [1, 2, 3, '4', '5', '6']: - raise support.TestError('list += not working') diff --git a/bugtests/test360.py b/bugtests/test360.py deleted file mode 100644 index 4eef9dc9e..000000000 --- a/bugtests/test360.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ 532747 ] for i in iter(d) -""" - -import support - -d = {1:2,3:4} -l = [] -for i in iter(d): l.append(i) - diff --git a/bugtests/test361.py b/bugtests/test361.py deleted file mode 100644 index 92774d13c..000000000 --- a/bugtests/test361.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -Test for [ 551888 ] Opening utf-8 files with codecs fails -""" - -import support - -f = open("test361.out", "w") -f.write("hello") -f.close() - -import codecs -f = codecs.open("test361.out", "r", "utf-8") -print f.read() -f.close() - diff --git a/bugtests/test362.py b/bugtests/test362.py deleted file mode 100644 index 3268737e2..000000000 --- a/bugtests/test362.py +++ /dev/null @@ -1,14 +0,0 
@@ -""" -[ 545235 ] unexpected match with re -""" - -import support - - -import re -rt = re.compile(r'c[^a]*t', re.IGNORECASE) -if rt.match("cat") is not None: - raise support.TestError('Should not match #1') -rs = re.compile(r'c[^a]t', re.IGNORECASE) -if rs.match('cat') is not None: - raise support.TestError('Should not match #2') diff --git a/bugtests/test363.py b/bugtests/test363.py deleted file mode 100644 index 716ac1780..000000000 --- a/bugtests/test363.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -[ 533354 ] bug in xml.dom.minidom.parseString -""" - -import support - -import xml.dom.minidom -DOM = xml.dom.minidom.parseString("") - -#raise support.TestWarning('A test of TestWarning. It is not an error') diff --git a/bugtests/test364.py b/bugtests/test364.py deleted file mode 100644 index 2c414f2b4..000000000 --- a/bugtests/test364.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -[ 531256 ] Constructor problem using newInstance() -""" - -import support - -support.compileJava("test364p/X.java") -support.compileJava("test364p/Y.java") - -from test364p import X,Y -class PyX(X): pass -class PyY(Y): pass -PyX.useClass(PyY) -X() # OK -try: - PyX() # Not OK prints 'TypeError: Proxy instance reused' -except TypeError: - raise support.TestWarning('Class ctor should mix with newInstance()') diff --git a/bugtests/test364p/X.java b/bugtests/test364p/X.java deleted file mode 100644 index 51f3e3b62..000000000 --- a/bugtests/test364p/X.java +++ /dev/null @@ -1,13 +0,0 @@ -package test364p; -public class X { - static Class myClass; - // register a class to construct - public static void useClass(Class cls) { - myClass=cls; - } - Object o; - public X() throws Exception { - o=myClass.newInstance(); - } -} - diff --git a/bugtests/test364p/Y.java b/bugtests/test364p/Y.java deleted file mode 100644 index 5cd68b1ab..000000000 --- a/bugtests/test364p/Y.java +++ /dev/null @@ -1,6 +0,0 @@ -package test364p; -public class Y { - public Y() { - } -} - diff --git a/bugtests/test365.py 
b/bugtests/test365.py deleted file mode 100644 index f14440c72..000000000 --- a/bugtests/test365.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -[ 508111 ] jythonc generates invalid statements -""" - -import support - -support.compileJPythonc("test365c.py", output="test365.err") - -#raise support.TestWarning('A test of TestWarning. It is not an error') diff --git a/bugtests/test365c.py b/bugtests/test365c.py deleted file mode 100644 index c31883f58..000000000 --- a/bugtests/test365c.py +++ /dev/null @@ -1,8 +0,0 @@ - -bar = 1 -def foo(): - return 2 - -bar and foo() - - diff --git a/bugtests/test366i.java b/bugtests/test366i.java deleted file mode 100644 index 42ca09b5a..000000000 --- a/bugtests/test366i.java +++ /dev/null @@ -1,4 +0,0 @@ - -public interface test366i { - public void foo(); -} diff --git a/bugtests/test366j.java b/bugtests/test366j.java deleted file mode 100644 index 238db4b1a..000000000 --- a/bugtests/test366j.java +++ /dev/null @@ -1,4 +0,0 @@ -public class test366j { - public void foo() { } -} - diff --git a/bugtests/test367.py b/bugtests/test367.py deleted file mode 100644 index a7aead262..000000000 --- a/bugtests/test367.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -Test raising a tuple. 
-""" - -import support - -error = "anerror"; - -try: - raise (error,), "value" -except error: - pass -except: - raise support.TestError('Should have been caught by except clause') - diff --git a/bugtests/test368.py b/bugtests/test368.py deleted file mode 100644 index 477e319dc..000000000 --- a/bugtests/test368.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -[ 529242 ] Python singletons deserialization bug -""" - -import support - -from java.io import * -from org.python.util import * - - -SINGL= None -#SINGL= Ellipsis - -class Test(Serializable): - def __init__(self): - self.attr = SINGL - def test(self): - if self.attr is not SINGL: - raise support.TestError("Singleton not unique") - if self.attr != SINGL: - raise support.TestError("Singleton not unique") - -def load(path): - file = File(path) - fileIn = FileInputStream(file) - pyIn = PythonObjectInputStream(fileIn) - pyObj = pyIn.readObject() - pyIn.close() - return pyObj - -def save(obj, path): - fileOut = FileOutputStream(path) - objOut = ObjectOutputStream(fileOut) - objOut.writeObject(obj) - objOut.flush() - objOut.close() - -#print "Testing initial object..." -a = Test() -a.test() -save(a, "test368.out") -b = load("test368.out") -#print "Testing deserialized object..." -b.test() - diff --git a/bugtests/test370.py b/bugtests/test370.py deleted file mode 100644 index 0cd2b970d..000000000 --- a/bugtests/test370.py +++ /dev/null @@ -1,13 +0,0 @@ -""" -[ 562943 ] os.path.getmtime misbehaves on nonfile -""" - -import support - -import os.path -try: - print os.path.getmtime('nonfile') -except OSError: - pass -else: - raise support.TestError('Should raise an OSError') diff --git a/bugtests/test371.py b/bugtests/test371.py deleted file mode 100644 index 8fc9b78c4..000000000 --- a/bugtests/test371.py +++ /dev/null @@ -1,338 +0,0 @@ -""" -[ 577395 ] Outer finally not executed at return. -break/continue through finally. 
-""" -# Local name: bugtests/test371.py - - -import support - -# Some glue to do all tests defined in this module, -# and fail only at end in finalTestReport() -totalTestFailures = 0 -totalTests = 0 - -def testFail(mes): - global totalTestFailures - global totalTests - print 'Fail:', mes - totalTestFailures += 1 - totalTests += 1 - -def testPass(mes): - global totalTests - #print 'Ok:', mes - totalTests += 1 - -def testEq(val, expected, mes): - if val != expected: - testFail('%s: expected %s, got %s' % (mes, repr(expected), repr(val))) - else: - testPass('%s: %s' % (mes, repr(val))) - -def finalTestReport(): - global totalTestFailures - global totalTests - if totalTestFailures > 0: - raise support.TestError('%d of %d test(s) failed in this module' - % (totalTestFailures, totalTests)) - else: - print 'All %d test(s) passed in this module.' % totalTests - - -retval = 'rql' - - -x = [] -def tryfinallyreturn1(): - try: - x.append(1) - return retval - finally: - x.append(2) - -r = tryfinallyreturn1() -testEq(x, [1,2], 'tryfinallyreturn1 side effect') -testEq(r, retval, 'tryfinallyreturn1 return value') - -x = [] -def tryfinallyreturn2(): # fails in jython 2.1, x == [1,2] afterwards - try: - try: - x.append(1) - return retval - finally: - x.append(2) - finally: - x.append(3) - -r = tryfinallyreturn2() -testEq(x, [1,2,3], 'tryfinallyreturn2 side effect') -testEq(r, retval, 'tryfinallyreturn2 return value') - -x = [] -def tryfinallyreturn3(): # fails in jython 2.1, x == [1,2] afterwards - try: - try: - try: - x.append(1) - return retval - finally: - x.append(2) - finally: - x.append(3) - finally: - x.append(4) - -r = tryfinallyreturn3() -testEq(x, [1,2,3,4], 'tryfinallyreturn3 side effect') -testEq(r, retval, 'tryfinallyreturn3 return value') - - -x = [] -def tryfinallyraise1(): - try: - x.append(1) - raise Exception - finally: - x.append(2) - -try: - tryfinallyraise1() -except Exception: - testEq(x, [1,2], 'tryfinallyraise1 side effect') -else: - 
testFail('tryfinallyraise1 did not trow Exception') - - -x = [] -def tryfinallyraise2(): - try: - try: - x.append(1) - raise Exception - finally: - x.append(2) - finally: - x.append(3) - -try: - tryfinallyraise2() -except Exception: - testEq(x, [1,2,3], 'tryfinallyraise2 side effect') -else: - testFail('tryfinallyraise2 did not trow Exception') - -x = [] -def tryfinallyraise3(): - try: - try: - try: - x.append(1) - raise Exception - finally: - x.append(2) - finally: - x.append(3) - finally: - x.append(4) - -try: - tryfinallyraise3() -except Exception: - testEq(x, [1,2,3,4], 'tryfinallyraise3 side effect') -else: - testFail('tryfinallyraise3 did not trow Exception') - - -x = [] -def fortryfinallycontinuereturn1(): - for i in range(3): - try: - x.append(2 * i) - if i == 0: - continue - return retval - finally: - x.append(2 * i + 1) - -r = fortryfinallycontinuereturn1() -testEq(x, [0,1,2,3], 'fortryfinallycontinuereturn1 side effect') -testEq(r, retval, 'fortryfinallycontinuereturn1 return value') - -x = [] -def fortryfinallycontinuereturn2(): - for i in range(3): - try: - try: - x.append(3 * i) - if i == 0: - continue - return retval - finally: - x.append(3 * i + 1) - finally: - x.append(3 * i + 2) - -r = fortryfinallycontinuereturn2() -testEq(x, [0,1,2,3,4,5], 'fortryfinallycontinuereturn2 side effect') -testEq(r, retval, 'fortryfinallycontinuereturn2 return value') - -x = [] -def fortryfinallycontinuereturn3(): # fails in jython 2.1, x == [1,2] afterwards - for i in range(3): - try: - try: - try: - x.append(4 * i) - if i == 0: - continue - return retval - finally: - x.append(4 * i + 1) - finally: - x.append(4 * i + 2) - finally: - x.append(4 * i + 3) - - -r = fortryfinallycontinuereturn3() -testEq(x, [0,1,2,3,4,5,6,7], 'fortryfinallycontinuereturn3 side effect') -testEq(r, retval, 'fortryfinallycontinuereturn3 return value') - - -x = [] -def fortryfinallybreak1(): - for i in range(3): - try: - x.append(2 * i) - if i == 1: - break - finally: - x.append(2 * i + 1) - 
return retval - -r = fortryfinallybreak1() -testEq(x, [0,1,2,3], 'fortryfinallybreak1 side effect') -testEq(r, retval, 'fortryfinallybreak1 return value') - -x = [] -def fortryfinallybreak2(): - for i in range(3): - try: - try: - x.append(3 * i) - if i == 1: - break - finally: - x.append(3 * i + 1) - finally: - x.append(3 * i + 2) - return retval - -r = fortryfinallybreak2() -testEq(x, [0,1,2,3,4,5], 'fortryfinallybreak2 side effect') -testEq(r, retval, 'fortryfinallybreak2 return value') - - -x = [] -def fortryfinallycontinueraise1(): - for i in range(3): - try: - x.append(2 * i) - if i == 0: - continue - raise Exception - finally: - x.append(2 * i + 1) - -try: - fortryfinallycontinueraise1() -except Exception: - testEq(x, [0,1,2,3], 'fortryfinallycontinueraise1 side effect') -else: - testFail('fortryfinallycontinueraise1 did not trow Exception') - -x = [] -def fortryfinallycontinueraise2(): - for i in range(3): - try: - try: - x.append(3 * i) - if i == 0: - continue - raise Exception - finally: - x.append(3 * i + 1) - finally: - x.append(3 * i + 2) - -try: - fortryfinallycontinueraise2() -except Exception: - testEq(x, [0,1,2,3,4,5], 'fortryfinallycontinueraise2 side effect') -else: - testFail('fortryfinallycontinueraise2 did not trow Exception') - - -x = [] -def tryfortrycontinueraise1(): - try: - for i in range(3): - try: - x.append(2 * i) - if i == 0: - continue - raise Exception - finally: - x.append(2 * i + 1) - finally: - x.append('last') - -try: - tryfortrycontinueraise1() -except Exception: - testEq(x, [0,1,2,3,'last'], 'tryfortrycontinueraise1 side effect') -else: - testFail('tryfortrycontinueraise1 did not trow Exception') - - -x = [] -def tryfortrybreak1(): - try: - for i in range(3): - try: - x.append(2 * i) - if i == 1: - break - finally: - x.append(2 * i + 1) - return retval - finally: - x.append('last') - -r = tryfortrybreak1() -testEq(x, [0,1,2,3,'last'], 'tryfortrybreak1 side effect') -testEq(r, retval, 'tryfortrybreak1 return value') - - -x = [] 
-def tryfortrycontinuereturn1(): - try: - for i in range(3): - try: - x.append(2 * i) - if i == 0: - continue - return retval - finally: - x.append(2 * i + 1) - finally: - x.append('last') - -r = tryfortrycontinuereturn1() -testEq(x, [0,1,2,3,'last'], 'tryfortryfinallyreturn1 side effect') -testEq(r, retval, 'tryfortrycontinuereturn1 return value') - -finalTestReport() - diff --git a/bugtests/test372.py b/bugtests/test372.py deleted file mode 100644 index 4cc8923b2..000000000 --- a/bugtests/test372.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -Test for patch "[ 577728 ] struct.java now accepts 64bits ints" -""" - -import support - -from struct import * - -#unsigned long check -try: - pack('Q',-1) - raise support.TestError('Error: unsigned long should not work') -except (TypeError, error): - pass - -big_long=0x10000000000000000L -#oversized unsigned long check -try: - print pack('64bits ints' -except OverflowError: - pass - -try: - print pack('>Q',big_long) - print 'Error, should not try to pack >64bits ints' -except OverflowError: - pass - -#oversized positive signed long check -try: - print pack('64bits ints' -except OverflowError: - pass - -try: - print pack('>q',big_long) - print 'Error, should not try to pack >64bits ints' -except OverflowError: - pass - -#oversized negative signed long check -big_long=-big_long -try: - print pack('64bits ints' -except OverflowError: - pass - -try: - print pack('>q',big_long) - print 'Error, should not try to pack >64bits ints' -except OverflowError: - pass - -u_data=(0x1L,0x10000L,0x100000000L) -s_data=(0x1L,-0x10000L,0x0FFFFFFFFL,-0x100000000L) -#internal pack-unpack coherence check - -s=pack(' %s ==> %s"%(`u_data`,`s`,`unpack('QQQ',u_data[0],u_data[1],u_data[2]) -if u_data!=unpack('>QQQ',s): - raise support.TestError("internal coherence error: %s ==> %s ==> %s"%(`u_data`,`s`,`unpack('>QQQ',s)`)) - -s=pack(' %s ==> %s"%(`s_data`,`s`,`unpack('qqqq',s_data[0],s_data[1],s_data[2],s_data[3]) -if s_data!=unpack('>qqqq',s): - raise 
support.TestError("internal coherence error: %s ==> %s ==> %s"%(`s_data`,`s`,`unpack('>qqqq',s)`)) - -#external unpack coherence check -string_from_CPython='\x00\x00\x00\x00\x00\x01\x11p\xff\xff\xff\xff\xff\xfe\xc7\x80\xff\xff\xff\xff\xff\xff\xff\xfb\x00\x00\x00\x00\x00\x018\x80' -if (70000,-80000,-5,80000)!=unpack('!Qqqq',string_from_CPython): - raise support.TestError('Error unpacking from CPython !') - diff --git a/bugtests/test374.py b/bugtests/test374.py deleted file mode 100644 index 6b2ecaaff..000000000 --- a/bugtests/test374.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -[ 631017 ] Private fields mismangled -""" - -import support - -class _A: - __value = 1 - -class B(_A): - _initial_value = 2 - def foo(self): - assert self._A__value == 1 - assert self._initial_value == 2 - - -B().foo() - -#raise support.TestWarning('A test of TestWarning. It is not an error') diff --git a/bugtests/test375.py b/bugtests/test375.py deleted file mode 100644 index 6d5445103..000000000 --- a/bugtests/test375.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -[ 631017 ] Private fields mismangled -""" - -import support - -support.compileJPythonc("test375c.py", output="test375.err", - jar="test375.jar", core=1) -support.runJava("test375c", classpath="test375.jar") diff --git a/bugtests/test375c.py b/bugtests/test375c.py deleted file mode 100644 index 6b2ecaaff..000000000 --- a/bugtests/test375c.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -[ 631017 ] Private fields mismangled -""" - -import support - -class _A: - __value = 1 - -class B(_A): - _initial_value = 2 - def foo(self): - assert self._A__value == 1 - assert self._initial_value == 2 - - -B().foo() - -#raise support.TestWarning('A test of TestWarning. It is not an error') diff --git a/bugtests/test376.py b/bugtests/test376.py deleted file mode 100644 index 046e7a4f0..000000000 --- a/bugtests/test376.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -[ 631035 ] Negative repeat cause java exception. 
-""" - -import support - -assert "0"*-1 == "" - diff --git a/bugtests/test377.py b/bugtests/test377.py deleted file mode 100644 index 7d66c1901..000000000 --- a/bugtests/test377.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -[ 631047 ] %e formatting of float fails. -""" - -import support - -assert '%.*e' % (0, float(1000)) == '1e+003' - diff --git a/bugtests/test378.py b/bugtests/test378.py deleted file mode 100644 index ca4c0024c..000000000 --- a/bugtests/test378.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -[ 631430 ] read(-1) uses wrong fileposition. -""" - -import support - -f = open("test378.out", "wb") -f.write("123456789") -f.close() - -f = open("test378.out") -f.read(4) -s = f.read(); -f.close(); - -assert s == "56789" - - diff --git a/bugtests/test379.py b/bugtests/test379.py deleted file mode 100644 index d7bfebaad..000000000 --- a/bugtests/test379.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -test for -[ 730156 ] java.lang.VerifyError with very simple Python source -""" -import support - -code = """ -def method(): - try: - for dummy in [1,2,3]: - try: - return "result" - except: - pass - finally: - pass -""" - -import java.lang - -try: - c = compile(code,"","exec") -except java.lang.VerifyError,e: - raise support.TestWarning("try-for-try-finally still produces invalid bytecode") - -d = {} - -exec code in d - -if d['method']() != 'result': - raise support.TestError("wrong result") \ No newline at end of file diff --git a/bugtests/test380.py b/bugtests/test380.py deleted file mode 100644 index 5eefea08a..000000000 --- a/bugtests/test380.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -fixed broken id checks with pickle and copy that depends on id working correctly -""" - -import support - -d = {} - -import java - -clash_id = java.lang.System.identityHashCode - -for i in xrange(100000): - s = ['test',i] - j = clash_id(s) - if d.has_key(j): - break - d[j] = s - -s1 = s -s0 = d[j] - -data = [s0,s1,s0] - -#print data - -import pickle -import cPickle - -def check(ctxt,data0,data1): - if 
data0 != data1: - raise support.TestError,"data corrupted in %s because of id clashes: %s != %s" % (ctxt.__name__,data0,data1) - -def pik_test(pikmod,data): - pik =pikmod.dumps(data,1) - data1 = pikmod.loads(pik) - check(pikmod,data,data1) - -pik_test(cPickle,data) -pik_test(pickle,data) - -import copy - -check(copy.deepcopy,data,copy.deepcopy(data)) diff --git a/bugtests/test381.py b/bugtests/test381.py deleted file mode 100644 index 953cd43ca..000000000 --- a/bugtests/test381.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -namespace (PyStringMap) deletion-confused insert bug -""" - -#============================================== -# we need some stuff to find a pair of keys -# with the same initial index in hash table - - -from java.lang.System import identityHashCode -from java.lang import String - - -def hashCode(key): - return identityHashCode(String.intern(key)) - - -def scanKeys(base, r, sz, deep): - for i in xrange(65,91): #chars 'A'-'Z' - key = base+chr(i) - #sz is hash table size - if hashCode(key)%sz == r: - break - if deep: - key = scanKeys(base, r, sz, deep-1) - if key is not None: - break - return key - - -# find a key with the same hash index as key1 -def findPairKey(key1, sz=7): - #in empty PyStringMap hash table has size 7 - r=hashCode(key1)%sz - base="" - for deep in xrange(0,15): - key = scanKeys(base, r, sz, deep) - if key is not None: return key - - - -class AA: pass -d = AA().__dict__ - - -# now d is an empty PyStringMap dict - - -key1="key1" -#find a pair key for key1 -key2 = findPairKey(key1) - - -# key2 consists of upper case characters (by construction) -# and always differs from key1 -#print "key1=",repr(key1)," key2=",repr(key2) - - -d[key2] = "foo" #key2 occupies initial slot -d[key1] = "value1" #key1 occupies next slot -del d[key2] #initial slot is marked by "" -d[key1] = "value2" #key1 replaces "" in the first - #slot but not old key1 value! -del d[key1] -#we hope key1 is not in the dict any more... -try: - v=d[key1] - #print "Oops! 
d[key1]=",repr(v) #Oops! Magically ressurected! - raise support.TestError,"namespace deletion-confused insert bug" -except KeyError: - #print "OK" - pass \ No newline at end of file diff --git a/bugtests/test382.py b/bugtests/test382.py deleted file mode 100644 index cc0f4c1e3..000000000 --- a/bugtests/test382.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -catching frame wasn't captured in a traceback -""" - -import sys - -def check(tb,expt_lines): - assert tb.tb_frame is sys._getframe(1),"catching frame should be included" - lines=[] - while tb: - lines.append(tb.tb_lineno) - tb = tb.tb_next - assert expt_lines==lines, "bogus line numbers: %s vs. expected %s" % (lines,expt_lines) - -def f(): - try: - raise KeyError # 17 - except: - raise - -try: - f() # 22 -except: - t,e,tb = sys.exc_info() - check(tb,[22,17]) - -try: - f() # 28 -except KeyError,e: - t,e,tb = sys.exc_info() - check(tb,[28,17]) - -try: - 1/0 # 34 -except: - t,e,tb = sys.exc_info() - check(tb,[34]) - -try: - try: - 1/0 # 41 - except: - raise -except: - t,e,tb = sys.exc_info() - check(tb,[41]) diff --git a/bugtests/test383.py b/bugtests/test383.py deleted file mode 100644 index d900317f2..000000000 --- a/bugtests/test383.py +++ /dev/null @@ -1,9 +0,0 @@ - -try: - from java.util.regex import Pattern - p = Pattern.compile("xxx") - m = p.split("ABCDEFG") -except ImportError, e: - import support - raise support.TestWarning("JVM version >= 1.4 needed to test PyString -> CharSequence") - diff --git a/bugtests/test384.py b/bugtests/test384.py deleted file mode 100644 index 62ad38330..000000000 --- a/bugtests/test384.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -test for patch [ 1153003 ] -""" - -import support -import jarray -import java -from org.python.core import ArgParser, PyObject - -try: - # test(1, arg1=2) - args = jarray.array([1,2], PyObject) - kwds = jarray.array(['arg1'], java.lang.String) - ArgParser('test', args, kwds, 'arg1', 'arg2') -except TypeError: - pass -else: - raise support.TestError('Should raise 
a TypeError') diff --git a/bugtests/test385.py b/bugtests/test385.py deleted file mode 100644 index faae3b358..000000000 --- a/bugtests/test385.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Try importing from a jar after sys.path.append(jar) - -This nails down a bug reported here: - http://sourceforge.net/mailarchive/message.php?msg_id=14088259 -which only occurred on systems where java.io.File.separatorChar is not a forward slash ('/') - -since - at the moment - jython modules hide java packages with the same name from import, -use a unique java package name for the sake of this test -""" - -import jarmaker -import support -import sys - -jarfn, package, clazz = jarmaker.mkjar() -# append this jar file to sys.path -sys.path.append(jarfn) - -# try to import the class -importStmt = "from %s import %s" % (package, clazz) -try: - exec(importStmt) -finally: - sys.path.remove(jarfn) diff --git a/bugtests/test386.py b/bugtests/test386.py deleted file mode 100644 index 062074378..000000000 --- a/bugtests/test386.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Test the standalone starting (java -jar jython.jar some.py) -""" - -import support -import sys -import os - -import support_config as cfg - -from java.io import File - -TESTDIR = "test386jar" -JYTHON_DEV_JAR = "jython-dev.jar" -TEST_PY_NAME = TESTDIR +"/test386called.py" - -def checkTestDir(): - if not os.path.exists(TESTDIR): - raise AssertionError, TESTDIR + " does not exist" - if not os.path.exists(TEST_PY_NAME): - raise AssertionError, TEST_PY_NAME + " does not exist" - -# create a jython standalone jar file: -# add the contents of jython-dev.jar and /Lib files to a new jython-dev.jar -def mkjar(): - jarFile = File(TESTDIR, JYTHON_DEV_JAR) - jarPacker = support.JarPacker(jarFile) - jarPacker.addJarFile(File(cfg.jython_home + "/%s" % JYTHON_DEV_JAR)) - jarPacker.addDirectory(File(cfg.jython_home + "/Lib")) - jarPacker.close() - return jarFile - - -checkTestDir() -mkjar() -jarFileName = "%s/%s" % (TESTDIR, JYTHON_DEV_JAR) 
-support.runJavaJar(jarFileName, TEST_PY_NAME) \ No newline at end of file diff --git a/bugtests/test386jar/test386called.py b/bugtests/test386jar/test386called.py deleted file mode 100644 index 8dcbee2be..000000000 --- a/bugtests/test386jar/test386called.py +++ /dev/null @@ -1,49 +0,0 @@ -# make sure we are in 'standalone' mode, without package scan -import sys -skipName = "python.cachedir.skip" -if not sys.registry.containsKey(skipName): - raise AssertionError, skipName + " is missing" -if not "true" == sys.registry.getProperty(skipName): - raise AssertionError, skipName + " is not true" - -# import a non-builtin module which is not imported by default on startup -# this verifies that /Lib .py files can be imported -# this fixes bug [ 1194650 ] -import getopt - -# an early java import # (only works since java.util is an already loaded package) -from java import util -util # used to give a NameError - -# import java specific py modules -import os - -# now do some java imports which previously failed without a package scan -# this (most of the time) solves the famous 'no module named java' problem -import java # (only works since java is an already loaded package) -import java.lang # (only works since java.lang is an already loaded package) - -# explicit imports -from java.math import BigDecimal -from java.math import BigDecimal, BigInteger -from java.lang.reflect import Method - -# verify the self healing -try: - # assume package javax.imageio.event was never touched before - import javax.imageio.event - raise AssertionError, "ImportError expected when executing 'import javax.imageio.event'" -except ImportError: - pass -from javax.imageio.event import IIOReadProgressListener - -# importing this twice was a problem -from org.python.core import PySystemState -from org.python.core import PySystemState - -# verify explicit imports of the form 'import java.net.URL' -import javax.security.auth.Policy -javax -javax.security -javax.security.auth 
-javax.security.auth.Policy diff --git a/bugtests/test387.py b/bugtests/test387.py deleted file mode 100644 index ede638bc0..000000000 --- a/bugtests/test387.py +++ /dev/null @@ -1,10 +0,0 @@ -import support - -import test387p.test387m - -import sys - -if not 'test387p.difflib' in sys.modules: - raise support.TestError, 'Cached module for sibling module import miss should exist in sys.modules' -if not sys.modules['test387p.difflib'] is None: - raise support.TestError, 'Cached module for sibling module import miss should be None in sys.modules' diff --git a/bugtests/test387p/__init__.py b/bugtests/test387p/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/bugtests/test387p/test387m.py b/bugtests/test387p/test387m.py deleted file mode 100644 index 3c9179c47..000000000 --- a/bugtests/test387p/test387m.py +++ /dev/null @@ -1 +0,0 @@ -import difflib diff --git a/bugtests/test388.py b/bugtests/test388.py deleted file mode 100644 index c4ce72a50..000000000 --- a/bugtests/test388.py +++ /dev/null @@ -1,14 +0,0 @@ -''' -This checks that Python imports precedes the loading of Java directories from the -classpath. - -Reported in bug 1421812. -''' -import support - -import test388m - -if not hasattr(test388m, 'x'): - raise support.TestError, 'Python modules should be imported before directories for Java' - - diff --git a/bugtests/test388m.py b/bugtests/test388m.py deleted file mode 100644 index 0553d3a2e..000000000 --- a/bugtests/test388m.py +++ /dev/null @@ -1 +0,0 @@ -x = 7 diff --git a/bugtests/test390.py b/bugtests/test390.py deleted file mode 100644 index 3ac4f7d59..000000000 --- a/bugtests/test390.py +++ /dev/null @@ -1,19 +0,0 @@ -''' -Checks that exceptions imported in import * will catch thrown subclass excetions -in an except statement. - -Reported in bugs 1531644 and 1269872. -''' -import support -import sys - -from java.net import Socket -from java.io import * - -try: - # Do a connection that will yield a ECONNREFUSED -> ConnectException. 
- conn = Socket('localhost', 8342) -except IOException, e: - pass -except: - raise support.TestError, "A %s was raised which is an IOExcption but except IOException above didn't catch it" % sys.exc_info()[0] diff --git a/bugtests/test392.py b/bugtests/test392.py deleted file mode 100644 index 6160f0dc7..000000000 --- a/bugtests/test392.py +++ /dev/null @@ -1,24 +0,0 @@ -''' -From bug #1284344 -import a compiled module moved to a new location and check that its __file__ -matches its new spot. -''' - -fname = 'test392m.py' - -open(fname, 'w').close()#touch! - -import test392m -del test392m -import os -compiledName = 'test392m$py.class' -os.rename(compiledName, 'test392LibDir/%s' % compiledName) -os.remove(fname) - -import support -ret = support.runJython('test392importer.py', expectError=True) -if ret == 1: - raise support.TestError, '__file__ on test392m reflected where it was compiled, not where it was imported.' -elif ret != 0: - raise support.TestError, 'running test392importer.py exited with an unexpected code' - diff --git a/bugtests/test392LibDir/__init__.py b/bugtests/test392LibDir/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/bugtests/test392importer.py b/bugtests/test392importer.py deleted file mode 100644 index ef650a976..000000000 --- a/bugtests/test392importer.py +++ /dev/null @@ -1,5 +0,0 @@ -import sys -sys.path.append('test392LibDir') -import test392m - -assert 'test392LibDir' in test392m.__file__, "test392m.__file__ doesn't contain test392LibDir, the directory it's in" diff --git a/bugtests/test393.py b/bugtests/test393.py deleted file mode 100644 index 2e3f33e8f..000000000 --- a/bugtests/test393.py +++ /dev/null @@ -1,6 +0,0 @@ -''' -From bug #1548501 -Check that __file__ is set on a file run directly from jython. 
-''' -import support -support.runJython('test393m.py') diff --git a/bugtests/test393m.py b/bugtests/test393m.py deleted file mode 100644 index 42750504c..000000000 --- a/bugtests/test393m.py +++ /dev/null @@ -1 +0,0 @@ -assert __file__ == "test393m.py" diff --git a/bugtests/test394.py b/bugtests/test394.py deleted file mode 100644 index 0bf6a89ad..000000000 --- a/bugtests/test394.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Test standalone starting, -where the manifest of a .jar refers to jython.jar - - -This used to give an error importing site, as follows: - -error importing site -Traceback (innermost last): - File "C:\workspace\jython\bugtests\test394jar\jython-dev.jar\Lib/site.py", line 210, in ? -TypeError: unsupported operand type(s) for +: 'NoneType' and 'str' -Traceback (innermost last): - File "C:/workspace/jython/bugtests/test394.py", line 71, in ? - File "C:\workspace\jython\bugtests\support.py", line 100, in runJavaJar - File "C:\workspace\jython\bugtests\support.py", line 65, in execCmd -TestError: cmd /C "C:/Programme/Java/jdk1.5.0_09/bin/java.exe -jar test394jar/run.jar " failed with -1 - -""" - -import support -import sys -import os - -import support_config as cfg - -from java.io import File - -TESTDIR = "test394jar" -JYTHON_DEV_JAR = "jython-dev.jar" -RUN_JAR = "run.jar" -TEST_PY_NAME = TESTDIR +"/test394called.py" -CLAZZ = "Runner" -MANIFEST = "MANIFEST.MF" - -def checkTestDir(): - if not os.path.exists(TESTDIR): - raise AssertionError, TESTDIR + " does not exist" - if not os.path.exists(TEST_PY_NAME): - raise AssertionError, TEST_PY_NAME + " does not exist" - javaFileName = TESTDIR + "/" + CLAZZ + ".java" - if not os.path.exists(javaFileName): - raise AssertionError, javaFileName + " does not exist" - manifestFileName = TESTDIR + "/" + MANIFEST - if not os.path.exists(manifestFileName): - raise AssertionError, manifestFileName + " does not exist" - - -# create a jython standalone jar file: -# add the contents of jython-dev.jar and /Lib files to a new 
jython-dev.jar -def mkJythonJar(): - jarFile = File(TESTDIR, JYTHON_DEV_JAR) - jarPacker = support.JarPacker(jarFile) - jarPacker.addJarFile(File(cfg.jython_home + "/%s" % JYTHON_DEV_JAR)) - jarPacker.addDirectory(File(cfg.jython_home + "/Lib")) - jarPacker.close() - return jarFile - -# make a java class calling jython main -def mkJavaClass(): - support.compileJava("%s/%s.java" % (TESTDIR, CLAZZ)) - -# create a runnable jar file with a manifest referring to jython-dev.jar -def mkRunJar(): - jarFile = File(TESTDIR, RUN_JAR) - manifestFile = File(TESTDIR, MANIFEST) - jarPacker = support.JarPacker(jarFile) - jarPacker.addManifestFile(manifestFile) - jarPacker.addFile(File(TESTDIR, CLAZZ+".class"), TESTDIR) - jarPacker.close() - - - -checkTestDir() -mkJythonJar() -mkJavaClass() -mkRunJar() -jarFileName = "%s/%s" % (TESTDIR, RUN_JAR) -support.runJavaJar(jarFileName) \ No newline at end of file diff --git a/bugtests/test394jar/MANIFEST.MF b/bugtests/test394jar/MANIFEST.MF deleted file mode 100644 index 2ae770cf8..000000000 --- a/bugtests/test394jar/MANIFEST.MF +++ /dev/null @@ -1,4 +0,0 @@ -Manifest-Version: 1.0 -Class-Path: . 
jython-dev.jar -Main-Class: test394jar.Runner - diff --git a/bugtests/test394jar/Runner.java b/bugtests/test394jar/Runner.java deleted file mode 100644 index df1c28cbe..000000000 --- a/bugtests/test394jar/Runner.java +++ /dev/null @@ -1,11 +0,0 @@ -package test394jar; - -import org.python.util.jython; - -public class Runner { - - public static void main(String args[]) { - jython.main(new String[] { "test394jar/test394called.py" }); - } - -} diff --git a/bugtests/test394jar/test394called.py b/bugtests/test394jar/test394called.py deleted file mode 100644 index d25d49e0f..000000000 --- a/bugtests/test394jar/test394called.py +++ /dev/null @@ -1 +0,0 @@ -a = 1 \ No newline at end of file diff --git a/bugtests/test397.py b/bugtests/test397.py deleted file mode 100644 index 031f1f2bd..000000000 --- a/bugtests/test397.py +++ /dev/null @@ -1,41 +0,0 @@ -''' -Checks that files are closed in three situations: -1. Garbage collection/finalization close -2. Regular close -3. Shutdown time, close out open PyFiles -''' - -import os -import support - -from java.io import File -from java.lang import System, Thread - -def check(fn='test.txt'): - f = File(fn) - if not f.exists(): - raise support.TestError('"%s" should exist' % fn) - if not f.length(): - raise support.TestError('"%s" should have contents' % fn) - os.remove(fn) - - -open("garbagecollected", "w").write("test") - -#Wait up to 2 seconds for garbage collected to disappear -System.gc() -for i in range(10): - if not os.path.exists('garbagecollected'): - break - Thread.sleep(200) - -check("garbagecollected") - -f = open("normalclose", "w") -f.write("test") -f.close() -check("normalclose") - -#test397m writes to "shutdown" and exits -support.runJython('test397m.py') -check('shutdown') diff --git a/bugtests/test397m.py b/bugtests/test397m.py deleted file mode 100644 index 3d8547b98..000000000 --- a/bugtests/test397m.py +++ /dev/null @@ -1 +0,0 @@ -open("shutdown", "w").write('exiting') diff --git a/bugtests/test398.py 
b/bugtests/test398.py deleted file mode 100644 index 3f626d1a9..000000000 --- a/bugtests/test398.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -test fix for bug #1642285 - -Try importing from a jar which contains a .class file which is completely empty (0bytes). -Make sure that the bad class file is skipped while good class file is processed. - -Although this is an aberrant .class file, it has been seen in the wild (see bug report, found in a weblogic -jar). - -""" - -import support -import sys -import os - -from java.io import File -from java.lang import String -from java.util import Properties -from org.python.core.packagecache import SysPackageManager - -PACKAGE = "test398javapackage" -CACHEDIR = "test398cache" -BAD_CLAZZ = "test398j1" -GOOD_CLAZZ = "test398j2" -JARDIR = "test398jar" -JARFILE = "test398.jar" -GOOD_CLAZZ_FILE = File(PACKAGE, "%s.class" % GOOD_CLAZZ) # java.io.File -BAD_CLAZZ_FILE = File(PACKAGE, "%s.class" % BAD_CLAZZ) # java.io.File - -def mkdir(dir): - if not os.path.exists(dir): - os.mkdir(dir) - -def mkjavaclass(): - mkdir(PACKAGE) - f = open("%s/%s.java" % (PACKAGE, GOOD_CLAZZ), "w") - f.write(""" -package %s; -public class %s { -} -""" % (PACKAGE, GOOD_CLAZZ)) - f.close() - support.compileJava("%s/%s.java" % (PACKAGE, GOOD_CLAZZ)) - -def mkbadclass(): - mkdir(PACKAGE) - f = open("%s/%s.class" % (PACKAGE, BAD_CLAZZ), "w") - f.close() - -def mkjar(): - mkdir(JARDIR) - jarFile = File(JARDIR, JARFILE) - jarPacker = support.JarPacker(jarFile, bufsize=128) - jarPacker.addFile(GOOD_CLAZZ_FILE, parentDirName=PACKAGE) - jarPacker.addFile(BAD_CLAZZ_FILE, parentDirName=PACKAGE) - jarPacker.close() - return jarFile - -def mkprops(): - props = Properties() - props.setProperty("java.ext.dirs", String(JARDIR)); - props.setProperty("python.security.respectJavaAccessibility", String("true")); - return props - -# create a .jar file containing a .class file -mkjavaclass() -mkbadclass() -jarFile = mkjar() -props = mkprops() -man = SysPackageManager(File(CACHEDIR, 
"packages"), props) -assert os.path.exists(os.path.join(CACHEDIR, "packages", "test398.pkc")) diff --git a/bugtests/test400.py b/bugtests/test400.py deleted file mode 100644 index 1993c657a..000000000 --- a/bugtests/test400.py +++ /dev/null @@ -1,10 +0,0 @@ -import support - -try: - import x - raise support.TestError, "x shouldn't be on sys.path until after this" -except: - pass -import sys -sys.path.append('test400') -import x diff --git a/bugtests/test400/x/__init__.py b/bugtests/test400/x/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/bugtests/test402.py b/bugtests/test402.py deleted file mode 100644 index 8940bec3d..000000000 --- a/bugtests/test402.py +++ /dev/null @@ -1,24 +0,0 @@ -''' - -test402m adds a jar file to sys.path and imports a package from it. The first -run ensures that, by default, package scanning is enabled for jars added to -sys.path. The second run turns off package scanning, so it checks that the -package is unimportable without the scan. Finally, we run test402n which adds -the same jar to its sys.path and imports a fully qualified class from it. We -run it with package scanning off to make sure that even without package -scanning, jars are correctly added to sys.path and fully qualified class -imports work on them. 
- -''' - -import support -import jarmaker - -jarmaker.mkjar() - -support.runJython('test402m.py') -ret = support.runJython('test402m.py', error='test402.err', - javaargs='-Dpython.cachedir.skip=true', expectError=1) -if ret == 0: - raise support.TestError('Successfully imported a package from a jar on sys.path without caching!') -support.runJython('test402n.py', javaargs='-Dpython.cachedir.skip=true') diff --git a/bugtests/test402m.py b/bugtests/test402m.py deleted file mode 100644 index 615b4ac69..000000000 --- a/bugtests/test402m.py +++ /dev/null @@ -1,4 +0,0 @@ -import sys -sys.path.append('simplejar/simple.jar') - -import javapackage diff --git a/bugtests/test402n.py b/bugtests/test402n.py deleted file mode 100644 index 750c84c56..000000000 --- a/bugtests/test402n.py +++ /dev/null @@ -1,4 +0,0 @@ -import sys -sys.path.append('simplejar/simple.jar') - -from javapackage import JavaClass diff --git a/build-tools/build-tools.gradle b/build-tools/build-tools.gradle new file mode 100644 index 000000000..7836e2900 --- /dev/null +++ b/build-tools/build-tools.gradle @@ -0,0 +1,27 @@ +/* + * build-tools.gradle + * + * Build the tools on which the Jython Project depends at *build time*. + * This is structured as an independent build. It is included in the main + * Jython build through its settings file, and its products as dependencies. 
+ */ + +plugins { + id 'java-library' +} + +description = 'Tools for building Jython' +group = 'org.python' +//version = '0.4.0-SNAPSHOT' + + +repositories { + mavenLocal() + mavenCentral() +} + +dependencies { + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.10.+' + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.10.+' +} + diff --git a/build-tools/python/lib/compile_examples.py b/build-tools/python/lib/compile_examples.py new file mode 100644 index 000000000..27ebf11e0 --- /dev/null +++ b/build-tools/python/lib/compile_examples.py @@ -0,0 +1,169 @@ +# Reading compiled Python files + +import sys, os.path +import marshal, py_compile, dis + +# Normally you don't get a .pyc file if you just run a program. +# You do get a .pyc file from compiling a module. +# It is written in ./__pycache__ and called NAME.cpython-311.pyc + +CACHE = '__pycache__' +COMPILER = 'cpython-311' + + +HELP = """Command: compile_examples srcdir dstdir + + where: + srcdir is the root of the Python examples in the project source + typically ./src/test/pythonExample + dstdir is the root of the Python examples in the build tree + typically ./build/generated/sorces/pythonExample/test + """ + + + +def getcode(filename): + "Read a compiled file and return the code object" + with open(filename, 'rb') as f: + # Skip header. See run_pyc_file() in pythonrun.c + f.read(16) + return marshal.load(f) + + +def getobj(filename): + "Read an object from a file" + with open(filename, 'rb') as f: + return marshal.load(f) + + +def filetime(path_elements, name_elements): + """Compose a file path and report when last modified + + path_elements: file path elements (will be os.path.join'd) + name_elements: file name elements (will be '.'.join'd) + + The arguments are used to locate a file (which need not exist) + and the function returns the name (always) and the last modified + time (or zero if the file does not exist). 
+ """ + + file = os.path.join(*path_elements, '.'.join(name_elements)) + try: + time = os.path.getmtime(file) + except OSError: + time = 0 + #print(f"{file:>40s}: {time:15.3f}") + return file, time + + +def copy(srcfile, dstfile): + "Copy one text file to another" + print(f" Copy: {os.path.basename(srcfile)}") + with open(srcfile, 'rt', encoding='utf-8') as s: + ensure_dir(os.path.dirname(dstfile)) + with open(dstfile, 'wt', encoding='utf-8') as d: + for line in s: + d.write(line) + + +def execute(pycfile, varfile, disfile): + "Execute a program and save the local variables" + print(f" Generate: {os.path.basename(disfile)}") + co = getcode(pycfile) + with open(disfile, 'wt', encoding='utf-8') as f: + # Dumps code blocks of nested functions + dis.dis(co, file=f) + print(f" Generate: {os.path.basename(varfile)}") + gbl = dict() + exec(co, gbl) + # Remove items forced in by exec + del gbl['__builtins__'] + # try: + # print(" ", list(gbl.keys())) + # except UnicodeEncodeError: + # pass + with open(varfile, 'wb') as f: + marshal.dump(gbl, f) + + +def generate(reldir, name, source, generated): + """Generate test reldir/name.py and results + + reldir: the relative directory path (from source/generated) + name: just the name part of the Python file + source: directory of the source files + generated: directory of the compiled/generated files + """ + srcfile, srctime = filetime([source, reldir], [name, 'py']) + #print(f" {name}.py") + #print(f" source: {srctime:15.3f}") + + dstfile, dsttime = filetime([generated, reldir], [name, 'py']) + #print(f" build: {dsttime:15.3f}") + + pycfile, pyctime = filetime([generated, reldir, CACHE], + [name, COMPILER, 'pyc']) + #print(f" .pyc: {pyctime:15.3f}") + + varfile, vartime = filetime([generated, reldir, CACHE], + [name, COMPILER, 'var']) + #print(f" .var: {vartime:15.3f}") + + disfile, distime = filetime([generated, reldir, CACHE], + [name, COMPILER, 'dis']) + #print(f" .dis: {distime:15.3f}") + + if dsttime < srctime: + # Copy, 
compile, run and store + copy(srcfile, dstfile) + dsttime = srctime + + if pyctime < dsttime: + # Compile, run and store + print(f" Compile: {os.path.basename(pycfile)}") + py_compile.compile(dstfile) + pyctime = os.path.getmtime(pycfile) + + if vartime < pyctime or distime < pyctime: + # Run and store + execute(pycfile, varfile, disfile) + + +def ensure_dir(d): + if not os.path.exists(d): + os.makedirs(d) + if not (ok:=os.path.isdir(d)): + print(f"Not a directory '{d}'", file=sys.stderr) + return ok + + +def main(source, generated): + + for dirpath, _, files in os.walk(source): + #print(f"{dirpath}:") + reldir = os.path.relpath(dirpath, source) + for file in files: + parts = file.rsplit('.', 1) + if len(parts) > 1 and parts[1] == "py": + name = parts[0] + #print(f" {name}:") + generate(reldir, name, source, generated) + +def show_help(): + print(HELP, file=sys.stderr) + +# -------------------------------------------------------------------- + +if len(sys.argv) == 3: + source, generated = sys.argv[1:] + if ensure_dir(source) and ensure_dir(generated): + cwd = os.getcwd() + source = os.path.relpath(source) + generated = os.path.relpath(generated) + main(source, generated) + else: + show_help() +else: + show_help() + + diff --git a/build-tools/python/lib/core/PyFloat.py b/build-tools/python/lib/core/PyFloat.py new file mode 100644 index 000000000..704bf233f --- /dev/null +++ b/build-tools/python/lib/core/PyFloat.py @@ -0,0 +1,362 @@ +# PyFloat.py: A generator for Java files that define the Python float + +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. + +# This generator writes PyFloatMethods.java and PyFloatBinops.java . + +from dataclasses import dataclass +from typing import Callable + +from . 
import ImplementationGenerator, TypeInfo, WorkingType, OpInfo + +@dataclass +class FloatTypeInfo(TypeInfo): + "Information about a type and templates for conversion to float types" + # There is a template (a function) to generate an expression + # that converts *from* this type to each named Java type that may be + # a "working type" when implementing an operation. + + # That's only 'double', but conceivably primitive 'float' later. + + # Template for expression that converts to primitive double + as_double: str = None + + +# Useful in cases where an argument is already the right type +itself = lambda x: x + +# A constant FloatTypeInfo for each argument type that we might have to +# convert to a "working type" when implementing an operation. +# Arguments are: name, min_working_type, +# as_double +PY_FLOAT_CLASS = FloatTypeInfo('PyFloat', WorkingType.DOUBLE, + lambda x: f'{x}.value') +OBJECT_CLASS = FloatTypeInfo('Object', WorkingType.OBJECT, + lambda x: f'toDouble({x})') +DOUBLE_CLASS = FloatTypeInfo('Double', WorkingType.DOUBLE, + itself) + +# Accepted types that may appear as the other operand in binary +# operations specialised to both types. +PY_LONG_CLASS = FloatTypeInfo('PyLong', WorkingType.DOUBLE, + lambda x: f'convertToDouble({x}.value)') +BIG_INTEGER_CLASS = FloatTypeInfo('BigInteger', WorkingType.DOUBLE, + lambda x: f'convertToDouble({x})') +INTEGER_CLASS = FloatTypeInfo('Integer', WorkingType.DOUBLE, + lambda x: f'{x}.doubleValue()') +BOOLEAN_CLASS = FloatTypeInfo('Boolean', WorkingType.DOUBLE, + lambda x: f'({x} ? 1.0 : 0.0)') + +# A constant FloatTypeInfo for types appearing as return types only. +#(No conversion to a working type is expected.) +# convert to a "working type" when implementing an operation. 
+PRIMITIVE_BOOLEAN = FloatTypeInfo('boolean', WorkingType.BOOLEAN) +PRIMITIVE_INT = FloatTypeInfo('int', WorkingType.INT) +PRIMITIVE_DOUBLE = FloatTypeInfo('double', WorkingType.DOUBLE) + + +@dataclass +class UnaryOpInfo(OpInfo): + # There is a template (a function) to generate an expression for + # each Java working type in which the result may be evaluated. + + # That's only 'double', but conceivably primitive 'float' later. + + # Template for when the working type is Java double + double_op: Callable + + +@dataclass +class BinaryOpInfo(OpInfo): + # There is a template (a function) to generate the body + body_method: Callable + + # There is a template (a function) to generate an expression for + # each Java working type in which the result may be evaluated. + # That's only 'double', but conceivably primitive 'float' later. + + # Template for when the working type is Java double + double_op: Callable + + # Also create class-specific binop specialisations + class_specific: bool = False + + +def unary_method(op:UnaryOpInfo, t:FloatTypeInfo): + "Template generating the body of a unary operation." + # Decide the width at which to work with this type and op + iw = max(op.min_working_type.value, t.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.DOUBLE: + return _unary_method_double(op, t) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _unary_method_double(op:UnaryOpInfo, t:FloatTypeInfo): + "Template for unary methods when the working type is DOUBLE" + return f''' + return {op.double_op(t.as_double("self"))}; + ''' + + +def binary_floatmethod(op:BinaryOpInfo, + t1:FloatTypeInfo, n1, + t2:FloatTypeInfo, n2): + """Template for a binary operation with float result. + + Argument coercions are made according to their static type then + the operation is applied which must yield a result in the working + type. This is only appropriate where the return from the generated + method should be a Python float (e.g. 
not comparisons, __divmod__). + """ + # Decide the width at which to work with these types and op + iw = max(op.min_working_type.value, + t1.min_working_type.value, + t2.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.DOUBLE: + return _binary_floatmethod_double(op, t1, n1, t2, n2) + elif w == WorkingType.OBJECT: + return _binary_floatmethod_obj(op, t1, n1, t2, n2) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _binary_floatmethod_double(op:BinaryOpInfo, + t1:FloatTypeInfo, n1, + t2:FloatTypeInfo, n2): + "Template for binary float methods when the working type is DOUBLE" + return f''' + return {op.double_op(t1.as_double(n1), t2.as_double(n2))}; + ''' + +def _binary_floatmethod_obj(op:BinaryOpInfo, + t1:FloatTypeInfo, n1, + t2:FloatTypeInfo, n2): + "Template for binary float methods when the working type is OBJECT" + return f''' + try {{ + return {op.double_op(t1.as_double(n1), t2.as_double(n2))}; + }} catch (NoConversion e) {{ + return Py.NotImplemented; + }} + ''' + + +def binary_method(op:BinaryOpInfo, + t1:FloatTypeInfo, n1, + t2:FloatTypeInfo, n2): + """Template for a binary operation with any result type. 
+ + Argument coercions are made according to their static type then + the operation is applied and the result returned without further + processing.""" + # Decide the width at which to work with these types and op + iw = max(op.min_working_type.value, + t1.min_working_type.value, + t2.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.DOUBLE: + return _binary_method_double(op, t1, n1, t2, n2) + elif w == WorkingType.OBJECT: + return _binary_method_obj(op, t1, n1, t2, n2) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _binary_method_double(op:BinaryOpInfo, + t1:FloatTypeInfo, n1, + t2:FloatTypeInfo, n2): + "Template for binary methods when the working type is DOUBLE" + return f''' + return {op.double_op(t1.as_double(n1), t2.as_double(n2))}; + ''' + +def _binary_method_obj(op:BinaryOpInfo, + t1:FloatTypeInfo, n1, + t2:FloatTypeInfo, n2): + "Template for binary methods when the working type is OBJECT" + return f''' + try {{ + return {op.double_op(t1.as_double(n1), t2.as_double(n2))}; + }} catch (NoConversion e) {{ + return Py.NotImplemented; + }} + ''' + + +class PyFloatGenerator(ImplementationGenerator): + + # The canonical and adopted implementations in PyFloat.java. + ACCEPTED_CLASSES = [PY_FLOAT_CLASS, DOUBLE_CLASS] + + # These classes may occur as the second operand in binary + # operations. Order is not significant. + OPERAND_CLASSES = ACCEPTED_CLASSES + [ + # XXX Consider *not* specialising ... + # Although PyLong and BigInteger are accepted operands, we + # decline to specialise, since the implementation would be + # equivalent to the one in PyFloatMethods. + PY_LONG_CLASS, + BIG_INTEGER_CLASS, + INTEGER_CLASS, + BOOLEAN_CLASS, + ] + + # Operations may simply be codified as a return expression, since + # all operand types may be converted to primitive double. 
+ + UNARY_OPS = [ + # Arguments are: name, return_type, min_working_type, + # double_op + UnaryOpInfo('__abs__', OBJECT_CLASS, WorkingType.DOUBLE, + lambda x: f'Math.abs({x})'), + UnaryOpInfo('__neg__', OBJECT_CLASS, WorkingType.DOUBLE, + lambda x: f'-{x}'), + UnaryOpInfo('__pos__', OBJECT_CLASS, WorkingType.DOUBLE, + lambda x: f'{x}'), + UnaryOpInfo('__bool__', PRIMITIVE_BOOLEAN, WorkingType.DOUBLE, + lambda x: f'{x} != 0.0'), + UnaryOpInfo('__hash__', PRIMITIVE_INT, WorkingType.DOUBLE, + lambda x: f'Double.hashCode({x})'), + ] + BINARY_OPS = [ + # Arguments are: name, return_type, working_type, + # body_method, + # double_op, + # with_class_specific_binops + BinaryOpInfo('__add__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{x} + {y}', + True), + BinaryOpInfo('__radd__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{y} + {x}', + True), + BinaryOpInfo('__sub__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{x} - {y}', + True), + BinaryOpInfo('__rsub__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{y} - {x}', + True), + BinaryOpInfo('__mul__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{x} * {y}', + True), + BinaryOpInfo('__rmul__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{y} * {x}', + True), + + BinaryOpInfo('__truediv__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{x} / nonzero({y})', + True), + BinaryOpInfo('__rtruediv__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'{y} / nonzero({x})', + True), + + BinaryOpInfo('__floordiv__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'floordiv({x}, {y})', + False), + BinaryOpInfo('__rfloordiv__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'floordiv({y}, {x})', + False), + BinaryOpInfo('__mod__', OBJECT_CLASS, WorkingType.DOUBLE, + 
binary_floatmethod, + lambda x, y: f'mod({x}, {y})', + False), + BinaryOpInfo('__rmod__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_floatmethod, + lambda x, y: f'mod({y}, {x})', + False), + + BinaryOpInfo('__divmod__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'divmod({x}, {y})', + False), + BinaryOpInfo('__rdivmod__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'divmod({y}, {x})', + False), + + BinaryOpInfo('__lt__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'{x} < {y}'), + BinaryOpInfo('__le__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'{x} <= {y}'), + BinaryOpInfo('__eq__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'{x} == {y}'), + BinaryOpInfo('__ne__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'{x} != {y}'), + BinaryOpInfo('__gt__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'{x} > {y}'), + BinaryOpInfo('__ge__', OBJECT_CLASS, WorkingType.DOUBLE, + binary_method, + lambda x, y: f'{x} >= {y}'), + ] + + # Emit methods selectable by a single type + def special_methods(self, e): + + # Emit the unary operations + for op in self.UNARY_OPS: + self.emit_heading(e, op.name) + for t in self.ACCEPTED_CLASSES: + self.special_unary(e, op, t) + + # Emit the binary operations op(T, Object) + for op in self.BINARY_OPS: + self.emit_heading(e, op.name) + for vt in self.ACCEPTED_CLASSES: + self.special_binary(e, op, vt, OBJECT_CLASS) + + # Emit methods selectable by a pair of types (for call sites) + def special_binops(self, e): + + # Emit the binary operations and comparisons + for op in self.BINARY_OPS: + if op.class_specific: + self.emit_heading(e, op.name) + for vt in self.ACCEPTED_CLASSES: + for wt in self.OPERAND_CLASSES: + self.special_binary(e, op, vt, wt) + + def special_unary(self, e, op:UnaryOpInfo, t): + e.emit('static ').emit(op.return_type.name).emit(' ') + 
e.emit(op.name).emit('(').emit(t.name).emit(' self) {') + with e.indentation(): + method = unary_method(op, t) + method = self.left_justify(method) + e.emit_lines(method) + e.emit_line('}').emit_line() + + # Emit one binary operation, for example: + # private static Object __add__(Double v, Integer w) { + # return v.doubleValue() + w.doubleValue(); + # } + def special_binary(self, e, op:BinaryOpInfo, t1, t2): + reflected = op.name.startswith('__r') and \ + op.name not in ("__rshift__", "__round__", "__repr__") + n1, n2 = 'vw' if not reflected else 'wv' + e.emit('static ').emit(op.return_type.name).emit(' ') + e.emit(op.name).emit('(') + e.emit(t1.name).emit(' ').emit(n1).emit(', ') + e.emit(t2.name).emit(' ').emit(n2).emit(') {') + with e.indentation(): + method = op.body_method(op, t1, n1, t2, n2) + method = self.left_justify(method) + e.emit_lines(method) + e.emit_line('}').emit_line() + diff --git a/build-tools/python/lib/core/PyLong.py b/build-tools/python/lib/core/PyLong.py new file mode 100644 index 000000000..8ef1b6f03 --- /dev/null +++ b/build-tools/python/lib/core/PyLong.py @@ -0,0 +1,517 @@ +# PyLong.py: A generator for Java files that define the Python int + +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. + +# This generator writes PyLongMethods.java and PyLongBinops.java . + +from dataclasses import dataclass +from typing import Callable + +from . import ImplementationGenerator, TypeInfo, WorkingType, OpInfo + + +@dataclass +class IntTypeInfo(TypeInfo): + "Information about a type and templates for conversion to int types" + # There is a template (a function) to generate an expression + # that converts *from* this type to each named Java type that may be + # a "working type" when implementing an operation. 
+ + # Template for expression that converts to BigInteger + as_big: Callable = None + # Template for expression that converts to primitive Java long + as_long: Callable = None + # Template for expression that converts to primitive Java int + as_int: Callable = None + + +# Useful in cases where an argument is already the right type +itself = lambda x: x + +# A constant IntTypeInfo for each argument type that we might have to +# convert to a "working type" when implementing an operation. +# Arguments are: name, min_working_type, +# as_big, as_long, as_int +PY_LONG_CLASS = IntTypeInfo('PyLong', WorkingType.BIG, + lambda x: f'{x}.value') +OBJECT_CLASS = IntTypeInfo('Object', WorkingType.OBJECT, + lambda x: f'toBig({x})') +BIG_INTEGER_CLASS = IntTypeInfo('BigInteger', WorkingType.BIG, + itself) +INTEGER_CLASS = IntTypeInfo('Integer', WorkingType.INT, + lambda x: f'BigInteger.valueOf({x})', + lambda x: f'((long) {x})', + itself) +BOOLEAN_CLASS = IntTypeInfo('Boolean', WorkingType.INT, + lambda x: f'({x} ? ONE : ZERO)', + lambda x: f'({x} ? 1L : 0L)', + lambda x: f'({x} ? 1 : 0)') +DOUBLE_CLASS = IntTypeInfo('Double', WorkingType.OBJECT) + +PRIMITIVE_BOOLEAN = IntTypeInfo('boolean', WorkingType.BOOLEAN) +PRIMITIVE_INT = IntTypeInfo('int', WorkingType.INT) + + +@dataclass +class UnaryOpInfo(OpInfo): + # There is a template (a function) to generate an expression for + # each Java working type in which the result may be evaluated. + + # Working type is Java BigInteger + big_op: Callable + # Working type is Java long + long_op: Callable + # Working type is Java int + int_op: Callable + + +@dataclass +class BinaryOpInfo(OpInfo): + # There is a template (a function) to generate the method body + body_method: Callable + + # There is a template (a function) to generate an expression for + # each Java working type in which the result may be evaluated. 
+ + # Working type is Java BigInteger + big_op: Callable + # Working type is Java long + long_op: Callable + # Working type is Java int + int_op: Callable + + # Also create class-specific binop specialisations + class_specific: bool = False + + +def unary_method(op:UnaryOpInfo, t:IntTypeInfo): + "Template generating the body of a unary operation." + # Decide the width at which to work with this type and op + iw = max(op.min_working_type.value, t.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.INT: + return _unary_method_int(op, t) + elif w == WorkingType.LONG: + return _unary_method_long(op, t) + elif w == WorkingType.BIG: + return _unary_method_big(op, t) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _unary_method_int(op:UnaryOpInfo, t:IntTypeInfo): + "Template for unary methods when the working type is INT" + return f''' + return {op.int_op(t.as_int("self"))}; + ''' + +def _unary_method_long(op:UnaryOpInfo, t:IntTypeInfo): + "Template for unary methods when the working type is LONG" + return f''' + long r = {op.long_op(t.as_long("self"))}; + int s = (int) r; + return s == r ? s : BigInteger.valueOf(r); + ''' + +def _unary_method_big(op:UnaryOpInfo, t:IntTypeInfo): + "Template for unary methods when the working type is BIG" + return f''' + return {op.big_op(t.as_big("self"))}; + ''' + + +def binary_intmethod(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + """Template for a binary operation with int result. + + Argument coercions are made according to their static type then + the operation is applied which must yield a result in the working + type. Processing is applied to that result to choose an integer + representation. This is only appropriate where the return from the + generated method should be a Python int (e.g. 
not comparisons).""" + # Decide the width at which to work with these types and op + iw = max(op.min_working_type.value, + t1.min_working_type.value, + t2.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.INT: + return _binary_intmethod_int(op, t1, n1, t2, n2) + elif w == WorkingType.LONG: + return _binary_intmethod_long(op, t1, n1, t2, n2) + elif w == WorkingType.BIG: + return _binary_intmethod_big(op, t1, n1, t2, n2) + elif w == WorkingType.OBJECT: + return _binary_intmethod_obj(op, t1, n1, t2, n2) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _binary_intmethod_int(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary int methods when the working type is INT" + return f''' + return {op.int_op(t1.as_int(n1), t2.as_int(n2))}; + ''' + +def _binary_intmethod_long(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary int methods when the working type is LONG" + return f''' + long r = {op.long_op(t1.as_long(n1), t2.as_long(n2))}; + int s = (int) r; + return s == r ? s : BigInteger.valueOf(r); + ''' + +def _binary_intmethod_big(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary int methods when the working type is BIG" + return f''' + return toInt({op.big_op(t1.as_big(n1), t2.as_big(n2))}); + ''' + +def _binary_intmethod_obj(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary int methods when the working type is OBJECT" + return f''' + try {{ + return toInt({op.big_op(t1.as_big(n1), t2.as_big(n2))}); + }} catch (NoConversion e) {{ + return Py.NotImplemented; + }} + ''' + + +def binary_method(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + """Template for a binary operation with any result type. 
+ + Argument coercions are made according to their static type then + the operation is applied and the result returned without further + processing.""" + # Decide the width at which to work with these types and op + iw = max(op.min_working_type.value, + t1.min_working_type.value, + t2.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.INT: + return _binary_method_int(op, t1, n1, t2, n2) + elif w == WorkingType.LONG: + return _binary_method_long(op, t1, n1, t2, n2) + elif w == WorkingType.BIG: + return _binary_method_big(op, t1, n1, t2, n2) + elif w == WorkingType.OBJECT: + return _binary_method_obj(op, t1, n1, t2, n2) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _binary_method_int(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary methods when the working type is INT" + return f''' + return {op.int_op(t1.as_int(n1), t2.as_int(n2))}; + ''' + +def _binary_method_long(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary methods when the working type is LONG" + return f''' + return {op.long_op(t1.as_long(n1), t2.as_long(n2))}; + ''' + +def _binary_method_big(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary methods when the working type is BIG" + return f''' + return {op.big_op(t1.as_big(n1), t2.as_big(n2))}; + ''' + +def _binary_method_obj(op:BinaryOpInfo, + t1:IntTypeInfo, n1, + t2:IntTypeInfo, n2): + "Template for binary methods when the working type is OBJECT" + return f''' + try {{ + return {op.big_op(t1.as_big(n1), t2.as_big(n2))}; + }} catch (NoConversion e) {{ + return Py.NotImplemented; + }} + ''' + + +class PyLongGenerator(ImplementationGenerator): + + # The canonical and adopted implementations in PyInteger.java, + # as there are no further accepted self-classes. 
+ ACCEPTED_CLASSES = [ + PY_LONG_CLASS, + BIG_INTEGER_CLASS, + INTEGER_CLASS, + BOOLEAN_CLASS, + ] + OPERAND_CLASSES = ACCEPTED_CLASSES + [ + ] + + # Operations have to provide versions in which long and + # BigInteger are the common type to which arguments are converted. + + UNARY_OPS = [ + # Arguments are: name, return_type, min_working_type, + # big_op, long_op, int_op + UnaryOpInfo('__abs__', OBJECT_CLASS, WorkingType.LONG, + lambda x: f'{x}.abs()', + lambda x: f'Math.abs({x})', + lambda x: f'Math.abs({x})'), + UnaryOpInfo('__index__', OBJECT_CLASS, WorkingType.INT, + itself, + itself, + itself), + UnaryOpInfo('__int__', OBJECT_CLASS, WorkingType.INT, + itself, + itself, + itself), + UnaryOpInfo('__invert__', OBJECT_CLASS, WorkingType.INT, + lambda x: f'{x}.not()', + lambda x: f'~{x}', + lambda x: f'~{x}'), + UnaryOpInfo('__neg__', OBJECT_CLASS, WorkingType.LONG, + lambda x: f'{x}.negate()', + lambda x: f'-{x}', + lambda x: f'-{x}'), + UnaryOpInfo('__float__', OBJECT_CLASS, WorkingType.INT, + lambda x: f'PyLong.convertToDouble({x})', + lambda x: f'((double) {x})', + lambda x: f'((double) {x})'), + UnaryOpInfo('__bool__', PRIMITIVE_BOOLEAN, WorkingType.BOOLEAN, + lambda x: f'{x}.signum() != 0', + lambda x: f'{x} != 0L', + lambda x: f'{x} != 0'), + UnaryOpInfo('__hash__', PRIMITIVE_INT, WorkingType.INT, + lambda x: f'{x}.hashCode()', + lambda x: f'{x}.hashCode()', + lambda x: f'{x}'), + ] + + BINARY_OPS = [ + # Arguments are: name, return_type, working_type, + # body_method, + # big_op, long_op, int_op, + # with_class_specific_binops + BinaryOpInfo('__add__', OBJECT_CLASS, WorkingType.LONG, + binary_intmethod, + lambda x, y: f'{x}.add({y})', + lambda x, y: f'{x} + {y}', + lambda x, y: f'{x} + {y}', + True), + BinaryOpInfo('__radd__', OBJECT_CLASS, WorkingType.LONG, + binary_intmethod, + lambda x, y: f'{y}.add({x})', + lambda x, y: f'{y} + {x}', + lambda x, y: f'{y} + {x}', + True), + BinaryOpInfo('__sub__', OBJECT_CLASS, WorkingType.LONG, + binary_intmethod, + 
lambda x, y: f'{x}.subtract({y})', + lambda x, y: f'{x} - {y}', + lambda x, y: f'{x} - {y}', + True), + BinaryOpInfo('__rsub__', OBJECT_CLASS, WorkingType.LONG, + binary_intmethod, + lambda x, y: f'{y}.subtract({x})', + lambda x, y: f'{y} - {x}', + lambda x, y: f'{y} - {x}', + True), + BinaryOpInfo('__mul__', OBJECT_CLASS, WorkingType.LONG, + binary_intmethod, + lambda x, y: f'{x}.multiply({y})', + lambda x, y: f'{x} * {y}', + lambda x, y: f'{x} * {y}', + True), + BinaryOpInfo('__rmul__', OBJECT_CLASS, WorkingType.LONG, + binary_intmethod, + lambda x, y: f'{y}.multiply({x})', + lambda x, y: f'{y} * {x}', + lambda x, y: f'{y} * {x}', + True), + BinaryOpInfo('__floordiv__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'divide({x}, {y})', + lambda x, y: f'divide({x}, {y})', + lambda x, y: f'divide({x}, {y})', + True), + BinaryOpInfo('__rfloordiv__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'divide({y}, {x})', + lambda x, y: f'divide({y}, {x})', + lambda x, y: f'divide({y}, {x})', + True), + BinaryOpInfo('__mod__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'modulo({x}, {y})', + lambda x, y: f'modulo({x}, {y})', + lambda x, y: f'modulo({x}, {y})', + True), + BinaryOpInfo('__rmod__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'modulo({y}, {x})', + lambda x, y: f'modulo({y}, {x})', + lambda x, y: f'modulo({y}, {x})', + True), + + BinaryOpInfo('__divmod__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'divmod({x}, {y})', + lambda x, y: f'divmod({x}, {y})', + lambda x, y: f'divmod({x}, {y})', + True), + BinaryOpInfo('__rdivmod__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'divmod({y}, {x})', + lambda x, y: f'divmod({y}, {x})', + lambda x, y: f'divmod({y}, {x})', + True), + + BinaryOpInfo('__truediv__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'trueDivide({x}, {y})', + lambda x, y: f'trueDivide({x}, {y})', + lambda 
x, y: f'(double){x} / (double){y}', + True), + BinaryOpInfo('__rtruediv__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'trueDivide({y}, {x})', + lambda x, y: f'trueDivide({y}, {x})', + lambda x, y: f'(double){y} / (double){x}', + True), + + BinaryOpInfo('__and__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'{x}.and({y})', + lambda x, y: f'{x} & {y}', + lambda x, y: f'{x} & {y}', + True), + BinaryOpInfo('__rand__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'{y}.and({x})', + lambda x, y: f'{y} & {x}', + lambda x, y: f'{y} & {x}', + True), + BinaryOpInfo('__or__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'{x}.or({y})', + lambda x, y: f'{x} | {y}', + lambda x, y: f'{x} | {y}', + True), + BinaryOpInfo('__ror__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'{y}.or({x})', + lambda x, y: f'{y} | {x}', + lambda x, y: f'{y} | {x}'), + BinaryOpInfo('__xor__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'{x}.xor({y})', + lambda x, y: f'{x} ^ {y}', + lambda x, y: f'{x} ^ {y}', + True), + BinaryOpInfo('__rxor__', OBJECT_CLASS, WorkingType.INT, + binary_intmethod, + lambda x, y: f'{y}.xor({x})', + lambda x, y: f'{y} ^ {x}', + lambda x, y: f'{y} ^ {x}', + True), + + BinaryOpInfo('__lt__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'{x}.compareTo({y}) < 0', + lambda x, y: f'{x} < {y}', + lambda x, y: f'{x} < {y}'), + BinaryOpInfo('__le__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'{x}.compareTo({y}) <= 0', + lambda x, y: f'{x} <= {y}', + lambda x, y: f'{x} <= {y}'), + BinaryOpInfo('__eq__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'{x}.compareTo({y}) == 0', + lambda x, y: f'{x} == {y}', + lambda x, y: f'{x} == {y}'), + BinaryOpInfo('__ne__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'{x}.compareTo({y}) != 0', + lambda x, y: f'{x} != {y}', + lambda x, y: 
f'{x} != {y}'), + BinaryOpInfo('__gt__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'{x}.compareTo({y}) > 0', + lambda x, y: f'{x} > {y}', + lambda x, y: f'{x} > {y}'), + BinaryOpInfo('__ge__', OBJECT_CLASS, WorkingType.INT, + binary_method, + lambda x, y: f'{x}.compareTo({y}) >= 0', + lambda x, y: f'{x} >= {y}', + lambda x, y: f'{x} >= {y}'), + ] + + # Emit methods selectable by a single type + def special_methods(self, e): + + # Emit the unary operations + for op in self.UNARY_OPS: + self.emit_heading(e, op.name) + for t in self.ACCEPTED_CLASSES: + self.special_unary(e, op, t) + + # Emit the binary operations op(T, Object) + for op in self.BINARY_OPS: + self.emit_heading(e, op.name) + for vt in self.ACCEPTED_CLASSES: + self.special_binary(e, op, vt, OBJECT_CLASS) + + # Emit methods selectable by a pair of types (for call sites) + def special_binops(self, e): + + # Emit the binary operations and comparisons + for op in self.BINARY_OPS: + if op.class_specific: + self.emit_heading(e, op.name) + for vt in self.ACCEPTED_CLASSES: + for wt in self.OPERAND_CLASSES: + self.special_binary(e, op, vt, wt) + + def special_unary(self, e, op:UnaryOpInfo, t): + e.emit('static ').emit(op.return_type.name).emit(' ') + e.emit(op.name).emit('(').emit(t.name).emit(' self) {') + with e.indentation(): + method = unary_method(op, t) + method = self.left_justify(method) + e.emit_lines(method) + e.emit_line('}').emit_line() + + # Emit one binary operation, for example: + # private static Object __add__(Integer v, BigInteger w) { + # return v + toInt(w); + # } + def special_binary(self, e, op:BinaryOpInfo, t1, t2): + reflected = op.name.startswith('__r') and \ + op.name not in ("__rshift__", "__round__", "__repr__") + n1, n2 = 'vw' if not reflected else 'wv' + e.emit('static ').emit(op.return_type.name).emit(' ') + e.emit(op.name).emit('(') + e.emit(t1.name).emit(' ').emit(n1).emit(', ') + e.emit(t2.name).emit(' ').emit(n2).emit(') {') + with e.indentation(): + method 
= op.body_method(op, t1, n1, t2, n2) + method = self.left_justify(method) + e.emit_lines(method) + e.emit_line('}').emit_line() + diff --git a/build-tools/python/lib/core/PyUnicode.py b/build-tools/python/lib/core/PyUnicode.py new file mode 100644 index 000000000..56b71d3f6 --- /dev/null +++ b/build-tools/python/lib/core/PyUnicode.py @@ -0,0 +1,312 @@ +# PyUnicode.py: A generator for Java files that define the Python str + +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. + +# This generator writes PyUnicodeMethods.java and PyUnicodeBinops.java . + +# At the time of this writing, only the comparison operations are +# generated. Unlike arithmetic types, str does not have a large +# set of operations with a uniform pattern, so it is more effective +# to hand-craft the small number of cases needed. + +from dataclasses import dataclass +from typing import Callable + +from . import ImplementationGenerator, TypeInfo, WorkingType, OpInfo + + +@dataclass +class StrTypeInfo(TypeInfo): + "Information about a type and templates for conversion to str types" + # There is a template (a function) to generate an expression + # that converts *from* this type to each named Java type. + # Template for expression that converts to PySequence + as_seq: Callable = None + # Template for expression that converts to String + as_str: Callable = None + +# Useful in cases where an argument is already the right type +itself = lambda x: x + +PY_UNICODE_CLASS = StrTypeInfo('PyUnicode', WorkingType.SEQ, + lambda x: f'{x}.adapt()', + itself) +STRING_CLASS = StrTypeInfo('String', WorkingType.STRING, + lambda x: f'adapt({x})', + itself) +OBJECT_CLASS = StrTypeInfo('Object', WorkingType.OBJECT, + lambda x: f'adapt({x})') + + +@dataclass +class UnaryOpInfo(OpInfo): + # There is a template (a function) to generate an expression + # for each working Java type to which argument may be converted. 
+ # Working type is Java String + str_op: Callable + + +@dataclass +class BinaryOpInfo(OpInfo): + # There is a template (a function) to generate the body + body_method: Callable + # There is a template (a function) to generate an expression + # for each working Java type to which arguments may be converted. + # Working type is Java String + str_op: Callable + # Working type is PySequence + seq_op: Callable + # Also create class-specific binop specialisations + class_specific: bool = False + + +def unary_method(op:UnaryOpInfo, t:StrTypeInfo): + "Template generating the body of a unary operation." + # Decide the width at which to work with this type and op + iw = max(op.min_working_type.value, t.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.STRING: + return _unary_method_str(op, t) + elif w == WorkingType.SEQ: + return _unary_method_seq(op, t) + elif w == WorkingType.OBJECT: + return _unary_method_obj(op, t) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _unary_method_str(op:UnaryOpInfo, t:StrTypeInfo): + "Template for unary methods when the working type is STRING" + return f''' + return {op.str_op(t.as_str("self"))}; + ''' + +def _unary_method_seq(op:UnaryOpInfo, t:StrTypeInfo): + "Template for unary methods when the working type is LONG" + return f''' + return {op.seq_op(t.as_seq("self"))}; + ''' + +def _unary_method_obj(op:UnaryOpInfo, t:StrTypeInfo): + "Template for unary methods when the working type is BIG" + return f''' + return {op.seq_op(t.as_seq("self"))}; + ''' + + +def binary_method(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + "Template generating the body of a binary operation." 
+ # Decide the width at which to work with these typse and op + iw = max(op.min_working_type.value, + t1.min_working_type.value, + t2.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.STRING: + return _binary_method_str(op, t1, n1, t2, n2) + elif w == WorkingType.SEQ: + return _binary_method_seq(op, t1, n1, t2, n2) + elif w == WorkingType.OBJECT: + return _binary_method_obj(op, t1, n1, t2, n2) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + + +def _binary_method_str(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + return f''' + return {op.str_op(t1.as_str(n1), t2.as_str(n2))}; + ''' + +def _binary_method_seq(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + return f''' + return {op.seq_op(t1.as_seq(n1), t2.as_seq(n2))}; + ''' + +def _binary_method_obj(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + return f''' + try {{ + return {op.seq_op(t1.as_seq(n1), t2.as_seq(n2))}; + }} catch (NoConversion e) {{ + return Py.NotImplemented; + }} + ''' + +def comparison(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + "Template generating the body of a comparison operation." 
+ iw = max(op.min_working_type.value, + t1.min_working_type.value, + t2.min_working_type.value) + w = WorkingType(iw) + if w == WorkingType.STRING: + return _comparison_str(op, t1, n1, t2, n2) + elif w == WorkingType.SEQ: + return _comparison_seq(op, t1, n1, t2, n2) + elif w == WorkingType.OBJECT: + return _comparison_obj(op, t1, n1, t2, n2) + else: + raise ValueError( + f"Cannot make method body for {op.name} and {w}") + +def _comparison_guard(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + if t2.name == "Object" or t2.name == t1.name: + # The objects might be identical, permitting a shortcut + name = op.name + if name == "__eq__" or name == "__le__" or name == "__ge__": + return f'{n1} == {n2} || ' + elif name == "__ne__" or name == "__lt__" or name == "__gt__": + return f'{n1} != {n2} && ' + return "" + +def _comparison_str(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + guard = _comparison_guard(op, t1, n1, t2, n2) + return f''' + return {guard}{op.int_op(t1.as_str(n1), t2.as_str(n2))}; + ''' + +def _comparison_seq(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + guard = _comparison_guard(op, t1, n1, t2, n2) + return f''' + return {guard}{op.seq_op(t1.as_seq(n1), t2.as_seq(n2))}; + ''' + +def _comparison_obj(op:BinaryOpInfo, t1:StrTypeInfo, n1, t2:StrTypeInfo, n2): + guard = _comparison_guard(op, t1, n1, t2, n2) + return f''' + try {{ + return {guard}{op.seq_op(t1.as_seq(n1), t2.as_seq(n2))}; + }} catch (NoConversion e) {{ + return Py.NotImplemented; + }} + ''' + +class PyUnicodeGenerator(ImplementationGenerator): + + # The canonical and adopted implementations in PyUnicode.java, + # as there are no further accepted self-classes. + ACCEPTED_CLASSES = [ + PY_UNICODE_CLASS, + STRING_CLASS, + ] + OPERAND_CLASSES = ACCEPTED_CLASSES + [ + ] + + # Operations have to provide versions in which long and + # BigInteger are the common type to which arguments are converted. 
+ + UNARY_OPS = [ + # Arguments are: name, min_working_type, + # body_method, + # str_op + ] + + BINARY_OPS = [ + # Arguments are: name, return_type, working_type, + # body_method, + # str_op, seq_op, + # class_specific + + # BinaryOpInfo('__add__', OBJECT_CLASS, WorkingType.STRING, + # binary_method, + # lambda x, y: f'{x} + ({y})', + # lambda x, y: f'{x}.concat({y})', + # True), + + BinaryOpInfo('__lt__', OBJECT_CLASS, WorkingType.STRING, + comparison, + lambda x, y: f'{x}.compareTo({y}) < 0', + lambda x, y: f'{x}.compareTo({y}) < 0'), + BinaryOpInfo('__le__', OBJECT_CLASS, WorkingType.STRING, + comparison, + lambda x, y: f'{x}.compareTo({y}) <= 0', + lambda x, y: f'{x}.compareTo({y}) <= 0'), + BinaryOpInfo('__eq__', OBJECT_CLASS, WorkingType.STRING, + comparison, + lambda x, y: f'eq({x}, {y})', + lambda x, y: f'eq({x}, {y})'), + BinaryOpInfo('__ne__', OBJECT_CLASS, WorkingType.STRING, + comparison, + lambda x, y: f'!eq({x}, ({y})', + lambda x, y: f'!eq({x}, {y})'), + BinaryOpInfo('__gt__', OBJECT_CLASS, WorkingType.STRING, + comparison, + lambda x, y: f'{x}.compareTo({y}) > 0', + lambda x, y: f'{x}.compareTo({y}) > 0'), + BinaryOpInfo('__ge__', OBJECT_CLASS, WorkingType.STRING, + comparison, + lambda x, y: f'{x}.compareTo({y}) >= 0', + lambda x, y: f'{x}.compareTo({y}) >= 0'), + ] + + # Emit methods selectable by a single type + def special_methods(self, e): + + # Emit the unary operations + for op in self.UNARY_OPS: + e.emit_line(f'// {"-"*(60-len(op.name))} {op.name}') + e.emit_line() + for t in self.ACCEPTED_CLASSES: + self.special_unary(e, op, t) + + # Emit the binary operations op(T, Object) + for op in self.BINARY_OPS: + e.emit_line(f'// {"-"*(60-len(op.name))} {op.name}') + e.emit_line() + for vt in self.ACCEPTED_CLASSES: + self.special_binary(e, op, vt, OBJECT_CLASS) + + # Emit methods selectable by a pair of types (for call sites) + def special_binops(self, e): + + # Emit the binary operations and comparisons + for op in self.BINARY_OPS: + if 
op.class_specific: + e.emit_line(f'// {"-"*(60-len(op.name))} {op.name}') + e.emit_line() + for vt in self.ACCEPTED_CLASSES: + for wt in self.OPERAND_CLASSES: + self.special_binary(e, op, vt, wt) + + def left_justify(self, text): + lines = list() + # Find common leading indent + common = 999 + for line in text.splitlines(): + # Discard trailing space + line = line.rstrip() + # Discard empty lines + if (n:=len(line)) > 0: + space = n - len(line.lstrip()) + if space < common: common = space + lines.append(line) + if common == 999: common = 0 + # Remove this common prefix + clean = list() + for line in lines: + clean.append(line[common:]) + return clean + + def special_unary(self, e, op:UnaryOpInfo, t): + e.emit('static ').emit(op.return_type.name).emit(' ') + e.emit(op.name).emit('(').emit(t.name).emit(' self) {') + with e.indentation(): + method = unary_method(op, t) + method = self.left_justify(method) + e.emit_lines(method) + e.emit_line('}').emit_line() + + def special_binary(self, e, op:BinaryOpInfo, t1, t2): + reflected = op.name.startswith('__r') and \ + op.name not in ("__rrshift__", "__round__", "__repr__") + n1, n2 = 'vw' if not reflected else 'wv' + e.emit('static ').emit(op.return_type.name).emit(' ') + e.emit(op.name).emit('(') + e.emit(t1.name).emit(' ').emit(n1).emit(', ') + e.emit(t2.name).emit(' ').emit(n2).emit(') {') + with e.indentation(): + method = op.body_method(op, t1, n1, t2, n2) + method = self.left_justify(method) + e.emit_lines(method) + e.emit_line('}').emit_line() + + diff --git a/build-tools/python/lib/core/__init__.py b/build-tools/python/lib/core/__init__.py new file mode 100644 index 000000000..718b4976c --- /dev/null +++ b/build-tools/python/lib/core/__init__.py @@ -0,0 +1,15 @@ +# core package: generators and other tooling + +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. 
+ +# These classes support the processing of template files into +# the Java class definitions that realise Python objects +# and their methods. + +from .base import ImplementationGenerator, TypeInfo, WorkingType, OpInfo +from .PyFloat import PyFloatGenerator +from .PyLong import PyLongGenerator +from .PyUnicode import PyUnicodeGenerator + + diff --git a/build-tools/python/lib/core/base.py b/build-tools/python/lib/core/base.py new file mode 100644 index 000000000..e0e5c7d8e --- /dev/null +++ b/build-tools/python/lib/core/base.py @@ -0,0 +1,119 @@ +# base.py: foundations for source-code generation from templates + +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. + +# Module: evo1.generate.base + +from dataclasses import dataclass +from enum import Enum +from datetime import datetime + + +# The method implementations convert operands to a common "working +# type" in order to perform the central operation. The type varies +# with the operation and the operand(s). For example, when adding +# two operands known to be Integer, the common type is LONG (Java +# long), so that there is no overflow, while bit-wise operations on +# the same pair may be carried out in an INT. +# +# In a unary operation, the wider of the (minimum) operation type +# and the operand type is used. When mixing types in a binary +# operation, the widest of the two types and the operation is used. +class WorkingType(Enum): + "Enumerates the types to which operands may be converted." + BOOLEAN = 0 + INT = 1 + LONG = 2 + BIG = 3 + DOUBLE = 4 + STRING = 5 + SEQ = 6 + OBJECT = 7 + + +# We use pre-defined data classes to describe (Java) types that may +# appear as operands or return types. We record the name in Java, +# information about the minimum "width" at which we ought to +# compute with them, and how to convert them to int, long and big +# representations. 
+ +@dataclass +class TypeInfo: + "Information about a type an templates for conversion to int types" + # Java name of a Java class ("PyLong", "Integer", etc.) + name: str + # An argument of this type implies the working type is at least: + min_working_type: WorkingType + + +# Implementation template scripts extend this dataclass to describe +# their operations. + +@dataclass +class OpInfo: + "Base class for describing operations." + # Name of the operation ("__add__", "__neg__", etc.). + name: str + # An implementation of this op has a return type of: + return_type: TypeInfo + # Implementation of this op implies the working type is at least: + min_working_type: WorkingType + + +# Base class of generators for object implementations + +class ImplementationGenerator: + + # Adjust the indent to match that requested + def set_indent(self, i): + self.emitter.indent = i + + # Create a warning comment + def emit_object_template(self, e, src): + name = getattr(src, 'name', '?').replace('\\', '/') + e.emit_line("/*") + e.emit_line(" * Generated by java_object_gen using ") + e.emit(f"generator {self.__class__.__name__}.") + e.emit_line(f" * Source: {name}") + timestamp = datetime.now().strftime("%A %Y-%m-%d %H:%M:%S") + e.emit_line(f" * Date: {timestamp}") + e.emit_line(" */") + + # Emit a rule (comment) above a block of implementations + def emit_heading(self, e, name:str): + e.emit_line(f'// {"-"*(60-len(name))} {name}') + e.emit_line() + + # Emit methods selectable by a single type + def special_methods(self, e): + pass + + # Emit methods selectable by a pair of types (for call sites) + def special_binops(self, e): + pass + + def emit_object_plumbing(self, e): + pass + + def left_justify(self, text): + "Left-justify a block of text, returning a list of lines." 
+ lines = list() + # Find common leading indent + common = 999 + for line in text.splitlines(): + # Discard trailing space + line = line.rstrip() + # Discard empty lines + if (n:=len(line)) > 0: + space = n - len(line.lstrip()) + if space < common: common = space + lines.append(line) + if common == 999: common = 0 + # Remove this common prefix + clean = list() + for line in lines: + clean.append(line[common:]) + return clean + + diff --git a/build-tools/python/lib/srcgen.py b/build-tools/python/lib/srcgen.py new file mode 100644 index 000000000..863962f0c --- /dev/null +++ b/build-tools/python/lib/srcgen.py @@ -0,0 +1,161 @@ +# Classes that emit indented, wrapped (Java) source code + +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. + +import io +import sys + +class IndentedEmitter: + """Class to write wrapped, indented (program) text onto a stream. + + Text is supplied via the emit() and emit_line() methods, and added to + an internal buffer. emit_line() writes the current buffer (if it is not + empty), always beginning a new, indented line. emit() first checks for + sufficient buffer space, writing existing content to the output stream + only as necessary to respect the stated width. The supplied text is + treated as atomic, however long: neither method inserts line-breaks. + close() must be called to ensure the last buffered text reaches the + output stream. (Consider using contextlib.closing.) 
+ """ + + class IndentationContextManager: + """Context in which the indentation is increased by one.""" + + def __init__(self, emitter): + self.emitter = emitter + + def __enter__(self): + self.emitter.indent += 1 + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.emitter.indent -= 1 + + def indentation(self): + """Return a context manager to increase the indentation by one.""" + return IndentedEmitter.IndentationContextManager(self) + + def __init__(self, stream=None, width=None, indent=None): + self.stream = stream or sys.stdout + self.width = width if width is not None else 70 + self.indent = indent if indent is not None else 1 + # Output buffer when lines are pieced together + self.buf = io.StringIO() + + def flush(self): + """Emit residual line (if any) to the output stream.""" + residue = self.buf.getvalue().rstrip() + if residue: + print(residue, file=self.stream) + self.buf.seek(0) + self.buf.truncate() + + close = flush # synonym for the benefit of "with closing(...)" + + def emit(self, text="", suffix=""): + """Write the text+suffix to self.buf. + + Start a new line if necessary. + """ + n = len(text) + if suffix: + n += len(suffix) + if self.buf.tell() + n > self.width: + # Must start a new line first + self.emit_line() + self.buf.write(text) + if suffix: + self.buf.write(suffix) + return self + + def emit_line(self, text=""): + """Begin a new line with indent and optional text.""" + if self.buf.tell() > 0: + # Flush existing buffer to output + print(self.buf.getvalue().rstrip(), file=self.stream) + self.buf.seek(0) + self.buf.truncate() + for _ in range(self.indent): + self.buf.write(" ") + self.buf.write(text) + return self + + def emit_lines(self, lines): + """Begin a new line and emit with indented multi-line text.""" + for line in lines: + self.emit_line(line) + return self + + +class JavaConstantEmitter(IndentedEmitter): + """A class capable of emitting Java constants from Python values. 
+ + This class extends the basic IndentedEmitter for wrapped, indented + program text with methods that translate Python values to equivalent + Java constants (or constructor expressions). + """ + + MAX_INT = (1 << 31) - 1 + MIN_INT = -MAX_INT - 1 + + def java_int(self, value, suffix=""): + """Emit the value as a Java int constant.""" + if self.MIN_INT <= value <= self.MAX_INT: + return self.emit(repr(value) + suffix) + else: + raise ValueError("Value out of range for Java int") + + def java_string(self, value, suffix=""): + """Emit the value as a Java String constant.""" + text = repr(str(value)) + if text.startswith("'"): + q = '"' + text = q + text[1:-1].replace(q, '\\"') + q + return self.emit(text, suffix) + + def java_byte(self, value, suffix=""): + """Emit the value as a Java int constant wrapped to signed byte.""" + bstr = format(value if value < 128 else value - 256, "d") + return self.emit(bstr, suffix) + + def java_double(self, value, suffix=""): + """Emit the value as a Java double constant.""" + return self.emit(repr(value), suffix) + + def java_arglist(self, handler, a, suffix=""): + """Emit comma-separated Java values using the given handler. + + The handler is a function f(obj, suffix="") that emits the + individual argument. It must be capable of converting all types + that may be supplied in a. + """ + n = len(a) + if n == 0: + self.emit(suffix) + else: + with self.indentation(): + for i in range(n - 1): + handler(a[i], ", ") + handler(a[-1], suffix) + return self + + def java_array(self, handler, a, suffix=""): + """Emit a Java array of elements emitted by the given handler. + + The handler is a function f(obj, suffix="") that emits the + individual element. Since Java arrays are homogeneous, it + will often be a single bound method emitting a compatible value + e.g. self.java_byte. 
+ """ + n = len(a) + if n == 0: + self.emit("{}", suffix) + else: + self.emit("{ ") + with self.indentation(): + for i in range(n - 1): + handler(a[i], ", ") + handler(a[-1], " }" + suffix) + return self + diff --git a/build-tools/python/tool/java_object_gen.py b/build-tools/python/tool/java_object_gen.py new file mode 100644 index 000000000..6b8363d01 --- /dev/null +++ b/build-tools/python/tool/java_object_gen.py @@ -0,0 +1,166 @@ +# tool/java_object_gen Emit Java +# +# Copyright (c)2021 Jython Developers. +# Licensed to PSF under a contributor agreement. + +# This is a tool used from the core.gradle build file to generate object +# implementation methods, such as __neg__ and __rsub__, in Java. +# It processes Java files looking for a few simple markers, which it +# replaces with blocks of method definitions. +# +# See the files in core/src/main/javaTemplate for examples. + +import sys +import os +import re +import argparse +import srcgen +from re import match +from contextlib import closing +from dataclasses import dataclass + +from core import ImplementationGenerator +from core import PyFloatGenerator +from core import PyLongGenerator +from core import PyUnicodeGenerator + + +class ImplementationTemplateProcessorFactory: + "Class creating a processor for object templates" + + def __init__(self, source_dir, dest_dir, error, verbose=False): + "Create a factory specifying source and destination roots" + self.src_dir = os.path.relpath(source_dir) + self.dst_dir = os.path.relpath(dest_dir) + self.verbose = verbose + # Check source directory + if not os.path.isdir(self.src_dir): + error(f'no such directory {self.src_dir}') + # Ensure destination directory + if not os.path.isdir(self.dst_dir): + os.makedirs(self.dst_dir, exist_ok=True) + # Confirm + if self.verbose: + # cwd is the project directory e.g. 
~/rt3 + cwd = os.getcwd() + print(f' Current dir = {cwd}') + print(f' templates from {self.src_dir} to {self.dst_dir}') + + def get_processor(self, package, name): + "Create a template processor for one named class" + return ImplementationTemplateProcessor(self, package, name) + + +class ImplementationTemplateProcessor: + "A template processor for one named class" + + # Patterns marker lines in template files. + # Each has a group 1 that captures the indentation. + OBJECT_GENERATOR = re.compile( + r'([\t ]*)//\s*\$OBJECT_GENERATOR\$\s*(\w+)') + SPECIAL_METHODS = re.compile(r'([\t ]*)//\s*\$SPECIAL_METHODS\$') + SPECIAL_BINOPS = re.compile(r'([\t ]*)//\s*\$SPECIAL_BINOPS\$') + MANGLED = re.compile(r'(([\t ]*)//\s*\($\w+\$)') + + def __init__(self, factory, package, name): + self.factory = factory + self.package = package + self.name = name + self.generatorClass = ImplementationGenerator + self.generator = None + self.emitterClass = srcgen.IndentedEmitter + + def open_src(self): + return open( + os.path.join(self.factory.src_dir, self.package, self.name), + 'r', encoding='utf-8') + + def open_dst(self): + location = os.path.join(self.factory.dst_dir, self.package) + os.makedirs(location, exist_ok=True) + return open( + os.path.join(location, self.name), + 'w', encoding='utf-8', newline='\n') + + def process(self): + if self.factory.verbose: + print(f" process {self.name}") + with self.open_src() as src: + with self.open_dst() as dst: + self.process_lines(src, dst) + + def process_lines(self, src, dst): + + def emitter(m): + indent = (len(m[1].expandtabs(4)) + 3) // 4 + return self.emitterClass(dst, 70, indent) + + for line in src: + + if m := self.OBJECT_GENERATOR.match(line): + generatorName = m[2] + self.generatorClass = globals()[generatorName] + self.generator = self.generatorClass() + with closing(emitter(m)) as e: + self.generator.emit_object_template(e, src) + + elif m := self.SPECIAL_METHODS.match(line): + with closing(emitter(m)) as e: + 
self.generator.special_methods(e) + + elif m := self.SPECIAL_BINOPS.match(line): + with closing(emitter(m)) as e: + self.generator.special_binops(e) + + elif m := self.MANGLED.match(line): + print("Mangled template directive?", + m[2], file=sys.stderr) + dst.write(line) + + else: + dst.write(line) + + +def get_parser(): + parser = argparse.ArgumentParser( + prog='java_object_gen', + description='Generate Python object implementations.' + ) + + parser.add_argument('source_dir', + help='Template directory (to process)') + parser.add_argument('dest_dir', + help='Destination directory (in build tree)') + parser.add_argument('--verbose', '-v', action='store_true', + help='Show more information') + return parser + + +def process(src_dir, dest_dir, error, verbose=False): + '''Friendly entry point to use this script via API.''' + # Embed arguments into factory + factory = ImplementationTemplateProcessorFactory( + src_dir, dest_dir, error, verbose) + + # Process all Java files in the template tree at src_dir + for dirpath, dirnames, filenames in os.walk(src_dir): + # Any .java files here? 
+ javanames = [n for n in filenames + if os.path.splitext(n)[1].lower() == '.java'] + if javanames: + package = os.path.relpath(dirpath, src_dir) + for name in javanames: + proc = factory.get_processor(package, name) + proc.process() + + +def main(): + # Parse the command line to argparse arguments + parser = get_parser() + args = parser.parse_args() + process(args.source_dir, args.dest_dir, parser.error, args.verbose) + + +if __name__ == '__main__': + main() + diff --git a/build-tools/python/tool/marshal_test.py b/build-tools/python/tool/marshal_test.py new file mode 100644 index 000000000..e3721b7ad --- /dev/null +++ b/build-tools/python/tool/marshal_test.py @@ -0,0 +1,117 @@ +# Generate test material for simple marshal tests + +import io, sys, os, os.path, math +import marshal, array, py_compile +import dis, inspect, types + +def make_interned(b): + "Change type code to interned equivalent" + tc = chr(b[0]&0x7f) + a0 = None + if tc == 'a': a0 = b'A' + elif tc == 'u': a0 = b't' + elif tc == 'z': a0 = b'Z' + + if a0: + return a0 + b[1:] + else: + return b + + +def as_byte_array(s): + a = array.array('B', s) + vals = map(lambda v: format(v, "#04x"), a) + return "new byte[] {" + ", ".join(vals) + "}" + + +def as_java(v): + "Limited translation to Java" + if isinstance(v, bool): + return str(v).lower() + elif isinstance(v, int): + if v < 2**31 and v >= -2**31: + return str(v) + else: + return f"new BigInteger(\"{v:d}\")" + elif isinstance(v, float): + if math.isinf(v): + if v > 0: + return "Double.POSITIVE_INFINITY" + else: + return "Double.NEGATIVE_INFINITY" + elif math.isnan(v): + return "Double.NaN" + else: + return v.hex() + elif isinstance(v, str): + return f"\"{v:s}\"" + elif isinstance(v, tuple): + args = ", ".join(map(as_java, v)) + return "Py.tuple(" + args + ")" + elif isinstance(v, list): + args = ", ".join(map(as_java, v)) + return "new PyList(List.of(" + args + "))" + elif isinstance(v, dict): + args = ", ".join(map(as_java, v.items())) + return 
"PyDict.fromKeyValuePairs(" + args + ")" + elif isinstance(v, bytes): + return as_bytes(v) + elif isinstance(v, StopIteration): + return "Py.StopIteration" + else: + return "Py.None" + + +def as_bytes(s): + a = array.array('B', s) + values = map(lambda v: format(v, "#04x"), a) + return "bytes(" + ", ".join(values) + ")" + + +def print_load_example(expr, env = locals()): + result = eval(expr, None, env) + b = marshal.dumps(result) + tc = chr(b[0]&0x7f) + if tc in 'auz': + # Force to intern the string + b = make_interned(b) + tc = chr(b[0]&0x7f) + print(f"loadExample( \"{expr:s}\", // tc='{tc:s}'") + javabytes = as_bytes(b) + print(f"{javabytes:s},") + java = as_java(result) + print(f"{java:s} ),") + + +# str +sa = "hello" +sb = "sæll" +su = "\U0001f40d" + + +# tuple +t = (1,2,3) + +# list +list0 = [] +list1 = [sa] +list3 = [sa, 2, t] +listself = [1, 2, 3] +listself[1] = listself + +expressions = [ + "None", + "False", "True", + "0", "1", "-42", "2**31-1", "2047**4", "2**45", "-42**15", + "0.", "1.", "-42.", "1e42", "1.8e300", "1.12e-308", + "float.fromhex('0x1.fffffffffffffp1023')", "float.fromhex('-0x1.p-1022')", + "float('inf')", "float('-inf')", "float('nan')", + "'hello'", "'sæll'", "'\U0001f40d'", + "()", "(sa,sa,sa)", "(sb,sb,t,t)", + "[]", "[sa]", "[sa, 2, t]", + "{}", "{sa:sb}", "dict(python=su)", "{sa:1, sb:2, su:t}", +] + +for x in expressions: + print_load_example(x) + diff --git a/build-tools/settings.gradle b/build-tools/settings.gradle new file mode 100644 index 000000000..5e0426400 --- /dev/null +++ b/build-tools/settings.gradle @@ -0,0 +1,9 @@ +/* + * build-tools/settings.gradle + * + * Included project for tools used to create Jython. + */ + +rootProject.name = 'build-tools' + +rootProject.buildFileName = rootProject.name + '.gradle' diff --git a/build.gradle b/build.gradle index ef0a2144a..c5a69740a 100644 --- a/build.gradle +++ b/build.gradle @@ -1,34 +1,14 @@ /* - * Gradle build for Jython. See also settings.gradle. 
+ * build.gradle * - * This is an attempt to build a distributable JAR using Gradle that could be - * cited as a dependency by other Gradle or Maven projects, when they use the - * Jython interpreter from Java (under JSR-223 or directly). - * - * At present, the build is additional to the Ant build that remains the - * primary and reliable support for development, for test, and to build the - * Jython installers. - * - * The delivered jar should contain only Jython project material (Java classes - * and the Python library) while the many JARs Jython depends on will be cited - * in the accompanying metadata as dependencies. - * - * The Jython source structure does not follow the standard convention for - * Gradle. This script deals with that without changing it, but it uses a build - * directory (build2) entirely separate from Ant's, in which generated and - * compiled material is posted conformant with Gradle conventions. This means - * that the later tasks Gradle provides (test and jar) do not need so much - * customisation. + * Build file for the Jython Project, a multi-project build. */ plugins { id 'java-library' - id 'antlr' id 'maven-publish' } -import java.text.SimpleDateFormat - // ---------------- Determine the version of Jython ---------------------------- /* @@ -45,7 +25,7 @@ import java.text.SimpleDateFormat // Versions are specified in this grammar: // . ( . )? ( )? ( - )? -version = '2.7.3a1' +version = '3.11.0a1' // Valid examples (please preserve in comments): //version = '2.7.2a2' @@ -56,861 +36,38 @@ version = '2.7.3a1' group = 'org.python' - // ---------------- Miscellaneous configuration -------------------------------- -/* - * We support Java 8 onwards officially, up to v2.7.2, but retain the option of - * compiling for Java 7 (in v2.7.2) by maintaining compatibility in the code - * base and in the choice of JARs. 
- */ -sourceCompatibility = '1.7' // Make both 1.8 after 2.7.2 released -targetCompatibility = '1.8' - -project.compileJava.options.debug = true - - -// Separate the Gradle build from that of Ant -buildDir = file('build2') -ext { - buildDate = new Date() - /* - * The directory structure supporting the build has separate locations for - * several intermediate stages. - */ - // Java source generated by ANTLR - antlrGenDir = "$buildDir/gensrc/org/python/antlr" - // Intermediate locations for compiled classes - unexposedDir = "$buildDir/unexposed" - exposedDir = "$buildDir/exposed" - // The standard library may safely be assembled in-place as a resource - pythonLibDir = "$buildDir/python/Lib/" - buildLibDir = "$buildDir/resources/main/Lib/" - buildTestLibDir = "$buildDir/resources/test/Lib/" - compiledLibDir = "$buildDir/resources/main/Lib/" - compiledTestLibDir = "$buildDir/resources/test/Lib/" -} - - -repositories { - // Jython is distributed through Maven Central. Get our dependencies there too. - mavenCentral() -} - -sourceSets { - - main { // Non-standard locations must be specified explicitly +allprojects { - antlr { - srcDirs = ['grammar'] - exclude 'Base.g' // Not used (and produces errors) - } + apply plugin: 'java' + // We support Java 17 onwards officially. + sourceCompatibility = '17' - java { - srcDirs = ['src', project.ext.antlrGenDir] - // Reference to proprietary libraries not supplied - exclude 'com/ziclix/python/sql/handler/InformixDataHandler.java' - exclude 'com/ziclix/python/sql/handler/OracleDataHandler.java' - } - - resources { - // Resources in project root, but this invites an explosion. - // ...
so claim no sources: - srcDirs = [] - // and fix it in task processResources - } - } + // Same configuration for all sub-projects + tasks.withType(JavaCompile) { + options.encoding = 'UTF-8' + options.debug = true - test { // Non-standard locations must be specified explicitly + // deprecation is noisy + options.deprecation = true - java { - srcDirs = ['tests/java'] - // Reference to proprietary libraries not supplied - exclude 'com/ziclix/python/sql/**' - } + // Use only public API + options.compilerArgs.addAll(['--release', '17']) } -} - -dependencies { - /* - * Must these correspond exactly with the external libraries (JARs) - * mentioned in the Ant build.xml? Or is some form of dynamic version - * better for downstream? - * - * Note that an application may specify a later version. Gradle will - * choose the latest required. - */ - - // Using a version available from repo (not 'extlibs/servlet-api-2.5' as in build.xml) - implementation 'javax.servlet:javax.servlet-api:3.1.0' - - /* - * These seem to be unnecessary while the proprietary database support is - * not bundled with Jython. Applications needing them can cite these or a - * version they prefer. - */ - //implementation 'mysql:mysql-connector-java:5.1.42' - //implementation 'org.postgresql:postgresql:42.1.1.jre7' - - // pin to Antlr 3 until we upgrade parsing - antlr 'org.antlr:antlr:3.5.2' - implementation 'org.antlr:antlr-runtime:3.5.2' - - implementation 'org.apache.commons:commons-compress:1.19' - - implementation 'org.bouncycastle:bcpkix-jdk15on:1.62' - implementation 'org.bouncycastle:bcprov-jdk15on:1.62' - - implementation 'org.ow2.asm:asm:7.1' - implementation 'org.ow2.asm:asm-commons:7.1' - implementation 'org.ow2.asm:asm-util:7.1' - - // The Android Guava and "failureaccess" are necessary to support Java 7. - implementation 'com.google.guava:guava:28.0-android' - implementation 'com.google.guava:failureaccess:1.0.1' - // Swap for regular Guava at Java 8.
- - implementation 'com.ibm.icu:icu4j:59.1' - implementation 'com.carrotsearch:java-sizeof:0.0.5' - - implementation 'com.github.jnr:jffi:1.2.20' - implementation 'com.github.jnr:jnr-netdb:1.1.6' - implementation 'com.github.jnr:jnr-ffi:2.1.10' - implementation 'com.github.jnr:jnr-posix:3.0.50' - implementation 'com.github.jnr:jnr-constants:0.9.12' - - implementation 'jline:jline:2.14.5' - - implementation 'io.netty:netty-buffer:4.1.45.Final' - implementation 'io.netty:netty-codec:4.1.45.Final' - implementation 'io.netty:netty-common:4.1.45.Final' - implementation 'io.netty:netty-handler:4.1.45.Final' - implementation 'io.netty:netty-resolver:4.1.45.Final' - implementation 'io.netty:netty-transport:4.1.45.Final' - - // Used implicitly in the Ant build, must be explicit here - implementation 'org.apache.ant:ant:1.9.7' - - // Pin to 4.10 until dependency on hamcrest classes resolved. - testImplementation 'junit:junit:4.10' -} - - -// ---------------- Resource Processing ---------------------------------------- - -/* - * Jython brings several files we could treat as resources, but they do not sit - * in the Gradle-conventional 'main/resources' directory, rather are in the - * project root or rub shoulders with the java source. Pick them individually. - * - * Several tasks defined below declare that processResources depends on them, - * with the objective that at the end of processResources all generated - * resources and the stdlib (but not the compiled stdlib) should be in place - * in $buildDir/resources/main. 
- */ -processResources { - from(file('.')) { - include 'LICENSE.txt' - } - from(file('src')) { - include 'META-INF/**' - include 'org/python/modules/ucnhash.dat' - } -} - -// ---------------- ANTLR Task ------------------------------------------------- - -generateGrammarSource { - maxHeapSize = "512m" - outputDirectory = file(antlrGenDir) -} - -// ---------------- compleJava Task -------------------------------------------- - -compileJava { - // Divert compiled classes to intermediate location pre-exposure. - destinationDir = file(unexposedDir) -} - -// ---------------- Expose Task ------------------------------------------------ - -/* - * The exposer operates between the output of compileJava (unexposed directory) - * and a second intermediate location (exposed directory). These two the - * mergeExposed task will finally combine in the Gradle-standard classes - * directory used as input by the jar task. - */ -configurations { - expose.extendsFrom(implementation) -} - -dependencies { - // The expose (Ant) task depends on classes compiled to here: - expose files(unexposedDir) -} - -// A (Gradle) task to run the Ant task 'expose'. -task expose (group: 'Custom', dependsOn: compileJava) { - - description = 'Expose Java types to Python using their annotations.' - - // Allow Gradle to infer the need to regenreate the outputs - inputs.files(fileTree("${project.ext.unexposedDir}/org/python")) - outputs.dir(project.ext.exposedDir) - - doLast { - /* - * Define an Ant task called 'expose' in the project's AntBuilder. - * We can't define it until ExposeTask has been compiled. - */ - ant.taskdef( - name: 'expose', - classname: 'org.python.expose.generate.ExposeTask', - classpath: configurations.expose.asPath - ) - - // Use the Gradle-conventional directory structure (not the legacy one). 
- ant.expose( - srcdir: file(project.ext.unexposedDir), - destdir: mkdir(file(project.ext.exposedDir)), - includesfile: file('CoreExposed.includes') - ) - } -} - -// Task to merge the exposed and unexposed classes -task mergeExposed(group: 'Custom', type:Copy, dependsOn: expose) { - description = 'Copy exposed Java types to classes.' - // Exposed version will take precedence - duplicatesStrategy = DuplicatesStrategy.EXCLUDE - from file(exposedDir) - from file(unexposedDir) - into sourceSets.main.output.classesDirs.singleFile -} - -// Attach to the classes task the placing of all compiled and exposed classes. -classes.dependsOn(mergeExposed) - - -// ---------------- Version-related file generation ---------------------------- - -/* - * Write the information that org.python.Version reads from - * org/python/version.properties in the class file structure. The inputs to - * this are: information from Git (git command required); project.version; - * and project.ext.buildDate. The task works quite hard to decode - * project.version, which must have the correct form, to deduce whether you - * really intend this to be a release. If anything comes after the release - * number, typically it's a '+' sign, the version becomes a snapshot. - */ -task generateVersionInfo( - type: WriteProperties, - description: 'Write the version information as properties') { - - outputFile = file("${processResources.destinationDir}/org/python/version.properties") - comment = ' Jython version information (from build.gradle)' - - // Create the properties when the task runs. But do it before the write! - doFirst { - - /* - * Query Git for version and tagging. The git commands are exactly - * those used by CPython to create constants configure.ac, but do not - * correspond very well to their names. - */ - - // Not much like a branch. Used only if git_tag is blank. 
- def branch = 'git name-rev --name-only HEAD'.execute().text.split('\n', 2)[0] - property('jython.build.git_branch', branch) - println " jython.build.git_branch = ${branch}" - - // When tagged, the result is "tags/vX.Y.Zrc9", or whatever. - // When not, tagged it is the (short) revision number. - def tag = 'git describe --all --always --dirty'.execute().text.split('\n', 2)[0] - property('jython.build.git_tag', tag) - println " jython.build.git_tag = ${tag}" - - // Revision number (short = 7 hex digits) - def ident = 'git rev-parse --short HEAD'.execute().text.split('\n', 2)[0] - property('jython.build.git_version', ident) - println " jython.build.git_version = ${ident}" - - /* - * Decompose the version string into elements for Jython to access as - * properties. (The Ant build.xml requires them to be set in parts, but - * we can work it out from project.version.) - */ - // .(.)()?(+|-)? - def versionRegex = /(\d+)\.(\d+)(\.(\d+))?((a|b|rc)(\d+))?(\+|-(\w+))?/ - def versionMatcher = project.version =~ versionRegex - if (versionMatcher.count != 1) { - throw new IllegalArgumentException( - "Cannot parse project version string '${project.version}'") - } - // In principle it could match more than once: take the leftmost - def versionResult = versionMatcher[0] - - // . means ..0 - String major = versionResult[1] - String minor = versionResult[2] - String micro = versionResult[3] ? versionResult[4] : '0' - - // Convert the optional to numbers - int level = 0, serial = 0 - if (versionResult[5]) { - // This is some kind of pre-final release (unless snapshot) - serial = versionResult[7] as int - switch (versionResult[6]) { - case 'a': level = 0xa; break // ALPHA release - case 'b': level = 0xb; break // BETA release - case 'rc': level = 0xc; break // release candidate - } - } else { - // Not marked as a/b/rc so ostensibly a final release. 
- level = 0xf - } - - // Convert optional +|- to -DEV or -SNAPSHOT suffix or empty string - String snapshotSuffix = versionResult[8]; - if (snapshotSuffix == '+') { - snapshotSuffix = "-SNAPSHOT" - } - - /* - * Work out if it looks like a release, or adjust project.version. This logic prevents us - * releasing from a polluted repo (similar to logic in the Ant build.xml). - */ - def L = [0:'', 10:'a', 11:'b', 12:'rc', 15:''] - String release = "$major.$minor.$micro${L[level]}${serial?:''}" - - if (snapshotSuffix == null) { - // The version is named correctly for a release. Make safety checks on the repo. - String expectTag = "tags/v$release" - String message = null; - if (tag.endsWith('-dirty')) { - message = 'Version-controlled files have been edited since the last commit' - } else if (tag != expectTag) { - message = "Change set $ident is not tagged $expectTag." - } else { - // Query Git for status: non-empty if uncontrolled (unignored) files. - String gitStatus = 'git status --porcelain'.execute().text - if (gitStatus.trim().length() > 0) { - message = 'Workspace contains uncontrolled files' - } - } - // If a message was set for any reason, fall back to a snapshot. - if (message == null) { - // Repository state is good for a full build. - snapshotSuffix = '' - } else { - // Some reason found not to build the release. - println "$message - build is a snapshot." - snapshotSuffix = '-SNAPSHOT' - } - } - - // Rebuild the version with the snapshot suffix, even if not given originally. - project.version = release + snapshotSuffix - println "This build is for v${project.version}." - - property('jython.version', project.version) - property('jython.major_version', major) - property('jython.minor_version', minor) - property('jython.micro_version', micro) - property('jython.release_level', level) - property('jython.release_serial', serial) - - /* - * Time-stamp the build. 
In the time part, the ':' gets escaped to - * '\:', consistent with Properties.store(), unlike the Ant build. - */ - property('jython.build.time', - (new SimpleDateFormat('HH:mm:ss')) - .format(project.ext.buildDate)) - property('jython.build.date', - (new SimpleDateFormat('MMM d yyyy')) - .format(project.ext.buildDate)) - } -} - -// Attach this task to processResources -processResources.dependsOn(generateVersionInfo) - - -// ---------------- Copy Python Library ---------------------------------------- - -/* - * The default behaviour of the Java plug-in is to make a JAR of the classes in - * the "main" source set. We need a more complex assembly that provides users - * with exposed classes instead of their plain counterparts, and also various - * configuration files and the Python library. - * - * These copies include the tests, so we can test things :), but a subsequent - * JarTask of the build should exclude them as necessary. (Not yet implemented.) - */ - -ext { - libPython = 'lib-python/2.7' - libJython = 'Lib' - libTestSpecs = [ - 'distutils/tests/', - 'email/test/', - 'json/tests/', - 'lib2to3/tests/', - 'unittest/test/', - 'test/' - ] -} - -/* - * Copy the Python standard library. We take this from a distribution of - * CPython, but take only the files specified in CPythonLib.includes. - * The Jython version of the standard library will be copied to the same place. - * Files from the Jython library having the same name (relative path) as one - * in CPythonLib.includes thereby take precedence. - */ -task mergePythonLib( - type: Copy, - description: 'Merge lib-python and Jython Lib') { - - // There might be a way using a collection of File rather than actual copy. 
- into pythonLibDir - - // Copy Jython Lib, with precedence over CPython files of the same name - duplicatesStrategy = DuplicatesStrategy.INCLUDE - from libJython - exclude '**/*.class' - - // Allow Gradle to infer the need to regenerate the outputs - inputs.dir libJython - inputs.dir libPython - inputs.file file('CPythonLib.includes') - - doFirst { - // Select the CPython stdlib files by making a list. - def cPythonLibIncludes = [] - // Read list from CPythonLib.includes, stripping comments and spaces. - file('CPythonLib.includes').eachLine { line -> - def trimmed = line.split('#', 2)[0].trim() - if (trimmed.length() > 0) { - cPythonLibIncludes << trimmed - } - } - // Copy the subset as specified by the list - project.copy { - into pythonLibDir - duplicatesStrategy = DuplicatesStrategy.EXCLUDE - from libPython - include cPythonLibIncludes - exclude '**/*.pyc', '**/*.pyd' - } - } -} - -/* - * Copy from the merge location into the main resources, excluding material - * only needed for tests. - */ -task copyLib( - type: Copy, - dependsOn: mergePythonLib, - description: 'Copy merged Python library (main excluding tests)') { - into buildLibDir - from pythonLibDir - exclude '**/*.pyc', '**/*.pyd', '**/*.class' - // Exclude tests and test material - exclude libTestSpecs -} - -// Attach this task to processResources -processResources.dependsOn(copyLib) - -/* - * Copy from the merge location into the test resopurces, including only - * that extra material needed for tests. 
- */ -task copyTestLib( - type: Copy, - dependsOn: mergePythonLib, - description: 'Copy merged Python library (tests only)') { - into buildTestLibDir - from pythonLibDir - exclude '**/*.pyd', '**/*.class' // test material includes .pyc files - // Include only tests and test material - include libTestSpecs -} - -// Attach this task to processResources -processTestResources.dependsOn(copyTestLib) - - -// ---------------- Jython-Compile Python -------------------------------------- - -/* - * Compile the Python modules to .class files for the JAR. Whereas Jython runs - * happily with a concrete Lib folder, creating and caching the .class files, - * when Jython is supplied as a JAR, we prefer to compile the class files once - * in advance. - */ - -configurations { - pycompile.extendsFrom(implementation) -} - -dependencies { - // Jython as built so far should be on the path of the jycompile (Ant) task - pycompile files("$buildDir/classes/java/main") - pycompile files("$buildDir/resources/main") -} - -// A (Gradle) task to run the Ant task 'jycompile' (not pycompile). -task pycompile( - group: 'Custom', - description: 'Compile the Python modules to .class files for the JAR') { - - // Compiler depends on rest of Jython being fully assembled in 'classes' - dependsOn classes - // Note that classes depends on processResources (Java plug-in). - - // Allow Gradle to infer the need to regenerate the outputs - inputs.dir project.ext.buildLibDir - outputs.dir project.ext.compiledLibDir - - doFirst { - /* - * Define an Ant task called 'jycompile' in the project's AntBuilder. - * We can't define it until JythoncAntTask has been compiled, so this - * must happen during the execution of the task (early). - */ - ant.taskdef( - name: 'jycompile', - classname: 'org.python.util.JycompileAntTask', - classpath: configurations.pycompile.asPath - ) - } - - doLast { - /* - * Now use the 'jycompile' Ant task to compile the Python source we - * supply to users. 
The exclusions have been copied from build.xml, - * and also this comment: - - */ - def exclusions = ['test/**', 'lib2to3/tests/**', - 'lib2to3/tests/data/myfixes/**'] - ant.jycompile( - srcdir: project.ext.buildLibDir, - destdir: project.ext.compiledLibDir, - excludes: exclusions.join(',') // Yes, it's that way round :o - ) - } -} - - -// ---------------- Building the JARs ------------------------------------------ - -/* - * The default behaviour of the Java plug-in is to make a JAR of the classes in - * the "main" source set and its resources. Having carefully substituted/added - * exposed classes in the assembled classes directory, and having prepared the - * (compiled) stdlib as a resource, this is close to what we need, with a few - * adjustments as noted. - */ -jar { - - // Ensure that compiled stdlib is part of the resources to JAR. - dependsOn pycompile - - // It is important for import that X$py.class be newer than X.py - preserveFileTimestamps = true - - // We don't JAR the expose tool itself - exclude 'org/python/expose/generate/**' - - // Build a custom manifest - manifest { - // These attribute values are based on inspecting the ant build - attributes ([ - 'Main-Class': 'org.python.util.jython', - 'Built-By': 'build.gradle', - ]) - - attributes( [ // Build-Info section - 'version': project.version, - 'build-compiler': 'modern', - 'jdk-target-version': project.targetCompatibility, - 'debug': compileJava.options.debug, - 'informix': false, - 'oracle': false - ], 'Build-Info' ) - } -} - -/* - * This is a task complementary to the jar task, taking just the test material. - * This is not published via the main repositories because it counts as a - * distinct artefact with its own POM. - */ -// XXX Consider instead a multi-project build with one artefact per sub-project. 
-task testJar(type: Jar) { - classifier = 'tests' - - from sourceSets.test.output - // We don't JAR the expose tool, so we don't JAR the tests - exclude 'org/python/expose/generate/**' - - // Build a custom manifest - manifest { - // These attribute values are based on inspecting the ant build - attributes ([ - //'Main-Class': 'org.python.util.jython', - 'Built-By': 'build.gradle', - ]) - - attributes( [ // Build-Info section - 'version': project.version, - 'build-compiler': 'modern', - 'jdk-target-version': project.targetCompatibility, - 'debug': compileTestJava.options.debug - ], 'Build-Info' ) - } -} - - -// ---------------- Documentation ---------------------------------------------- - -/* - * The JavaDoc, anyway. - */ -javadoc { - options.encoding = 'UTF-8' - source = fileTree(dir: 'src', include: '**/*.java') -} - -// ---------------- Publication ------------------------------------------------ - -/* - * Post the JAR we built to a public repository. We provide secondary -source - * and -javadoc JARs too (supporting 'main'). - * - * How do we test the artifact actually published is correct? The 'test' task - * tests Jython laid out in the build directory, not the JAR we propose to - * distribute. - * - * Maybe have a second JAR that contains the additional material necessary to - * run integration tests (regression tests and others). - */ - -task sourcesJar(type: Jar, dependsOn: classes) { - classifier = 'sources' - from sourceSets.main.allJava -} - -task javadocJar(type: Jar, dependsOn: javadoc) { - classifier = 'javadoc' - from javadoc.destinationDir -} - -publishing { - - publications { - // The production JAR we expect to be cited as a dependency by users - main(MavenPublication) { - - from components.java - - // Also provide the source. - artifact sourcesJar - // Also provide the docs. (Some javadoc errors currently.) 
- artifact javadocJar - - pom { - // Same description as in ~/maven/pom-template - name = 'Jython' - description = - 'Jython is an implementation of the high-level, dynamic, object-oriented\n' + - 'language Python written in 100% Pure Java, and seamlessly integrated with\n' + - 'the Java platform. It thus allows you to run Python on any Java platform.' - url = 'https://www.jython.org/' - - // We use the PSF 2.0, but only most recently, and actually a bundle. - licenses { - license { - name = 'Jython Software License' - // Not actually the license URL, but linked from here. - url = 'https://www.jython.org/' - distribution = 'repo' - } - } - - // Point to Git repositories hosted on GitHub. - scm { - connection = 'scm:git:https://github.com/jython/jython.git' - developerConnection = 'scm:git:ssh://git@github.com:jython/jython.git' - url = 'https://github.com/jython/jython' - } - - // Could list us all, but why not just the list for now? - developers { - developer { - id = 'jython' - name = 'Jython Developers' - email = 'jython-dev@lists.sourceforge.net' - } - } - } - } + tasks.withType(Javadoc) { + options.encoding = 'UTF-8' + options.showFromPackage() } repositories { - // Staging area where ant -f maven/build.xml will look. - maven { - name = 'stagingRepo' - url = "file://${buildDir}/stagingRepo" - } + mavenLocal() + // Jython is distributed through Maven Central. Get our dependencies there too. + mavenCentral() } } -// Ensure version computation/branding precedes any publication we use. -publish.dependsOn(generateVersionInfo) - -/* FIXME: Depending on publishMainPublicationToMavenLocal does not work, - because it does not exist when evaluating this line. Is this the deferred - configuration removed in Gradle 5.0? Failsd on POM version mismatch if main - publish task not run before publishMainPublicationToMavenLocal. 
-*/ -//publishMainPublicationToMavenLocal.dependsOn(generateVersionInfo) - - -// ---------------- Java unit tests -------------------------------------------- - -ext { - //distDir = relativePath("$buildDir/assembly") - testSourceDir = relativePath('tests/java') -} - - -dependencies { - // Put the exposed classes on the path of the test tasks - testImplementation files(expose) -} - -// Ensure exposed classes are ahead of standard path -sourceSets.test { - compileClasspath = files(expose.outputs) + compileClasspath - runtimeClasspath = files(expose.outputs) + runtimeClasspath - // println "runtimeClasspath = ${runtimeClasspath.asPath}" -} - -compileTestJava { - dependsOn expose - options.debug = project.compileJava.options.debug -} - -test { - - dependsOn copyLib - - // Stop on first test failure - failFast = true - - // Properties as defined in Ant target javatest-basepath - // XXX Not sure of all that python.home is used for in tests. - systemProperty 'python.home', file(copyLib.destinationDir).parent - systemProperty 'python.test.source.dir', project.ext.testSourceDir - // Place cache outside the targets for jar task - systemProperty 'python.cachedir', "${project.buildDir}/cachedir" - // Logging level: default is message=INFO - //systemProperty 'python.verbose', 'CONFIG' - - include '**/*Test*' - - // Exclude based on Ant target javatest-basepath - exclude '**/InterpTestCase' - exclude '**/jythonTest*' // Must run interactively - exclude 'org/python/antlr/**' - exclude 'org/python/tests/imp/**' // See build.xml:importest - - // Some additional exclusions or else the task fails - - // FIXME: leaves stdin/out/err as PyFileWriter that has no fileno() - // causing _ioTest to fail. 
- exclude '**/jsr223/*' - - // FIXME: Tests that hard-code directory paths (use a symbol): - exclude 'org/python/compiler/custom_proxymaker/**' - exclude 'org/python/compiler/JavaMakerSmokeTest.class' - - // FIXME: Failing test finds project root from test class location - exclude 'org/python/core/PySystemState_registry_Test.class' - - // FIXME: Fails as sys._jy_console not set when run under Gradle - exclude 'org/python/util/InterpreterTest.class' - - doFirst { - println "systemProperties = $systemProperties" - } - -} - - -// ---------------- Miscellaneous fettling of the prepare phase ---------------- - -// Source is globally UTF-8 (well, nearly). -tasks.withType(JavaCompile) { - options.encoding = "UTF-8" -} - - - -// ---------------- Support for debugging -------------------------------------- - - -afterEvaluate { project -> - //dumpCP() - //dumpSS() -} - -void dumpCP() { - println('\nconfigurations.testCompile:') - configurations.testCompile.each { println it } - println('\nconfigurations.testRuntime:') - configurations.testRuntime.each { println it } - println('\nconfigurations.expose:') - configurations.expose.each { println it } - println('\nconfigurations.pycompile:') - configurations.pycompile.each { println it } -} - -void dumpSS() { - println '*** source sets ***' - for (ss in sourceSets) { - String name = ss.name - println ss - println " ${name}.compileConfigurationName = ${ss.compileConfigurationName}" - println " ${name}.implementationConfigurationName = ${ss.implementationConfigurationName}" - println " ${name}.runtimeConfigurationName = ${ss.runtimeConfigurationName}" - println " ${name}.java.srcDirs = ${ss.java.srcDirs}" - println " ${name}.antlr.srcDirs = ${ss.antlr.srcDirs}" - println " ${name}.resources.srcDirs = ${ss.resources.srcDirs}" - println " ${name}.output.dirs = ${ss.output.dirs.files}" - println " ${name}.output.classesDirs = ${ss.output.classesDirs.files}" - println " ${name}.output.resourcesDir = ${ss.output.resourcesDir}" - println 
" ${name}.classesTaskName = ${ss.classesTaskName}" - println " ${name}.compileJavaTaskName = ${ss.compileJavaTaskName}" - println " ${name}.jarTaskName = ${ss.jarTaskName}" - } -} +// ---------------- Version-related file generation ---------------------------- +// +// Use the same mechanism as Jython 2 when we are ready. diff --git a/build.xml b/build.xml deleted file mode 100644 index 7a9b4ec10..000000000 --- a/build.xml +++ /dev/null @@ -1,1540 +0,0 @@ - - - - - -Case 1: developer build ------------------------ -Use the command: - ant developer-build -or just: - ant -as it is the default target. This build will create directories -/build and /dist below ${basedir}. -Jython will identify its version with a trailing "-DEV". - -Case 2: build an installer for the development version ------------------------------------------------------- -Use the command: - ant installer -An installer built this way does not include javadoc or source -JARs unless you build them first. It will be versioned as a -snapshot, e.g. ${jython.release}-SNAPSHOT. You can choose another name -for the snapshot, via a property: - ant -Dsnapshot.name=MYTEST installer - -Case 3: full build for a release (use clean, tagged checkout) -------------------------------------------------------------- -You do not have to have access to the Jython Git -repository, but you do need to be at the root of a checked-out -(i.e. newly cloned) source tree. The release aretefacts will be -marked as a snapshot (not an official release) if any of the -following apply: -- there is no .git directory (this is not a repo), -- the source tree contains any extraneous files, -- files have been edited and not committed, -- the current state is not tagged with correct release, -- you supply the snapshot.name property. - -This will create a big jython-installer-${jython.release}.jar, -in the artefacts directory. 
- -See also https://jython-devguide.rtfd.io/en/latest/release_jy.html - -Note on targets ---------------- -A subset of the available targets are designed for direct invocation. -Following an ant convention, the callable targets have a description -attribute. Use ant -p to display these targets. All other targets -may behave unpredictably if called directly. - -Where ant looks for ant.properties ----------------------------------- - 1. in user.home - 2. in the same directory as this build.xml file -The first setting of a property wins. Further settings are ignored. - -An example ant.properties file: -------------------------------- - -# - zxJDBC -oracle.jar = ../support/ojdbc6.jar -informix.jar = ../support/jdbc-4.10.12.jar - -# - option for javac (build.compiler=modern is a global option to use standard jdk 1.7/1.8) -#build.compiler=modern -#jdk.target.version=1.7 -#debug=false -#deprecation=off - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . 
- Build environment for ${ant.project.name} - (Note: if ${propertyname} is displayed, then the property is not set) - --- build Jython version --- - jython.version.short = '${jython.version.short}' - jython.release = '${jython.release}' - jython.version = '${jython.version}' - snapshot.name = '${snapshot.name}' - snapshot.suffix = '${snapshot.suffix}' - jython.dev.jar = '${jython.dev.jar}' - jython.deploy.jar = '${jython.deploy.jar}' - jython.standalone.jar = '${jython.standalone.jar}' - jython.javadoc.jar = '${jython.javadoc.jar}' - jython.sources.jar = '${jython.sources.jar}' - jar.update = '${jar.update}' - --- optional libraries --- - informix = '${informix.jar}' - informix.present = '${informix.present}' - oracle = '${oracle.jar}' - oracle.present = '${oracle.present}' - --- properties --- - basedir = '${basedir}' - source.dir = '${source.dir}' - build.dir = '${build.dir}' - compile.dir = '${compile.dir}' - exposed.dir = '${exposed.dir}' - gensrc.dir = '${gensrc.dir}' - dist.dir = '${dist.dir}' - apidoc.dir = '${apidoc.dir}' - templates.dir = '${templates.dir}' - templates.lazy = '${templates.lazy}' - python.lib = '${python.lib}' - --- compiler options --- - build.compiler = '${build.compiler}' - jdk.target.version = '${jdk.target.version}' - jdk.source.version = '${jdk.source.version}' - deprecation = '${deprecation}' - debug = '${debug}' - nowarn = '${nowarn}' - main.classpath = '${ant.refid:main.classpath}' - --- test config --- - test = '${test}' - test.source.dir = '${test.source.dir}' - reports.dir = '${reports.dir}' - - - - - --- properties only used for a full-build --- - git.present = '${git.present}' - build.git.is_unmodified = '${build.git.is_unmodified}' - build.git.is_clean = '${build.git.is_clean}' - build.git.is_tagged = '${build.git.is_tagged}' - build.git.branch = '${build.git.branch}' - build.git.tag = '${build.git.tag}' - build.git.version = '${build.git.version}' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - -${message} - -${build.git.status} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Writing git and build metadata to version.properties. - jython.version = ${jython.version} - build.git.branch = ${build.git.branch} - build.git.version = ${build.git.version} - build.git.tag = ${build.git.tag} - - - - - - - # Jython version information -jython.version=${jython.version} -jython.major_version=${jython.major_version} -jython.minor_version=${jython.minor_version} -jython.micro_version=${jython.micro_version} -jython.release_level=${jython.release_level} -jython.release_serial=${jython.release_serial} -jython.build.date=${build.date} -jython.build.time=${build.time} -jython.build.git_branch=${build.git.branch} -jython.build.git_tag=${build.git.tag} -jython.build.git_version=${build.git.version} - - - - - - - - - - - - - - - - - - - - - -------------------------------------------------------------------------- -This is a snapshot build. It reflects the current development status. -The text for an official release would continue like ... -------------------------------------------------------------------------- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - -
-
-
-
- - - - - - - - - - - - -
- - - - - - - -
-
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - -
-
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - copy misc files from ${basedir} - - - - - - - - - - - - - - copy sources from ${basedir} - - - - - - - - - - - - - - - - - - - - - - - - copy the demo files from ${basedir}/Demo - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - compiling installer from ${installer.src.dir} - - - copy installer classes to ${dist.dir} - - - - - - - - - copy installer icon to ${dist.dir} - - - - - - - - - - building installer .jar file - - - - - - - - - - - - - - - - - - - - - - - -
- - -
- -
- - - - - - -
-
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - creating ${bugtests.dir}/support_config.py - - -# this configuration was auto-generated by ant build script, -# safe to edit by hand (won't be overwritten) -java_home="${jdk.home}" -jython_home="${dist.dir}" -classpath="${ant.refid:bugtest.classpath}" - - - - - - - - -
diff --git a/core/core.gradle b/core/core.gradle new file mode 100644 index 000000000..fd2120a75 --- /dev/null +++ b/core/core.gradle @@ -0,0 +1,152 @@ +/* + * core.gradle + * + * Sub-project of Jython: interpreter core. We to generate and compile the + * implementation of the core Python objects, exporting a minimal API. + */ + +plugins { + id 'java-library' +} + +dependencies { + //implementation 'org.ow2.asm:asm:9.+' + + // JUnit 5 dependencies + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.10.+' + testImplementation 'org.junit.jupiter:junit-jupiter-params:5.10.+' + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.10.+' +} + + +test { + useJUnitPlatform { // JUnit5 as standard + includeEngines 'junit-jupiter' + } +} + + +// Sources and destinations when generating Java object implementations + +def genSourcesDir = "$buildDir/generated/sources" +def genOutputDir = "$genSourcesDir/javaTemplate" +def genJavaDir = "$genOutputDir/java/main" +def genTestJavaDir = "$genOutputDir/java/test" + +def genInputDir = getProject().file('src/main/javaTemplate') +def genTestInputDir = getProject().file('src/test/javaTemplate') + + +// Source and destinations of Python source test material + +// We compile these to code objects to test the CPython interpreter. 
+def testPythonExampleInputDir = getProject().file('src/test/pythonExample') + +def pythonExampleOutput = "$buildDir/generated/sources/pythonExample" +def testPythonExampleOutputDir = "$pythonExampleOutput/test" + + +sourceSets { + + main { + java { + srcDir genJavaDir + } + } + + test { + java { + srcDir genTestJavaDir + } + resources { + srcDir testPythonExampleOutputDir + } + } +} + + + +// A task that runs Python with the tools library on the path + +class PythonExec extends Exec { + + PythonExec() { + executable 'python' + environment('PYTHONPATH', getProject().getRootProject().file( + 'build-tools/python/lib')) + } +} + + +// Generate Python object definitions + +task generateObjectDefinitions(type: PythonExec, + description: 'Generate object definitions from templates' ) { + args getProject().getRootProject().file( + 'build-tools/python/tool/java_object_gen.py') + args '--verbose' + args genInputDir + args genJavaDir +} +compileJava.dependsOn(generateObjectDefinitions) + + +// Report the version of Python available + +task pythonVersion(type: PythonExec) { + args '-V' +} +generateObjectDefinitions.dependsOn(pythonVersion) + + +// Copy and compile Python source examples to the build tree + +task compileTestPythonExamples(type: PythonExec, + description: 'Copy and compile Python test examples') { + // Clues for the incremental build + inputs.files( + fileTree(testPythonExampleInputDir) { + include '**/*.py' + }) + outputs.dir(testPythonExampleOutputDir) + // Exec task is python -m compile_examples + args '-m' + args 'compile_examples' + args testPythonExampleInputDir + args testPythonExampleOutputDir +} +processTestResources.dependsOn(compileTestPythonExamples) + + +// Some adjustments to Javadoc to ensure complex tables readable + +tasks.withType(Javadoc) { + + options.showFromPackage() + + // addStringOption inserts one "-" for us :/ + // CSS adjustments (initially only for table style) + options.addStringOption("-add-stylesheet", + 
"src/main/javadoc/project-styles.css") + + // Enable "custom" tags used in JDK Javadoc since JSR-335. + // https://nipafx.dev/javadoc-tags-apiNote-implSpec-implNote + options.tags( + "apiNote:a:API Note:", + "implSpec:a:Implementation Requirements:", + "implNote:a:Implementation Note:", + // Unfortunately we must add these standard tags too, + // so they come after the ones we want to enable. + "param", "return", "throws", + "since", "version", "serialData", "see") +} + + +// Particular things we ask of the compiler in this sub-project. +// See also ~/build.gradle for tweaks applying to all projects. + +tasks.withType(JavaCompile) { + // retain parameter names (for use by exposer) + options.compilerArgs.add('-parameters') +} + diff --git a/core/src/main/java/org/python/base/InterpreterError.java b/core/src/main/java/org/python/base/InterpreterError.java new file mode 100644 index 000000000..ca9810b5a --- /dev/null +++ b/core/src/main/java/org/python/base/InterpreterError.java @@ -0,0 +1,51 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.base; + +/** + * Internal error thrown when the Python implementation cannot be + * relied on to work. A Python exception (a {@code PyObject} that + * might be caught in Python code) is not then appropriate. + * Typically thrown during initialisation or for irrecoverable + * internal errors. + */ +public class InterpreterError extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public InterpreterError(String msg, Object... args) { super(String.format(msg, args)); } + + /** + * Constructor specifying a cause and a message. 
+ * + * @param cause a Java exception behind the interpreter error + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public InterpreterError(Throwable cause, String msg, Object... args) { + super(String.format(msg, args), cause); + } + + /** + * Constructor specifying a cause. + * + * @param cause a Java exception behind the interpreter error + */ + public InterpreterError(Throwable cause) { + this(cause, notNull(cause.getMessage(), "(no message)")); + } + + /** + * @param msg a string or {@code null} + * @param defaultMsg a string or {@code null} + * @return non-{@code null} {@code msg} or "" + */ + private static String notNull(String msg, String defaultMsg) { + return msg != null ? msg : defaultMsg; + } +} diff --git a/core/src/main/java/org/python/base/MethodKind.java b/core/src/main/java/org/python/base/MethodKind.java new file mode 100644 index 000000000..a1149d0d1 --- /dev/null +++ b/core/src/main/java/org/python/base/MethodKind.java @@ -0,0 +1,42 @@ +package org.python.base; + +/** + * Enum describing whether a method is an instance, static or class + * method (in Python). + */ +public enum MethodKind { + /** + * The method must be defined by a Java static method. An initial + * self or module argument is not expected. (If the writer attempts + * to declare one it will simply be the first parameter.) In a call + * from Python that uses dot notation, which is attribute lookup, + * the target object (or its type) is used to find the method, but + * is not bound to the first parameter. + */ + /* + * In CPython STATIC cannot be used for functions in modules, but we + * find it useful to expose Java static methods that way. + */ + STATIC, + + /** + * The first argument is self or a module. The method must be + * defined either by a Java instance method or by a static method in + * which an initial self or module argument is declared. 
In a call + * from Python that uses dot notation, which is attribute lookup, + * the target object (or module) is used to find the method, and is + * bound to the first parameter. + */ + INSTANCE, + + /** + * The first argument is the Python type of the target. The method + * must be defined either by a Java static method in which an + * initial type argument is declared. In a call from Python that + * uses dot notation, which is attribute lookup, the target object's + * type is used to find the method, and is bound to the first + * parameter. + */ + // CLASS cannot be used for functions in modules. + CLASS +} diff --git a/core/src/main/java/org/python/base/MissingFeature.java b/core/src/main/java/org/python/base/MissingFeature.java new file mode 100644 index 000000000..70c88e9e3 --- /dev/null +++ b/core/src/main/java/org/python/base/MissingFeature.java @@ -0,0 +1,22 @@ +package org.python.base; + +/** + * Thrown when we reach a combination of circumstances in the + * interpreter that may arise from legitimate use, but we aren't ready + * to implement it. + *

+ * What does the reference implementation do at this point? + */ +public class MissingFeature extends InterpreterError { + private static final long serialVersionUID = 1L; + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public MissingFeature(String msg, Object... args) { + super(String.format(msg, args)); + } +} diff --git a/core/src/main/java/org/python/base/package-info.java b/core/src/main/java/org/python/base/package-info.java new file mode 100644 index 000000000..c6c39ea55 --- /dev/null +++ b/core/src/main/java/org/python/base/package-info.java @@ -0,0 +1,11 @@ +/** + * The {@code base} package contains classes that support the interpreter + * without requiring it to be initialised. (Specifically, they may be used + * before the Python type system is in working order, and without causing it to + * initialise.) + *

+ * Classes {@code public} in this package are intended to be accessible to users + * (meaning extension writers and those embedding the interpreter in an + * application). + */ +package org.python.base; diff --git a/core/src/main/java/org/python/core/Abstract.java b/core/src/main/java/org/python/core/Abstract.java new file mode 100644 index 000000000..71685c05d --- /dev/null +++ b/core/src/main/java/org/python/core/Abstract.java @@ -0,0 +1,961 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.util.function.Supplier; + +import org.python.base.InterpreterError; +import org.python.base.MissingFeature; +import org.python.core.Slot.EmptyException; + +/** + * The "abstract interface" to operations on Python objects. Methods + * here execute the slot functions of the type definition of the + * objects passed in. A primary application is to the CPython byte + * code interpreter. (Methods here often correspond closely to a + * CPython opcode.) + *

+ * In CPython, the methods are found in {@code Objects/abstract.c} + */ +public class Abstract { + + /** + * There are only static methods here, so no instances should be + * created. Formally make the constructor {@code protected} so we + * can sub-class. (Otherwise {@code private} would be the right + * choice.) + */ + protected Abstract() {} + + /** + * The equivalent of the Python expression repr(o), and is called by + * the repr() built-in function. + * + * @param o object + * @return the string representation o + * @throws TypeError if {@code __repr__} returns a non-string + * @throws Throwable from invoked implementation of {@code __repr__} + */ + // Compare CPython PyObject_Repr in object.c + static Object repr(Object o) throws TypeError, Throwable { + if (o == null) { + return ""; + } else { + Operations ops = Operations.of(o); + try { + Object res = ops.op_repr.invoke(o); + if (PyUnicode.TYPE.check(res)) { + return res; + } else { + throw returnTypeError("__repr__", "string", res); + } + } catch (Slot.EmptyException e) { + return String.format("<%s object>", PyType.of(o).getName()); + } + } + } + + /** + * The equivalent of the Python expression str(o). + * + * @param o object + * @return the string representation o + * @throws TypeError if {@code __str__} or {@code __repr__} returns + * a non-string + * @throws Throwable from invoked implementations of {@code __str__} + * or {@code __repr__} + */ + // Compare CPython PyObject_Str in object.c + static Object str(Object o) throws Throwable { + if (o == null) { + return ""; + } else { + Operations ops = Operations.of(o); + if (PyUnicode.TYPE.checkExact(o)) { + return o; + } else if (Slot.op_str.isDefinedFor(ops)) { + Object res = ops.op_str.invoke(o); + if (PyUnicode.TYPE.check(res)) { + return res; + } else { + throw returnTypeError("__str__", "string", res); + } + } else { + return repr(o); + } + } + } + + /** + * Convert a given {@code Object} to an instance of a Java class. 
+ * Raise a {@code TypeError} if the conversion fails. + * + * @param target type defined by {@code c} + * @param o the {@code Object} to convert. + * @param c the class to convert it to. + * @return converted value + */ + @SuppressWarnings("unchecked") + public static T tojava(Object o, Class c) { + try { + // XXX Stop-gap implementation (just cast it) + if (c.isAssignableFrom(o.getClass())) { + return (T)o; + } else { + throw new Slot.EmptyException(); + } + // XXX Replace when this slot is defined: + // return (T)Operations.of(o).op_tojava.invokeExact(o, c); + } catch (NullPointerException npe) { + // Probably an error, but easily converted. + return null; + } catch (Slot.EmptyException e) { + throw typeError("cannot convert %s to %s", o, c.getName()); + } + } + + /** + * Compute and return the hash value of an object. This is the + * equivalent of the Python expression {@code hash(v)}. + * + * @param v to hash + * @return the hash + * @throws TypeError if {@code v} is an unhashable type + * @throws Throwable on errors within {@code __hash__} + */ + static int hash(Object v) throws TypeError, Throwable { + try { + return (int)Operations.of(v).op_hash.invokeExact(v); + } catch (Slot.EmptyException e) { + throw typeError("unhashable type: %s", v); + } + } + + /** + * Test a value used as condition in a {@code for} or {@code if} + * statement. 
+ * + * @param v to test + * @return if Python-truthy + * @throws Throwable from invoked implementations of + * {@code __bool__} or {@code __len__} + */ + // Compare CPython PyObject_IsTrue in object.c + static boolean isTrue(Object v) throws Throwable { + // Begin with common special cases + if (v == Py.True) + return true; + else if (v == Py.False || v == Py.None) + return false; + else { + // Ask the object type through the op_bool or op_len slots + Operations ops = Operations.of(v); + if (Slot.op_bool.isDefinedFor(ops)) + return (boolean)ops.op_bool.invokeExact(v); + else if (Slot.op_len.isDefinedFor(ops)) + return 0 != (int)ops.op_len.invokeExact(v); + else + // No op_bool and no length: claim everything is True. + return true; + } + } + + /** + * Perform a rich comparison, raising {@code TypeError} when the + * requested comparison operator is not supported. + * + * @param v left operand + * @param w right operand + * @param op comparison type + * @return comparison result + * @throws Throwable from invoked implementations + */ + // Compare CPython PyObject_RichCompare, do_richcompare in object.c + static Object richCompare(Object v, Object w, Comparison op) throws Throwable { + return op.apply(v, w); + } + + /** + * Perform a rich comparison with boolean result. This wraps + * {@link #richCompare(Object, Object, Comparison)}, converting the + * result to Java {@code false} or {@code true}, or throwing + * (probably {@link TypeError}), when the objects cannot be + * compared. + * + * @param v left operand + * @param w right operand + * @param op comparison type + * @return comparison result + * @throws Throwable from invoked method implementations + */ + // Compare CPython PyObject_RichCompareBool in object.c + static boolean richCompareBool(Object v, Object w, Comparison op) throws Throwable { + /* + * Quick result when objects are the same. Guarantees that identity + * implies equality. 
+ */ + if (v == w) { + if (op == Comparison.EQ) + return true; + else if (op == Comparison.NE) + return false; + } + return isTrue(op.apply(v, w)); + } + + /** + * Perform a rich comparison with boolean result. This wraps + * {@link #richCompare(Object, Object, Comparison)}, converting the + * result to Java {@code false} or {@code true}. + *

+ * When the when the objects cannot be compared, the client gets to + * choose the exception through the provider {@code exc}. When this + * is {@code null}, the return will simply be {@code false} for + * incomparable objects. + * + * @param type of exception + * @param v left operand + * @param w right operand + * @param op comparison type + * @param exc supplies an exception of the desired type + * @return comparison result + * @throws T on any kind of error + */ + static boolean richCompareBool(Object v, Object w, Comparison op, + Supplier exc) throws T { + try { + return richCompareBool(v, w, op); + } catch (Throwable e) { + if (exc == null) + return false; + else + throw exc.get(); + } + } + + /** + * {@code o.name} with Python semantics. + * + * @param o object to operate on + * @param name of attribute + * @return {@code o.name} + * @throws AttributeError if non-existent etc. + * @throws Throwable on other errors + */ + // Compare CPython _PyObject_GetAttr in object.c + // Also PyObject_GetAttrString in object.c + static Object getAttr(Object o, String name) throws AttributeError, Throwable { + // Decisions are based on type of o (that of name is known) + Operations ops = Operations.of(o); + try { + // Invoke __getattribute__. + return ops.op_getattribute.invokeExact(o, name); + } catch (EmptyException | AttributeError e) { + try { + // Not found or not defined: fall back on __getattr__. + return ops.op_getattr.invokeExact(o, name); + } catch (EmptyException ignored) { + // __getattr__ not defined, original exception stands. + if (e instanceof AttributeError) { throw e; } + throw noAttributeError(o, name); + } + } + } + + /** + * {@code o.name} with Python semantics. + * + * @param o object to operate on + * @param name of attribute + * @return {@code o.name} + * @throws AttributeError if non-existent etc. 
+ * @throws TypeError if the name is not a {@code str} + * @throws Throwable on other errors + */ + // Compare CPython PyObject_GetAttr in object.c + static Object getAttr(Object o, Object name) throws AttributeError, TypeError, Throwable { + // Decisions are based on types of o and name + if (name instanceof String) { + return getAttr(o, name); + } else if (name instanceof PyUnicode) { + return getAttr(o, name.toString()); + } else { + throw attributeNameTypeError(name); + } + } + + /** + * Python {@code o.name}: returning {@code null} when not found (in + * place of {@code AttributeError} as would + * {@link #getAttr(Object, Object)}). Other exceptions that may be + * raised in the process, propagate. + * + * @param o the object in which to look for the attribute + * @param name of the attribute sought + * @return the attribute or {@code null} + * @throws TypeError if {@code name} is not a Python {@code str} + * @throws Throwable on other errors + */ + // Compare CPython _PyObject_LookupAttr in object.c + static Object lookupAttr(Object o, Object name) throws TypeError, Throwable { + // Corresponds to object.c : PyObject_GetAttr + // Decisions are based on types of o and name + if (name instanceof String) { + return lookupAttr(o, name); + } else if (name instanceof PyUnicode) { + return lookupAttr(o, name.toString()); + } else { + throw attributeNameTypeError(name); + } + } + + /** + * Python {@code o.name} returning {@code null} when not found (in + * place of {@code AttributeError} as would + * {@link #getAttr(Object, String)}). Other exceptions that may be + * raised in the process, propagate. 
+ * + * @param o the object in which to look for the attribute + * @param name of the attribute sought + * @return the attribute or {@code null} + * @throws Throwable on other errors than {@code AttributeError} + */ + // Compare CPython _PyObject_LookupAttr in object.c + static Object lookupAttr(Object o, String name) throws TypeError, Throwable { + // Decisions are based on type of o (that of name is known) + try { + // Invoke __getattribute__ + MethodHandle getattro = Operations.of(o).op_getattribute; + return getattro.invokeExact(o, name); + } catch (EmptyException | AttributeError e) { + return null; + } + } + + /** + * {@code o.name = value} with Python semantics. + * + * @param o object to operate on + * @param name of attribute + * @param value to set + * @throws AttributeError if non-existent etc. + * @throws Throwable on other errors + */ + // Compare CPython PyObject_SetAttr in object.c + public static void setAttr(Object o, String name, Object value) + throws AttributeError, Throwable { + // Decisions are based on type of o (that of name is known) + try { + Operations.of(o).op_setattr.invokeExact(o, name, value); + } catch (EmptyException e) { + throw attributeAccessError(o, name, Slot.op_setattr); + } + } + + /** + * {@code o.name = value} with Python semantics. + * + * @param o object to operate on + * @param name of attribute + * @param value to set + * @throws AttributeError if non-existent etc. + * @throws TypeError if the name is not a {@code str} + * @throws Throwable on other errors + */ + // Compare CPython PyObject_SetAttr in object.c + public static void setAttr(Object o, Object name, Object value) + throws AttributeError, TypeError, Throwable { + if (name instanceof String) { + setAttr(o, name, value); + } else if (name instanceof PyUnicode) { + setAttr(o, name.toString(), value); + } else { + throw attributeNameTypeError(name); + } + } + + /** + * {@code del o.name} with Python semantics. 
+ * + * @param o object to operate on + * @param name of attribute + * @throws AttributeError if non-existent etc. + * @throws Throwable on other errors + * + */ + // Compare CPython PyObject_DelAttr in abstract.h + // which is a macro for PyObject_SetAttr in object.c + public static void delAttr(Object o, String name) throws AttributeError, Throwable { + // Decisions are based on type of o (that of name is known) + try { + Operations.of(o).op_delattr.invokeExact(o, name); + } catch (EmptyException e) { + throw attributeAccessError(o, name, Slot.op_delattr); + } + } + + /** + * {@code del o.name} with Python semantics. + * + * @param o object to operate on + * @param name of attribute + * @throws AttributeError if non-existent etc. + * @throws TypeError if the name is not a {@code str} + * @throws Throwable on other errors + */ + // Compare CPython PyObject_SetAttr in object.c + public static void delAttr(Object o, Object name) throws AttributeError, TypeError, Throwable { + if (name instanceof String) { + delAttr(o, name); + } else if (name instanceof PyUnicode) { + delAttr(o, name.toString()); + } else { + throw attributeNameTypeError(name); + } + } + + /** + * Get {@code cls.__bases__}, a Python {@code tuple}, by name from + * the object invoking {@code __getattribute__}. If {@code cls} does + * not define {@code __bases__}, or it is not a {@code tuple}, + * return {@code null}. In that case, it is customary for the caller + * to throw a {@link TypeError}. + * + * @param cls normally a type object + * @return {@code cls.__bases__} or {@code null} + * @throws Throwable propagated from {@code __getattribute__} + */ + // Compare CPython abstract_get_bases in abstract.c + private static PyTuple getBasesOf(Object cls) throws Throwable { + // Should return a tuple: convert anything else to null. + Object bases = lookupAttr(cls, "__bases__"); + // Treat non-tuple as not having the attribute. + return bases instanceof PyTuple ? 
(PyTuple)bases : null; + } + + /** + * Return {@code true} iff the class {@code derived} is identical to + * or derived from the class {@code cls}. The answer is sought along + * the MRO of {@code derived} if {@code derived} and {@code cls} are + * both Python {@code type} objects, or sub-classes of {@code type}, + * or by traversal of {@code cls.__bases__} otherwise. + * + * @param derived candidate derived type. + * @param cls type that may be an ancestor of {@code derived}, (but + * not a tuple of such). + * @return ẁhether {@code derived} is a sub-class of {@code cls} by + * these criteria. + * @throws TypeError if either input has no {@code __bases__} tuple. + * @throws Throwable propagated from {@code __subclasscheck__} or + * other causes + */ + // Compare CPython recursive_issubclass in abstract.c + // and _PyObject_RealIsSubclass in abstract.c + static boolean recursiveIsSubclass(Object derived, Object cls) throws TypeError, Throwable { + if (cls instanceof PyType && derived instanceof PyType) + // Both are PyType so this is relatively easy. + return ((PyType)derived).isSubTypeOf((PyType)cls); + else if (getBasesOf(derived) == null) + // derived is neither PyType nor has __bases__ + throw new TypeError("issubclass", 1, "a class", derived); + else if (getBasesOf(cls) == null) + // cls is neither PyType nor has __bases__ + throw argumentTypeError("issubclass", 2, "a class or tuple of classes", cls); + else + // Answer by traversing cls.__bases__ for derived + return isSubclassHelper(derived, cls); + } + + /** + * This is equivalent to the Python expression {@code iter(o)}. It + * returns a new Python iterator for the object argument, or the + * object itself if it is already an iterator. + *

+ * {@code o} must either define {@code __iter__}, which will be + * called to obtain an iterator, or define {@code __getitem__}, on + * which an iterator will be created. It is guaranteed that the + * object returned defines {@code __next__}. + * + * @param o the claimed iterable object + * @return an iterator on {@code o} + * @throws TypeError if the object cannot be iterated + * @throws Throwable from errors in {@code o.__iter__} + */ + // Compare CPython PyObject_GetIter in abstract.c + static Object getIterator(Object o) throws TypeError, Throwable { return getIterator(o, null); } + + /** + * Equivalent to {@link #getIterator(Object)}, with the opportunity + * to specify the kind of Python exception to raise. + * + * @param the type of exception to throw + * @param o the claimed iterable object + * @param exc a supplier (e.g. lambda expression) for the exception + * @return an iterator on {@code o} + * @throws E to throw if an iterator cannot be formed + * @throws Throwable from errors in {@code o.__iter__} + */ + // Compare CPython PyObject_GetIter in abstract.c + static Object getIterator(Object o, Supplier exc) + throws TypeError, Throwable { + Operations ops = Operations.of(o); + if (Slot.op_iter.isDefinedFor(ops)) { + // o defines __iter__, call it. + Object r = ops.op_iter.invokeExact(o); + // Did that return an iterator? Check r defines __next__. + if (Slot.op_next.isDefinedFor(Operations.of(r))) { + return r; + } else if (exc == null) { throw returnTypeError("iter", "iterator", r); } + } else if (Slot.op_getitem.isDefinedFor(ops)) { + // o defines __getitem__: make a (Python) iterator. + throw new MissingFeature("PyIterator"); + } + + // Out of possibilities: throw caller-defined exception + if (exc != null) { + throw exc.get(); + } else { + throw typeError(NOT_ITERABLE, o); + } + } + + /** + * Return {@code true} if the object {@code o} supports the iterator + * protocol (has {@code __iter__}). 
+ * + * @param o to test + * @return true if {@code o} supports the iterator protocol + */ + static boolean iterableCheck(Object o) { + return Slot.op_iter.isDefinedFor(Operations.of(o)); + } + + /** + * Return true if the object {@code o} is an iterator (has + * {@code __next__}). + * + * @param o to test + * @return true if {@code o} is an iterator + */ + // Compare CPython PyIter_Check in abstract.c + static boolean iteratorCheck(Object o) { return Slot.op_next.isDefinedFor(Operations.of(o)); } + + /** + * Return the next value from the Python iterator {@code iter}. If + * there are no remaining values, returns {@code null}. If an error + * occurs while retrieving the item, the exception propagates. + * + * @param iter the iterator + * @return the next item + * @throws Throwable from {@code iter.__next__} + */ + // Compare CPython PyIter_Next in abstract.c + static Object next(Object iter) throws Throwable { + Operations o = Operations.of(iter); + try { + return o.op_next.invokeExact(iter); + } catch (StopIteration e) { + return null; + } catch (EmptyException e) { + throw typeError(NOT_ITERABLE, iter); + } + } + + // Plumbing ------------------------------------------------------- + + /** + * Crafted error supporting {@link #getAttr(Object, PyUnicode)}, + * {@link #setAttr(Object, PyUnicode, Object)}, and + * {@link #delAttr(Object, PyUnicode)}. + * + * @param o object accessed + * @param name of attribute + * @param slot operation + * @return an error to throw + */ + private static TypeError attributeAccessError(Object o, String name, Slot slot) { + String mode, kind, fmt = "'%.100s' object has %s attributes (%s.%.50s)"; + // What were we trying to do? + switch (slot) { + case op_delattr: + mode = "delete "; + break; + case op_setattr: + mode = "assign to "; + break; + default: + mode = ""; + break; + } + // Can we even read this object's attributes? + Operations ops = Operations.of(o); + kind = Slot.op_getattribute.isDefinedFor(ops) ? 
"only read-only" : "no"; + // Now we know what to say + return new TypeError(fmt, ops, kind, mode, name); + } + + // Convenience functions constructing errors -------------------- + + private static final String IS_REQUIRED_NOT = "%.200s is required, not '%.100s'"; + private static final String RETURNED_NON_TYPE = "%.200s returned non-%.200s (type %.200s)"; + private static final String ARGUMENT_MUST_BE = "%s()%s%s argument must be %s, not '%.200s'"; + protected static final String NOT_MAPPING = "%.200s is not a mapping"; + protected static final String NOT_ITERABLE = "%.200s object is not iterable"; + + /** + * Return {@code true} iff {@code derived} is a Python sub-class of + * {@code cls} (including where it is the same class). The answer is + * found by traversing the {@code __bases__} tuples recursively, + * therefore does not depend on the MRO or respect + * {@code __subclasscheck__}. + * + * @param derived candidate derived type + * @param cls type that may be an ancestor of {@code derived} + * @return whether {@code derived} is a sub-class of {@code cls} + * @throws Throwable from looking up {@code __bases__} + */ + // Compare CPython abstract_issubclass in abstract.c + private static boolean isSubclassHelper(Object derived, Object cls) throws Throwable { + while (derived != cls) { + // Consider the bases of derived + PyTuple bases = getBasesOf(derived); + int n; + // derived is a subclass of cls if any of its bases is + if (bases == null || (n = bases.size()) == 0) { + // The __bases__ tuple is missing or empty ... + return false; + } else if (n == 1) { + // The answer is the answer for that single base. 
+ derived = bases.get(0); + } else { + // several bases so work through them in sequence + for (int i = 0; i < n; i++) { + if (isSubclassHelper(bases.get(i), cls)) + return true; + } + // And not otherwise + return false; + } + } + return true; + } + + /** + * Create a {@link TypeError} with a message involving the type of + * {@code args[0]} and optionally other arguments. Note that + * {@code args[0]} is overwritten in place with the type name. + * + * @param fmt format for message with a {@code %s} first + * @param args arguments to the formatted message, where Python type + * name of {@code args[0]} will replace it + * @return exception to throw + */ + public static TypeError typeError(String fmt, Object... args) { + args[0] = PyType.of(args[0]).getName(); + return new TypeError(fmt, args); + } + + /** + * Create a {@link TypeError} with a message along the lines "T + * indices must be integers or slices, not X" involving a target + * type T and a purported index type X presented, e.g. "list indices + * must be integers or slices, not str". + * + * @param t type of target of function or operation + * @param x type of object presented as an index + * @return exception to throw + */ + static TypeError indexTypeError(PyType t, PyType x) { + String fmt = "%.200s indices must be integers or slices, not %.200s"; + return new TypeError(fmt, t.getName(), x.getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines "T + * indices must be integers or slices, not X" involving the type + * name T of a target {@code o} and the type name X of {@code i} + * presented as an index, e.g. "list indices must be integers or + * slices, not str".
+ * + * @param o target of function or operation + * @param i actual object presented as an index + * @return exception to throw + */ + static TypeError indexTypeError(Object o, Object i) { + return indexTypeError(PyType.of(o), PyType.of(i)); + } + + /** + * Create a {@link TypeError} with a message along the lines "T is + * required, not X" involving any descriptive phrase T and the type + * X of {@code o}, e.g. "a bytes-like object is required, not + * 'str'". + * + * @param t expected kind of thing + * @param o actual object involved + * @return exception to throw + */ + static TypeError requiredTypeError(String t, Object o) { + return new TypeError(IS_REQUIRED_NOT, t, PyType.of(o).getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines + * "attribute name must be string, not 'X'" giving the type X of + * {@code name}. + * + * @param name actual object offered as a name + * @return exception to throw + */ + static TypeError attributeNameTypeError(Object name) { + String fmt = "attribute name must be string, not '%.200s'"; + return new TypeError(fmt, PyType.of(name).getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines "can't + * set attributes of X" giving str of {@code obj}. + * + * @param obj actual object on which setting failed + * @return exception to throw + */ + static TypeError cantSetAttributeError(Object obj) { + return new TypeError("can't set attributes of %.200s", obj); + } + + /** + * Create a {@link TypeError} with a message along the lines "F() + * [name] argument must be T, not X", involving a function name, an + * argument name, an expected type description T and the type X of + * {@code o}, e.g. "split() separator argument must be str or None, + * not 'tuple'".
+ * + * @param f name of function or operation + * @param name of argument + * @param t describing the expected kind of argument + * @param o actual argument (not its type) + * @return exception to throw + */ + public static TypeError argumentTypeError(String f, String name, String t, Object o) { + String space = name.length() == 0 ? "" : " "; + return new TypeError(ARGUMENT_MUST_BE, f, space, name, t, PyType.of(o).getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines "F() + * [nth] argument must be T, not X", involving a function name, + * optionally an ordinal n, an expected type description T and the + * type X of {@code o}, e.g. "int() argument must be a string, a + * bytes-like object or a number, not 'list'" or "complex() second + * argument must be a number, not 'type'". + * + * @param f name of function or operation + * @param n ordinal of argument: 1 for "first", etc., 0 for "" + * @param t describing the expected kind of argument + * @param o actual argument (not its type) + * @return exception to throw + */ + public static TypeError argumentTypeError(String f, int n, String t, Object o) { + // ordinal() supplies its own leading space and the String + // overload adds one for any non-empty name: trim here so the + // message has a single space between "f()" and the ordinal. + return argumentTypeError(f, ordinal(n).trim(), t, o); + } + + // Helper for argumentTypeError (result has a leading space unless empty) + private static String ordinal(int n) { + switch (n) { + case 0: + return ""; + case 1: + return " first"; + case 2: + return " second"; + case 3: + return " third"; + default: + return String.format(" %dth", n); + } + } + + /** + * Create a {@link TypeError} with a message along the lines "F + * returned non-T (type X)" involving a function name, an expected + * type T and the type X of {@code o}, e.g. "__int__ returned + * non-int (type str)".
+ * + * @param f name of function or operation + * @param t expected type of return + * @param o actual object returned + * @return exception to throw + */ + static TypeError returnTypeError(String f, String t, Object o) { + return new TypeError(RETURNED_NON_TYPE, f, t, PyType.of(o).getName()); + } + + /** + * Create a {@link AttributeError} with a message along the lines + * "'T' object has no attribute N", where T is the type of the + * object accessed. + * + * @param v object accessed + * @param name of attribute + * @return exception to throw + */ + static AttributeError noAttributeError(Object v, Object name) { + return noAttributeOnType(PyType.of(v), name); + } + + /** + * Create a {@link AttributeError} with a message along the lines + * "'T' object has no attribute N", where T is the type given. + * + * @param type of object accessed + * @param name of attribute + * @return exception to throw + */ + static AttributeError noAttributeOnType(PyType type, Object name) { + String fmt = "'%.50s' object has no attribute '%.50s'"; + return new AttributeError(fmt, type.getName(), name); + } + + /** + * Create a {@link TypeError} with a message along the lines "N must + * be set to T, not a X object" involving the name N of the + * attribute, any descriptive phrase T and the type X of + * {@code value}, e.g. "__dict__ must be set to a + * dictionary, not a 'list' object". + * + * @param name of the attribute + * @param kind expected kind of thing + * @param value provided to set this attribute in some object + * @return exception to throw + */ + static TypeError attrMustBe(String name, String kind, Object value) { + String msg = "%.50s must be set to %.50s, not a '%.50s' object"; + return new TypeError(msg, name, kind, PyType.of(value).getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines "N must + * be set to a string, not a X object". 
+ * + * @param name of the attribute + * @param value provided to set this attribute in some object + * @return exception to throw + */ + static TypeError attrMustBeString(String name, Object value) { + return attrMustBe(name, "a string", value); + } + + /** + * Create a {@link AttributeError} with a message along the lines + * "'T' object attribute N is read-only", where T is the type of the + * object accessed. + * + * @param v object accessed + * @param name of attribute + * @return exception to throw + */ + static AttributeError readonlyAttributeError(Object v, Object name) { + return readonlyAttributeOnType(PyType.of(v), name); + } + + /** + * Create a {@link AttributeError} with a message along the lines + * "'T' object attribute N is read-only", where T is the type given. + * + * @param type of object accessed + * @param name of attribute + * @return exception to throw + */ + static AttributeError readonlyAttributeOnType(PyType type, Object name) { + String fmt = "'%.50s' object attribute '%s' is read-only"; + return new AttributeError(fmt, type.getName(), name); + } + + /** + * Create a {@link AttributeError} with a message along the lines + * "'T' object attribute N cannot be deleted", where T is the type + * of the object accessed. + * + * @param v object accessed + * @param name of attribute + * @return exception to throw + */ + static AttributeError mandatoryAttributeError(Object v, Object name) { + return mandatoryAttributeOnType(PyType.of(v), name); + } + + /** + * Create a {@link AttributeError} with a message along the lines + * "'T' object attribute N cannot be deleted", where T is the type + * given. 
+ * + * @param type of object accessed + * @param name of attribute + * @return exception to throw + */ + static AttributeError mandatoryAttributeOnType(PyType type, Object name) { + String fmt = "'%.50s' object attribute '%s' cannot be deleted"; + return new AttributeError(fmt, type.getName(), name); + } + + /** + * Create an {@link IndexError} with a message along the lines "N + * index out of range", where N is usually a function or type name. + * + * @param name object accessed + * @return exception to throw + */ + static IndexError indexOutOfRange(String name) { + String fmt = "%.50s index out of range"; + return new IndexError(fmt, name); + } + + /** + * Submit a {@code DeprecationWarning} call (which may result in an + * exception) with the same message as + * {@link #returnTypeError(String, String, Object)}, the whole + * followed by one about deprecation of the facility. + * + * @param f name of function or operation + * @param t expected type of return + * @param o actual object returned + * @return {@code o} + */ + static Object returnDeprecation(String f, String t, Object o) { + // Warnings.format(DeprecationWarning.TYPE, 1, + // RETURNED_NON_TYPE_DEPRECATION, f, t, + // PyType.of(o).getName(), t); + return o; + } + + private static final String RETURNED_NON_TYPE_DEPRECATION = + RETURNED_NON_TYPE + ". " + "The ability to return an instance of a strict " + + "subclass of %s is deprecated, and may be " + + "removed in a future version of Python."; + + /** Throw generic something went wrong internally (last resort). */ + static void badInternalCall() { + throw new InterpreterError("bad internal call"); + } + + /** + * Create an {@link InterpreterError} for use where a Python method + * (or special method) implementation receives an argument that + * should be impossible in a correct interpreter. This is a sort of + * {@link TypeError} against the {@code self} argument, but + * occurring where no programming error should be able to induce it + * (e.g. 
coercion fails after we have passed the check that + * descriptors make on their {@code obj}, or when invoking a special + * method found via an {@link Operations} object. + * + * @param d expected kind of argument + * @param o actual argument (not its type) + * @return exception to throw + */ + static InterpreterError impossibleArgumentError(String d, Object o) { + return new InterpreterError(IMPOSSIBLE_CLASS, d, o.getClass().getName()); + } + + private static final String IMPOSSIBLE_CLASS = + "expected %.50s argument but found impossible Java class %s"; +} diff --git a/core/src/main/java/org/python/core/AbstractPyObject.java b/core/src/main/java/org/python/core/AbstractPyObject.java new file mode 100644 index 000000000..82930e046 --- /dev/null +++ b/core/src/main/java/org/python/core/AbstractPyObject.java @@ -0,0 +1,37 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +/** + * Class that may be used as a base for Python objects (but doesn't have to be) + * to supply some universally needed methods and the type. + */ +abstract class AbstractPyObject implements CraftedPyObject { + + private PyType type; + + /** + * Constructor specifying the Python type, as returned by {@link #getType()}. As + * this is a base for the implementation of all sorts of Python types, it needs + * to be told which one it is. + * + * @param type actual Python type being created + */ + protected AbstractPyObject(PyType type) { this.type = type; } + + @Override + public PyType getType() { return type; } + + @Override + public String toString() { return Py.defaultToString(this); } + + // slot functions ------------------------------------------------- + /* + * It should be possible to declare special (instance) methods in this class to + * save work in implementation classes of Python types. 
The processing of + * special methods would treat them as defined afresh by each exposed + * implementation (each class that calls PyType.fromSpec()). This may be + * undesirable where sub-classes that are object implementations should instead + * Python-inherit their definition. + */ +} diff --git a/core/src/main/java/org/python/core/ArgParser.java b/core/src/main/java/org/python/core/ArgParser.java new file mode 100644 index 000000000..563be1680 --- /dev/null +++ b/core/src/main/java/org/python/core/ArgParser.java @@ -0,0 +1,1388 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; + +import org.python.base.InterpreterError; +import org.python.base.MethodKind; + +/** + * This class provides a parser for the positional and keyword + * arguments supplied during a call to a built-in function or + * method. The purpose of an argument parser is to provide the body + * of a function, or perhaps a {@code MethodHandle} with an array of + * values, corresponding in order and number to its parameters + * (formal arguments). + *

+ * This parser transforms several argument presentations that occur + * in a Python implementation, and arranges them into an array. This + * array is either created by the parser, or designated by the + * caller. The parser may therefore be used to prepare arguments for + * a pure Java method (or {@code MethodHandle}) that accepts an + * array, or to insert arguments as initial values of local + * variables in an optimised interpreter frame ({@link PyFrame}). + *

+ * The fields of the parser that determine the acceptable numbers of + * positional arguments and their names are essentially those of a + * {@code code} object ({@link PyCode}). Defaults are provided as + * values that mirror the defaults built into a {@code function} + * object ({@link PyFunction}). + *

+ * Consider for example a function that in Python would have the + * function definition:

+ * def func(a, b, c=3, d=4, /, e=5, f=6, *aa, g=7, h, i=9, **kk):
+ *     pass
+ * 
This could be described by a constructor call and + * modifiers:
+ * String[] names = {"a", "b", "c", "d",  "e", "f",  "g", "h", "i",
+ *         "aa", "kk"};
+ * ArgParser ap = new ArgParser("func", names,
+ *         names.length - 2, 4, 3, true, true) //
+ *                 .defaults(3, 4, 5, 6) //
+ *                 .kwdefaults(7, null, 9);
+ * 
Note that "aa" and "kk" are at the end of the parameter + * names. (This is how a CPython frame is laid out.) + *

+ * Defaults are provided, after the parser has been constructed, as + * values corresponding to parameter names, when right-justified in + * the space to which they apply. (See diagram below.) Both the + * positional and keyword defaults are given by position in this + * formulation. The {@link #kwdefaults(Object...)} call is allowed + * to supply {@code null} values at positions it does not define. + *

+ * When parsed to an array, the layout of the argument values, in + * relation to fields of the parser will be as follows. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
A Python {@code frame}
namesabcdefghiaakk
layoutposOnlykwOnly
defaultskwdefaults
+ *

+ * The most readable way of specifying a parser (although one that + * is a little costly to construct) is to list the parameters as + * they would be declared in Python, including the furniture that + * marks up the positional-only, keyword-only, positional varargs, + * and keyword varargs. This is the API offered by + * {@link #fromSignature(String, String...)}. In practice we only + * use this in unit tests. For serious applications we construct the + * {@code ArgParser} with a complex of arguments derived by + * inspection of the Java or Python signature. + */ +class ArgParser { + + // Compare code object (and CPython _Arg_Parser in modsupport.h) + + /** Empty names array. */ + private static final String[] NO_STRINGS = new String[0]; + + /** Empty object array. */ + private static final Object[] NO_OBJECTS = new Object[0]; + + /** The name of the function, mainly for error messages. */ + final String name; + + /** + * The kind of object (type or module) in which the method is found. + * This makes a difference to the signature reported for an instance + * method. + */ + final ScopeKind scopeKind; + + /** + * The kind of method (instance, static or class) that this parser + * works for. + */ + final MethodKind methodKind; + + /** + * Names of parameters that could be satisfied by position or + * keyword, including the collector parameters. Elements are + * guaranteed to be interned, and not {@code null} or empty. The + * array must name all the parameters, of which there are: + * {@code argcount + kwonlyargcount + + (hasVarArgs() ? 1 : 0) + (hasVarKeywords() ? 1 : 0)} + *

+ * It is often longer since it suits us to re-use an array that + * names all the local variables of a frame. + */ + /* + * Here and elsewhere we use the same field names as the CPython + * code, even though it tends to say "argument" when it could mean + * that or "parameter". In comments and documentation + * "positional parameter" means a parameter eligible to be satisfied + * by an argument given by position. + */ + final String[] argnames; + + /** + * The number of positional or keyword parameters, excluding the + * "collector" ({@code *args} and {@code **kwargs}) parameters, and + * any data that may follow the legitimate parameter names. Equal to + * {@code argcount + kwonlyargcount}. + */ + final int regargcount; + + /** The number of positional parameters. */ + final int argcount; + + /** + * The number of parameters that can only be satisfied by arguments + * given by position. This differs from {@link #argcount} by + * excluding parameters that may be given by keyword or position. + */ + final int posonlyargcount; + + /** The number of keyword-only parameters. */ + final int kwonlyargcount; + + /** + * The documentation string of the method. + */ + String doc; + + /** + * The (positional) default parameters or {@code null} if there are + * none. + */ + // Compare CPython PyFunctionObject::func_defaults + private Object[] defaults; + + /** + * The keyword defaults, may be a {@code dict} or {@code null} if + * there are none. + */ + // Compare CPython PyFunctionObject::func_kwdefaults + private Map kwdefaults; + + /** + * The frame has a collector ({@code tuple}) for excess positional + * arguments at this index, if it is {@code >=0}. + */ + final int varArgsIndex; + + /** + * The frame has a collector ({@code dict}) for excess keyword + * arguments at this index, if it is {@code >=0}.
+ */ + final int varKeywordsIndex; + + /** + * Construct a parser for a named function, with defined numbers of + * positional-only and keyword-only parameters, and parameter names + * in an array prepared by client code. + *

+ * The array of names is used in-place (not copied). The client code + * must therefore ensure that it cannot be modified after the parser + * has been constructed. + *

+ * The array of names may be longer than is necessary: the caller + * specifies how much of the array should be treated as regular + * parameter names, and whether zero, one or two further elements + * will name collectors for excess positional or keyword arguments. + * The rest of the elements will not be examined by the parser. The + * motivation for this design is to permit efficient construction + * when the array of names is the local variable names in a + * Python {@code code} object. + * + * @param name of the function + * @param names of the parameters including any collectors (varargs) + * @param regargcount number of regular (non-collector) parameters + * @param posOnly number of positional-only parameters + * @param kwOnly number of keyword-only parameters + * @param varargs whether there is a positional collector + * @param varkw whether there is a keywords collector + */ + ArgParser(String name, String[] names, int regargcount, int posOnly, int kwOnly, + boolean varargs, boolean varkw) { + this(name, ScopeKind.TYPE, MethodKind.STATIC, names, regargcount, posOnly, kwOnly, varargs, + varkw); + } + + /** + * Construct a parser from descriptive parameters that may be + * derived from the annotated declarations ({@link Exposed} + * methods) that appear in type and module definitions written in + * Java. For example:

+     * def func(a, b, c=3, d=4, /, e=5, f=6, *aa, g=7, h, i=9, **kk):
+     *     pass
+     * 
The constructor arguments should specify this layout: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
A Python {@code frame}
namesabcdefghiaakk
layoutposOnlykwOnlyvarargsvarkw
defaultskwdefaults
+ * + * @param name of the function + * @param scopeKind whether module, etc. + * @param methodKind whether static, etc. + * @param names of the parameters including any collectors (varargs) + * @param regargcount number of regular (non-collector) parameters + * @param posOnly number of positional-only parameters + * @param kwOnly number of keyword-only parameters + * @param varargs whether there is positional collector + * @param varkw whether there is a keywords collector + */ + ArgParser(String name, ScopeKind scopeKind, MethodKind methodKind, String[] names, + int regargcount, int posOnly, int kwOnly, boolean varargs, boolean varkw) { + + // Name of function + this.name = name; + this.methodKind = methodKind; + this.scopeKind = scopeKind; + this.argnames = names; + + // Total parameter count *except* possible varargs, varkwargs + int N = Math.min(regargcount, names.length); + this.regargcount = N; + this.posonlyargcount = posOnly; + this.kwonlyargcount = kwOnly; + this.argcount = N - kwOnly; + + // There may be positional and/or keyword collectors + this.varArgsIndex = varargs ? N++ : -1; + this.varKeywordsIndex = varkw ? N++ : -1; + + assert argnames.length >= argcount + kwonlyargcount + (hasVarArgs() ? 1 : 0) + + (hasVarKeywords() ? 1 : 0); + } + + /** + * Create a parser, for a named function, with defined numbers of + * positional-only and keyword-only parameters, and naming the + * parameters. Parameters that may only be given by position need + * not be named. ("" is acceptable in the names array.) + *

+ * This is a convenient way to construct a reference result in unit + * tests. + * + * @param name of function + * @param decl names of parameters and indicators "/", "*", "**" + * @return the constructed parser + */ + static ArgParser fromSignature(String name, String... decl) { + + // Collect the names of the parameters here + ArrayList<String> args = new ArrayList<>(); + String varargs = null, varkw = null; + + // posCount < 0 means no "*" marker or "*args" seen (yet) + int posOnly = 0, posCount = -1; + + /* + * Scan names, looking out for /, * and ** markers. Nameless + * parameters are tolerated in the positional-only section. + */ + for (String param : decl) { + int paramLen = param.length(); + if (paramLen > 0) { + if (param.charAt(0) == '/') { + // We found a positional-only end marker / + posOnly = args.size(); + } else if (param.charAt(0) == '*') { + if (paramLen > 1) { + if (param.charAt(1) == '*') { + // Looks like a keywords collector + if (paramLen > 2) { + // ... and it has a name. + varkw = param.substring(2); + } + } else { + // Looks like a positional collector + varargs = param.substring(1); + posCount = args.size(); + } + } else { + // We found a keyword-only start marker * + posCount = args.size(); + } + } else { + // We found a proper name for the parameter. + args.add(param); + } + } else { + // We found a "": tolerate for now. + args.add(""); + } + } + + // Total parameter count *except* possible varargs, varkwargs + int N = args.size(); + + /* + * If there was no "*" marker or "*args" collector, all are + * positional arguments. This is consistent with the output of + * inspect.signature, where e.g. inspect.signature(exec) is + * (source, globals=None, locals=None, /). A "*" that precedes + * every name leaves posCount at 0 (all keyword-only), which is + * why "not seen" is represented by a negative value. + */ + if (posCount < 0) { posCount = N; } + + // Number of regular arguments (not *, **) + int regArgCount = N; + int kwOnly = N - posCount; + + // Add any *args to the names + if (varargs != null) { + args.add(varargs); + N++; + } + + // Add any **kwargs to the names + if (varkw != null) { + args.add(varkw); + N++; + } + + String[] names = N == 0 ?
NO_STRINGS : args.toArray(new String[N]); + + return new ArgParser(name, ScopeKind.TYPE, MethodKind.STATIC, names, regArgCount, posOnly, + kwOnly, varargs != null, varkw != null); + } + + /** + * @return true if default positional arguments are available. + */ + boolean hasDefaults() { return defaults != null; } + + /** + * @return a copy of default positional arguments (or empty array). + */ + Object[] getDefaults() { + if (defaults == null) { + return NO_OBJECTS; + } else { + return Arrays.copyOf(defaults, defaults.length); + } + } + + /** + * @return true if there is an excess positional argument collector. + */ + boolean hasVarArgs() { return varArgsIndex >= 0; } + + /** + * @return true if default keyword-only arguments are available. + */ + boolean hasKwdefaults() { return kwdefaults != null; } + + /** + * @return true if there is an excess keyword argument collector. + */ + boolean hasVarKeywords() { return varKeywordsIndex >= 0; } + + /** + * The representation of an {@code ArgParser} is based on the + * {@code __text_signature__} attribute of built-in methods (see + * {@link #textSignature()}) and the specifications found in CPython + * Argument Clinic. + */ + @Override + public String toString() { return name + textSignature(); } + + /** + * Return a string representing the argument list of the method. The + * string is like that found in the {@code __text_signature__} + * attribute of built-in methods. + * + * @return the signature of the arguments + */ + String textSignature() { + StringJoiner sj = new StringJoiner(", ", "(", ")"); + int empty = sj.length(); + + // Keyword only parameters start at k + int k = regargcount - kwonlyargcount; + // The positional defaults start at d + int d = k - (defaults == null ? 
0 : defaults.length); + // We work through the parameters with index i + int i = 0; + + // Possible leading argument + switch (methodKind) { + case INSTANCE: + // $module, $self + sj.add(scopeKind.selfName); + break; + case CLASS: + sj.add("$type"); + break; + default: // STATIC = no mention + break; + } + + // Positional-only parameters + while (i < posonlyargcount) { sj.add(parameterToString(i++, d)); } + + // If there were any positional-only parameters ... + if (sj.length() > empty) { + // ... mark the end of them. + sj.add("/"); + } + + // Positional (but not positional-only) parameters + while (i < k) { sj.add(parameterToString(i++, d)); } + + // Reached the end of the positional section + if (hasVarArgs()) { + // Excess from the positional section goes to a *args + sj.add("*" + argnames[varArgsIndex]); + } else if (i < regargcount) { + // Mark the end but no *args to catch the excess + sj.add("*"); + } + + // Keyword only parameters begin properly + while (i < regargcount) { sj.add(parameterToString(i++)); } + + if (hasVarKeywords()) { + // Excess from the keyword section goes to a **kwargs + sj.add("**" + argnames[varKeywordsIndex]); + } + + return sj.toString(); + } + + /** + * Return ith positional parameter name and default value if + * available. Helper to {@link #textSignature()}. + */ + private String parameterToString(int i, int d) { + if (i < d) + return argnames[i]; + else { + // A default value is available + Object value = defaults[i - d]; + return argnames[i] + "=" + repr(value); + } + } + + /** + * Return ith parameter name and keyword default value if + * available. Helper to {@link #textSignature()}.
+ */ + private String parameterToString(int i) { + String name = argnames[i]; + if (kwdefaults != null) { + Object value = kwdefaults.get(name); + if (value != null) { + // A default value is available + return argnames[i] + "=" + repr(value); + } + } + return name; + } + + /** + * Weak substitute for {@code repr()} that will do for common types + * of default argument. + * + * @param o object to reproduce + * @return poorman's {@code repr(o)} + */ + private static String repr(Object o) { + if (o instanceof String) { + String s = (String)o; + if (!s.contains("'")) + return "'" + s + "'"; + else + return "\"" + s + "\""; + } else { + return o.toString(); + } + } + + /** + * Parse {@code __call__} arguments and create an array, using the + * arguments supplied and the defaults held in the parser. + * + * @param args all arguments, positional then keyword + * @param names of keyword arguments + * @return array of parsed arguments + */ + Object[] parse(Object[] args, String[] names) { + Object[] a = new Object[argnames.length]; + FrameWrapper w = new ArrayFrameWrapper(a); + parseToFrame(w, args, 0, args.length, names); + return a; + } + + /** + * Parse CPython-style vector call arguments and create an array, + * using the arguments supplied and the defaults held in the parser. + * + * @param s positional and keyword arguments + * @param p position of arguments in the array + * @param n number of positional and keyword arguments + * @param names of keyword arguments or {@code null} + * @return array of parsed arguments + */ + Object[] parse(Object[] s, int p, int n, String[] names) { + Object[] a = new Object[argnames.length]; + FrameWrapper w = new ArrayFrameWrapper(a); + parseToFrame(w, s, p, n, names); + return a; + } + + /** + * Parse classic arguments and create an array, using the arguments + * supplied and the defaults held in the parser. 
+ * + * @param args positional arguments + * @param kwargs keyword arguments + * @return array of parsed arguments + */ + Object[] parse(PyTuple args, PyDict kwargs) { + Object[] a = new Object[argnames.length]; + FrameWrapper w = new ArrayFrameWrapper(a); + parseToFrame(w, args, kwargs); + return a; + } + + /** + * Provide the positional defaults. The {@code ArgParser} keeps a + * reference to this array, so that subsequent changes to it will + * affect argument parsing. (Concurrent access to the array and + * parser is a client issue.) + *

+ * If L values are provided, they correspond to + * {@code arg[max-L] ... arg[max-1]}, where {@code max} is the index + * of the first keyword-only parameter, or the number of parameters + * if there are no keyword-only parameters. The minimum number of + * positional arguments will then be {@code max-L}. + * + * @param values replacement positional defaults (or {@code null}) + * @return {@code this} + */ + ArgParser defaults(Object... values) { + defaults = values; + checkShape(); + return this; + } + + /** + * Provide the keyword-only defaults as values. If K values are + * provided, they correspond to {@code arg[N-K] ... arg[N-1]}, where + * {@code N} is the number of regular parameters + * ({@link #regargcount}). If the argument is empty, it is converted + * to {@code null} internally. The number of keyword-only parameters + * and positional-only parameters must not together exceed the + * number of regular parameters named in the constructor. + * + * @param values keyword values aligned to the parameter names + * @return {@code this} + */ + ArgParser kwdefaults(Object... values) { + PyDict d = new PyDict(); + int K = values.length; + for (int i = 0, p = regargcount - K; i < K; i++, p++) { + Object v = values[i]; + if (v != null) { d.put(argnames[p], v); } + } + kwdefaults = d; + checkShape(); + return this; + } + + /** + * Provide the keyword-only defaults, perhaps as a {@code dict}. The + * {@code ArgParser} keeps a reference to this map, so that + * subsequent changes to it will affect argument parsing, as + * required for a Python {@link PyFunction function}. (Concurrent + * access to the mapping and parser is a client issue.) 
+ * + * @param kwd replacement keyword defaults (or {@code null}) + * @return {@code this} + */ + ArgParser kwdefaults(Map kwd) { + kwdefaults = kwd; + checkShape(); + return this; + } + + /** + * The number of keyword-only parameters and positional-only + * parameters must not together exceed the number of parameters + * named in the constructor. (The last two are defined in the + * constructor.) Nor must there be excess default values for the + * number of parameters. + */ + private void checkShape() { + // XXX This may be too fussy, given that Python function is not + final int N = argcount; + final int L = defaults == null ? 0 : defaults.length; + final int K = kwonlyargcount; + final int W = kwdefaults == null ? 0 : kwdefaults.size(); + + int min = N - L; + int kwmax = N + K - posonlyargcount; + + if (min < 0) { + throw new InterpreterError(TOO_MANY_DEFAULTS, L, N, name); + } else if (W > kwmax) { throw new InterpreterError(TOO_MANY_KWDEFAULTS, W, kwmax, name); } + } + + private static final String TOO_MANY_DEFAULTS = "More defaults (%d given) than " + + "positional parameters (%d allowed) when specifying '%s'"; + + private static final String TOO_MANY_KWDEFAULTS = + "More keyword defaults (%d given) than remain after " + + "positional-only parameters (%d left) when specifying '%s'"; + + /** Get the name of arg i or make one up. */ + private String nameArg(int i) { + String arg = argnames[i].toString(); + if (arg.length() == 0) { arg = String.format("arg %d", i + 1); } + return arg; + } + + /** + * Abstract wrapper for storage that the enclosing argument parser + * should be able to fill from the arguments to a Python call. + * Typically this wrapper is a window onto the local variables of a + * function invocation (a {@link PyFrame}) that the run-time is in + * the process of initialising during a call. 
+ */ + abstract class FrameWrapper { + + /** + * Get the local variable named by {@code argnames[i]} + * + * @param i index of variable name in {@code argnames} + * @return value of variable named {@code argnames[i]} + */ + abstract Object getLocal(int i); + + /** + * Set the local variable named by {@code argnames[i]} + * + * @param i index of variable name in {@code argnames} + * @param v to assign to variable named {@code argnames[i]} + */ + abstract void setLocal(int i, Object v); + + /** + * Copy positional arguments into local variables, making sure we + * don't copy more than have been allowed for in the frame. + * Providing too many or too few is not an error at this stage, as + * there may be a collector to catch the excess arguments or + * positional or keyword defaults to make up the shortfall. + * + * @param args positional arguments + */ + void setPositionalArguments(PyTuple args) { + int n = Math.min(args.value.length, argcount); + for (int i = 0; i < n; i++) + setLocal(i, args.value[i]); + } + + /** + * Copy positional arguments into local variables, making sure we + * don't copy more than have been allowed for in the frame. + * Providing too many or too few is not an error at this stage, as + * there may be a collector to catch the excess arguments or + * positional or keyword defaults to make up the shortfall. + * + * @param stack positional and keyword arguments + * @param pos position of arguments in the array + * @param nargs number of positional arguments + */ + void setPositionalArguments(Object[] stack, int pos, int nargs) { + int n = Math.min(nargs, argcount); + for (int i = 0, j = pos; i < n; i++) + setLocal(i, stack[j++]); + } + + /** + * For each of the names used as keywords in the call, match it with + * an allowable parameter name, and assign that frame-local variable + * the keyword argument given in the call. If the variable is not + * null, this is an error. + *

+ * "Allowable parameter name" here means the names in + * {@code argnames[p:q]} where {@code p=posonlyargcount} and + * {@code q=argcount + kwonlyargcount}. If the name used in the call + * is not an allowable keyword, then if this parser allows for + * excess keywords, add it to the frame's keyword dictionary, + * otherwise throw an informative error. + *

+ * In this version, accept the keyword arguments passed as a + * dictionary, as in the "classic" {@code (*args, **kwargs)} call. + * + * @param kwargs keyword arguments given in call + */ + void setKeywordArguments(PyDict kwargs) { + /* + * Create a dictionary for the excess keyword parameters, and insert + * it in the local variables at the proper position. + */ + PyDict kwdict = null; + if (hasVarKeywords()) { + kwdict = Py.dict(); + setLocal(varKeywordsIndex, kwdict); + } + + /* + * For each entry in kwargs, search argnames for a match, and either + * assign the local variable or add the name-value pair to kwdict. + */ + for (Map.Entry entry : kwargs.entrySet()) { + Object name = entry.getKey(); + Object value = entry.getValue(); + int index = argnamesIndexOf(name); + + if (index < 0) { + // Not found in (allowed slice of) argnames + if (kwdict != null) + kwdict.put(name, value); + else + // No kwdict: non-match is an error. + throw unexpectedKeyword(name, kwargs.keySet()); + } else { + // Keyword found to name allowable variable at index + if (getLocal(index) == null) + setLocal(index, value); + else + // Unfortunately, that seat is already taken + throw new TypeError(MULTIPLE_VALUES, name, name); + } + } + } + + /** + * For each of the names used as keywords in the call, match it with + * an allowable parameter name, and assign that frame-local variable + * the keyword argument given in the call. If the variable is not + * null, this is an error. + *

+ * "Allowable parameter name" here means the names in + * {@code argnames[p:q]} where {@code p=posonlyargcount} and + * {@code q=argcount + kwonlyargcount}. If the name used in the call + * is not an allowable keyword, then if this parser allows for + * excess keywords, add it to the frame's keyword dictionary, + * otherwise throw an informative error. + *

+ * In this version, accept the keyword arguments as an array of + * names and a slice of the argument array holding their values. + * + * @param stack {@code [kwstart:kwstart+len(kwnames)]} values + * corresponding to {@code kwnames} in order + * @param kwstart start position in {@code stack} + * @param kwnames keywords used in the call (or {@code null}) + */ + void setKeywordArguments(Object[] stack, int kwstart, String[] kwnames) { + + PyDict kwdict = null; + if (varKeywordsIndex >= 0) { + /* + * Create a dictionary for the excess keyword parameters, and insert + * it in the local variables at the proper position. + */ + kwdict = Py.dict(); + setLocal(varKeywordsIndex, kwdict); + } + + /* + * For each of the names in kwnames, search argnames for a match, + * and either assign the local variable or add the name-value pair + * to kwdict. + */ + int kwcount = kwnames == null ? 0 : kwnames.length; + for (int i = 0, j = kwstart; i < kwcount; i++) { + String key = kwnames[i]; + Object value = stack[j++]; + int index = argnamesIndexOf(key); + + if (index < 0) { + // Not found in (allowed slice of) argnames + if (kwdict != null) + // Put unmatched (name, value) in dict. + kwdict.put(key, value); + else + // No kwdict: non-match is an error. + throw unexpectedKeyword(key, Arrays.asList(kwnames)); + } else { + // Keyword found to name allowable variable at index + if (getLocal(index) == null) + setLocal(index, value); + else + // Unfortunately, that seat is already taken + throw new TypeError(MULTIPLE_VALUES, name, key); + } + } + } + + /** + * Find the given name in {@code argnames}, and if it is not found, + * return -1. Only the "allowable parameter names", those acceptable + * as keyword arguments, are searched. It is an error if the name is + * not a Python {@code str}. 
+ * + * @param name parameter name given as keyword + * @return index of {@code name} in {@code argnames} or -1 + */ + private int argnamesIndexOf(Object name) { + + int end = regargcount; + + if (name == null || !(PyUnicode.TYPE.check(name))) { + throw new TypeError(KEYWORD_NOT_STRING, name); + } + + /* + * For speed, try raw pointer comparison. As names are normally + * interned Strings this should almost always hit. + */ + for (int i = posonlyargcount; i < end; i++) { + if (argnames[i] == name) + return i; + } + + /* + * It's not definitive until we have repeated the search using + * proper object comparison. + */ + for (int i = posonlyargcount; i < end; i++) { + if (Abstract.richCompareBool(name, argnames[i], Comparison.EQ, null)) + return i; + } + + return -1; + } + + /** + * Fill in missing positional parameters from a from {@code defs}. + * If any positional parameters are cannot be filled, this is an + * error. The number of positional arguments {@code nargs} is + * provided so we know where to start only for their number. + *

+ * It is harmless (but a waste) to call this when + * {@code nargs >= argcount}. + * + * @param nargs number of positional arguments given in call + * @param defs default values by position or {@code null} + * @throws TypeError if there are still missing arguments. + */ + void applyDefaults(int nargs, Object[] defs) throws TypeError { + + int ndefs = defs == null ? 0 : defs.length; + /* + * At this stage, the first nargs parameter slots have been filled + * and some (or all) of the remaining argcount-nargs positional + * parameters may have been assigned using keyword arguments. + * Meanwhile, defs is available to provide values for (only) the + * last defs.length positional parameters. + */ + // locals[nargs:m] have no default values, where: + int m = argcount - ndefs; + int missing = 0; + for (int i = nargs; i < m; i++) { if (getLocal(i) == null) { missing++; } } + if (missing > 0) { throw missingArguments(missing, ndefs); } + + /* + * Variables in locals[m:argcount] may take defaults from defs, but + * perhaps nargs > m. Begin at index nargs, but not necessarily at + * the start of defs. + */ + for (int i = nargs, j = Math.max(nargs - m, 0); j < ndefs; i++, j++) { + if (getLocal(i) == null) { setLocal(i, defs[j]); } + } + } + + /** + * Deal with missing keyword arguments, attempting to fill them from + * {@code kwdefs}. If any parameters are unfilled after that, this + * is an error. + * + * It is harmless (but a waste) to call this when + * {@code kwonlyargcount == 0}. + * + * @param kwdefs default values by keyword or {@code null} + * @throws TypeError if there are too many or missing arguments. + */ + void applyKWDefaults(Map kwdefs) throws TypeError { + /* + * Variables in locals[argcount:end] are keyword-only parameters. If + * they have not been assigned yet, they take values from dict + * kwdefs. 
+ */ + int end = regargcount; + int missing = 0; + for (int i = argcount; i < end; i++) { + Object value = getLocal(i); + if (value == null && kwdefs != null) + setLocal(i, value = kwdefs.get(argnames[i])); + if (value == null) { missing++; } + } + if (missing > 0) { throw missingArguments(missing, -1); } + } + + static final String KEYWORD_NOT_STRING = "%.200s(): keywords must be strings"; + static final String KEYWORD_NOT_COMPARABLE = "Keyword names %s not comparable."; + static final String MULTIPLE_VALUES = "%.200s(): multiple values for parameter '%s'"; + static final String POSITIONAL_ONLY = + "%.200s(): positional-only argument%s passed by keyword: %s"; + static final String UNEXPECTED_KEYWORD = "%.200s(): unexpected keyword argument '%s'"; + + /* + * Compare CPython ceval.c::too_many_positional(). Unlike that + * function, on diagnosing a problem, we do not have to set a + * message and return status. Also, when called there is *always* a + * problem, and therefore an exception. + */ + // XXX Do not report kw arguments given: unnatural constraint. + /* + * The caller must defer the test until after kw processing, just so + * the actual kw-args given can be reported accurately. Otherwise, + * the test could be after (or part of) positional argument + * processing. + */ + protected TypeError tooManyPositional(int posGiven) { + boolean posPlural = false; + int kwGiven = 0; + String posText, givenText; + int defcount = defaults == null ? 
0 : defaults.length; + int end = regargcount; + + assert (!hasVarArgs()); + + // Count keyword-only args given + for (int i = argcount; i < end; i++) { if (getLocal(i) != null) { kwGiven++; } } + + if (defcount != 0) { + posPlural = true; + posText = String.format("from %d to %d", argcount - defcount, argcount); + } else { + posPlural = (argcount != 1); + if (argcount == 0) { + posText = "no"; + } else { + posText = String.format("%d", argcount); + } + } + + if (kwGiven > 0) { + String format = " positional argument%s (and %d keyword-only argument%s)"; + givenText = String.format(format, posGiven != 1 ? "s" : "", kwGiven, + kwGiven != 1 ? "s" : ""); + } else { + givenText = ""; + } + + return new TypeError("%s() takes %s positional argument%s but %d%s %s given", name, + posText, posPlural ? "s" : "", posGiven, givenText, + (posGiven == 1 && kwGiven == 0) ? "was" : "were"); + } + + /** + * Diagnose an unexpected keyword occurring in a call and represent + * the problem as an exception. The particular keyword may + * incorrectly name a positional parameter, or it may be entirely + * unexpected (not be a parameter at all). In any case, since this + * error is going to be fatal to the call, this method looks at + * all the keywords to see if any are positional-only + * parameters, and if that's not the problem, reports just the + * originally-offending keyword as unexpected. + *

+ * We call this method when any keyword has been encountered that + * does not match a legitimate parameter, and there is no + * {@code **kwargs} dictionary to catch it. Because Python makes it + * possible to supply keyword arguments from a {@code map} with + * {@code object} keys, we accept any object as a keyword name. + * + * @param type of element in keyword collection + * @param kw the unexpected keyword encountered in the call + * @param kwnames all the keywords used in the call + * @return TypeError diagnosing the problem + */ + /* + * Compare CPython ceval.c::positional_only_passed_as_keyword(), and + * the code around its call. Unlike that function, on diagnosing a + * problem, we do not have to set a message and return status. Also, + * when called there is *always* a problem, and therefore an + * exception. + */ + protected TypeError unexpectedKeyword(Object kw, Collection kwnames) { + /* + * Compare each of the positional only parameter names with each of + * the keyword names given in the call. Collect the matches in a + * list. + */ + List names = new ArrayList<>(); + for (int k = 0; k < posonlyargcount; k++) { + String varname = argnames[k]; + for (K keyword : kwnames) { + if (Abstract.richCompareBool(varname, keyword, Comparison.EQ, null)) + names.add(keyword.toString()); + } + } + + if (!names.isEmpty()) { + // We caught one or more matches: throw + return new TypeError(POSITIONAL_ONLY, name, names.size() == 1 ? "" : "s", + String.join(", ", names)); + } else { + // No match, so it is just unexpected altogether + return new TypeError(UNEXPECTED_KEYWORD, name, kw); + } + } + + /** + * Diagnose which positional or keywords arguments are missing, and + * throw {@link TypeError} listing them. We call this when we have + * already detected a problem, and the process is one of going over + * the data again to create an accurate message. 
+ * + * @param missing number of missing arguments + * @param defcount number of positional defaults available (or -1) + * @return TypeError listing names of the missing arguments + */ + /* + * Compare CPython ceval.c::missing_arguments(). Unlike that + * function, on diagnosing a problem, we do not have to set a + * message and return status so the caller can "goto fail" and clean + * up. We can just throw directly. + */ + protected TypeError missingArguments(int missing, int defcount) { + String kind; + int start, end; + + // Choose the range in which to look for null arguments + if (defcount >= 0) { + kind = "positional"; + start = 0; + end = argcount - defcount; + } else { + kind = "keyword-only"; + start = argcount; + end = start + kwonlyargcount; + } + + // Make a list of names from that range where value is null + ArrayList names = new ArrayList<>(missing); + for (int i = start, j = 0; i < end; i++) { + if (getLocal(i) == null) { names.add(j++, nameArg(i)); } + } + + // Formulate an error from the list + return missingNamesTypeError(kind, names); + } + + /** + * Compose a {@link TypeError} from the missing argument names. + */ + /* + * Compare CPython ceval.c::format_missing(). Unlike that function, + * on diagnosing a problem, we do not have to set a message and + * return status so the caller can "goto fail" and clean up. We can + * just throw directly. + */ + private TypeError missingNamesTypeError(String kind, ArrayList names) { + int len = names.size(); + String joinedNames; + + switch (len) { + case 0: + // Shouldn't happen but let's avoid trouble + joinedNames = ""; + break; + case 1: + joinedNames = names.get(0); + break; + case 2: + joinedNames = + String.format("%s and %s", names.get(len - 2), names.get(len - 1)); + break; + default: + String tail = + String.format(", %s and %s", names.get(len - 2), names.get(len - 1)); + // Chop off the last two objects in the list. 
+ names.remove(len - 1); + names.remove(len - 2); + // Stitch into a nice comma-separated list. + joinedNames = String.join(", ", names) + tail; + } + + return new TypeError("%s() missing %d required %s argument%s: %s", name, len, kind, + len == 1 ? "" : "s", joinedNames); + } + } + + /** + * Wrap an array provided by a client so that the enclosing argument + * parser may fill it from the arguments to a Python call. This + * array could be the local variables in the frame of a function + * being called, or an argument in the call of a method handle that + * accepts its arguments as an array. See: + * {@link ArgParser#parseToFrame(FrameWrapper, PyTuple, PyDict)}. + */ + class ArrayFrameWrapper extends FrameWrapper { + + private final Object[] vars; + final int start; + + /** + * Wrap a slice of an existing array. The elements to fill are a + * slice of the destination array with specified starting index. The + * intended use is that {@code start = 1} allows space for a + * {@code self} reference not in the argument list. The capacity of + * the array, between the start index and the end, must be + * sufficient to hold the parse result may be larger, e.g. to + * accommodate other local variables. + * + * @param vars destination array + * @param start at which to place first parsed argument + */ + ArrayFrameWrapper(Object[] vars, int start) { + super(); + this.vars = vars; + this.start = start; + assert start + argcount <= vars.length; + } + + /** + * Wrap an existing array. The capacity of the array must be + * sufficient to hold the parse result. 
+ * + * @param vars destination array + */ + ArrayFrameWrapper(Object[] vars) { this(vars, 0); } + + @Override + Object getLocal(int i) { return vars[start + i]; } + + @Override + void setLocal(int i, Object v) { vars[start + i] = v; } + + @Override + void setPositionalArguments(PyTuple argsTuple) { + int n = Math.min(argsTuple.value.length, argcount); + System.arraycopy(argsTuple.value, 0, vars, start, n); + } + + @Override + void setPositionalArguments(Object[] stack, int pos, int nargs) { + int n = Math.min(nargs, argcount); + System.arraycopy(stack, pos, vars, start, n); + } + } + + /** + * Parse when an args tuple and keyword dictionary are supplied, + * that is, for a classic call. + * + * @param frame to populate with argument values + * @param args positional arguments given + * @param kwargs keyword arguments given + */ + void parseToFrame(FrameWrapper frame, PyTuple args, PyDict kwargs) { + + final int nargs = args.value.length; + + // Set parameters from the positional arguments in the call. + frame.setPositionalArguments(args); + + // Set parameters from the keyword arguments in the call. + if (kwargs != null && !kwargs.isEmpty()) + frame.setKeywordArguments(kwargs); + + if (nargs > argcount) { + + if (hasVarArgs()) { + // Locate the * parameter in the frame + // Put the excess positional arguments there + frame.setLocal(varArgsIndex, new PyTuple(args.value, argcount, nargs - argcount)); + } else { + // Excess positional arguments but no *args for them. 
+ throw frame.tooManyPositional(nargs); + } + + } else { // nargs <= argcount + + if (hasVarArgs()) { + // No excess: set the * parameter in the frame to empty + frame.setLocal(varArgsIndex, PyTuple.EMPTY); + } + + if (nargs < argcount) { + // Set remaining positional parameters from default + frame.applyDefaults(nargs, defaults); + } + } + + if (kwonlyargcount > 0) + // Set keyword parameters from default values + frame.applyKWDefaults(kwdefaults); + } + + /** + * Parse when an args array and keyword array are supplied, that is, + * for a vector call on a stack slice. + * + * @param frame to populate with argument values + * @param stack array containing all arguments + * @param start of the slice in the stack + * @param nargs number of arguments in the slice, whether position + * or keyword + * @param kwnames (implying number) of keyword arguments + */ + void parseToFrame(FrameWrapper frame, Object[] stack, int start, int nargs, String[] kwnames) { + + // Number of arguments given by keyword + int nkwargs = kwnames == null ? 0 : kwnames.length; + // From here on, number of arguments given by position + nargs = nargs - nkwargs; + + /* + * Here, CPython applies certain criteria for calling a fast path + * that (in our terms) calls only setPositionalArguments(). Those + * that depend only on code or defaults we make when those + * attributes are defined. + */ + + // Set parameters from the positional arguments in the call. + if (nargs > 0) { frame.setPositionalArguments(stack, start, nargs); } + + // Set parameters from the keyword arguments in the call. + if (varKeywordsIndex >= 0 || nkwargs > 0) { + frame.setKeywordArguments(stack, start + nargs, kwnames); + } + + if (nargs > argcount) { + + if (varArgsIndex >= 0) { + // Put the excess positional arguments in the *args + frame.setLocal(varArgsIndex, + new PyTuple(stack, start + argcount, nargs - argcount)); + } else { + // Excess positional arguments but nowhere for them. 
+ throw frame.tooManyPositional(nargs); + } + + } else { // nargs <= argcount + + if (varArgsIndex >= 0) { + // No excess: set the * parameter in the frame to empty + frame.setLocal(varArgsIndex, PyTuple.EMPTY); + } + + if (nargs < argcount) { + // Set remaining positional parameters from default + frame.applyDefaults(nargs, defaults); + } + } + + if (kwonlyargcount > 0) + // Set keyword parameters from default values + frame.applyKWDefaults(kwdefaults); + } + + /** + * Parse when an args array and keyword array are supplied, that is, + * for a standard {@code __call__}. + * + * @param frame to populate with argument values + * @param args all arguments, positional then keyword + * @param kwnames of keyword arguments (or {@code null}) + */ + void parseToFrame(FrameWrapper frame, Object[] args, String[] kwnames) { + + // Number of arguments given by keyword + int nkwargs = kwnames == null ? 0 : kwnames.length; + // Number of arguments given by position + int nargs = args.length - nkwargs; + + /* + * Here, CPython applies certain criteria for calling a fast path + * that (in our terms) calls only setPositionalArguments(). Those + * that depend only on code or defaults we make when those + * attributes are defined. + */ + + // Set parameters from the positional arguments in the call. + if (nargs > 0) { frame.setPositionalArguments(args, 0, nargs); } + + // Set parameters from the keyword arguments in the call. + if (varKeywordsIndex >= 0 || nkwargs > 0) { + frame.setKeywordArguments(args, nargs, kwnames); + } + + if (nargs > argcount) { + + if (varArgsIndex >= 0) { + // Put the excess positional arguments in the *args + frame.setLocal(varArgsIndex, new PyTuple(args, argcount, nargs - argcount)); + } else { + // Excess positional arguments but nowhere for them. 
+ throw frame.tooManyPositional(nargs); + } + + } else { // nargs <= argcount + + if (varArgsIndex >= 0) { + // No excess: set the * parameter in the frame to empty + frame.setLocal(varArgsIndex, PyTuple.EMPTY); + } + + if (nargs < argcount) { + // Set remaining positional parameters from default + frame.applyDefaults(nargs, defaults); + } + } + + if (kwonlyargcount > 0) + // Set keyword parameters from default values + frame.applyKWDefaults(kwdefaults); + } +} diff --git a/core/src/main/java/org/python/core/ArgumentError.java b/core/src/main/java/org/python/core/ArgumentError.java new file mode 100644 index 000000000..c37c2ab8b --- /dev/null +++ b/core/src/main/java/org/python/core/ArgumentError.java @@ -0,0 +1,94 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +/** + * The type of (non-Python) exception thrown by invoking a slot or + * method with the wrong pattern of arguments. An + * {@code ArgumentError} encapsulates what a particular method or + * slot expected by way of the number of positional arguments and + * the presence or otherwise of keyword arguments. + *

+ * Having a distinct exception solves the problem that not all the + * context for an informative message may be in scope at discovery. + * {@code ArgumentError} should be caught as soon as the necessary + * context is available and converted to a Python exception. + */ +class ArgumentError extends Exception { + private static final long serialVersionUID = 1L; + + enum Mode { + NOARGS, NUMARGS, MINMAXARGS, SELF, NOKWARGS; + + /** + * Choose a mode from {@code NOARGS} to {@code MINMAXARGS} based on + * the min and max argument numbers + * + * @param minArgs minimum expected number of arguments + * @param maxArgs maximum expected number of arguments + * @return a mode + */ + static Mode choose(int minArgs, int maxArgs) { + if (minArgs != maxArgs) + return MINMAXARGS; + else if (minArgs != 0) + return NUMARGS; + else + return NOARGS; + } + } + + final ArgumentError.Mode mode; + final short minArgs, maxArgs; + + private ArgumentError(Mode mode, int minArgs, int maxArgs) { + this.mode = mode; + this.minArgs = (short)minArgs; + this.maxArgs = (short)maxArgs; + } + + /** + * The mode is {@link Mode#NOARGS} or {@link Mode#NOKWARGS}. In the + * latter case, {@link #minArgs} and {@link #maxArgs} should be + * ignored. + * + * @param mode qualifies the sub-type of the problem + */ + ArgumentError(Mode mode) { this(mode, 0, 0); } + + /** + * The mode is {@link Mode#NUMARGS} or {@link Mode#NOARGS}. + * + * @param numArgs expected number of arguments + */ + ArgumentError(int numArgs) { this(numArgs, numArgs); } + + /** + * The mode is {@link Mode#MINMAXARGS}, {@link Mode#NUMARGS} or + * {@link Mode#NOARGS}. 
+ * + * @param minArgs minimum expected number of arguments + * @param maxArgs maximum expected number of arguments + */ + ArgumentError(int minArgs, int maxArgs) { + this(Mode.choose(minArgs, maxArgs), minArgs, maxArgs); + } + + @Override + public String toString() { + switch (mode) { + case NOARGS: + return "takes no arguments"; + case NUMARGS: + return String.format("takes %d arguments", minArgs); + case MINMAXARGS: + return String.format("takes from %d to %d arguments", minArgs, maxArgs); + case SELF: + return "'self' required"; + case NOKWARGS: + return "takes no keyword arguments"; + default: + return mode.toString(); + } + } +} diff --git a/core/src/main/java/org/python/core/ArithmeticError.java b/core/src/main/java/org/python/core/ArithmeticError.java new file mode 100644 index 000000000..b7c65bea4 --- /dev/null +++ b/core/src/main/java/org/python/core/ArithmeticError.java @@ -0,0 +1,33 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code ArithmeticError} exception. */ +class ArithmeticError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("ArithmeticError", MethodHandles.lookup())); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected ArithmeticError(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public ArithmeticError(String msg, Object... 
args) { + this(TYPE, msg, args); + } +} diff --git a/core/src/main/java/org/python/core/AttributeError.java b/core/src/main/java/org/python/core/AttributeError.java new file mode 100644 index 000000000..1aa79f917 --- /dev/null +++ b/core/src/main/java/org/python/core/AttributeError.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code AttributeError} exception. */ +class AttributeError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("AttributeError", MethodHandles.lookup()) // + .base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected AttributeError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public AttributeError(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/BaseBytes.java b/core/src/main/java/org/python/core/BaseBytes.java new file mode 100644 index 000000000..aeede0f6c --- /dev/null +++ b/core/src/main/java/org/python/core/BaseBytes.java @@ -0,0 +1,41 @@ +package org.python.core; + +/** Stop-gap definition to satisfy references in the project. */ +public class BaseBytes { + + // + // Character class operations + // + + // Bit to twiddle (XOR) for lowercase letter to uppercase and + // vice-versa. 
+ private static final int SWAP_CASE = 0x20; + + // Bit masks and sets to use with the byte classification table + private static final byte UPPER = 0b1; + private static final byte LOWER = 0b10; + private static final byte DIGIT = 0b100; + private static final byte SPACE = 0b1000; + private static final byte ALPHA = UPPER | LOWER; + private static final byte ALNUM = ALPHA | DIGIT; + + // Character (byte) classification table. + private static final byte[] ctype = new byte[256]; + static { + for (int c = 'A'; c <= 'Z'; c++) { + ctype[0x80 + c] = UPPER; + ctype[0x80 + SWAP_CASE + c] = LOWER; + } + for (int c = '0'; c <= '9'; c++) { ctype[0x80 + c] = DIGIT; } + for (char c : " \t\n\u000b\f\r".toCharArray()) { ctype[0x80 + c] = SPACE; } + } + + /** + * @param b to classify + * @return b in ' \t\n\v\f\r' + */ + static final boolean isspace(byte b) { + return (ctype[0x80 + b] & SPACE) != 0; + } + +} diff --git a/core/src/main/java/org/python/core/BaseException.java b/core/src/main/java/org/python/core/BaseException.java new file mode 100644 index 000000000..49f56ee8a --- /dev/null +++ b/core/src/main/java/org/python/core/BaseException.java @@ -0,0 +1,61 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code BaseException} exception. */ +class BaseException extends RuntimeException implements CraftedPyObject { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("BaseException", MethodHandles.lookup())); + private final PyType type; + final Object[] args; + + @Override + public PyType getType() { return type; } + + /** + * Constructor for sub-class use specifying {@link #type}. The message + * {@code msg} is a Java format string in which the constructor arguments + * {@code args} are used to fill the place holders. 
The formatted message is the
+     * exception message from the Java point of view.
+     * <p>
+     * From a Python perspective, the tuple ({@code exception.args}) has one
+     * element, the formatted message, or zero elements if the message is zero
+     * length.
+     *
+     * @param type object being constructed
+     * @param msg a Java format string for the message
+     * @param args to insert in the format string
+     */
+    protected BaseException(PyType type, String msg, Object... args) {
+        super(String.format(msg, args));
+        this.type = type;
+        msg = this.getMessage();
+        this.args = msg.length() > 0 ? new Object[] {msg} : Py.EMPTY_ARRAY;
+    }
+
+    /**
+     * Constructor specifying a message.
+     *
+     * @param msg a Java format string for the message
+     * @param args to insert in the format string
+     */
+    public BaseException(String msg, Object... args) { this(TYPE, msg, args); }
+
+    @Override
+    public String toString() {
+        String msg = args.length > 0 ? args[0].toString() : "";
+        return String.format("%s: %s", getType().name, msg);
+    }
+
+    // slot functions -------------------------------------------------
+
+    protected Object __repr__() {
+        // Somewhat simplified (message is not escaped); empty args gives Name()
+        return getType().name + (args.length > 0 ? "('" + args[0] + "')" : "()");
+    }
+}
diff --git a/core/src/main/java/org/python/core/BuiltinsModule.java b/core/src/main/java/org/python/core/BuiltinsModule.java
new file mode 100644
index 000000000..87dc896fa
--- /dev/null
+++ b/core/src/main/java/org/python/core/BuiltinsModule.java
@@ -0,0 +1,185 @@
+// Copyright (c)2023 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.core;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Iterator;
+
+import org.python.core.Exposed.Default;
+import org.python.core.Exposed.DocString;
+import org.python.core.Exposed.KeywordOnly;
+import org.python.core.Exposed.Name;
+import org.python.core.Exposed.PositionalCollector;
+import org.python.core.Exposed.PythonStaticMethod;
+
+/**
+ * The {@code builtins} module is definitely called "builtins".
+ * <p>
+ * Although it is fully a module, the {@link BuiltinsModule} lives + * in the {@code core} package because it needs privileged access to + * the core implementation that extension modules do not. + */ +class BuiltinsModule extends JavaModule { + + private static final ModuleDef DEFINITION = new ModuleDef("builtins", MethodHandles.lookup()); + + /** Construct an instance of the {@code builtins} module. */ + BuiltinsModule() { + super(DEFINITION); + + // This list is taken from CPython bltinmodule.c + add("None", Py.None); + // add("Ellipsis", Py.Ellipsis); + add("NotImplemented", Py.NotImplemented); + add("False", Py.False); + add("True", Py.True); + add("bool", PyBool.TYPE); + // add("memoryview", PyMemoryView.TYPE); + // add("bytearray", PyByteArray.TYPE); + add("bytes", PyBytes.TYPE); + // add("classmethod", PyClassMethod.TYPE); + // add("complex", PyComplex.TYPE); + add("dict", PyDict.TYPE); + // add("enumerate", PyEnum.TYPE); + // add("filter", PyFilter.TYPE); + add("float", PyFloat.TYPE); + // add("frozenset", PyFrozenSet.TYPE); + // add("property", PyProperty.TYPE); + add("int", PyLong.TYPE); + add("list", PyList.TYPE); + // add("map", PyMap.TYPE); + add("object", PyBaseObject.TYPE); + // add("range", PyRange.TYPE); + // add("reversed", PyReversed.TYPE); + // add("set", PySet.TYPE); + add("slice", PySlice.TYPE); + // add("staticmethod", PyStaticMethod.TYPE); + add("str", PyUnicode.TYPE); + // add("super", PySuper.TYPE); + add("tuple", PyTuple.TYPE); + add("type", PyType.TYPE); + // add("zip", PyZip.TYPE); + } + + @PythonStaticMethod + @DocString("Return the absolute value of the argument.") + static Object abs(Object x) throws Throwable { return PyNumber.absolute(x); } + + @PythonStaticMethod + @DocString("Return the number of items in a container.") + static Object len(Object v) throws Throwable { return PySequence.size(v); } + + /** + * Implementation of {@code max()}. 
+ * + * @param arg1 a first argument or iterable of arguments + * @param args contains other positional arguments + * @param key function + * @param dflt to return when iterable is empty + * @return {@code max} result or {@code dflt} + * @throws Throwable from calling {@code key} or comparison + */ + @PythonStaticMethod(positionalOnly = false) + @DocString("Return the largest item in an iterable" + + " or the largest of two or more arguments.") + // Simplified version of max() + static Object max(Object arg1, @KeywordOnly @Default("None") Object key, + @Name("default") @Default("None") Object dflt, @PositionalCollector PyTuple args) + throws Throwable { + // @PositionalCollector has to be last. + return minmax(arg1, args, key, dflt, Comparison.GT); + } + + /** + * Implementation of {@code min()}. + * + * @param arg1 a first argument or iterable of arguments + * @param args contains other positional arguments + * @param key function + * @param dflt to return when iterable is empty + * @return {@code min} result or {@code dflt} + * @throws Throwable from calling {@code key} or comparison + */ + @PythonStaticMethod(positionalOnly = false) + @DocString("Return the smallest item in an iterable" + + " or the smallest of two or more arguments.") + // Simplified version of min() + static Object min(Object arg1, @KeywordOnly @Default("None") Object key, + @Name("default") @Default("None") Object dflt, @PositionalCollector PyTuple args) + throws Throwable { + // @PositionalCollector has to be last. + return minmax(arg1, args, key, dflt, Comparison.LT); + } + + /** + * Implementation of both + * {@link #min(Object, Object, Object, PyTuple) min()} and + * {@link #max(Object, Object, Object, PyTuple) max()}. + * + * @param arg1 a first argument or iterable of arguments + * @param args contains other positional arguments + * + * @param key function + * @param dflt to return when iterable is empty + * @param op {@code LT} for {@code min} and {@code GT} for + * {@code max}. 
+     * @return the selected item itself (not its key value), or {@code dflt}
+     * @throws Throwable from calling {@code op} or {@code key}
+     */
+    // Compare CPython min_max in Python/bltinmodule.c
+    private static Object minmax(Object arg1, PyTuple args, Object key, Object dflt, Comparison op)
+            throws Throwable {
+
+        assert key != null;
+        // Best item so far and the value it is ranked by: the item or key(item)
+        Object result = null, resultKey = null;
+        Iterator<?> others;
+
+        if (args.size() > 0) {
+            /*
+             * Positional mode: arg1 is the first value, args contains the other
+             * values to compare
+             */
+            result = arg1;
+            resultKey = key == Py.None ? arg1 : Callables.callFunction(key, arg1);
+            others = args.iterator();
+            if (dflt != Py.None) {
+                String name = op == Comparison.LT ? "min" : "max";
+                throw new TypeError(DEFAULT_WITHOUT_ITERABLE, name);
+            }
+        } else {
+            // Single iterable argument of the values to compare
+            // XXX define PySequence.iterable like PyMapping.map?
+            others = PySequence.fastList(arg1, null).iterator();
+        }
+
+        // Rank by key(item) when a key is given, but always keep the item itself
+        while (others.hasNext()) {
+            Object item = others.next();
+            Object itemKey = key == Py.None ? item : Callables.callFunction(key, item);
+            if (result == null || Abstract.richCompareBool(itemKey, resultKey, op)) {
+                result = item;
+                resultKey = itemKey;
+            }
+        }
+
+        // result may be null if the single iterable argument is empty
+        if (result != null) {
+            return result;
+        } else if (dflt != Py.None) {
+            assert dflt != null;
+            return dflt;
+        } else {
+            String name = op == Comparison.LT ?
"min" : "max"; + throw new ValueError("%s() arg is an empty sequence", name); + } + } + + private static final String DEFAULT_WITHOUT_ITERABLE = + "Cannot specify a default for %s() with multiple positional arguments"; + + @PythonStaticMethod + @DocString("Return the canonical string representation of the object.\n" + + "For many object types, including most builtins, eval(repr(obj)) == obj.") + static Object repr(Object obj) throws Throwable { return Abstract.repr(obj); } +} diff --git a/core/src/main/java/org/python/core/CPython311Code.java b/core/src/main/java/org/python/core/CPython311Code.java new file mode 100644 index 000000000..6744fd69a --- /dev/null +++ b/core/src/main/java/org/python/core/CPython311Code.java @@ -0,0 +1,483 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.nio.ShortBuffer; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.Spliterator; +import java.util.function.Consumer; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.python.core.stringlib.ByteArrayBuilder; + +/** + * Our equivalent to the Python code object ({@code PyCodeObject} in + * CPython's C API). + */ +public class CPython311Code extends PyCode { + + /** + * Describe the layout of the frame local variables (including + * arguments), cell and free variables allowing implementation-level + * access to CPython-specific features. + */ + final CPythonLayout layout; + + /** + * Instruction opcodes, not {@code null}. Treat these as unsigned + * 16-bit patterns in which the low 8 bits is the argument and the + * upper 8 bits is the opcode itself. + */ + final short[] wordcode; + + /** + * Table of byte code address ranges mapped to source lines, + * presentable as defined in PEP 626. + */ + // See CPython lnotab_notes.txt + final byte[] linetable; + + /** Number of entries needed for evaluation stack. 
*/ + final int stacksize; + + /** + * Table of byte code address ranges mapped to handler addresses in + * a compact byte encoding (defined by CPython and appearing in the + * serialised form of a {@code code} object). + */ + final byte[] exceptiontable; + + /** + * Full constructor based on CPython's + * {@code PyCode_NewWithPosOnlyArgs}. The {@link #traits} of the + * code are supplied here as CPython reports them: as a bit array in + * an integer, but the constructor makes a conversion, and it is the + * {@link #traits} which should be used at the Java level. + *

+ * Where the parameters map directly to an attribute of the code + * object, that is the best way to explain them. Note that this + * factory method is tuned to the needs of {@code marshal.read} + * where the serialised form makes no secret of the version-specific + * implementation details. + * + * @param filename {@code co_filename} + * @param name {@code co_name} + * @param qualname {@code co_qualname} + * @param flags {@code co_flags} a bitmap of traits + * + * @param wordcode {@code co_code} as unsigned 16-bit words + * @param firstlineno first source line of this code + * @param linetable mapping byte code ranges to source lines + * + * @param consts {@code co_consts} + * @param names {@code co_names} + * + * @param layout variable names and properties, in the order + * {@code co_varnames + co_cellvars + co_freevars} but without + * repetition. + * + * @param argcount {@code co_argcount} the number of positional + * parameters (including positional-only arguments and arguments + * with default values) + * @param posonlyargcount {@code co_posonlyargcount} the number of + * positional-only arguments (including arguments with default + * values) + * @param kwonlyargcount {@code co_kwonlyargcount} the number of + * keyword-only arguments (including arguments with default + * values) + * + * @param stacksize {@code co_stacksize} + * @param exceptiontable supports exception processing + */ + public CPython311Code( // + // Grouped as _PyCodeConstructor in pycore_code.h + // Metadata + String filename, String name, String qualname, // + int flags, + // The code + short[] wordcode, int firstlineno, byte[] linetable, + // Used by the code + Object[] consts, String[] names, + // Mapping frame offsets to information + CPythonLayout layout, + // Parameter navigation with varnames + int argcount, int posonlyargcount, int kwonlyargcount, + // Needed to support execution + int stacksize, byte[] exceptiontable) { + + // Most of the arguments are applicable to any PyCode + 
super(filename, name, qualname, flags, //
+                firstlineno, //
+                consts, names, //
+                argcount, posonlyargcount, kwonlyargcount);
+
+        // A few are CPython-specific (tentatively these).
+        this.layout = layout;
+        this.wordcode = wordcode;
+        this.linetable = linetable;
+        this.stacksize = stacksize;
+        this.exceptiontable = exceptiontable;
+    }
+
+    /**
+     * Essentially equivalent to the (strongly-typed) constructor, but
+     * accepting {@code Object} arguments, which are checked for type
+     * here. This is primarily designed for use by the {@code marshal}
+     * module.
+     * <p>
+     * The {@link #traits} of the code are supplied here as CPython
+     * reports them: as a bitmap in an integer, but the constructor
+     * makes a conversion, and it is the {@link #traits} which should be
+     * used at the Java level.
+     * <p>
+     * Where the parameters map directly to an attribute of the code
+     * object, that is the best way to explain them. Note that this
+     * factory method is tuned to the needs of {@code marshal.read}
+     * where the serialised form makes no secret of the version-specific
+     * implementation details.
+     *
+     * @param filename ({@code str}) = {@code co_filename}
+     * @param name ({@code str}) = {@code co_name}
+     * @param qualname ({@code str}) = {@code co_qualname}
+     * @param flags ({@code int}) = {@code co_flags} a bitmap of traits
+     *
+     * @param bytecode ({@code bytes}) = {@code co_code}
+     * @param firstlineno ({@code int}) = {@code co_firstlineno}
+     * @param linetable ({@code bytes}) = {@code co_linetable}
+     *
+     * @param consts ({@code tuple}) = {@code co_consts}
+     * @param names ({@code tuple[str]}) = {@code co_names}
+     *
+     * @param localsplusnames ({@code tuple[str]}) variable names
+     * @param localspluskinds ({@code bytes}) variable kinds
+     * @param argcount ({@code int}) = {@code co_argcount}
+     * @param posonlyargcount ({@code int}) = {@code co_posonlyargcount}
+     * @param kwonlyargcount ({@code int}) = {@code co_kwonlyargcount}
+     * @param stacksize ({@code int}) = {@code co_stacksize}
+     * @param exceptiontable ({@code bytes}) supports exception
+     *     processing
+     * @return a new code object
+     */
+    // Compare CPython _PyCode_New in codeobject.c
+    public static CPython311Code create( //
+            // Grouped as _PyCodeConstructor in pycore_code.h
+            // Metadata
+            Object filename, Object name, Object qualname, int flags,
+            // The code
+            Object bytecode, int firstlineno, Object linetable,
+            // Used by the code
+            Object consts, Object names,
+            // Mapping frame offsets to information
+            Object localsplusnames, Object localspluskinds,
+            // For navigation within localsplus
+            int argcount, int posonlyargcount, int kwonlyargcount,
+            // Needed to support execution
+            int stacksize, Object exceptiontable) {
+
+        // Order of checks and casts based on _PyCode_Validate FWIW
+        if (argcount <
posonlyargcount || posonlyargcount < 0 || kwonlyargcount < 0) { + throw new ValueError("code: argument counts inconsistent"); + } + if (stacksize < 0) { throw new ValueError("code: bad stacksize"); } + if (flags < 0) { throw new ValueError("code: bad flags argument"); } + + PyBytes _bytecode = castBytes(bytecode, "bytecode"); + PyTuple _consts = castTuple(consts, "consts"); + String[] _names = names(names, "names"); + + // Compute a layout from localsplus* arrays + CPythonLayout _layout = + new CPythonLayout(localsplusnames, localspluskinds, totalargs(argcount, flags)); + + String _name = castString(name, "name"); + String _qualname = castString(qualname, "qualname"); + String _filename = castString(filename, "filename"); + + PyBytes _linetable = castBytes(linetable, "linetable"); + PyBytes _exceptiontable = castBytes(exceptiontable, "exceptiontable"); + + // Everything is the right type and size + return new CPython311Code(// + _filename, _name, _qualname, flags, // + wordcode(_bytecode), firstlineno, _linetable.asByteArray(), // + _consts.toArray(), _names, // + _layout, // + argcount, posonlyargcount, kwonlyargcount, // + stacksize, _exceptiontable.asByteArray()); + } + + // Attributes ----------------------------------------------------- + + @Override + int co_stacksize() { return stacksize; } + + @Override + PyBytes co_code() { + ByteArrayBuilder builder = new ByteArrayBuilder(2 * wordcode.length); + for (short opword : wordcode) { + // Opcode is high byte and goes first in byte code + builder.append(opword >> 8).append(opword); + } + return new PyBytes(builder); + } + + // Java API ------------------------------------------------------- + + /** + * Create a {@code PyFunction} that will execute this {@code PyCode} + * (adequate for module-level code). 
+ * + * @param interpreter providing the module context + * @param globals name space to treat as global variables + * @return the function + */ + // Compare CPython PyFunction_NewWithQualName in funcobject.c + // ... with the interpreter required by architecture + @Override + CPython311Function createFunction(Interpreter interpreter, PyDict globals) { + return new CPython311Function(interpreter, this, globals); + } + + @Override + CPython311Function createFunction(Interpreter interpreter, PyDict globals, Object[] defaults, + PyDict kwdefaults, Object annotations, PyCell[] closure) { + return new CPython311Function(interpreter, this, globals, defaults, kwdefaults, annotations, + closure); + } + + /** + * Build an {@link ArgParser} to match the code object and given + * defaults. This is a call-back when constructing a + * {@code CPython311Function} from this {@code code} object and also + * when the code object of a function is replaced. The method + * ensures the parser reflects the variable names and the frame + * layout implied by the code object. The caller (the function + * definition) supplies the default values of arguments on return. + * + * @return parser reflecting the frame layout of this code object + */ + ArgParser buildParser() { + int regargcount = argcount + kwonlyargcount; + return new ArgParser(name, layout.localnames, regargcount, posonlyargcount, kwonlyargcount, + traits.contains(PyCode.Trait.VARARGS), traits.contains(PyCode.Trait.VARKEYWORDS)); + } + + @Override + CPythonLayout layout() { return layout; } + + /** + * Store information about the variables required by a + * {@link CPython311Code} object and where they will be stored in + * the frame it creates. 
+ */ + final static class CPythonLayout implements Layout { + /** Count of {@code co_varnames} */ + final int nvarnames; + /** Count of {@code co_cellvars} */ + final int ncellvars; + /** Count of {@code co_freevars} */ + final int nfreevars; + /** + * Index of first cell (which may be a parameter). Cell variables do + * not in general form a contiguous block in the frame. + */ + private final int cell0; + /** + * Index of first free variable. Free variables form a contiguous + * block in the frame from this index. + */ + final int free0; + /** Names of all the variables in frame order. */ + private final String[] localnames; + /** Kinds of all the variables in frame order. */ + private final byte[] kinds; + + /** + * Construct a {@code Layout} based on a representation used + * internally by CPython that appears in the stream {@code marshal} + * writes, e.g. in a {@code .pyc} file. + * + * @param localsplusnames tuple of all the names + * @param localspluskinds bytes of kinds of variables + * @param nargs the number (leading) that are arguments + */ + CPythonLayout( + // Mapping frame offsets to information + Object localsplusnames, Object localspluskinds, + // For navigation within localsplus + int nargs) { + + PyTuple nameTuple = castTuple(localsplusnames, "localsplusnames"); + PyBytes kindBytes = castBytes(localspluskinds, "localspluskinds"); + + int n = nameTuple.size(); + this.localnames = new String[n]; + this.kinds = new byte[n]; + + if (kindBytes.size() != n) { + throw new ValueError(LENGTHS_UNEQUAL, kindBytes.size(), n); + } + + // Compute indexes into name arrays as we go + int nloc = 0, nfree = 0, ncell = 0, icell0 = -1; + + /* + * Step through the localsplus* variables saving the name and kind + * of each, and counting the different kinds. 
+ */ + for (int i = 0; i < n; i++) { + + String s = PyUnicode.asString(nameTuple.get(i), + o -> Abstract.typeError(NAME_TUPLES_STRING, o, "localsplusnames")); + byte kindByte = kindBytes.get(i).byteValue(); + + if ((kindByte & CO_FAST_LOCAL) != 0) { + if ((kindByte & CO_FAST_CELL) != 0) { + // Argument referenced by nested scope. + ncell += 1; + // Remember where this happens first. + if (icell0 < 0) { icell0 = i; } + } + nloc += 1; + } else if ((kindByte & CO_FAST_CELL) != 0) { + // Locally defined but referenced in nested scope. + ncell += 1; + } else if ((kindByte & CO_FAST_FREE) != 0) { + // Supplied from a containing scope. + nfree += 1; + } + localnames[i] = s; + kinds[i] = kindByte; + } + + // Cache the counts and cardinal points. + this.nvarnames = nloc; + this.ncellvars = ncell; + this.nfreevars = nfree; + // If icell0>=0 cell parameter seen, else first cell. + this.cell0 = icell0 >= 0 ? icell0 : n - nfree - ncell; + this.free0 = localnames.length - nfree; + } + + @Override + public int size() { return localnames.length; } + + @Override + public String name(int index) { return localnames[index]; } + + @Override + public EnumSet traits(int index) { + byte kindByte = kinds[index]; + + if ((kindByte & CO_FAST_LOCAL) != 0) { + if ((kindByte & CO_FAST_CELL) != 0) + // Argument referenced by nested scope + return EnumSet.of(VariableTrait.PLAIN, VariableTrait.CELL); + else + return EnumSet.of(VariableTrait.PLAIN); + } else if ((kindByte & CO_FAST_CELL) != 0) { + // Locally defined but referenced in nested scope + return EnumSet.of(VariableTrait.CELL); + } else { + // Supplied from a containing scope + assert (kindByte & CO_FAST_FREE) != 0; + return EnumSet.of(VariableTrait.FREE); + } + } + + @Override + public Stream localnames() { return Arrays.stream(localnames); } + + @Override + public Stream varnames() { + Spliterator s = spliterator(CO_FAST_LOCAL, nvarnames, 0); + return StreamSupport.stream(s, false); + } + + @Override + public Stream cellvars() { + 
Spliterator s = spliterator(CO_FAST_CELL, ncellvars, cell0);
+            return StreamSupport.stream(s, false);
+        }
+
+        @Override
+        public Stream freevars() {
+            Spliterator s =
+                    spliterator(CO_FAST_FREE, nfreevars, localnames.length - nfreevars);
+            return StreamSupport.stream(s, false);
+        }
+
+        @Override
+        public int nvarnames() { return nvarnames; }
+
+        /** @return the length of {@code co_cellvars} */
+        @Override
+        public int ncellvars() { return ncellvars; }
+
+        /** @return the length of {@code co_freevars} */
+        @Override
+        public int nfreevars() { return nfreevars; }
+
+        /**
+         * A {@code Spliterator} of local variable names of the kind
+         * indicated in the mask. The caller must specify where to start
+         * looking in the list and how many names there ought to be.
+         *
+         * @param mask single bit kind
+         * @param count how many of that kind
+         * @param start to start looking
+         * @return a spliterator of the names
+         */
+        private Spliterator<String> spliterator(final int mask, final int count, int start) {
+            return new Spliterator<String>() {
+                private int i = start, remaining = count;
+
+                @Override
+                public boolean tryAdvance(Consumer<? super String> action) {
+                    if (remaining > 0) {
+                        while ((kinds[i++] & mask) == 0) {} // nothing
+                        action.accept(localnames[i - 1]);
+                        remaining -= 1;
+                        return true;
+                    } else
+                        return false;
+                }
+
+                @Override
+                public Spliterator<String> trySplit() { return null; }
+
+                @Override
+                public long estimateSize() { return remaining; } // exact, as SIZED requires
+
+                @Override
+                public int characteristics() { return ORDERED | SIZED | IMMUTABLE; }
+            };
+        }
+    }
+
+    // Plumbing -------------------------------------------------------
+
+    private static final String NAME_TUPLES_STRING =
+            "name tuple must contain only strings, not '%s' (in %s)";
+    private static final String LENGTHS_UNEQUAL =
+            "lengths unequal localspluskinds(%d) _localsplusnames(%d)";
+
+    private static final int CO_FAST_LOCAL = 0x20, CO_FAST_CELL = 0x40, CO_FAST_FREE = 0x80;
+
+    /**
+     * Convert the contents of a Python {@code bytes} to 16-bit word
+     * code as
expected by the eval-loop in {@link CPython311Frame}.
+     *
+     * @param bytecode as compiled by Python as bytes
+     * @return 16-bit word code
+     */
+    private static short[] wordcode(PyBytes bytecode) {
+        ShortBuffer wordbuf = bytecode.getNIOByteBuffer().asShortBuffer();
+        final int len = wordbuf.remaining();
+        short[] code = new short[len];
+        wordbuf.get(code, 0, len);
+        return code;
+    }
+}
diff --git a/core/src/main/java/org/python/core/CPython311Frame.java b/core/src/main/java/org/python/core/CPython311Frame.java
new file mode 100644
index 000000000..526a0d1d7
--- /dev/null
+++ b/core/src/main/java/org/python/core/CPython311Frame.java
@@ -0,0 +1,841 @@
+// Copyright (c)2023 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.core;
+
+import java.lang.invoke.MethodHandle;
+import java.util.EnumSet;
+import java.util.Map;
+
+import org.python.base.InterpreterError;
+import org.python.core.PyCode.Layout;
+import org.python.core.PyCode.Trait;
+import org.python.core.PyDict.MergeMode;
+
+/** A {@link PyFrame} for executing CPython 3.11 byte code. */
+class CPython311Frame extends PyFrame {
+
+    /**
+     * All local variables, named in {@link Layout#localnames()
+     * code.layout.localnames}.
+     */
+    final Object[] fastlocals;
+
+    /** Value stack. */
+    final Object[] valuestack;
+
+    /** Index of first empty space on the value stack. */
+    int stacktop = 0;
+
+    /** Assigned eventually by return statement (or stays None). */
+    Object returnValue = Py.None;
+
+    /**
+     * The built-in objects from {@link #func}, wrapped (if necessary)
+     * to make it a {@code Map}. Inside the wrapper it will be accessed
+     * using the Python mapping protocol.
+     */
+    private final Map builtins;
+
+    /**
+     * Create a {@code CPython311Frame}, which is a {@code PyFrame} with
+     * the storage and mechanism to execute a module or isolated code
+     * object (compiled to a {@link CPython311Code}).
+     * <p>
+     * This will set the {@link #func} and (sometimes) {@link #locals}
+     * fields of the frame. The {@code globals} and {@code builtins}
+     * properties, exposed to Python as {@code f_globals} and
+     * {@code f_builtins}, are determined by {@code func}.
+     * <p>
+     * The func argument also locates the code object for the frame, the
+     * properties of which determine many characteristics of the frame.
+     * <ul>
+     * <li>If the code of {@code func} has the {@link Trait#NEWLOCALS}
+     * trait, the {@code locals} argument is ignored.
+     * <ul>
+     * <li>If the code does not additionally have the trait
+     * {@link Trait#OPTIMIZED}, a new empty {@code dict} will be
+     * provided as {@link #locals}.</li>
+     * <li>Otherwise, the code has the trait {@code OPTIMIZED}, and
+     * {@link #locals} will be {@code null} until possibly set
+     * later.</li>
+     * </ul>
+     * </li>
+     * <li>Otherwise, the code does not have the trait
+     * {@code NEWLOCALS} and expects an object with the map protocol to
+     * act as {@link PyFrame#locals}.
+     * <ul>
+     * <li>If the argument {@code locals} is not {@code null} it
+     * specifies {@link #locals}.</li>
+     * <li>Otherwise, the argument {@code locals} is {@code null} and
+     * {@link #locals} will be the same as {@code func.globals}.</li>
+     * </ul>
+     * </li>
+     * </ul>
+ * + * @param func that this frame executes + * @param locals local name space (may be {@code null}) + */ + // Compare CPython _PyFrame_New_NoTrack in frameobject.c + protected CPython311Frame(CPython311Function func, Object locals) { + + // Initialise the basics. + super(func); + + CPython311Code code = func.code; + this.valuestack = new Object[code.stacksize]; + int nfast = 0; + + // The need for a dictionary of locals depends on the code + EnumSet traits = code.traits; + if (traits.contains(Trait.NEWLOCALS)) { + // Ignore locals argument + if (traits.contains(Trait.OPTIMIZED)) { + // We can create it later but probably won't need to + this.locals = null; + // Instead locals are in an array + nfast = code.layout.size(); + } else { + this.locals = new PyDict(); + } + } else if (locals == null) { + // Default to same as globals. + this.locals = func.globals; + } else { + /* + * Use supplied locals. As it may not implement j.u.Map, we wrap any + * Python object as a Map. Depending on the operations attempted, + * this may break later. + */ + this.locals = locals; + } + + // Locally present the func.__builtins__ as a Map + this.builtins = PyMapping.map(func.builtins); + + // Initialise local variables (plain and cell) + this.fastlocals = nfast > 0 ? new Object[nfast] : EMPTY_OBJECT_ARRAY; + // Free variables are initialised by opcode COPY_FREE_VARS + } + + @Override + Object eval() { + + // Evaluation stack and index + final Object[] s = valuestack; + int sp = stacktop; + + /* + * Because we use a word array, our ip is half the CPython ip. The + * latter, and all jump arguments, are always even, so we have to + * halve the jump distances or destinations. + */ + int ip = 0; + + /* + * We read each 16-bit instruction from wordcode[] into opword. Bits + * 8-15 are the opcode itself. The bottom 8 bits are an argument. + * (The oparg after an EXTENDED_ARG gets special treatment to + * produce the chaining of argument values.) 
+ */ + final CPython311Code code = this.code; + int opword = code.wordcode[ip++] & 0xffff; + + // Opcode argument (where needed). + int oparg = opword & 0xff; + + // @formatter:off + // The structure of the interpreter loop is: + // while (ip <= END) { + // switch (opword >> 8) { + // case Opcode311.LOAD_CONST: + // s[sp++] = consts[oparg]; break; + // // other cases + // case Opcode311.RETURN_VALUE: + // returnValue = s[--sp]; break loop; + // case Opcode311.EXTENDED_ARG: + // opword = wordcode[ip++] & 0xffff; + // oparg = (oparg << 8) | opword & 0xff; + // continue; + // default: + // throw new InterpreterError("..."); + // } + // opword = wordcode[ip++] & 0xffff; + // oparg = opword & 0xff; + // } + // @formatter:on + + // Cached references from code + final String[] names = code.names; + final Object[] consts = code.consts; + final short[] wordcode = code.wordcode; + final int END = wordcode.length; + + final PyDict globals = func.globals; + assert globals != null; + + // Wrap locals (any type) as a minimal kind of Java map + Map locals = localsMapOrNull(); + + // Holds keyword names argument between KW_NAMES and CALL + PyTuple kwnames = null; + + loop: while (ip <= END) { + /* + * Here every so often, or maybe inside the try, and conditional on + * the opcode, CPython would have us check for asynchronous events + * that need handling. Some are not relevant to this implementation + * (GIL drop request). Some probably are. 
+ */ + + // Comparison with CPython macros in c.eval: + // TOP() : s[sp-1] + // PEEK(n) : s[sp-n] + // POP() : s[--sp] + // PUSH(v) : s[sp++] = v + // SET_TOP(v) : s[sp-1] = v + // GETLOCAL(oparg) : fastlocals[oparg]; + // PyCell_GET(cell) : cell.get() + // PyCell_SET(cell, v) : cell.set(v) + + try { + // Interpret opcode + switch (opword >> 8) { + // Cases ordered as CPython to aid comparison + + case Opcode311.NOP: + case Opcode311.RESUME: + break; + + case Opcode311.LOAD_CONST: + s[sp++] = consts[oparg]; + break; + + case Opcode311.PUSH_NULL: + s[sp++] = null; + break; + + case Opcode311.UNARY_NEGATIVE: { + int top = sp - 1; + s[top] = PyNumber.negative(s[top]); + break; + } + + case Opcode311.UNARY_INVERT: { + int top = sp - 1; + s[top] = PyNumber.invert(s[top]); + break; + } + + case Opcode311.BINARY_SUBSCR: { + // w | v | -> | w[v] | + // -------^sp --------^sp + Object v = s[--sp]; + int top = sp - 1; + s[top] = PySequence.getItem(s[top], v); + ip += Opcode311.INLINE_CACHE_ENTRIES_BINARY_SUBSCR; + break; + } + + case Opcode311.STORE_SUBSCR: // w[v] = u + // u | w | v | -> | + // -----------^sp -^sp + sp -= 3; + // setItem(w, v, u) + PySequence.setItem(s[sp + 1], s[sp + 2], s[sp]); + ip += Opcode311.INLINE_CACHE_ENTRIES_STORE_SUBSCR; + break; + + case Opcode311.DELETE_SUBSCR: // del w[v] + // w | v | -> | + // -------^sp -^sp + sp -= 2; + // delItem(w, v) + PySequence.delItem(s[sp], s[sp + 1]); + break; + + case Opcode311.RETURN_VALUE: + returnValue = s[--sp]; // POP + break loop; + + case Opcode311.STORE_NAME: { + String name = names[oparg]; + try { + locals.put(name, s[--sp]); + } catch (NullPointerException npe) { + throw noLocals("storing", name); + } + break; + } + + case Opcode311.DELETE_NAME: { + String name = names[oparg]; + try { + locals.remove(name); + } catch (NullPointerException npe) { + throw noLocals("deleting", name); + } + break; + } + + case Opcode311.LOAD_NAME: { + // Resolve against locals, globals and builtins + String name = 
names[oparg]; + Object v; + try { + v = locals.get(name); + } catch (NullPointerException npe) { + throw noLocals("loading", name); + } + + if (v == null) { + v = globals.loadGlobal(builtins, name); + if (v == null) + throw new NameError(NAME_ERROR_MSG, name); + } + s[sp++] = v; // PUSH + break; + } + + case Opcode311.BUILD_TUPLE: + // w[0] | ... | w[oparg-1] | -> | tpl | + // -------------------------^sp -------^sp + // Group the N=oparg elements on the stack + // into a single tuple. + sp -= oparg; + s[sp] = new PyTuple(s, sp++, oparg); + break; + + case Opcode311.BUILD_LIST: + // w[0] | ... | w[oparg-1] | -> | lst | + // -------------------------^sp -------^sp + /* + * Group the N=oparg elements on the stack into a single list. + */ + sp -= oparg; + s[sp] = new PyList(s, sp++, oparg); + break; + + case Opcode311.LIST_EXTEND: { + Object iterable = s[--sp]; + PyList list = (PyList)s[sp - oparg]; + list.list_extend(iterable, + () -> Abstract.typeError(VALUE_AFTER_STAR, iterable)); + break; + } + + case Opcode311.BUILD_MAP: + // k1 | v1 | ... | kN | vN | -> | map | + // -------------------------^sp -------^sp + /* + * Build dictionary from the N=oparg key-value pairs on the stack in + * order. + */ + sp -= oparg * 2; + s[sp] = PyDict.fromKeyValuePairs(s, sp++, oparg); + break; + + case Opcode311.DICT_MERGE: { + // f | map | ... | v | -> | f | map | ... | + // -------------------^sp -----------------^sp + /* + * Update a dictionary from another map v on the stack. There are + * N=oparg arguments including v on the stack, but only v is merged. + * In practice N=1. The function f is only used as context in error + * messages. 
+ */ + Object map = s[--sp]; + PyDict dict = (PyDict)s[sp - oparg]; + try { + dict.merge(map, MergeMode.UNIQUE); + } catch (AttributeError ae) { + throw kwargsTypeError(s[sp - (oparg + 2)], map); + } catch (KeyError.Duplicate ke) { + throw kwargsKeyError(ke, s[sp - (oparg + 2)]); + } + break; + } + + case Opcode311.LOAD_ATTR: { + // v | -> | v.name | + // ---^sp ----------^sp + int top = sp - 1; + s[top] = Abstract.getAttr(s[top], names[oparg]); + ip += Opcode311.INLINE_CACHE_ENTRIES_LOAD_ATTR; + break; + } + + case Opcode311.COMPARE_OP: { + // v | w | -> | op(v,w) | + // -------^sp -----------^sp + Object w = s[--sp]; // POP + int top = sp - 1; + Object v = s[top]; // TOP + s[top] = Comparison.from(oparg).apply(v, w); + ip += Opcode311.INLINE_CACHE_ENTRIES_COMPARE_OP; + break; + } + + case Opcode311.IS_OP: { + // v | w | -> | (v is w) ^ oparg | + // -------^sp --------------------^sp + Object w = s[--sp]; // POP + int top = sp - 1; + Object v = s[top]; // TOP + Comparison op = oparg == 0 ? Comparison.IS : Comparison.IS_NOT; + s[top] = op.apply(v, w); + break; + } + + case Opcode311.CONTAINS_OP: { + // v | w | -> | (v in w) ^ oparg | + // -------^sp --------------------^sp + Object w = s[--sp]; // POP + int top = sp - 1; + Object v = s[top]; // TOP + Comparison op = oparg == 0 ? 
Comparison.IN : Comparison.NOT_IN; + s[top] = op.apply(v, w); + break; + } + + case Opcode311.JUMP_FORWARD: + ip += oparg; + break; + + case Opcode311.JUMP_BACKWARD: { + ip -= oparg; + break; + } + + case Opcode311.POP_JUMP_BACKWARD_IF_FALSE: { + if (!Abstract.isTrue(s[--sp])) { ip -= oparg; } + break; + } + case Opcode311.POP_JUMP_FORWARD_IF_FALSE: { + if (!Abstract.isTrue(s[--sp])) { ip += oparg; } + break; + } + + case Opcode311.POP_JUMP_BACKWARD_IF_TRUE: { + if (Abstract.isTrue(s[--sp])) { ip -= oparg; } + break; + + } + + case Opcode311.POP_JUMP_FORWARD_IF_TRUE: { + if (Abstract.isTrue(s[--sp])) { ip += oparg; } + break; + + } + + case Opcode311.POP_JUMP_BACKWARD_IF_NOT_NONE: { + if (s[--sp] != Py.None) { ip -= oparg; } + break; + } + + case Opcode311.POP_JUMP_FORWARD_IF_NOT_NONE: { + if (s[--sp] != Py.None) { ip += oparg; } + break; + } + + case Opcode311.POP_JUMP_BACKWARD_IF_NONE: { + if (s[--sp] == Py.None) { ip -= oparg; } + break; + } + + case Opcode311.POP_JUMP_FORWARD_IF_NONE: { + if (s[--sp] == Py.None) { ip += oparg; } + break; + } + + case Opcode311.JUMP_IF_FALSE_OR_POP: { + Object v = s[--sp]; // POP + if (!Abstract.isTrue(v)) { + sp += 1; // UNPOP + ip += oparg; + } + break; + } + + case Opcode311.JUMP_IF_TRUE_OR_POP: { + Object v = s[--sp]; // POP + if (Abstract.isTrue(v)) { + sp += 1; // UNPOP + ip += oparg; + } + break; + } + + case Opcode311.JUMP_BACKWARD_NO_INTERRUPT: { + // Same as plain JUMP_BACKWARD for us + ip -= oparg; + break; + } + + case Opcode311.JUMP_BACKWARD_QUICK: { + // Same as plain JUMP_BACKWARD for us + ip -= oparg; + break; + } + + case Opcode311.LOAD_METHOD: + /* + * Emitted when compiling obj.meth(...). Works in tandem with CALL. 
+ * If we can bypass temporary bound method: + */ + // obj | -> | desc | self | + // -----^sp ---------------^sp + // Otherwise almost conventional LOAD_ATTR: + // obj | -> | null | meth | + // -----^sp ---------------^sp + getMethod(s[--sp], names[oparg], sp); + sp += 2; + ip += Opcode311.INLINE_CACHE_ENTRIES_LOAD_METHOD; + break; + + case Opcode311.PRECALL: + /* + * CPython gains from recognising that a callable is actually a + * bound method, and so each call is includes a PUSH_NULL + * beforehand. PRECALL uses that space to un-bundle (if it can) the + * callable into an unbound callable and its 'self' argument. + * + * There is no proof this would help in Jython. It might, but we can + * safely make this a no-op and CALL will still do the right thing. + */ + ip += Opcode311.INLINE_CACHE_ENTRIES_PRECALL; + break; + + case Opcode311.KW_NAMES: + assert (kwnames == null); + assert PyTuple.TYPE.checkExact(consts[oparg]); + kwnames = (PyTuple)consts[oparg]; + break; + + case Opcode311.CALL: { + /* + * Works in tandem with LOAD_METHOD or PRECALL. If LOAD_METHOD + * bypassed the method binding or PRECALL un-bundled a bound object: + */ + // desc | self | arg[n] | -> | res | + // ----------------------^sp -------^sp + // Otherwise: + // null | meth | arg[n] | -> | res | + // ----------------------^sp -------^sp + // oparg = n + sp -= oparg + 2; + if (s[sp] != null) { + // We bypassed the method binding. Stack: + // desc | self | arg[n] | + // ^sp + // call desc(self, arg1 ... argN) + s[sp] = Callables.vectorcall(s[sp++], s, sp, oparg + 1, kwnames); + } else { + // meth is the bound method self.name + // null | meth | arg[n] | + // ^sp + // call meth(arg1 ... argN) + s[sp++] = Callables.vectorcall(s[sp], s, sp + 1, oparg, kwnames); + } + kwnames = null; + ip += Opcode311.INLINE_CACHE_ENTRIES_CALL; + break; + } + + case Opcode311.CALL_FUNCTION_EX: { + // Call with positional & kw args. Stack: + // f | args | kwdict? 
| -> res | + // --------------------^sp -----^sp + // oparg is 0 (no kwdict) or 1 (kwdict present) + Object w = (oparg & 0x1) == 0 ? null : s[--sp]; + Object v = s[--sp]; // args tuple + sp -= 1; + assert s[sp - 1] == null; // from PUSH_NULL + s[sp - 1] = Callables.callEx(s[sp], v, w); + break; + } + + case Opcode311.BINARY_OP: { + Object w = s[--sp]; // POP + int top = sp - 1; + Object v = s[top]; // TOP + s[top] = switch (oparg) { + default -> Py.NotImplemented; + case Opcode311.NB_ADD -> PyNumber.add(v, w); + case Opcode311.NB_AND -> PyNumber.and(v, w); + // case Opcode311.NB_FLOOR_DIVIDE -> PyNumber.FloorDivide(v, w); + // case Opcode311.NB_LSHIFT -> PyNumber.Lshift(v, w); + // case Opcode311.NB_MATRIX_MULTIPLY + // -> PyNumber.MatrixMultiply(v, w); + case Opcode311.NB_MULTIPLY -> PyNumber.multiply(v, w); + // case Opcode311.NB_REMAINDER -> PyNumber.Remainder(v, w); + case Opcode311.NB_OR -> PyNumber.or(v, w); + // case Opcode311.NB_POWER -> PyNumber.PowerNoMod(v, w); + // case Opcode311.NB_RSHIFT -> PyNumber.Rshift(v, w); + case Opcode311.NB_SUBTRACT -> PyNumber.subtract(v, w); + // case Opcode311.NB_TRUE_DIVIDE -> PyNumber.TrueDivide(v, w); + case Opcode311.NB_XOR -> PyNumber.xor(v, w); + // case Opcode311.NB_INPLACE_ADD -> PyNumber.InPlaceAdd(v, w); + // case Opcode311.NB_INPLACE_AND -> PyNumber.InPlaceAnd(v, w); + // case Opcode311.NB_INPLACE_FLOOR_DIVIDE + // -> PyNumber.InPlaceFloorDivide(v, w); + // case Opcode311.NB_INPLACE_LSHIFT -> PyNumber.InPlaceLshift(v, w); + // case Opcode311.NB_INPLACE_MATRIX_MULTIPLY + // -> PyNumber.InPlaceMatrixMultiply(v, w); + // case Opcode311.NB_INPLACE_MULTIPLY + // -> PyNumber.InPlaceMultiply(v, w); + // case Opcode311.NB_INPLACE_REMAINDER + // -> PyNumber.InPlaceRemainder(v, w); + // case Opcode311.NB_INPLACE_OR -> PyNumber.InPlaceOr(v, w); + // case Opcode311.NB_INPLACE_POWER + // -> PyNumber.InPlacePowerNoMod(v, w); + // case Opcode311.NB_INPLACE_RSHIFT -> PyNumber.InPlaceRshift(v, w); + // case 
Opcode311.NB_INPLACE_SUBTRACT + // -> PyNumber.InPlaceSubtract(v, w); + // case Opcode311.NB_INPLACE_TRUE_DIVIDE -> // + // PyNumber.InPlaceTrueDivide(v, w); + // case Opcode311.NB_INPLACE_XOR -> PyNumber.InPlaceXor(v, w); + }; + ip += Opcode311.INLINE_CACHE_ENTRIES_BINARY_OP; + break; + } + + case Opcode311.EXTENDED_ARG: + // Pick up the next instruction. + opword = wordcode[ip++] & 0xffff; + // The current oparg *prefixes* the next oparg, + // which could of course be another + // EXTENDED_ARG. (Trust me, it'll be fine.) + oparg = (oparg << 8) | opword & 0xff; + // This is *instead of* the post-switch fetch. + continue; + + default: + throw new InterpreterError("%s at ip: %d, unknown opcode: %d", + code.qualname, 2 * (ip - 1), opword >> 8); + } // switch + + /* + * Pick up the next instruction and argument. Because we use a word + * array, our ip is half the CPython ip. The latter, and all jump + * arguments, are always even, so we have to halve the jump + * distances or destinations. + */ + opword = wordcode[ip++] & 0xffff; + oparg = opword & 0xff; + + } catch (PyException pye) { + /* + * We ought here to check for exception handlers (defined in Python + * and reflected in the byte code) potentially resuming the loop + * with ip at the handler code, or in a Python finally clause. + */ + // Should handle within Python, but for now, stop. + System.err.println(pye); + throw pye; + } catch (InterpreterError | AssertionError ie) { + /* + * An InterpreterError signals an internal error, recognised by our + * implementation: stop. + */ + System.err.println(ie); + throw ie; + } catch (Throwable t) { + /* + * A non-Python exception signals an internal error, in our + * implementation, in user-supplied Java, or from a Java library + * misused from Python. + */ + // Should handle within Python, but for now, stop. 
+ t.printStackTrace(); + throw new InterpreterError(t, "Non-PyException at ip: %d, opcode: %d", 2 * (ip - 1), + opword >> 8); + } + } // loop + + // ThreadState.get().swap(back); + return returnValue; + } + + // Supporting definitions and methods ----------------------------- + + private static final Object[] EMPTY_OBJECT_ARRAY = Py.EMPTY_ARRAY; + private static final String NAME_ERROR_MSG = "name '%.200s' is not defined"; + private static final String VALUE_AFTER_STAR = "Value after * must be an iterable, not %.200s"; + + /** + * A specialised version of {@code object.__getattribute__} + * specifically to support the {@code LOAD_METHOD} and + * {@code CALL} opcode pair generated by the CPython byte + * code compiler. This method will place two entries in the stack at + * the offset given that are either: + * <ol>
+ * <li>an unbound method and the object passed ({@code obj}), + * or</li>
+ * <li>{@code null} and a bound method object.</li>
+ * </ol>
+ * <p>
+ * The normal behaviour of {@code object.__getattribute__} is + * represented by case 2.
+ * <p>
+ * Case 1 supports an optimisation that is possible when the type of + * the self object {@code obj} has not overridden + * {@code __getattribute__}, and the {@code name} resolves to a + * regular method in it. {@code CALL} will detect and use + * this optimised form if the first element is not {@code null}. + * + * @param obj of which the callable is an attribute + * @param name of callable attribute + * @param offset in stack at which to place results + * @throws AttributeError if the named attribute does not exist + * @throws Throwable from other errors + */ + // Compare CPython _PyObject_GetMethod in object.c + private void getMethod(Object obj, String name, int offset) throws AttributeError, Throwable { + + PyType objType = PyType.of(obj); + + // If type(obj) defines its own __getattribute__ use that. + if (!objType.hasGenericGetAttr()) { + valuestack[offset] = null; + valuestack[offset + 1] = Abstract.getAttr(obj, name); + return; + } + + /* + * From here, the code is a version of the default attribute access + * mechanism PyBaseObject.__getattribute__ in which, if the look-up + * leads to a method descriptor, we avoid binding the descriptor + * into a short-lived bound method object. + */ + + MethodHandle descrGet = null; + boolean methFound = false; + + // Look up the name in the type (null if not found). + Object typeAttr = objType.lookup(name); + if (typeAttr != null) { + // Found in the type, it might be a descriptor + Operations typeAttrOps = Operations.of(typeAttr); + descrGet = typeAttrOps.op_get; + if (typeAttrOps.isMethodDescr()) { + /* + * We found a method descriptor, but will check the instance + * dictionary for a shadowing definition. + */ + methFound = true; + } else if (typeAttrOps.isDataDescr()) { + // typeAttr is a data descriptor so call its __get__. 
+ try { + valuestack[offset] = null; + valuestack[offset + 1] = descrGet.invokeExact(typeAttr, obj, objType); + return; + } catch (Slot.EmptyException e) { + /* + * Only __set__ or __delete__ was defined. We do not catch + * AttributeError: it's definitive. Suppress trying __get__ again. + */ + descrGet = null; + } + } + } + + /* + * At this stage: typeAttr is the value from the type, or a non-data + * descriptor, or null if the attribute was not found. It's time to + * give the object instance dictionary a chance. + */ + if (obj instanceof DictPyObject) { + Map d = ((DictPyObject)obj).getDict(); + Object instanceAttr = d.get(name); + if (instanceAttr != null) { + // Found the callable in the instance dictionary. + valuestack[offset] = null; + valuestack[offset + 1] = instanceAttr; + return; + } + } + + /* + * The name wasn't in the instance dictionary (or there wasn't an + * instance dictionary). typeAttr is the result of look-up on the + * type: a value, a non-data descriptor, or null if the attribute + * was not found. + */ + if (methFound) { + /* + * typeAttr is a method descriptor and was not shadowed by an entry + * in the instance dictionary. + */ + valuestack[offset] = typeAttr; + valuestack[offset + 1] = obj; + return; + } else if (descrGet != null) { + // typeAttr may be a non-data descriptor: call __get__. + try { + valuestack[offset] = null; + valuestack[offset + 1] = descrGet.invokeExact(typeAttr, obj, objType); + return; + } catch (Slot.EmptyException e) { /* No __get__ after all: fall through and return typeAttr as a plain value. */ } + } + + if (typeAttr != null) { + /* + * The attribute obtained from the type, and that turned out not to + * be a descriptor, is the callable. + */ + valuestack[offset] = null; + valuestack[offset + 1] = typeAttr; + return; + } + + // All the look-ups and descriptors came to nothing :( + throw Abstract.noAttributeError(obj, name); + } + + /** + * Generate error to throw when we cannot access locals. 
+ * + * @param action "loading", "storing" or "deleting" + * @param name variable name + * @return + */ + private static SystemError noLocals(String action, String name) { + return new SystemError("no locals found when %s '%s'", name); + } + + /** + * Create a {@link TypeError} to throw when keyword arguments appear + * not to be a mapping. {@code dict.merge} raises + * {@link AttributeError} (percolated from an attempt to get 'keys' + * attribute) if its second argument is not a mapping, which we + * convert to a {@link TypeError}. + * + * @param func providing a function name for context + * @param kwargs the alleged mapping + * @return an exception to throw + */ + // Compare CPython format_kwargs_error in ceval.c + private static TypeError kwargsTypeError(Object func, Object kwargs) { + String funcstr = PyObjectUtil.functionStr(func); + return Abstract.argumentTypeError(funcstr, "**", "a mapping", kwargs); + } + + /** + * Create a {@link TypeError} to throw when a duplicate key turns up + * while merging keyword arguments to a function call. + * + * @param ke the duplicate key error + * @param func providing a function name for context + * @return an exception to throw + */ + // Compare CPython format_kwargs_error in ceval.c + private static TypeError kwargsKeyError(KeyError.Duplicate ke, Object func) { + /* + * PyDict.merge raises KeyError.Duplicate (percolated from an + * attempt to assign an existing key), which we convert to a + * TypeError. + */ + String funcstr = PyObjectUtil.functionStr(func); + return new TypeError("%s got multiple values for keyword argument '%s'", funcstr, ke.key); + } +} diff --git a/core/src/main/java/org/python/core/CPython311Function.java b/core/src/main/java/org/python/core/CPython311Function.java new file mode 100644 index 000000000..6794d367d --- /dev/null +++ b/core/src/main/java/org/python/core/CPython311Function.java @@ -0,0 +1,61 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +/** A {@link PyFunction} defined in CPython 3.11 byte code. */ +class CPython311Function extends PyFunction { + + /** Argument parser matched to {@link #code}. */ + private ArgParser argParser; + + /** + * Create a Python {@code function} object defined in CPython 3.11 + * code (full-featured constructor). + * + * @param interpreter providing the module context + * @param code defining the function + * @param globals name space to treat as global variables + * @param defaults default positional argument values + * @param kwdefaults default keyword argument values + * @param annotations type annotations + * @param closure variable referenced but not defined here, must be + * the same size as code + */ + CPython311Function(Interpreter interpreter, CPython311Code code, PyDict globals, + Object[] defaults, PyDict kwdefaults, Object annotations, PyCell[] closure) { + super(interpreter, code, globals, defaults, kwdefaults, annotations, closure); + this.argParser = code.buildParser().defaults(defaults).kwdefaults(kwdefaults); + } + + /** + * Create a Python {@code function} object defined in CPython 3.11 + * code in a simplified form suitable to represent execution of a + * top-level module. 
+ * + * @param interpreter providing the module context + * @param code defining the function + * @param globals name space to treat as global variables + */ + public CPython311Function(Interpreter interpreter, CPython311Code code, PyDict globals) { + this(interpreter, code, globals, null, null, null, null); + } + + @Override + CPython311Frame createFrame(Object locals) { return new CPython311Frame(this, locals); } + + // slot methods -------------------------------------------------- + + @Override + Object __call__(Object[] args, String[] names) throws Throwable { + + // Create a loose frame + CPython311Frame frame = createFrame(null); + + // Fill the local variables that are arguments + ArgParser.FrameWrapper wrapper = argParser.new ArrayFrameWrapper(frame.fastlocals); + argParser.parseToFrame(wrapper, args, names); + + // Run the function body + return frame.eval(); + } +} diff --git a/core/src/main/java/org/python/core/Callables.java b/core/src/main/java/org/python/core/Callables.java new file mode 100644 index 000000000..d904c4326 --- /dev/null +++ b/core/src/main/java/org/python/core/Callables.java @@ -0,0 +1,383 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.util.Arrays; +import java.util.Map; +import java.util.Map.Entry; + +/** Compare CPython {@code Objects/call.c}: {@code Py_Object_*}. */ +class Callables extends Abstract { + + private Callables() {} // only static methods here + + // XXX Could this be (String[]) null with advantages? + private static final String[] NO_KEYWORDS = new String[0]; + + /** + * Call an object with the standard {@code __call__} protocol, that + * is, with an array of all the arguments, those given by position, + * then those given by keyword, and an array of the keywords in the + * same order. 
Therefore {@code np = args.length - names.length} + * arguments are given by position, and the keyword arguments are + * {@code args[np:]} named by {@code names[:]}. + * + * @param callable target + * @param args all the arguments (position then keyword) + * @param names of the keyword arguments + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython PyObject_Call in call.c + // Note that CPython allows only exactly tuple and dict. + static Object call(Object callable, Object[] args, String[] names) throws TypeError, Throwable { + + // Speed up the common idiom: + // if (names == null || names.length == 0) ... + if (names != null && names.length == 0) { names = null; } + + if (callable instanceof FastCall) { + // Take the direct route since __call__ is immutable + FastCall fast = (FastCall)callable; + try { + return fast.call(args, names); + } catch (ArgumentError ae) { + // Demand a proper TypeError. + throw fast.typeError(ae, args, names); + } + } + + try { + // Call via the special method (slot function) + MethodHandle call = Operations.of(callable).op_call; + return call.invokeExact(callable, args, names); + } catch (Slot.EmptyException e) { + throw typeError(OBJECT_NOT_CALLABLE, callable); + } + } + + /** + * Call an object with the classic CPython call protocol, that is, + * with a tuple of arguments given by position and a dictionary of + * key-value pairs providing arguments given by keyword. 
+ * + * @param callable target + * @param argTuple positional arguments + * @param kwDict keyword arguments + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython PyObject_Call in call.c + static Object call(Object callable, PyTuple argTuple, PyDict kwDict) + throws TypeError, Throwable { + + Object[] args; + String[] kwnames; + + if (kwDict == null || kwDict.isEmpty()) { + args = argTuple.toArray(); + kwnames = null; + + } else { + int n = argTuple.size(), m = kwDict.size(), i = 0; + args = argTuple.toArray(new Object[n + m]); + kwnames = new String[m]; + for (Map.Entry e : kwDict.entrySet()) { + Object name = e.getKey(); + kwnames[i++] = PyUnicode.asString(name, Callables::keywordTypeError); + args[n++] = e.getValue(); + } + } + + try { // XXX FastCall possible + /* + * In CPython, there are specific cases here that look for support + * for vector call and PyCFunction (would be PyJavaFunction) leading + * to PyVectorcall_Call or cfunction_call_varargs respectively on + * the args, kwargs arguments. + */ + MethodHandle call = Operations.of(callable).op_call; + return call.invokeExact(callable, args, kwnames); + } catch (Slot.EmptyException e) { + throw typeError(OBJECT_NOT_CALLABLE, callable); + } + } + + /** + * Call an object with the CPython call protocol as supported in the + * interpreter {@code CALL_FUNCTION_EX} opcode, that is, an argument + * tuple (or iterable) and keyword dictionary (or iterable of + * key-value pairs), which may be built by code at the opcode site. + * + * @param callable target + * @param args positional arguments + * @param kwargs keyword arguments + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython PyObject_Call in call.c + /* + * Note that CPython allows only exactly tuple and dict. 
(It deals + * with iterables within the opcode implementation.) + */ + static Object callEx(Object callable, Object args, Object kwargs) throws TypeError, Throwable { + + // Represent kwargs as a dict (if not already or null) + PyDict kw; + if (kwargs == null || kwargs instanceof PyDict) + kw = (PyDict)kwargs; + else { + // TODO: Treat kwargs as an iterable of (key,value) pairs + // Throw TypeError if not convertible + kw = Py.dict(); + // Check kwargs iterable, and correctly typed + // kwDict.update(Mapping.items(kwargs)); + } + + // Represent args as a PyTuple (if not already) + PyTuple ar; + if (args instanceof PyTuple) + ar = (PyTuple)args; + else { + // TODO: Treat args as an iterable of objects + // Throw TypeError if not convertible + ar = Py.tuple(); + // Construct PyTuple with whatever checks on values + // argTuple = Sequence.tuple(args); + } + + return call(callable, ar, kw); + } + + static final String OBJECT_NOT_CALLABLE = "'%.200s' object is not callable"; + static final String OBJECT_NOT_VECTORCALLABLE = "'%.200s' object does not support vectorcall"; + static final String ATTR_NOT_CALLABLE = "attribute of type '%.200s' is not callable"; + + /** + * Convert classic call arguments to an array and names of keywords + * to use in the CPython-style vector call. + * + * @param args positional arguments + * @param kwargs keyword arguments (normally {@code PyDict}) + * @param stack to receive positional and keyword arguments, must be + * sized {@code args.length + kwargs.size()}. 
+ * @return names of keyword arguments + */ + // Compare CPython _PyStack_UnpackDict in call.c + static PyTuple unpackDict(Object[] args, Map kwargs, Object[] stack) + throws ArrayIndexOutOfBoundsException { + int nargs = args.length; + assert (kwargs != null); + assert (stack.length == nargs + kwargs.size()); + + System.arraycopy(args, 0, stack, 0, nargs); + + PyTuple.Builder kwnames = new PyTuple.Builder(kwargs.size()); + int j = nargs; + for (Entry e : kwargs.entrySet()) { + kwnames.append(e.getKey()); + stack[j++] = e.getValue(); + } + + return kwnames.take(); + } + + /** + * Call an object with the vector call protocol with some arguments + * given by keyword. This supports CPython byte code generated + * according to the conventions in PEP-590. Unlike its use in + * CPython, this is not likely to be faster than the standard + * {@link #call(Object, Object[], String[]) call} method. + * + * @see FastCall#vectorcall(Object[], int, int, String[]) + * + * @param callable target + * @param stack positional and keyword arguments + * @param start position of arguments in the array + * @param nargs number of positional and keyword arguments + * @param kwnames names of keyword arguments or {@code null} + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython _PyObject_Vectorcall in abstract.h + // In CPython nargs counts only positional arguments + static Object vectorcall(Object callable, Object[] stack, int start, int nargs, PyTuple kwnames) + throws Throwable { + String[] names = Callables.namesArray(kwnames); + if (callable instanceof FastCall) { + // Fast path recognising optimised callable + FastCall fast = (FastCall)callable; + try { + return fast.vectorcall(stack, start, nargs, names); + } catch (ArgumentError ae) { + // Demand a proper TypeError. 
+ throw fast.typeError(ae, stack, start, nargs, names); + } + } + // Slow path by converting stack to ephemeral array + Object[] args = Arrays.copyOfRange(stack, start, start + nargs); + return call(callable, args, names); + } + + /** + * Call an object with the vector call protocol with no arguments + * given by keyword. This supports CPython byte code generated + * according to the conventions in PEP-590. Unlike its use in + * CPython, this is not likely to be faster than the standard + * {@link #call(Object, Object[], String[]) call} method. + * + * @see FastCall#vectorcall(Object[], int, int) + * + * @param callable target + * @param stack positional and keyword arguments (the stack) + * @param start position of arguments in the array + * @param nargs number of positional and keyword arguments + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython _PyObject_Vectorcall in abstract.h + // In CPython nargs counts only positional arguments + static Object vectorcall(Object callable, Object[] stack, int start, int nargs) + throws TypeError, Throwable { + if (callable instanceof FastCall) { + // Fast path recognising optimised callable + FastCall fast = (FastCall)callable; + try { + return fast.vectorcall(stack, start, nargs); + } catch (ArgumentError ae) { + // Demand a proper TypeError. + throw fast.typeError(ae, stack, start, nargs); + } + } + // Slow path by converting stack to ephemeral array + Object[] args = Arrays.copyOfRange(stack, start, start + nargs); + return call(callable, args, NO_KEYWORDS); + } + + /** + * Return a dictionary containing the last {@code len(kwnames)} + * elements of the slice {@code stack[start:start+nargs]}. This is a + * helper method to convert CPython vector calls (calls from a slice + * of an array, usually the stack) and involving keywords. 
+ * {@code kwnames} normally contains only {@code str} objects, but + * that is not enforced here. + * + * @param stack positional and keyword arguments + * @param start position of arguments in the array + * @param nargs number of positional arguments + * @param kwnames tuple of names (may be {@code null} if empty) + * @return dictionary or {@code null} if {@code kwnames==null} + */ + // Compare CPython _PyStack_AsDict in call.c + static PyDict stackAsDict(Object[] stack, int start, int nargs, PyTuple kwnames) { + PyDict kwargs = null; + if (kwnames != null) { + kwargs = Py.dict(); + Object[] names = kwnames.value; + for (int i = 0, j = start + nargs; i < names.length; i++) + kwargs.put(names[i], stack[j++]); + } + return kwargs; + } + + /** + * Call an object with positional arguments supplied from Java as + * {@code Object}s. + * + * @param callable target + * @param args positional arguments + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython PyObject_CallFunctionObjArgs in call.c + static Object callFunction(Object callable, Object... args) throws Throwable { + return call(callable, args, NO_KEYWORDS); + } + + /** + * Call an object with no arguments. + * + * @param callable target + * @return the return from the call to the object + * @throws TypeError if target is not callable + * @throws Throwable for errors raised in the function + */ + // Compare CPython _PyObject_CallNoArg in abstract.h + // and _PyObject_Vectorcall in abstract.h + static Object call(Object callable) throws Throwable { + if (callable instanceof FastCall) { + // Take the short-cut. + FastCall fast = (FastCall)callable; + try { + return fast.call(); + } catch (ArgumentError ae) { + // Demand a proper TypeError. + throw fast.typeError(ae, Py.EMPTY_ARRAY); + } + } + // Fast call is not supported by the type. Make standard call. 
+ return call(callable, Py.EMPTY_ARRAY, NO_KEYWORDS); + } + + /** + * Resolve a name within an object and then call it with the given + * positional arguments supplied from Java. + * + * @param obj target of the method invocation + * @param name identifying the method + * @param args positional arguments + * @return result of call + * @throws AttributeError if the named callable cannot be found + * @throws Throwable from the called method + */ + // Compare CPython _PyObject_CallMethodIdObjArgs in call.c + static Object callMethod(Object obj, String name, Object... args) + throws AttributeError, Throwable { + Object callable = getAttr(obj, name); + return callFunction(callable, args); + } + + /** + * Convert a {@code tuple} of names to an array of Java + * {@code String}. This is useful when converting CPython-style + * keyword names in a call to the array of (guaranteed) + * {@code String} which most of the implementation of call expects. + * + * @param kwnames (keyword) names to convert + * @return the names as an array + * @throws TypeError if any keyword is not a string + */ + static String[] namesArray(PyTuple kwnames) throws TypeError { + int n; + if (kwnames == null || (n = kwnames.size()) == 0) { + return NO_KEYWORDS; + } else { + String[] names = new String[n]; + for (int i = 0; i < n; i++) { + Object name = kwnames.get(i); + names[i] = PyUnicode.asString(name, Callables::keywordTypeError); + } + return names; + } + } + + /** + * Create a {@link TypeError} with a message along the lines + * "keywords must be strings, not 'X'" giving the type X of + * {@code name}. 
+ * + * @param kwname actual object offered as a keyword + * @return exception to throw + */ + public static TypeError keywordTypeError(Object kwname) { + String fmt = "keywords must be strings, not '%.200s'"; + return new TypeError(fmt, PyType.of(kwname).getName()); + } +} diff --git a/core/src/main/java/org/python/core/ClassShorthand.java b/core/src/main/java/org/python/core/ClassShorthand.java new file mode 100644 index 000000000..e4adce4eb --- /dev/null +++ b/core/src/main/java/org/python/core/ClassShorthand.java @@ -0,0 +1,34 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +/** + * Some shorthands used to construct method signatures, + * {@code MethodType}s, etc.. + */ +interface ClassShorthand { + /** Shorthand for {@code Object.class}. */ + static final Class O = Object.class; + /** Shorthand for {@code Class.class}. */ + static final Class C = Class.class; + /** Shorthand for {@code String.class}. */ + static final Class S = String.class; + /** Shorthand for {@code int.class}. */ + static final Class I = int.class; + /** Shorthand for {@code boolean.class}. */ + static final Class B = boolean.class; + /** Shorthand for {@code PyType.class}. */ + static final Class T = PyType.class; + /** Shorthand for {@code void.class}. */ + static final Class V = void.class; + // ** Shorthand for {@code Comparison.class}. */ + // static final Class CMP = Comparison.class; + /** Shorthand for {@code PyTuple.class}. */ + static final Class TUPLE = PyTuple.class; + /// ** Shorthand for {@code PyDict.class}. */ + // static final Class DICT = PyDict.class; + /** Shorthand for {@code Object[].class}. */ + static final Class OA = Object[].class; + /** Shorthand for {@code String[].class}. 
*/ + static final Class SA = String[].class; +} diff --git a/core/src/main/java/org/python/core/Clinic.java b/core/src/main/java/org/python/core/Clinic.java new file mode 100644 index 000000000..3684c84ad --- /dev/null +++ b/core/src/main/java/org/python/core/Clinic.java @@ -0,0 +1,226 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.MethodType; + +import org.python.base.InterpreterError; + +/** + * A collection of methods and {@code MethodHandle}s for converting + * arguments when calling a Java method from Python. The class enables + * the implementation of built-in (or extension) Python types to be + * written in a natural way using Java standard and primitive types. + *

+ * The class name refers to the CPython Argument Clinic (by Larry + * Hastings) which generates argument processing code for exposed + * methods defined in C and by a textual header. + */ +// Compare CPython *.c.h wrappers +class Clinic { + + /** Lookup for resolving handles throughout the class. */ + private static final Lookup LOOKUP = MethodHandles.lookup(); + + private static final Class O = Object.class; + + // Handles for converters from Python to Java types for args + private static final MethodHandle intArgMH; + private static final MethodHandle doubleArgMH; + private static final MethodHandle stringArgMH; + + // Handles for converters from Java types to Python for returns + private static final MethodHandle voidValueMH; + + private static final MethodHandle intValueMH; + private static final MethodHandle doubleValueMH; + private static final MethodHandle booleanValueMH; + + /** + * Helpers used to construct {@code MethodHandle}s for type + * conversion. + */ + static { + try { + intArgMH = LOOKUP.findStatic(PyLong.class, "asInt", + MethodType.methodType(int.class, O)); + doubleArgMH = LOOKUP.findStatic(PyFloat.class, "asDouble", + MethodType.methodType(double.class, O)); + stringArgMH = LOOKUP.findStatic(Clinic.class, "stringArg", + MethodType.methodType(String.class, O)); + + voidValueMH = MethodHandles.constant(O, Py.None); + + intValueMH = LOOKUP.findStatic(Integer.class, "valueOf", + MethodType.methodType(Integer.class, int.class)); + doubleValueMH = LOOKUP.findStatic(Double.class, "valueOf", + MethodType.methodType(Double.class, double.class)); + booleanValueMH = LOOKUP.findStatic(Boolean.class, "valueOf", + MethodType.methodType(Boolean.class, + boolean.class)); + + } catch (NoSuchMethodException | IllegalAccessException e) { + throw new InterpreterError(e, "during handle lookup"); + } + } + + private Clinic() {} // Oh no you don't + + /** + * Create an array of filters to convert an existing method handle, + * with the given type, to one that 
expects arguments (starting at a + * given index) that are all {@code Object} in the converted + * locations. The returned array is suitable as an argument to + * {@code MethodHandles.filterArguments}. (Some elements may be + * {@code null}, meaning no adapter is applied.) + *

+ * Where the existing method (described by {@code MethodType mt}) + * expects a primitive type, or a supported reference type (such as + * {@code BigInteger}), the handle to a standard conversion + * accepting an {@code Object} argument will be supplied. These + * conversions will throw a Python exception (often + * {@link TypeError}), when invoked on objects they cannot convert, + * according to the usual behaviour of Python. + *

+ * Where the existing method expects some other reference type, a + * {@code null} conversion will be supplied. If the reference type + * is {@code Object}, no problem arises. + *

+ * When using this filter to adapt a handle on a purported + * implementation of an exposed Python method, types {@code Clinic} + * cannot convert will remain unchanged in the {@code MethodType} of + * the adapted handle. Such a handle must be invoked with arguments + * of exactly matching static type. If (as is likely), in the + * invocation context, the arguments will all be statically + * {@code Object}, the adapted handle would lead to a Java + * {@code WrongMethodTypeException}. + * + * @param mt type to adapt. + * @param pos index in the type at which to start. + * @return array of filter-adaptors to expect {@code Object}. + */ + static MethodHandle[] argumentFilter(MethodType mt, int pos) { + final int n = mt.parameterCount() - pos; + MethodHandle[] filter = new MethodHandle[n]; + for (int p = 0; p < n; p++) { + Class pt = mt.parameterType(pos + p); + filter[p] = adaptParameterToObject(pt); + } + return filter; + } + + /** + * Equivalent to {@code argumentFilter(mt, 0)}. + * + * @param mt type to adapt. + * @return array of filter-adaptors to expect {@code Object}. + */ + static MethodHandle[] argumentFilter(MethodType mt) { + return argumentFilter(mt, 0); + } + + /** + * Return a filter that will adapt an existing method handle with + * the given type, to one that returns {@code Object} or a + * sub-class. If not {@code null}, the handle produced is suitable + * as an argument to {@code MethodHandles.filterReturnValue}. + *

+ * This adapter will often be a constructor for the implementation + * type or equivalent convenience method. If the return type is + * {@code void.class}, the adapter takes no arguments and produces + * {@link Py#None}. + *

+ * If the return type is already {@code Object} or a sub-class, this + * method returns {@code null} (which is not suitable as an argument + * to {@code MethodHandle.filterReturnValue}). Client code must test + * for this. + * + * @param mt type to adapt. + * @return {@code null} or a filter-adapter to return + * {@code Object}. + */ + static MethodHandle returnFilter(MethodType mt) { + return adaptReturnToObject(mt.returnType()); + } + + // Conversions to Java ------------------------------------------- + + /** + * The logic of this method defines the standard for converting + * Python types to a specified Java type. + * + * @param c Java type + * @return filter converting Python object to {@code c}. + */ + private static MethodHandle adaptParameterToObject(Class c) { + if (c.isPrimitive()) { + if (c == int.class) { + return Clinic.intArgMH; + } else if (c == double.class) { return Clinic.doubleArgMH; } + } else { + if (c == String.class) { + return Clinic.stringArgMH; + } else if (c == O) { + // The method expects a Object + return null; + } else if (O.isAssignableFrom(c)) { + // The method expects some sub-class of Object + return null; + } + } + throw new InterpreterError( + "Cannot convert Python object to Java %s", + c.getSimpleName()); + } + + /** + * @param o to convert + * @return Java {@code String} value of an object + * @throws TypeError if not interpretable as a string + * @throws Throwable from {@code o.__str__} if called + */ + @SuppressWarnings("unused") + private static String stringArg(Object o) + throws TypeError, Throwable { + Class c = o.getClass(); + if (c == PyUnicode.class) { + return ((PyUnicode) o).toString(); + } else { + return Abstract.str(o).toString(); + } + } + + // Conversions from Java ----------------------------------------- + + /** + * The logic of this method defines the standard for converting + * specified Java types to Python. + * + * @param c Java type + * @return filter converting {@code c} to a Python object. 
+ */ + private static MethodHandle adaptReturnToObject(Class c) { + if (c.isPrimitive()) { + if (c == void.class) { + return Clinic.voidValueMH; + } else if (c == int.class) { + return Clinic.intValueMH; + } else if (c == double.class) { + return Clinic.doubleValueMH; + } else if (c == boolean.class) { + return Clinic.booleanValueMH; + } + } else { + // XXX Possibly special-case certain Java types + // The value is already some kind of Object + return null; + } + throw new InterpreterError( + "Cannot convert Java %s to Python object", + c.getSimpleName()); + } + +} diff --git a/core/src/main/java/org/python/core/Comparison.java b/core/src/main/java/org/python/core/Comparison.java new file mode 100644 index 000000000..50af4ceca --- /dev/null +++ b/core/src/main/java/org/python/core/Comparison.java @@ -0,0 +1,279 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; + +import org.python.core.Slot.EmptyException; + +/** + * Selects a particular "rich comparison" operation from the + * repertoire supported by {@code Opcode.COMPARE_OP}, the argument + * to which is the {@code code} attribute of the name in this + * {@code enum}. + * + * @apiNote The order matches CPython's enumeration of operations + * used in the argument to {@code COMPARE_OP}, so that we can + * rely on it in the CPython byte code interpreter. + */ +enum Comparison { + // Order and number must be reflected in swap[]. + + /** The {@code __lt__} operation. */ + LT("<", Slot.op_lt) { + + @Override + boolean toBool(int c) { return c < 0; } + }, + + /** The {@code __le__} operation. */ + LE("<=", Slot.op_le) { + + @Override + boolean toBool(int c) { return c <= 0; } + }, + + /** The {@code __eq__} operation. */ + EQ("==", Slot.op_eq) { + + @Override + boolean toBool(int c) { return c == 0; } + }, + + /** The {@code __ne__} operation. 
*/ + NE("!=", Slot.op_ne) { + + @Override + boolean toBool(int c) { return c != 0; } + }, + + /** The {@code __gt__} operation. */ + GT(">", Slot.op_gt) { + + @Override + boolean toBool(int c) { return c > 0; } + }, + + /** The {@code __ge__} operation. */ + GE(">=", Slot.op_ge) { + + @Override + boolean toBool(int c) { return c >= 0; } + }, + + /** + * The {@code in} operation (reflected {@code __contains__}). Note + * that "{@code v in seq}" compiles to

+     *    LOAD_NAME    0 (v)
+     *    LOAD_NAME    1 (seq)
+     *    COMPARE_OP   6 (in)
+     * 
which must lead to {@code seq.__contains__(v)}. + */ + IN("in", Slot.op_contains) { + + @Override + boolean toBool(int c) { return c >= 0; } + + @Override + Object apply(Object v, Object seq) throws Throwable { + Operations ops = Operations.of(seq); + try { + MethodHandle contains = slot.getSlot(ops); + return (boolean)contains.invokeExact(seq, v); + } catch (Slot.EmptyException e) { + throw new TypeError(NOT_CONTAINER, ops.type(seq).name); + } + } + }, + + /** + * The inverted {@code in} operation (reflected + * {@code __contains__}). + */ + NOT_IN("not in", Slot.op_contains) { + + @Override + boolean toBool(int c) { return c < 0; } + + @Override + Object apply(Object v, Object seq) throws Throwable { + Operations ops = Operations.of(seq); + try { + MethodHandle contains = slot.getSlot(ops); + return !(boolean)contains.invokeExact(seq, v); + } catch (Slot.EmptyException e) { + throw new TypeError(NOT_CONTAINER, ops.type(seq).name); + } + } + }, + + /** The identity operation. */ + IS("is") { + + @Override + boolean toBool(int c) { return c == 0; } + + @Override + Object apply(Object v, Object w) throws Throwable { return v == w; } + + }, + + /** The inverted identity operation. */ + IS_NOT("is not") { + + @Override + boolean toBool(int c) { return c != 0; } + + @Override + Object apply(Object v, Object w) throws Throwable { return v != w; } + }, + + /** The exception matching operation. */ + EXC_MATCH("matches") { + + @Override + boolean toBool(int c) { return c == 0; } + + @Override + Object apply(Object v, Object w) throws Throwable { + return Py.NotImplemented; // XXX implement me! + } + }, + + /** A dummy operation representing an invalid comparison. 
*/ + BAD("?") { + + @Override + boolean toBool(int c) { return false; } + + @Override + Object apply(Object v, Object w) throws Throwable { return Py.NotImplemented; } + }; + + final String text; + final Slot slot; + + Comparison(String text, Slot slot) { + this.text = text; + this.slot = slot; + } + + Comparison(String text) { this(text, null); } + + /** + * The text corresponding to the value, e.g. "!=" for {@code NE}, + * "is not" for {@code IS_NOT}. Mostly for error messages. + * + * @return text corresponding + */ + @Override + public String toString() { return text; } + + /** + * Translate CPython {@link Opcode311#COMPARE_OP} opcode argument to + * Comparison constant. + * + * @param oparg opcode argument + * @return equivalent {@code Comparison} object + */ + static Comparison from(int oparg) { + return oparg >= 0 && oparg < from.length ? from[oparg] : BAD; + } + + private static final Comparison[] from = values(); + + /** + * The swapped version of this comparison, e.g. LT with GT. + * + * @return swapped version of this comparison + */ + Comparison swapped() { return swap[this.ordinal()]; } + + private static final Comparison[] swap = + {GT, GE, EQ, NE, LT, LE, BAD, BAD, IS, IS_NOT, BAD, BAD}; + + /** + * Translate a comparison result into the appropriate boolean, for + * example {@code GE.toBool(1)} is {@code true}. For the six + * operations LT to GE inclusive, this is typically wrapped around a + * call to {@code Comparable.compareTo()}. For the others we assume + * c==0 indicates equality. + *

+ * Avoid the temptation to use the result of a subtraction here + * unless there is no possibility of overflow in the subtraction. + * + * @param c comparison result + * @return boolean equivalent for this operation + */ + // Compare CPython object.h::Py_RETURN_RICHCOMPARE + abstract boolean toBool(int c); + + /** + * Perform this comparison, raising {@code TypeError} when the + * requested comparison operator is not supported. + * + * @param v left operand + * @param w right operand + * @return comparison result + * @throws Throwable from the implementation of the comparison + */ + // Compare CPython PyObject_RichCompare, do_richcompare in object.c + Object apply(Object v, Object w) throws Throwable { + Operations vOps = Operations.of(v); + PyType vType = vOps.type(v); + Operations wOps = Operations.of(w); + PyType wType = wOps.type(w); + Slot swappedSlot = null; + + // Try the swapped operation first if w is a sub-type of v + + if (vType != wType && wType.isSubTypeOf(vType)) { + swappedSlot = swapped().slot; + try { + Object r = swappedSlot.getSlot(wOps).invokeExact(w, v); + if (r != Py.NotImplemented) { return r; } + } catch (EmptyException e) {} + } + + // Try the forward operation + try { + Object r = slot.getSlot(vOps).invokeExact(v, w); + if (r != Py.NotImplemented) { return r; } + } catch (EmptyException e) {} + + // Try the swapped operation if we haven't already + if (swappedSlot == null) { + swappedSlot = swapped().slot; + try { + Object r = swappedSlot.getSlot(wOps).invokeExact(w, v); + if (r != Py.NotImplemented) { return r; } + } catch (EmptyException e) {} + } + + // Neither object implements this. Base == and != on identity. 
+ switch (this) { + case EQ: + return v == w; + case NE: + return v != w; + default: + throw comparisonTypeError(v, w); + } + } + + /** + * Create a TypeError along the lines "OP not supported between + * instances of V and W" + * + * @param v left arg + * @param w right arg + * @return the exception + */ + PyException comparisonTypeError(Object v, Object w) { + return new TypeError(NOT_SUPPORTED, this, PyType.of(v).name, PyType.of(w).name); + } + + private static String NOT_SUPPORTED = + "'%s' not supported between instances of '%.100s' and '%.100s'"; + private static String NOT_CONTAINER = "'%.200s' object is not a container"; +} diff --git a/core/src/main/java/org/python/core/CraftedPyObject.java b/core/src/main/java/org/python/core/CraftedPyObject.java new file mode 100644 index 000000000..d7fbb912c --- /dev/null +++ b/core/src/main/java/org/python/core/CraftedPyObject.java @@ -0,0 +1,14 @@ +package org.python.core; + +/** + * All Python object implementations that we write ourselves implement this + * interface. + */ +public interface CraftedPyObject { + /** + * The Python {@code type} of this object. + * + * @return {@code type} of this object + */ + PyType getType(); +} diff --git a/core/src/main/java/org/python/core/DataDescriptor.java b/core/src/main/java/org/python/core/DataDescriptor.java new file mode 100644 index 000000000..db38caf90 --- /dev/null +++ b/core/src/main/java/org/python/core/DataDescriptor.java @@ -0,0 +1,152 @@ +package org.python.core; + +/** Base class of built-in data descriptors. */ +abstract class DataDescriptor extends Descriptor { + + /** + * Create the common part of {@code DataDescriptor} sub-classes. 
+ * + * @param descrtype actual Python type of descriptor + * @param objclass to which the descriptor applies + * @param name of the attribute + */ + DataDescriptor(PyType descrtype, PyType objclass, String name) { + super(descrtype, objclass, name); + } + + /** + * The {@code __set__} special method of the Python descriptor + * protocol, implementing {@code obj.name = value}. In general, + * {@code obj} must be of type {@link #objclass}. + * + * @param obj object on which the attribute is sought + * @param value to assign (not {@code null}) + * @throws Throwable from the implementation of the setter + */ + // Compare CPython *_set methods in descrobject.c + abstract void __set__(Object obj, Object value) throws TypeError, Throwable; + + /** + * The {@code __delete__} special method of the Python descriptor + * protocol, implementing {@code del obj.name}. In general, + * {@code obj} must be of type {@link #objclass}. + * + * @param obj object on which the attribute is sought + * @throws Throwable from the implementation of the deleter + */ + // Compare CPython *_set in descrobject.c with NULL + abstract void __delete__(Object obj) throws TypeError, Throwable; + + /** + * {@code descr.__set__(obj, value)} has been called on this + * descriptor. We must check that the descriptor applies to the type + * of object supplied as the {@code obj} argument. From Python, + * anything could be presented, but when we operate on it, we'll be + * assuming the particular {@link #objclass} type. + * + * @param obj target object (argument to {@code __set__}) + * @throws TypeError if descriptor doesn't apply to {@code obj} + */ + // Compare CPython descr_setcheck in descrobject.c + protected void checkSet(Object obj) throws TypeError { + PyType objType = PyType.of(obj); + if (!objType.isSubTypeOf(objclass)) { + throw new TypeError(DESCRIPTOR_DOESNT_APPLY, name, objclass.name, objType.name); + } + } + + /** + * {@code descr.__delete__(obj)} has been called on this descriptor. 
+ * We must check that the descriptor applies to the type of object + * supplied as the {@code obj} argument. From Python, anything could + * be presented, but when we operate on it, we'll be assuming the + * particular {@link #objclass} type. + * + * @param obj target object (argument to {@code __delete__}) + */ + // Compare CPython descr_setcheck in descrobject.c + protected void checkDelete(Object obj) throws TypeError { + PyType objType = PyType.of(obj); + if (!objType.isSubTypeOf(objclass)) { + throw new TypeError(DESCRIPTOR_DOESNT_APPLY, name, objclass.name, objType.name); + } + } + + /** + * Create an {@link AttributeError} with a message along the lines + * "attribute 'N' of 'T' objects is not readable" involving the name + * N of this attribute and the type T which is + * {@link Descriptor#objclass}. + * + * @return exception to throw + */ + protected AttributeError cannotReadAttr() { + String msg = "attribute '%.50s' of '%.100s' objects is not readable"; + return new AttributeError(msg, name, objclass.getName()); + } + + /** + * Create an {@link AttributeError} with a message along the lines + * "attribute 'N' of 'T' objects is not writable" involving the name + * N of this attribute and the type T which is + * {@link Descriptor#objclass}. + * + * @return exception to throw + */ + protected AttributeError cannotWriteAttr() { + String msg = "attribute '%.50s' of '%.100s' objects is not writable"; + return new AttributeError(msg, name, objclass.getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines "cannot + * delete attribute N from 'T' objects" involving the name N of this + * attribute and the type T which is {@link Descriptor#objclass}, + * e.g. "cannot delete attribute f_trace_lines from + * 'frame' objects". 
+ * + * @return exception to throw + */ + protected TypeError cannotDeleteAttr() { + String msg = "cannot delete attribute %.50s from '%.100s' objects"; + return new TypeError(msg, name, objclass.getName()); + } + + /** + * Create a {@link TypeError} with a message along the lines "'N' + * must be T, not 'X' as received" involving the name N of the + * attribute, any descriptive phrase T and the type X of + * {@code value}, e.g. "'__dict__' must be a + * dictionary, not 'list' as received". + * + * @param kind expected kind of thing + * @param value provided to set this attribute in some object + * @return exception to throw + */ + protected TypeError attrMustBe(String kind, Object value) { + return Abstract.attrMustBe(name, kind, value); + } + + /** + * Create a {@link TypeError} with a message along the lines "'N' + * must be T, not 'X' as received" involving the name N of the + * attribute, a description T based on the expected Java class + * {@code attrClass}, and the type X of {@code value}, e.g. + * "'__dict__' must be a dictionary, not 'list' + * as received". 
+ * + * @param attrClass expected kind of thing + * @param value provided to set this attribute in some object + * @return exception to throw + */ + protected TypeError attrMustBe(Class attrClass, Object value) { + String kind; + PyType pyType = PyType.fromClass(attrClass); + if (pyType.acceptedCount == 1) { + kind = String.format("'%.50s'", pyType.getName()); + } else { + kind = String.format("'%.50s' (as %.50s)", attrClass.getSimpleName()); + } + return Abstract.attrMustBe(name, kind, value); + } +} diff --git a/core/src/main/java/org/python/core/DerivedPyObject.java b/core/src/main/java/org/python/core/DerivedPyObject.java new file mode 100644 index 000000000..3f6016410 --- /dev/null +++ b/core/src/main/java/org/python/core/DerivedPyObject.java @@ -0,0 +1,7 @@ +package org.python.core; + +/** + * {@code DerivedPyObject} is a marker interface that identifies an object that + * is an instance of a class defined in Python. + */ +interface DerivedPyObject extends CraftedPyObject {} diff --git a/core/src/main/java/org/python/core/Descriptor.java b/core/src/main/java/org/python/core/Descriptor.java new file mode 100644 index 000000000..0ee9eee3e --- /dev/null +++ b/core/src/main/java/org/python/core/Descriptor.java @@ -0,0 +1,148 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import org.python.core.Slot.EmptyException; + +/** + * The base class of many built-in descriptors. Descriptors are a + * fundamental component of the Python type system, populating the + * dictionary of every type. + * + * @implNote It must be possible to create an instance of any concrete + * descriptor (a sub-class of this one) in circumstances where the + * only types in existence are {@link PyType#TYPE} and + * {@link PyType#OBJECT_TYPE}, and where these have not yet been + * given their descriptor attributes or operation slots + * ({@code op_*} slots}. + *

+ * In order to create a descriptor, the JVM need only complete the + * static initialisation of the Java class for that descriptor and + * be able to execute the constructor. + */ +abstract class Descriptor extends AbstractPyObject { + + protected static final String DESCRIPTOR_DOESNT_APPLY = + "descriptor '%s' for '%.100s' objects doesn't apply to a '%.100s' object"; + protected static final String DESCRIPTOR_NEEDS_ARGUMENT = + "descriptor '%s' of '%.100s' object needs an argument"; + protected static final String DESCRIPTOR_REQUIRES = + "descriptor '%s' requires a '%.100s' object but received a '%.100s'"; + /** Single re-used instance of {@link Slot.EmptyException} */ + protected static final EmptyException EMPTY = new EmptyException(); + + /** + * Python {@code type} that defines the attribute being described + * (e.g. for a method, the Python type of the object that will be + * "self" in a call). This is exposed to Python as + * {@code __objclass__}. + */ + // In CPython, called d_type + protected final PyType objclass; + + /** + * Name of the object described, e.g. "__add__" or "to_bytes". This + * is exposed to Python as {@code __name__}. + */ + // In CPython, called d_name + @Exposed.Member(value="__name__", readonly=true) + protected final String name; + + /** + * Qualified name of the object described, e.g. "float.__add__" or + * "int.to_bytes". This is exposed to Python as + * {@code __qualname__}. + */ + // In CPython, called d_qualname. Where used? Better computed? + protected String qualname = null; + + Descriptor(PyType descrtype, PyType objclass, String name) { + super(descrtype); + this.objclass = objclass; + this.name = name; + } + + /** + * The {@code __get__} special method of the Python descriptor + * protocol, implementing {@code obj.name} or possibly + * {@code type.name}. + * + * @apiNote Different descriptor types may have quite different + * behaviour. 
In general, a call made with {@code obj == null} + * is seeking a result related to the {@code type}, while in one + * where {@code obj != null}, {@code obj} must be of type + * {@link #objclass} and {@code type} will be ignored. + * @param obj object on which the attribute is sought or + * {@code null} + * @param type on which this descriptor was found (may be ignored) + * @return attribute value, bound object or this attribute + * @throws Throwable from the implementation of the getter + */ + // Compare CPython *_get methods in descrobject.c + abstract Object __get__(Object obj, PyType type) throws Throwable; + + /** + * Helper for {@code __repr__} implementation. It formats together + * the {@code kind} argument ("member", "attribute", "method", or + * "slot wrapper"), {@code this.name} and + * {@code this.objclass.name}. + * + * @param kind description of type (first word in the repr) + * @return repr as a {@code str} + */ + protected String descrRepr(String kind) { + return String.format("<%s '%.50s' of '%.100s' objects>", kind, + name, objclass.name); + } + + /** + * {@code descr.__get__(obj, type)} has been called on this + * descriptor. We must check that the descriptor applies to the type + * of object supplied as the {@code obj} argument. From Python, + * anything could be presented, but when we operate on it, we'll be + * assuming the particular {@link #objclass} type. + * + * @param obj target object (non-null argument to {@code __get__}) + * @throws TypeError if descriptor doesn't apply to {@code obj} + */ + // Compare CPython descr_check in descrobject.c + /* + * We differ from CPython in that: 1. We either throw or return + * void: there is no FALSE->error or descriptor. 2. The test + * obj==null (implying found on a type) is the caller's job. 3. In a + * data descriptor, we fold the auditing into this check. 
+ */ + protected void check(Object obj) throws TypeError { + PyType objType = PyType.of(obj); + if (!objType.isSubTypeOf(objclass)) { + throw new TypeError(DESCRIPTOR_DOESNT_APPLY, this.name, + objclass.name, objType.name); + } + } + + // Compare CPython calculate_qualname in descrobject.c + private String calculate_qualname() + throws AttributeError, Throwable { + Object type_qualname = + Abstract.getAttr(objclass, "__qualname__"); + if (type_qualname == null) + return null; + // XXX use PyUnicode.TYPE.check() + if (!(PyType.of(type_qualname).isSubTypeOf(PyUnicode.TYPE))) { + throw new TypeError( + ".__objclass__.__qualname__ is not a unicode object"); + } + return String.format("%s.%s", type_qualname, name); + } + + // Compare CPython descr_get_qualname in descrobject.c + static Object descr_get_qualname(Descriptor descr, Object ignored) + throws AttributeError, Throwable { + if (descr.qualname == null) + descr.qualname = descr.calculate_qualname(); + return descr.qualname; + } + + @Override + public String toString() { return Py.defaultToString(this); } +} diff --git a/core/src/main/java/org/python/core/DictPyObject.java b/core/src/main/java/org/python/core/DictPyObject.java new file mode 100644 index 000000000..185ba5d9d --- /dev/null +++ b/core/src/main/java/org/python/core/DictPyObject.java @@ -0,0 +1,20 @@ +package org.python.core; + +import java.util.Map; + +/** + * Python objects that have instance dictionaries implement this interface. + */ +public interface DictPyObject extends CraftedPyObject { + /** + * The dictionary of the instance (not necessarily a Python {@code dict} or + * writable). If the returned {@code Map} is not writable, it should throw a Java + * {@code UnsupportedOperationException} on attempts to modify it. + * + * @implSpec A class that implements {@code DictPyObject} should always return a + * mapping, which may be {@code Collections.emptyMap()} if the instance + * dictionary is intended to be permanently empty.
+ * @return a mapping to treat like a dictionary (not {@code null}). + */ + Map getDict(); +} diff --git a/core/src/main/java/org/python/core/EOFError.java b/core/src/main/java/org/python/core/EOFError.java new file mode 100644 index 000000000..2b7488a11 --- /dev/null +++ b/core/src/main/java/org/python/core/EOFError.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code EOFError} exception. */ +public class EOFError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type object of Python {@code EOFError} exceptions. */ + @SuppressWarnings("hiding") + public static final PyType TYPE = PyType + .fromSpec(new PyType.Spec("EOFError", MethodHandles.lookup()).base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected EOFError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public EOFError(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/Exposed.java b/core/src/main/java/org/python/core/Exposed.java new file mode 100644 index 000000000..72c18e237 --- /dev/null +++ b/core/src/main/java/org/python/core/Exposed.java @@ -0,0 +1,397 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.ElementType.TYPE; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +import java.lang.annotation.Documented; +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +/** + * Annotations that may be placed on elements of a Java class + * intended as the implementation of a Python type, and that the + * {@link Exposer} will look for during the definition of a + * {@link PyType}. + */ +public interface Exposed { + + /** + * Identify a Python instance method of a type or module defined in + * Java and exposed to Python. The signature must be a supported + * type for which coercions can be found for its parameters. + *

+ * When found in the classes that define a built-in type, this + * annotation results in a method definition, then a descriptor in + * the dictionary of the type. When found in the class that defines + * a built-in module, this annotation results in a method definition + * in the module specification, and a bound method in the dictionary + * of each module instance created from it. + *

+ * Annotations may appear on the parameters of a method annotated + * with {@code PythonMethod}. These further describe the method, + * defining the parameters as positional-only parameters, or + * providing default values. A method may also be annotated with a + * documentation string (in the Python sense), by means of the + * @{@link DocString} annotation. + *

+ * In types that accept multiple implementations, more than one + * method of the same name may be annotated {@code PythonMethod}. + * Only one may be the primary definition (see + * {@link PythonMethod#primary()}), and only in that one are the + * documentation string and parameter annotations effective. (It + * need not be the first definition.) These annotations on the + * primary definition define the signature that Python sees. + */ + @Documented + @Retention(RUNTIME) + @Target(METHOD) + @interface PythonMethod { + + /** + * Exposed name of the method if different from the declaration. + * + * @return name of the method + */ + String value() default ""; + + /** + * The element {@code primary=false} is used to indicate that the + * annotated method is not the primary definition. + * + * @return {@code true} (the default) if and only if this is the + * primary definition of the method + */ + boolean primary() default true; + + /** + * The element {@code positionalOnly=false} is used to indicate that + * the arguments in a call to the annotated method may be provided + * by keyword. This provides the call with the semantics of a method + * defined in Python, where

+         * def g(a, b, c):
+         *     print(a, b, c)
+         * 
may be called as
+         * >>> g(b=2, c=3, a=1)
+         * 1 2 3
+         * >>> g(**dict(b=2, c=3, a=1))
+         * 1 2 3
+         * 
It is as if we had annotated an imaginary parameter before + * the first declared parameter (or {@code self}) with + * @{@link PositionalOnly}. + *

+ * The default {@code positionalOnly=true} is the more frequent case for + * built-in methods, although it is the opposite of the default for + * methods defined in Python where it would have to be expressed as + * {@code def g(a, b, c, /)}. + * + * @return {@code false} if arguments may be provided by keyword, + * {@code true} (the default) otherwise + */ + boolean positionalOnly() default true; + } + + /** + * Identify a Python static method of a type or module defined in + * Java and exposed to Python. The signature must be a supported + * type for which coercions can be found for its parameters. + *

+ * When found in the classes that define a built-in type, this + * annotation results in a method definition, then a + * {@code staticmethod} object in the dictionary of the type. When + * found in the class that defines a built-in module, this + * annotation results in a method definition in the module + * specification, and an unbound method in the dictionary of each + * module instance created from it. + *

+ * Annotations may appear on the parameters of a method annotated + * with {@code PythonStaticMethod}. These further describe the + * method, defining the parameters as positional-only parameters, or + * providing default values. A method may also be annotated with a + * documentation string (in the Python sense), by means of the + * @{@link DocString} annotation. + *

+ * Only one method of the given name, in a given class, may be + * annotated as a {@code PythonStaticMethod}. + */ + @Documented + @Retention(RUNTIME) + @Target(METHOD) + @interface PythonStaticMethod { + + /** + * Exposed name of the function if different from the declaration. + * + * @return name of the function + */ + String value() default ""; + + /** + * The element {@code positionalOnly=false} is used to indicate that + * the arguments in a call to the annotated method may be provided + * by keyword. This provides the call with the semantics of a + * function defined in Python, where

+         * def g(a, b, c):
+         *     print(a, b, c)
+         * 
may be called as
+         * >>> g(b=2, c=3, a=1)
+         * 1 2 3
+         * >>> g(**dict(b=2, c=3, a=1))
+         * 1 2 3
+         * 
It is as if we had annotated an imaginary parameter before + * the first declared parameter (or {@code self}) with + * @{@link PositionalOnly}. + *

+ * The default {@code positionalOnly=true} is the more frequent case for + * built-in functions, although it is the opposite of the default for + * methods defined in Python where it would have to be expressed as + * {@code def g(a, b, c, /)}. + * + * @return {@code false} if arguments may be provided by keyword, + * {@code true} (the default) otherwise + */ + boolean positionalOnly() default true; + } + + /** + * Specify the documentation string ({@code __doc__}) for a method, + * field, etc. defined in Java and exposed to Python. + */ + @Documented + @Retention(RUNTIME) + @Target({METHOD, FIELD, TYPE}) + @interface DocString { + String value(); + } + + /** + * Override the name of a parameter to a method defined in Java, as + * it will appear to Python (in generated signatures and error + * messages). It is preferable to use a name in Java that is + * conventional for Python, and it is only necessary to annotate one + * when the conventional name is impossible (e.g. "new"). + */ + @Documented + @Retention(RUNTIME) + @Target(PARAMETER) + @interface Name { + String value(); + } + + /** + * Declare that the annotated parameter is the last positional only + * parameter. This is equivalent to following it with ", /" in a + * Python signature. + */ + @Documented + @Retention(RUNTIME) + @Target(PARAMETER) + @interface PositionalOnly {} + + /** + * Declare that the annotated parameter is the first keyword only + * parameter. This is equivalent to preceding it with "*, " in a + * Python signature. + */ + @Documented + @Retention(RUNTIME) + @Target(PARAMETER) + @interface KeywordOnly {} + + /** + * Provide default value for the annotated parameter. This is + * equivalent to following it with "=" in a Python signature. + * Allowed values are a small subset of the Python literals, + * presented as a {@code String}: "None", an integer, or a + * single-quoted string. For example

+     *  PyList split(
+     *      @Default("None") Object sep,
+     *      @Default("-1") int maxsplit
+     *  )
+     * 
+ */ + @Documented + @Retention(RUNTIME) + @Target(PARAMETER) + @interface Default { + String value(); + } + + /** + * Declare that the annotated parameter is the collector for excess + * positional arguments. This is equivalent to preceding the name + * with "*" in a Python signature. The type must be {@link PyTuple}. + */ + @Documented + @Retention(RUNTIME) + @Target(PARAMETER) + @interface PositionalCollector {} + + /** + * Declare that the annotated parameter is the collector for excess + * keyword arguments. This is equivalent to preceding the name with + * "**" in a Python signature. The type must be {@link PyDict}. + */ + @Documented + @Retention(RUNTIME) + @Target(PARAMETER) + @interface KeywordCollector {} + + /** + * Identify a field of a Python object as an exposed attribute. Get, + * set and delete operations are provided automatically on a + * descriptor that will be entered in the dictionary of the type + * being defined. If the field is Java {@code final} it will be + * read-only. + *

+ * Some primitive types and {@code String} receive special support + * for conversion from Python objects. A field of type + * {@code Object} may easily be made a member and will then receive + * any Python object. + *

+ * The annotated field may have any Java reference type. In that + * case, an attempt to assign a Python object of the wrong Java type + * will raise a {@link TypeError}. This makes it possible to declare + * an attribute of a specific Python type. For example, one can enforce + * {@code tuple} values by declaring the field as a {@link PyTuple}. + * The field would also accept Python sub-classes of the attribute + * type, since they must be sub-classes in Java too. + *

+ * This approach creates a limitation where the corresponding Python + * type has multiple Java implementations not related by Java + * inheritance and is not specially provided for (like + * {@code String}). The set operation of the {@link Member} + * attribute will reject instances that have the intended Python + * type but non-matching Java type (with a confusing + * {@link TypeError} to boot). A writable attribute of that type + * should be implemented as {@code Object} or using explicit + * {@link Getter}, {@link Setter} and {@link Deleter} methods. + * + */ + @Documented + @Retention(RUNTIME) + @Target(FIELD) + @interface Member { + + /** + * Exposed name of the member if different from the field. + * + * @return name of the attribute + */ + String value() default ""; + + /** @return true if read-only. */ + boolean readonly() default false; + + /** + * A member may be {@code null} from Java or deleted from Python (if + * not read-only). In this condition: + *

    + *
  • for a member annotated with {@code optional=true}, attempts + * to {@code get} or {@code delete} the member will produce an + * {@link AttributeError}, until it is set again.
  • + *
  • where {@code optional=false} (default), a {@code get} will + * return {@code None} and {@code delete} will have no effect.
  • + *
+ * + * @return true if access following delete will raise an error + */ + boolean optional() default false; + } + + /** + * Identify a method as that to be called during a Python call to + * {@code __getattribute__} naming an exposed attribute. + *

+ * The signature must be {@code ()T} where {@code T} can be + * {@code Object} if the implementor has no reason to do otherwise. + * (One reason might be type safety when calling the same method + * from Java.) The annotated method is responsible for converting to + * {@code T} from however the attribute is represented internally to + * the type. + */ + @Documented + @Retention(RUNTIME) + @Target(METHOD) + @interface Getter { + + /** + * Exposed name of the attribute, if different from the Java method + * name. + * + * This name will relate the {@link Getter}, {@link Setter} and + * {@link Deleter} in a single descriptor. + * + * @return name of the attribute + */ + String value() default ""; + } + + /** + * Identify a method as that to be called during a Python call to + * {@code __setattr__} naming an exposed attribute. + *

+ * The signature must be {@code (T)V} where {@code T} is often + * {@code Object}. The annotated method is responsible for + * converting this to the form in which the attribute is represented + * internally to the type. If {@code T} is something more specific + * than {@code Object}, a cast occurs to this Java type during the + * descriptor call, which if it fails will raise a Python + * {@link TypeError}. + */ + @Documented + @Retention(RUNTIME) + @Target(METHOD) + @interface Setter { + + /** + * Exposed name of the attribute, if different from the Java method + * name. + * + * This name will relate the {@link Getter}, {@link Setter} and + * {@link Deleter} in a single descriptor. + * + * @return name of the attribute + */ + String value() default ""; + } + + /** + * Identify a method as that to be called during a Python call to + * {@code __delattr__} naming an exposed attribute. + *

+ * The signature must be {@code ()V}. + */ + @Documented + @Retention(RUNTIME) + @Target(METHOD) + @interface Deleter { + + /** + * Exposed name of the attribute, if different from the Java method + * name. + * + * This name will relate the {@link Getter}, {@link Setter} and + * {@link Deleter} in a single descriptor. + * + * @return name of the attribute + */ + String value() default ""; + } + + /** + * Documentation-only annotation reminding us that the defining + * class guarantees not to change the contents. If a new value + * is assigned, it will be a new array. It is therefore safe to take + * a reference to this array and treat it as frozen (e.g. to expose + * it as a tuple). There is no enforcement of this contract at run + * time. + */ + @Documented + @Target(FIELD) + @interface FrozenArray {} +} diff --git a/core/src/main/java/org/python/core/Exposer.java b/core/src/main/java/org/python/core/Exposer.java new file mode 100644 index 000000000..e1fb1e554 --- /dev/null +++ b/core/src/main/java/org/python/core/Exposer.java @@ -0,0 +1,1218 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import java.lang.annotation.Annotation; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.reflect.Member; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.lang.reflect.Parameter; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.StringJoiner; +import java.util.TreeSet; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; + +import org.python.base.InterpreterError; +import org.python.base.MethodKind; +import org.python.core.Exposed.Default; +import org.python.core.Exposed.DocString; +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.KeywordCollector; +import org.python.core.Exposed.KeywordOnly; +import org.python.core.Exposed.Name; +import org.python.core.Exposed.PositionalCollector; +import org.python.core.Exposed.PositionalOnly; +import org.python.core.Exposed.PythonMethod; +import org.python.core.Exposed.PythonStaticMethod; +import org.python.core.ModuleDef.MethodDef; + +/** + * An object for tabulating the attributes of classes that define + * Python types or modules. + */ +abstract class Exposer { + + /** + * The index of intermediate descriptions by name, in which we may + * locate specifications already made or begun, using the name. + */ + final Map specs; + + /** + * The table of intermediate descriptions for methods (instance, + * static and class). They will become {@link MethodDef}s, and + * eventually either descriptors in a built-in object type or + * methods bound to instances of a module type. Every entry here is + * also a value in {@link #specs}. + */ + final Set methodSpecs; + + /** Construct the base with its table of entries. 
*/ + protected Exposer() { + this.specs = new HashMap<>(); + this.methodSpecs = new TreeSet<>(); + } + + /** @return which {@link ScopeKind} of {@code Exposer} is this? */ + abstract ScopeKind kind(); + + /** + * On behalf of the given module defined in Java, build a + * description of the attributes discovered by introspection of the + * class provided. + *

+ * Attributes are identified by annotations. (See {@link Exposed}.) + * + * @param definingClass to introspect for members + * @return exposure result + * @throws InterpreterError on errors of definition + */ + static ModuleExposer exposeModule(Class definingClass) throws InterpreterError { + // Create an instance of Exposer to hold specs, type, etc. + ModuleExposer exposer = new ModuleExposer(); + // Let the exposer control the logic + exposer.expose(definingClass); + return exposer; + } + + /** + * On behalf of the given type defined in Java, build a description + * of the attributes discovered by introspection of the class (or + * classes) provided. + *

+ * Special methods are identified by their reserved name, while + * other attributes are identified by annotations. (See + * {@link Exposed}.) + *

+ * In those attributes that reference their defining Python type + * (descriptors), the {@code type} object will be referenced (as + * {@link Descriptor#objclass}). It is not otherwise accessed, since + * it is (necessarily) incomplete at this time. + * + * @param type to which these attributes apply + * @param definingClass to introspect for members + * @param methodClass additional class to introspect for members (or + * {@code null}) + * @return a type exposer able to deliver the attributes + * @throws InterpreterError on errors of definition + */ + static TypeExposer exposeType(PyType type, Class definingClass, Class methodClass) + throws InterpreterError { + + // Create an instance of Exposer to hold specs, type. + TypeExposer exposer = new TypeExposer(type); + + // Scan the defining class for definitions + exposer.expose(definingClass); + + // Scan the supplementary class for definitions + if (methodClass != null) { exposer.expose(methodClass); } + + // For each definition we found, add the attribute + return exposer; + } + + /** + * Add to {@link #specs}, definitions found in the given class and + * annotated for exposure. (Note that the override + * {@link TypeExposer#scanJavaMethods(Class)} also adds a method if + * it has the name of a special method.) + * + * @param defsClass to introspect for definitions + * @throws InterpreterError on duplicates or unsupported types + */ + void scanJavaMethods(Class defsClass) throws InterpreterError { + // Iterate over methods looking for the relevant annotations + for (Class c : superClasses(defsClass)) { + for (Method m : c.getDeclaredMethods()) { + PythonMethod a = m.getDeclaredAnnotation(PythonMethod.class); + if (a != null) { addMethodSpec(m, a); } + } + } + } + + /** + * Walk down to a given class through all super-classes that might + * contain items to expose. We do not need to include classes not + * created with a knowledge of Jython.
(This is why it doesn't start + * with {@code java.lang.Object}). + * + * @param c given ending class + * @return super-classes descending to {@code c} + */ + static Collection> superClasses(Class c) { + LinkedList> classes = new LinkedList<>(); + while (c != null && c != Object.class) { + classes.addFirst(c); + c = c.getSuperclass(); + } + return classes; + } + + /** + * Process an annotation that identifies a method of a Python type + * or module defined in Java as one to be exposed to Python, into a + * specification for a method descriptor, and add it to the table of + * specifications by name. + * + * @param anno annotation encountered + * @param meth method annotated + * @throws InterpreterError on duplicates or unsupported types + */ + void addMethodSpec(Method meth, PythonMethod anno) throws InterpreterError { + // For clarity, name lambda expressions for the actions + BiConsumer addMethod = + // Add method m to spec ms + (MethodSpec ms, Method m) -> { + ms.add(m, anno.primary(), anno.positionalOnly(), MethodKind.INSTANCE); + }; + Function cast = + // Test and cast a found Spec to MethodSpec + spec -> spec instanceof MethodSpec ? (MethodSpec)spec : null; + // Now use the generic create/update + addSpec(meth, anno.value(), cast, (String name) -> new MethodSpec(name, kind()), + ms -> methodSpecs.add(ms), addMethod); + } + + /** + * Process an annotation that identifies a method of a Python type + * or module defined in Java as one to be exposed to Python, into a + * specification for a method descriptor, and add it to the table of + * specifications by name. 
+ * + * @param anno annotation encountered + * @param meth method annotated + * @throws InterpreterError on duplicates or unsupported types + */ + void addStaticMethodSpec(Method meth, PythonStaticMethod anno) throws InterpreterError { + // For clarity, name lambda expressions for the actions + BiConsumer addMethod = + // Add method m to spec ms + (StaticMethodSpec ms, + Method m) -> { ms.add(m, true, anno.positionalOnly(), MethodKind.STATIC); }; + Function cast = + // Test and cast a found Spec to StaticMethodSpec + spec -> spec instanceof StaticMethodSpec ? (StaticMethodSpec)spec : null; + // Now use the generic create/update + addSpec(meth, anno.value(), cast, (String name) -> new StaticMethodSpec(name, kind()), + ms -> methodSpecs.add(ms), addMethod); + } + + /** + * Create an exception with a message along the lines "'NAME', + * already exposed as SPEC, cannot be NEW_SPEC" where the + * place-holders are filled from the corresponding arguments (or + * their names or type names). + * + * @param name being defined + * @param member field or method annotated + * @param newSpec of the new entry apparently requested + * @param priorSpec of the inconsistent, existing entry + * @return the required error + */ + static InterpreterError duplicateError(String name, Member member, Spec newSpec, + Spec priorSpec) { + String memberName = member.getName(); + String memberString = memberName == name ? "" : " (called '" + memberName + "' in source)"; + String priorSpecType = priorSpec.annoClassName(); + String newSpecType = newSpec.annoClassName(); + if (priorSpecType.equals(newSpecType)) { newSpecType = "redefined"; } + return new InterpreterError(ALREADY_EXPOSED, name, memberString, priorSpecType, + newSpecType); + } + + private static final String ALREADY_EXPOSED = "'%s'%s, already exposed as %s, cannot be %s"; + + /** + * A helper that avoids repeating nearly the same code for adding + * each particular sub-class of {@link Spec} when a method is + * encountered. 
The implementation finds or creates a {@code Spec} + * by the given name or method name. It then adds this {@code Spec} + * to {@link #specs}. The caller provides a factory method, in case + * a new {@code Spec} is needed, a method for adding the Spec to a + * type-specific list, and a method for adding the method to the + * {@code Spec}. + * + * @param the type of {@link Spec} being added or added to. + * @param m the method being adding to the {@code MS} + * @param name specified in the annotation or {@code null} + * @param cast to the {@code MS} if possible or {@code null} + * @param makeSpec constructor for an {@code MS} + * @param addSpec function to add the {@code MS} to the proper list + * @param addMethod function to update the {@code MS} with a method + */ + void addSpec(Method m, String name, Function cast, // + Function makeSpec, // + Consumer addSpec, // + BiConsumer addMethod) { + + // The name is as annotated or the "natural" one + if (name == null || name.length() == 0) + name = m.getName(); + + // Find any existing definition + Spec spec = specs.get(name); + MS entry; + if (spec == null) { + // A new entry is needed + entry = makeSpec.apply(name); + specs.put(entry.name, entry); + addSpec.accept(entry); + addMethod.accept(entry, m); + } else if ((entry = cast.apply(spec)) != null) { + // Existing entry will be updated + addMethod.accept(entry, m); + } else { + /* + * Existing entry is not compatible, but make a loose entry on which + * to base the error message. + */ + entry = makeSpec.apply(name); + addMethod.accept(entry, m); + throw duplicateError(name, m, entry, spec); + } + } + + /** + * The base of classes that describe a named, built-in object, + * during the exposure process. Instances of {@code Exposer.Spec} + * are created, and added to a collection held by the exposer, as + * each definition is encountered in a defining class. The + * annotation and other factors determine the particular subclass of + * {@code Exposer.Spec} produced. + *

+ * In cases where more than one Java definition contributes to a + * single exposed attribute, {@code Spec}s are updated as successive + * definitions are encountered. + *

+ * When exposing attributes of a Python type, the actual object to + * be entered in a dictionary of a type or module is obtained by a + * call to {@link #asAttribute(PyType, Lookup)}. + */ + abstract static class Spec implements Comparable { + + /** The Python name of the method being defined. */ + final String name; + + /** The kind of scope (type or module) being defined. */ + final ScopeKind scopeKind; + + /** Documentation string for the (eventual) descriptor. */ + String doc = null; + + /** + * @param name of member + * @param scopeKind module or type? + */ + Spec(String name, ScopeKind scopeKind) { + this.name = name; + this.scopeKind = scopeKind; + } + + /** + * Create an attribute for the type being defined (suitable as an + * entry in its dictionary). + * + * @param objclass defining type + * @param lookup authorisation to access methods or fields + * @return attribute to add + * @throws InterpreterError on specification errors + */ + abstract Object asAttribute(PyType objclass, Lookup lookup) throws InterpreterError; + + /** @return the documentation string (or {@code null}) */ + String getDoc() { return doc; } + + /** + * Name the built-in being defined from a Java perspective, mostly + * for use in messages regarding errors in definition. + * + * @return the Java name + */ + abstract String getJavaName(); + + /** + * The class of annotation that creates a specification of this + * type. This is primarily for creating for error messages that + * direct the author of an exposed class to annotations being used + * incompatibly. {@code WrapperSpec}s return {@code null} as special + * methods are not identified by an annotation. + * + * @return type of thing exposed. + */ + abstract Class annoClass(); + + /** + * Check for a @{@link DocString} annotation, and set the + * document string (but only once).. 
+ * + * @param method that may bear the annotation + * @throws InterpreterError if {@link #doc} is already set + */ + void maybeAddDoc(Method method) throws InterpreterError { + // There may be a @DocString annotation + DocString docAnno = method.getAnnotation(DocString.class); + if (docAnno != null) { + if (this.doc == null) { + this.doc = docAnno.value(); + } else { + throw new InterpreterError("%s %s documented twice", annoClass(), + getJavaName()); + } + } + } + + /** + * String version of the kind of specification this is, expressed as + * the type of annotation that gave rise to it. + * + * @return annotation type name + */ + protected String annoClassName() { + Class ac = annoClass(); + if (ac == Annotation.class) { + // Special methods recognised by name, so no annotation + return "special method"; + } else if (ac == Getter.class) { + // Since could also be @Setter or @Deleter + return "get-set attribute"; + } else { + return ac.getSimpleName(); + } + } + + @Override + public int compareTo(Spec o) { return name.compareTo(o.name); } + + /** + * Check that a specification is complete and consistent before use. + * A specification is built incrementally, so certain problems are + * only detectable when it is supposed complete. + * + * @throws InterpreterError describing any problem detected + */ + public abstract void checkFormation() throws InterpreterError; + } + + /** + * A specialisation of {@link Spec} to describe, through one or more + * Java methods, a named, built-in method-like object, during the + * exposure process. + */ + static abstract class BaseMethodSpec extends Spec { + + /** Collects the methods declared (often just one). */ + final List methods; + + BaseMethodSpec(String name, ScopeKind scopeKind) { + super(name, scopeKind); + this.methods = new ArrayList<>(1); + } + + /** + * Add a method implementation to the collection.
+ * + * @param method to add to {@link #methods} + */ + void add(Method method) { methods.add(method); } + + /** @return a name designating the method */ + @Override + String getJavaName() { + StringBuilder b = new StringBuilder(64); + if (!methods.isEmpty()) { + // It shouldn't matter, but take the last added + Method method = methods.get(methods.size() - 1); + b.append(method.getDeclaringClass().getSimpleName()); + b.append('.'); + b.append(method.getName()); + } else { + // Take the name from the Spec instead + b.append(name); + } + return b.toString(); + } + + @Override + public String toString() { + return String.format("%s(%s[%d])", getClass().getSimpleName(), name, methods.size()); + } + + /** + * Insert a {@code MethodHandle h} into a list, such that every + * handle in the list, of which the first parameter type is + * assignable from the first parameter type of {@code h}, will + * appear after {@code h} in the list. If there are none such, + * {@code h} is added at the end. The resulting list is partially + * ordered, and has the property that, in a forward search for a + * handle applicable to a given class, the most specific match is + * found first. + * + * @param list to add h into + * @param h to insert/add + */ + protected static void addOrdered(LinkedList list, MethodHandle h) { + // Type of first parameter of h + Class c = h.type().parameterType(0); + // We'll scan until a more general type is found + ListIterator iter = list.listIterator(0); + while (iter.hasNext()) { + MethodHandle i = iter.next(); + Class d = i.type().parameterType(0); + if (d.isAssignableFrom(c)) { + /* + * d is more general than c (i is more general than h): back up and + * position just before i. + */ + iter.previous(); + break; + } + } + // Insert h where the iterator stopped. Could be the end. + iter.add(h); + } + + /** + * Convenience function to compose error when creating a descriptor + * or method definition, when the un-reflecting to a method handle + * fails. 
+ * + * @param m method we were working on + * @param e what went wrong + * @return an exception to throw + */ + protected static InterpreterError cannotGetHandle(Method m, IllegalAccessException e) { + return new InterpreterError(e, CANNOT_GET_HANDLE, m.getName(), m.getDeclaringClass()); + } + + private static final String CANNOT_GET_HANDLE = "cannot get method handle for '%s' in '%s'"; + + /** + * Convenience function to compose error when creating a descriptor + * or method definition and the arguments of the method handle are + * unexpected number in type or number. + * + * @param type being exposed + * @param mh handle from reflected method + * @return an exception to throw + */ + protected InterpreterError methodSignatureError(PyType type, MethodHandle mh) { + return new InterpreterError(UNSUPPORTED_SIG, name, type.getName(), mh.type(), + annoClassName()); + } + + private static final String UNSUPPORTED_SIG = + "method %.50s in '%.50s' " + "has wrong signature %.100s for %.100s"; + } + + /** + * Specification in which we assemble information about a method in + * advance of creating a method descriptor or other callable. + *

+ * Objects described by this class are defined by a Java signature + * in which parameters may be annotated to modify their treatment by + * Python. An argument parser and a {@link MethodDef} will be + * created to specify that treatment. + */ + static abstract class CallableSpec extends BaseMethodSpec { + + /** + * Names of parameters not including the {@code self} of instance + * methods. (The names are the parameters to the method in the first + * call to {@link #add(Method)}). + */ + String[] parameterNames; + + /** + * The number of positional or keyword parameters, excluding the + * "collector" ({@code *args} and {@code **kwargs}) arguments. Its + * value is {@code Integer.MAX_VALUE} until the primary definition + * of the method has been encountered. + */ + int regargcount = Integer.MAX_VALUE; + + /** + * The number of positional-only arguments (after {@code self}). + * This must be specified in the method declaration marked as + * primary if more than one declaration of the same name is + * annotated {@link PythonMethod}. Its value is + * {@code Integer.MAX_VALUE} until the primary definition of the + * method has been encountered, after which it is somewhere between + * 0 and {@link #regargcount} inclusive. + */ + int posonlyargcount = Integer.MAX_VALUE; + + /** + * The number of keyword-only parameters. This is derived from the + * {@link KeywordOnly} annotation. If more than one declaration of + * the same name is annotated {@link PythonMethod}, it may only be + * specified in the method declaration marked as primary. + */ + int kwonlyargcount; + + /** + * Default values supplied on positional parameters (not just + * positional-only parameters), or {@code null}. + */ + Object[] defaults = null; + + /** + * Default values supplied on keyword-only parameters, or + * {@code null}. + */ + Map kwdefaults = null; + + /** + * Position of the excess positional collector in + * {@link #parameterNames} or {@code -1} if there isn't one. 
+ */ + int varArgsIndex = -1; + + /** + * Position of the excess keywords collector in + * {@link #parameterNames} or {@code -1} if there isn't one. + */ + int varKeywordsIndex = -1; + + /** + * Argument parser constructed from the other arguments. (Cache for + * {@link #getParser()}.) + */ + ArgParser parser; + + /** Kind of method (from a Python perspective). */ + MethodKind methodKind; + + /** + * Create a description for a callable. + * + * @param name of method. + * @param scopeKind module or type? + */ + CallableSpec(String name, ScopeKind scopeKind) { super(name, scopeKind); } + + /** + * Check that {@link #processParameters(Method, boolean)} has been + * called for a primary definition. + */ + private boolean isDefined() { + return parameterNames != null && regargcount <= parameterNames.length; + } + + /** + * @return true if positional argument collector defined. + */ + private boolean hasVarArgs() { return varArgsIndex >= 0; } + + /** + * @return true if keyword argument collector defined. + */ + private boolean hasVarKeywords() { return varKeywordsIndex >= 0; } + + /** + * Get the argument parser belonging to this {@link CallableSpec}. + * The many attributes established by + * {@link #add(Method, boolean, boolean, MethodKind)}, and the + * parameters of the primary call, determine the attributes of this + * {@link CallableSpec}. + *

+ * After the processing the primary call, the method signature is + * known, and it is possible to create a parser. Before that, this + * method will return {@code null}. + * + * @return the parser + */ + ArgParser getParser() { + if (parser == null && parameterNames != null + && parameterNames.length >= posonlyargcount) { + parser = new ArgParser(name, scopeKind, methodKind, parameterNames, regargcount, + posonlyargcount, kwonlyargcount, varArgsIndex >= 0, varKeywordsIndex >= 0); + parser.defaults(defaults).kwdefaults(kwdefaults); + } + return parser; + } + + /** + * Produce a method definition from this specification that + * references a method handle on the (single) defining method and + * the parser created from this specification. This is used in the + * construction of a module defined in Java (a {@link ModuleDef}). + * + * @param lookup authorisation to access methods + * @return corresponding method definition + * @throws InterpreterError on lookup prohibited + */ + MethodDef getMethodDef(Lookup lookup) throws InterpreterError { + assert methods.size() == 1; + Method m = methods.get(0); + MethodHandle mh; + try { + mh = lookup.unreflect(m); + } catch (IllegalAccessException e) { + throw cannotGetHandle(m, e); + } + return new MethodDef(getParser(), mh); + } + + /** + * Add a method implementation. (A test that the signature is + * acceptable follows when we construct the {@link PyMethodDescr}.) + * + * @param method to add to {@link #methods} + * @param primary definition is the primary one + * @param positionalOnly this method begins with positional-only + * parameters + * @param methodKind instance, static or class? 
+ */ + void add(Method method, boolean primary, boolean positionalOnly, MethodKind methodKind) + throws InterpreterError { + + // Check for defined static (in Java, not Python) + int modifiers = method.getModifiers(); + boolean javaStatic = (modifiers & Modifier.STATIC) != 0; + + int n = method.getParameterCount(); + + /* + * Depending on method kind, when Java static, the parameter list + * may have to omit the first declared parameter. + */ + boolean skipFirst; + if (methodKind == MethodKind.INSTANCE) { + /* + * In the default INSTANCE case, we are implementing a Python + * instance method. If the Java method is static, skip the first + * parameter "self". + */ + skipFirst = javaStatic; + + if (javaStatic && (n < 1 || method.getParameterTypes()[0].isPrimitive())) { + throw new InterpreterError(MUST_HAVE_SELF, getJavaName(), scopeKind.selfName); + } + + } else if (javaStatic) { + /* + * If CLASS, The (static) Java signature begins with a {@link + * PyType}, but if STATIC, the parameters from a Python perspective + * are as in the Java definition. + */ + skipFirst = methodKind == MethodKind.CLASS; + + } else { + /* + * These other cases can *only* be implemented as static in Java. + */ + throw new InterpreterError(MUST_BE_JAVA_STATIC, getJavaName(), + methodKind.toString().toLowerCase()); + } + + /* + * If not declared static in Java, the effective signature in Python + * must have a leading "self" (or "module") parameter not declared + * in the Method object. + */ + if (skipFirst) { n -= 1; } + + if (methods.isEmpty()) { + /* + * First method definition of this name. Allocate storage for + * parameter names. We shall store the names only if this is also + * the primary definition, (as well as the first), but will always + * check the number of parameters against this size. + */ + parameterNames = n == 0 ? NO_STRINGS : new String[n]; + + } else if (n != parameterNames.length) { + // Number of parameters differs. 
+ throw new InterpreterError(FURTHER_DEF_ARGS, getJavaName(), n, + parameterNames.length); + } + + // Add to methods + super.add(method); + + if (primary) { + // Primary definition defines the signature + if (isDefined()) + throw new InterpreterError(ONE_PRIMARY, getJavaName()); + // Whether static, instance or class + this.methodKind = methodKind; + + /* + * If annotated positionalOnly=false, the method has no + * positional-only parameters. If not so annotated, then + * positionalOnly=true, and all parameters (after any "self") are + * positional-only, until a parameter annotated @PositionalOnly puts + * an end to that. + */ + if (!positionalOnly) { posonlyargcount = 0; } + + // There may be a @DocString annotation + DocString docAnno = method.getAnnotation(DocString.class); + if (docAnno != null) { doc = docAnno.value(); } + + /* + * Process the sequence of parameters and their annotations. + */ + processParameters(method, skipFirst); + + } else { + // This is not the primary definition + disallowAnnotation(method, DocString.class); + for (Parameter p : method.getParameters()) { disallowAnnotations(p); } + } + } + + @Override + public void checkFormation() throws InterpreterError { + if (methodKind == null) { + throw new InterpreterError(MUST_HAVE_PRIMARY, getJavaName()); + } + } + + /** Empty names array. 
*/ + private static final String[] NO_STRINGS = new String[0]; + + private static final String FURTHER_DEF_ARGS = + "Further definition of '%s' has %d (not %d) arguments"; + + private static final String ONE_PRIMARY = + "All but one definition of '%s' should have " + "element primary=false"; + + private static final String MUST_HAVE_PRIMARY = + "A primary definition of '%s' must be given"; + + private static final String MUST_BE_JAVA_STATIC = + "The definition of '%s' should be Java static " + + "because it is a Python %s method"; + + private static final String MUST_HAVE_SELF = + "Instance method '%s' should have a '%s' parameter " + "because it is Java static"; + + /** + * Scan the parameters of the method being defined looking for + * annotations that determine the specification of the method as + * exposed to Python, and which are held temporarily by this + * {@code MethodSpecification}. + *

+         * Although the annotations do not all work in isolation, their
+         * effect may be summarised:
+         * <table>
+         * <tr>
+         * <th>Annotation</th>
+         * <th>Effect on fields</th>
+         * </tr>
+         * <tr>
+         * <td>@{@link Name}</td>
+         * <td>Renames the parameter where needed (e.g. we want to call it
+         * "new"). This, or the simple parameter name, appears at the
+         * correct position in {@link #parameterNames}.</td>
+         * </tr>
+         * <tr>
+         * <td>@{@link Default}</td>
+         * <td>Provides the default value in {@link #defaults} or
+         * {@link #kwdefaults}.</td>
+         * </tr>
+         * <tr>
+         * <td>@{@link PositionalOnly}</td>
+         * <td>Sets {@link #posonlyargcount} to count the parameters up to
+         * and including that parameter.</td>
+         * </tr>
+         * <tr>
+         * <td>@{@link KeywordOnly}</td>
+         * <td>Determines {@link #kwonlyargcount} from a count of this and
+         * the regular (non-collector) arguments following.</td>
+         * </tr>
+         * <tr>
+         * <td>@{@link PositionalCollector}</td>
+         * <td>Designates the collector of excess arguments given by
+         * position. (Must follow all regular arguments.) Sets
+         * {@link #varArgsIndex}.</td>
+         * </tr>
+         * <tr>
+         * <td>@{@link KeywordCollector}</td>
+         * <td>Designates the collector of excess arguments given by
+         * keyword. (Must follow all regular arguments and any positional
+         * collector.) Sets {@link #varKeywordsIndex}.</td>
+         * </tr>
+         * </table>
+ *

+ * When implementing a Python instance method by means of a static + * Java method, the Java signature begins with a parameter + * representing the instance operated on. Also, when defining a + * Python class method the Java method must be static and the Java + * signature begins with a {@link PyType} parameter representing the + * type operated on. + *

+         * In both these cases, the client code will ask to skip this first
+         * parameter when building the description. This is because, at the
+         * point the parser is used, the corresponding first argument is
+         * being carried separately from the argument list. (Think of the
+         * way {@code str.replace} is processed, or {@code int.from_bytes}.)
+         *

+ * A Python static method, Python instance method defined by a Java + * instance method, and a top-level function in a module, all have + * parameters exactly as named in the {@code Method}. Any instance + * implied by a non-static declaration ("self" or "module") remains + * implicit. + * + * @param method being defined + * @param skipFirst skip the first declared parameter + */ + private void processParameters(Method method, boolean skipFirst) { + /* + * This should have the same logic as ArgParser.fromSignature, + * except that in the absence of a @PositionalOnly annotation, the + * default is as supplied by the method annotation (already + * processed). Rather than "/" and "*" markers in the parameter + * sequence, we find annotations on the parameters themselves. + */ + + // Collect the names of the parameters here + ArrayList names = new ArrayList<>(); + + // Count regular (non-collector) parameters + int count = 0; + + // Collect the default values here + ArrayList posDefaults = null; + + // Indices of specific markers + int kwOnlyIndex = Integer.MAX_VALUE; + + /* + * Scan parameters, looking out for Name, Default, PositionalOnly, + * KeywordOnly, PositionalCollector and KeywordCollector + * annotations. + */ + Parameter[] pp = method.getParameters(); + + for (int ip = skipFirst ? 1 : 0; ip < pp.length; ip++) { + + // The parameter currently being processed + Parameter p = pp[ip]; + + // index of parameter in Python != ip, possibly + int i = names.size(); + + // Use a replacement Python name if annotated @Name + Name name = p.getAnnotation(Name.class); + String paramName = name == null ? 
p.getName() : name.value(); + names.add(paramName); + + // Pick up all the other annotations on p + PositionalOnly pos = p.getAnnotation(PositionalOnly.class); + KeywordOnly kwd = p.getAnnotation(KeywordOnly.class); + Default def = p.getAnnotation(Default.class); + PositionalCollector posColl = p.getAnnotation(PositionalCollector.class); + KeywordCollector kwColl = p.getAnnotation(KeywordCollector.class); + + // Disallow these on the same parameter + notUsedTogether(method, paramName, pos, kwd, posColl, kwColl); + notUsedTogether(method, paramName, def, posColl); + notUsedTogether(method, paramName, def, kwColl); + + /* + * We have eliminated the possibility of disallowed combinations of + * annotations, so we can process the parameter types as + * alternatives. + */ + if (pos != null) { + // p is the (last) @PositionalOnly parameter + posonlyargcount = i + 1; + + } else if (kwd != null && kwOnlyIndex == Integer.MAX_VALUE) { + // p is the (first) @KeywordOnly parameter + kwOnlyIndex = i; + + } else if (posColl != null) { + // p is the @PositionalCollector + varArgsIndex = i; + + } else if (kwColl != null) { + // p is the @KeywordCollector + varKeywordsIndex = i; + } + + /* + * Check for a default value @Default. The value is a String we must + * interpret to Python. + */ + if (def != null) { + /* + * We know p is not a *Collector parameter, but our actions depend + * on whether it is positional or keyword-only. + */ + if (i < kwOnlyIndex) { + // p is a positional parameter with a default + if (posDefaults == null) + posDefaults = new ArrayList<>(); + posDefaults.add(eval(def.value())); + } else { // i >= kwOnlyIndex + // p is a keyword-only parameter with a default + if (kwdefaults == null) + kwdefaults = new HashMap(); + kwdefaults.put(paramName, eval(def.value())); + } + + } else if (posDefaults != null && i < kwOnlyIndex) { + /* + * Once we have started collecting positional default values, all + * subsequent positional parameters must have a default. 
+ */ + throw new InterpreterError(MISSING_DEFAULT, getJavaName(), paramName); + } + + /* + * Parameters not having *Collector annotations are "regular". Keep + * count of them, and check we have not yet defined either + * collector. + */ + if (kwColl == null) { + /* + * The parameter is a regular one or a collector of excess + * positional arguments. + */ + if (hasVarKeywords()) + // ... which comes after a keywords collector + throw new InterpreterError(FOLLOWS_KW_COLLECTOR, getJavaName(), paramName); + if (posColl == null) { + // The parameter is a regular one + if (hasVarArgs()) + // .. after a positional collector + throw new InterpreterError(FOLLOWS_POS_COLLECTOR, getJavaName(), + paramName); + // A regular one in the right place + count = i + 1; + } + } + } + + /* + * Some checks and assignments we can only do when we've seen all + * the parameters. + */ + regargcount = count; + posonlyargcount = Math.min(posonlyargcount, count); + kwonlyargcount = count - Math.min(kwOnlyIndex, count); + + if (posDefaults != null) { defaults = posDefaults.toArray(); } + + int n = names.size(); + assert n == parameterNames.length; + if (n > 0) { names.toArray(parameterNames); } + } + + private static final String PARAM = "'%s' parameter '%s' "; + private static final String MISSING_DEFAULT = PARAM + "missing default value"; + private static final String FOLLOWS_POS_COLLECTOR = + PARAM + "follows postional argument collector"; + private static final String FOLLOWS_KW_COLLECTOR = + PARAM + "follows keyword argument collector"; + private static final String ANNOTATIONS_TOGETHER = + PARAM + "annotations %s may not appear together"; + + /** + * Check that only one of the annotations (on a given parameter) is + * null. + * + * @param method within which parameter appears + * @param paramName its name + * @param anno the annotations to check + * @throws InterpreterError if more than one not {@code null}. 
+         */
+        private void notUsedTogether(Method method, String paramName, Annotation... anno)
+                throws InterpreterError {
+            /*
+             * Each element of anno is an annotation found on the parameter,
+             * or null where that annotation is absent. At most one of them
+             * may be present.
+             */
+            int count = 0;
+            for (Annotation a : anno) { if (a != null) { count++; } }
+            if (count > 1) {
+                // There is a problem: name the annotations actually present.
+                StringJoiner sj = new StringJoiner(",");
+                for (Annotation a : anno) {
+                    // Absent annotations are null and must be skipped here.
+                    if (a != null) { sj.add(a.annotationType().getSimpleName()); }
+                }
+                throw new InterpreterError(ANNOTATIONS_TOGETHER, getJavaName(), paramName, sj);
+            }
+        }
+
+        /**
+         * Poor man's eval() specifically for default values in built-in
+         * methods. The text is tried, in this order, as {@code None}, an
+         * integer, a float with a decimal point, and a quoted string.
+         * Anything else is returned unchanged.
+         *
+         * @param s text of the default value, or {@code null}
+         * @return the value that text denotes
+         */
+        private static Object eval(String s) {
+            if (s == null || s.equals("None")) {
+                return Py.None;
+            } else if (s.matches(REGEX_INT)) {
+                // Small integer if we can; big if we can't
+                BigInteger b = new BigInteger(s);
+                try {
+                    return b.intValueExact();
+                } catch (ArithmeticException e) {
+                    return b;
+                }
+            } else if (s.matches(REGEX_FLOAT)) {
+                // NOTE(review): this yields a java.lang.Float (single
+                // precision); confirm that is the intended Java type.
+                return Float.valueOf(s);
+            } else if (s.matches(REGEX_STRING)) {
+                // A quoted literal: the value is the text between the quotes
+                return s.substring(1, s.length() - 1);
+            } else {
+                // A somewhat lazy fall-back
+                return s;
+            }
+        }
+
+        /** An optionally negative run of decimal digits. */
+        private static final String REGEX_INT = "-?\\d+";
+        /** Digits with a decimal point and an optional exponent. */
+        private static final String REGEX_FLOAT = "[-+]?\\d+\\.\\d*((e|E)[-+]?\\d+)?";
+        /** Text in single or double quotes containing no quote of that kind. */
+        private static final String REGEX_STRING = "('[^']*'|\"[^\"]*\")";
+
+        /**
+         * Check that the method has no annotation of the given type.
+         *
+         * @param method to process
+         * @param annoClass type of annotation disallowed
+         * @throws InterpreterError if the annotation is present
+         */
+        private void disallowAnnotation(Method method, Class<? extends Annotation> annoClass) {
+            Annotation a = method.getAnnotation(annoClass);
+            if (a != null) {
+                String annoName = a.annotationType().getSimpleName();
+                throw new InterpreterError(SECONDARY_DEF_ANNO, getJavaName(), annoName);
+            }
+        }
+
+        private static final String SECONDARY_DEF_ANNO =
+                "Secondary definition of '%s' " + "has disallowed annotation '%s'";
+
+        /**
+         * Check that the parameter has no annotations @{@link Name},
+         * @{@link PositionalOnly}, and @{@link KeywordOnly}.
+ * + * @param p to process + */ + private void disallowAnnotations(Parameter p) { + for (Class annoClass : DISALLOWED_PAR_ANNOS) { + Annotation a = p.getAnnotation(annoClass); + if (a != null) { + String annoName = a.annotationType().getSimpleName(); + throw new InterpreterError(SECONDARY_DEF_PAR_ANNO, getJavaName(), p.getName(), + annoName); + } + } + } + + /** + * Parameter annotations disallowed on a secondary definition. + */ + private static final List> // + DISALLOWED_PAR_ANNOS = + List.of(Name.class, PositionalOnly.class, KeywordOnly.class, Default.class); + + private static final String SECONDARY_DEF_PAR_ANNO = + "Secondary definition of '%s' parameter '%s' " + "has disallowed annotation '%s'"; + } + + /** + * Specification in which we assemble information about a Python + * instance method in advance of creating a method definition or + * method descriptor. + */ + static class MethodSpec extends CallableSpec { + + MethodSpec(String name, ScopeKind scopeKind) { super(name, scopeKind); } + + @Override + Class annoClass() { return PythonMethod.class; } + + /** + * {@inheritDoc} + *

+ * In a type, the attribute must be represented by a descriptor for + * the Python method from this specification. This method create a + * {@code PyMethodDescr} from the specification. + *

+ * Note that a specification describes the methods as declared, and + * that there may be any number of them, even if there is only one + * implementation of the target type. The specification may + * therefore have collected multiple Java definitions of the same + * name. + * + * This method creates a descriptor that matches them to the + * accepted implementations of the owning class. The descriptor + * returned will contain one method handle for each accepted Java + * implementation of the owning Python class, chosen most closely to + * match the Java class of {@code self}. + * + * @param objclass Python type that owns the descriptor + * @param lookup authorisation to access members + * @return descriptor for access to the method + * @throws InterpreterError if the method type is not supported + */ + @Override + PyMethodDescr asAttribute(PyType objclass, Lookup lookup) throws InterpreterError { + + ArgParser ap = new ArgParser(name, scopeKind, MethodKind.INSTANCE, parameterNames, + regargcount, posonlyargcount, kwonlyargcount, varArgsIndex >= 0, + varKeywordsIndex >= 0); + ap.defaults(defaults).kwdefaults(kwdefaults); + + // Methods have self + this many args: + final int L = regargcount; + + /* + * There could be any number of candidates in the implementation. An + * implementation method "self" could match multiple accepted + * implementations of the type (e.g. Number matching Long and + * Integer). 
+ */ + LinkedList candidates = new LinkedList<>(); + for (Method m : methods) { + // Convert m to a handle (if L args and accessible) + try { + MethodHandle mh = lookup.unreflect(m); + if (mh.type().parameterCount() == 1 + L) + addOrdered(candidates, mh); + } catch (IllegalAccessException e) { + throw cannotGetHandle(m, e); + } + } + + return PyMethodDescr.fromParser(objclass, ap, candidates); + } + } + + /** + * Specification in which we assemble information about a Python + * static method in advance of creating a method definition + * {@link MethodDef} or method descriptor {@link PyMethodDescr}. + */ + static class StaticMethodSpec extends CallableSpec { + + StaticMethodSpec(String name, ScopeKind scopeKind) { super(name, scopeKind); } + + @Override + PyJavaFunction asAttribute(PyType objclass, Lookup lookup) { + // TODO Auto-generated method stub + return null; + } + + @Override + Class annoClass() { return PythonStaticMethod.class; } + } +} diff --git a/core/src/main/java/org/python/core/FastCall.java b/core/src/main/java/org/python/core/FastCall.java new file mode 100644 index 000000000..a17186645 --- /dev/null +++ b/core/src/main/java/org/python/core/FastCall.java @@ -0,0 +1,287 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.util.Arrays; + +/** + * Support direct calls from Java to the function represented by + * this object, potentially without constructing an argument array. + * Clients that know the number of arguments they provide are able + * to call {@code call(...)} with exactly that number. Callable + * objects that implement this interface may override signatures of + * {@code call(...)} that they implement most efficiently. + *

+ * This is an efficiency mechanism similar to the "fast call" paths + * in CPython. It may provide a basis for efficient call sites for + * function and method calling when argument lists are simple. + */ +interface FastCall { + + /** + * Invoke the target object with standard arguments + * ({@code Object[]} and {@code String[]}), providing all the + * argument values from the caller and names for those given by + * keyword. If no other methods are implemented, a call to any other + * interface method will land here with an array of the + * arguments.This is to provide implementations of {@code __call__} + * with a default when no more optimal call is possible. + *

+ * {@code np = args.length - names.length} arguments are given by + * position, and the keyword arguments are + * {{@code names[i]:args[np+i]}}. + * + * @implSpec An object that is a {@link FastCall} must support the + * standard call (and with the same result as the direct call). + * It must not call any other method in this interface, as that + * would risk creating a loop. + * @implNote The reason we do not name this method {@code __call__} + * is because that may be called directly from Python, and an + * object should have the chance to choose amongst the optimised + * implementations specified by this interface, finally + * resorting to {@link #call(Object[], String[])} if necessary. + * + * @param args all arguments given, positional then keyword + * @param names of keyword arguments or {@code null} + * @return result of the invocation + * @throws ArgumentError if the wrong number of arguments is given, + * or keywords where not expected. + * @throws Throwable from the implementation + */ + Object call(Object[] args, String[] names) throws ArgumentError, Throwable; + + /** + * Call the object with arguments given by position only. + * + * @implSpec The default implementation calls + * {@link #call(Object[], String[])} with a null array of names. + * + * @param args arguments given by position + * @return result of the invocation + * @throws ArgumentError if the wrong number of arguments is given. + * @throws Throwable from the implementation + */ + default Object call(Object[] args) throws ArgumentError, Throwable { + return call(args, null); + } + + /* The idea is to provide a series of specialisations e.g. */ + // Object call(arg0, arg1, arg2) { ... } + /* + * Implementations then override __call__(Object[], String[]), and + * all ones they can support efficiently, e.g. call(s), call(s, a), + * call(s, a, b) for an instance method with up to two arguments. + * Anything else is converted by a default implementation to + * call(Object[]). 
+ */ + /** + * Call the object with arguments given by position only. + * + * @implSpec The default implementation calls + * {@link #call(Object[])} with an empty array. + * + * @return result of the invocation + * @throws ArgumentError if zero arguments is the wrong number. + * @throws Throwable from the implementation + */ + default Object call() throws ArgumentError, Throwable { return call(Py.EMPTY_ARRAY); } + + /** + * Call the object with arguments given by position only. + * + * @implSpec The default implementation calls + * {@link #call(Object[])} with an array the single argument. + * + * @param a0 single argument (may be {@code self}) + * @return result of the invocation + * @throws ArgumentError if one argument is the wrong number. + * @throws Throwable from the implementation + */ + default Object call(Object a0) throws ArgumentError, Throwable { + return call(new Object[] {a0}); + } + + /** + * Call the object with arguments given by position only. + * + * @implSpec The default implementation calls + * {@link #call(Object[])} with an array of the arguments. + * + * @param a0 zeroth argument (may be {@code self}) + * @param a1 next argument + * @return result of the invocation + * @throws ArgumentError if two arguments is the wrong number. + * @throws Throwable from the implementation + */ + default Object call(Object a0, Object a1) throws ArgumentError, Throwable { + return call(new Object[] {a0, a1}); + } + + /** + * Call the object with arguments given by position only. + * + * @implSpec The default implementation calls + * {@link #call(Object[])} with an array of the arguments. + * + * @param a0 zeroth argument (may be {@code self}) + * @param a1 next argument + * @param a2 next argument + * @return result of the invocation + * @throws ArgumentError if three arguments is the wrong number. 
+ * @throws Throwable from the implementation + */ + default Object call(Object a0, Object a1, Object a2) throws ArgumentError, Throwable { + return call(new Object[] {a0, a1, a2}); + } + + /** + * Call the object with arguments given by position only. + * + * @implSpec The default implementation calls + * {@link #call(Object[])} with an array of the arguments. + * + * @param a0 zeroth argument (may be {@code self}) + * @param a1 next argument + * @param a2 next argument + * @param a3 next argument + * @throws ArgumentError if four arguments is the wrong number. + * @return result of the invocation + * @throws Throwable from the implementation + */ + default Object call(Object a0, Object a1, Object a2, Object a3) + throws ArgumentError, Throwable { + return call(new Object[] {a0, a1, a2, a3}); + } + + /** + * Call this object with the vector call protocol. This supports + * CPython byte code generated according to the conventions in + * PEP-590. + *

+     * The array {@code s} (which is often the interpreter stack)
+     * contains, at offset {@code p}, the {@code n} arguments, of which
+     * the last {@code names.length} are given by keyword (and may
+     * therefore not be in the order expected by the called object).
+     *
+     * @param s positional and keyword arguments
+     * @param p position of arguments in the array
+     * @param n number of positional and keyword arguments
+     * @param names of keyword arguments or {@code null}
+     * @return the return from the call to the object
+     * @throws ArgumentError if the wrong number of arguments is given,
+     *     or keywords were not expected.
+     * @throws TypeError if target is not callable
+     * @throws Throwable for errors raised in the function
+     */
+    // Compare CPython _PyObject_Vectorcall in abstract.h
+    // In CPython nargs counts only positional arguments
+    default Object vectorcall(Object[] s, int p, int n, String[] names)
+            throws ArgumentError, Throwable {
+        if (names == null || names.length == 0)
+            // No keywords: use the positional-only form
+            return vectorcall(s, p, n);
+        else {
+            // Copy the n arguments out of s for the standard call
+            Object[] args = Arrays.copyOfRange(s, p, p + n);
+            return call(args, names);
+        }
+    }
+
+    /**
+     * Call this object with the vector call protocol, in the case where
+     * no arguments were given by keyword. This supports CPython byte
+     * code generated according to the conventions in PEP-590, but
+     * specialised for this case.
+     *

+     * The array {@code s} (which is often the interpreter stack)
+     * contains, at offset {@code p}, the {@code n} arguments given by
+     * position.
+     *
+     * @param s positional arguments
+     * @param p position of arguments in the array
+     * @param n number of positional arguments
+     * @return the return from the call to the object
+     * @throws ArgumentError if the wrong number of arguments is given.
+     * @throws TypeError if target is not callable
+     * @throws Throwable for errors raised in the function
+     */
+    // Compare CPython _PyObject_Vectorcall in abstract.h
+    // In CPython nargs counts only positional arguments
+    default Object vectorcall(Object[] s, int p, int n) throws ArgumentError, Throwable {
+        /*
+         * Dispatch on the number of arguments to the fixed-arity
+         * call(...) forms. Java evaluates argument expressions left to
+         * right, so successive s[p++] read consecutive elements of s.
+         */
+        switch (n) {
+            case 0:
+                return call();
+            case 1:
+                return call(s[p]);
+            case 2:
+                return call(s[p++], s[p]);
+            case 3:
+                return call(s[p++], s[p++], s[p]);
+            case 4:
+                return call(s[p++], s[p++], s[p++], s[p]);
+            default:
+                // More than four: copy the n arguments into a new array
+                return call(Arrays.copyOfRange(s, p, p + n));
+        }
+    }
+
+    /**
+     * Translate an {@link ArgumentError} that resulted from a call to
+     * this {@code FastCall} object, and the arguments that were
+     * supplied in the call, to a Python {@link TypeError}.
+     *

+ * Any of the optimised {@code call(...)}, or + * {@code vectorcall(...)} methods in this interface may throw + * {@code ArgumentError} as a shorthand. (This is to keep code + * short, especially when it is a handle graph.) The caller should + * catch this close to the call and use this method to swap the + * {@code ArgumentError} for a Python {@code TypeError}. + * + * @param ae previously thrown by this object + * @param args all arguments given, positional then keyword + * @param names of keyword arguments or {@code null} + * @return Python {@code TypeError} to throw + */ + TypeError typeError(ArgumentError ae, Object[] args, String[] names); + + /** + * As {@link #typeError(ArgumentError, Object[], String[])} when + * there were no arguments by keyword. + * + * @param ae previously thrown by this object + * @param args all arguments given, positional then keyword + * @return Python {@code TypeError} to throw + */ + default TypeError typeError(ArgumentError ae, Object[] args) { + return typeError(ae, args, null); + } + + /** + * As {@link #typeError(ArgumentError, Object[], String[])} for + * {@link #vectorcall(Object[], int, int, String[])} arguments. + * + * @param ae previously thrown by this object + * @param s positional and keyword arguments + * @param p position of arguments in the array + * @param n number of positional and keyword arguments + * @param names of keyword arguments or {@code null} + * @return Python {@code TypeError} to throw + */ + default TypeError typeError(ArgumentError ae, Object[] s, int p, int n, String[] names) { + Object[] args = Arrays.copyOfRange(s, p, p + n); + return typeError(ae, args, names); + } + + /** + * As + * {@link #typeError(ArgumentError, Object[], int, int, String[])} + * when there were no arguments by keyword. 
+ * + * @param ae previously thrown by this object + * @param s positional and keyword arguments + * @param p position of arguments in the array + * @param n number of positional arguments + * @return Python {@code TypeError} to throw + */ + default TypeError typeError(ArgumentError ae, Object[] s, int p, int n) { + return typeError(ae, s, p, n, null); + } +} diff --git a/core/src/main/java/org/python/core/IndexError.java b/core/src/main/java/org/python/core/IndexError.java new file mode 100644 index 000000000..93ae4a9ed --- /dev/null +++ b/core/src/main/java/org/python/core/IndexError.java @@ -0,0 +1,34 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code IndexError} exception. */ +class IndexError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("IndexError", MethodHandles.lookup()) + .base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected IndexError(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public IndexError(String msg, Object... args) { + this(TYPE, msg, args); + } +} diff --git a/core/src/main/java/org/python/core/Interpreter.java b/core/src/main/java/org/python/core/Interpreter.java new file mode 100644 index 000000000..e4f9b3d36 --- /dev/null +++ b/core/src/main/java/org/python/core/Interpreter.java @@ -0,0 +1,92 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement.
+package org.python.core; + +import org.python.base.InterpreterError; + +/** + * An interpreter is responsible for certain variable aspects of the + * "context" within which Python code executes. Chief among these is + * the search path (in fact the whole mechanism) along which modules + * are found, and the dictionary of imported modules itself. + *

+ * The interpreter also holds the wrappers around the standard input + * and output streams, and the registry of codecs. Many of these are + * exposed through the {@code sys} module, rather than any class + * with "interpreter" in the name. + */ +class Interpreter { + + /** + * The list of modules created by this interpreter, exposed as + * {@code sys.modules} when we have a {@code sys} module + */ + final PyDict modules = new PyDict(); + + /** + * The builtins module. An instance is created with each + * {@code Interpreter}. Not {@code null}. + */ + final PyModule builtinsModule; + + /** Create a new {@code Interpreter}. */ + Interpreter() { + builtinsModule = new BuiltinsModule(); + builtinsModule.exec(); + // addModule(builtinsModule); + } + + /** + * Add the given module to the interpreter's list of modules + * (effectively the source of {@code sys.modules}). + * + * @param m to add + */ + void addModule(PyModule m) { + if (modules.putIfAbsent(m.name, m) != null) + throw new InterpreterError("Interpreter.addModule: Module already added %s", m.name); + } + + /** + * Execute the code object and return the result. This is quite like + * {@link BuiltinsModule#exec(Object, Object, Object, Object) + * builtins.exec()}, except that it works without a surrounding + * {@link PyFrame}, from which it could infer {@code globals} and + * {@code locals}. It will create a frame, but it may be on an empty + * stack. 
+ * + * @param code compiled code object + * @param globals global context dictionary + * @param locals local variables (a Python mapping), may be the same + * as {@code globals} or {@code null} + * @return result of evaluation + */ + // Compare CPython PyEval_EvalCode in ceval.c + Object eval(PyCode code, PyDict globals, Object locals) { + if (locals == null) { locals = globals; } + globals.putIfAbsent("__builtins__", builtinsModule); + PyFunction func = code.createFunction(this, globals); + PyFrame f = func.createFrame(locals); + return f.eval(); + } + + /** + * Execute the code object and return the result. This is the + * equivalent of {@link #eval(PyCode, PyDict, Object) eval(code, + * globals, globals)} + * + * @param code compiled code object + * @param globals global context dictionary + * @return result of evaluation + */ + Object eval(PyCode code, PyDict globals) { return eval(code, globals, globals); } + + /** + * Get the value of an attribute of the built-in module, equivalent + * to {@code builtinsModule.dict.get(name)}. + * + * @param name of the attribute ({@code String} or {@code str}) + * @return value of the attribute + */ + Object getBuiltin(String name) { return builtinsModule.dict.get(name); } +} diff --git a/core/src/main/java/org/python/core/JavaModule.java b/core/src/main/java/org/python/core/JavaModule.java new file mode 100644 index 000000000..14d5dda04 --- /dev/null +++ b/core/src/main/java/org/python/core/JavaModule.java @@ -0,0 +1,42 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +/** Common mechanisms for all Python modules defined in Java. */ +public abstract class JavaModule extends PyModule { + + final ModuleDef definition; + + /** + * Construct the base {@code JavaModule}, saving the module + * definition, which is normally created during static + * initialisation of the concrete class defining the module. 
In + * terms of PEP 489 phases, the constructor performs the + * {@code Py_mod_create}. We defer filling the module dictionary + * from the definition and other sources until {@link #exec()} is + * called. + * + * @param definition of the module + */ + protected JavaModule(ModuleDef definition) { + super(definition.name); + this.definition = definition; + } + + /** + * {@inheritDoc} + *

+ * In the case of a {@code JavaModule}, the base implementation + * mines the method definitions from the {@link #definition}. The + * module should extend this method, that is call + * {@code super.exec()} to add boilerplate and the methods, then add + * other definitions (typically constants) to the module namespace + * with {@link #add(String, Object) #add(String, Object)}. In terms + * of PEP 489 phases, this is the {@code Py_mod_exec} phase. + */ + @Override + void exec() { + super.exec(); + definition.addMembers(this); + } +} diff --git a/core/src/main/java/org/python/core/KeyError.java b/core/src/main/java/org/python/core/KeyError.java new file mode 100644 index 000000000..25c36826b --- /dev/null +++ b/core/src/main/java/org/python/core/KeyError.java @@ -0,0 +1,52 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code KeyError} exception. */ +public class KeyError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type object of Python {@code KeyError} exceptions. */ + @SuppressWarnings("hiding") + public static final PyType TYPE = PyType + .fromSpec(new PyType.Spec("KeyError", MethodHandles.lookup()).base(LookupError.TYPE)); + + /** The problematic key */ + final Object key; + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param key causing the problem + * @param type of object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected KeyError(Object key, PyType type, String msg, Object... args) { + super(type, msg, args); + this.key = key; + } + + /** + * Constructor specifying a key and a message. A Java String form of + * the key will be the first argument formatted + * + * @param key causing the problem + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public KeyError(Object key, String msg, Object... 
args) { + this(key, TYPE, msg, java.util.stream.Stream.concat(java.util.stream.Stream.of(key.toString()), java.util.Arrays.stream(args)).toArray()); + } + + /** + * A Python {@link KeyError} when the problem is a duplicate key. + * (This is the same Python type, but Java can catch it as a + * distinct type.) + */ + public static class Duplicate extends KeyError { + private static final long serialVersionUID = 1L; + + public Duplicate(Object key) { super(key, "duplicate key %s", key.toString()); } + } +} diff --git a/core/src/main/java/org/python/core/LookupError.java b/core/src/main/java/org/python/core/LookupError.java new file mode 100644 index 000000000..6cc115f29 --- /dev/null +++ b/core/src/main/java/org/python/core/LookupError.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code LookupError} exception. */ +public class LookupError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type object of Python {@code LookupError} exceptions. */ + @SuppressWarnings("hiding") + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("LookupError", MethodHandles.lookup()).base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected LookupError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public LookupError(String msg, Object... 
args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/MemoryError.java b/core/src/main/java/org/python/core/MemoryError.java new file mode 100644 index 000000000..871d53799 --- /dev/null +++ b/core/src/main/java/org/python/core/MemoryError.java @@ -0,0 +1,33 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code MemoryError} exception. */ +class MemoryError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("MemoryError", MethodHandles.lookup()).base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected MemoryError(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public MemoryError(String msg, Object... args) { + this(TYPE, msg, args); + } +} diff --git a/core/src/main/java/org/python/core/MethodDescriptor.java b/core/src/main/java/org/python/core/MethodDescriptor.java new file mode 100644 index 000000000..5c652bf6e --- /dev/null +++ b/core/src/main/java/org/python/core/MethodDescriptor.java @@ -0,0 +1,135 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import org.python.core.ArgumentError.Mode; + +/** + * Abstract base class for the descriptor of a method defined in + * Java. This class provides some common behaviour and support + * methods that would otherwise be duplicated. This is also home to + * some static methods in support of both sub-classes and other + * callable objects (e.g. {@link PyJavaFunction}). 
+ */ +abstract class MethodDescriptor extends Descriptor implements FastCall { + + MethodDescriptor(PyType descrtype, PyType objclass, String name) { + super(descrtype, objclass, name); + } + + @Override + @SuppressWarnings("fallthrough") + public TypeError typeError(ArgumentError ae, Object[] args, String[] names) { + int n = args.length; + switch (ae.mode) { + case NOARGS: + case NUMARGS: + case MINMAXARGS: + return new TypeError("%s() %s (%d given)", name, ae, n); + case SELF: + return new TypeError(DESCRIPTOR_NEEDS_ARGUMENT, name, objclass); + case NOKWARGS: + assert names != null && names.length > 0; + default: + return new TypeError("%s() %s", name, ae); + } + } + + /** + * Check that no positional or keyword arguments are supplied. This + * is for use when implementing {@code __call__} etc.. + * + * @param args positional argument array to be checked + * @param names to be checked + * @throws ArgumentError if positional arguments are given or + * {@code names} is not {@code null} or empty + */ + final static void checkNoArgs(Object[] args, String[] names) throws ArgumentError { + if (args.length != 0) + throw new ArgumentError(Mode.NOARGS); + else if (names != null && names.length != 0) + throw new ArgumentError(Mode.NOKWARGS); + } + + /** + * Check that no positional arguments are supplied, when no keyword + * arguments have been. This is for use when implementing optimised + * alternatives to {@code __call__}. + * + * @param args positional argument array to be checked + * @throws ArgumentError if positional arguments are given + */ + final static void checkNoArgs(Object[] args) throws ArgumentError { + if (args.length != 0) { throw new ArgumentError(Mode.NOARGS); } + } + + /** + * Check the number of positional arguments and that no keywords are + * supplied. This is for use when implementing {@code __call__} + * etc.. 
+ * + * @param args positional argument array to be checked + * @param expArgs expected number of positional arguments + * @param names to be checked + * @throws ArgumentError if the wrong number of positional arguments + * are given or {@code names} is not {@code null} or empty + */ + final static void checkArgs(Object[] args, int expArgs, String[] names) throws ArgumentError { + if (args.length != expArgs) + throw new ArgumentError(expArgs); + else if (names != null && names.length != 0) + throw new ArgumentError(Mode.NOKWARGS); + } + + /** + * Check the number of positional arguments and that no keywords are + * supplied. This is for use when implementing {@code __call__} + * etc.. + * + * @param args positional argument array to be checked + * @param minArgs minimum number of positional arguments + * @param maxArgs maximum number of positional arguments + * @param names to be checked + * @throws ArgumentError if the wrong number of positional arguments + * are given or {@code names} is not {@code null} or empty + */ + final static void checkArgs(Object[] args, int minArgs, int maxArgs, String[] names) + throws ArgumentError { + int n = args.length; + if (n < minArgs || n > maxArgs) + throw new ArgumentError(minArgs, maxArgs); + else if (names != null && names.length != 0) + throw new ArgumentError(Mode.NOKWARGS); + } + + /** + * Check the number of positional arguments, when no keyword + * arguments have been supplied. This is for use when implementing + * optimised alternatives to {@code __call__}. 
+ * + * @param args positional argument array to be checked + * @param minArgs minimum number of positional arguments + * @param maxArgs maximum number of positional arguments + * @throws ArgumentError if the wrong number of positional arguments + * are given + */ + final static void checkArgs(Object[] args, int minArgs, int maxArgs) throws ArgumentError { + int n = args.length; + if (n < minArgs || n > maxArgs) { throw new ArgumentError(minArgs, maxArgs); } + } + + /** + * Check that at least one argument {@code self} has been supplied. + * + * @param args positional argument array to be checked + * @param names to be taken into account + * @throws ArgumentError if {@code self} is missing + */ + final static void checkHasSelf(Object[] args, String[] names) throws ArgumentError { + int nkwds = names == null ? 0 : names.length; + if (nkwds >= args.length) { + // Not even one argument (self) given by position + throw new ArgumentError(Mode.SELF); + } + } +} diff --git a/core/src/main/java/org/python/core/MethodSignature.java b/core/src/main/java/org/python/core/MethodSignature.java new file mode 100644 index 000000000..7767e415c --- /dev/null +++ b/core/src/main/java/org/python/core/MethodSignature.java @@ -0,0 +1,248 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static java.lang.invoke.MethodHandles.filterArguments; +import static java.lang.invoke.MethodHandles.filterReturnValue; +import static org.python.core.ClassShorthand.O; +import static org.python.core.ClassShorthand.OA; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; + +import org.python.base.MethodKind; +import org.python.core.Slot.EmptyException; + +/** + * The {@code enum MethodSignature} enumerates the method signatures + * for which an optimised implementation is possible. 
Sub-classes of + * {@link PyJavaFunction} and {@link PyMethodDescr} correspond to + * these values. It is not required that each value have a distinct + * optimised sub-class. This {@code enum} is used internally to + * choose between these sub-classes. + */ +// Compare CPython METH_* constants in methodobject.h +enum MethodSignature { + // Constructors describe the parameters after self + /** No arguments allowed after {@code self}. */ + NOARGS(), // METH_NOARGS + /** One argument allowed, possibly after {@code self}. */ + O1(O), // METH_O + /** Two arguments allowed, possibly after {@code self}. */ + O2(O, O), + /** Three arguments allowed, possibly after {@code self}. */ + O3(O, O, O), + /** + * Only positional arguments allowed, after {@code self} when + * describing an unbound method. + */ + POSITIONAL(OA), + /** + * Full generality of ArgParser allowed, after {@code self} when + * describing an unbound method. + */ + GENERAL(OA); + + /** + * The type of method handles matching this method signature when it + * describes a bound or static method. For {@code POSITIONAL} this + * is the type {@code (O[])O}. + */ + final MethodType boundType; + + /** + * The type of method handles matching this method signature when it + * describes an instance method. This differs from + * {@link #boundType} by a preceding {@code O}. For + * {@code POSITIONAL} this is the type {@code (O, O[])O}. + */ + final MethodType instanceType; + + /** + * Handle to throw a {@link Slot.EmptyException}, and having the + * signature {@code instanceType} for this {@code MethodSignature}. + */ + final MethodHandle empty; + + /** The second parameter is an object array. */ + private final boolean useArray; + + private MethodSignature(Class... 
ptypes) { + this.boundType = MethodType.methodType(O, ptypes); + this.instanceType = boundType.insertParameterTypes(0, O); + this.empty = + MethodHandles.dropArguments(Util.THROW_EMPTY, 0, instanceType.parameterArray()); + this.useArray = ptypes.length >= 1 && ptypes[0] == OA; + } + + /** Handle utilities, supporting signature creation. */ + private static class Util { + + /** Single re-used instance of {@link Slot.EmptyException} */ + private static final EmptyException EMPTY = new EmptyException(); + + /** + * A handle with signature {@code ()O} that throws a single re-used + * instance of {@code Slot.EmptyException}. We use this in sub-class + * constructors when given a {@code null} raw method handle, to + * ensure it is always safe to invoke {@link PyMethodDescr#method}. + * If the signature is to be believed, {@code EMPTY} returns + * {@code Object}, although it never actually returns at all. + */ + static final MethodHandle THROW_EMPTY = + MethodHandles.throwException(O, Slot.EmptyException.class).bindTo(EMPTY); + } + + /** + * Choose a {@code MethodSignature} based on the argument parser. + * Note that in a {@link PyMethodDescr}, the {@link ArgParser} + * describes the arguments after {@code self}, even if the + * implementation is declared {@code static} in Java, so that the + * {@code self} argument is explicit. + * + * @param ap argument parser describing the method + * @return a chosen {@code MethodSignature} + */ + static MethodSignature fromParser(ArgParser ap) { + if (ap.hasVarArgs() || ap.hasVarKeywords()) { + /* + * Signatures that have collector parameters for excess arguments + * given by position or keyword are not worth optimising (we + * assume). + */ + return GENERAL; + } else if (ap.posonlyargcount < ap.regargcount) { + /* + * Signatures that allow keyword arguments are too difficult to + * optimise (we assume). 
+ */ + return GENERAL; + } else { + // Arguments may only be given by position + return positional(ap.regargcount); + } + } + + /** + * Choose a {@code MethodSignature} based on a {@code MethodType}. + * + * @param mt to look for + * @return a chosen {@code MethodSignature} + */ + static MethodSignature from(MethodType mt) { + for (MethodSignature ms : MethodSignature.values()) { + if (ms.empty.type().equals(mt)) { return ms; } + } + return GENERAL; + } + + /** + * Select a (potential) optimisation for a method that accepts + * arguments only by position. Signatures that allow only positional + * arguments (optionally with default values for trailing parameters + * not filled by the argument) may be optimised if the number is not + * too great. + * + * @param n number of arguments + * @return chosen method signature + */ + private static MethodSignature positional(int n) { + switch (n) { + case 0: + return NOARGS; + case 1: + return O1; + case 2: + return O2; + case 3: + return O3; + default: + return POSITIONAL; + } + } + + /** + * Prepare a raw method handle, consistent with this + * {@code MethodSignature}, so that it matches the type implied by + * the parser, and may be called in an optimised way. 
+ * + * @param ap to which the handle is made to conform + * @param raw handle representing the Java implementation + * @return handle consistent with this {@code MethodSignature} + */ + MethodHandle prepare(ArgParser ap, MethodHandle raw) { + assert raw != null; + MethodHandle mh; + if (ap.methodKind == MethodKind.STATIC) { + // No self parameter: start at zero + mh = adapt(raw, 0); + // Discard the self argument that we pass + mh = MethodHandles.dropArguments(mh, 0, O); + } else { + // Skip self parameter: start at one + mh = adapt(raw, 1); + } + if (useArray) { + // We will present the last n args as an array + int n = ap.argnames.length; + mh = mh.asSpreader(OA, n); + } + return mh.asType(instanceType); + } + + /** + * Prepare and bind a provided raw method handle, consistent with + * this {@code MethodSignature}, so that it matches the type implied + * by the parser, and may be called in an optimised way. This has + * the right semantics for methods in a {@link JavaModule}, where + * {@code ap.methodKind==STATIC} means there is no {@code module} + * argument to bind. + * + * @param ap to which the handle is made to conform + * @param raw handle representing the Java implementation + * @param self to bind as the first argument if not Python static + * @return handle consistent with this {@code MethodSignature} + */ + MethodHandle prepareBound(ArgParser ap, MethodHandle raw, Object self) { + assert raw != null; + assert ap.methodKind != MethodKind.CLASS; + if (ap.methodKind != MethodKind.STATIC) { + // The type must match here + raw = raw.bindTo(self); + } + MethodHandle mh = adapt(raw, 0); + if (useArray) { + // We will present the last n args as an array + int n = ap.argnames.length; + mh = mh.asSpreader(OA, n); + } + return mh.asType(boundType); + } + + /** + * Adapt an arbitrary method handle to one that expects arguments + * from a given position onwards to be {@code Object}, and returns + * {@code Object}, using the conversions defined in {@link Clinic}. 
+ * + * @param raw the handle to be prepared (or null for empty) + * @param pos index in the type at which to start. + * @return handle compatible with {@code methodDef} + */ + static final MethodHandle adapt(MethodHandle raw, int pos) { + /* + * To begin with, adapt the arguments after self to expect a + * java.lang.Object, if Clinic knows how to convert them. + */ + MethodType mt = raw.type(); + MethodHandle[] af = Clinic.argumentFilter(mt, pos); + MethodHandle mh = filterArguments(raw, pos, af); + MethodHandle rf = Clinic.returnFilter(mt); + if (rf != null) { mh = filterReturnValue(mh, rf); } + /* + * Let the method definition enforce specific constraints and + * conversions on the handle. + */ + return mh; + } +} diff --git a/core/src/main/java/org/python/core/ModuleDef.java b/core/src/main/java/org/python/core/ModuleDef.java new file mode 100644 index 000000000..9e3518ea6 --- /dev/null +++ b/core/src/main/java/org/python/core/ModuleDef.java @@ -0,0 +1,157 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles.Lookup; + +/** + * A {@code ModuleDef} is a definition from which instances of a module + * may be made. It stands in relation to the Java classes that define + * Python modules, somewhat in the way a Python {@code type} object + * stands in relation to the Java classes that define Python objects. + *

+ * What we most often encounter as "a module", a Python source file, is + * actually just a definition from which a module object may be made. + * This happens once in each interpreter where the module is + * imported. A distinct object, with mutable state, represents that + * module in each interpreter. There must therefore be a factory object + * that has access to the definition of the module, but is able to + * instantiate it (equivalent to executing the body of a module defined + * in Python). A {@code ModuleDef} is that factory. + *

+ * This initialisation cannot be identified with the static + * initialisation of the Java class, since that cannot be repeated, but + * must happen per instance. It is useful, however, to have an + * intermediate cache of the results of processing the defining Java + * class once statically initialised. + */ +public class ModuleDef { + // Compare CPython PyModuleDef + + /** Name of the module. */ + final String name; + + /** The Java class defining instances of the module. */ + final Class definingClass; + + /** + * Definitions for the members that appear in the dictionary of + * instances of the module named. Instances receive members by copy, + * by binding to the module instance (descriptors), or by reference + * (if immutable). + */ + private final MethodDef[] methods; + + /** + * Create a definition for the module, largely by introspection on + * the class and by forming {@code MethodHandle}s on discovered + * attributes. + * + * @param name of the module (e.g. "sys" or "math") + * @param lookup authorises access to the defining class. + */ + ModuleDef(String name, Lookup lookup) { + this.name = name; + this.definingClass = lookup.lookupClass(); + ModuleExposer exposer = Exposer.exposeModule(definingClass); + this.methods = exposer.getMethodDefs(lookup); + // XXX ... and for fields. + // XXX ... and for types defined in the module maybe? :o + } + + /** + * Get the method definitions. This method is provided for test use + * only. It isn't safe for public use (it returns the internal array). + * + * @return the method definitions + */ + MethodDef[] getMethods() { return methods; } + + /** + * Add members defined here to the dictionary of a module instance. 
+ * + * @param module to populate + */ + void addMembers(JavaModule module) { + PyDict d = module.dict; + for (MethodDef md : methods) { + // Create function by binding to the module + PyJavaFunction func = PyJavaFunction.fromParser( + md.argParser, md.handle, module, this.name); + d.put(md.argParser.name, func); + } + } + + /** + * A {@code MethodDef} describes a built-in function or method as it + * is declared in a Java module. It holds an argument parser and a + * handle for calling the method. + *

+ * Recall that a module definition may have multiple instances. The + * {@code MethodDef} represents the method between the definition of + * the module (exposure as a {@link ModuleDef}) and the creation of + * actual {@link JavaModule} instances. + *

+ * When a method is declared in Java as an instance method of the + * module, the {@code MethodDef} that describes it discounts the + * {@code self} argument. The {@link PyJavaFunction} created from it + * binds the module instance that is its target, so that it is + * correct for a call to that {@code PyJavaFunction}. This is + * consistent with CPython. + */ + // Compare CPython struct PyMethodDef + static class MethodDef { + + /* + * The object here is only superficially similar to the CPython + * PyMethodDef: it is not used as a member of descriptors or + * methods; extension writers do not declare instances of them. + * Instead, we reify the argument information from the + * declaration in Java, and associated annotations. In CPython, + * this knowledge is present at run-time in the structure of the + * code generated by Argument Clinic, incompletely in the flags + * of the PyMethodDef, and textually in the signature that + * begins the documentation string. We do it by holding an + * ArgParser. + */ + + /** + * An argument parser constructed with this {@code MethodDef} + * from the description of the signature. Full information on + * the signature is available from this structure, and it is + * available to parse the arguments to a standard + * {@code (Object[], String[])} call. (In simple sub-classes it + * is only used to generate error messages once simple checks + * fail.) + */ + final ArgParser argParser; + + /** + * A handle to the implementation of the function or method. + * This is generated by reflecting the same object that + * {@link #argParser} describes. + */ + // CPython PyMethodDef: ml_meth + final MethodHandle handle; + + /** + * Create a {@link MethodDef} of the given kind from the + * {@link ArgParser} provided. 
+ * + * @param argParser parser defining the method + * @param meth method handle prepared by sub-class + */ + MethodDef(ArgParser argParser, MethodHandle meth) { + this.argParser = argParser; + assert meth != null; + this.handle = meth; + } + + @Override + public String toString() { + return String.format("%s[%s]", getClass().getSimpleName(), + argParser); + } + } +} diff --git a/core/src/main/java/org/python/core/ModuleExposer.java b/core/src/main/java/org/python/core/ModuleExposer.java new file mode 100644 index 000000000..277f3b586 --- /dev/null +++ b/core/src/main/java/org/python/core/ModuleExposer.java @@ -0,0 +1,88 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.reflect.Method; + +import org.python.core.Exposed.PythonMethod; +import org.python.core.Exposed.PythonStaticMethod; +import org.python.base.InterpreterError; +import org.python.core.ModuleDef.MethodDef; + +/** + * A {@code ModuleExposer} provides access to the attributes of a module + * defined in Java (a built-in or extension module). These are primarily + * the {@link MethodDef}s derived from annotated methods in the defining + * class. It is normally obtained by a call to + * {@link Exposer#exposeModule(Class)}. + */ +class ModuleExposer extends Exposer { + + /** + * Construct the {@code ModuleExposer} instance for a particular + * module. + */ + ModuleExposer() {} + + /** + * Build the result from the defining class. + * + * @param definingClass to scan for definitions + */ + void expose(Class definingClass) { + // Scan the defining class for definitions + scanJavaMethods(definingClass); + // XXX ... and for fields. + // XXX ... and for types defined in the module maybe? :o + } + + @Override + ScopeKind kind() { return ScopeKind.MODULE; } + + /** + * From the methods discovered by introspection of the class, return + * an array of {@link MethodDef}s. 
This array will normally be part + * of a {@link ModuleDef} from which the dictionary of each instance + * of the module will be created. + * + * A {@link MethodDef} relies on {@code MethodHandle}, so a lookup + * object must be provided with the necessary access to the defining + * class. + * + * @param lookup authorisation to access methods + * @return method definitions + * @throws InterpreterError on lookup prohibited + */ + MethodDef[] getMethodDefs(Lookup lookup) throws InterpreterError { + MethodDef[] a = new MethodDef[methodSpecs.size()]; + int i = 0; + for (CallableSpec ms : methodSpecs) { + a[i++] = ms.getMethodDef(lookup); + } + return a; + } + + /** + * For a Python module defined in Java, add to {@link specs}, the + * methods found in the given defining class and annotated for + * exposure. + * + * @param definingClass to introspect for definitions + * @throws InterpreterError on duplicates or unsupported types + */ + @Override + void scanJavaMethods(Class definingClass) + throws InterpreterError { + + // Collect exposed functions (Java methods) + for (Method m : definingClass.getDeclaredMethods()) { + PythonMethod a = + m.getDeclaredAnnotation(PythonMethod.class); + if (a != null) { addMethodSpec(m, a); } + PythonStaticMethod sm = + m.getDeclaredAnnotation(PythonStaticMethod.class); + if (sm != null) { addStaticMethodSpec(m, sm); } + } + } +} diff --git a/core/src/main/java/org/python/core/NameError.java b/core/src/main/java/org/python/core/NameError.java new file mode 100644 index 000000000..b68e5f1a7 --- /dev/null +++ b/core/src/main/java/org/python/core/NameError.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code NameError} exception. */ +public class NameError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type object of Python {@code NameError} exceptions. 
*/ + @SuppressWarnings("hiding") + public static final PyType TYPE = PyType + .fromSpec(new PyType.Spec("NameError", MethodHandles.lookup()).base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected NameError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public NameError(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/OSError.java b/core/src/main/java/org/python/core/OSError.java new file mode 100644 index 000000000..3c6a0dc54 --- /dev/null +++ b/core/src/main/java/org/python/core/OSError.java @@ -0,0 +1,51 @@ +package org.python.core; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; + +/** The Python {@code OSError} exception. */ +public class OSError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type object of Python {@code OSError} exceptions. */ + @SuppressWarnings("hiding") + public static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("OSError", MethodHandles.lookup()) + .base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected OSError(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public OSError(String msg, Object... 
args) { + this(TYPE, msg, args); + } + + /** + * Constructor using the message of a Java exception verbatim. + * + * @param ioe the Java exception supplying the message + */ + public OSError(IOException ioe) { + this(TYPE, "%s", ioe.getMessage()); + } + + + // Full fat constructor from *Python* is: + // OSError(errno, strerror[, filename[, winerror[, filename2]]]) + // producing: + // OSError: [WinError 999] strerror: 'filename' -> 'filename2' +} diff --git a/core/src/main/java/org/python/core/Opcode311.java b/core/src/main/java/org/python/core/Opcode311.java new file mode 100644 index 000000000..516082f20 --- /dev/null +++ b/core/src/main/java/org/python/core/Opcode311.java @@ -0,0 +1,554 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +/** Constants for opcodes taken from CPython {@code opcode.h} */ +class Opcode311 { + + /** + * EXCEPT_HANDLER is a special, implicit block type that is created + * when entering an except handler. It is not an opcode. + */ + static final int EXCEPT_HANDLER = 257; + + /* Instruction opcodes for compiled code */ + + /** CPython opcode POP_TOP */ + static final int POP_TOP = 1; + /** CPython opcode PUSH_NULL */ + static final int PUSH_NULL = 2; + /** CPython opcode NOP */ + static final int NOP = 9; + /** CPython opcode UNARY_POSITIVE */ + static final int UNARY_POSITIVE = 10; + /** CPython opcode UNARY_NEGATIVE */ + static final int UNARY_NEGATIVE = 11; + /** CPython opcode UNARY_NOT */ + static final int UNARY_NOT = 12; + /** CPython opcode UNARY_INVERT */ + static final int UNARY_INVERT = 15; + /** CPython opcode BINARY_SUBSCR */ + static final int BINARY_SUBSCR = 25; + /** CPython opcode GET_LEN */ + static final int GET_LEN = 30; + /** CPython opcode MATCH_MAPPING */ + static final int MATCH_MAPPING = 31; + /** CPython opcode MATCH_SEQUENCE */ + static final int MATCH_SEQUENCE = 32; + /** CPython opcode MATCH_KEYS */ + static final int MATCH_KEYS = 33; + /** CPython opcode PUSH_EXC_INFO */ + static 
final int PUSH_EXC_INFO = 35; + /** CPython opcode CHECK_EXC_MATCH */ + static final int CHECK_EXC_MATCH = 36; + /** CPython opcode CHECK_EG_MATCH */ + static final int CHECK_EG_MATCH = 37; + /** CPython opcode WITH_EXCEPT_START */ + static final int WITH_EXCEPT_START = 49; + /** CPython opcode GET_AITER */ + static final int GET_AITER = 50; + /** CPython opcode GET_ANEXT */ + static final int GET_ANEXT = 51; + /** CPython opcode BEFORE_ASYNC_WITH */ + static final int BEFORE_ASYNC_WITH = 52; + /** CPython opcode BEFORE_WITH */ + static final int BEFORE_WITH = 53; + /** CPython opcode END_ASYNC_FOR */ + static final int END_ASYNC_FOR = 54; + /** CPython opcode STORE_SUBSCR */ + static final int STORE_SUBSCR = 60; + /** CPython opcode DELETE_SUBSCR */ + static final int DELETE_SUBSCR = 61; + /** CPython opcode GET_ITER */ + static final int GET_ITER = 68; + /** CPython opcode GET_YIELD_FROM_ITER */ + static final int GET_YIELD_FROM_ITER = 69; + /** CPython opcode PRINT_EXPR */ + static final int PRINT_EXPR = 70; + /** CPython opcode LOAD_BUILD_CLASS */ + static final int LOAD_BUILD_CLASS = 71; + /** CPython opcode LOAD_ASSERTION_ERROR */ + static final int LOAD_ASSERTION_ERROR = 74; + /** CPython opcode RETURN_GENERATOR */ + static final int RETURN_GENERATOR = 75; + /** CPython opcode LIST_TO_TUPLE */ + static final int LIST_TO_TUPLE = 82; + /** CPython opcode RETURN_VALUE */ + static final int RETURN_VALUE = 83; + /** CPython opcode IMPORT_STAR */ + static final int IMPORT_STAR = 84; + /** CPython opcode SETUP_ANNOTATIONS */ + static final int SETUP_ANNOTATIONS = 85; + /** CPython opcode YIELD_VALUE */ + static final int YIELD_VALUE = 86; + /** CPython opcode ASYNC_GEN_WRAP */ + static final int ASYNC_GEN_WRAP = 87; + /** CPython opcode PREP_RERAISE_STAR */ + static final int PREP_RERAISE_STAR = 88; + /** CPython opcode POP_EXCEPT */ + static final int POP_EXCEPT = 89; + + /** + * Opcodes with this value or greater are followed by a one-byte + * argument, and those 
less than it, by a zero byte that + * is ignored. + */ + static final int HAVE_ARGUMENT = 90; + + /** CPython opcode STORE_NAME */ + static final int STORE_NAME = 90; + /** CPython opcode DELETE_NAME */ + static final int DELETE_NAME = 91; + /** CPython opcode UNPACK_SEQUENCE */ + static final int UNPACK_SEQUENCE = 92; + /** CPython opcode FOR_ITER */ + static final int FOR_ITER = 93; + /** CPython opcode UNPACK_EX */ + static final int UNPACK_EX = 94; + /** CPython opcode STORE_ATTR */ + static final int STORE_ATTR = 95; + /** CPython opcode DELETE_ATTR */ + static final int DELETE_ATTR = 96; + /** CPython opcode STORE_GLOBAL */ + static final int STORE_GLOBAL = 97; + /** CPython opcode DELETE_GLOBAL */ + static final int DELETE_GLOBAL = 98; + /** CPython opcode SWAP */ + static final int SWAP = 99; + /** CPython opcode LOAD_CONST */ + static final int LOAD_CONST = 100; + /** CPython opcode LOAD_NAME */ + static final int LOAD_NAME = 101; + /** CPython opcode BUILD_TUPLE */ + static final int BUILD_TUPLE = 102; + /** CPython opcode BUILD_LIST */ + static final int BUILD_LIST = 103; + /** CPython opcode BUILD_SET */ + static final int BUILD_SET = 104; + /** CPython opcode BUILD_MAP */ + static final int BUILD_MAP = 105; + /** CPython opcode LOAD_ATTR */ + static final int LOAD_ATTR = 106; + /** CPython opcode COMPARE_OP */ + static final int COMPARE_OP = 107; + /** CPython opcode IMPORT_NAME */ + static final int IMPORT_NAME = 108; + /** CPython opcode IMPORT_FROM */ + static final int IMPORT_FROM = 109; + /** CPython opcode JUMP_FORWARD */ + static final int JUMP_FORWARD = 110; + /** CPython opcode JUMP_IF_FALSE_OR_POP */ + static final int JUMP_IF_FALSE_OR_POP = 111; + /** CPython opcode JUMP_IF_TRUE_OR_POP */ + static final int JUMP_IF_TRUE_OR_POP = 112; + /** CPython opcode POP_JUMP_FORWARD_IF_FALSE */ + static final int POP_JUMP_FORWARD_IF_FALSE = 114; + /** CPython opcode POP_JUMP_FORWARD_IF_TRUE */ + static final int POP_JUMP_FORWARD_IF_TRUE = 
115; + /** CPython opcode LOAD_GLOBAL */ + static final int LOAD_GLOBAL = 116; + /** CPython opcode IS_OP */ + static final int IS_OP = 117; + /** CPython opcode CONTAINS_OP */ + static final int CONTAINS_OP = 118; + /** CPython opcode RERAISE */ + static final int RERAISE = 119; + /** CPython opcode COPY */ + static final int COPY = 120; + /** CPython opcode BINARY_OP */ + static final int BINARY_OP = 122; + /** CPython opcode SEND */ + static final int SEND = 123; + /** CPython opcode LOAD_FAST */ + static final int LOAD_FAST = 124; + /** CPython opcode STORE_FAST */ + static final int STORE_FAST = 125; + /** CPython opcode DELETE_FAST */ + static final int DELETE_FAST = 126; + /** CPython opcode POP_JUMP_FORWARD_IF_NOT_NONE */ + static final int POP_JUMP_FORWARD_IF_NOT_NONE = 128; + /** CPython opcode POP_JUMP_FORWARD_IF_NONE */ + static final int POP_JUMP_FORWARD_IF_NONE = 129; + /** CPython opcode RAISE_VARARGS */ + static final int RAISE_VARARGS = 130; + /** CPython opcode GET_AWAITABLE */ + static final int GET_AWAITABLE = 131; + /** CPython opcode MAKE_FUNCTION */ + static final int MAKE_FUNCTION = 132; + /** CPython opcode BUILD_SLICE */ + static final int BUILD_SLICE = 133; + /** CPython opcode JUMP_BACKWARD_NO_INTERRUPT */ + static final int JUMP_BACKWARD_NO_INTERRUPT = 134; + /** CPython opcode MAKE_CELL */ + static final int MAKE_CELL = 135; + /** CPython opcode LOAD_CLOSURE */ + static final int LOAD_CLOSURE = 136; + /** CPython opcode LOAD_DEREF */ + static final int LOAD_DEREF = 137; + /** CPython opcode STORE_DEREF */ + static final int STORE_DEREF = 138; + /** CPython opcode DELETE_DEREF */ + static final int DELETE_DEREF = 139; + /** CPython opcode JUMP_BACKWARD */ + static final int JUMP_BACKWARD = 140; + /** CPython opcode CALL_FUNCTION_EX */ + static final int CALL_FUNCTION_EX = 142; + /** CPython opcode EXTENDED_ARG */ + static final int EXTENDED_ARG = 144; + /** CPython opcode LIST_APPEND */ + static final int LIST_APPEND = 145; + /** 
CPython opcode SET_ADD */ + static final int SET_ADD = 146; + /** CPython opcode MAP_ADD */ + static final int MAP_ADD = 147; + /** CPython opcode LOAD_CLASSDEREF */ + static final int LOAD_CLASSDEREF = 148; + /** CPython opcode COPY_FREE_VARS */ + static final int COPY_FREE_VARS = 149; + /** CPython opcode RESUME */ + static final int RESUME = 151; + /** CPython opcode MATCH_CLASS */ + static final int MATCH_CLASS = 152; + /** CPython opcode FORMAT_VALUE */ + static final int FORMAT_VALUE = 155; + /** CPython opcode BUILD_CONST_KEY_MAP */ + static final int BUILD_CONST_KEY_MAP = 156; + /** CPython opcode BUILD_STRING */ + static final int BUILD_STRING = 157; + /** CPython opcode LOAD_METHOD */ + static final int LOAD_METHOD = 160; + /** CPython opcode LIST_EXTEND */ + static final int LIST_EXTEND = 162; + /** CPython opcode SET_UPDATE */ + static final int SET_UPDATE = 163; + /** CPython opcode DICT_MERGE */ + static final int DICT_MERGE = 164; + /** CPython opcode DICT_UPDATE */ + static final int DICT_UPDATE = 165; + /** CPython opcode PRECALL */ + static final int PRECALL = 166; + /** CPython opcode CALL */ + static final int CALL = 171; + /** CPython opcode KW_NAMES */ + static final int KW_NAMES = 172; + /** CPython opcode POP_JUMP_BACKWARD_IF_NOT_NONE */ + static final int POP_JUMP_BACKWARD_IF_NOT_NONE = 173; + /** CPython opcode POP_JUMP_BACKWARD_IF_NONE */ + static final int POP_JUMP_BACKWARD_IF_NONE = 174; + /** CPython opcode POP_JUMP_BACKWARD_IF_FALSE */ + static final int POP_JUMP_BACKWARD_IF_FALSE = 175; + /** CPython opcode POP_JUMP_BACKWARD_IF_TRUE */ + static final int POP_JUMP_BACKWARD_IF_TRUE = 176; + + // Adaptive opcodes (not needed for Jython). 
--------------------- + + /** CPython opcode BINARY_OP_ADAPTIVE */ + static final int BINARY_OP_ADAPTIVE = 3; + /** CPython opcode BINARY_OP_ADD_FLOAT */ + static final int BINARY_OP_ADD_FLOAT = 4; + /** CPython opcode BINARY_OP_ADD_INT */ + static final int BINARY_OP_ADD_INT = 5; + /** CPython opcode BINARY_OP_ADD_UNICODE */ + static final int BINARY_OP_ADD_UNICODE = 6; + /** CPython opcode BINARY_OP_INPLACE_ADD_UNICODE */ + static final int BINARY_OP_INPLACE_ADD_UNICODE = 7; + /** CPython opcode BINARY_OP_MULTIPLY_FLOAT */ + static final int BINARY_OP_MULTIPLY_FLOAT = 8; + /** CPython opcode BINARY_OP_MULTIPLY_INT */ + static final int BINARY_OP_MULTIPLY_INT = 13; + /** CPython opcode BINARY_OP_SUBTRACT_FLOAT */ + static final int BINARY_OP_SUBTRACT_FLOAT = 14; + /** CPython opcode BINARY_OP_SUBTRACT_INT */ + static final int BINARY_OP_SUBTRACT_INT = 16; + /** CPython opcode BINARY_SUBSCR_ADAPTIVE */ + static final int BINARY_SUBSCR_ADAPTIVE = 17; + /** CPython opcode BINARY_SUBSCR_DICT */ + static final int BINARY_SUBSCR_DICT = 18; + /** CPython opcode BINARY_SUBSCR_GETITEM */ + static final int BINARY_SUBSCR_GETITEM = 19; + /** CPython opcode BINARY_SUBSCR_LIST_INT */ + static final int BINARY_SUBSCR_LIST_INT = 20; + /** CPython opcode BINARY_SUBSCR_TUPLE_INT */ + static final int BINARY_SUBSCR_TUPLE_INT = 21; + /** CPython opcode CALL_ADAPTIVE */ + static final int CALL_ADAPTIVE = 22; + /** CPython opcode CALL_PY_EXACT_ARGS */ + static final int CALL_PY_EXACT_ARGS = 23; + /** CPython opcode CALL_PY_WITH_DEFAULTS */ + static final int CALL_PY_WITH_DEFAULTS = 24; + /** CPython opcode COMPARE_OP_ADAPTIVE */ + static final int COMPARE_OP_ADAPTIVE = 26; + /** CPython opcode COMPARE_OP_FLOAT_JUMP */ + static final int COMPARE_OP_FLOAT_JUMP = 27; + /** CPython opcode COMPARE_OP_INT_JUMP */ + static final int COMPARE_OP_INT_JUMP = 28; + /** CPython opcode COMPARE_OP_STR_JUMP */ + static final int COMPARE_OP_STR_JUMP = 29; + /** CPython opcode EXTENDED_ARG_QUICK */ + 
static final int EXTENDED_ARG_QUICK = 34; + /** CPython opcode JUMP_BACKWARD_QUICK */ + static final int JUMP_BACKWARD_QUICK = 38; + /** CPython opcode LOAD_ATTR_ADAPTIVE */ + static final int LOAD_ATTR_ADAPTIVE = 39; + /** CPython opcode LOAD_ATTR_INSTANCE_VALUE */ + static final int LOAD_ATTR_INSTANCE_VALUE = 40; + /** CPython opcode LOAD_ATTR_MODULE */ + static final int LOAD_ATTR_MODULE = 41; + /** CPython opcode LOAD_ATTR_SLOT */ + static final int LOAD_ATTR_SLOT = 42; + /** CPython opcode LOAD_ATTR_WITH_HINT */ + static final int LOAD_ATTR_WITH_HINT = 43; + /** CPython opcode LOAD_CONST__LOAD_FAST */ + static final int LOAD_CONST__LOAD_FAST = 44; + /** CPython opcode LOAD_FAST__LOAD_CONST */ + static final int LOAD_FAST__LOAD_CONST = 45; + /** CPython opcode LOAD_FAST__LOAD_FAST */ + static final int LOAD_FAST__LOAD_FAST = 46; + /** CPython opcode LOAD_GLOBAL_ADAPTIVE */ + static final int LOAD_GLOBAL_ADAPTIVE = 47; + /** CPython opcode LOAD_GLOBAL_BUILTIN */ + static final int LOAD_GLOBAL_BUILTIN = 48; + /** CPython opcode LOAD_GLOBAL_MODULE */ + static final int LOAD_GLOBAL_MODULE = 55; + /** CPython opcode LOAD_METHOD_ADAPTIVE */ + static final int LOAD_METHOD_ADAPTIVE = 56; + /** CPython opcode LOAD_METHOD_CLASS */ + static final int LOAD_METHOD_CLASS = 57; + /** CPython opcode LOAD_METHOD_MODULE */ + static final int LOAD_METHOD_MODULE = 58; + /** CPython opcode LOAD_METHOD_NO_DICT */ + static final int LOAD_METHOD_NO_DICT = 59; + /** CPython opcode LOAD_METHOD_WITH_DICT */ + static final int LOAD_METHOD_WITH_DICT = 62; + /** CPython opcode LOAD_METHOD_WITH_VALUES */ + static final int LOAD_METHOD_WITH_VALUES = 63; + /** CPython opcode PRECALL_ADAPTIVE */ + static final int PRECALL_ADAPTIVE = 64; + /** CPython opcode PRECALL_BOUND_METHOD */ + static final int PRECALL_BOUND_METHOD = 65; + /** CPython opcode PRECALL_BUILTIN_CLASS */ + static final int PRECALL_BUILTIN_CLASS = 66; + /** CPython opcode PRECALL_BUILTIN_FAST_WITH_KEYWORDS */ + static final int 
PRECALL_BUILTIN_FAST_WITH_KEYWORDS = 67; + /** CPython opcode PRECALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS */ + static final int PRECALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS = 72; + /** CPython opcode PRECALL_NO_KW_BUILTIN_FAST */ + static final int PRECALL_NO_KW_BUILTIN_FAST = 73; + /** CPython opcode PRECALL_NO_KW_BUILTIN_O */ + static final int PRECALL_NO_KW_BUILTIN_O = 76; + /** CPython opcode PRECALL_NO_KW_ISINSTANCE */ + static final int PRECALL_NO_KW_ISINSTANCE = 77; + /** CPython opcode PRECALL_NO_KW_LEN */ + static final int PRECALL_NO_KW_LEN = 78; + /** CPython opcode PRECALL_NO_KW_LIST_APPEND */ + static final int PRECALL_NO_KW_LIST_APPEND = 79; + /** CPython opcode PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST */ + static final int PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST = 80; + /** CPython opcode PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS */ + static final int PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS = 81; + /** CPython opcode PRECALL_NO_KW_METHOD_DESCRIPTOR_O */ + static final int PRECALL_NO_KW_METHOD_DESCRIPTOR_O = 113; + /** CPython opcode PRECALL_NO_KW_STR_1 */ + static final int PRECALL_NO_KW_STR_1 = 121; + /** CPython opcode PRECALL_NO_KW_TUPLE_1 */ + static final int PRECALL_NO_KW_TUPLE_1 = 127; + /** CPython opcode PRECALL_NO_KW_TYPE_1 */ + static final int PRECALL_NO_KW_TYPE_1 = 141; + /** CPython opcode PRECALL_PYFUNC */ + static final int PRECALL_PYFUNC = 143; + /** CPython opcode RESUME_QUICK */ + static final int RESUME_QUICK = 150; + /** CPython opcode STORE_ATTR_ADAPTIVE */ + static final int STORE_ATTR_ADAPTIVE = 153; + /** CPython opcode STORE_ATTR_INSTANCE_VALUE */ + static final int STORE_ATTR_INSTANCE_VALUE = 154; + /** CPython opcode STORE_ATTR_SLOT */ + static final int STORE_ATTR_SLOT = 158; + /** CPython opcode STORE_ATTR_WITH_HINT */ + static final int STORE_ATTR_WITH_HINT = 159; + /** CPython opcode STORE_FAST__LOAD_FAST */ + static final int STORE_FAST__LOAD_FAST = 161; + /** CPython opcode STORE_FAST__STORE_FAST */ + static final int 
STORE_FAST__STORE_FAST = 167; + /** CPython opcode STORE_SUBSCR_ADAPTIVE */ + static final int STORE_SUBSCR_ADAPTIVE = 168; + /** CPython opcode STORE_SUBSCR_DICT */ + static final int STORE_SUBSCR_DICT = 169; + /** CPython opcode STORE_SUBSCR_LIST_INT */ + static final int STORE_SUBSCR_LIST_INT = 170; + /** CPython opcode UNPACK_SEQUENCE_ADAPTIVE */ + static final int UNPACK_SEQUENCE_ADAPTIVE = 177; + /** CPython opcode UNPACK_SEQUENCE_LIST */ + static final int UNPACK_SEQUENCE_LIST = 178; + /** CPython opcode UNPACK_SEQUENCE_TUPLE */ + static final int UNPACK_SEQUENCE_TUPLE = 179; + /** CPython opcode UNPACK_SEQUENCE_TWO_TUPLE */ + static final int UNPACK_SEQUENCE_TWO_TUPLE = 180; + + /** Synthetic CPython opcode used to control instruction tracing. */ + static final int DO_TRACING = 255; + + // An encoding of binary operations used only by BINARY_OP + + /** Encoding of ADD used in BINARY_OP opcode. */ + static final int NB_ADD = 0; + /** Encoding of AND used in BINARY_OP opcode. */ + static final int NB_AND = 1; + /** Encoding of FLOOR_DIVIDE used in BINARY_OP opcode. */ + static final int NB_FLOOR_DIVIDE = 2; + /** Encoding of LSHIFT used in BINARY_OP opcode. */ + static final int NB_LSHIFT = 3; + /** Encoding of MATRIX_MULTIPLY used in BINARY_OP opcode. */ + static final int NB_MATRIX_MULTIPLY = 4; + /** Encoding of MULTIPLY used in BINARY_OP opcode. */ + static final int NB_MULTIPLY = 5; + /** Encoding of REMAINDER used in BINARY_OP opcode. */ + static final int NB_REMAINDER = 6; + /** Encoding of OR used in BINARY_OP opcode. */ + static final int NB_OR = 7; + /** Encoding of POWER used in BINARY_OP opcode. */ + static final int NB_POWER = 8; + /** Encoding of RSHIFT used in BINARY_OP opcode. */ + static final int NB_RSHIFT = 9; + /** Encoding of SUBTRACT used in BINARY_OP opcode. */ + static final int NB_SUBTRACT = 10; + /** Encoding of TRUE_DIVIDE used in BINARY_OP opcode. 
*/ + static final int NB_TRUE_DIVIDE = 11; + /** Encoding of XOR used in BINARY_OP opcode. */ + static final int NB_XOR = 12; + /** Encoding of INPLACE_ADD used in BINARY_OP opcode. */ + static final int NB_INPLACE_ADD = 13; + /** Encoding of INPLACE_AND used in BINARY_OP opcode. */ + static final int NB_INPLACE_AND = 14; + /** Encoding of INPLACE_FLOOR_DIVIDE used in BINARY_OP opcode. */ + static final int NB_INPLACE_FLOOR_DIVIDE = 15; + /** Encoding of INPLACE_LSHIFT used in BINARY_OP opcode. */ + static final int NB_INPLACE_LSHIFT = 16; + /** Encoding of INPLACE_MATRIX_MULTIPLY used in BINARY_OP opcode. */ + static final int NB_INPLACE_MATRIX_MULTIPLY = 17; + /** Encoding of INPLACE_MULTIPLY used in BINARY_OP opcode. */ + static final int NB_INPLACE_MULTIPLY = 18; + /** Encoding of INPLACE_REMAINDER used in BINARY_OP opcode. */ + static final int NB_INPLACE_REMAINDER = 19; + /** Encoding of INPLACE_OR used in BINARY_OP opcode. */ + static final int NB_INPLACE_OR = 20; + /** Encoding of INPLACE_POWER used in BINARY_OP opcode. */ + static final int NB_INPLACE_POWER = 21; + /** Encoding of INPLACE_RSHIFT used in BINARY_OP opcode. */ + static final int NB_INPLACE_RSHIFT = 22; + /** Encoding of INPLACE_SUBTRACT used in BINARY_OP opcode. */ + static final int NB_INPLACE_SUBTRACT = 23; + /** Encoding of INPLACE_TRUE_DIVIDE used in BINARY_OP opcode. */ + static final int NB_INPLACE_TRUE_DIVIDE = 24; + /** Encoding of INPLACE_XOR used in BINARY_OP opcode. */ + static final int NB_INPLACE_XOR = 25; + + /* + * Various CPython opcodes are followed by an in-line cache, which + * is zero in the byte code initially. We do not implement this + * cache, or squeeze them out which would involve recomputing the + * jumps. + * + * It works to treat 0 as a NOP, but it is inefficient, so we end + * those instructions with a jump (advance of the IP) of the right + * size. 
CPython can get these from a sizeof() the appropriate + * struct but we work it out by hand from the struct quoted here in + * the comments. + */ + + /** + * In CPython 3.11 the in-line cache that follows certain + * instructions is zero in the byte code initially. + */ + static final int CACHE = 0; + + // @formatter:off + // #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT)) + + // typedef struct { + // _Py_CODEUNIT counter; + // _Py_CODEUNIT index; + // _Py_CODEUNIT module_keys_version[2]; + // _Py_CODEUNIT builtin_keys_version; + // } _PyLoadGlobalCache; + + /** Cache following LOAD_GLOBAL opcode (words). */ + static final int INLINE_CACHE_ENTRIES_LOAD_GLOBAL = 5; // _PyLoadGlobalCache + + // typedef struct { + // _Py_CODEUNIT counter; + // } _PyBinaryOpCache; + + /** Cache following BINARY_OP opcode (words). */ + static final int INLINE_CACHE_ENTRIES_BINARY_OP = 1; // _PyBinaryOpCache + + // typedef struct { + // _Py_CODEUNIT counter; + // } _PyUnpackSequenceCache; + + /** Cache following UNPACK_SEQUENCE opcode (words). */ + static final int INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE = 1; // _PyUnpackSequenceCache + + // typedef struct { + // _Py_CODEUNIT counter; + // _Py_CODEUNIT mask; + // } _PyCompareOpCache; + + /** Cache following COMPARE_OP opcode (words). */ + static final int INLINE_CACHE_ENTRIES_COMPARE_OP = 2; // _PyCompareOpCache + + // typedef struct { + // _Py_CODEUNIT counter; + // _Py_CODEUNIT type_version[2]; + // _Py_CODEUNIT func_version; + // } _PyBinarySubscrCache; + + /** Cache following BINARY_SUBSCR opcode (words). */ + static final int INLINE_CACHE_ENTRIES_BINARY_SUBSCR = 4; // _PyBinarySubscrCache + + // typedef struct { + // _Py_CODEUNIT counter; + // _Py_CODEUNIT version[2]; + // _Py_CODEUNIT index; + // } _PyAttrCache; + + /** Cache following LOAD_ATTR opcode (words). */ + static final int INLINE_CACHE_ENTRIES_LOAD_ATTR = 4; // _PyAttrCache + /** Cache following STORE_ATTR opcode (words). 
*/ + static final int INLINE_CACHE_ENTRIES_STORE_ATTR = 4; // _PyAttrCache + + // typedef struct { + // _Py_CODEUNIT counter; + // _Py_CODEUNIT type_version[2]; + // _Py_CODEUNIT dict_offset; + // _Py_CODEUNIT keys_version[2]; + // _Py_CODEUNIT descr[4]; + // } _PyLoadMethodCache; + + /** Cache following LOAD_METHOD opcode (words). */ + static final int INLINE_CACHE_ENTRIES_LOAD_METHOD = 10; // _PyLoadMethodCache + + // typedef struct { + // _Py_CODEUNIT counter; + // _Py_CODEUNIT func_version[2]; + // _Py_CODEUNIT min_args; + // } _PyCallCache; + + /** Cache following CALL opcode (words). */ + static final int INLINE_CACHE_ENTRIES_CALL = 4; // _PyCallCache + + // typedef struct { + // _Py_CODEUNIT counter; + // } _PyPrecallCache; + + /** Cache following PRECALL opcode (words). */ + static final int INLINE_CACHE_ENTRIES_PRECALL = 1; // _PyPrecallCache + + // typedef struct { + // _Py_CODEUNIT counter; + // } _PyStoreSubscrCache; + + /** Cache following STORE_SUBSCR opcode (words). */ + static final int INLINE_CACHE_ENTRIES_STORE_SUBSCR = 1; // _PyStoreSubscrCache + // @formatter:on +} diff --git a/core/src/main/java/org/python/core/Operations.java b/core/src/main/java/org/python/core/Operations.java new file mode 100644 index 000000000..4b037855f --- /dev/null +++ b/core/src/main/java/org/python/core/Operations.java @@ -0,0 +1,1082 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static java.lang.invoke.MethodHandles.exactInvoker; +import static java.lang.invoke.MethodHandles.filterReturnValue; +import static java.lang.invoke.MethodHandles.foldArguments; +import static org.python.core.ClassShorthand.O; +import static org.python.core.ClassShorthand.T; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.MethodType; +import java.lang.invoke.VarHandle.AccessMode; +import java.lang.invoke.WrongMethodTypeException; +import java.util.Map; +import java.util.WeakHashMap; + +import org.python.base.InterpreterError; +import org.python.core.Slot.Signature; + +/** + * An {@code Operations} object provides behaviour to a Java object + * by defining, for its Java class, a {@code MethodHandle} on the + * implementation of each special method required by the + * implementation and enumerated in {@link Slot}. (This is almost + * the same as the set of special methods defined by the Python data + * model.) + *

+ * The run-time system will form a mapping from each Java class to + * an instance of (a specific sub-class of) {@code Operations}. + * Apart from a small collection of bootstrap classes (all of them + * built-in types), this mapping will be developed as the classes + * are encountered through the use of instances of them in Python + * code. + *

+ * In cases where the behaviour depends on the Python type as well + * as the Java class, this is taken care of within the handle + * embedded in the {@code Operations} object for the Java class. + */ +abstract class Operations { + + /** + * The {@code Operations} object of sub-classes of built-in types. + * The slots of this (singleton) redirect through those on the + * PyType of the object instance. + */ + static final Operations DERIVED = Derived.getInstance(); + + /** + * There is only one instance of this class and it is + * {@link Operations#registry}. + */ + private static class Registry extends ClassValue { + + /** + * Mapping from Java class to {@link Operations} object. This is the + * map that backs this {@link Registry}. This map is protected from + * concurrent modification by synchronising on the containing + * {@code Registry} object. The keys are weak to allow classes to be + * unloaded. (Concurrent GC is not a threat to the consistency of + * the registry since a class we are working on cannot be unloaded.) + */ + private final Map, Operations> opsMap = new WeakHashMap<>(); + + /** + * Post an association from a Java class to an {@code Operations} + * object, that will be bound into {@link Operations#registry} when + * a look-up is made. + * + * @param c Java class + * @param ops operations to bind to the class + * @throws Clash when the class is already mapped + */ + synchronized void set(Class c, Operations ops) throws Clash { + Operations old = opsMap.putIfAbsent(c, ops); + if (old != null) { throw new Clash(c, old); } + } + + /** + * Post an association from multiple Java classes to corresponding + * {@code Operations} objects, that will be bound into + * {@link Operations#registry} when look-ups are made. 
+ * + * @param c Java class + * @param ops operations to bind to the class + * @throws Clash when one of the classes is already mapped + */ + synchronized void set(Class[] c, Operations[] ops) throws Clash { + int i, n = c.length; + for (i = 0; i < n; i++) { + Operations old = opsMap.putIfAbsent(c[i], ops[i]); + if (old != null) { + // We failed to insert c[i]: erase what we did + for (int j = 0; j < i; j++) { opsMap.remove(c[j]); } + throw new Clash(c[i], old); + } + } + } + + /** + * Find an operations object for the given class. There are five + * broad cases. {@code c} might be: + *

<ol> + * <li>the crafted canonical implementation of a Python type</li> + * <li>an adopted implementation of some Python type</li> + * <li>the implementation of the base of Python sub-classes of a + * Python type</li> + * <li>a found Java type</li> + * <li>the crafted base of Python sub-classes of a found Java + * type</li> + * </ol>
+ * Cases 1, 3 and 5 may be recognised by marker interfaces on + * {@code c}. Case 2 may be distinguished from case 4 only + * because classes that are adopted implementations will have been + * posted to {@link #opsMap} before the first call, when their + * {@link PyType}s were created. + */ + @Override + protected synchronized Operations computeValue(Class c) { + + /* + * Operations.registry contained no mapping (as a ClassValue) for c + * at the time this thread called get(). We will either find an + * answer ready in opsMap, or construct one and post it there. + * + * It is possible that other threads have already passed through + * get() and blocked behind this thread at the entrance to + * computeValue(). This synchronisation guarantees that this thread + * completes the critical section before another thread enters. + * + * Threads entering subsequently, and needing a binding for the same + * class c, will therefore find the same value found or constructed + * by this thread. Even if the second thread overtakes this one + * after the protected region, and returns first, the class value + * will bind that same Operations object. + */ + + /* + * XXX There is more to say about re-entrancy (this thread) and + * concurrency. This design does not mean that another thread, or + * even the current one, has not already produced a competing + * Operations object to post. + */ + + Operations ops = opsMap.get(c); + + if (ops != null) { + /* + * An answer already exists, for example because a PyType was built + * (cases 1 & 2), but is not yet in the registry class value: our + * return through get() will bind it there for future use. + */ + return ops; + + } else if (DerivedPyObject.class.isAssignableFrom(c)) { + // Case 3, 5: one of the derived cases + // Ensure c and super-classes statically initialised. 
+ ensureInit(c); + // Always the same + return Derived.getInstance(); + + } else if (CraftedPyObject.class.isAssignableFrom(c)) { + // Case 1: one of the crafted cases + // Ensure c and super-classes statically initialised. + ensureInit(c); + // PyType posts results via Operations.register + return findOps(c); + + } else { + // Case 4: found Java type + // XXX Stop gap. Needs specialised exposure. + /* + * A Lookup object cannot be provided from here. Access to members + * of c will be determined by package and class at the point of use, + * in relation to c, according to Java rules. It follows that + * descriptors in the PyType cannot build method handles in advance + * of constructing the call site. + */ + PyType.Spec spec = + new PyType.Spec(c.getSimpleName(), MethodHandles.publicLookup().in(c)); + ops = PyType.fromSpec(spec); + // Must post answer to opsMap ourselves? + return ops; + } + } + + /** + * Ensure a class is statically initialised. Static initialisation + * will normally create a {@link PyType} and call + * {@link #set(Class, Operations)} to post a result to + * {@link #opsMap}. + * + * @param c class to initialise + */ + private static void ensureInit(Class c) { + String name = c.getName(); + try { + Class.forName(name, true, c.getClassLoader()); + } catch (ClassNotFoundException e) { + throw new InterpreterError(e, "failed to initialise class %s", name); + } + } + + /** + * Find the {@code Operations} object for this class, trying + * super-classes. {@code c} must be an initialised class. If it + * posted an {@link Operations} object for itself, it will be found + * immediately. Otherwise the method tries successive super-classes + * until one is found that has already been posted. + * + * @param c class to resolve + * @return operations object for {@code c} + */ + private Operations findOps(Class c) { + Operations ops; + Class prev; + while ((ops = opsMap.get(prev = c)) == null) { + // c has not been posted, but perhaps its superclass?
+ c = prev.getSuperclass(); + if (c == null) { + // prev was Object, or primitive or an interface + throw new InterpreterError("no operations defined by class %s", + prev.getSimpleName()); + } + } + return ops; + } + } + + /** + * Mapping from Java class to the {@code Operations} object that + * provides instances of the class with Python semantics. + */ + static final Registry registry = new Registry(); + + /** + * Register the {@link Operations} object for a Java class. + * Subsequent enquiries through {@link #of(Object)} and + * {@link #fromClass(Class)} will yield this {@code Operations} + * object. This is a one-time action on the JVM-wide registry, + * affecting the state of the {@code Class} object: the association + * cannot be changed, but the {@code Operations} object may be + * mutated (where it allows that). It is an error to attempt to + * associate different {@code Operations} with a class already + * bound. + * + * @param c class with which associated + * @param ops the operations object + * @throws Clash when the class is already mapped + */ + static void register(Class c, Operations ops) throws Clash { + Operations.registry.set(c, ops); + } + + /** + * Register the {@link Operations} objects for multiple Java + * classes, as with {@link #register(Class, Operations)}. All + * succeed or fail together. + * + * @param c classes with which associated + * @param ops the operations objects + * @throws Clash when one of the classes is already mapped + */ + static void register(Class[] c, Operations ops[]) throws Clash { + Operations.registry.set(c, ops); + } + + /** + * Map a Java class to the {@code Operations} object that provides + * Python semantics to instances of the class. 
+ * + * @param c class on which operations are required + * @return {@code Operations} providing Python semantics + */ + static Operations fromClass(Class c) { + // Normally, this is completely straightforward + // TODO deal with re-entrancy and concurrency + return registry.get(c); + } + + /** + * Map an object to the {@code Operations} object that provides it + * with Python semantics. + * + * @param obj on which operations are required + * @return {@code Operations} providing Python semantics + */ + static Operations of(Object obj) { return fromClass(obj.getClass()); } + + /** + * Get the Python type of the object given that this is the + * operations object for it. + * + * @param x subject of the enquiry + * @return {@code type(x)} + */ + abstract PyType type(Object x); + + /** + * Get the unique Python type for which this is operations object. + * This is not always a meaningful enquiry: if this Operations + * object is able to serve multiple types, an error will be thrown. + * + * @return type represented + */ + abstract PyType uniqueType() throws IllegalArgumentException; + + /** + * Identify by index which Java implementation of the associated + * type this {@code Operations} object is for. (Some types have + * multiple acceptable implementations.) + * + * @return index in the type (0 if canonical) + */ + int getIndex() { return 0; } + + /** + * Get the Java implementation class this {@code Operations} object + * is for. + * + * @return class of the implementation + */ + abstract Class getJavaClass(); + + /** + * Fast check that the target is exactly a Python {@code int}. We + * can do this without reference to the object itself, since it is + * deducible from the Java class. + * + * @return target is exactly a Python {@code int} + */ + boolean isIntExact() { return this == PyLong.TYPE; } + + /** + * Fast check that the target is a data descriptor. 
+ * + * @return target is a data descriptor + */ + boolean isDataDescr() { return false; } + + /** + * Fast check that the target is a method descriptor. + * + * @return target is a method descriptor + */ + boolean isMethodDescr() { return false; } + + // --------------------------------------------------------------- + + /** + * Operations for an accepted implementation (non-canonical + * implementation) are represented by an instance of this class. The + * operations of a canonical implementation are represented by the + * {@link PyType} itself. + */ + static class Accepted extends Operations { + + /** The type of which this is an accepted implementation. */ + final private PyType type; + + /** + * Index of this implementation in the type (see + * {@link PyType#indexAccepted(Class)}). + */ + final private int index; + + /** + * Create an operations object that is the {@code n}th + * implementation of the given type. ({@code n>0} since the + * implementation 0 is represented by the type itself.) + * + * @param type of which this is an accepted implementation + * @param n index of this implementation in the type + */ + Accepted(PyType type, int n) { + this.type = type; + this.index = n; + setAllSlots(); + } + + @Override + PyType type(Object x) { return type; } + + @Override + PyType uniqueType() { return type; } + + @Override + boolean isIntExact() { return type == PyLong.TYPE; } + + @Override + int getIndex() { return index; } + + @Override + Class getJavaClass() { return type.classes[index]; } + + /** + * Set all the slots ({@code op_*}) from the entries in the + * dictionaries of this type and its bases.
+ */ + private void setAllSlots() { + for (Slot s : Slot.values()) { + Object def = type.lookup(s.methodName); + s.setDefinition(this, def); + } + } + + @Override + public String toString() { + String javaName = getJavaClass().getSimpleName(); + return javaName + " as " + type.toString(); + } + } + + /** + * Operations for a Python class defined in Python are represented + * by an instance of this class. Many Python classes may be + * implemented by the same Java class, the actual type being read + * from the object. The canonical implementation is represented by + * the {@link PyType} itself. + */ + static class Derived extends Operations { + + /** + * {@code MethodHandle} of type {@code (DerivedPyObject)PyType}, to + * get the actual Python type of a {@link DerivedPyObject} object. + */ + private static final MethodHandle getType; + /** + * The type {@code (PyType)MethodHandle} used to cast the method + * handle getter in {@link #indirectSlot(Slot)}. + */ + private static final MethodType MT_MH_FROM_TYPE; + + /** Rights to form method handles. */ + private static final Lookup LOOKUP = MethodHandles.lookup(); + + static { + try { + // Used as a cast in the formation of getMHfromType + // (PyType)MethodHandle + MT_MH_FROM_TYPE = MethodType.methodType(MethodHandle.class, T); + // Used as a cast in the formation of getType + // (PyType)MethodHandle + // getType = λ x : x.getType() + // .type() = (Object)PyType + getType = LOOKUP + .findVirtual(CraftedPyObject.class, "getType", MethodType.methodType(T)) + .asType(MethodType.methodType(T, O)); + } catch (NoSuchMethodException | IllegalAccessException e) { + throw new InterpreterError(e, "preparing handles in Operations.Derived"); + } + } + + /** + * Return a handle of the correct type for the slot, but that + * indirects through the type object of the first argument.
+ * + * @param s slot for which an indirecting handle is required + * @return handle with the signature of {@code s} + */ + private static MethodHandle indirectSlot(Slot s) { + /* + * We form a method handle that can take any Object, and if it is a + * CraftedPyObject, navigate to its type object, and pick out the + * method handle from Slot s. + */ + // getMHfromType = λ t : s.getSlot(t) + // .type() = (PyType)MethodHandle + MethodHandle getMHfromType = + s.slotHandle.toMethodHandle(AccessMode.GET).asType(MT_MH_FROM_TYPE); + // getMHfromObj = λ x : s.getSlot(x.getType()) + // .type() = (CraftedPyObject)MethodHandle + MethodHandle getMHfromObj = filterReturnValue(getType, getMHfromType); + /* + * We create an exact invoker, that can take a handle with the + * correct signature for Slot s, and invoke it on the corresponding + * arguments. + */ + // invoker = λ h x ... : h(x, ...) + MethodType mt = s.signature.empty.type(); + MethodHandle invoker = exactInvoker(mt); + /* + * Finally we compose the invoker with getMHfromObj, to make a new + * handle, with the correct signature for Slot s, that when invoked + * itself, indirects through the corresponding handle in the type + * object. + */ + // λ x ... : (s.getSlot(x.getType())) (x, ...) + return foldArguments(invoker, getMHfromObj); + } + + private static final Derived instance = new Derived(); + + static Derived getInstance() { return instance; } + + /** + * Create an operations object that is the implementation of + * potentially many types defined in Python.
+ */ + Derived() { setAllSlots(); } + + @Override + PyType type(Object x) { + if (x instanceof DerivedPyObject) + return ((DerivedPyObject)x).getType(); + else + throw new InterpreterError("object %.50s has wrong Operations type %s", x, + getClass().getSimpleName()); + } + + @Override + PyType uniqueType() { + throw new IllegalArgumentException("Python type not uniquely defined by Operations"); + } + + @Override + boolean isIntExact() { return false; } + + @Override + Class getJavaClass() { return null; } + + /** + * Set all the slots ({@code op_*}) to entries that will interrogate + * the actual type of their target object. + */ + private void setAllSlots() { + for (Slot s : Slot.values()) { s.setHandle(this, indirectSlot(s)); } + } + + @Override + public String toString() { return "Derived"; } + } + + /** + * A table of binary operations that may be indexed by a pair of + * classes (or their {@code Operations} objects). Binary operations, + * at the same time as appearing as the {@code op} and {@code rop} + * slots, meaning for example {@link Operations#op_add} and + * {@link Operations#op_radd}, are optionally given implementations + * specialised for the Java classes of their arguments. A + * {@code BinopGrid} describes the specialisations of one slot for one type. + */ + static class BinopGrid { + + /** Handle that marks an empty binary operation slot. */ + protected static final MethodHandle BINARY_EMPTY = Slot.Signature.BINARY.empty; + + /** The (binary) slot for which this is an operation. */ + final Slot slot; + /** The type on which we find this implemented. */ + final PyType type; + /** All the implementations, arrayed by argument class. */ + final MethodHandle[][] mh; + + /** + * Construct a grid for the given operation and type.
+ * + * @param slot of the binary operation + * @param type in which the definition is being made + */ + BinopGrid(Slot slot, PyType type) { + assert slot.signature == Signature.BINARY; + this.slot = slot; + this.type = type; + final int N = type.acceptedCount; + final int M = type.classes.length; + this.mh = new MethodHandle[N][M]; + } + + /** + * Post the definition for the {@link #slot} applicable to the + * classes in the method type. The handle must be the "raw" handle + * to the class-specific implementation, while the posted value + * (later returned by {@link #get(Class, Class)}) will have the + * signature {@link Signature#BINARY}. + * + * @param mh handle to post + */ + void add(MethodHandle mh) throws WrongMethodTypeException, InterpreterError { + MethodType mt = mh.type(); + // Cast fails if the signature is incorrect for the slot + mh = mh.asType(slot.getType()); + // Find cell based on argument types + int i = type.indexAccepted(mt.parameterType(0)); + int j = type.indexOperand(mt.parameterType(1)); + if (i >= 0 && j >= 0) { + this.mh[i][j] = mh; + } else { + /* + * The arguments to mh are not (respectively) an accepted class and + * an operand class for the type. Type spec and the declared binary + * ops disagree? + */ + throw new InterpreterError("unexpected signature of %s.%s: %s", type.name, + slot.methodName, mt); + } + } + + /** + * Check that every valid combination of classes has been added + * (therefore leads to a non-null method handle). + * + * @throws InterpreterError if a {@code null} was found + */ + void checkFilled() throws InterpreterError { + final int N = type.acceptedCount; + final int M = type.classes.length; + for (int i = 0; i < N; i++) { + for (int j = 0; j < M; j++) { + if (mh[i][j] == null) { + /* + * There's a gap in the table. Type spec and the declared binary ops + * disagree?
+ */ + throw new InterpreterError("binary op not defined: %s(%s, %s)", + slot.methodName, type.classes[i].getSimpleName(), + type.classes[j].getSimpleName()); + } + } + } + } + + /** + * Get the method handle of an implementation + * {@code Object op(V v, W w)} specialised to the given classes. If + * {@code V} is an accepted implementation of this type, and + * {@code W} is an operand class, the return will be a handle on an + * implementation accepting those classes. If no implementation is + * available for those classes (which means they are not accepted + * and operand types for the Python type) an empty slot handle is + * returned. + * + * @param accepted class of first argument to method + * @param operand class of second argument to method + * @return the special-to-class binary operation + */ + MethodHandle get(Class accepted, Class operand) { + // Find cell based on argument types + int i = type.indexAccepted(accepted); + int j = type.indexOperand(operand); + if (i >= 0 && j >= 0) { + return mh[i][j]; + } else { + return BINARY_EMPTY; + } + } + + /** + * Convenience method allowing look-up equivalent to + * {@link #get(Class, Class)}, but using the {@code Operations} + * objects as a proxy for the actual classes. + * + * @param accepted class of first argument to method + * @param operand class of second argument to method + * @return the special-to-class binary operation + */ + MethodHandle get(Operations accepted, Operations operand) { + return get(accepted.getJavaClass(), operand.getJavaClass()); + } + } + + /** + * Exception reporting that an attempt was made to register a second + * {@link Operations} object against a class already in the + * registry. + */ + static class Clash extends Exception { + private static final long serialVersionUID = 1L; + /** Class being redefined. 
*/ + final Class klass; + /** + * The operations object already in the registry for {@link #klass} + */ + final Operations existing; + + Clash(Class klass, Operations existing) { + // super("repeat type/operations definition for %s", klass); + this.klass = klass; + this.existing = existing; + } + + @Override + public String getMessage() { + return String.format("repeat type/operations definition for %s", klass); + } + } + + // --------------------------------------------------------------- + + // Cache of the standard type slots. See CPython PyType. + // Same order as enum Slot (although only names are important). + + /** + * Handle on special method {@code __repr__}, described by + * {@link Slot#op_repr}. + */ + MethodHandle op_repr; + /** + * Handle on special method {@code __hash__}, described by + * {@link Slot#op_hash}. + */ + MethodHandle op_hash; + /** + * Handle on special method {@code __call__}, described by + * {@link Slot#op_call}. + */ + MethodHandle op_call; + /** + * Handle on special method {@code __str__}, described by + * {@link Slot#op_str}. + */ + MethodHandle op_str; + + /** + * Handle on special method {@code __getattribute__}, described by + * {@link Slot#op_getattribute}. + */ + MethodHandle op_getattribute; + /** + * Handle on special method {@code __getattr__}, described by + * {@link Slot#op_getattr}. + */ + MethodHandle op_getattr; + /** + * Handle on special method {@code __setattr__}, described by + * {@link Slot#op_setattr}. + */ + MethodHandle op_setattr; + /** + * Handle on special method {@code __delattr__}, described by + * {@link Slot#op_delattr}. + */ + MethodHandle op_delattr; + + /** + * Handle on special method {@code __lt__}, described by + * {@link Slot#op_lt}. + */ + MethodHandle op_lt; + /** + * Handle on special method {@code __le__}, described by + * {@link Slot#op_le}. + */ + MethodHandle op_le; + /** + * Handle on special method {@code __eq__}, described by + * {@link Slot#op_eq}. 
+ */ + MethodHandle op_eq; + /** + * Handle on special method {@code __ne__}, described by + * {@link Slot#op_ne}. + */ + MethodHandle op_ne; + /** + * Handle on special method {@code __gt__}, described by + * {@link Slot#op_gt}. + */ + MethodHandle op_gt; + /** + * Handle on special method {@code __ge__}, described by + * {@link Slot#op_ge}. + */ + MethodHandle op_ge; + + /** + * Handle on special method {@code __iter__}, described by + * {@link Slot#op_iter}. + */ + MethodHandle op_iter; + /** + * Handle on special method {@code __next__}, described by + * {@link Slot#op_next}. + */ + MethodHandle op_next; + + /** + * Handle on special method {@code __get__}, described by + * {@link Slot#op_get}. + */ + MethodHandle op_get; + /** + * Handle on special method {@code __set__}, described by + * {@link Slot#op_set}. + */ + MethodHandle op_set; + /** + * Handle on special method {@code __delete__}, described by + * {@link Slot#op_delete}. + */ + MethodHandle op_delete; + + /** + * Handle on special method {@code __init__}, described by + * {@link Slot#op_init}. + */ + MethodHandle op_init; + + /** + * Handle on special method {@code __await__}, described by + * {@link Slot#op_await}. + */ + MethodHandle op_await; + /** + * Handle on special method {@code __aiter__}, described by + * {@link Slot#op_aiter}. + */ + MethodHandle op_aiter; + /** + * Handle on special method {@code __anext__}, described by + * {@link Slot#op_anext}. + */ + MethodHandle op_anext; + + /** + * Handle on special method {@code __radd__}, described by + * {@link Slot#op_radd}. + */ + MethodHandle op_radd; + /** + * Handle on special method {@code __rsub__}, described by + * {@link Slot#op_rsub}. + */ + MethodHandle op_rsub; + /** + * Handle on special method {@code __rmul__}, described by + * {@link Slot#op_rmul}. + */ + MethodHandle op_rmul; + /** + * Handle on special method {@code __rmod__}, described by + * {@link Slot#op_rmod}. 
+ */ + MethodHandle op_rmod; + /** + * Handle on special method {@code __rdivmod__}, described by + * {@link Slot#op_rdivmod}. + */ + MethodHandle op_rdivmod; + /** + * Handle on special method {@code __rpow__}, described by + * {@link Slot#op_rpow}. + */ + MethodHandle op_rpow; + /** + * Handle on special method {@code __rlshift__}, described by + * {@link Slot#op_rlshift}. + */ + MethodHandle op_rlshift; + /** + * Handle on special method {@code __rrshift__}, described by + * {@link Slot#op_rrshift}. + */ + MethodHandle op_rrshift; + /** + * Handle on special method {@code __rand__}, described by + * {@link Slot#op_rand}. + */ + MethodHandle op_rand; + /** + * Handle on special method {@code __rxor__}, described by + * {@link Slot#op_rxor}. + */ + MethodHandle op_rxor; + /** + * Handle on special method {@code __ror__}, described by + * {@link Slot#op_ror}. + */ + MethodHandle op_ror; + /** + * Handle on special method {@code __rfloordiv__}, described by + * {@link Slot#op_rfloordiv}. + */ + MethodHandle op_rfloordiv; + /** + * Handle on special method {@code __rtruediv__}, described by + * {@link Slot#op_rtruediv}. + */ + MethodHandle op_rtruediv; + /** + * Handle on special method {@code __rmatmul__}, described by + * {@link Slot#op_rmatmul}. + */ + MethodHandle op_rmatmul; + + /** + * Handle on special method {@code __add__}, described by + * {@link Slot#op_add}. + */ + MethodHandle op_add; + /** + * Handle on special method {@code __sub__}, described by + * {@link Slot#op_sub}. + */ + MethodHandle op_sub; + /** + * Handle on special method {@code __mul__}, described by + * {@link Slot#op_mul}. + */ + MethodHandle op_mul; + /** + * Handle on special method {@code __mod__}, described by + * {@link Slot#op_mod}. + */ + MethodHandle op_mod; + /** + * Handle on special method {@code __divmod__}, described by + * {@link Slot#op_divmod}. + */ + MethodHandle op_divmod; + /** + * Handle on special method {@code __pow__}, described by + * {@link Slot#op_pow}. 
+ */ + MethodHandle op_pow; + + /** + * Handle on special method {@code __neg__}, described by + * {@link Slot#op_neg}. + */ + MethodHandle op_neg; + /** + * Handle on special method {@code __pos__}, described by + * {@link Slot#op_pos}. + */ + MethodHandle op_pos; + /** + * Handle on special method {@code __abs__}, described by + * {@link Slot#op_abs}. + */ + MethodHandle op_abs; + /** + * Handle on special method {@code __invert__}, described by + * {@link Slot#op_invert}. + */ + MethodHandle op_invert; + + /** + * Handle on special method {@code __bool__}, described by + * {@link Slot#op_bool}. + */ + MethodHandle op_bool; + + /** + * Handle on special method {@code __lshift__}, described by + * {@link Slot#op_lshift}. + */ + MethodHandle op_lshift; + /** + * Handle on special method {@code __rshift__}, described by + * {@link Slot#op_rshift}. + */ + MethodHandle op_rshift; + /** + * Handle on special method {@code __and__}, described by + * {@link Slot#op_and}. + */ + MethodHandle op_and; + /** + * Handle on special method {@code __xor__}, described by + * {@link Slot#op_xor}. + */ + MethodHandle op_xor; + /** + * Handle on special method {@code __or__}, described by + * {@link Slot#op_or}. + */ + MethodHandle op_or; + + /** + * Handle on special method {@code __int__}, described by + * {@link Slot#op_int}. + */ + MethodHandle op_int; + /** + * Handle on special method {@code __float__}, described by + * {@link Slot#op_float}. + */ + MethodHandle op_float; + + /** + * Handle on special method {@code __iadd__}, described by + * {@link Slot#op_iadd}. + */ + MethodHandle op_iadd; + /** + * Handle on special method {@code __isub__}, described by + * {@link Slot#op_isub}. + */ + MethodHandle op_isub; + /** + * Handle on special method {@code __imul__}, described by + * {@link Slot#op_imul}. + */ + MethodHandle op_imul; + /** + * Handle on special method {@code __imod__}, described by + * {@link Slot#op_imod}. 
+ */ + MethodHandle op_imod; + /** + * Handle on special method {@code __iand__}, described by + * {@link Slot#op_iand}. + */ + MethodHandle op_iand; + /** + * Handle on special method {@code __ixor__}, described by + * {@link Slot#op_ixor}. + */ + MethodHandle op_ixor; + /** + * Handle on special method {@code __ior__}, described by + * {@link Slot#op_ior}. + */ + MethodHandle op_ior; + + /** + * Handle on special method {@code __floordiv__}, described by + * {@link Slot#op_floordiv}. + */ + MethodHandle op_floordiv; + /** + * Handle on special method {@code __truediv__}, described by + * {@link Slot#op_truediv}. + */ + MethodHandle op_truediv; + /** + * Handle on special method {@code __ifloordiv__}, described by + * {@link Slot#op_ifloordiv}. + */ + MethodHandle op_ifloordiv; + /** + * Handle on special method {@code __itruediv__}, described by + * {@link Slot#op_itruediv}. + */ + MethodHandle op_itruediv; + + /** + * Handle on special method {@code __index__}, described by + * {@link Slot#op_index}. + */ + MethodHandle op_index; + + /** + * Handle on special method {@code __matmul__}, described by + * {@link Slot#op_matmul}. + */ + MethodHandle op_matmul; + /** + * Handle on special method {@code __imatmul__}, described by + * {@link Slot#op_imatmul}. + */ + MethodHandle op_imatmul; + + /** + * Handle on special method {@code __len__}, described by + * {@link Slot#op_len}. + */ + MethodHandle op_len; + /** + * Handle on special method {@code __getitem__}, described by + * {@link Slot#op_getitem}. + */ + MethodHandle op_getitem; + /** + * Handle on special method {@code __setitem__}, described by + * {@link Slot#op_setitem}. + */ + MethodHandle op_setitem; + /** + * Handle on special method {@code __delitem__}, described by + * {@link Slot#op_delitem}. + */ + MethodHandle op_delitem; + /** + * Handle on special method {@code __contains__}, described by + * {@link Slot#op_contains}. 
+ */ + MethodHandle op_contains; +} diff --git a/core/src/main/java/org/python/core/OverflowError.java b/core/src/main/java/org/python/core/OverflowError.java new file mode 100644 index 000000000..8cb74f9ea --- /dev/null +++ b/core/src/main/java/org/python/core/OverflowError.java @@ -0,0 +1,33 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code OverflowError} exception. */ +class OverflowError extends ArithmeticError { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("OverflowError", MethodHandles.lookup())); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected OverflowError(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public OverflowError(String msg, Object... args) { + this(TYPE, msg, args); + } +} diff --git a/core/src/main/java/org/python/core/Py.java b/core/src/main/java/org/python/core/Py.java new file mode 100644 index 000000000..13d4d44bf --- /dev/null +++ b/core/src/main/java/org/python/core/Py.java @@ -0,0 +1,159 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.math.BigInteger; + +/** Common run-time constants and constructors. 
*/ +public class Py { + + private static class Singleton implements CraftedPyObject { + + final PyType type; + + @Override + public PyType getType() { return type; } + + String name; + + Singleton(String name) { + this.name = name; + type = PyType.fromSpec(new PyType.Spec(name, MethodHandles.lookup()) + .canonical(getClass()).flagNot(PyType.Flag.BASETYPE)); + } + + @Override + public String toString() { return name; } + } + + /** Python {@code None} object. */ + public static final Object None = new Singleton("None") {}; + + /** Python {@code NotImplemented} object. */ + static final Object NotImplemented = new Singleton("NotImplemented") {}; + + /** + * Return Python {@code int} for Java {@code int}. + * + * @param value to represent + * @return equivalent {@code int} + * @deprecated Use primitive auto-boxed or {@code Integer.valueOf}. + */ + @Deprecated + public static Integer val(int value) { return value; } + + /** + * Return Python {@code int} for Java {@code long}. + * + * @param value to represent + * @return equivalent {@code int} + */ + public static BigInteger val(long value) { return BigInteger.valueOf(value); } + + /** Python {@code False} object. */ + public static final Boolean False = false; + + /** Python {@code True} object. */ + public static final Boolean True = true; + + /** + * Return a Python {@code object}. + * + * @return {@code object()} + */ + static PyBaseObject object() { return new PyBaseObject(); } + + /** + * Return Python {@code tuple} for array of {@code Object}. + * + * @param values to contain + * @return equivalent {@code tuple} object + */ + public static PyTuple tuple(Object... values) { return PyTuple.from(values); } + + /** + * Return empty Python {@code dict}. + * + * @return {@code dict()} + */ + public static PyDict dict() { return new PyDict(); } + + /** Empty (zero-length) array of {@link Object}. */ + static final Object[] EMPTY_ARRAY = new Object[0]; + + /** Empty (zero-length) array of {@code String}. 
*/ + static final String[] EMPTY_STRING_ARRAY = new String[0]; + + /** + * Convenient default toString implementation that tries __str__, if + * defined, but always falls back to something. Use as:
+     * public String toString() { return Py.defaultToString(this); }
+     * 
+ * + * @param o object to represent + * @return a string representation + */ + static String defaultToString(Object o) { + if (o == null) + return "null"; + else { + Operations ops = null; + try { + ops = Operations.of(o); + MethodHandle str = ops.op_str; + Object res = str.invokeExact(o); + return res.toString(); + } catch (Throwable e) {} + + // Even object.__str__ not working. + String name = ""; + try { + // Got a Python type at all? + name = ops.type(o).name; + } catch (Throwable e) { + // Maybe during start-up. Fall back to Java. + Class c = o.getClass(); + if (c.isAnonymousClass()) + name = c.getName(); + else + name = c.getSimpleName(); + } + return "<" + name + " object>"; + } + } + + /** + * Return the unique numerical identity of a given Python object. + * Objects with the same id() are identical as long as both exist. + * By implementing it here, we encapsulate the problem of qualified + * type name and what "address" or "identity" should mean. + * + * @param o the object + * @return the Python {@code id(o)} + */ + static int id(Object o) { + // For the time being identity means (not guaranteed unique): + return System.identityHashCode(o); + } + +// Interpreter --------------------------------------------------- + +// /** +// * Create an interpreter in its default state. +// * +// * @return the interpreter +// */ +// static Interpreter createInterpreter() { +// return new Interpreter(); +// } + + // Initialisation ------------------------------------------------ + + /** Action we might need to initialise the run-time system. */ + static synchronized void initialise() {} + + /** Action we might need to finalise the run-time system. */ + static synchronized void finalise() {} +} diff --git a/core/src/main/java/org/python/core/PyBaseObject.java b/core/src/main/java/org/python/core/PyBaseObject.java new file mode 100644 index 000000000..bb8afbd3f --- /dev/null +++ b/core/src/main/java/org/python/core/PyBaseObject.java @@ -0,0 +1,343 @@ +// Copyright (c)2021 Jython Developers.
+// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.util.Map; + +import org.python.core.Slot.Signature; + +/** + * The Python {@code object} object: all Python objects by default + * inherit its Python method implementations. The canonical + * implementation of {@code object} is {@code Object}. This class + * provides its Python behaviours. + *

+ * Although all Python objects are sub-classes in Python of + * {@code object}, their implementation classes will not generally + * be sub-classes in Java of this one. In particular, many built-in + * types are not. + *

+ * The Java implementation class of a type defined in Python will be + * derived from the canonical implementation class of the "solid + * base" it inherits in Python. This may be {@code object}, + * in which case the implementation class will be a sub-class in + * Java of this class. + *

+ * + * @implNote All exposed methods, special methods and attribute get, + * set and delete methods defined here must be declared + * {@code static} in Java, with an explicit {@code Object self} + * argument. ({@code __new__} is excepted from this rule as it + * is {@code static} anyway.) This is so that methods defined + * here on {@code object} operate correctly on receiving Python + * objects whatever their Java class. Methods and fields must be + * package visible so that {@link PyType} is able to form + * {@code MethodHandle}s to them using its default lookup + * object. + */ +public class PyBaseObject extends AbstractPyObject { + + /** The type object of {@code object} objects. */ + public static final PyType TYPE = PyType.OBJECT_TYPE; + + /** + * Constructor for Python sub-class specifying {@code type}. + * + * @param type actual Python sub-class being created + */ + protected PyBaseObject(PyType type) { super(type); } + + /** Constructor for {@code object}. */ + public PyBaseObject() { super(TYPE); } + + // Special methods ------------------------------------------------ + + /* + * Methods must be static with a "self" argument of type Object so + * that method handles copied from the slots of "object" function + * correctly in the type slots of Python objects. + * + * It follows that operations performed here must be feasible for + * any Python object. + */ + + /** + * {@link Slot#op_repr} has signature {@link Signature#UNARY} and + * sometimes reproduces the source-code representation of the + * object. + * + * @param self target of the operation + * @return string form + */ + // Compare CPython object_repr in typeobject.c + static Object __repr__(Object self) { return "<" + PyObjectUtil.toAt(self) + ">"; } + + /** + * {@link Slot#op_str} has signature {@link Signature#UNARY} and + * returns a human-readable presentation of the object. The default + * definition of the {@code __str__} slot is to invoke the + * {@code __repr__} slot. 
+ * + * @param self target of the operation + * @return string form + */ + // Compare CPython object_str in typeobject.c + static Object __str__(Object self) { + MethodHandle repr = Operations.of(self).op_repr; + // Be more bullet-proof than usual + try { + if (repr != null) + return repr.invokeExact(self); + } catch (Throwable e) {} + // Fall back on a direct call + return __repr__(self); + } + + /** + * {@link Slot#op_getattribute} has signature + * {@link Signature#GETATTR} and provides attribute read access on + * the object and its type. The default instance + * {@code __getattribute__} slot implements dictionary look-up on + * the type and the instance. It is the starting point for + * activating the descriptor protocol. The following order of + * precedence applies when looking for the value of an attribute: + *

    + *
  1. a data descriptor from the dictionary of the type
  2. + *
  3. a value in the instance dictionary of {@code obj}
  4. + *
  5. a non-data descriptor from dictionary of the type
  6. + *
  7. a value from the dictionary of the type
  8. + *
+ * If a matching entry on the type is a data descriptor (case 1), + * but throws {@link AttributeError}, the instance dictionary (if + * any) will be consulted, and the subsequent cases (3 and 4) + * skipped. A non-data descriptor that throws an + * {@link AttributeError} (case 3) causes case 4 to be skipped. + * + * @param obj the target of the get + * @param name of the attribute + * @return attribute value + * @throws AttributeError if no such attribute + * @throws Throwable on other errors, typically from the descriptor + */ + // Compare CPython PyObject_GenericGetAttr in object.c + // Also _PyObject_GenericGetAttrWithDict without the tricks. + static Object __getattribute__(Object obj, String name) throws AttributeError, Throwable { + + PyType objType = PyType.of(obj); + MethodHandle descrGet = null; + + // Look up the name in the type (null if not found). + Object typeAttr = objType.lookup(name); + if (typeAttr != null) { + // Found in the type, it might be a descriptor + Operations typeAttrOps = Operations.of(typeAttr); + descrGet = typeAttrOps.op_get; + if (typeAttrOps.isDataDescr()) { + // typeAttr is a data descriptor so call its __get__. + try { + return descrGet.invokeExact(typeAttr, obj, objType); + } catch (Slot.EmptyException e) { + /* + * Only __set__ or __delete__ was defined. We do not catch + * AttributeError: it's definitive. Suppress trying __get__ again. + */ + descrGet = null; + } + } + } + + /* + * At this stage: typeAttr is the value from the type, or a non-data + * descriptor, or null if the attribute was not found. It's time to + * give the object instance dictionary a chance. + */ + if (obj instanceof DictPyObject) { + Map d = ((DictPyObject)obj).getDict(); + Object instanceAttr = d.get(name); + if (instanceAttr != null) { + // Found something + return instanceAttr; + } + } + + /* + * The name wasn't in the instance dictionary (or there wasn't an + * instance dictionary). We are now left with the results of look-up + * on the type. 
+ */ + if (descrGet != null) { + // typeAttr may be a non-data descriptor: call __get__. + try { + return descrGet.invokeExact(typeAttr, obj, objType); + } catch (Slot.EmptyException e) {} + } + + if (typeAttr != null) { + /* + * The attribute obtained from the meta-type, and that turned out + * not to be a descriptor, is the return value. + */ + return typeAttr; + } + + // All the look-ups and descriptors came to nothing :( + throw Abstract.noAttributeError(obj, name); + } + + /** + * {@link Slot#op_setattr} has signature {@link Signature#SETATTR} + * and provides attribute write access on the object. The default + * instance {@code __setattr__} slot implements dictionary look-up + * on the type and the instance. It is the starting point for + * activating the descriptor protocol. The following order of + * precedence applies when setting the value of an attribute: + *
    + *
  1. call a data descriptor from the dictionary of the type
  2. + *
  3. place a value in the instance dictionary of {@code obj}
  4. + *
+ * If a matching entry on the type is a data descriptor (case 1) , + * but it throws {@link AttributeError}, this is definitive and the + * instance dictionary (if any) will not be updated. + * + * @param obj the target of the set + * @param name of the attribute + * @param value to give the attribute + * @throws AttributeError if no such attribute or it is read-only + * @throws Throwable on other errors, typically from the descriptor + */ + // Compare CPython PyObject_GenericSetAttr in object.c + // Also _PyObject_GenericSetAttrWithDict without the extras. + static void __setattr__(Object obj, String name, Object value) + throws AttributeError, Throwable { + + // Accommodate CPython idiom that set null means delete. + if (value == null) { + // Do this to help porting. Really this is an error. + __delattr__(obj, name); + return; + } + + // Look up the name in the type (null if not found). + Object typeAttr = PyType.of(obj).lookup(name); + if (typeAttr != null) { + // Found in the type, it might be a descriptor. + Operations typeAttrOps = Operations.of(typeAttr); + if (typeAttrOps.isDataDescr()) { + // Try descriptor __set__ + try { + typeAttrOps.op_set.invokeExact(typeAttr, obj, value); + return; + } catch (Slot.EmptyException e) { + // We do not catch AttributeError: it's definitive. + // Descriptor but no __set__: do not fall through. + throw Abstract.readonlyAttributeError(obj, name); + } + } + } + + /* + * There was no data descriptor, so we will place the value in the + * object instance dictionary directly. + */ + if (obj instanceof DictPyObject) { + Map d = ((DictPyObject)obj).getDict(); + try { + // There is a dictionary, and this is a put. + d.put(name, value); + } catch (UnsupportedOperationException e) { + // But the dictionary is unmodifiable + throw Abstract.cantSetAttributeError(obj); + } + } else { + // Object has no dictionary (and won't support one). + if (typeAttr == null) { + // Neither had the type an entry for the name. 
+ throw Abstract.noAttributeError(obj, name); + } else { + /* + * The type had either a value for the attribute or a non-data + * descriptor. Either way, it's read-only when accessed via the + * instance. + */ + throw Abstract.readonlyAttributeError(obj, name); + } + } + } + + /** + * {@link Slot#op_delattr} has signature {@link Signature#DELATTR} + * and provides attribute deletion on the object. The default + * instance {@code __delattr__} slot implements dictionary look-up + * on the type and the instance. It is the starting point for + * activating the descriptor protocol. The following order of + * precedence applies when deleting an attribute: + *
    + *
  1. call a data descriptor from the dictionary of the type
  2. + *
  3. remove an entry from the instance dictionary of + * {@code obj}
  4. + *
+ * If a matching entry on the type is a data descriptor (case 1) , + * but it throws {@link AttributeError}, this is definitive and the + * instance dictionary (if any) will not be updated. + * + * @param obj the target of the delete + * @param name of the attribute + * @throws AttributeError if no such attribute or it is read-only + * @throws Throwable on other errors, typically from the descriptor + */ + // Compare CPython PyObject_GenericSetAttr in object.c + static void __delattr__(Object obj, String name) throws AttributeError, Throwable { + + // Look up the name in the type (null if not found). + Object typeAttr = PyType.of(obj).lookup(name); + if (typeAttr != null) { + // Found in the type, it might be a descriptor. + Operations typeAttrOps = Operations.of(typeAttr); + if (typeAttrOps.isDataDescr()) { + // Try descriptor __delete__ + try { + typeAttrOps.op_delete.invokeExact(typeAttr, obj); + return; + } catch (Slot.EmptyException e) { + // We do not catch AttributeError: it's definitive. + // Data descriptor but no __delete__. + throw Abstract.mandatoryAttributeError(obj, name); + } + } + } + + /* + * There was no data descriptor, so we will remove the name from the + * object instance dictionary directly. + */ + if (obj instanceof DictPyObject) { + Map d = ((DictPyObject)obj).getDict(); + try { + // There is a dictionary, and this is a delete. + Object previous = d.remove(name); + if (previous == null) { + // A null return implies it didn't exist + throw Abstract.noAttributeError(obj, name); + } + } catch (UnsupportedOperationException e) { + // But the dictionary is unmodifiable + throw Abstract.cantSetAttributeError(obj); + } + } else { + // Object has no dictionary (and won't support one). + if (typeAttr == null) { + // Neither has the type an entry for the name. + throw Abstract.noAttributeError(obj, name); + } else { + /* + * The type had either a value for the attribute or a non-data + * descriptor. 
Either way, it's read-only when accessed via the + * instance. + */ + throw Abstract.readonlyAttributeError(obj, name); + } + } + } + +} diff --git a/core/src/main/java/org/python/core/PyBool.java b/core/src/main/java/org/python/core/PyBool.java new file mode 100644 index 000000000..eb555fcc9 --- /dev/null +++ b/core/src/main/java/org/python/core/PyBool.java @@ -0,0 +1,77 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** + * The Python {@code bool} object. The only instances of + * {@code bool} in Python are {@code False} and {@code True}, + * represented by Java {@code Boolean.FALSE} and + * {@code Boolean.TRUE}, and there are no sub-classes. (Rogue + * instances of Java {@code Boolean} will generally behave as + * {@code False} or {@code True} but may fail identity tests.) + */ +public final class PyBool { + + /** The type of Python object this class implements. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("bool", MethodHandles.lookup()) // + .canonical(Boolean.class) // + .base(PyLong.TYPE) // + .flagNot(PyType.Flag.BASETYPE)); + + private PyBool() {} // enforces the doubleton :) + + // special methods ------------------------------------------------ + + static Object __repr__(Boolean self) { return self ? "True" : "False"; } + + static Object __and__(Boolean v, Object w) { + if (w instanceof Boolean) + return v ? w : v; + else + // w is not a bool, go arithmetic. + return PyLongMethods.__and__(v, w); + } + + static Object __rand__(Boolean w, Object v) { + if (v instanceof Boolean) + return w ? v : w; + else + // v is not a bool, go arithmetic. + return PyLongMethods.__rand__(w, v); + } + + static Object __or__(Boolean v, Object w) { + if (w instanceof Boolean) + return v ? v : w; + else + // w is not a bool, go arithmetic. 
+ return PyLongMethods.__or__(v, w); + } + + static Object __ror__(Boolean w, Object v) { + if (v instanceof Boolean) + return w ? w : v; + else + // v is not a bool, go arithmetic. + return PyLongMethods.__ror__(w, v); + } + + static Object __xor__(Boolean v, Object w) { + if (w instanceof Boolean) + return v ^ ((Boolean)w); + else + // w is not a bool, go arithmetic. + return PyLongMethods.__xor__(v, w); + } + + static Object __rxor__(Boolean w, Object v) { + if (v instanceof Boolean) + return ((Boolean)v) ^ w; + else + // v is not a bool, go arithmetic. + return PyLongMethods.__rxor__(w, v); + } +} diff --git a/core/src/main/java/org/python/core/PyBytes.java b/core/src/main/java/org/python/core/PyBytes.java new file mode 100644 index 000000000..e11b85a94 --- /dev/null +++ b/core/src/main/java/org/python/core/PyBytes.java @@ -0,0 +1,384 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.nio.ByteBuffer; +import java.util.AbstractList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.IntConsumer; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.PySequence.Delegate; +import org.python.core.PySlice.Indices; +import org.python.core.stringlib.ByteArrayBuilder; + +/** The Python {@code bytes} object. */ +public class PyBytes extends AbstractList implements CraftedPyObject { + + /** The type of Python object this class implements. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("bytes", MethodHandles.lookup())); + private static final byte[] EMPTY_BYTE_ARRAY = new byte[] {}; + static final PyBytes EMPTY = new PyBytes(EMPTY_BYTE_ARRAY); + + /** The Python type of this instance. */ + protected final PyType type; + + /** The elements of the {@code bytes}. 
*/ + final byte[] value; + + /** + * Helper to implement {@code __getitem__} and other index-related + * operations. + */ + private BytesDelegate delegate = new BytesDelegate(); + + /** + * As {@link #PyBytes(byte[])} for Python sub-class specifying + * {@link #type}. Construct an instance of {@code PyBytes} or a + * sub-class, from a given array of bytes, with the option to re-use + * that array as the implementation. If the actual array is + * re-used the caller must give up ownership and never modify it + * after the call. See {@link #concatBytes} for a + * correct use. + * + * @param type sub-type for which this is being created + * @param iPromiseNotToModify if {@code true}, the array becomes the + * implementation array, otherwise the constructor takes a copy. + * @param value the array of the bytes to contain + */ + private PyBytes(PyType type, boolean iPromiseNotToModify, byte[] value) { + this.type = type; + if (value.length == 0) + this.value = EMPTY_BYTE_ARRAY; + else if (iPromiseNotToModify) + this.value = value; + else + this.value = Arrays.copyOf(value, value.length); + } + + /** + * As {@link #PyBytes(byte[])} for Python sub-class specifying + * {@link #type}. + * + * @param type sub-type for which this is being created + * @param value of the bytes + */ + protected PyBytes(PyType type, byte[] value) { this(type, false, value); } + + /** + * As {@link #PyBytes(int...)} for Python sub-class specifying + * {@link #type}. + * + * @param type sub-type for which this is being created + * @param value of the bytes + */ + protected PyBytes(PyType type, int... value) { + this.type = type; + int n = value.length; + if (n == 0) + this.value = EMPTY_BYTE_ARRAY; + else { + byte[] b = new byte[n]; + for (int i = 0; i < n; i++) { b[i] = (byte)value[i]; } + this.value = b; + } + } + + /** + * Construct a Python {@code bytes} object from bytes treated as + * unsigned.
+ * + * @param value of the bytes + */ + public PyBytes(byte[] value) { this(TYPE, false, value); } + + /** + * Construct a Python {@code bytes} object from Java {@code int}s + * treated as unsigned. + * + * @param value of the bytes + */ + public PyBytes(int... value) { this(TYPE, value); } + + /** + * Construct a Python {@code bytes} object from a + * {@link ByteArrayBuilder}. This provides a safe, zero-copy way to + * supply the contents from an algorithm. + * + * @param value of the bytes + */ + public PyBytes(ByteArrayBuilder value) { this(TYPE, true, value.take()); } + + /** + * Unsafely wrap an array of bytes as a {@code PyBytes}. The caller + * must not hold a reference to the argument array (and definitely + * not manipulate the contents). + * + * @param value to wrap as a {@code bytes} + * @return the {@code bytes} + */ + private static PyBytes wrap(byte[] value) { return new PyBytes(TYPE, true, value); } + + @Override + public PyType getType() { return type; } + + // Special methods ------------------------------------------------ + + @SuppressWarnings("unused") + private Object __add__(Object other) throws Throwable { return delegate.__add__(other); } + + @SuppressWarnings("unused") + private Object __radd__(Object other) throws Throwable { return delegate.__radd__(other); } + + @SuppressWarnings("unused") + private Object __mul__(Object n) throws Throwable { return delegate.__mul__(n); } + + @SuppressWarnings("unused") + private Object __rmul__(Object n) throws Throwable { return delegate.__mul__(n); } + + @SuppressWarnings("unused") + private int __len__() { return value.length; } + + @SuppressWarnings("unused") + private Object __getitem__(Object item) throws Throwable { return delegate.__getitem__(item); } + + // AbstractList methods ------------------------------------------- + + @Override + public Integer get(int i) { return 0xff & value[i]; } + + @Override + public int size() { return value.length; } + + @Override + public Iterator iterator() { + 
return new Iterator() { + + private int i = 0; + + @Override + public boolean hasNext() { return i < value.length; } + + @Override + public Integer next() { return 0xff & value[i++]; } + }; + } + + // Java API ------------------------------------------------------- + + /** + * Expose the contents of the object as a read-only + * {@code ByteBuffer}. + * {@code This is temporary API until we implement the buffer interface.} + * + * @return a Java NIO buffer + */ + public ByteBuffer getNIOByteBuffer() { return ByteBuffer.wrap(value).asReadOnlyBuffer(); } + + /** + * Expose the contents of the object as a read-only sequence of + * {@code int}. + * + * @return sequence of {@code int} + */ + public PySequence.OfInt asSequence() { return delegate; } + + /** + * Return the contents of the object as an array of of {@code byte}. + * + * @return array of {@code byte} + */ + public byte[] asByteArray() { + return Arrays.copyOf(value, value.length); + } + + // Plumbing ------------------------------------------------------- + + /** + * A class to act as the delegate implementing {@code __getitem__} + * and other index-related operations. By inheriting {@link Delegate + * PySequence.Delegate} in this inner class, we obtain boilerplate + * implementation code for slice translation and range checks. We + * need only specify the work specific to {@link PyBytes} instances. 
+ */ + class BytesDelegate extends PySequence.Delegate implements PySequence.OfInt { + + @Override + public int length() { return value.length; } + + @Override + public PyType getType() { return type; } + + @Override + public Integer getItem(int i) { return 0xff & value[i]; } + + @Override + public int getInt(int i) { return 0xff & value[i]; } + + @Override + public PyBytes getSlice(Indices slice) { + byte[] v; + if (slice.step == 1) + v = Arrays.copyOfRange(value, slice.start, slice.stop); + else { + v = new byte[slice.slicelength]; + int i = slice.start; + for (int j = 0; j < slice.slicelength; j++) { + v[j] = value[i]; + i += slice.step; + } + } + return wrap(v); + } + + @Override + PyBytes add(Object ow) throws OutOfMemoryError, NoConversion, Throwable { + return concatBytes(delegate, adapt(ow)); + } + + @Override + PyBytes radd(Object ov) throws OutOfMemoryError, NoConversion, Throwable { + return concatBytes(adapt(ov), delegate); + } + + @Override + PyBytes repeat(int n) throws OutOfMemoryError, Throwable { + int m = value.length; + if (n == 0) + return EMPTY; + else if (n == 1 || m == 0) + return PyBytes.this; + else { + byte[] b = new byte[n * m]; + for (int i = 0, p = 0; i < n; i++, p += m) { System.arraycopy(value, 0, b, p, m); } + return wrap(b); + } + } + + // PySequence.OfInt interface -------------------------------- + + @Override + public Spliterator.OfInt spliterator() { return new BytesSpliterator(); } + + @Override + public Iterator iterator() { return PyBytes.this.iterator(); } + + @Override + public IntStream asIntStream() { return StreamSupport.intStream(spliterator(), false); } + + @Override + public int compareTo(PySequence.Delegate other) { + Iterator ib = other.iterator(); + for (int a : value) { + if (ib.hasNext()) { + int b = ib.next(); + // if a != b, then we've found an answer + if (a > b) + return 1; + else if (a < b) + return -1; + } else + // value has not run out, but other has. We win. 
+ return 1; + } + /* + * The sequences matched over the length of value. The other is the + * winner if it still has elements. Otherwise its a tie. + */ + return ib.hasNext() ? -1 : 0; + } + + /** + * Compare for equality with a sequence. This is a little simpler + * than {@code compareTo}. + * + * @param b another + * @return whether values equal + */ + boolean equals(BytesDelegate b) { + // Lengths must be equal + if (length() != b.length()) { return false; } + // Scan the codes points in this.value and b + Iterator ib = b.iterator(); + for (int c : value) { if (c != ib.next()) { return false; } } + return true; + } + } + + private static PyBytes concatBytes(PySequence.OfInt v, PySequence.OfInt w) + throws OutOfMemoryError { + int n = v.length(), m = w.length(); + byte[] b = new byte[n + m]; + IntStream.concat(v.asIntStream(), w.asIntStream()).forEach(new ByteStore(b, 0)); + return wrap(b); + } + + /** + * Inner class defining the return type of + * {@link PyBytes#spliterator()}. We need this only because + * {@link #tryAdvance(IntConsumer) tryAdvance} deals in java + * {@code int}s, while our array is {@code byte[]}. There is no + * ready-made {@code Spliterator.OfByte}, and if there were, it + * would return signed values. + */ + private class BytesSpliterator extends Spliterators.AbstractIntSpliterator { + + static final int flags = Spliterator.IMMUTABLE | Spliterator.SIZED | Spliterator.ORDERED; + private int i = 0; + + BytesSpliterator() { super(value.length, flags); } + + @Override + public boolean tryAdvance(IntConsumer action) { + if (i < value.length) { + action.accept(0xff & value[i++]); + return true; + } else + return false; + } + } + + /** + * A consumer of primitive int values that stores them in an array + * given it at construction. 
+ */ + private static class ByteStore implements IntConsumer { + + private final byte[] b; + private int i = 0; + + ByteStore(byte[] bytes, int start) { + this.b = bytes; + this.i = start; + } + + @Override + public void accept(int value) { b[i++] = (byte)value; } + } + + /** + * Adapt a Python object to a sequence of Java {@code int} values or + * throw an exception. If the method throws the special exception + * {@link NoConversion}, the caller must catch it and deal with it, + * perhaps by throwing a {@link TypeError}. A binary operation will + * normally return {@link Py#NotImplemented} in that case. + *

+ * Note that implementing {@link PySequence.OfInt} is not enough, + * which other types may, but be incompatible in Python. + * + * @param v to wrap or return + * @return adapted to a sequence + * @throws NoConversion if {@code v} is not a Python {@code str} + */ + static BytesDelegate adapt(Object v) throws NoConversion { + // Check against supported types, most likely first + if (v instanceof PyBytes /* || v instanceof PyByteArray */) + return ((PyBytes)v).delegate; + throw PyObjectUtil.NO_CONVERSION; + } +} diff --git a/core/src/main/java/org/python/core/PyCell.java b/core/src/main/java/org/python/core/PyCell.java new file mode 100644 index 000000000..d32b21067 --- /dev/null +++ b/core/src/main/java/org/python/core/PyCell.java @@ -0,0 +1,43 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.util.function.Supplier; + +/** Holder for objects appearing in the closure of a function. */ +public class PyCell implements Supplier, CraftedPyObject { + + /** The Python type {@code cell}. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("cell", MethodHandles.lookup()) + // Type admits no Python subclasses. + .flagNot(PyType.Flag.BASETYPE)); + + /** The object currently held. */ + Object obj; + + /** + * Construct a cell to hold the object. + * + * @param obj to hold + */ + PyCell(Object obj) { this.obj = obj; } + + /** Handy constant where no cells are neeed in a frame. 
*/ + static final PyCell[] EMPTY_ARRAY = new PyCell[0]; + + // Java API ------------------------------------------------------- + + @Override + public PyType getType() { return TYPE; } + + @Override + public Object get() { return obj; } + + @Override + public String toString() { return Py.defaultToString(this); } + + // slot functions ------------------------------------------------- + + @SuppressWarnings("unused") + private Object __repr__() { return String.format("", obj); } +} diff --git a/core/src/main/java/org/python/core/PyCode.java b/core/src/main/java/org/python/core/PyCode.java new file mode 100644 index 000000000..a162e8d50 --- /dev/null +++ b/core/src/main/java/org/python/core/PyCode.java @@ -0,0 +1,571 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.stream.Stream; + +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.Member; + +/** + * The Python {@code code} object. A {@code code} object describes + * the layout of a {@link PyFrame}, and is a factory for frames of + * matching type. + *

+ * In this implementation, while there is only one Python type + * {@code code}, we allow alternative implementations of it. In + * particular, we provide for a code object that is the result of + * compiling to JVM byte code, in addition to the expected support + * for Python byte code. + *

+ * The abstract base {@code PyCode} has a need to store fewer + * attributes than the concrete CPython {@code code} object, where + * the only realisation holds a block of byte code with broadly + * similar needs from one version to the next. We provide + * get-methods matching all those of CPython, and each concrete + * class can override them where meaningful. + */ +// Compare CPython PyCodeObject in codeobject.c +public abstract class PyCode implements CraftedPyObject { + + /** The Python type {@code code}. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("code", MethodHandles.lookup()).flagNot(PyType.Flag.BASETYPE)); + /* + * It is not easy to say, while there is only one concrete sub-class + * to learn from, which attributes may safely be in the base, and + * which implemented in the sub-class to suit the local needs of a + * definition in CPython or Java byte code. + */ + + /** + * Characteristics of a {@code PyCode} (as CPython co_flags). These + * are not all relevant to all code types. + */ + // XXX Consider not having this, only flags. + enum Trait { + OPTIMIZED, NEWLOCALS, VARARGS, VARKEYWORDS, NESTED, GENERATOR, COROUTINE, + ITERABLE_COROUTINE, ASYNC_GENERATOR + } + + /** Characteristics of this {@code PyCode} (as CPython co_flags). */ + final EnumSet traits; + + /** Source file from which compiled. */ + @Member("co_filename") + final String filename; + /** Name of function etc. */ + @Member("co_name") + final String name; + /** Fully qualified name of function etc. */ + @Member("co_qualname") + final String qualname; + + /** {@code int} bitmap of code traits compatible with CPython. */ + final int flags; + + /** Number of positional parameters (not counting {@code *args}). */ + @Member("co_argcount") + final int argcount; + /** Number of positional-only parameters. */ + @Member("co_posonlyargcount") + final int posonlyargcount; + /** Number of keyword-only parameters.
*/ + @Member("co_kwonlyargcount") + final int kwonlyargcount; + + /** First source line number of this code. */ + final int firstlineno; + + /** Constant objects needed by the code. Not {@code null}. */ + final Object[] consts; + + /** Names referenced in the code. Not {@code null}. */ + final String[] names; + + // Bit masks appearing in flags. + // XXX Some of these should be CPython-specific. + /** The code uses fast local variables, not a map. */ + public static final int CO_OPTIMIZED = 0x0001; + /** A new {@code dict} should be created for local variables. */ + // NEWLOCALS is never acted on in CPython (but set for functions) + public static final int CO_NEWLOCALS = 0x0002; + /** The function has a collector for excess positional arguments */ + public static final int CO_VARARGS = 0x0004; + /** The function has a collector for excess keyword arguments */ + public static final int CO_VARKEYWORDS = 0x0008; + /** The code is for a nested function. */ + public static final int CO_NESTED = 0x0010; + /** + * The code is for a generator function, i.e. a generator object is + * returned when the code object is executed. + */ + public static final int CO_GENERATOR = 0x0020; + + /** + * The code is for a coroutine function (defined with + * {@code async def}). When the code object is executed it returns a + * coroutine object. + */ + public static final int CO_COROUTINE = 0x0080; + /** + * The flag is used to transform generators into generator-based + * coroutines. Generator objects with this flag can be used in + * an {@code await} expression, and can {@code yield from} coroutine + * objects. See PEP 492 for more details. + */ + public static final int CO_ITERABLE_COROUTINE = 0x0100; + /** + * The code object is an asynchronous generator function. When the + * code object is executed it returns an asynchronous generator + * object. See PEP 525 for more details. + */ + public static final int CO_ASYNC_GENERATOR = 0x0200; + + // Construct with arrays not tuples.
+ /** + * Full constructor. The {@link #traits} of the code are supplied + * here as CPython reports them: as a bit array in an integer, but + * the constructor makes a conversion, and it is the {@link #traits} + * which should be used at the Java level. + *

+ * Where the parameters map directly to an attribute of the code + * object, that is the best way to explain them. Note that this + * factory method is tuned to the needs of {@code marshal.read} + * where the serialised form makes no secret of the version-specific + * implementation details. + * + * @param filename {@code co_filename} + * @param name {@code co_name} + * @param qualname {@code co_qualname} + * @param flags {@code co_flags} a bitmap of traits + * + * @param firstlineno {@code co_firstlineno} + * + * @param consts {@code co_consts} + * @param names {@code co_names} + * + * @param argcount {@code co_argcount} the number of positional + * parameters (including positional-only arguments and arguments + * with default values) + * @param posonlyargcount {@code co_posonlyargcount} the number of + * positional-only arguments (including arguments with default + * values) + * @param kwonlyargcount {@code co_kwonlyargcount} the number of + * keyword-only arguments (including arguments with default + * values) + */ + public PyCode( // + // Grouped as _PyCodeConstructor in pycore_code.h + // Metadata + String filename, String name, String qualname, // + int flags, + // The code (not seeing actual byte code in abstract base) + int firstlineno, // ??? sensible given filename + // Used by the code + Object[] consts, String[] names, // + // Parameter navigation with varnames + int argcount, int posonlyargcount, int kwonlyargcount) { + this.argcount = argcount; + this.posonlyargcount = posonlyargcount; + this.kwonlyargcount = kwonlyargcount; + + this.flags = flags; + this.consts = consts; + + this.names = names; + + this.filename = filename; + this.name = name; + this.qualname = qualname; + this.firstlineno = firstlineno; + + this.traits = traitsFrom(flags); + } + + /** + * Traits characterising local variables of the frame this code + * object will produce. + */ + enum VariableTrait { + /** + * Belongs in {@code co_varnames}. 
For legacy reasons this means + * parameters to the function (even if they are also cell + * variables), and other local variables that are not cells (or + * free). + */ + PLAIN, + /** + * Belongs in {@code co_cellvars}. This means the non-free cell + * variables, even if they are also parameters to the function, in + * which case they have the {@link #PLAIN} trait too. + */ + CELL, + /** + * Belongs in {@code co_freevars}. These are just the free cell + * variables. (They cannot also be parameters.) + */ + FREE + } + + /** + * Interface on a store of information about the variables required + * by a code object and where they will be stored in the frame it + * creates. It is used to initialise + */ + interface Layout { + + /** @return total number of local variables. */ + default int size() { + // This can't overflow since it is the size of an array. + return (int)localnames().count(); + } + + /** + * Return name of one local frame variable. + * + * @param index of variable + * @return name of one variable. + */ + String name(int index); + + /** + * Return the {@link VariableTrait}s of the variable at a given + * index . + * + * @param index of variable + * @return traits of the local variable + */ + EnumSet traits(int index); + + /** + * Return a stream of the names of all the local variables These are + * the parameters and then the other plain, cell and free variables, + * but occurring only once each (whereas {@code co_cellvars} will + * repeat names from {@code co_varnames} if they are parameters. + * + * @return names of all local variables. + */ + Stream localnames(); + + /** + * Return a stream of the names of variables to include in + * {@code co_varnames}. These are the parameters and then the plain + * (non-cell, non-free) variables. Note that some of the arguments + * may be cell variables. + * + * @return names of non-cell and parameters variables. 
+ */ + Stream varnames(); + + /** + * Return a stream of the names of variables to include in + * {@code co_cellvars}. These are the variables defined by this + * {@code code} object and stored as cells. Note that some of the + * parameters may be cell variables. + * + * @return names of cell variables (may be parameters). + */ + Stream cellvars(); + + /** + * Return a stream of the names of variables to include in + * {@code co_freevars}. These are the variables stored as cells but + * defined in another {@code code} object. + * + * @return names of free variables. + */ + Stream freevars(); + + /** @return the length of {@code co_varnames} */ + default int nvarnames() { + // This can't overflow since it is the size of an array. + return (int)varnames().count(); + } + + /** @return the length of {@code co_cellvars} */ + default int ncellvars() { + // This can't overflow since it is the size of an array. + return (int)cellvars().count(); + } + + /** @return the length of {@code co_freevars} */ + default int nfreevars() { + // This can't overflow since it is the size of an array. + return (int)freevars().count(); + } + } + + /** + * Describe the layout of the frame local variables (at least the + * arguments), cell and free variables. {@link #co_varnames}, + * {@link #co_cellvars} and {@link #co_freevars} are derived from + * this, and the signature of the code as a function. + * + * @return a {@link Layout} object describing the variables + */ + // CPython specific at first glance but not after reflection. 
+ // Compare CPython 3.11 localsplusnames and localspluskinds + abstract Layout layout(); + + // Attributes ----------------------------------------------------- + + @SuppressWarnings("static-method") + @Getter + int co_stacksize() { return 0; } + + @SuppressWarnings("static-method") + @Getter + PyBytes co_code() { return PyBytes.EMPTY; } + + @SuppressWarnings("static-method") + @Getter + PyBytes co_lnotab() { return PyBytes.EMPTY; } + + /** + * Get {@link #consts} as a {@code tuple}. + * + * @return {@link #consts} as a {@code tuple} + */ + @Getter + PyTuple co_consts() { return PyTuple.from(consts); } + + /** + * Get {@link #names} as a {@code tuple}. + * + * @return {@link #names} as a {@code tuple} + */ + @Getter + PyTuple co_names() { return PyTuple.from(names); } + + /** + * Get {@code co_varnames} as a {@code tuple}. + * + * @return {@code co_varnames} as a {@code tuple} + */ + @Getter + PyTuple co_varnames() { return new PyTuple(layout().varnames()); } + + /** + * Get {@code co_cellvars} as a {@code tuple}. + * + * @return {@code co_cellvars} as a {@code tuple} + */ + @Getter + PyTuple co_cellvars() { return new PyTuple(layout().cellvars()); } + + /** + * Get {@code co_freevars} as a {@code tuple}. + * + * @return {@code co_freevars} as a {@code tuple} + */ + @Getter + PyTuple co_freevars() { return new PyTuple(layout().freevars()); } + + // slot methods -------------------------------------------------- + + @SuppressWarnings("unused") + private Object __repr__() { return toString(); } + + @SuppressWarnings("unused") + private Object __str__() { return toString(); } + + // Java API ------------------------------------------------------- + + @Override + public PyType getType() { return TYPE; } + + @Override + // Compare CPython code_repr in codeobject.c + public String toString() { + int lineno = firstlineno != 0 ? 
firstlineno : -1; + String file = filename, q = "\""; + if (file == null) { + file = "???"; + q = ""; + } + return String.format("", name, Py.id(this), q, + file, q, lineno); + } + + /** + * Create a {@code PyFunction} that will execute this + * {@code PyCode}. The strongly-typed {@code defaults}, + * {@code kwdefaults} , {@code closure} and {@code annotations} may + * be {@code null} if they would otherwise be empty. + * {@code annotations} is always exposed as a {@code dict}, but may + * be presented to the constructor as a {@code dict} or + * {@code tuple} of keys and values (or {@code null}). + * + * @param interpreter providing the module context + * @param globals name space to treat as global variables + * @param defaults default positional argument values or + * {@code null} + * @param kwdefaults default keyword argument values or {@code null} + * @param annotations type annotations ({@code dict}, {@code null} + * or maybe {@code tuple}) + * @param closure variables referenced but not defined here, must be + * size expected by code or {@code null} if empty. + * @return the function from this code + */ + abstract PyFunction createFunction(Interpreter interpreter, PyDict globals, + Object[] defaults, PyDict kwdefaults, Object annotations, PyCell[] closure); + + /** + * Create a {@code PyFunction} that will execute this {@code PyCode} + * (adequate for module-level code). + * + * @param interpreter providing the module context + * @param globals name space to treat as global variables + * @return the function + */ + // Compare CPython PyFunction_New in funcobject.c + // ... with the interpreter required by architecture + PyFunction createFunction(Interpreter interpreter, PyDict globals) { + return createFunction(interpreter, globals, Py.EMPTY_ARRAY, Py.dict(), Py.dict(), + PyCell.EMPTY_ARRAY); + } + + /** + * Return the total space in a frame of a code object, that must be + * reserved for arguments. 
This is also the size of the layout array + * appearing as an argument to constructors. + * + * @return total space in frame for arguments + */ + int totalargs() { return totalargs(argcount, flags); } + + private static final int CO_VARARGS_SHIFT = // 2 + Integer.numberOfTrailingZeros(CO_VARARGS); + private static final int CO_VARKEYWORDS_SHIFT =// 3 + Integer.numberOfTrailingZeros(CO_VARKEYWORDS); + + /** + * From the values of {@code co_argcount} and {@code co_flags} (in + * practice, as they are de-marshalled), compute the total space in + * a frame of a code object, that must be reserved for arguments. + * This is also the size of the layout array appearing as an + * argument to certain constructors. + * + * @param argcount argument count excluding collector parameters. + * @param flags bit map of code traits + * @return total space in frame for arguments + */ + static int totalargs(int argcount, int flags) { + return argcount + (flags >>> CO_VARARGS_SHIFT & 1) + (flags >>> CO_VARKEYWORDS_SHIFT & 1); + } + + // Plumbing ------------------------------------------------------- + + /** Empty (zero-length) array of {@code String}. */ + protected static final String[] EMPTY_STRING_ARRAY = Py.EMPTY_STRING_ARRAY; + + private static final String NAME_TUPLES_STRING = + "name tuple must contain only strings, not '%s' (in %s)"; + + /** + * Check that all the argument is a tuple and that all objects in it + * are {@code str}, and return them as an array of {@code String}. 
+ * + * @param v of names + * @param tupleName the name of the argument (for error production) + * @return the names as {@code String[]} + */ + protected static String[] names(Object v, String tupleName) { + PyTuple tuple = castTuple(v, tupleName); + String[] s = new String[tuple.size()]; + int i = 0; + for (Object name : tuple) { + s[i++] = PyUnicode.asString(name, + o -> Abstract.typeError(NAME_TUPLES_STRING, o, tupleName)); + } + return s; + } + + /** + * @param v to check is a Python {@code bytes} + * @param arg name of argument (for message only) + * @return {@code v} + * @throws TypeError if {@code v} cannot be cast to {@code bytes} + */ + protected static PyBytes castBytes(Object v, String arg) throws TypeError { + if (v instanceof PyBytes b) + return b; + else + throw Abstract.argumentTypeError("code", arg, "bytes", v); + } + + /** + * @param v to check is a Python {@code tuple} + * @param arg name of argument (for message only) + * @return {@code v} + * @throws TypeError if {@code v} cannot be cast to {@code tuple} + */ + protected static PyTuple castTuple(Object v, String arg) { + if (v instanceof PyTuple t) + return t; + else + throw Abstract.argumentTypeError("code", arg, "tuple", v); + } + + /** + * Cast a Python {@code str} to a Java String or raise a + * {@code TypeError} mentioning an argument name. + * + * @param v to check and cast/convert + * @param argName the name of the argument (for error production) + * @return {@code v} + */ + protected static String castString(Object v, String argName) { + return PyUnicode.asString(v, o -> Abstract.argumentTypeError("code", argName, "str", o)); + } + + /** + * Convert a CPython-style {@link #flags} specifier to + * {@link #traits}. + */ + private static EnumSet traitsFrom(int flags) { + ArrayList traits = new ArrayList<>(); + for (int m = 1; flags != 0; m <<= 1) { + switch (m & flags) { + case 0: + break; // When bit not set in flag. 
+ case CO_OPTIMIZED: + traits.add(Trait.OPTIMIZED); + break; + case CO_NEWLOCALS: + traits.add(Trait.NEWLOCALS); + break; + case CO_VARARGS: + traits.add(Trait.VARARGS); + break; + case CO_VARKEYWORDS: + traits.add(Trait.VARKEYWORDS); + break; + case CO_NESTED: + traits.add(Trait.NESTED); + break; + case CO_GENERATOR: + traits.add(Trait.GENERATOR); + break; + case CO_COROUTINE: + traits.add(Trait.COROUTINE); + break; + case CO_ITERABLE_COROUTINE: + traits.add(Trait.ITERABLE_COROUTINE); + break; + case CO_ASYNC_GENERATOR: + traits.add(Trait.ASYNC_GENERATOR); + break; + default: + throw new IllegalArgumentException("Undefined bit set in 'flags' argument"); + } + // Ensure the bit we just tested is clear + flags &= ~m; + } + return traits.isEmpty() ? EnumSet.noneOf(Trait.class) : EnumSet.copyOf(traits); + } +} diff --git a/core/src/main/java/org/python/core/PyComplex.java b/core/src/main/java/org/python/core/PyComplex.java new file mode 100644 index 000000000..b19f8143d --- /dev/null +++ b/core/src/main/java/org/python/core/PyComplex.java @@ -0,0 +1,14 @@ +package org.python.core; + +/** + * This is a placeholder to satisfy references in implementations of + * {@code __complex__} preserved from Jython 2. + */ +public class PyComplex { + double real, imag; + + public PyComplex(double real, double imag) { + this.real = real; + this.imag = imag; + } +} diff --git a/core/src/main/java/org/python/core/PyDict.java b/core/src/main/java/org/python/core/PyDict.java new file mode 100644 index 000000000..49570ee03 --- /dev/null +++ b/core/src/main/java/org/python/core/PyDict.java @@ -0,0 +1,605 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.python.base.InterpreterError; + +/** + * The Python {@code dict} object. The Java API is provided directly + * by the base class implementing {@code Map}, while the Python API + * has been implemented on top of the Java one. + */ +public class PyDict extends AbstractMap implements CraftedPyObject { + + /** The type of Python object this class implements. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("dict", MethodHandles.lookup())); + + /** The Python type of this instance. */ + protected final PyType type; + + /** The dictionary as a hash map preserving insertion order. */ + private final LinkedHashMap map = new LinkedHashMap<>(); + + /** + * Construct an empty dictionary of a specified Python sub-class of + * {@code dict}. + * + * @param type sub-type for which this is being created + */ + protected PyDict(PyType type) { this.type = type; } + + /** + * Construct a dictionary of a specified Python sub-class of + * {@code dict}, filled by copying from a given Java map. + * + * @param key type of incoming map + * @param value type of incoming map + * @param type sub-type for which this is being created + * @param map Java map from which to copy + */ + protected PyDict(PyType type, Map map) { + this(type); + // Cannot bulk add since keys may need Pythonising + for (Map.Entry e : map.entrySet()) { put(e.getKey(), e.getValue()); } + } + + /** Construct an empty {@code dict}. */ + public PyDict() { this(TYPE); } + + /** + * Create a {@code dict} and add entries from key-value pairs that + * are supplied as successive values in an array slice. (This method + * supports the CPython byte code interpreter.) 
+ * + * @param stack array containing key-value pairs + * @param start index of first key + * @param count number of pairs + * @return a new {@code dict} + */ + static PyDict fromKeyValuePairs(Object[] stack, int start, int count) { + PyDict dict = new PyDict(TYPE); + for (int i = 0, p = start; i < count; i++) { dict.put(stack[p++], stack[p++]); } + return dict; + } + + /** + * Create a {@code dict} and add entries from key-value pairs. + * + * @param tuples specifying key-value pairs to enter + * @return new {@code dict} + */ + public static PyDict fromKeyValuePairs(PyTuple... tuples) { + PyDict dict = new PyDict(TYPE); + for (int i = 0; i < tuples.length; i++) { + PyTuple t = tuples[i]; + if (t.size() == 2) { + dict.put(t.get(0), t.get(1)); + } else { + throw new ValueError(KV_TUPLE_LENGTH, i, t.size()); + } + } + return dict; + } + + @Override + public PyType getType() { return type; } + + @Override + public String toString() { return Py.defaultToString(this); } + + /** + * Override {@code Map.get} to give keys Python semantics. + * + * @param key whose associated value is to be returned + * @return value at {@code key} or {@code null} if not found + */ + @Override + public Object get(Object key) { return map.get(toKey(key)); } + + /** + * Override {@code Map.put} to give keys Python semantics. + * + * @param key with which the specified value is to be associated + * @param value to be associated + * @return previous value associated + */ + @Override + public Object put(Object key, Object value) { return map.put(toKey(key), value); } + + /** + * Override {@code Map.putIfAbsent} to give keys Python semantics. + * + * @param key with which the specified value is to be associated + * @param value to be associated + * @return previous value associated + */ + @Override + public Object putIfAbsent(Object key, Object value) { + return map.putIfAbsent(toKey(key), value); + } + + /** Modes for use with {@link #merge(Object, MergeMode)}. 
*/ + enum MergeMode { + /** + * Ignore the operation if the key is already in the map. (CPython + * mode 0: first occurrence wins). + */ + IF_ABSENT, + /** + * Overwrite any existing entry with the same key. (CPython mode 1: + * last occurrence wins). + */ + PUT, + /** + * Raise a {@link KeyError} if the key is already in the map. + * (CPython mode 2.) + */ + UNIQUE + } + + /** + * Optimised get on this {@code PyDict} and some other {@code Map}, + * specifically to support the {@code LOAD_GLOBAL} opcode. We avoid + * the cost of a second key holder if the second mapping is also a + * {@code PyDict}, which it commonly is. + * + * @param builtins final mapping to search + * @param key to find + * @return found object or {@code null} + */ + // Compare CPython _PyDict_LoadGlobal in dictobject.c + final Object loadGlobal(Map builtins, String key) { + Key k = toKey(key); + Object v = map.get(k); + if (v == null) { + if (builtins instanceof PyDict) { + v = ((PyDict)builtins).map.get(k); + } else { + v = builtins.get(key); + } + } + return v; + } + + /** + * Update this dictionary from a sequence of key-value pairs, in the + * chosen mode. The sequence is any iterable object producing + * iterable objects of length 2, typically a {@code list} of + * {@code tuple}s. + * + * @param seq sequence of KV pairs to merge + * @param mode merge policy + * @throws TypeError + * @throws KeyError + * @throws Throwable + */ + // Compare CPython PyDict_MergeFromSeq2 in dictobject.c + void mergeFromSeq(Object seq, MergeMode mode) throws TypeError, KeyError, Throwable { + + // Python-iterate over the sequence + Object it = Abstract.getIterator(seq); + for (int i = 0;; i++) { + Object item = Abstract.next(it); // seq[i] + // Convert item to List and verify length 2. + try { + List fast = PySequence.fastList(item, null); + if (fast.size() != 2) { throw new ValueError(KV_TUPLE_LENGTH, i, fast.size()); } + // Update/merge with this (key, value) pair. 
+ mergeSingle(fast.get(0), fast.get(1), mode); + } catch (TypeError te) { + /* + * We could not give PySequence.fastList a custom exception supplier + * because the message refers to non-final i. + */ + throw new TypeError(CANNOT_CONVERT_SEQ, i); + } + } + } + + // slot functions ------------------------------------------------- + + @SuppressWarnings("unused") + private Object __repr__() throws Throwable { return PyObjectUtil.mapRepr(this); } + + @SuppressWarnings("unused") + private Object __ne__(Object o) { + if (TYPE.check(o)) { + return !compareEQ((PyDict)o); + } else { + return Py.NotImplemented; + } + } + + @SuppressWarnings("unused") + private Object __eq__(Object o) { + if (TYPE.check(o)) { + return compareEQ((PyDict)o); + } else { + return Py.NotImplemented; + } + } + + @SuppressWarnings("unused") + private Object __getitem__(Object key) { + // This may be over-simplifying things but ... :) + return get(key); + } + + @SuppressWarnings("unused") + private void __setitem__(Object key, Object value) { + // This may be over-simplifying things but ... :) + put(key, value); + } + + @SuppressWarnings("unused") + private void __delitem__(Object key) { + // This may be over-simplifying things but ... :) + remove(key); + } + + // methods -------------------------------------------------------- + + /** + * Update the dictionary with the contents of another mapping + * object, allowing replacement of matching keys. This supports the + * {@code UPDATE_DICT} opcode. + * + * @param o to merge into this dictionary + */ + // Compare CPython PyDict_Update in dictobject.c + void update(Object o) { mergeObject(o, MergeMode.PUT); } + + /** + * Merge the contents of another mapping into this dictionary + * allowing replacement of matching keys only as specified. 
+ * + * @param o to merge into this dictionary + * @param mode policy on replacement + */ + // Compare CPython _PyDict_MergeEx in dictobject.c + void merge(Object o, MergeMode mode) { mergeObject(o, mode); } + + // Non-Python API ------------------------------------------------- + + /** + * A {@code PyDict} is a {@code Map<Object, Object>}, but contains a + * private implementation of {@code java.util.Map<Key, Object>}. We + * use this class {@code Key} so that when Java needs key hashes or + * comparisons, it receives the hash or comparison that Python would + * produce. An object with the {@code Key} interface defines the + * standard Java {@code hashCode()} and {@code equals()} to return + * the answers Python would give for {@code __hash__} and + * {@code __eq__}. + * <p>

+ * Some implementations of Python objects (e.g. the + * {@link PyUnicode} implementation of {@code str}) implement + * {@code PyDict.Key}, and can give Python semantics to hash and + * comparison directly. Other implementations (e.g. a Java + * {@code String} implementation of {@code str}) have to be wrapped + * in a {@link KeyHolder} that implements {@code PyDict.Key}. + * Conversely, when any method requires the keys of a Python + * {@code dict}, the {@code Key} must yield up the original Python + * object it contains. + * <p>

+ * An implementation of a Python type that allows Python sub-classes + * must respect re-definition of the corresponding special methods. + * This is best done by calling abstract API + * {@link Abstract#hash(Object)}, etc. + */ + interface Key { + + /** + * If this object is a holder for the actual key, return the actual + * key. By default, return {@code this}. Python object + * implementations that implement {@link Key#hashCode()} and + * {@link Key#equals(Object)}, do not usually override this method. + * + * @return the underlying key object (by default {@code this}) + */ + default Object get() { return this; } + + /** + * Python objects that implement the interface {@code Key}, define + * this method so that calls from Java libraries to + * {@code Object.hashCode()} receive the hash defined by Python. A + * sufficient implementation is:

+         * @Override
+         * public int hashCode() throws PyException {
+         *     return PyDict.pythonHash(this);
+         * }
+         * 
Where it is known {@code __hash__} cannot have been + * redefined, object implementations may have a shorter option. + * + * @throws PyException from {@code __hash__} implementations + */ + @Override + public int hashCode() throws PyException; + + /** + * Python objects that implement the interface {@code Key}, define + * this method so that calls from Java libraries to + * {@code Object.equals(Object)} are answered with Python semantics. + * A sufficient implementation is:
+         * @Override
+         * public boolean equals(Object obj) throws PyException {
+         *     return PyDict.pythonEquals(this, obj);
+         * }
+         * 
Objects that provide their own specialised implementation + * of {@code equals}, receiving a {@code Key} object as the + * {@code other} argument, must dereference it with {@code get()} + * and work on those contents. An idiom like this may be used:
+         * @Override
+         * public boolean equals(Object other) {
+         *     if (other instanceof PyDict.Key)
+         *         other = ((PyDict.Key) other).get();
+         *     // ... rest of implementation
+         * }
+         *  
+ * + * @throws PyException from {@code __eq__} implementations + */ + @Override + public boolean equals(Object other) throws PyException; + } + + /** + * This is a wrapper that gives Python semantics to objects used as + * keys. When using a Java collection, it is necessary to intercept + * the calls Java will make to {@code Object.hashCode} and + * {@code Object.equals}, and direct them to Python {@code __hash__} + * and {@code __eq__}. + */ + static class KeyHolder implements Key { + + /** The actual key this object is holding. */ + private final Object key; + + /** + * Create a key on the given object Python {@code __eq__} + * definitions on objects offered as keys. + * + * @param key to wrap + * @throws PyException from {@code __eq__} + */ + KeyHolder(Object key) { this.key = key; } + + /** Return the actual object held by this {@code Key} object. */ + @Override + public Object get() { return key; } + + @Override + public int hashCode() { return pythonHash(this); } + + /** + * Impose Python {@code __eq__} definitions on objects offered as + * keys. + * + * @throws PyException from {@code __eq__} + */ + @Override + public boolean equals(Object other) throws PyException { return pythonEquals(this, other); } + + @Override + public String toString() { return String.format("KeyHolder(%s)", key); } + } + + /** + * Turn an object into a {@link Key} suitable for lookup in + * {@link #map}. + * + * @param key to return or wrap + */ + private static Key toKey(Object key) { + if (key instanceof Key) + return (Key)key; + else + return new KeyHolder(key); + } + + /** + * Convenience function for Python objects that implement + * {@link PyDict.Key}, to impose Python semantics for {@code hash()} + * on {@code Object.hashCode}. See {@link PyDict.Key#hashCode()}. 
+ * + * @param key to hash + * @return the hash + * @throws PyException from {@code __hash__} implementations + */ + public static int pythonHash(Key key) throws PyException { + try { + return Abstract.hash(key.get()); + } catch (PyException e) { + // A PyException is allowed to propagate as itself + throw e; + } catch (Throwable e) { + // Tunnel out non-Python errors as internal + throw new InterpreterError(e, "during hash(%s)", PyType.of(key)); + } + } + + /** + * Convenience function for Python objects that implement + * {@link PyDict.Key}, to impose Python semantics for {@code ==} on + * {@code Object.equals}. See {@link Key#equals(Object)}. + * + * @param key to test equal + * @param other to test equal + * @return whether equal + * @throws PyException from {@code __eq__} implementations + */ + public static boolean pythonEquals(Key key, Object other) throws PyException { + + if (other instanceof Key) { + // De-reference the key to its contents + other = ((Key)other).get(); + } + + // Quick answer if it contains the same object + Object self = key.get(); + if (other == self) { return true; } + + // Otherwise, make a full comparison + try { + Object r = Comparison.EQ.apply(self, other); + return Abstract.isTrue(r); + } catch (PyException e) { + // A PyException is allowed to propagate as itself + throw e; + } catch (Throwable e) { + // Tunnel out non-Python errors as internal + throw new InterpreterError(e, "during equals(%s, %s)", PyType.of(self), + PyType.of(other)); + } + } + + // Map interface -------------------------------------------------- + + @Override + public Set> entrySet() { return new EntrySetImpl(); } + + /** + * An instance of this class is returned by + * {@link PyDict#entrySet()}, and provides the view of the entries + * in the {@code PyDict} mentioned there. + *

+ * It is probably also the backing for a {@code dict_keys}. + */ + private class EntrySetImpl extends AbstractSet> { + + @Override + public Iterator> iterator() { return new EntrySetIteratorImpl(); } + + @Override + public int size() { return map.size(); } + } + + /** + * An instance of this class is returned by + * {@link EntrySetImpl#iterator()}. It is backed by an iterator on + * the underlying {@link #map}, and its job is to return an entry in + * which the {@link PyDict#Key} has been replaced with its contained + * object, the true key at the Python level. + */ + private class EntrySetIteratorImpl implements Iterator> { + + /** Backing iterator on the "real" implementation. */ + private final Iterator> mapIterator = map.entrySet().iterator(); + + @Override + public boolean hasNext() { return mapIterator.hasNext(); } + + /** + * {@inheritDoc} The difference from the underlying + * {@link mapIterator} is that the key in the entry returned by this + * method is the object embedded in the {@link KeyHolder}, which is + * the key as far as Python is concerned. + */ + @Override + public Entry next() { + Entry e = mapIterator.next(); + return new SimpleEntry(e.getKey().get(), e.getValue()); + } + + @Override + public void remove() { mapIterator.remove(); } + } + + // plumbing ------------------------------------------------------- + + private static final String ELEMENT_N = "dictionary update sequence element %d "; + private static final String CANNOT_CONVERT_SEQ = + "cannot convert " + ELEMENT_N + "to a sequence"; + private static final String KV_TUPLE_LENGTH = ELEMENT_N + "has length %d; 2 is required"; + + /** + * Compare this dictionary with the other {@code dict} for equality. + * + * @param other {@code dict} + * @return {@code true} if equal, {@code false} if not. + */ + private boolean compareEQ(PyDict other) { + try { + // Must we equal size to be equal + if (other.size() != size()) { return false; } + // All the keys must map to equal values. 
+ for (Map.Entry e : entrySet()) { + Object w = other.get(e.getKey()); + if (w == null) + return false; + else if (!Abstract.richCompareBool(e.getValue(), w, Comparison.EQ)) + return false; + } + // The dictionaries matched at every key. + return true; + } catch (PyException e) { + // It's ok to throw legitimate Python exceptions + throw e; + } catch (Throwable t) { + throw new InterpreterError(t, "non-Python exeption in comparison"); + } + } + + /** + * Merge a key-value pair into this {@code dict}. + * + * @param k key + * @param v value + * @param mode what to do about duplicates + * @throws KeyError on duplicate key (if {@code mode == } + * {@link MergeMode#UNIQUE}) + */ + private void mergeSingle(Object k, Object v, MergeMode mode) throws KeyError { + if (mode == MergeMode.PUT) { + put(k, v); + } else { + // Sensitive to whether already present + Object u = putIfAbsent(k, v); + if (u != null && mode == MergeMode.UNIQUE) { throw new KeyError.Duplicate(k); } + } + } + + /** + * Merge the mapping {@code src} into this {@code dict}. + * + * @param src a mapping to merge in + * @param mode what to do about duplicates + * @throws KeyError on duplicate key (if {@code mode == } + * {@link MergeMode#UNIQUE}) + */ + // Compare CPython dict_merge in dictobject.c + private void mergeObject(Object src, MergeMode mode) throws KeyError, AttributeError { + + // Try to make src a Java Map + Map map; + if (src instanceof PyDict) { + map = (PyDict)src; + // XXX MissingFeature("Non-dict wrapped as mapping"); + // } else if (PyMapping.MapWrapper.check(src)) { + // map = PyMapping.map(src); + } else { + throw new AttributeError("'%.200s' object is not a mapping", PyType.of(src).getName()); + } + + // Now update according to the mode, using Java semantics. 
+ if (mode == MergeMode.PUT) { + putAll(map); + } else { + // Sensitive to whether already present + for (Map.Entry e : map.entrySet()) { + Object k = e.getKey(); + Object u = putIfAbsent(k, e.getValue()); + if (u != null && mode == MergeMode.UNIQUE) { throw new KeyError.Duplicate(k); } + } + } + } +} diff --git a/core/src/main/java/org/python/core/PyException.java b/core/src/main/java/org/python/core/PyException.java new file mode 100644 index 000000000..a56598782 --- /dev/null +++ b/core/src/main/java/org/python/core/PyException.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code Exception} exception. */ +public class PyException extends BaseException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("Exception", MethodHandles.lookup()) // + .base(BaseException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected PyException(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public PyException(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/PyFloat.java b/core/src/main/java/org/python/core/PyFloat.java new file mode 100644 index 000000000..fd17c83c0 --- /dev/null +++ b/core/src/main/java/org/python/core/PyFloat.java @@ -0,0 +1,1059 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.python.core.PyFloatMethods.toDouble; + +import java.lang.invoke.MethodHandles; +import java.math.BigInteger; +import java.util.Map; + +import org.python.base.InterpreterError; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.stringlib.FloatFormatter; +import org.python.core.stringlib.InternalFormat; +import org.python.core.stringlib.InternalFormat.FormatError; +import org.python.core.stringlib.InternalFormat.FormatOverflow; +import org.python.core.stringlib.InternalFormat.Spec; +import org.python.modules.math; + +/** The Python {@code float} object. */ +public class PyFloat extends AbstractPyObject { + /** The type {@code float}. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("float", MethodHandles.lookup()) + .adopt(Double.class) + .operand(Integer.class, BigInteger.class, + PyLong.class, Boolean.class) + .methods(PyFloatMethods.class)); + + /** Format specification used by repr(). */ + static final Spec SPEC_REPR = InternalFormat.fromText(" >r"); + /** Format specification used by str(). */ + static final Spec SPEC_STR = Spec.NUMERIC; + + /** A constant Python {@code float(0)}. */ + static final Double ZERO = Double.valueOf(0.0); + + /** A constant Python {@code float(1)}. */ + static final Double ONE = Double.valueOf(1.0); + + /** A constant Python {@code float("nan")}. */ + static final Double NAN = Double.NaN; + + + /** Value of this {@code float} object. */ + final double value; + + public double getValue() { + return value; + } + + /** + * Constructor for Python sub-class specifying {@link #type}. + * + * @param type actual type + * @param value of the {@code float} + */ + PyFloat(PyType type, double value) { + super(type); + this.value = value; + } + + // XXX Provide factory from double, but expose no constructor. + // Is it safe to allow user-defined Java sub-classes of PyFloat? 
+ + public PyFloat(double v) { + this(TYPE, v); + } + + public PyFloat(float v) { + this((double)v); + } + + // Instance methods on PyFloat ------------------------------------- + + @Override + public String toString() { return Py.defaultToString(this); } + + @Override + public boolean equals(Object obj) { + // XXX Use Dict.pythonEquals when available + if (obj instanceof PyFloat) { + PyFloat other = (PyFloat)obj; + return other.value == this.value; + } else + // XXX should try more accepted types. Or __eq__? + return false; + } + + @Override + public int hashCode() { return __hash__(); } + + // Constructor from Python ---------------------------------------- + + // @formatter:off + /* + @ExposedNew + public static Object float_new(PyNewWrapper new_, boolean init, PyType subtype, + Object[] args, String[] keywords) { + ArgParser ap = new ArgParser("float", args, keywords, new String[] {"x"}, 0); + Object x = ap.getPyObject(0, null); + if (x == null) { + if (new_.for_type == subtype) { + return ZERO; + } else { + return new PyFloatDerived(subtype, 0.0); + } + } else { + PyFloat floatObject = null; + try { + floatObject = x.__float__(); + } catch (PyException e) { + if (e.match(Py.AttributeError)) { + // Translate AttributeError to TypeError + // XXX: We are using the same message as CPython, even if + // it is not strictly correct (instances of types + // that implement the __float__ method are also + // valid arguments) + throw new TypeError("float() argument must be a string or a number"); + } + throw e; + } + if (new_.for_type == subtype) { + return floatObject; + } else { + return new PyFloatDerived(subtype, floatObject.value); + } + } + } + */ + // @formatter:on + + /* + @ExposedGet(name = "real", doc = BuiltinDocs.float_real_doc) + */ + public Object getReal() { + return value; + } + + /* + @ExposedGet(name = "imag", doc = BuiltinDocs.float_imag_doc) + */ + public Object getImag() { + return ZERO; + } + + /* + @ExposedClassMethod(doc = 
BuiltinDocs.float_fromhex_doc) + */ + public static Object float_fromhex(PyType type, Object o) { + // XXX: I'm sure this could be shortened/simplified, but Double.parseDouble() takes + // non-hex strings and requires the form 0xNUMBERpNUMBER for hex input which + // causes extra complexity here. + + String message = "invalid hexadecimal floating-point string"; + boolean negative = false; + + // XXX Should declare value as String parameter and coerce + String value = o.toString().trim().toLowerCase(); + + if (value.length() == 0) { + throw new ValueError(message); + } else if (value.equals("nan") || value.equals("-nan") || value.equals("+nan")) { + return NAN; + } else if (value.equals("inf") || value.equals("infinity") || value.equals("+inf") + || value.equals("+infinity")) { + return new PyFloat(Double.POSITIVE_INFINITY); + } else if (value.equals("-inf") || value.equals("-infinity")) { + return new PyFloat(Double.NEGATIVE_INFINITY); + } + + // Strip and record + or - + if (value.charAt(0) == '-') { + value = value.substring(1); + negative = true; + } else if (value.charAt(0) == '+') { + value = value.substring(1); + } + if (value.length() == 0) { + throw new ValueError(message); + } + + // Append 0x if not present. + if (!value.startsWith("0x") && !value.startsWith("0X")) { + value = "0x" + value; + } + + // reattach - if needed. + if (negative) { + value = "-" + value; + } + + // Append p if not present. 
+ if (value.indexOf('p') == -1) { + value = value + "p0"; + } + + try { + double d = Double.parseDouble(value); + if (Double.isInfinite(d)) { + throw new OverflowError("hexadecimal value too large to represent as a float"); + } + return new PyFloat(d); + } catch (NumberFormatException n) { + throw new ValueError(message); + } + } + + private String pyHexString(Double f) { + // Simply rewrite Java hex repr to expected Python values; not + // the most efficient, but we don't expect this to be a hot + // spot in our code either + String java_hex = Double.toHexString(value); + if (java_hex.equals("Infinity")) { + return "inf"; + } else if (java_hex.equals("-Infinity")) { + return "-inf"; + } else if (java_hex.equals("NaN")) { + return "nan"; + } else if (java_hex.equals("0x0.0p0")) { + return "0x0.0p+0"; + } else if (java_hex.equals("-0x0.0p0")) { + return "-0x0.0p+0"; + } + + // replace hex rep of MpE to conform with Python such that + // 1. M is padded to 16 digits (ignoring a leading -) + // 2. Mp+E if E>=0 + // example: result of 42.0.hex() is translated from + // 0x1.5p5 to 0x1.5000000000000p+5 + int len = java_hex.length(); + boolean start_exponent = false; + StringBuilder py_hex = new StringBuilder(len + 1); + int padding = f > 0 ? 
17 : 18; + for (int i = 0; i < len; i++) { + char c = java_hex.charAt(i); + if (c == 'p') { + for (int pad = i; pad < padding; pad++) { + py_hex.append('0'); + } + start_exponent = true; + } else if (start_exponent) { + if (c != '-') { + py_hex.append('+'); + } + start_exponent = false; + } + py_hex.append(c); + } + return py_hex.toString(); + } + + /* + @ExposedMethod(doc = BuiltinDocs.float_hex_doc) + */ + public Object float_hex() { + return pyHexString(value); + } + + + // Special methods ------------------------------------------------ + + /* + @ExposedMethod(doc = BuiltinDocs.float___str___doc) + */ + static final String __str__(Object self) { return formatDouble(doubleValue(self), SPEC_STR); } + + /* + @ExposedMethod(doc = BuiltinDocs.float___repr___doc) + */ + static final String __repr__(Object self) { return formatDouble(doubleValue(self), SPEC_REPR); } + + /* + @ExposedMethod(doc = BuiltinDocs.float___hash___doc) + */ + final int __hash__() { return __hash__(value); } + + static final int __hash__(Double self) { return __hash__(self.doubleValue()); } + + static final int __hash__(double value) { + // XXX Essentially copied from Jython 2 but not right for 3 + if (Double.isInfinite(value)) { + return value < 0 ? 
-271828 : 314159; + } else if (Double.isNaN(value)) { + return 0; + } + + double intPart = Math.floor(value); + double fractPart = value - intPart; + + if (fractPart == 0) { + if (intPart <= Integer.MAX_VALUE && intPart >= Integer.MIN_VALUE) { + // Yes (short cut) + return (int)value; + } else { + // No, but PyLong is also wrong in this way + return BigInteger.valueOf((long)intPart).hashCode(); + } + } else { + // No, but almost what Java does :/ + long v = Double.doubleToLongBits(value); + return (int)v ^ (int)(v >> 32); + } + } + + // @formatter:off + /* + public Object __tojava__(Class c) { + if (c == Double.TYPE || c == Number.class || c == Double.class || c == Object.class + || c == Serializable.class) { + return Double.valueOf(value); + } else if (c == Float.TYPE || c == Float.class) { + return Float.valueOf((float) value); + } + return super.__tojava__(c); + } + + + @Override + public Object __coerce_ex__(PyObject other) { + return float___coerce_ex__(other); + } + + @ExposedMethod(doc = BuiltinDocs.float___coerce___doc) + final PyObject float___coerce__(PyObject other) { + return adaptToCoerceTuple(float___coerce_ex__(other)); + } + + /** + * Coercion logic for float. Implemented as a final method to avoid invocation of virtual + * methods from the exposed coerce. 
+ * / + final Object float___coerce_ex__(PyObject other) { + if (other instanceof PyFloat) { + return other; + } else if (other instanceof PyInteger) { + return new PyFloat((double)((PyInteger)other).getValue()); + } else if (other instanceof PyLong) { + return new PyFloat(((PyLong)other).doubleValue()); + } else { + return Py.None; + } + } + + private static boolean canCoerce(PyObject other) { + return other instanceof PyFloat || other instanceof PyInteger || other instanceof PyLong; + } + + private static double coerce(PyObject other) { + if (other instanceof PyFloat) { + return ((PyFloat)other).getValue(); + } else if (other instanceof PyInteger) { + return ((PyInteger)other).getValue(); + } else if (other instanceof PyLong) { + return ((PyLong)other).doubleValue(); + } else { + throw Py.TypeError("xxx"); + } + } + */ + // @formatter:on + + /** + * Python % operator: y = n*x + z. The modulo operator always yields + * a result with the same sign as its second operand (or zero). + * (Compare java.Math.IEEEremainder) + * + * @param x dividend + * @param y divisor + * @return x % y + */ + private static double modulo(double x, double y) { + if (y == 0.0) { + throw new ZeroDivisionError("float modulo"); + } else { + double z = x % y; + if (z == 0.0) { + // Has to be same sign as y (even when zero). + return Math.copySign(z, y); + } else if ((z > 0.0) == (y > 0.0)) { + // z has same sign as y, as it must. + return z; + } else { + // Note abs(z) < abs(y) and opposite sign. 
+ return z + y; + } + } + } + + /* + @ExposedMethod(type = MethodType.BINARY, defaults = "null", // + doc = BuiltinDocs.float___pow___doc) + */ + static Object __pow__(Object left, Object right, Object modulus) { + try { + if (modulus == null || modulus == Py.None) { + return pow(toDouble(left), toDouble(right)); + } else { + // Note that we also call __pow__ from PyLong.__pow__ + throw new TypeError( + "pow() 3rd argument not allowed unless all arguments are integers"); + } + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rpow___doc) + */ + static Object __rpow__(Object right, Object left) { + try { + return pow(toDouble(left), toDouble(right)); + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + /** Smallest value that cannot be represented as an int */ + private static double INT_LONG_BOUNDARY = -(double)Integer.MIN_VALUE; // 2^31 + + /* + @ExposedMethod(doc = BuiltinDocs.float___int___doc) + */ + final Object __int__() { return __int__(value); } + + static final Object __int__(Double self) { return __int__(self.doubleValue()); } + + private static final Object __int__(double v) { + if (v < INT_LONG_BOUNDARY && v > -(INT_LONG_BOUNDARY + 1.0)) { + // v will fit into an int (when rounded towards zero). 
+ return (int)v; + } else { + return bigIntegerFromDouble(v); + } + } + + /* + @ExposedMethod(doc = BuiltinDocs.float___float___doc) + */ + Object __float__() { return value; } + + static final Object __float__(Double self) { return self; } + + // Methods -------------------------------------------------------- + // Expose to Python when mechanisms are available + + /* + @ExposedMethod(doc = BuiltinDocs.float___trunc___doc) + */ + final Object __trunc__() { return __int__(value); } + + static final Object __trunc__(Double self) { return __int__(self.doubleValue()); } + + /* + @ExposedMethod(doc = BuiltinDocs.float_conjugate_doc) + */ + final Object conjugate() { return value; } + + static final Object conjugate(Double self) { return self; } + + /* + @ExposedMethod(doc = BuiltinDocs.float_is_integer_doc) + */ + final boolean is_integer() { return is_integer(value); } + + static final boolean is_integer(Double self) { return is_integer(self.doubleValue()); } + + static final boolean is_integer(double self) { + if (!Double.isFinite(self)) { return false; } + return Math.floor(self) == self; + } + + /* + @ExposedMethod(doc = BuiltinDocs.float___getnewargs___doc) + */ + final PyTuple __getnewargs__() { return new PyTuple(value); } + static final PyTuple __getnewargs__(Double self) { return new PyTuple(self); } + + /* + @ExposedMethod(doc = BuiltinDocs.float___format___doc) + */ + static final Object __format__(Object self, Object formatSpec) { + try { + /* + * Parse the specification, which must at least sub-class str in + * Python. + */ + if (!PyUnicode.TYPE.check(formatSpec)) { + throw Abstract.argumentTypeError("__format__", 0, "str", formatSpec); + } + + Spec spec = InternalFormat.fromText(formatSpec.toString()); + return formatDouble(doubleValue(self), spec); + } catch (IllegalArgumentException iae) { + // XXX Some format specification errors: why not FormatError? 
+ throw new ValueError(iae.getMessage()); + } + } + + /** + * Format this float according to the specification passed in. + * Supports {@code __format__}, {@code __str__} and + * {@code __repr__}. + * + * @param value to format + * @param spec parsed format specification string + * @return formatted value + */ + private static String formatDouble(double value, Spec spec) { + try { + FloatFormatter f = new Formatter(spec, true); + return f.format(value).getResult(); + } catch (FormatOverflow fe) { + throw new OverflowError(fe.getMessage()); + } catch (FormatError fe) { + throw new ValueError(fe.getMessage()); + } + } + + /* + @ExposedMethod(doc = BuiltinDocs.float_as_integer_ratio_doc) + */ + final PyTuple as_integer_ratio() {return as_integer_ratio(value);} + static final PyTuple as_integer_ratio(Double self) {return as_integer_ratio(self.doubleValue());} + + private static PyTuple as_integer_ratio(double value) { + if (Double.isInfinite(value)) { throw cannotConvertInf("integer ratio"); } + if (Double.isNaN(value)) { throw cannotConvertNaN("integer ratio"); } + // XXX This is potty: use similar logic to bigIntegerFromDouble. + // Long.numberOfTrailingZeros​ on significand adjusts the exponent. + PyTuple frexp = math.frexp(value); + double float_part = ((Double)frexp.get(0)).doubleValue(); + int exponent = ((Integer)frexp.get(1)).intValue(); + for (int i = 0; i < 300 && float_part != Math.floor(float_part); i++) { + float_part *= 2.0; + exponent--; + } + /* + * CPython comment (not relevant after first sentence): + * self == float_part * 2**exponent exactly and float_part is integral. If FLT_RADIX != 2, + * the 300 steps may leave a tiny fractional part to be truncated by PyLong_FromDouble(). + */ + + // value == m * 2**exponent exactly and m is integral. 
+ BigInteger numerator = bigIntegerFromDouble(float_part); + Object denominator = 1; + + // Factor to apply to numerator or denominator + + if (exponent >= 0) { + // Scale the numerator by 2**exponent + numerator = numerator.shiftLeft(exponent); + } else { + // exponent<0: make the denominator 2**-exponent + denominator = BigInteger.ONE.shiftLeft(-exponent); + } + + return new PyTuple(numerator, denominator); + } + + // Non-slot API ------------------------------------------------- + + /** + * Present the value as a Java {@code double} when the argument is + * expected to be a Python {@code float} or a sub-class of it. + * + * @param v claimed {@code float} + * @return {@code double} value + * @throws TypeError if {@code v} is not a Python {@code float} + */ + // Compare CPython floatobject.h: PyFloat_AS_DOUBLE + public static double doubleValue(Object v) throws TypeError { + if (v instanceof Double) + return ((Double)v).doubleValue(); + else if (v instanceof PyFloat) + return ((PyFloat)v).value; + else + throw Abstract.requiredTypeError("a float", v); + } + + /** + * Convert the argument to a Java {@code double} value. If {@code o} + * is not a Python {@code float} try the {@code __float__()} method, + * then {@code __index__()}. + * + * @param o to convert + * @return converted value + * @throws TypeError if o cannot be interpreted as a {@code float} + * @throws Throwable from {@code __float__)} or {@code __index__} + */ + // Compare CPython floatobject.c: PyFloat_AsDouble + static double asDouble(Object o) throws TypeError, Throwable { + /* + * Ever so similar to Number.toFloat, but returns the double + * value extracted from (potentially) a sub-type of PyFloat, and + * does not try to convert from strings. 
+ */ + + if (TYPE.check(o)) { + return doubleValue(o); + + } else { + Operations ops = Operations.of(o); + try { + // Try __float__ (if defined) + Object res = ops.op_float.invokeExact(o); + PyType resType = PyType.of(res); + if (resType == PyFloat.TYPE) // Exact type + return doubleValue(res); + else if (resType.isSubTypeOf(PyFloat.TYPE)) { + // Warn about this, then return the value of the sub-class + // instance (the recursive call takes the TYPE.check branch). + return PyFloat.asDouble(Abstract.returnDeprecation( + "__float__", "float", res)); + } else + // Slot defined but not a Python float at all + throw Abstract.returnTypeError("__float__", "float", + res); + } catch (Slot.EmptyException e) {} + + // Fall out here only if __float__ was not defined + if (Slot.op_index.isDefinedFor(ops)) + return PyLong.asDouble(PyNumber.index(o)); + else + throw Abstract.requiredTypeError("a real number", o); + } + } + + // standard singleton issues apply here to __getformat__/__setformat__, + // but this is what Python demands + public enum Format { + + UNKNOWN("unknown"), BE("IEEE, big-endian"), LE("IEEE, little-endian"); + + private final String format; + + Format(String format) { + this.format = format; + } + + public String format() { + return format; + } + } + + // subset of IEEE-754, the JVM is big-endian + public static volatile Format double_format = Format.BE; + public static volatile Format float_format = Format.BE; + + /* + @ExposedClassMethod(doc = BuiltinDocs.float___getformat___doc) + */ + public static String float___getformat__(PyType type, String typestr) { + if ("double".equals(typestr)) { + return double_format.format(); + } else if ("float".equals(typestr)) { + return float_format.format(); + } else { + throw new ValueError("__getformat__() argument 1 must be 'double' or 'float'"); + } + } + + /* + @ExposedClassMethod(doc = BuiltinDocs.float___setformat___doc) + */ + public static void float___setformat__(PyType type, String typestr, String format) { + Format new_format = null; + if (!"double".equals(typestr) && !"float".equals(typestr)) 
{ + throw new ValueError("__setformat__() argument 1 must be 'double' or 'float'"); + } + if (Format.LE.format().equals(format)) { + throw new ValueError(String.format("can only set %s format to 'unknown' or the " + + "detected platform value", typestr)); + } else if (Format.BE.format().equals(format)) { + new_format = Format.BE; + } else if (Format.UNKNOWN.format().equals(format)) { + new_format = Format.UNKNOWN; + } else { + throw new ValueError("__setformat__() argument 2 must be 'unknown', " + + "'IEEE, little-endian' or 'IEEE, big-endian'"); + } + if (new_format != null) { + if ("double".equals(typestr)) { + double_format = new_format; + } else { + float_format = new_format; + } + } + } + + // Java-only API ------------------------------------------------- + + /** + * Convert a Python {@code float}, {@code int} or {@code bool} to a + * Java {@code double} (or throw {@link NoConversion}). Conversion + * from an {@code int} may overflow. + *

+ * If the method throws the special non-Python exception + * {@link NoConversion}, the caller must deal with it by throwing an + * appropriate Python exception or taking an alternative course of + * action. OverflowError could be allowed to propagate since it is a + * Python exception. + * + * @param v to convert + * @return converted to {@code double} + * @throws NoConversion if v is not a {@code float}, {@code int} or + * {@code bool} + * @throws OverflowError if v is an {@code int} out of range + */ + static double convertToDouble(Object v) throws NoConversion, OverflowError { + if (v instanceof Double) + return ((Double)v).doubleValue(); + else if (v instanceof PyFloat) + // The test must match the cast: a PyFloat carries its value directly + return ((PyFloat)v).value; + else + // BigInteger, PyLong, Boolean, etc. or throw + return PyLong.convertToDouble(v); + } + + // Python sub-class ----------------------------------------------- + + /** + * Instances in Python of sub-classes of 'float', are represented in + * Java by instances of this class. + */ + static class Derived extends PyFloat implements DictPyObject { + + protected Derived(PyType subType, double value) { + super(subType, value); + } + + // /** The instance dictionary {@code __dict__}. */ + // protected PyDict dict = new PyDict(); + + @Override + public Map getDict() { return null; } + } + + // formatter ------------------------------------------------------ + + /** + * A {@link Formatter}, constructed from a {@link Spec}, with + * specific validations for {@code float.__format__}. + */ + static class Formatter extends FloatFormatter { + + /** + * If {@code true}, give {@code printf}-style meanings to + * {@link Spec#type}. + */ + final boolean printf; + + /** + * Prepare a {@link Formatter} in support of {@code str.__mod__}, + * that is, traditional {@code printf}-style formatting. + * + * @param spec a parsed format specification. 
+ * @param printf if {@code true}, interpret {@code spec} + * {@code printf}-style, otherwise as + * {@link Formatter#Formatter(Spec) Formatter(Spec)} + * @throws FormatOverflow if a value is out of range (including the + * precision) + * @throws FormatError if an unsupported format character is + * encountered + */ + Formatter(Spec spec, boolean printf) throws FormatError { + super(validated(spec, printf)); + this.printf = printf; + } + + /** + * Prepare a {@link Formatter} in support of + * {@link PyFloat#__format__(Object, Object) float.__format__}. + * + * @param spec a parsed PEP-3101 format specification. + * @throws FormatOverflow if a value is out of range (including the + * precision) + * @throws FormatError if an unsupported format character is + * encountered + */ + Formatter(Spec spec) throws FormatError { this(spec, false); } + + /** + * Validations and defaults specific to {@code float}. + * + * @param spec to validate + * @param printf if {@code true}, also accept the type codes + * {@code 'r'} and {@code 's'}, which are otherwise rejected as + * unknown + * @return validated spec with defaults filled + * @throws FormatError on failure to validate + */ + private static Spec validated(Spec spec, boolean printf) throws FormatError { + String type = TYPE.name; + + switch (spec.type) { + + case 'n': + if (spec.grouping) { throw notAllowed("Grouping", type, spec.type); } + //$FALL-THROUGH$ + + case Spec.NONE: + case 'e': + case 'f': + case 'g': + case 'E': + case 'F': + case 'G': + case '%': + // Check for disallowed parts of the specification + if (spec.alternate) { throw alternateFormNotAllowed(type); } + break; + + case 'r': + case 's': + // Only allow for printf-style formatting + if (printf) { break; } + //$FALL-THROUGH$ + + default: + // The type code was not recognised + throw unknownFormat(spec.type, type); + } + + /* + * spec may be incomplete. The defaults are those commonly used for + * numeric formats. 
+ */ + return spec.withDefaults(Spec.NUMERIC); + } + + @Override + public FloatFormatter format(Object o) throws NoConversion, FormatError { + return format(convertToDouble(o)); + } + } + + // plumbing ------------------------------------------------------ + + /** + * Convert a Java {@code double} to Java {@code BigInteger} by + * truncation. + * + * @param value to convert + * @return BigInteger equivalent. + * @throws OverflowError when this is a floating infinity + * @throws ValueError when this is a floating NaN + */ + // Somewhat like CPython longobject.c :: PyLong_FromDouble + static BigInteger bigIntegerFromDouble(double value) throws OverflowError, ValueError { + + long raw = Double.doubleToRawLongBits(value); + long e = (raw & EXPONENT) >>> SIGNIFICAND_BITS; + int exponent = ((int)e) - EXPONENT_BIAS; + + if (exponent < 63) + // Give the job to the hardware. + return BigInteger.valueOf((long)value); + + else if (exponent > 1023) { + // raw exponent was 0x7ff + if ((raw & SIGNIFICAND) == 0) + throw cannotConvertInf("integer"); + else + throw cannotConvertNaN("integer"); + + } else { + // Get the signed version of the significand + long significand = IMPLIED_ONE | raw & SIGNIFICAND; + long v = (raw & SIGN) == 0L ? 
significand : -significand; + // Shift (left or right) according to the exponent + return BigInteger.valueOf(v).shiftLeft(exponent - SIGNIFICAND_BITS); + } + } + + // IEE-754 64-bit floating point parameters + private static final int SIGNIFICAND_BITS = 52; // not counting the implied 1 + private static final int EXPONENT_BITS = 11; + private static final int EXPONENT_BIAS = 1023; + + // Masks derived from the 64-bit floating point parameters + private static final long IMPLIED_ONE = 1L << SIGNIFICAND_BITS; // = 0x0010000000000000L + private static final long SIGNIFICAND = IMPLIED_ONE - 1; // = 0x000fffffffffffffL + private static final long SIGN = IMPLIED_ONE << EXPONENT_BITS; // = 0x8000000000000000L; + private static final long EXPONENT = SIGN - IMPLIED_ONE; // = 0x7ff0000000000000L; + + private static OverflowError cannotConvertInf(String to) { + String msg = String.format(CANNOT_CONVERT, "infinity", to); + return new OverflowError(msg); + } + + private static ValueError cannotConvertNaN(String to) { + String msg = String.format(CANNOT_CONVERT, "NaN", to); + return new ValueError(msg); + } + + private static final String CANNOT_CONVERT = "cannot convert float %s to %s"; + + /** + * Exponentiation with Python semantics. + * + * @param v base value + * @param w exponent + * @return {@code v ** w} + */ + static double pow(double v, double w) { + /* + * This code was translated from the CPython implementation at + * v2.7.8 by progressively removing cases that could be delegated to + * Java. Jython differs from CPython in that where C pow() + * overflows, Java pow() returns inf (observed on Windows). This is + * not subject to regression tests, so we take it as an allowable + * platform dependency. All other differences in Java Math.pow() are + * trapped below and Python behaviour is enforced. 
+ */ + if (w == 0) { + // v**0 is 1, even 0**0 and nan**0 + return ONE; + + } else if (Double.isNaN(v)) { + // nan**w = nan, unless w == 0 + return NAN; + + } else if (Double.isNaN(w)) { + // v**nan = nan, unless v == 1; 1**nan = 1 + return v == 1.0 ? ONE : NAN; + + } else if (Double.isInfinite(w)) { + /* + * In Java Math pow(1,inf) = pow(-1,inf) = pow(1,-inf) = + * pow(-1,-inf) = nan, but in Python they are all 1. + */ + if (v == 1.0 || v == -1.0) { return ONE; } + + } else if (v == 0.0) { + // 0**w is an error if w is negative. + if (w < 0.0) { + throw new ZeroDivisionError("0.0 cannot be raised to a negative power"); + } + + } else if (!Double.isInfinite(v) && v < 0.0) { + if (w != Math.floor(w)) { + throw new ValueError("negative number cannot be raised to a fractional power"); + } + } + + // In all other cases entrust the calculation to Java + return Math.pow(v, w); + } + + + /** Used as error message text for division by zero. */ + static final String DIV_ZERO = "float division by zero"; + /** Used as error message text for modulo zero. */ + static final String MOD_ZERO = "float modulo zero"; + + /** + * Convenience function to throw a {@link ZeroDivisionError} if the + * argument is zero. (Java float arithmetic does not throw whatever + * the arguments.) + * + * @param v value to check is not zero + * @param msg for exception if {@code v==0.0} + * @return {@code v} + */ + static double nonzero(double v, String msg) { + if (v == 0.0) { throw new ZeroDivisionError(msg); } + return v; + } + + /** + * Convenience function to throw a {@link ZeroDivisionError} if the + * argument is zero. (Java float arithmetic does not throw whatever + * the arguments.) + * + * @param v value to check is not zero + * @return {@code v} + */ + static double nonzero(double v) { + if (v == 0.0) { throw new ZeroDivisionError(DIV_ZERO); } + return v; + } + + /** + * Test that two {@code double}s have the same sign. 
+ * + * @param u a double + * @param v another double + * @return if signs equal (works for signed zeros, etc.) + */ + private static boolean sameSign(double u, double v) { + long uBits = Double.doubleToRawLongBits(u); + long vBits = Double.doubleToRawLongBits(v); + return ((uBits ^ vBits) & SIGN) == 0L; + } + + /** + * Inner method for {@code __floordiv__} and {@code __rfloordiv__}. + * + * @param x operand + * @param y operand + * @return {@code x//y} + */ + static final double floordiv(double x, double y) { + // Java and Python agree a lot of the time (after floor()). + // Also, Java / never throws: it just returns nan or inf. + // So we ask Java first, then adjust the answer. + double z = x / y; + if (Double.isFinite(z)) { + // Finite result: only need floor ... + if (Double.isInfinite(y) && x != 0.0 && !sameSign(x, y)) + // ... except in this messy corner case :( + return -1.; + return Math.floor(z); + } else { + // Non-finite result: Java & Python differ + if (y == 0.) { + throw new ZeroDivisionError(DIV_ZERO); + } else { + return Double.NaN; + } + } + } + + /** + * Inner method for {@code __mod__} and {@code __rmod__}. + * + * @param x operand + * @param y operand + * @return {@code x%y} + */ + static final double mod(double x, double y) { + // Java and Python agree a lot of the time. + // Also, Java % never throws: it just returns nan. + // So we ask Java first, then adjust the answer. + double z = x % y; + if (Double.isNaN(z)) { + if (y == 0.) { throw new ZeroDivisionError(MOD_ZERO); } + // Otherwise nan is fine + } else if (!sameSign(z, y)) { + // z is finite (and x), but only correct if signs match + if (z == 0.) { + z = Math.copySign(z, y); + } else { + z = z + y; + } + } + return z; + } + + /** + * Inner method for {@code __divmod__} and {@code __rdivmod__}. 
+ * + * @param x operand + * @param y operand + * @return {@code tuple} of {@code (x//y, x%y)} + */ + static final PyTuple divmod(double x, double y) { + // Possibly not the most efficient + return new PyTuple(floordiv(x, y), mod(x, y)); + } +} diff --git a/core/src/main/java/org/python/core/PyFrame.java b/core/src/main/java/org/python/core/PyFrame.java new file mode 100644 index 000000000..059649364 --- /dev/null +++ b/core/src/main/java/org/python/core/PyFrame.java @@ -0,0 +1,172 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.util.Map; + +/** + * A {@code PyFrame} is the context for the execution of code. + * Different concrete sub-classes of {@code PyFrame} exist to + * execute different compiled representations of Python code. For + * example, there is one for CPython 3.11 byte code and (we expect) + * another for Java byte code. The type of code object supported is + * the parameter {@code C} to the class. + *

+ * In order that argument processing may be uniform irrespective of + * concrete type, a {@code PyFrame} presents an abstraction that has + * arguments laid out in an array. For example, given the function + * definition:

+ * def func(a, b, c=3, d=4, /, e=5, f=6, *aa, g=7, h, i=9, **kk):
+ *     v, w, x = b, c, d, e
+ *     return u
+ * 
the layout of the local variables in a frame would be as + * below + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
A Python {@code frame}
frameabcdefghiaakkuvwx
codeargcountkwonlyargcount***
posonlyargcount
functiondefaultskwdefaults
+ *

+ * In the last row of the table, the properties are supplied by the + * function object during each call. {@code defaults} apply in the + * position shown, in order, while {@code kwdefaults} (in a map) + * apply to keywords wherever the name matches. The names in the + * frame are those in the {@link PyCode#varnames} field of the + * associated code object. + *

+ * The frame presents an abstraction of an array of named local + * variables, and two more of cell and free variables, while + * concrete subclasses are free to implement these in whatever + * manner they choose. + * + * @param The type of code that this frame executes + */ +public abstract class PyFrame { + + /** The Python type {@code frame}. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("frame", MethodHandles.lookup()) + // Type admits no Python subclasses. + .flagNot(PyType.Flag.BASETYPE)); + + /** Frames form a stack by chaining through the back pointer. */ + PyFrame back; + + /** Function of which this is a frame. */ + final PyFunction func; + + /** + * Code this frame is to execute, exposed as immutable + * {@code f_code}. We have our own final copy because it is possible + * to change the code object that defines {@link #func} but the + * frame should continue to reference the code that created it. + */ + final C code; + + /** + * Local context (name space) of execution. (Assign if needed.) This + * is allowed to be any type, but if it is ever actually used, the + * interpreter will expect it to support the mapping protocol. + */ + Object locals; + + /** + * Foundation constructor on which subclass constructors rely. This + * provides a "loose" frame that is not yet part of any stack until + * explicitly pushed (with {@link ThreadState#push(PyFrame)}). In + * particular, the {@link #back} pointer is {@code null} in the + * newly-created frame. + *

+ * A frame always belongs to an {@link Interpreter} via its + * function, but it does not necessarily belong to a particular + * {@code ThreadState}. + * + * @param func defining the code and globals + */ + protected PyFrame(PyFunction func) { + this.func = func; + this.code = func.code; + } + + /** + * Get the interpreter that defines the import context when + * executing code. + * + * @return Interpreter that defines the import context. + */ + Interpreter getInterpreter() { return func.getInterpreter(); } + + // Java API ------------------------------------------------------ + + @Override + // Compare CPython frame_repr in frameobject.c + public String toString() { + int lineno = code.firstlineno; + if (lineno == 0) { lineno = -1; } + String file = code.filename, q = "'"; + if (file == null) { + file = ""; + q = ""; + } + return String.format("", Py.id(this), q, file, + q, lineno, code.name); + } + + /** + * Provide {@link #locals} as a Java Map. This does not re-compute + * {@code locals} as a dictionary in the way of + * {@link #fastToLocals()}, but only dresses an existing value as a + * Java {@code Map} (if it is not {@code null}). + * + * @return as a Java {@code Map} + */ + protected Map localsMapOrNull() { + if (locals == null) { + return null; + } else { + return PyMapping.map(locals); + } + } + + /** + * Execute the code in this frame. + * + * @return return value of the frame + */ + abstract Object eval(); +} diff --git a/core/src/main/java/org/python/core/PyFunction.java b/core/src/main/java/org/python/core/PyFunction.java new file mode 100644 index 000000000..4db5c37dc --- /dev/null +++ b/core/src/main/java/org/python/core/PyFunction.java @@ -0,0 +1,234 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.util.Collection; +import java.util.Map; + +import org.python.core.Exposed.FrozenArray; +import org.python.core.Exposed.Member; +import org.python.core.PyType.Flag; + +/** + * Python {@code function} object as created by a function + * definition and subsequently called. + * + * @param implementing class of {@code code} object + */ +public abstract class PyFunction extends AbstractPyObject + implements DictPyObject { + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType + .fromSpec(new PyType.Spec("function", MethodHandles.lookup()).flagNot(Flag.BASETYPE)); + + /** + * The interpreter that defines the import context. Not + * {@code null}. + */ + final Interpreter interpreter; + + /** + * Get the interpreter that defines the import context, which was + * current when this function was defined. Not {@code null}. + * + * @return interpreter that defines the import context + */ + Interpreter getInterpreter() { return interpreter; } + + /** + * The {@code __code__} attribute: a code object, which is writable, + * but only with the right implementation type for the concrete + * class of the function. Not {@code null}. + */ + protected C code; + + /** + * The read-only {@code __globals__} attribute is a {@code dict}: + * other mappings won't do. Not {@code null}. + */ + @Member(value = "__globals__", readonly = true) + final PyDict globals; + + /** + * The read-only {@code __builtins__} attribute is often a + * {@code dict} but may be any object. It will be accessed using the + * Python mapping protocol by the interpreter, at which point an + * error may be raised. Not {@code null}. + */ + @Member(value = "__builtins__", readonly = true) + final Object builtins; + + /** The (positional) {@code __defaults__} or {@code null}. */ + protected Object[] defaults; + + /** The {@code __kwdefaults__} or {@code null}. 
*/ + protected PyDict kwdefaults; + + /** + * The read-only {@code __closure__} attribute, or {@code null}. See + * {@link #setClosure(Collection) __closure__} access method + */ + @FrozenArray + protected PyCell[] closure; + + /** + * The {@code __doc__} attribute, can be set to anything or + * {@code null}. + */ + // (but only a str prints in help) + @Member("__doc__") + Object doc; + + /** The function name ({@code __name__} attribute). */ + String name; + + /** + * The {@code __dict__} attribute, a {@code dict} or {@code null}. + */ + private PyDict dict; + + /** + * The {@code __module__} attribute, can be anything or {@code null} + * meaning {@code None} + */ + @Member(value = "__module__") + Object module; + + /** + * The {@code __annotations__} attribute, a {@code dict} or + * {@code null}. + */ + PyDict annotations; + + /** The function qualified name ({@code __qualname__} attribute). */ + private String qualname; + + /** + * Create a PyFunction supplying most of the attributes at + * construction time. + *

+ * The strongly-typed {@code defaults}, {@code kwdefaults}, + * {@code annotations} and {@code closure} may be {@code null} if + * they would otherwise be empty. {@code annotations} is always + * exposed as a {@code dict}, but may be presented to the + * constructor as a {@code dict} or {@code tuple} of keys and values + * (or {@code null}). + * + * @implNote We differ from CPython in requiring a reference to the + * interpreter as an argument. Also, we favour a constructor in + * which the attributes are supplied {@code defaults}, + * {@code kwdefaults}, {@code annotations} and {@code closure} + * rather than added after construction. + * + * @param interpreter providing the module context not {@code null} + * @param code to execute not {@code null} + * @param globals name space to treat as global variables not + * {@code null} + * @param defaults default positional argument values or + * {@code null} + * @param kwdefaults default keyword argument values or {@code null} + * @param annotations type annotations ({@code dict}, {@code null} + * or maybe {@code tuple}) + * @param closure variables referenced but not defined here, must be + * size expected by code or {@code null} if empty. + */ + // Compare CPython PyFunction_NewWithQualName in funcobject.c + PyFunction(Interpreter interpreter, C code, PyDict globals, Object[] defaults, + PyDict kwdefaults, Object annotations, PyCell[] closure) { + super(TYPE); + + // We differ from CPython in requiring this reference + this.interpreter = interpreter; + assert interpreter != null; + + this.globals = globals; + this.name = code.name; + this.qualname = code.qualname; + + // Get __doc__ from first constant in code (if str) + Object doc; + Object[] consts = code.consts; + if (consts.length >= 1 && PyUnicode.TYPE.check(doc = consts[0])) + this.doc = doc; + else + this.doc = Py.None; + + // __module__ = globals['__name__'] or null. 
+ this.module = globals.get("__name__"); + this.builtins = getBuiltinsFromGlobals(); + + // We differ from CPython in having these in construction + this.defaults = defaults; + this.kwdefaults = kwdefaults; + this.closure = closure; + this.annotations = null; // XXX process argument instead + this.code = code; // XXX validate + } + + /** + * Look in {@code __globals__} then the {@code interpreter} to find + * the container of built-in objects. + * + * @return the {@code __builtins__} of the function + */ + // Compare CPython _PyEval_BuiltinsFromGlobals in frameobject.c + private Object getBuiltinsFromGlobals() { + Object builtins = globals.get("__builtins__"); + if (builtins != null) { + if (PyModule.TYPE.check(builtins)) { return ((PyModule)builtins).getDict(); } + return builtins; + } + /* + * Difference from CPython: this is always known and will be used by + * the frame created by a call, not the builtins of a previous + * frame. + */ + return interpreter.builtinsModule.dict; + } + + /** + * Create a {@code PyFrame} that will execute this + * {@code PyFunction}. This frame should be "loose": + * {@link PyFrame#back} should be {@code null} and it should not be + * on any thread's stack. + * + * @param locals name space to treat as local variables + * @return the frame + */ + abstract PyFrame createFrame(Object locals); + + // slot methods -------------------------------------------------- + + /** + * Canonical {@code __call__} slot with Jython conventions, making + * function implementations callable. 
+ * + * @param args all the arguments (position then keyword) + * @param names of the keyword arguments (or {@code null}) + * @return the return from the call + * @throws Throwable for errors raised in the function + */ + abstract Object __call__(Object[] args, String[] names) throws Throwable; + + @SuppressWarnings("unused") + private Object __repr__() { return toString(); } + + @SuppressWarnings("unused") + private Object __str__() { return toString(); } + + // plumbing ------------------------------------------------------ + + @Override + public Map getDict() { return dict; } + + @Override + public PyType getType() { return TYPE; } + + @Override + // Compare CPython func_repr in funcobject.c + public String toString() { + return String.format("", qualname, Py.id(this)); + } +} diff --git a/core/src/main/java/org/python/core/PyGetSetDescr.java b/core/src/main/java/org/python/core/PyGetSetDescr.java new file mode 100644 index 000000000..5a21a8722 --- /dev/null +++ b/core/src/main/java/org/python/core/PyGetSetDescr.java @@ -0,0 +1,480 @@ +package org.python.core; + +import static org.python.core.ClassShorthand.O; +import static org.python.core.ClassShorthand.V; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.MethodType; + +import org.python.base.InterpreterError; +import org.python.core.Exposed.Deleter; +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.Setter; +import org.python.core.PyType.Flag; +import org.python.core.Slot.EmptyException; + +/** + * Descriptor for an attribute that has been defined by a series of + * {@link Getter}, {@link Setter} and {@link Deleter} that annotate + * access methods defined in the object implementation to get, set + * or delete the value. 
{@code PyGetSetDescr} differs from + * {@link PyMemberDescr} in giving the author of an implementation + * class the power (and responsibility) entirely to define the + * behaviour corresponding to these actions. + */ +// Compare CPython struct PyGetSetDef in descrobject.h, +// and PyGetSetDescrObject also in descrobject.h +abstract class PyGetSetDescr extends DataDescriptor { + + static final Lookup LOOKUP = MethodHandles.lookup(); + static final PyType TYPE = + PyType.fromSpec(new PyType.Spec("getset_descriptor", LOOKUP).flagNot(Flag.BASETYPE)); + + /** The method handle type (O)O. */ + // CPython: PyObject *(*getter)(PyObject *, void *) + static final MethodType GETTER = MethodType.methodType(O, O); + /** The method handle type (O,O)V. */ + // CPython: int (*setter)(PyObject *, PyObject *, void *) + static final MethodType SETTER = MethodType.methodType(V, O, O); + /** The method handle type (O)V. */ + static final MethodType DELETER = MethodType.methodType(V, O); + + /** A handle on {@link #emptyGetter(PyObject)} */ + private static MethodHandle EMPTY_GETTER; + /** A handle on {@link #emptySetter(PyObject, PyObject)} */ + private static MethodHandle EMPTY_SETTER; + /** A handle on {@link #emptyDeleter(PyObject)} */ + private static MethodHandle EMPTY_DELETER; + /** Empty array of method handles */ + private static MethodHandle[] EMPTY_MH_ARRAY = new MethodHandle[0]; + + static { + /* + * Initialise the empty method handles in a block since it can fail + * (in theory). + */ + try { + EMPTY_GETTER = LOOKUP.findStatic(PyGetSetDescr.class, "emptyGetter", GETTER); + EMPTY_SETTER = LOOKUP.findStatic(PyGetSetDescr.class, "emptySetter", SETTER); + EMPTY_DELETER = LOOKUP.findStatic(PyGetSetDescr.class, "emptyDeleter", DELETER); + } catch (NoSuchMethodException | IllegalAccessException e) { + // This should never happen. + throw new InterpreterError(e, "cannot find get-set empty* functions"); + } + } + + /** Documentation string for this attribute. 
*/ + final String doc; + + /** Java class of attribute accepted by set method. */ + final Class klass; + + /* + * CPython has a void * argument to [gs]etter but no uses are found + * in the CPython code base. Sub-classing may be the Java way to + * provide a closure. + */ + // void *closure; + + /** + * Construct a descriptor that calls the access methods for get, set + * and delete operations specified as method handles. + * + * @param objclass to which descriptor applies + * @param name of attribute + * @param doc documentation string + * @param klass Java class of attribute accepted by set method + */ + // Compare CPython PyDescr_NewGetSet + PyGetSetDescr(PyType objclass, String name, String doc, Class klass) { + super(TYPE, objclass, name); + this.doc = doc; + this.klass = klass; + } + + /** + * Return the getter contained in this descriptor applicable to the + * Java class supplied. The {@link Descriptor#objclass} is consulted + * to make this determination. If the class is not an accepted + * implementation of {@code objclass}, an empty slot handle (with + * the correct signature) is returned. + * + * @param selfClass Java class of the {@code self} argument + * @return corresponding handle (or {@code slot.getEmpty()}) + */ + abstract MethodHandle getWrappedGet(Class selfClass); + + /** + * Return the setter contained in this descriptor applicable to the + * Java class supplied. The {@link Descriptor#objclass} is consulted + * to make this determination. If the class is not an accepted + * implementation of {@code objclass}, an empty slot handle (with + * the correct signature) is returned. + * + * @param selfClass Java class of the {@code self} argument + * @return corresponding handle (or {@code slot.getEmpty()}) + */ + abstract MethodHandle getWrappedSet(Class selfClass); + + /** + * Return the deleter contained in this descriptor applicable to the + * Java class supplied. The {@link Descriptor#objclass} is consulted + * to make this determination. 
If the class is not an accepted + * implementation of {@code objclass}, an empty slot handle (with + * the correct signature) is returned. + * + * @param selfClass Java class of the {@code self} argument + * @return corresponding handle (or {@code slot.getEmpty()}) + */ + abstract MethodHandle getWrappedDelete(Class selfClass); + + /** + * The attribute may not be set or deleted. + * + * @return true if the attribute may not be set or deleted + */ + abstract boolean readonly(); + + /** + * The attribute may be deleted. + * + * @return true if the attribute may be deleted. + */ + abstract boolean optional(); + + /** + * A {@link PyGetSetDescr} to use for a get-set attribute when the + * owning Python type has just one accepted implementation. + */ + static class Single extends PyGetSetDescr { + + /** + * A handle on the getter defined by the unique implementation of + * {@link Descriptor#objclass} for this attribute. The method type + * is {@link #GETTER} = {@code (O)O}. + */ + // CPython: PyObject *(*getter)(PyObject *, void *) + // Compare CPython PyGetSetDef::get + final MethodHandle get; // MT = GETTER + + /** + * A handle on the setter defined by the unique implementation of + * {@link Descriptor#objclass} for this attribute. The method type + * is {@link #SETTER} = {@code (O,O)V}. + */ + // CPython: int (*setter)(PyObject *, PyObject *, void *) + // Compare CPython PyGetSetDef::set + final MethodHandle set; // MT = SETTER + + /** + * A handle on the deleter defined by the unique implementation of + * {@link Descriptor#objclass} for this attribute. The method type + * is {@link #DELETER} = {@code (O)V}. + */ + // Compare CPython PyGetSetDef::set with null + final MethodHandle delete; // MT = DELETER + + /** + * Construct a get-set descriptor, identifying by a method handle + * each implementation method applicable to {@code objclass}. These + * methods will be identified in an implementation by annotations + * {@link Getter}, {@link Setter}, {@link Deleter}. 
+ * + * @param objclass to which descriptor applies + * @param name of attribute + * @param get handle on getter method (or {@code null}) + * @param set handle on setter method (or {@code null}) + * @param delete handle on deleter method (or {@code null}) + * @param doc documentation string + * @param klass Java class of attribute accepted by set method + */ + // Compare CPython PyDescr_NewGetSet + Single(PyType objclass, String name, MethodHandle get, MethodHandle set, + MethodHandle delete, String doc, Class klass) { + super(objclass, name, doc, klass); + this.get = get != null ? get : EMPTY_GETTER; + this.set = set != null ? set : EMPTY_SETTER; + this.delete = delete != null ? delete : EMPTY_DELETER; + } + + @Override + MethodHandle getWrappedGet(Class selfClass) { + // The first argument is acceptable as 'self' + assert objclass.getJavaClass().isAssignableFrom(selfClass); + return get; + } + + @Override + MethodHandle getWrappedSet(Class selfClass) { + // The first argument is acceptable as 'self' + assert objclass.getJavaClass().isAssignableFrom(selfClass); + return set; + } + + @Override + MethodHandle getWrappedDelete(Class selfClass) { + // The first argument is acceptable as 'self' + assert objclass.getJavaClass().isAssignableFrom(selfClass); + return delete; + } + + @Override + boolean readonly() { return set == EMPTY_SETTER; } + + @Override + boolean optional() { return delete != EMPTY_DELETER; } + } + + /** + * A {@link PyGetSetDescr} to use for a get-set attribute when the + * owning Python type has multiple accepted implementations. + */ + static class Multiple extends PyGetSetDescr { + + /** + * Handles for the particular implementations of the getter. The + * method type of each is {@code (O)O}. + */ + // Compare CPython PyGetSetDef::get + protected final MethodHandle[] get; + + /** + * Handles for the particular implementations of the setter. The + * method type of each is {@code (O,O)V}. 
+ */ + // Compare CPython PyGetSetDef::set + protected final MethodHandle[] set; + + /** + * Handles for the particular implementations of the deleter. The + * method type of each is {@code (O)V}. + */ + // CPython uses PyGetSetDef::set + protected final MethodHandle[] delete; + + /** + * Construct a get-set descriptor, identifying by an array of method + * handles the implementation methods applicable to + * {@code objclass}. These methods will be identified in an + * implementation by annotations {@link Getter}, {@link Setter}, + * {@link Deleter}. + * + * @param objclass to which descriptor applies + * @param name of attribute + * @param get operation + * @param set operation + * @param delete operation + * @param doc documentation string + * @param klass Java class of attribute accepted by set method + */ + // Compare CPython PyDescr_NewGetSet + Multiple(PyType objclass, String name, MethodHandle[] get, MethodHandle[] set, + MethodHandle delete[], String doc, Class klass) { + super(objclass, name, doc, klass); + this.get = get; + this.set = set != null ? set : EMPTY_MH_ARRAY; + this.delete = delete != null ? delete : EMPTY_MH_ARRAY; + } + + /** + * {@inheritDoc} + *

+ * The method will check that the type of self matches + * {@link Descriptor#objclass}, according to its + * {@link PyType#indexAccepted(Class)}. + */ + @Override + MethodHandle getWrappedGet(Class selfClass) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(selfClass); + try { + return get[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + // This will behave as an empty slot + return EMPTY_GETTER; + } + } + + /** + * {@inheritDoc} + *

+ * The method will check that the type of self matches + * {@link Descriptor#objclass}, according to its + * {@link PyType#indexAccepted(Class)}. + */ + @Override + MethodHandle getWrappedSet(Class selfClass) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(selfClass); + try { + return set[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + // This will behave as an empty slot + return EMPTY_SETTER; + } + } + + /** + * {@inheritDoc} + *

+ * The method will check that the type of self matches + * {@link Descriptor#objclass}, according to its + * {@link PyType#indexAccepted(Class)}. + */ + @Override + MethodHandle getWrappedDelete(Class selfClass) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(selfClass); + try { + return delete[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + // This will behave as an empty slot + return EMPTY_DELETER; + } + } + + @Override + boolean readonly() { return set.length == 0; } + + @Override + boolean optional() { return delete.length != 0; } + } + + // Compare CPython getset_repr in descrobject.c + @SuppressWarnings("unused") + private Object __repr__() { return descrRepr("attribute"); } + + /** + * {@inheritDoc} + * + * If {@code obj != null} invoke {@code get} on it to return a + * value. {@code obj} must be of type {@link #objclass}. A call made + * with {@code obj == null} returns {@code this} descriptor. + * + * @param type is ignored + */ + // Compare CPython getset_get in descrobject.c + @Override + Object __get__(Object obj, PyType type) throws Throwable { + if (obj == null) + /* + * obj==null indicates the descriptor was found on the target object + * itself (or a base), see CPython type_getattro in typeobject.c + */ + return this; + else { + try { + check(obj); + MethodHandle mh = getWrappedGet(obj.getClass()); + return mh.invokeExact(obj); + } catch (EmptyException e) { + throw cannotReadAttr(); + } + } + } + + /** + * This method fills {@link #get} when the implementation leaves it + * blank. 
+ * + * @param ignored object to operate on + * @return never + * @throws EmptyException always + */ + @SuppressWarnings("unused") // used reflectively + private static Object emptyGetter(Object ignored) throws EmptyException { throw EMPTY; } + + // Compare CPython getset_set in descrobject.c + @Override + void __set__(Object obj, Object value) throws TypeError, Throwable { + if (value == null) { + // This ought to be an error, but allow for CPython idiom. + __delete__(obj); + } else { + try { + checkSet(obj); + MethodHandle mh = getWrappedSet(obj.getClass()); + try { + mh.invokeExact(obj, value); + } catch (ClassCastException e) { + /* + * A cast of 'value' to the argument type of the set method has + * failed (so not Object). The required class is hidden in the + * handle, but we wrote it in this.klass during exposure. + */ + throw attrMustBe(klass, value); + } + } catch (EmptyException e) { + throw cannotWriteAttr(); + } + } + } + + /** + * This method fills {@link #set} when the implementation leaves it + * blank. + * + * @param ignored object to operate on + * @param v ignored too + * @throws EmptyException always + */ + @SuppressWarnings("unused") // used reflectively + private static void emptySetter(Object ignored, Object v) throws EmptyException { throw EMPTY; } + + // Compare CPython getset_set in descrobject.c with NULL value + @Override + void __delete__(Object obj) throws TypeError, Throwable { + try { + checkDelete(obj); + MethodHandle mh = getWrappedDelete(obj.getClass()); + mh.invokeExact(obj); + } catch (EmptyException e) { + throw readonly() ? cannotWriteAttr() : cannotDeleteAttr(); + } + } + + /** + * This method fills {@link #delete} when the implementation leaves + * it blank. 
+ * + * @param ignored object to operate on + * @throws EmptyException always + */ + @SuppressWarnings("unused") // used reflectively + private static void emptyDeleter(Object ignored) throws EmptyException { throw EMPTY; } + + // Compare CPython getset_get_doc in descrobject.c + static Object getset_get_doc(PyGetSetDescr descr) { + if (descr.doc == null) { return Py.None; } + return descr.doc; + } + + /** + * A mapping from symbolic names for the types of method handle in a + * {@code PyGetSetDescr} to other properties like the method handle + * type. + */ + enum Type { + Getter(PyGetSetDescr.GETTER), // + Setter(PyGetSetDescr.SETTER), // + Deleter(PyGetSetDescr.DELETER); // + + final MethodType methodType; + + Type(MethodType mt) { this.methodType = mt; } + + /** + * Map the method handle type back to the {@code PyGetSetDescr.Type} + * that has it or {@code null}. + * + * @param mt to match + * @return matching type or {@code null} + */ + static Type fromMethodType(MethodType mt) { + for (Type t : Type.values()) { if (mt == t.methodType) { return t; } } + return null; + } + } +} diff --git a/core/src/main/java/org/python/core/PyJavaFunction.java b/core/src/main/java/org/python/core/PyJavaFunction.java new file mode 100644 index 000000000..08e2a93e4 --- /dev/null +++ b/core/src/main/java/org/python/core/PyJavaFunction.java @@ -0,0 +1,581 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.util.List; + +import org.python.base.InterpreterError; +import org.python.base.MethodKind; +import org.python.core.ArgumentError.Mode; +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.Member; + +/** + * The Python {@code builtin_function_or_method} object. Java + * sub-classes represent either a built-in function or a built-in + * method bound to a particular object. 
+ */ +public abstract class PyJavaFunction implements CraftedPyObject, FastCall { + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("builtin_function_or_method", MethodHandles.lookup())); + + @Override + public PyType getType() { return TYPE; } + + /** Name of the containing module (or {@code null}). */ + final String module; + + /** + * The object to which this is bound as target (or {@code null}). + * Conventions (adopted from CPython) around this field are that it + * should be {@code null} when representing a static method of a + * built-in class, and otherwise contain the bound target + * ({@code object} or {@code type}). A function obtained from a + * module may be a method bound to an instance of that module. + */ + @Member("__self__") + final Object self; + + /** + * A Java {@code MethodHandle} that implements the function or bound + * method. The type of this handle varies according to the sub-class + * of {@code PyJavaFunction}, but it is definitely "prepared" to + * accept {@code Object.class} instances or arrays, not the actual + * parameter types of the method definition in Java. + */ + final MethodHandle handle; + + /** + * An argument parser supplied to this {@code PyJavaFunction} at + * construction, from Java reflection of the definition in Java and + * from annotations on it. Full information on the signature is + * available from this structure, and it is available to parse the + * arguments to {@link #__call__(Object[], String[])}. + */ + final ArgParser argParser; + + /** + * Construct a Python {@code builtin_function_or_method} object, + * optionally bound to a particular "self" object, specifying the + * prepared method handle. The {@code self} object to which this is + * bound should be {@code null} if the method is Python static in a + * type. Otherwise, we will create a method bound to {@code self} as + * target. 
This may be any {@code object} in the case of an instance + * method, is a {@code type} in the case of a class method, and is a + * {@code module} in the case of a function in a module (whether the + * Java signature is Java static or not). + * + * @param argParser parser defining the method + * @param handle a prepared handle to the method defined + * @param self object to which bound (or {@code null} if a static + * method) + * @param module name of the module supplying the definition + */ + protected PyJavaFunction(ArgParser argParser, MethodHandle handle, Object self, String module) { + this.argParser = argParser; + this.handle = handle; + this.self = self; + this.module = module; + } + + /** + * Construct a {@code PyJavaFunction} from an {@link ArgParser} and + * {@code MethodHandle} for the implementation method. The arguments + * described by the parser do not include "self". This is the + * factory we use to create a function in a module. + * + * @param ap argument parser (provides name etc.) + * @param method raw handle to the method defined + * @param self object to which bound (or {@code null} if a static + * method) + * @param module name of the module supplying the definition (or + * {@code null} if representing a bound method of a type) + * @return a Java function object supporting the signature + */ + // Compare CPython PyCFunction_NewEx in methodobject.c + static PyJavaFunction fromParser(ArgParser ap, MethodHandle method, Object self, + String module) { + /* + * Note this is a recommendation on the assumption all optimisations + * are supported. The actual choice is made in the switch statement. + */ + MethodSignature sig = MethodSignature.fromParser(ap); + + assert ap.methodKind != MethodKind.CLASS; + + /* + * In each case, we must prepare a method handle of the chosen + * shape.
+ */ + switch (sig) { + case NOARGS: + method = MethodSignature.NOARGS.prepareBound(ap, method, self); + return new NoArgs(ap, method, self, module); + case O1: + method = MethodSignature.O1.prepareBound(ap, method, self); + return new O1(ap, method, self, module); + case O2: + method = MethodSignature.O2.prepareBound(ap, method, self); + return new O2(ap, method, self, module); + case O3: + method = MethodSignature.O3.prepareBound(ap, method, self); + return new O3(ap, method, self, module); + case POSITIONAL: + method = MethodSignature.POSITIONAL.prepareBound(ap, method, self); + return new Positional(ap, method, self, module); + default: + method = MethodSignature.GENERAL.prepareBound(ap, method, self); + return new General(ap, method, self, module); + } + } + + /** + * Construct a {@code PyJavaFunction} from a {@link PyMethodDescr} + * and optional object to bind. The {@link PyMethodDescr} provides + * the parser and unbound prepared {@code MethodHandle}. The + * arguments described by the parser do not include "self". This is + * the factory that supports descriptor {@code __get__}. 
+ * + * @param descr descriptor being bound + * @param self object to which bound (or {@code null} if a static + * method) + * @return a Java method object supporting the signature + * @throws TypeError if {@code self} is not compatible with + * {@code descr} + * @throws Throwable on other errors while chasing the MRO + */ + // Compare CPython PyCFunction_NewEx in methodobject.c + static PyJavaFunction from(PyMethodDescr descr, Object self) throws TypeError, Throwable { + ArgParser ap = descr.argParser; + assert ap.methodKind == MethodKind.INSTANCE; + MethodHandle handle = descr.getHandle(self).bindTo(self); + // We must support the same optimisations as PyMethodDescr + switch (descr.signature) { + case NOARGS: + return new NoArgs(ap, handle, self, null); + case O1: + return new O1(ap, handle, self, null); + case O2: + return new O2(ap, handle, self, null); + case O3: + return new O3(ap, handle, self, null); + case POSITIONAL: + return new Positional(ap, handle, self, null); + case GENERAL: + return new General(ap, handle, self, null); + default: + throw new InterpreterError("Optimisation not supported: %s", descr.signature); + } + } + + // slot functions ------------------------------------------------- + + protected Object __repr__() throws Throwable { + if (self == null || self instanceof PyModule) + return String.format("", __name__()); + else + return String.format("", __name__(), PyObjectUtil.toAt(self)); + } + + Object __call__(Object[] args, String[] names) throws TypeError, Throwable { + try { + // It is *not* worth unpacking the array here + return call(args, names); + } catch (ArgumentError ae) { + throw typeError(ae, args, names); + } + } + + // exposed methods ----------------------------------------------- + + /** @return name of the function or method */ + // Compare CPython meth_get__name__ in methodobject.c + @Getter + String __name__() { return argParser.name; } + + // plumbing ------------------------------------------------------ + + @Override + 
public String toString() { return Py.defaultToString(this); } + + /** + * Translate a problem with the number and pattern of arguments, in + * a failed attempt to call the wrapped method, to a Python + * {@link TypeError}. + * + * @param ae expressing the problem + * @param args positional arguments (only the number will matter) + * @return a {@code TypeError} to throw + */ + // XXX Compare MethodDescriptor.typeError : unify? + @Override + @SuppressWarnings("fallthrough") + public TypeError typeError(ArgumentError ae, Object[] args, String[] names) { + int n = args.length; + switch (ae.mode) { + case NOARGS: + case NUMARGS: + case MINMAXARGS: + return new TypeError("%s() %s (%d given)", __name__(), ae, n); + case NOKWARGS: + assert names != null && names.length > 0; + default: + return new TypeError("%s() %s", __name__(), ae); + } + } + + /** + * The implementation may have any signature allowed by + * {@link ArgParser}. + */ + private static class General extends PyJavaFunction { + + /** + * Construct a method object, identifying the implementation by a + * parser and a method handle. + * + * @param argParser describing the signature of the method + * @param handle a prepared prepared to the method defined + * @param self object to which bound (or {@code null} if a static + * method) + * @param module name of the module supplying the definition (or + * {@code null} if representing a bound method of a type) + */ + General(ArgParser argParser, MethodHandle handle, Object self, String module) { + super(argParser, handle, self, module); + assert handle.type() == MethodSignature.GENERAL.boundType; + } + + @Override + public Object call(Object[] args, String[] names) throws TypeError, Throwable { + /* + * The method handle type is {@code (O[])O}. The parser will make an + * array of the args, and where allowed, gather excess arguments + * into a tuple or dict, and fill missing ones from defaults. 
+             */
+            Object[] frame = argParser.parse(args, names);
+            return handle.invokeExact(frame);
+        }
+    }
+
+    /**
+     * Base class for methods that accept between defined maximum and
+     * minimum numbers of arguments, that must be given by position.
+     * Maximum and minimum may be equal to a single acceptable number.
+     * <p>
+     * Arguments may not be given by keyword. There is no excess
+     * argument (varargs) collector.
+     * <p>
+     * The number of arguments required by the wrapped Java method sets
+     * a maximum allowable number of arguments. Fewer arguments than
+     * this may be given, to the extent that defaults specified by the
+     * parser make up the difference. The number of available defaults
+     * determines the minimum number of arguments to be supplied.
+     *
+     * @implNote Sub-classes must define {@link #call(Object[])}: the
+     *     default definition in {@link FastCall} is not enough.
+     */
+    private static abstract class AbstractPositional extends PyJavaFunction {
+
+        /** Default values of the trailing arguments. */
+        protected final Object[] d;
+
+        /** Minimum number of positional arguments in a call. */
+        protected final int min;
+
+        /** Maximum number of positional arguments in a call. */
+        protected final int max;
+
+        /**
+         * Construct a method object, identifying the implementation by
+         * a parser and a method handle. The limits {@link #min} and
+         * {@link #max} are derived from the parser's argument count and
+         * its array of defaults.
+         *
+         * @param argParser describing the signature of the method
+         * @param handle a prepared handle to the method defined
+         * @param self object to which bound (or {@code null})
+         * @param module name of the module supplying the definition (or
+         *     {@code null})
+         */
+        // Compare CPython PyDescr_NewMethod in descrobject.c
+        AbstractPositional(ArgParser argParser, MethodHandle handle, Object self, String module) {
+            super(argParser, handle, self, module);
+            assert !argParser.hasVarArgs();
+            // Cardinal values for positional argument processing
+            this.d = argParser.getDefaults();
+            this.max = argParser.argcount;
+            this.min = argParser.argcount - d.length;
+        }
+
+        /**
+         * Call with classic arguments: any keyword argument is an error.
+         *
+         * @throws ArgumentError (mode {@code NOKWARGS}) if {@code names}
+         *     is non-empty
+         */
+        @Override
+        public Object call(Object[] args, String[] names) throws TypeError, Throwable {
+            if (names == null || names.length == 0) {
+                return call(args);
+            } else {
+                throw new ArgumentError(Mode.NOKWARGS);
+            }
+        }
+
+        @Override
+        public Object call(Object[] args) throws TypeError, Throwable {
+            // Make sure we find out if this is missing
+            throw new InterpreterError(
+                    "Sub-classes of AbstractPositional must define call(Object[])");
+        }
+
+        // Save some indirection by specialising to positional
+        @Override
+        Object __call__(Object[] args, String[] names) throws TypeError, Throwable {
+            try {
+                if (names == null || names.length == 0) {
+                    // It is *not* worth unpacking the array here
+                    return call(args);
+                } else {
+                    throw new ArgumentError(Mode.NOKWARGS);
+                }
+            } catch (ArgumentError ae) {
+                throw typeError(ae, args, names);
+            }
+        }
+    }
+
+    /** The implementation signature accepts no arguments. */
+    private static class NoArgs extends AbstractPositional {
+
+        /**
+         * Construct a method object, identifying the implementation by a
+         * parser and a prepared method handle.
+         *
+         * @param argParser describing the signature of the method
+         * @param handle a prepared handle to the method defined
+         * @param self object to which bound (or {@code null} if a static
+         *     method)
+         * @param module name of the module supplying the definition (or
+         *     {@code null} if representing a bound method of a type)
+         */
+        NoArgs(ArgParser argParser, MethodHandle handle, Object self, String module) {
+            super(argParser, handle, self, module);
+            assert handle.type() == MethodSignature.NOARGS.boundType;
+        }
+
+        @Override
+        public Object call(Object[] a) throws Throwable {
+            // The method handle type is {@code ()O}.
+            if (a.length == 0) { return handle.invokeExact(); }
+            // n < min || n > max
+            throw new ArgumentError(min, max);
+        }
+    }
+
+    /**
+     * The implementation signature requires one argument, which may be
+     * supplied by {@link ArgParser#getDefaults()}.
+     */
+    private static class O1 extends AbstractPositional {
+
+        /**
+         * Construct a method object, identifying the implementation by a
+         * parser and a method handle.
+         *
+         * @param argParser describing the signature of the method
+         * @param handle a prepared handle to the method defined
+         * @param self object to which bound (or {@code null} if a static
+         *     method)
+         * @param module name of the module supplying the definition (or
+         *     {@code null} if representing a bound method of a type)
+         */
+        O1(ArgParser argParser, MethodHandle handle, Object self, String module) {
+            super(argParser, handle, self, module);
+            assert handle.type() == MethodSignature.O1.boundType;
+        }
+
+        @Override
+        public Object call(Object[] a) throws TypeError, Throwable {
+            // The method handle type is {@code (O)O}.
+            int n = a.length;
+            if (n == 1) {
+                // Number of arguments matches number of parameters
+                return handle.invokeExact(a[0]);
+            } else if (n == min) {
+                // Since min<=max, max==1 and n!=1, we have n==min==0
+                return handle.invokeExact(d[0]);
+            }
+            // n < min || n > max
+            throw new ArgumentError(min, max);
+        }
+    }
+
+    /**
+     * The implementation signature requires two arguments, which may be
+     * supplied by {@link ArgParser#getDefaults()}.
+     */
+    private static class O2 extends AbstractPositional {
+
+        /**
+         * Construct a method object, identifying the implementation by
+         * a parser and a method handle.
+         *
+         * @param argParser describing the signature of the method
+         * @param handle a prepared handle to the method defined
+         * @param self object to which bound (or {@code null} if a static
+         *     method)
+         * @param module name of the module supplying the definition (or
+         *     {@code null} if representing a bound method of a type)
+         */
+        // Compare CPython PyDescr_NewMethod in descrobject.c
+        O2(ArgParser argParser, MethodHandle handle, Object self, String module) {
+            super(argParser, handle, self, module);
+            assert handle.type() == MethodSignature.O2.boundType;
+            assert max == 2;
+            assert max - min == d.length;
+        }
+
+        @Override
+        public Object call(Object[] a) throws ArgumentError, TypeError, Throwable {
+            // The method handle type is (O,O)O.
+ int n = a.length, k; + if (n == 2) { + // Number of arguments matches number of parameters + return handle.invokeExact(a[0], a[1]); + } else if ((k = n - min) >= 0) { + if (n == 1) { + return handle.invokeExact(a[0], d[k]); + } else if (n == 0) + return handle.invokeExact(d[k++], d[k]); + } + // n < min || n > max + throw new ArgumentError(min, max); + } + + @Override + public Object call() throws Throwable { + if (min == 0) { return handle.invokeExact(d[0], d[1]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object a0) throws Throwable { + int k = 1 - min; + if (k >= 0) { return handle.invokeExact(a0, d[k]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self, Object a0, Object a1) throws Throwable { + return handle.invokeExact(self, a0, a1); + } + } + + /** + * The implementation signature requires three arguments, which may + * be supplied by {@link ArgParser#getDefaults()}. + */ + private static class O3 extends AbstractPositional { + + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O3(ArgParser argParser, MethodHandle handle, Object self, String module) { + super(argParser, handle, self, module); + assert handle.type() == MethodSignature.O3.boundType; + assert max == 3; + assert max - min == d.length; + } + + @Override + public Object call(Object[] a) throws ArgumentError, TypeError, Throwable { + // The method handle type is (O,O)O. 
+ int n = a.length, k; + if (n == 3) { + // Number of arguments matches number of parameters + return handle.invokeExact(a[0], a[1], a[2]); + } else if ((k = n - min) >= 0) { + if (n == 2) { + return handle.invokeExact(a[0], a[1], d[k]); + } else if (n == 1) { + return handle.invokeExact(a[0], d[k++], d[k]); + } else { + return handle.invokeExact(d[k++], d[k++], d[k]); + } + } + // n < min || n > max + throw new ArgumentError(min, max); + } + + @Override + public Object call() throws Throwable { + if (min == 0) { return handle.invokeExact(d[0], d[1], d[2]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object a0) throws Throwable { + int k = 1 - min; + if (k >= 0) { return handle.invokeExact(a0, d[k++], d[k]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object a0, Object a1) throws Throwable { + int k = 2 - min; + if (k >= 0) { return handle.invokeExact(a0, a1, d[k]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object a0, Object a1, Object a2) throws Throwable { + return handle.invokeExact(a0, a1, a2); + } + } + + /** + * A method represented by {@code Positional} only accepts arguments + * given by position. The constraints detailed for + * {@link AbstractPositional} apply. + *

+ * {@link #fromParser(PyType, ArgParser, List) fromParser()} will + * only choose a {@code Positional} (or sub-class) representation of + * the method when these conditions apply. + */ + private static class Positional extends AbstractPositional { + + /** + * Construct a method object, identifying the implementation by a + * parser and a method handle. + * + * @param argParser describing the signature of the method + * @param handle a prepared prepared to the method defined + * @param self object to which bound (or {@code null} if a static + * method) + * @param module name of the module supplying the definition (or + * {@code null} if representing a bound method of a type) + */ + // XXX Compare CPython XXX in XXX + Positional(ArgParser argParser, MethodHandle handle, Object self, String module) { + super(argParser, handle, self, module); + assert handle.type() == MethodSignature.POSITIONAL.boundType; + assert max == argParser.argcount; + assert max - min == d.length; + } + + @Override + public Object call(Object[] args) throws TypeError, Throwable { + // The method handle type is {@code (O[])O}. + int n = args.length, k; + if (n == max) { + // Number of arguments matches number of parameters + return handle.invokeExact(args); + } else if ((k = n - min) >= 0) { + // Concatenate args[:] and defaults[k:] + Object[] frame = new Object[max]; + System.arraycopy(args, 0, frame, 0, n); + System.arraycopy(d, k, frame, n, max - n); + return handle.invokeExact(frame); + } + // n < min || n > max + throw new ArgumentError(min, max); + } + } +} diff --git a/core/src/main/java/org/python/core/PyList.java b/core/src/main/java/org/python/core/PyList.java new file mode 100644 index 000000000..ba883cbfa --- /dev/null +++ b/core/src/main/java/org/python/core/PyList.java @@ -0,0 +1,1339 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+// Copyright (c) Corporation for National Research Initiatives +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.StringJoiner; +import java.util.function.Function; +import java.util.function.Supplier; + +import org.python.base.InterpreterError; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.PySlice.Indices; +import org.python.core.PyType.Spec; + +/** + * The Python {@code list} object that is also a Java + * {@code List}. Operations in both APIs are synchronised + * for safety against concurrent threads. + *

+ * It is {@code synchronized} so that competing threads should be + * able to access it with roughly the same protection against + * concurrent modification that CPython offers. There are also + * necessary safeguards during {@code sort()} to detect modification + * from within the current thread as a side effect of comparison. + * Java brings its own safeguard within iterators against structural + * concurrent modification. + * + * @implNote The design follows that in Jython 2 with a private Java + * list member to which operations are delegated directly or + * indirectly. In the present design, the indirect delegation is + * through a private delegate member where in the former design + * behaviour was inherited. + */ +// @ExposedType(name = "list", base = PyObject.class, doc = BuiltinDocs.list_doc) +public class PyList implements List, CraftedPyObject { + + public static final PyType TYPE = + PyType.fromSpec(new Spec("list", MethodHandles.lookup())); + + /** The Python type of this instance. */ + protected final PyType type; + + /** Storage for the actual list elements (as a list). */ + private final List list; + + /** Implementation help for sequence methods. */ + private final ListDelegate delegate = new ListDelegate(); + + /** + * Synchronisation prevents modification by competing threads during + * an operation, but does not defend against actions within that + * operation by the same thread. One context where this occurs is + * {@code sort()}, where the implementation of {@code __lt__} on + * elements can, in principle, mutate the list being sorted. This + * variable is cleared at the beginning of {@code sort()} and set by + * any operation that modifies the list. + */ + private boolean changed = false; + + /** + * Fundamental constructor, specifying actual type and the list that + * will become the storage object. 
+     *
+     * @param type actual type
+     * @param list storage object
+     */
+    private PyList(PyType type, List list) {
+        this.type = type;
+        this.list = list;
+    }
+
+    /**
+     * Construct a Python {@code list} object, specifying actual type
+     * and initial capacity.
+     *
+     * @param type actual type
+     * @param initialCapacity capacity
+     */
+    public PyList(PyType type, int initialCapacity) {
+        this(type, new ArrayList<>(initialCapacity));
+    }
+
+    /**
+     * Construct a Python {@code list} object, specifying initial
+     * capacity.
+     *
+     * @param initialCapacity capacity
+     */
+    public PyList(int initialCapacity) { this(TYPE, new ArrayList<>(initialCapacity)); }
+
+    /**
+     * Construct an empty Python {@code list} object, specifying actual
+     * type.
+     *
+     * @param type actual type
+     */
+    public PyList(PyType type) { this(type, 0); }
+
+    /** Construct an empty Python {@code list} object. */
+    public PyList() { this(TYPE, 0); }
+
+    /**
+     * Construct a Python {@code list} object, specifying actual type
+     * and initial contents. The contents will be a (shallow) copy of
+     * the collection.
+     *
+     * @param type actual type
+     * @param c initial contents
+     */
+    public PyList(PyType type, Collection c) {
+        this(type, c.size());
+        addAll(c);
+    }
+
+    /**
+     * Construct a Python {@code list} object, specifying initial
+     * contents. The contents will be a (shallow) copy of the
+     * collection.
+     *
+     * @param c initial contents
+     */
+    public PyList(Collection c) { this(TYPE, c); }
+
+    /**
+     * Construct a {@code list} with initial contents from an array
+     * slice.
+     *
+     * @param a the array
+     * @param start of slice
+     * @param count of elements to take
+     */
+    PyList(Object[] a, int start, int count) {
+        this(TYPE, count);
+        int stop = start + count;
+        for (int i = start; i < stop; i++) { add(a[i]); }
+    }
+
+    /**
+     * Return a Python {@code list} object, specifying initial
+     * contents.
+     *
+     * @param elements initial element values
+     * @return list of elements
+     */
+    public static PyList of(Object... elements) { return new PyList(List.of(elements)); }
+
+    @Override
+    public PyType getType() { return type; }
+
+    // Special methods -----------------------------------------------
+
+    // @ExposedNew
+    // @ExposedMethod(doc = BuiltinDocs.list___init___doc)
+    // @formatter:off
+    /*
+    void __init__(Object[] args, String[] kwds) {
+        ArgParser ap = new ArgParser("list", args, kwds, new String[]{"sequence"}, 0);
+        Object seq = ap.getPyObject(0, null);
+        clear();
+        if (seq == null) {
+            return;
+        }
+
+        /* PyListDerived should be iterated over and not plain copied for cases where someone subclasses list
+        and overrides __iter__
+         * /
+        if (seq instanceof PyListDerived) {
+            for (Object item : seq.asIterable()) {
+                append(item);
+            }
+        } else if (seq instanceof PyList) {
+            list.addAll(((PyList) seq).list); // don't convert
+        } else if (seq instanceof PyList) {
+            list.addAll((PyTuple) seq);
+        } else {
+            for (Object item : seq.asIterable()) {
+                append(item);
+            }
+        }
+    }
+    */
+    // @formatter:on
+
+    // The special methods below each hold the lock on this list and
+    // delegate to size() or to the ListDelegate member.
+
+    // @ExposedMethod(doc = BuiltinDocs.list___len___doc)
+    synchronized int __len__() {
+        return size();
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___ne___doc)
+    synchronized Object __ne__(Object o) {
+        return delegate.cmp(o, Comparison.NE);
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___eq___doc)
+    synchronized Object __eq__(Object o) {
+        return delegate.cmp(o, Comparison.EQ);
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___lt___doc)
+    synchronized Object __lt__(Object o) {
+        return delegate.cmp(o, Comparison.LT);
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___le___doc)
+    synchronized Object __le__(Object o) {
+        return delegate.cmp(o, Comparison.LE);
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___gt___doc)
+    synchronized Object __gt__(Object o) {
+        return delegate.cmp(o, Comparison.GT);
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___ge___doc)
+    synchronized Object __ge__(Object o) {
+        return delegate.cmp(o, Comparison.GE);
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___imul___doc)
+    // @formatter:off
+    /*
+    synchronized Object __imul__(Object o) {
+        if (!o.isIndex()) {
+            return null;
+        }
+        int count = o.asIndex(Py.OverflowError);
+
+        int size = size();
+        if (size == 0 || count == 1) {
+            return this;
+        }
+
+        if (count < 1) {
+            clear();
+            return this;
+        }
+
+        if (size > Integer.MAX_VALUE / count) {
+            throw new MemoryError("");
+        }
+
+        int newSize = size * count;
+        if (list instanceof ArrayList) {
+            ((ArrayList) list).ensureCapacity(newSize);
+        }
+        List oldList = new ArrayList(list);
+        for (int i = 1; i < count; i++) {
+            list.addAll(oldList);
+        }
+        gListAllocatedStatus = list.size(); // now omit?
+        return this;
+    }
+    */
+    // @formatter:on
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___mul___doc)
+    synchronized Object __mul__(Object n) throws Throwable { return delegate.__mul__(n); }
+
+    // The reflected operation uses the same delegate method as __mul__
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___rmul___doc)
+    synchronized Object __rmul__(Object n) throws Throwable { return delegate.__mul__(n); }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___add___doc)
+    synchronized Object __add__(Object o) throws Throwable { return delegate.__add__(o); }
+
+    // @ExposedMethod(type = MethodType.BINARY)
+    synchronized Object __radd__(Object o) throws Throwable { return delegate.__radd__(o); }
+
+    // @ExposedMethod(doc = BuiltinDocs.list___contains___doc)
+    synchronized boolean __contains__(Object o) throws Throwable {
+        return delegate.__contains__(o);
+    }
+
+    // The mutating special methods set the changed flag before delegating
+
+    // @ExposedMethod(doc = BuiltinDocs.list___delitem___doc)
+    synchronized void __delitem__(Object index) throws Throwable {
+        changed = true;
+        delegate.__delitem__(index);
+    }
+
+    // @ExposedMethod(doc = BuiltinDocs.list___setitem___doc)
+    synchronized void __setitem__(Object index, Object value) throws Throwable {
+        changed = true;
+        delegate.__setitem__(index, value);
+    }
+
+    // @ExposedMethod(doc = BuiltinDocs.list___getitem___doc)
+    synchronized Object __getitem__(Object index) throws Throwable {
+        return delegate.__getitem__(index);
+    }
+
+    // @formatter:off
+    /*
+    // @ExposedMethod(doc = BuiltinDocs.list___iter___doc)
+    Object __iter__() {
+        return new PyListIterator(this);
+    }
+
+    // @ExposedMethod(doc = BuiltinDocs.list___reversed___doc)
+    synchronized PyIterator __reversed__() {
+        return new PyReversedIterator(this);
+    }
+    */
+    // @formatter:on
+
+    /**
+     * Return the elements, each rendered by its own
+     * {@code toString()}, separated by commas and enclosed in square
+     * brackets.
+     */
+    @Override
+    public String toString() {
+        // XXX Use repr for elements and guard against recursive references
+        StringJoiner sj = new StringJoiner(", ", "[", "]");
+        for (Object v : list) { sj.add(v.toString()); }
+        return sj.toString();
+    }
+
+    // XXX object.__repr__ (calls toString()) should be enough.
+    // Retaining this code to indicate toString() additions needed.
+    // @formatter:off
+    /*
+    // @ExposedMethod(names = "__repr__")
+    final synchronized String __repr__() {
+        ThreadState ts = Py.getThreadState();
+        if (!ts.enterRepr(this)) {
+            return "[...]";
+        }
+        StringBuilder buf = new StringBuilder("[");
+        int length = size();
+        int i = 0;
+        for (Object item : list) {
+            buf.append(item.__repr__().toString());
+            if (i < length - 1) {
+                buf.append(", ");
+            }
+            i++;
+        }
+        buf.append("]");
+        ts.exitRepr(this);
+        return buf.toString();
+    }
+    */
+    // @formatter:on
+
+    /**
+     * Add a single element to the end of list.
+     *
+     * @param o the element to add.
+     */
+    // @ExposedMethod(doc = BuiltinDocs.list_append_doc)
+    final synchronized void list_append(Object o) {
+        changed = true;
+        list.add(o);
+    }
+
+    /**
+     * Remove all items from the list (same as {@code del s[:]})
+     */
+    // @ExposedMethod in Python 3
+    final synchronized void list_clear() {
+        changed = true;
+        list.clear();
+    }
+
+    /**
+     * Return the number of elements in the list that are Python-equal to
+     * the argument.
+     *
+     * @param v the value to test for.
+     * @return the number of occurrences.
+     * @throws Throwable from the implementation of {@code __eq__}
+     */
+    // @ExposedMethod(doc = BuiltinDocs.list_count_doc)
+    final synchronized int list_count(Object v) throws Throwable { return delegate.count(v); }
+
+    /**
+     * Return smallest index where an element in the list Python-equals the argument.
+     *
+     * @param v the value to look for.
+     * @param start first index to test
+     * @param stop first index not to test
+     * @return index of the occurrence
+     * @throws ValueError if {@code v} not found
+     * @throws TypeError from bad {@code start} and {@code stop} types
+     * @throws Throwable from errors other than indexing
+     */
+    // @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.list_index_doc)
+    final synchronized int list_index(Object v, Object start, Object stop) throws TypeError, Throwable {
+        return delegate.index(v, start, stop);
+    }
+
+    /**
+     * Insert the argument element into the list at the specified index.
+     * Same as {@code s[index:index] = [o] if index >= 0}.
+     *
+     * @param index the position where the element will be inserted.
+     * @param o the element to insert.
+     * @throws TypeError from bad {@code index} type
+     * @throws Throwable from other conversion errors
+     */
+    // @ExposedMethod(doc = BuiltinDocs.list_insert_doc)
+    final synchronized void list_insert(Object index, Object o) throws TypeError, Throwable {
+        // Flag the modification (see the changed field) before delegating
+        changed = true;
+        delegate.insert(index, o);
+    }
+
+    /**
+     * Remove from the list the first element that is Python-equal to
+     * the argument. Same as {@code del s[s.index(x)]}.
+     *
+     * @param v the element to search for and remove.
+     * @throws Throwable from the implementation of {@code __eq__}
+     */
+    // @ExposedMethod(doc = BuiltinDocs.list_remove_doc)
+    final synchronized void list_remove(Object v) throws Throwable {
+        int i = find(v);
+        if (i >= 0) {
+            changed = true;
+            list.remove(i);
+        } else {
+            // The message names the actual type of this object
+            throw new ValueError("%s.remove(x): x not in list", getType().name);
+        }
+    }
+
+    /**
+     * Return the index of {@code v} in {@link #list} or -1 if not
+     * found. Elements are compared to {@code v} in index order using
+     * Python equality, and the first match wins.
+     *
+     * @param v the element to search for.
+     * @return the index of {@code v} or -1 if not found.
+     * @throws Throwable from the implementation of {@code __eq__}
+     */
+    private int find(Object v) throws Throwable {
+        int n = list.size();
+        for (int i = 0; i < n; i++) {
+            if (Abstract.richCompareBool(v, list.get(i), Comparison.EQ)) { return i; }
+        }
+        return -1;
+    }
+
+    /**
+     * Reverses the items of s in place. The reverse() methods modify the list in place for economy
+     * of space when reversing a large list. It doesn't return the reversed list to remind you of
+     * this side effect.
+     */
+    // @ExposedMethod(doc = BuiltinDocs.list_reverse_doc)
+    final synchronized void reverse() {
+        Collections.reverse(list);
+        changed = true;
+    }
+
+    /**
+     * Remove and return a specified element from the list. A negative
+     * index counts from the end of the list.
+     *
+     * @param n the index of the element to remove and return.
+     * @return the popped item
+     * @throws IndexError if the list is empty or {@code n} is out of
+     *     range
+     */
+    // @ExposedMethod(defaults = "-1", doc = BuiltinDocs.list_pop_doc)
+    final synchronized Object list_pop(int n) {
+        int size = size();
+        if (size == 0) {
+            throw new IndexError("pop from empty list");
+        } else {
+            // A negative index is relative to the end
+            if (n < 0) { n += size; }
+            if (n < 0 || n >= size) { throw new IndexError("pop index out of range"); }
+            changed = true;
+            return list.remove(n);
+        }
+    }
+
+    /**
+     * Append the elements in the argument sequence to the end of the
+     * list, {@code s[len(s):len(s)] = o}.
+     *
+     * @param o the sequence of items to append to the list.
+     * @throws Throwable from attempting to get an iterator on {@code o}
+     */
+    // @ExposedMethod(doc = BuiltinDocs.list_extend_doc)
+    final synchronized void list_extend(Object o) throws Throwable {
+        // A null supplier selects the default exception
+        list_extend(o, null);
+    }
+
+    /**
+     * Append the elements in the argument sequence to the end of the
+     * list, {@code s[len(s):len(s)] = o}.
+     *
+     * @param <E> the type of exception to throw
+     * @param o the sequence of items to append to the list.
+     * @param exc a supplier (e.g. lambda expression) for the exception
+     *     to throw if an iterator cannot be formed (or {@code null} for
+     *     a default {@code TypeError})
+     * @throws E to throw if an iterator cannot be formed
+     * @throws Throwable from the implementation of {@code o}.
+     */
+    // NOTE(review): E is not declared in the signature as it appears
+    // here. Presumably it is a method type parameter, with exc a
+    // Supplier of E, whose angle-bracket text has been lost: confirm
+    // against the original source.
+    // NOTE(review): unlike its neighbours this method is not itself
+    // synchronized; the callers visible here are.
+    final void list_extend(Object o, Supplier exc) throws E, Throwable {
+        changed = true;
+        list.addAll(PySequence.fastList(o, exc));
+    }
+
+    // @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___iadd___doc)
+    synchronized Object __iadd__(Object o) throws Throwable {
+        changed = true;
+        list_extend(o);
+        // In-place addition returns this same object
+        return this;
+    }
+
+    /**
+     * Sort the items of the list in place, using only {@code <} comparisons
+     * between items. Exceptions are not suppressed: if any
+     * comparison operations fail, the entire sort operation will fail
+     * (and the list will likely be left in a partially modified state).
+     *
+     * @param key specifies a function of one argument that is used to
+     *     extract a comparison key from each list element, e.g.
+     *     {@code key=str.lower}. The default value is {@code None}.
+     * @param reverse the list elements are sorted as if each comparison
+     *     were reversed.
+ * @throws Throwable from object comparison + */ + // @ExposedMethod(doc = BuiltinDocs.list_sort_doc) + final synchronized void sort(Function key, boolean reverse) + throws Throwable { + // Python: sort(*, key=None, reverse=False) + if (key == null) { + sortOnValue(reverse); + } else { + sortOnKey(key, reverse); + } + } + + private synchronized void sortOnValue(boolean reverse) throws Throwable { + + // We shall sort on values using this (Python) comparator + final ListElementComparator cmp = new ListElementComparator(reverse) { + + @Override + boolean lessThan(Object o1, Object o2) throws Throwable { + return Abstract.richCompareBool(o1, o2, Comparison.LT); + } + + }; + + // Now sort the list, failing on any change + changed = false; + Collections.sort(list, cmp); + if (cmp.raisedException()) { throw cmp.getRaisedException(); } + changed = true; + } + + /** + * During {@link PyList#sortOnKey(Function, boolean)}, we actually + * sort a list of these key-value objects. + */ + private static class KV { + + private final Object key; + private final Object value; + + KV(Object key, Object value) { + this.key = key; + this.value = value; + } + } + + private synchronized void sortOnKey(Function keyfunc, boolean reverse) + throws Throwable { + + // Make a copy of the list as key-value pairs in kvList + int size = list.size(); + final ArrayList kvList = new ArrayList(size); + for (Object value : list) { + Object k = keyfunc.apply(value); + kvList.add(new KV(k, value)); + } + + // We shall sort kvList comparing keys + final ListElementComparator cmp = new ListElementComparator(reverse) { + + @Override + boolean lessThan(KV o1, KV o2) throws Throwable { + return Abstract.richCompareBool(o1.key, o2.key, Comparison.LT); + } + + }; + + // Now sort the kvList, failing on any change + changed = false; + Collections.sort(kvList, cmp); + if (cmp.raisedException()) { throw cmp.getRaisedException(); } + + // Copy values from kvList (sorted on key) + assert kvList.size() == size; + 
for (int i = 0; i < size; i++) { list.set(i, kvList.get(i).value); } + changed = true; + } + + public PyTuple __getnewargs__() { return new PyTuple(new PyTuple(list)); } + + // List interface ------------------------------------------------ + + @Override + public synchronized void add(int index, Object element) { + changed = true; + list.add(index, element); + } + + @Override + public synchronized boolean add(Object o) { + changed = true; + return list.add(o); + } + + @Override + public synchronized boolean addAll(int index, Collection c) { + changed = true; + return list.addAll(index, c); + } + + @Override + public boolean addAll(Collection c) { + changed = true; + return addAll(0, c); + } + + @Override + public synchronized void clear() { + changed = true; + list.clear(); + } + + @Override + public synchronized boolean contains(Object o) { + try { + // Use the Python definition of equality (which may throw) + return delegate.__contains__(o); + } catch (Throwable e) { + return false; + } + } + + @Override + public synchronized boolean containsAll(Collection c) { + try { + for (Object o : c) { + // Use the Python definition of equality (which may throw) + if (!delegate.__contains__(o)) { return false; } + } + return true; + } catch (Throwable e) { + return false; + } + } + + @Override + public synchronized Object get(int index) { + return list.get(index); + } + + @Override + public synchronized int indexOf(Object o) { + return list.indexOf(o); + } + + @Override + public synchronized boolean isEmpty() { + return list.isEmpty(); + } + + @Override + public Iterator iterator() { + return new Iterator() { + + private final Iterator iter = list.iterator(); + + @Override + public boolean hasNext() { return iter.hasNext(); } + + @Override + public Object next() { + synchronized (PyList.this) { + return iter.next(); + } + } + + @Override + public void remove() { + synchronized (PyList.this) { + changed = true; + iter.remove(); + } + } + }; + } + + @Override + public 
synchronized int lastIndexOf(Object o) { + return list.lastIndexOf(o); + } + + @Override + public ListIterator listIterator() { + return listIterator(0); + } + + @Override + public ListIterator listIterator(final int index) { + return new ListIterator() { + + private final ListIterator iter = list.listIterator(index); + + @Override + public boolean hasNext() { return iter.hasNext(); } + + @Override + public Object next() { + synchronized (PyList.this) { + return iter.next(); + } + } + + @Override + public boolean hasPrevious() { return iter.hasPrevious(); } + + @Override + public Object previous() { + synchronized (PyList.this) { + return iter.previous(); + } + } + + @Override + public int nextIndex() { return iter.nextIndex(); } + + @Override + public int previousIndex() { return iter.previousIndex(); } + + @Override + public void remove() { + synchronized (PyList.this) { + changed = true; + iter.remove(); + } + } + + @Override + public void set(Object o) { + synchronized (PyList.this) { + changed = true; + iter.set(o); + } + } + + @Override + public void add(Object o) { + synchronized (PyList.this) { + changed = true; + iter.add(o); + } + } + + }; + } + + @Override + public synchronized Object remove(int index) { + changed = true; + return list.remove(index); + } + + @Override + public synchronized boolean removeAll(Collection c) { + // Make a list of indices at which to remove an item + List erasures = new ArrayList<>(c.size()); + int n = list.size(); + for (int i = 0; i < n; i++) { + Object item = list.get(i); + for (Object o : c) { + // Use the Python definition of equality (which may throw) + try { + if (Abstract.richCompareBool(o, item, Comparison.EQ)) { + erasures.add(i); + break; + } + } catch (Throwable e) { + // Treat as non-match + } + } + } + // Do the removal + return erase(erasures); + } + + @Override + public synchronized boolean retainAll(Collection c) { + // Make a list of indices at which to remove an item + List erasures = new 
ArrayList<>(c.size()); + int n = list.size(); + for (int i = 0; i < n; i++) { + Object item = list.get(i); + boolean retain = false; + for (Object o : c) { + // Use the Python definition of equality (which may throw) + try { + if (Abstract.richCompareBool(o, item, Comparison.EQ)) { + retain = true; + break; + } + } catch (Throwable e) { + // Treat as non-match + } + } + if (!retain) { erasures.add(i); } + } + // Do the removal + return erase(erasures); + } + + @Override + public synchronized Object set(int index, Object element) { + changed = true; + return list.set(index, element); + } + + @Override + public synchronized int size() { + return list.size(); + } + + @Override + public synchronized List subList(int fromIndex, int toIndex) { + /* + * XXX There is a difficulty here in our management of concurrency. + * The sub-list is an *unsynchronised* view on this PyList's private + * list variable, so it bypasses the synchronisation in the PyList + * wrapper. Here, and in Jython 2, we wrap this view in a new + * PyList. Although that is synchronised (on itself), it is not + * synchronised on this PyList and concurrent access is possible + * through the two objects. Compare + * java.util.Collections.SynchronizedRandomAccessList, where the + * problem is solved by sharing a mutex. 
+ */ + return new PyList(TYPE, list.subList(fromIndex, toIndex)); + } + + @Override + public synchronized Object[] toArray() { return list.toArray(); } + + @SuppressWarnings("unchecked") + @Override + public synchronized T[] toArray(T[] a) { + + Class type = (Class)a.getClass().getComponentType(); + + if (Object.class.equals(type)) { + // Special-case a request for Object[] + a = list.toArray(a); + + } else { + // Ensure we have a space the right size + int size = size(); + if (a.length < size) { + a = (T[])Array.newInstance(type, size); + } else if (a.length > size) { a[size] = null; } + + // Copy list into the array with conversion to T + for (int i = 0; i < size; i++) { a[i] = Abstract.tojava(list.get(i), type); } + } + return a; + } + + @Override + public synchronized boolean remove(Object o) { + changed = true; + return list.remove(o); + } + + // Java hash and equals ------------------------------------------ + + @Override + public int hashCode() { return list.hashCode(); } + + @Override + public boolean equals(Object other) { + if (this == other) { return true; } + synchronized (this) { + if (other instanceof List) { + return list.equals(other); + } else { + try { + return Abstract.richCompareBool(other, this, Comparison.EQ); + } catch (Throwable t) { + return false; + } + } + } + } + + // Supporting code ----------------------------------------------- + + /** + * This comparator is used in + * {@link PyList#sort(Function, boolean)}, sub-classed for the type + * of sort. + * + * @param type of element to sort (in practice {@code Object} or + * {@code KV}). + */ + private abstract class ListElementComparator implements Comparator { + + private Throwable comparatorException; + private final int less; + + /** + * @param reverse whether comparisons will be in reverse sense + */ + ListElementComparator(boolean reverse) { this.less = reverse ?
1 : -1; } + + /** + * @return the exception raised by any application of + * {@link #lessThan(Object, Object)} + */ + Throwable getRaisedException() { + return comparatorException; + } + + /** + * @return whether an exception was raised by any application of + * {@link #lessThan(Object, Object)} + */ + boolean raisedException() { + return comparatorException != null; + } + + /** + * Defines the comparison operation to use. + * + * @param o1 left operand + * @param o2 right operand + * @return true iff o1 is less than o2 + * @throws Throwable on errors in the comparison + */ + abstract boolean lessThan(T o1, T o2) throws Throwable; + + @Override + public int compare(T o1, T o2) { + try { + // PEP 207: sort should only depend on "less-than" + return lessThan(o1, o2) ? less : (lessThan(o2, o1) ? -less : 0); + } catch (Throwable pye) { + // Stash the exception to rethrow later + comparatorException = pye; + // XXX made up answer may violate contract of compare. Why stash? + return 0; + } finally { + // Detect change occurring during the comparisons + if (changed) { throw new ValueError("list modified during sort"); } + } + } + } + + /** + * Accept an index, treating negative values as end-relative, and + * bound it to the sequence range. It is not an error for the index + * value to fall outside the valid range. (It is simply clipped to + * the nearer end.) + * + * @param index as presented + * @return bounded {@code 0 <= index <= list.size()} + */ + private int boundedIndex(int index) { + int L = list.size(); + return index < 0 ? Math.max(0, index + L) : Math.min(L, index); + } + + /** + * Given an ordered ascending list of indices into {@link #list}, + * remove the elements at those indices. 
+ * + * @param erasures to remove + * @return {@code true} if {@code erasures} is not empty + */ + private boolean erase(List erasures) { + // Copy list to itself skipping each erasure + if (erasures.isEmpty()) { return false; } + Iterator ei = erasures.iterator(); + changed = true; + + /* + * p, q are pointers into the list. We copy elements from list[q] to + * list[p], for p { + @Override + public int length() { return list.size(); }; + + @Override + public PyType getType() { return PyList.this.getType(); } + + @Override + public Object getItem(int i) { return list.get(i); } + + @Override + public Object get(int i) { return list.get(i); } + + @Override + public PyList getSlice(Indices slice) throws Throwable { + PyList v; + if (slice.step == 1) + v = new PyList(list.subList(slice.start, slice.stop)); + else { + v = new PyList(TYPE, slice.slicelength); + int i = slice.start; + for (int j = 0; j < slice.slicelength; j++) { + v.add(list.get(i)); + i += slice.step; + } + } + return v; + } + + @Override + public void setItem(int i, Object value) throws Throwable { list.set(i, value); } + + @Override + public void setSlice(PySlice.Indices slice, Object value) throws Throwable { + /* + * Accept iterables (and iterators) by creating a Java List Jython 2 + * would also try __tojava__. Necessary? + */ + List v = PySequence.fastList(value, + () -> new TypeError("can only assign an iterable to a slice")); + if (v == PyList.this) { v = new ArrayList<>(list); } // self[slice] = self + + // Now we have a List of values to assign + final int N = v.size(), M = slice.slicelength, D = N - M; + final int step = slice.step; + ; + if (D != 0) { + if (step == 1) { + // Contiguous slice requiring change of size + final int start = slice.start, stop = slice.stop; + if (D < 0) { + // v shorter by (-D) than slice it replaces. 
+ // Discard (-D) elements [stop-(-D):stop] + list.subList(stop + D, stop).clear(); + // Copy N elements + Collections.copy(list.subList(start, start + N), v); + } else { + // v is longer by D than slice it replaces. + // Copy M elements to [start:stop-D] + Collections.copy(list.subList(start, start + M), v.subList(0, M)); + // Insert the last D elements of v at the slice stop. + list.addAll(stop, v.subList(N - D, N)); + } + } else { + // Extended slice, but not the same size as the value + throw new ValueError( + "attempt to assign sequence of size %d to extended slice of size %d", N, + M); + } + } else { + // Extended or contiguous slice of just the right size + for (int i = 0, k = slice.start; i < M; i++, k += step) { list.set(k, v.get(i)); } + } + } + + @Override + public void delItem(int i) throws Throwable { + list.remove(i); + } + + @Override + public void delSlice(PySlice.Indices slice) throws Throwable { + final int M = slice.slicelength; + if (M > 0) { + final int step = slice.step; + /* + * We will step through the list removing items. If we are stepping + * backwards, the increments will be slice.step (negative), but if + * stepping forwards, slice.step-1, because the items to right will + * have moved one closer, due to the deletion. (There are faster + * ways, but how often will it matter?) + */ + final int inc = step > 0 ? 
step - 1 : step; + if (inc == 0) { + // Contiguous slice + list.subList(slice.start, slice.stop).clear(); + } else { + // Extended slice + for (int i = 0, k = slice.start; i < M; i++, k += inc) { list.remove(k); } + } + } + } + + @Override + Object add(Object ow) throws NoConversion { + // We accept any sort of Java list as w except tuple + if (ow instanceof List && !(ow instanceof PyTuple)) { + return PyList.concat(list, (List)ow); + } else { + return Py.NotImplemented; + } + } + + @Override + Object radd(Object ov) throws NoConversion { + // We accept any sort of Java list as v except tuple + if (ov instanceof List && !(ov instanceof PyTuple)) { + return PyList.concat((List)ov, list); + } else { + return Py.NotImplemented; + } + } + + @Override + PyList repeat(int n) { + ArrayList u = new ArrayList<>(n * list.size()); + for (int i = 0; i < n; i++) { u.addAll(list); } + return new PyList(TYPE, u); + } + + /** + * {@inheritDoc} + *

+ * The iterator returned for the {@code ListDelegate} is + * specifically a {@code ListIterator}. + */ + @Override + public ListIterator iterator() { return list.listIterator(); } + + /** + * Implementation of the {@code __contains__} method of sequences. + * Determine whether the sequence contains an element equal to the + * argument. + * + * @param v value to match in the client + * @return whether found + * @throws Throwable from the implementation of {@code __eq__} + */ + // XXX Could this be supplied by PySequence.Delegate? + public boolean __contains__(Object v) throws Throwable { + // XXX What about changes to content during iteration? + for (Object item : this) { + if (Abstract.richCompareBool(v, item, Comparison.EQ)) { return true; } + } + return false; + } + + /** + * Implementation of the {@code count} method of sequences. + * Determine the number of times the sequence contains an element + * equal to the argument. + * + * @param v value to match in the client + * @return the number of times found + * @throws Throwable from the implementation of {@code __eq__} + */ + // XXX Could this be supplied by PySequence.Delegate? + public int count(Object v) throws Throwable { + int count = 0; + // XXX What about changes to content during iteration? + for (Object item : this) { + if (Abstract.richCompareBool(v, item, Comparison.EQ)) { count++; } + } + return count; + } + + /** + * Implementation of the {@code insert} method of sequences. The + * index is bounded by {@link #boundedIndex(Object)} before use. + * + * @param index position to insert + * @param v value to insert + * @throws TypeError from bad {@code index} type + * @throws Throwable from other conversion errors + */ + public void insert(Object index, Object v) throws TypeError, Throwable { + list.add(boundedIndex(index), v); + } + + /** + * Accept an object index, treating negative values as end-relative, + * and bound it to the sequence range.
The index object must be + * convertible by + * {@link PyNumber#asSize(Object, java.util.function.Function) + * PyNumber.asSize}. It is not an error for the index value to fall + * outside the valid range. (It is simply clipped to the nearer + * end.) + * + * @param index purported index (not {@code null}) + * @return converted index + * @throws TypeError from bad {@code index} type + * @throws Throwable from other conversion errors + */ + protected int boundedIndex(Object index) throws TypeError, Throwable { + + // Convert the argument (or raise a TypeError) + int i, L = length(); + if (PyNumber.indexCheck(index)) { + i = PyNumber.asSize(index, IndexError::new); + } else { + throw Abstract.indexTypeError(this, index); + } + + // Bound the now integer index to the sequence (or L) + return i < 0 ? Math.max(0, i + L) : Math.min(L, i); + } + + @Override + public int compareTo(PySequence.Delegate other) { + try { + int N = list.size(), M = other.length(), i; + + for (i = 0; i < N; i++) { + Object a = list.get(i); + if (i < M) { + Object b = other.getItem(i); + // if a != b, then we've found an answer + if (!Abstract.richCompareBool(a, b, Comparison.EQ)) + return Abstract.richCompareBool(a, b, Comparison.GT) ? 1 : -1; + } else + // list has not run out, but other has. We win. + return 1; + } + + /* + * The lists matched over the length of Pylist.this.list. The other + * is the winner if it still has elements. Otherwise it's a tie. + */ + return i < M ? -1 : 0; + } catch (PyException e) { + // It's ok to throw legitimate Python exceptions + throw e; + } catch (Throwable t) { + /* + * Contract of Comparable prohibits propagation of checked + * exceptions, but richCompareBool in principle throws anything. + */ + // XXX perhaps need a PyException to wrap Java Throwable + throw new InterpreterError(t, "non-Python exeption in comparison"); + } + } + + /** + * Compare this delegate with the delegate of the other {@code list} + * for equality. 
We do this separately from + * {@link #compareTo(PySequence.Delegate)} because it is slightly cheaper, + * but also so that we don't panic where an element is capable of + * an equality test, but not a less-than test. + * + * @param other delegate of list at right of comparison + * @return {@code true} if equal, {@code false} if not. + */ + private boolean compareEQ(PySequence.Delegate other) { + try { + if (other.length() != list.size()) { return false; } + Iterator i = list.iterator(); + for (Object b : other) { + Object a = i.next(); + // if a != b, then we've found an answer + if (!Abstract.richCompareBool(a, b, Comparison.EQ)) + return false; + } + // The lists matched over their length. + return true; + } catch (PyException e) { + // It's ok to throw legitimate Python exceptions + throw e; + } catch (Throwable t) { + throw new InterpreterError(t, "non-Python exception in comparison"); + } + } + + /** + * Compare this delegate with the delegate of the other + * {@code list}, or return {@code NotImplemented} if the other is + * not a {@code list}. + * + * @param other list at right of comparison + * @param op type of operation + * @return boolean result or {@code NotImplemented} + */ + private Object cmp(Object other, Comparison op) { + if (other instanceof PyList) { + // A Python list is comparable only with another list + ListDelegate o = ((PyList)other).delegate; + if (op == Comparison.EQ) { + return compareEQ(o); + } else if (op == Comparison.NE) { + return !compareEQ(o); + } else { + return op.toBool(delegate.compareTo(o)); + } + } else { + return Py.NotImplemented; + } + } + } + + /** Concatenate two lists (for {@code ListDelegate}).
*/ + private static PyList concat(List v, List w) { + int n = v.size(), m = w.size(); + PyList u = new PyList(TYPE, n + m); + u.addAll(0, v); // explicit positions fix the order as v then w ... + u.addAll(u.size(), w); // ... whatever addAll(Collection) chooses to do + return u; + } +} diff --git a/core/src/main/java/org/python/core/PyLong.java b/core/src/main/java/org/python/core/PyLong.java new file mode 100644 index 000000000..319959c84 --- /dev/null +++ b/core/src/main/java/org/python/core/PyLong.java @@ -0,0 +1,819 @@ +// Copyright (c) Corporation for National Research Initiatives +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static java.math.BigInteger.ONE; +import static java.math.BigInteger.ZERO; + +import java.io.Serializable; +import java.lang.invoke.MethodHandles; +import java.math.BigInteger; +import java.util.Map; + +import org.python.base.InterpreterError; +import org.python.base.MissingFeature; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.Slot.EmptyException; +import org.python.core.stringlib.FloatFormatter; +import org.python.core.stringlib.IntegerFormatter; +import org.python.core.stringlib.InternalFormat; +import org.python.core.stringlib.InternalFormat.FormatError; +import org.python.core.stringlib.InternalFormat.FormatOverflow; +import org.python.core.stringlib.InternalFormat.AbstractFormatter; +import org.python.core.stringlib.InternalFormat.Spec; + +/** + * A Python {@code int} object may be represented by a + * {@code java.lang.Integer} or a {@code java.math.BigInteger}. An + * instance of a Python sub-class of {@code int} must be + * represented by an instance of (a Java sub-class of) this class. + */ +// @Untraversable +// @ExposedType(name = "long", doc = BuiltinDocs.long_doc) +public class PyLong extends AbstractPyObject implements PyDict.Key { + + /** The type {@code int}.
*/ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("int", MethodHandles.lookup()) // + .adopt(BigInteger.class, Integer.class) // + .accept(Boolean.class) // + .methods(PyLongMethods.class)); + + /** The minimum Java {@code int} as a {@code BigInteger}. */ + static final BigInteger MIN_INT = BigInteger.valueOf(Integer.MIN_VALUE); + /** The maximum Java {@code int} as a {@code BigInteger}. */ + static final BigInteger MAX_INT = BigInteger.valueOf(Integer.MAX_VALUE); + /** The minimum Java {@code long} as a {@code BigInteger}. */ + static final BigInteger MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE); + /** The maximum Java {@code long} as a {@code BigInteger}. */ + static final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE); + + /** + * The value of this Python {@code int} (in sub-class instances). + */ + // Has to be package visible for method implementations. + final BigInteger value; + + /** + * The value of this Python {@code int} (in sub-class instances). + * + * @return value + */ + public BigInteger getValue() { return value; } + + /** + * Constructor for Python sub-class specifying {@link #type}. + * + * @param subType actual Python sub-class being created + * @param v of the {@code int} + */ + PyLong(PyType subType, BigInteger v) { + super(subType); + value = v; + } + + // Instance methods on PyLong ------------------------------------- + + @Override + public String toString() { return Py.defaultToString(this); } + + @Override + public boolean equals(Object obj) { + return PyDict.pythonEquals(this, obj); + } + + @Override + public int hashCode() throws PyException { + // XXX or return value.hashCode() if not a sub-class? 
+ return PyDict.pythonHash(this); + } + + // Constructor from Python ---------------------------------------- + + @SuppressWarnings("fallthrough") + static Object __new__(PyType subType, Object[] args, String[] kwnames) throws Throwable { + Object x = null, obase = null; + int argsLen = args.length; + switch (argsLen) { + case 2: + obase = args[1]; // fall through + case 1: + x = args[0]; // fall through + case 0: + break; + default: + throw new TypeError("int() takes at most %d arguments (%d given)", 2, argsLen); + } + return __new__impl(subType, x, obase); + } + + /** + * Implementation of {@code __new__} with classic arguments + * unpacked. + * + * @param subType actual sub-type of int to produce + * @param x {@code int}-like or {@code str}-like value or + * {@code null}. + * @param obase number base ({@code x} must be {@code str}-like) + * @return an {@code int} or sub-class with the right value + * @throws Throwable on argument type or other errors + */ + private static Object __new__impl(PyType subType, Object x, Object obase) throws Throwable { + + if (subType != TYPE) { return longSubtypeNew(subType, x, obase); } + + if (x == null) { + // Zero-arg int() ... unless invalidly like int(base=10) + if (obase != null) { throw new TypeError("int() missing string argument"); } + return 0; + } + + if (obase == null) + return PyNumber.asLong(x); + else { + int base = PyNumber.asSize(obase, null); + if (base != 0 && (base < 2 || base > 36)) + throw new ValueError("int() base must be >= 2 and <= 36, or 0"); + else if (PyUnicode.TYPE.check(x)) + return PyLong.fromUnicode(x, base); + // else if ... support for bytes-like objects + else + throw new TypeError(NON_STR_EXPLICIT_BASE); + } + } + + private static final String NON_STR_EXPLICIT_BASE = + "int() can't convert non-string with explicit base"; + + /** + * Wimpy, slow approach to {@code __new__} calls for sub-types of + * {@code int}, that will temporarily create a regular {@code int} + * from the arguments. 
+ * + * @throws Throwable on argument type or other errors + */ + private static Object longSubtypeNew(PyType subType, Object x, Object obase) throws Throwable { + // Create a regular int from whatever arguments we got. + Object v = __new__impl(TYPE, x, obase); + // create a sub-type instance from the value in tmp + return new PyLong.Derived(subType, PyLong.asBigInteger(v)); + } + + // Representations of the value ----------------------------------- + + /** + * Present the value as a Java {@code int} when the argument is + * expected to be a Python {@code int} or a sub-class of it. + * + * @param v claimed {@code int} + * @return {@code int} value + * @throws TypeError if {@code v} is not a Python {@code int} + * @throws OverflowError if {@code v} is out of Java range + */ + static int asInt(Object v) throws TypeError, OverflowError { + try { + return convertToInt(v); + } catch (NoConversion nc) { + throw Abstract.requiredTypeError("an integer", v); + } + } + + /** + * Present the value as a Java {@code int} when the argument is + * expected to be a Python {@code int} or a sub-class of it. + * + * @param v claimed {@code int} + * @return {@code int} value + * @throws TypeError if {@code v} is not a Python {@code int} + * @throws OverflowError if {@code v} is out of Java range + */ + static int asSize(Object v) throws TypeError, OverflowError { return asInt(v); } + + /** + * Present the value as a Java {@code BigInteger} when the argument + * is expected to be a Python {@code int} or a sub-class of it. + * + * @param v claimed {@code int} + * @return {@code BigInteger} value + * @throws TypeError if {@code v} is not a Python {@code int} + */ + static BigInteger asBigInteger(Object v) throws TypeError { + try { + return convertToBigInteger(v); + } catch (NoConversion nc) { + throw Abstract.requiredTypeError("an integer", v); + } + } + + /** + * Value as a Java {@code double} using the round-half-to-even rule. 
+ * + * @param v to convert + * @return nearest double + * @throws OverflowError if out of double range + */ + // Compare CPython longobject.c: PyLong_AsDouble + static double asDouble(Object v) { + try { + return convertToDouble(v); + } catch (NoConversion nc) { + throw Abstract.requiredTypeError("an integer", v); + } + } + + static int signum(Object v) throws TypeError { + if (v instanceof BigInteger) + return ((BigInteger)v).signum(); + else if (v instanceof Integer) + return Integer.signum((Integer)v); + else if (v instanceof PyLong) + return ((PyLong)v).value.signum(); + else if (v instanceof Boolean) + return (Boolean)v ? 1 : 0; // a Boolean counts as 1 (true) or 0 (false) + else + throw Abstract.requiredTypeError("an integer", v); // none of the accepted representations + } + + // Factories ------------------------------------------------------ + + /* + * These methods create Python int from other Python objects, or + * from specific Java types. The methods make use of special methods + * on the argument and produce Python exceptions when that goes + * wrong. Note that they never produce a PyLong, but always Java + * Integer or BigInteger. They often correspond to CPython public or + * internal API. + */ + /** + * Convert the given object to a Python {@code int} using the + * {@code op_int} slot, if available. Raise {@code TypeError} if + * either the {@code op_int} slot is not available or the result of + * the call to {@code op_int} returns something not of type + * {@code int}. + *

+ * The return is not always exactly an {@code int}. + * {@code integral.__int__}, which this method wraps, may return any + * type: Python sub-classes of {@code int} are tolerated, but with a + * deprecation warning. Returns not even a sub-class type + * {@code int} raise {@link TypeError}. + * + * @param integral to convert to {@code int} + * @return integer value of argument + * @throws TypeError if {@code integral} seems not to be + * @throws Throwable from the supporting implementation + */ + // Compare CPython longobject.c::_PyLong_FromNbInt + static Object fromIntOf(Object integral) throws TypeError, Throwable { + Operations ops = Operations.of(integral); + + if (ops.isIntExact()) { + // Fast path for the case that we already have an int. + return integral; + } + + else + try { + /* + * Convert using the op_int slot, which should return something of + * exact type int. + */ + Object r = ops.op_int.invokeExact(integral); + if (PyLong.TYPE.checkExact(r)) { + return r; + } else if (PyLong.TYPE.check(r)) { + // Result not of exact type int but is a subclass + Abstract.returnDeprecation("__int__", "int", r); + return r; + } else + throw Abstract.returnTypeError("__int__", "int", r); + } catch (EmptyException e) { + // __int__ is not defined for t + throw Abstract.requiredTypeError("an integer", integral); + } + } + + /** + * Convert the given object to a {@code int} using the + * {@code __index__} or {@code __int__} special methods, if + * available (the latter is deprecated). + *

+ * The return is not always exactly an {@code int}. + * {@code integral.__index__} or {@code integral.__int__}, which + * this method wraps, may return any type: Python sub-classes of + * {@code int} are tolerated, but with a deprecation warning. + * Returns not even a sub-class type {@code int} raise + * {@link TypeError}. This method should be replaced with + * {@link PyNumber#index(Object)} after the end of the deprecation + * period. + * + * @param integral to convert to {@code int} + * @return integer value of argument + * @throws TypeError if {@code integral} seems not to be + * @throws Throwable from the supporting implementation + */ + // Compare CPython longobject.c :: _PyLong_FromNbIndexOrNbInt + static Object fromIndexOrIntOf(Object integral) throws TypeError, Throwable { + Operations ops = Operations.of(integral); + ; + + if (ops.isIntExact()) + // Fast path for the case that we already have an int. + return integral; + + try { + // Normally, the op_index slot will do the job + Object r = ops.op_index.invokeExact(integral); + if (Operations.of(r).isIntExact()) + return r; + else if (PyLong.TYPE.check(r)) { + // 'result' not of exact type int but is a subclass + Abstract.returnDeprecation("__index__", "int", r); + return r; + } else + throw Abstract.returnTypeError("__index__", "int", r); + } catch (EmptyException e) {} + + // We're here because op_index was empty. Try op_int. + if (Slot.op_int.isDefinedFor(ops)) { + Object r = fromIntOf(integral); + // ... but grumble about it. + // Warnings.format(DeprecationWarning.TYPE, 1, + // "an integer is required (got type %.200s). " + // + "Implicit conversion to integers " + // + "using __int__ is deprecated, and may be " + // + "removed in a future version of Python.", + // ops.type(integral).name); + return r; + } else + throw Abstract.requiredTypeError("an integer", integral); + } + + /** + * Convert a sequence of Unicode digits in the string u to a Python + * integer value. 
+ * + * @param u string to convert + * @param base in which to interpret it + * @return converted value + * @throws ValueError if {@code u} is an invalid literal + * @throws TypeError if {@code u} is not a Python {@code str} + */ + // Compare CPython longobject.c :: PyLong_FromUnicodeObject + static BigInteger fromUnicode(Object u, int base) throws ValueError, TypeError { + try { + // XXX maybe check 2<=base<=36 even if Number.asLong does? + // XXX Should allow for only string types, but for now ... + String value = u.toString(); + // String value = PyUnicode.asString(u); + return new BigInteger(value, base); + } catch (NumberFormatException e) { + throw new ValueError("invalid literal for int() with base %d: %.200s", base, u); + } + } + + /** + * Return a Python {@code int} from a Python {@code int} or + * subclass. If the value has exactly Python type {@code int} return + * it, otherwise construct a new instance of exactly {@code int} + * type. + * + * @param value to represent + * @return the same value as exactly {@code int} + * @throws TypeError if not a Python {@code int} or sub-class + */ + // Compare CPython longobject.c :: long_long + static Object from(Object value) throws TypeError { + Operations ops = Operations.of(value); + if (ops.isIntExact()) + return value; + else if (value instanceof PyLong) + return ((PyLong)value).value; + else + throw Abstract.requiredTypeError("an integer", value); + } + + /** + * Create a Python {@code int} from a Java {@code double}. + * + * @param value to convert + * @return BigInteger equivalent. 
+ * @throws OverflowError when {@code value} is a floating infinity + * @throws ValueError when {@code value} is a floating NaN + */ + // Compare CPython longobject.c :: PyLong_FromDouble + static BigInteger fromDouble(double value) { + // XXX Maybe return Object and Integer if possible + return PyFloat.bigIntegerFromDouble(value); + } + + // ---------------------------------------------------------------- + + public long getLong(long min, long max) throws OverflowError { + return getLong(min, max, "long int too large to convert"); + } + + public long getLong(long min, long max, String overflowMsg) throws OverflowError { + if (getValue().compareTo(MAX_LONG) <= 0 && getValue().compareTo(MIN_LONG) >= 0) { + long v = getValue().longValue(); + if (v >= min && v <= max) { return v; } + } + throw new OverflowError(overflowMsg); + } + + public long asLong() { + return getLong(Long.MIN_VALUE, Long.MAX_VALUE, "long too big to convert"); + } + + public Object __tojava__(Class c) { + // XXX something like this necessary in Jython 3 but what? 
+ try { + if (c == Boolean.TYPE || c == Boolean.class) { + return Boolean.valueOf(!getValue().equals(BigInteger.ZERO)); + } + if (c == Byte.TYPE || c == Byte.class) { + return Byte.valueOf((byte)getLong(Byte.MIN_VALUE, Byte.MAX_VALUE)); + } + if (c == Short.TYPE || c == Short.class) { + return Short.valueOf((short)getLong(Short.MIN_VALUE, Short.MAX_VALUE)); + } + if (c == Integer.TYPE || c == Integer.class) { + return Integer.valueOf((int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE)); + } + if (c == Long.TYPE || c == Long.class) { + return Long.valueOf(getLong(Long.MIN_VALUE, Long.MAX_VALUE)); + } + if (c == Float.TYPE || c == Double.TYPE || c == Float.class || c == Double.class) { + return asDouble(this); + } + if (c == BigInteger.class || c == Number.class || c == Object.class + || c == Serializable.class) { + return getValue(); + } + } catch (OverflowError e) { + throw new InternalError(e); + } + throw new MissingFeature("default __tojava__ behaviour for %s", c.getSimpleName()); + } + + // XXX __coerce__ and __coerce_ex not needed in Jython 3 ? 
+ + // special methods ------------------------------------------------ + + @SuppressWarnings("unused") + private static Object __repr__(Object self) { + assert TYPE.check(self); + return asBigInteger(self).toString(); + } + + // __str__: let object.__str__ handle it (by calling __repr__) + + // Methods -------------------------------------------------------- + // Expose to Python when mechanisms are available + + // @ExposedGet(name = "real", doc = BuiltinDocs.long_real_doc) + public static Object getReal(Object self) { return self; } + + // @ExposedGet(name = "imag", doc = BuiltinDocs.long_imag_doc) + public static Object getImag(Object self) { return 0; } + + // @ExposedGet(name = "numerator", doc = + // BuiltinDocs.long_numerator_doc) + public static Object getNumerator(Object self) { return self; } + + // @ExposedGet(name = "denominator", doc = + // BuiltinDocs.long_denominator_doc) + public static Object getDenominator(Object self) { return 1; } + + // @ExposedMethod(doc = BuiltinDocs.long___trunc___doc) + static Object __trunc__(Object self) { return from(self); } + + // @ExposedMethod(doc = BuiltinDocs.long_conjugate_doc) + static Object conjugate(Object self) { return from(self); } + + // @ExposedMethod(doc = BuiltinDocs.long___getnewargs___doc) + static PyTuple __getnewargs__(Object self) { + assert TYPE.check(self); + try { + return new PyTuple(convertToBigInteger(self)); + } catch (NoConversion e) { + throw impossible(self); + } + } + + // @ExposedMethod(doc = BuiltinDocs.long_bit_length_doc) + static int bit_length(Object self) { + try { + BigInteger v = convertToBigInteger(self); + if (v.signum() == -1) { v = v.negate(); } + return v.bitLength(); + } catch (NoConversion e) { + throw impossible(self); + } + } + + // @ExposedMethod(doc = BuiltinDocs.long___format___doc) + static final Object __format__(Object self, Object formatSpec) { + + String stringFormatSpec = PyUnicode.coerceToString(formatSpec, + () -> Abstract.argumentTypeError("__format__", + 
"specification", "str", formatSpec)); + + try { + // Parse the specification + Spec spec = InternalFormat.fromText(stringFormatSpec); + + // Get a formatter for the specification + AbstractFormatter f; + if ("efgEFG%".indexOf(spec.type) >= 0) { + // These are floating-point formats + f = new PyFloat.Formatter(spec); + } else { + f = new PyLong.Formatter(spec); + } + + /* + * Format, pad and return a result according to as the + * specification argument. + */ + return f.format(self).pad().getResult(); + + } catch (FormatOverflow fe) { + throw new OverflowError(fe.getMessage()); + } catch (FormatError fe) { + throw new ValueError(fe.getMessage()); + } catch (NoConversion e) { + throw Abstract.impossibleArgumentError(TYPE.name, self); + } + } + + // Python sub-class ----------------------------------------------- + + /** + * Instances in Python of sub-classes of 'int', are represented in + * Java by instances of this class. + */ + static class Derived extends PyLong implements DictPyObject { + + protected Derived(PyType subType, BigInteger value) { super(subType, value); } + + // /** The instance dictionary {@code __dict__}. */ + // protected PyDict dict = new PyDict(); + + @Override + public Map getDict() { return null; } + } + + // formatter ------------------------------------------------------ + + /** + * An {@link IntegerFormatter}, constructed from a {@link Spec}, + * with validations customised for {@code int.__format__}. + */ + private static class Formatter extends IntegerFormatter { + + /** + * Prepare an {@link IntegerFormatter} in support of + * {@link PyLong#__format__(Object, Object) int.__format__}. + * + * @param spec a parsed PEP-3101 format specification. + * @return a formatter ready to use. 
+ * @throws FormatOverflow if a value is out of range (including + * the precision) + * @throws FormatError if an unsupported format character is + * encountered + */ + Formatter(Spec spec) throws FormatError { + super(validated(spec)); + } + + /** + * Validations and defaults specific to {@code int.__format__}. + * (Note that {@code int.__mod__} has slightly different rules.) + * + * @param spec to validate + * @return validated spec with defaults filled + * @throws FormatError on failure to validate + */ + private static Spec validated(Spec spec) throws FormatError { + String type = TYPE.name; + switch (spec.type) { + + case 'c': + // Character data: specific prohibitions. + if (Spec.specified(spec.sign)) { + throw signNotAllowed("integer", spec.type); + } else if (spec.alternate) { + throw alternateFormNotAllowed("integer", + spec.type); + } + //$FALL-THROUGH$ + + case 'x': + case 'X': + case 'o': + case 'b': + case 'n': + if (spec.grouping) { + throw notAllowed("Grouping", ',', "integer", + spec.type); + } + //$FALL-THROUGH$ + + case Spec.NONE: + case 'd': + // Check for disallowed parts of the specification + if (Spec.specified(spec.precision)) { + throw precisionNotAllowed("integer"); + } + break; + + default: + // The type code was not recognised + throw unknownFormat(spec.type, type); + } + + /* + * spec may be incomplete. The defaults are those commonly + * used for numeric formats. + */ + return spec.withDefaults(Spec.NUMERIC); + } + + @Override + public IntegerFormatter format(Object o) + throws NoConversion, FormatError { + return format(convertToBigInteger(o)); + } + } + + // plumbing ------------------------------------------------------- + + // Convert from int (core use) ------------------------------------ + + /* + * These methods are for use internal to the core, in the + * implementation of special functions: they may throw NoConversion + * on failure, which must be caught by those implementations. 
They + * convert a Python int, or a specific Java implementation of int, + * to a specific Java type. + */ + + /** + * Convert an {@code int} to a Java {@code double} (or throw + * {@link NoConversion}), using the round-half-to-even rule. + * Conversion to a {@code double} may overflow, raising an exception + * that is propagated to the caller. + *

+ * If the method throws the special exception {@link NoConversion}, + * the caller must deal with it by throwing an appropriate Python + * exception or taking an alternative course of action. Binary + * operations will normally return {@link Py#NotImplemented} in + * response. + * + * @param v to convert + * @return converted to {@code double} + * @throws NoConversion v is not an {@code int} + * @throws OverflowError v is too large to be a {@code float} + */ + // Compare CPython longobject.c: PyLong_AsDouble + static double convertToDouble(Object v) throws NoConversion, OverflowError { + // Check against supported types, most likely first + if (v instanceof Integer) + // No loss of precision + return ((Integer)v).doubleValue(); + else if (v instanceof BigInteger) + // Round half-to-even + return convertToDouble((BigInteger)v); + else if (v instanceof PyLong) + // Round half-to-even + return convertToDouble(((PyLong)v).value); + else if (v instanceof Boolean) + return (Boolean)v ? 1.0 : 0.0; + throw PyObjectUtil.NO_CONVERSION; + } + + /** + * Convert a {@code BigInteger} to a Java double , using the + * round-half-to-even rule. Conversion to a double may overflow, + * raising an exception that is propagated to the caller. + * + * @param v to convert + * @return converted to {@code double} + * @throws OverflowError if too large to be a {@code float} + */ + static double convertToDouble(BigInteger v) throws OverflowError { + /* + * According to the code, BigInteger.doubleValue() rounds + * half-to-even as required. This differs from conversion from long + * which rounds to nearest (JLS 3.0 5.1.2). + */ + double vv = v.doubleValue(); + // On overflow, doubleValue returns ±∞ rather than throwing. + if (Double.isInfinite(vv)) + throw tooLarge("Python int", "float"); + else + return vv; + } + + /** + * Convert a Python {@code int} to a Java {@code int} (or throw + * {@link NoConversion}). 
Conversion to an {@code int} may overflow, + * raising an exception that is propagated to the caller. + *

+ * If the method throws the special exception {@link NoConversion}, + * the caller must deal with it by throwing an appropriate Python + * exception or taking an alternative course of action. + * + * @param v to convert + * @return converted to {@code int} + * @throws NoConversion v is not an {@code int} + * @throws OverflowError v is too large to be a Java {@code int} + */ + // Compare CPython longobject.c: PyLong_AsSsize_t + static int convertToInt(Object v) throws NoConversion, OverflowError { + // Check against supported types, most likely first + if (v instanceof Integer) + return ((Integer)v).intValue(); + else if (v instanceof BigInteger) + return convertToInt((BigInteger)v); + else if (v instanceof PyLong) + return convertToInt(((PyLong)v).value); + else if (v instanceof Boolean) + return (Boolean)v ? 1 : 0; + throw PyObjectUtil.NO_CONVERSION; + } + + /** + * Convert a {@code BigInteger} to a Java {@code int}. Conversion to + * an {@code int} may overflow, raising an exception that is + * propagated to the caller. + * + * @param v to convert + * @return converted to {@code int} + * @throws OverflowError if too large to be a Java {@code int} + */ + static int convertToInt(BigInteger v) throws OverflowError { + if (v.bitLength() < 32) + return v.intValue(); + else + throw tooLarge("Python int", "int"); + } + + /** + * Convert a Python {@code int} to a Java {@code BigInteger} (or + * throw {@link NoConversion}). Conversion may raise an exception + * that is propagated to the caller. If the Java type of the + * {@code int} is declared, generally there is a better option than + * this method. We only use it for {@code Object} arguments. + *

+ * If the method throws the special exception {@link NoConversion}, + * the caller must deal with it by throwing an appropriate Python + * exception or taking an alternative course of action. + * + * @param v claimed {@code int} + * @return converted to {@code BigInteger} + * @throws NoConversion if {@code v} is not a Python {@code int} + */ + static BigInteger convertToBigInteger(Object v) throws NoConversion { + if (v instanceof BigInteger) + return (BigInteger)v; + else if (v instanceof Integer) + return BigInteger.valueOf(((Integer)v).longValue()); + else if (v instanceof PyLong) + return ((PyLong)v).value; + else if (v instanceof Boolean) + return (Boolean)v ? ONE : ZERO; + throw PyObjectUtil.NO_CONVERSION; + } + + /** + * Create an OverflowError with a message along the lines "X too + * large to convert to Y", where X is {@code from} and Y is + * {@code to}. + * + * @param from description of type to convert from + * @param to description of type to convert to + * @return an {@link OverflowError} with that message + */ + static OverflowError tooLarge(String from, String to) { + String msg = String.format(TOO_LARGE, from, to); + return new OverflowError(msg); + } + + private static final String TOO_LARGE = "%s too large to convert to %s"; + + /** + * We received an argument that should be impossible in a correct + * interpreter. We use this when conversion of an + * {@code Object self} argument may theoretically fail, but we know + * that we should only reach that point by paths that guarantee + * {@code self} to be some kind of {@code int}. 
+ * + * @param o actual argument + * @return exception to throw + */ + private static InterpreterError impossible(Object o) { + return Abstract.impossibleArgumentError("int", o); + } +} diff --git a/core/src/main/java/org/python/core/PyMapping.java b/core/src/main/java/org/python/core/PyMapping.java new file mode 100644 index 000000000..4fdad03b4 --- /dev/null +++ b/core/src/main/java/org/python/core/PyMapping.java @@ -0,0 +1,33 @@ +package org.python.core; + +import java.util.Map; + +import org.python.base.MissingFeature; + +/** + * Abstract API for operations on mapping types, corresponding to + * CPython methods defined in {@code abstract.h} and with names + * like: {@code PyMapping_*}. + */ +public class PyMapping extends PySequence { + + protected PyMapping() {} // only static methods here + + /** + * Return the mapping object {@code o} as a Java {@code Map}. If + * {@code o} is one of several built-in types that implement Java + * {@code Map}, this will be the object itself. + * Otherwise, it will be an adapter on the provided object. 
+ * + * @param o to present as a map + * @return the map + */ + static Map map(Object o) { + if (PyDict.TYPE.check(o)) { + return (PyDict)o; + } else { + // return new MapWrapper(o); + throw new MissingFeature("Non-dict wrapped as mapping"); + } + } +} diff --git a/core/src/main/java/org/python/core/PyMemberDescr.java b/core/src/main/java/org/python/core/PyMemberDescr.java new file mode 100644 index 000000000..f899f7c47 --- /dev/null +++ b/core/src/main/java/org/python/core/PyMemberDescr.java @@ -0,0 +1,430 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.VarHandle; +import java.lang.reflect.Field; +import java.util.EnumSet; + +import org.python.base.InterpreterError; + +/** + * Descriptor for an attribute that has been defined (by a + * {@code @Member} annotation) to get and optionally set or delete + * the value, with default type conversions. + */ +abstract class PyMemberDescr extends DataDescriptor { + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("member_descriptor", MethodHandles.lookup()) + .flagNot(PyType.Flag.BASETYPE)); + + /** Acceptable values in the {@link #flags}. */ + enum Flag { + READONLY, OPTIONAL, READ_RESTRICTED, WRITE_RESTRICTED + } + + /** + * Attributes controlling access and audit. (In CPython, the + * RESTRICTED forms cause a call to {@code sys.audit} and are here + * for compatibility with that eventual idea.) Note that + * {@code OPTIONAL}, although in the API, is represented as a + * sub-class attribute, to minimise work in + * {@link #checkSet(Object)} and {@link #checkDelete(Object)}. + */ + protected final EnumSet flags; + + /** Reference to the field (offset) to access. */ + // CPython PyMemberDef: int type; int offset; + VarHandle handle; + + /** Documentation string for the member (or {@code null}). 
*/ + String doc; + + /** + * Construct a {@code PyMemberDescr} from a client-supplied handle. + * This allows all JVM-supported access modes, but you have to make + * your own handle. + * + * @param objclass Python type containing this member + * @param name by which the member is known to Python + * @param handle to the Java member + * @param flags characteristics controlling access + * @param doc documentation string + */ + PyMemberDescr(PyType objclass, String name, VarHandle handle, EnumSet flags, String doc) { + super(TYPE, objclass, name); + this.flags = flags; + this.handle = handle; + // Allow null to represent empty doc + this.doc = doc != null && doc.length() > 0 ? doc : null; + } + + private static VarHandle varHandle(Field f, Lookup lookup) { + try { + return lookup.unreflectVarHandle(f); + } catch (IllegalAccessException e) { + throw new InterpreterError(e, "cannot get handle for '%s'", f); + } + + } + + /** + * {@inheritDoc} + * + * @implNote Compare CPython {@code descr_check} in + * {@code descrobject.c}. We differ in that: (1) We throw + * directly on failure. (2) The condition {@code obj==null} + * (when found on a type) is the caller's job. (3) We fold the + * {@code sys.audit} call into this check. + */ + @Override + protected void check(Object obj) throws TypeError { + PyType objType = PyType.of(obj); + if (!objType.isSubTypeOf(objclass)) { + throw new TypeError(DESCRIPTOR_DOESNT_APPLY, name, objclass.name, objType.name); + /* + * It is not sufficient to skip the test and catch the class cast + * from VarHandle.get, because the wrong obj class is not + * necessarily the wrong Java class. + */ + } else if (flags.contains(Flag.READ_RESTRICTED)) { + // Sys.audit("object.__getattr__", "Os", + // obj != null ? obj : Py.None, name); + } + } + + /** + * {@code descr.__set__(obj, value)} has been called on this + * descriptor. We must check that the descriptor applies to the type + * of object supplied as the {@code obj} argument. 
From Python, + * anything could be presented, but when we operate on it, we'll be + * assuming the particular {@link #objclass} type. + * + * @param obj target object (argument to {@code __set__}) + * @throws TypeError if descriptor doesn't apply to {@code obj} + */ + // Compare CPython descr_setcheck in descrobject.c + @Override + protected void checkSet(Object obj) throws TypeError { + PyType objType = PyType.of(obj); + if (!objType.isSubTypeOf(objclass)) { + throw new TypeError(DESCRIPTOR_DOESNT_APPLY, name, objclass.name, objType.name); + } + if (!flags.isEmpty()) { + if (flags.contains(Flag.READONLY)) { + throw Abstract.readonlyAttributeOnType(objclass, name); + } else if (flags.contains(Flag.WRITE_RESTRICTED)) { + // Sys.audit("object.__setattr__", "Os", + // obj != null ? obj : Py.None, name); + } + } + } + + /** + * {@code descr.__delete__(obj)} has been called on this descriptor. + * We must check that the descriptor applies to the type of object + * supplied as the {@code obj} argument. From Python, anything could + * be presented, but when we operate on it, we'll be assuming the + * particular {@link #objclass} type. + */ + // Compare CPython descr_setcheck in descrobject.c + @Override + protected void checkDelete(Object obj) throws TypeError { + PyType objType = PyType.of(obj); + if (!objType.isSubTypeOf(objclass)) { + throw new TypeError(DESCRIPTOR_DOESNT_APPLY, name, objclass.name, objType.name); + } + if (!flags.isEmpty()) { + if (flags.contains(Flag.READONLY)) { + throw Abstract.readonlyAttributeOnType(objclass, name); + } else if (flags.contains(Flag.WRITE_RESTRICTED)) { + // Sys.audit("object.__delattr__", "Os", + // obj != null ? obj : Py.None, name); + } + } + } + + /** + * A method to get {@code o.name}, with conversion from the internal + * field value if necessary (which will always succeed). This method + * is called from {@link #__get__(Object, PyType)}, after checks, to + * implement the type-specific conversion. 
+ * + * @param obj object to access via {@link #handle} (never null) + * @return field value + */ + // Compare CPython PyMember_GetOne in structmember.c + protected abstract Object get(Object obj) throws AttributeError; + + /** + * A method to set {@code o.name = v}, with conversion to the + * internal field value if necessary. This method is called from + * {@link #__set__(Object, Object)}, after checks, to implement the + * type-specific conversion. + * + * @param obj object to access via {@link #handle} (never null) + * @param v value to assign: never null, may be {@code None} + * @throws TypeError if v cannot be converted + * @throws Throwable potentially from conversion + */ + // Compare CPython PyMember_SetOne in structmember.c + protected abstract void set(Object obj, Object v) throws AttributeError, TypeError, Throwable; + + /** + * A method to delete {@code del o.name}. This method is called from + * {@link #__delete__(Object)}, after checks, to implement the + * type-specific delete. + * + * @implNote The default implementation is correct for primitive + * types (i.e. the majority) in raising {@link TypeError} with + * the message that the attribute cannot be deleted. + * @param obj object to access via {@link #handle} (never null) + * @throws TypeError when not a type that can be deleted + * @throws AttributeError when already deleted/undefined + */ + // Compare CPython PyMember_SetOne in structmember.c with NULL + protected void delete(Object obj) throws TypeError, AttributeError { throw cannotDeleteAttr(); } + + // Compare CPython member_repr in descrobject.c + @SuppressWarnings("unused") + private Object __repr__() { return descrRepr("member"); } + + /** + * {@inheritDoc} + * + * If {@code obj != null} call {@link #get} on it to return a value. + * {@code obj} must be of type {@link #objclass}. A call made with + * {@code obj == null} returns {@code this} descriptor. 
+ * + * @param type is ignored + */ + @Override + // Compare CPython member_get in descrobject.c + Object __get__(Object obj, PyType type) { + if (obj == null) + /* + * obj==null indicates the descriptor was found on the target object + * itself (or a base), see CPython type_getattro in typeobject.c + */ + return this; + else { + check(obj); + return get(obj); + } + } + + // Compare CPython member_set in descrobject.c + @Override + void __set__(Object obj, Object value) throws TypeError, Throwable { + if (value == null) { + // This ought to be an error, but allow for CPython idiom. + __delete__(obj); + } else { + checkSet(obj); + set(obj, value); + } + } + + // Compare CPython member_set in descrobject.c with NULL + @Override + void __delete__(Object obj) throws TypeError, Throwable { + checkDelete(obj); + delete(obj); + } + + // XXX GetSetDef in CPython, but @Member appropriate in our case + // Compare CPython member_get_doc in descrobject.c + static Object member_get_doc(PyMemberDescr descr) { + if (descr.doc == null) { return Py.None; } + return descr.doc; + } + + /** + * Create a {@code PyMemberDescr} with behaviour specific to the + * class of object being exposed. 
+ * + * @param objclass Python type that owns the descriptor + * @param name by which member known externally + * @param field field to expose through this descriptor + * @param lookup authorisation to access fields + * @param flags supplying additional characteristics + * @param doc documentation string (may be {@code null}) + * @return descriptor for access to the field + * @throws InterpreterError if the field type is not supported + */ + static PyMemberDescr forField(PyType objclass, String name, Field field, Lookup lookup, + EnumSet flags, String doc) throws InterpreterError { + Class fieldType = field.getType(); + VarHandle vh = varHandle(field, lookup); + // Note remove to minimise work in checkSet/checkDelete + boolean opt = flags.remove(Flag.OPTIONAL); + if (fieldType == int.class) + return new _int(objclass, name, vh, flags, doc); + else if (fieldType == double.class) + return new _double(objclass, name, vh, flags, doc); + else if (fieldType == String.class) + return new _String(objclass, name, vh, flags, doc, opt); + else if (fieldType.isPrimitive() || fieldType.isArray()) + // Identifiable exclusions as or-ed above. Enough? + throw new InterpreterError(UNSUPPORTED_TYPE, name, field.getDeclaringClass().getName(), + fieldType.getSimpleName()); + else + // Should be able to handle any object type not excluded. 
+ return new _Object(objclass, name, vh, flags, doc, opt); + } + + private static final String UNSUPPORTED_TYPE = + "@Member target %.50s in %.100s has unsupported type %.50s"; + + private static class _int extends PyMemberDescr { + + _int(PyType objclass, String name, VarHandle handle, EnumSet flags, String doc) { + super(objclass, name, handle, flags, doc); + + } + + @Override + protected Object get(Object obj) { return (int)handle.get(obj); } + + @Override + protected void set(Object obj, Object value) throws TypeError, Throwable { + int v = PyNumber.asSize(value, null); + handle.set(obj, v); + } + } + + private static class _double extends PyMemberDescr { + + _double(PyType objclass, String name, VarHandle handle, EnumSet flags, String doc) { + super(objclass, name, handle, flags, doc); + } + + @Override + protected Object get(Object obj) { return (double)handle.get(obj); } + + @Override + protected void set(Object obj, Object value) throws TypeError, Throwable { + double v = PyFloat.asDouble(value); + handle.set(obj, v); + } + } + + /** Behaviour for reference types. */ + private static abstract class Reference extends PyMemberDescr { + + /** + * Controls what happens when the attribute implementation is + * {@code null}, If {@code true}, {@link #get(Object)} will raise + * {@link AttributeError}. If {@code false}, {@link #get(Object)} + * will return {@code None}. + * + * Delete sets the attribute implementation to {@code null}. + */ + protected final boolean optional; + + Reference(PyType objclass, String name, VarHandle handle, EnumSet flags, String doc, + boolean optional) { + super(objclass, name, handle, flags, doc); + this.optional = optional; + } + + /** + * {@inheritDoc} + *

+ * If {@link #optional} and the attribute is {@code null}, reference + * types raise an {@link AttributeError}. + */ + @Override + protected void delete(Object obj) { + if (optional && handle.get(obj) == null) + throw Abstract.noAttributeOnType(objclass, name); + handle.set(obj, null); + } + } + + /** + * A string attribute that may be deleted (represented by + * {@code null} in Java). + */ + private static class _String extends Reference { + + _String(PyType objclass, String name, VarHandle handle, EnumSet flags, String doc, + boolean optional) { + super(objclass, name, handle, flags, doc, optional); + } + + @Override + protected Object get(Object obj) { + String value = (String)handle.get(obj); + if (value == null) { + if (optional) + throw Abstract.noAttributeOnType(objclass, name); + else + return Py.None; + } + return value; + } + + @Override + protected void set(Object obj, Object value) throws TypeError, Throwable { + // Special-case None if *not* an optional attribute + if (value == Py.None && !optional) { + delete(obj); + return; + } else if (!PyUnicode.TYPE.check(value)) + throw attrMustBe("a string", value); + else { + String v = value.toString(); + handle.set(obj, v); + } + } + } + + /** + * An {@code object} attribute that may be deleted (represented by + * {@code null} in Java). 
+ */ + private static class _Object extends Reference { + + _Object(PyType objclass, String name, VarHandle handle, EnumSet flags, String doc, + boolean optional) { + super(objclass, name, handle, flags, doc, optional); + } + + @Override + protected Object get(Object obj) { + Object value = handle.get(obj); + if (value == null) { + if (optional) + throw Abstract.noAttributeOnType(objclass, name); + else + return Py.None; + } + return value; + } + + @Override + protected void set(Object obj, Object value) throws TypeError, Throwable { + // Special-case None if *not* an optional attribute + if (value == Py.None && !optional) { + delete(obj); + return; + } + + try { + handle.set(obj, value); + } catch (ClassCastException cce) { + // Here if the type of the field is an object sub-type + Class javaType = handle.varType(); + /* + * This is a surprising place to discover a need to map Java classes + * to Python types. Do without for now. + */ + String typeName = javaType.getSimpleName(); + throw attrMustBe(typeName, value); + } + } + } +} diff --git a/core/src/main/java/org/python/core/PyMethodDescr.java b/core/src/main/java/org/python/core/PyMethodDescr.java new file mode 100644 index 000000000..7f5cf6244 --- /dev/null +++ b/core/src/main/java/org/python/core/PyMethodDescr.java @@ -0,0 +1,1189 @@ +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.invoke.WrongMethodTypeException; +import java.util.List; + +import org.python.base.InterpreterError; +import org.python.base.MethodKind; +import org.python.core.ArgumentError.Mode; +import org.python.core.PyType.Flag; +import org.python.core.Slot.EmptyException; + +/** + * Descriptor for a method defined in Java, that is to be called + * from Python. 
A {@code PyMethodDescr} is a callable object itself, + * and provides binding behaviour through + * {@link #__get__(Object, PyType) __get__}, which usually creates a + * {@link PyJavaFunction}. + *

+ * It suits us to sub-class {@code PyMethodDescr} to express the + * multiplicity of implementations and to respond to the signature + * of the method, optimising its data flow to arguments. Instances + * are therefore obtained by calling + * {@link PyMethodDescr#fromParser(PyType, ArgParser, List) + * fromParser}. + */ +abstract class PyMethodDescr extends MethodDescriptor { + + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("method_descriptor", MethodHandles.lookup()) // + .flagNot(Flag.BASETYPE) // + .flag(Flag.IS_METHOD_DESCR, Flag.IS_DESCR)); + + /* + * We depart from CPython in reifying information from the Java + * declaration and associated annotations, not as a separate + * PyMethodDef but in the PyMethodDescr itself and in an attached + * argument parser. In CPython, the same information is present at + * run-time in the structure of the code itself, generated by + * Argument Clinic, in the flags of the PyMethodDef, and as text in + * the signature that begins the documentation string. + */ + + /** + * An argument parser supplied to this {@code PyMethodDescr} at + * construction, from Java reflection of the implementation and from + * annotations on it. Full information on the signature is available + * from this structure, and it is available to parse the arguments + * to {@link #__call__(Object[], String[])} for methods with a + * complex signature. (In sub-classes that represent methods with a + * simple signature, it is only used to generate error messages once + * validity checks fail.) + */ + final ArgParser argParser; + + /** + * A handle to the implementation of the function or method. In the + * general case, this handle has signature {@code (O,O[])O}, and + * expects a "self" object and an array of arguments, but not in + * optimised cases. + *

+ * Sub-classes where {@code objclass} has multiple accepted + * implementations, supersede {@code method} with an array, and + * place here only a default that throws {@link EmptyException}. + */ + // CPython PyMethodDef::ml_meth + final MethodHandle method; + + /** + * Deduced method signature (useful to have cached when constructing + * a {@link PyJavaFunction}). Note that this is allowed to differ + * from {@link MethodSignature#fromParser(ArgParser) + * MethodSignature.fromParser(argParser)}. + */ + final MethodSignature signature; + + /** + * Construct a Python {@code method} descriptor, identifying the + * implementation by a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param signature to which to coerce the handle + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + protected PyMethodDescr(PyType objclass, ArgParser argParser, MethodSignature signature, + MethodHandle method) { + super(TYPE, objclass, argParser.name); + this.argParser = argParser; + this.signature = signature; + // Types with multiple impl set method to null -> empty + assert method == null || objclass.acceptedCount == 1; + this.method = method == null ? signature.empty : signature.prepare(argParser, method); + } + + /** + * Construct a Python {@code method} descriptor from an + * {@link ArgParser} and {@code MethodHandle}s for the + * implementation methods. The arguments described by the parser do + * not include "self". + *

+ * In the most common case, where {@code objclass} has only one + * accepted implementation, there will be only one handle in the + * {@code candidates} list. Where {@code objclass} has multiple + * accepted implementations, {@code candidates} may have one entry + * for each, although it is possible for a single entry with + * {@code Object self}. + * + * @param objclass owning class + * @param ap argument parser (provides name etc.) + * @param candidates method handles for the implementations + * @return a method descriptor supporting the signature + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + static PyMethodDescr fromParser(PyType objclass, ArgParser ap, List candidates) { + assert ap.methodKind == MethodKind.INSTANCE; + /* + * Note this is a recommendation on the assumption all optimisations + * are supported. The actual choice is made in the switch statement. + */ + MethodSignature sig = MethodSignature.fromParser(ap); + + try { + if (objclass.acceptedCount == 1 && candidates.size() == 1) { + /* + * Wrap the single candidate implementation of this method for the + * single accepted implementation of objclass. This is the most + * common case. + */ + MethodHandle mh = candidates.get(0); + switch (sig) { + case NOARGS: + return new NoArgs(objclass, ap, mh); + case O1: + return new O1(objclass, ap, mh); + case O2: + return new O2(objclass, ap, mh); + case O3: + return new O3(objclass, ap, mh); + case POSITIONAL: + return new Positional(objclass, ap, mh); + default: + return new General(objclass, ap, mh); + } + } else { + /* + * There are multiple accepted implementations of objclass. The + * constructors here match them to candidate implementations of ap. + * (We also land here if there is not exactly one candidate, relying + * on the search for one to fail and throw InternalError.) 
+ */ + switch (sig) { + case NOARGS: + return new Multiple.NoArgs(objclass, ap, candidates); + case O1: + return new Multiple.O1(objclass, ap, candidates); + case O2: + return new Multiple.O2(objclass, ap, candidates); + case O3: + return new Multiple.O3(objclass, ap, candidates); + case POSITIONAL: + return new Multiple.Positional(objclass, ap, candidates); + default: + return new Multiple.General(objclass, ap, candidates); + } + } + } catch (WrongMethodTypeException wmte) { + throw new InterpreterError(wmte, "Failed to create descriptor for '%s.%s'", + objclass.name, ap.name); + } + } + + /** + * Invoke the wrapped method handle for the given target + * {@code self}, and standard arguments ({@code Object[]} and + * {@code String[]}). The implementation will arrange the arguments + * as expected by the handle, or throw if they are not correct for + * that. In the general case, a call to {@link #argParser} is + * involved. We create sub-classes of {@link PyMethodDescr} to + * represent the finite repertoire of {@code MethodSignature}s, that + * override this method with simplified logic. + * + * @param self target object of the method call + * @param args arguments of the method call + * @param names of arguments given by keyword or {@code null} + * @return result of the method call + * @throws TypeError when the arguments ({@code args} {@code names}) + * are not correct for the method signature + * @throws ArgumentError as a shorthand for {@link TypeError}, which + * the caller must convert with + * {@link MethodDescriptor#typeError(ArgumentError, Object[])} + * @throws Throwable from the implementation of the method + */ + // Compare CPython method_vectorcall_* in descrobject.c + abstract Object callMethod(Object self, Object[] args, String[] names) + throws ArgumentError, TypeError, Throwable; + + /** + * Invoke the wrapped method handle for the given target + * {@code self}, and arguments ({@code Object[]} given only by + * position. 
Otherwise, this is the same as + * {@link #callMethod(Object, Object[], String[])}. + * + * @param self target object of the method call + * @param args arguments of the method call + * @return result of the method call + * @throws TypeError when the arguments ({@code args} are not + * correct for the method signature + * @throws ArgumentError as a shorthand for {@link TypeError}, which + * the caller must convert with + * {@link MethodDescriptor#typeError(ArgumentError, Object[])} + * @throws Throwable from the implementation of the method + */ + // Compare CPython method_vectorcall_* in descrobject.c + abstract Object callMethod(Object self, Object[] args) + throws ArgumentError, TypeError, Throwable; + + /* + * Having defined callMethod in which the first argument self is + * given special treatment, we now provide a complete set of + * implementations of call(...). These supersede the default + * implementations given in FastCall. In FastCall, each makes an + * array of its arguments for call(args), and ultimately we reach + * call(args, null). + * + * Here instead we express each in terms of callMethod(self, args), + * to avoid packing self into an array we then have to split. This + * only has an effect where a sub-class does not further override + * the method, to deal efficiently with a particular number of + * positional arguments. + */ + + @Override + public Object call(Object[] args, String[] names) throws TypeError, Throwable { + int m = args.length - 1; + if (m < 0) { + // Not even one argument + throw new TypeError(DESCRIPTOR_NEEDS_ARGUMENT, name, objclass.name); + } else { + // Split the leading element self from rest of args + Object self = args[0], rest[]; + if (m == 0) { + rest = Py.EMPTY_ARRAY; + } else { + rest = new Object[m]; + System.arraycopy(args, 1, rest, 0, m); + } + + // Call this as a method bound to self. 
+ try { + return callMethod(self, rest, names); + } catch (ArgumentError ae) { + throw typeError(ae, rest, names); + } + } + } + + // call() throws so no need to optimise + + @Override + public Object call(Object self) throws Throwable { return callMethod(self, Py.EMPTY_ARRAY); } + + @Override + public Object call(Object self, Object a1) throws Throwable { + return callMethod(self, new Object[] {a1}); + } + + @Override + public Object call(Object self, Object a1, Object a2) throws Throwable { + return callMethod(self, new Object[] {a1, a2}); + } + + @Override + public Object call(Object self, Object a1, Object a2, Object a3) throws Throwable { + return callMethod(self, new Object[] {a1, a2, a3}); + } + + /** + * Return the handle contained in this descriptor applicable to the + * Java class of a {@code self} argument during a call. In the base + * implementation, this is by checking #objclass against the MRO of + * {@code PyType.of(self)}. Sub-classes where {@code objclass} has + * multiple accepted implementations override this method, because + * they supersede {@link #method} with an array. + *

+ * The {@link Descriptor#objclass} is consulted to make this + * determination. If the class of {@code self} is not an accepted + * implementation of {@code objclass}, an empty slot handle (with + * the correct signature) is returned. + * + * @param self the {@code self} argument in some call + * @return corresponding handle (or one that throws + * {@link EmptyException}) + * @throws TypeError if {@code self} is of unacceptable type + * @throws Throwable on other errors while chasing the MRO + */ + MethodHandle getHandle(Object self) throws TypeError, Throwable { + /* + * Note that when we override this descriptors supporting types with + * multiple accepted implementations, we do not need to check along + * the Python MRO since the outcome is guaranteed by the Java class + * match. + */ + // ??? At least, I think so. + checkSelfType(self); + return method; + } + + // special methods ------------------------------------------------ + + // Compare CPython method_repr in descrobject.c + Object __repr__() { return descrRepr("method"); } + + /** + * Invoke the Java method this method descriptor points to, using + * the standard {@code __call__} arguments supplied, default + * arguments and other information described in the associated + * {@link #argParser} for the method. 
+ * + * @param args all arguments beginning with {@code self} + * @param names of keyword arguments + * @return result of calling the wrapped method + * @throws TypeError if {@code args[0]} is of the wrong type + * @throws Throwable from the implementation of the special method + */ + Object __call__(Object[] args, String[] names) throws TypeError, Throwable { + try { + if (names != null && names.length != 0) { + return call(args, names); + } else { + int n = args.length; + switch (n) { + // case 0 (an error) handled by default clause + case 1: + return call(args[0]); + case 2: + return call(args[0], args[1]); + case 3: + return call(args[0], args[1], args[2]); + case 4: + return call(args[0], args[1], args[2], args[3]); + default: + return call(args); + } + } + } catch (ArgumentError ae) { + throw typeError(ae, args); + } + } + + /* + * A simplified __call__ used in the narrative. To use, rename this + * to __call__, rename the real __call__ to something else, and + * force fromParser() always to select General as the implementation + * type. + */ + Object simple__call__(Object[] args, String[] names) throws TypeError, Throwable { + int m = args.length - 1, nk = names == null ? 0 : names.length; + if (m < nk) { + // Not even one argument (self) given by position + throw new TypeError(DESCRIPTOR_NEEDS_ARGUMENT, name, objclass.name); + } else { + // Call this with self and rest of args separately. + Object self = args[0]; + MethodHandle mh = getHandle(self); + // Parse args without the leading element self + Object[] frame = argParser.parse(args, 1, m, names); + return mh.invokeExact(self, frame); + } + } + + /** + * Return the described method, bound to {@code obj} as its "self" + * argument, or if {@code obj==null}, return this descriptor. In the + * non-null case, {@code __get__} returns a {@link PyJavaFunction}. 
+ * Calling the returned object invokes the same Java method as this + * descriptor, with {@code obj} as first argument, and other + * arguments to the call appended. + * + * @param obj target (self) of the method, or {@code null} + * @param type ignored + * @return method bound to {@code obj} or this descriptor. + * @throws TypeError if {@code obj!=null} is not compatible + * @throws Throwable on other errors while chasing the MRO + */ + @Override + // Compare CPython method_get in descrobject.c + Object __get__(Object obj, PyType type) throws TypeError, Throwable { + if (obj == null) + // Return the descriptor itself. + return this; + else { + // Return a callable binding the method and the target + check(obj); + return PyJavaFunction.from(this, obj); + } + } + + // exposed methods ----------------------------------------------- + + /** @return name of the function or method */ + // Compare CPython meth_get__name__ in methodobject.c + // @Exposed.Getter + String __name__() { return argParser.name; } + + // Compare CPython method_get_doc in descrobject.c + Object get_doc() { return PyType.getDocFromInternalDoc(__name__(), argParser.doc); } + + // Compare CPython method_get_text_signature in descrobject.c + Object get_text_signature() { + return PyType.getTextSignatureFromInternalDoc(__name__(), argParser.doc); + } + + // plumbing ------------------------------------------------------ + + /** + * Construct an array of method handles, one for each accepted + * implementation of {@code objclass}, working from a list of + * candidate method handles. 
+ * + * @param sig handle type information distilled from parser + * @param candidates handles on which to base return + * @return method handles aligned to {@code objclass.clases} + */ + protected MethodHandle[] prepareCandidates(MethodSignature sig, List candidates) { + + final int N = objclass.acceptedCount; + MethodHandle[] methods = new MethodHandle[N]; + + // Fill the method array with matching method handles + for (int i = 0; i < N; i++) { + // Seek most specific match for the i.th accepted class + Class ac = objclass.classes[i]; + MethodHandle match = null; + // The candidates are sorted most specific first + for (MethodHandle mh : candidates) { + MethodType mt = mh.type(); + if (mt.parameterType(0).isAssignableFrom(ac)) { + match = mh; + break; + } + } + + if (match != null) { + /* + * Prepare the implementation handle to match the invocation type in + * callMethod. + */ + methods[i] = sig.prepare(argParser, match); + } else { + // No match means no implementation we can use + throw new InterpreterError("'%s.%s' not implemented for %s", objclass.name, + __name__(), objclass.classes[i]); + } + } + return methods; + } + + /** + * Make sure that the argument is acceptable as 'self' by checking + * against the MRO of {@code PyType.of(self)}. 
+ * + * @param self to be checked + * @throws TypeError if it is of unacceptable type + * @throws Throwable on other errors while chasing the MRO + */ + protected final void checkSelfType(Object self) throws TypeError, Throwable { + // Make sure that the argument is acceptable as 'self' + PyType selfType = PyType.of(self); + if (selfType == objclass || Abstract.recursiveIsSubclass(selfType, objclass)) { + return; + } else { + throw selfTypeError(self); + } + } + + protected TypeError selfTypeError(Object self) { + PyType selfType = PyType.of(self); + return new TypeError(DESCRIPTOR_REQUIRES, name, objclass.name, selfType.name); + } + + /** + * The implementation may have any signature supportable in Python + * by {@link ArgParser}. {@link #fromParser(PyType, ArgParser, List) + * fromParser()} will choose a {@code General} representation of the + * method when no optimisations apply. + */ + private static class General extends PyMethodDescr { + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + General(PyType objclass, ArgParser argParser, MethodHandle method) { + super(objclass, argParser, MethodSignature.GENERAL, method); + } + + /** + * {@inheritDoc} + *

+ * Exceptionally, it pays to override the inherited implementation + * here because we have to use the fully general {@link ArgParser} + * and it is able to work on the slice {@code args[1:]} without a + * copy. + */ + @Override + public Object call(Object[] args, String[] names) throws ArgumentError, Throwable { + // Call this with self and rest of args separately. + checkHasSelf(args, names); + Object self = args[0]; + MethodHandle mh = getHandle(self); + // Parse args without the leading element self + Object[] frame = argParser.parse(args, 1, args.length - 1, names); + return mh.invokeExact(self, frame); + } + + /** + * {@inheritDoc} + *

+ * Exceptionally, it pays to override the inherited implementation + * here because we have to use the fully general {@link ArgParser} + * and it is able to work on the slice {@code s[p+1:p+n]} without a + * copy. + */ + @Override + public Object vectorcall(Object[] s, int p, int n, String[] names) + throws ArgumentError, Throwable { + int m = n - 1, nk = names == null ? 0 : names.length; + if (m < nk) { + // Not even one argument (self) given by position + throw new ArgumentError(Mode.SELF); + } + // Call this with self separated. + Object self = s[p]; + MethodHandle mh = getHandle(self); + // Parse args without the leading element self + Object[] frame = argParser.parse(s, p + 1, m, names); + return mh.invokeExact(self, frame); + } + + /** Equivalent to {@code vectorcall(s, p, n, null)}. */ + @Override + public Object vectorcall(Object[] s, int p, int n) throws ArgumentError, Throwable { + return vectorcall(s, p, n, null); + } + + @Override + Object callMethod(Object self, Object[] args, String[] names) throws TypeError, Throwable { + /* + * The method handle type is (O,O[])O. The parser will make an array + * of the args, gather excess arguments into a tuple or dict (where + * allowed), and fill missing ones from defaults. + */ + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.GENERAL.instanceType; + Object[] frame = argParser.parse(args, names); + return mh.invokeExact(self, frame); + } + + @Override + Object callMethod(Object self, Object[] args) throws TypeError, Throwable { + return callMethod(self, args, null); + } + } + + /** + * Base class for methods that accept between defined maximum and + * minimum numbers of arguments {@code self}, that must be given by + * position. Maximum and minimum may be equal to a single acceptable + * number. + *

+ * Arguments may not be given by keyword. There is no excess + * argument (varargs) collector. + *

+ * The number of arguments required by the wrapped Java method sets + * a maximum allowable number of arguments. Fewer arguments than + * this may be given, to the extent that defaults specified by the + * parser make up the difference. The number of available defaults + * determines the minimum number of arguments to be supplied. + */ + private static abstract class AbstractPositional extends PyMethodDescr { + + /** Default values of the trailing arguments. */ + protected final Object[] d; + + /** Minimum number of positional arguments in a call. */ + protected final int min; + + /** Maximum number of positional arguments in a call. */ + protected final int max; + + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param signature to which to coerce the handle + * @param method handle to invoke the wrapped method or {@code null} + * signifying the empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + AbstractPositional(PyType objclass, ArgParser argParser, MethodSignature signature, + MethodHandle method) { + super(objclass, argParser, signature, method); + assert !argParser.hasVarArgs(); + // Cardinal values for positional argument processing + this.d = argParser.getDefaults(); + this.max = argParser.argcount; + this.min = argParser.argcount - d.length; + } + + @Override + Object callMethod(Object self, Object[] args, String[] names) throws TypeError, Throwable { + if (names == null || names.length == 0) { + return callMethod(self, args); + } else { + // The self-type error takes precedence if both. 
+ checkSelfType(self); + throw new ArgumentError(Mode.NOKWARGS); + } + } + + @Override + public Object call(Object[] args) throws TypeError, Throwable { + int m = args.length - 1; + if (m < 0) { + // Not even one argument + throw new TypeError(DESCRIPTOR_NEEDS_ARGUMENT, name, objclass.name); + } else { + // Split the leading element self from rest of args + Object self = args[0], rest[]; + if (m == 0) { + rest = Py.EMPTY_ARRAY; + } else { + rest = new Object[m]; + System.arraycopy(args, 1, rest, 0, m); + } + + // Call this as a method bound to self. + try { + return callMethod(self, rest); + } catch (ArgumentError ae) { + throw typeError(ae, rest); + } + } + } + } + + /** + * A method represented by {@code NoArgs} accepts no + * arguments after {@code self}, by position or keyword. + *

+ * {@link #fromParser(PyType, ArgParser, List)} will only choose a + * {@code NoArgs} (or sub-class) representation of the method when + * this condition applies. + */ + private static class NoArgs extends AbstractPositional { + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + NoArgs(PyType objclass, ArgParser argParser, MethodHandle method) { + super(objclass, argParser, MethodSignature.NOARGS, method); + assert max == 0; + assert max - min == d.length; + } + + @Override + Object callMethod(Object self, Object[] args) throws TypeError, Throwable { + // The method handle type is (O)O (no args after self). + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.NOARGS.instanceType; + return mh.invokeExact(self); + } + + @Override + public Object call(Object self) throws Throwable { + // The method handle type is (O)O. + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.NOARGS.instanceType; + return mh.invokeExact(self); + } + } + + /** + * The implementation signature requires {@code self} and one + * argument, which may be supplied by + * {@link ArgParser#getDefaults()}. + */ + private static class O1 extends AbstractPositional { + + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. 
+ */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O1(PyType objclass, ArgParser argParser, MethodHandle method) { + super(objclass, argParser, MethodSignature.O1, method); + assert max == 1; + assert max - min == d.length; + } + + @Override + Object callMethod(Object self, Object[] a) throws TypeError, Throwable { + // The method handle type is (O,O)O. + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O1.instanceType; + int n = a.length; + if (n == 1) { + // Number of arguments matches number of parameters + return mh.invokeExact(self, a[0]); + } else if (n == min) { + // Since min<=max, max==1 and n!=1, we have n==min==0 + return mh.invokeExact(self, d[0]); + } + // n < min || n > max + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self) throws Throwable { + MethodHandle mh = getHandle(self); + if (min > 0) { throw new ArgumentError(min, 1); } + return mh.invokeExact(self, d[0]); + } + + @Override + public Object call(Object self, Object a1) throws Throwable { + MethodHandle mh = getHandle(self); + return mh.invokeExact(self, a1); + } + } + + /** + * The implementation signature requires {@code self} and two + * arguments, which may be supplied by + * {@link ArgParser#getDefaults()}. + */ + private static class O2 extends AbstractPositional { + + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. 
+ */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O2(PyType objclass, ArgParser argParser, MethodHandle method) { + super(objclass, argParser, MethodSignature.O2, method); + assert max == 2; + assert max - min == d.length; + } + + @Override + Object callMethod(Object self, Object[] a) throws ArgumentError, TypeError, Throwable { + // The method handle type is (O,O,O)O. + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O2.instanceType; + int n = a.length, k; + if (n == 2) { + // Number of arguments matches number of parameters + return mh.invokeExact(self, a[0], a[1]); + } else if ((k = n - min) >= 0) { + if (n == 1) { + return mh.invokeExact(self, a[0], d[k]); + } else if (n == 0) + return mh.invokeExact(self, d[k++], d[k]); + } + // n < min || n > max + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O2.instanceType; + if (min == 0) { return mh.invokeExact(self, d[0], d[1]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self, Object a1) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O2.instanceType; + int k = 1 - min; + if (k >= 0) { return mh.invokeExact(self, a1, d[k]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self, Object a1, Object a2) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O2.instanceType; + return mh.invokeExact(self, a1, a2); + } + } + + /** + * The implementation signature requires {@code self} and three + * arguments, which may be supplied by + * {@link ArgParser#getDefaults()}. + */ + private static class O3 extends AbstractPositional { + + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. 
+ * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying a matching empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O3(PyType objclass, ArgParser argParser, MethodHandle method) { + super(objclass, argParser, MethodSignature.O3, method); + assert max == 3; + assert max - min == d.length; + } + + @Override + Object callMethod(Object self, Object[] a) throws ArgumentError, TypeError, Throwable { + // The method handle type is (O,O,O,O)O. + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O3.instanceType; + int n = a.length, k; + if (n == 3) { + // Number of arguments matches number of parameters + return mh.invokeExact(self, a[0], a[1], a[2]); + } else if ((k = n - min) >= 0) { + if (n == 2) { + return mh.invokeExact(self, a[0], a[1], d[k]); + } else if (n == 1) { + return mh.invokeExact(self, a[0], d[k++], d[k]); + } else if (n == 0) + return mh.invokeExact(self, d[k++], d[k]); + } + // n < min || n > max + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O3.instanceType; + if (min == 0) { return mh.invokeExact(self, d[0]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self, Object a1) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O3.instanceType; + int k = 1 - min; + if (k >= 0) { return mh.invokeExact(self, a1, d[k++], d[k]); } + throw new ArgumentError(min, max); + } + + @Override + public Object call(Object self, Object a1, Object a2) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O3.instanceType; + int k = 2 - min; + if (k >= 0) { return mh.invokeExact(self, a1, a2, d[k]); } + throw new ArgumentError(min, 
max); + } + + @Override + public Object call(Object self, Object a1, Object a2, Object a3) throws Throwable { + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.O3.instanceType; + return mh.invokeExact(self, a1, a2, a3); + } + } + + /** + * A method represented by {@code Positional} only accepts arguments + * after {@code self} given by position. The constraints detailed + * for {@link AbstractPositional} apply. + *

+ * {@link #fromParser(PyType, ArgParser, List) fromParser()} will + * only choose a {@code Positional} (or sub-class) representation of + * the method when these conditions apply. + */ + private static class Positional extends AbstractPositional { + + /** + * Construct a method descriptor, identifying the implementation by + * a parser and a method handle. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param method handle to invoke the wrapped method or {@code null} + * signifying the empty handle. + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + Positional(PyType objclass, ArgParser argParser, MethodHandle method) { + super(objclass, argParser, MethodSignature.POSITIONAL, method); + assert max == argParser.argcount; + assert max - min == d.length; + } + + @Override + Object callMethod(Object self, Object[] a) throws TypeError, Throwable { + // The method handle type is (O,O[])O. + MethodHandle mh = getHandle(self); + assert mh.type() == MethodSignature.POSITIONAL.instanceType; + int n = a.length, k; + if (n == max) { + // Number of arguments matches number of parameters + return mh.invokeExact(self, a); + } else if ((k = n - min) >= 0) { + // Concatenate a[:] and defaults[k:] + Object[] frame = new Object[max]; + System.arraycopy(a, 0, frame, 0, n); + System.arraycopy(d, k, frame, n, max - n); + return mh.invokeExact(self, frame); + } + // n < min || n > max + throw new ArgumentError(min, max); + } + } + + /** + * A name space for sub-classes of {@link PyMethodDescr} to use for + * an instance method when the owning Python type has multiple + * accepted implementations. + */ + private static class Multiple { + + /** + * A variant of {@link PyMethodDescr.General} that allows for + * multiple accepted implementations of {@code objclass}. + */ + private static class General extends PyMethodDescr.General { + /** + * Handles for the particular implementations of a method. 
The + * method type of each is the same as {@link PyMethodDescr#method}, + * which itself will be set to "empty". + */ + protected final MethodHandle[] methods; + + /** + * Construct a method descriptor, a variant of + * {@link PyMethodDescr.General} that allows for multiple accepted + * implementations of {@code objclass}. + * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param candidates handles to the implementations of that method + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + General(PyType objclass, ArgParser argParser, List candidates) { + super(objclass, argParser, null); + this.methods = prepareCandidates(MethodSignature.GENERAL, candidates); + } + + @Override + MethodHandle getHandle(Object self) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(self.getClass()); + try { + return methods[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + throw selfTypeError(self); + } + } + } + + /** + * A variant of {@link PyMethodDescr.NoArgs} that allows for + * multiple accepted implementations of {@code objclass}. + */ + static class NoArgs extends PyMethodDescr.NoArgs { + /** + * Handles for the particular implementations of a method. The + * method type of each is the same as {@link PyMethodDescr#method}, + * which itself will be set to "empty". + */ + protected final MethodHandle[] methods; + + /** + * Construct a method descriptor, a variant of + * {@link PyMethodDescr.General} that allows for multiple accepted + * implementations of {@code objclass}. 
+ * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param candidates handles to the implementations of that method + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + NoArgs(PyType objclass, ArgParser argParser, List candidates) { + super(objclass, argParser, null); + this.methods = prepareCandidates(MethodSignature.NOARGS, candidates); + } + + @Override + MethodHandle getHandle(Object self) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(self.getClass()); + try { + return methods[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + throw selfTypeError(self); + } + } + } + + /** + * A variant of {@link PyMethodDescr.O1} that allows for multiple + * accepted implementations of {@code objclass}. + */ + static class O1 extends PyMethodDescr.O1 { + /** + * Handles for the particular implementations of a method. The + * method type of each is the same as {@link PyMethodDescr#method}, + * which itself will be set to "empty". + */ + protected final MethodHandle[] methods; + + /** + * Construct a method descriptor, a variant of + * {@link PyMethodDescr.O1} that allows for multiple accepted + * implementations of {@code objclass}. 
+ * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param candidates handles to the implementations of that method + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O1(PyType objclass, ArgParser argParser, List candidates) { + super(objclass, argParser, null); + this.methods = prepareCandidates(MethodSignature.O1, candidates); + } + + @Override + MethodHandle getHandle(Object self) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(self.getClass()); + try { + return methods[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + throw selfTypeError(self); + } + } + } + + /** + * A variant of {@link PyMethodDescr.O2} that allows for multiple + * accepted implementations of {@code objclass}. + */ + static class O2 extends PyMethodDescr.O2 { + /** + * Handles for the particular implementations of a method. The + * method type of each is the same as {@link PyMethodDescr#method}, + * which itself will be set to "empty". + */ + protected final MethodHandle[] methods; + + /** + * Construct a method descriptor, a variant of + * {@link PyMethodDescr.O1} that allows for multiple accepted + * implementations of {@code objclass}. 
+ * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param candidates handles to the implementations of that method + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O2(PyType objclass, ArgParser argParser, List candidates) { + super(objclass, argParser, null); + this.methods = prepareCandidates(MethodSignature.O2, candidates); + } + + @Override + MethodHandle getHandle(Object self) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(self.getClass()); + try { + return methods[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { + throw selfTypeError(self); + } + } + } + + /** + * A variant of {@link PyMethodDescr.O3} that allows for multiple + * accepted implementations of {@code objclass}. + */ + static class O3 extends PyMethodDescr.O3 { + /** + * Handles for the particular implementations of a method. The + * method type of each is the same as {@link PyMethodDescr#method}, + * which itself will be set to "empty". + */ + protected final MethodHandle[] methods; + + /** + * Construct a method descriptor, a variant of + * {@link PyMethodDescr.O1} that allows for multiple accepted + * implementations of {@code objclass}. 
+ * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param candidates handles to the implementations of that method + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + O3(PyType objclass, ArgParser argParser, List candidates) { + super(objclass, argParser, null); + this.methods = prepareCandidates(MethodSignature.O3, candidates); + } + + @Override + MethodHandle getHandle(Object self) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(self.getClass()); + try { + return methods[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { /* no handle for this class of self */ + throw selfTypeError(self); + } + } + } + + /** + * A variant of {@link PyMethodDescr.Positional} that allows for + * multiple accepted implementations of {@code objclass}. + */ + static class Positional extends PyMethodDescr.Positional { + /** + * Handles for the particular implementations of a method. The + * method type of each is the same as {@link PyMethodDescr#method}, + * which itself will be set to "empty". + */ + protected final MethodHandle[] methods; + + /** + * Construct a method descriptor, a variant of + * {@link PyMethodDescr.Positional} that allows for multiple + * accepted implementations of {@code objclass}. 
+ * + * @param objclass the class declaring the method + * @param argParser describing the signature of the method + * @param candidates handles to the implementations of that method + */ + // Compare CPython PyDescr_NewMethod in descrobject.c + Positional(PyType objclass, ArgParser argParser, List candidates) { + super(objclass, argParser, null); + this.methods = prepareCandidates(MethodSignature.POSITIONAL, candidates); + } + + @Override + MethodHandle getHandle(Object self) { + // Work out how to call this descriptor on that object + int index = objclass.indexAccepted(self.getClass()); + try { + return methods[index]; + } catch (ArrayIndexOutOfBoundsException iobe) { /* no handle for this class of self */ + throw selfTypeError(self); + } + } + } + } +} diff --git a/core/src/main/java/org/python/core/PyMethodWrapper.java b/core/src/main/java/org/python/core/PyMethodWrapper.java new file mode 100644 index 000000000..59cd7ff55 --- /dev/null +++ b/core/src/main/java/org/python/core/PyMethodWrapper.java @@ -0,0 +1,136 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; + +import org.python.core.Exposed.Member; +import org.python.core.PyType.Flag; + +/** + * The Python type {@code types.MethodWrapperType} (seen also as + * {@code <class 'method-wrapper'>}) represents a special method + * bound to a particular target "self" object. It is part of the + * mechanism which allows a special method defined in Java to be + * called from Python using its name as an attribute. + *

+ * An example from the wild is to evaluate: {@code (42).__add__}, + * binding the method {@code __add__} to the target {@code 42}, to + * produce a callable equivalent to λ x . 42+x. An instance + * of this class results from a call to + * {@link PyWrapperDescr#__get__(Object, PyType)}. + */ +// Compare CPython wrapperobject in descrobject.c +// and _PyMethodWrapper_Type in descrobject.c +class PyMethodWrapper extends AbstractPyObject implements FastCall { + + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("method-wrapper", MethodHandles.lookup()).flagNot(Flag.BASETYPE)); + + // No subclasses so always this type + @Override + public PyType getType() { return TYPE; } + + /** Descriptor for the method being bound. */ + @Member + PyWrapperDescr descr; + + /** + * The target object of the method call that results when + * {@link #__call__(Object[], String[]) __call__} is invoked on this + * object. This is exposed to Python as {@code __self__}. + */ + @Member("__self__") + final Object self; + + /** + * Bind a slot wrapper descriptor to its target. The result is a + * callable object e.g. in {@code bark = "Woof!".__mul__}, + * {@code bark} will be an instance of this class, {@code "Woof!"} + * is {@code self} and {@code str.__mul__} is the descriptor. 
+ * + * @param descr for the special method to bind + * @param self to which this method call is bound + */ + PyMethodWrapper(PyWrapperDescr descr, Object self) { + super(TYPE); + this.descr = descr; + this.self = self; + } + + // Exposed attributes --------------------------------------------- + + // Compare CPython wrapper_objclass in descrobject.c + protected Object __objclass__() { + Object c = descr.objclass; + return c; + } + + // Compare CPython wrapper_name in descrobject.c + protected Object __name__() { return descr.slot.methodName; } + + // Compare CPython wrapper_doc in descrobject.c + protected Object __doc__() { + return PyType.getDocFromInternalDoc(descr.slot.methodName, descr.slot.doc); + } + + // Compare CPython wrapper_text_signature in descrobject.c + protected Object __text_signature__() { + return PyType.getTextSignatureFromInternalDoc(descr.slot.methodName, descr.slot.doc); + } + + // Compare CPython wrapper_qualname in descrobject.c + protected Object __qualname__() throws AttributeError, Throwable { + return Descriptor.descr_get_qualname(descr, null); + } + + // Special methods ------------------------------------------------ + + // Compare CPython wrapper_repr in descrobject.c (toAt supplies the "T object at 0x..." part) + protected Object __repr__() { + return String.format("<method-wrapper '%s' of %s>", descr.slot.methodName, + PyObjectUtil.toAt(self)); + } + + // Compare CPython wrapper_richcompare in descrobject.c + protected Object __eq__(Object b) { + // Both arguments should be exactly PyMethodWrapper + if (b instanceof PyMethodWrapper) { + PyMethodWrapper wb = (PyMethodWrapper)b; + return descr == wb.descr && self == wb.self; + } + return Py.NotImplemented; + } + + // Compare CPython wrapper_richcompare in descrobject.c + protected Object __ne__(Object b) { + // Both arguments should be exactly PyMethodWrapper + if (b instanceof PyMethodWrapper) { + PyMethodWrapper wb = (PyMethodWrapper)b; + return descr != wb.descr || self != wb.self; + } + return Py.NotImplemented; + } + + // Compare CPython 
wrapper_hash in descrobject.c + protected int __hash__() { + int x = self.hashCode() ^ descr.hashCode(); + return x == -1 ? -2 : x; /* never -1, which CPython reserves to signal an error */ + } + + // Compare CPython wrapper_call in descrobject.c + public Object __call__(Object[] args, String[] names) throws Throwable { + // ??? Could specialise to numbers of arguments/nokwds? + return descr.callWrapped(self, args, names); + } + + @Override + public Object call(Object[] args, String[] names) throws Throwable { + return descr.callWrapped(self, args, names); + } + + @Override + public TypeError typeError(ArgumentError ae, Object[] args, String[] names) { + return descr.typeError(ae, args, names); + } +} diff --git a/core/src/main/java/org/python/core/PyModule.java b/core/src/main/java/org/python/core/PyModule.java new file mode 100644 index 000000000..b660f51d5 --- /dev/null +++ b/core/src/main/java/org/python/core/PyModule.java @@ -0,0 +1,78 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code module} object. */ +public class PyModule implements CraftedPyObject, DictPyObject { + + /** The type of Python object this class implements. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("module", MethodHandles.lookup())); + + protected final PyType type; + + /** Name of this module. Not {@code null}. **/ + final String name; + + /** Dictionary (globals) of this module. Not {@code null}. **/ + final PyDict dict; + + /** + * As {@link #PyModule(String)} for Python sub-class specifying + * {@link #type}. + * + * @param type actual Python sub-class being created + * @param name of module + */ + PyModule(PyType type, String name) { + this.type = type; + this.name = name; + this.dict = new PyDict(); + } + + /** + * Construct an instance of the named module. 
+ * + * @param name of module + */ + PyModule(String name) { this(TYPE, name); } + + /** + * Initialise the module instance. The main action will be to add + * entries to {@link #dict}. These become the members (globals) of + * the module. + */ + void exec() {} + + @Override + public PyType getType() { return type; } + + /** + * The global dictionary of a module instance. This is always a + * Python {@code dict} and never {@code null}. + * + * @return The globals of this module + */ + @Override + public PyDict getDict() { return dict; } + + @Override + public String toString() { return String.format("<module '%s'>", name); } /* e.g. <module 'sys'> */ + + /** + * Add a type by name to the dictionary. + * + * @param t the type + */ + void add(PyType t) { dict.put(t.getName(), t); } + + /** + * Add an object by name to the module dictionary. + * + * @param name to use as key + * @param o value for key + */ + void add(String name, Object o) { dict.put(name, o); } +} diff --git a/core/src/main/java/org/python/core/PyNumber.java b/core/src/main/java/org/python/core/PyNumber.java new file mode 100644 index 000000000..f5af99def --- /dev/null +++ b/core/src/main/java/org/python/core/PyNumber.java @@ -0,0 +1,419 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.util.function.Function; + +import org.python.core.Slot.EmptyException; + +/** + * Abstract API for operations on numeric types, corresponding to + * CPython methods defined in {@code abstract.h} and with names + * like: {@code PyNumber_*}. + */ +public class PyNumber extends Abstract { + + private PyNumber() {} // only static methods here + + /** + * {@code -v}: unary negative with Python semantics. 
+ * + * @param v operand + * @return {@code -v} + * @throws Throwable from invoked implementations + */ + public static Object negative(Object v) throws Throwable { + try { + return Operations.of(v).op_neg.invokeExact(v); + } catch (Slot.EmptyException e) { + throw operandError(Slot.op_neg, v); + } + } + + /** + * {@code ~v}: unary inversion with Python semantics. + * + * @param v operand + * @return {@code ~v} + * @throws Throwable from invoked implementations + */ + public static Object invert(Object v) throws Throwable { + try { + return Operations.of(v).op_invert.invokeExact(v); + } catch (Slot.EmptyException e) { + throw operandError(Slot.op_invert, v); + } + } + + /** + * {@code abs(v)}: absolute value with Python semantics. + * + * @param v operand + * @return {@code abs(v)} + * @throws Throwable from invoked implementations + */ + public static Object absolute(Object v) throws Throwable { + try { + return Operations.of(v).op_abs.invokeExact(v); + } catch (Slot.EmptyException e) { + throw operandError(Slot.op_abs, v); + } + } + + /** + * Create a {@code TypeError} for a named unary operation, along the + * lines "bad operand type for OP: 'T'". + * + * @param op operation to report + * @param v actual operand (only {@code PyType.of(v)} is used) + * @return exception to throw + */ + static PyException operandError(Slot op, Object v) { + return new TypeError("bad operand type for %s: '%.200s'", op.opName, + PyType.of(v).getName()); + } + + /** + * {@code v + w} with Python semantics. + * + * @param v left operand + * @param w right operand + * @return {@code v + w} + * @throws Throwable from invoked implementations, or a {@code TypeError} if neither operand implements the operation + */ + public static Object add(Object v, Object w) throws Throwable { + return binary_op(v, w, Slot.op_add); + } + + /** + * {@code v - w} with Python semantics. 
+ * + * @param v left operand + * @param w right operand + * @return {@code v - w} + * @throws Throwable from invoked implementations, or a {@code TypeError} if neither operand implements the operation + */ + public static Object subtract(Object v, Object w) throws Throwable { + return binary_op(v, w, Slot.op_sub); + } + + /** + * {@code v * w} with Python semantics. + * + * @param v left operand + * @param w right operand + * @return {@code v * w} + * @throws Throwable from invoked implementations, or a {@code TypeError} if neither operand implements the operation + */ + public static Object multiply(Object v, Object w) throws Throwable { + return binary_op(v, w, Slot.op_mul); + } + + /** + * {@code v | w} with Python semantics. + * + * @param v left operand + * @param w right operand + * @return {@code v | w} + * @throws Throwable from invoked implementations, or a {@code TypeError} if neither operand implements the operation + */ + static final Object or(Object v, Object w) throws Throwable { + return binary_op(v, w, Slot.op_or); + } + + /** + * {@code v & w} with Python semantics. + * + * @param v left operand + * @param w right operand + * @return {@code v & w} + * @throws Throwable from invoked implementations, or a {@code TypeError} if neither operand implements the operation + */ + static final Object and(Object v, Object w) throws Throwable { + return binary_op(v, w, Slot.op_and); + } + + /** + * {@code v ^ w} with Python semantics. + * + * @param v left operand + * @param w right operand + * @return {@code v ^ w} + * @throws Throwable from invoked implementations, or a {@code TypeError} if neither operand implements the operation + */ + static final Object xor(Object v, Object w) throws Throwable { + return binary_op(v, w, Slot.op_xor); + } + + /** + * Helper for implementing a binary operation that has one, + * slot-based interpretation. 
+ * + * @param v left operand + * @param w right operand + * @param binop operation to apply + * @return result of operation + * @throws TypeError if neither operand implements the operation + * @throws Throwable from the implementation of the operation + */ + private static Object binary_op(Object v, Object w, Slot binop) throws TypeError, Throwable { + try { + Object r = binary_op1(v, w, binop); + if (r != Py.NotImplemented) { return r; } + } catch (Slot.EmptyException e) { /* an empty slot means the same as NotImplemented: fall through */ } + throw operandError(binop, v, w); + } + + /** + * Helper for implementing a binary operation. If neither the left + * type nor the right type implements the operation, it will either + * return {@link Py#NotImplemented} or throw {@link EmptyException}. + * Both mean the same thing. + * + * @param v left operand + * @param w right operand + * @param binop operation to apply + * @return result or {@code Py.NotImplemented} + * @throws Slot.EmptyException when an empty slot is invoked + * @throws Throwable from the implementation of the operation + */ + private static Object binary_op1(Object v, Object w, Slot binop) + throws Slot.EmptyException, Throwable { + + Operations vOps = Operations.of(v); + PyType vtype = vOps.type(v); + + Operations wOps = Operations.of(w); + PyType wtype = wOps.type(w); + + MethodHandle slotv, slotw; + + /* + * CPython would also test: (slotw = rbinop.getSlot(wtype)) == slotv + * as an optimisation, but that's never the case since we use + * distinct binop and rbinop slots. + */ + if (wtype == vtype) { + // Same types so only try the binop slot + slotv = binop.getSlot(vOps); + return slotv.invokeExact(v, w); + + } else if (!wtype.isSubTypeOf(vtype)) { + // Ask left (if not empty) then right. + slotv = binop.getSlot(vOps); + if (slotv != BINARY_EMPTY) { + Object r = slotv.invokeExact(v, w); + if (r != Py.NotImplemented) { return r; } + } + slotw = binop.getAltSlot(wOps); + return slotw.invokeExact(w, v); + + } else { + // Right is sub-class: ask first (if not empty).
+ slotw = binop.getAltSlot(wOps); + if (slotw != BINARY_EMPTY) { + Object r = slotw.invokeExact(w, v); /* the alternate slot takes the operands swapped */ + if (r != Py.NotImplemented) { return r; } + } + slotv = binop.getSlot(vOps); + return slotv.invokeExact(v, w); + } + } + + private static final MethodHandle BINARY_EMPTY = Slot.Signature.BINARY.empty; /* compared by identity to detect an empty binary slot */ + + /** + * True iff the object has a slot for conversion to the index type. + * + * @param obj to test + * @return whether {@code obj} has non-empty {@link Slot#op_index} + */ + // Compare CPython PyIndex_Check in abstract.c + static boolean indexCheck(Object obj) { return Slot.op_index.isDefinedFor(Operations.of(obj)); } + + /** + * Return a Python {@code int} (or subclass) from the object + * {@code o}. Raise {@code TypeError} if the result is not a Python + * {@code int} subclass, or if the object {@code o} cannot be + * interpreted as an index (it does not fill {@link Slot#op_index}). + * This method makes no guarantee about the range of the + * result. + * + * @param o operand + * @return {@code o} coerced to a Python {@code int} + * @throws TypeError if {@code o} cannot be interpreted as an + * {@code int} + * @throws Throwable otherwise from invoked implementations + */ + // Compare with CPython abstract.c :: PyNumber_Index + static Object index(Object o) throws TypeError, Throwable { + + Operations ops = Operations.of(o); + Object res; + + if (ops.isIntExact()) + return o; + else { + try { + res = ops.op_index.invokeExact(o); + // Enforce expectations on the return type + Operations resOps = Operations.of(res); + if (resOps.isIntExact()) + return res; + else if (resOps.type(res).isSubTypeOf(PyLong.TYPE)) + return returnDeprecation("__index__", "int", res); + else + throw returnTypeError("__index__", "int", res); + } catch (EmptyException e) { + throw typeError(CANNOT_INTERPRET_AS_INT, o); + } + } + } + + /** + * Returns {@code o} converted to a Java {@code int} if {@code o} + * can be interpreted as an integer.
If the call fails, an exception + * is raised, which may be a {@link TypeError} or anything thrown by + * {@code o}'s implementation of {@code __index__}. In the special + * case of {@link OverflowError}, a replacement may be made where + * the message is formulated by this method and the type of + * exception by the caller. (Arcane, but it's what CPython does.) A + * recommended idiom for this is

+     *      int k = Number.asSize(key, IndexError::new);
+     * 
+ * + * @param o the object to convert to an {@code int} + * + * @param exc {@code null} or function of {@code String} returning + * the exception to use for overflow. + * @return {@code int} value of {@code o} + * @throws TypeError if {@code o} cannot be converted to a Python + * {@code int} + * @throws Throwable on other errors + */ + // Compare with CPython abstract.c :: PyNumber_AsSsize_t + static int asSize(Object o, Function exc) throws TypeError, Throwable { + + // Convert to Python int or sub-class. (May raise TypeError.) + Object value = PyNumber.index(o); + + try { + // We're done if PyLong.asSize() returns without error. + return PyLong.asSize(value); + } catch (OverflowError e) { + // Caller may replace overflow with own type of exception + if (exc == null) { + // No handler: default clipping is sufficient. + assert PyType.of(value).isSubTypeOf(PyLong.TYPE); + if (PyLong.signum(value) < 0) + return Integer.MIN_VALUE; + else + return Integer.MAX_VALUE; + } else { + // Throw an exception of the caller's preferred type. + String msg = String.format(CANNOT_FIT, PyType.of(o).getName()); + throw exc.apply(msg); + } + } + } + + /** + * Extract a slice index from a Python {@code int} or an object + * defining {@code __index__}, and return it as a Java {@code int}. + * So that the call need not be guarded by {@code v!=Py.None}, which + * is a common occurrence in the contexts where it is used, we + * special-case {@code None} to return a supplied default value. We + * silently reduce values larger than {@link Integer#MAX_VALUE} to + * {@code Integer.MAX_VALUE}, and silently boost values less than + * {@link Integer#MIN_VALUE} to {@code Integer.MIN_VALUE}. 
+ * + * @param v to convert + * @param defaultValue to return when {@code v==Py.None} + * @return normalised value as a Java {@code int} + * @throws TypeError if {@code v!=None} has no {@code __index__} + * @throws Throwable from the implementation of {@code __index__} + */ + // Compare CPython _PyEval_SliceIndex in eval.c and where called + static int sliceIndex(Object v, int defaultValue) throws TypeError, Throwable { + if (v == Py.None) { + return defaultValue; + } else { + if (PyNumber.indexCheck(v)) { + return asSize(v, null); + } else { + throw new TypeError( + "slice indices must be integers or None or have an __index__ method"); + } + } + } + + /** + * Returns {@code o} converted to an integer object. This is the + * equivalent of the Python expression {@code int(o)}. It will refer + * to the {@code __int__}, {@code __index__} and {@code __trunc__} + * special methods of {@code o}, in that order, and then (if + * {@code o} is string or bytes-like) attempt a conversion from text + * assuming decimal base. (Note: {@code __trunc__} is not yet consulted.) + * + * @param o operand + * @return {@code int(o)} + * @throws TypeError if {@code o} cannot be converted to a Python + * {@code int} + * @throws Throwable on other errors + */ + // Compare with CPython abstract.h :: PyNumber_Long + static Object asLong(Object o) throws TypeError, Throwable { + Object result; + PyType oType = PyType.of(o); + + if (oType == PyLong.TYPE) { + // Fast path for the case that we already have an int. + return o; + } + + else if (Slot.op_int.isDefinedFor(oType)) { + // XXX Need test of intiness and indexiness? + // Normalise away subclasses of int + result = PyLong.fromIntOf(o); + return PyLong.from(result); + } + + else if (Slot.op_index.isDefinedFor(oType)) { + // Normalise away subclasses of int + result = PyLong.fromIndexOrIntOf(o); + return PyLong.from(result); + } + + // XXX Not implemented: else try the __trunc__ method + + if (PyUnicode.TYPE.check(o)) + return PyLong.fromUnicode(o, 10); + + // else if ... 
support for bytes-like objects + else + throw argumentTypeError("int", 0, "a string, a bytes-like object or a number", o); + } + + private static final String CANNOT_INTERPRET_AS_INT = + "'%.200s' object cannot be interpreted as an integer"; + private static final String CANNOT_FIT = "cannot fit '%.200s' into an index-sized integer"; + + /** + * Create a {@code TypeError} for the named binary operation, along + * the lines "unsupported operand type(s) for OP: 'V' and 'W'". + * + * @param op operation to report + * @param v left operand (only {@code PyType.of(v)} is used) + * @param w right operand (only {@code PyType.of(w)} is used) + * @return exception to throw + */ + // XXX Possibly move to Slot so may bind early. + static PyException operandError(Slot op, Object v, Object w) { + return new TypeError(UNSUPPORTED_TYPES, op.opName, PyType.of(v).getName(), + PyType.of(w).getName()); + } + + private static final String UNSUPPORTED_TYPES = + "unsupported operand type(s) for %s: '%.100s' and '%.100s'"; +} diff --git a/core/src/main/java/org/python/core/PyObjectUtil.java b/core/src/main/java/org/python/core/PyObjectUtil.java new file mode 100644 index 000000000..3e6cdf68d --- /dev/null +++ b/core/src/main/java/org/python/core/PyObjectUtil.java @@ -0,0 +1,122 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.util.Map; +import java.util.StringJoiner; + +/** + * Miscellaneous static helpers commonly needed to implement Python + * objects in Java. + */ +public class PyObjectUtil { + + private PyObjectUtil() {} // no instances + + /** + * An implementation of {@code dict.__repr__} that may be applied to + * any Java {@code Map} between {@code Object}s, in which keys and + * values are represented as with {@code repr()}. 
+ * + * @param map to be reproduced + * @return a string like {@code {'a': 2, 'b': 3}} + * @throws Throwable from the {@code repr()} implementation + */ + static String mapRepr(Map<?, ?> map) throws Throwable { + StringJoiner sj = new StringJoiner(", ", "{", "}"); + for (Map.Entry<?, ?> e : map.entrySet()) { + String key = Abstract.repr(e.getKey()).toString(); + String value = Abstract.repr(e.getValue()).toString(); + sj.add(key + ": " + value); + } + return sj.toString(); + } + + /** + * A string along the lines "T object at 0xhhh", where T is the type + * of {@code o}. This is for creating default {@code __repr__} + * implementations seen around the code base and containing this + * form. By implementing it here, we encapsulate the problem of + * qualified type name and what "address" or "identity" should mean. + * + * @param o the object (not its type) + * @return string denoting {@code o} + */ + static String toAt(Object o) { + // For the time being identity means: + int id = System.identityHashCode(o); + // For the time being type name means: + String typeName = PyType.of(o).name; + return String.format("%s object at %#x", typeName, id); + } + + /** + * Produce a {@code String} name for a function-like object or its + * {@code str()} if it doesn't even have a + * {@code __qualname__}.
+     *     def functionStr(func):
+     *         try:
+     *             qualname = func.__qualname__
+     *         except AttributeError:
+     *             return str(func)
+     *         try:
+     *             module = func.__module__
+     *             if module is not None and module != 'builtins':
+     *                 return ".".join((module, qualname))
+     *         except AttributeError:
+     *             pass
+     *         return qualname
+     * 
This differs from its CPython counterpart + * {@code _PyObject_FunctionStr} by decisively not adding + * parentheses. + * + * @param func the function + * @return a name for {@code func} + */ + // Compare CPython _PyObject_FunctionStr in object.c + static String functionStr(Object func) { + Object name; + try { + Object qualname = Abstract.lookupAttr(func, "__qualname__"); + if (qualname != null) { + Object module = Abstract.lookupAttr(func, "__module__"); + name = qualname; /* used as it is unless a module other than builtins qualifies it */ + if (module != null && module != Py.None + && Abstract.richCompareBool("builtins", module, Comparison.NE)) { + name = String.format("%s.%s", Abstract.str(module), Abstract.str(qualname)); /* like CPython's "%S.%S" */ + } + } else { + name = Abstract.str(func); + } + return PyUnicode.asString(name); + } catch (Throwable e) { + // Unlike CPython fall back on a generic answer + return "function"; + } + } + + /** + * The type of exception thrown when an attempt to convert an object + * to a common data type fails. This type of exception carries no + * stack context, since it is used only as a sort of "alternative + * return value". + */ + public static class NoConversion extends Exception { + private static final long serialVersionUID = 1L; + + private NoConversion() { super(null, null, false, false); } + } + + /** + * A statically allocated {@link NoConversion} used in conversion + * methods to signal "cannot convert". No stack context is preserved + * in the exception. + */ + public static final NoConversion NO_CONVERSION = new NoConversion(); + + /** + * A statically allocated {@link StopIteration} used to signal + * exhaustion of an iterator, but providing no useful stack context.
+ */ + public static final StopIteration STOP_ITERATION = new StopIteration(); +} diff --git a/core/src/main/java/org/python/core/PySequence.java b/core/src/main/java/org/python/core/PySequence.java new file mode 100644 index 000000000..116ec3a8a --- /dev/null +++ b/core/src/main/java/org/python/core/PySequence.java @@ -0,0 +1,816 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.util.List; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.Supplier; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.python.base.MissingFeature; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.PySlice.Indices; +import org.python.core.Slot.EmptyException; + +/** + * Abstract API for operations on sequence types, corresponding to + * CPython methods defined in {@code abstract.h} and with names + * like: {@code PySequence_*}. + */ +public class PySequence extends Abstract { + + protected PySequence() {} // only static methods here + + /** + * {@code len(o)} with Python semantics. + * + * @param o object to operate on + * @return {@code len(o)} + * @throws Throwable from invoked method implementations, or the error built by {@code typeError} when the {@code op_len} slot of {@code o} is empty + */ + // Compare CPython PyObject_Size in abstract.c + public static int size(Object o) throws Throwable { + // Note that the slot is called op_len but this method, size. + try { + return (int)Operations.of(o).op_len.invokeExact(o); /* the (int) cast is part of the exact call signature */ + } catch (Slot.EmptyException e) { + throw typeError(HAS_NO_LEN, o); + } + } + + /** + * {@code o * count} with Python semantics. 
+ * + * @param o object to operate on + * @param count number of repetitions + * @return {@code o*count} + * @throws Throwable from invoked method implementations + */ + // Compare CPython PySequence_Repeat in abstract.c + public static Object repeat(Object o, int count) throws Throwable { + // There is no equivalent slot to sq_repeat: use multiplication + return PyNumber.multiply(o, count); + } + + /** + * {@code v + w} for sequences with Python semantics. + * + * @param v first object to concatenate + * @param w second object to concatenate + * @return {@code v + w} + * @throws Throwable from invoked method implementations + */ + // Compare CPython PySequence_Concat in abstract.c + public static Object concat(Object v, Object w) throws Throwable { + // There is no equivalent slot to sq_concat: use addition + return PyNumber.add(v, w); + } + + /** + * {@code o[key]} with Python semantics, where {@code o} may be a + * mapping or a sequence. + * + * @param o object to operate on + * @param key index + * @return {@code o[key]} + * @throws TypeError when {@code o} does not allow subscripting + * @throws Throwable from invoked method implementations + */ + // Compare CPython PyObject_GetItem in abstract.c + public static Object getItem(Object o, Object key) throws Throwable { + // Decisions are based on types of o and key + try { + Operations ops = Operations.of(o); + return ops.op_getitem.invokeExact(o, key); + } catch (EmptyException e) { + throw typeError(NOT_SUBSCRIPTABLE, o); + } + } + + /** + * {@code o[i1:i2]} with Python semantics, where {@code o} must be a + * sequence. Receiving objects will normally interpret indices as + * end-relative, and bounded to the sequence length. 
+ * + * @param o sequence to operate on + * @param i1 index of first item in slice + * @param i2 index of first item not in slice + * @return {@code o[i1:i2]} + * @throws TypeError when {@code o} does not allow subscripting + * @throws Throwable from invoked method implementations + */ + // Compare CPython PyObject_GetItem in abstract.c + static Object getSlice(Object o, int i1, int i2) throws Throwable { + // Decisions are based on type of o and known type of key + try { + Object key = new PySlice(i1, i2); /* the bounds travel as a slice through the ordinary op_getitem slot */ + Operations ops = Operations.of(o); + return ops.op_getitem.invokeExact(o, key); + } catch (EmptyException e) { + throw typeError(NOT_SLICEABLE, o); + } + } + + /** + * {@code o[key] = value} with Python semantics, where {@code o} may + * be a mapping or a sequence. + * + * @param o object to operate on + * @param key index + * @param value to put at index + * @throws TypeError when {@code o} does not support item assignment + * @throws Throwable from invoked method implementations + */ + // Compare CPython PyObject_SetItem in abstract.c + static void setItem(Object o, Object key, Object value) throws Throwable { + // Decisions are based on types of o and key + Operations ops = Operations.of(o); + try { + ops.op_setitem.invokeExact(o, key, value); + return; + } catch (EmptyException e) { + throw typeError(DOES_NOT_SUPPORT_ITEM, o, "assignment"); + } + } + + /** + * {@code del o[key]} with Python semantics, where {@code o} may be + * a mapping or a sequence. 
+ * + * @param o object to operate on + * @param key index at which to delete element + * @throws TypeError when {@code o} does not support item deletion + * @throws Throwable from invoked method implementations + */ + // Compare CPython PyObject_DelItem in abstract.c + static void delItem(Object o, Object key) throws Throwable { + // Decisions are based on types of o and key + Operations ops = Operations.of(o); + try { + ops.op_delitem.invokeExact(o, key); + return; + } catch (EmptyException e) { + throw typeError(DOES_NOT_SUPPORT_ITEM, o, "deletion"); + } + } + + /** + * Return the sequence or iterable {@code o} as a Java {@code List}. + * If {@code o} is one of several built-in types that implement Java + * {@code List}, this will be the object itself. Otherwise, + * it will be a copy in a Java list that supports efficient random + * access. (At present only exact {@code list} and {@code tuple} are handled; anything else raises {@code MissingFeature}.) + *

+ * If the object is not a Python sequence (defines + * {@code __getitem__}) or Python iterable (defines + * {@code __iter__}), call {@code exc} to raise an exception + * (typically a {@link TypeError}). + * + * @param the type of exception to throw + * @param o to present as a list + * @param exc a supplier (e.g. lambda expression) for the exception + * @return the iterable or its contents as a list + * @throws E to throw if an iterator cannot be formed + * @throws Throwable from the implementation of {@code o}. + */ + // Compare CPython PySequence_Fast in abstract.c + static List fastList(Object o, Supplier exc) + throws E, Throwable { + + if (PyList.TYPE.checkExact(o)) { + return (PyList)o; + + } else if (PyTuple.TYPE.checkExact(o)) { + return (PyTuple)o; + + } else { + // Not one of the ready-made lists + throw new MissingFeature("fastList() from iterable or sequence"); + } + } + + // Convenience functions constructing errors ---------------------- + + protected static final String HAS_NO_LEN = "object of type '%.200s' has no len()"; + private static final String NOT_SUBSCRIPTABLE = "'%.200s' object is not subscriptable"; + private static final String NOT_SLICEABLE = "'%.200s' object is unsliceable"; + protected static final String DOES_NOT_SUPPORT_ITEM = + "'%.200s' object does not support item %s"; + + // Classes supporting implementations of sequence types ----------- + + /** + * In the implementation of sequence types, it is useful to be able + * to create iterables and streams from their content. This + * interface provides a standardised API. Several {@link Delegate} + * implementations in the core also provide this interface. + * + * @param the type of element returned by the iterators + */ + public static interface Of extends Iterable { + + /** + * The length of this sequence. + * + * @return the length of this sequence + */ + int length(); + + /** + * Get an item from the sequence at a given index {@code i}. 
+ * + * @param i index + * @return item at index {@code i}. + */ + E get(int i); + + /** + * {@inheritDoc} The characteristics {@code SIZED} and + * {@code SUBSIZED} are additionally reported. + */ + @Override + default Spliterator spliterator() { + return Spliterators.spliterator(iterator(), length(), 0); + } + + /** + * @return the elements of this sequence as a {@code Stream} + */ + default Stream asStream() { return StreamSupport.stream(spliterator(), false); } + } + + /** + * A specialisation of {@link Of PySequence.Of<Integer>} where + * the elements may be consumed as primitive {@code int}. + */ + public static interface OfInt extends Of { + + /** + * Get the int item from the sequence at a given index {@code i}. + * + * @param i index + * @return item at index {@code i}. + */ + int getInt(int i); + + @Override + default Integer get(int i) { return getInt(i); } + + @Override + Spliterator.OfInt spliterator(); + + /** + * Provide a stream specialised to primitive {@code int}. + * + * @return a stream of primitive {@code int} + */ + IntStream asIntStream(); + + /** + * {@inheritDoc} + * + * @implNote The default implementation is the stream of values from + * {@link #asIntStream()}, boxed to {@code Integer}. Consumers + * that are able, will obtain improved efficiency by preferring + * {@link #asIntStream()} and specialising intermediate + * processing to {@code int}. + */ + @Override + default Stream asStream() { return asIntStream().boxed(); } + } + + /** + * This is a helper class for implementations of sequence types. A + * client sequence implementation may hold an instance of a + * sub-class of {@code Delegate}, to which it delegates certain + * operations. This sub-class could be an inner class with access to + * private members and methods of the client. + *

+ * {@code Delegate} declares abstract or overridable methods + * representing elementary operations on the client sequence (to + * get, set or delete an element or slice, or to enquire its length + * or type). It offers methods based on these that are usable + * implementations of the Python special methods + * {@code __getitem__}, {@code __setitem__}, {@code __delitem__}, + * {@code __add__} and {@code __mul__}. It provides the boiler-plate + * that tends to be the same from one Python type to another – + * recognition that an index is a slice, end-relative addressing, + * index type and range checks, and the raising of index-related + * Python exceptions. (For examples of this similarity, compare the + * CPython implementation of {@code list_subscript} with that of + * {@code bytes_subscript} or any other {@code *_subscript} method.) + *

+ * The client must override abstract methods declared here, in the + * delegate sub-class it defines, to specialise the behaviour. A + * sub-class supporting a mutable sequence type must additionally + * override {@link #setItem(int, Object)}, + * {@link #setSlice(Indices, Object)} and + * {@link #delSlice(Indices)}. It may also override + * {@link #delItem(int)}, or rely on the default implementation + * using {@code delSlice}. + * + * @param the element type returned by {@code iterator().next()} + * @param the slice type, and return type of + * {@link #getSlice(Indices)} etc.. + */ + /* + * This has been adapted from Jython 2 SequenceIndexDelegate and + * documented. + */ + static abstract class Delegate implements Of, Comparable> { + + /** + * Returns the length of the client sequence from the perspective of + * indexing and slicing operations. + * + * @return the length of the client sequence + */ + @Override + public abstract int length(); + + /** + * Provide the type of client sequence, primarily for use in error + * messages e.g. "TYPE index out of bounds". + * + * @implNote This can simply return a constant characteristic of the + * the implementing class, the Python type implements or + * supports. E.g the adaptor for a Java String returns + * {@code PyUnicode.TYPE} which is {@code str}. + * + * @return the type of client sequence + */ + public abstract PyType getType(); + + /** + * Return the name of the Python type of the client sequence. This + * is used in exception messages generated here. By default this is + * {@code getType().getName()}, which is normally correct, but + * Python {@code str} likes to call itself "string", exceptionally. + * + * @return the name of Python type being served + */ + public String getTypeName() { return getType().getName(); } + + /** + * Inner implementation of {@code __getitem__}, called by + * {@link #__getitem__(Object)} when its argument is an integer. 
The + * argument is the equivalent {@code int}, adjusted and checked by + * {@link #adjustGet(int)}. + * + * @param i index of item to return + * @return the element from the client sequence + * @throws Throwable from accessing the client data + */ + public abstract Object getItem(int i) throws Throwable; + + /** + * Inner implementation of {@code __getitem__}, called by + * {@link #__getitem__(Object)} when its argument is a + * {@link PySlice}. The argument is the return from + * {@link PySlice#getIndices(int)}, which is guaranteed to be + * range-compatible with the sequence length {@link #length()}. + * + * @param slice containing [start, stop, step, count] of the slice + * to return + * @return the slice from the client sequence + * @throws Throwable from errors other than indexing + */ + public abstract S getSlice(PySlice.Indices slice) throws Throwable; + + /** + * Inner implementation of {@code __setitem__}, called by + * {@link #__setitem__(Object,Object)} when its argument is an + * integer. The argument is the equivalent {@code int}, adjusted and + * checked by {@link #adjustSet(int)}. + *

+ * In mutable types, override this to assign a value to the given + * element of the client sequence. The default implementation (for + * immutable types) does nothing. + * + * @param i index of item to set + * @param value to set at {@code i} + * @throws Throwable from accessing the client data + */ + public void setItem(int i, Object value) throws Throwable {}; + + /** + * Inner implementation of {@code __setitem__}, called by + * {@link #__setitem__(Object,Object)} when its argument is a + * {@link PySlice}. The argument is the return from + * {@link PySlice#getIndices(int)}, which is guaranteed to be + * range-compatible with the sequence length {@link #length()}. + *

+ * In mutable types, override this to assign a value to the given + * slice of the client sequence. The default implementation (for + * immutable types) does nothing. + * + * @param slice to assign in the client sequence + * @param value to assign + * @throws Throwable from errors other than indexing + */ + public void setSlice(PySlice.Indices slice, Object value) throws Throwable {}; + + /** + * Inner implementation of {@code __delitem__}, called by + * {@link #__delitem__(Object)} when its argument is an + * integer. The argument is the equivalent {@code int}, adjusted and + * checked by {@link #adjustSet(int)}. + *

+ * The default implementation deletes a slice {@code [i:i+1]} using + * {@link #delSlice(Indices)}. + * + * @param i index of item to delete + * @throws Throwable from errors other than indexing + */ + public void delItem(int i) throws Throwable { + PySlice s = new PySlice(i, i + 1); + delSlice(s.new Indices(length())); + } + + /** + * Inner implementation of {@code __delitem__}, called by + * {@link #__delitem__(Object)} when its argument is a + * {@link PySlice}. The argument is the return from + * {@link PySlice#getIndices(int)}, which is guaranteed to be + * range-compatible with the sequence length {@link #length()}. + *

+ * In mutable types, override this to delete the given slice of the + * client sequence. The default implementation (for immutable types) + * does nothing. + * + * @param slice containing [start, stop, step, count] of the slice + * to delete + * @throws Throwable from accessing the client data + */ + public void delSlice(PySlice.Indices slice) throws Throwable {} + + /** + * Inner implementation of {@code __add__} on the client sequence, + * called by {@link #__add__(Object)}. + *

+ * The implementation of this method is responsible for validating + * the argument. If an {@code __add__} is being attempted between + * incompatible types it should return {@link Py#NotImplemented}, or + * throw a {@link NoConversion} exception, which will cause + * {@code __add__} to return {@code NotImplemented}. + * + * @param ow the right operand + * @return concatenation {@code self+ow} or + * {@link Py#NotImplemented} + * @throws OutOfMemoryError when allocating the result fails. + * {@link #__add__(Object) __add__} will raise a Python + * {@code OverflowError}. + * @throws NoConversion (optionally) when the client does not + * support the type of {@code ow}. + * @throws Throwable from other causes in the implementation + */ + abstract Object add(Object ow) throws OutOfMemoryError, NoConversion, Throwable; + + /** + * Inner implementation of {@code __radd__} on the client sequence, + * called by {@link #__radd__(Object)}. + *

+ * The implementation of this method is responsible for validating + * the argument. If an {@code __radd__} is being attempted between + * incompatible types it should return {@link Py#NotImplemented}, or + * throw a {@link NoConversion} exception, which will cause + * {@code __radd__} to return {@code NotImplemented}. + * + * @param ov the left operand + * @return concatenation {@code ov+self} or + * {@link Py#NotImplemented} + * @throws OutOfMemoryError when allocating the result fails. + * {@link #__radd__(Object) __radd__} will raise a Python + * {@code OverflowError}. + * @throws NoConversion (optionally) when the client does not + * support the type of {@code ov}. + * @throws Throwable from other causes in the implementation + */ + abstract Object radd(Object ov) throws OutOfMemoryError, NoConversion, Throwable; + + /** + * Inner implementation of {@code __mul__} on the client sequence, + * called by {@link #__mul__(Object)}. + * + * @param n the number of repetitions + * @return repetition {@code self*n} + * @throws OutOfMemoryError when allocating the result fails. + * {@link #__mul__(Object) __mul__} will raise a Python + * {@code OverflowError}. + * @throws Throwable from other causes in the implementation + */ + abstract S repeat(int n) throws OutOfMemoryError, Throwable; + + /** + * Implementation of {@code __getitem__}. Get either an element or a + * slice of the client sequence, after checks, by calling either + * {@link #getItem(int)} or {@link #getSlice(Indices)}. 
+ * + * @param item (or slice) to get from in the client + * @return the element or slice + * @throws ValueError if {@code slice.step==0} + * @throws TypeError from bad slice index types + * @throws Throwable from errors other than indexing + */ + public Object __getitem__(Object item) throws TypeError, Throwable { + if (PyNumber.indexCheck(item)) { + int i = PyNumber.asSize(item, IndexError::new); + return getItem(adjustGet(i)); + } else if (item instanceof PySlice) { + Indices slice = ((PySlice)item).new Indices(length()); + return getSlice(slice); + } else { + throw Abstract.indexTypeError(this, item); + } + } + + /** + * Implementation of {@code __setitem__}. Assign a value to either + * an element or a slice of the client sequence, after checks, by + * calling either {@link #setItem(int, Object)} or + * {@link #setSlice(Indices, Object)}. + * + * @param item (or slice) to assign in the client + * @param value to assign + * @throws ValueError if {@code slice.step==0} or + * {@code slice.step!=1} (an "extended" slice) and {@code value} + * is the wrong length. + * @throws TypeError from bad slice index types + * @throws Throwable from errors other than indexing + */ + public void __setitem__(Object item, Object value) throws TypeError, Throwable { + if (PyNumber.indexCheck(item)) { + int i = PyNumber.asSize(item, IndexError::new); + setItem(adjustSet(i), value); + } else if (item instanceof PySlice) { + Indices slice = ((PySlice)item).new Indices(length()); + setSlice(slice, value); + } else { + throw Abstract.indexTypeError(this, item); + } + } + + /** + * Implementation of {@code __delitem__}. Delete either an element + * or a slice of the client sequence, after checks, by calling + * either {@link #delItem(int)} or {@link #delSlice(Indices)}. 
+ * + * @param item (or slice) to delete in the client + * @throws ValueError if {@code slice.step==0} + * @throws IndexError if an integer {@code item} is out of range + * @throws TypeError from bad slice index types + * @throws Throwable from errors other than indexing + */ + public void __delitem__(Object item) throws TypeError, Throwable { + if (PyNumber.indexCheck(item)) { + int i = PyNumber.asSize(item, IndexError::new); + delItem(adjustSet(i)); + } else if (item instanceof PySlice) { + Indices slice = ((PySlice)item).new Indices(length()); + delSlice(slice); + } else { + throw Abstract.indexTypeError(this, item); + } + } + + /** + * Implementation of {@code __add__} (concatenation) by calling + * {@link #add(Object)}. + *

+ * The wrapper attempts no conversion of the argument, but it will + * catch {@link NoConversion} exceptions from {@link #add(Object)}, + * to return {@code NotImplemented}. It will also catch Java + * {@code OutOfMemoryError} and convert it to a Python + * {@link OverflowError}. + * + * @param w right operand + * @return {@code self+w} or {@code NotImplemented} + * @throws OverflowError when cannot allocate space + * @throws Throwable from other causes in the implementation. + */ + Object __add__(Object w) throws OverflowError, Throwable { + try { + return add(w); + } catch (OutOfMemoryError e) { + throw concatOverflow(); + } catch (NoConversion e) { + /* + * Since we do not implement __concat__ separate from __add__, unlike + * CPython, we do not yet know that Object w has no __radd__, and + * cannot produce the TypeError "can only concatenate S to S". + * Instead, Abstract.add will produce a TypeError about + * "unsupported operand types" for '+'. + */ + return Py.NotImplemented; + } + } + + /** + * Implementation of {@code __radd__} (reflected concatenation) by + * calling {@link #radd(Object)}. + *

+ * The wrapper attempts no conversion of the argument, but it will + * catch {@link NoConversion} exceptions from {@link #radd(Object)}, + * to return {@code NotImplemented}. It will also catch Java + * {@code OutOfMemoryError} and convert it to a Python + * {@link OverflowError}. + * + * @param v left operand + * @return {@code v+self} or {@code NotImplemented} + * @throws OverflowError when cannot allocate space + * @throws Throwable from other causes in the implementation. + */ + Object __radd__(Object v) throws OverflowError, Throwable { + try { + return radd(v); + } catch (OutOfMemoryError e) { + throw concatOverflow(); + } catch (NoConversion e) { + /* + * See comment in __add__, noting that sometimes __radd__ is called + * before v.__add__. + */ + return Py.NotImplemented; + } + } + + /** + * Implementation of {@code __mul__} (repetition) and + * {@code __rmul__} by calling {@link #repeat(int)}. + *

+ * The wrapper attempts conversion of the argument to {@code int}, + * and if this cannot be achieved, it will return + * {@code NotImplemented}. It will also catch Java + * {@code OutOfMemoryError} and convert it to a Python + * {@link OverflowError}. + * + * @param n number of repetitions in result + * @return {@code self*n} or {@code NotImplemented} + * @throws OverflowError when {@code n} over-size or cannot allocate + * space + * @throws TypeError if {@code n} has no {@code __index__} + * @throws Throwable from implementation of {@code __index__}, or + * other causes in the implementation. + */ + Object __mul__(Object n) throws TypeError, Throwable { + if (PyNumber.indexCheck(n)) { + int count = PyNumber.asSize(n, OverflowError::new); + try { + return repeat(count); + } catch (OutOfMemoryError e) { + throw repeatOverflow(); + } + } else { + /* + * Since we do not implement __repeat__ separate from __mul__, unlike + * CPython, we do not yet know that Object n has no __rmul__, so we + * cannot produce the TypeError + * "can't multiply sequence by non-int". Instead, Abstract.multiply + * will produce a TypeError about "unsupported operand types" for + * '*'. + */ + return Py.NotImplemented; + } + } + + /** + * Implementation of the {@code index} method of sequences. Find the + * index, in the given range, of an element equal to the argument. + * Candidates are fetched with {@link #getItem(int)} and compared + * for equality in ascending index order. 
+ * + * @param v value to match in the client + * @param start index of first element in range + * @param stop index of first element not in range + * @return the index at which found + * @throws ValueError if {@code v} not found + * @throws TypeError from bad {@code start} and {@code stop} types + * @throws Throwable from errors other than indexing + */ + public int index(Object v, Object start, Object stop) + throws TypeError, ValueError, Throwable { + int iStart = boundedIndex(start, 0); + int iStop = boundedIndex(stop, length()); + /* + * Note it is possible for iStart to be length(), but then + * iStop<=iStart so the loop doesn't run. + */ + for (int i = iStart; i < iStop; i++) { + if (Abstract.richCompareBool(v, getItem(i), Comparison.EQ)) { return i; } + } + throw new ValueError("%s.index(x): x not in %s", getType().name, getTypeName()); + } + + /** + * Check that an index {@code i} satisfies + * 0≤i<{@link #length()}. If the original index is negative, + * treat it as end-relative by first adding {@link #length()}. + * + * @param i to check is valid index + * @return range-checked {@code i} + * @throws IndexError if {@code i} out of range + */ + protected int adjustGet(int i) { + final int L = length(); + if (i < 0) { + i += L; + if (i >= 0) { return i; } + } else if (i < L) { return i; } + throw rangeIndexError(""); + } + + /** + * Check that an index {@code i} satisfies + * 0≤i<{@link #length()}. If the original index is negative, + * treat it as end-relative by first adding {@link #length()}. This + * differs from {@link #adjustGet(int)} only in that the message + * produced mentions "assignment". 
+ * + * @param i to check is valid index + * @return range-checked {@code i} + * @throws IndexError if {@code i} out of range + */ + protected int adjustSet(int i) throws IndexError { + final int L = length(); + if (i < 0) { + i += L; + if (i >= 0) { return i; } + } else if (i < L) { return i; } + throw rangeIndexError("assignment"); + } + + /** + * Creates an {@link IndexError} with the message + * "{@link #getTypeName()} KIND index out of range", e.g. "list + * assignment index out of range". + * + * @param kind word to insert for KIND: "" or "assignment". + * @return an exception to throw + */ + final protected IndexError rangeIndexError(String kind) { + String space = kind.length() > 0 ? " " : ""; + return new IndexError("%s%s%s index out of range", getTypeName(), space, kind); + } + + /** + * An overflow error with the message "concatenated + * {@link #getTypeName()} is too long", involving the type name of + * the client sequence type. + * + * @return an exception to throw + */ + private OverflowError concatOverflow() { + return new OverflowError("concatenated %s is too long", getTypeName()); + } + + /** + * An overflow error with the message "repeated S is too long", + * where S is the type name of the client sequence + * ({@link #getTypeName()}). + * + * @return an exception to throw + */ + private OverflowError repeatOverflow() { + return new OverflowError("repeated %s is too long", getTypeName()); + } + + /** + * Accept an object index (or {@code null}), treating negative + * values as end-relative, and bound it to the sequence range. The + * index object must be convertible by + * {@link PyNumber#asSize(Object, java.util.function.Function) + * PyNumber.asSize}. Unlike {@link #adjustGet(int)}, it is not an error + * for the index value to fall outside the valid range. (It is + * simply clipped to the nearer end.) 
+ * + * @param index purported index (or {@code null}) + * @param defaultValue to use if {@code index==null} + * @return converted index + * @throws TypeError from bad {@code index} type + * @throws Throwable from other conversion errors + */ + protected int boundedIndex(Object index, int defaultValue) throws TypeError, Throwable { + + // Convert the argument (or raise a TypeError) + int i, L = length(); + if (index == null) { + i = defaultValue; + } else if (PyNumber.indexCheck(index)) { + i = PyNumber.asSize(index, IndexError::new); + } else { + throw Abstract.indexTypeError(this, index); + } + + // Bound the now integer index to the sequence + if (i < 0) { + i += L; + return Math.max(0, i); + } else { + return Math.min(L, i); + } + } + } +} diff --git a/core/src/main/java/org/python/core/PySlice.java b/core/src/main/java/org/python/core/PySlice.java new file mode 100644 index 000000000..b2153f0c8 --- /dev/null +++ b/core/src/main/java/org/python/core/PySlice.java @@ -0,0 +1,290 @@ +// Copyright (c)2022 Jython Developers. +// Copyright (c) Corporation for National Research Initiatives +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; + +import org.python.core.Exposed.Member; +import org.python.core.PyType.Flag; + +/** + * The Python {@code slice} object. + */ +public class PySlice extends AbstractPyObject { + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("slice", MethodHandles.lookup()).flagNot(Flag.BASETYPE)); + + @Member + final private Object start; + + @Member + final private Object stop; + + @Member + final private Object step; + + /** + * Create a Python {@code slice} from {@code object} arguments. + * + * @param start index or {@code null} (for {@code None}). + * @param stop index or {@code null} (for {@code None}). + * @param step or {@code null} (for {@code None}). 
+ */ + public PySlice(Object start, Object stop, Object step) { + super(TYPE); + this.start = start != null ? start : Py.None; + this.stop = stop != null ? stop : Py.None; + this.step = step != null ? step : Py.None; + } + + /** + * Create a Python {@code slice} from two {@code object} arguments. + * The step is implicitly {@code None}. + * + * @param start index or {@code null} (for {@code None}). + * @param stop index or {@code null} (for {@code None}). + */ + public PySlice(Object start, Object stop) { this(start, stop, null); } + + /** + * Create a Python {@code slice} from Java {@code int} arguments. + * + * @param start index of first item in slice. + * @param stop index of first item not in slice. + */ + // Compare CPython _PySlice_FromIndices in sliceobject.c + public PySlice(int start, int stop) { this(start, stop, Py.None); } + + // @formatter:off + /* + @ExposedNew + static PyObject slice_new(PyNewWrapper new_, boolean init, PyType subtype, PyObject[] args, + String[] keywords) { + if (args.length == 0) { + throw new TypeError("slice expected at least 1 arguments, got " + args.length); + } else if (args.length > 3) { + throw new TypeError("slice expected at most 3 arguments, got " + args.length); + } + ArgParser ap = new ArgParser("slice", args, keywords, "start", "stop", "step"); + PySlice slice = new PySlice(); + if (args.length == 1) { + slice.stop = ap.getPyObject(0); + } else if (args.length == 2) { + slice.start = ap.getPyObject(0); + slice.stop = ap.getPyObject(1); + } else if (args.length == 3) { + slice.start = ap.getPyObject(0); + slice.stop = ap.getPyObject(1); + slice.step = ap.getPyObject(2); + } + return slice; + } + */ + // @formatter:on + + @SuppressWarnings("unused") + private Object __eq__(Object o) throws Throwable { + return this == o ? true : compare(o, Comparison.EQ); + } + + @SuppressWarnings("unused") + private Object __ne__(Object o) throws Throwable { + return this == o ? 
false : compare(o, Comparison.NE); + } + + /* + * @ExposedMethod(doc = BuiltinDocs.slice_indices_doc) + */ + final Object indices(Object length) throws Throwable { + Indices indices = new Indices(PyNumber.asSize(length, OverflowError::new)); + return Py.tuple(indices.start, indices.stop, indices.step); + } + + /** + * Calculate the actual indices of this slice in relation to a + * sequence of length {@code length}, reporting the effective start, + * stop, and step, and the number of elements in the slice. + * + * @param length of the sequence + * @return an {@link Indices} from this slice and the length + * @throws TypeError if any index has no {@code __index__} + * @throws Throwable from implementation of {@code __index__} + */ + // Compare CPython PySlice_GetIndicesEx in sliceobject.c + Indices getIndices(int length) throws TypeError, Throwable { return new Indices(length); } + + @SuppressWarnings("unused") + private Object __repr__() { return String.format("slice(%s, %s, %s)", start, stop, step); } + + /* + * @ExposedMethod + */ + final Object __reduce__() { return Py.tuple(TYPE, Py.tuple(start, stop, step)); } + + /** + * An object that presents the {@code start}, {@code stop} and + * {@code step} data members from this {@code slice} object as Java + * {@code int}s in an immutable data object, adjusted to a specific + * length of a notional source sequence (see + * {@link Indices#Indices(int)}). + *

+ * End-relative addressing (as in {@code a[-3:-1]}) and {@code None} + * indices (as in {@code a[:]}) have been translated in construction + * to absolute indices a client may use without further range + * checks. + */ + // Compare CPython PySlice_GetIndicesEx in sliceobject.c + public class Indices { + private static final int MIN = Integer.MIN_VALUE; + private static final int MAX = Integer.MAX_VALUE; + /** + * Absolute index in the source sequence of the first element to be + * taken by the slice. If {@link #slicelength}{@code != 0}, this + * index lies within the bounds of the sequence. + */ + public final int start; + /** + * Absolute index relative to the source sequence that is the image + * of {@link PySlice#stop}. Dealing correctly with a step size other + * than one is difficult. Clients should normally choose + * {@link #slicelength}, to decide how many elements to take from + * the sequence, rather than use {@code stop} to decide when to + * stop. + */ + public final int stop; + /** + * The index step to make when selecting elements from the source + * sequence. Never zero. + */ + public final int step; + /** + * The number of elements to select from the source sequence, and + * therefore the length of the slice to be generated. + */ + public final int slicelength; + + /** + * Extract the {@code start}, {@code stop} and {@code step} data + * members from the parent {@code slice} as Java {@code int}s in an + * instance of {@code Indices}, then adjust {@code start} and + * {@code stop} assuming they apply to a sequence of the specified + * {@code length}. Store in {@code slicelength} the number of + * elements the parent slice will take from that sequence. + *

+ * Out of bounds indices are clipped in a manner consistent with the + * handling of normal slices. The idiom:

+         * Indices x = slice.new Indices(a.length);
+         * for (int k=0; k<x.slicelength; k++) {
+         *     f(a[x.start + k*x.step]);
+         * }
+         * 
will access only in-range elements. + *

+ * Detail: Before adjusting to the specific sequence length, + * the following occurs. Extract the {@code start}, {@code stop} and + * {@code step} data members from the parent {@code slice} and + * convert them to Java {@code int}s using their {@code __index__} + * methods, or mapping {@code None} to conventional values. Silently + * reduce values larger than {@code Integer.MAX_VALUE} to {@code + Integer.MAX_VALUE}. Silently boost {@code start} and {@code stop} + * values less than {@code Integer.MIN_VALUE} to {@code + Integer.MIN_VALUE}. And silently boost {@code step} values less + * than {@code -Integer.MAX_VALUE} to {@code -Integer.MAX_VALUE}. + * + * @param length of the target sequence + * @throws TypeError if any index not {@code None} has no + * {@code __index__} + * @throws ValueError if {@code step==0} + * @throws Throwable from the implementation of {@code __index__} + */ + // Compare CPython PySlice_Unpack in sliceobject.c + // Compare CPython PySlice_AdjustIndices in sliceobject.c + public Indices(int length) throws TypeError, ValueError, Throwable { + + // Counterparts while we think about final values. + int start0, stop0, step0; + + // Bound and validate the step. + step0 = PyNumber.sliceIndex(PySlice.this.step, 1); + if (step0 == 0) + throw new ValueError("slice step cannot be zero"); + /* + * Here step0 might be MIN = -MAX-1; in this case we replace it with + * -MAX. This doesn't affect the semantics, and it guards against + * later undefined behaviour resulting from code that does + * "step = -step" as part of a slice reversal. + */ + step = Math.max(step0, -MAX); + + if (step > 0) { + // The start, stop while ignoring the sequence length. 
+ start0 = PyNumber.sliceIndex(PySlice.this.start, 0); + stop0 = PyNumber.sliceIndex(PySlice.this.stop, MAX); + + // Now adjust to the actual sequence length + + if (start0 < 0) + start = Math.max(start0 + length, 0); + else + start = Math.min(start0, length); + + if (stop0 < 0) + stop = Math.max(stop0 + length, start); + else + stop = Math.min(Math.max(stop0, start), length); + + assert stop >= start; + slicelength = (stop - start + step - 1) / step; + + } else { + // The start, stop while ignoring the sequence length. + start0 = PyNumber.sliceIndex(PySlice.this.start, MAX); + stop0 = PyNumber.sliceIndex(PySlice.this.stop, MIN); + + // Now adjust to the actual sequence length + + if (start0 < 0) + start = Math.max(start0 + length, -1); + else + start = Math.min(start0, length - 1); + + if (stop0 < 0) + stop = Math.max(stop0 + length, -1); + else + stop = Math.min(stop0, start); + + assert stop <= start; + slicelength = (start - stop - step - 1) / (-step); + } + } + + @Override + public String toString() { + return String.format("[%d:%d:%d] len= %d", start, stop, step, slicelength); + } + } + + // Plumbing ------------------------------------------------------- + + /** + * Invoke the comparison specified (supports {@code __eq__} and + * {@code __ne__}). 
+ * + * @param o must be a slice or return {@code NotImplemented} + * @param op {@link Comparison#EQ} or {@link Comparison#NE} + * @return result of comparison or {@code NotImplemented} + * @throws Throwable from element comparison + */ + private Object compare(Object o, Comparison op) throws Throwable { + if (TYPE.checkExact(o)) { + // Compare the slices as if they were tuples + PySlice s = (PySlice)o; + return Abstract.richCompare(Py.tuple(start, stop, step), + Py.tuple(s.start, s.stop, s.step), op); + } else { + return Py.NotImplemented; + } + } +} diff --git a/core/src/main/java/org/python/core/PyTuple.java b/core/src/main/java/org/python/core/PyTuple.java new file mode 100644 index 000000000..1ef33a1c1 --- /dev/null +++ b/core/src/main/java/org/python/core/PyTuple.java @@ -0,0 +1,747 @@ +// Copyright (c)2023 Jython Developers. +// Copyright (c) Corporation for National Research Initiatives +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.util.AbstractList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.ListIterator; +import java.util.NoSuchElementException; +import java.util.StringJoiner; +import java.util.stream.Stream; + +import org.python.base.InterpreterError; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.PySlice.Indices; +import org.python.core.PyType.Spec; + +/** The Python {@code tuple} object. */ +public class PyTuple extends AbstractList implements CraftedPyObject { + + /** The Python type object for {@code tuple}. */ + public static final PyType TYPE = PyType.fromSpec( // + new Spec("tuple", MethodHandles.lookup())); + + /** The Python type of this instance. */ + protected final PyType type; + + /** The elements of the {@code tuple}. */ + final Object[] value; + + /** Implementation help for sequence methods. 
*/ + private TupleDelegate delegate = new TupleDelegate(); + + /** + * Potentially unsafe constructor, capable of creating a + * "{@code tuple} view" of an array, or a copy. We make a copy (the + * safe option) if the caller is not prepared to promise + * not to modify the array. The arguments begin with a + * claimed element type for the array, or the element type of the + * array to create. + * + * @param type sub-type for which this is being created + * @param cls class of elements + * @param iPromiseNotToModifyTheArray if {@code true} try to re-use + * the array, otherwise make a copy. + * @param value of the tuple + * @throws ArrayStoreException if any element of {@code value} is + * not assignment compatible with {@code Object}. Caller would + * have to have cast {@code value} to avoid static checks. + */ + // @SuppressWarnings("unchecked") + private PyTuple(PyType type, boolean iPromiseNotToModifyTheArray, E[] value) + throws ArrayStoreException { + this.type = type; + if (iPromiseNotToModifyTheArray) { + this.value = value; + } else { + // We make a new array . + int n = value.length; + this.value = new Object[n]; + System.arraycopy(value, 0, this.value, 0, n); + } + } + + /** + * Unsafely wrap an array of {@code Object} in a "tuple view". + *

+ * The method is unsafe insofar as the array becomes embedded as the + * value of the tuple. The client therefore promises not to + * modify the content. For this reason, this method should only + * ever be private. If you feel tempted to make it otherwise, + * consider using (or improving) {@link Builder}. + * + * @param component type of the array in the new tuple + * @param value of the new tuple or {@code null} + * @return a tuple with the given contents or {@link #EMPTY} + */ + private static PyTuple wrap(E[] value) throws ArrayStoreException { + if (value == null) + return EMPTY; + else + return new PyTuple(TYPE, true, value); + } + + /** + * Construct a {@code PyTuple} from an array of {@link Object}s or + * zero or more {@link Object} arguments. The argument is copied for + * use, so it is safe to modify an array passed in. + * + * @param Actual element type + * @param elements of the tuple + */ + @SafeVarargs + public PyTuple(E... elements) { this(TYPE, elements); } + + /** + * As {@link #PyTuple(Object...)} for Python sub-class specifying + * {@link #type}. + * + * @param element type of the {@code tuple} internally + * @param type actual Python sub-class to being created + * @param elements of the tuple + */ + @SafeVarargs + protected PyTuple(PyType type, E... elements) { this(type, false, elements); } + + /** + * Construct a {@code PyTuple} from the elements of a collection. + * + * @param c source of element values for this {@code tuple} + */ + PyTuple(Collection c) { this(TYPE, c); } + + /** + * As {@link #PyTuple(Collection)} for Python sub-class specifying + * {@link #type}. + * + * @param type actual Python sub-class to being created + * @param c elements of the tuple + */ + protected PyTuple(PyType type, Collection c) { + this(type, true, c.toArray(new Object[c.size()])); + } + + /** + * Construct a {@code PyTuple} from the elements of a stream. 
+ * + * @param s source of element values for this {@code tuple} + */ + PyTuple(Stream s) { this(TYPE, true, s.toArray()); } + + /** + * As {@link #PyTuple(Object[], int, int)} for Python sub-class + * specifying {@link #type}. + * + * @param type actual Python type to construct + * @param a source of element values + * @param start first element to include + * @param count number of elements to take + */ + protected PyTuple(PyType type, Object a[], int start, int count) { + this.type = type; + // We make a new array. + this.value = new Object[count]; + System.arraycopy(a, start, this.value, 0, count); + } + + /** + * Construct a {@code PyTuple} from an array of {@link Object}s or + * zero or more {@link Object} arguments provided as a slice of an + * array. The argument is copied for use, so it is safe to modify an + * array passed in. + * + * @param a source of element values + * @param start first element to include + * @param count number of elements to take + */ + PyTuple(Object a[], int start, int count) { this(TYPE, a, start, count); } + + /** + * Construct a {@code PyTuple} from the elements of an array, or if + * the collection is empty, return {@link #EMPTY}. + * + * @param component type + * @param a value of new tuple + * @return a tuple with the given contents or {@link #EMPTY} + */ + static PyTuple from(E[] a) { + int n = a.length; + return a.length == 0 ? EMPTY : new PyTuple(a, 0, n); + } + + /** + * Construct a {@code PyTuple} from the elements of a collection, or + * if the collection is empty, return {@link #EMPTY}. In + * circumstances where the argument will often be empty, this has + * space and time advantages over the constructor + * {@link #PyTuple(Collection)}. + * + * @param component type + * @param c value of new tuple + * @return a tuple with the given contents or {@link #EMPTY} + */ + static PyTuple from(Collection c) { return c.size() == 0 ? 
EMPTY : new PyTuple(c); } + + // @formatter:off + /* + @ExposedNew + final static Object tuple_new(PyNewWrapper new_, boolean init, PyType subtype, + Object[] args, String[] keywords) { + ArgParser ap = new ArgParser("tuple", args, keywords, new String[] {"sequence"}, 0); + Object S = ap.getPyObject(0, null); + if (new_.for_type == subtype) { + if (S == null) { + return EMPTY; + } + if (S.getType() == PyTuple.TYPE) { + return S; + } + if (S instanceof PyTuple.Derived) { + return new PyTuple(((PyTuple) S).getArray()); + } + return fromArrayNoCopy(Py.make_array(S)); + } else { + if (S == null) { + return new PyTuple.Derived(subtype, Py.EmptyObjects); + } + return new PyTuple.Derived(subtype, Py.make_array(S)); + } + } + + /** + * Return a new PyTuple from an iterable. + * + * Raises a TypeError if the object is not iterable. + * + * @param iterable an iterable Object + * @return a PyTuple containing each item in the iterable + * / + public static PyTuple fromIterable(Object iterable) { + return fromArrayNoCopy(Py.make_array(iterable)); + } + */ + // @formatter:on + + + @Override + public PyType getType() { return type; } + + /** Convenient constant for a {@code tuple} with zero elements. 
*/ + static final PyTuple EMPTY = new PyTuple(); + + // Special methods ----------------------------------------------- + + /* + @ExposedMethod(doc = BuiltinDocs.tuple___len___doc) + */ + @SuppressWarnings("unused") + private int __len__() { return size(); } + + /* + @ExposedMethod(doc = BuiltinDocs.tuple___contains___doc) + */ + @SuppressWarnings("unused") + private boolean __contains__(Object o) throws Throwable { + for (Object v : value) { + if (Abstract.richCompareBool(v, o, Comparison.EQ)) { return true; } + } + return false; + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___ne___doc) + */ + @SuppressWarnings("unused") + private Object __ne__(Object o) { + return delegate.cmp(o, Comparison.NE); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___eq___doc) + */ + @SuppressWarnings("unused") + private Object __eq__(Object o) { + return delegate.cmp(o, Comparison.EQ); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___gt___doc) + */ + @SuppressWarnings("unused") + private Object __gt__(Object o) { + return delegate.cmp(o, Comparison.GT); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___ge___doc) + */ + @SuppressWarnings("unused") + private Object __ge__(Object o) { + return delegate.cmp(o, Comparison.GE); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___lt___doc) + */ + @SuppressWarnings("unused") + private Object __lt__(Object o) { + return delegate.cmp(o, Comparison.LT); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___le___doc) + */ + @SuppressWarnings("unused") + private Object __le__(Object o) { + return delegate.cmp(o, Comparison.LE); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___add___doc) + */ + @SuppressWarnings("unused") + private Object __add__(Object w) throws Throwable { + return delegate.__add__(w); + } + + @SuppressWarnings("unused") + 
private Object __radd__(Object v) throws Throwable { + return delegate.__radd__(v); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___mul___doc) + */ + @SuppressWarnings("unused") + private Object __mul__(Object n) throws Throwable { + return delegate.__mul__(n); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___rmul___doc) + */ + @SuppressWarnings("unused") + private Object __rmul__(Object n) throws Throwable { + return delegate.__mul__(n); + } + + /* + @ExposedMethod(doc = BuiltinDocs.tuple___iter___doc) + */ + // @formatter:off + /* + @SuppressWarnings("unused") + private Object tuple___iter__() { + return new PyTupleIterator(this); + } + */ + // @formatter:on + + /* + @ExposedMethod(doc = BuiltinDocs.tuple___getitem___doc) + */ + @SuppressWarnings("unused") + private Object __getitem__(Object item) throws Throwable { + return delegate.__getitem__(item); + } + + /* + @ExposedMethod(doc = BuiltinDocs.tuple___getnewargs___doc) + */ + @SuppressWarnings("unused") + private PyTuple __getnewargs__() { + return new PyTuple((Object)this); + } + + @Override + public int hashCode() { + try { + return __hash__(); + } catch (PyException e) { + throw e; + } catch (Throwable t) { + throw new InterpreterError(t, "Non-Python exception in __hash__"); + } + } + + /* + @ExposedMethod(doc = BuiltinDocs.tuple___hash___doc) + */ + @SuppressWarnings("unused") + private int __hash__() throws Throwable { + /* + * Ported from C in CPython 3.11, which in turn is based on the + * xxHash specification. We do not attempt to maintain historic + * hash of () or avoid returning -1. Seed the accumulator based + * on the length. 
+ */ + int acc = H32P5 * value.length; + for (Object x : value) { + acc += H32P2 * Abstract.hash(x); + // The parenthetical expression is rotate left 13 + acc = H32P1 * (acc << 13 | acc >>> 19); + } + return acc; + } + /* + @ExposedMethod(doc = BuiltinDocs.tuple_count_doc) + */ + @SuppressWarnings("unused") + private int count(Object v) throws Throwable { + int count = 0; + for (Object item : value) { + if (Abstract.richCompareBool(item, v, Comparison.EQ)) { count++; } + } + return count; + } + + /* + @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.tuple_index_doc) + */ + @SuppressWarnings("unused") + private Object index(Object v, Object start, Object stop) throws Throwable { + return delegate.index(v, start, stop); + } + + @Override + public boolean equals(Object other) { + try { + return Abstract.richCompareBool(this, other, Comparison.EQ); + } catch (PyException e) { + throw e; + } catch (Throwable e) { + return false; + } + } + + // AbstractList methods ------------------------------------------ + + @Override + public Object get(int i) { return value[i]; } + + @Override + public int size() { return value.length; } + + @Override + public Iterator iterator() { return listIterator(0); } + + @Override + public ListIterator listIterator(final int index) { + + if (index < 0 || index > value.length) + throw new IndexOutOfBoundsException( + String.format("%d outside [0, %d)", index, value.length)); + + return new ListIterator() { + + private int i = index; + + @Override + public boolean hasNext() { return i < value.length; } + + @Override + public Object next() { + if (i < value.length) + return value[i++]; + else + throw new NoSuchElementException(); + } + + @Override + public boolean hasPrevious() { return i > 0; } + + @Override + public Object previous() { + if (i > 0) + return value[--i]; + else + throw new NoSuchElementException(); + } + + @Override + public int nextIndex() { return i; } + + @Override + public int previousIndex() { return i - 1; } + 
+ @Override + public void remove() { throw new UnsupportedOperationException(); } + + @Override + public void set(Object o) { throw new UnsupportedOperationException(); } + + @Override + public void add(Object o) { throw new UnsupportedOperationException(); } + }; + } + + /** + * A class for constructing a tuple element-wise. Sometimes the + * elements of a {@code tuple} have to be generated sequentially. + * The natural thing is to allocate and fill an array, and then for + * the sake of efficiency, to make that array the storage of a + * {@code PyTuple}. The direct approach breaks the encapsulation + * that guarantees a {@code PyTuple} is immutable. + *

+ * This class lets a client allocate and write an array + * element-wise, that becomes the storage of a {@code tuple}, + * without ever having a direct reference to the array. + */ + public static class Builder { + private static final int MINSIZE = 16; + private Object[] value; + private int len = 0; + + /** + * Create an empty buffer of a defined initial capacity. + * + * @param capacity initially + */ + public Builder(int capacity) { value = new Object[capacity]; } + + /** + * Create an empty buffer of a default initial capacity. + */ + Builder() { this.value = Py.EMPTY_ARRAY; } + + /** @return the number of elements currently. */ + public int length() { return len; } + + /** Ensure there is room for another {@code n} elements. */ + private void ensure(int n) { + if (len + n > value.length) { + int newSize = Math.max(value.length * 2, MINSIZE); + Object[] newValue = new Object[newSize]; + System.arraycopy(value, 0, newValue, 0, len); + value = newValue; + } + } + + /** + * Append one element. + * + * @param v to append + * @return this builder + */ + public Object append(Object v) { + ensure(1); + value[len++] = v; + return this; + } + + /** + * Append all the elements from a sequence. + * + * @param seq supplying elements to append + * @return this builder + */ + public Builder append(Collection seq) { + ensure(seq.size()); + for (Object v : seq) { value[len++] = v; } + return this; + } + + /** + * Append all the elements available from an iterator. + * + * @param iter supplying elements to append + * @return this builder + */ + public Builder append(Iterator iter) { + while (iter.hasNext()) { append(iter.next()); } + return this; + } + + /** + * Provide the contents as a Python {@code tuple} and reset the + * builder to empty. (This is a "destructive read".) 
+ * + * @return the contents as a Python {@code tuple} + */ + public PyTuple take() { + Object[] v; + if (len == 0) { + return EMPTY; + } else if (len == value.length) { + // The array is exactly filled: use it without copy. + v = value; + value = Py.EMPTY_ARRAY; + } else { + // The array is partly filled: copy the part used. + v = Arrays.copyOf(value, len); + } + len = 0; + return wrap(v); + } + + /** + * Provide the contents as a Java {@code String} + * (non-destructively). + */ + @Override + public String toString() { return (new PyTuple(TYPE, value, 0, len)).toString(); } + } + + // Plumbing ------------------------------------------------------ + + /* + * Constants used in __hash__ (from CPython tupleobject.c), in the + * 32-bit configuration (SIZEOF_PY_UHASH_T > 4 is false). Although + * out of range for signed 32 bit integers, the multiplications are + * correct, since (U-C) * (V-C) = U*V when taken mod C. + */ + private static final int H32P1 = (int)2654435761L; + private static final int H32P2 = (int)2246822519L; + private static final int H32P5 = 374761393; + + /** + * Wrap this {@code PyTuple} as a {@link PySequence.Delegate}, for + * the management of indexing and other sequence operations. 
+ */ + class TupleDelegate extends PySequence.Delegate { + + @Override + public int length() { return value.length; }; + + @Override + public PyType getType() { return type; } + + @Override + public Object getItem(int i) { return value[i]; } + + @Override + public Object get(int i) { return value[i]; } + + @Override + public Object getSlice(Indices slice) throws Throwable { + Object[] v; + if (slice.step == 1) + v = Arrays.copyOfRange(value, slice.start, slice.stop); + else { + v = new Object[slice.slicelength]; + int i = slice.start; + for (int j = 0; j < slice.slicelength; j++) { + v[j] = value[i]; + i += slice.step; + } + } + return new PyTuple(TYPE, true, v); + } + + @Override + Object add(Object ow) throws NoConversion { + if (ow instanceof PyTuple) { + PyTuple w = (PyTuple)ow; + return PyTuple.concat(value, w.value); + } else { + throw PyObjectUtil.NO_CONVERSION; + } + } + + @Override + Object radd(Object ov) throws NoConversion { + if (ov instanceof PyTuple) { + PyTuple v = (PyTuple)ov; + return PyTuple.concat(v.value, value); + } else { + throw PyObjectUtil.NO_CONVERSION; + } + } + + @Override + Object repeat(int n) { + if (n == 0) + return EMPTY; + else if (n == 1 || value.length == 0) + return PyTuple.this; + else { + int m = value.length; + Object[] b = new Object[n * m]; + for (int i = 0, p = 0; i < n; i++, p += m) { System.arraycopy(value, 0, b, p, m); } + return new PyTuple(TYPE, true, b); + } + } + + @Override + public Iterator iterator() { return PyTuple.this.iterator(); } + + @Override + public int compareTo(PySequence.Delegate other) { + try { + // Tuple is comparable only with another tuple + int N = value.length, M = other.length(), i = 0; + + for (i = 0; i < N; i++) { + Object a = value[i]; + if (i < M) { + Object b = other.getItem(i); + // if a != b, then we've found an answer + if (!Abstract.richCompareBool(a, b, Comparison.EQ)) + return Abstract.richCompareBool(a, b, Comparison.GT) ? 
1 : -1; + if (!Abstract.richCompareBool(a, b, Comparison.EQ)) + return Abstract.richCompareBool(a, b, Comparison.GT) ? 1 : -1; + } else + // value has not run out, but other has. We win. + return 1; + } + + /* + * The arrays matched over the length of value. The other is the + * winner if it still has elements. Otherwise it's a tie. + */ + return i < M ? -1 : 0; + } catch (PyException e) { + // It's ok to throw legitimate Python exceptions + throw e; + } catch (Throwable t) { + /* + * Contract of Comparable prohibits propagation of checked + * exceptions, but richCompareBool in principle throws anything. + */ + // XXX perhaps need a PyException to wrap Java Throwable + throw new InterpreterError(t, "non-Python exeption in comparison"); + } + } + + /** + * Compare this delegate with the delegate of the other + * {@code tuple}, or return {@code NotImplemented} if the other is + * not a {@code tuple}. + * + * @param other tuple at right of comparison + * @param op type of operation + * @return boolean result or {@code NotImplemented} + */ + Object cmp(Object other, Comparison op) { + if (other instanceof PyTuple) { + // Tuple is comparable only with another tuple + TupleDelegate o = ((PyTuple)other).delegate; + return op.toBool(delegate.compareTo(o)); + } else { + return Py.NotImplemented; + } + } + } + + /** Concatenate two arrays into a tuple (for TupleAdapter). */ + private static PyTuple concat(Object[] v, Object[] w) { + int n = v.length, m = w.length; + Object[] b = new Object[n + m]; + System.arraycopy(v, 0, b, 0, n); + System.arraycopy(w, 0, b, n, m); + return new PyTuple(TYPE, true, b); + } + + @Override + public String toString() { + // Support the expletive comma "(x,)" for one element. + String suffix = value.length == 1 ? 
",)" : ")"; + StringJoiner sj = new StringJoiner(", ", "(", suffix); + for (Object v : value) { sj.add(v.toString()); } + return sj.toString(); + } +} diff --git a/core/src/main/java/org/python/core/PyType.java b/core/src/main/java/org/python/core/PyType.java new file mode 100644 index 000000000..b76144c20 --- /dev/null +++ b/core/src/main/java/org/python/core/PyType.java @@ -0,0 +1,1659 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.VarHandle; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.python.base.InterpreterError; +import org.python.core.Exposed.Getter; +import org.python.core.Slot.Signature; + +/** + * The Python {@code type} object. Type objects are normally created + * (when created from Java) by a call to + * {@link PyType#fromSpec(Spec)}. + */ +public class PyType extends Operations implements DictPyObject { + /* + * The static initialisation of PyType is a delicate business, since + * it occurs early in the initialisation of the run-time system. The + * objective is simple: we must bring into existence type objects + * for both PyBaseObject ('object') and PyType ('type'), and then + * the descriptor types that will populate the dictionaries of all + * types including their own. + * + * This last fact makes it necessary to Java-initialise the classes + * that represent these objects, and afterwards return to build + * their dictionaries. This done, all subsequent type objects may be + * built in the obvious sequence. 
+ */ + + // *** The order of these initialisations is critical + + /** + * Classes for which the type system has to prepare {@code PyType} + * objects in two stages, deferring the filling of the dictionary of + * the type until all classes in this set have completed their + * static initialisation in Java and built a {@code PyType}. + * Generally, this is because these types are necessary to create + * entries in the dictionary of any type. + */ + // Use an ordered list so we have full control over sequence. + static final Map, BootstrapTask> bootstrapTasks = new LinkedHashMap<>(); + static { + /* + * Name the classes needing this bootstrap treatment in the order + * they should be processed. + */ + Class[] bootstrapClasses = { + // Really special cases + PyBaseObject.class, // + PyType.class, + // The entries are descriptors so defer those + // PyMemberDescr.class, // + // PyGetSetDescr.class, // + PyWrapperDescr.class, // + PyMethodDescr.class, // + // And sometimes things go wrong :( + BaseException.class, // + // Types with multiple/adopted implementations + PyUnicode.class, // + PyLong.class, // + PyBool.class, // + PyFloat.class, // + }; + // Fill the map from the list. + for (Class c : bootstrapClasses) { bootstrapTasks.put(c, new BootstrapTask()); } + } + + /** An empty array of type objects */ + static final PyType[] EMPTY_TYPE_ARRAY = new PyType[0]; + /** Lookup object on {@code PyType}. */ + private static Lookup LOOKUP = MethodHandles.lookup(); + /** The type object of {@code type} objects. */ + public static final PyType TYPE = new PyType(); + /** The type object of {@code object} objects. */ + static final PyType OBJECT_TYPE = TYPE.base; + /** An array containing only 'object', the bases of many types. 
*/ + private static final PyType[] ONLY_OBJECT = new PyType[] {OBJECT_TYPE}; + + static { + // For each bootstrap class: ensure static initialisation + for (Class c : bootstrapTasks.keySet()) { + String name = c.getName(); + try { + Class.forName(name); + } catch (ClassNotFoundException e) { + throw new InterpreterError("failed to initialise bootstrap class %s", + c.getSimpleName()); + } + } + } + + // *** End critically ordered section + + /** + * Particular type of this {@code PyType}. Why is this not always + * {@link #TYPE}? Because there may be subclasses of type + * (meta-classes) and objects having those as their {@code type}. + */ + private final PyType type; + + /** Name of the type. */ + final String name; + + /** The Java class defining operations on instances of the type. */ + final Class definingClass; + + /** + * Handle arrays for in which to look up binary class-specific + * methods when these are provided as a supplementary implementation + * class. {@code null} such a class is not provided in the + * specification. See {@link Spec#binops(Class)}. + */ + final Map binopTable; + + /** + * The Java classes appearing as operands in the operations and + * methods of the type. + *
    + *
  1. [0] is the canonical implementation class
  2. + *
  3. [1:implCount] are the adopted implementation classes
  4. + *
  5. [:acceptedCount] are the classes acceptable as + * {@code self}
  6. + *
  7. [:] (entire array) are the classes provided for as the + * "other" argument of binary operations
  8. + *
+ */ + final Class[] classes; + + /** + * The number of {@link #classes} in {@link #classes} recognised by + * the run-time as implementations of the type. + */ + final int implCount; + + /** + * The number of Java classes in {@link #classes} that are + * acceptable as {@code self} in methods. + */ + final int acceptedCount; + + /** + * Characteristics of the type, to determine behaviours (such as + * mutability) of instances or the type itself, or to provide quick + * answers to frequent questions such as "are instances data + * descriptors". + */ + EnumSet flags; + + // Support for class hierarchy + + /** + * The {@code __bases__} of this type, which are the types named in + * heading of the Python {@code class} definition, or just + * {@code object} if none are named, or an empty array in the + * special case of {@code object} itself. + */ + private PyType[] bases; + /** + * The {@code __base__} of this type. The {@code __base__} is a type + * from the {@code __bases__}, but its choice is determined by + * implementation details. + *

+ * It is the type earliest on the MRO after the current type, whose + * implementation contains all the members necessary to implement + * the current type. + */ + private PyType base; + /** + * The {@code __mro__} of this type, that is, the method resolution + * order, as defined for Python and constructed by the {@code mro()} + * method (which may be overridden), by analysis of the + * {@code __bases__}. + */ + private PyType[] mro; + + /** + * The dictionary of the type is always an ordered {@code Map}. It + * is only accessible (outside the core) through a + * {@code mappingproxy} that renders it a read-only + * {@code dict}-like object. Internally names are stored as + * {@code String} for speed and accessed via + * {@link #lookup(String)}. + */ + private final Map dict = new LinkedHashMap<>(); + + /** + * Partially construct a {@code type} object for {@code type}, and + * by side-effect the type object of its base {@code object}. The + * special constructor solves the problem that each of these has to + * exist in order properly to create the other. This constructor is + * only used once, during the static initialisation of + * {@code PyType}, after which these objects are constants. + */ + private PyType() { + /* + * We are creating the PyType for "type". We need a specification + * too, because there's nothing more bootstrappy than type. :) + */ + Spec spec = new Spec("type", LOOKUP).metaclass(this); + /* + * We cannot use fromSpec here, because we are already in a + * constructor and it needs TYPE, which we haven't set. + */ + this.type = this; + this.name = spec.name; + this.definingClass = spec.definingClass(); + this.binopTable = Collections.emptyMap(); + this.implCount = spec.adoptedCount(); + this.acceptedCount = spec.acceptedCount(); + this.classes = spec.getClasses(); + this.flags = spec.flags; + + /* + * Break off to construct the type object for "object", which we + * need as the base. Again, we need the spec. 
+ */ + Spec objectSpec = new Spec("object", PyBaseObject.class, LOOKUP).metaclass(this) + .canonical(Object.class); + /* + * This time the constructor will work, as long as we supply the + * metatype. For consistency, take values from objectSpec. + */ + PyType objectType = new PyType(objectSpec); + + // The only base of type is object + this.base = objectType; + this.bases = new PyType[] {objectType}; + this.mro = new PyType[] {this, objectType}; + + // Defer filling the dictionary for both types we made + BootstrapTask.shelve(objectSpec, objectType); + BootstrapTask.shelve(spec, this); + } + + /** + * Partially construct a type from a type specification. This + * implements only the basic object creation, short of filling the + * dictionary, for example. It is intended to be used with or as + * part of {@link #fromSpec(Spec)}. + * + * @param spec specification for the type + */ + private PyType(Spec spec) { + this.type = spec.getMetaclass(); + this.name = spec.name; + this.definingClass = spec.definingClass(); + this.classes = spec.getClasses(); + this.implCount = spec.adoptedCount() + 1; + this.acceptedCount = spec.acceptedCount(); + // in case original changes + this.flags = EnumSet.copyOf(spec.flags); + // Sets base as well as bases + this.setBases(spec.getBases()); + // Fix-up base and MRO from bases array + this.setMROfromBases(); + // Create the binary operations table (none for now) + this.binopTable = Collections.emptyMap(); + } + + @Override + public PyType getType() { return type; } + + /** + * Construct a type from the given specification. This approach is + * preferred to the direct constructor. The type object does not + * retain a reference to the specification, once constructed, so + * that subsequent alterations have no effect on the {@code PyType}. 
+ * + * @param spec specification + * @return the constructed {@code PyType} + * @throws InterpreterError if the specification names a metaclass + * other than {@code type}, or a {@code Clash} while filling the + * dictionary cannot be resolved + */ + public static PyType fromSpec(Spec spec) { + + // Construct a type with an empty dictionary + PyType type; + + if (spec.getMetaclass() == TYPE) { + type = new PyType(spec); + } else { + throw new InterpreterError("Metaclasses not supported."); + } + + /* + * The next step for this type is to populate the dictionary from + * the information gathered in the specification. We can only do + * this if all the bootstrap types have also reached at least this + * stage (are no longer on the waiting list). + */ + if (bootstrapTasks.isEmpty()) { + // The bootstrap types have all completed. Make descriptors. + try { + type.fillDictionary(spec); + } catch (Clash clash) { + /* + * Another thread beat us to the construction of an operations + * object for (one of) the implementing classes, or perhaps we're + * repeating ourselves. + */ + Operations ops = clash.existing; + if (ops instanceof PyType && Arrays.equals(type.classes, ((PyType)ops).classes)) + // Graciously accept it as the result. (I think.) + type = (PyType)ops; + else + // Something bad is happening. -> SystemError? + throw new InterpreterError(clash, "constructing %s", type); + } + + } else { + /* + * Some bootstrap types are waiting for their dictionaries. It is + * not safe to create descriptors in the dictionary. + */ + BootstrapTask.shelve(spec, type); + + /* + * However, the current type may be the last bootstrap type we were + * waiting for. + */ + if (BootstrapTask.allReady()) { + /* + * Complete the types we had to shelve. Doing so may create new + * types, so we empty the waiting list into a private copy. + */ + List tasks = new ArrayList<>(bootstrapTasks.values()); + bootstrapTasks.clear(); + + for (BootstrapTask task : tasks) { + try { + task.type.fillDictionary(task.spec); + } catch (Clash clash) { + // We're trying to repeat ourselves? 
+ throw new InterpreterError(clash, "constructing %s", task.type); + } + } + + /* + * Bootstrapping is over: the type we return will be + * fully-functional as a Python type object after all. + */ + } + } + + return type; + } + + /** + * Get the Python type corresponding to the given Java class. The + * method will find, or if necessary cause the creation of, a + * {@link PyType} that represents the type of instances of that Java + * class. The Java class given will be initialised, if it has not + * been already. + *

+ * This is not always a meaningful enquiry: if the given class is + * not the implementation of exactly one Python type, an error will + * be thrown. This also applies when the call causes creation of one + * or more {@link PyType}s and {@link Operations} objects. + * + * @param klass Java class to inspect + * @return the unique Python type of {@code klass} + */ + static PyType fromClass(Class klass) { return Operations.fromClass(klass).uniqueType(); } + + /** + * A record used to defer the completion of a particular type + * object. When so deferred, {@link PyType#fromSpec(Spec)} will + * return a type object without filling the dictionary of the type. + * The type and the implementation class can be available to Java, + * but will not yet function properly as a Python object. + *

+ * This only happens while starting up the run-time. The purpose is + * to allow Java class initialisation to complete for all of the + * types needed to populate type dictionaries ("bootstrap types"). + * Other classes that request a type object during this time will be + * caught up temporarily in the same process. + *

+ * A {@code BootstrapTask} stores the {@link PyType} object and the + * {@link Spec} for a given type. All the waiting types are + * completed as soon as the last of them becomes available. An entry + * whose {@code spec} is still {@code null} is not yet ready (see + * {@code allReady()}). + */ + private static class BootstrapTask { + + Spec spec; + PyType type; + + /** + * Place a partially-completed {@code type} on the + * {@link PyType#bootstrapTasks} list, keyed by the defining class + * of the specification. + * + * @param spec specification for the type + * @param type corresponding (partial) type object + * @throws InterpreterError if the entry for the defining class + * already holds a specification + */ + static void shelve(Spec spec, PyType type) { + Class key = spec.definingClass(); + BootstrapTask t = bootstrapTasks.get(key); + if (t == null) + // Not present: add an entry. + bootstrapTasks.put(key, t = new BootstrapTask()); + else if (t.spec != null) + throw new InterpreterError(REPEAT_CLASS, key); + // Fill the entry as partially initialised. + t.spec = spec; + t.type = type; + } + + /** + * Check to see if all {@link PyType#bootstrapTasks} have reached + * the partially complete state (are awaiting a dictionary). + * + * @return true iff all are ready + */ + static boolean allReady() { + for (BootstrapTask t : bootstrapTasks.values()) { + if (t.spec == null) { return false; } + } + return true; + } + + @Override + public String toString() { return String.format("BootstrapTask[%s]", spec); } + + private static final String REPEAT_CLASS = + "PyType bootstrapping: class %s encountered twice"; + } + + /** + * Load the dictionary of this type with attributes discovered + * through the specification. + * + * @param spec to apply + * @throws Clash when an implementation class is already registered + */ + private void fillDictionary(Spec spec) throws Clash { + + // Add definitions found via the specification to the dictionary + addDefinitions(spec); + // XXX Possibly belongs somewhere distinct from fillDictionary + defineOperations(spec); + deduceFlags(); + } + + /** + * Define the Operations objects for this type, posting them to the + * registry. 
+ * + * @param spec supplying the number of adopted implementations + * @throws Clash when an implementation class is already registered + */ + private void defineOperations(Spec spec) throws Clash { + + setAllSlots(); + + int n = spec.adoptedCount(); + + if (n == 1) { + // Simple case: one class and the Operations is the PyType + Operations.register(classes[0], this); + + } else { + // Multiple implementations must stand or fall together + Class[] cls = Arrays.copyOf(classes, n); + Operations[] ops = new Operations[n]; + + // The first Operations object is this PyType + cls[0] = classes[0]; + ops[0] = this; + + // Create an Operations for each adopted implementation + for (int i = 1; i < n; i++) { + // Creating the operations object sets its slots + ops[i] = new Operations.Accepted(this, i); + cls[i] = classes[i]; + } + + // Register these pairings as a batch + Operations.register(cls, ops); + } + } + + /** + * Add methods, get-sets, members and special functions as + * attributes to this type, as discovered through the specification. + * + * @param spec to apply + */ + private void addDefinitions(Spec spec) { + // Add definitions found in the defining class to the type + TypeExposer exposer = Exposer.exposeType(this, spec.definingClass, spec.methodClass); + exposer.populate(dict, spec.lookup); + } + + /** + * The {@link #flags} field caches many characteristics of the type + * that we need to consult: we deduce them here. Only the descriptor + * and data descriptor flags are deduced (from the {@code op_get}, + * {@code op_set} and {@code op_delete} slots); the method + * descriptor flag is not set here. + */ + private void deduceFlags() { + if (Slot.op_get.isDefinedFor(this)) { + // It's a descriptor + flags.add(Flag.IS_DESCR); + } + if (Slot.op_set.isDefinedFor(this) || Slot.op_delete.isDefinedFor(this)) { + // It's a data descriptor + flags.add(Flag.IS_DESCR); + flags.add(Flag.IS_DATA_DESCR); + } + } + + /** + * Get the Python type of the given object {@code obj}. The Java + * class of {@code obj} will normally have been initialised, since + * an instance exists. 
+ * + * @param obj to inspect + * @return the Python type of {@code obj} + */ + public static PyType of(Object obj) { + return Operations.fromClass(obj.getClass()).type(obj); + } + + @Override + PyType type(Object x) { return this; } + + @Override + PyType uniqueType() { return this; } + + /** + * Get the (canonical) Java implementation class of this + * {@code PyType} object. + * + * @return the class at index 0 of {@code classes} + */ + @Override + Class getJavaClass() { return classes[0]; } + + /** + * Set {@link #bases} and deduce {@link #base}. + * + * @param bases to set + */ + private void setBases(PyType bases[]) { + this.bases = bases; + this.base = bestBase(bases); + } + + /** + * Set the MRO from {@link #bases}. At present only zero bases or a + * single base are supported: two or more raise an + * {@code InterpreterError}. + */ + // XXX note may retain a reference to declaredBases + private void setMROfromBases() { + + int n = bases.length; + + if (n == 0) { + // Special case of 'object' + this.mro = new PyType[] {this}; + + } else if (n == 1) { + // Just one base: short-cut: mro = (this,) + this.base.mro + PyType[] baseMRO = base.getMRO(); + int m = baseMRO.length; + PyType[] mro = new PyType[m + 1]; + mro[0] = this; + System.arraycopy(baseMRO, 0, mro, 1, m); + this.mro = mro; + + } else { // n >= 2 + // Need the proper C3 algorithm to set MRO + String fmt = "multiple inheritance not supported yet (type `%s`)"; + throw new InterpreterError(fmt, name); + } + } + + /** + * Set all the slots ({@code op_*}) from the entries in the + * dictionaries of this type and its bases. + */ + private void setAllSlots() { + for (Slot s : Slot.values()) { + Object def = lookup(s.methodName); + s.setDefinition(this, def); + } + } + + /** + * A name has the form __A__ where A is one or more characters. + * + * @param n name to test + * @return whether {@code n} is longer than 4 characters and both + * begins and ends with two underscores + */ + private static boolean isDunderName(String n) { + final int L = n.length(); + return L > 4 && n.charAt(1) == '_' && n.charAt(0) == '_' && n.charAt(L - 2) == '_' + && n.charAt(L - 1) == '_'; + } + + /** + * Called from {@link #__setattr__(String, Object)} after an + * attribute has been set or deleted. 
This gives the type the + * opportunity to recompute slots and perform any other actions. + * When {@code name} is the method name of a slot, the definition now + * in the dictionary is applied to the {@code Operations} object of + * every class in {@code classes}. + * + * @param name of the attribute modified + */ + protected void updateAfterSetAttr(String name) { + + // XXX What if a slot-wrapper is removed, not replaced? + // XXX Should also visit sub-classes + + // If the update is a slot wrapper change, slots must follow. + Slot s = Slot.forMethodName(name); + if (s != null) { + Object def = dict.get(name); + for (Class impl : classes) { + Operations ops = Operations.fromClass(impl); + s.setDefinition(ops, def); + } + } + } + + /* + * NOTE(review): as written this returns an empty string. Presumably + * text in angle brackets naming the class was lost from the literal + * when this patch was extracted: confirm against the repository. + */ + @Override + public String toString() { return ""; } + + /** + * The name of this type. + * + * @return name of this type + */ + @Getter("__name__") + public String getName() { return name; } + + /** + * Find the index of the given class in the accepted classes for + * this type. There is a match if the found class is assignable + * from the given class. In the case that more than one matches, the + * first qualifying index is returned. The "accepted" classes + * consist of: + *

    + *
  1. the canonical class (index zero)
  2. + *
  3. other adopted implementations (like {@code Double} for + * {@code float})
  4. + *
  5. the accepted implementations of any sub-classes that are not + * assignable to the canonical or adopted
  6. + *
+ * + * @param c a class matching one of the accepted classes + * @return its index, or -1 if no accepted class is assignable from + * {@code c} + */ + int indexAccepted(Class c) { + // Try the non-canonical accepted classes first (if any) + for (int i = 1; i < acceptedCount; i++) { + if (classes[i].isAssignableFrom(c)) { return i; } + } + // Try the canonical class last + return classes[0].isAssignableFrom(c) ? 0 : -1; + } + + /** + * Find the index of the given class in the known operand classes + * for this type. There is a match if the found class is + * assignable from the given class. In the case that more than one + * matches, the first qualifying index is returned, where indices + * from 1 upwards are tried before the canonical class at index 0. + * + * @param c a class matching one of the operand classes + * @return its index, or -1 if no operand class is assignable from + * {@code c} + */ + int indexOperand(Class c) { + // Try the non-canonical known operand classes first (if any) + for (int i = 1; i < classes.length; i++) { + if (classes[i].isAssignableFrom(c)) { return i; } + } + // Try the canonical class last + return classes[0].isAssignableFrom(c) ? 0 : -1; + } + + /** + * {@code true} iff the type of {@code o} is a Python sub-type of + * {@code this} (including exactly {@code this} type). This is + * likely to be used in the form:
+     * if(!PyUnicode.TYPE.check(oName)) throw ...
+     * 
+ * + * @param o object to test + * @return {@code true} iff the type of {@code o} is this type or a + * sub-type of it + */ + boolean check(Object o) { + PyType t = PyType.of(o); + return t == this || t.isSubTypeOf(this); + } + + /** + * {@code true} iff the Python type of {@code o} is exactly + * {@code this}, not a Python sub-type of {@code this}, nor just any + * Java sub-class of {@code PyType}. This is likely to be used in + * the form:
+     * if(!PyUnicode.TYPE.checkExact(oName)) throw ...
+     * 
+ * + * @param o object to test + * @return {@code true} iff {@code o} is exactly of this type + */ + public boolean checkExact(Object o) { return PyType.of(o) == this; } + + /** + * Determine if this type is a Python sub-type of {@code b} (if + * {@code b} is on the MRO of this type). + * + * @param b to test + * @return {@code true} if {@code this} is a sub-type of {@code b} + */ + // Compare CPython PyType_IsSubtype in typeobject.c + boolean isSubTypeOf(PyType b) { + if (mro != null) { + /* + * Deal with multiple inheritance without recursion by walking the + * MRO array + */ + for (PyType base : mro) { + if (base == b) + return true; + } + return false; + } else + // this type is not completely initialised yet; follow base + return type_is_subtype_base_chain(b); + } + + /** + * Determine if this type is a Python sub-type of {@code b} by + * chaining through the {@link #base} property. (This is a fall-back + * when {@link #mro} is not valid.) If the chain ends without meeting + * {@code b}, the answer is whether {@code b} is {@code OBJECT_TYPE}. + * + * @param b to test + * @return {@code true} if {@code this} is a sub-type of {@code b} + */ + // Compare CPython type_is_subtype_base_chain in typeobject.c + private boolean type_is_subtype_base_chain(PyType b) { + PyType t = this; + while (t != b) { + t = t.base; + if (t == null) { return b == OBJECT_TYPE; } + } + return true; + } + + /** + * Return whether special methods in this type may be assigned new + * meanings after type creation (or may be safely cached). + * + * @return whether the type is mutable + */ + final boolean isMutable() { return flags.contains(Flag.MUTABLE); } + + /** + * Return whether an instance of this type is a data descriptor + * (defines {@code __get__} and at least one of {@code __set__} or + * {@code __delete__}). 
+ * + * @return whether a data descriptor + */ + @Override + final boolean isDataDescr() { return flags.contains(Flag.IS_DATA_DESCR); } + + /** + * Return whether an instance of this type defines {@code __get__} and + * participates in the optimised call pattern supported by + * {@link Opcode311#LOAD_METHOD}. + * + * @return whether a method descriptor + */ + @Override + final boolean isMethodDescr() { return flags.contains(Flag.IS_METHOD_DESCR); } + + /** + * Return whether an instance of this type is a descriptor (defines + * {@code __get__}). + * + * @return whether a descriptor + */ + final boolean isDescr() { return flags.contains(Flag.IS_DESCR); } + + /** + * Return whether this type uses {@code object.__getattribute__}, + * that is, whether its {@code op_getattribute} slot is identical to + * that of {@code PyBaseObject.TYPE}. + * + * @return whether {@code __getattribute__} is that of {@code object} + */ + final boolean hasGenericGetAttr() { + return op_getattribute == PyBaseObject.TYPE.op_getattribute; + } + + /** + * Get the {@code __base__} of this type. The {@code __base__} is a + * type from the MRO, but its choice is determined by implementation + * details. + *

+ * It is the type earliest on the MRO after the current type, whose + * implementation contains all the members necessary to implement + * the current type. + * + * @return the base (core use only). + */ + PyType getBase() { return base; } + + /** @return the bases as an array (core use only). */ + PyType[] getBases() { return bases; } + + /** @return the MRO as an array (core use only). */ + PyType[] getMRO() { return mro; } + + /** + * The dictionary of a {@code type} in a read-only view. + * + * @return an unmodifiable view of the dictionary of this type + */ + @Override + public final Map getDict() { return Collections.unmodifiableMap(dict); } + + /** + * Look for a name, returning the entry directly from the first + * dictionary along the MRO containing key {@code name}. This may be + * a descriptor, but no {@code __get__} takes place on it: the + * descriptor itself will be returned. This method does not throw an + * exception if the name is not found, but returns {@code null} like + * a {@code Map.get}. + * + * @param name to look up, must be exactly a {@code str} + * @return dictionary entry or null + */ + // Compare CPython _PyType_Lookup in typeobject.c + // and find_name_in_mro in typeobject.c + Object lookup(String name) { + + /* + * CPython wraps this in a cache keyed by (type, name) and sensitive + * to the "version" of this type. (Version changes when any change + * occurs, even in a super-class, that would alter the result of a + * look-up.) We do not reproduce that at present. + */ + + // Look in dictionaries of types in MRO + PyType[] mro = getMRO(); + + // CPython checks here to see if this type is "ready". + // Could we be "not ready" in some loop of types? + + for (PyType base : mro) { + Object res; + if ((res = base.dict.get(name)) != null) + return res; + } + return null; + } + + /** + * Equivalent to {@link #lookup(String)}, accepting + * {@link PyUnicode}. 
+ * + * @param name to look up, must be exactly a {@code str} + * @return dictionary entry or null + */ + Object lookup(PyUnicode name) { return lookup(name.asString()); } + + /** + * Enumeration of the characteristics of a type. These are the + * members that appear in the {@link PyType#flags} to + * determine behaviours or provide quick answers to frequent + * questions such as "are you a data descriptor". + */ + public enum Flag { + /** + * Special methods may be assigned new meanings in the {@code type}, + * after creation. + */ + MUTABLE, + /** + * An object of this type can change to another type (within + * "layout" constraints). + */ + VARIABLE, + /** + * This type allows sub-classing (is acceptable as a base). + */ + BASETYPE, + /** + * An object of this type is a descriptor (defines {@code __get__}). + */ + IS_DESCR, + /** + * An object of this type is a data descriptor (defines + * {@code __get__} and at least one of {@code __set__} or + * {@code __delete__}). + */ + IS_DATA_DESCR, + /** + * An object of this type is a method descriptor (participates in an + * optimised call pattern supported by {@link Opcode#LOAD_METHOD}). + */ + IS_METHOD_DESCR, + } + + /** + * A specification for a Python type. A Java class intended as the + * implementation of a Python object creates one of these data + * structures during static initialisation, and configures it using + * the mutators. A fluent interface makes this configuration + * readable as a single, long statement. + */ + public static class Spec { + + /** Name of the type being specified. */ + final String name; + + /** Delegated authorisation to resolve names. */ + final Lookup lookup; + + /** + * The defining class for the type being specified, in which the + * {@code Spec.lookup} was created. + */ + private final Class definingClass; + + /** + * Additional class in which to look up method names or + * {@code null}. 
See {@link #methods(Class)}. + */ + private Class methodClass; + + /** + * Additional class in which to look up binary class-specific + * methods or {@code null}. See {@link #binops(Class)}. + */ + private Class binopClass; + + /** + * The canonical and adopted implementations of the Python type, + * classes acceptable as {@code self}, and other known operand + * classes will be collected here, in that order. + */ + private ArrayList> classes = new ArrayList<>(1); + + /** + * The number of adopted implementations of the Python type, + * including the canonical one. Increment for each adopted class + * added. + */ + private int adoptedCount; + + /** + * The number of classes, including the (canonical and) adopted + * classes, that are accepted as instances of the Python type. + * Increment for each adopted or accepted class added. See + * {@link #accept(Class...)}. + */ + private int acceptedCount; + + /** + * The Python type being specified may be represented by a Python + * sub-class of {@code type}, i.e. something other than + * {@link PyType#TYPE}. This will be represented by a sub-class of + * {@link PyType}. While this is {@code null}, + * {@link #getMetaclass()} answers {@link PyType#TYPE}. + */ + private PyType metaclass; + + /** Python types that are bases of the type being specified. */ + // Must allow null element, needed when defining 'object' + private final List bases = new LinkedList<>(); + + /** Characteristics of the type being specified. */ + EnumSet flags = Spec.getDefaultFlags(); + + /** + * Create (begin) a specification for a {@link PyType} based on a + * specified implementation class. + *

+ * {@link PyType#fromSpec(Spec)} will interrogate the implementation + * class reflectively to discover attributes the type should have, + * and will form type dictionary entries with {@link MethodHandle}s + * or {@link VarHandle}s on qualifying members. The caller supplies + * a {@link Lookup} object to make this possible. An implementation + * class may declare methods and fields as {@code private}, and + * annotate them to be exposed to Python, as long as the lookup + * object provided to the {@code Spec} confers the right to access + * them. + *

+ * A {@code Spec} given private or package access to members should + * not be passed to untrusted code. {@code PyType} does not hold + * onto the {@code Spec} after completing the type object. + *

+ * Additional classes may be given containing the implementation and + * the lookup classes (see {@code Lookup.lookupClass()}) to be + * different from the caller. Usually they are the same. + * + * @param name of the type + * @param definingClass in which operations are defined + * @param lookup authorisation to access {@code definingClass} + * + * @deprecated Use {@link #Spec(String, Lookup)} instead + */ + @Deprecated + Spec(String name, Class definingClass, Lookup lookup) { + this.name = name; + this.definingClass = definingClass; + this.lookup = lookup; + this.methodClass = this.binopClass = null; + this.adopt(definingClass); + } + + /** + * Create (begin) a specification for a {@link PyType} based on the + * caller as the implementation class. This is the beginning + * normally made by built-in classes in their static initialisation. + *

+ * The caller supplies a {@link Lookup} object which must have been + * created by the implementation class. + * {@link PyType#fromSpec(Spec)} will interrogate the implementation + * class reflectively to discover attributes the type should have, + * and will form type dictionary entries with {@link MethodHandle}s + * or {@link VarHandle}s on qualifying members. An implementation + * class may declare methods and fields as {@code private}, and + * annotate them to be exposed to Python, as long as the lookup + * object provided to the {@code Spec} confers the right to access + * them. + *

+ * A {@code Spec} given private or package access to members should + * not be passed to untrusted code. {@code PyType} does not hold + * onto the {@code Spec} after completing the type object. + *

+ * Additional classes may be given containing the implementation and + * the lookup classes (see {@code Lookup.lookupClass()}) to be + * different from the caller. Usually they are the same. + * + * @param name of the type + * @param lookup authorisation to access its own lookup class, which + * becomes the defining class + */ + public Spec(String name, Lookup lookup) { this(name, lookup.lookupClass(), lookup); } + + /** + * Create (begin) a specification for a {@link PyType} representing + * a sub-class of a built-in type. The same implementation class may + * be used to specify any number of Python types, instances of which + * are able to migrate between these types by {@code __class__} + * assignment. The {@link Operations} object of the implementation + * class will be an {@code Operations.}{@link Derived}. No lookup + * object is supplied for a specification made this way. + * + * @param name of the type + * @param implClass in which operations are defined + */ + public Spec(String name, Class implClass) { + this(name, implClass, null); + } + + /** + * Specify the canonical implementation class for the type. By + * default, if {@link #canonical(Class)} is not called, the + * canonical implementation is the lookup class given in the + * constructor. This method makes it possible to have that lookup + * class not be an implementation. + *

+ * This is the case for the Python {@code object}, for which the + * canonical implementation is Java {@code java.lang.Object} while + * operations are defined elsewhere. Also {@code PyBool} makes + * {@code java.lang.Boolean} canonical for Python {@code bool}. + * + * @param impl replacement canonical implementation class + * @return {@code this} + */ + public Spec canonical(Class impl) { + classes.set(0, impl); + return this; + } + + /** + * Specify adopted implementation classes for the type. The adopted + * implementations are those that will be identified by the run-time + * as having the Python type of this {@code Spec}. Successive calls + * are cumulative. + *

+ * The note in {@link #accept(Class...)} about the availability of + * method definitions applies. + * + * @param classes classes to treat as adopted implementations + * @return {@code this} + */ + Spec adopt(Class... classes) { + for (Class c : classes) { + // Insert after the adopted classes already present + this.classes.add(adoptedCount, c); + adoptedCount++; + acceptedCount++; + } + return this; + } + + /** + * Specify Java classes to be accepted as "self" arguments for the + * type, in addition to the canonical and adopted implementations. + * The use for this is to ensure that the implementations of Python + * sub-types of the type being specified are acceptable as "self", + * when defined by unrelated Java classes. As an example, consider + * that operations on a Python {@code int} must have a Java + * implementation that accepts a Java {@code Boolean} (Python + * {@code bool}). + *

+ * Successive calls are cumulative. Classes assignable to existing + * accepted classes are ignored. + *

+ * + * @apiNote For every instance method {@code m} (including special + * methods) on a Python object, and for every adopted or + * accepted class {@code C}, there must be an implementation + * {@code m(D self, ...)} where the "self" (first) argument type + * {@code D} is assignable from {@code C}. + *

+ * Note that this criterion could be satisfied by defining just + * one {@code m(Object self, ...)} or by a series of specialised + * implementations, or any combination. When it selects an + * implementation, the run-time chooses the most specialised + * match. + * + * @param classes to append to the list + * @return {@code this} + */ + Spec accept(Class... classes) { + for (Class c : classes) { + if (indexOf(c) < 0) { + // Insert after the accepted classes already present + this.classes.add(acceptedCount, c); + acceptedCount++; + } + } + return this; + } + + /** + * Specify Java classes accepted as the second operand in binary + * operations. Successive calls are cumulative. Classes assignable + * to any class already in the list are ignored. + * + * @param classes to append to the list + * @return {@code this} + */ + Spec operand(Class... classes) { + for (Class c : classes) { + if (indexOf(c) < 0) { + // Add at the very end + this.classes.add(c); + } + } + return this; + } + + /** + * The number of classes specified as adopted implementations of the + * Python type being specified, including the canonical + * implementation. + * + * @return number of adopted classes + */ + int adoptedCount() { return adoptedCount; } + + /** + * The number of classes specified as canonical, adopted or accepted + * as {@code self} for the Python type being specified. + * + * @return number of accepted classes + */ + int acceptedCount() { return acceptedCount; } + + /** + * The number of classes specified as canonical, adopted, accepted as + * {@code self} or as operands for the Python type being specified. + * + * @return number of all classes to be treated as operands + */ + int classesCount() { return classes.size(); } + + /** + * Find {@code c} in the known operand classes. 
+ * + * @param c class to find + * @return the highest index of a class assignable from {@code c}, + * or -1 if there is none + */ + private int indexOf(Class c) { + for (int i = classes.size(); --i >= 0;) { + if (classes.get(i).isAssignableFrom(c)) { return i; } + } + return -1; + } + + /** + * Specify a base for the type. Successive bases given are + * cumulative and ordered. + * + * @param base to append to the bases + * @return {@code this} + * @throws InterpreterError if {@code base} is {@code null} + */ + public Spec base(PyType base) { + if (base == null) + throw new InterpreterError("null base specified for %s. (Base not ready?)", name); + bases.add(base); + return this; + } + + /** + * A new set of flags with the default values for a type defined in + * Java (only {@code BASETYPE} is set). + * + * @return new default flags + */ + static EnumSet getDefaultFlags() { return EnumSet.of(Flag.BASETYPE); } + + /** + * Specify a characteristic (type flag) to be added. + * + * @param f to add to the current flags + * @return {@code this} + */ + /* + * XXX Better encapsulation to have methods for things we want to + * set/unset. Most PyType.flags members should not be manipulated + * through the Spec and are derived in construction, or as a side + * effect of setting something else. + */ + Spec flag(Flag f) { + flags.add(f); + return this; + } + + /** + * Specify a characteristic (type flag), or several, to be added. + * + * @param f to add to the current flags + * @return {@code this} + */ + Spec flag(Flag... f) { + for (Flag x : f) { flags.add(x); } + return this; + } + + /** + * Specify a characteristic (type flag) to be removed. + * + * @param f to remove from the current flags + * @return {@code this} + */ + // XXX mostly used as flagNot(BASETYPE). Consider specific call. + // XXX Consider also reversing that default. + public Spec flagNot(Flag f) { + flags.remove(f); + return this; + } + + /** + * Specify that the Python type being specified will be represented + * by an instance of this Python sub-class of {@code type}, i.e. + * something other than {@link PyType#TYPE}. 
+ * + * @param metaclass to specify (or {@code null} for {@code type}) + * @return {@code this} + */ + public Spec metaclass(PyType metaclass) { + this.metaclass = metaclass; + return this; + } + + /** + * Get the defining class for the type. This is often, and is by + * default, the canonical implementation class, but it doesn't have + * to be. + * + * @return the defining class for the type + */ + public Class definingClass() { return definingClass; } + + /** + * Set the class additionally defining methods for the type. This + * class will be consulted when filling the dictionary of the type. + * A separate class is useful when the method definitions are + * generated by a script, as for types that admit multiple + * realisations in Java. + * + * @param methodClass class with additional methods + * @return {@code this} + */ + Spec methods(Class methodClass) { + this.methodClass = methodClass; + return this; + } + + /** + * Get the class additionally defining methods for the type. See + * {@link #methods(Class)}. + * + * @return class additionally defining methods for the type (or + * {@code null}) + */ + Class methodClass() { return methodClass; } + + /** + * Set the class in which to look up binary class-specific + * operations, for example {@code __rsub__(MyObject, Integer)}. Such + * signatures are used in call sites. + *

+ * Types may ignore this technique if the designer is content with a + * {@code __rsub__(MyObject, Object)} that coerces its right-hand + * argument on each call. (This method has to exist to satisfy the + * Python data model.) The method may be defined in the + * {@link #definingClass()}, or {@link #methodClass()} + *

+ * A separate class is necessary since the method definition for + * {@code __rsub__(MyObject, Object)} must sometimes return + * {@link Py#NotImplemented}, and we should like to avoid checking + * for that in the call site. Rather, the absence of a definition + * should indicate that the operation is not defined for a given pair + * of types. Certain built-ins use the technique to speed up call + * sites in JVM byte code compiled from Python. (The class may be + * generated by a script.) + * + * @param binopClass class with binary class-specific methods + * @return {@code this} + */ + Spec binops(Class binopClass) { + this.binopClass = binopClass; + return this; + } + + /** + * Get the class defining binary class-specific operations for the + * type. See {@link #binops(Class)}. {@code null} if there isn't + * one. + * + * @return class defining binary class-specific operations (or + * {@code null}) + */ + Class binopClass() { return binopClass; } + + /** + * Get all the operand classes for the type, in order, the canonical + * at index 0, adopted, accepted and operand classes following. + * + * @return a copy of all the operand classes + */ + Class[] getClasses() { return classes.toArray(new Class[classes.size()]); } + + /** + * Return the accumulated list of bases. If no bases were added, the + * result is just {@code [object]}, except when we do this for + * object itself, for which it is a zero-length array. + * + * @return array of the bases of this type + */ + public PyType[] getBases() { + if (bases.isEmpty()) { + /* + * No bases specified: that means 'object' is the implicit base, + * unless that's us. + */ + if (definingClass() != PyBaseObject.class) + return ONLY_OBJECT; // Normally + else + return EMPTY_TYPE_ARRAY; // For 'object' + } else + return bases.toArray(new PyType[bases.size()]); + } + + /** + * Return the meta-class of the type being created. If none was set, + * it is {@link PyType#TYPE}. 
+ * + * @return the meta-class that was set, or {@link PyType#TYPE} if + * none was + */ + public PyType getMetaclass() { return metaclass != null ? metaclass : TYPE; } + + // Something helpful in debugging (__repr__ is different) + @Override + public String toString() { + String fmt = "'%s' %s, flags=%s def=%s"; + return String.format(fmt, name, bases, flags, definingClass().getSimpleName()); + } + } + + // Special methods ----------------------------------------------- + + /* + * NOTE(review): as written the format string is empty, so the result + * is an empty string and name is unused. Presumably text in angle + * brackets was lost from the literal when this patch was extracted: + * confirm against the repository. + */ + protected Object __repr__() throws Throwable { return String.format("", name); } + + /** + * {@link Slot#op_getattribute} has signature + * {@link Signature#GETATTR} and provides attribute read access on + * this type object and its metatype. This is very like + * {@code object.__getattribute__} + * ({@link PyBaseObject#__getattribute__(Object, String)}), but the + * instance is replaced by a type object, and that object's type is + * a meta-type (which is also a {@code type}). + *

+ * The behavioural difference is that in looking for attributes on a + * type: + *

    + *
  • we use {@link #lookup(String)} to search along the MRO, + * and
  • + *
  • if we find a descriptor, we use it. + * ({@code object.__getattribute__} does not check for descriptors + * on the instance.)
  • + *
+ *

+ * The following order of precedence applies when looking for the + * value of an attribute: + *

    + *
  1. a data descriptor from the dictionary of the meta-type
  2. + *
  3. a descriptor or value in the dictionary of {@code type}
  4. + *
  5. a non-data descriptor or value from the dictionary of the meta + * type
  6. + *
+ * + * @param name of the attribute + * @return attribute value + * @throws AttributeError if no such attribute + * @throws Throwable on other errors, typically from the descriptor + */ + // Compare CPython type_getattro in typeobject.c + protected Object __getattribute__(String name) throws AttributeError, Throwable { + + PyType metatype = getType(); + MethodHandle descrGet = null; + + // Look up the name in the type (null if not found). + Object metaAttr = metatype.lookup(name); + if (metaAttr != null) { + // Found in the metatype, it might be a descriptor + Operations metaAttrOps = Operations.of(metaAttr); + descrGet = metaAttrOps.op_get; + if (metaAttrOps.isDataDescr()) { + // metaAttr is a data descriptor so call its __get__. + try { + // Note the cast of 'this', to match op_get + return descrGet.invokeExact(metaAttr, (Object)this, metatype); + } catch (Slot.EmptyException e) { + /* + * Only __set__ or __delete__ was defined. We do not catch + * AttributeError: it's definitive. Suppress trying __get__ again. + */ + descrGet = null; + } + } + } + + /* + * At this stage: metaAttr is the value from the meta-type, or a + * non-data descriptor, or null if the attribute was not found. It's + * time to give the type's instance dictionary a chance. + */ + Object attr = lookup(name); + if (attr != null) { + // Found in this type. Try it as a descriptor. + try { + /* + * Note the args are (null, this): we respect descriptors in this + * step, but have not forgotten we are dereferencing a type. + */ + return Operations.of(attr).op_get.invokeExact(attr, (Object)null, this); + } catch (Slot.EmptyException e) { + // We do not catch AttributeError: it's definitive. + // Not a descriptor: the attribute itself. + return attr; + } + } + + /* + * The name wasn't in the type dictionary. metaAttr is now the + * result of look-up on the meta-type: a value, a non-data + * descriptor, or null if the attribute was not found. 
+ */ + if (descrGet != null) { + // metaAttr may be a non-data descriptor: call __get__. + try { + return descrGet.invokeExact(metaAttr, (Object)this, metatype); + } catch (Slot.EmptyException e) {} + } + + if (metaAttr != null) { + /* + * The attribute obtained from the meta-type, and that turned out + * not to be a descriptor, is the return value. + */ + return metaAttr; + } + + // All the look-ups and descriptors came to nothing :( + throw Abstract.noAttributeError(this, name); + } + + /** + * {@link Slot#op_setattr} has signature {@link Signature#SETATTR} + * and provides attribute write access on this type object. The + * behaviour is very like the default {@code object.__setattr__} + * except that it has write access to the type dictionary that is + * denied through {@link #getDict()}. + * + * @param name of the attribute + * @param value to give the attribute + * @throws AttributeError if no such attribute or it is read-only + * @throws Throwable on other errors, typically from the descriptor + */ + // Compare CPython type_setattro in typeobject.c + protected void __setattr__(String name, Object value) throws AttributeError, Throwable { + + // Accommodate CPython idiom that set null means delete. + if (value == null) { + // Do this to help porting. Really this is an error. + __delattr__(name); + return; + } + + // Trap immutable types + if (!flags.contains(Flag.MUTABLE)) + throw Abstract.cantSetAttributeError(this); + + // Check to see if this is a special name + boolean special = isDunderName(name); + + // Look up the name in the meta-type (null if not found). + Object metaAttr = getType().lookup(name); + if (metaAttr != null) { + // Found in the meta-type, it might be a descriptor. 
+ Operations metaAttrOps = Operations.of(metaAttr); + if (metaAttrOps.isDataDescr()) { + // Try descriptor __set__ + try { + metaAttrOps.op_set.invokeExact(metaAttr, (Object)this, value); + if (special) { updateAfterSetAttr(name); } + return; + } catch (Slot.EmptyException e) { + // We do not catch AttributeError: it's definitive. + // Descriptor but no __set__: do not fall through. + throw Abstract.readonlyAttributeError(this, name); + } + } + } + + /* + * There was no data descriptor, so we will place the value in the + * object instance dictionary directly. + */ + // Use the privileged put + dict.put(name, value); + if (special) { updateAfterSetAttr(name); } + } + + /** + * {@link Slot#op_delattr} has signature {@link Signature#DELATTR} + * and provides attribute deletion on this type object. The + * behaviour is very like the default {@code object.__delattr__} + * except that it has write access to the type dictionary that is + * denied through {@link #getDict()}. + * + * @param name of the attribute + * @throws AttributeError if no such attribute or it is read-only + * @throws Throwable on other errors, typically from the descriptor + */ + // Compare CPython type_setattro in typeobject.c + protected void __delattr__(String name) throws AttributeError, Throwable { + + // Trap immutable types + if (!flags.contains(Flag.MUTABLE)) + throw Abstract.cantSetAttributeError(this); + + // Check to see if this is a special name + boolean special = isDunderName(name); + + // Look up the name in the meta-type (null if not found). + Object metaAttr = getType().lookup(name); + if (metaAttr != null) { + // Found in the meta-type, it might be a descriptor. 
+ Operations metaAttrOps = Operations.of(metaAttr); + if (metaAttrOps.isDataDescr()) { + // Try descriptor __delete__ + try { + metaAttrOps.op_delete.invokeExact(metaAttr, (Object)this); + if (special) { updateAfterSetAttr(name); } + return; + } catch (Slot.EmptyException e) { + // We do not catch AttributeError: it's definitive. + // Data descriptor but no __delete__. + throw Abstract.mandatoryAttributeError(this, name); + } + } + } + + /* + * There was no data descriptor, so it's time to give the type + * instance dictionary a chance to receive. A type always has a + * dictionary so this.dict can't be null. + */ + // Use the privileged remove + Object previous = dict.remove(name); + if (previous == null) { + // A null return implies it didn't exist + throw Abstract.noAttributeError(this, name); + } + + if (special) { updateAfterSetAttr(name); } + return; + } + + // plumbing -------------------------------------------------- + + /** + * Given the bases of a new class, choose the {@code type} on which + * a sub-class should be implemented. + *

+ * When a sub-class is defined in Python, it may have several bases, + * each with their own Java implementation. What Java class should + * implement the new sub-class? This chosen Java class must be + * acceptable as {@code self} to a method (slot functions, + * descriptors) inherited from any base. The methods of + * {@link PyBaseObject} accept any {@link Object}, but all other + * implementation classes require an instance of their own type to + * be presented. + *

+ * A method will accept any Java sub-type of the type of its + * declared parameter. We ensure compatibility by choosing that the + * implementation Java class of the new sub-type is a Java sub-class + * of the implementation types of all the bases (excluding those + * implemented on {@link PyBaseObject}). + *

+ * This imposes a constraint on the bases, except for those + * implemented by PyBaseObject, that their implementations have a + * common Java descendant. (The equivalent constraint in CPython is + * that the layout of the {@code struct} that represents an instance + * of every base should match a truncation of the one chosen.) + * + * @param bases sub-classed by the new type + * @return the acceptable base + */ + // Compare CPython best_base in typeobject.c + private static PyType bestBase(PyType[] bases) { + // XXX This is a stop-gap answer: revisit in due course. + /* + * Follow the logic of CPython typeobject.c, but adapted to a Java + * context. + */ + if (bases.length == 0) + return OBJECT_TYPE; + else { + return bases[0]; + } + } + + // Compare CPython _PyType_GetDocFromInternalDoc + // in typeobject.c + // XXX Consider implementing in ArgParser instead + static Object getDocFromInternalDoc(String name, String doc) { + // TODO Auto-generated method stub + return null; + } + + // Compare CPython: PyType_GetTextSignatureFromInternalDoc + // in typeobject.c + // XXX Consider implementing in ArgParser instead + static Object getTextSignatureFromInternalDoc(String name, String doc) { + // TODO Auto-generated method stub + return null; + } +} diff --git a/core/src/main/java/org/python/core/PyUnicode.java b/core/src/main/java/org/python/core/PyUnicode.java new file mode 100644 index 000000000..ddc07bfb5 --- /dev/null +++ b/core/src/main/java/org/python/core/PyUnicode.java @@ -0,0 +1,5155 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import java.lang.invoke.MethodHandles; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator; +import java.util.Set; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.Function; +import java.util.function.IntUnaryOperator; +import java.util.function.Supplier; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + +import org.python.base.InterpreterError; +import org.python.base.MissingFeature; +import org.python.core.Exposed.Default; +import org.python.core.Exposed.Name; +import org.python.core.Exposed.PythonMethod; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.PySequence.Delegate; +import org.python.core.PySlice.Indices; +import org.python.core.stringlib.FieldNameIterator; +import org.python.core.stringlib.IntArrayBuilder; +import org.python.core.stringlib.IntArrayReverseBuilder; +import org.python.core.stringlib.InternalFormat; +import org.python.core.stringlib.InternalFormat.AbstractFormatter; +import org.python.core.stringlib.InternalFormat.FormatError; +import org.python.core.stringlib.InternalFormat.FormatOverflow; +import org.python.core.stringlib.InternalFormat.Spec; +import org.python.core.stringlib.MarkupIterator; +import org.python.core.stringlib.TextFormatter; +import org.python.modules.ucnhashAPI; + +/** + * The Python {@code str} object is implemented by both + * {@code PyUnicode} and Java {@code String}. All operations will + * produce the same result for Python, whichever representation is + * used. Both types are treated as an array of code points in + * Python. + *

+ * Most strings used as names (keys) and text are quite + * satisfactorily represented by Java {@code String}. Java + * {@code String}s are compact, but where they contain non-BMP + * characters, these are represented by a pair of code units. That + * makes certain operations (such as indexing or slicing) relatively + * expensive compared to Java. Accessing the code points of a + * {@code String} sequentially is still cheap. + *

+ * By contrast, a {@code PyUnicode} is time-efficient, but each + * character occupies one {@code int}. + */ +public class PyUnicode implements CraftedPyObject, PyDict.Key { + + /** The type {@code str}. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("str", MethodHandles.lookup()) // + .methods(PyUnicodeMethods.class) // + .adopt(String.class)); + + /** + * The actual Python type of this {@code PyUnicode}. + */ + protected PyType type; + + /** + * The implementation holds a Java {@code int} array of code points. + */ + private final int[] value; + + /** + * Helper to implement {@code __getitem__} and other index-related + * operations. + */ + private UnicodeAdapter delegate = new UnicodeAdapter(); + + /** + * Cached hash of the {@code str}, lazily computed in + * {@link #hashCode()}. Zero if unknown, and nearly always unknown + * if zero. + */ + private int hash; + + /** + * Construct an instance of {@code PyUnicode}, a {@code str} or a + * sub-class, from a given array of code points, with the option to + * re-use that array as the implementation. If the actual array is + * re-used the caller must give up ownership and never modify it + * after the call. See {@link #fromCodePoint(int)} for a correct + * use. + * + * @param type actual type the instance should have + * @param iPromiseNotToModify if {@code true}, the array becomes the + * implementation array, otherwise the constructor takes a copy. + * @param codePoints the array of code points + */ + private PyUnicode(PyType type, boolean iPromiseNotToModify, int[] codePoints) { + this.type = type; + if (iPromiseNotToModify) + this.value = codePoints; + else + this.value = Arrays.copyOf(codePoints, codePoints.length); + } + + /** + * Construct an instance of {@code PyUnicode}, a {@code str} or a + * sub-class, from a given array of code points. The constructor + * takes a copy. 
+ * + * @param type actual type the instance should have + * @param codePoints the array of code points + */ + protected PyUnicode(PyType type, int[] codePoints) { this(type, false, codePoints); } + + /** + * Construct an instance of {@code PyUnicode}, a {@code str} or a + * sub-class, from the given code points. The constructor takes a + * copy. + * + * @param codePoints the array of code points + */ + protected PyUnicode(int... codePoints) { this(TYPE, false, codePoints); } + + /** + * Construct an instance of {@code PyUnicode}, a {@code str} or a + * sub-class, from a given {@link IntArrayBuilder}. This will reset + * the builder to empty. + * + * @param value from which to take the code points + */ + protected PyUnicode(IntArrayBuilder value) { this(TYPE, true, value.take()); } + + /** + * Construct an instance of {@code PyUnicode}, a {@code str} or a + * sub-class, from a given {@link IntArrayReverseBuilder}. This will + * reset the builder to empty. + * + * @param value from which to take the code points + */ + protected PyUnicode(IntArrayReverseBuilder value) { this(TYPE, true, value.take()); } + + /** + * Construct an instance of {@code PyUnicode}, a {@code str} or a + * sub-class, from a given Java {@code String}. The constructor + * interprets surrogate pairs as defining one code point. Lone + * surrogates are preserved (e.g. for byte smuggling). + * + * @param type actual type the instance should have + * @param value to have + */ + protected PyUnicode(PyType type, String value) { + /* + * Pass on the type the caller asked for (not TYPE), so that an + * instance of a sub-class reports its own type. The array made by + * toArray() is referenced nowhere else, so it may be adopted + * without copying. + */ + this(type, true, value.codePoints().toArray()); + } + + // Factory methods ------------------------------------------------ + // These may return a Java String or a PyUnicode + + /** + * Unsafely wrap an array of code points as a {@code PyUnicode}. The + * caller must not hold a reference to the argument array (and + * definitely not manipulate the contents). 
+ * + * @param codePoints to wrap as a {@code str} + * @return the {@code str} + */ + private static PyUnicode wrap(int[] codePoints) { + return new PyUnicode(TYPE, true, codePoints); + } + + /** + * Safely wrap the contents of an {@link IntArrayBuilder} of code + * points as a {@code PyUnicode}. + * + * @param codePoints to wrap as a {@code str} + * @return the {@code str} + */ + public static PyUnicode wrap(IntArrayBuilder codePoints) { + return new PyUnicode(codePoints); + } + + /** + * Return a Python {@code str} representing the single character + * with the given code point. The return may be a Java + * {@code String} (for BMP code points) or a {@code PyUnicode}. + * + * @param cp code point to convert + * @return a Python {@code str} + */ + public static Object fromCodePoint(int cp) { + // We really need to know how the string will be used :( + if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) + return String.valueOf((char)cp); + else + return wrap(new int[] {cp}); + } + + /** + * Return a Python {@code str} representing the same sequence of + * characters as the given Java {@code String} and implemented as a + * {@code PyUnicode}. + * + * @param s to convert + * @return a Python {@code str} + */ + public static PyUnicode fromJavaString(String s) { + // XXX share simple cases len==0 len==1 & ascii? 
+ return new PyUnicode(TYPE, s); + } + + @Override + public PyType getType() { return type; } + + // ------------------------------------------------------------------------------------------ + + public static String checkEncoding(String s) { + if (s == null || s.chars().allMatch(c -> c < 128)) { return s; } + return codecs.PyUnicode_EncodeASCII(s, s.length(), null); + } + + // @formatter:off + /* + @ExposedNew + final static PyObject new(PyNewWrapper new_, boolean init, PyType subtype, + PyObject[] args, String[] keywords) { + ArgParser ap = new ArgParser("unicode", args, keywords, + new String[] {"string", "encoding", "errors"}, 0); + PyObject S = ap.getPyObject(0, null); + String encoding = checkEncoding(ap.getString(1, null)); + String errors = checkEncoding(ap.getString(2, null)); + if (new_.for_type == subtype) { + if (S == null) { + return new PyUnicode(""); + } + if (S instanceof PyUnicode) { + return new PyUnicode(((PyUnicode) S).getString()); + } + if (S instanceof PyString) { + if (S.getType() != PyString.TYPE && encoding == null && errors == null) { + return S.__unicode__(); + } + PyObject decoded = codecs.decode((PyString) S, encoding, errors); + if (decoded instanceof PyUnicode) { + return new PyUnicode((PyUnicode) decoded); + } else { + throw new TypeError("decoder did not return an unicode object (type=" + + decoded.getType().fastGetName() + ")"); + } + } + return S.__unicode__(); + } else { + if (S == null) { + return new PyUnicodeDerived(subtype, new PyString("")); + } + if (S instanceof PyUnicode) { + return new PyUnicodeDerived(subtype, (PyUnicode) S); + } else { + return new PyUnicodeDerived(subtype, S.__str__()); + } + } + } + */ + + + // Special methods ------------------------------------------------ + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___str___doc) + */ + @SuppressWarnings("unused") + private Object __str__() { return this; } + + @SuppressWarnings("unused") + private static Object __str__(String self) { return self; } + + /* + 
@ExposedMethod(doc = BuiltinDocs.unicode___repr___doc) + */ + @SuppressWarnings("unused") + private static Object __repr__(Object self) { + try { + // XXX make encode_UnicodeEscape (if needed) take a delegate + return encode_UnicodeEscape(convertToString(self), true); + } catch (NoConversion nc) { + throw Abstract.impossibleArgumentError("str", self); + } + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___len___doc) + */ + private int __len__() { return value.length; } + + @SuppressWarnings("unused") + private static int __len__(String self) { + return self.codePointCount(0, self.length()); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___hash___doc) + */ + private int __hash__() { + // Reproduce on value the hash defined for java.lang.String + if (hash == 0 && value.length > 0) { + int h = 0; + for (int c : value) { + if (Character.isBmpCodePoint(c)) { + // c is represented by itself in a String + h = h * 31 + c; + } else { + // c would be represented in a Java String by: + int hi = (c >>> 10) + HIGH_SURROGATE_OFFSET; + int lo = (c & 0x3ff) + Character.MIN_LOW_SURROGATE; + h = (h * 31 + hi) * 31 + lo; + } + } + hash = h; + } + return hash; + } + + @SuppressWarnings("unused") + private static int __hash__(String self) { return self.hashCode(); } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___getitem___doc) + */ + @SuppressWarnings("unused") + private Object __getitem__(Object item) throws Throwable { + return delegate.__getitem__(item); + } + + @SuppressWarnings("unused") + private static Object __getitem__(String self, Object item) + throws Throwable { + StringAdapter delegate = adapt(self); + return delegate.__getitem__(item); + } + + // Copied from PyString + public Object __tojava__(Class c) { + // XXX something like this necessary in Jython 3 but not used yet + // Need PyUnicode and String versions + if (c.isAssignableFrom(String.class)) { + /* + * If c is a CharSequence we assume the caller is prepared to get maybe not an actual + * String. 
In that case we avoid conversion so the caller can do special stuff with the + * returned PyString or PyUnicode or whatever. (If c is Object.class, the caller usually + * expects to get actually a String) + */ + // XXX this is a bit questionable if non-BMP + return c == CharSequence.class ? this : asString(); + } + + if (c == Character.TYPE || c == Character.class) { + // XXX ? non-BMP + String s = asString(); + if (s.length() == 1) { + return s.charAt(0); + } + } + + if (c.isArray()) { +// if (c.getComponentType() == Byte.TYPE) { +// return toBytes(); +// } + if (c.getComponentType() == Character.TYPE) { + // XXX ? non-BMP + return asString().toCharArray(); + } + } + +// if (c.isAssignableFrom(Collection.class)) { +// List list = new ArrayList(); +// for (int i = 0; i < __len__(); i++) { +// list.add(pyget(i).__tojava__(String.class)); +// } +// return list; +// } + + if (c.isInstance(this)) { + return this; + } + + throw new MissingFeature("default __tojava__ behaviour for %s", c.getSimpleName()); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___contains___doc) + */ + @SuppressWarnings("unused") + private boolean __contains__(Object o) { + return contains(delegate, o); + } + + @SuppressWarnings("unused") + private static boolean __contains__(String self, Object o) { + return contains(adapt(self), o); + } + + private static boolean contains(CodepointDelegate s, Object o) { + try { + CodepointDelegate p = adapt(o); + PySlice.Indices slice = getSliceIndices(s, null, null); + return find(s, p, slice) >= 0; + } catch (NoConversion nc) { + throw Abstract.typeError(IN_STRING_TYPE, o); + } + } + + private static final String IN_STRING_TYPE = + "'in ' requires string as left operand, not %s"; + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___add___doc) + */ + @SuppressWarnings("unused") + private Object __add__(Object w) throws Throwable { + return delegate.__add__(w); + } + + @SuppressWarnings("unused") + private static Object 
__add__(String v, Object w) throws Throwable { + return adapt(v).__add__(w); + } + + @SuppressWarnings("unused") + private Object __radd__(Object v) throws Throwable { + return delegate.__radd__(v); + } + + @SuppressWarnings("unused") + private static Object __radd__(String w, Object v) throws Throwable { + return adapt(w).__radd__(v); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___mul___doc) + */ + private Object __mul__(Object n) throws Throwable { + return delegate.__mul__(n); + } + + private static Object __mul__(String self, Object n) + throws Throwable { + return adapt(self).__mul__(n); + } + + /* + @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___rmul___doc) + */ + @SuppressWarnings("unused") + private Object __rmul__(Object n) throws Throwable { + return __mul__(n); + } + + @SuppressWarnings("unused") + private static Object __rmul__(String self, Object n) + throws Throwable { + return __mul__(self, n); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___mod___doc) + */ + static Object __mod__(Object self, Object other) { + throw new MissingFeature("printf-style formatting"); + } + + + // Strip methods -------------------------------------------------- + + /** + * Python {@code str.strip()}. Any character matching one of those + * in {@code chars} will be discarded from either end of this + * {@code str}. If {@code chars == None}, whitespace will be + * stripped. 
+ * + * @param chars characters to strip from either end of this + * {@code str}, or {@code None} + * @return a new {@code str}, stripped of the specified characters + * @throws TypeError on {@code chars} type errors + */ + @PythonMethod(primary = false) + Object strip(Object chars) throws TypeError { + return strip(delegate, chars); + } + + @PythonMethod + static Object strip(String self, @Default("None") Object chars) throws TypeError { + return strip(adapt(self), chars); + } + + /** + * Inner implementation of Python {@code str.strip()} independent of + * the implementation type. + * + * @param s representing {@code self} + * @param chars to remove, or {@code null} or {@code None} + * @return the {@code str} stripped + * @throws TypeError on {@code chars} type errors + */ + private static Object strip(CodepointDelegate s, Object chars) + throws TypeError { + Set p = adaptStripSet("strip", chars); + int left, right; + if (p == null) { + // Stripping spaces + right = findRight(s); + // If it's all spaces, we know left==0 + left = right < 0 ? 0 : findLeft(s); + } else { + // Stripping specified characters + right = findRight(s, p); + // If it all matches, we know left==0 + left = right < 0 ? 0 : findLeft(s, p); + } + /* + * Substring from leftmost non-matching character up to and + * including the rightmost (or "") + */ + PySlice.Indices slice = getSliceIndices(s, left, right + 1); + return slice.slicelength == 0 ? "" : s.getSlice(slice); + } + + /** + * Helper for {@code strip}, {@code lstrip} implementation, when + * stripping space. + * + * @return index of leftmost non-space character or + * {@code s.length()} if entirely spaces. + */ + private static int findLeft(CodepointDelegate s) { + CodepointIterator si = s.iterator(0); + while (si.hasNext()) { + if (!isPythonSpace(si.nextInt())) + return si.previousIndex(); + } + return s.length(); + } + + /** + * Helper for {@code strip}, {@code lstrip} implementation, when + * stripping specified characters. 
+ * + * @param p specifies set of characters to strip + * @return index of leftmost non-{@code p} character or + * {@code s.length()} if entirely found in {@code p}. + */ + private static int findLeft(CodepointDelegate s, Set p) { + CodepointIterator si = s.iterator(0); + while (si.hasNext()) { + if (!p.contains(si.nextInt())) + return si.previousIndex(); + } + return s.length(); + } + + /** + * Helper for {@code strip}, {@code rstrip} implementation, when + * stripping space. + * + * @return index of rightmost non-space character or {@code -1} if + * entirely spaces. + */ + private static int findRight(CodepointDelegate s) { + CodepointIterator si = s.iteratorLast(); + while (si.hasPrevious()) { + if (!isPythonSpace(si.previousInt())) + return si.nextIndex(); + } + return -1; + } + + /** + * Helper for {@code strip}, {@code rstrip} implementation, when + * stripping specified characters. + * + * @param p specifies set of characters to strip + * @return index of rightmost non-{@code p} character or {@code -1} + * if entirely found in {@code p}. + */ + private static int findRight(CodepointDelegate s, Set p) { + CodepointIterator si = s.iteratorLast(); + while (si.hasPrevious()) { + if (!p.contains(si.previousInt())) + return si.nextIndex(); + } + return -1; + } + + /** + * Python {@code str.lstrip()}. Any character matching one of those + * in {@code chars} will be discarded from the left of this + * {@code str}. If {@code chars == None}, whitespace will be + * stripped. 
+ * + * @param chars characters to strip from this {@code str}, or + * {@code None} + * @return a new {@code str}, left-stripped of the specified + * characters + * @throws TypeError on {@code chars} type errors + */ + Object lstrip(Object chars) throws TypeError { + return lstrip(delegate, chars); + } + + static Object lstrip(String self, Object chars) throws TypeError { + return lstrip(adapt(self), chars); + } + + /** + * Inner implementation of Python {@code str.lstrip()} independent + * of the implementation type. + * + * @param s representing {@code self} + * @param chars to remove, or {@code null} or {@code None} + * @return the str stripped + * @throws TypeError on {@code chars} type errors + */ + private static Object lstrip(CodepointDelegate s, Object chars) + throws TypeError { + Set p = adaptStripSet("lstrip", chars); + int left; + if (p == null) { + // Stripping spaces + left = findLeft(s); + } else { + // Stripping specified characters + left = findLeft(s, p); + } + /* + * Substring from this leftmost non-matching character (or "") + */ + PySlice.Indices slice = getSliceIndices(s, left, null); + return s.getSlice(slice); + } + + /** + * Python {@code str.rstrip()}. Any character matching one of those + * in {@code chars} will be discarded from the right of this + * {@code str}. If {@code chars == None}, whitespace will be + * stripped. + * + * @param chars characters to strip from this {@code str}, or + * {@code None} + * @return a new {@code str}, right-stripped of the specified + * characters + * @throws TypeError on {@code chars} type errors + */ + Object rstrip(Object chars) throws TypeError { + return rstrip(delegate, chars); + } + + static Object rstrip(String self, Object chars) throws TypeError { + return rstrip(adapt(self), chars); + } + + /** + * Inner implementation of Python {@code str.rstrip()} independent + * of the implementation type. 
+ * + * @param s representing {@code self} + * @param chars to remove, or {@code null} or {@code None} + * @return the str stripped + * @throws TypeError on {@code chars} type errors + */ + private static Object rstrip(CodepointDelegate s, Object chars) + throws TypeError { + Set p = adaptStripSet("rstrip", chars); + int right; + if (p == null) { + // Stripping spaces + right = findRight(s); + } else { + // Stripping specified characters + right = findRight(s, p); + } + /* + * Substring up to and including this rightmost non-matching + * character (or "") + */ + PySlice.Indices slice = getSliceIndices(s, null, right + 1); + return s.getSlice(slice); + } + + // Find-like methods ---------------------------------------------- + + // @formatter:off + + /* + * Several methods of str involve finding a target string within the + * object receiving the call, to locate an occurrence, to count or + * replace all occurrences, or to split the string at the first, + * last or all occurrences. + * + * The fundamental algorithms are those that find the substring, + * finding either the first occurrence, by scanning from the start + * forwards, or the last by scanning from the end in reverse. + * + * Follow how find() and rfind() work, and the others will make + * sense too, since they follow the same two patterns, but with + * additional data movement to build the result, or repetition to + * find all occurrences. + */ + + /** + * Return the lowest index in the string where substring {@code sub} + * is found, such that {@code sub} is contained in the slice + * {@code [start:end]}. Arguments {@code start} and {@code end} are + * interpreted as in slice notation, with {@code null} or + * {@link Py#None} representing "missing". + * + * @param sub substring to find. + * @param start start of slice. + * @param end end of slice. + * @return index of {@code sub} in this object or -1 if not found. 
+ * @throws TypeError on {@code sub} type errors + */ + /* + @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_find_doc) + */ + int find(Object sub, Object start, Object end) { + return find(delegate, sub, start, end); + } + + static int find(String self, Object sub, Object start, Object end) { + return find(adapt(self), sub, start, end); + } + + private static int find(CodepointDelegate s, Object sub, + Object start, Object end) { + CodepointDelegate p = adaptSub("find", sub); + PySlice.Indices slice = getSliceIndices(s, start, end); + if (p.length() == 0) + return slice.start; + else + return find(s, p, slice); + } + + /** + * Inner implementation of Python {@code str.find()}. Return the + * index of the leftmost occurrence of a (non-empty) substring in a + * slice of some target string, or {@code -1} if there was no match. + * Each string is specified by its delegate object. + * + * @param s to be searched + * @param p the substring to look for + * @param slice of {@code s} in which to search + * @return the index of the occurrence or {@code -1} + */ + private static int find(CodepointDelegate s, CodepointDelegate p, + PySlice.Indices slice) { + /* + * Create an iterator for p (the needle string) and pick up the + * first character we are seeking. We scan s for pChar = p[0], + * and when it matches, divert into a full check using this + * iterator. + */ + CodepointIterator pi = p.iterator(0); + int pChar = pi.nextInt(), pLength = p.length(); + CodepointIterator.Mark pMark = pi.mark(); // at p[1] + assert pLength > 0; + + // Counting in pos avoids hasNext() calls + int pos = slice.start, lastPos = slice.stop - pLength; + + // An iterator on s[start:end], the string being searched + CodepointIterator si = s.iterator(pos, slice.start, slice.stop); + + while (pos++ <= lastPos) { + if (si.nextInt() == pChar) { + /* + * s[pos] matched p[0]: divert into matching the rest of + * p. 
Leave a mark in s where we shall resume if this is + * not a full match with p. + */ + CodepointIterator.Mark sPos = si.mark(); + int match = 1; + while (match < pLength) { + if (pi.nextInt() != si.nextInt()) { break; } + match++; + } + // If we reached the end of p it's a match + if (match == pLength) { return pos - 1; } + // We stopped on a mismatch: reset si and pi + sPos.restore(); + pMark.restore(); + } + } + return -1; + } + + /** + * Return the highest index in the string where substring + * {@code sub} is found, such that {@code sub} is contained in the + * slice {@code [start:end]}. Arguments {@code start} and + * {@code end} are interpreted as in slice notation, with null or + * {@link Py#None} representing "missing". + * + * @param sub substring to find. + * @param start start of slice. + * @param end end of slice. + * @return index of {@code sub} in this object or -1 if not found. + */ + /* + @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_rfind_doc) + */ + int rfind(Object sub, Object start, Object end) { + return rfind(delegate, sub, start, end); + } + + static int rfind(String self, Object sub, Object start, + Object end) { + return rfind(adapt(self), sub, start, end); + } + + private static int rfind(CodepointDelegate s, Object sub, + Object start, Object end) { + CodepointDelegate p = adaptSub("rfind", sub); + PySlice.Indices slice = getSliceIndices(s, start, end); + if (p.length() == 0) + return slice.stop; + else + return rfind(s, p, slice); + } + + /** + * Inner implementation of Python {@code str.rfind()}. Return the + * index of the rightmost occurrence of a (non-empty) substring in a + * slice of some target string, or {@code -1} if there was no match. + * Each string is specified by its delegate object. 
+ * + * @param s to be searched + * @param p the substring to look for + * @param slice of {@code s} in which to search + * @return the index of the occurrence or {@code -1} + */ + private static int rfind(CodepointDelegate s, CodepointDelegate p, + PySlice.Indices slice) { + /* + * Create an iterator for p (the needle string) and pick up the + * last character we are seeking. We scan s in reverse for pChar + * = p[-1], and when it matches, divert into a full check using + * this iterator. + */ + int pLength = p.length(); + CodepointIterator pi = p.iterator(pLength); + int pChar = pi.previousInt(); + CodepointIterator.Mark pMark = pi.mark(); // p[-1] + + // Counting in pos avoids hasNext() calls. Start at the end. + int pos = slice.stop, firstPos = slice.start + (pLength - 1); + + // An iterator on s[start:end], the string being searched. + CodepointIterator si = s.iterator(pos, slice.start, slice.stop); + + while (--pos >= firstPos) { + if (si.previousInt() == pChar) { + /* + * s[pos] matched p[-1]: divert into matching the rest + * of p (still in reverse). Leave a mark in s where we + * shall resume if this is not a full match with p. + */ + CodepointIterator.Mark sPos = si.mark(); + int match = 1; + while (match < pLength) { + if (pi.previousInt() != si.previousInt()) { break; } + match++; + } + // If we reached the start of p it's a match + if (match == pLength) { return pos - (pLength - 1); } + // We stopped on a mismatch: reset si and pi + sPos.restore(); + pMark.restore(); + } + } + return -1; + } + + /** + * Python {@code str.partition()}, splits the {@code str} at the + * first occurrence of {@code sep} returning a {@link PyTuple} + * containing the part before the separator, the separator itself, + * and the part after the separator. 
+ * + * @param sep on which to split the string + * @return tuple of parts + */ + /* + @ExposedMethod(doc = BuiltinDocs.unicode_partition_doc) + */ + PyTuple partition(Object sep) { + PyTuple r = partition(delegate, sep); + return r != null ? r : Py.tuple(this, "", ""); + } + + static PyTuple partition(String self, Object sep) { + PyTuple r = partition(adapt(self), sep); + return r != null ? r : Py.tuple(self, "", ""); + } + + /** + * Inner implementation of Python {@code str.partition()}. Return a + * {@code tuple} of the split result {@code (before, sep, after)}, + * or {@code null} if there was no match. + * + * @param s to be split + * @param sep the separator to look for + * @return tuple of parts or {@code null} + */ + private static PyTuple partition(CodepointDelegate s, Object sep) { + /* + * partition() uses the same pattern as find(), with the + * difference that it records characters in a buffer as it scans + * them, and the slice is always the whole string. + */ + // An iterator on p, the separator. + CodepointDelegate p = adaptSeparator("partition", sep); + CodepointIterator pi = p.iterator(0); + int sChar, pChar = pi.nextInt(), pLength = p.length(); + CodepointIterator.Mark pMark = pi.mark(); + assert pLength > 0; + + // Counting in pos avoids hasNext() calls. + int pos = 0, lastPos = s.length() - pLength; + + // An iterator on s, the string being split. + CodepointIterator si = s.iterator(pos); + IntArrayBuilder buffer = new IntArrayBuilder(); + + while (pos++ <= lastPos) { + if ((sChar = si.nextInt()) == pChar) { + /* + * s[pos] matched p[0]: divert into matching the rest of + * p. Leave a mark in s where we shall resume if this is + * not a full match with p. + */ + CodepointIterator.Mark sPos = si.mark(); + int match = 1; + while (match < pLength) { + if (pi.nextInt() != si.nextInt()) { break; } + match++; + } + // If we reached the end of p it's a match + if (match == pLength) { + // Grab what came before the match. 
+ Object before = wrap(buffer.take()); + // Now consume (the known length) after the match. + buffer = new IntArrayBuilder(lastPos - pos + 1); + buffer.append(si); + Object after = wrap(buffer.take()); + // Return a result tuple + return Py.tuple(before, sep, after); + } + // We stopped on a mismatch: reset si and pi + sPos.restore(); + pMark.restore(); + } + // If we didn't return a result, consume one character + buffer.append(sChar); + } + // If we didn't return a result, there was no match + return null; + } + /** + * Python {@code str.rpartition()}, splits the {@code str} at the + * last occurrence of {@code sep}. Return a {@code tuple} containing + * the part before the separator, the separator itself, and the part + * after the separator. + * + * @param sep on which to split the string + * @return tuple of parts + */ + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_rpartition_doc) + */ + PyTuple rpartition(Object sep) { + PyTuple r; + r = rpartition(delegate, sep); + return r != null ? r : Py.tuple(this, "", ""); + } + + static PyTuple rpartition(String self, Object sep) { + PyTuple r = rpartition(adapt(self), sep); + return r != null ? r : Py.tuple(self, "", ""); + } + + /** + * Helper to Python {@code str.rpartition()}. Return a {@code tuple} + * of the split result {@code (before, sep, after)}, or {@code null} + * if there was no match. + * + * @param s to be split + * @param sep the separator to look for + * @return tuple of parts or {@code null} + */ + private static PyTuple rpartition(CodepointDelegate s, Object sep) { + /* + * Create an iterator for p (the needle string) and pick up the + * last character p[-1] we are seeking. We reset the iterator to + * that position (pChar is still valid) when a match to p is + * begun but proves partial. 
+ */ + CodepointDelegate p = adaptSeparator("rpartition", sep); + CodepointIterator pi = p.iteratorLast(); + int sChar, pChar = pi.previousInt(), pLength = p.length(); + CodepointIterator.Mark pMark = pi.mark(); + assert pLength > 0; + + // Counting in pos avoids hasNext() calls. Start at the end. + int pos = s.length(), firstPos = pLength - 1; + + // An iterator on s, the string being split. + CodepointIterator si = s.iterator(pos); + IntArrayReverseBuilder buffer = new IntArrayReverseBuilder(); + + while (--pos >= firstPos) { + if ((sChar = si.previousInt()) == pChar) { + /* + * s[pos] matched p[-1]: divert into matching the rest + * of p (still in reverse). Leave a mark in s where we + * shall resume if this is not a full match with p. + */ + CodepointIterator.Mark sPos = si.mark(); + int match = 1; + while (match < pLength) { + if (pi.previousInt() != si.previousInt()) { break; } + match++; + } + // If we reached the end of p it's a match + if (match == pLength) { + // Grab what came after the match. + Object after = wrap(buffer.take()); + // Now consume (the known length) before the match. + buffer = new IntArrayReverseBuilder(si.nextIndex()); + buffer.prepend(si); + Object before = wrap(buffer.take()); + // Return a result + return Py.tuple(before, sep, after); + } + // We stopped on a mismatch: reset si and pi + sPos.restore(); + pMark.restore(); + } + // If we didn't return a result, consume one character + buffer.prepend(sChar); + } + // If we didn't return a result, there was no match + return null; + } + + /** + * Python {@code str.split([sep [, maxsplit]])} returning a + * {@link PyList} of {@code str}. The target {@code self} will be + * split at each occurrence of {@code sep}. If {@code sep == null}, + * whitespace will be used as the criterion. If {@code sep} has zero + * length, a Python {@code ValueError} is raised. 
If + * {@code maxsplit} >=0 and there are more feasible splits than + * {@code maxsplit} the last element of the list contains what is + * left over after the last split. + * + * @param sep string to use as separator (or {@code null} if to + * split on whitespace) + * @param maxsplit maximum number of splits to make (there may be + * {@code maxsplit+1} parts) or {@code -1} for all possible. + * @return list(str) result + */ + // split(self, /, sep=None, maxsplit=-1) + @PythonMethod(positionalOnly = false) + PyList split(@Default("None") Object sep, @Default("-1") int maxsplit) { + return split(delegate, sep, maxsplit); + } + + @PythonMethod(primary = false) + static PyList split(String self, Object sep, int maxsplit) { + return split(adapt(self), sep, maxsplit); + } + + private static PyList split(CodepointDelegate s, Object sep, + int maxsplit) { + if (sep == null || sep == Py.None) { + // Split on runs of whitespace + return splitAtSpaces(s, maxsplit); + } else if (maxsplit == 0) { + // Easy case: a list containing self. + PyList list = new PyList(); + list.add(s.principal()); + return list; + } else { + // Split on specified (non-empty) string + CodepointDelegate p = adaptSeparator("split", sep); + return split(s, p, maxsplit); + } + } + + /** + * Implementation of {@code str.split} splitting on white space and + * returning a list of the separated parts. If there are more than + * {@code maxsplit} feasible splits the last element of the list is + * the remainder of the original ({@code self}) string. + * + * @param s delegate presenting self as code points + * @param maxsplit limit on the number of splits (if >=0) + * @return {@code PyList} of split sections + */ + private static PyList splitAtSpaces(CodepointDelegate s, + int maxsplit) { + /* + * Result built here is a list of split parts, exactly as + * required for s.split(None, maxsplit). If there are to be n + * splits, there will be n+1 elements in L. 
+ */ + PyList list = new PyList(); + + // -1 means make all possible splits, at most: + if (maxsplit < 0) { maxsplit = s.length(); } + + // An iterator on s, the string being searched + CodepointIterator si = s.iterator(0); + IntArrayBuilder segment = new IntArrayBuilder(); + + while (si.hasNext()) { + // We are currently scanning space characters + while (si.hasNext()) { + int c; + if (!isPythonSpace(c = si.nextInt())) { + // Just read a non-space: start a segment + segment.append(c); + break; + } + } + + /* + * Either s ran out while we were scanning space characters, + * or we have started a new segment. If s ran out, we'll + * burn past the next loop. If s didn't run out, the next + * loop accumulates the segment until the next space (or s + * runs out). + */ + + // We are currently building a non-space segment + while (si.hasNext()) { + int c = si.nextInt(); + // Twist: if we've run out of splits, append c anyway. + if (maxsplit > 0 && isPythonSpace(c)) { + // Just read a space: end the segment + break; + } else { + // Non-space, or last allowed segment + segment.append(c); + } + } + + /* + * Either s ran out while we were scanning space characters, + * or we have created a new segment. (It is possible s ran + * out while we created the segment, but that's ok.) + */ + if (segment.length() > 0) { + // We created a segment. + --maxsplit; + list.add(wrap(segment.take())); + } + } + return list; + } + + /** + * Implementation of Python {@code str.split}, returning a list of + * the separated parts. If there are more than {@code maxsplit} + * occurrences of {@code sep} the last element of the list is the + * remainder of the original ({@code self}) string. 
+ * + * @param s delegate presenting self as code points + * @param p at occurrences of which {@code s} should be split + * @param maxsplit limit on the number of splits (if not <=0) + * @return {@code PyList} of split sections + */ + private static PyList split(CodepointDelegate s, + CodepointDelegate p, int maxsplit) { + /* + * The structure of split() resembles that of count() in that + * after a match we keep going. And it resembles partition() in + * that, between matches, we are accumulating characters into a + * segment buffer. + */ + + // -1 means make all possible splits, at most: + if (maxsplit < 0) { maxsplit = s.length(); } + + // An iterator on p, the string sought. + CodepointIterator pi = p.iterator(0); + int pChar = pi.nextInt(), pLength = p.length(); + CodepointIterator.Mark pMark = pi.mark(); + assert pLength > 0; + + // Counting in pos avoids hasNext() calls. + int pos = 0, lastPos = s.length() - pLength, sChar; + + // An iterator on s, the string being searched. + CodepointIterator si = s.iterator(pos); + + // Result built here is a list of split segments + PyList list = new PyList(); + IntArrayBuilder segment = new IntArrayBuilder(); + + while (si.hasNext()) { + + if (pos++ > lastPos || maxsplit <= 0) { + /* + * We are too close to the end for a match now, or in + * our final segment (according to maxsplit==0). + * Everything that is left belongs to this segment. + */ + segment.append(si); + + } else if ((sChar = si.nextInt()) == pChar) { + /* + * s[pos] matched p[0]: divert into matching the rest of + * p. Leave a mark in s where we shall resume if this is + * not a full match with p. + */ + CodepointIterator.Mark sPos = si.mark(); + int match = 1; + while (match < pLength) { + if (pi.nextInt() != si.nextInt()) { break; } + match++; + } + + if (match == pLength) { + /* + * We reached the end of p: it's a match. Emit the + * segment we have been accumulating, start a new + * one, and count a split. 
+ */ + list.add(wrap(segment.take())); + --maxsplit; + // Catch pos up with si (matches do not overlap). + pos = si.nextIndex(); + } else { + /* + * We stopped on a mismatch: reset si to pos. The + * character that matched pChar is part of the + * current segment. + */ + sPos.restore(); + segment.append(sChar); + } + // In either case, reset pi to p[1]. + pMark.restore(); + + } else { + /* + * The character that wasn't part of a match with p is + * part of the current segment. + */ + segment.append(sChar); + } + } + + /* + * Add the segment we were building when s ran out, even if it + * is empty. + */ + list.add(wrap(segment.take())); + return list; + } + + /** + * Python {@code str.rsplit([sep [, maxsplit]])} returning a + * {@link PyList} of {@code str}. The target {@code self} will be + * split at each occurrence of {@code sep}. If {@code sep == null}, + * whitespace will be used as the criterion. If {@code sep} has zero + * length, a Python {@code ValueError} is raised. If + * {@code maxsplit} >=0 and there are more feasible splits than + * {@code maxsplit} the last element of the list contains what is + * left over after the last split. + * + * @param sep string to use as separator (or {@code null} if to + * split on whitespace) + * @param maxsplit maximum number of splits to make (there may be + * {@code maxsplit+1} parts) or {@code -1} for all possible. + * @return list(str) result + */ + /* + @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc) + */ + PyList rsplit(Object sep, int maxsplit) { + return rsplit(delegate, sep, maxsplit); + } + + static PyList rsplit(String self, Object sep, int maxsplit) { + return rsplit(adapt(self), sep, maxsplit); + } + + private static PyList rsplit(CodepointDelegate s, Object sep, + int maxsplit) { + if (sep == null || sep == Py.None) { + // Split on runs of whitespace + return rsplitAtSpaces(s, maxsplit); + } else if (maxsplit == 0) { + // Easy case: a list containing self. 
+ PyList list = new PyList(); + list.add(s.principal()); + return list; + } else { + // Split on specified (non-empty) string + CodepointDelegate p = adaptSeparator("rsplit", sep); + return rsplit(s, p, maxsplit); + } + } + + /** + * Implementation of {@code str.rsplit} splitting on white space and + * returning a list of the separated parts. If there are more than + * {@code maxsplit} feasible splits the last element of the list is + * the remainder of the original ({@code self}) string. + * + * @param s delegate presenting self as code points + * @param maxsplit limit on the number of splits (if >=0) + * @return {@code PyList} of split sections + */ + private static PyList rsplitAtSpaces(CodepointDelegate s, + int maxsplit) { + /* + * Result built here is a list of split parts, exactly as + * required for s.rsplit(None, maxsplit). If there are to be n + * splits, there will be n+1 elements in L. + */ + PyList list = new PyList(); + + // -1 means make all possible splits, at most: + if (maxsplit < 0) { maxsplit = s.length(); } + + // A reverse iterator on s, the string being searched + CodepointIterator si = s.iteratorLast(); + IntArrayReverseBuilder segment = new IntArrayReverseBuilder(); + + while (si.hasPrevious()) { + // We are currently scanning space characters + while (si.hasPrevious()) { + int c; + if (!isPythonSpace(c = si.previousInt())) { + // Just read a non-space: start a segment + segment.prepend(c); + break; + } + } + + /* + * Either s ran out while we were scanning space characters, + * or we have started a new segment. If s ran out, we'll + * burn past the next loop. If s didn't run out, the next + * loop accumulates the segment until the next space (or s + * runs out). + */ + + // We are currently building a non-space segment + while (si.hasPrevious()) { + int c = si.previousInt(); + // Twist: if we've run out of splits, prepend c anyway. 
/**
 * Implementation of Python {@code str.rsplit} for an explicit
 * separator, returning a list of the separated parts. The string is
 * scanned from its end towards its start. If there are more than
 * {@code maxsplit} occurrences of {@code p}, the <em>first</em>
 * element of the returned list is the unsplit remainder from the
 * front of the original ({@code self}) string.
 *
 * @param s delegate presenting self as code points
 * @param p at occurrences of which {@code s} should be split (the
 *     code asserts that it is not empty)
 * @param maxsplit limit on the number of splits; a negative value
 *     is replaced by {@code s.length()}, which no number of splits
 *     can exceed
 * @return {@code PyList} of split sections, in left-to-right order
 */
private static PyList rsplit(CodepointDelegate s,
        CodepointDelegate p, int maxsplit) {
    /*
     * The structure of rsplit() resembles that of count() in that
     * after a match we keep going. And it resembles rpartition() in
     * that, between matches, we are accumulating characters into a
     * segment buffer, and we are working backwards from the end.
     */

    // -1 means make all possible splits, at most:
    if (maxsplit < 0) { maxsplit = s.length(); }

    // A reverse iterator on p, the string sought.
    CodepointIterator pi = p.iteratorLast();
    // pChar is p[-1]; the mark is taken just after reading it.
    int pChar = pi.previousInt(), pLength = p.length();
    CodepointIterator.Mark pMark = pi.mark();
    assert pLength > 0;

    /*
     * Counting backwards in pos we recognise when there can be no
     * further matches: a match ending at index pos needs at least
     * pLength-1 characters before it.
     */
    int pos = s.length(), firstPos = pLength - 1, sChar;

    // An iterator on s, the string being searched.
    CodepointIterator si = s.iterator(pos);

    // Result built here is a list of split segments
    PyList list = new PyList();
    IntArrayReverseBuilder segment = new IntArrayReverseBuilder();

    while (si.hasPrevious()) {
        if (--pos < firstPos || maxsplit <= 0) {
            /*
             * We are too close to the start for a match now, or in
             * our final segment (according to maxsplit==0).
             * Everything that is left belongs to this segment.
             */
            segment.prepend(si);
        } else if ((sChar = si.previousInt()) == pChar) {
            /*
             * s[pos] matched p[-1]: divert into matching the rest
             * of p. Leave a mark in s where we shall resume if this
             * is not a full match with p.
             */
            CodepointIterator.Mark sPos = si.mark();
            int match = 1;
            while (match < pLength) {
                if (pi.previousInt() != si.previousInt()) { break; }
                match++;
            }

            if (match == pLength) {
                /*
                 * We reached the start of p: it's a match. Emit the
                 * segment we have been accumulating, start a new
                 * one, and count a split.
                 */
                list.add(wrap(segment.take()));
                --maxsplit;
                // Catch pos up with si (matches do not overlap).
                pos = si.nextIndex();
            } else {
                /*
                 * We stopped on a mismatch: reset si to pos. The
                 * character that matched pChar is part of the
                 * current segment.
                 */
                sPos.restore();
                segment.prepend(sChar);
            }
            /*
             * In either case, reset pi to the mark taken just after
             * p[-1] was read, so that the next read is p[-2].
             */
            pMark.restore();

        } else {
            /*
             * The character that wasn't part of a match with p is
             * part of the current segment.
             */
            segment.prepend(sChar);
        }
    }

    /*
     * Add the segment we were building when s ran out, even if it
     * is empty. Note the list is backwards and we must reverse it.
     */
    list.add(wrap(segment.take()));
    list.reverse();
    return list;
}

/**
 * Inner implementation of {@code str.splitlines}: scan {@code s}
 * once, cutting it at each character accepted by
 * {@code isPythonLineSeparator}, and treating the pair CR-LF as a
 * single separator.
 *
 * @param s delegate presenting self as code points
 * @param keepends if {@code true}, each line keeps the separator
 *     (one character, or both characters of CR-LF) that ended it
 * @return the list of lines; a final line is added only if it is
 *     not empty
 */
private static PyList splitlines(CodepointDelegate s,
        boolean keepends) {
    /*
     * The structure of splitlines() resembles that of split() for
     * explicit strings, except that the criteria for recognising
     * the "needle" are implicit.
     */
    // An iterator on s, the string being searched.
    CodepointIterator si = s.iterator(0);

    // Result built here is a list of split segments
    PyList list = new PyList();
    IntArrayBuilder line = new IntArrayBuilder();

    /*
     * We scan the input string looking for characters that mark
     * line endings, and appending to the line buffer as we go. Each
     * detected ending makes a PyUnicode to add to list.
     */
    while (si.hasNext()) {

        int c = si.nextInt();

        if (isPythonLineSeparator(c)) {
            // Check for a possible CR-LF combination
            if (c == '\r' && si.hasNext()) {
                // Might be ... have to peek ahead
                int c2 = si.nextInt();
                if (c2 == '\n') {
                    // We're processing CR-LF: keep the \r if asked
                    if (keepends) { line.append(c); }
                    // Leave the \n for the main path to deal with
                    c = c2;
                } else {
                    // There was no \n following \r: undo the read
                    si.previousInt();
                }
            }
            // Optionally append the (single) line separator c
            if (keepends) { line.append(c); }
            // Emit the line (and start another)
            list.add(wrap(line.take()));

        } else {
            // c is part of the current line.
            line.append(c);
        }
    }

    /*
     * Add the segment we were building when s ran out, but not if
     * it is empty.
     */
    if (line.length() > 0) { list.add(wrap(line.take())); }

    return list;
}
/**
 * The inner implementation of {@code str.count}, returning the
 * number of non-overlapping occurrences of a non-empty substring
 * {@code p} within the slice of {@code s} described by
 * {@code slice}. The slice indices are used as given: the caller
 * has already resolved {@code None} or end-relative arguments.
 *
 * @param s delegate presenting self as code points
 * @param p substring to find (the code asserts it is not empty)
 * @param slice the range of {@code s} to search, as
 *     {@code [slice.start:slice.stop]}
 * @return count of occurrences
 */
private static int count(CodepointDelegate s, CodepointDelegate p,
        PySlice.Indices slice) {
    /*
     * count() uses the same pattern as find(), with the difference
     * that it keeps going rather than returning on the first match.
     */
    // An iterator on p, the string sought.
    CodepointIterator pi = p.iterator(0);
    // pChar is p[0]; the mark is taken just after reading it.
    int pChar = pi.nextInt(), pLength = p.length();
    CodepointIterator.Mark pMark = pi.mark();
    assert pLength > 0;

    // Counting in pos avoids hasNext() calls.
    // lastPos is the last index at which a match could begin.
    int pos = slice.start, lastPos = slice.stop - pLength;

    // An iterator on s[start:end], the string being searched.
    CodepointIterator si = s.iterator(pos, slice.start, slice.stop);
    int count = 0;

    while (pos++ <= lastPos) {
        if (si.nextInt() == pChar) {
            /*
             * s[pos] matched p[0]: divert into matching the rest of
             * p. Leave a mark in s where we shall resume if this is
             * not a full match with p.
             */
            CodepointIterator.Mark sPos = si.mark();
            int match = 1;
            while (match < pLength) {
                if (pi.nextInt() != si.nextInt()) { break; }
                match++;
            }
            if (match == pLength) {
                // We reached the end of p: it's a match.
                count++;
                // Catch pos up with si (matches do not overlap).
                pos = si.nextIndex();
            } else {
                // We stopped on a mismatch: reset si to pos.
                sPos.restore();
            }
            // In either case, reset pi to p[1].
            pMark.restore();
        }
    }
    return count;
}
+ */ + @PythonMethod + Object replace(Object old, @Name("new") Object rep, @Default("-1") int count) { + return replace(delegate, old, rep, count); + } + + @PythonMethod(primary = false) + static Object replace(String self, Object old, Object rep, int count) { + return replace(adapt(self), old, rep, count); + } + + private static Object replace(CodepointDelegate s, Object old, Object rep, int count) { + // Convert arguments to their delegates or error + CodepointDelegate p = adaptSub("replace", old); + CodepointDelegate n = adaptRep("replace", rep); + if (p.length() == 0) { + return replace(s, n, count); + } else { + return replace(s, p, n, count); + } + } + + /** + * Implementation of Python {@code str.replace} in the case where + * the substring to find has zero length. This must result in the + * insertion of the replacement string at the start if the result + * and after every character copied from s, up to the limit imposed + * by {@code count}. For example {@code 'hello'.replace('', '-')} + * returns {@code '-h-e-l-l-o-'}. This is {@code N+1} replacements, + * where {@code N = s.length()}, or as limited by {@code count}. 
+ * + * @param s delegate presenting self as code points + * @param r delegate representing the replacement string + * @param count limit on the number of replacements + * @return string interleaved with the replacement + */ + private static Object replace(CodepointDelegate s, + CodepointDelegate r, int count) { + + // -1 means make all replacements, which is exactly: + if (count < 0) { + count = s.length() + 1; + } else if (count == 0) { + // Zero replacements: short-cut return the original + return s.principal(); + } + + CodepointIterator si = s.iterator(0); + + // The result will be this size exactly + // 'hello'.replace('', '-', 3) == '-h-e-llo' + IntArrayBuilder result = + new IntArrayBuilder(s.length() + r.length() * count); + + // Start with the a copy of the replacement + result.append(r); + + // Put another copy of after each of count-1 characters of s + for (int i = 1; i < count; i++) { + assert si.hasNext(); + result.append(si.nextInt()).append(r); + } + + // Now copy any remaining characters of s + result.append(si); + return wrap(result.take()); + } + + /** + * Implementation of Python {@code str.replace} in the case where + * the substring to find has non-zero length, up to the limit + * imposed by {@code count}. + * + * @param s delegate presenting self as code points + * @param p delegate representing the string to replace + * @param r delegate representing the replacement string + * @param count limit on the number of replacements + * @return string with the replacements + */ + private static Object replace(CodepointDelegate s, + CodepointDelegate p, CodepointDelegate r, int count) { + + // -1 means make all replacements, but cannot exceed: + if (count < 0) { + count = s.length() + 1; + } else if (count == 0) { + // Zero replacements: short-cut return the original + return s.principal(); + } + + /* + * The structure of replace is a lot like that of split(), in + * that we iterate over s, copying as we go. 
The difference is + * the action we take upon encountering and instance of the + * "needle" string, which here is to emit the replacement into + * the result, rather than start a new segment. + */ + + // An iterator on p, the string sought. + CodepointIterator pi = p.iterator(0); + int pChar = pi.nextInt(), pLength = p.length(); + CodepointIterator.Mark pMark = pi.mark(); + assert pLength > 0; + + // An iterator on r, the replacement string. + CodepointIterator ri = r.iterator(0); + CodepointIterator.Mark rMark = ri.mark(); + + // Counting in pos avoids hasNext() calls. + int pos = 0, lastPos = s.length() - pLength, sChar; + + // An iterator on s, the string being searched. + CodepointIterator si = s.iterator(pos); + + // Result built here + IntArrayBuilder result = new IntArrayBuilder(); + + while (si.hasNext()) { + + if (pos++ > lastPos || count <= 0) { + /* + * We are too close to the end for a match now, or we + * have run out of permission to make (according to + * count==0). Everything that is left may be added to + * the result. + */ + result.append(si); + + } else if ((sChar = si.nextInt()) == pChar) { + /* + * s[pos] matched p[0]: divert into matching the rest of + * p. Leave a mark in s where we shall resume if this is + * not a full match with p. + */ + CodepointIterator.Mark sPos = si.mark(); + int match = 1; + while (match < pLength) { + if (pi.nextInt() != si.nextInt()) { break; } + match++; + } + + if (match == pLength) { + /* + * We reached the end of p: it's a match. Emit the + * replacement string to the result and lose a life. + */ + result.append(ri); + rMark.restore(); + --count; + // Catch pos up with si (matches do not overlap). + pos = si.nextIndex(); + } else { + /* + * We stopped on a mismatch: reset si to pos. The + * character that matched pChar is part of the + * result. + */ + sPos.restore(); + result.append(sChar); + } + // In either case, reset pi to p[1]. 
+ pMark.restore(); + + } else { + /* + * The character that wasn't part of a match with p is + * part of the result. + */ + result.append(sChar); + } + } + + return wrap(result.take()); + } + + // @formatter:off + + // Transformation methods ----------------------------------------- + + /* + * We group here methods that are simple transformation functions of + * the string, based on tests of character properties, for example + * str.strip() and str.title(). + */ + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_lower_doc) + */ + PyUnicode lower() { return mapChars(Character::toLowerCase); } + + static String lower(String self) { + return mapChars(self, Character::toLowerCase); + } + + @PythonMethod + PyUnicode upper() { return mapChars(Character::toUpperCase); } + + @PythonMethod(primary = false) + static String upper(String self) { + return mapChars(self, Character::toUpperCase); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_title_doc) + */ + PyUnicode title() { return title(delegate); } + + static PyUnicode title(String self) { return title(adapt(self)); } + + private static PyUnicode title(PySequence.OfInt s) { + IntArrayBuilder buffer = new IntArrayBuilder(s.length()); + boolean previousCased = false; + for (int c : s) { + if (previousCased) { + buffer.append(Character.toLowerCase(c)); + } else { + buffer.append(Character.toTitleCase(c)); + } + previousCased = + Character.isLowerCase(c) || Character.isUpperCase(c) + || Character.isTitleCase(c); + } + return wrap(buffer.take()); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_swapcase_doc) + */ + PyUnicode swapcase() { return mapChars(PyUnicode::swapcase); } + + static String swapcase(String self) { + return mapChars(self, PyUnicode::swapcase); + } + + private static int swapcase(int c) { + if (Character.isUpperCase(c)) { + return Character.toLowerCase(c); + } else if (Character.isLowerCase(c)) { + return Character.toUpperCase(c); + } else { + return c; + } + } + + /* + @ExposedMethod(defaults = 
"null", doc = BuiltinDocs.unicode_ljust_doc) + */ + Object ljust(int width, Object fillchar) { + return pad(false, delegate, true, width, + adaptFill("ljust", fillchar)); + } + + static Object ljust(String self, int width, Object fillchar) { + return pad(false, adapt(self), true, width, + adaptFill("ljust", fillchar)); + } + + /* + @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode__doc) + */ + Object rjust(int width, Object fillchar) { + return pad(true, delegate, false, width, + adaptFill("rjust", fillchar)); + } + + static Object rjust(String self, int width, Object fillchar) { + return pad(true, adapt(self), false, width, + adaptFill("rjust", fillchar)); + } + + /* + @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_rjust_doc) + */ + Object center(int width, Object fillchar) { + return pad(true, delegate, true, width, + adaptFill("center", fillchar)); + } + + static Object center(String self, int width, Object fillchar) { + return pad(true, adapt(self), true, width, + adaptFill("center", fillchar)); + } + + /** + * Common code for {@link #ljust(int, Object) ljust}, + * {@link #rjust(int, Object) rjust} and {@link #center(int, Object) + * center}. + * + * @param left whether to pad at the left + * @param s the {@code self} string + * @param right whether to pad at the right + * @param width the minimum width to attain + * @param fill the code point value to use as the fill + * @return the padded string (or {@code s.principal()}) + */ + private static Object pad(boolean left, CodepointDelegate s, + boolean right, int width, int fill) { + // Work out how much (or whether) to pad at the left and right. + int L = s.length(), pad = Math.max(width, L) - L; + if (pad == 0) { return s.principal(); } + + // It suits us to assume all right padding to begin with. 
+ int leftPad = 0, rightPad = pad; + if (left) { + if (!right) { + // It is all on the left + leftPad = pad; + rightPad = 0; + } else { + // But sometimes you have to be Dutch + leftPad = pad / 2 + (pad & width & 1); + rightPad = width - leftPad; + } + } + + // Now, use a builder to create the result + IntArrayBuilder buf = new IntArrayBuilder(width); + + for (int i = 0; i < leftPad; i++) { buf.append(fill); } + buf.append(s); + for (int i = 0; i < rightPad; i++) { buf.append(fill); } + return wrap(buf.take()); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_zfill_doc) + */ + Object zfill(int width) { + return zfill(delegate, width); + } + + static Object zfill(String self, int width) { + return zfill(adapt(self), width); + } + + /** + * Inner implementation of {@link #zfill(int) zfill} + * + * @param s the {@code self} string + * @param width the achieve by inserting zeros + * @return the filled string + */ + private static Object zfill(CodepointDelegate s, int width) { + // Work out how much to pad. 
+ int L = s.length(), pad = Math.max(width, L) - L; + if (pad == 0) { return s.principal(); } + + // Now, use a builder to create the result of the padded width + IntArrayBuilder buf = new IntArrayBuilder(width); + CodepointIterator si = s.iterator(0); + + // Possible sign goes first + if (si.hasNext()) { + int c = si.nextInt(); + if (c == '+' || c == '-') { + buf.append(c); + } else { + si.previousInt(); + } + } + + // Now the computed number of zeros + for (int i = 0; i < pad; i++) { buf.append('0'); } + buf.append(si); + return wrap(buf.take()); + } + + /* + @ExposedMethod(defaults = "8", doc = BuiltinDocs.str_expandtabs_doc) + */ + Object expandtabs(int tabsize) { + return expandtabs(delegate, tabsize); + } + + static Object expandtabs(String self, int tabsize) { + return expandtabs( adapt(self), tabsize); + } + + /** + * Inner implementation of {@link #expandtabs() expandtabs} + * + * @param s the {@code self} string + * @param tabsize number of spaces to tab to + * @return tab-expanded string + */ + private static Object expandtabs(CodepointDelegate s, int tabsize) { + // Build the result in buf. It can be multi-line. + IntArrayBuilder buf = new IntArrayBuilder(s.length()); + // Iterate through s, keeping track of position on line. 
+ CodepointIterator si = s.iterator(0); + int pos = 0; + while (si.hasNext()) { + int c = si.nextInt(); + if (c == '\t') { + int spaces = tabsize - pos % tabsize; + while (spaces-- > 0) { buf.append(' '); } + pos += spaces; + } else { + if (c == '\n' || c == '\r') { pos = -1; } + buf.append(c); + pos++; + } + } + return wrap(buf.take()); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_capitalize_doc) + */ + Object capitalize() { return capitalize(delegate); } + + static Object capitalize(String self) { + return capitalize(adapt(self)); + } + + /** + * Inner implementation of {@link #capitalize() capitalize} + * + * @param s the {@code self} string + * @return capitalised string + */ + private static Object capitalize(CodepointDelegate s) { + // Iterate through s + CodepointIterator si = s.iterator(0); + if (si.hasNext()) { + // Build the result in buf. + IntArrayBuilder buf = new IntArrayBuilder(s.length()); + // Uppercase the first character + buf.append(Character.toUpperCase(si.nextInt())); + // Lowercase the rest + while (si.hasNext()) { + buf.append(Character.toLowerCase(si.nextInt())); + } + return wrap(buf.take()); + } else { + // String is empty + return ""; + } + } + + // @formatter:off + + /* + @ExposedMethod(doc = BuiltinDocs.str_join_doc) + */ + Object join(Object iterable) throws TypeError, Throwable { + return join(delegate, iterable); + } + + static Object join(String self, Object iterable) + throws TypeError, Throwable { + return join(adapt(self), iterable); + } + + /** + * Inner implementation of {@link #join() join}. + * + * @param s the {@code self} string (separator) + * @param iterable of strings + * @return capitalised string + * @throws TypeError if {@code iterable} isn't + * @throws Throwable from errors iterating {@code iterable} + */ + private static Object join(CodepointDelegate s, Object iterable) + throws TypeError, Throwable { + /* + * The argument is supposed to be a Python iterable: present it + * as a Java List. 
+ */ + List parts = PySequence.fastList(iterable, + () -> Abstract.argumentTypeError("join", "", "iterable", + iterable)); + + /* + * It is safe assume L is constant since either seq is a + * well-behaved built-in, or we made a copy. + */ + final int L = parts.size(); + + // If empty sequence, return "" + if (L == 0) { + return ""; + } else if (L == 1) { + // One-element sequence: return that element (if a str). + Object item = parts.get(0); + if (TYPE.checkExact(item)) { return item; } + } + + /* + * There are at least two parts to join, or one and it isn't a + * str exactly. Do a pre-pass to figure out the total amount of + * space we'll need, and check that every element is str-like. + */ + int sepLen = s.length(); + // Start with the length contributed for by L-1 separators + long size = (L - 1) * sepLen; + + for (int i = 0; i < L; i++) { + + // Accumulate the length of the item according to type + Object item = parts.get(i); + if (item instanceof PyUnicode) { + size += ((PyUnicode)item).__len__(); + } else if (item instanceof String) { + /* + * If non-BMP, this will over-estimate. We assume this + * is preferable to counting characters properly. 
+ */ + size += ((String)item).length(); + } else { + // If neither, then it's not a str + throw joinArgumentTypeError(item, i); + } + + if (size > Integer.MAX_VALUE) { + throw new OverflowError( + "join() result is too long for a Python string"); + } + } + + // Build the result here + IntArrayBuilder buf = new IntArrayBuilder((int)size); + + // Concatenate the parts and separators + for (int i = 0; i < L; i++) { + // Separator + if (i != 0) { buf.append(s); } + // item from the iterable + Object item = parts.get(i); + try { + buf.append(adapt(item)); + } catch (NoConversion e) { + // This can't really happen here, given checks above + throw joinArgumentTypeError(item, i); + } + } + + return wrap(buf.take()); + } + + private static TypeError joinArgumentTypeError(Object item, int i) { + return new TypeError( + "sequence item %d: expected str, %.80s found", i, + PyType.of(item).getName()); + } + + // Doc copied from PyString + /** + * Equivalent to the Python {@code str.startswith} method, testing + * whether a string starts with a specified prefix, where a + * sub-range is specified by {@code [start:end]}. Arguments + * {@code start} and {@code end} are interpreted as in slice + * notation, with null or {@link Py#None} representing "missing". + * {@code prefix} can also be a tuple of prefixes to look for. + * + * @param prefix string to check for (or a {@code PyTuple} of them). + * @param start start of slice. + * @param end end of slice. + * @return {@code true} if this string slice starts with a specified + * prefix, otherwise {@code false}. 
+ */ + /* + @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_startswith_doc) + */ + Object startswith(Object prefix, Object start, Object end) { + return startswith(delegate, prefix, start, end); + } + + static Object startswith(String self, Object prefix, Object start, + Object end) { + return startswith(adapt(self), prefix, start, end); + } + + private static boolean startswith(CodepointDelegate s, + Object prefixObj, Object start, Object end) { + + PySlice.Indices slice = getSliceIndices(s, start, end); + + if (prefixObj instanceof PyTuple) { + /* + * Loop will return true if this slice starts with any + * prefix in the tuple + */ + for (Object prefix : (PyTuple)prefixObj) { + // It ought to be a str. + CodepointDelegate p = adaptSub("startswith", prefix); + if (startswith(s, p, slice)) { return true; } + } + // None matched + return false; + } else { + // It ought to be a str. + CodepointDelegate p = adaptSub("startswith", prefixObj); + return startswith(s, p, slice); + } + } + + private static boolean startswith(CodepointDelegate s, + CodepointDelegate p, PySlice.Indices slice) { + // If p is too long, it can't start s + if (p.length() > s.length()) { return false; } + CodepointIterator si = s.iterator(0, slice.start, slice.stop); + CodepointIterator pi = p.iterator(0); + // We know that p is no longer than s so only count in p + while (pi.hasNext()) { + if (pi.nextInt() != si.nextInt()) { return false; } + } + return true; + } + + // Doc copied from PyString + /** + * Equivalent to the Python {@code str.endswith} method, testing + * whether a string ends with a specified suffix, where a sub-range + * is specified by {@code [start:end]}. Arguments {@code start} and + * {@code end} are interpreted as in slice notation, with null or + * {@link Py#None} representing "missing". {@code suffix} can also + * be a tuple of suffixes to look for. + * + * @param suffix string to check for (or a {@code PyTuple} of them). 
+ * @param start start of slice. + * @param end end of slice. + * @return {@code true} if this string slice ends with a specified + * suffix, otherwise {@code false}. + */ + /* + @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_endswith_doc) + */ + Object endswith(Object suffix, Object start, Object end) { + return endswith(delegate, suffix, start, end); + } + + static Object endswith(String self, Object suffix, Object start, + Object end) { + return endswith(adapt(self), suffix, start, end); + } + + private static boolean endswith(CodepointDelegate s, + Object suffixObj, Object start, Object end) { + + PySlice.Indices slice = getSliceIndices(s, start, end); + + if (suffixObj instanceof PyTuple) { + /* + * Loop will return true if this slice ends with any + * prefix in the tuple + */ + for (Object prefix : (PyTuple)suffixObj) { + // It ought to be a str. + CodepointDelegate p = adaptSub("endswith", prefix); + if (endswith(s, p, slice)) { return true; } + } + // None matched + return false; + } else { + // It ought to be a str. 
+ CodepointDelegate p = adaptSub("endswith", suffixObj); + return endswith(s, p, slice); + } + } + + private static boolean endswith(CodepointDelegate s, + CodepointDelegate p, PySlice.Indices slice) { + // If p is too long, it can't end s + if (p.length() > s.length()) { return false; } + CodepointIterator si = s.iterator(slice.stop, slice.start, slice.stop); + CodepointIterator pi = p.iteratorLast(); + // We know that p is no longer than s so only count in p + while (pi.hasPrevious()) { + if (pi.previousInt() != si.previousInt()) { return false; } + } + return true; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_translate_doc) + */ + final Object translate(Object table) { + return translateCharmap(this, "ignore", table); + } + + // Copied from PyString + /** + * Helper common to the Python and Java API implementing {@code str.translate} returning a + * copy of this string where all characters occurring in the argument + * {@code deletechars} are removed (if it is not {@code null}), and the remaining + * characters have been mapped through the translation {@code table}, which must be + * equivalent to a string of length 256 (if it is not {@code null}). 
+ * + * @param table of character translations (or {@code null}) + * @param deletechars set of characters to remove (or {@code null}) + * @return transformed string + */ + private final String _translate(String table, String deletechars) { + + if (table != null && table.length() != 256) { + throw new ValueError("translation table must be 256 characters long"); + } + + StringBuilder buf = new StringBuilder(asString().length()); + + for (int i = 0; i < asString().length(); i++) { + char c = asString().charAt(i); + if (deletechars != null && deletechars.indexOf(c) >= 0) { + continue; + } + if (table == null) { + buf.append(c); + } else { + try { + buf.append(table.charAt(c)); + } catch (IndexOutOfBoundsException e) { + throw new TypeError("translate() only works for 8-bit character strings"); + } + } + } + return buf.toString(); + } + + // Predicate methods ---------------------------------------------- + + /* + * We group here methods that are boolean functions of the string, + * based on tests of character properties, for example + * str.isascii(). They have a common pattern. 
+ */ + + // @formatter:off + /* + @ExposedMethod(doc = BuiltinDocs.unicode_islower_doc) + */ + boolean islower() { return islower(delegate); } + + static boolean islower(String s) { return islower(adapt(s)); } + + private static boolean islower(PySequence.OfInt s) { + boolean cased = false; + for (int codepoint : s) {; + if (Character.isUpperCase(codepoint) || Character.isTitleCase(codepoint)) { + return false; + } else if (!cased && Character.isLowerCase(codepoint)) { + cased = true; + } + } + return cased; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isupper_doc) + */ + final boolean isupper() { return isupper(delegate); } + + static boolean isupper(String s) { return isupper(adapt(s)); } + + private static boolean isupper(PySequence.OfInt s) { + boolean cased = false; + for (int codepoint : s) {; + if (Character.isLowerCase(codepoint) || Character.isTitleCase(codepoint)) { + return false; + } else if (!cased && Character.isUpperCase(codepoint)) { + cased = true; + } + } + return cased; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isalpha_doc) + */ + final boolean isalpha() { return isalpha(delegate); } + + static boolean isalpha(String s) { return isalpha(adapt(s)); } + + private static boolean isalpha(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + for (int codepoint : s) { + if (!Character.isLetter(codepoint)) { + return false; + } + } + return true; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isalnum_doc) + */ + final boolean isalnum() { return isalnum(delegate); } + + static boolean isalnum(String s) { return isalnum(adapt(s)); } + + private static boolean isalnum(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + for (int codepoint : s) {; + if (!(Character.isLetterOrDigit(codepoint) || // + Character.getType(codepoint) == Character.LETTER_NUMBER)) { + return false; + } + } + return true; + } + + @PythonMethod(primary = false) + public boolean isascii() { + for (int c : value) { if (c >>> 7 
!= 0) { return false; } } + return true; + } + + @PythonMethod + public static boolean isascii(String self) { + // We can test chars since any surrogate will fail. + return self.chars().dropWhile(c -> c >>> 7 == 0) + .findFirst().isEmpty(); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc) + */ + final boolean isdecimal() { return isdecimal(delegate); } + + static boolean isdecimal(String s) { return isdecimal(adapt(s)); } + + private static boolean isdecimal(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + for (int codepoint : s) {; + if (Character.getType(codepoint) != Character.DECIMAL_DIGIT_NUMBER) { + return false; + } + } + return true; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isdigit_doc) + */ + final boolean isdigit() { return isdigit(delegate); } + + static boolean isdigit(String s) { return isdigit(adapt(s)); } + + private static boolean isdigit(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + for (int codepoint : s) {; + if (!Character.isDigit(codepoint)) { + return false; + } + } + return true; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc) + */ + final boolean isnumeric() { return isnumeric(delegate); } + + static boolean isnumeric(String s) { return isnumeric(adapt(s)); } + + private static boolean isnumeric(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + for (int codepoint : s) {; + int type = Character.getType(codepoint); + if (type != Character.DECIMAL_DIGIT_NUMBER && type != Character.LETTER_NUMBER + && type != Character.OTHER_NUMBER) { + return false; + } + } + return true; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_istitle_doc) + */ + final boolean istitle() { return istitle(delegate); } + + static boolean istitle(String s) { return istitle(adapt(s)); } + + private static boolean istitle(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + boolean cased = false; + boolean previous_is_cased = false; + for 
(int codepoint : s) {; + if (Character.isUpperCase(codepoint) || Character.isTitleCase(codepoint)) { + if (previous_is_cased) { + return false; + } + previous_is_cased = true; + cased = true; + } else if (Character.isLowerCase(codepoint)) { + if (!previous_is_cased) { + return false; + } + previous_is_cased = true; + cased = true; + } else { + previous_is_cased = false; + } + } + return cased; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_isspace_doc) + */ + final boolean isspace() { return isspace(delegate); } + + static boolean isspace(String s) { return isspace(adapt(s)); } + + private static boolean isspace(PySequence.OfInt s) { + if (s.length() == 0) { + return false; + } + for (int codepoint : s) {; + if (!isPythonSpace(codepoint)) { + return false; + } + } + return true; + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_encode_doc) + */ + Object encode(Object[] args, String[] keywords) { + return encode(delegate, args, keywords); + } + + Object encode(String self, Object[] args, String[] keywords) { + return encode(adapt(self), args, keywords); + } + + Object encode(CodepointDelegate s, Object[] args, String[] keywords) { + throw new MissingFeature("bytes, codecs, encoding ..."); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___getnewargs___doc) + */ + private PyTuple __getnewargs__() { + /* This may be a sub-class but it should still be safe to share the value array. (I think.) 
*/ + return new PyTuple(wrap(value)); + } + + private static PyTuple __getnewargs__(String self) { + return new PyTuple(self); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode___format___doc) + */ + static final Object __format__(Object self, Object formatSpec) { + + String stringFormatSpec = coerceToString(formatSpec, + () -> Abstract.argumentTypeError("__format__", + "specification", "str", formatSpec)); + + try { + // Parse the specification + Spec spec = InternalFormat.fromText(stringFormatSpec); + + // Get a formatter for the specification + TextFormatter f = new StrFormatter(spec); + + /* + * Format, pad and return a result according to as the + * specification argument. + */ + return f.format(self).pad().getResult(); + + } catch (FormatOverflow fe) { + throw new OverflowError(fe.getMessage()); + } catch (FormatError fe) { + throw new ValueError(fe.getMessage()); + } catch (NoConversion e) { + throw Abstract.impossibleArgumentError(TYPE.name, self); + } + } + + /** + * Implementation of {@code _string.formatter_parser}. Return an + * iterable that contains {@code tuple}s of the form: + * {@code (literal_text, field_name, format_spec, conversion)}. + *

+ * For example, the iterator formatter_parser("x={2:6.3f} + * y={y!r:>7s}.") yields successively

+     * ('x=', '2', '6.3f', None)
+     * (' y=', 'y', '>7s', 'r')
+     * ('.', None, None, None)
+     * 
{@code literal_text} can be zero length, and + * {@code field_name} can be {@code None}, in which case there's no + * object to format and output. If {@code field_name} is not + * {@code None}, it is looked up, formatted with {@code format_spec} + * and {@code conversion} and then used. + * + * @param formatString to parse + * @return an iterator of format {@code tuple}s + */ + // Compare CPython formatter_parser in unicode_formatter.h + /* + @ExposedMethod(doc = BuiltinDocs.unicode__formatter_parser_doc) + */ + // XXX belongs to the _string module, but where does that belong? + static Object formatter_parser(Object formatString) { + return new MarkupIterator(asString(formatString)); + } + + /** + * Implementation of {@code _string.formatter_field_name_split}. + * + * @param fieldName to split into components + * @return a tuple of the first field name component and the rest + */ + // Compare CPython formatter_field_name_split in unicode_formatter.h + /* + @ExposedMethod(doc = BuiltinDocs.unicode__formatter_field_name_split_doc) + */ + // XXX belongs to the _string module, but where does that belong? + static PyTuple formatter_field_name_split(Object fieldName) { + FieldNameIterator iterator = new FieldNameIterator(asString(fieldName)); + return new PyTuple(iterator.head(), iterator); + } + + /* + @ExposedMethod(doc = BuiltinDocs.unicode_format_doc) + */ + final Object format(Object[] args, String[] keywords) throws TypeError, Throwable { + try { + return buildFormattedString(args, keywords, null, null); + } catch (IllegalArgumentException e) { + throw new ValueError(e.getMessage()); + } + } + + // @formatter:off + + // Codec support ------------------------------------------------- + + // Copied from PyString + private static char[] hexdigit = "0123456789abcdef".toCharArray(); + + // Copied from PyString + public static String encode_UnicodeEscape(String str, boolean use_quotes) { + char quote = use_quotes ? '?' 
: 0; + return encode_UnicodeEscape(str, quote); + } + + // Copied from PyString + /** + * The inner logic of the string __repr__ producing an ASCII representation of the target + * string, optionally in quotations. The caller can determine whether the returned string will + * be wrapped in quotation marks, and whether Python rules are used to choose them through + * {@code quote}. + * + * @param str to process + * @param quote '"' or '\'' use that, '?' = let Python choose, 0 or anything = no quotes + * @return encoded string (possibly the same string if unchanged) + */ + static String encode_UnicodeEscape(String str, char quote) { + /* + * XXX consider re-work. The quotation logic is useful for repr, + * but not escaping all the non-ascii characters: modern Python + * does not assume the console is acsii. OTOH we do need a couple + * of unicode escape encodings elsewhere. Calls to this in Jython 2 + * are essentially PyUnicode.__repr__ (filenames mostly). + */ + + // Choose whether to quote and the actual quote character + boolean use_quotes; + switch (quote) { + case '?': + use_quotes = true; + // Python rules + quote = str.indexOf('\'') >= 0 && str.indexOf('"') == -1 ? 
'"' : '\''; + break; + case '"': + case '\'': + use_quotes = true; + break; + default: + use_quotes = false; + break; + } + + // Allocate a buffer for the result (25% bigger and room for quotes) + int size = str.length(); + StringBuilder v = new StringBuilder(size + (size >> 2) + 2); + + if (use_quotes) { + v.append(quote); + } + + // Now chunter through the original string a character at a time + for (int i = 0; size-- > 0;) { + int ch = str.charAt(i++); + // Escape quotes and backslash + if ((use_quotes && ch == quote) || ch == '\\') { + v.append('\\'); + v.append((char) ch); + continue; + } + /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ + else if (size > 0 && ch >= 0xD800 && ch < 0xDC00) { + char ch2 = str.charAt(i++); + size--; + if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { + int ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; + v.append('\\'); + v.append('U'); + v.append(hexdigit[(ucs >> 28) & 0xf]); + v.append(hexdigit[(ucs >> 24) & 0xf]); + v.append(hexdigit[(ucs >> 20) & 0xf]); + v.append(hexdigit[(ucs >> 16) & 0xf]); + v.append(hexdigit[(ucs >> 12) & 0xf]); + v.append(hexdigit[(ucs >> 8) & 0xf]); + v.append(hexdigit[(ucs >> 4) & 0xf]); + v.append(hexdigit[ucs & 0xf]); + continue; + } + /* Fall through: isolated surrogates are copied as-is */ + i--; + size++; + } + /* Map 16-bit characters to '\\uxxxx' */ + if (ch >= 256) { + v.append('\\'); + v.append('u'); + v.append(hexdigit[(ch >> 12) & 0xf]); + v.append(hexdigit[(ch >> 8) & 0xf]); + v.append(hexdigit[(ch >> 4) & 0xf]); + v.append(hexdigit[ch & 15]); + } + /* Map special whitespace to '\t', \n', '\r' */ + else if (ch == '\t') { + v.append("\\t"); + } else if (ch == '\n') { + v.append("\\n"); + } else if (ch == '\r') { + v.append("\\r"); + } else if (ch < ' ' || ch >= 127) { + /* Map non-printable US ASCII to '\xNN' */ + v.append('\\'); + v.append('x'); + v.append(hexdigit[(ch >> 4) & 0xf]); + v.append(hexdigit[ch & 0xf]); + } else {/* Copy everything else as-is */ + 
v.append((char) ch); + } + } + + if (use_quotes) { + v.append(quote); + } + + // Return the original string if we didn't quote or escape anything + return v.length() > size ? v.toString() : str; + } + + // Copied from PyString + private static ucnhashAPI pucnHash = null; + + // Copied from PyString + public static String decode_UnicodeEscape(String str, int start, int end, String errors, + boolean unicode) { + StringBuilder v = new StringBuilder(end - start); + for (int s = start; s < end;) { + char ch = str.charAt(s); + /* Non-escape characters are interpreted as Unicode ordinals */ + if (ch != '\\') { + v.append(ch); + s++; + continue; + } + int loopStart = s; + /* \ - Escapes */ + s++; + if (s == end) { + s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", // + str, loopStart, s + 1, "\\ at end of string"); + continue; + } + ch = str.charAt(s++); + switch (ch) { + /* \x escapes */ + case '\n': + break; + case '\\': + v.append('\\'); + break; + case '\'': + v.append('\''); + break; + case '\"': + v.append('\"'); + break; + case 'b': + v.append('\b'); + break; + case 'f': + v.append('\014'); + break; /* FF */ + case 't': + v.append('\t'); + break; + case 'n': + v.append('\n'); + break; + case 'r': + v.append('\r'); + break; + case 'v': + v.append('\013'); + break; /* VT */ + case 'a': + v.append('\007'); + break; /* BEL, not classic C */ + /* \OOO (octal) escapes */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + int x = Character.digit(ch, 8); + for (int j = 0; j < 2 && s < end; j++, s++) { + ch = str.charAt(s); + if (ch < '0' || ch > '7') { + break; + } + x = (x << 3) + Character.digit(ch, 8); + } + v.append((char) x); + break; + case 'x': + s = hexescape(v, errors, 2, s, str, end, "truncated \\xXX"); + break; + case 'u': + if (!unicode) { + v.append('\\'); + v.append('u'); + break; + } + s = hexescape(v, errors, 4, s, str, end, "truncated \\uXXXX"); + break; + case 'U': + if (!unicode) { + 
v.append('\\'); + v.append('U'); + break; + } + s = hexescape(v, errors, 8, s, str, end, "truncated \\UXXXXXXXX"); + break; + case 'N': + if (!unicode) { + v.append('\\'); + v.append('N'); + break; + } + /* + * Ok, we need to deal with Unicode Character Names now, make sure we've + * imported the hash table data... + */ + if (pucnHash == null) { + // class org.python.modules.ucnhash +// Object mod = imp.importName("ucnhash", true); +// mod = mod.__call__(); +// pucnHash = (ucnhashAPI) mod.__tojava__(Object.class); + if (pucnHash.getCchMax() < 0) { + throw new UnicodeError("Unicode names not loaded"); + } + } + if (str.charAt(s) == '{') { + int startName = s + 1; + int endBrace = startName; + /* + * look for either the closing brace, or we exceed the maximum length of the + * unicode character names + */ + int maxLen = pucnHash.getCchMax(); + while (endBrace < end && str.charAt(endBrace) != '}' + && (endBrace - startName) <= maxLen) { + endBrace++; + } + if (endBrace != end && str.charAt(endBrace) == '}') { + int value = pucnHash.getValue(str, startName, endBrace); + if (storeUnicodeCharacter(value, v)) { + s = endBrace + 1; + } else { + s = codecs.insertReplacementAndGetResume( // + v, errors, "unicodeescape", // + str, loopStart, endBrace + 1, "illegal Unicode character"); + } + } else { + s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", // + str, loopStart, endBrace, "malformed \\N character escape"); + } + break; + } else { + s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", // + str, loopStart, s + 1, "malformed \\N character escape"); + } + break; + default: + v.append('\\'); + v.append(str.charAt(s - 1)); + break; + } + } + return v.toString(); + } + + // Copied from PyString + private static int hexescape(StringBuilder partialDecode, String errors, int digits, + int hexDigitStart, String str, int size, String errorMessage) { + if (hexDigitStart + digits > size) { + return 
codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", str, + hexDigitStart - 2, size, errorMessage); + } + int i = 0; + int x = 0; + for (; i < digits; ++i) { + char c = str.charAt(hexDigitStart + i); + int d = Character.digit(c, 16); + if (d == -1) { + return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", + str, hexDigitStart - 2, hexDigitStart + i + 1, errorMessage); + } + x = (x << 4) & ~0xF; + if (c >= '0' && c <= '9') { + x += c - '0'; + } else if (c >= 'a' && c <= 'f') { + x += 10 + c - 'a'; + } else { + x += 10 + c - 'A'; + } + } + if (storeUnicodeCharacter(x, partialDecode)) { + return hexDigitStart + i; + } else { + return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", str, + hexDigitStart - 2, hexDigitStart + i + 1, "illegal Unicode character"); + } + } + + // Copied from PyString + /* pass in an int since this can be a UCS-4 character */ + private static boolean storeUnicodeCharacter(int value, StringBuilder partialDecode) { + if (value < 0 || (value >= 0xD800 && value <= 0xDFFF)) { + return false; + } else if (value <= Character.MAX_CODE_POINT) { + partialDecode.appendCodePoint(value); + return true; + } + return false; + } + + // Java-only API -------------------------------------------------- + + // @formatter:on + + private static final int HIGH_SURROGATE_OFFSET = + Character.MIN_HIGH_SURROGATE - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10); + + /** + * The code points of this PyUnicode as a {@link PySequence.OfInt}. + * This interface will allow the code points to be streamed or + * iterated (but not modified, obviously). + * + * @return the code point sequence + */ + public PySequence.OfInt asSequence() { return delegate; } + + /** + * The hash of a {@link PyUnicode} is the same as that of a Java + * {@code String} equal to it. 
This is so that a given Python + * {@code str} may be found as a match in hashed data structures, + * whichever representation is used for the key or query. + */ + @Override + public int hashCode() throws PyException { return PyDict.pythonHash(this); } + + /** + * Compare for equality with another Python {@code str}, or a + * {@link PyDict.Key} containing a {@code str}. If the other object + * is not a {@code str}, or a {@code Key} containing a {@code str}, + * return {@code false}. If it is such an object, compare for + * equality of the code points. + */ + @Override + public boolean equals(Object obj) { return PyDict.pythonEquals(this, obj); } + + /** + * Create a {@code str} from a format and arguments. Note Java + * {@code String.format} semantics are applied, not the CPython + * ones. + * + * @param fmt format string (Java semantics) + * @param args arguments + * @return formatted string + */ + @Deprecated // XXX possibly want a version with Python semantics + static PyUnicode fromFormat(String fmt, Object... args) { + return new PyUnicode(TYPE, String.format(fmt, args)); + } + + /** + * Represent the `str` value in readable form, escaping lone + * surrogates. The {@code PyUnicode.toString()} is intended to + * produce a readable output, not always the closest Java + * {@code String}, for which {@link #asString()} is a better choice. + */ + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + for (int c : value) { + if (c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE) { + // This is a lone surrogate: show the code + b.append(String.format("\\u%04x", c)); + } else { + b.appendCodePoint(c); + } + } + return b.toString(); + } + + /** + * Present a Python {@code str} as a Java {@code String} value or + * raise a {@link TypeError}. This is for use when the argument is + * expected to be a Python {@code str} or a sub-class of it. 
+ * + * @param v claimed {@code str} + * @return {@code String} value + * @throws TypeError if {@code v} is not a Python {@code str} + */ + public static String asString(Object v) throws TypeError { + return asString(v, o -> Abstract.requiredTypeError("a str", o)); + } + + /** + * Present a qualifying object {@code v} as a Java {@code String} + * value or throw {@code E}. This is for use when the argument is + * expected to be a Python {@code str} or a sub-class of it. + *

+ * The detailed form of exception is communicated in a + * lambda-function {@code exc} that will be called (if necessary) + * with {@code v} as argument. We use a {@code Function} to avoid + * binding a variable {@code v} at the call site. + * + * @param type of exception to throw + * @param v claimed {@code str} + * @param exc to supply the exception to throw wrapping {@code v} + * @return {@code String} value + * @throws E if {@code v} is not a Python {@code str} + */ + public static String asString(Object v, + Function exc) throws PyException { + if (v instanceof String) + return (String)v; + else if (v instanceof PyUnicode) + return ((PyUnicode)v).asString(); + throw exc.apply(v); + } + + // Plumbing ------------------------------------------------------ + + // @formatter:on + + /** + * Convert a Python {@code str} to a Java {@code str} (or throw + * {@link NoConversion}). This is suitable for use where a method + * argument should be (exactly) a {@code str}, or an alternate path + * taken. + *

+ * If the method throws the special exception {@link NoConversion}, + * the caller must deal with it by throwing an appropriate Python + * exception or taking an alternative course of action. + * + * @param v to convert + * @return converted to {@code String} + * @throws NoConversion v is not a {@code str} + */ + static String convertToString(Object v) throws NoConversion { + if (v instanceof String) + return (String)v; + else if (v instanceof PyUnicode) + return ((PyUnicode)v).asString(); + throw PyObjectUtil.NO_CONVERSION; + } + + /** + * Coerce a Python {@code str} to a Java String, or raise a + * specified exception. This is suitable for use where a method + * argument should be (exactly) a {@code str}, or a context specific + * exception has to be raised. + * + * @param type of exception to throw + * @param arg to coerce + * @param exc supplier for actual exception + * @return {@code arg} as a {@code String} + */ + static String coerceToString(Object arg, Supplier exc) { + if (arg instanceof String) { + return (String)arg; + } else if (arg instanceof PyUnicode) { + return ((PyUnicode)arg).asString(); + } else { + throw exc.get(); + } + } + + /** + * Convert this {@code PyUnicode} to a Java {@code String} built + * from its code point values. If the code point value of a + * character in Python is a lone surrogate, it will become that + * UTF-16 unit in the result. + * + * @return this {@code PyUnicode} as a Java {@code String} + */ + String asString() { + StringBuilder b = new StringBuilder(); + for (int c : delegate) { b.appendCodePoint(c); } + return b.toString(); + } + + /** + * Test whether a string contains no characters above the BMP range, + * that is, any characters that require surrogate pairs to represent + * them. The method returns {@code true} if and only if the string + * consists entirely of BMP characters or is empty. 
+ * + * @param s the string to test + * @return whether contains no non-BMP characters + */ + private static boolean isBMP(String s) { + return s.codePoints().dropWhile(Character::isBmpCodePoint).findFirst().isEmpty(); + } + + /** + * Define what characters are to be treated as a space according to + * Python 3. + */ + private static boolean isPythonSpace(int cp) { + // Use the Java built-in methods as far as possible + return Character.isWhitespace(cp) // ASCII spaces and some + // remaining Unicode spaces + || Character.isSpaceChar(cp) + // NEXT LINE (not a space in Java or Unicode) + || cp == 0x0085; + } + + /** + * Define what characters are to be treated as a line separator + * according to Python 3. In {@code splitlines} we treat these as + * separators, but also give singular treatment to the sequence + * CR-LF. + */ + private static boolean isPythonLineSeparator(int cp) { + // Bit i is set if code point i is a line break (i<32). + final int EOL = 0b0111_0000_0000_0000_0011_1100_0000_0000; + if (cp >>> 5 == 0) { + // cp < 32: use the little look-up table + return ((EOL >>> cp) & 1) != 0; + } else if (cp >>> 7 == 0) { + // 32 <= cp < 128 : the rest of ASCII + return false; + } else { + // NEL, L-SEP, P-SEP + return cp == 0x85 || cp == 0x2028 || cp == 0x2029; + } + } + + /** + * A base class for the delegate of either a {@code String} or a + * {@code PyUnicode}, implementing {@code __getitem__} and other + * index-related operations. The class is a + * {@link PySequence.Delegate}, an iterable of {@code Integer}, + * comparable with other instances of the same base, and is able to + * supply point codes as a stream. + */ + static abstract class CodepointDelegate extends PySequence.Delegate + implements PySequence.OfInt { + /** + * A bidirectional iterator on the sequence of code points between + * two indices. + * + * @param index starting position (code point index) + * @param start index of first element to include. 
+ * @param end index of first element not to include. + * @return the iterator + */ + abstract CodepointIterator iterator(int index, int start, int end); + + /** + * A bidirectional iterator on the sequence of code points. + * + * @param index starting position (code point index) + * @return the iterator + */ + CodepointIterator iterator(int index) { return iterator(index, 0, length()); } + + /** + * A bidirectional iterator on the sequence of code points, + * positioned initially one beyond the end of the sequence, so that + * the first call to {@code previous()} returns the last element. + * + * @return the iterator + */ + CodepointIterator iteratorLast() { return iterator(length()); } + + @Override + public Iterator iterator() { return iterator(0); } + + /** + * Return the object of which this is the delegate. + * + * @return the object of which this is the delegate + */ + abstract Object principal(); + + // Re-declared here to remove throws clause + @Override + public abstract Object getItem(int i); + + // Re-declared here to remove throws clause + @Override + public abstract Object getSlice(Indices slice); + + @Override + public String toString() { + StringBuilder b = new StringBuilder("adapter(\""); + for (Integer c : this) { b.appendCodePoint(c); } + return b.append("\")").toString(); + } + } + + /** + * A {@code ListIterator} working bidirectionally in code point + * indices. + */ + interface CodepointIterator extends ListIterator, PrimitiveIterator.OfInt { + + @Override + default Integer next() { return nextInt(); } + + /** + * Returns {@code true} if this list iterator has the given number + * of elements when traversing the list in the forward direction. + * + * @param n number of elements needed + * @return {@code true} if has a further {@code n} elements going + * forwards + */ + boolean hasNext(int n); + + /** + * Equivalent to {@code n} calls to {@link #nextInt()} returning the + * last result. 
+ * + * @param n the number of advances + * @return the {@code n}th next {@code int} + */ + int nextInt(int n); + + @Override + default Integer previous() { return previousInt(); } + + /** + * Returns {@code true} if this list iterator has the given number + * of elements when traversing the list in the reverse direction. + * + * @param n number of elements needed + * @return {@code true} if has a further {@code n} elements going + * backwards + */ + boolean hasPrevious(int n); + + /** + * Returns the previous {@code int} element in the iteration. This + * is just previous specialised to a primitive {@code int}. + * + * @return the previous {@code int} + */ + int previousInt(); + + /** + * Equivalent to {@code n} calls to {@link #previousInt()} returning + * the last result. + * + * @param n the number of steps to take (in reverse) + * @return the {@code n}th previous {@code int} + */ + int previousInt(int n); + + // Unsupported operations ----------------------------- + + @Override + default void remove() { throw new UnsupportedOperationException(); } + + @Override + default void set(Integer o) { throw new UnsupportedOperationException(); } + + @Override + default void add(Integer o) { throw new UnsupportedOperationException(); } + + // Iterator mark and restore -------------------------- + + /** + * Set a mark (a saved state) to which the iterator may be restored + * later. + * + * @return the mark + */ + Mark mark(); + + /** + * An opaque object to hold and restore the position of a particular + * {@link CodepointIterator}. + */ + interface Mark { + /** + * Restore the position of the iterator from which this {@code Mark} + * was obtained, to the position it had at the time. + */ + void restore(); + } + } + + /** + * Wrap a Java {@code String} as a {@link PySequence.Delegate}, that + * is also an iterable of {@code Integer}. If the {@code String} + * includes surrogate pairs of {@code char}s, these are interpreted + * as a single Python code point. 
+ */ + static class StringAdapter extends CodepointDelegate { + + /** Value of the str encoded as a Java {@code String}. */ + private final String s; + /** Length in code points deduced from the {@code String}. */ + private final int length; + + /** + * Adapt a String so we can iterate or stream its code points. + * + * @param s to adapt + */ + StringAdapter(String s) { + this.s = s; + length = s.codePointCount(0, s.length()); + } + + /** + * Return {@code true} iff the string contains only basic plane + * characters or, possibly, isolated surrogates. All {@code char}s + * may be treated as code points. + * + * @return contains only BMP characters or isolated surrogates + */ + private boolean isBMP() { return length == s.length(); } + + @Override + public int length() { return length; }; + + @Override + public int getInt(int i) { + if (isBMP()) { + // No surrogate pairs. + return s.charAt(i); + } else { + // We have to count from the start + int k = toCharIndex(i); + return s.codePointAt(k); + } + } + + @Override + public PyType getType() { return TYPE; } + + @Override + public String getTypeName() { return "string"; } + + @Override + Object principal() { return s; } + + @Override + public Object getItem(int i) { + if (isBMP()) { + // No surrogate pairs. + return String.valueOf(s.charAt(i)); + } else { + return PyUnicode.fromCodePoint(getInt(i)); + } + } + + /** + * Translate a (valid) code point index into a {@code char} index + * into {@code s}, when s contains surrogate pairs. A call is + * normally guarded by {@link #isBMP()}, since when that is + * {@code true} we can avoid the work. 
+ * + * @param cpIndex code point index + * @return {@code char} index into {@code s} + */ + private int toCharIndex(int cpIndex) { + int L = s.length(); + if (cpIndex == length) { + // Avoid counting to the end + return L; + } else { + int i = 0, cpCount = 0; + while (i < L && cpCount < cpIndex) { + char c = s.charAt(i++); + cpCount++; + if (Character.isHighSurrogate(c) && i < L) { + // Expect a low surrogate + char d = s.charAt(i); + if (Character.isLowSurrogate(d)) { i++; } + } + } + return i; + } + } + + @Override + public Object getSlice(Indices slice) { + if (slice.slicelength == 0) { + return ""; + } else if (slice.step == 1 && isBMP()) { + return s.substring(slice.start, slice.stop); + } else { + /* + * If the code points are not all BMP, it is less work in future if + * we use a PyUnicode. If step != 1, there is the possibility of + * creating an unintended surrogate pair, so only a PyUnicode should + * be trusted to represent the result. + */ + int L = slice.slicelength, i = slice.start; + int[] r = new int[L]; + if (isBMP()) { + // Treating surrogates as characters + for (int j = 0; j < L; j++) { + r[j] = s.charAt(i); + i += slice.step; + } + } else if (slice.step > 0) { + // Work forwards through the sequence + ListIterator cps = iterator(i); + r[0] = cps.next(); + for (int j = 1; j < L; j++) { + for (int k = 1; k < slice.step; k++) { cps.next(); } + r[j] = cps.next(); + } + } else { // slice.step < 0 + // Work backwards through the sequence + ListIterator cps = iterator(i + 1); + r[0] = cps.previous(); + for (int j = 1; j < L; j++) { + for (int k = -1; k > slice.step; --k) { cps.previous(); } + r[j] = cps.previous(); + } + } + return wrap(r); + } + } + + @Override + Object add(Object ow) throws OutOfMemoryError, NoConversion, Throwable { + if (ow instanceof String) { + return PyUnicode.concat(s, (String)ow); + } else { + IntStream w = adapt(ow).asIntStream(); + return concatUnicode(s.codePoints(), w); + } + } + + @Override + Object radd(Object ov) 
throws OutOfMemoryError, NoConversion, Throwable { + if (ov instanceof String) { + return PyUnicode.concat((String)ov, s); + } else { + IntStream v = adapt(ov).asIntStream(); + return concatUnicode(v, s.codePoints()); + } + } + + @Override + Object repeat(int n) throws OutOfMemoryError, Throwable { + if (n == 0) + return ""; + else if (n == 1 || length == 0) + return s; + else if (Character.isLowSurrogate(s.charAt(0)) + && Character.isHighSurrogate(s.charAt(length - 1))) + /* + * s ends with a high surrogate and starts with a low surrogate, so + * simply concatenated to itself by String.repeat, these would merge + * into one character. Only a PyUnicode properly represents the + * result. + */ + return (new PyUnicode(TYPE, s)).delegate.repeat(n); + else + // Java String repeat will do fine + return s.repeat(n); + } + + @Override + public int compareTo(PySequence.Delegate other) { + Iterator ia = iterator(); + Iterator ib = other.iterator(); + while (ia.hasNext()) { + if (ib.hasNext()) { + int a = ia.next(); + int b = ib.next(); + // if a != b, then we've found an answer + if (a > b) + return 1; + else if (a < b) + return -1; + } else + // s has not run out, but b has. s wins + return 1; + } + /* + * The sequences matched over the length of s. The other is the + * winner if it still has elements. Otherwise its a tie. + */ + return ib.hasNext() ? 
-1 : 0; + } + + // PySequence.OfInt interface -------------------------------- + + @Override + public Spliterator.OfInt spliterator() { return s.codePoints().spliterator(); } + + @Override + public IntStream asIntStream() { return s.codePoints(); } + + // ListIterator provision ------------------------------------ + + @Override + public CodepointIterator iterator(final int index, int start, int end) { + if (isBMP()) + return new BMPIterator(index, start, end); + else + return new SMPIterator(index, start, end); + } + + /** + * A {@code ListIterator} for use when the string in the surrounding + * adapter instance contains only basic multilingual plane (BMP) + * characters or isolated surrogates. {@link SMPIterator} extends + * this class for supplementary characters. + */ + class BMPIterator implements CodepointIterator { + /** + * Index into {@code s} in code points, which is also its index in + * {@code s} in chars when {@code s} is a BMP string. + */ + protected int index; + /** + * First index at which {@link #next()} is allowable for the + * iterator in code points, which is also its index in {@code s} in + * chars when {@code s} is a BMP string. + */ + protected final int start; + /** + * First index at which {@link #next()} is not allowable for the + * iterator in code points, which is also its index in {@code s} in + * chars when {@code s} is a BMP string. 
+ */ + protected final int end; + + BMPIterator(int index, int start, int end) { + checkIndexRange(index, start, end, length); + this.start = start; + this.end = end; + this.index = index; + } + + @Override + public Mark mark() { + return new Mark() { + final int i = index; + + @Override + public void restore() { index = i; } + }; + } + + // The forward iterator ------------------------------- + + @Override + public boolean hasNext() { return index < end; } + + @Override + public boolean hasNext(int n) { + assert n >= 0; + return index + n <= end; + } + + @Override + public int nextInt() { + if (index < end) + return s.charAt(index++); + else + throw noSuchElement(nextIndex()); + } + + @Override + public int nextInt(int n) { + assert n >= 0; + int i = index + n; + if (i <= end) + return s.charAt((index = i) - 1); + else + throw noSuchElement(i - start); + } + + @Override + public int nextIndex() { return index - start; } + + // The reverse iterator ------------------------------- + + @Override + public boolean hasPrevious() { return index > start; } + + @Override + public boolean hasPrevious(int n) { + assert n >= 0; + return index - n >= 0; + } + + @Override + public int previousInt() { + if (index > start) + return s.charAt(--index); + else + throw noSuchElement(previousIndex()); + } + + @Override + public int previousInt(int n) { + assert n >= 0; + int i = index - n; + if (i >= start) + return s.charAt(index = i); + else + throw noSuchElement(i); + } + + @Override + public int previousIndex() { return index - start - 1; } + + // Diagnostic use ------------------------------------- + + @Override + public String toString() { + return String.format("[%s|%s]", s.substring(start, index), s.substring(index, end)); + } + } + + /** + * A {@code ListIterator} for use when the string in the surrounding + * adapter instance contains one or more supplementary multilingual + * plane characters represented by surrogate pairs. 
+         */
+        class SMPIterator extends BMPIterator {
+
+            /**
+             * Index of the iterator position in {@code s} in chars. This always
+             * moves in synchrony with the base class index
+             * {@link BMPIterator#index}, which continues to represent the same
+             * position as a code point index. Both reference the same
+             * character.
+             */
+            private int charIndex;
+            /**
+             * The double of {@link BMPIterator#start} in {@code s} in chars.
+             */
+            final private int charStart;
+            /**
+             * The double of {@link BMPIterator#end} in {@code s} in chars.
+             */
+            final private int charEnd;
+
+            /**
+             * Create an iterator at code point {@code index} confined to
+             * {@code [start, end)}. The three code point indices are
+             * translated to char indices in one forward scan of {@code s}.
+             */
+            SMPIterator(int index, int start, int end) {
+                super(index, start, end);
+                // Convert the arguments to character indices
+                // p counts code points, cp is the matching char index
+                int p = 0, cp = 0;
+                while (p < start) {
+                    cp = nextCharIndex(cp);
+                    p += 1;
+                }
+                this.charStart = cp;
+                while (p < index) {
+                    cp = nextCharIndex(cp);
+                    p += 1;
+                }
+                this.charIndex = cp;
+                while (p < end) {
+                    cp = nextCharIndex(cp);
+                    p += 1;
+                }
+                this.charEnd = cp;
+            }
+
+            /** @return next char index after argument.
*/ + private int nextCharIndex(int cp) { + if (Character.isBmpCodePoint(s.codePointAt(cp))) + return cp + 1; + else + return cp + 2; + } + + @Override + public Mark mark() { + return new Mark() { + // In the SMP iterator, we must save both indices + final int i = index, ci = charIndex; + + @Override + public void restore() { + index = i; + charIndex = ci; + } + }; + } + + // The forward iterator ------------------------------- + + @Override + public int nextInt() { + if (charIndex < charEnd) { + char c = s.charAt(charIndex++); + index++; + if (Character.isHighSurrogate(c) && charIndex < charEnd) { + // Expect a low surrogate + char d = s.charAt(charIndex); + if (Character.isLowSurrogate(d)) { + charIndex++; + return Character.toCodePoint(c, d); + } + } + return c; + } else + throw new NoSuchElementException(); + } + + @Override + public int nextInt(int n) { + assert n >= 0; + int i = index + n, indexSaved = index, charIndexSaved = charIndex; + while (hasNext()) { + int c = nextInt(); + if (index == i) { return c; } + } + index = indexSaved; + charIndex = charIndexSaved; + throw noSuchElement(i); + } + + // The reverse iterator ------------------------------- + + @Override + public int previousInt() { + if (charIndex > charStart) { + --index; + char d = s.charAt(--charIndex); + if (Character.isLowSurrogate(d) && charIndex > charStart) { + // Expect a low surrogate + char c = s.charAt(--charIndex); + if (Character.isHighSurrogate(c)) { return Character.toCodePoint(c, d); } + charIndex++; + } + return d; + } else + throw new NoSuchElementException(); + } + + @Override + public int previousInt(int n) { + assert n >= 0; + int i = index - n, indexSaved = index, charIndexSaved = charIndex; + while (hasPrevious()) { + int c = previousInt(); + if (index == i) { return c; } + } + index = indexSaved; + charIndex = charIndexSaved; + throw noSuchElement(i); + } + + // Diagnostic use ------------------------------------- + + @Override + public String toString() { + return 
String.format("[%s|%s]", s.substring(charStart, charIndex),
+                        s.substring(charIndex, charEnd));
+            }
+        }
+    }
+
+    /**
+     * A class to act as the delegate implementing {@code __getitem__}
+     * and other index-related operations. By inheriting {@link Delegate
+     * PySequence.Delegate} in this inner class, we obtain boilerplate
+     * implementation code for slice translation and range checks. We
+     * need only specify the work specific to {@link PyUnicode}
+     * instances.
+     */
+    class UnicodeAdapter extends CodepointDelegate {
+
+        /** @return the length of the {@code value} array. */
+        @Override
+        public int length() { return value.length; }
+
+        /** @return the element of {@code value} at index {@code i}. */
+        @Override
+        public int getInt(int i) { return value[i]; }
+
+        @Override
+        public PyType getType() { return TYPE; }
+
+        @Override
+        public String getTypeName() { return "string"; }
+
+        /** @return the enclosing {@code PyUnicode}. */
+        @Override
+        Object principal() { return PyUnicode.this; }
+
+        @Override
+        public Object getItem(int i) { return PyUnicode.fromCodePoint(value[i]); }
+
+        /**
+         * Copy the elements of {@code value} selected by {@code slice}
+         * into a new array and wrap it.
+         */
+        @Override
+        public Object getSlice(Indices slice) {
+            int[] v;
+            if (slice.step == 1)
+                // Contiguous: a range copy is enough
+                v = Arrays.copyOfRange(value, slice.start, slice.stop);
+            else {
+                v = new int[slice.slicelength];
+                int i = slice.start;
+                for (int j = 0; j < slice.slicelength; j++) {
+                    v[j] = value[i];
+                    i += slice.step;
+                }
+            }
+            return wrap(v);
+        }
+
+        /** Concatenate: this adapter's code points followed by those of {@code ow}. */
+        @Override
+        Object add(Object ow) throws OutOfMemoryError, NoConversion, Throwable {
+            if (ow instanceof PyUnicode) {
+                // Optimisation (or is it?) over concatUnicode
+                PyUnicode w = (PyUnicode)ow;
+                int L = value.length, M = w.value.length;
+                int[] r = new int[L + M];
+                System.arraycopy(value, 0, r, 0, L);
+                System.arraycopy(w.value, 0, r, L, M);
+                return wrap(r);
+            } else {
+                return concatUnicode(asIntStream(), adapt(ow).asIntStream());
+            }
+        }
+
+        /** Concatenate: the code points of {@code ov} followed by this adapter's. */
+        @Override
+        Object radd(Object ov) throws OutOfMemoryError, NoConversion, Throwable {
+            if (ov instanceof PyUnicode) {
+                // Optimisation (or is it?)
over concatUnicode + PyUnicode v = (PyUnicode)ov; + int L = v.value.length, M = value.length; + int[] r = new int[L + M]; + System.arraycopy(v.value, 0, r, 0, L); + System.arraycopy(value, 0, r, L, M); + return wrap(r); + } else { + return concatUnicode(adapt(ov).asIntStream(), asIntStream()); + } + } + + @Override + Object repeat(int n) throws OutOfMemoryError, Throwable { + int m = value.length; + if (n == 0) + return ""; + else if (n == 1 || m == 0) + return PyUnicode.this; + else { + int[] b = new int[n * m]; + for (int i = 0, p = 0; i < n; i++, p += m) { System.arraycopy(value, 0, b, p, m); } + return wrap(b); + } + } + + @Override + public int compareTo(PySequence.Delegate other) { + Iterator ib = other.iterator(); + for (int a : value) { + if (ib.hasNext()) { + int b = ib.next(); + // if a != b, then we've found an answer + if (a > b) + return 1; + else if (a < b) + return -1; + } else + // value has not run out, but other has. We win. + return 1; + } + /* + * The sequences matched over the length of value. The other is the + * winner if it still has elements. Otherwise its a tie. + */ + return ib.hasNext() ? 
-1 : 0; + } + + // PySequence.OfInt interface -------------------------------- + + @Override + public Spliterator.OfInt spliterator() { + final int flags = Spliterator.IMMUTABLE | Spliterator.SIZED | Spliterator.ORDERED; + return Spliterators.spliterator(value, flags); + } + + @Override + public IntStream asIntStream() { + int flags = Spliterator.IMMUTABLE | Spliterator.SIZED; + Spliterator.OfInt s = Spliterators.spliterator(value, flags); + return StreamSupport.intStream(s, false); + } + + // ListIterator provision ------------------------------------ + + @Override + public CodepointIterator iterator(final int index, int start, int end) { + return new UnicodeIterator(index, start, end); + } + + /** + * A {@code ListIterator} for use when the string in the surrounding + * adapter instance contains only basic multilingual plane + * characters or isolated surrogates. + */ + class UnicodeIterator implements CodepointIterator { + + private int index; + private final int start, end; + + UnicodeIterator(int index, int start, int end) { + checkIndexRange(index, start, end, value.length); + this.start = start; + this.end = end; + this.index = index; + } + + @Override + public Mark mark() { + return new Mark() { + final int i = index; + + @Override + public void restore() { index = i; } + }; + } + + // The forward iterator ------------------------------- + + @Override + public boolean hasNext() { return index < value.length; } + + @Override + public boolean hasNext(int n) { + assert n >= 0; + return index + n <= value.length; + } + + @Override + public int nextInt() { + if (index < end) + return value[index++]; + else + throw noSuchElement(nextIndex()); + } + + @Override + public int nextInt(int n) { + assert n >= 0; + int i = index + n; + if (i <= end) + return value[(index = i) - 1]; + else + throw noSuchElement(i - start); + } + + @Override + public int nextIndex() { return index - start; } + + // The reverse iterator ------------------------------- + + @Override + 
public boolean hasPrevious() { return index > start; } + + @Override + public boolean hasPrevious(int n) { + assert n >= 0; + return index - n >= 0; + } + + @Override + public int previousInt() { + if (index > start) + return value[--index]; + else + throw noSuchElement(previousIndex()); + } + + @Override + public int previousInt(int n) { + assert n >= 0; + int i = index - n; + if (i >= start) + return value[index = i]; + else + throw noSuchElement(i); + } + + @Override + public int previousIndex() { return index - start - 1; } + + // Diagnostic use ------------------------------------- + + @Override + public String toString() { + return String.format("[%s|%s]", new String(value, start, index - start), + new String(value, index, end - index)); + } + } + } + + /** + * Adapt a Python {@code str} to a sequence of Java {@code int} + * values or throw an exception. If the method throws the special + * exception {@link NoConversion}, the caller must catch it and deal + * with it, perhaps by throwing a {@link TypeError}. A binary + * operation will normally return {@link Py#NotImplemented} in that + * case. + *

+     * Note that implementing {@link PySequence.OfInt} is not enough:
+     * other types may do so, but be incompatible in Python.
+     *
+     * @param v to wrap or return
+     * @return adapted to a sequence
+     * @throws NoConversion if {@code v} is not a Python {@code str}
+     */
+    static CodepointDelegate adapt(Object v) throws NoConversion {
+        // Check against supported types, most likely first
+        if (v instanceof String)
+            return new StringAdapter((String)v);
+        else if (v instanceof PyUnicode)
+            return ((PyUnicode)v).delegate;
+        throw PyObjectUtil.NO_CONVERSION;
+    }
+
+    /**
+     * Short-cut {@link #adapt(Object)} when type statically known.
+     *
+     * @param v to wrap
+     * @return new StringAdapter(v)
+     */
+    static StringAdapter adapt(String v) { return new StringAdapter(v); }
+
+    /**
+     * Short-cut {@link #adapt(Object)} when type statically known.
+     *
+     * @return the delegate for sequence operations on this {@code str}
+     */
+    UnicodeAdapter adapt() { return delegate; }
+
+    /**
+     * Adapt a Python {@code str}, as by {@link #adapt(Object)}, that is
+     * intended as a substring to find, in {@code str.find()} or
+     * {@code str.replace()}, for example. If the argument cannot be
+     * adapted as a {@code str}, a {@code TypeError} will be raised,
+     * with message like "METHOD(): string to find must be str not T",
+     * where {@code T} is the type of the errant argument.
+     *
+     * @param method in which encountered
+     * @param sub alleged string
+     * @return adapted to a sequence
+     * @throws TypeError if {@code sub} cannot be wrapped as a delegate
+     */
+    static CodepointDelegate adaptSub(String method, Object sub) throws TypeError {
+        try {
+            return adapt(sub);
+        } catch (NoConversion nc) {
+            // Translate the internal signal into a Python TypeError
+            throw Abstract.argumentTypeError(method, "string to find", "str", sub);
+        }
+    }
+
+    /**
+     * Adapt a Python {@code str}, as by {@link #adapt(Object)}, that is
+     * intended as a replacement substring in {@code str.replace()}, for
+     * example.
+     *
+     * @param method in which encountered
+     * @param replacement alleged string
+     * @return adapted to a sequence
+     * @throws TypeError if {@code replacement} cannot be wrapped as a
+     *     delegate
+     */
+    static CodepointDelegate adaptRep(String method, Object replacement) throws TypeError {
+        try {
+            return adapt(replacement);
+        } catch (NoConversion nc) {
+            // Translate the internal signal into a Python TypeError
+            throw Abstract.argumentTypeError(method, "replacement", "str", replacement);
+        }
+    }
+
+    /**
+     * Adapt a Python {@code str} intended as a separator, as by
+     * {@link #adapt(Object)}.
+     *
+     * @param method in which encountered
+     * @param sep alleged separator
+     * @return adapted to a sequence
+     * @throws TypeError if {@code sep} cannot be wrapped as a delegate
+     * @throws ValueError if {@code sep} is the empty string
+     */
+    static CodepointDelegate adaptSeparator(String method, Object sep)
+            throws TypeError, ValueError {
+        try {
+            CodepointDelegate p = adapt(sep);
+            // An empty separator is a ValueError, not a TypeError
+            if (p.length() == 0) { throw new ValueError("%s(): empty separator", method); }
+            return p;
+        } catch (NoConversion nc) {
+            throw Abstract.argumentTypeError(method, "separator", "str or None", sep);
+        }
+    }
+
+    /**
+     * Adapt a Python {@code str} intended as a fill character in
+     * justification and centring operations. The behaviour is quite
+     * like {@link #adapt(Object)}, but it returns a single code point.
+     * A null argument returns the default choice, a space.
+ * + * @param method in which encountered + * @param fill alleged fill character (or {@code null}) + * @return fill as a code point + * @throws TypeError if {@code fill} is not a one-character string + */ + private static int adaptFill(String method, Object fill) { + if (fill == null) { + return ' '; + } else if (fill instanceof String) { + String s = (String)fill; + if (s.codePointCount(0, s.length()) != 1) + throw new TypeError(BAD_FILLCHAR); + return s.codePointAt(0); + } else if (fill instanceof PyUnicode) { + PyUnicode u = (PyUnicode)fill; + if (u.value.length != 1) + throw new TypeError(BAD_FILLCHAR); + return u.value[0]; + } else { + throw Abstract.argumentTypeError(method, "fill", "a character", fill); + } + } + + private static String BAD_FILLCHAR = "the fill character must be exactly one character long"; + + /** + * Adapt a Python {@code str}, intended as a list of characters to + * strip, as by {@link #adapt(Object)} then conversion to a set. + * + * @param method in which encountered + * @param chars characters defining the set (or {@code None} or + * {@code null}) + * @return {@code null} or characters adapted to a set + * @throws TypeError if {@code sep} cannot be wrapped as a delegate + */ + static Set adaptStripSet(String method, Object chars) throws TypeError, ValueError { + if (chars == null || chars == Py.None) { + return null; + } else { + try { + return adapt(chars).asStream().collect(Collectors.toCollection(HashSet::new)); + } catch (NoConversion nc) { + throw Abstract.argumentTypeError(method, "chars", "str or None", chars); + } + } + } + + /** + * Convert slice end indices to a {@link PySlice.Indices} object. 
+ * + * @param s sequence being sliced + * @param start first index included + * @param end first index not included + * @return indices of the slice + * @throws TypeError if {@code start} or {@code end} cannot be + * considered an index + */ + private static PySlice.Indices getSliceIndices(CodepointDelegate s, Object start, Object end) + throws TypeError { + try { + return (new PySlice(start, end)).getIndices(s.length()); + } catch (PyException pye) { + throw pye; + } catch (Throwable t) { + throw new InterpreterError(t, "non-python exception)"); + } + } + + /** + * Concatenate two {@code String} representations of {@code str}. + * This method almost always calls {@code String.concat(v, w)} and + * almost always returns a {@code String}. There is a delicate case + * where {@code v} ends with a high surrogate and {@code w} starts + * with a low surrogate. Simply concatenated, these merge into one + * character. Only a {@code PyUnicode} properly represents the + * result in that case. + * + * @param v first string to concatenate + * @param w second string to concatenate + * @return the concatenation {@code v + w} + */ + private static Object concat(String v, String w) throws OutOfMemoryError { + /* + * Since we have to guard against empty strings, we may as well take + * the optimisation these paths invite. + */ + int vlen = v.length(); + if (vlen == 0) + return w; + else if (w.length() == 0) + return v; + else if (Character.isLowSurrogate(w.charAt(0)) + && Character.isHighSurrogate(v.charAt(vlen - 1))) + // Only a PyUnicode properly represents the result + return concatUnicode(v.codePoints(), w.codePoints()); + else { + // Java String concatenation will do fine + return v.concat(w); + } + } + + /** + * Concatenate two streams of code points into a {@code PyUnicode}. 
+     *
+     * @param v first string to concatenate
+     * @param w second string to concatenate
+     * @return the concatenation {@code v + w}
+     * @throws OutOfMemoryError when the concatenated string is too long
+     */
+    private static PyUnicode concatUnicode(IntStream v, IntStream w) throws OutOfMemoryError {
+        return wrap(IntStream.concat(v, w).toArray());
+    }
+
+    /**
+     * Apply a unary operation to every character of a string and return
+     * them as a string. This supports transformations like
+     * {@link #upper() str.upper()}.
+     *
+     * @param op the operation
+     * @return transformed string
+     */
+    private PyUnicode mapChars(IntUnaryOperator op) {
+        return wrap(delegate.asIntStream().map(op).toArray());
+    }
+
+    /**
+     * Apply a unary operation to every character of a string and return
+     * them as a string. This supports transformations like
+     * {@link #upper() str.upper()}.
+     *
+     * @param s the string to transform
+     * @param op the operation
+     * @return transformed string
+     */
+    private static String mapChars(String s, IntUnaryOperator op) {
+        int[] v = s.codePoints().map(op).toArray();
+        // Build the result from code points (not chars)
+        return new String(v, 0, v.length);
+    }
+
+    /** A {@code NoSuchElementException} identifying the index. */
+    private static NoSuchElementException noSuchElement(int k) {
+        return new NoSuchElementException(Integer.toString(k));
+    }
+
+    /**
+     * Assert that 0 ≤ start ≤ index ≤ end ≤ len or if
+     * not, throw an exception.
+     *
+     * @param index e.g. the start position of an iterator.
+     * @param start first in range
+     * @param end first beyond range (i.e.
non-inclusive bound)
+     * @param len of sequence
+     * @throws IndexOutOfBoundsException if the condition is violated
+     */
+    private static void checkIndexRange(int index, int start, int end, int len)
+            throws IndexOutOfBoundsException {
+        // First the range itself, then the index against the range
+        if ((0 <= start && start <= end && end <= len) == false)
+            throw new IndexOutOfBoundsException(
+                    String.format("start=%d, end=%d, len=%d", start, end, len));
+        else if (index < start)
+            throw new IndexOutOfBoundsException("before start");
+        else if (index > end)
+            throw new IndexOutOfBoundsException("beyond end");
+    }
+
+    /**
+     * A little helper for converting str.find to str.index that will
+     * raise {@code ValueError("substring not found")} if the argument
+     * is negative, otherwise passes the argument through.
+     *
+     * @param index to check
+     * @return {@code index} if non-negative
+     * @throws ValueError if argument is negative
+     */
+    private static final int checkIndexReturn(int index) throws ValueError {
+        if (index >= 0) {
+            return index;
+        } else {
+            throw new ValueError("substring not found");
+        }
+    }
+
+    // Plumbing (Jython 2) -------------------------------------------
+
+    // @formatter:off
+
+    /** Convert to {@code int} in the given base, after {@code encodeDecimal()}. */
+    public int atoi(int base) {
+        return atoi(encodeDecimal(), base);
+    }
+
+    /** Convert to {@code PyLong} in the given base, after {@code encodeDecimal()}. */
+    public PyLong atol(int base) {
+        return atol(encodeDecimal(), base);
+    }
+
+    /** Convert to {@code double}, after {@code encodeDecimal()}. */
+    public double atof() {
+        return atof(encodeDecimal());
+    }
+
+    /**
+     * Encode unicode into a valid decimal String. Throws a UnicodeEncodeError on invalid
+     * characters.
+     *
+     * @return a valid decimal as an encoded String
+     */
+    @Deprecated // See _PyUnicode_TransformDecimalAndSpaceToASCII
+    private String encodeDecimal() {
+
+        // XXX This all has a Jython 2 smell: bytes/str confusion.
+        // XXX Also, String and PyUnicode implementations are needed.
+ // XXX Follow CPython _PyUnicode_TransformDecimalAndSpaceToASCII + + int digit; + StringBuilder sb = new StringBuilder(); + + for (CodepointIterator si = delegate.iterator(0); si.hasNext();) { + int codePoint = si.nextInt(); + if (isPythonSpace(codePoint)) { + sb.append(' '); + continue; + } + digit = Character.digit(codePoint, 10); + if (digit >= 0) { + sb.append(digit); + continue; + } + if (0 < codePoint && codePoint < 256) { + sb.appendCodePoint(codePoint); + continue; + } + // All other characters are considered unencodable + int i = si.previousIndex(); + // Signature has a Jython 2 smell: String->String? + codecs.encoding_error("strict", "decimal", asString(), i, i + 1, + "invalid decimal Unicode string"); + } + return sb.toString(); + } + + /** + * Encode unicode in the basic plane into a valid decimal String. Throws a UnicodeEncodeError on + * invalid characters. + * + * @return a valid decimal as an encoded String + */ + private String encodeDecimalBasic() { + int digit; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < asString().length(); i++) { + char ch = asString().charAt(i); + if (isPythonSpace(ch)) { + sb.append(' '); + continue; + } + digit = Character.digit(ch, 10); + if (digit >= 0) { + sb.append(digit); + continue; + } + if (0 < ch && ch < 256) { + sb.append(ch); + continue; + } + // All other characters are considered unencodable + codecs.encoding_error("strict", "decimal", asString(), i, i + 1, + "invalid decimal Unicode string"); + } + return sb.toString(); + } + + // Copied from PyString + /** + * A little helper for converting str.find to str.index that will raise + * {@code ValueError("substring not found")} if the argument is negative, otherwise passes + * the argument through. 
+     *
+     * @param index to check
+     * @return {@code index} if non-negative
+     * @throws PyException {@code ValueError} if not found
+     */
+    protected final int checkIndex(int index) throws PyException {
+        if (index >= 0) {
+            return index;
+        } else {
+            throw new ValueError("substring not found");
+        }
+    }
+
+    // Copied from PyString with this -> self
+    /**
+     * Convert a {@code String} to a floating-point value according to Python rules.
+     *
+     * @param self to convert
+     * @return the value
+     */
+    public static double atof(String self) {
+        double x = 0.0;
+        Matcher m = getFloatPattern().matcher(self);
+        boolean valid = m.matches();
+
+        if (valid) {
+            // Might be a valid float: trimmed of white space in group 1.
+            String number = m.group(1);
+            try {
+                char lastChar = number.charAt(number.length() - 1);
+                if (Character.isLetter(lastChar)) {
+                    // It's something like "nan", "-Inf" or "+nifty"
+                    x = atofSpecials(m.group(1));
+                } else {
+                    // A numeric part was present, try to convert the whole
+                    x = Double.parseDouble(m.group(1));
+                }
+            } catch (NumberFormatException e) {
+                // Passed the pattern, but is not a number after all
+                valid = false;
+            }
+        }
+
+        // At this point, valid will have been cleared if there was a problem.
+        if (valid) {
+            return x;
+        } else {
+            String fmt = "invalid literal for float: %s";
+            throw new ValueError(String.format(fmt, self.trim()));
+        }
+    }
+
+    // Copied from PyString
+    /**
+     * Regular expression for an unsigned Python float, accepting also any sequence of the letters
+     * that belong to "NaN" or "Infinity" in whatever case. This is used within the regular
+     * expression patterns that define a priori acceptable strings in the float and complex
+     * constructors. The expression contributes no capture groups.
+ */ + private static final String UF_RE = + "(?:(?:(?:\\d+\\.?|\\.\\d)\\d*(?:[eE][+-]?\\d+)?)|[infatyINFATY]+)"; + + // Copied from PyString + /** + * Return the (lazily) compiled regular expression that matches all valid a Python float() + * arguments, in which Group 1 captures the number, stripped of white space. Various invalid + * non-numerics are provisionally accepted (e.g. "+inanity" or "-faint"). + */ + private static synchronized Pattern getFloatPattern() { + if (floatPattern == null) { + floatPattern = Pattern.compile("\\s*([+-]?" + UF_RE + ")\\s*"); + } + return floatPattern; + } + + // Copied from PyString + /** Access only through {@link #getFloatPattern()}. */ + private static Pattern floatPattern = null; + + // Copied from PyString + /** + * Return the (lazily) compiled regular expression for a Python complex number. This is used + * within the regular expression patterns that define a priori acceptable strings in the complex + * constructors. The expression contributes five named capture groups a, b, x, y and j. x and y + * are the two floats encountered, and if j is present, one of them is the imaginary part. a and + * b are the optional parentheses. They must either both be present or both omitted. + */ + private static synchronized Pattern getComplexPattern() { + if (complexPattern == null) { + complexPattern = Pattern.compile("\\s*(?\\(\\s*)?" // Parenthesis + + "(?[+-]?" + UF_RE + "?)" // + + "(?[+-]" + UF_RE + "?)?(?[jJ])?" // + + + "\\s*(?\\)\\s*)?"); // Parenthesis + } + return complexPattern; + } + + // Copied from PyString + /** Access only through {@link #getComplexPattern()} */ + private static Pattern complexPattern = null; + + // Copied from PyString + /** + * Conversion for non-numeric floats, accepting signed or unsigned "inf" and "nan", in any case. 
+     *
+     * @param s to convert
+     * @return non-numeric result (if valid)
+     * @throws NumberFormatException if not a valid non-numeric indicator
+     */
+    private static double atofSpecials(String s) throws NumberFormatException {
+        // Lower-case independently of the default locale (e.g. Turkish dotless i)
+        switch (s.toLowerCase(java.util.Locale.ROOT)) {
+            case "nan":
+            case "+nan":
+            case "-nan":
+                return Double.NaN;
+            case "inf":
+            case "+inf":
+            case "infinity":
+            case "+infinity":
+                return Double.POSITIVE_INFINITY;
+            case "-inf":
+            case "-infinity":
+                return Double.NEGATIVE_INFINITY;
+            default:
+                throw new NumberFormatException();
+        }
+    }
+
+    // Copied from PyString
+    /**
+     * Convert this PyString to a complex value according to Python rules.
+     *
+     * @return the value
+     */
+    private PyComplex atocx() {
+        double x = 0.0, y = 0.0;
+        Matcher m = getComplexPattern().matcher(asString());
+        boolean valid = m.matches();
+
+        if (valid) {
+            // Passes a priori, but we have some checks to make. Brackets: both or neither.
+            if ((m.group("a") != null) != (m.group("b") != null)) {
+                valid = false;
+
+            } else {
+                try {
+                    // Pick up the two numbers [+-]? <x> [+-] <y> j?
+                    String xs = m.group("x"), ys = m.group("y");
+
+                    if (m.group("j") != null) {
+                        // There is a 'j', so there is an imaginary part.
+                        if (ys != null) {
+                            // There were two numbers, so the second is the imaginary part.
+                            y = toComplexPart(ys);
+                            // And the first is the real part
+                            x = toComplexPart(xs);
+                        } else if (xs != null) {
+                            // There was only one number (and a 'j')so it is the imaginary part.
+                            y = toComplexPart(xs);
+                            // x = 0.0;
+                        } else {
+                            // There were no numbers, just the 'j'. (Impossible return?)
+                            y = 1.0;
+                            // x = 0.0;
+                        }
+
+                    } else {
+                        // There is no 'j' so can only be one number, the real part.
+                        if (!xs.isEmpty() && Character.isLetter(xs.charAt(xs.length() - 1))) {
+                            // Letters ought to be "nan" or "inf[inity]", as in atof()
+                            x = atofSpecials(xs);
+                        } else {
+                            // A number (or empty, which is rejected here)
+                            x = Double.parseDouble(xs);
+                        }
+                        if (ys != null) {
+                            // Something like "123 +" or "123 + 456" but no 'j'.
+                            throw new NumberFormatException();
+                        }
+                    }
+
+                } catch (NumberFormatException e) {
+                    valid = false;
+                }
+            }
+        }
+
+        // At this point, valid will have been cleared if there was a problem.
+        if (valid) {
+            return new PyComplex(x, y);
+        } else {
+            String fmt = "complex() arg is a malformed string: %s";
+            throw new ValueError(String.format(fmt, asString().trim()));
+        }
+
+    }
+
+    // @formatter:off
+
+    // Copied from PyString
+    /**
+     * Helper for interpreting each part (real and imaginary) of a complex number expressed as a
+     * string in {@link #atocx()}. It deals with numbers, inf, nan and their variants, and
+     * with the "implied one" in +j or 10-j.
+     *
+     * @param s to interpret
+     * @return value of s
+     * @throws NumberFormatException if the number is invalid
+     */
+    private static double toComplexPart(String s) throws NumberFormatException {
+        if (s.length() == 0) {
+            // Empty string (occurs only as 'j')
+            return 1.0;
+        } else {
+            char lastChar = s.charAt(s.length() - 1);
+            if (Character.isLetter(lastChar)) {
+                // Possibly a sign, then letters that ought to be "nan" or "inf[inity]"
+                return atofSpecials(s);
+            } else if (lastChar == '+') {
+                // Occurs only as "+j"
+                return 1.0;
+            } else if (lastChar == '-') {
+                // Occurs only as "-j"
+                return -1.0;
+            } else {
+                // Possibly a sign then an unsigned float
+                return Double.parseDouble(s);
+            }
+        }
+    }
+
+    // Copied from PyString with this -> self
+    /*
+     * Parse the text of an integer in the given base: surrounding white space, an optional
+     * sign and a base prefix are recognised here, and the digits are left to BigInteger.
+     * May throw NumberFormatException or StringIndexOutOfBoundsException on bad input.
+     */
+    private static BigInteger asciiToBigInteger(String self, int base, boolean isLong) {
+
+        // b and e delimit the part of self still to be interpreted
+        int b = 0;
+        int e = self.length();
+
+        while (b < e && Character.isWhitespace(self.charAt(b))) {
+            b++;
+        }
+
+        while (e > b && Character.isWhitespace(self.charAt(e - 1))) {
+            e--;
+        }
+
+        char sign = 0;
+        if (b < e) {
+            sign = self.charAt(b);
+            // NOTE(review): a second sign is not rejected here and BigInteger accepts
+            // one leading sign itself, so "+-5" appears to parse as -5. Confirm.
+            if (sign == '-' || sign == '+') {
+                b++;
+                while (b < e && Character.isWhitespace(self.charAt(b))) {
+                    b++;
+                }
+            }
+
+            if (base == 16) {
+                if (self.charAt(b) == '0') {
+                    if (b < e - 1 && Character.toUpperCase(self.charAt(b + 1)) == 'X') {
+                        b += 2;
+                    }
+                }
+            } else if (base == 0) {
+                // Base 0 means: infer the base from the prefix
+                if (self.charAt(b) == '0') {
+                    if (b < e - 1 && Character.toUpperCase(self.charAt(b + 1)) == 'X') {
+                        base = 16;
+                        b += 2;
+                    } else if (b < e - 1
&& Character.toUpperCase(self.charAt(b + 1)) == 'O') { + base = 8; + b += 2; + } else if (b < e - 1 && Character.toUpperCase(self.charAt(b + 1)) == 'B') { + base = 2; + b += 2; + } else { + base = 8; + } + } + } else if (base == 8) { + if (b < e - 1 && Character.toUpperCase(self.charAt(b + 1)) == 'O') { + b += 2; + } + } else if (base == 2) { + if (b < e - 1 && Character.toUpperCase(self.charAt(b + 1)) == 'B') { + b += 2; + } + } + } + + if (base == 0) { + base = 10; + } + + // if the base >= 22, then an 'l' or 'L' is a digit! + if (isLong && base < 22 && e > b + && (self.charAt(e - 1) == 'L' || self.charAt(e - 1) == 'l')) { + e--; + } + + String s = self; + if (b > 0 || e < self.length()) { + s = self.substring(b, e); + } + + BigInteger bi; + if (sign == '-') { + bi = new BigInteger("-" + s, base); + } else { + bi = new BigInteger(s, base); + } + return bi; + } + + // Copied from PyString + public int atoi() { + return atoi(10); + } + + // Copied from PyString with this -> self + public static int atoi(String self, int base) { + if ((base != 0 && base < 2) || (base > 36)) { + throw new ValueError("invalid base for atoi()"); + } + + try { + BigInteger bi = asciiToBigInteger(self, base, false); + if (bi.compareTo(PyLong.MAX_INT) > 0 || bi.compareTo(PyLong.MIN_INT) < 0) { + throw new OverflowError("long int too large to convert to int"); + } + return bi.intValue(); + } catch (NumberFormatException exc) { + throw new ValueError( + "invalid literal for int() with base " + base + ": '" + self + "'"); + } catch (StringIndexOutOfBoundsException exc) { + throw new ValueError( + "invalid literal for int() with base " + base + ": '" + self + "'"); + } + } + + // Copied from PyString + public PyLong atol() { + return atol(10); + } + + // Copied from PyString with this -> self + public static PyLong atol(String self, int base) { + // XXX Likely this belongs in PyLong + if ((base != 0 && base < 2) || (base > 36)) { + throw new ValueError("invalid base for long literal:" + 
base); + } + + try { + BigInteger bi = asciiToBigInteger(self, base, true); + return new PyLong(PyLong.TYPE, bi); // XXX should return Object bi + } catch (NumberFormatException | StringIndexOutOfBoundsException exc) { + throw new ValueError( + "invalid literal for long() with base " + base + ": '" + self + "'"); + } + } + + // Copied from PyString + /** + * Implements PEP-3101 {}-formatting method {@code str.format()}. + * When called with {@code enclosingIterator == null}, this + * method takes this object as its formatting string. The method is also called (calls itself) + * to deal with nested formatting specifications. In that case, {@code enclosingIterator} + * is a {@link MarkupIterator} on this object and {@code value} is a substring of this + * object needing recursive translation. + * + * @param args to be interpolated into the string + * @param keywords for the trailing args + * @param enclosingIterator when used nested, null if subject is this {@code PyString} + * @param value the format string when {@code enclosingIterator} is not null + * @return the formatted string based on the arguments + * @throws TypeError if {@code __repr__} or {@code __str__} conversions returned a non-string. + * @throws Throwable from other errors in {@code __repr__} or {@code __str__} + */ + // XXX make this support format(String) too + private String buildFormattedString(Object[] args, String[] keywords, + MarkupIterator enclosingIterator, String value) throws TypeError, Throwable { + + MarkupIterator it; + if (enclosingIterator == null) { + // Top-level call acts on this object. + it = new MarkupIterator(this.asString()); + } else { + // Nested call acts on the substring and some state from existing iterator. 
+ it = new MarkupIterator(enclosingIterator, value); + } + + // Result will be formed here + StringBuilder result = new StringBuilder(); + + while (true) { + MarkupIterator.Chunk chunk = it.nextChunk(); + if (chunk == null) { + break; + } + // A Chunk encapsulates a literal part ... + result.append(chunk.literalText); + // ... and the parsed form of the replacement field that followed it (if any) + if (chunk.fieldName != null) { + // The grammar of the replacement field is: + // "{" [field_name] ["!" conversion] [":" format_spec] "}" + + // Get the object referred to by the field name (which may be omitted). + Object fieldObj = getFieldObject(chunk.fieldName, it.isBytes(), args, keywords); + if (fieldObj == null) { + continue; + } + + // The conversion specifier is s = __str__ or r = __repr__. + if ("r".equals(chunk.conversion)) { + fieldObj = Abstract.repr(fieldObj); + } else if ("s".equals(chunk.conversion)) { + fieldObj = Abstract.str(fieldObj); + } else if (chunk.conversion != null) { + throw new ValueError("Unknown conversion specifier %s", chunk.conversion); + } + + // The format_spec may be simple, or contained nested replacement fields. + String formatSpec = chunk.formatSpec; + if (chunk.formatSpecNeedsExpanding) { + if (enclosingIterator != null) { + // PEP 3101 says only 2 levels + throw new ValueError("Max string recursion exceeded"); + } + // Recursively interpolate further args into chunk.formatSpec + formatSpec = buildFormattedString(args, keywords, it, formatSpec); + } + renderField(fieldObj, formatSpec, result); + } + } + return result.toString(); + } + + // Copied from PyString + /** + * Return the object referenced by a given field name, interpreted in the context of the given + * argument list, containing positional and keyword arguments. + * + * @param fieldName to interpret. + * @param bytes true if the field name is from a PyString, false for PyUnicode. + * @param args argument list (positional then keyword arguments). 
+     * @param keywords naming the keyword arguments.
+     * @return the object designated or {@code null}.
+     * @throws IndexError if a leading argument index does not select a positional argument
+     * @throws Throwable from errors accessing referenced fields
+     */
+    private Object getFieldObject(String fieldName, boolean bytes, Object[] args,
+            String[] keywords) throws Throwable {
+        FieldNameIterator iterator = new FieldNameIterator(fieldName, bytes);
+        Object head = iterator.head();
+        Object obj = null;
+        // The keyword argument values are the last keywords.length elements of args
+        int positionalCount = args.length - keywords.length;
+
+        if (PyNumber.indexCheck(head)) {
+            // The field name begins with an integer argument index (not a [n]-type index).
+            int index = PyNumber.asSize(head, null);
+            if (index < 0 || index >= positionalCount) {
+                // A negative index must not escape as ArrayIndexOutOfBoundsException
+                throw new IndexError("tuple index out of range");
+            }
+            obj = args[index];
+
+        } else {
+            // The field name begins with keyword: match the head (not obj) against each name.
+            for (int i = 0; i < keywords.length; i++) {
+                if (Abstract.richCompareBool(head, keywords[i], Comparison.EQ)) {
+                    obj = args[positionalCount + i];
+                    break;
+                }
+            }
+            // And if we don't find it, that's an error
+            if (obj == null) {
+                // throw new KeyError(head);
+                throw new MissingFeature("dictionary");
+            }
+        }
+
+        // Now deal with the iterated sub-fields
+        while (obj != null) {
+            FieldNameIterator.Chunk chunk = iterator.nextChunk();
+            if (chunk == null) {
+                // End of iterator
+                break;
+            }
+            Object key = chunk.value;
+            if (chunk.is_attr) {
+                // key must be an attribute name
+                obj = Abstract.getAttr(obj, key);
+            } else {
+                // Item access ([n] or [key]) is not yet supported here
+                // obj = PySequence.getItem(obj, key);
+                throw new MissingFeature("dictionary");
+            }
+        }
+
+        return obj;
+    }
+
+    // Copied from PyString
+    /**
+     * Append to a formatting result, the presentation of one object, according to a given format
+     * specification and the object's {@code __format__} method.
+     *
+     * @param fieldObj to format.
+     * @param formatSpec specification to apply.
+     * @param result to which the result will be appended.
+     */
+    private void renderField(Object fieldObj, String formatSpec, StringBuilder result) {
+        // Only the disabled call below uses this: a null specification is treated as empty.
+        String formatSpecStr = formatSpec == null ? "" : formatSpec;
+        //result.append(fieldObj.__format__(formatSpecStr).asString());
+        // Not yet implemented: every call ends here.
+        throw new MissingFeature("String formatting");
+    }
+
+    // @formatter:on
+
+    /**
+     * A {@link AbstractFormatter}, constructed from a {@link Spec},
+     * with specific validations for {@code str.__format__}.
+     */
+    private static class StrFormatter extends TextFormatter {
+
+        /**
+         * Prepare a {@link TextFormatter} in support of
+         * {@link PyUnicode#__format__(Object, Object) str.__format__}.
+         * The specification is checked, and completed with defaults, by
+         * {@link #validated(Spec)} before it reaches the superclass.
+         *
+         * @param spec a parsed PEP-3101 format specification.
+         * @throws FormatOverflow if a value is out of range (including the
+         *     precision)
+         * @throws FormatError if an unsupported format character is
+         *     encountered
+         */
+        StrFormatter(Spec spec) throws FormatError { super(validated(spec)); }
+
+        @Override
+        public TextFormatter format(Object self) throws NoConversion {
+            return format(convertToString(self));
+        }
+
+        /**
+         * Check that a specification is one acceptable for a string and
+         * return it with string defaults applied. Only the type codes
+         * {@code Spec.NONE} and {@code 's'} are accepted, and with them
+         * grouping, a sign, the alternate form and '=' alignment are
+         * each rejected.
+         *
+         * @param spec to check
+         * @return {@code spec.withDefaults(Spec.STRING)}
+         * @throws FormatError if the type code or an option is not
+         *     allowed
+         */
+        private static Spec validated(Spec spec) throws FormatError {
+            String type = TYPE.name;
+            switch (spec.type) {
+
+                case Spec.NONE:
+                case 's':
+                    // Check for disallowed parts of the specification
+                    if (spec.grouping) {
+                        throw notAllowed("Grouping", type, spec.type);
+                    } else if (Spec.specified(spec.sign)) {
+                        throw signNotAllowed(type, '\0');
+                    } else if (spec.alternate) {
+                        throw alternateFormNotAllowed(type);
+                    } else if (spec.align == '=') { throw alignmentNotAllowed('=', type); }
+                    // Passed (whew!)
+                    break;
+
+                default:
+                    // The type code was not recognised
+                    throw unknownFormat(spec.type, type);
+            }
+
+            /*
+             * spec may be incomplete. The defaults are those commonly used for
+             * string formats.
+             */
+            return spec.withDefaults(Spec.STRING);
+        }
+    }
+
+    // @formatter:off
+    // Copied from _codecs
+    // parallel to CPython's PyUnicode_TranslateCharmap
+    // Not yet implemented: the method always throws, and the code from which it is
+    // to be adapted is kept below, commented out.
+    static Object translateCharmap(PyUnicode str, String errors, Object mapping) {
+
+        throw new MissingFeature("str.translate");
+        /*
+        StringBuilder buf = new StringBuilder(str.toString().length());
+
+        for (Iterator iter = str.newSubsequenceIterator(); iter.hasNext();) {
+            int codePoint = iter.next();
+            Object result = mapping.__finditem__(Py.newInteger(codePoint));
+            if (result == null) {
+                // No mapping found means: use 1:1 mapping
+                buf.appendCodePoint(codePoint);
+            } else if (result == Py.None) {
+                // XXX: We don't support the fancier error handling CPython does here of
+                // capturing regions of chars removed by the None mapping to optionally
+                // pass to an error handler. Though we don't seem to even use this
+                // functionality anywhere either
+                ;
+            } else if (result instanceof PyInteger) {
+                int value = result.asInt();
+                if (value < 0 || value > PySystemState.maxunicode) {
+                    throw Py.TypeError(String.format("character mapping must be in range(0x%x)",
+                            PySystemState.maxunicode + 1));
+                }
+                buf.appendCodePoint(value);
+            } else if (result instanceof PyUnicode) {
+                buf.append(result.toString());
+            } else {
+                // wrong return value
+                throw new TypeError("character mapping must return integer, None or unicode");
+            }
+        }
+        return new PyUnicode(buf.toString());
+        */
+    }
+}
diff --git a/core/src/main/java/org/python/core/PyWrapperDescr.java b/core/src/main/java/org/python/core/PyWrapperDescr.java
new file mode 100644
index 000000000..286128d41
--- /dev/null
+++ b/core/src/main/java/org/python/core/PyWrapperDescr.java
@@ -0,0 +1,298 @@
+// Copyright (c)2022 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.core;
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+
+import org.python.core.PyType.Flag;
+import org.python.core.Slot.Signature;
+
+/**
+ * A {@link Descriptor} for a particular definition in Java
+ * of one of the special methods of the Python data model (such as
+ * {@code __sub__}). The type also appears as
+ * {@code <class 'wrapper_descriptor'>}.
+ * <p>
+ * The owner of the descriptor is the Python type providing the
+ * definition. Type construction places a {@code PyWrapperDescr} in
+ * the dictionary of the defining {@link PyType}, against a key that
+ * is the "dunder name" of the special method it wraps. (This does
+ * not preclude client code moving it around afterwards!)
+ * <p>
+ * The {@code PyWrapperDescr} provides a {@code MethodHandle} for
+ * the defining method. In every Python type where a
+ * {@code PyWrapperDescr} appears as the attribute value
+ * corresponding to a special method, the handle will fill the
+ * corresponding type slot. This may happen because the type is the
+ * defining type, by inheritance, or by insertion of the
+ * {@code PyWrapperDescr} as an attribute of the type. (In the last
+ * case, the signature of the wrapped and destination slots must
+ * match.)
+ */
+/*
+ * Difference from CPython: In CPython, a PyWrapperDescr is created
+ * because the slot at the corresponding offset in the PyTypeObject
+ * of the owning Python type is filled, statically or by
+ * PyType_FromSpec.
+ *
+ * In this implementation, we create a PyWrapperDescr as an
+ * attribute because the Java implementation of the owning type
+ * defines a method with that slot's name. Then we fill the slot
+ * because the type has an attribute with the matching name. The
+ * result should be the same but the process is more regular.
+ */
+public abstract class PyWrapperDescr extends MethodDescriptor {
+
+    static final PyType TYPE = PyType.fromSpec( //
+            new PyType.Spec("wrapper_descriptor", MethodHandles.lookup()).flagNot(Flag.BASETYPE));
+
+    /**
+     * The {@link Slot} ({@code enum}) describing the generic
+     * characteristics of the special method of which
+     * {@link Descriptor#objclass} provides a particular implementation.
+     */
+    final Slot slot;
+
+    /**
+     * Construct a slot wrapper descriptor for the {@code slot} in
+     * {@code objclass}.
+     *
+     * @param objclass the class declaring the special method
+     * @param slot for the generic special method
+     */
+    // Compare CPython PyDescr_NewWrapper in descrobject.c
+    PyWrapperDescr(PyType objclass, Slot slot) {
+        super(TYPE, objclass, slot.methodName);
+        this.slot = slot;
+    }
+
+    // Exposed attributes ---------------------------------------------
+
+    //@Getter
+    // Compare CPython wrapperdescr_get_doc in descrobject.c
+    protected Object __doc__() {
+        // The documentation is derived from the slot, not the wrapped method
+        return PyType.getDocFromInternalDoc(slot.methodName, slot.doc);
+    }
+
+    //@Getter
+    // Compare CPython wrapperdescr_get_text_signature in descrobject.c
+    protected Object __text_signature__() {
+        return PyType.getTextSignatureFromInternalDoc(slot.methodName, slot.doc);
+    }
+
+    // Special methods ------------------------------------------------
+
+    // Compare CPython wrapperdescr_repr in descrobject.c
+    @SuppressWarnings("unused")
+    private Object __repr__() {
+        return descrRepr("slot wrapper");
+    }
+
+    // Compare CPython wrapperdescr_get in descrobject.c
+    @Override
+    protected Object __get__(Object obj, PyType type) {
+        if (obj == null) {
+            /*
+             * obj==null indicates the descriptor was found on the target object
+             * itself (or a base), see CPython type_getattro in typeobject.c
+             */
+            return this;
+        }
+        // Otherwise, validate obj and return a callable binding this to it
+        check(obj);
+        return new PyMethodWrapper(this, obj);
+    }
+
+    /**
+     * Return the handle contained in this descriptor applicable to the
+     * Java class supplied (typically that of a {@code self} argument
+     * during a call). The {@link Descriptor#objclass} is consulted to
+     * make this determination. If the class is not an accepted
+     * implementation of {@code objclass}, an empty slot handle (with
+     * the correct signature) is returned.
+     *
+     * @param selfClass Java class of the {@code self} argument
+     * @return corresponding handle (or {@code slot.getEmpty()})
+     */
+    abstract MethodHandle getWrapped(Class selfClass);
+
+    /**
+     * Call the wrapped method with positional arguments (the first
+     * being the target object) and optionally keyword arguments. The
+     * arguments, in type and number, must match the signature of the
+     * special function slot.
+     *
+     * @param args positional arguments beginning with {@code self}
+     * @param names of keywords in the method call
+     * @return result of calling the wrapped method
+     * @throws TypeError if there is no {@code args[0]} or it is the
+     *     wrong type
+     * @throws Throwable from the implementation of the special method
+     */
+    // Compare CPython wrapperdescr_call in descrobject.c
+    public Object __call__(Object[] args, String[] names) throws TypeError, Throwable {
+
+        int argc = args.length;
+        if (argc == 0) {
+            // Not even one argument
+            throw new TypeError(DESCRIPTOR_NEEDS_ARGUMENT, name, objclass.name);
+        }
+
+        // Split the leading element self from args
+        Object self = args[0];
+        Object[] newargs = Py.EMPTY_ARRAY;
+        if (argc > 1) {
+            newargs = new Object[argc - 1];
+            System.arraycopy(args, 1, newargs, 0, newargs.length);
+        }
+
+        // Make sure that the first argument is acceptable as 'self'
+        PyType selfType = PyType.of(self);
+        if (!Abstract.recursiveIsSubclass(selfType, objclass)) {
+            throw new TypeError(DESCRIPTOR_REQUIRES, name, objclass.name, selfType.name);
+        }
+
+        return callWrapped(self, newargs, names);
+    }
+
+    @Override
+    public Object call(Object[] args, String[] names) throws TypeError, Throwable {
+
+        // Number of arguments after self
+        int m = args.length - 1;
+        if (m < 0) {
+            // Not even one argument
+            throw new TypeError(DESCRIPTOR_NEEDS_ARGUMENT, name, objclass.name);
+        }
+
+        // Split the leading element self from rest of args
+        Object self = args[0];
+        Object[] rest;
+        if (m == 0) {
+            rest = Py.EMPTY_ARRAY;
+        } else {
+            rest = new Object[m];
+            System.arraycopy(args, 1, rest, 0, m);
+        }
+
+        try {
+            // Call this as a method bound to self.
+            Signature sig = slot.signature;
+            MethodHandle mh = getWrapped(self.getClass());
+            return sig.callWrapped(mh, self, rest, names);
+        } catch (ArgumentError ae) {
+            /*
+             * Implementations may throw ArgumentError as a simplified encoding
+             * of a TypeError.
+             */
+            throw typeError(ae, rest);
+        }
+    }
+
+    /**
+     * Invoke the method described by this {@code PyWrapperDescr} on
+     * the given target {@code self}, with the arguments supplied. The
+     * handle used is the one {@link #getWrapped(Class)} selects for
+     * the Java class of {@code self}.
+     *
+     * @param self target object of the method call
+     * @param args of the method call
+     * @param names of keywords in the method call
+     * @return result of the method call
+     * @throws TypeError if the arguments do not fit the special method
+     * @throws Throwable from the implementation of the special method
+     */
+    // Compare CPython wrapperdescr_raw_call in descrobject.c
+    Object callWrapped(Object self, Object[] args, String[] names) throws Throwable {
+        try {
+            // Call through the correct wrapped handle
+            MethodHandle mh = getWrapped(self.getClass());
+            Slot.Signature sig = slot.signature;
+            return sig.callWrapped(mh, self, args, names);
+        } catch (ArgumentError ae) {
+            // Re-express the simplified error as a TypeError
+            throw typeError(ae, args, names);
+        }
+    }
+
+    /**
+     * A {@link PyWrapperDescr} for use when the owning Python type has
+     * just one accepted implementation.
+     */
+    static class Single extends PyWrapperDescr {
+
+        /**
+         * A handle for the particular implementation of a special method
+         * being wrapped. The method type is that of
+         * {@link #slot}{@code .signature}.
+         */
+        protected final MethodHandle wrapped;
+
+        /**
+         * Construct a slot wrapper descriptor, identifying by a method
+         * handle the implementation method for the {@code slot} in
+         * {@code objclass}.
+         *
+         * @param objclass the class declaring the special method
+         * @param slot for the generic special method
+         * @param wrapped a handle to an implementation of that slot
+         */
+        // Compare CPython PyDescr_NewWrapper in descrobject.c
+        Single(PyType objclass, Slot slot, MethodHandle wrapped) {
+            super(objclass, slot);
+            this.wrapped = wrapped;
+        }
+
+        @Override
+        MethodHandle getWrapped(Class selfClass) {
+            // The handle applies only if selfClass is acceptable as 'self'
+            boolean accepted = objclass.getJavaClass().isAssignableFrom(selfClass);
+            return accepted ? wrapped : slot.getEmpty();
+        }
+    }
+
+    /**
+     * A {@link PyWrapperDescr} for use when the owning Python type has
+     * multiple accepted implementations.
+     */
+    static class Multiple extends PyWrapperDescr {
+
+        /**
+         * Handles for the particular implementations of a special method
+         * being wrapped. The method type of each is that of
+         * {@link #slot}{@code .signature}.
+         */
+        protected final MethodHandle[] wrapped;
+
+        /**
+         * Construct a slot wrapper descriptor, identifying by an array of
+         * method handles the implementation methods for the {@code slot} in
+         * {@code objclass}.
+         *
+         * @param objclass the class declaring the special method
+         * @param slot for the generic special method
+         * @param wrapped handles to the implementation of that slot
+         */
+        // Compare CPython PyDescr_NewWrapper in descrobject.c
+        Multiple(PyType objclass, Slot slot, MethodHandle[] wrapped) {
+            super(objclass, slot);
+            this.wrapped = wrapped;
+        }
+
+        /**
+         * {@inheritDoc}
+         * <p>
+         * The method will check that the type of self matches
+         * {@link Descriptor#objclass}, according to its
+         * {@link PyType#indexAccepted(Class)}.
+         */
+        @Override
+        MethodHandle getWrapped(Class selfClass) {
+            // An index outside the array means selfClass has no handle here (no exception needed)
+            int index = objclass.indexAccepted(selfClass);
+            if (index >= 0 && index < wrapped.length) {
+                return wrapped[index];
+            } else {
+                return slot.getEmpty();
+            }
+        }
+    }
+}
diff --git a/core/src/main/java/org/python/core/ScopeKind.java b/core/src/main/java/org/python/core/ScopeKind.java
new file mode 100644
index 000000000..732268b7f
--- /dev/null
+++ b/core/src/main/java/org/python/core/ScopeKind.java
@@ -0,0 +1,17 @@
+// Copyright (c)2021 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.core;
+
+/** Exposers are of type or module kind. */
+enum ScopeKind {
+
+    MODULE("$module"), //
+    TYPE("$self");
+
+    ScopeKind(String selfName) {
+        this.selfName = selfName;
+    }
+
+    /** Name of a "self" parameter in instance methods. */
+    String selfName;
+}
diff --git a/core/src/main/java/org/python/core/Slot.java b/core/src/main/java/org/python/core/Slot.java
new file mode 100644
index 000000000..0acab2701
--- /dev/null
+++ b/core/src/main/java/org/python/core/Slot.java
@@ -0,0 +1,1170 @@
+// Copyright (c)2021 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.core; + +import static org.python.core.ClassShorthand.B; +import static org.python.core.ClassShorthand.I; +import static org.python.core.ClassShorthand.O; +import static org.python.core.ClassShorthand.OA; +import static org.python.core.ClassShorthand.S; +import static org.python.core.ClassShorthand.SA; +import static org.python.core.ClassShorthand.T; +import static org.python.core.ClassShorthand.V; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.MethodType; +import java.lang.invoke.VarHandle; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.python.base.InterpreterError; +import org.python.base.MissingFeature; +import org.python.core.ArgumentError.Mode; + +/** + * This {@code enum} provides a set of structured constants that are + * used to refer to the special methods of the Python data model. + *

+ * These are structured constants that provide not only the + * {@code String} method name, but also a signature, and much + * information used internally by the run-time system in the + * creation of type objects, the interpretation of code and the + * creation of call sites. + *

+ * In principle, any Python object may support all of the special + * methods, through "slots" in the Python type object + * {@code PyType}. These slots have identical names to the + * corresponding constant in this {@code enum}. The "slots" in the + * Python type object hold pointers ({@code MethodHandle}s) to their + * implementations in Java for that type, which of course define the + * behaviour of instances in Python. Where a special method is + * absent from the implementation of a type, a default "empty" + * handle is provided from the {@code Slot} constant. + */ +// Compare CPython struct wrapperbase in descrobject.h +// also typedef slotdef and slotdefs[] table in typeobject.h +enum Slot { + /* + * The order of the members is not significant, but we take it from + * the slotdefs[] table for definiteness. We do not have quite the + * same entries, and no duplicates. There may yet be special methods + * here that need not be cached, and maybe properties it would be + * useful to add. + */ + /** + * Defines {@link Operations#op_repr}, support for built-in + * {@code repr()}, with signature {@link Signature#UNARY}. + */ + op_repr(Signature.UNARY), + /** + * Defines {@link Operations#op_hash}, support for object hashing, + * with signature {@link Signature#LEN}. + */ + op_hash(Signature.LEN), + /** + * Defines {@link Operations#op_call}, support for calling an + * object, with signature {@link Signature#CALL}. + */ + op_call(Signature.CALL), + /** + * Defines {@link Operations#op_str}, support for built-in + * {@code str()}, with signature {@link Signature#UNARY}. + */ + op_str(Signature.UNARY), + + /** + * Defines {@link Operations#op_getattribute}, attribute get, with + * signature {@link Signature#GETATTR}. + */ + op_getattribute(Signature.GETATTR), + /** + * Defines {@link Operations#op_getattr}, attribute get, with + * signature {@link Signature#GETATTR}. 
+ */ + op_getattr(Signature.GETATTR), + /** + * Defines {@link Operations#op_setattr}, attribute set, with + * signature {@link Signature#SETATTR}. + */ + op_setattr(Signature.SETATTR), + /** + * Defines {@link Operations#op_delattr}, attribute deletion, with + * signature {@link Signature#DELATTR}. + */ + op_delattr(Signature.DELATTR), + + /** + * Defines {@link Operations#op_lt}, the {@code <} operation, with + * signature {@link Signature#BINARY}. + */ + op_lt(Signature.BINARY, "<"), + /** + * Defines {@link Operations#op_le}, the {@code <=} operation, with + * signature {@link Signature#BINARY}. + */ + op_le(Signature.BINARY, "<="), + /** + * Defines {@link Operations#op_eq}, the {@code ==} operation, with + * signature {@link Signature#BINARY}. + */ + op_eq(Signature.BINARY, "=="), + /** + * Defines {@link Operations#op_ne}, the {@code !=} operation, with + * signature {@link Signature#BINARY}. + */ + op_ne(Signature.BINARY, "!="), + /** + * Defines {@link Operations#op_gt}, the {@code >} operation, with + * signature {@link Signature#BINARY}. + */ + op_gt(Signature.BINARY, ">"), + /** + * Defines {@link Operations#op_ge}, the {@code >=} operation, with + * signature {@link Signature#BINARY}. + */ + op_ge(Signature.BINARY, ">="), + + /** + * Defines {@link Operations#op_iter}, get an iterator, with + * signature {@link Signature#UNARY}. + */ + op_iter(Signature.UNARY), // unexplored territory + /** + * Defines {@link Operations#op_next}, advance an iterator, with + * signature {@link Signature#UNARY}. + */ + op_next(Signature.UNARY), // unexplored territory + + /** + * Defines {@link Operations#op_get}, descriptor {@code __get__}, + * with signature {@link Signature#DESCRGET}. + */ + op_get(Signature.DESCRGET), + /** + * Defines {@link Operations#op_set}, descriptor {@code __set__}, + * with signature {@link Signature#SETITEM}. 
+ */ + op_set(Signature.SETITEM), + /** + * Defines {@link Operations#op_delete}, descriptor + * {@code __delete__}, with signature {@link Signature#DELITEM}. + */ + op_delete(Signature.DELITEM), + + /** + * Defines {@link Operations#op_init}, object {@code __init__}, with + * signature {@link Signature#INIT}. + */ + op_init(Signature.INIT), + // __new__ is not a slot + // __del__ is not a slot + + /** + * Defines {@link Operations#op_await}, with signature + * {@link Signature#UNARY}. + */ + op_await(Signature.UNARY), // unexplored territory + /** + * Defines {@link Operations#op_aiter}, with signature + * {@link Signature#UNARY}. + */ + op_aiter(Signature.UNARY), // unexplored territory + /** + * Defines {@link Operations#op_anext}, with signature + * {@link Signature#UNARY}. + */ + op_anext(Signature.UNARY), // unexplored territory + + // Binary ops: reflected form comes first so we can reference it. + /** + * Defines {@link Operations#op_radd}, the reflected {@code +} + * operation, with signature {@link Signature#BINARY}. + */ + op_radd(Signature.BINARY, "+"), + /** + * Defines {@link Operations#op_rsub}, the reflected {@code -} + * operation, with signature {@link Signature#BINARY}. + */ + op_rsub(Signature.BINARY, "-"), + /** + * Defines {@link Operations#op_rmul}, the reflected {@code *} + * operation, with signature {@link Signature#BINARY}. + */ + op_rmul(Signature.BINARY, "*"), + /** + * Defines {@link Operations#op_rmod}, the reflected {@code %} + * operation, with signature {@link Signature#BINARY}. + */ + op_rmod(Signature.BINARY, "%"), + /** + * Defines {@link Operations#op_rdivmod}, the reflected + * {@code divmod} operation, with signature + * {@link Signature#BINARY}. + */ + op_rdivmod(Signature.BINARY, "divmod()"), + /** + * Defines {@link Operations#op_rpow}, the reflected {@code pow} + * operation, with signature {@link Signature#BINARY} (not + * {@link Signature#TERNARY} since only an infix operation can be + * reflected). 
+ */ + op_rpow(Signature.BINARY, "**"), // unexplored territory + /** + * Defines {@link Operations#op_rlshift}, the reflected {@code <<} + * operation, with signature {@link Signature#BINARY}. + */ + op_rlshift(Signature.BINARY, "<<"), + /** + * Defines {@link Operations#op_rrshift}, the reflected {@code >>} + * operation, with signature {@link Signature#BINARY}. + */ + op_rrshift(Signature.BINARY, ">>"), + /** + * Defines {@link Operations#op_rand}, the reflected {@code &} + * operation, with signature {@link Signature#BINARY}. + */ + op_rand(Signature.BINARY, "&"), + /** + * Defines {@link Operations#op_rxor}, the reflected {@code ^} + * operation, with signature {@link Signature#BINARY}. + */ + op_rxor(Signature.BINARY, "^"), + /** + * Defines {@link Operations#op_ror}, the reflected {@code |} + * operation, with signature {@link Signature#BINARY}. + */ + op_ror(Signature.BINARY, "|"), + /** + * Defines {@link Operations#op_rfloordiv}, the reflected {@code //} + * operation, with signature {@link Signature#BINARY}. + */ + op_rfloordiv(Signature.BINARY, "//"), + /** + * Defines {@link Operations#op_rtruediv}, the reflected {@code /} + * operation, with signature {@link Signature#BINARY}. + */ + op_rtruediv(Signature.BINARY, "/"), + /** + * Defines {@link Operations#op_rmatmul}, the reflected {@code @} + * operation, with signature {@link Signature#BINARY}. + */ + op_rmatmul(Signature.BINARY, "@"), + + /** + * Defines {@link Operations#op_add}, the {@code +} operation, with + * signature {@link Signature#BINARY}. + */ + op_add(Signature.BINARY, "+", op_radd), + /** + * Defines {@link Operations#op_sub}, the {@code -} operation, with + * signature {@link Signature#BINARY}. + */ + op_sub(Signature.BINARY, "-", op_rsub), + /** + * Defines {@link Operations#op_mul}, the {@code *} operation, with + * signature {@link Signature#BINARY}. 
+ */ + op_mul(Signature.BINARY, "*", op_rmul), + /** + * Defines {@link Operations#op_mod}, the {@code %} operation, with + * signature {@link Signature#BINARY}. + */ + op_mod(Signature.BINARY, "%", op_rmod), + /** + * Defines {@link Operations#op_divmod}, the {@code divmod} + * operation, with signature {@link Signature#BINARY}. + */ + op_divmod(Signature.BINARY, "divmod()", op_rdivmod), + /** + * Defines {@link Operations#op_pow}, the {@code pow} operation, + * with signature {@link Signature#TERNARY}. + */ + op_pow(Signature.TERNARY, "**", op_rpow), // unexplored territory + + /** + * Defines {@link Operations#op_neg}, the unary {@code -} operation, + * with signature {@link Signature#UNARY}. + */ + op_neg(Signature.UNARY, "unary -"), + /** + * Defines {@link Operations#op_pos}, the unary {@code +} operation, + * with signature {@link Signature#UNARY}. + */ + op_pos(Signature.UNARY, "unary +"), + /** + * Defines {@link Operations#op_abs}, the {@code abs()} operation, + * with signature {@link Signature#UNARY}. + */ + op_abs(Signature.UNARY, "abs()"), + /** + * Defines {@link Operations#op_bool}, conversion to a truth value, + * with signature {@link Signature#PREDICATE}. + */ + op_bool(Signature.PREDICATE), + /** + * Defines {@link Operations#op_invert}, the unary {@code ~} + * operation, with signature {@link Signature#UNARY}. + */ + op_invert(Signature.UNARY, "unary ~"), + + /** + * Defines {@link Operations#op_lshift}, the {@code <<} operation, + * with signature {@link Signature#BINARY}. + */ + op_lshift(Signature.BINARY, "<<", op_rlshift), + /** + * Defines {@link Operations#op_rshift}, the {@code >>} operation, + * with signature {@link Signature#BINARY}. + */ + op_rshift(Signature.BINARY, ">>", op_rrshift), + /** + * Defines {@link Operations#op_and}, the {@code &} operation, with + * signature {@link Signature#BINARY}. 
+ */ + op_and(Signature.BINARY, "&", op_rand), + /** + * Defines {@link Operations#op_xor}, the {@code ^} operation, with + * signature {@link Signature#BINARY}. + */ + op_xor(Signature.BINARY, "^", op_rxor), + /** + * Defines {@link Operations#op_or}, the {@code |} operation, with + * signature {@link Signature#BINARY}. + */ + op_or(Signature.BINARY, "|", op_ror), + + /** + * Defines {@link Operations#op_int}, conversion to an integer + * value, with signature {@link Signature#UNARY}. + */ + op_int(Signature.UNARY), + /** + * Defines {@link Operations#op_float}, conversion to a float value, + * with signature {@link Signature#UNARY}. + */ + op_float(Signature.UNARY), + + /** + * Defines {@link Operations#op_iadd}, the {@code +=} operation, + * with signature {@link Signature#BINARY}. + */ + op_iadd(Signature.BINARY, "+="), // in-place: unexplored territory + /** + * Defines {@link Operations#op_isub}, the {@code -=} operation, + * with signature {@link Signature#BINARY}. + */ + op_isub(Signature.BINARY, "-="), + /** + * Defines {@link Operations#op_imul}, the {@code *=} operation, + * with signature {@link Signature#BINARY}. + */ + op_imul(Signature.BINARY, "*="), + /** + * Defines {@link Operations#op_imod}, the {@code %=} operation, + * with signature {@link Signature#BINARY}. + */ + op_imod(Signature.BINARY, "%="), + /** + * Defines {@link Operations#op_iand}, the {@code &=} operation, + * with signature {@link Signature#BINARY}. + */ + op_iand(Signature.BINARY, "&="), + /** + * Defines {@link Operations#op_ixor}, the {@code ^=} operation, + * with signature {@link Signature#BINARY}. + */ + op_ixor(Signature.BINARY, "^="), + /** + * Defines {@link Operations#op_ior}, the {@code |=} operation, with + * signature {@link Signature#BINARY}. + */ + op_ior(Signature.BINARY, "|="), + + /** + * Defines {@link Operations#op_floordiv}, the {@code //} operation, + * with signature {@link Signature#BINARY}. 
+ */ + op_floordiv(Signature.BINARY, "//", op_rfloordiv), + /** + * Defines {@link Operations#op_truediv}, the {@code /} operation, + * with signature {@link Signature#BINARY}. + */ + op_truediv(Signature.BINARY, "/", op_rtruediv), + /** + * Defines {@link Operations#op_ifloordiv}, the {@code //=} + * operation, with signature {@link Signature#BINARY}. + */ + op_ifloordiv(Signature.BINARY, "//="), + /** + * Defines {@link Operations#op_itruediv}, the {@code /=} operation, + * with signature {@link Signature#BINARY}. + */ + op_itruediv(Signature.BINARY, "/="), + + /** + * Defines {@link Operations#op_index}, conversion to an index + * value, with signature {@link Signature#UNARY}. + */ + op_index(Signature.UNARY), + + /** + * Defines {@link Operations#op_matmul}, the {@code @} (matrix + * multiply) operation, with signature {@link Signature#BINARY}. + */ + op_matmul(Signature.BINARY, "@", op_rmatmul), + /** + * Defines {@link Operations#op_imatmul}, the {@code @=} (matrix + * multiply in place) operation, with signature + * {@link Signature#BINARY}. + */ + op_imatmul(Signature.BINARY, "@="), + + /* + * Note that CPython repeats for "mappings" the following "sequence" + * slots, and slots for __add_ and __mul__, but that we do not need + * to. + */ + /** + * Defines {@link Operations#op_len}, support for built-in + * {@code len()}, with signature {@link Signature#LEN}. + */ + op_len(Signature.LEN, "len()"), + /** + * Defines {@link Operations#op_getitem}, get at index, with + * signature {@link Signature#BINARY}. + */ + op_getitem(Signature.BINARY), + /** + * Defines {@link Operations#op_setitem}, set at index, with + * signature {@link Signature#SETITEM}. + */ + op_setitem(Signature.SETITEM), + /** + * Defines {@link Operations#op_delitem}, delete from index, with + * signature {@link Signature#DELITEM}. 
+ */ + op_delitem(Signature.DELITEM), + /** + * Defines {@link Operations#op_contains}, the {@code in} operation, + * with signature {@link Signature#BINARY_PREDICATE}. + */ + op_contains(Signature.BINARY_PREDICATE); + + /** Method signature to match when filling this slot. */ + final Signature signature; + /** Name of implementation method to bind e.g. "{@code __add__}". */ + final String methodName; + /** Name to use in error messages, e.g. "{@code +}" */ + final String opName; + /** Handle to throw a {@link TypeError} (same signature as slot). */ + private MethodHandle operandError; + /** Description to use in help messages */ + final String doc; + /** Reference to field holding this slot in an {@link Operations} */ + final VarHandle slotHandle; + /** The alternate slot e.g. {@code __radd__} in {@code __add__}. */ + final Slot alt; + + /** + * Constructor for enum constants. + * + * @param signature of the function to be called + * @param opName symbol (such as "+") + * @param methodName implementation method (e.g. "__add__") + * @param alt alternate slot (e.g. "op_radd") + */ + Slot(Signature signature, String opName, String methodName, Slot alt) { + this.opName = opName == null ? name() : opName; + this.methodName = dunder(methodName); + this.signature = signature; + this.slotHandle = Util.slotHandle(this); + this.alt = alt; + // XXX Need something convenient as in CPython. + this.doc = "Doc of " + this.opName; + } + + Slot(Signature signature) { this(signature, null, null, null); } + + Slot(Signature signature, String opName) { this(signature, opName, null, null); } + + Slot(Signature signature, String opName, Slot alt) { this(signature, opName, null, alt); } + + /** Compute corresponding double-underscore method name. 
*/ + private String dunder(String methodName) { + if (methodName != null) + return methodName; + else { + String s = name(); + int i = s.indexOf('_'); + if (i == 2) + s = "__" + s.substring(i + 1) + "__"; + return s; + } + } + + @Override + public java.lang.String toString() { + return "Slot." + name() + " ( " + methodName + signature.type + " ) [" + signature.name() + + "]"; + } + + /** + * Lookup by method name, returning {@code null} if it is not a + * recognised name for any slot. + * + * @param name of a (possible) special method + * @return the Slot corresponding, or {@code null} + */ + public static Slot forMethodName(String name) { return MethodNameLookup.table.get(name); } + + + /** + * Get the name of the method that, by convention, identifies the + * corresponding operation in the implementing class. This is not + * the same as the slot name. + * + * @return conventional special method name. + */ + String getMethodName() { return methodName; } + + /** + * Return the invocation type of slots of this name. + * + * @return the invocation type of slots of this name. + */ + MethodType getType() { return signature.empty.type(); } + + /** + * Get the default that fills the slot when it is "empty". + * + * @return empty method handle for this type of slot + */ + MethodHandle getEmpty() { return signature.empty; } + + /** + * Get a handle to throw a {@link TypeError} with a message + * conventional for the slot. This handle has the same signature as + * the slot, and some data specific to the slot. This is useful when + * the target of a call site may have to raise a type error. + * + * @return throwing method handle for this type of slot + */ + MethodHandle getOperandError() { + // Not in the constructor so as not to provoke PyType + if (operandError == null) { + // Possibly racing, but that's harmless + operandError = Util.operandError(this); + } + return operandError; + } + + /** + * Test whether this slot is non-empty in the given operations + * object. 
+ * + * @param ops to examine for this slot + * @return true iff defined (non-empty) + */ + boolean isDefinedFor(Operations ops) { return slotHandle.get(ops) != signature.empty; } + + /** + * Get the {@code MethodHandle} of this slot's operation from the + * given operations object. Each member of this {@code enum} + * corresponds to a method handle of the same name, which must also + * have the correct signature. + * + * @param ops target operations object + * @return current contents of this slot in {@code ops} + */ + MethodHandle getSlot(Operations ops) { return (MethodHandle)slotHandle.get(ops); } + + /** + * Get the {@code MethodHandle} of this slot's "alternate" operation + * from the given operations object. For a binary operation this is + * the reflected operation. + * + * @param ops target operations object + * @return current contents of the alternate slot in {@code ops} + * @throws NullPointerException if there is no alternate + */ + MethodHandle getAltSlot(Operations ops) throws NullPointerException { + return (MethodHandle)alt.slotHandle.get(ops); + } + + /** + * Set the {@code MethodHandle} of this slot's operation in the + * given operations object. + * + * @param ops target operations object + * @param mh handle value to assign + * @throws InterpreterError if {@code mh} is {@code null} or its + * type is not equal to {@link #getType()} + */ + void setHandle(Operations ops, MethodHandle mh) { + if (mh == null || !mh.type().equals(getType())) + throw slotTypeError(this, mh); + slotHandle.set(ops, mh); + } + + /** + * Set the {@code MethodHandle} of this slot's operation in the + * given operations object to one that calls the object given in a + * manner appropriate to its type. This method is used when setting + * the operation slots of a new type from the new type's + * dictionary, and when updating them after a change. The object + * argument is then the entry found by lookup of this slot's name. + * It may be {@code null} if no entry was found. + *

+ * Where the object is a {@link PyWrapperDescr}, the wrapped method + * handle will be set as by + * {@link #setHandle(Operations, MethodHandle)}. The + * {@link PyWrapperDescr#slot} is not necessarily this slot: client + * Python code can enter any wrapper descriptor against the name. + * + * @param ops target {@code Operations} (or {@code PyType}). + * @param def object defining the handle to set (or {@code null}) + */ + // Compare CPython update_one_slot in typeobject.c + void setDefinition(Operations ops, Object def) { + MethodHandle mh; + if (def == null) { + // No definition available for the special method + if (this == op_next) { + // XXX We should special-case __next__ + /* + * In CPython, this slot is sometimes null=empty, and sometimes + * _PyObject_NextNotImplemented. PyIter_Check checks both, but + * PyIter_Next calls it without checking and a null would then cause + * a crash. We have EmptyException for a similar purpose. + */ + } + mh = signature.empty; + + } else if (def instanceof PyWrapperDescr) { + /* + * When we invoke this slot in ops, the Java class of self will be + * assignable to ops.getJavaClass(), since that class led us to ops. + * It had better also be compatible with the method ultimately + * invoked by the handle we install. We have no control over what + * gets into the dictionary of a type, however, we do know that + * method in a PyWrapperDescr are applicable to the accepted + * implementations of classes of their defining class. We check here + * that ops.getJavaClass() is assignable to an accepted + * implementation of the defining type. + */ + PyWrapperDescr wd = (PyWrapperDescr)def; + mh = wd.getWrapped(ops.getJavaClass()); + if (wd.slot.signature != signature || mh == signature.empty) { + /* + * wd is not compatible with objects of the type(s) that will show + * up at this slot: for example we have inserted float.__add__ into + * a sub-type of int. 
Python chooses to fail later, when the slot is + * bound or invoked, so insert something that checks. + */ + throw new MissingFeature("equivalent of the slot_* functions"); + // mh = signature.slotCalling(def); + } + + } else if (def == Py.None && this == op_hash) { + throw new MissingFeature("special case __hash__ == None"); + // mh = PyObject_HashNotImplemented + + } else { + throw new MissingFeature("equivalent of the slot_* functions"); + // mh = makeSlotHandle(wd); + } + + slotHandle.set(ops, mh); + } + + /** The type of exception thrown by invoking an empty slot. */ + static class EmptyException extends Exception { + private static final long serialVersionUID = 1L; + + // Suppression and stack trace disabled since singleton. + EmptyException() { super(null, null, false, false); } + } + + /** + * An enumeration of the acceptable signatures for slots in an {@link + * Operations} object. For each {@code MethodHandle} we may place in + * a slot of the {@code Operations} object, we must know in advance + * the acceptable signature (the {@code MethodType}), and the slot + * when empty must contain a handle with this signature to a method + * that will raise {@link EmptyException}. Each {@code enum} + * constant here gives a symbolic name to that {@code MethodType}, + * and provides the handle used when a slot of that type is empty. + *

+ * Names are equivalent to {@code typedef}s provided in CPython + * {@code Include/object.h}, but are not exactly the same. We do not + * need quite the same signatures as CPython: we do not return + * integer status, for example. Also, C-specifics like + * {@code Py_ssize_t} are echoed in the C-API names but not here. + *

+ * The shorthand notation we use to describe a signature, for + * example {@code (O,O[],S[])O}, essentially specifies a + * {@code MethodType}, and may be decoded as follows. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
ShorthandJava class
{@link ClassShorthand#B B}{@code boolean.class}
{@link ClassShorthand#I I}{@code int.class}
{@link ClassShorthand#O O}{@code Object.class}
{@link ClassShorthand#S S}{@code String.class}
{@link ClassShorthand#T T}{@link PyType PyType.class}
{@link ClassShorthand#V V}{@code void.class}
{@code []}array of
Signature shorthands
+ */ + enum Signature { + + /* + * The makeDescriptor overrides returning anonymous sub-classes of + * PyWrapperDescr are fairly ugly. However, sub-classes seem to be + * the right solution, and defining them here keeps information + * together that belongs together. + */ + + /** + * The signature {@code (O)O}, for example {@link Slot#op_repr} or + * {@link Slot#op_neg}. + */ + // In CPython: unaryfunc + UNARY(O, O) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 0, names); + return wrapped.invokeExact(self); + } + }, + + /** + * The signature {@code (O,O)O}, for example {@link Slot#op_add} or + * {@link Slot#op_getitem}. + */ + // In CPython: binaryfunc + BINARY(O, O, O) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 1, names); + return wrapped.invokeExact(self, args[0]); + } + }, + /** + * The signature {@code (O,O,O)O}, used for {@link Slot#op_pow}. + */ + // In CPython: ternaryfunc + TERNARY(O, O, O, O), + + /** + * The signature {@code (O,O[],S[])O}, used for + * {@link Slot#op_call}. Note that in Jython, standard calls are + * what CPython refers to as vector calls (although they cannot use + * a stack slice as the array). + */ + // Not in CPython + CALL(O, O, OA, SA) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + return wrapped.invokeExact(self, args, names); + } + }, + + /** + * The signature {@code (O)B}, used for {@link Slot#op_bool}. + */ + // In CPython: inquiry + PREDICATE(B, O), + + /** + * The signature {@code (O,O)B}, used for {@link Slot#op_contains}. + * It is not used for comparisons, because they may return an + * arbitrary object (e.g. in {@code numpy} array comparison). 
+ */ + BINARY_PREDICATE(B, O, O), + + /** + * The signature {@code (O)I}, used for {@link Slot#op_hash} and + * {@link Slot#op_len}. + */ + // In CPython: lenfunc + LEN(I, O) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 0, names); + // The cast makes the invokeExact call site return int, as the handle does + return (int)wrapped.invokeExact(self); + } + }, + + /** + * The signature {@code (O,O,O)V}, used for {@link Slot#op_setitem} + * and {@link Slot#op_set}. The arguments have quite different + * meanings in each. + */ + // In CPython: objobjargproc + SETITEM(V, O, O, O), + + /** + * The signature {@code (O,O)V}, used for {@link Slot#op_delitem} + * and {@link Slot#op_delete}. The arguments have quite different + * meanings in each. + */ + // Not in CPython + DELITEM(V, O, O), + + /** + * The signature {@code (O,S)O}, used for {@link Slot#op_getattr}. + * The attribute name is passed to the slot as a {@code String}. + */ + // In CPython: getattrofunc + GETATTR(O, O, S) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 1, names); + String name = args[0].toString(); + return wrapped.invokeExact(self, name); + } + }, + + /** + * The signature {@code (O,S,O)V}, used for {@link Slot#op_setattr}. + */ + // In CPython: setattrofunc + SETATTR(V, O, S, O) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 2, names); + String name = args[0].toString(); + wrapped.invokeExact(self, name, args[1]); + return Py.None; + } + }, + + /** + * The signature {@code (O,S)V}, used for {@link Slot#op_delattr}.
+ */ + // Not in CPython + DELATTR(V, O, S) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 1, names); + String name = args[0].toString(); + wrapped.invokeExact(self, name); + return Py.None; + } + }, + + /** + * The signature {@code (O,O,T)O}, used for {@link Slot#op_get}. + * When called through + * {@link #callWrapped(MethodHandle, Object, Object[], String[])}, + * the second (type) argument is optional, and {@code None} given + * for either argument reaches the slot as {@code null}. + */ + // In CPython: descrgetfunc + DESCRGET(O, O, O, T) { + + @Override + Object callWrapped(MethodHandle wrapped, Object self, Object[] args, String[] names) + throws ArgumentError, Throwable { + checkArgs(args, 1, 2, names); + Object obj = args[0]; + if (obj == Py.None) { obj = null; } + // The type is optional: read it only if supplied (compare CPython wrap_descr_get) + Object type = args.length > 1 ? args[1] : null; + if (type == Py.None) { type = null; } + if (type == null && obj == null) { + throw new TypeError("__get__(None, None) is invalid"); + } + return wrapped.invokeExact(self, obj, (PyType)type); + } + }, + + /** + * The signature {@code (O,O[],S[])V}, used for + * {@link Slot#op_init}. This is the same as {@link #CALL} except + * with {@code void} return. + */ + // In CPython: initproc + INIT(V, O, OA, SA); + + /** + * The signature was defined with this nominal method type. + */ + final MethodType type; + /** + * When empty, the slot should hold this handle. The method type of + * this handle also tells us the method type by which the slot must + * always be invoked, see {@link Slot#getType()}. + */ + final MethodHandle empty; + + /** + * Constructor to which we specify the signature of the slot, with + * the same semantics as {@code MethodType.methodType()}. Every + * {@code MethodHandle} stored in the slot (including + * {@link Signature#empty}) must be of this method type. + * + * @param returnType that the slot functions all return + * @param ptypes types of parameters the slot function takes + */ + Signature(Class returnType, Class...
ptypes) { + // The signature is recorded exactly as given + this.type = MethodType.methodType(returnType, ptypes); + // em = λ : throw Util.EMPTY + // (with correct nominal return type for slot) + MethodHandle em = MethodHandles.throwException(returnType, EmptyException.class) + .bindTo(Util.EMPTY); + // empty = λ u v ... : throw Util.EMPTY + // (with correct parameter types for slot) + this.empty = MethodHandles.dropArguments(em, 0, this.type.parameterArray()); + + // Every slot takes its target (self) as a first parameter of type O + Class p0 = ptypes.length > 0 ? ptypes[0] : null; + if (p0 != O) { throw new InterpreterError("Special methods must be instance methods"); } + } + + /** + * Check that no positional or keyword arguments are supplied. This + * is for use when implementing + * {@link #callWrapped(MethodHandle, Object, Object[], String[])}. + * + * @param args positional argument array to be checked + * @param names to be checked + * @throws ArgumentError if positional arguments are given or + * {@code names} is not {@code null} or empty + */ + final protected void checkNoArgs(Object[] args, String[] names) throws ArgumentError { + if (args.length != 0) + throw new ArgumentError(Mode.NOARGS); + else if (names != null && names.length != 0) + throw new ArgumentError(Mode.NOKWARGS); + } + + /** + * Check the number of positional arguments and that no keywords are + * supplied. This is for use when implementing + * {@link #callWrapped(MethodHandle, Object, Object[], String[])}.
+ * + * @param args positional argument array to be checked + * @param expArgs expected number of positional arguments + * @param names to be checked + * @throws ArgumentError if the wrong number of positional arguments + * are given or {@code names} is not {@code null} or empty + */ + final protected void checkArgs(Object[] args, int expArgs, String[] names) + throws ArgumentError { + if (args.length != expArgs) + throw new ArgumentError(expArgs); + else if (names != null && names.length != 0) + throw new ArgumentError(Mode.NOKWARGS); + } + + /** + * Check the number of positional arguments and that no keywords are + * supplied. This is for use when implementing + * {@link #callWrapped(MethodHandle, Object, Object[], String[])}. + * + * @param args positional argument array to be checked + * @param minArgs minimum number of positional arguments + * @param maxArgs maximum number of positional arguments + * @param names to be checked + * @throws ArgumentError if the wrong number of positional arguments + * are given or {@code names} is not {@code null} or empty + */ + final protected void checkArgs(Object[] args, int minArgs, int maxArgs, String[] names) + throws ArgumentError { + int n = args.length; + if (n < minArgs || n > maxArgs) + throw new ArgumentError(minArgs, maxArgs); + else if (names != null && names.length != 0) + throw new ArgumentError(Mode.NOKWARGS); + } + + /** + * Invoke the given method handle for the given target {@code self}, + * having arranged the arguments as expected by a slot. We create + * {@code enum} members of {@code Signature} to handle different + * slot signatures, in which this method accepts arguments in a + * generic way (from the interpreter, say) and adapts them to the + * specific needs of a wrapped method. The caller guarantees that + * the wrapped method has the {@code Signature} to which the call is + * addressed.
+ * + * @param wrapped handle of the method to call + * @param self target object of the method call + * @param args of the method call + * @param names of trailing arguments in {@code args} + * @return result of the method call + * @throws ArgumentError when the arguments ({@code args}, + * {@code names}) are not correct for the {@code Signature} + * @throws Throwable from the implementation of the special method + */ + // Compare CPython wrap_* in typeobject.c + // XXX should be abstract, but only when defined for each + // NOTE(review): this default invokes the handle as (O)O. Constants that + // do not override it but have another method type (e.g. PREDICATE) + // would presumably fail in invokeExact -- confirm they are not called. + /* abstract */ Object callWrapped(MethodHandle wrapped, Object self, Object[] args, + String[] names) throws ArgumentError, Throwable { + checkNoArgs(args, names); + return wrapped.invokeExact(self); + } + } + + /** + * Helper for {@link Slot#setHandle(Operations, MethodHandle)}, when a + * bad handle is presented. + * + * @param slot that the client attempted to set + * @param mh offered value found unsuitable + * @return exception with message filled in + */ + private static InterpreterError slotTypeError(Slot slot, MethodHandle mh) { + String fmt = "%s not of required type %s for slot %s"; + return new InterpreterError(fmt, mh, slot.getType(), slot); + } + + /** + * Helpers for {@link Slot} and {@link Signature} that can be used + * in the constructors. + */ + private static class Util { + + /* + * This is a class separate from Slot to solve problems with the + * order of static initialisation. The enum constants have to come + * first, and their constructors are called as they are encountered. + * This means that other constants in Slot are not initialised by + * the time the constructors need them. + */ + private static final Lookup LOOKUP = MethodHandles.lookup(); + + /** Single re-used instance of {@code Slot.EmptyException} */ + static final EmptyException EMPTY = new EmptyException(); + + /** + * Helper for {@link Slot} constructors at the point they need a + * handle for their named field within an {@code Operations} class.
+ */ + static VarHandle slotHandle(Slot slot) { + Class opsClass = Operations.class; + try { + // The field has the same name as the enum + return LOOKUP.findVarHandle(opsClass, slot.name(), MethodHandle.class); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new InterpreterError(e, "creating handle for %s in %s", slot.name(), + opsClass.getSimpleName()); + } + } + + /** + * Helper for {@link Slot} and thereby for call sites providing a + * method handle that throws a Python exception when invoked, with + * an appropriate message for the operation. + *

+ * To be concrete, if the slot is a binary operation, the returned + * handle may throw something like {@code TypeError: + * unsupported operand type(s) for -: 'str' and 'str'}. + * + * @param slot to mention in the error message + * @return a handle that throws the exception + */ + static MethodHandle operandError(Slot slot) { + // The type of the method that creates the TypeError + MethodType errorMT = slot.getType().insertParameterTypes(0, Slot.class) + .changeReturnType(PyException.class); + // Exception thrower with nominal return type of the slot + // thrower = λ(e): throw e + MethodHandle thrower = + MethodHandles.throwException(slot.getType().returnType(), PyException.class); + + try { + /* + * Look up a method f to create the exception, when applied the + * arguments v, w, ... (types matching the slot signature) prepended + * with this slot. We'll only call it if the handle is invoked. + */ + // error = λ(slot, v, w, ...): f(slot, v, w, ...) + MethodHandle error; + switch (slot.signature) { + case UNARY: + // Same name, although signature differs ... + case BINARY: + error = LOOKUP.findStatic(PyNumber.class, "operandError", errorMT); + break; + default: + // error = λ(slot): default(slot, v, w, ...) + error = LOOKUP.findStatic(Util.class, "defaultOperandError", errorMT); + // error = λ(slot, v, w, ...): default(slot) + error = MethodHandles.dropArguments(error, 0, + slot.getType().parameterArray()); + } + + // A handle that creates and throws the exception + // λ(v, w, ...): throw f(slot, v, w, ...) + return MethodHandles.collectArguments(thrower, 0, error.bindTo(slot)); + + } catch (NoSuchMethodException | IllegalAccessException e) { + throw new InterpreterError(e, "creating handle for type error", slot.name()); + } + } + + /** Uninformative exception, mentioning the slot. 
*/ + @SuppressWarnings("unused") // reflected in operandError + static PyException defaultOperandError(Slot op) { + return new TypeError("bad operand type for %s", op.opName); + } + } + + /** + * Lookup from special method name to {@code Slot}, to support + * {@link Slot#forMethodName(String)}. We make this a class of its + * own to obtain a thread-safe lazy initialisation of the + * {@link MethodNameLookup#table} as a singleton, guaranteed to fill + * its table after creation of the Slot enum. + */ + private static class MethodNameLookup { + /** Lookup from special method name to {@code Slot}. */ + static final Map table; + + static { + Slot[] slots = Slot.values(); + HashMap t = new HashMap<>(2 * slots.length); + for (Slot s : slots) { t.put(s.methodName, s); } + table = Collections.unmodifiableMap(t); + } + } +} diff --git a/core/src/main/java/org/python/core/StopIteration.java b/core/src/main/java/org/python/core/StopIteration.java new file mode 100644 index 000000000..850aa6c97 --- /dev/null +++ b/core/src/main/java/org/python/core/StopIteration.java @@ -0,0 +1,39 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code StopIteration} exception. */ +public class StopIteration extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + public static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("StopIteration", MethodHandles.lookup()) + .base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected StopIteration(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. 
+ * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public StopIteration(String msg, Object... args) { + this(TYPE, msg, args); + } + + /** + * Constructor specifying no arguments. + */ + public StopIteration() { this(TYPE, ""); } +} diff --git a/core/src/main/java/org/python/core/SystemError.java b/core/src/main/java/org/python/core/SystemError.java new file mode 100644 index 000000000..f94dd61f9 --- /dev/null +++ b/core/src/main/java/org/python/core/SystemError.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code SystemError} exception. */ +public class SystemError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type object of Python {@code SystemError} exceptions. */ + @SuppressWarnings("hiding") + public static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("SystemError", MethodHandles.lookup()).base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected SystemError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public SystemError(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/TypeError.java b/core/src/main/java/org/python/core/TypeError.java new file mode 100644 index 000000000..bcc342bfe --- /dev/null +++ b/core/src/main/java/org/python/core/TypeError.java @@ -0,0 +1,30 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code TypeError} exception. 
*/ +public class TypeError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("TypeError", MethodHandles.lookup()) // + .base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected TypeError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public TypeError(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/TypeExposer.java b/core/src/main/java/org/python/core/TypeExposer.java new file mode 100644 index 000000000..5f9b8fbec --- /dev/null +++ b/core/src/main/java/org/python/core/TypeExposer.java @@ -0,0 +1,898 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import java.lang.annotation.Annotation; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.MethodType; +import java.lang.invoke.WrongMethodTypeException; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.Function; + +import org.python.base.InterpreterError; +import org.python.core.Exposed.Deleter; +import org.python.core.Exposed.DocString; +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.Member; +import org.python.core.Exposed.PythonMethod; +import org.python.core.Exposed.PythonStaticMethod; +import org.python.core.Exposed.Setter; +import org.python.core.Operations.BinopGrid; +import org.python.core.PyMemberDescr.Flag; +import org.python.core.Slot.Signature; + +class TypeExposer extends Exposer { + + /** + * Type for which attributes are to be exposed (or {@code null} + * during certain tests). It is referenced (e.g. where we create a + * descriptor), but is not otherwise accessed, since it is + * (necessarily) incomplete at this time. + */ + final PyType type; + + /** + * The table of intermediate descriptions for members. They will + * eventually become descriptors in a built-in object type. Every + * entry here is also a value in {@link Exposer#specs}. + */ + final Set memberSpecs; + + /** + * The table of intermediate descriptions for get-sets. They will + * eventually become descriptors in a built-in object type. Every + * entry here is also a value in {@link Exposer#specs}. + */ + final Set getSetSpecs; + + /** + * Construct the {@code TypeExposer} instance for a particular + * Python type. The {@code type} object is referenced (e.g. 
in + * intermediate specification objects), but is not otherwise + * accessed, since it is (necessarily) incomplete at this time. It + * will be interrogated as to its implementing classes, where we + * create descriptors, at the point {@link #expose(Class)} is + * called. + * + * @param type being exposed + */ + TypeExposer(PyType type) { + this.type = type; + this.memberSpecs = new TreeSet<>(); + this.getSetSpecs = new TreeSet<>(); + } + + @Override + ScopeKind kind() { return ScopeKind.TYPE; } + + /** + * Build the result from the defining class. + * + * @param definingClass to scan for definitions + */ + void expose(Class definingClass) { + // Scan the defining class for exposed and special methods + scanJavaMethods(definingClass); + // ... and for fields. + scanJavaFields(definingClass); + } + + /** + * For each name having a definition in {@link #specs}, construct + * the attribute and add it to the map passed in. The map is + * normally the dictionary of the type. Attributes may rely on a + * {@code MethodHandle} or {@code VarHandle}, so a lookup object + * must be provided that can create them. + * + * @param dict to which the attributes should be delivered + * @param lookup authorisation to access members + */ + void populate(Map dict, Lookup lookup) { + if (type == null) + // type may only properly be null during certain tests + throw new InterpreterError("Cannot generate descriptors for type 'null'"); + for (Spec spec : specs.values()) { + spec.checkFormation(); + Object attr = spec.asAttribute(type, lookup); + dict.put(spec.name, attr); + } + } + + /** + * Add to {@link #specs}, definitions based on methods found in the + * given class and either annotated for exposure or having the name + * of a special method. 
+ * + * @param defsClass to introspect for methods + * @throws InterpreterError on duplicates or unsupported types + */ + @Override + void scanJavaMethods(Class defsClass) throws InterpreterError { + + // Iterate over methods looking for those to expose + for (Class c : superClasses(defsClass)) { + for (Method m : c.getDeclaredMethods()) { + /* + * Note: method annotations (and special names) are not treated as + * alternatives, to catch exposure of methods by multiple routes. + */ + + // Check for instance method + PythonMethod pm = m.getDeclaredAnnotation(PythonMethod.class); + if (pm != null) { addMethodSpec(m, pm); } + + // Check for static method + PythonStaticMethod psm = m.getDeclaredAnnotation(PythonStaticMethod.class); + if (psm != null) { addStaticMethodSpec(m, psm); } + + // Check for getter, setter, deleter methods + Getter get = m.getAnnotation(Getter.class); + if (get != null) { addGetter(m, get); } + Setter set = m.getAnnotation(Setter.class); + if (set != null) { addSetter(m, set); } + Deleter del = m.getAnnotation(Deleter.class); + if (del != null) { addDeleter(m, del); } + + // If it has a special method name record that definition. + String name = m.getName(); + Slot slot = Slot.forMethodName(name); + if (slot != null) { addWrapperSpec(m, slot); } + } + } + } + + /** + * Process a method annotated as an exposed attribute get method, + * into a specification, and find a {@link GetSetSpec} in the table + * of specifications by name (or add one) to hold it. 
+ * + * @param m method annotated + * @param anno annotation encountered + * @throws InterpreterError on duplicates or unsupported types + */ + private void addGetter(Method m, Getter anno) { + addSpec(m, anno.value(), TypeExposer::castGetSet, GetSetSpec::new, + ms -> getSetSpecs.add(ms), GetSetSpec::addGetter); + } + + /** + * Process a method annotated as an exposed attribute set method, + * into a specification, and find a {@link GetSetSpec} in the table + * of specifications by name (or add one) to hold it. + * + * + * @param m method annotated + * @param anno annotation encountered + * @throws InterpreterError on duplicates or unsupported types + */ + private void addSetter(Method m, Setter anno) { + addSpec(m, anno.value(), TypeExposer::castGetSet, GetSetSpec::new, + ms -> getSetSpecs.add(ms), GetSetSpec::addSetter); + } + + /** + * Process a method annotated as an exposed attribute delete method, + * into a specification, and find a {@link GetSetSpec} in the table + * of specifications by name (or add one) to hold it. + * + * + * @param m method annotated + * @param anno annotation encountered + * @throws InterpreterError on duplicates or unsupported types + */ + private void addDeleter(Method m, Deleter anno) { + addSpec(m, anno.value(), TypeExposer::castGetSet, GetSetSpec::new, + ms -> getSetSpecs.add(ms), GetSetSpec::addDeleter); + } + + /** + * Cast an arbitrary {@link Spec} to a {@link GetSetSpec} or return + * {@code null}. + * + * @param spec to cast + * @return {@code spec} or {@code null} + */ + private static GetSetSpec castGetSet(Spec spec) { + return spec instanceof GetSetSpec ? (GetSetSpec)spec : null; + } + + /** + * Process a method that matches a slot name to a descriptor + * specification and add it to the table of specifications by name. 
+ * + * @param meth method with a special method name + * @param slot identified by that name + * @throws InterpreterError on duplicates or unsupported types + */ + private void addWrapperSpec(Method meth, Slot slot) throws InterpreterError { + + // For clarity, name lambda expression for cast + Function cast = + // Test and cast a found Spec to WrapperSpec + spec -> spec instanceof WrapperSpec ? (WrapperSpec)spec : null; + // Now use the generic create/update + addSpec(meth, slot.methodName, cast, (String ignored) -> new WrapperSpec(slot), ms -> {}, + WrapperSpec::add); + } + + /** + * Add to {@link #specs}, definitions of fields found in the given + * class and annotated for exposure. + * + * @param defsClass to introspect for field definitions + * @throws InterpreterError on duplicates or unsupported types + */ + void scanJavaFields(Class defsClass) throws InterpreterError { + // Iterate over fields looking for the relevant annotations + for (Class c : superClasses(defsClass)) { + for (Field f : c.getDeclaredFields()) { + Member m = f.getDeclaredAnnotation(Member.class); + if (m != null) { addMemberSpec(f, m); } + } + } + } + + /** + * Process an annotated field, that describes an exposed attribute, + * into a specification, and add it to the table of specifications + * by name. + * + * @param f field annotated + * @param anno annotation encountered + * @throws InterpreterError on duplicates or unsupported types + */ + void addMemberSpec(Field f, Member anno) throws InterpreterError { + + // The name is as annotated or the "natural" one + String name = anno.value(); + if (name == null || name.length() == 0) + name = f.getName(); + + /* + * XXX we follow the same pattern as with other spec types, in + * accumulating multiple definitions in a list. Repeat definition is + * almost certainly an error, and at this time, MemberSpec.add + * treats it as such. This makes Member annotations incompatible + * with the idea of multiple accepted implementations of a type. 
+ */ + // Find any existing definition + Spec spec = specs.get(name); + MemberSpec memberSpec; + if (spec == null) { + // A new entry is needed + memberSpec = new MemberSpec(name); + specs.put(memberSpec.name, memberSpec); + memberSpecs.add(memberSpec); + } else if (spec instanceof MemberSpec) { + // Existing entry will be updated + memberSpec = (MemberSpec)spec; + } else { + // Existing entry is not compatible + memberSpec = new MemberSpec(name); + throw duplicateError(name, f, memberSpec, spec); + } + // Add the field, processing the additional properties + memberSpec.add(f, anno.optional(), anno.readonly()); + } + + @Override + public String toString() { return "TypeExposer [type=" + type + "]"; } + + /** + * A specialisation of {@link Exposer.Spec} to describe a named, + * built-in data-like object, during the exposure process. + */ + static class MemberSpec extends Exposer.Spec { + + /** Collects the fields declared (should be just one). */ + final List fields; + + /** + * The member disappears when the field is {@code null}. This is + * always {@code false} for primitive types. + */ + boolean optional; + /** The member may be read but not written or deleted. */ + boolean readonly; + + MemberSpec(String name) { + super(name, ScopeKind.TYPE); + this.fields = new ArrayList<>(1); + } + + /** + * Add a field implementing this member to the collection. 
+ * + * @param field to add to {@link #fields} + * @param optional member is optional + * @param readonly member is read only + */ + void add(Field field, boolean optional, boolean readonly) { + + // Final overrides readonly=false + int modifiers = field.getModifiers(); + readonly |= (modifiers & Modifier.FINAL) != 0; + + // Disallow static (in Java) + boolean javaStatic = (modifiers & Modifier.STATIC) != 0; + if (javaStatic) { throw new InterpreterError(CANNOT_BE_JAVA_STATIC, getJavaName()); } + + // Disallow optional if primitive (in Java) + if (optional) { + if (field.getType().isPrimitive()) { + throw new InterpreterError(CANNOT_BE_OPTIONAL, "Primitive", getJavaName()); + } else if (readonly) { + throw new InterpreterError(CANNOT_BE_OPTIONAL, "Read-only", getJavaName()); + } + } + + // Add the only definition (do we actually need a list?) + fields.add(field); + if (fields.size() != 1) { throw duplicateError(name, field, this, this); } + + // Finally insert the allowed combination + this.optional = optional; + this.readonly = readonly; + + // There may be a @DocString annotation + DocString docAnno = field.getAnnotation(DocString.class); + if (docAnno != null) { doc = docAnno.value(); } + } + + private static final String CANNOT_BE_JAVA_STATIC = + "The definition of '%s' cannot be Java static because it is a Python member"; + private static final String CANNOT_BE_OPTIONAL = "%s field '%s' cannot be optional"; + + @Override + Class annoClass() { return Member.class; } + + /** + * {@inheritDoc} + *

+ * In a type, the attribute must be represented by a descriptor for + * the Python member attribute from this specification. + *

+ * Note that specification may have collected multiple Java + * definitions of the same name. This method checks there is exactly + * one. + * + * @return descriptor for access to the methods + * @throws InterpreterError if the method type is not supported + */ + @Override + PyMemberDescr asAttribute(PyType objclass, Lookup lookup) { + EnumSet flags = EnumSet.noneOf(Flag.class); + if (readonly) { flags.add(Flag.READONLY); } + if (optional) { flags.add(Flag.OPTIONAL); } + return PyMemberDescr.forField(objclass, name, fields.get(0), lookup, flags, doc); + } + + @Override + public void checkFormation() throws InterpreterError {} + + /** @return a name designating the field */ + @Override + String getJavaName() { + StringBuilder b = new StringBuilder(64); + if (!fields.isEmpty()) { + // It shouldn't matter, but take the last added + Field field = fields.get(fields.size() - 1); + b.append(field.getDeclaringClass().getSimpleName()); + b.append('.'); + b.append(field.getName()); + } else { + // Take the name from the Spec instead + b.append(name); + } + return b.toString(); + } + + /** @return the Java declaration for the field */ + String getJavaDeclaration() { + StringBuilder b = new StringBuilder(64); + if (!fields.isEmpty()) { + // It shouldn't matter, but take the last added + Field field = fields.get(fields.size() - 1); + b.append(field.getType().getSimpleName()); + b.append(' '); + b.append(field.getName()); + } + return b.toString(); + } + + @Override + public String toString() { + return String.format("%s(%s [%s])", getClass().getSimpleName(), name, + getJavaDeclaration()); + } + } + + /** + * A specialisation of {@link Exposer.Spec} to describe a named, + * built-in data-like object, during the exposure process. + */ + static class GetSetSpec extends BaseMethodSpec { + + /** Collects the getters declared (often just one). */ + final List getters; + /** Collects the setters declared (often just one). 
*/ + final List setters; + /** Collects the deleters declared (often just one). */ + final List deleters; + /** Java class of attribute from setter parameter. */ + Class klass = Object.class; + + GetSetSpec(String name) { + super(name, ScopeKind.TYPE); + this.getters = methods; + this.setters = new ArrayList<>(1); + this.deleters = new ArrayList<>(1); + } + + /** + * The attribute may not be set or deleted. + * + * @return true if set and delete are absent + */ + boolean readonly() { return setters.isEmpty() && deleters.isEmpty(); } + + /** + * The attribute may be deleted. + * + * @return true if delete is present + */ + boolean optional() { return !deleters.isEmpty(); } + + /** + * Add a getter to the collection. + * + * @param method to add to {@link #getters} + */ + void addGetter(Method method) { + // Add to list of methods + getters.add(method); + // There may be a @DocString annotation + maybeAddDoc(method); + } + + /** + * Add a setter to the collection. + * + * @param method to add to {@link #setters} + */ + void addSetter(Method method) { + // Add to list of methods + setters.add(method); + // There may be a @DocString annotation + maybeAddDoc(method); + // Process parameters of the Setter + determineAttrType(method); + } + + /** + * Add a deleter to the collection. + * + * @param method to add to {@link #deleters} + */ + void addDeleter(Method method) { + // Add to list of methods + deleters.add(method); + // There may be a @DocString annotation + maybeAddDoc(method); + } + + /** + * Deduce the attribute type from the (raw) set method signature. We + * do this in order to give a sensible {@link TypeError} when a cast + * fails for the {@link PyGetSetDescr#__set__} operation. + * + * @param method annotated with a {@code Setter} + */ + private void determineAttrType(Method method) { + // Save class of value accepted (if signature is sensible) + int modifiers = method.getModifiers(); + int v = (modifiers & Modifier.STATIC) != 0 ? 
1 : 0; + Class[] paramClasses = method.getParameterTypes(); + if (paramClasses.length == v + 1) { + Class valueClass = paramClasses[v]; + if (valueClass == klass) { + // No change + } else if (klass.isAssignableFrom(valueClass)) { + // The parameter is more specific than klass + klass = valueClass; + } + } + } + + @Override + Object asAttribute(PyType objclass, Lookup lookup) throws InterpreterError { + if (objclass.acceptedCount == 1) + return createDescrSingle(objclass, lookup); + else + return createDescrMultiple(objclass, lookup); + } + + @Override + public void checkFormation() throws InterpreterError {} + + private Object createDescrSingle(PyType objclass, Lookup lookup) { + // TODO Stop-gap: do general case first + return createDescrMultiple(objclass, lookup); + } + + /** + * Create a {@code PyGetSetDescr} from this specification. Note that + * a specification collects all the methods as declared with this + * name (in separate getter, setter and deleter lists). Normally + * there is at most one of each. + *

+ * Normally also, a Python type has just one Java implementation. If + * a type has N accepted implementations, there should be + * definitions of the getter, setter, and deleter methods, if + * defined at all, applicable to each accepted implementation. This + * method matches defined methods to the supported implementations. + * + * @param objclass Python type that owns the descriptor + * @param lookup authorisation to access fields + * @return descriptor for access to the field + * @throws InterpreterError if the method type is not supported + */ + private PyGetSetDescr createDescrMultiple(PyType objclass, Lookup lookup) + throws InterpreterError { + + // Handles on implementation methods + MethodHandle[] g, s = null, d = null; + g = unreflect(objclass, lookup, PyGetSetDescr.GETTER, getters); + if (!readonly()) { + // We can set this attribute + s = unreflect(objclass, lookup, PyGetSetDescr.SETTER, setters); + if (optional()) { + // We can delete this attribute + d = unreflect(objclass, lookup, PyGetSetDescr.DELETER, deleters); + } + } + + return new PyGetSetDescr.Multiple(objclass, name, g, s, d, doc, klass); + } + + private MethodHandle[] unreflect(PyType objclass, Lookup lookup, MethodType mt, + List methods) throws InterpreterError { + + /* + * In the first stage, translate each method to a handle. There + * could be any number of candidates in the defining classes. There + * may be a method for each accepted implementation of the type , or + * a method may match more than one (e.g. Number matching Long and + * Integer). We build a list with the more type-specific handles (in + * the first argument) before the less type-specific. 
+ */ + LinkedList candidates = new LinkedList<>(); + for (Method m : methods) { + // Convert m to a handle (if L args and accessible) + try { + MethodHandle mh = lookup.unreflect(m); + addOrdered(candidates, mh); + } catch (IllegalAccessException e) { + throw cannotGetHandle(m, e); + } + } + + /* + * + * We will try to create a handle for each implementation of an + * instance method. + */ + final int N = objclass.acceptedCount; + MethodHandle[] method = new MethodHandle[N]; + + // Fill the method array with matching method handles + for (int i = 0; i < N; i++) { + Class acceptedClass = objclass.classes[i]; + /* + * Fill method[i] with the method handle where the first parameter + * is the most specific match for class accepted[i]. + */ + // Try the candidate method until one matches + for (MethodHandle mh : candidates) { + MethodType mt1 = mh.type(); + if (mt1.parameterType(0).isAssignableFrom(acceptedClass)) { + /* + * Each sub-type of MethodDef handles callMethod(self, args, kwargs) + * in its own way, and must prepare the arguments of the generic + * method handle to match. + */ + try { + // XXX not yet supporting Java args + method[i] = mh.asType(mt); + } catch (WrongMethodTypeException wmte) { + // Wrong number of args or cannot cast. + throw methodSignatureError(objclass, mh); + } + break; + } + } + + // We should have a value in each of method[] + if (method[i] == null) { + PyGetSetDescr.Type dt = PyGetSetDescr.Type.fromMethodType(mt); + throw new InterpreterError(ATTR_NOT_IMPL, dt, name, objclass.name, + objclass.classes[i]); + } + } + + /* + * There are multiple definitions so use the array form of built-in + * method. This is the case for types that have multiple accepted + * implementations and methods on them that are not static or + * "Object self". 
+ */ + return method; + } + + private static String ATTR_NOT_IMPL = + "%s of attribute '%s' of '%s' objects is not defined for implementation %s"; + + @Override + Class annoClass() { + // Try annotations in order of popularity + if (getters.size() > 0) + ; // -> Getter + else if (setters.size() > 0) + return Setter.class; + else if (deleters.size() > 0) + return Deleter.class; + // Or by default, claim to have a Getter + return Getter.class; + } + + @Override + public String toString() { + return String.format("%s(%s[%d,%d,%d])", getClass().getSimpleName(), name, + getters.size(), setters.size(), deleters.size()); + } + } + + /** + * Specification in which we assemble information about a Python + * special method in advance of creating a special method + * descriptor. + */ + static class WrapperSpec extends BaseMethodSpec { + + /** The special method being defined. */ + final Slot slot; + + WrapperSpec(Slot slot) { + super(slot.methodName, ScopeKind.TYPE); + this.slot = slot; + } + + @Override + Object asAttribute(PyType objclass, Lookup lookup) throws InterpreterError { + /* + * We will try to create a handle for each implementation of a + * special (instance) method. See corresponding logic in + * Slot.setSlot(Operations, Object) + */ + return createDescrForInstanceMethod(objclass, lookup); + } + + @Override + public void checkFormation() throws InterpreterError { + // XXX Check the signature instead of in createDescr? + } + + @Override + void add(Method method) { super.add(method); } + + @Override + Class annoClass() { + // Special methods recognised by name, so no annotation + return Annotation.class; + } + + /** + * {@inheritDoc} + *

+ * In this case, we name the slot function, as there is no + * annotation. + */ + @Override + protected String annoClassName() { return slot.toString(); } + + /** + * Create a {@code PyWrapperDescr} from this specification. Note + * that a specification describes the methods as declared, and that + * there may be any number. This method matches them to the + * supported implementations. + * + * @param objclass Python type that owns the descriptor + * @param lookup authorisation to access fields + * @return descriptor for access to the field + * @throws InterpreterError if the method type is not supported + */ + private PyWrapperDescr createDescrForInstanceMethod(PyType objclass, Lookup lookup) + throws InterpreterError { + + // Acceptable methods can be coerced to this signature + MethodType slotType = slot.getType(); + final int L = slotType.parameterCount(); + assert L >= 1; + + /* + * There could be any number of candidates in the implementation. An + * implementation method could match multiple accepted + * implementations of the type (e.g. Number matching Long and + * Integer). + */ + LinkedList candidates = new LinkedList<>(); + for (Method m : methods) { + // Convert m to a handle (if L args and accessible) + try { + MethodHandle mh = lookup.unreflect(m); + if (mh.type().parameterCount() == L) + addOrdered(candidates, mh); + } catch (IllegalAccessException e) { + throw cannotGetHandle(m, e); + } + } + + /* + * We will try to create a handle for each implementation of an + * instance method, but only one handle for static/class methods + * (like __new__). 
See corresponding logic in + * Slot.setSlot(Operations, Object) + */ + final int N = objclass.acceptedCount; + MethodHandle[] wrapped = new MethodHandle[N]; + + // Fill the wrapped array with matching method handles + for (int i = 0; i < N; i++) { + Class acceptedClass = objclass.classes[i]; + /* + * Fill wrapped[i] with the method handle where the first parameter + * is the most specific match for class accepted[i]. + */ + // Try the candidate method until one matches + for (MethodHandle mh : candidates) { + if (mh.type().parameterType(0).isAssignableFrom(acceptedClass)) { + try { + // must have the expected signature + checkCast(mh, slotType); + wrapped[i] = mh.asType(slotType); + break; + } catch (WrongMethodTypeException wmte) { + // Wrong number of args or cannot cast. + throw methodSignatureError(objclass, mh); + } + } + } + + // We should have a value in each of wrapped[] + if (wrapped[i] == null) { + throw new InterpreterError("'%s.%s' not defined for %s", objclass.name, + slot.methodName, objclass.classes[i]); + } + } + + if (N == 1) + /* + * There is only one definition so use the simpler form of + * slot-wrapper. This is the frequent case. + */ + return new PyWrapperDescr.Single(objclass, slot, wrapped[0]); + else + /* + * There are multiple definitions so use the array form of + * slot-wrapper. This is the case for types that have multiple + * accepted implementations and methods on them that are not static + * or "Object self". + */ + return new PyWrapperDescr.Multiple(objclass, slot, wrapped); + } + + /** + * Throw a {@code WrongMethodTypeException} if the offered method + * (e.g. a special method) cannot be called with arguments matching + * the specified type. This makes up for the fact that + * {@code MethodHandle.asType} does not do much checking. This way, + * we get an error at specification time, not run-time. 
+ * + * @param mh handle of method offered + * @param slotType required type + * @throws WrongMethodTypeException if cannot cast + */ + private static void checkCast(MethodHandle mh, MethodType slotType) + throws WrongMethodTypeException { + MethodType mt = mh.type(); + int n = mt.parameterCount(); + if (n != slotType.parameterCount()) + throw new WrongMethodTypeException(); + boolean ok = slotType.returnType().isAssignableFrom(mt.returnType()); + if (!ok) { throw new WrongMethodTypeException(); } + for (int i = 0; i < n; i++) { + ok = slotType.parameterType(i).isAssignableFrom(mt.parameterType(i)); + if (!ok) { throw new WrongMethodTypeException(); } + } + } + } + + /** + * Create a table of {@code MethodHandle}s from binary operations + * defined in the given class, on behalf of the type given. This + * table is 3-dimensional, being indexed by the slot of the method + * being defined, which must be a binary operation, and the indices + * of the operand classes in the type. These handles are used + * privately by the type to create call sites. Although the process + * of creating them is similar to making wrapper descriptors, these + * structures do not become exposed as descriptors. + * + * @param lookup authorisation to access methods + * @param binops to introspect for binary operations + * @param type to which these descriptors apply + * @return attributes defined (in the order first encountered) + * @throws InterpreterError on duplicates or unsupported types + */ + static Map binopTable(Lookup lookup, Class binops, PyType type) + throws InterpreterError { + + // Iterate over methods looking for the relevant annotations + Map defs = new HashMap<>(); + + for (Method m : binops.getDeclaredMethods()) { + // If it is a special method, record the definition. 
+ String name = m.getName(); + Slot slot = Slot.forMethodName(name); + if (slot != null && slot.signature == Signature.BINARY) { + binopTableAdd(defs, slot, m, lookup, binops, type); + } + } + + // Check for nulls in the table. + for (BinopGrid grid : defs.values()) { grid.checkFilled(); } + + return defs; + } + + /** + * Add a method handle to the table, verifying that the method type + * produced is compatible with the {@link #slot}. + * + * @param defs the method table to add to + * @param slot being matched + * @param m implementing method + * @param lookup authorisation to access fields + * @param binops class defining class-specific binary operations + * @param type to which these belong + */ + private static void binopTableAdd(Map defs, Slot slot, Method m, Lookup lookup, + Class binops, PyType type) { + + // Get (or create) the table for this slot + BinopGrid def = defs.get(slot); + if (def == null) { + // A new special method has been encountered + def = new BinopGrid(slot, type); + defs.put(slot, def); + } + + try { + // Convert the method to a handle + def.add(lookup.unreflect(m)); + } catch (IllegalAccessException | WrongMethodTypeException e) { + throw new InterpreterError(e, "ill-formed or inaccessible binary op '%s'", m); + } + } + +} diff --git a/core/src/main/java/org/python/core/UnicodeError.java b/core/src/main/java/org/python/core/UnicodeError.java new file mode 100644 index 000000000..2a059dbed --- /dev/null +++ b/core/src/main/java/org/python/core/UnicodeError.java @@ -0,0 +1,37 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code UnicodeError} exception. */ +class UnicodeError extends ValueError { + + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. 
*/ + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("UnicodeError", MethodHandles.lookup()).base(PyException.TYPE)); + + /* + * PyUnicodeError should have 5 exposed attributes, although they + * are only set by its sub-classes. See where CPython exceptions.c + * defines UnicodeEncodeError, UnicodeDecodeError and + * UnicodeTranslateError. + */ + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected UnicodeError(PyType type, String msg, Object... args) { super(type, msg, args); } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public UnicodeError(String msg, Object... args) { this(TYPE, msg, args); } +} diff --git a/core/src/main/java/org/python/core/ValueError.java b/core/src/main/java/org/python/core/ValueError.java new file mode 100644 index 000000000..966786dba --- /dev/null +++ b/core/src/main/java/org/python/core/ValueError.java @@ -0,0 +1,34 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code ValueError} exception. */ +public class ValueError extends PyException { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = PyType.fromSpec( + new PyType.Spec("ValueError", MethodHandles.lookup()) + .base(PyException.TYPE)); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected ValueError(PyType type, String msg, Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. 
+ * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public ValueError(String msg, Object... args) { + this(TYPE, msg, args); + } +} diff --git a/core/src/main/java/org/python/core/ZeroDivisionError.java b/core/src/main/java/org/python/core/ZeroDivisionError.java new file mode 100644 index 000000000..4e152bd90 --- /dev/null +++ b/core/src/main/java/org/python/core/ZeroDivisionError.java @@ -0,0 +1,35 @@ +package org.python.core; + +import java.lang.invoke.MethodHandles; + +/** The Python {@code ZeroDivisionError} exception. */ +class ZeroDivisionError extends ArithmeticError { + private static final long serialVersionUID = 1L; + + /** The type of Python object this class implements. */ + static final PyType TYPE = + PyType.fromSpec(new PyType.Spec("ZeroDivisionError", + MethodHandles.lookup())); + + /** + * Constructor for sub-class use specifying {@link #type}. + * + * @param type object being constructed + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + protected ZeroDivisionError(PyType type, String msg, + Object... args) { + super(type, msg, args); + } + + /** + * Constructor specifying a message. + * + * @param msg a Java format string for the message + * @param args to insert in the format string + */ + public ZeroDivisionError(String msg, Object... args) { + this(TYPE, msg, args); + } +} diff --git a/core/src/main/java/org/python/core/codecs.java b/core/src/main/java/org/python/core/codecs.java new file mode 100644 index 000000000..6030bdbca --- /dev/null +++ b/core/src/main/java/org/python/core/codecs.java @@ -0,0 +1,24 @@ +package org.python.core; + +import org.python.base.MissingFeature; + +/** Stop-gap definition to satisfy references in the project. 
*/ +class codecs { + + static int insertReplacementAndGetResume(StringBuilder v, String errors, String string, + String str, int loopStart, int i, String string2) { + // TODO Auto-generated method stub + return 0; + } + + static String PyUnicode_EncodeASCII(String s, int length, Object object) { + // TODO Auto-generated method stub + return null; + } + + static Object encoding_error(String errors, String encoding, String toEncode, int start, + int end, String reason) { + throw new MissingFeature("codecs.java"); + } + +} diff --git a/core/src/main/java/org/python/core/package-info.java b/core/src/main/java/org/python/core/package-info.java new file mode 100644 index 000000000..ad6c28f0e --- /dev/null +++ b/core/src/main/java/org/python/core/package-info.java @@ -0,0 +1,5 @@ +/** + * This package contains the classes that implement the objects of the Python + * language. + */ +package org.python.core; diff --git a/core/src/main/java/org/python/core/stringlib/AbstractIntArrayBuilder.java b/core/src/main/java/org/python/core/stringlib/AbstractIntArrayBuilder.java new file mode 100644 index 000000000..6317b408e --- /dev/null +++ b/core/src/main/java/org/python/core/stringlib/AbstractIntArrayBuilder.java @@ -0,0 +1,204 @@ +package org.python.core.stringlib; + +import java.util.Iterator; +import java.util.ListIterator; + +import org.python.core.PySequence; + +/** + * The base of two classes that provide elastic buffers of + * integer values, somewhat like the + * {@code java.lang.StringBuilder}, but for arrays of integers. + * There is an abstract base for arrays to which a client appends, + * and one for arrays to which the client prepends new values. + *

+ * The particular virtue of these classes is that, if the ultimate + * size of the built array may be known in advance, then the result + * may be returned without a copy, using {@link #take()}. + */ +public abstract class AbstractIntArrayBuilder { + + /** An empty array of int for builder initial state, etc. */ + protected static final int[] EMPTY_INT_ARRAY = new int[0]; + + /** Number of elements by default. */ + protected static final int MINSIZE = 16; + + /** + * The number of elements currently in the buffer. + * + * @return the number of elements currently in the buffer. + */ + public abstract int length(); + + /** + * The maximum value stored. + * + * @implNote The motivation for this is to know the range of code + * point values when representing a string. An over-estimate + * would be ok. + * + * @return The maximum int stored. + */ + public abstract int max(); + + /** + * Ensure there is room for another {@code n} elements. In general, + * this will mean allocating new storage (of a carefully-chosen size + * ≥ {@code n+length()}) and copying the existing contents to it. + * + * @param n to make additional space for + */ + protected abstract void ensure(int n); + + /** + * An array of the elements in the buffer (not modified by methods + * on this object hereafter). + * + * @return the elements in the buffer + */ + protected abstract int[] value(); + + /** + * Provide the contents as an array and reset the builder to empty. + * (This is a "destructive read".) The return type is Object in + * order that sub-classes may define the specific type (for example, + * {@code int[]} or {@code byte[]}). + * + * @implNote In many implementations, if the client has chosen an + * initial capacity exactly matching {@link #length()} at the + * time this method is called, then the result may be returned + * without a copy. (This must be the last remaining reference to + * the array originally allocated: it must be impossible for the + * builder to re-use it.) 
Otherwise, returning a new array + * is inevitable. + * + * @return the contents as an array (of a type defined by the sub-class) + */ + public abstract Object take(); + + /** + * Provide the contents as a Java {@code String} (non-destructively, + * but inefficiently). + */ + @Override + public String toString() { + int[] v = value(); + return new String(v, 0, v.length); + } + + /** + * Abstract base of integer array builders that append to their + * content, building the result left to right. Implementations need + * only define {@link #appendUnchecked(int)}. + */ + public static abstract class Forward extends AbstractIntArrayBuilder { + /** + * Append one element without ensuring that there is space. This + * method is for use when it is known that there is space for the + * element, for example, inside a loop before which + * {@link #ensure(int)} has been called. + * + * @param v to append + */ + abstract protected void appendUnchecked(int v); + + /** + * Append one element. + * + * @param v to append + * @return this builder + */ + public Forward append(int v) { + ensure(1); + appendUnchecked(v); + return this; + } + + /** + * Append all the elements from a sequence. + * + * @param seq from which to take items + * @return this builder + */ + public Forward append(PySequence.OfInt seq) { + // Make sure there is room: do it once + int n = seq.length(); + ensure(n); + // Fill (forwards) from the current position + for (int i = 0; i < n; i++) { appendUnchecked(seq.getInt(i)); } + return this; + } + + /** + * Append all the elements available from an iterator. + * + * @param iter from which to take items + * @return this builder + */ + public Forward append(Iterator iter) { + // We don't know what capacity to ensure. + while (iter.hasNext()) { append(iter.next()); } + return this; + } + } + + /** + * Abstract base of integer array builders that prepend to their + * content, building the result right to left. Implementations need + * only define {@link #prependUnchecked(int)}. 
+ */ + public static abstract class Reverse extends AbstractIntArrayBuilder { + /** + * Prepend one element without ensuring that there is space. This + * method is for use when it is known that there is space for the + * element, for example, inside a loop before which + * {@link #ensure(int)} has been called. + * + * @param v to prepend + */ + protected abstract void prependUnchecked(int v); + + /** + * Prepend one element. + * + * @param v to prepend + * @return this builder + */ + public Reverse prepend(int v) { + ensure(1); + prependUnchecked(v); + return this; + } + + /** + * Prepend all the elements from a sequence. The sequence is not + * reversed by this: it is prepended the right way around. After the + * call {@code seq[0]} is first in the buffer. + * + * @param seq from which to take items + * @return this builder + */ + public Reverse prepend(PySequence.OfInt seq) { + // Make sure there is room: do it once + int n = seq.length(); + ensure(n); + // Fill (backwards) from the current position + while (n > 0) { prependUnchecked(seq.getInt(--n)); } + return this; + } + + /** + * Prepend all the elements available from an iterator, working + * backwards with {@code iter.previous()}. + * + * @param iter from which to take items + * @return this builder + */ + public Reverse prepend(ListIterator iter) { + // We don't know what capacity to ensure. + while (iter.hasPrevious()) { prepend(iter.previous()); } + return this; + } + } +} diff --git a/core/src/main/java/org/python/core/stringlib/ByteArrayBuilder.java b/core/src/main/java/org/python/core/stringlib/ByteArrayBuilder.java new file mode 100644 index 000000000..af7a7af78 --- /dev/null +++ b/core/src/main/java/org/python/core/stringlib/ByteArrayBuilder.java @@ -0,0 +1,158 @@ +package org.python.core.stringlib; + +/** + * An elastic buffer of byte values, somewhat like the + * {@code java.lang.StringBuilder}, but for arrays of bytes. 
The + * client appends data and may finally take the built array, often + * without copying the data. + */ +public final class ByteArrayBuilder extends AbstractIntArrayBuilder.Forward { + static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; + private byte[] value; + private int len = 0; + private byte max = 0; + + /** + * Create an empty buffer of a defined initial capacity. + * + * @param capacity initially + */ + public ByteArrayBuilder(int capacity) { value = new byte[capacity]; } + + /** Create an empty buffer of a default initial capacity. */ + public ByteArrayBuilder() { + value = EMPTY_BYTE_ARRAY; + } + + @Override + protected void appendUnchecked(int v) { + value[len++] = (byte)v; + max |= v; + } + + @Override + public int length() { return len; } + + @Override + public int max() { return 0xff & max; } + + /** + * Ensure there is room for another {@code n} elements. The array at least doubles (from a minimum of {@code MINSIZE}) but is never made smaller than {@code length()+n}. + * + * @param n to make space for + */ + @Override + protected void ensure(int n) { + if (len + n > value.length) { + int newSize = Math.max(len + n, Math.max(value.length * 2, MINSIZE)); + byte[] newValue = new byte[newSize]; + System.arraycopy(value, 0, newValue, 0, len); + value = newValue; + } + } + + @Override + protected int[] value() { + int[] v = new int[len]; + for (int i = 0; i < len; i++) { v[i] = 0xff & value[i]; } + return v; + } + + @Override + public byte[] take() { + byte[] v; + if (len == value.length) { + // The array is exactly filled: use it without copy. + v = value; + value = EMPTY_BYTE_ARRAY; + } else { + // The array is partly filled: copy it and re-use it. + v = new byte[len]; + System.arraycopy(value, 0, v, 0, len); + } + len = 0; + max = 0; + return v; + } + + /** + * Append the 2 bytes of a {@code short} value big-endian. + * + * @param v the value + */ + public void appendShortBE(int v) { + ensure(2); + appendUnchecked(v >>> 8); + appendUnchecked(v); + } + + /** + * Append the 2 bytes of a {@code short} value little-endian. 
+ * + * @param v the value + */ + public void appendShortLE(int v) { + ensure(2); + appendUnchecked(v); + appendUnchecked(v >>> 8); + } + + /** + * Append the 4 bytes of a {@code int} value big-endian. + * + * @param v the value + */ + public void appendIntBE(int v) { + ensure(4); + appendUnchecked(v >>> 24); + appendUnchecked(v >>> 16); + appendUnchecked(v >>> 8); + appendUnchecked(v); + } + + /** + * Append the 4 bytes of a {@code int} value little-endian. + * + * @param v the value + */ + public void appendIntLE(int v) { + ensure(4); + appendUnchecked(v); + appendUnchecked(v >>> 8); + appendUnchecked(v >>> 16); + appendUnchecked(v >>> 24); + } + + /** + * Append the 8 bytes of a {@code long} value big-endian. + * + * @param v the value + */ + public void appendLongBE(long v) { + appendIntBE((int)(v >>> 32)); + appendIntBE((int)v); + } + + /** + * Append the 8 bytes of a {@code long} value little-endian. + * + * @param v the value + */ + public void appendLongLE(long v) { + appendIntLE((int)v); + appendIntLE((int)(v >>> 32)); + } + + /** + * Append a specified number of bytes from a given offset in a + * {@code byte} array. + * + * @param b the array from which bytes are copied + * @param off index in {@code b} of the first byte to append + * @param n number of bytes to append + */ + public void append(byte[] b, int off, int n) { + ensure(n); + for (int i = off; n > 0; n--) { appendUnchecked(b[i++]); } + } +} diff --git a/core/src/main/java/org/python/core/stringlib/FieldNameIterator.java b/core/src/main/java/org/python/core/stringlib/FieldNameIterator.java new file mode 100644 index 000000000..39b36d9db --- /dev/null +++ b/core/src/main/java/org/python/core/stringlib/FieldNameIterator.java @@ -0,0 +1,174 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core.stringlib; + +import java.lang.invoke.MethodHandles; + +import org.python.base.MissingFeature; +import org.python.core.CraftedPyObject; +import org.python.core.PyTuple; +import org.python.core.PyType; + + +/** + * This class is an implementation of the iterator object returned + * by {@code string.formatter_field_name_split()}. The function + * {@code formatter_field_name_split()} returns a pair (tuple) + * consisting of a head element and an instance of this iterator. + * The constructor of this class effectively implements that + * function, since as well as being the iterator (second member), + * the object has an extra method {@link #head()} to return the + * required first member of the pair. + */ +public class FieldNameIterator implements CraftedPyObject { + + /** The Python type {@code fieldnameiterator} of this class. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("fieldnameiterator", MethodHandles.lookup()) + .flagNot(PyType.Flag.BASETYPE)); + + /** The UTF-16 string from which elements are being returned. */ + private final String markup; + /** + * True if originally given a PyString (so must return PyString not + * PyUnicode). + */ + // XXX re-think for Jython 3 when we have PyBytes + private final boolean bytes; + /** How far along that string we are. */ + private int index; + private Object head; + + /** + * Create an iterator for the parts of this field name (and extract + * the head name field, which may be an empty string). According to + * the Python Standard Library documentation, a replacement field + * name has the structure: + * + *

+     * field_name        ::=  arg_name ("." attribute_name | "[" element_index "]")*
+     * arg_name          ::=  [identifier | integer]
+     * attribute_name    ::=  identifier
+     * element_index     ::=  integer | index_string
+     * 
+ * + * The object is used from PyUnicode and from PyString, and we have + * to signal which it is, so that returned values may match in type. + * + * @param fieldName the field name as UTF-16 + * @param bytes true if elements returned should be bytes + */ + public FieldNameIterator(String fieldName, boolean bytes) { + this.markup = fieldName; + this.bytes = bytes; + this.index = nextDotOrBracket(fieldName); + String headStr = fieldName.substring(0, index); + try { + this.head = Integer.parseInt(headStr); + } catch (NumberFormatException e) { + this.head = headStr; + } + } + + /** + * Create an iterator for the parts of this field name (and extract + * the head name field, which may be an empty string). + * + * @param fieldName to parse + */ + public FieldNameIterator(String fieldName) { + /* + * XXX Extract UTF-16 string but remember whether str or bytes + * should result. + */ + this(fieldName, false); + } + + @Override + public PyType getType() { return TYPE; } + + @SuppressWarnings("unused") + private final Object __iter__() { return this; } + + @SuppressWarnings("unused") + private final Object __next__() { + Chunk chunk = nextChunk(); + if (chunk == null) { throw new MissingFeature("StopIteration"); } + return new PyTuple(chunk.is_attr, chunk.value); + } + + private int nextDotOrBracket(String markup) { + int dotPos = markup.indexOf('.', index); + if (dotPos < 0) { dotPos = markup.length(); } + int bracketPos = markup.indexOf('[', index); + if (bracketPos < 0) { bracketPos = markup.length(); } + return Math.min(dotPos, bracketPos); + } + + /** + * Return the head object from the field name, as {@code int} or + * {@code str}. + * + * @return the isolated head object from the field name. + */ + public Object head() { return head; } + + /** + * Return the next "chunk" of the field name (or return null if + * ended). A chunk is a 2-tuple describing: + *
    + *
  1. whether the chunk is an attribute name,
  2. + *
  3. the name or number (as a String or Integer) for accessing the + * value.
  4. + *
+ * + * @return next element of the field name + */ + public Chunk nextChunk() { + if (index == markup.length()) { return null; } + Chunk chunk = new Chunk(); + if (markup.charAt(index) == '[') { + parseItemChunk(chunk); + } else if (markup.charAt(index) == '.') { + parseAttrChunk(chunk); + } else { + throw new IllegalArgumentException( + "Only '.' or '[' may follow ']' in format field specifier"); + } + return chunk; + } + + private void parseItemChunk(Chunk chunk) { + chunk.is_attr = false; + int endBracket = markup.indexOf(']', index + 1); + if (endBracket < 0) { throw new IllegalArgumentException("Missing ']' in format string"); } + String itemValue = markup.substring(index + 1, endBracket); + if (itemValue.length() == 0) { + throw new IllegalArgumentException("Empty attribute in format string"); + } + try { + chunk.value = Integer.parseInt(itemValue); + } catch (NumberFormatException e) { + chunk.value = itemValue; + } + index = endBracket + 1; + } + + private void parseAttrChunk(Chunk chunk) { + index++; // skip dot + chunk.is_attr = true; + int pos = nextDotOrBracket(markup); + if (pos == index) { + throw new IllegalArgumentException("Empty attribute in format string"); + } + chunk.value = markup.substring(index, pos); + index = pos; + } + + public static class Chunk { + + public boolean is_attr; + /** Integer or String. */ + public Object value; + } +} diff --git a/src/org/python/core/stringlib/FloatFormatter.java b/core/src/main/java/org/python/core/stringlib/FloatFormatter.java similarity index 52% rename from src/org/python/core/stringlib/FloatFormatter.java rename to core/src/main/java/org/python/core/stringlib/FloatFormatter.java index 621f1b963..eecee8938 100644 --- a/src/org/python/core/stringlib/FloatFormatter.java +++ b/core/src/main/java/org/python/core/stringlib/FloatFormatter.java @@ -1,41 +1,64 @@ -// Copyright (c) Jython Developers +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
package org.python.core.stringlib; import java.math.BigDecimal; import java.math.MathContext; import java.math.RoundingMode; +import org.python.core.stringlib.InternalFormat.FormatError; +import org.python.core.stringlib.InternalFormat.FormatOverflow; import org.python.core.stringlib.InternalFormat.Spec; /** - * A class that provides the implementation of floating-point formatting. In a limited way, it acts - * like a StringBuilder to which text and one or more numbers may be appended, formatted according - * to the format specifier supplied at construction. These are ephemeral objects that are not, on - * their own, thread safe. + * A class that provides the implementation of floating-point + * formatting. In a limited way, it acts like a StringBuilder to + * which text and one or more numbers may be appended, formatted + * according to the format specifier supplied at construction. These + * are ephemeral objects that are not, on their own, thread safe. */ -public class FloatFormatter extends InternalFormat.Formatter { +public abstract class FloatFormatter extends InternalFormat.AbstractFormatter { /** The rounding mode dominant in the formatter. */ static final RoundingMode ROUND_PY = RoundingMode.HALF_EVEN; - /** Limit the size of results. */ - // No-one needs more than log(Double.MAX_VALUE) - log2(Double.MIN_VALUE) = 1383 digits. + /** + * Limit the size of results. No-one needs more than + * {@code log(Double.MAX_VALUE) - log2(Double.MIN_VALUE) = 1383} + * digits. + */ static final int MAX_PRECISION = 1400; - - /** If it contains no decimal point, this length is zero, and 1 otherwise. */ + /** + * If it contains no decimal point, this length is zero, and 1 + * otherwise. + */ private int lenPoint; - /** The length of the fractional part, right of the decimal point. */ + /** + * The length of the fractional part, right of the decimal point. + */ private int lenFraction; - /** The length of the exponent marker ("e"), "inf" or "nan", or zero if there isn't one. 
*/ + /** + * The length of the exponent marker ("e"), "inf" or "nan", or zero + * if there isn't one. + */ private int lenMarker; - /** The length of the exponent sign and digits or zero if there isn't one. */ + /** + * The length of the exponent sign and digits or zero if there isn't + * one. + */ private int lenExponent; - /** if ≥0, minimum digits to follow decimal point (where consulted) */ + /** + * If {@code minFracDigits≥0}, minimum digits to follow decimal + * point (where consulted). When {@code minFracDigits<0} this + * signifies "no truncation" mode, in which trailing zeros generated + * in the conversion are not removed. + */ private int minFracDigits; /** - * Construct the formatter from a client-supplied buffer, to which the result will be appended, - * and a specification. Sets {@link #mark} to the end of the buffer. + * Construct the formatter from a client-supplied buffer, to which + * the result will be appended, and a specification. Sets + * {@link #mark} to the end of the buffer. * * @param result destination buffer * @param spec parsed conversion specification @@ -43,55 +66,61 @@ public class FloatFormatter extends InternalFormat.Formatter { public FloatFormatter(StringBuilder result, Spec spec) { super(result, spec); if (spec.alternate) { - // Alternate form means do not trim the zero fractional digits. + /* + * Alternate form means do not trim the zero fractional digits. + */ minFracDigits = -1; } else if (spec.type == 'r' || spec.type == Spec.NONE) { - // These formats by default show at least one fractional digit. + /* + * These formats by default show at least one fractional digit. + */ minFracDigits = 1; } else { /* - * Every other format (if it does not ignore the setting) will by default trim off all - * the trailing zero fractional digits. + * Every other format (if it does not ignore the setting) will by + * default trim off all the trailing zero fractional digits. 
*/ minFracDigits = 0; } } /** - * Construct the formatter from a specification, allocating a buffer internally for the result. + * Construct the formatter from a specification, allocating a buffer + * internally for the result. * * @param spec parsed conversion specification */ - public FloatFormatter(Spec spec) { - this(new StringBuilder(size(spec)), spec); - } + public FloatFormatter(Spec spec) { this(new StringBuilder(size(spec)), spec); } /** - * Recommend a buffer size for a given specification, assuming one float is converted. This will - * be a "right" answer for e and g-format, and for f-format with values up to 9,999,999. + * Recommend a buffer size for a given specification, assuming one + * float is converted. This will be a "right" answer for e and + * g-format, and for f-format with values up to 9,999,999. * * @param spec parsed conversion specification + * @return recommended a buffer size */ public static int size(Spec spec) { // Rule of thumb used here (no right answer): - // in e format each float occupies: (p-1) + len("+1.e+300") = p+7; - // in f format each float occupies: p + len("1,000,000.%") = p+11; - // or an explicit (minimum) width may be given, with one overshoot possible. - return Math.max(spec.width + 1, spec.getPrecision(6) + 11); + // in e format a float occupies: (p-1) + len("+1.e+300") = p+7; + // in f format a float occupies: p + len("1,000,000.%") = p+11; + // or an explicit (minimum) width may be given, + // and then we add a handful :) + return Math.max(spec.getPrecision(6) + 11, spec.width) + 5; } /** - * Override the default truncation behaviour for the specification originally supplied. Some - * formats remove trailing zero digits, trimming to zero or one. Set member - * minFracDigits, to modify this behaviour. + * Override the default truncation behaviour for the specification + * originally supplied. Some formats remove trailing zero digits, + * trimming to zero or one. 
Set member {@code minFracDigits}, to + * modify this behaviour. * - * @param minFracDigits if <0 prevent truncation; if ≥0 the minimum number of fractional - * digits; when this is zero, and all fractional digits are zero, the decimal point - * will also be removed. + * @param minFracDigits if <0 prevent truncation; if ≥0 the + * minimum number of fractional digits; when this is zero, and + * all fractional digits are zero, the decimal point will also + * be removed. */ - public void setMinFracDigits(int minFracDigits) { - this.minFracDigits = minFracDigits; - } + public void setMinFracDigits(int minFracDigits) { this.minFracDigits = minFracDigits; } @Override protected void reset() { @@ -121,38 +150,51 @@ public FloatFormatter append(CharSequence csq) { } @Override - public FloatFormatter append(CharSequence csq, int start, int end) // + public FloatFormatter append(CharSequence csq, int start, int end) throws IndexOutOfBoundsException { super.append(csq, start, end); return this; } /** - * Format a floating-point number according to the specification represented by this - * FloatFormatter. + * Format a floating-point number according to the specification + * represented by this {@code FloatFormatter}. * * @param value to convert * @return this object + * @throws FormatOverflow if the precision is too large + * @throws FormatError if an unsupported format character is + * encountered */ - public FloatFormatter format(double value) { + public FloatFormatter format(double value) throws FormatOverflow, FormatError { return format(value, null); } /** - * Format a floating-point number according to the specification represented by this - * FloatFormatter. The conversion type, precision, and flags for grouping or - * percentage are dealt with here. At the point this is used, we know the {@link #spec} is one - * of the floating-point types. This entry point allows explicit control of the prefix of - * positive numbers, overriding defaults for the format type. 
+ * Format a floating-point number according to the specification + * represented by this {@code FloatFormatter}. The conversion type, + * precision, and flags for grouping or percentage are dealt with + * here. At the point this is used, we know the {@link #spec} is one + * of the floating-point types. This entry point allows explicit + * control of the prefix of positive numbers, overriding defaults + * for the format type. * * @param value to convert - * @param positivePrefix to use before positive values (e.g. "+") or null to default to "" + * @param positivePrefix to use before positive values (e.g. "+") or + * null to default to "" * @return this object + * @throws FormatOverflow if the precision is too large + * @throws FormatError if an unsupported format character is + * encountered */ - @SuppressWarnings("fallthrough") - public FloatFormatter format(double value, String positivePrefix) { + // @SuppressWarnings("fallthrough") + public FloatFormatter format(double value, String positivePrefix) + throws FormatOverflow, FormatError { - // Puts all instance variables back to their starting defaults, and start = result.length(). + /* + * Puts all instance variables back to their starting defaults, and + * start = result.length(). + */ setStart(); // Precision defaults to 6 (or 12 for none-format) @@ -160,20 +202,21 @@ public FloatFormatter format(double value, String positivePrefix) { // Guard against excessive result precision // XXX Possibly better raised before result is allocated/sized. - if (precision > MAX_PRECISION) { - throw precisionTooLarge("float"); - } + if (precision > MAX_PRECISION) { throw precisionTooLarge("float"); } /* - * By default, the prefix of a positive number is "", but the format specifier may override - * it, and the built-in type complex needs to override the format. + * By default, the prefix of a positive number is "", but the format + * specifier may override it, and the built-in type complex needs to + * override the format. 
*/ char sign = spec.sign; if (positivePrefix == null && Spec.specified(sign) && sign != '-') { positivePrefix = Character.toString(sign); } - // Different process for each format type, ignoring case for now. + /* + * Different process for each format type, ignoring case for now. + */ switch (Character.toLowerCase(spec.type)) { case 'e': // Exponential case: 1.23e-45 @@ -186,50 +229,66 @@ public FloatFormatter format(double value, String positivePrefix) { break; case 'n': - // Locale-sensitive version of g-format should be here. (Désolé de vous decevoir.) - // XXX Set a variable here to signal localisation in/after groupDigits? + /* + * Locale-sensitive version of g-format should be here. (Désolé de + * vous decevoir.) + */ + /* + * XXX Set flag to signal localisation in/after groupDigits? + */ case 'g': - // General format: fixed or exponential according to value. + /* + * General format: fixed or exponential according to value. + */ format_g(value, positivePrefix, precision, 0); break; case Spec.NONE: - // None format like g-format but goes exponential at precision-1 + /* + * None format like g-format but goes exponential at precision-1 + */ format_g(value, positivePrefix, precision, -1); break; case 'r': - // For float.__repr__, very special case, breaks all the rules. + /* + * For float.__repr__, very special case, breaks all the rules. + */ format_r(value, positivePrefix); break; case '%': - // Multiplies by 100 and displays in f-format, followed by a percent sign. + /* + * Multiplies by 100 and displays in f-format, followed by a percent + * sign. + */ format_f(100. * value, positivePrefix, precision); result.append('%'); break; default: - // Should never get here, since this was checked in PyFloat. + /* + * Should never get here, since this was checked in PyFloat. + */ throw unknownFormat(spec.type, "float"); } - // If the format type is an upper-case letter, convert the result to upper case. 
- if (Character.isUpperCase(spec.type)) { - uppercase(); - } + /* + * If the format type is an upper-case letter, convert the result to + * upper case. + */ + if (Character.isUpperCase(spec.type)) { uppercase(); } // If required to, group the whole-part digits. - if (spec.grouping) { - groupDigits(3, ','); - } + if (spec.grouping) { groupDigits(3, ','); } return this; } /** - * Convert just the letters in the representation of the current number (in {@link #result}) to - * upper case. (That's the exponent marker or the "inf" or "nan".) + * Convert just the letters in the representation of the current + * number (in {@link #result}) to upper case. (That's the exponent + * marker or the "inf" or "nan".) */ @Override protected void uppercase() { @@ -242,17 +301,22 @@ protected void uppercase() { } /** - * Common code to deal with the sign, and the special cases "0", "-0", "nan, "inf", or "-inf". - * If the method returns false, we have started a non-zero number and the sign is - * already in {@link #result}. The client need then only encode abs(value). If the method - * returns true, and {@link #lenMarker}==0, the value was "0" or "-0": the caller - * may have to zero-extend this, and/or add an exponent, to match the requested format. If the - * method returns true, and {@link #lenMarker}>0, the method has placed "nan, "inf" - * in the {@link #result} buffer (preceded by a sign if necessary). + * Common code to deal with the sign, and the special cases "0", + * "-0", "nan, "inf", or "-inf". If the method returns + * {@code false}, we have started a non-zero number and the sign is + * already in {@link #result}. The client need then only encode + * abs(value). If the method returns {@code true}, and + * {@link #lenMarker}==0, the value was "0" or "-0": the caller may + * have to zero-extend this, and/or add an exponent, to match the + * requested format. 
If the method returns {@code true}, and + * {@link #lenMarker}>0, the method has placed "nan, "inf" in the + * {@link #result} buffer (preceded by a sign if necessary). * * @param value to convert - * @return true if the value was one of "0", "-0", "nan, "inf", or "-inf". - * @param positivePrefix to use before positive values (e.g. "+") or null to default to "" + * @return true if the value was one of "0", "-0", "nan, "inf", or + * "-inf". + * @param positivePrefix to use before positive values (e.g. "+") or + * null to default to "" */ private boolean signAndSpecialNumber(double value, String positivePrefix) { @@ -260,9 +324,7 @@ private boolean signAndSpecialNumber(double value, String positivePrefix) { long bits = Double.doubleToRawLongBits(value); // NaN is always positive - if (Double.isNaN(value)) { - bits &= ~SIGN_MASK; - } + if (Double.isNaN(value)) { bits &= ~SIGN_MASK; } if ((bits & SIGN_MASK) != 0) { // Negative: encode a minus sign and strip it off bits @@ -271,13 +333,19 @@ private boolean signAndSpecialNumber(double value, String positivePrefix) { bits &= ~SIGN_MASK; } else if (positivePrefix != null) { - // Positive, and a prefix is required. Note CPython 2.7 produces "+nan", " nan". + /* + * Positive, and a prefix is required. Note CPython 2.7 produces + * "+nan", " nan". + */ result.append(positivePrefix); lenSign = positivePrefix.length(); } if (bits == 0L) { - // All zero means it's zero. (It may have been negative, producing -0.) + /* + * All zero means it's zero. (It may have been negative, producing + * -0.) + */ result.append('0'); lenWhole = 1; return true; @@ -297,13 +365,15 @@ private boolean signAndSpecialNumber(double value, String positivePrefix) { private static final long EXP_MASK = 0x7ff0000000000000L; /** - * The e-format helper function of {@link #format(double, String)} that uses Java's - * {@link BigDecimal} to provide conversion and rounding. 
The converted number is appended to - * the {@link #result} buffer, and {@link #start} will be set to the index of its first + * The e-format helper function of {@link #format(double, String)} + * that uses Java's {@link BigDecimal} to provide conversion and + * rounding. The converted number is appended to the {@link #result} + * buffer, and {@link #start} will be set to the index of its first * character. * * @param value to convert - * @param positivePrefix to use before positive values (e.g. "+") or null to default to "" + * @param positivePrefix to use before positive values (e.g. "+") or + * null to default to "" * @param precision precision (maximum number of fractional digits) */ private void format_e(double value, String positivePrefix, int precision) { @@ -312,11 +382,16 @@ private void format_e(double value, String positivePrefix, int precision) { int exp = 0; if (!signAndSpecialNumber(value, positivePrefix)) { - // Convert abs(value) to decimal with p+1 digits of accuracy. + /* + * Convert abs(value) to decimal with p+1 digits of accuracy. + */ MathContext mc = new MathContext(precision + 1, ROUND_PY); BigDecimal vv = new BigDecimal(Math.abs(value), mc); - // Take explicit control in order to get exponential notation out of BigDecimal. + /* + * Take explicit control in order to get exponential notation out of + * BigDecimal. + */ String digits = vv.unscaledValue().toString(); int digitCount = digits.length(); result.append(digits.charAt(0)); @@ -330,7 +405,10 @@ private void format_e(double value, String positivePrefix, int precision) { exp = lenFraction - vv.scale(); } - // If the result is not already complete, add point and zeros as necessary, and exponent. + /* + * If the result is not already complete, add point and zeros as + * necessary, and exponent. 
+ */ if (lenMarker == 0) { ensurePointAndTrailingZeros(precision); appendExponent(exp); @@ -338,63 +416,76 @@ private void format_e(double value, String positivePrefix, int precision) { } /** - * The f-format inner helper function of {@link #format(double, String)} that uses Java's - * {@link BigDecimal} to provide conversion and rounding. The converted number is appended to - * the {@link #result} buffer, and {@link #start} will be set to the index of its first - * character. + * The f-format inner helper function of + * {@link #format(double, String)} that uses Java's + * {@link BigDecimal} to provide conversion and rounding. The + * converted number is appended to the {@link #result} buffer, and + * {@link #start} will be set to the index of its first character. * * @param value to convert - * @param positivePrefix to use before positive values (e.g. "+") or null to default to "" + * @param positivePrefix to use before positive values (e.g. "+") or + * null to default to "" * @param precision precision (maximum number of fractional digits) */ private void format_f(double value, String positivePrefix, int precision) { if (!signAndSpecialNumber(value, positivePrefix)) { - // Convert value to decimal exactly. (This can be very long.) + /* + * Convert value to decimal exactly. (This can be very long.) + */ BigDecimal vLong = new BigDecimal(Math.abs(value)); - // Truncate to the defined number of places to the right of the decimal point). + /* + * Truncate to the defined number of places to the right of the + * decimal point). + */ BigDecimal vv = vLong.setScale(precision, ROUND_PY); - // When converted to text, the number of fractional digits is exactly the scale we set. + /* + * When converted to text, the number of fractional digits is + * exactly the scale we set. 
+ */ String raw = vv.toPlainString(); result.append(raw); if ((lenFraction = vv.scale()) > 0) { // There is a decimal point and some digits following lenWhole = result.length() - (start + lenSign + (lenPoint = 1) + lenFraction); } else { - // There are no fractional digits and so no decimal point + /* + * There are no fractional digits and so no decimal point + */ lenWhole = result.length() - (start + lenSign); } } // Finally, ensure we have all the fractional digits we should. - if (lenMarker == 0) { - ensurePointAndTrailingZeros(precision); - } + if (lenMarker == 0) { ensurePointAndTrailingZeros(precision); } } /** - * Append a decimal point and trailing fractional zeros if necessary for 'e' and 'f' format. - * This should not be called if the result is not numeric ("inf" for example). This method deals - * with the following complexities: on return there will be at least the number of fractional - * digits specified in the argument n, and at least {@link #minFracDigits}; - * further, if minFracDigits<0, signifying the "alternate mode" of certain - * formats, the method will ensure there is a decimal point, even if there are no fractional - * digits to follow. + * Append a decimal point and trailing fractional zeros if necessary + * for 'e' and 'f' format. This should not be called if the result + * is not numeric ("inf" for example). This method deals with the + * following complexities: on return there will be at least the + * number of fractional digits specified in the argument {@code n}, + * and at least {@link #minFracDigits}; further, if + * {@code minFracDigits<0}, signifying the "alternate mode" of + * certain formats, the method will ensure there is a decimal point, + * even if there are no fractional digits to follow. * * @param n smallest number of fractional digits on return */ private void ensurePointAndTrailingZeros(int n) { // Set n to the number of fractional digits we should have. 
- if (n < minFracDigits) { - n = minFracDigits; - } + if (n < minFracDigits) { n = minFracDigits; } // Do we have a decimal point already? if (lenPoint == 0) { - // No decimal point: add one if there will be any fractional digits or + /* + * No decimal point: add one if there will be any fractional digits + * (a defined number or "alternate mode"). + */ if (n > 0 || minFracDigits < 0) { // First need to add a decimal point. result.append('.'); @@ -406,33 +497,37 @@ private void ensurePointAndTrailingZeros(int n) { int f = lenFraction; if (n > f) { // Make up the required number of zeros. - for (; f < n; f++) { - result.append('0'); - } + for (; f < n; f++) { result.append('0'); } lenFraction = f; } } /** - * Implementation of the variants of g-format, that uses Java's {@link BigDecimal} to provide - * conversion and rounding. These variants are g-format proper, alternate g-format (available - * for "%#g" formatting), n-format (as g but subsequently "internationalised"), and none-format - * (type code Spec.NONE). + * Implementation of the variants of g-format, that uses Java's + * {@link BigDecimal} to provide conversion and rounding. These + * variants are g-format proper, alternate g-format (available for + * "%#g" formatting), n-format (as g but subsequently + * "internationalised"), and none-format (type code Spec.NONE). *

- * None-format is the basis of float.__str__. + * None-format is the basis of {@code float.__str__}. *

- * According to the Python documentation for g-format, the precise rules are as follows: suppose - * that the result formatted with presentation type 'e' and precision p-1 - * would have exponent exp. Then if -4 <= exp < p, the number is formatted with - * presentation type 'f' and precision p-1-exp. Otherwise, the number is - * formatted with presentation type 'e' and precision p-1. In both cases - * insignificant trailing zeros are removed from the significand, and the decimal point is also - * removed if there are no remaining digits following it. + * According to the Python documentation for g-format, the precise + * rules are as follows: suppose that the result formatted with + * presentation type {@code 'e'} and precision p-1 would have + * exponent exp. Then if -4 <= exp < p, the number is + * formatted with presentation type {@code 'f'} and precision + * p-1-exp. Otherwise, the number is formatted with + * presentation type {@code 'e'} and precision p-1. In both + * cases insignificant trailing zeros are removed from the + * significand, and the decimal point is also removed if there are + * no remaining digits following it. *

- * The Python documentation says none-format is the same as g-format, but the observed behaviour - * differs from this, in that f-format is only used if -4 <= exp < p-1 (i.e. one - * less), and at least one digit to the right of the decimal point is preserved in the f-format - * (but not the e-format). That behaviour is controlled through the following arguments, with + * The Python documentation says none-format is the same as + * g-format, but the observed behaviour differs from this, in that + * f-format is only used if -4 <= exp < p-1 (i.e. one + * less), and at least one digit to the right of the decimal point + * is preserved in the f-format (but not the e-format). That + * behaviour is controlled through the following arguments, with * these recommended values: * * @@ -475,10 +570,12 @@ private void ensurePointAndTrailingZeros(int n) { *
* * @param value to convert - * @param positivePrefix to use before positive values (e.g. "+") or null to default to "" - * @param precision total number of significant digits (precision 0 behaves as 1) - * @param expThresholdAdj +precision = the exponent at which to resume using - * exponential notation + * @param positivePrefix to use before positive values (e.g. "+") or + * null to default to "" + * @param precision total number of significant digits (precision 0 + * behaves as 1) + * @param expThresholdAdj {@code +precision =} the exponent at which + * to resume using exponential notation */ private void format_g(double value, String positivePrefix, int precision, int expThresholdAdj) { @@ -489,7 +586,10 @@ private void format_g(double value, String positivePrefix, int precision, int ex int expThreshold = precision + expThresholdAdj; if (signAndSpecialNumber(value, positivePrefix)) { - // Finish formatting if zero result. (This is a no-op for nan or inf.) + /* + * Finish formatting if zero result. (This is a no-op for nan or + * inf.) + */ zeroHelper(precision, expThreshold); } else { @@ -498,14 +598,21 @@ private void format_g(double value, String positivePrefix, int precision, int ex MathContext mc = new MathContext(precision, ROUND_PY); BigDecimal vv = new BigDecimal(Math.abs(value), mc); - // This gives us the digits we need for either fixed or exponential format. + /* + * This gives us the digits we need for either fixed or exponential + * format. + */ String pointlessDigits = vv.unscaledValue().toString(); - // If we were to complete this as e-format, the exponent would be: + /* + * If we were to complete this as e-format, the exponent would be: + */ int exp = pointlessDigits.length() - vv.scale() - 1; if (-4 <= exp && exp < expThreshold) { - // Finish the job as f-format with variable-precision p-(exp+1). + /* + * Finish the job as f-format with variable-precision p-(exp+1). 
+ */ appendFixed(pointlessDigits, exp, precision); } else { @@ -516,35 +623,49 @@ private void format_g(double value, String positivePrefix, int precision, int ex } /** - * Implementation of r-format (float.__repr__) that uses Java's - * {@link Double#toString(double)} to provide conversion and rounding. That method gives us - * almost what we need, but not quite (sometimes it yields 18 digits): here we always round to - * 17 significant digits. Much of the formatting after conversion is shared with - * {@link #format_g(double, String, int, int, int)}. minFracDigits is consulted - * since while float.__repr__ truncates to one digit, within - * complex.__repr__ we truncate fully. + * Implementation of r-format ({@code float.__repr__}) that uses + * Java's {@link Double#toString(double)} to provide conversion and + * rounding. That method gives us almost what we need, but not quite + * (sometimes it yields 18 digits): here we always round to 17 + * significant digits. Much of the formatting after conversion is + * shared with {@link #format_g(double, String, int, int, int)}. + * {@code minFracDigits} is consulted since while + * {@code float.__repr__} truncates to one digit, within + * {@code complex.__repr__} we truncate fully. * * @param value to convert - * @param positivePrefix to use before positive values (e.g. "+") or null to default to "" + * @param positivePrefix to use before positive values (e.g. "+") or + * null to default to "" */ private void format_r(double value, String positivePrefix) { - // Characteristics of repr (precision = 17 and go exponential at 16). + /* + * Characteristics of repr (precision = 17 and go exponential at + * 16). + */ int precision = 17; int expThreshold = precision - 1; if (signAndSpecialNumber(value, positivePrefix)) { - // Finish formatting if zero result. (This is a no-op for nan or inf.) + /* + * Finish formatting if zero result. (This is a no-op for nan or + * inf.) 
+ */ zeroHelper(precision, expThreshold); } else { - // Generate digit sequence (with no decimal point) with custom rounding. + /* + * Generate digit sequence (with no decimal point) with custom + * rounding. + */ StringBuilder pointlessBuffer = new StringBuilder(20); int exp = reprDigits(Math.abs(value), precision, pointlessBuffer); if (-4 <= exp && exp < expThreshold) { - // Finish the job as f-format with variable-precision p-(exp+1). + /* + * Finish the job as f-format with variable-precision p-(exp+1). + */ appendFixed(pointlessBuffer, exp, precision); } else { @@ -555,66 +676,86 @@ private void format_r(double value, String positivePrefix) { } /** - * Common code for g-format, none-format and r-format called when the conversion yields "inf", - * "nan" or zero. The method completes formatting of the zero, with the appropriate number of - * decimal places or (in particular circumstances) exponential; notation. + * Common code for g-format, none-format and r-format called when + * the conversion yields "inf", "nan" or zero. The method completes + * formatting of the zero, with the appropriate number of decimal + * places or (in particular circumstances) exponential; notation. * * @param precision of conversion (number of significant digits). - * @param expThreshold if zero, causes choice of exponential notation for zero. + * @param expThreshold if zero, causes choice of exponential + * notation for zero. */ private void zeroHelper(int precision, int expThreshold) { if (lenMarker == 0) { // May be 0 or -0 so we still need to ... if (minFracDigits < 0) { - // In "alternate format", we won't economise trailing zeros. + /* + * In "alternate format", we won't economise trailing zeros. + */ appendPointAndTrailingZeros(precision - 1); } else if (lenFraction < minFracDigits) { - // Otherwise, it should be at least the stated minimum length. + /* + * Otherwise, it should be at least the stated minimum length. 
+ */ appendTrailingZeros(minFracDigits); } - // And just occasionally (in none-format) we go exponential even when exp = 0... - if (0 >= expThreshold) { - appendExponent(0); - } + /* + * And just occasionally (in none-format) we go exponential even + * when exp = 0... + */ + if (0 >= expThreshold) { appendExponent(0); } } } /** - * Common code for g-format, none-format and r-format used when the exponent is such that a - * fixed-point presentation is chosen. Normally the method removes trailing digits so as to - * shorten the presentation without loss of significance. This method respects the minimum - * number of fractional digits (digits after the decimal point), in member - * minFracDigits, which is 0 for g-format and 1 for none-format and r-format. When - * minFracDigits<0 this signifies "no truncation" mode, in which trailing zeros - * generated in the conversion are not removed. This supports "%#g" format. + * Common code for g-format, none-format and r-format used when the + * exponent is such that a fixed-point presentation is chosen. + * Normally the method removes trailing digits so as to shorten the + * presentation without loss of significance. This method respects + * the minimum number of fractional digits (digits after the decimal + * point), in member {@code minFracDigits}, which is 0 for g-format + * and 1 for none-format and r-format. When + * {@code minFracDigits<0} this signifies "no truncation" mode, + * in which trailing zeros generated in the conversion are not + * removed. This supports "%#g" format. * * @param digits from converting the value at a given precision. - * @param exp would be the exponent (in e-format), used to position the decimal point. + * @param exp would be the exponent (in e-format), used to position + * the decimal point. * @param precision of conversion (number of significant digits). 
*/ private void appendFixed(CharSequence digits, int exp, int precision) { - // Check for "alternate format", where we won't economise trailing zeros. + /* + * Check for "alternate format", where we won't economise trailing + * zeros. + */ boolean noTruncate = (minFracDigits < 0); int digitCount = digits.length(); if (exp < 0) { - // For a negative exponent, we must insert leading zeros 0.000 ... + /* + * For a negative exponent, we must insert leading zeros 0.000 ... + */ result.append("0."); lenWhole = lenPoint = 1; - for (int i = -1; i > exp; --i) { - result.append('0'); - } - // Then the generated digits (always enough to satisfy no-truncate mode). + for (int i = -1; i > exp; --i) { result.append('0'); } + /* + * Then the generated digits (always enough to satisfy no-truncate + * mode). + */ result.append(digits); lenFraction = digitCount - exp - 1; } else { - // For a non-negative exponent, it's a question of placing the decimal point. + /* + * For a non-negative exponent, it's a question of placing the + * decimal point. + */ int w = exp + 1; if (w < digitCount) { // There are w whole-part digits @@ -626,7 +767,10 @@ private void appendFixed(CharSequence digits, int exp, int precision) { } else { // All the digits are whole-part digits. result.append(digits); - // Just occasionally (in r-format) we need more digits than the precision. + /* + * Just occasionally (in r-format) we need more digits than the + * precision. + */ while (digitCount < w) { result.append('0'); digitCount += 1; @@ -635,15 +779,22 @@ private void appendFixed(CharSequence digits, int exp, int precision) { } if (noTruncate) { - // Extend the fraction as BigDecimal will have economised on zeros. + /* + * Extend the fraction as BigDecimal will have economised on zeros. + */ appendPointAndTrailingZeros(precision - digitCount); } } - // Finally, ensure we have all and only the fractional digits we should. + /* + * Finally, ensure we have all and only the fractional digits we + * should. 
+ */ if (!noTruncate) { if (lenFraction < minFracDigits) { - // Otherwise, it should be at least the stated minimum length. + /* + * Otherwise, it should be at least the stated minimum length. + */ appendTrailingZeros(minFracDigits); } else { // And no more @@ -653,15 +804,18 @@ private void appendFixed(CharSequence digits, int exp, int precision) { } /** - * Common code for g-format, none-format and r-format used when the exponent is such that an - * exponential presentation is chosen. Normally the method removes trailing digits so as to - * shorten the presentation without loss of significance. Although no minimum number of - * fractional digits is enforced in the exponential presentation, when - * minFracDigits<0 this signifies "no truncation" mode, in which trailing zeros - * generated in the conversion are not removed. This supports "%#g" format. + * Common code for g-format, none-format and r-format used when the + * exponent is such that an exponential presentation is chosen. + * Normally the method removes trailing digits so as to shorten the + * presentation without loss of significance. Although no minimum + * number of fractional digits is enforced in the exponential + * presentation, when {@code minFracDigits<0} this signifies "no + * truncation" mode, in which trailing zeros generated in the + * conversion are not removed. This supports "%#g" format. * * @param digits from converting the value at a given precision. - * @param exp would be the exponent (in e-format), used to position the decimal point. + * @param exp would be the exponent (in e-format), used to position + * the decimal point. */ private void appendExponential(CharSequence digits, int exp) { @@ -675,9 +829,14 @@ private void appendExponential(CharSequence digits, int exp) { lenPoint = 1; lenFraction = digitCount - 1; - // In no-truncate mode, the fraction is full precision. Otherwise trim it. + /* + * In no-truncate mode, the fraction is full precision. Otherwise + * trim it. 
+ */ if (minFracDigits >= 0) { - // Note positive minFracDigits only applies to fixed formats. + /* + * Note positive minFracDigits only applies to fixed formats. + */ removeTrailingZeros(0); } @@ -686,16 +845,21 @@ private void appendExponential(CharSequence digits, int exp) { } /** - * Convert a double to digits and an exponent for use in float.__repr__ (or - * r-format). This method takes advantage of (or assumes) a close correspondence between - * {@link Double#toString(double)} and Python float.__repr__. The correspondence - * appears to be exact, insofar as the Java method produces the minimal non-zero digit string. - * It mostly chooses the same number of digits (and the same digits) as the CPython repr, but in - * a few cases Double.toString produces more digits. This method truncates to the - * number maxDigits, which in practice is always 17. + * Convert a double to digits and an exponent for use in + * {@code float.__repr__} (or r-format). This method takes advantage + * of (or assumes) a close correspondence between + * {@link Double#toString(double)} and Python + * {@code float.__repr__}. The correspondence appears to be exact, + * insofar as the Java method produces the minimal non-zero digit + * string. It mostly chooses the same number of digits (and the same + * digits) as the CPython repr, but in a few cases + * {@code Double.toString} produces more digits. This method + * truncates to the number {@code maxDigits}, which in practice is + * always 17. * * @param value to convert - * @param maxDigits maximum number of digits to return in buf. + * @param maxDigits maximum number of digits to return in + * {@code buf}. * @param buf for digits of result (recommend size be 20) * @return the exponent */ @@ -728,7 +892,10 @@ private static int reprDigits(double value, int maxDigits, StringBuilder buf) { } } else if (c == '.') { - // We remember this location (one *after* '.') to calculate the exponent later. 
+ /* + * We remember this location (one *after* '.') to calculate the + * exponent later. + */ point = p; } else { @@ -737,16 +904,28 @@ private static int reprDigits(double value, int maxDigits, StringBuilder buf) { } } - // Possibly followed by an exponent. p has already advanced past the 'E'. + /* + * Possibly followed by an exponent. p has already advanced past the + * 'E'. + */ if (p < end && c == 'E') { - // If there is an exponent, the mantissa must be in standard form: m.mmmm + /* + * If there is an exponent, the mantissa must be in standard form: + * m.mmmm + */ assert point == first + 1; exp = Integer.parseInt(s.substring(p)); } else { - // Exponent is based on relationship of decimal point and first non-zero digit. + /* + * Exponent is based on relationship of decimal point and first + * non-zero digit. + */ exp = point - first - 1; - // But that's only correct when the point is to the right (or absent). + /* + * But that's only correct when the point is to the right (or + * absent). + */ if (exp < 0) { // The point is to the left of the first digit exp += 1; // = -(first-point) @@ -754,24 +933,35 @@ private static int reprDigits(double value, int maxDigits, StringBuilder buf) { } /* - * XXX This still does not round in all the cases it could. I think Java stops generating - * digits when the residual is <= ulp/2. This is to neglect the possibility that the extra - * ulp/2 (before it becomes a different double) could take us to a rounder numeral. To fix - * this, we could express ulp/2 as digits in the same scale as those in the buffer, and - * consider adding them. But Java's behaviour here is probably a manifestation of bug - * JDK-4511638. + * XXX This still does not round in all the cases it could. I think + * Java stops generating digits when the residual is <= ulp/2. This + * is to neglect the possibility that the extra ulp/2 (before it + * becomes a different double) could take us to a rounder numeral. 
+ * To fix this, we could express ulp/2 as digits in the same scale + * as those in the buffer, and consider adding them. But Java's + * behaviour here is probably a manifestation of bug JDK-4511638. */ // Sometimes the result is more digits than we want for repr. if (buf.length() > maxDigits) { - // Chop the trailing digits, remembering the most significant lost digit. + /* + * Chop the trailing digits, remembering the most significant lost + * digit. + */ int d = buf.charAt(maxDigits); buf.setLength(maxDigits); - // We round half up. Not absolutely correct since Double has already rounded. + /* + * We round half up. Not absolutely correct since Double has already + * rounded. + */ if (d >= '5') { - // Treat this as a "carry one" into the numeral buf[0:maxDigits]. + /* + * Treat this as a "carry one" into the numeral buf[0:maxDigits]. + */ for (p = maxDigits - 1; p >= 0; p--) { - // Each pass of the loop does one carry from buf[p+1] to buf[p]. + /* + * Each pass of the loop does one carry from buf[p+1] to buf[p]. + */ d = buf.charAt(p) + 1; if (d <= '9') { // Carry propagation stops here. @@ -784,9 +974,10 @@ private static int reprDigits(double value, int maxDigits, StringBuilder buf) { } if (p < 0) { /* - * We fell off the bottom of the buffer with one carry still to propagate. You - * may expect: buf.insert(0, '1') here, but note that every digit in - * buf[0:maxDigits] is currently '0', so all we need is: + * We fell off the bottom of the buffer with one carry still to + * propagate. You may expect: buf.insert(0, '1') here, but note that + * every digit in buf[0:maxDigits] is currently '0', so all we need + * is: */ buf.setCharAt(0, '1'); exp += 1; @@ -798,9 +989,10 @@ private static int reprDigits(double value, int maxDigits, StringBuilder buf) { } /** - * Append the trailing fractional zeros, as required by certain formats, so that the total - * number of fractional digits is no less than specified. 
If n<=0, the method - * leaves the {@link #result} buffer unchanged. + * Append the trailing fractional zeros, as required by certain + * formats, so that the total number of fractional digits is no less + * than specified. If {@code n<=0}, the method leaves the + * {@link #result} buffer unchanged. * * @param n smallest number of fractional digits on return */ @@ -810,54 +1002,60 @@ private void appendTrailingZeros(int n) { if (n > f) { if (lenPoint == 0) { - // First need to add a decimal point. (Implies lenFraction=0.) + /* + * First need to add a decimal point. (Implies lenFraction=0.) + */ result.append('.'); lenPoint = 1; } // Now make up the required number of zeros. - for (; f < n; f++) { - result.append('0'); - } + for (; f < n; f++) { result.append('0'); } lenFraction = f; } } /** - * Append the trailing fractional zeros, as required by certain formats, so that the total - * number of fractional digits is no less than specified. If there is no decimal point - * originally (and therefore no fractional part), the method will add a decimal point, even if - * it adds no zeros. + * Append the trailing fractional zeros, as required by certain + * formats, so that the total number of fractional digits is no less + * than specified. If there is no decimal point originally (and + * therefore no fractional part), the method will add a decimal + * point, even if it adds no zeros. * * @param n smallest number of fractional digits on return */ private void appendPointAndTrailingZeros(int n) { if (lenPoint == 0) { - // First need to add a decimal point. (Implies lenFraction=0.) + /* + * First need to add a decimal point. (Implies lenFraction=0.) + */ result.append('.'); lenPoint = 1; } // Now make up the required number of zeros. 
int f; - for (f = lenFraction; f < n; f++) { - result.append('0'); - } + for (f = lenFraction; f < n; f++) { result.append('0'); } lenFraction = f; } /** - * Remove trailing zeros from the fractional part, as required by certain formats, leaving at - * least the number of fractional digits specified. If the resultant number of fractional digits - * is zero, this method will also remove the trailing decimal point (if there is one). + * Remove trailing zeros from the fractional part, as required by + * certain formats, leaving at least the number of fractional digits + * specified. If the resultant number of fractional digits is zero, + * this method will also remove the trailing decimal point (if there + * is one). * * @param n smallest number of fractional digits on return */ private void removeTrailingZeros(int n) { if (lenPoint > 0) { - // There's a decimal point at least, and there may be some fractional digits. + /* + * There's a decimal point at least, and there may be some + * fractional digits. + */ int f = lenFraction; if (n == 0 || f > n) { @@ -869,9 +1067,14 @@ private void removeTrailingZeros(int n) { } } - // f is now the number of fractional digits we wish to retain. + /* + * f is now the number of fractional digits we wish to retain. + */ if (f == 0 && lenPoint > 0) { - // We will be stripping all the fractional digits. Take the decimal point too. + /* + * We will be stripping all the fractional digits. Take the decimal + * point too. + */ lenPoint = lenFraction = 0; f = -1; } else { @@ -879,16 +1082,15 @@ private void removeTrailingZeros(int n) { } // Snip the characters we are going to remove (if any). - if (fracStart + f < result.length()) { - result.setLength(fracStart + f); - } + if (fracStart + f < result.length()) { result.setLength(fracStart + f); } } } } /** - * Append the current value of {@code exp} in the format "e{:+02d}" (for example - * e+05, e-10, e+308 , etc.). 
+ * Append the current value of {@code exp} in the format + * {@code "e{:+02d}"} (for example {@code e+05}, {@code e-10}, + * {@code e+308} , etc.). * * @param exp exponent value to append */ @@ -911,11 +1113,9 @@ private void appendExponent(int exp) { } /** - * Return the index in {@link #result} of the first letter. This is a helper for - * {@link #uppercase()} and {@link #getExponent()} + * Return the index in {@link #result} of the first letter. This is + * a helper for {@link #uppercase()} and {@link #getExponent()} */ - private int indexOfMarker() { - return start + lenSign + lenWhole + lenPoint + lenFraction; - } + private int indexOfMarker() { return start + lenSign + lenWhole + lenPoint + lenFraction; } } diff --git a/core/src/main/java/org/python/core/stringlib/IntArrayBuilder.java b/core/src/main/java/org/python/core/stringlib/IntArrayBuilder.java new file mode 100644 index 000000000..1961eeb67 --- /dev/null +++ b/core/src/main/java/org/python/core/stringlib/IntArrayBuilder.java @@ -0,0 +1,101 @@ +package org.python.core.stringlib; + +import java.util.Arrays; +import java.util.Spliterator; +import java.util.stream.IntStream; + +/** + * An elastic buffer of integer values, somewhat like the + * {@code java.lang.StringBuilder}, but for arrays of integers. The + * client appends data and may finally take the built array, often + * without copying the data. + */ +public final class IntArrayBuilder extends AbstractIntArrayBuilder.Forward { + private int[] value; + private int len = 0; + private int max = 0; + + /** + * Create an empty buffer of a defined initial capacity. + * + * @param capacity initially + */ + public IntArrayBuilder(int capacity) { value = new int[capacity]; } + + /** Create an empty buffer of a default initial capacity. 
*/ + public IntArrayBuilder() { + value = EMPTY_INT_ARRAY; + } + + @Override + protected void appendUnchecked(int v) { + value[len++] = v; + max = Math.max(max, v); + } + + @Override + public int length() { return len; } + + @Override + public int max() { return max; } + + /** + * Ensure there is room for another {@code n} elements. + * + * @param n to make space for + */ + @Override + protected void ensure(int n) { + if (len > value.length - n) { + if (len == 0) { + // Adding to empty: try exact fit. + value = new int[n]; + } else { + // Not empty: grow storage and copy into it + int newSize = Math.max(value.length * 2, MINSIZE); + int[] newValue = new int[newSize]; + System.arraycopy(value, 0, newValue, 0, len); + value = newValue; + } + } + } + + /** + * Append the {@code int}s from the given stream. + * + * @param s stream to append from + * @return this builder + */ + public IntArrayBuilder append(IntStream s) { + Spliterator.OfInt iter = s.spliterator(); + long N = iter.estimateSize(); + int n = (int)Math.min(Integer.MAX_VALUE, N); + if (n == N) { + ensure(n); + iter.forEachRemaining((int c) -> appendUnchecked(c)); + } else { + // Maybe N is unknown, else will overflow eventually ... + iter.forEachRemaining((int c) -> append(c)); + } + return this; + } + + @Override + protected int[] value() { return Arrays.copyOf(value, len); } + + @Override + public int[] take() { + int[] v; + if (len == value.length) { + // The array is exactly filled: use it without copy. + v = value; + value = EMPTY_INT_ARRAY; + } else { + // The array is partly filled: copy it and re-use it. 
+ v = Arrays.copyOf(value, len); + } + len = 0; + max = 0; + return v; + } +} diff --git a/core/src/main/java/org/python/core/stringlib/IntArrayReverseBuilder.java b/core/src/main/java/org/python/core/stringlib/IntArrayReverseBuilder.java new file mode 100644 index 000000000..4b0da0e7c --- /dev/null +++ b/core/src/main/java/org/python/core/stringlib/IntArrayReverseBuilder.java @@ -0,0 +1,80 @@ +package org.python.core.stringlib; + +import java.util.Arrays; + +/** + * An elastic buffer of integer values, somewhat like the + * {@code java.lang.StringBuilder}, but for arrays of integers. The + * client prepends data, so the array builds right to left, and may + * finally take the built array, often without copying the data. + */ +public final class IntArrayReverseBuilder extends AbstractIntArrayBuilder.Reverse { + private int[] value; + private int ptr = 0; + private int max = 0; + + /** + * Create an empty buffer of a defined initial capacity. + * + * @param capacity initially + */ + public IntArrayReverseBuilder(int capacity) { + value = new int[capacity]; + ptr = value.length; + } + + /** Create an empty buffer of a default initial capacity. */ + public IntArrayReverseBuilder() { + value = EMPTY_INT_ARRAY; + } + + @Override + protected void prependUnchecked(int v) { + value[--ptr] = v; + max = Math.max(max, v); + } + + @Override + public int length() { return value.length - ptr; } + + @Override + public int max() { return max; } + + @Override + protected void ensure(int n) { + if (n > ptr) { + if (ptr == value.length) { + // Adding to empty: try exact fit. 
+ value = new int[n]; + ptr = n; + } else { + int len = value.length - ptr; + int newSize = Math.max(value.length * 2, MINSIZE); + int newPtr = newSize - len; + int[] newValue = new int[newSize]; + System.arraycopy(value, ptr, newValue, newPtr, len); + value = newValue; + ptr = newPtr; + } + } + } + + @Override + protected int[] value() { return Arrays.copyOfRange(value, ptr, value.length); } + + @Override + public int[] take() { + int[] v; + if (ptr == 0) { + // The array is exactly filled: use it without copy. + v = value; + value = EMPTY_INT_ARRAY; + } else { + // The array is partly filled: copy it and re-use it. + v = Arrays.copyOfRange(value, ptr, value.length); + ptr = value.length; + } + max = 0; + return v; + } +} diff --git a/src/org/python/core/stringlib/IntegerFormatter.java b/core/src/main/java/org/python/core/stringlib/IntegerFormatter.java similarity index 63% rename from src/org/python/core/stringlib/IntegerFormatter.java rename to core/src/main/java/org/python/core/stringlib/IntegerFormatter.java index 48958b80a..dfb0ec5d3 100644 --- a/src/org/python/core/stringlib/IntegerFormatter.java +++ b/core/src/main/java/org/python/core/stringlib/IntegerFormatter.java @@ -1,14 +1,11 @@ -// Copyright (c) Jython Developers +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. package org.python.core.stringlib; import java.math.BigInteger; -import org.python.core.Py; -import org.python.core.PyInteger; -import org.python.core.PyLong; -import org.python.core.PyObject; -import org.python.core.PyString; -import org.python.core.PySystemState; +import org.python.core.stringlib.InternalFormat.FormatError; +import org.python.core.stringlib.InternalFormat.FormatOverflow; import org.python.core.stringlib.InternalFormat.Spec; /** @@ -17,7 +14,7 @@ * format specifier supplied at construction. These are ephemeral objects that are not, on their * own, thread safe. 
*/ -public class IntegerFormatter extends InternalFormat.Formatter { +public abstract class IntegerFormatter extends InternalFormat.AbstractFormatter { /** * Construct the formatter from a client-supplied buffer, to which the result will be appended, @@ -70,9 +67,11 @@ public IntegerFormatter append(CharSequence csq, int start, int end) // * * @param value to convert * @return this object + * @throws FormatOverflow if a value is out of range (including the precision) + * @throws FormatError if an unsupported format character is encountered */ - @SuppressWarnings("fallthrough") - public IntegerFormatter format(BigInteger value) { + public IntegerFormatter format(BigInteger value) + throws FormatOverflow, FormatError { try { // Different process for each format type. switch (spec.type) { @@ -227,12 +226,13 @@ void format_b(BigInteger value) { * Format the value as a character (into {@link #result}). * * @param value to convert + * @throws FormatOverflow if {@code value} out of range */ - void format_c(BigInteger value) { + void format_c(BigInteger value) throws FormatOverflow { // Limit is 256 if we're formatting for byte output, unicode range otherwise. BigInteger limit = bytes ? LIMIT_BYTE : LIMIT_UNICODE; if (value.signum() < 0 || value.compareTo(limit) >= 0) { - throw Py.OverflowError("%c arg not in range(0x" + toHexString(limit) + ")"); + throw new FormatOverflow("%c arg not in range(0x" + toHexString(limit) + ")"); } else { result.appendCodePoint(value.intValue()); } @@ -240,190 +240,9 @@ void format_c(BigInteger value) { // Limits used in format_c(BigInteger) private static final BigInteger LIMIT_UNICODE = BigInteger - .valueOf(PySystemState.maxunicode + 1); + .valueOf(Character.MAX_CODE_POINT + 1); private static final BigInteger LIMIT_BYTE = BigInteger.valueOf(256); - /** - * Format an integer according to the specification represented by this - * IntegerFormatter. The conversion type, and flags for grouping or base prefix are - * dealt with here. 
At the point this is used, we know the {@link #spec} is one of the integer - * types. - * - * @param value to convert - * @return this object - */ - @SuppressWarnings("fallthrough") - public IntegerFormatter format(int value) { - try { - // Scratch all instance variables and start = result.length(). - setStart(); - - // Different process for each format type. - switch (spec.type) { - case 'd': - case Spec.NONE: - case 'u': - case 'i': - // None format or d-format: decimal - format_d(value); - break; - - case 'x': - // hexadecimal. - format_x(value, false); - break; - - case 'X': - // HEXADECIMAL! - format_x(value, true); - break; - - case 'o': - // Octal. - format_o(value); - break; - - case 'b': - // Binary. - format_b(value); - break; - - case 'c': - case '%': - // Binary. - format_c(value); - break; - - case 'n': - // Locale-sensitive version of d-format should be here. - format_d(value); - break; - - default: - // Should never get here, since this was checked in caller. - throw unknownFormat(spec.type, "integer"); - } - - // If required to, group the whole-part digits. - if (spec.grouping) { - groupDigits(3, ','); - } - - return this; - } catch (OutOfMemoryError eme) { - // Most probably due to excessive precision. - throw precisionTooLarge("integer"); - } - } - - /** - * Format the value as decimal (into {@link #result}). The option for mandatory sign is dealt - * with by reference to the format specification. - * - * @param value to convert - */ - void format_d(int value) { - String number; - if (value < 0) { - // Negative value: deal with sign and base, and convert magnitude. - negativeSign(null); - // Here there is a special case for int min value due to wrapping, to avoid a double - // negative sign being added see http://bugs.jython.org/issue2672 - // The string constant here is -Integer.MIN_VALUE - number = value == Integer.MIN_VALUE ? "2147483648" : Integer.toString(-value); - } else { - // Positive value: deal with sign, base and magnitude. 
- positiveSign(null); - number = Integer.toString(value); - } - appendNumber(number); - } - - /** - * Format the value as hexadecimal (into {@link #result}), with the option of using upper-case - * or lower-case letters. The options for mandatory sign and for the presence of a base-prefix - * "0x" or "0X" are dealt with by reference to the format specification. - * - * @param value to convert - * @param upper if the hexadecimal should be upper case - */ - void format_x(int value, boolean upper) { - String base = upper ? "0X" : "0x"; - String number; - if (value < 0) { - // Negative value: deal with sign and base, and convert magnitude. - negativeSign(base); - number = Integer.toHexString(-value); - } else { - // Positive value: deal with sign, base and magnitude. - positiveSign(base); - number = Integer.toHexString(value); - } - // Append to result, case-shifted if necessary. - if (upper) { - number = number.toUpperCase(); - } - appendNumber(number); - } - - /** - * Format the value as octal (into {@link #result}). The options for mandatory sign and for the - * presence of a base-prefix "0o" are dealt with by reference to the format specification. - * - * @param value to convert - */ - void format_o(int value) { - String base = "0o"; - String number; - if (value < 0) { - // Negative value: deal with sign and base, and convert magnitude. - negativeSign(base); - number = Integer.toOctalString(-value); - } else { - // Positive value: deal with sign, base and magnitude. - positiveSign(base); - number = Integer.toOctalString(value); - } - // Append to result. - appendNumber(number); - } - - /** - * Format the value as binary (into {@link #result}). The options for mandatory sign and for the - * presence of a base-prefix "0b" are dealt with by reference to the format specification. - * - * @param value to convert - */ - void format_b(int value) { - String base = "0b"; - String number; - if (value < 0) { - // Negative value: deal with sign and base, and convert magnitude. 
- negativeSign(base); - number = Integer.toBinaryString(-value); - } else { - // Positive value: deal with sign, base and magnitude. - positiveSign(base); - number = Integer.toBinaryString(value); - } - // Append to result. - appendNumber(number); - } - - /** - * Format the value as a character (into {@link #result}). - * - * @param value to convert - */ - void format_c(int value) { - // Limit is 256 if we're formatting for byte output, unicode range otherwise. - int limit = bytes ? 256 : PySystemState.maxunicode + 1; - if (value < 0 || value >= limit) { - throw Py.OverflowError("%c arg not in range(0x" + Integer.toHexString(limit) + ")"); - } else { - result.appendCodePoint(value); - } - } /** * Append to {@link #result} buffer a sign (if one is specified for positive numbers) and, in @@ -586,53 +405,12 @@ private static final String toBinaryString(BigInteger value) { return signum < 0 ? "-" + result : result; } - /** Format specification used by bin(). */ - public static final Spec BIN = InternalFormat.fromText("#b"); - - /** Format specification used by oct(). */ - public static final Spec OCT = InternalFormat.fromText("#o"); - - /** Format specification used by hex(). */ - public static final Spec HEX = InternalFormat.fromText("#x"); - - /** - * Convert the object to binary according to the conventions of Python built-in - * bin(). The object's __index__ method is called, and is responsible for raising - * the appropriate error (which the base {@link PyObject#__index__()} does). - * - * @param number to convert - * @return PyString converted result - */ - // Follow this pattern in Python 3, where objects no longer have __hex__, __oct__ members. - public static PyString bin(PyObject number) { - return formatNumber(number, BIN); - } - - /** - * Convert the object according to the conventions of Python built-in hex(), or - * oct(). 
The object's __index__ method is called, and is responsible - * for raising the appropriate error (which the base {@link PyObject#__index__()} does). - * - * @param number to convert - * @return PyString converted result - */ - public static PyString formatNumber(PyObject number, Spec spec) { - number = number.__index__(); - IntegerFormatter f = new IntegerFormatter(spec); - if (number instanceof PyInteger) { - f.format(((PyInteger)number).getValue()); - } else { - f.format(((PyLong)number).getValue()); - } - return new PyString(f.getResult()); - } - /** * A minor variation on {@link IntegerFormatter} to handle "traditional" %-formatting. The * difference is in support for spec.precision, the formatting octal in "alternate" * mode (0 and 0123, not 0o0 and 0o123), and in c-format (in the error logic). */ - public static class Traditional extends IntegerFormatter { + public abstract static class Traditional extends IntegerFormatter { /** * Construct the formatter from a client-supplied buffer, to which the result will be @@ -683,65 +461,23 @@ void format_o(BigInteger value) { * Format the value as a character (into {@link #result}). * * @param value to convert + * @throws FormatOverflow if {@code value} out of range */ @Override - void format_c(BigInteger value) { + void format_c(BigInteger value) throws FormatOverflow { if (value.signum() < 0) { - throw Py.OverflowError("unsigned byte integer is less than minimum"); + throw new FormatOverflow("unsigned byte integer is less than minimum"); } else { // Limit is 256 if we're formatting for byte output, unicode range otherwise. BigInteger limit = bytes ? LIMIT_BYTE : LIMIT_UNICODE; if (value.compareTo(limit) >= 0) { - throw Py.OverflowError("unsigned byte integer is greater than maximum"); + throw new FormatOverflow("unsigned byte integer is greater than maximum"); } else { result.appendCodePoint(value.intValue()); } } } - /** - * Format the value as octal (into {@link #result}). 
The options for mandatory sign and for - * the presence of a base-prefix "0" are dealt with by reference to the format - * specification. - * - * @param value to convert - */ - @Override - void format_o(int value) { - String number; - if (value < 0) { - // Negative value: deal with sign and convert magnitude. - negativeSign(null); - number = Integer.toOctalString(-value); - } else { - // Positive value: deal with sign, base and magnitude. - positiveSign(null); - number = Integer.toOctalString(value); - } - // Append to result. - appendOctalNumber(number); - } - - /** - * Format the value as a character (into {@link #result}). - * - * @param value to convert - */ - @Override - void format_c(int value) { - if (value < 0) { - throw Py.OverflowError("unsigned byte integer is less than minimum"); - } else { - // Limit is 256 if we're formatting for byte output, unicode range otherwise. - int limit = bytes ? 256 : PySystemState.maxunicode + 1; - if (value >= limit) { - throw Py.OverflowError("unsigned byte integer is greater than maximum"); - } else { - result.appendCodePoint(value); - } - } - } - /** * Append a string (number) to {@link #result}, but insert leading zeros first in order * that, on return, the whole-part length #lenWhole should be no less than the precision. diff --git a/src/org/python/core/stringlib/InternalFormat.java b/core/src/main/java/org/python/core/stringlib/InternalFormat.java similarity index 50% rename from src/org/python/core/stringlib/InternalFormat.java rename to core/src/main/java/org/python/core/stringlib/InternalFormat.java index 4e6111111..eecdeee97 100644 --- a/src/org/python/core/stringlib/InternalFormat.java +++ b/core/src/main/java/org/python/core/stringlib/InternalFormat.java @@ -1,12 +1,13 @@ -// Copyright (c) Jython Developers +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
package org.python.core.stringlib; -import org.python.core.Py; -import org.python.core.PyException; -import org.python.core.PyObject; -import org.python.core.PyString; -import org.python.core.PyUnicode; +import org.python.core.PyObjectUtil.NoConversion; +/** + * Static methods and classes for representing a format string and + * building the formatted output. + */ public class InternalFormat { /** @@ -14,88 +15,90 @@ public class InternalFormat { * * @param text to parse * @return parsed equivalent to text + * @throws IllegalArgumentException on a parse error */ - public static Spec fromText(String text) { + public static Spec fromText(String text) throws IllegalArgumentException { Parser parser = new Parser(text); - try { - return parser.parse(); - } catch (IllegalArgumentException e) { - throw Py.ValueError(e.getMessage()); - } - } - - /** - * Create a {@link Spec} object by parsing a format specification, supplied as an object. - * - * @param text to parse - * @return parsed equivalent to text - */ - public static Spec fromText(PyObject text, String method) { - if (text instanceof PyString) { - return fromText(((PyString)text).getString()); - } else { - throw Py.TypeError(method + " requires str or unicode"); - } + return parser.parse(); } /** - * A class that provides the base for implementations of type-specific formatting. In a limited - * way, it acts like a StringBuilder to which text and one or more numbers may be appended, - * formatted according to the format specifier supplied at construction. These are ephemeral - * objects that are not, on their own, thread safe. + * A class that provides the base for implementations of + * type-specific formatting. In a limited way, it acts like a + * StringBuilder to which text and one or more numbers may be + * appended, formatted according to the format specifier supplied at + * construction. These are ephemeral objects that are not, on their + * own, thread safe. 
*/ - public static class Formatter implements Appendable { + public static abstract class AbstractFormatter implements Appendable { - /** The specification according to which we format any number supplied to the method. */ + /** + * The specification according to which we format any number + * supplied to the method. + */ protected final Spec spec; + /** The (partial) result. */ protected StringBuilder result; /** - * Signals the client's intention to make a PyString (or other byte-like) interpretation of - * {@link #result}, rather than a PyUnicode one. + * Signals the client's intention to make a {@code bytes} (or other + * byte-like) interpretation of {@link #result}, rather than a + * {@code String} one. */ + // XXX Needs a re-think for Jython 3: specialised adapter? protected boolean bytes; - /** The start of the formatted data for padding purposes, <={@link #start} */ + /** + * The start of the formatted data for padding purposes, + * <={@link #start} + */ protected int mark; - /** The latest number we are working on floats at the end of the result, and starts here. */ + /** + * The latest number we are working on floats at the end of the + * result, and starts here. + */ protected int start; - /** If it contains no sign, this length is zero, and >0 otherwise. */ + /** + * If it contains no sign, this length is zero, and >0 otherwise. + */ protected int lenSign; - /** The length of the whole part (to left of the decimal point or exponent) */ + /** + * The length of the whole part (to left of the decimal point or + * exponent) + */ protected int lenWhole; /** - * Construct the formatter from a client-supplied buffer and a specification. Sets - * {@link #mark} and {@link #start} to the end of the buffer. The new formatted object will - * therefore be appended there and, when the time comes, padding will be applied to (just) - * the new text. + * Construct the formatter from a client-supplied buffer and a + * specification. 
Sets {@link #mark} and {@link #start} to the end + * of the buffer. The new formatted object will therefore be + * appended there and, when the time comes, padding will be applied + * to (just) the new text. * * @param result destination buffer * @param spec parsed conversion specification */ - public Formatter(StringBuilder result, Spec spec) { + public AbstractFormatter(StringBuilder result, Spec spec) { this.spec = spec; this.result = result; this.start = this.mark = result.length(); } /** - * Construct the formatter from a specification and initial buffer capacity. Sets - * {@link #mark} to the end of the buffer. + * Construct the formatter from a specification and initial buffer + * capacity. Sets {@link #mark} to the end of the buffer. * * @param spec parsed conversion specification * @param width of buffer initially */ - public Formatter(Spec spec, int width) { - this(new StringBuilder(width), spec); - } + public AbstractFormatter(Spec spec, int width) { this(new StringBuilder(width), spec); } /** - * Signals the client's intention to make a PyString (or other byte-like) interpretation of - * {@link #result}, rather than a PyUnicode one. Only formatters that could produce - * characters >255 are affected by this (e.g. c-format). Idiom: + * Signals the client's intention to make a PyString (or other + * byte-like) interpretation of {@link #result}, rather than a + * PyUnicode one. Only formatters that could produce characters + * >255 are affected by this (e.g. c-format). Idiom: * *

          * MyFormatter f = new MyFormatter( InternalFormatter.fromText(formatSpec) );
@@ -104,115 +107,126 @@ public Formatter(Spec spec, int width) {
          * return f.getPyResult();
          * 
* - * @param bytes true to signal the intention to make a byte-like interpretation + * @param bytes true to signal the intention to make a byte-like + * interpretation */ - public void setBytes(boolean bytes) { - this.bytes = bytes; - } + @Deprecated + public void setBytes(boolean bytes) { this.bytes = bytes; } /** * Whether initialised for a byte-like interpretation. * * @return bytes attribute */ - public boolean isBytes() { - return bytes; - } + @Deprecated + public boolean isBytes() { return bytes; } /** - * Current (possibly final) result of the formatting, as a String. + * Format an object into the buffer according to the specification + * with which this instance has been initialised. The specific + * behaviour, and the acceptable types, are for a sub-class to + * define. Also, it is usually desirable for the return type in an + * implementing class to be a more specific. * - * @return formatted result + * @param o to format + * @return this formatter + * @throws NoConversion if {@code o} is not convertible + * @throws FormatError from the process of formatting */ - public String getResult() { - return result.toString(); - } + public abstract AbstractFormatter format(Object o) throws NoConversion, FormatError; /** - * Convenience method to return the current result of the formatting, as a - * PyObject, either {@link PyString} or {@link PyUnicode} according to - * {@link #bytes}. + * Current (possibly final) result of the formatting, as a + * {@code String}. * * @return formatted result */ - public PyString getPyResult() { - String r = getResult(); - if (bytes) { - return new PyString(r); - } else { - return new PyUnicode(r); - } - } + public String getResult() { return result.toString(); } /* - * Implement Appendable interface by delegation to the result buffer. - * - * @see java.lang.Appendable#append(char) + * Implement Appendable interface by delegation to the result + * buffer. 
See java.lang.Appendable#append(char) */ @Override - public Formatter append(char c) { + public AbstractFormatter append(char c) { result.append(c); return this; } + public AbstractFormatter append(int c) { + result.appendCodePoint(c); + return this; + } + @Override - public Formatter append(CharSequence csq) { + public AbstractFormatter append(CharSequence csq) { result.append(csq); return this; } @Override - public Formatter append(CharSequence csq, int start, int end) // + public AbstractFormatter append(CharSequence csq, int start, int end) throws IndexOutOfBoundsException { result.append(csq, start, end); return this; } /** - * Clear the instance variables describing the latest object in {@link #result}, ready to - * receive a new one: sets {@link #start} and calls {@link #reset()}. This is necessary when - * a Formatter is to be re-used. Note that this leaves {@link #mark} where it - * is. In the core, we need this to support complex: two floats in the same - * format, but padded as a unit. + * Clear the instance variables describing the latest object in + * {@link #result}, ready to receive a new one: sets {@link #start} + * and calls {@link #reset()}. This is necessary when a + * {@code Formatter} is to be re-used. Note that this leaves + * {@link #mark} where it is. In the core, we need this to support + * {@code complex}: two floats in the same format, but padded as a + * unit. */ public void setStart() { - // The new value will float at the current end of the result buffer. + /* + * The new value will float at the current end of the result buffer. + */ start = result.length(); - // If anything has been added since construction, reset all state. + /* + * If anything has been added since construction, reset all state. + */ if (start > mark) { - // Clear the variable describing the latest number in result. + /* + * Clear the variable describing the latest number in result. 
+ */ reset(); } } /** - * Clear the instance variables describing the latest object in {@link #result}, ready to - * receive a new one. This is called from {@link #setStart()}. Subclasses override this - * method and call {@link #setStart()} at the start of their format method. + * Clear the instance variables describing the latest object in + * {@link #result}, ready to receive a new one. This is called from + * {@link #setStart()}. Subclasses override this method and call + * {@link #setStart()} at the start of their format method. */ protected void reset() { - // Clear the variables describing the latest object in result. + /* + * Clear the variables describing the latest object in result. + */ lenSign = lenWhole = 0; } /** - * Supports {@link #toString()} by returning the lengths of the successive sections in the - * result buffer, used for navigation relative to {@link #start}. The toString - * method shows a '|' character between each section when it prints out the buffer. Override - * this when you define more lengths in the subclass. + * Supports {@link #toString()} by returning the lengths of the + * successive sections in the result buffer, used for navigation + * relative to {@link #start}. The {@code toString} method shows a + * '|' character between each section when it prints out the buffer. + * Override this when you define more lengths in the subclass. * * @return the lengths of the successive sections */ - protected int[] sectionLengths() { - return new int[] {lenSign, lenWhole}; - } + protected int[] sectionLengths() { return new int[] {lenSign, lenWhole}; } /** * {@inheritDoc} *

- * Overridden to provide a debugging view in which the actual text is shown divided up by - * the len* member variables. If the dividers don't look right, those variables - * have not remained consistent with the text. + * Overridden to provide a debugging view in which the actual text + * is shown divided up by the {@code len*} member variables. If the + * dividers don't look right, those variables have not remained + * consistent with the text. */ @Override public String toString() { @@ -230,38 +244,51 @@ public String toString() { } buf.setCharAt(p - 1, ']'); } catch (IndexOutOfBoundsException e) { - // Some length took us beyond the end of the result buffer. Pass. + /* + * Some length took us beyond the end of the result buffer. Pass. + */ } return buf.toString(); } } /** - * Insert grouping characters (conventionally commas) into the whole part of the number. - * {@link #lenWhole} will increase correspondingly. + * Insert grouping characters (conventionally commas) into the whole + * part of the number. {@link #lenWhole} will increase + * correspondingly. * * @param groupSize normally 3. * @param comma or some other character to use as a separator. */ protected void groupDigits(int groupSize, char comma) { - // Work out how many commas (or whatever) it takes to group the whole-number part. + /* + * Work out how many commas (or whatever) it takes to group the + * whole-number part. + */ int commasNeeded = (lenWhole - 1) / groupSize; if (commasNeeded > 0) { - // Index *just after* the current last digit of the whole part of the number. + /* + * Index *just after* the current last digit of the whole part of + * the number. + */ int from = start + lenSign + lenWhole; // Open a space into which the whole part will expand. makeSpaceAt(from, commasNeeded); // Index *just after* the end of that space. int to = from + commasNeeded; - // The whole part will be longer by the number of commas to be inserted. 
+ /* + * The whole part will be longer by the number of commas to be + * inserted. + */ lenWhole += commasNeeded; /* - * Now working from high to low, copy all the digits that have to move. Each pass - * copies one group and inserts a comma, which makes the to-pointer move one place - * extra. The to-pointer descends upon the from-pointer from the right. + * Now working from high to low, copy all the digits that have to + * move. Each pass copies one group and inserts a comma, which makes + * the to-pointer move one place extra. The to-pointer descends upon + * the from-pointer from the right. */ while (to > from) { // Copy a group @@ -275,9 +302,10 @@ protected void groupDigits(int groupSize, char comma) { } /** - * Make a space in {@link #result} of a certain size and position. On return, the segment - * lengths are likely to be invalid until the caller adjusts them corresponding to the - * insertion. There is no guarantee what the opened space contains. + * Make a space in {@link #result} of a certain size and position. + * On return, the segment lengths are likely to be invalid until the + * caller adjusts them corresponding to the insertion. There is no + * guarantee what the opened space contains. * * @param pos at which to make the space * @param size of the space @@ -285,7 +313,9 @@ protected void groupDigits(int groupSize, char comma) { protected void makeSpaceAt(int pos, int size) { int n = result.length(); if (pos < n) { - // Space is not at the end: must copy what's to the right of pos. + /* + * Space is not at the end: must copy what's to the right of pos. + */ String tail = result.substring(pos); result.setLength(n + size); result.replace(pos + size, n + size, tail); @@ -296,8 +326,8 @@ protected void makeSpaceAt(int pos, int size) { } /** - * Convert letters in the representation of the current number (in {@link #result}) to upper - * case. + * Convert letters in the representation of the current number (in + * {@link #result}) to upper case. 
*/ protected void uppercase() { int end = result.length(); @@ -308,15 +338,18 @@ protected void uppercase() { } /** - * Pad the result so far (defined as the contents of {@link #result} from {@link #mark} to - * the end) using the alignment, target width and fill character defined in {@link #spec}. - * The action of padding will increase the length of this segment to the target width, if - * that is greater than the current length. + * Pad the result so far (defined as the contents of {@link #result} + * from {@link #mark} to the end) using the alignment, target width + * and fill character defined in {@link #spec}. The action of + * padding will increase the length of this segment to the target + * width, if that is greater than the current length. *

- * When the padding method has decided that that it needs to add n padding characters, it - * will affect {@link #start} or {@link #lenWhole} as follows. - * - * + * When the padding method has decided that it needs to add n + * padding characters, it will affect {@link #start} or + * {@link #lenWhole} as follows. + *
Effect of padding on {@link #start} or {@link #lenWhole}
+ * * * * @@ -353,39 +386,49 @@ protected void uppercase() { * * *
Effect of padding on {@link #start} or + * {@link #lenWhole}
alignmeaning+n
- * Note that in the "pad after sign" mode, only the last number into the buffer receives the - * padding. This padding gets incorporated into the whole part of the number. (In other - * modes, the padding is around result[mark:].) When this would not be - * appropriate, it is up to the client to disallow this (which complex does). + * Note that in the "pad after sign" mode, only the last number into + * the buffer receives the padding. This padding gets incorporated + * into the whole part of the number. (In other modes, the padding + * is around {@code result[mark:]}.) When this would not be + * appropriate, it is up to the client to disallow this (which + * {@code complex} does). * * @return this Formatter object */ - public Formatter pad() { - // We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0. + public AbstractFormatter pad() { + /* + * We'll need this many pad characters (if>0). Note + * Spec.UNDEFINED<0. + */ int n = spec.width - (result.length() - mark); - if (n > 0) { - pad(mark, n); - } + if (n > 0) { pad(mark, n); } return this; } /** - * Pad the last result (defined as the contents of {@link #result} from argument - * leftIndex to the end) using the alignment, by n repetitions of - * the fill character defined in {@link #spec}, and distributed according to - * spec.align. The value of leftIndex is only used if the - * alignment is '>' (left) or '^' (both). The value of the critical lengths (lenWhole, - * lenSign, etc.) are not affected, because we assume that leftIndex <= - * {@link #start}. + * Pad the last result (defined as the contents of {@link #result} + * from argument {@code leftIndex} to the end) using the alignment, + * by {@code n} repetitions of the fill character defined in + * {@link #spec}, and distributed according to {@code spec.align}. + * The value of {@code leftIndex} is only used if the alignment is + * '>' (left) or '^' (both). The value of the critical lengths + * (lenWhole, lenSign, etc.) 
are not affected, because we assume + * that {@code leftIndex <= } {@link #start}. * - * @param leftIndex the index in result at which to insert left-fill characters. + * @param leftIndex the index in result at which to insert left-fill + * characters. * @param n number of fill characters to insert. */ protected void pad(int leftIndex, int n) { - char align = spec.getAlign('>'); // Right for numbers (strings will supply '<' align) + char align = spec.getAlign('>'); + /* + * Right for numbers (strings will supply '<' align) + */ char fill = spec.getFill(' '); - // Start by assuming padding is all leading ('>' case or '=') + /* + * Start by assuming padding is all leading ('>' case or '=') + */ int leading = n; // Split the total padding according to the alignment @@ -407,33 +450,35 @@ protected void pad(int leftIndex, int n) { leftIndex = start + lenSign; lenWhole += leading; } else { - // Default is to insert at the stated leftIndex <= start. + /* + * Default is to insert at the stated leftIndex <= start. + */ start += leading; } makeSpaceAt(leftIndex, leading); - for (int i = 0; i < leading; i++) { - result.setCharAt(leftIndex + i, fill); - } + for (int i = 0; i < leading; i++) { result.setCharAt(leftIndex + i, fill); } } // Append the trailing space - for (int i = 0; i < trailing; i++) { - result.append(fill); - } + for (int i = 0; i < trailing; i++) { result.append(fill); } // Check for special case if (align == '=' && fill == '0' && spec.grouping) { - // We must extend the grouping separator into the padding + /* + * We must extend the grouping separator into the padding + */ zeroPadAfterSignWithGroupingFixup(3, ','); } } /** - * Fix-up the zero-padding of the last formatted number in {@link #result} in the special - * case where a sign-aware padding ({@link #spec}.align='=') was requested, the - * fill character is '0', and the digits are to be grouped. 
In these exact - * circumstances, the grouping, which must already have been applied to the (whole part) - * number itself, has to be extended into the zero-padding. + * Fix-up the zero-padding of the last formatted number in + * {@link #result} in the special case where a sign-aware padding + * ({@code {@link #spec}.align='='}) was requested, the fill + * character is {@code '0'}, and the digits are to be grouped. In + * these exact circumstances, the grouping, which must already have + * been applied to the (whole part) number itself, has to be + * extended into the zero-padding. * *

          * >>> format(-12e8, " =30,.3f")
@@ -448,9 +493,9 @@ protected void pad(int leftIndex, int n) {
          * '-0,000,000,001,200,000,000.000'
          * 
* - * The padding has increased the overall length of the result to the target width. About one - * in three calls to this method adds one to the width, because the whole part cannot start - * with a comma. + * The padding has increased the overall length of the result to the + * target width. About one in three calls to this method adds one to + * the width, because the whole part cannot start with a comma. * *
          * >>> format(-12e8, " =30,.4f")
@@ -464,31 +509,38 @@ protected void pad(int leftIndex, int n) {
          */
         protected void zeroPadAfterSignWithGroupingFixup(int groupSize, char comma) {
             /*
-             * Suppose the format call was format(-12e8, "0=30,.3f"). At this point, we have
-             * something like this in result: .. [-|0000000000001,200,000,000|.|000||]
+             * Suppose the format call was format(-12e8, "0=30,.3f"). At this
+             * point, we have something like this in result: ..
+             * [-|0000000000001,200,000,000|.|000||]
              *
-             * All we need do is over-write some of the zeros with the separator comma, in the
-             * portion marked as the whole-part: [-|0,000,000,001,200,000,000|.|000||]
+             * All we need do is over-write some of the zeros with the separator
+             * comma, in the portion marked as the whole-part:
+             * [-|0,000,000,001,200,000,000|.|000||]
              */
 
             // First digit of the whole-part.
             int firstZero = start + lenSign;
             // One beyond last digit of the whole-part.
             int p = firstZero + lenWhole;
-            // Step back down the result array visiting the commas. (Easiest to do all of them.)
+            /*
+             * Step back down the result array visiting the commas. (Easiest to
+             * do all of them.)
+             */
             int step = groupSize + 1;
-            for (p = p - step; p >= firstZero; p -= step) {
-                result.setCharAt(p, comma);
-            }
+            for (p = p - step; p >= firstZero; p -= step) { result.setCharAt(p, comma); }
 
-            // Sometimes the last write was exactly at the first padding zero.
+            /*
+             * Sometimes the last write was exactly at the first padding zero.
+             */
             if (p + step == firstZero) {
                 /*
-                 * Suppose the format call was format(-12e8, "0=30,.4f"). At the beginning, we had
-                 * something like this in result: . [-|000000000001,200,000,000|.|0000||]
+                 * Suppose the format call was format(-12e8, "0=30,.4f"). At the
+                 * beginning, we had something like this in result: .
+                 * [-|000000000001,200,000,000|.|0000||]
                  *
-                 * And now, result looks like this: [-|,000,000,001,200,000,000|.|0000||] in which
-                 * the first comma is wrong, but so would be a zero. We have to insert another zero,
+                 * And now, result looks like this:
+                 * [-|,000,000,001,200,000,000|.|0000||] in which the first comma is
+                 * wrong, but so would be a zero. We have to insert another zero,
                  * even though this makes the result longer than we were asked for.
                  */
                 result.insert(firstZero, '0');
@@ -497,115 +549,144 @@ protected void zeroPadAfterSignWithGroupingFixup(int groupSize, char comma) {
         }
 
         /**
-         * Convenience method returning a {@link Py#ValueError} reporting:
-         * 

- * "Unknown format code '"+code+"' for object of type '"+forType+"'" + * Convenience method returning a {@link FormatError} reporting: + * "Unknown format code 'X' for object of type 'T'" * * @param code the presentation type * @param forType the type it was found applied to * @return exception to throw */ - public static PyException unknownFormat(char code, String forType) { + public static FormatError unknownFormat(char code, String forType) { String msg = "Unknown format code '" + code + "' for object of type '" + forType + "'"; - return Py.ValueError(msg); + return new FormatError(msg); } /** - * Convenience method returning a {@link Py#ValueError} reporting that alternate form is not - * allowed in a format specifier for the named type. + * Convenience method returning a {@link FormatError} reporting that + * alternate form is not allowed in a format specifier for the named + * type. * * @param forType the type it was found applied to * @return exception to throw */ - public static PyException alternateFormNotAllowed(String forType) { + public static FormatError alternateFormNotAllowed(String forType) { return alternateFormNotAllowed(forType, '\0'); } /** - * Convenience method returning a {@link Py#ValueError} reporting that alternate form is not - * allowed in a format specifier for the named type and specified typoe code. + * Convenience method returning a {@link FormatError} reporting that + * alternate form is not allowed in a format specifier for the named + * type and specified type code.
* * @param forType the type it was found applied to * @param code the formatting code (or '\0' not to mention one) * @return exception to throw */ - public static PyException alternateFormNotAllowed(String forType, char code) { - return notAllowed("Alternate form (#)", forType, code); + public static FormatError alternateFormNotAllowed(String forType, char code) { + return notAllowed("Alternate form", '#', forType, code); } /** - * Convenience method returning a {@link Py#ValueError} reporting that the given alignment - * flag is not allowed in a format specifier for the named type. + * Convenience method returning a {@link FormatError} reporting that + * the given alignment flag is not allowed in a format specifier for + * the named type. * * @param align type of alignment * @param forType the type it was found applied to * @return exception to throw */ - public static PyException alignmentNotAllowed(char align, String forType) { - return notAllowed("'" + align + "' alignment flag", forType, '\0'); + public static FormatError alignmentNotAllowed(char align, String forType) { + return notAllowed("'" + align + "' alignment flag", forType); } /** - * Convenience method returning a {@link Py#ValueError} reporting that specifying a sign is - * not allowed in a format specifier for the named type. + * Convenience method returning a {@link FormatError} reporting that + * specifying a sign is not allowed in a format specifier for the + * named type. 
* * @param forType the type it was found applied to * @param code the formatting code (or '\0' not to mention one) * @return exception to throw */ - public static PyException signNotAllowed(String forType, char code) { - return notAllowed("Sign", forType, code); + public static FormatError signNotAllowed(String forType, char code) { + return notAllowed("Sign", '\0', forType, code); } /** - * Convenience method returning a {@link Py#ValueError} reporting that specifying a - * precision is not allowed in a format specifier for the named type. + * Convenience method returning a {@link FormatError} reporting that + * zero padding is not allowed in a format specifier for the named + * type. * * @param forType the type it was found applied to * @return exception to throw */ - public static PyException precisionNotAllowed(String forType) { - return notAllowed("Precision", forType, '\0'); + public static FormatError zeroPaddingNotAllowed(String forType) { + return notAllowed("Zero padding is", forType); } /** - * Convenience method returning a {@link Py#ValueError} reporting that zero padding is not - * allowed in a format specifier for the named type. + * Convenience method returning a {@link FormatError} reporting that + * specifying a precision is not allowed in a format specifier for + * the named type. * * @param forType the type it was found applied to * @return exception to throw */ - public static PyException zeroPaddingNotAllowed(String forType) { - return notAllowed("Zero padding", forType, '\0'); + public static FormatError precisionNotAllowed(String forType) { + return notAllowed("Precision", forType); } /** - * Convenience method returning a {@link Py#ValueError} reporting that some format specifier - * feature is not allowed for the named data type. + * Convenience method returning a {@link FormatError} reporting that + * some format specifier feature is not allowed for the named data + * type. 
* * @param outrage committed in the present case - * @param forType the data type (e.g. "integer") it where it is an outrage + * @param forType the data type (e.g. "integer") where it is an + * outrage * @return exception to throw */ - public static PyException notAllowed(String outrage, String forType) { - return notAllowed(outrage, forType, '\0'); + public static FormatError notAllowed(String outrage, String forType) { + return notAllowed(outrage, '\0', forType, '\0'); } /** - * Convenience method returning a {@link Py#ValueError} reporting that some format specifier - * feature is not allowed for the named format code and data type. Produces a message like: - *

- * outrage+" not allowed with "+forType+" format specifier '"+code+"'" - *

- * outrage+" not allowed in "+forType+" format specifier" + * Convenience method returning a {@link FormatError} reporting that + * some format specifier feature is not allowed for the named data + * type and format specifier {@code code}. + * + * @param outrage committed in the present case + * @param forType the data type (e.g. "integer") where it is an + * outrage + * @param code the formatting code for which it is an outrage (or + * '\0' not to mention one) + * @return exception to throw + */ + public static FormatError notAllowed(String outrage, String forType, char code) { + return notAllowed(outrage, '\0', forType, code); + } + + /** + * Convenience method returning a {@link FormatError} reporting that + * some format specifier feature is not allowed for the named format + * code and data type. Produces a message like "OUTRAGE (SYMBOL) not + * allowed with TYPE format specifier 'X'" or "OUTRAGE not allowed + * in TYPE format specifier". * * @param outrage committed in the present case - * @param forType the data type (e.g. "integer") it where it is an outrage - * @param code the formatting code for which it is an outrage (or '\0' not to mention one) + * @param symbol the modifier ('#' say) (or '\0' not to mention one) + * @param forType the data type (e.g. "integer") where it is an + * outrage + * @param code the formatting code for which it is an outrage (or + * '\0' not to mention one) * @return exception to throw */ - public static PyException notAllowed(String outrage, String forType, char code) { + public static FormatError notAllowed(String outrage, char symbol, String forType, + char code) { // Try really hard to be like CPython + // We may mention a symbol like # or , + String symbolAsString = symbol == 0 ? "" : " (" + symbol + ")"; + // We may mention the type, and this changes the sentence.
String codeAsString, withOrIn; if (code == 0) { withOrIn = "in "; @@ -614,41 +695,41 @@ public static PyException notAllowed(String outrage, String forType, char code) withOrIn = "with "; codeAsString = " '" + code + "'"; } - String msg = - outrage + " not allowed " + withOrIn + forType + " format specifier" - + codeAsString; - return Py.ValueError(msg); + String msg = outrage + symbolAsString + " not allowed " + withOrIn + forType + + " format specifier" + codeAsString; + return new FormatError(msg); } /** - * Convenience method returning a {@link Py#OverflowError} reporting: - *

- * "formatted "+type+" is too long (precision too large?)" + * Convenience method returning a {@link FormatOverflow} reporting: + * "formatted TYPE is too long (precision too large?)". * * @param type of formatting ("integer", "float") * @return exception to throw */ - public static PyException precisionTooLarge(String type) { + public static FormatOverflow precisionTooLarge(String type) { String msg = "formatted " + type + " is too long (precision too large?)"; - return Py.OverflowError(msg); + return new FormatOverflow(msg); } - } /** - * Parsed PEP-3101 format specification of a single field, encapsulating the format for use by - * formatting methods. This class holds the several attributes that might be decoded from a - * format specifier. Each attribute has a reserved value used to indicate "unspecified". - * Spec objects may be merged such that one Spec provides values, - * during the construction of a new Spec, for attributes that are unspecified in a + * Parsed PEP-3101 format specification of a single field, + * encapsulating the format for use by formatting methods. This + * class holds the several attributes that might be decoded from a + * format specifier. Each attribute has a reserved value used to + * indicate "unspecified". {@code Spec} objects may be merged such + * that one {@code Spec} provides values, during the construction of + * a new {@code Spec}, for attributes that are unspecified in a * primary source. *

- * This structure is returned by factory method {@link #fromText(String)}, and having public - * final members is freely accessed by formatters such as {@link FloatFormatter}, and the - * __format__ methods of client object types. + * This structure is returned by factory method + * {@link #fromText(String)}, and having public final members is + * freely accessed by formatters such as {@link FloatFormatter}, and + * the __format__ methods of client object types. *

- * The fields correspond to the elements of a format specification. The grammar of a format - * specification is: + * The fields correspond to the elements of a format specification. + * The grammar of a format specification is: * *

      * [[fill]align][sign][#][0][width][,][.precision][type]
@@ -670,27 +751,39 @@ public static class Spec {
 
         /** The fill character specified, or U+FFFF if unspecified. */
         public final char fill;
-        /** Alignment indicator is one of {'<', '^', '>', '=', or U+FFFF if unspecified. */
-        public final char align;
         /**
-         * Sign-handling flag, one of '+', '-', or ' ', or
+         * Alignment indicator is one of {'<', '^', '>', '='}, or
          * U+FFFF if unspecified.
          */
+        public final char align;
+        /**
+         * Sign-handling flag, one of {@code '+'}, {@code '-'}, or
+         * {@code ' '}, or {@code U+FFFF} if unspecified.
+         */
         public final char sign;
         /** The alternative format flag '#' was given. */
         public final boolean alternate;
         /** Width to which to pad the result, or -1 if unspecified. */
         public final int width;
-        /** Insert the grouping separator (which in Python always indicates a group-size of 3). */
+        /**
+         * Insert the grouping separator (which in Python always indicates a
+         * group-size of 3).
+         */
         public final boolean grouping;
         /** Precision decoded from the format, or -1 if unspecified. */
         public final int precision;
         /** Type key from the format, or U+FFFF if unspecified. */
         public final char type;
 
-        /** Non-character code point used to represent "no value" in char attributes. */
+        /**
+         * Non-character code point used to represent "no value" in
+         * {@code char} attributes.
+         */
         public static final char NONE = '\uffff';
-        /** Negative value used to represent "no value" in int attributes. */
+        /**
+         * Negative value used to represent "no value" in {@code int}
+         * attributes.
+         */
         public static final int UNSPECIFIED = -1;
 
         /**
@@ -699,19 +792,16 @@ public static class Spec {
          * @param c attribute
          * @return true only if the attribute is not equal to {@link #NONE}
          */
-        public static final boolean specified(char c) {
-            return c != NONE;
-        }
+        public static final boolean specified(char c) { return c != NONE; }
 
         /**
          * Test to see if an attribute has been specified.
          *
          * @param value of attribute
-         * @return true only if the attribute is ≥0 (meaning that it has been specified).
+         * @return true only if the attribute is ≥0 (meaning that it has
+         *     been specified).
          */
-        public static final boolean specified(int value) {
-            return value >= 0;
-        }
+        public static final boolean specified(int value) { return value >= 0; }
 
         /**
          * Constructor to set all the fields in the format specifier.
@@ -721,9 +811,12 @@ public static final boolean specified(int value) {
          * 
* * @param fill fill character (or {@link #NONE} - * @param align alignment indicator, one of {'<', '^', '>', '='} - * @param sign policy, one of '+', '-', or ' '. - * @param alternate true to request alternate formatting mode ('#' flag). + * @param align alignment indicator, one of {'<', '^', '>', + * '='} + * @param sign policy, one of {@code '+'}, {@code '-'}, or + * {@code ' '}. + * @param alternate true to request alternate formatting mode + * ({@code '#'} flag). * @param width of field after padding or -1 to default * @param grouping true to request comma-separated groups * @param precision (e.g. decimal places) or -1 to default @@ -742,50 +835,39 @@ public Spec(char fill, char align, char sign, boolean alternate, int width, } /** - * Return a format specifier (text) equivalent to the value of this Spec. + * Return a format specifier (text) equivalent to the value of this + * Spec. */ @Override public String toString() { StringBuilder buf = new StringBuilder(); - if (specified(fill)) { - buf.append(fill); - } - if (specified(align)) { - buf.append(align); - } - if (specified(sign)) { - buf.append(sign); - } - if (alternate) { - buf.append('#'); - } - if (specified(width)) { - buf.append(width); - } - if (grouping) { - buf.append(','); - } - if (specified(precision)) { - buf.append('.').append(precision); - } - if (specified(type)) { - buf.append(type); - } + if (specified(fill)) { buf.append(fill); } + if (specified(align)) { buf.append(align); } + if (specified(sign)) { buf.append(sign); } + if (alternate) { buf.append('#'); } + if (specified(width)) { buf.append(width); } + if (grouping) { buf.append(','); } + if (specified(precision)) { buf.append('.').append(precision); } + if (specified(type)) { buf.append(type); } return buf.toString(); } /** - * Return a merged Spec object, in which any attribute of this object that is - * specified (or true), has the same value in the result, and any attribute of - * this object that is unspecified (or false), has the 
value that attribute - * takes in the other object. Thus the second object supplies default values. (These - * defaults may also be unspecified.) The use of this method is to allow a Spec - * constructed from text to record exactly, and only, what was in the textual specification, - * while the __format__ method of a client object supplies its type-specific defaults. Thus - * "20" means "<20s" to a str, ">20.12" to a float and - * ">20.12g" to a complex. + * Return a merged {@code Spec} object, in which any attribute of + * this object that is specified (or {@code true}), has the same + * value in the result, and any attribute of this object that is + * unspecified (or {@code false}), has the value that attribute + * takes in the other object. Thus the second object supplies + * default values. (These defaults may also be unspecified.) The use + * of this method is to allow a {@code Spec} constructed from text + * to record exactly, and only, what was in the textual + * specification, while the __format__ method of a client object + * supplies its type-specific defaults. Thus "20" means "<20s" to + * a {@code str}, ">20.12" to a {@code float} and ">20.12g" to + * a {@code complex}. * - * @param other defaults to merge where this object does not specify the attribute. + * @param other defaults to merge where this object does not specify + * the attribute. * @return a new Spec object. */ public Spec withDefaults(Spec other) { @@ -801,11 +883,15 @@ public Spec withDefaults(Spec other) { ); } - /** Defaults applicable to most numeric types. Equivalent to " >" */ + /** + * Defaults applicable to most numeric types. Equivalent to " >" + */ public static final Spec NUMERIC = new Spec(' ', '>', Spec.NONE, false, Spec.UNSPECIFIED, false, Spec.UNSPECIFIED, Spec.NONE); - /** Defaults applicable to string types. Equivalent to " <" */ + /** + * Defaults applicable to string types. 
Equivalent to " <" + */ public static final Spec STRING = new Spec(' ', '<', Spec.NONE, false, Spec.UNSPECIFIED, false, Spec.UNSPECIFIED, Spec.NONE); @@ -823,31 +909,48 @@ public Spec(int precision, char type) { this(' ', '>', Spec.NONE, false, UNSPECIFIED, false, precision, type); } - /** The alignment from the parsed format specification, or default. */ - public char getFill(char defaultFill) { - return specified(fill) ? fill : defaultFill; - } + /** + * The fill from the parsed format specification, or default. + * + * @param defaultFill to return if not specified + * @return fill to use + */ + public char getFill(char defaultFill) { return specified(fill) ? fill : defaultFill; } - /** The alignment from the parsed format specification, or default. */ + /** + * The alignment from the parsed format specification, or default. * + * + * @param defaultAlign to return if not specified + * @return alignment to use + */ public char getAlign(char defaultAlign) { return specified(align) ? align : defaultAlign; } - /** The precision from the parsed format specification, or default. */ + /** + * The precision from the parsed format specification, or default. + * + * @param defaultPrecision to return if not specified + * @return precision to use + */ public int getPrecision(int defaultPrecision) { return specified(precision) ? precision : defaultPrecision; } - /** The type code from the parsed format specification, or default supplied. */ - public char getType(char defaultType) { - return specified(type) ? type : defaultType; - } - + /** + * The type code from the parsed format specification, or default + * supplied. + * + * @param defaultType to return if not specified + * @return type code to use + */ + public char getType(char defaultType) { return specified(type) ? type : defaultType; } } /** - * Parser for PEP-3101 field format specifications. This class provides a {@link #parse()} - * method that translates the format specification into an Spec object. 
+ * Parser for PEP-3101 field format specifications. This class + * provides a {@link #parse()} method that translates the format + * specification into an {@code Spec} object. */ private static class Parser { @@ -855,8 +958,8 @@ private static class Parser { private int ptr; /** - * Constructor simply holds the specification string ahead of the {@link #parse()} - * operation. + * Constructor simply holds the specification string ahead of the + * {@link #parse()} operation. * * @param spec format specifier to parse (e.g. "<+12.3f") */ @@ -866,17 +969,21 @@ private static class Parser { } /** - * Parse the specification with which this object was initialised into an {@link Spec}, - * which is an object encapsulating the format for use by formatting methods. This parser - * deals only with the format specifiers themselves, as accepted by the - * __format__ method of a type, or the format() built-in, not - * format strings in general as accepted by str.format(). + * Parse the specification with which this object was initialised + * into an {@link Spec}, which is an object encapsulating the format + * for use by formatting methods. This parser deals only with the + * format specifiers themselves, as accepted by the + * {@code __format__} method of a type, or the {@code format()} + * built-in, not format strings in general as accepted by + * {@code str.format()}. * - * @return the Spec equivalent to the string given. + * @return the {@code Spec} equivalent to the string given. */ /* - * This method is the equivalent of CPython's parse_internal_render_format_spec() in - * ~/Objects/stringlib/formatter.h, but we deal with defaults another way. + * This method is the equivalent of CPython's + * parse_internal_render_format_spec() in + * ~/Objects/stringlib/formatter.h, but we deal with defaults + * another way. */ Spec parse() { @@ -897,35 +1004,39 @@ Spec parse() { fill = spec.charAt(0); align = spec.charAt(ptr++); } else { - // Second character is not alignment. 
We are still at square zero. + /* + * Second character is not alignment. We are still at square zero. + */ ptr = 0; } } // Scan [sign] ... - if (isAt("+- ")) { - sign = spec.charAt(ptr++); - } + if (isAt("+- ")) { sign = spec.charAt(ptr++); } // Scan [#] ... alternate = scanPast('#'); // Scan [0] ... if (scanPast('0')) { - // Accept 0 here as equivalent to zero-fill but only not set already. + /* + * Accept 0 here as equivalent to zero-fill but only not set + * already. + */ if (!Spec.specified(fill)) { fill = '0'; if (!Spec.specified(align)) { - // Also accept it as equivalent to "=" aligment but only not set already. + /* + * Also accept it as equivalent to "=" alignment but only not set + * already. + */ align = '='; } } } // Scan [width] - if (isDigit()) { - width = scanInteger(); - } + if (isDigit()) { width = scanInteger(); } // Scan [,][.precision][type] grouping = scanPast(','); @@ -940,20 +1051,21 @@ Spec parse() { } // Scan [type] - if (ptr < spec.length()) { - type = spec.charAt(ptr++); - } + if (ptr < spec.length()) { type = spec.charAt(ptr++); } // If we haven't reached the end, something is wrong if (ptr != spec.length()) { - throw new IllegalArgumentException("Invalid conversion specification"); + throw new IllegalArgumentException("Invalid format specifier"); } // Create a specification return new Spec(fill, align, sign, alternate, width, grouping, precision, type); } - /** Test that the next character is exactly the one specified, and advance past it if it is. */ + /** + * Test that the next character is exactly the one specified, and + * advance past it if it is. + */ private boolean scanPast(char c) { if (ptr < spec.length() && spec.charAt(ptr) == c) { ptr++; @@ -968,7 +1080,9 @@ private boolean isAt(String chars) { return ptr < spec.length() && (chars.indexOf(spec.charAt(ptr)) >= 0); } - /** Test that the next character is one of the alignment characters. */ + /** + * Test that the next character is one of the alignment characters. 
+ */ private boolean isAlign() { return ptr < spec.length() && ("<^>=".indexOf(spec.charAt(ptr)) >= 0); } @@ -978,15 +1092,45 @@ private boolean isDigit() { return ptr < spec.length() && Character.isDigit(spec.charAt(ptr)); } - /** The current character is a digit (maybe a sign). Scan the integer, */ + /** + * The current character is a digit (maybe a sign). Scan the + * integer, + */ private int scanInteger() { int p = ptr++; - while (isDigit()) { - ptr++; - } + while (isDigit()) { ptr++; } return Integer.parseInt(spec.substring(p, ptr)); } } + /** + * An exception signifying a problem with a format string. The + * client code will normally convert this to a Python + * {@code ValueError}. + */ + /* + * In Jython 2 we threw a ValueError directly, but would like to + * keep the formatter pure from Python object types, + */ + public static class FormatError extends Exception { + private static final long serialVersionUID = 1L; + + public FormatError(String message) { super(message); } + } + + /** + * An exception signifying an overflow (or precision too large) + * during a formatting operation. The client code will normally + * convert this to a Python {@code OverflowError}. 
+ */ + /* + * In Jython 2 we threw a OverflowError directly, but would like to + * keep the formatter pure from Python object types, + */ + public static class FormatOverflow extends FormatError { + private static final long serialVersionUID = 1L; + + public FormatOverflow(String message) { super(message); } + } } diff --git a/src/org/python/core/stringlib/MarkupIterator.java b/core/src/main/java/org/python/core/stringlib/MarkupIterator.java similarity index 52% rename from src/org/python/core/stringlib/MarkupIterator.java rename to core/src/main/java/org/python/core/stringlib/MarkupIterator.java index 2fb1783df..25f526d6b 100644 --- a/src/org/python/core/stringlib/MarkupIterator.java +++ b/core/src/main/java/org/python/core/stringlib/MarkupIterator.java @@ -1,104 +1,136 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. package org.python.core.stringlib; +import java.lang.invoke.MethodHandles; + +import org.python.base.MissingFeature; +import org.python.core.CraftedPyObject; import org.python.core.Py; -import org.python.core.PyObject; -import org.python.core.PyString; import org.python.core.PyTuple; import org.python.core.PyType; -import org.python.core.PyUnicode; -import org.python.core.Untraversable; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedType; +import org.python.core.ValueError; /** - * Provides an implementation of the object that str._formatter_parser() returns, which - * is an iterator returning successive 4-tuples, the sequence being equivalent to the original - * string. + * Provides an implementation of the object that + * {@code string.formatter_parser()} returns, which is an iterator + * returning successive 4-tuples, the sequence being equivalent to + * the original string. 
*/ -@Untraversable -@ExposedType(name = "formatteriterator", base = PyObject.class, isBaseType = false) -public class MarkupIterator extends PyObject { +public class MarkupIterator implements CraftedPyObject { - public static final PyType TYPE = PyType.fromClass(MarkupIterator.class); + /** The Python type {@code formatteriterator} of this class. */ + public static final PyType TYPE = PyType.fromSpec( // + new PyType.Spec("formatteriterator", MethodHandles.lookup()) + .flagNot(PyType.Flag.BASETYPE)); /** The UTF-16 string from which elements are being returned. */ private final String markup; - /** True if originally given a PyString (so must return PyString not PyUnicode). */ + /** + * True if originally given a {@code bytes} or {@code bytearray} (so + * that when a value is formatted using a chunk, it should be to + * bytes not the code points of a {@code str}). + */ private final boolean bytes; /** How far along that string we are. */ private int index; - /** A counter used to auto-number fields when not explicitly numbered in the format. */ + /** + * A counter used to auto-number fields when not explicitly numbered + * in the format. + */ private final FieldNumbering numbering; - /** Constructor used at top-level to enumerate a format. */ - public MarkupIterator(PyString markupObject) { - markup = markupObject.getString(); - bytes = !(markupObject instanceof PyUnicode); - numbering = new FieldNumbering(); + /** + * Constructor used at top-level to enumerate a format. + * + * @param markup to parse + * @param bytes if originally bytes-like + * @param numbering for automatically numbered arguments + */ + private MarkupIterator(String markup, boolean bytes, FieldNumbering numbering) { + this.markup = markup; + this.bytes = bytes; + this.numbering = numbering; } - /** Variant constructor used when formats are nested. 
*/ - public MarkupIterator(MarkupIterator enclosingIterator, String subMarkup) { - markup = subMarkup; - bytes = enclosingIterator.bytes; - numbering = enclosingIterator.numbering; - } + /** + * Constructor used at top-level to enumerate a format. + * + * @param markup to parse + */ + public MarkupIterator(String markup) { this(markup, false, new FieldNumbering()); } - @Override - public PyObject __iter__() { - return formatteriterator___iter__(); + /** + * Constructor used at top-level to enumerate a format that may be + * for a bytes-like object. + * + * @param markup to parse + * @param bytes if originally bytes-like + */ + public MarkupIterator(String markup, boolean bytes) { + this(markup, bytes, new FieldNumbering()); } - @ExposedMethod - final PyObject formatteriterator___iter__() { - return this; + /** + * Variant constructor used when formats are nested. + * + * @param enclosingIterator within which this is nested + * @param subMarkup the substring this is to parse + */ + public MarkupIterator(MarkupIterator enclosingIterator, String subMarkup) { + this(subMarkup, enclosingIterator.bytes, enclosingIterator.numbering); } @Override - public PyObject __iternext__() { - return formatteriterator___iternext__(); - } + public PyType getType() { return TYPE; } + + final Object __iter__() { return this; } /** - * Return the next "chunk" of the format (or return null if ended). A chunk is a 4-tuple - * describing + * Return the next "chunk" of the format (or return {@code null} if + * ended). A chunk is a 4-tuple describing *
    *
  1. the text leading up to the next format field,
  2. - *
  3. the field name or number (as a string) for accessing the value,
  4. - *
  5. the format specifier such as "#12x", and
  6. - *
  7. any conversion that should be applied (the 's' or 'r' codes for - * str() and repr())
  8. + *
  9. the field name or number (as a string) for accessing the + * value,
  10. + *
  11. the format specifier such as {@code "#12x"}, and
  12. + *
  13. any conversion that should be applied (the {@code 's'} or + * {@code 'r'} codes for {@code str()} and {@code repr()})
  14. *
- * Elements 1-3 are None if this chunk contains no format specifier. Elements 0-2 are - * zero-length strings if missing from the format, while element 3 will be None if missing. + * Elements 1-3 are None if this chunk contains no format specifier. + * Elements 0-2 are zero-length strings if missing from the format, + * while element 3 will be None if missing. * - * @return PyTuple chunk or null + * @return {@code PyTuple} chunk or {@code null} */ - @ExposedMethod - final PyObject formatteriterator___iternext__() { + final Object __next__() { try { // Parse off the next literal text and replacement field Chunk chunk = nextChunk(); if (chunk != null) { // Result will be built here - PyObject[] elements = new PyObject[4]; + Object[] elements = new Object[4]; // Literal text is used verbatim. elements[0] = wrap(chunk.literalText, ""); if (chunk.fieldName == null) { - // A field name is null only if there was no replacement field at all. - for (int i = 1; i < elements.length; i++) { - elements[i] = Py.None; - } + /* + * A fieldName is null only if there was no replacement field at + * all. + */ + for (int i = 1; i < elements.length; i++) { elements[i] = Py.None; } } else { // Otherwise, this is the field name elements[1] = wrap(chunk.fieldName, ""); // The format spec may be blank elements[2] = wrap(chunk.formatSpec, ""); - // There may have been a conversion specifier (if not, None is signalled). + /* + * There may have been a conversion specifier (if not, None is + * signalled). 
+ */ elements[3] = wrap(chunk.conversion, null); } @@ -107,56 +139,62 @@ final PyObject formatteriterator___iternext__() { } else { // End of format: end of iteration - return null; + throw new MissingFeature("StopIteration"); } } catch (IllegalArgumentException e) { - throw Py.ValueError(e.getMessage()); + throw new ValueError(e.getMessage()); } } /** - * Convenience method for populating the return tuple, returning a PyString or - * PyUnicode according to the type of the original markup string, or - * Py.None if both arguments are null. + * Convenience method for populating the return tuple, returning a + * {@code String} or {@code Py.None} if both arguments are + * {@code null}. * - * @param value to wrap as a PyObject or null if defaultValue should be wrapped. - * @param defaultValue to return or null if default return is None. + * @param value to wrap as a PyObject or null if + * {@code defaultValue} should be wrapped. + * @param defaultValue to return or {@code null} if default return + * is {@code None}. * @return object for tuple */ - private PyObject wrap(String value, String defaultValue) { - if (value == null) { - value = defaultValue; - } + private Object wrap(String value, String defaultValue) { + if (value == null) { value = defaultValue; } if (value == null) { // It's still null, we want a None return Py.None; } else if (value.length() == 0) { // This is frequent so avoid the constructor - return bytes ? Py.EmptyString : Py.EmptyUnicode; + return ""; } else { - return bytes ? Py.newString(value) : Py.newUnicode(value); + return value; } } /** - * Return the next {@link Chunk} from the iterator, which is a structure containing parsed - * elements of the replacement field (if any), and its preceding text. This is the Java - * equivalent of the tuple returned by {@link #__iternext__()}. This finds use in the - * implementation of str.format and unicode.format. 
+ * Return the next {@link Chunk} from the iterator, which is a + * structure containing parsed elements of the replacement field (if + * any), and its preceding text. This is the Java equivalent of the + * tuple returned by {@link #__next__()}. This finds use in the + * implementation of {@code str.format} and {@code unicode.format}. * * @return the chunk */ public Chunk nextChunk() { - if (index == markup.length()) { - return null; - } + + if (index == markup.length()) { return null; } + Chunk result = new Chunk(); - // pos = index is the index of the first text not already chunked + /* + * pos = index is the index of the first text not already chunked. + */ int pos = index; - // Advance pos to the first '{' that is not a "{{" (escaped brace), or pos<0 if none such. + /* + * Advance pos to the first '{' that is not a "{{" (escaped brace), + * or pos<0 if none such. + */ while (true) { pos = indexOfFirst(markup, pos, '{', '}'); if (pos >= 0 && pos < markup.length() - 1 @@ -174,27 +212,40 @@ public Chunk nextChunk() { // markup[index:pos] is the literal part of this chunk. if (pos < 0) { - // ... except pos<0, and there is no further format specifier, only literal text. + /* + * ... except pos<0, and there is no further format specifier, only + * literal text. + */ result.literalText = unescapeBraces(markup.substring(index)); index = markup.length(); } else { // Grab the literal text, dealing with escaped braces. result.literalText = unescapeBraces(markup.substring(index, pos)); - // Scan through the contents of the format spec, between the braces. Skip one '{'. + /* + * Scan through the contents of the format spec, between the braces. + * Skip one '{'. + */ pos++; int fieldStart = pos; int count = 1; while (pos < markup.length()) { if (markup.charAt(pos) == '{') { - // This means the spec we are gathering itself contains nested specifiers. + /* + * This means the spec we are gathering itself contains nested + * specifiers. 
+ */ count++; result.formatSpecNeedsExpanding = true; } else if (markup.charAt(pos) == '}') { - // And here is a '}' matching one we already counted. + /* + * And here is a '}' matching one we already counted. + */ count--; if (count == 0) { - // ... matching the one we began with: parse the replacement field. + /* + * ... matching the one we began with: parse the replacement field. + */ parseField(result, markup.substring(fieldStart, pos)); pos++; break; @@ -202,33 +253,33 @@ public Chunk nextChunk() { } pos++; } + if (count > 0) { // Must be end of string without matching '}'. throw new IllegalArgumentException("Single '{' encountered in format string"); } + index = pos; } return result; } /** - * If originally given a PyString, string elements in the returned tuples must be PyString not - * PyUnicode. + * If originally given a PyString, string elements in the returned + * tuples must be PyString not PyUnicode. * * @return true if originally given a PyString */ - public final boolean isBytes() { - return bytes; - } + public final boolean isBytes() { return bytes; } private String unescapeBraces(String substring) { return substring.replace("{{", "{").replace("}}", "}"); } /** - * Parse a "replacement field" consisting of a name, conversion and format specification. - * According to the Python Standard Library documentation, a replacement field has the - * structure: + * Parse a "replacement field" consisting of a name, conversion and + * format specification. According to the Python Standard Library + * documentation, a replacement field has the structure: * *
      * replacement_field ::=  "{" [field_name] ["!" conversion] [":" format_spec] "}"
@@ -241,32 +292,45 @@ private String unescapeBraces(String substring) {
      * except at this point, we have already discarded the outer braces.
      *
      * @param result destination chunk
-     * @param fieldMarkup specifying a replacement field, possibly with nesting
+     * @param fieldMarkup specifying a replacement field, possibly with
+     *     nesting
      */
     private void parseField(Chunk result, String fieldMarkup) {
+
         int pos = indexOfFirst(fieldMarkup, 0, '!', ':');
+
         if (pos >= 0) {
-            // There's a '!' or a ':', so what precedes the first of them is a field name.
+            /*
+             * There's a '!' or a ':', so what precedes the first of them is a
+             * field name.
+             */
             result.fieldName = fieldMarkup.substring(0, pos);
             if (fieldMarkup.charAt(pos) == '!') {
                 // There's a conversion specifier
                 if (pos == fieldMarkup.length() - 1) {
-                    throw new IllegalArgumentException("end of format while "
-                            + "looking for conversion specifier");
+                    throw new IllegalArgumentException(
+                            "end of format while " + "looking for conversion specifier");
                 }
                 result.conversion = fieldMarkup.substring(pos + 1, pos + 2);
                 pos += 2;
-                // And if that's not the end, there ought to be a ':' now.
+                /*
+                 * And if that's not the end, there ought to be a ':' now.
+                 */
                 if (pos < fieldMarkup.length()) {
                     if (fieldMarkup.charAt(pos) != ':') {
-                        throw new IllegalArgumentException("expected ':' "
-                                + "after conversion specifier");
+                        throw new IllegalArgumentException(
+                                "expected ':' " + "after conversion specifier");
                     }
-                    // So the format specifier is from the ':' to the end.
+                    /*
+                     * So the format specifier is from the ':' to the end.
+                     */
                     result.formatSpec = fieldMarkup.substring(pos + 1);
                 }
             } else {
-                // No '!', so the format specifier is from the ':' to the end. Or empty.
+                /*
+                 * No '!', so the format specifier is from the ':' to the end. Or
+                 * empty.
+                 */
                 result.formatSpec = fieldMarkup.substring(pos + 1);
             }
         } else {
@@ -280,35 +344,36 @@ private void parseField(Chunk result, String fieldMarkup) {
             return;
         }
 
-        // Automatic numbers must also work when there is an .attribute or [index]
+        /*
+         * Automatic numbers must also work when there is an .attribute or
+         * [index].
+         */
         char c = result.fieldName.charAt(0);
         if (c == '.' || c == '[') {
             result.fieldName = numbering.nextAutomaticFieldNumber() + result.fieldName;
             return;
         }
 
-        // Finally, remember the argument number was specified (perhaps complain of mixed use)
-        if (Character.isDigit(c)) {
-            numbering.useManualFieldNumbering();
-        }
+        /*
+         * Finally, remember the argument number was specified (perhaps
+         * complain of mixed use).
+         */
+        if (Character.isDigit(c)) { numbering.useManualFieldNumbering(); }
     }
 
     /** Find the first of two characters, or return -1. */
     private int indexOfFirst(String s, int start, char c1, char c2) {
         int i1 = s.indexOf(c1, start);
         int i2 = s.indexOf(c2, start);
-        if (i1 == -1) {
-            return i2;
-        }
-        if (i2 == -1) {
-            return i1;
-        }
+        if (i1 == -1) { return i2; }
+        if (i2 == -1) { return i1; }
         return Math.min(i1, i2);
     }
 
     /**
-     * Class used locally to assign indexes to the automatically-numbered arguments (see String
-     * Formatting section of the Python Standard Library).
+     * Class used locally to assign indexes to the
+     * automatically-numbered arguments (see String Formatting section
+     * of the Python Standard Library).
      */
     static final class FieldNumbering {
 
@@ -316,8 +381,8 @@ static final class FieldNumbering {
         private int automaticFieldNumber = 0;
 
         /**
-         * Generate a numeric argument index automatically, or raise an error if already started
-         * numbering manually.
+         * Generate a numeric argument index automatically, or raise an
+         * error if already started numbering manually.
          *
          * @return index as string
          */
@@ -330,13 +395,11 @@ String nextAutomaticFieldNumber() {
         }
 
         /**
-         * Remember we are numbering manually, and raise an error if already started numbering
-         * automatically.
+         * Remember we are numbering manually, and raise an error if already
+         * started numbering automatically.
          */
         void useManualFieldNumbering() {
-            if (manualFieldNumberSpecified) {
-                return;
-            }
+            if (manualFieldNumberSpecified) { return; }
             if (automaticFieldNumber != 0) {
                 throw new IllegalArgumentException(
                         "cannot switch from automatic field numbering to manual field specification");
@@ -349,13 +412,19 @@ public static final class Chunk {
 
         /** The text leading up to the next format field. */
         public String literalText;
-        /** The field name or number (as a string) for accessing the value. */
+        /**
+         * The field name or number (as a string) for accessing the value.
+         */
         public String fieldName;
-        /** The format specifier such as "#12x". */
+        /** The format specifier such as {@code "#12x"}. */
         public String formatSpec;
-        /** Conversion to be applied, e.g. 'r' for repr(). */
+        /**
+         * Conversion to be applied, e.g. {@code 'r'} for {@code repr()}.
+         */
         public String conversion;
-        /** Signals the formatSpec needs expanding recursively. */
+        /**
+         * Signals the {@code formatSpec} needs expanding recursively.
+         */
         public boolean formatSpecNeedsExpanding;
     }
 }
diff --git a/core/src/main/java/org/python/core/stringlib/TextFormatter.java b/core/src/main/java/org/python/core/stringlib/TextFormatter.java
new file mode 100644
index 000000000..3818b62a8
--- /dev/null
+++ b/core/src/main/java/org/python/core/stringlib/TextFormatter.java
@@ -0,0 +1,186 @@
+// Copyright (c)2021 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.core.stringlib;
+
+import org.python.core.PySequence;
+import org.python.core.stringlib.InternalFormat.Spec;
+
+/**
+ * A class that provides the implementation of {@code str} and
+ * {@code unicode} formatting. In a limited way, it acts like a
+ * StringBuilder to which text is appended, formatted according to
+ * the format specifier supplied at construction. These are ephemeral
+ * objects that are not, on their own, thread safe.
+ */
+public abstract class TextFormatter extends InternalFormat.AbstractFormatter {
+
+    /**
+     * Construct the formatter from a client-supplied buffer, to which
+     * the result will be appended, and a specification. Sets
+     * {@link #mark} to the end of the buffer.
+     *
+     * @param result destination buffer
+     * @param spec parsed conversion specification
+     */
+    public TextFormatter(StringBuilder result, Spec spec) { super(result, spec); }
+
+    /**
+     * Construct the formatter from a specification, allocating a buffer
+     * internally for the result.
+     *
+     * @param spec parsed conversion specification
+     */
+    public TextFormatter(Spec spec) { this(new StringBuilder(), spec); }
+
+    /*
+     * Re-implement the text appends so they return the right type.
+     */
+    @Override
+    public TextFormatter append(char c) {
+        super.append(c);
+        return this;
+    }
+
+    @Override
+    public TextFormatter append(int c) {
+        super.append(c);
+        return this;
+    }
+
+    @Override
+    public TextFormatter append(CharSequence csq) {
+        super.append(csq);
+        return this;
+    }
+
+    @Override
+    public TextFormatter append(CharSequence csq, int start, int end) //
+            throws IndexOutOfBoundsException {
+        super.append(csq, start, end);
+        return this;
+    }
+
+    /**
+     * Format the given {@code String} into the {@code result} buffer.
+     * Largely, this is a matter of copying the value of the argument,
+     * but a subtlety arises when the string contains supplementary
+     * (non-BMP) Unicode characters, which are represented as surrogate
+     * pairs. The precision specified in the format relates to a count
+     * of Unicode characters (code points), not Java {@code char}s. The
+     * method deals with this correctly, essentially by not counting the
+     * high-surrogates in the allowance. The final value of
+     * {@link #lenWhole} counts the UTF-16 units added.
+     *
+     * @param value to format
+     * @return this {@code TextFormatter} object
+     */
+    public TextFormatter format(String value) {
+
+        // Scratch all instance variables and start = result.length().
+        setStart();
+
+        int p = spec.precision, n = value.length();
+
+        if (Spec.specified(p) && p < n) {
+            /*
+             * Estimate the space for the converted result (preempt multiple
+             * re-allocation)
+             */
+            int space = Math.max(spec.width, p);
+            result.ensureCapacity(result.length() + space + (bytes ? 0 : space / 4));
+            /*
+             * A precision p was specified less than the length: we may have to
+             * truncate. Note we compared p with the UTF-16 length, even though
+             * it is the code point length that matters. But the code point
+             * length cannot be greater than n.
+             */
+            int count = 0;
+            while (count < p) {
+                // count is the number of UTF-16 chars.
+                char c = value.charAt(count++);
+                result.append(c);
+                /*
+                 * A high-surrogate will always be followed by a low, so doesn't
+                 * count.
+                 */
+                if (Character.isHighSurrogate(c) && p < n) {
+                    /*
+                     * Accomplish "not counting" by bumping the limit p, within the
+                     * array bounds.
+                     */
+                    p += 1;
+                }
+            }
+            // Record the UTF-16 count as the length in buffer
+            lenWhole = count;
+
+        } else {
+            /*
+             * We definitely don't need to truncate. Append the whole string.
+             */
+            lenWhole = n;
+            result.append(value);
+        }
+
+        return this;
+    }
+
+    /**
+     * Format the given {@code String} into the {@code result} buffer.
+     * Largely, this is a matter of copying the value of the argument,
+     * but a subtlety arises when the string contains supplementary
+     * (non-BMP) Unicode characters, which are represented as surrogate
+     * pairs. The precision specified in the format relates to a count
+     * of Unicode characters (code points), not Java {@code char}s. The
+     * method deals with this correctly, essentially by not counting the
+     * high-surrogates in the allowance. The final value of
+     * {@link #lenWhole} counts the UTF-16 units added.
+     *
+     * @param value to format
+     * @return this {@code TextFormatter} object
+     */
+    public TextFormatter format(PySequence.OfInt value) {
+
+        // Scratch all instance variables and start = result.length().
+        setStart();
+
+        int p = spec.precision, n = value.length();
+
+        if (Spec.specified(p) && p < n) {
+            /*
+             * Estimate the space for the converted result (preempt multiple
+             * re-allocation).
+             */
+            int space = Math.max(spec.width, p);
+            result.ensureCapacity(result.length() + space + (bytes ? 0 : space / 4));
+            // Pile it all in
+            value.asIntStream().forEach(c -> result.appendCodePoint(c));
+            // Record the UTF-16 count as the length in buffer
+            lenWhole = p;
+
+        } else {
+            /*
+             * We definitely don't need to truncate. Append the whole string.
+             */
+            lenWhole = n;
+            result.append(value);
+        }
+
+        return this;
+    }
+
+    /*
+     * Variant to deal with supplementary characters: other formatters
+     * don't produce them.
+     */
+    @Override
+    public TextFormatter pad() {
+        /*
+         * We'll need this many pad characters (if>0). Note
+         * Spec.UNDEFINED<0.
+         */
+        int n = spec.width - result.codePointCount(mark, result.length());
+        if (n > 0) { pad(mark, n); }
+        return this;
+    }
+}
diff --git a/core/src/main/java/org/python/modules/marshal.java b/core/src/main/java/org/python/modules/marshal.java
new file mode 100644
index 000000000..8943652fe
--- /dev/null
+++ b/core/src/main/java/org/python/modules/marshal.java
@@ -0,0 +1,1611 @@
+// Copyright (c)2023 Jython Developers.
+// Licensed to PSF under a contributor agreement.
+package org.python.modules;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.math.BigInteger;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.python.core.Abstract;
+import org.python.core.CPython311Code;
+import org.python.core.EOFError;
+import org.python.core.Exposed.Default;
+import org.python.core.Exposed.Member;
+import org.python.core.Exposed.PythonStaticMethod;
+import org.python.core.OSError;
+import org.python.core.Py;
+import org.python.core.PyBaseObject;
+import org.python.core.PyBool;
+import org.python.core.PyBytes;
+import org.python.core.PyCode;
+import org.python.core.PyDict;
+import org.python.core.PyException;
+import org.python.core.PyFloat;
+import org.python.core.PyList;
+import org.python.core.PyLong;
+import org.python.core.PyObjectUtil;
+import org.python.core.PyObjectUtil.NoConversion;
+import org.python.core.PySequence;
+import org.python.core.PySequence.OfInt;
+import org.python.core.PyTuple;
+import org.python.core.PyType;
+import org.python.core.PyUnicode;
+import org.python.core.StopIteration;
+import org.python.core.TypeError;
+import org.python.core.ValueError;
+import org.python.core.stringlib.ByteArrayBuilder;
+import org.python.core.stringlib.IntArrayBuilder;
+
+/**
+ * Write Python objects to files and read them back. This is primarily
+ * intended for writing and reading compiled Python code, even though
+ * {@code dict}s, {@code list}s, {@code set}s and {@code frozenset}s,
+ * not commonly seen in {@code code} objects, are supported. Version 3
+ * of this protocol properly supports circular links and sharing.
+ */
+
+public class marshal /* extends JavaModule */ {
+
+    /**
+     * The version of the marshal format, published (through
+     * {@code @Member}) under the name {@code version}. It matches the
+     * default {@code version} argument of {@code dump} and
+     * {@code dumps}.
+     */
+    @Member("version")
+    final static int VERSION = 4;
+
+    /*
+     * High water mark to determine when the marshalled object is
+     * dangerously deep and risks coring the interpreter. When the
+     * object stack gets this deep, raise an exception instead of
+     * continuing.
+     */
+    private final static int MAX_MARSHAL_STACK_DEPTH = 2000;
+
+    /*
+     * Enumerate all the legal record types. Each corresponds to a type
+     * of data, or a specific value, except {@code NULL}. The values are
+     * the same as in the CPython marshal.c, but the names have been
+     * clarified.
+     */
+    /**
+     * The record stands for a Java {@code null}: the only type code for
+     * which a decoder may legitimately return {@code null}.
+     */
+    private final static int TYPE_NULL = '0';
+    /** The record encodes {@code None} (in one byte) */
+    private final static int TYPE_NONE = 'N';
+    /** The record encodes {@code False} (in one byte) */
+    private final static int TYPE_FALSE = 'F';
+    /** The record encodes {@code True} (in one byte) */
+    private final static int TYPE_TRUE = 'T';
+    /** The record encodes the type {@code StopIteration} */
+    private final static int TYPE_STOPITER = 'S';
+    /** The record encodes {@code Ellipsis} (in one byte) */
+    private final static int TYPE_ELLIPSIS = '.';
+    /** The record encodes an {@code int} (4 bytes follow) */
+    private final static int TYPE_INT = 'i';
+    /*
+     * TYPE_INT64 is not generated anymore. Supported for backward
+     * compatibility only.
+     */
+    private final static int TYPE_INT64 = 'I';
+    // NOTE(review): the meanings of the float and complex codes below
+    // are not shown here; see CPython marshal.c for their encodings.
+    private final static int TYPE_FLOAT = 'f';
+    private final static int TYPE_BINARY_FLOAT = 'g';
+    private final static int TYPE_COMPLEX = 'x';
+    private final static int TYPE_BINARY_COMPLEX = 'y';
+    /** The record encodes an {@code int} (counted 15-bit digits) */
+    private final static int TYPE_LONG = 'l';
+    private final static int TYPE_BYTES = 's'; // not TYPE_STRING
+    private final static int TYPE_INTERNED = 't'; // str
+    /**
+     * The record refers, by index, to an object encountered earlier in
+     * the same stream (see {@link #FLAG_REF}).
+     */
+    private final static int TYPE_REF = 'r';
+    /** The record encodes a {@code tuple} (counted objects follow) */
+    private final static int TYPE_TUPLE = '(';
+    /** The record encodes a {@code list} (counted objects follow) */
+    private final static int TYPE_LIST = '[';
+    /** The record encodes a {@code dict} (key-value pairs follow) */
+    private final static int TYPE_DICT = '{';
+    private final static int TYPE_CODE = 'c';
+    /** The record encodes a {@code str} (counted code points follow) */
+    private final static int TYPE_UNICODE = 'u'; // str
+    private final static int TYPE_UNKNOWN = '?';
+    private final static int TYPE_SET = '<';
+    private final static int TYPE_FROZENSET = '>';
+
+    /** The record encodes a {@code str} (counted bytes follow) */
+    private final static int TYPE_ASCII = 'a'; // str
+    private final static int TYPE_ASCII_INTERNED = 'A'; // str
+    /** The record encodes a {@code tuple} (counted objects follow) */
+    private final static int TYPE_SMALL_TUPLE = ')';
+    /** The record encodes a {@code str} (counted bytes follow) */
+    private final static int TYPE_SHORT_ASCII = 'z'; // str
+    private final static int TYPE_SHORT_ASCII_INTERNED = 'Z'; // str
+
+    /**
+     * We add this to a {@code TYPE_*} code to indicate that the encoded
+     * object is cached at the next free index. When reading, each
+     * occurrence appends its object to a list (thus the encounter order
+     * defines the index). When writing, we look in a cache to see if
+     * the object has already been encoded (therefore given an index)
+     * and if it has, we record the index instead in a {@link #TYPE_REF}
+     * record. If it is new, we assign it the next index.
+     */
+    private final static int FLAG_REF = 0x80;
+
+    /** A mask for the low 15 bits. */
+    private final static int MASK15 = 0x7fff;
+    /** A mask for the low 15 bits, as a {@code BigInteger}. */
+    private final static BigInteger BIG_MASK15 =
+            BigInteger.valueOf(MASK15);
+
+    /**
+     * We apply a particular {@code Decoder} to the stream after we read
+     * a type code byte that tells us which one to use, to decode the
+     * data following. If that code has no data following, then the
+     * corresponding {@link Decoder#read(Reader, boolean)} returns a
+     * constant.
+     */
+    @FunctionalInterface
+    private interface Decoder {
+        /**
+         * Read an object value, of a particular Python type, from the
+         * input managed by a given {@link Reader}, the matching type
+         * code having been read from it already. If specified through
+         * the second argument {@code ref}, the decoder will also
+         * allocate the next reference index in the given reader to the
+         * object created. (Decoders for simple constants that are
+         * always re-used may ignore the {@code ref} argument.)
+         *
+         * @param r from which to read
+         * @param ref if {@code true}, define a reference in {@code r}
+         * @return the object value read
+         */
+        Object read(Reader r, boolean ref);
+    }
+
+    /**
+     * A {@code Codec} groups together the code for writing and reading
+     * instances of a particular Python type. The {@code write()} method
+     * encodes a value of that type onto the stream, choosing from
+     * available representations when there is more than one. The
+     * {@code Codec} provides (potentially) multiple {@link Decoder}s,
+     * one for each representation (type code), in a {@link Map}
+     * supplied by the {@link #decoders()} method.
+     */
+    interface Codec {
+        /**
+         * The Python type this codec is implemented to encode and
+         * decode. This is the key under which the codec is registered,
+         * and may be {@code null}.
+         *
+         * @return target Python type
+         */
+        PyType type();
+
+        /**
+         * Write a value, of a particular Python type, onto the output
+         * managed by the {@link Writer}.
+         *
+         * @param w to receive the data
+         * @param v to be written
+         * @throws IOException on file write errors
+         * @throws ArrayIndexOutOfBoundsException on byte array write
+         *     errors
+         * @throws Throwable from operations on {@code v}
+         */
+        void write(Writer w, Object v) throws IOException, Throwable;
+
+        /**
+         * Return a mapping from each type code supported to a function
+         * (a {@link Decoder}) that is able to read the object following
+         * that type code, in the input managed by a given
+         * {@link Reader}.
+         *
+         * @return the table of decoders
+         */
+        Map decoders();
+    }
+
+    /**
+     * A mapping from Python type to the Codec that is able to encode
+     * and decode that type. Note that the {@code null} key is
+     * supported. Entries are added by {@link #register(Codec)}.
+     */
+    private static HashMap codecForType =
+            new HashMap<>();
+
+    /**
+     * A mapping from the type code to the {@link Decoder} able to
+     * render the record as a Python object. Entries are added by
+     * {@link #register(Codec)}, and consulted when reading an object.
+     */
+    private static HashMap decoderForCode =
+            new HashMap<>();
+
+    /**
+     * Associate a codec with its target Python type in
+     * {@link #codecForType} and each read method it supplies with the
+     * type code it supports in {@link #decoderForCode}. A type code
+     * offered by more than one codec is a programming error, detected
+     * only when Java assertions are enabled; otherwise the later
+     * registration silently replaces the earlier one.
+     *
+     * @param codec to register
+     */
+    private static void register(Codec codec) {
+        // Get the type served (object for reference, null for null).
+        PyType targetType = codec.type();
+        codecForType.put(targetType, codec);
+        // Register a read method for each type code
+        for (Map.Entry e : codec.decoders()
+                .entrySet()) {
+            // put() returns the previous decoder for this code, if any
+            Decoder d = decoderForCode.put(e.getKey(), e.getValue());
+            assert d == null; // No codec should duplicate a code
+        }
+    }
+
+    // Register all the defined codecs. This runs once, when the class
+    // is initialised, and fills codecForType and decoderForCode.
+    static {
+        register(new TypeCodec());
+        register(new BoolCodec());
+        register(new IntCodec());
+        register(new FloatCodec());
+
+        register(new BytesCodec());
+        register(new StrCodec());
+        register(new TupleCodec());
+        register(new ListCodec());
+        register(new DictCodec());
+
+        register(new CodeCodec());
+
+        register(new RefCodec());
+    }
+
+    /**
+     * {@code marshal.dump(value, file, version=4)}: Write the value on
+     * the open file. The value must be a supported type. The file must
+     * be a writable binary file. The file belongs to the caller: it is
+     * flushed after the value is written, but it is not closed, so the
+     * caller may go on to write more to it.
+     *
+     * @param value to write
+     * @param file on which to write
+     * @param version of the format to use
+     * @throws ValueError if the value has (or contains an object that
+     *     has) an unsupported type
+     * @throws OSError from file operations
+     */
+    @PythonStaticMethod
+    public static void dump(Object value, Object file,
+            @Default("4") int version) throws ValueError, OSError {
+        // Only a failure to adapt the argument is a type error.
+        OutputStream os;
+        try {
+            os = StreamWriter.adapt(file);
+        } catch (NoConversion nc) {
+            throw Abstract.argumentTypeError("dump", "file",
+                    "a file-like object with write", file);
+        }
+
+        Writer writer = new StreamWriter(os, version);
+        writer.writeObject(value);
+
+        // Push the data out, but leave closing to the owner of the file.
+        try {
+            os.flush();
+        } catch (IOException ioe) {
+            throw new OSError(ioe);
+        }
+    }
+
+    /**
+     * {@code marshal.load(file)}: read one value from an open file and
+     * return it. If no valid value is read (e.g. because the data has
+     * an incompatible marshal format), raise {@code EOFError},
+     * {@code ValueError} or {@code TypeError}. The file must be a
+     * readable binary file.
+     *
+     * @param file to read
+     * @return the object read
+     * @throws ValueError when an object being read is over-size or
+     *     contains values out of range.
+     * @throws TypeError when file reading returns non-byte data or a
+     *     container contains a null element.
+     * @throws EOFError when a partial object is read
+     * @throws OSError from file operations generally
+     */
+    @PythonStaticMethod
+    public static Object load(Object file) {
+        try (InputStream is = StreamReader.adapt(file)) {
+            Reader reader = new StreamReader(is);
+            return reader.readObject();
+        } catch (NoConversion | IOException e) {
+            throw Abstract.argumentTypeError("load", "file",
+                    "a file-like object with read", file);
+        }
+    }
+
+    /**
+     * {@code marshal.dumps(value, version=4)}: Serialise the given
+     * value into a new {@code bytes} object, in the same form that
+     * {@link #dump(Object, Object, int)} would write it to a file. The
+     * value must be a supported type.
+     *
+     * @param value to write
+     * @param version of the format to use
+     * @return {@code bytes} containing result
+     * @throws ValueError if the value has (or contains an object that
+     *     has) an unsupported type
+     */
+    @PythonStaticMethod
+    public static PyBytes dumps(Object value, @Default("4") int version)
+            throws ValueError {
+        // Accumulate the encoded form in a growable byte array
+        ByteArrayBuilder buffer = new ByteArrayBuilder();
+        new BytesWriter(buffer, version).writeObject(value);
+        return new PyBytes(buffer);
+    }
+
+    /**
+     * {@code marshal.loads(bytes)}: decode one value from a bytes-like
+     * object and return it. If no valid value is read, raise
+     * {@code EOFError}, {@code ValueError} or {@code TypeError}.
+     *
+     * @param bytes to read
+     * @return the object read
+     * @throws ValueError when an object being read is over-size or
+     *     contains values out of range.
+     * @throws TypeError when a container contains a null element.
+     * @throws EOFError when a partial object is read
+     */
+    @PythonStaticMethod
+    public static Object loads(Object bytes) {
+        try {
+            // View the argument as a buffer and decode one object from it
+            return new BytesReader(BytesReader.adapt(bytes)).readObject();
+        } catch (NoConversion nc) {
+            throw Abstract.argumentTypeError("loads", "bytes",
+                    "a bytes-like object", bytes);
+        }
+    }
+
+    /**
+     * A {@code marshal.Writer} holds an {@code OutputStream} during the
+     * time that the {@code marshal} module is serialising objects to
+     * it. It provides operations to write individual field values to
+     * the stream, that support classes extending {@link Codec} in their
+     * implementation of {@link Codec#write(Writer, Object) write()}.
+     * 

+ * The wrapped {@code OutputStream} may be writing to a file or to + * an array. + */ + abstract static class Writer { + + /** + * Version of the protocol this {@code Writer} is supposed to + * write. + */ + private final int version; + + /** + * Create a {@code Writer} with a specified version of the + * protocol. The version affects whether certain type codes will + * be used. + * + * @param version of protocol to write + */ + public Writer(int version) { this.version = version; } + + /** + * Encode a complete object. + * + * @param obj to encode + */ + public void writeObject(Object obj) {} + + /** + * Write one {@code byte} onto the destination. The parameter is + * an {@code int} because it may be the result of a calculation, + * but only the the low 8 bits are used. + * + * @param v to write + */ + abstract void writeByte(int v); + + /** + * Write one {@code short} onto the destination. The parameter + * is an {@code int} because it may be the result of a + * calculation, but only the the low 16 bits are used. + * + * @param v to write + */ + abstract void writeShort(int v); + + /** + * Write one {@code int} onto the destination. + * + * @param v to write + */ + abstract void writeInt(int v); + + /** + * Write one {@code long} onto the destination. + * + * @param v to write + */ + abstract void writeLong(long v); + + /** + * Write one {@code float} onto the destination (8 bytes). + * + * @param v to write + */ + void writeDouble(double v) { + long bits = Double.doubleToLongBits(v); + writeLong(bits); + } + + /** + * Write multiple {@code byte}s onto the destination supplied as + * an integer sequence. Only the the low 8 bits of each element + * are used. + * + * @param seq to write + */ + void writeBytes(OfInt seq) { + seq.asIntStream().forEachOrdered(v -> writeByte(v)); + } + + /** + * Write multiple {@code int}s onto the destination supplied as + * an integer sequence. 
+ * + * @param seq to write + */ + void writeInts(OfInt seq) { + seq.asIntStream().forEachOrdered(v -> writeInt(v)); + } + + /** + * Write a {@code BigInteger} as a counted sequence of 15-bit + * units (the form Python expects). + * + * @param v value to write + */ + void writeBigInteger(BigInteger v) { + boolean negative = v.signum() < 0; + if (negative) { v = v.negate(); } + int size = (v.bitLength() + 14) / 15; + writeInt(negative ? -size : size); + for (int i = 0; i < size; i++) { + writeShort(v.and(BIG_MASK15).intValue()); + v = v.shiftRight(15); + } + } + + /** + * Construct a ValueError expressing the impossibility of + * marshalling whatever it is. + * + * @param v object we couldn't marshal. + * @return throwable exception + */ + protected static ValueError unmarshallableObject(Object v) { + String t = v == null ? "" + : "of type '" + PyType.of(v).getName() + "'"; + return new ValueError("unmarshallable object %s", t); + } + } + + /** + * A {@link Writer} that has a {@code java.io.OutputStream} as its + * destination. When the underlying destination is a file, it is + * preferable for efficiency that this be a + * {@code java.io.BufferedOutputStream}. A + * {@code java.io.ByteArrayOutputStream} needs no additional + * buffering. + */ + static class StreamWriter extends Writer { + + /** + * The destination wrapped in a {@code DataOutputStream} on + * which we shall call {@code getInt()} etc. to write items. A + * Python marshal stream is little-endian, while Java will write + * big-endian data. However, note that + * {@code Integer.reverseBytes()} and friends are HotSpot + * intrinsics. + */ + private final DataOutputStream file; + + /** + * Form a {@link Writer} on a {@code java.io.OutputStream}. 
+ * + * @param file output + * @param version of protocol to write + */ + StreamWriter(OutputStream file, int version) { + super(version); + this.file = new DataOutputStream(file); + } + + @Override + void writeByte(int b) { + try { + file.write(b); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + void writeShort(int v) { + try { + file.writeShort(Short.reverseBytes((short)v)); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + void writeInt(int v) { + try { + file.writeInt(Integer.reverseBytes(v)); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + void writeLong(long v) { + try { + file.writeLong(Long.reverseBytes(v)); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + void writeBytes(OfInt seq) { + seq.asIntStream().forEachOrdered(v -> writeByte(v)); + } + + @Override + void writeInts(OfInt seq) { + seq.asIntStream().forEachOrdered(v -> writeInt(v)); + } + + /** + * Recognise or wrap an eligible file-like data sink as an + * {@code OutputStream}. + */ + private static OutputStream adapt(Object file) + throws NoConversion { + if (file instanceof OutputStream) { + return (OutputStream)file; + } else { + // Adapt any object with write accepting a byte + // But for now ... + throw PyObjectUtil.NO_CONVERSION; + } + } + } + + /** + * A {@link Writer} that has a {@link ByteArrayBuilder} as its + * destination. + */ + static class BytesWriter extends Writer { + + /** + * The destination {@link ByteArrayBuilder} on which we write + * little-endian + */ + final ByteArrayBuilder builder; + + /** + * Form a {@link Writer} on a byte array. 
+ * + * @param builder destination + * @param version of protocol to write + */ + BytesWriter(ByteArrayBuilder builder, int version) { + super(version); + this.builder = builder; + } + + @Override + void writeByte(int v) { builder.append(v); } + + @Override + void writeShort(int v) { builder.appendShortLE(v); } + + @Override + void writeInt(int v) { builder.appendIntLE(v); } + + @Override + void writeLong(long v) { builder.appendLongLE(v); } + + @Override + void writeBytes(OfInt seq) { builder.append(seq); } + + @Override + void writeInts(OfInt seq) { + seq.asIntStream() + .forEachOrdered(v -> builder.appendIntLE(v)); + } + } + + /** + * A {@code marshal.Reader} holds either an {@code InputStream} or a + * {@code java.nio.ByteBuffer} (maybe wrapping a {@code byte[]}) + * provided by the caller, from which it will read during the time + * that the {@code marshal} module is de-serialising objects from + * it. It provides operations to read individual field values from + * this source, that support classes extending {@link Codec} in + * their implementation of decoding methods registered against the + * type codes they support. (See also {@link Codec#decoders()}. + */ + public abstract static class Reader { + + /** + * Objects read from the source may have been marked (by the + * {@link Writer}) as defining potentially shared objects, and + * are assigned an index (one up from zero) as they are + * encountered. In other places within the same source, where + * one of those occurs, a record beginning + * {@link marshal#TYPE_REF} is created with only the + * corresponding index as payload. This list is where we collect + * those objects (in encounter order) so as to map an index to + * an object. + */ + // Allocate generous initial size for typical code object + protected List refs = new ArrayList(); + + /** + * Decode a complete object from the source. 
+ * + * @return the object read + */ + public Object readObject() { + // Get the type code and the decoder for it + int tcflag = readByte(), tc = tcflag & ~FLAG_REF; + Decoder decoder = decoderForCode.get(tc); + // The decoder will define a reference if requested + boolean ref = (tcflag & FLAG_REF) != 0; + if (decoder != null) { + // Decode using the decoder we retrieved for tc + Object obj = decoder.read(this, ref); + if (tc != TYPE_NULL && obj == null) { + throw nullObject("object"); + } + return obj; + } else { + // No decoder registered for tc (see static init) + throw badData("unknown type 0x%02x = '%c'%s", tcflag, + tc, ref ? "+ref" : ""); + } + } + + /** + * Read one {@code byte} from the source (as an unsigned + * integer), advancing the stream one byte. + * + * @return byte read unsigned + */ + // Compare CPython r_byte in marshal.c + public abstract int readByte(); + + /** + * Read one {@code short} value from the source, advancing the + * stream 2 bytes. + * + * @return value read + */ + // Compare CPython r_int in marshal.c + public abstract int readShort(); + + /** + * Read one {@code int} value from the source, advancing the + * stream 4 bytes. + * + * @return value read + */ + // Compare CPython r_long in marshal.c + public abstract int readInt(); + + /** + * Read one {@code long} value from the source, advancing the + * stream 8 bytes. + * + * @return value read + */ + // Compare CPython r_long64 in marshal.c + public abstract long readLong(); + + /** + * Read one {@code float} value from the source, advancing the + * stream 8 bytes. + * + * @return value read + */ + // Compare CPython r_float_bin in marshal.c + public double readDouble() { + long bits = readLong(); + return Double.longBitsToDouble(bits); + } + + /** + * Read a given number of {@code byte}s from the source and + * present them as a read-only, little-endian, + * {@code java.nio.ByteBuffer}, advancing the stream over these + * bytes. 
+ * + * @param n number of bytes to read + * @return the next {@code n} bytes + */ + // Compare CPython r_byte in marshal.c + public abstract ByteBuffer readByteBuffer(int n); + + /** + * Read one {@code BigInteger} value from the source, advancing + * the stream a variable number of bytes. + * + * @return value read + */ + // Compare CPython r_PyLong in marshal.c + BigInteger readBigInteger() throws ValueError { + // Encoded as size and 15-bit digits + int size = readInt(); + if (size == Integer.MIN_VALUE) { + throw badData("size out of range in big int"); + } + + // Size carries the sign + boolean negative = size < 0; + size = Math.abs(size); + + // Or each digit as we read it into v + BigInteger v = BigInteger.ZERO; + for (int i = 0, shift = 0; i < size; i++, shift += 15) { + int digit = readShort(); + if ((digit & ~MASK15) != 0) { + // Bits set where they shouldn't be + throw badData("digit out of range in big int"); + } + BigInteger d = BigInteger.valueOf(digit); + v = (i == 0) ? d : v.or(d.shiftLeft(shift)); + } + + // Sign from size + if (negative) { v = v.negate(); } + return v; + } + + /** + * Reserve an index in the list of references for use later. The + * entry will be {@code null} until replaced by the caller with + * the genuine object. + *

+ * We do this when reading objects that cannot be constructed + * until their fields or elements have been constructed (such as + * {@code tuple} and {@code code}), since objects take reference + * numbers in the order they are encountered in the stream (both + * reading and writing). + * + * @return the index that has been reserved + */ + // Compare CPython r_ref_reserve() in marshal.c + private int reserveRef() { + int idx = refs.size(); + refs.add(null); + return idx; + } + + /** + * Insert a new object {@code o} into the {@link #refs} list at + * the index {@code idx} previously allocated by + * {@link #reserveRef()}. If the index {@code idx<0} there is + * no insertion. (This is an implementation convenience for + * codecs.) + * + * @param type of object referred to + * @param o object to insert or {@code null} (ignored) + * @param idx previously allocated index + * @return {@code o} + * @throws IndexOutOfBoundsException on a bad index + */ + // Compare CPython r_ref_insert() in marshal.c + private T defineRef(T o, int idx) { + if (o != null && idx >= 0) { refs.set(idx, o); } + return o; + } + + /** + * Add the object to the known references, if required. + * + * @param type of object referred to + * @param o to make the target of a reference. + * @param ref if {@code true}, define a reference in {@code r} + * @return {@code o} + */ + // Compare CPython r_ref() or R_REF() in marshal.c + private T defineRef(T o, boolean ref) { + if (ref && o != null) { refs.add(o); } + return o; + } + + /** + * Prepare a Python {@link PyException} for throwing, based on + * the Java {@code IOException}. We may return a Python + * {@link EOFError} or {@link OSError}. 
+ * + * @param ioe to convert + * @return the chosen Python exception + */ + protected PyException pyException(IOException ioe) { + if (ioe instanceof EOFException) { + return endOfData(); + } else { + return new OSError(ioe); + } + } + + /** + * Prepare a Python {@link EOFError} for throwing, with the + * message that the data are too short. We throw one of these on + * encountering and end of file or buffer where more of the + * object was expected. + * + * @return a Python exception to throw + */ + protected static EOFError endOfData() { + return new EOFError("marshal data too short"); + } + + /** + * Create a {@link ValueError} to throw, with a message along + * the lines "bad marshal data (REASON(args))" + * + * @param reason to insert + * @param args arguments to fill format + * @return to throw + */ + protected static ValueError badData(String reason, + Object... args) { + return badData(String.format(reason, args)); + } + + /** + * Create a {@link ValueError} to throw, with a message along + * the lines "bad marshal data (REASON)" + * + * @param reason to insert + * @return to throw + */ + protected static ValueError badData(String reason) { + return new ValueError("bad marshal data (%s)", reason); + } + + /** + * Create a {@link TypeError} to throw, with a message along the + * lines "null object in marshal data for (TYPE)" + * + * @param type to insert + * @return to throw + */ + protected static TypeError nullObject(String type) { + return new TypeError("null object in marshal data for %s", + type); + } + } + + /** + * A {@link Reader} that has a {@code java.io.InputStream} as its + * source. When the underlying source is a file, it is preferable + * for efficiency that this be a + * {@code java.io.BufferedInputStream}. A + * {@code java.io.ByteArrayInputStream} needs no additional + * buffering. 
+ */ + public static class StreamReader extends Reader { + + /** + * The source wrapped in a {@code DataInputStream} on which we + * shall call {@code getInt()} etc. to read items. A Python + * marshal stream is little-endian, while Java will read + * big-endian data. However, note that + * {@code Integer.reverseBytes()} and friends are HotSpot + * intrinsics. + */ + private final DataInputStream file; + + /** + * Form a {@link Reader} on a {@code java.io.InputStream}. + * + * @param file input + */ + public StreamReader(InputStream file) { + this.file = new DataInputStream(file); + } + + @Override + public int readByte() { + try { + return file.readByte() & 0xff; + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + public int readShort() { + try { + return Short.reverseBytes(file.readShort()); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + public int readInt() { + try { + return Integer.reverseBytes(file.readInt()); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + public long readLong() { + try { + return Long.reverseBytes(file.readLong()); + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + @Override + public ByteBuffer readByteBuffer(int n) { + try { + byte[] b = new byte[n]; + file.read(b); + ByteBuffer slice = ByteBuffer.wrap(b).asReadOnlyBuffer() + .order(ByteOrder.LITTLE_ENDIAN); + return slice; + } catch (IOException ioe) { + throw new OSError(ioe); + } + } + + /** + * Recognise or wrap an eligible file-like data source as an + * {@code InputStream}. + */ + private static InputStream adapt(Object file) + throws NoConversion { + if (file instanceof InputStream) { + return (InputStream)file; + } else { + // Adapt any object with read returning bytes + // But for now ... + throw PyObjectUtil.NO_CONVERSION; + } + } + } + + /** + * A {@link Reader} that has a {@code ByteBuffer} as its source. 
+ */ + public static class BytesReader extends Reader { + + /** + * The source as little-endian a {@code ByteBuffer} on which we + * shall call {@code getInt()} etc. to read items. A Python + * marshal stream is little-endian + */ + private final ByteBuffer buf; + + /** + * Form a {@link Reader} on a byte array. + * + * @param bytes input + */ + public BytesReader(byte[] bytes) { + this(ByteBuffer.wrap(bytes)); + } + + /** + * Form a {@link Reader} on an existing {@code ByteBuffer}. This + * {@code ByteBuffer} will have its order set to + * {@code ByteOrder.LITTLE_ENDIAN}. + * + * @param buf input + */ + public BytesReader(ByteBuffer buf) { + this.buf = buf; + buf.order(ByteOrder.LITTLE_ENDIAN); + } + + @Override + public int readByte() { + try { + return buf.get() & 0xff; + } catch (BufferUnderflowException boe) { + throw endOfData(); + } + } + + @Override + public int readShort() { + try { + return buf.getShort(); + } catch (BufferUnderflowException boe) { + throw endOfData(); + } + } + + @Override + public int readInt() { + try { + return buf.getInt(); + } catch (BufferUnderflowException boe) { + throw endOfData(); + } + } + + @Override + public long readLong() { + try { + return buf.getLong(); + } catch (BufferUnderflowException boe) { + throw endOfData(); + } + } + + @Override + public ByteBuffer readByteBuffer(int n) { + try { + ByteBuffer slice = + buf.slice().order(ByteOrder.LITTLE_ENDIAN); + // The n bytes are read, as far as buf is concerned + buf.position(buf.position() + n); + // And we set the limit in slice at their end + return slice.limit(n); + } catch (BufferUnderflowException boe) { + throw endOfData(); + } + } + + /** + * Recognise or wrap an eligible file-like data source as a + * {@code ByteBuffer}. 
+ */ + private static ByteBuffer adapt(Object bytes) + throws NoConversion { + if (bytes instanceof ByteBuffer) { + return (ByteBuffer)bytes; + } else if (bytes instanceof PyBytes) { + return ((PyBytes)bytes).getNIOByteBuffer(); + } else { + if (bytes instanceof byte[]) { + ByteBuffer bb = ByteBuffer.wrap((byte[])bytes); + return bb; + } else { + // Adapt any object with read returning bytes + // But for now ... + throw PyObjectUtil.NO_CONVERSION; + } + } + } + } + + /** {@link Codec} for several Python singletons. */ + private static class SingletonCodec implements Codec { + private final int typeCode; + private final Object value; + + private SingletonCodec(int typeCode, Object value) { + this.typeCode = typeCode; + this.value = value; + } + + @Override + public PyType type() { + /* + * It is possible to serialise a null, and reading it back + * is not always an error. + */ + return value == null ? null : PyType.of(value); + } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + // All objects of the value's type are considered the same + assert v == value; + w.writeByte(typeCode); + } + + @Override + public Map decoders() { + // All this type share the same value (ref ignored) + return Map.of(typeCode, (r, ref) -> value); + } + } + + static { + register(new SingletonCodec(TYPE_NULL, null)); + register(new SingletonCodec(TYPE_NONE, Py.None)); + // register(new SingletonCodec(TYPE_ELLIPSIS, Py.Ellipsis)); + } + + /** + * {@link Codec} for {@code type}s. The only case of this in + * practice is the type {@code StopIteration}, but this codec will + * receive any any type object. 
+ */ + private static class TypeCodec implements Codec { + + @Override + public PyType type() { return PyType.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + if (v == StopIteration.TYPE) + w.writeByte(TYPE_STOPITER); + else + throw Writer.unmarshallableObject(v); + } + + @Override + public Map decoders() { + // All this type share the same value (ref ignored) + return Map.of(TYPE_STOPITER, + (r, ref) -> StopIteration.TYPE); + } + } + + /** {@link Codec} for Python {@code bool}. */ + private static class BoolCodec implements Codec { + @Override + public PyType type() { return PyBool.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + // Must be Boolean + w.writeByte((Boolean)v ? TYPE_TRUE : TYPE_FALSE); + } + + @Override + public Map decoders() { + return Map.of( // + TYPE_FALSE, (r, ref) -> Py.False, // + TYPE_TRUE, (r, ref) -> Py.True); + } + } + + /** {@link Codec} for Python {@code int}. */ + private static class IntCodec implements Codec { + @Override + public PyType type() { return PyLong.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + // May be Integer or BigInteger + if (v instanceof Integer) { + w.writeByte(TYPE_INT); + w.writeInt(((Integer)v).intValue()); + } else { + w.writeByte(TYPE_LONG); + w.writeBigInteger((BigInteger)v); + } + } + + @Override + public Map decoders() { + Map m = new HashMap<>(); + m.put(TYPE_INT, (r, ref) -> r.defineRef(r.readInt(), ref)); + m.put(TYPE_LONG, + (r, ref) -> r.defineRef(r.readBigInteger(), ref)); + return m; + } + } + + /** {@link Codec} for Python {@code float}. 
*/ + private static class FloatCodec implements Codec { + @Override + public PyType type() { return PyFloat.TYPE; } + + @Override + public void write(Writer w, Object v) { + // May be Double or PyFloat + double d = PyFloat.doubleValue(v); + if (w.version > 1) { + w.writeByte(TYPE_BINARY_FLOAT); + w.writeDouble(d); + } else { + PyUnicode u = PyUnicode + .fromJavaString(String.format("%17.0g", d)); + PySequence.OfInt seq = u.asSequence(); + w.writeByte(TYPE_FLOAT); + w.writeBytes(seq); + } + } + + @Override + public Map decoders() { + Map m = new HashMap<>(); + m.put(TYPE_BINARY_FLOAT, + (r, ref) -> r.defineRef(r.readDouble(), ref)); + m.put(TYPE_FLOAT, FloatCodec::readStr); + return m; + } + + private static Object readStr(Reader r, boolean ref) { + int n = r.readInt(); + ByteArrayBuilder builder = new ByteArrayBuilder(n); + for (int i = 0; i < n; i++) { + builder.append(r.readByte()); + } + return r.defineRef(new PyBytes(builder), ref); + } + } + + /** {@link Codec} for Python {@code bytes}. */ + private static class BytesCodec implements Codec { + @Override + public PyType type() { return PyBytes.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + write(w, (PyBytes)v); + } + + private static void write(Writer w, PyBytes v) + throws IOException, Throwable { + int n = PySequence.size(v); + w.writeByte(TYPE_BYTES); + w.writeInt(n); + w.writeBytes(v.asSequence()); + } + + @Override + public Map decoders() { + return Map.of(TYPE_BYTES, BytesCodec::read); + } + + private static Object read(Reader r, boolean ref) { + int n = r.readInt(); + ByteArrayBuilder builder = new ByteArrayBuilder(n); + for (int i = 0; i < n; i++) { + builder.append(r.readByte()); + } + return r.defineRef(new PyBytes(builder), ref); + } + } + + /** {@link Codec} for Python {@code str}. 
*/ + private static class StrCodec implements Codec { + @Override + public PyType type() { return PyUnicode.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + if (v instanceof String) { + write(w, PyUnicode.fromJavaString((String)v)); + } else { + write(w, (PyUnicode)v); + } + } + + private static void write(Writer w, PyUnicode v) + throws IOException, Throwable { + int n = PySequence.size(v); + if (w.version >= 4 && v.isascii()) { + if (n < 256) { + w.writeByte(TYPE_SHORT_ASCII); + w.writeInt(n); + } else { + w.writeByte(TYPE_ASCII); + w.writeInt(n); + } + w.writeBytes(v.asSequence()); + } else { + w.writeByte(TYPE_UNICODE); + w.writeInt(n); + w.writeInts(v.asSequence()); + } + } + + @Override + public Map decoders() { + Map m = new HashMap<>(); + m.put(TYPE_ASCII, + (r, ref) -> readAscii(r, ref, r.readInt(), false)); + m.put(TYPE_SHORT_ASCII, + (r, ref) -> readAscii(r, ref, r.readByte(), false)); + m.put(TYPE_ASCII_INTERNED, + (r, ref) -> readAscii(r, ref, r.readInt(), true)); + m.put(TYPE_SHORT_ASCII_INTERNED, + (r, ref) -> readAscii(r, ref, r.readByte(), true)); + m.put(TYPE_UNICODE, + (r, ref) -> readUtf8(r, ref, r.readInt(), false)); + m.put(TYPE_INTERNED, + (r, ref) -> readUtf8(r, ref, r.readInt(), true)); + return m; + } + + private static Charset ASCII = Charset.forName("ASCII"); + private static Charset UTF8 = Charset.forName("UTF-8"); + + private static Object readAscii(Reader r, boolean ref, int n, + boolean interned) { + ByteBuffer buf = r.readByteBuffer(n); + CharBuffer cb = ASCII.decode(buf); + String s = cb.toString(); + if (interned) { s = s.intern(); } + return r.defineRef(s, ref); + } + + private static String readUtf8(Reader r, boolean ref, int n, + boolean interned) { + ByteBuffer buf = r.readByteBuffer(n); + // XXX use our own codec (& 'surrogatepass') when available + CharBuffer cb = UTF8.decode(buf); + // Note cb is chars, not code points so cp-length unknown + 
IntArrayBuilder builder = new IntArrayBuilder(); + builder.append(cb.codePoints()); + // ??? Always a String, even if not BMP + String s = builder.toString(); + if (interned) { s = s.intern(); } + return r.defineRef(s, ref); + } + } + + /** + * {@link Codec} for Python {@code int}. A {@code tuple} cannot + * contain itself as a member. + */ + private static class TupleCodec implements Codec { + @Override + public PyType type() { return PyTuple.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + // May only be PyTuple + PyTuple tuple = (PyTuple)v; + int n = tuple.size(); + // Version 4+ supports a small tuple + if (w.version >= 4 && n < 256) { + w.writeByte(TYPE_SMALL_TUPLE); + w.writeByte(n); + } else { + w.writeByte(TYPE_TUPLE); + w.writeInt(n); + } + // Write out the body of the tuple + for (int i = 0; i < n; i++) { w.writeObject(tuple.get(i)); } + } + + @Override + public Map decoders() { + return Map.of(// + TYPE_TUPLE, TupleCodec::read, // ; + TYPE_SMALL_TUPLE, TupleCodec::readSmall); + } + + private static PyTuple read(Reader r, boolean ref) { + return read(r, ref, r.readInt()); + } + + private static PyTuple readSmall(Reader r, boolean ref) { + return read(r, ref, r.readByte()); + } + + private static PyTuple read(Reader r, boolean ref, int n) { + // We may allocate a tuple builder of the right size + if (n < 0) { + throw Reader.badData("tuple size out of range"); + } + PyTuple.Builder builder = new PyTuple.Builder(n); + // Get an index now to ensure encounter-order numbering + int idx = ref ? r.reserveRef() : -1; + for (int i = 0; i < n; i++) { + Object v = r.readObject(); + if (v == null) { throw Reader.nullObject("tuple"); } + builder.append(v); + } + // Now we can give an object meaning to the index + return r.defineRef(builder.take(), idx); + } + } + + /** + * {@link Codec} for Python {@code list}. 
An interesting thing about + * a {@code list} is that it can contain itself as a member. + */ + private static class ListCodec implements Codec { + @Override + public PyType type() { return PyList.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + // May only be PyList + PyList list = (PyList)v; + w.writeByte(TYPE_LIST); + int n = list.size(); + w.writeInt(n); + for (int i = 0; i < n; i++) { w.writeObject(list.get(i)); } + } + + @Override + public Map decoders() { + return Map.of(TYPE_LIST, ListCodec::read); + } + + private static PyList read(Reader r, boolean ref) { + // We may allocate a list of the right size + int n = r.readInt(); + if (n < 0) { + throw Reader.badData("list size out of range"); + } + PyList list = new PyList(n); + // Cache the object now: list may contain itself + r.defineRef(list, ref); + for (int i = 0; i < n; i++) { + Object v = r.readObject(); + if (v == null) { throw Reader.nullObject("list"); } + list.add(v); + } + return list; + } + } + + /** + * {@link Codec} for Python {@code dict}. 
+ */ + private static class DictCodec implements Codec { + @Override + public PyType type() { return PyDict.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + // May only be PyDict + PyDict dict = (PyDict)v; + w.writeByte(TYPE_DICT); + // The sequel is a null-terminated key-value pairs + for (Map.Entry e : dict.entrySet()) { + w.writeObject(e.getKey()); + w.writeObject(e.getValue()); + } + w.writeObject(null); + } + + @Override + public Map decoders() { + return Map.of(TYPE_DICT, DictCodec::read); + } + + private static PyDict read(Reader r, boolean ref) { + // The sequel is a null-terminated key-value pairs + PyDict dict = new PyDict(); + // Cache the object now: dict may contain itself + r.defineRef(dict, ref); + // The sequel is a null-terminated key-value pairs + while (true) { + Object key = r.readObject(); + if (key == null) { break; } + // CPython does not treat (k,null) as an error + Object value = r.readObject(); + if (value == null) { break; } + dict.put(key, value); + } + return dict; + } + } + + /** + * {@link Codec} for Python {@code code}. + */ + private static class CodeCodec implements Codec { + @Override + public PyType type() { return PyCode.TYPE; } + + @Override + public void write(Writer w, Object v) + throws IOException, Throwable { + assert type().checkExact(v); + /* + * We intend different concrete sub-classes of PyCode, that + * create different frame types, but at the moment only one. + */ + CPython311Code code = (CPython311Code)v; + w.writeByte(TYPE_CODE); + // XXX Write the fields (quite complicated) + } + + @Override + public Map decoders() { + return Map.of(TYPE_CODE, CodeCodec::read); + } + + private static CPython311Code read(Reader r, boolean ref) { + + // Get an index now to ensure encounter-order numbering + int idx = ref ? 
r.reserveRef() : -1; + + int argcount = r.readInt(); + int posonlyargcount = r.readInt(); + int kwonlyargcount = r.readInt(); + int stacksize = r.readInt(); + + int flags = r.readInt(); + Object code = r.readObject(); + + Object consts = r.readObject(); + Object names = r.readObject(); + Object localsplusnames = r.readObject(); + Object localspluskinds = r.readObject(); + + Object filename = r.readObject(); + Object name = r.readObject(); + Object qualname = r.readObject(); + + int firstlineno = r.readInt(); + Object linetable = r.readObject(); + Object exceptiontable = r.readObject(); + + // PySys_Audit("code.__new__", blah ...); + + CPython311Code v = CPython311Code.create( // + filename, name, qualname, flags, // + code, firstlineno, linetable, // + consts, names, // + localsplusnames, localspluskinds, // + argcount, posonlyargcount, kwonlyargcount, + stacksize, // + exceptiontable); + + return r.defineRef(v, idx); + } + } + + /** + * Pseudo-{@link Codec} for records containing a reference, which + * must previously have been defined (and not still be + * {@code null}). + */ + private static class RefCodec implements Codec { + + @Override + public PyType type() { + // It's not really a sensible question + return PyBaseObject.TYPE; + } + + @Override + public void write(Writer w, Object v) { + // XXX do we do this? 
+ } + + @Override + public Map decoders() { + return Map.of(TYPE_REF, (r, ref) -> read(r)); + } + + private static Object read(Reader r) { + // The record makes reference to a cached object + int idx = r.readInt(); + try { + Object obj = r.refs.get(idx); + if (obj == null) { + throw Reader.nullObject("object ref"); + } + return obj; + } catch (IndexOutOfBoundsException iobe) { + throw Reader.badData("invalid reference"); + } + } + } +} diff --git a/core/src/main/java/org/python/modules/math.java b/core/src/main/java/org/python/modules/math.java new file mode 100644 index 000000000..face02c8c --- /dev/null +++ b/core/src/main/java/org/python/modules/math.java @@ -0,0 +1,45 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.modules; + +import org.python.core.PyTuple; + +/** + * Stop-gap {@code math} module holding just the things we use from + * code at present. Not exposed to Python. + */ +public class math { + + public static PyTuple frexp(double x) { + int exponent; + double mantissa; + + switch (exponent = Math.getExponent(x)) { + + default: + // x = m * 2**exponent and 1 <=abs(m) <2 + exponent = exponent + 1; + // x = m * 2**exponent and 0.5 <=abs(m) <1 + mantissa = Math.scalb(x, -exponent); + break; + + case 1024: // nan or inf + mantissa = x; + exponent = 0; + break; + + case -1023: + if (x == 0.) 
{ // 0.0 or -0.0 + mantissa = x; + exponent = 0; + } else { // denormalised value + // x = m * 2**exponent but 0 < abs(m) < 1 + exponent = Math.getExponent(x * 0x1p52) - 51; + mantissa = Math.scalb(x, -exponent); + } + break; + } + + return new PyTuple(mantissa, exponent); + } +} diff --git a/core/src/main/java/org/python/modules/package-info.java b/core/src/main/java/org/python/modules/package-info.java new file mode 100644 index 000000000..ce7b5edd4 --- /dev/null +++ b/core/src/main/java/org/python/modules/package-info.java @@ -0,0 +1,7 @@ +/** + * The {@code modules} package contains built-in modules implemented in + * Java. As far as possible, the class name is equal to the Python + * module name, to aid navigation. They will mostly be used from + * Python via an {@code import}, but use from Java is also possible. + */ +package org.python.modules; diff --git a/src/org/python/core/ucnhashAPI.java b/core/src/main/java/org/python/modules/ucnhashAPI.java similarity index 92% rename from src/org/python/core/ucnhashAPI.java rename to core/src/main/java/org/python/modules/ucnhashAPI.java index 788a2a380..ba17f3079 100644 --- a/src/org/python/core/ucnhashAPI.java +++ b/core/src/main/java/org/python/modules/ucnhashAPI.java @@ -6,7 +6,7 @@ * Originally written by Marc-Andre Lemburg (mal@lemburg.com). */ -package org.python.core; +package org.python.modules; public interface ucnhashAPI { public int getCchMax(); diff --git a/core/src/main/javaTemplate/org/python/core/PyFloatMethods.java b/core/src/main/javaTemplate/org/python/core/PyFloatMethods.java new file mode 100644 index 000000000..de42ac3c9 --- /dev/null +++ b/core/src/main/javaTemplate/org/python/core/PyFloatMethods.java @@ -0,0 +1,59 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +// $OBJECT_GENERATOR$ PyFloatGenerator + +import org.python.core.PyObjectUtil.NoConversion; +import static org.python.core.PyFloat.nonzero; +import static org.python.core.PyFloat.floordiv; +import static org.python.core.PyFloat.mod; +import static org.python.core.PyFloat.divmod; + +/** + * This class contains static methods implementing operations on the + * Python {@code float} object, supplementary to those defined in + * {@link PyFloat}. + * <p>

+ * These methods may cause creation of descriptors in the dictionary of + * the type. Those with reserved names in the data model will also fill + * slots in the {@code Operations} object for the type. + * <p>

+ * Implementations of binary operations defined here will have + * {@code Object} as their second argument, and should return + * {@link Py#NotImplemented} when the type in that position is not + * supported. + */ +class PyFloatMethods { + + PyFloatMethods() {} // no instances + + // $SPECIAL_METHODS$ --------------------------------------------- + + // plumbing ------------------------------------------------------ + + /** + * Convert an object to a Java double. Conversion to a double may + * raise an exception that is propagated to the caller. If the + * method throws the special exception {@link NoConversion}, the + * caller must catch it, and will normally return + * {@link Py#NotImplemented}. + * + * @param v to convert + * @return converted to {@code double} + * @throws NoConversion v is not a {@code float} or {@code int} + * @throws OverflowError v is an {@code int} too large to be a + * {@code float} + */ + static double toDouble(Object v) throws NoConversion, OverflowError { + // Check against supported types, most likely first + if (v instanceof Double) + return ((Double) v).doubleValue(); + else if (v instanceof PyFloat) + return ((PyFloat) v).value; + else + // BigInteger, PyLong, Boolean, etc. + // or throw PyObjectUtil.NO_CONVERSION; + return PyLong.convertToDouble(v); + } +} diff --git a/core/src/main/javaTemplate/org/python/core/PyLongMethods.java b/core/src/main/javaTemplate/org/python/core/PyLongMethods.java new file mode 100644 index 000000000..91da02d77 --- /dev/null +++ b/core/src/main/javaTemplate/org/python/core/PyLongMethods.java @@ -0,0 +1,558 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import java.math.BigInteger; +import static java.math.BigInteger.ZERO; +import static java.math.BigInteger.ONE; + +import org.python.core.PyObjectUtil.NoConversion; + +// $OBJECT_GENERATOR$ PyLongGenerator + +/** + * This class contains static methods implementing operations on the + * Python {@code int} object, supplementary to those defined in + * {@link PyLong}. + *

+ * These methods may cause creation of descriptors in the dictionary of + * the type. Those with reserved names in the data model will also fill + * slots in the {@code Operations} object for the type. + *

+ * Implementations of binary operations defined here will have + * {@code Object} as their second argument, and should return + * {@link Py#NotImplemented} when the type in that position is not + * supported. + */ +class PyLongMethods { + + private PyLongMethods() {} // no instances + + // $SPECIAL_METHODS$ --------------------------------------------- + + // ----------------------------------------------------- __pow__ + // Hand-crafted + static Object __pow__(Object v, Object w, Object modulus) { + modulus = (modulus == Py.None) ? null : modulus; + try { + // If any conversion fails __pow__ is not implemented + BigInteger y = toBig(w); + if (y.signum() < 0 && modulus == null) { + // No modulus and w<0: let PyFloat handle it + return floatPow(v, w, modulus); + } else { + BigInteger z = modulus == null ? null : toBig(modulus); + return pow(toBig(v), y, z); + } + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + // ---------------------------------------------------- __rpow__ + // Hand-crafted + static Object __rpow__(Object w, Object v) { + try { + // If either conversion fails __rpow__ is not implemented + BigInteger y = toBig(w); + // For negative exponent, resort to float calculation + if (y.signum() < 0) { return floatPow(v, w, null); } + BigInteger x = toBig(v); + return pow(x, y, null); + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + // -------------------------------------------------- __lshift__ + // Hand-crafted + static Object __lshift__(PyLong v, Object w) { + return __lshift__(v.value, w); + } + + static Object __lshift__(BigInteger v, Object w) { + try { + int iw; + if (v.signum() == 0) + return 0; + else if ((iw = toShift(w)) == 0) + return v; + else { + return toInt(v.shiftLeft(iw)); + } + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + static Object __lshift__(Integer v, Object w) { + if (v == 0) { + return 0; + } else { + BigInteger vv = BigInteger.valueOf(v.longValue()); + return 
__lshift__(vv, w); + } + } + + static Object __lshift__(Boolean v, Object w) { + return v ? __lshift__(ONE, w) : 0; + } + + // ------------------------------------------------- __rlshift__ + // Hand-crafted + static Object __rlshift__(Object w, Object v) { + try { + return __lshift__(toBig(v), w); + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + // -------------------------------------------------- __rshift__ + // Hand-crafted + static Object __rshift__(PyLong v, Object w) { + return __rshift__(v.value, w); + } + + static Object __rshift__(BigInteger v, Object w) { + try { + int iw; + if (v.signum() == 0) + return 0; + else if ((iw = toShift(w)) == 0) + return v; + else { + return toInt(v.shiftRight(iw)); + } + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + static Object __rshift__(Integer v, Object w) { + if (v == 0) { + return 0; + } else { + BigInteger vv = BigInteger.valueOf(v.longValue()); + return __rshift__(vv, w); + } + } + + static Object __rshift__(Boolean v, Object w) { + return v ? __rshift__(ONE, w) : 0; + } + + // ------------------------------------------------- __rrshift__ + // Hand-crafted + static Object __rrshift__(Object w, Object v) { + try { + return __rshift__(toBig(v), w); + } catch (NoConversion e) { + return Py.NotImplemented; + } + } + + // plumbing ------------------------------------------------------ + + /** + * Convert an {@code int} or its sub-class to a Java + * {@code BigInteger}. Conversion may raise an exception that is + * propagated to the caller. If the Java type of the {@code int} is + * declared, generally there is a better option than this method. We + * only use it for {@code Object} arguments. If the method throws + * the special exception {@link NoConversion}, the caller must catch + * it, and will normally return {@link Py#NotImplemented}. 
+ * + * @param v to convert + * @return converted to {@code BigInteger} + * @throws NoConversion v is not an {@code int} + */ + private static BigInteger toBig(Object v) throws NoConversion { + // Check against supported types, most likely first + if (v instanceof Integer) + return BigInteger.valueOf(((Integer) v).longValue()); + else if (v instanceof BigInteger) + return (BigInteger) v; + else if (v instanceof PyLong) + return ((PyLong) v).value; + else if (v instanceof Boolean) + return (Boolean) v ? ONE : ZERO; + + throw PyObjectUtil.NO_CONVERSION; + } + + /** + * Reduce a {@code BigInteger} result to {@code Integer} if + * possible. This makes it more likely the next operation will be + * 32-bit. + * + * @param r to reduce + * @return equal value + */ + static Object toInt(BigInteger r) { + /* + * Implementation note: r.intValueExact() is for exactly this + * purpose, but building the ArithmeticException is a huge cost. + * (2900ns is added to a 100ns __add__.) The compiler (as tested + * in JDK 11.0.9) doesn't recognise that it can be optimised + * to a jump. This version of toInt() adds around 5ns. + */ + if (r.bitLength() < 32) + return r.intValue(); + else + return r; + } + + /** + * Convert a Python {@code object} to a Java {@code int} suitable as + * a shift distance. Negative values are a {@link ValueError}, while + * positive values too large to convert are clipped to the maximum + * Java {@code int} value. 
+ * + * @param shift to interpret as an {@code int} shift + * @return {@code min(v, Integer.MAX_VALUE)} + * @throws NoConversion for values not convertible to a Python + * {@code int} + * @throws ValueError when the argument is negative + */ + private static final int toShift(Object shift) + throws NoConversion, ValueError { + BigInteger s = toBig(shift); // implicitly: check it's an int + if (s.signum() < 0) { + throw new ValueError("negative shift count"); + } else if (s.bitLength() < 32) { + return s.intValue(); + } else { + return Integer.MAX_VALUE; + } + } + + /** 2**31 aka Integer.MIN_VALUE / -1, which Java can't do. */ + private static BigInteger MINUS_INT_MIN = + BigInteger.valueOf(-(long)Integer.MIN_VALUE); + + /** + * Convenience function to create a {@link ZeroDivisionError}. + * + * @return to throw + */ + private static ZeroDivisionError zeroDivisionError() { + return new ZeroDivisionError( + "integer division or modulo by zero"); + } + + /** + * Divide x by y with integer result, following the Python sign + * convention. The convention makes sense taken together with that + * for remainders (the modulo operation {@code %}). As would be + * expected, Python guarantees that {@code x = (x//y)*y + (x%y)}. It + * also chooses that the sign of {@code x%y}, if it is not zero, + * should be the same as that of {@code y}. This causes both + * {@code /} and {@code %} to differ from their semantics in Java. + * + * @param x dividend + * @param y divisor + * @return quotient + */ + static Object divide(int x, int y) { + /* + * Differences from Java integer quotient require adjustments in + * quadrants 2 and 4 (excluding axes). A branch-free formula for + * the Python quotient in terms of the Java one Q is q(x,y) = + * Q(x+a,y)-b where a=b=0 if x and y have the same sign, or x=0, + * and otherwise b=-1 and a is +1 or -1 with the opposite sign + * to x. 
+ */ + try { + if (x << 1 != 0) { // x !=0 and x != Integer.MIN_VALUE + // x>>31 is 0 or -1 according to the sign of x + int u = x >> 31; + // y>>31 is 0 or -1 according to the sign of y + int v = y >> 31; + int a = v - u; // -1, 0 or 1 + int b = v ^ u; // 0 or -1 + // Q(x+a,y) + b where a = 1, 0, -1 and b = 0, -1 + return (x + a) / y + b; + } else { + // Special cases where the formula above fails: + // x ==0 or x == Integer.MIN_VALUE + if (x == 0 || y < -1) + // Java and Python agree + return x / y; + else if (y >= 0) // and x == Integer.MIN_VALUE + // Opposite signs: use adjusted formula + return (x + 1) / y - 1; + else + // y == -1 and x == Integer.MIN_VALUE + return MINUS_INT_MIN; + } + } catch (ArithmeticException ae) { + // This can only be because y==0 + throw zeroDivisionError(); + } + } + + /** + * {@code x mod y} with {@code int} arguments, following the Python + * sign convention. The convention makes sense taken together with + * that for floor division (the quotient operation {@code //}). As + * would be expected, Python guarantees that + * {@code x = (x//y)*y + (x%y)}. It also chooses that the sign of + * {@code x%y}, if it is not zero, should be the same as that of + * {@code y}. This causes both {@code /} and {@code %} to differ + * from their semantics in Java. + * + * @param x dividend + * @param y divisor + * @return remainder + */ + static int modulo(int x, int y) { + /* + * Differences from Java integer remainder require adjustments + * in quadrants 2 and 4 (excluding axes). A branch-free formula + * for the Python remainder in terms of the Java one R is r(x,y) + * = R(x+a,y)-b*y-a where a=b=0 if x and y have the same sign, + * or x=0, and otherwise b=-1 and a is +1 or -1 with the + * opposite sign to x.
+ */ + try { + if (x << 1 != 0) { // x !=0 and x != Integer.MIN_VALUE + // x>>31 is 0 or -1 according to the sign of x + int u = x >> 31; + // y>>31 is 0 or -1 according to the sign of y + int v = y >> 31; + int a = v - u; // -1, 0 or 1 + int b = v ^ u; // 0 or -1 + // R(x+a,y) - b*y - a where -b*y can be done with & + return (x + a) % y + (b & y) - a; + } else { + // Special cases where the formula above fails: + // x ==0 or x == Integer.MIN_VALUE + if (y < -1 || x == 0) + // Java and Python agree + return x % y; + else if (y >= 0) // and x == Integer.MIN_VALUE + // Opposite signs: use adjusted formula + return (x + 1) % y + y - 1; + else + // y == -1 and x == Integer.MIN_VALUE + return 0; + + } + } catch (ArithmeticException ae) { + // This can only be because y==0 + throw zeroDivisionError(); + } + } + + /** + * Divide x by y with integer result, following the Python sign + * convention. The convention makes sense taken together with that + * for remainders (the modulo operation {@code %}). As would be + * expected, Python guarantees that {@code x = (x//y)*y + (x%y)}. It + * also chooses that the sign of {@code x%y}, if it is not zero, + * should be the same as that of {@code y}. This causes both + * {@code /} and {@code %} to differ from their semantics in Java. + * + * @param x dividend + * @param y divisor + * @return quotient + */ + static BigInteger divide(BigInteger x, BigInteger y) { + /* + * Getting signs correct for integer division is accomplished by + * adjusting x in the cases where the signs are opposite. This + * convention makes sense when you consider it with modulo. + */ + int ySign = y.signum(); + if (ySign == 0) { + throw zeroDivisionError(); + } else if (ySign < 0) { + if (x.signum() > 0) { x = x.subtract(y).subtract(ONE); } + } else { + if (x.signum() < 0) { x = x.subtract(y).add(ONE); } + } + return x.divide(y); + } + + /** + * {@code x mod y} with {@code BigInteger} arguments, following the + * Python sign convention. 
The convention makes sense taken together + * with that for floor division (the quotient operation {@code //}). + * As would be expected, Python guarantees that + * {@code x = (x//y)*y + (x%y)}. It also chooses that the sign of + * {@code x%y}, if it is not zero, should be the same as that of + * {@code y}. This causes both {@code /} and {@code %} to differ + * from their semantics in Java. + * + * @param x dividend + * @param y divisor + * @return remainder + */ + static BigInteger modulo(BigInteger x, BigInteger y) { + BigInteger q = divide(x, y); + return x.subtract(q.multiply(y)); + } + + /** + * {@code divmod(x,y)} with {@code int} arguments, following the + * Python sign convention. + * + * @param x dividend + * @param y divisor + * @return quotient and remainder + */ + static PyTuple divmod(int x, int y) { + try { + /* + * Compute the quotient as in divide(x,y), as an int (except + * in a corner-case), and the remainder from it. + */ + int q; + if (x << 1 != 0) { // x !=0 and x != Integer.MIN_VALUE + // x>>31 is 0 or -1 according to the sign of x + int u = x >> 31; + // y>>31 is 0 or -1 according to the sign of y + int v = y >> 31; + int a = v - u; // -1, 0 or 1 + int b = v ^ u; // 0 or -1 + // Q(x+a,y) + b where a = 1, 0, -1 and b = 0, -1 + q = (x + a) / y + b; + } else { + // Special cases where the formula above fails: + // x ==0 or x == Integer.MIN_VALUE + if (y < -1 || x == 0) + // Java and Python agree + q = x / y; + else if (y >= 0) // and x == Integer.MIN_VALUE + // Opposite signs: use adjusted formula + q = (x + 1) / y - 1; + else + // y == -1 and x == Integer.MIN_VALUE + return new PyTuple(MINUS_INT_MIN, 0); + } + return new PyTuple(q, x - q * y); + } catch (ArithmeticException ae) { + // This can only be because y==0 + throw zeroDivisionError(); + } + } + + /** + * {@code divmod(x,y)} with {@code BigInteger} arguments, following + * the Python sign convention.
+ * + * @param x dividend + * @param y divisor + * @return quotient and remainder + */ + static PyTuple divmod(BigInteger x, BigInteger y) { + BigInteger q = divide(x, y); + return new PyTuple(q, x.subtract(q.multiply(y))); + } + + /** + * Python true-division of {@code BigInteger} arguments. + * + * @param x dividend + * @param y divisor + * @return quotient + */ + static final double trueDivide(BigInteger x, BigInteger y) { + int[] xe = new int[1]; + int[] ye = new int[1]; + double xd = scaledDoubleValue(x, xe); + double yd = scaledDoubleValue(y, ye); + + if (yd == 0) { throw zeroDivisionError(); } + + double q = xd / yd; + int exp = xe[0] - ye[0]; + + if (exp > Integer.MAX_VALUE / 8) { + throw PyLong.tooLarge("integer division result", "float"); + } else if (exp < -(Integer.MAX_VALUE / 8)) { return 0.0; } + + q = q * Math.pow(2.0, exp * 8); + + if (Double.isInfinite(q)) { + throw PyLong.tooLarge("integer division result", "float"); + } + + return q; + } + + // Helper for trueDivide (fresh from Jython 2, so no comments) + private static final double scaledDoubleValue(BigInteger val, + int[] exp) { + double x = 0; + int signum = val.signum(); + byte[] digits; + + if (signum >= 0) { + digits = val.toByteArray(); + } else { + digits = val.negate().toByteArray(); + } + + int count = 8; + int i = 0; + + if (digits[0] == 0) { i++; count++; } + count = count <= digits.length ? count : digits.length; + + while (i < count) { x = x * 256 + (digits[i] & 0xff); i++; } + exp[0] = digits.length - i; + return signum * x; + } + + /** + * Helper for the case where {@code y<0}, using + * {@link PyFloat#__pow__(Object, Object, Object)} if possible. 
+ */ + private static Object floatPow(Object ox, Object oy, + Object modulus) { + double x = PyLong.asDouble(ox); + if (x != 0.0) { + return PyFloat.__pow__(x, oy, modulus); + } else { + throw new ZeroDivisionError("zero to a negative power"); + } + } + + /** + * The implementation of exponentiation (behind {@code __pow__} and + * {@code __rpow__}) in terms of {@code BigInteger}. {@code __pow__} + * has a ternary form in which an integer modulus is provided. + * + * @param x base + * @param y exponent + * @param z the modulus (or {@code null}) + * @return {@code x**y} or {@code x**y mod z} + */ + private static Object pow(BigInteger x, BigInteger y, + BigInteger z) { + + if (z == null) { + return toInt(x.pow(y.intValue())); + + } else { + // Identify some special cases for quick treatment + if (z.signum() == 0) { + throw new ValueError("pow(x, y, z) with z == 0"); + } else if (z.abs().equals(ONE)) { + return 0; + } else if (z.signum() < 0) { + // Handle negative modulo specially + y = x.modPow(y, z.negate()); + if (y.signum() > 0) { + return toInt(z.add(y)); + } else { + return toInt(y); + } + } else { + return toInt(x.modPow(y, z)); + } + } + } +} diff --git a/core/src/main/javaTemplate/org/python/core/PyUnicodeMethods.java b/core/src/main/javaTemplate/org/python/core/PyUnicodeMethods.java new file mode 100644 index 000000000..5840e04d0 --- /dev/null +++ b/core/src/main/javaTemplate/org/python/core/PyUnicodeMethods.java @@ -0,0 +1,44 @@ +package org.python.core; + +import org.python.core.PyObjectUtil.NoConversion; +import static org.python.core.PyUnicode.adapt; + +import java.util.Iterator; + +// $OBJECT_GENERATOR$ PyUnicodeGenerator + +/** + * This class contains static methods implementing operations on the + * Python {@code str} object, supplementary to those defined in + * {@link PyUnicode}. + *

+ * Implementations of binary operations defined here will have + * {@code Object} as their second argument, and should return + * {@link Py#NotImplemented} when the type in that position is not + * supported. + */ +class PyUnicodeMethods { + + private PyUnicodeMethods() {} // no instances + + // $SPECIAL_METHODS$ --------------------------------------------- + + // plumbing ------------------------------------------------------ + + /** + * Compare sequences for equality. This is a little simpler than + * {@code compareTo}. + * + * @param a sequence + * @param b another + * @return whether values equal + */ + private static boolean eq(PySequence.OfInt a, PySequence.OfInt b) { + // Lengths must be equal + if (a.length() != b.length()) { return false; } + // Scan the code points in a and b + Iterator ib = b.iterator(); + for (int c : a) { if (c != ib.next()) { return false; } } + return true; + } +} diff --git a/core/src/main/javadoc/project-styles.css b/core/src/main/javadoc/project-styles.css new file mode 100644 index 000000000..f7d383435 --- /dev/null +++ b/core/src/main/javadoc/project-styles.css @@ -0,0 +1,20 @@ +@charset "UTF-8"; + +/* Styles supplementary to the standard Javadoc doclet. */ + +/* Table layout for framed data. */ +table.framed-layout { + border: 1px solid black; + border-collapse: collapse; +} + +table.framed-layout th, table.framed-layout td { + border: 1px solid black; + text-align: center; + min-width: 2em; +} + +table.framed-layout td.label { + text-align: left; + font-weight: bold; +} \ No newline at end of file diff --git a/core/src/test/java/org/python/core/AbstractAPITest.java b/core/src/test/java/org/python/core/AbstractAPITest.java new file mode 100644 index 000000000..1389b0342 --- /dev/null +++ b/core/src/test/java/org/python/core/AbstractAPITest.java @@ -0,0 +1,159 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigInteger; +import java.util.Iterator; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Test the {@link Abstract} API class on a variety of types. We are + * looking for correct behaviour in the cases attempted but mostly + * testing the invocation of special methods through the operations + * objects of the particular implementation classes. + *

+ * To reach our main goal, we need only try enough types to exercise + * every abstract method once in some type. + */ +@DisplayName("The API class Abstract") +class AbstractAPITest extends UnitTestSupport { + + /** A shorthand for the Python {@code int} type. */ + static PyType INT = PyLong.TYPE; + + /** + * This abstract base forms a check-list of methods we mean to test. + */ + abstract static class Standard { + + abstract void supports_repr() throws Throwable; + + abstract void supports_str() throws Throwable; + + abstract void supports_hash() throws Throwable; + + abstract void supports_isTrue() throws Throwable; + + abstract void supports_richCompare() throws Throwable; + + abstract void supports_getAttr_String() throws Throwable; + + abstract void supports_lookupAttr_String() throws Throwable; + } + + /** The {@code int} implementations all behave like this. */ + abstract static class IntLike extends Standard { + + // Working variables for the tests + Object zero; + Object small; + Object large; + Object negative; + List all; + + void setup(Object zero, Object small, Object large, Object negative) { + this.zero = zero; + this.small = small; + this.large = large; + this.negative = negative; + this.all = List.of(zero, small, large, negative); + } + + @Override + @Test + void supports_repr() throws Throwable { + for (Object v : all) { assertEquals(v.toString(), Abstract.repr(v)); } + } + + @Override + @Test + void supports_str() throws Throwable { + for (Object v : all) { assertEquals(v.toString(), Abstract.str(v)); } + } + + @Override + @Test + void supports_hash() throws Throwable { + for (Object v : all) { assertEquals(v.hashCode(), Abstract.hash(v)); } + } + + @Override + @Test + void supports_isTrue() throws Throwable { + // Zero is false + assertFalse(Abstract.isTrue(zero)); + // The rest are true + Iterator rest = all.listIterator(1); + while (rest.hasNext()) { assertTrue(Abstract.isTrue(rest.next())); } + } + + @Override + @Test + void 
supports_richCompare() throws Throwable { + // Let's not try to be exhaustive + assertEquals(Boolean.TRUE, Abstract.richCompare(zero, small, Comparison.LT)); + assertEquals(Boolean.TRUE, Abstract.richCompare(large, large, Comparison.LE)); + assertEquals(Boolean.TRUE, Abstract.richCompare(zero, negative, Comparison.GT)); + assertEquals(Boolean.TRUE, Abstract.richCompare(large, large, Comparison.GE)); + assertEquals(Boolean.TRUE, Abstract.richCompare(zero, "zero", Comparison.NE)); + assertEquals(Boolean.TRUE, Abstract.richCompare(zero, 0, Comparison.EQ)); + assertEquals(Boolean.FALSE, Abstract.richCompare(small, negative, Comparison.LT)); + assertEquals(Boolean.FALSE, Abstract.richCompare(small, small, Comparison.GT)); + assertEquals(Boolean.FALSE, Abstract.richCompare(large, small, Comparison.LE)); + assertEquals(Boolean.FALSE, Abstract.richCompare(large, large, Comparison.NE)); + assertEquals(Boolean.FALSE, Abstract.richCompare(zero, small, Comparison.GE)); + assertEquals(Boolean.FALSE, Abstract.richCompare(zero, "zero", Comparison.EQ)); + } + + @Override + @Test + void supports_getAttr_String() throws Throwable { + // An int has a "real" attribute that is itself + // But we haven't implemented it yet + // for (Object v : all) { + // assertSame(v, Abstract.getAttr(v, "real")); + // } + // An int doesn't have a "foo" attribute + assertThrows(AttributeError.class, () -> Abstract.getAttr(small, "foo")); + } + + @Override + @Test + void supports_lookupAttr_String() throws Throwable { + // An int doesn't have a foo attribute + assertNull(Abstract.lookupAttr(small, "foo")); + } + } + + @Nested + @DisplayName("with Integer argument") + class WithInteger extends IntLike { + + @BeforeEach + void setup() throws Throwable { setup(0, 42, Integer.MAX_VALUE, Integer.MIN_VALUE); } + } + + @Nested + @DisplayName("with BigInteger argument") + class WithBigInteger extends IntLike { + + @BeforeEach + void setup() throws Throwable { + setup(BigInteger.valueOf(0), 
BigInteger.valueOf(42), + BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.TEN), + BigInteger.valueOf(Long.MIN_VALUE).multiply(BigInteger.TEN)); + } + } + +} diff --git a/core/src/test/java/org/python/core/AbstractNumberAPITest.java b/core/src/test/java/org/python/core/AbstractNumberAPITest.java new file mode 100644 index 000000000..6f7e4b5a2 --- /dev/null +++ b/core/src/test/java/org/python/core/AbstractNumberAPITest.java @@ -0,0 +1,274 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static java.math.BigInteger.TEN; +import static java.math.BigInteger.ZERO; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigInteger; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Test the {@link PyNumber} API class on a variety of types. We are + * looking for correct behaviour in the cases attempted but mostly + * testing the invocation of special methods through the operations + * objects of the particular implementation classes. + *

+ * To reach our main goal, we need only try enough types to exercise + * every abstract method once in some type. + */ +@DisplayName("The API class PyNumber") +class AbstractNumberAPITest extends UnitTestSupport { + + /** A shorthand for the Python {@code int} type. */ + private static PyType INT = PyLong.TYPE; + + private static final BigInteger INT_MIN = BigInteger.valueOf(Integer.MIN_VALUE); + private static final BigInteger INT_MAX = BigInteger.valueOf(Integer.MAX_VALUE); + + /** + * This abstract base forms a check-list of methods we mean to test. + */ + abstract static class Standard { + + abstract void supports_negative() throws Throwable; + + abstract void supports_absolute() throws Throwable; + + abstract void supports_add() throws Throwable; + + abstract void supports_subtract() throws Throwable; + + abstract void supports_multiply() throws Throwable; + + abstract void supports_or() throws Throwable; + + abstract void supports_and() throws Throwable; + + abstract void supports_xor() throws Throwable; + + abstract void supports_index() throws Throwable; + + abstract void supports_asSize() throws Throwable; + + abstract void supports_asLong() throws Throwable; + } + + /** The {@code int} implementations all behave like this. */ + abstract static class IntLike extends Standard { + + // Working variables for the tests + Object zero; + Object small; + Object large; + Object negative; + List all; + List other; + + void setup(Object zero, Object small, Object large, Object negative) { + this.zero = zero; + this.small = small; + this.large = large; + this.negative = negative; + this.all = List.of(zero, small, large, negative); + } + + void other(Object... otherValues) { this.other = List.of(otherValues); } + + /** + * Assert that the argument is an {@code Integer} if it could be so + * represented. (Simple unary operations may not normalise this way, + * and none of them need to, but it is desirable if it may be done + * quickly.) 
+ * + * @param result to test + */ + static void assertRightSize(Object result) { + boolean ok = result instanceof Integer; + if (!ok && result instanceof BigInteger) { + // Justify by being outside the Integer range + BigInteger r = (BigInteger)result; + ok = r.compareTo(INT_MIN) < 0 || r.compareTo(INT_MAX) > 0; + } + assertTrue(ok, () -> String.format("result %s should be an Integer", result)); + } + + @Override + @Test + void supports_negative() throws Throwable { + for (Object v : all) { + Object r = PyNumber.negative(v); + BigInteger e = PyLong.asBigInteger(v).negate(); + assertEquals(e, PyLong.asBigInteger(r)); + if (v instanceof Integer && !v.equals(Integer.MIN_VALUE)) { assertRightSize(r); } + } + } + + @Override + @Test + void supports_absolute() throws Throwable { + for (Object v : all) { + Object r = PyNumber.absolute(v); + BigInteger e = PyLong.asBigInteger(v).abs(); + assertEquals(e, PyLong.asBigInteger(r)); + if (v instanceof Integer) { assertRightSize(r); } + } + } + + @Override + @Test + void supports_add() throws Throwable { + for (Object v : all) { + for (Object w : other) { + Object r = PyNumber.add(v, w); + BigInteger vv = PyLong.asBigInteger(v); + BigInteger ww = PyLong.asBigInteger(w); + BigInteger e = vv.add(ww); + assertEquals(e, PyLong.asBigInteger(r)); + assertRightSize(r); + } + } + } + + @Override + @Test + void supports_subtract() throws Throwable { + for (Object v : all) { + for (Object w : other) { + Object r = PyNumber.subtract(v, w); + BigInteger vv = PyLong.asBigInteger(v); + BigInteger ww = PyLong.asBigInteger(w); + BigInteger e = vv.subtract(ww); + assertEquals(e, PyLong.asBigInteger(r)); + assertRightSize(r); + } + } + } + + @Override + @Test + void supports_multiply() throws Throwable { + for (Object v : all) { + for (Object w : other) { + Object r = PyNumber.multiply(v, w); + BigInteger vv = PyLong.asBigInteger(v); + BigInteger ww = PyLong.asBigInteger(w); + BigInteger e = vv.multiply(ww); + assertEquals(e, 
PyLong.asBigInteger(r)); + assertRightSize(r); + } + } + } + + @Override + @Test + void supports_or() throws Throwable { + for (Object v : all) { + for (Object w : other) { + Object r = PyNumber.or(v, w); + BigInteger vv = PyLong.asBigInteger(v); + BigInteger ww = PyLong.asBigInteger(w); + BigInteger e = vv.or(ww); + assertEquals(e, PyLong.asBigInteger(r)); + assertRightSize(r); + } + } + } + + @Override + @Test + void supports_and() throws Throwable { + for (Object v : all) { + for (Object w : other) { + Object r = PyNumber.and(v, w); + BigInteger vv = PyLong.asBigInteger(v); + BigInteger ww = PyLong.asBigInteger(w); + BigInteger e = vv.and(ww); + assertEquals(e, PyLong.asBigInteger(r)); + assertRightSize(r); + } + } + } + + @Override + @Test + void supports_xor() throws Throwable { + for (Object v : all) { + for (Object w : other) { + Object r = PyNumber.xor(v, w); + BigInteger vv = PyLong.asBigInteger(v); + BigInteger ww = PyLong.asBigInteger(w); + BigInteger e = vv.xor(ww); + assertEquals(e, PyLong.asBigInteger(r)); + assertRightSize(r); + } + } + } + + @Override + @Test + void supports_index() throws Throwable { + for (Object v : all) { + Object r = PyNumber.index(v); + // For an int-like, it should be the same + assertEquals(v, r); + } + } + + @Override + @Test + void supports_asSize() throws Throwable { + for (Object v : all) { + Object r = PyNumber.asSize(v, null); + BigInteger e = PyLong.asBigInteger(v); + // For an int-like, it should be the same, but clipped. 
+ if (e.compareTo(INT_MAX) > 0) + e = INT_MAX; + else if (e.compareTo(INT_MIN) < 0) + e = INT_MIN; + assertEquals(e, PyLong.asBigInteger(r)); + assertPythonType(INT, r); + } + } + + @Override + @Test + void supports_asLong() throws Throwable { + for (Object v : all) { + Object r = PyNumber.asLong(v); + // For an int-like, it should be the same + assertEquals(v, r); + } + } + } + + @Nested + @DisplayName("with Integer argument") + class WithInteger extends IntLike { + + @BeforeEach + void setup() throws Throwable { + setup(0, 42, Integer.MAX_VALUE, Integer.MIN_VALUE); + other(0, 932, ZERO, TEN); + } + } + + @Nested + @DisplayName("with BigInteger argument") + class WithBigInteger extends IntLike { + + @BeforeEach + void setup() throws Throwable { + setup(ZERO, BigInteger.valueOf(42), BigInteger.valueOf(Long.MAX_VALUE).multiply(TEN), + BigInteger.valueOf(Long.MIN_VALUE).multiply(TEN)); + other(0, 932, Integer.MIN_VALUE, ZERO, TEN.negate(), TEN.pow(10)); + } + } +} diff --git a/core/src/test/java/org/python/core/AbstractSequenceAPITest.java b/core/src/test/java/org/python/core/AbstractSequenceAPITest.java new file mode 100644 index 000000000..f5727ad39 --- /dev/null +++ b/core/src/test/java/org/python/core/AbstractSequenceAPITest.java @@ -0,0 +1,565 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; 
+import org.junit.jupiter.params.provider.MethodSource; + +/** + * Test the {@link PySequence} API class on a variety of types. We + * are looking for correct behaviour in the cases attempted but + * mostly testing the invocation of special methods through the + * operations objects of the particular implementation classes. + *

+ * To reach our main goal, we need only try enough types to exercise + * every abstract method once in some type. + */ +@DisplayName("In the Abstract API for sequences") +class AbstractSequenceAPITest extends UnitTestSupport { + + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that search or concatenate their arguments. Each argument + * object provides a reference value and a test object compatible + * with the parameterised test methods. + * + * @return the examples for non-mutating tests. + */ + static Stream readableProvider() { + return Stream.of(// + bytesExample("", "abc"), // + bytesExample("a", "bc"), // + bytesExample("café", " crème"), // bytes > 127 + tupleExample(Collections.emptyList(), List.of(42)), // + tupleExample(List.of(42), Collections.emptyList()), // + tupleExample( // + List.of(-1, 0, 1, 42 * 42, "y", -1e42, 42 * 42), // + List.of("y", -1, 42 * 42)), + tupleExample(List.of(Py.None, 1, PyLong.TYPE), List.of("other", List.of(1, 2, 3))), + stringExample("a", "bc"), // + stringExample("", "abc"), // + stringExample("Σωκρατικὸς", " λόγος"), // + unicodeExample("a", "bc"), // + unicodeExample("", "abc"), // + unicodeExample("Σωκρατικὸς", " λόγος"), // + unicodeExample("画蛇", "添足"), // + /* + * The following contain non-BMP characters 🐍=U+1F40D and + * 🦓=U+1F993, each of which Python must consider to be a single + * character. + */ + // In the Java String realisation each is two chars + stringExample("one 🐍", "🦓 two"), // 🐍=\ud83d\udc0d + stringExample("🐍🦓", ""), // 🐍=\ud83d\udc0d + // In the PyUnicode realisation each is one int + unicodeExample("one 🐍", "🦓 two"), // 🐍=U+1F40D + unicodeExample("🐍🦓", ""), // 🐍=U+1F40D + // Surrogate concatenation should not create U+1F40D + stringExample("\udc0d A \ud83d", "\udc0d B"), + unicodeExample("\udc0d A \ud83d", "\udc0d B")); + } + + /** + * Construct an example with two Python {@code bytes} objects, from + * text. 
One is {@code self} in the test, and the other is to be a + * second argument when needed (for testing {@code concatenation}, + * say). + * + * @param s to encode to bytes ({@code self}) + * @param t to encode to bytes ({@code other}) + * @return the example (a reference value, test object, and other) + */ + static Arguments bytesExample(String s, String t) { + try { + return bytesExample(s.getBytes("UTF-8"), t.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + fail("failed to encode bytes"); + return arguments(); + } + } + + /** + * Construct an example with two Python {@code bytes} objects, from + * bytes. One is {@code self} in the test, and the other is to be a + * second argument when needed (for testing {@code concatenation}, + * say). + * + * @param a the "self" bytes + * @param b the other bytes + * @return the example (a reference value, test object, and other) + */ + static Arguments bytesExample(byte[] a, byte[] b) { + ArrayList vv = new ArrayList<>(a.length); + for (byte x : a) { vv.add(x & 0xff); } + ArrayList ww = new ArrayList<>(b.length); + for (byte x : b) { ww.add(x & 0xff); } + Object v = new PyBytes(a), w = new PyBytes(b); + return arguments(PyType.of(v).name, vv, v, ww, w); + } + + /** + * Construct an example with two Python {@code tuple}, from + * arbitrary objects. One is {@code self} in the test, and the other + * is to be a second argument when needed (for testing + * {@code concatenation}, say). + * + * @param a the objects for {@code self} + * @param b the objects for the other + * @return the example (a reference value, test object, and other) + */ + static Arguments tupleExample(List a, List b) { + Object v = new PyTuple(a), w = new PyTuple(b); + return arguments(PyType.of(v).name, a, v, b, w); + } + + /** + * Construct an example with two Python {@code str}, each + * implemented by a Java {@code String}. 
One is {@code self} in the + * test, and the other is to be a second argument when needed (for + * testing {@code concatenation}, say). + * + * @param a the String to treat as a Python sequence + * @param b a second Python sequence as the other argument + * @return the example (a reference value, test object, and other) + */ + static Arguments stringExample(String a, String b) { + // The sequence element of a str is a str of one char. + List aa = listCodePoints(a); + List bb = listCodePoints(b); + return arguments("str(String)", aa, a, bb, b); + } + + /** + * Construct an example with two Python {@code str}, each + * implemented by a {@code PyUnicode}. One is {@code self} in the + * test, and the other is to be a second argument when needed (for + * testing {@code concatenation}, say). + * + * @param a the String to treat as a Python sequence + * @param b a second Python sequence as the other argument + * @return the example (a reference value, test object, and other) + */ + static Arguments unicodeExample(String a, String b) { + // The sequence element of a str is a str of one code point. + List vv = listCodePoints(a); + List ww = listCodePoints(b); + Object v = newPyUnicode(a), w = newPyUnicode(b); + return arguments("str(PyUnicode)", vv, v, ww, w); + } + + /** Break the String into Python {@code str} code points */ + private static List listCodePoints(String a) { + return a.codePoints().mapToObj(PyUnicode::fromCodePoint).collect(Collectors.toList()); + } + + /** + * Test {@link PySequence#size(Object) PySequence.size}. The methods + * {@code size()} and {@code getItem()} are in a sense fundamental + * since we shall use them to access members when testing the result + * of other operations. 
+ * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @throws Throwable from the implementation + */ + @DisplayName("PySequence.size") + @ParameterizedTest(name = "{0}: size({2})") + @MethodSource("readableProvider") + @SuppressWarnings("static-method") + void supports_size(String type, List ref, Object obj) throws Throwable { + Object r = PySequence.size(obj); + assertEquals(ref.size(), r); + } + + /** + * Test {@link PySequence#getItem(Object, Object) + * PySequence.getItem} for integer index. The methods {@code size()} + * and {@code getItem()} are in a sense fundamental since we shall + * use them to access members when testing the result of other + * operations. + * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @throws Throwable from the implementation + */ + @DisplayName("PySequence.getItem(int)") + @ParameterizedTest(name = "{0}: getItem({2}, i)") + @MethodSource("readableProvider") + @SuppressWarnings("static-method") + void supports_getItem(String type, List ref, Object obj) throws Throwable { + final int N = ref.size(); + for (int i = 0; i < N; i++) { + Object r = PySequence.getItem(obj, i); + assertEquals(ref.get(i), r); + } + // And again relative to the end -1...-N + for (int i = 1; i <= N; i++) { + Object r = PySequence.getItem(obj, -i); + assertEquals(ref.get(N - i), r); + } + Class ie = IndexError.class; + assertThrows(ie, () -> PySequence.getItem(obj, -(N + 1))); + assertThrows(ie, () -> PySequence.getItem(obj, N)); + } + + /** + * Test {@link PySequence#getItem(Object, Object) + * PySequence.getItem} for slice index. 
+ * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @throws Throwable from the implementation + */ + @DisplayName("PySequence.getItem(slice)") + @ParameterizedTest(name = "{0}: getItem({2}, slice(p,q,s))") + @MethodSource("readableProvider") + @SuppressWarnings("static-method") + void supports_getItemSlice(String type, List ref, Object obj) throws Throwable { + + // Get size and locate middle + final int N = ref.size(), M = (N + 1) / 2; + getItemTest(ref, obj, new PySlice(0, N)); + getItemTest(ref, obj, new PySlice(0, M)); + getItemTest(ref, obj, new PySlice(0, M, 2)); + getItemTest(ref, obj, new PySlice(M, N)); + // End-relative + getItemTest(ref, obj, new PySlice(0, -1)); + getItemTest(ref, obj, new PySlice(M, -1, 2)); + getItemTest(ref, obj, new PySlice(N, -1)); + getItemTest(ref, obj, new PySlice(-1, 0, -2)); + getItemTest(ref, obj, new PySlice(-1, M, -2)); + getItemTest(ref, obj, new PySlice(-1, N)); + // Out of bounds + getItemTest(ref, obj, new PySlice(-1000, 1000)); + getItemTest(ref, obj, new PySlice(-1000, 1000, 3)); + getItemTest(ref, obj, new PySlice(-1000, M)); + getItemTest(ref, obj, new PySlice(M, 1000)); + } + + /** + * Perform one test of + * {@link #supports_getItemSlice(String, List, Object)} with given + * slice. 
+ * + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @param s slice to use as the index + * @throws Throwable from the implementation + */ + private static void getItemTest(List ref, Object obj, PySlice s) throws Throwable { + // Use library to decode s for a sequence of length ref.size(), but check constraints + PySlice.Indices i = s.new Indices(ref.size()); + if (i.slicelength == 0) { + // CPython does not guarantee start==stop for an empty slice, but our logic does + assertEquals(i.start, i.stop, "start==stop"); + } else if (i.step > 0) { + // stop index consistent with addressing equation: beyond start, at most slicelength steps away + assertTrue(i.stop > i.start); + assertTrue(i.stop <= i.start + i.slicelength * i.step); + } else if (i.step < 0) { + // stop index consistent with addressing equation: before start, at most slicelength steps away + assertTrue(i.stop < i.start); + assertTrue(i.stop >= i.start + i.slicelength * i.step); + } + // Now check the actual method we're testing + Object result = PySequence.getItem(obj, s); + sliceCheck(result, ref, obj, i.start, i.stop, i.step); + } + + /** + * Test {@link PySequence#concat(Object, Object) PySequence.concat}: the result has the type of {@code obj} and holds the elements of {@code ref} followed by those of {@code ref2}. + * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @param ref2 a list having elements equal to those of {@code obj2} + * @param obj2 argument to method + * @throws Throwable from the implementation + */ + @DisplayName("PySequence.concat") + @ParameterizedTest(name = "{0}: concat({2}, {4})") + @MethodSource("readableProvider") + @SuppressWarnings("static-method") + void supports_concat(String type, List ref, Object obj, List ref2, Object obj2) + throws Throwable { + Object r = PySequence.concat(obj, obj2); + final int N = ref.size(), T = ref2.size(); + assertEquals(PyType.of(obj), PyType.of(r)); // Same type + assertEquals(N + T, PySequence.size(r)); // Right length + // Now check all the elements (if N+T != 0). 
+ for (int i = 0; i < N + T; i++) { + Object e = PySequence.getItem(r, i); + if (i < N) + assertEquals(ref.get(i), e); + else + assertEquals(ref2.get(i - N), e); + } + } + + /** + * Test {@link PySequence#repeat(Object, int) PySequence.repeat} + * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @throws Throwable from the implementation + */ + @DisplayName("PySequence.repeat") + @ParameterizedTest(name = "{0}: repeat({2}, n)") + @MethodSource("readableProvider") + @SuppressWarnings("static-method") + void supports_repeat(String type, List ref, Object obj) throws Throwable { + final int N = ref.size(); + // Try this for a few repeat sizes. + for (int n = 0; n <= 3; n++) { + Object r = PySequence.repeat(obj, n); + assertEquals(PyType.of(obj), PyType.of(r)); // Same type + assertEquals(N * n, PySequence.size(r)); // Right length + // Now check all the elements (if n*N != 0). + for (int i = 0; i < N * n; i++) { + Object e = PySequence.getItem(r, i); + assertEquals(ref.get(i % N), e); + } + } + } + + // /** + // * Test {@link PySequence#setItem(Object, int, Object) + // * PySequence.setItem} + // */ + // void supports_setItem(String type, List ref, Object obj) + // throws Throwable{fail("not implemented");} + // + // /** + // * Test {@link PySequence#delItem(Object, int) PySequence.delItem} + // */ + // void supports_delItem(String type, List ref, Object obj) + // throws Throwable{fail("not implemented");} + + /** + * Test {@link PySequence#getSlice(Object, int, int) + * PySequence.getSlice} + * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @throws Throwable from the implementation + */ + @DisplayName("PySequence.getSlice") + @ParameterizedTest(name = "{0}: getSlice({2}, p, q)") + @MethodSource("readableProvider") + 
@SuppressWarnings("static-method") + void supports_getSlice(String type, List ref, Object obj) throws Throwable { + // Get size and locate middle + final int N = ref.size(), M = (N + 1) / 2; + getSliceTest(ref, obj, 0, N); + getSliceTest(ref, obj, 0, M); + getSliceTest(ref, obj, M, N); + // End-relative + getSliceTest(ref, obj, 0, -1); + getSliceTest(ref, obj, M, -1); + getSliceTest(ref, obj, N, -1); + getSliceTest(ref, obj, -1, 0); + getSliceTest(ref, obj, -1, M); + getSliceTest(ref, obj, -1, N); + // Out of bounds + getSliceTest(ref, obj, -1000, 1000); + getSliceTest(ref, obj, -1000, M); + getSliceTest(ref, obj, M, 1000); + } + + /** + * Perform one test of + * {@link #supports_getSlice(String, List, Object)} with given + * indices. + * + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @param p start index + * @param q stop index (exclusive) + * @throws Throwable from the implementation + */ + private static void getSliceTest(List ref, Object obj, int p, int q) throws Throwable { + Object result = PySequence.getSlice(obj, p, q); + sliceCheck(result, ref, obj, p, q, 1); + } + + /** + * Check a slice result against items obtained by indexing a + * reference list. 
+ * + * @param result of invocation + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @param p start index (negative means end-relative; bounded to 0..N here) + * @param q stop index (exclusive; negative means end-relative; bounded to 0..N here) + * @param s index step (non-zero: the reference loop advances by {@code s}) + * @throws Throwable from the implementation + */ + private static void sliceCheck(Object result, List ref, Object obj, int p, int q, int s) + throws Throwable { + + final int N = ref.size(); + + // Deal with end-relative addressing of the source sequence + if (p < 0) { p = p + N; } + if (q < 0) { q = q + N; } + + // Effective indices are the bounded version of each + p = Math.max(Math.min(p, N), 0); + q = Math.max(Math.min(q, N), 0); + + // Form expected result by stepping naïvely through ref + List expected = new ArrayList<>(); + for (int i = p; i >= 0 && i < N; i += s) { + // Check we have not passed q in the direction of travel + if (s > 0 && i >= q || s < 0 && i <= q) { break; } + expected.add(ref.get(i)); + } + + // Check the result slice against the reference + assertEquals(PyType.of(obj), PyType.of(result)); // Same type + final int M = expected.size(); + assertEquals(M, PySequence.size(result)); // Right length + for (int i = 0; i < M; i++) { + Object e = PySequence.getItem(result, i); + assertEquals(expected.get(i), e); + } + } + + // /** + // * Test {@link PySequence#setSlice(Object, int, int, Object) + // * PySequence.setSlice} + // */ + // void supports_setSlice(String type, List ref, Object obj) + // throws Throwable {fail("not implemented");} + // + // /** + // * Test {@link PySequence#delSlice(Object, int, int) + // * PySequence.delSlice} + // */ + // void supports_delSlice(String type, List ref, Object obj) + // throws Throwable {fail("not implemented");} + + // XXX Awaits implementation of PySequence.tuple + // /** + // * Test {@link PySequence#tuple(Object) PySequence.tuple} + // * + // * @param type unused (for parameterised name only) + // * @param ref a list having elements equal to those of {@code obj} + // * @param 
obj Python object under test + // * @throws Throwable from the implementation + // */ + // @DisplayName("PySequence.tuple") + // @ParameterizedTest(name = "{0}: tuple({2})") + // @MethodSource("readableProvider") + // @SuppressWarnings("static-method") + // void supports_tuple(String type, List ref, Object obj) + // throws Throwable { + // PyTuple result = PySequence.tuple(obj); + // checkItems(ref, result); + // } + + // XXX Awaits implementation of PySequence.list + // /** + // * Test {@link PySequence#list(Object) PySequence.list} + // * + // * @param type unused (for parameterised name only) + // * @param ref a list having elements equal to those of {@code obj} + // * @param obj Python object under test + // * @throws Throwable from the implementation + // */ + // @DisplayName("PySequence.list") + // @ParameterizedTest(name = "{0}: list({2})") + // @MethodSource("readableProvider") + // @SuppressWarnings("static-method") + // void supports_list(String type, List ref, Object obj) + // throws Throwable { + // PyList result = PySequence.list(obj); + // checkItems(ref, result); + // } + + /** + * Check a test result for size and content. The result must allow + * indexing with {@link PySequence#getItem(Object, Object)}. 
+ * + * @param ref a list having elements expected of {@code result} + * @param result Python object under test + * @throws Throwable from the implementation + */ + private static void checkItems(List ref, Object result) throws Throwable { + int L = ref.size(); + assertEquals(L, PySequence.size(result)); + for (int i = 0; i < L; i++) { assertEquals(ref.get(i), PySequence.getItem(result, i)); } + } + + /** + * Test {@code PySequence.fastList} (Java API returning a Java {@code List}); currently disabled. + * + * @param type unused (for parameterised name only) + * @param ref a list having elements equal to those of {@code obj} + * @param obj Python object under test + * @throws Throwable from the implementation + */ + @Disabled("Missing feature: fastList() from iterable or sequence (str/bytes)") + @DisplayName("PySequence.fastList (Java API)") + @ParameterizedTest(name = "{0}: fastList({2})") + @MethodSource("readableProvider") + @SuppressWarnings("static-method") + void supports_fastList(String type, List ref, Object obj) throws Throwable { + List result = PySequence.fastList(obj, () -> new ValueError("")); + checkItems(ref, result); + } + + /** + * Check a test result for size and content. The result must be a + * Java List. 
+ * + * @param ref a list having elements expected of {@code result} + * @param result Java list under test + * @throws Throwable from the implementation + */ + private static void checkItems(List ref, List result) throws Throwable { + int L = ref.size(); + assertEquals(L, result.size()); + for (int i = 0; i < L; i++) { assertEquals(ref.get(i), result.get(i)); } + } + + // /** + // * Test {@link PySequence#count(Object, Object) PySequence.count} + // */ + // void supports_count(String type, List ref, Object obj) + // throws Throwable {fail("not implemented");} + // + // /** + // * Test {@link PySequence#contains(Object, Object) + // * PySequence.contains} + // */ + // void supports_contains(String type, List ref, Object obj) + // throws Throwable {fail("not implemented");} + // + // // Not to be confused with PyNumber.index + // /** + // * Test {@link PySequence#index(Object, Object) PySequence.index} + // */ + // void supports_index(String type, List ref, Object obj) + // throws Throwable {fail("not implemented");} + +} diff --git a/core/src/test/java/org/python/core/ArgParserTest.java b/core/src/test/java/org/python/core/ArgParserTest.java new file mode 100644 index 000000000..fe0a00534 --- /dev/null +++ b/core/src/test/java/org/python/core/ArgParserTest.java @@ -0,0 +1,376 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.List; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * This is a test of {@link ArgParser}. 
Since it is quite a + * complicated beast, and that might make it fragile, we try to be + * thorough here, rather than wait for it to let us down inside some + * complicated Python built-in called in an unforeseen way. + *

+ * Each nested test class provides one parser specification to all + * its test methods, which then exercise that parser in a range of + * circumstances. As far as possible, we use the same test names + * when testing the same kind of behaviour. + */ +class ArgParserTest { + + abstract static class Standard { + + /** + * A parser should have field values that correctly reflect the + * arguments used in its construction. + */ + abstract void has_expected_fields(); + + /** + * A parser should obtain the correct result (and not throw) when + * applied to classic arguments matching its specification. + */ + abstract void parses_classic_args(); + + /** + * {@link ArgParser#toString()} matches its specification. + */ + @Test + abstract void has_expected_toString(); + } + + @Nested + @DisplayName("A parser for no arguments") + class NoArgs extends Standard { + + ArgParser ap = ArgParser.fromSignature("func"); + + @Override + @Test + void has_expected_fields() { + assertEquals("func", ap.name); + assertEquals(0, ap.argnames.length); + assertEquals(0, ap.argcount); + assertEquals(0, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(0, ap.regargcount); + assertEquals(-1, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = PyTuple.EMPTY; + PyDict kwargs = Py.dict(); + + // It's enough that this not throw + ap.parse(args, kwargs); + } + + @Override + @Test + void has_expected_toString() { assertEquals("func()", ap.toString()); } + } + + @Nested + @DisplayName("A parser for positional arguments") + class PositionalArgs extends Standard { + + ArgParser ap = ArgParser.fromSignature("func", "a", "b", "c"); + + @Override + @Test + void has_expected_fields() { + assertEquals("func", ap.name); + assertEquals(3, ap.argnames.length); + assertEquals(3, ap.argcount); + assertEquals(0, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(3, ap.regargcount); + 
assertEquals(-1, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = Py.tuple(1, 2, 3); + PyDict kwargs = Py.dict(); + + Object[] frame = ap.parse(args, kwargs); + assertArrayEquals(new Object[] {1, 2, 3}, frame); + } + + @Test + void parses_classic_kwargs() { + PyTuple args = Py.tuple(1); + PyDict kwargs = Py.dict(); + kwargs.put("c", 3); + kwargs.put("b", 2); + + Object[] frame = ap.parse(args, kwargs); + assertArrayEquals(new Object[] {1, 2, 3}, frame); + } + + @Override + @Test + void has_expected_toString() { assertEquals("func(a, b, c)", ap.toString()); } + } + + @Nested + @DisplayName("A parser for positional-only arguments") + class PositionalOnlyArgs extends Standard { + + ArgParser ap = ArgParser.fromSignature("func", "a", "b", "c", "/"); + + @Override + @Test + void has_expected_fields() { + assertEquals("func", ap.name); + assertEquals(3, ap.argnames.length); + assertEquals(3, ap.argcount); + assertEquals(3, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(3, ap.regargcount); + assertEquals(-1, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = Py.tuple(1, 2, 3); + PyDict kwargs = Py.dict(); + + Object[] frame = ap.parse(args, kwargs); + assertArrayEquals(new Object[] {1, 2, 3}, frame); + } + + @Test + void raises_TypeError_on_kwargs() { + PyTuple args = Py.tuple(1); + PyDict kwargs = Py.dict(); + kwargs.put("c", 3); + kwargs.put("b", 2); + + assertThrows(TypeError.class, () -> ap.parse(args, kwargs)); + } + + @Override + @Test + void has_expected_toString() { assertEquals("func(a, b, c, /)", ap.toString()); } + } + + @Nested + @DisplayName("A parser for some positional-only arguments") + class SomePositionalOnlyArgs extends Standard { + + ArgParser ap = ArgParser.fromSignature("func", "a", "b", "/", "c"); + + @Override + @Test + void has_expected_fields() { + 
assertEquals("func", ap.name); + assertEquals(3, ap.argnames.length); + assertEquals(3, ap.argcount); + assertEquals(2, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(3, ap.regargcount); + assertEquals(-1, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = Py.tuple(1, 2, 3); + PyDict kwargs = Py.dict(); + + Object[] frame = ap.parse(args, kwargs); + assertArrayEquals(new Object[] {1, 2, 3}, frame); + } + + @Test + void throws_when_arg_missing() { + PyTuple args = Py.tuple(1); + PyDict kwargs = Py.dict(); + kwargs.put("c", 3); + assertThrows(TypeError.class, () -> ap.parse(args, kwargs)); + } + + @Override + @Test + void has_expected_toString() { assertEquals("func(a, b, /, c)", ap.toString()); } + } + + @Nested + @DisplayName("A parser for a positional collector") + class PositionalCollector extends Standard { + + ArgParser ap = ArgParser.fromSignature("func", "*aa"); + + @Override + @Test + void has_expected_fields() { + assertEquals("func", ap.name); + assertEquals(1, ap.argnames.length); + assertEquals(0, ap.argcount); + assertEquals(0, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(0, ap.regargcount); + assertEquals(0, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = Py.tuple(1, 2, 3); + PyDict kwargs = Py.dict(); + + Object[] frame = ap.parse(args, kwargs); + assertEquals(1, frame.length); + assertEquals(List.of(1, 2, 3), frame[0]); + } + + @Test + void throws_on_keyword() { + PyTuple args = Py.tuple(1); + PyDict kwargs = Py.dict(); + kwargs.put("c", 3); + assertThrows(TypeError.class, () -> ap.parse(args, kwargs)); + } + + @Override + @Test + void has_expected_toString() { assertEquals("func(*aa)", ap.toString()); } + } + + @Nested + @DisplayName("A parser for a keyword collector") + class KeywordCollector extends Standard { + + ArgParser ap = 
ArgParser.fromSignature("func", "**kk"); + + @Override + @Test + void has_expected_fields() { + assertEquals("func", ap.name); + assertEquals(1, ap.argnames.length); + assertEquals(0, ap.argcount); + assertEquals(0, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(0, ap.regargcount); + assertEquals(-1, ap.varArgsIndex); + assertEquals(0, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = Py.tuple(); + PyDict kwargs = Py.dict(); + kwargs.put("b", 2); + kwargs.put("c", 3); + kwargs.put("a", 1); + + Object[] frame = ap.parse(args, kwargs); + assertEquals(1, frame.length); + PyDict kk = (PyDict)frame[0]; + assertEquals(1, kk.get("a")); + assertEquals(2, kk.get("b")); + assertEquals(3, kk.get("c")); + } + + @Test + void throws_on_positional() { + PyTuple args = Py.tuple(1); + PyDict kwargs = Py.dict(); + kwargs.put("b", 2); + kwargs.put("c", 3); + kwargs.put("a", 1); + assertThrows(TypeError.class, () -> ap.parse(args, kwargs)); + } + + @Override + @Test + void has_expected_toString() { assertEquals("func(**kk)", ap.toString()); } + } + + @Nested + @DisplayName("Example from the Javadoc") + class FromJavadoc extends Standard { + + String[] names = {"a", "b", "c", "d", "e", "f", "g", "h", "i", "aa", "kk"}; + ArgParser ap = new ArgParser("func", names, names.length - 2, 4, 3, true, true) // + .defaults(3, 4, 5, 6) // + .kwdefaults(77, null, 99); + private String SIG = "func(a, b, c=3, d=4, /, e=5, f=6, *aa, g=77, h, i=99, **kk)"; + + @Override + @Test + void has_expected_fields() { + assertEquals("func", ap.name); + assertEquals(11, ap.argnames.length); + assertEquals(6, ap.argcount); + assertEquals(4, ap.posonlyargcount); + assertEquals(3, ap.kwonlyargcount); + assertEquals(9, ap.regargcount); + assertEquals(9, ap.varArgsIndex); + assertEquals(10, ap.varKeywordsIndex); + } + + @Override + @Test + void parses_classic_args() { + PyTuple args = Py.tuple(10, 20, 30); + PyDict kwargs = Py.dict(); + 
kwargs.put("g", 70); + kwargs.put("h", 80); + + PyTuple expectedTuple = PyTuple.EMPTY; + PyDict expectedDict = Py.dict(); + Object[] expected = + new Object[] {10, 20, 30, 4, 5, 6, 70, 80, 99, expectedTuple, expectedDict}; + + Object[] frame = ap.parse(args, kwargs); + assertArrayEquals(expected, frame); + } + + /** + * When the keyword defaults are replaced with a client-supplied + * {@code dict}, the new values take effect. + */ + @Test + void parses_classic_args_kwmap() { + PyTuple args = Py.tuple(10, 20, 30); + PyDict kwargs = Py.dict(); + kwargs.put("g", 70); + + PyDict kwd = Py.dict(); + kwd.put("h", 28); + kwd.put("i", 29); + ap.kwdefaults(kwd); + + PyTuple expectedTuple = PyTuple.EMPTY; + PyDict expectedDict = Py.dict(); + Object[] expected = + new Object[] {10, 20, 30, 4, 5, 6, 70, 28, 29, expectedTuple, expectedDict}; + + Object[] frame = ap.parse(args, kwargs); + assertArrayEquals(expected, frame); + } + + @Override + @Test + void has_expected_toString() { assertEquals(SIG, ap.toString()); } + } +} diff --git a/core/src/test/java/org/python/core/BinarySlotWrapperTest.java b/core/src/test/java/org/python/core/BinarySlotWrapperTest.java new file mode 100644 index 000000000..b7d9477ed --- /dev/null +++ b/core/src/test/java/org/python/core/BinarySlotWrapperTest.java @@ -0,0 +1,314 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.fail; + +import java.math.BigInteger; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Test the {@link PyWrapperDescr}s for binary special functions on + * a variety of types. Unlike the companion call-site tests, a + * descriptor is the descriptor in a particular type. The + * particular operations are not the focus: we are testing the + * mechanisms for creating and calling slot wrappers. + */ +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class BinarySlotWrapperTest extends UnitTestSupport { + + @Nested + @DisplayName("The slot wrapper '__sub__'") + class Slot__sub__ extends SlotWrapperTestBase { + + final String NAME = "__sub__"; + + @Nested + @DisplayName("of 'int' objects") + class OfInt extends BinaryTest { + + @Override + Object expected(Object s, Object o) { + return PyLong.asBigInteger(s).subtract(PyLong.asBigInteger(o)); + } + + @Override + void check(Object exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + Integer iv = 50, iw = 8; + List vList = + List.of(iv, BigInteger.valueOf(iv), newPyLong(iv), true, false); + // other argument accepts same types + List wList = + List.of(iw, BigInteger.valueOf(iw), newPyLong(iw), true, false); + super.setup(PyLong.TYPE, NAME, vList, wList); + } + + /** + * As {@link #supports_call()} but with empty keyword array. NOTE(review): the body calls {@code makeBoundCallKW}, the same helper used by {@link #supports_bound_call_with_keywords()} -- confirm an unbound call was not intended. 
+ */ + @Test + void supports_call_with_keywords() throws Throwable { + for (Args args : getCases()) { + Object exp = expected(args.s, args.o); + checkInt(exp, makeBoundCallKW(args.s, args.o)); + } + } + + /** + * As {@link #supports_bound_call()} but with empty keyword array. + */ + @Test + void supports_bound_call_with_keywords() throws Throwable { + for (Args args : getCases()) { + Object exp = expected(args.s, args.o); + checkInt(exp, makeBoundCallKW(args.s, args.o)); + } + } + } + + @Nested + @DisplayName("of 'bool' objects") + class OfBool extends BinaryTest { + + @Override + Object expected(Boolean s, Object o) { + return (s ? BigInteger.ONE : BigInteger.ZERO).subtract(PyLong.asBigInteger(o)); + } + + @Override + void check(Object exp, Object r) throws Throwable { + checkInt(exp, r); // even bool-bool is int + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + List vList = List.of(true, false); + // other argument accepts int and bool types + Integer iw = 42; + List wList = + List.of(true, false, iw, BigInteger.valueOf(iw), newPyLong(iw)); + super.setup(PyBool.TYPE, NAME, vList, wList); + } + + @Test + @Override + void has_expected_fields() { + super.has_expected_fields(); + // The descriptor should be *exactly* that from int + assertSame(PyLong.TYPE.lookup(NAME), descr); + } + } + + @Nested + @DisplayName("of 'float' objects") + class OfFloat extends BinaryTest { + + @Override + Object expected(Object s, Object o) { + try { + return PyFloat.asDouble(s) - PyFloat.asDouble(o); + } catch (Throwable e) { + return fail("unconvertible"); + } + } + + @Override + void check(Object exp, Object r) throws Throwable { + checkFloat(exp, r); + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + Integer iw = 8; + Double dv = 50.0, dw = iw.doubleValue(); + + // self argument must be a float + List vList = List.of(dv, newPyFloat(dv)); + // other argument accepts float, int, bool + List wList = List.of(dw, newPyFloat(dw), iw, + 
BigInteger.valueOf(iw), newPyLong(iw), false, + true); + super.setup(PyFloat.TYPE, NAME, vList, wList); + } + } + } + + @Nested + @DisplayName("The slot wrapper '__rsub__'") + class Slot__rsub__ extends SlotWrapperTestBase { + + final String NAME = "__rsub__"; + + @Nested + @DisplayName("of 'int' objects") + class OfInt extends BinaryTest { + + @Override + Object expected(Object s, Object o) { + return PyLong.asBigInteger(o).subtract(PyLong.asBigInteger(s)); + } + + @Override + void check(Object exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + Integer iv = 800, iw = 5000; + // int and bool are both served by int.__rsub__ + List vList = + List.of(iv, BigInteger.valueOf(iv), newPyLong(iv), true, false); + // other argument accepts same types + List wList = + List.of(iw, BigInteger.valueOf(iw), newPyLong(iw), true, false); + super.setup(PyLong.TYPE, NAME, vList, wList); + } + } + + @Nested + @DisplayName("of 'bool' objects") + class OfBool extends BinaryTest { + + @Override + Object expected(Boolean s, Object o) { + return PyLong.asBigInteger(o).subtract(s ? 
BigInteger.ONE : BigInteger.ZERO); + } + + @Override + void check(Object exp, Object r) throws Throwable { + checkInt(exp, r); // even bool-bool is int + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + List vList = List.of(true, false); + // other argument accepts int and bool types + Integer iw = 4200; + List wList = + List.of(true, false, iw, BigInteger.valueOf(iw), newPyLong(iw)); + super.setup(PyBool.TYPE, NAME, vList, wList); + } + + @Test + @Override + void has_expected_fields() { + super.has_expected_fields(); + // The descriptor should be *exactly* that from int + assertSame(PyLong.TYPE.lookup(NAME), descr); + } + } + + @Nested + @DisplayName("of 'float' objects") + class OfFloat extends BinaryTest { + + @Override + Object expected(Object s, Object o) { + try { + return PyFloat.asDouble(o) - PyFloat.asDouble(s); + } catch (Throwable e) { + return fail("unconvertible"); + } + } + + @Override + void check(Object exp, Object r) throws Throwable { + checkFloat(exp, r); + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + Integer iw = 5000; + Double dv = 800.0, dw = iw.doubleValue(); + + // self argument must be a float + List vList = List.of(dv, newPyFloat(dv)); + // other argument accepts float, int, bool + List wList = List.of(dw, newPyFloat(dw), iw, + BigInteger.valueOf(iw), newPyLong(iw), false, + true); + super.setup(PyFloat.TYPE, NAME, vList, wList); + } + } + } + + @Nested + @DisplayName("The slot wrapper '__and__'") + class Slot__and__ extends SlotWrapperTestBase { + + final String NAME = "__and__"; + + @Nested + @DisplayName("of 'int' objects") + class OfInt extends BinaryTest { + + @Override + Object expected(Object s, Object o) { + return PyLong.asBigInteger(s).and(PyLong.asBigInteger(o)); + } + + @Override + void check(Object exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + Integer iv = 50, iw = 8; + // not bool here as bool.__and__ is 
distinct + List vList = List.of(iv, BigInteger.valueOf(iv), newPyLong(iv)); + // other argument accepts int or bool + List wList = + List.of(iw, BigInteger.valueOf(iw), newPyLong(iw), true, false); + super.setup(PyLong.TYPE, NAME, vList, wList); + } + } + + @Nested + @DisplayName("of 'bool' objects") + class OfBool extends BinaryTest { + + @Override + Object expected(Boolean s, Object o) { + if (o instanceof Boolean) + return (s) && s.equals(o); + else + return PyLong.asBigInteger(s).and(PyLong.asBigInteger(o)); + } + + @Override + void check(Object exp, Object r) throws Throwable { + if (exp instanceof Boolean) + checkBool(exp, r); + else + checkInt(exp, r); + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + List vList = List.of(true, false); + List wList = + List.of(true, false, 100, 101, BigInteger.valueOf(102), newPyLong(103)); + super.setup(PyBool.TYPE, NAME, vList, wList); + } + + @Test + @Override + void has_expected_fields() { + super.has_expected_fields(); + // The descriptor should not be that from int + assertNotSame(PyLong.TYPE.lookup(NAME), descr); + } + } + } +} diff --git a/core/src/test/java/org/python/core/BuiltinsModuleTest.java b/core/src/test/java/org/python/core/BuiltinsModuleTest.java new file mode 100644 index 000000000..188417495 --- /dev/null +++ b/core/src/test/java/org/python/core/BuiltinsModuleTest.java @@ -0,0 +1,112 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * This is a test of instantiating and using the {@code builtins} + * module, which has a special place in the Python interpreter as the + * name space. 
Many built-in types and functions are named there for use + * by the Python interpreter and it is effectively implicitly imported. + */ +@DisplayName("The builtins module") +class BuiltinsModuleTest extends UnitTestSupport { + + static final String FILE = "BuiltinsModuleTest.java"; + + @Test + @DisplayName("exists on an interepreter") + @SuppressWarnings("static-method") + void existsOnInterpreter() { + Interpreter interp = new Interpreter(); + PyModule builtins = interp.builtinsModule; + assertNotNull(builtins); + } + + @Test + @DisplayName("has independent instances") + @SuppressWarnings("static-method") + void canBeInstantiated() { + Interpreter interp1 = new Interpreter(); + Interpreter interp2 = new Interpreter(); + // Look up an arbitrary function in each interpreter + PyJavaFunction abs1 = (PyJavaFunction)interp1.getBuiltin("abs"); + assertSame(abs1.self, interp1.builtinsModule); + PyJavaFunction abs2 = (PyJavaFunction)interp2.getBuiltin("abs"); + assertSame(abs2.self, interp2.builtinsModule); + // Each module provides distinct function objects + assertNotSame(abs1, abs2); + // builtins module instances are distinct + assertNotSame(interp1.builtinsModule, interp2.builtinsModule); + } + + @Nested + @DisplayName("provides expected function ...") + class TestFunctions { + Interpreter interp; + PyDict globals; + /* BuiltinsModule? 
*/ PyModule builtins; + + @BeforeEach + void setup() { + interp = new Interpreter(); + globals = Py.dict(); + builtins = interp.builtinsModule; + } + + + @Test + @DisplayName("abs") + void testAbs() throws Throwable { + Object f = Abstract.getAttr(builtins, "abs"); + Object r = Callables.callFunction(f, -5.0); + assertEquals(5.0, r); + } + + + @Test + @DisplayName("len") + void testLen() throws Throwable { + Object f = Abstract.getAttr(builtins, "len"); + Object r = Callables.callFunction(f, "hello"); + assertEquals(5, r); + } + + @Test + @DisplayName("max") + void testMax() throws Throwable { + Object f = Abstract.getAttr(builtins, "max"); + Object r = Callables.callFunction(f, 4, 4.2, 5.0, 6); + assertEquals(6, r); + r = Callables.callFunction(f, Py.tuple(4, 4.2, 5.0, 6)); + assertEquals(6, r); + } + + @Test + @DisplayName("min") + void testMin() throws Throwable { + Object f = Abstract.getAttr(builtins, "min"); + Object r = Callables.callFunction(f, 4, 5.0, 6, 4.2); + assertEquals(4, r); + r = Callables.callFunction(f, Py.tuple(4, 5.0, 6, 4.2)); + assertEquals(4, r); + } + + @Test + @DisplayName("repr") + void testRepr() throws Throwable { + Object f = Abstract.getAttr(builtins, "repr"); + assertEquals("123", Callables.callFunction(f, 123)); + assertEquals("'spam'", Callables.callFunction(f, "spam")); + // XXX implement None.__repr__ + // assertEquals("None", Callables.callFunction(f, Py.None)); + } + } +} diff --git a/core/src/test/java/org/python/core/CPython311CodeTest.java b/core/src/test/java/org/python/core/CPython311CodeTest.java new file mode 100644 index 000000000..82c9d774f --- /dev/null +++ b/core/src/test/java/org/python/core/CPython311CodeTest.java @@ -0,0 +1,308 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Map; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.python.base.InterpreterError; +import org.python.modules.marshal; + +/** + * Tests that read code objects from prepared {@code .pyc} files and + * execute the byte code. + * + * These files are prepared in the Gradle build using a compatible + * version of CPython, from Python source in + * {@code core/src/test/pythonExample}. To run these in the IDE, + * first execute the task:
+ * .\gradlew --console=plain core:compileTestPythonExamples
+ * 
+ */ +@DisplayName("Given programs compiled by CPython 3.11 ...") +class CPython311CodeTest extends UnitTestSupport { + + @SuppressWarnings("static-method") + @DisplayName("marshal can read a code object") + @ParameterizedTest(name = "from {0}") + @ValueSource(strings = {"load_store_name", "unary_op", "binary_op", "bool_left_arith", + "bool_right_arith", "simple_if", "multi_if"}) + void loadCodeObject(String name) { + PyCode code = readCode(name); + assertPythonType(PyCode.TYPE, code); + } + + @SuppressWarnings("static-method") + @DisplayName("marshal can read a result object") + @ParameterizedTest(name = "from {0}") + @ValueSource(strings = {"load_store_name", "unary_op", "binary_op", "bool_left_arith", + "bool_right_arith", "simple_if", "multi_if"}) + void loadResultDict(String name) { + PyDict dict = readResultDict(name); + assertPythonType(PyDict.TYPE, dict); + } + + @DisplayNameGeneration(DisplayNameGenerator.Simple.class) + static abstract class CodeAttributes { + final String name; + final PyCode code; + + CodeAttributes(String name) { + this.name = name; + this.code = readCode(name); + } + + @Test + void co_cellvars() { assertEquals(0, code.co_cellvars().size()); } + + @Test + void co_code() { + // Can't predict, but not zero for CPython examples + assertNotEquals(0, code.co_code().size()); + } + + @Test + void co_freevars() { assertEquals(0, code.co_freevars().size()); } + + @Test + void co_filename() { + assertTrue(code.filename.contains(name), "file name"); + assertTrue(code.filename.contains(".py"), "file name"); + } + + @Test + protected void co_name() { assertEquals("", code.name); } + + void co_names() { checkNames(code.co_names(), EMPTY_STRINGS); } + + @Test + void co_varnames() { checkNames(code.co_varnames(), EMPTY_STRINGS); } + + /** + * Check {@code code} name enquiry against the expected list. + * + * @param names result from code object + * @param exp expected names in expected order + */ + void checkNames(PyTuple names, String... 
exp) { + assertEquals(exp.length, names.size()); + for (int i = 0; i < exp.length; i++) { assertPythonEquals(exp[i], names.get(i)); } + } + + /** + * Check {@code code} values enquiry against the expected list. + * + * @param values result from code object + * @param exp expected values in expected order + */ + void checkValues(PyTuple values, Object... exp) { + assertEquals(exp.length, values.size()); + for (int i = 0; i < exp.length; i++) { assertPythonEquals(exp[i], values.get(i)); } + } + } + + @Nested + @DisplayName("A simple code object has expected ...") + class SimpleCodeAttributes extends CodeAttributes { + + SimpleCodeAttributes() { super("load_store_name"); } + + @Test + @Override + void co_names() { + // Names in order encountered + assertPythonEquals("a", code.names[0]); + assertPythonEquals("β", code.names[1]); + assertPythonEquals("c", code.names[2]); + assertPythonEquals("ਛਲ", code.names[3]); + } + + @Test + void co_consts() { + // Fairly reliably 3 consts and a None to return + assertEquals(4, code.co_consts().size()); + } + } + + /** + * Tests of individual operations up to calling a built-in method, + * without control structures in Python. + * + * @param name of the Python example + */ + @SuppressWarnings("static-method") + @DisplayName("We can execute simple ...") + @ParameterizedTest(name = "{0}.py") + @ValueSource(strings = {"load_store_name", "unary_op", "binary_op", "bool_left_arith", + "bool_right_arith", "comparison", "tuple_index", "list_index", "call_method_builtin", + "builtins_module"}) + void executeSimple(String name) { + CPython311Code code = readCode(name); + PyDict globals = new PyDict(); + Interpreter interp = new Interpreter(); + Object r = interp.eval(code, globals); + assertEquals(Py.None, r); + assertExpectedVariables(readResultDict(name), globals); + } + + /** + * Tests involving transfer of control. 
+ * + * @param name of the Python example + */ + @SuppressWarnings("static-method") + @DisplayName("We can execute branches and while loops ...") + @ParameterizedTest(name = "{0}.py") + @ValueSource(strings = {"simple_if", "multi_if", "simple_loop", "tuple_dot_product", + "list_dot_product"}) + void executeBranchAndLoop(String name) { + CPython311Code code = readCode(name); + PyDict globals = new PyDict(); + Interpreter interp = new Interpreter(); + Object r = interp.eval(code, globals); + assertEquals(Py.None, r); + assertExpectedVariables(readResultDict(name), globals); + } + + // Supporting constants and methods ------------------------------- + + /** The Gradle build directory. */ + private static final Path BUILD = buildDirectory(); + + /** + * Python source of the examples for test. This must be consistent + * with the definition of {@code testPythonExampleOutputDir} in the + * project Gradle build, and below "test", with any sub-directory + * structure leading to the Python source files. + */ + private static final Path PYTHON_DIR = BUILD // + .resolve("generated/sources/pythonExample") // + .resolve("test"); + + /** Where compiled files are placed by CPython. */ + private static final Path PYC_DIR = PYTHON_DIR.resolve("__pycache__"); + + /** + * The name fragment used by the compiler in the supported version + * of CPython, e.g. {@code "cpython-311"}. + */ + private static final String CPYTHON_VER = "cpython-311"; + /** + * The magic number placed by the supported version of CPython, in + * the header of compiled files. + */ + private static final int MAGIC_NUMBER = 3495; + + private static final String PYC_SUFFIX = "pyc"; + private static final String VAR_SUFFIX = "var"; + private static final String[] EMPTY_STRINGS = {}; + + /** + * Read a {@code code} object with {@code marshal}. The method looks + * for compiled examples in the customary directory + * ({@link #PYC_DIR}), being provided only the base name of the + * program. 
So for example, {@code "unary_op"} will retrieve a code + * object from {@code unary_op.cpython-311.pyc} in + * {@code generated/sources/pythonExample/test/__pycache__}. + * + * @param progName base name of program + * @return {@code code} object read in + */ + static CPython311Code readCode(String progName) { + String name = progName + "." + CPYTHON_VER + "." + PYC_SUFFIX; + File f = PYC_DIR.resolve(name).toFile(); + try (FileInputStream fs = new FileInputStream(f); + BufferedInputStream s = new BufferedInputStream(fs);) { + + // Wrap a marshal reader around the input stream + marshal.Reader reader = new marshal.StreamReader(s); + + // First 4 bytes is a magic header + int magic = reader.readShort(); + int magic2 = reader.readShort(); + boolean good = magic == MAGIC_NUMBER && magic2 == 0x0a0d; + + // Undocumented + for (int i = 0; i < 3; i++) { reader.readInt(); } + + // Next should be a code object + if (good) { + Object o = reader.readObject(); + if (o instanceof PyCode) { return (CPython311Code)o; } + } + + // Didn't return a code object + throw new InterpreterError("Not a CPython code object: %s", name); + + } catch (IOException ioe) { + throw new InterpreterError(ioe); + } + } + + /** + * Read a {@code dict} object with {@code marshal}. The method looks + * for the saved results of compiled examples in the customary + * directory ({@link #PYC_DIR}}, being provided only the base name + * of the program. So for example, {@code "unary_op"} will retrieve + * a code object from {@code unary_op.cpython-311.var} in + * {@code generated/sources/pythonExample/test/vsj3/evo1/__pycache__}. + * + * @param progName base name of program + * @return {@code dict} object read in + */ + static PyDict readResultDict(String progName) { + String name = progName + "." + CPYTHON_VER + "." 
+ VAR_SUFFIX; + File f = PYC_DIR.resolve(name).toFile(); + try (FileInputStream fs = new FileInputStream(f); + BufferedInputStream s = new BufferedInputStream(fs);) { + + // Wrap a marshal reader around the input stream + marshal.Reader reader = new marshal.StreamReader(s); + + // Should be a dict object + Object o = reader.readObject(); + if (o instanceof PyDict) { + return (PyDict)o; + } else { + throw new InterpreterError("Not a dict object: %s", name); + } + + } catch (IOException ioe) { + throw new InterpreterError(ioe); + } + } + + /** + * Assert that all the keys of a reference dictionary are present in + * the test dictionary, and with the same value according to + * {@link #assertPythonEquals(Object, Object) Python equality} + * + * @param ref dictionary of reference results + * @param test dictionary of results to test + */ + private static void assertExpectedVariables(Map ref, Map test) { + for (Map.Entry e : ref.entrySet()) { + Object k = e.getKey(); + Object x = e.getValue(); + Object v = test.get(k); + assertNotNull(v, () -> String.format("variable %s missing from result", k)); + assertPythonEquals(x, v, () -> String.format("%s = %s (not %s)", k, v, x)); + } + } +} diff --git a/core/src/test/java/org/python/core/ComparisonSlotWrapperTest.java b/core/src/test/java/org/python/core/ComparisonSlotWrapperTest.java new file mode 100644 index 000000000..61b58256d --- /dev/null +++ b/core/src/test/java/org/python/core/ComparisonSlotWrapperTest.java @@ -0,0 +1,177 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static org.junit.jupiter.api.Assertions.*; + +import java.math.BigInteger; +import java.util.List; + +import org.junit.jupiter.api.Test; + +/** + * Test the {@link PyWrapperDescr}s for comparison special functions + * on a variety of types. 
+ */ +class ComparisonSlotWrapperTest extends UnitTestSupport { + + /** + * Test invocation of the {@code float.__lt__} descriptor on + * accepted {@code float} classes in all combinations with + * {@code float} and {@code int} operand types. + */ + @Test + void float_lt() throws Throwable { + + PyWrapperDescr lt = (PyWrapperDescr)PyFloat.TYPE.lookup("__lt__"); + + Double dv = 7.0, dw = 6.0; + PyFloat pv = newPyFloat(dv), pw = newPyFloat(dw); + Integer iw = 6; + + List wList = + List.of(pw, dw, newPyLong(iw), iw, BigInteger.valueOf(iw), false, true); + + // v is Double, PyFloat. + for (Object v : List.of(dv, pv)) { + // w is PyFloat, Double, and int types + for (Object w : wList) { + Object r = lt.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(false, r); + } + } + + dv = -0.1; // less than everything in wList + pv = newPyFloat(dv); + + // v is Double, PyFloat. + for (Object v : List.of(dv, pv)) { + // w is PyFloat, Double, and int types + for (Object w : wList) { + Object r = lt.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(true, r); + } + } + } + + /** + * Test invocation of the {@code float.__eq__} descriptor on + * accepted {@code float} classes in all combinations with + * {@code float} and {@code int} operand types. + */ + @Test + void float_eq() throws Throwable { + + PyWrapperDescr eq = (PyWrapperDescr)PyFloat.TYPE.lookup("__eq__"); + + Double dv = 2.0, dw = 1.0; + PyFloat pv = newPyFloat(dv), pw = newPyFloat(dw); + Integer iw = 1; + + List wList = List.of(pw, dw, newPyLong(iw), iw, BigInteger.valueOf(iw), true); + + // v is Double, PyFloat. 
+ for (Object v : List.of(dv, pv)) { + // w is PyFloat, Double, and int types + for (Object w : wList) { + Object r = eq.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(false, r); + } + } + + dv = dw; // equal to everything in wList + pv = newPyFloat(dv); + + // v is Double, PyFloat. + for (Object v : List.of(dv, pv)) { + // w is PyFloat, Double, and int types + for (Object w : wList) { + Object r = eq.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(true, r); + } + } + } + + /** + * Test invocation of the {@code int.__lt__} descriptor on accepted + * {@code int} classes in all combinations. + */ + @Test + void int_lt() throws Throwable { + + PyWrapperDescr lt = (PyWrapperDescr)PyLong.TYPE.lookup("__lt__"); + + Integer iv = 4, iw = -1; + BigInteger bv = BigInteger.valueOf(iv), bw = BigInteger.valueOf(iw); + PyLong pv = newPyLong(iv), pw = newPyLong(iw); + + // v is Integer, BigInteger, PyLong, Boolean + for (Object v : List.of(iv, bv, pv, true)) { + // w is Integer, BigInteger, PyLong, Boolean + for (Object w : List.of(iw, bw, pw, false)) { + Object r = lt.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(false, r); + } + } + + bv = BigInteger.valueOf(iv = -2); + pv = newPyLong(iv); + bw = BigInteger.valueOf(iw = 3); + pw = newPyLong(iw); + + // v is Integer, BigInteger, PyLong, Boolean + for (Object v : List.of(iv, bv, pv, false)) { + // w is Integer, BigInteger, PyLong, Boolean + for (Object w : List.of(iw, bw, pw, true)) { + Object r = lt.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(true, r); + } + } + } + + /** + * Test invocation of the {@code int.__eq__} descriptor on accepted + * {@code int} classes in all combinations. 
+ */ + @Test + void int_eq() throws Throwable { + + PyWrapperDescr eq = (PyWrapperDescr)PyLong.TYPE.lookup("__eq__"); + + Integer iv = 5, iw = 7; + BigInteger bv = BigInteger.valueOf(iv), bw = BigInteger.valueOf(iw); + PyLong pv = newPyLong(iv), pw = newPyLong(iw); + + // v is Integer, BigInteger, PyLong, Boolean + for (Object v : List.of(iv, bv, pv, true)) { + // w is Integer, BigInteger, PyLong, Boolean + for (Object w : List.of(iw, bw, pw, false)) { + Object r = eq.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(false, r); + } + } + + iv = iw = 1; + bv = BigInteger.valueOf(iv); + pv = newPyLong(iv); + bw = BigInteger.valueOf(iw); + pw = newPyLong(iw); + + // v is Integer, BigInteger, PyLong, Boolean + for (Object v : List.of(iv, bv, pv, true)) { + // w is Integer, BigInteger, PyLong, Boolean + for (Object w : List.of(iw, bw, pw, true)) { + Object r = eq.__call__(new Object[] {v, w}, null); + assertEquals(Boolean.class, r.getClass()); + assertEquals(true, r); + } + } + } +} diff --git a/core/src/test/java/org/python/core/FloatDivisionTest.java b/core/src/test/java/org/python/core/FloatDivisionTest.java new file mode 100644 index 000000000..e1e4c3181 --- /dev/null +++ b/core/src/test/java/org/python/core/FloatDivisionTest.java @@ -0,0 +1,485 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.function.DoubleBinaryOperator; +import java.util.function.Supplier; + +import org.junit.jupiter.api.Test; + +/** + * These are unit tests of the methods that underlie floating point + * division and modulus: {@code x//y} and {@code x%y}. 
Java and + * Python have differing semantics for integer division and remainder, + * so it is not just a case of wrapping up the Java implementation, but + * of carefully adjusting the arguments and result according to the quadrant + * in which {@code (x,y)} lies. + *

+ * See also {@link IntegerDivisionTest}. + */ +class FloatDivisionTest extends UnitTestSupport { + + /** Test successful division. */ + @Test + void testDivide() { + for (Example example : FLOORDIV) { + if (!example.expectThrow) { + double x = example.x; + double y = example.y; + double r = PyFloat.floordiv(x, y); + example.test(r); + } + } + } + + /** Test raising {@link ZeroDivisionError} from division. */ + @Test + void testDivideByZero() { + // Wrap the method under test as operator + DoubleBinaryOperator func = PyFloat::floordiv; + for (Example example : FLOORDIV) { + if (example.expectThrow) { example.testThrows(func); } + } + } + + /** Test remainder on division. */ + @Test + void testModulo() { + for (Example example : MODULO) { + if (!example.expectThrow) { + double x = example.x; + double y = example.y; + double r = PyFloat.mod(x, y); + example.test(r); + } + } + } + + /** Test raising {@link ZeroDivisionError} from remainder. */ + @Test + void testModuloByZero() { + // Wrap the method under test as operator + DoubleBinaryOperator func = PyFloat::mod; + for (Example example : MODULO) { + if (example.expectThrow) { example.testThrows(func); } + } + } + + /** Test division and modulus combined "div" part. */ + @Test + void testDivMod0() { + for (Example example : FLOORDIV) { + if (!example.expectThrow) { + double x = example.x; + double y = example.y; + double r = PyFloat + .doubleValue(PyFloat.divmod(x, y).get(0)); + example.test(r); + } + } + } + + /** Test division and modulus combined "mod" part. */ + @Test + void testDivMod1() { + for (Example example : MODULO) { + if (!example.expectThrow) { + double x = example.x; + double y = example.y; + double r = PyFloat + .doubleValue(PyFloat.divmod(x, y).get(1)); + example.test(r); + } + } + } + + /** + * Test raising {@link ZeroDivisionError} from division and modulus + * combined. 
+ */ + @Test + void testDivModByZero() { + // Wrap the method under test as operator + DoubleBinaryOperator func = (double x, double y) -> PyFloat + .doubleValue(PyFloat.divmod(x, y).get(0)); + for (Example example : FLOORDIV) { + if (example.expectThrow) { example.testThrows(func); } + } + } + + /** + * Hold a set of values and the expected result of a test case and + * scrupulously validate a result. The class is agnostic about the + * actual computation, which is performed by the client. + */ + private static class Example { + final double x; + final double y; + final double expected; + final boolean expectThrow; + final double tolerance; + + private Example(double x, double y, double expected, + boolean expectThrow) { + this.x = x; + this.y = y; + this.expected = expected; + this.expectThrow = expectThrow; + if (!expectThrow && Double.isFinite(expected)) { + this.tolerance = Math.abs(expected) * 1e-4; + } else { + this.tolerance = 0.0; + } + } + + /** + * A case where a result should be returned. + * + * @param x dividend + * @param y divisor + * @param expected result + */ + Example(double x, double y, double expected) { + this(x, y, expected, false); + } + + /** + * A case where a {@link ZeroDivisionError} should be thrown. + * + * @param x dividend + * @param y divisor + **/ + Example(double x, double y) { this(x, y, 0.0, true); } + + /** + * Test the given result against the expected answer in all + * relevant detail. + * + * @param result of the invocation + */ + void test(double result) { + Supplier msg = () -> this.toString(); + if (Double.isNaN(expected)) { + // Expecting nan. All nans are equivalent. + assertTrue(Double.isNaN(result), msg); + } else { + // Signs should match (even if zero or infinite) + assertEquals(Math.copySign(1, expected), + Math.copySign(1, result), msg); + if (Double.isInfinite(expected)) { + // Expecting infinity. + assertTrue(Double.isInfinite(result), msg); + } else { + // Finite value, so check it normally. 
+ assertEquals(expected, result, tolerance, msg); + } + } + } + + /** + * Test that a {@link ZeroDivisionError} is thrown. + * + * @param func operation to apply to {@code (x, y)} + */ + void testThrows(DoubleBinaryOperator func) { + Supplier xy = () -> this.toString(); + assertThrows(ZeroDivisionError.class, + () -> func.applyAsDouble(x, y), xy); + } + + @Override + public String toString() { + return String.format( + "Example [x=% .4g, y=% .4g, expected=% .4g]", x, y, + expected); + } + } + + private static final double inf = Double.POSITIVE_INFINITY; + private static final double nan = Double.NaN; + + private static final Example[] FLOORDIV = new Example[] { + // @formatter:off + // Reference material to test x//y + new Example( -inf, nan, nan), // + new Example(-5.00e+300, nan, nan), // + new Example( -5.00, nan, nan), // + new Example( -0.00, nan, nan), // + new Example( 0.00, nan, nan), // + new Example( 5.00, nan, nan), // + new Example( 5.00e+300, nan, nan), // + new Example( inf, nan, nan), // + new Example( nan, nan, nan), // + new Example( -inf, inf, nan), // + new Example(-5.00e+300, inf, -1.0000000), // + new Example( -5.00, inf, -1.0000000), // + new Example( -0.00, inf, -0.0000000), // + new Example( 0.00, inf, 0.0000000), // + new Example( 5.00, inf, 0.0000000), // + new Example( 5.00e+300, inf, 0.0000000), // + new Example( inf, inf, nan), // + new Example( nan, inf, nan), // + new Example( -inf, 3.00e+300, nan), // + new Example(-5.00e+300, 3.00e+300, -2.0000000), // + new Example( -5.00, 3.00e+300, -1.0000000), // + new Example( -0.00, 3.00e+300, -0.0000000), // + new Example( 0.00, 3.00e+300, 0.0000000), // + new Example( 5.00, 3.00e+300, 0.0000000), // + new Example( 5.00e+300, 3.00e+300, 1.0000000), // + new Example( inf, 3.00e+300, nan), // + new Example( nan, 3.00e+300, nan), // + new Example( -inf, 3.00, nan), // + new Example(-5.00e+300, 3.00, -1.6666667e+300), // + new Example( -5.00, 3.00, -2.0000000), // + new Example( -0.00, 3.00, -0.0000000), // + 
new Example( 0.00, 3.00, 0.0000000), // + new Example( 5.00, 3.00, 1.0000000), // + new Example( 5.00e+300, 3.00, 1.6666667e+300), // + new Example( inf, 3.00, nan), // + new Example( nan, 3.00, nan), // + new Example( -inf, 2.60, nan), // + new Example(-5.00e+300, 2.60, -1.9230769e+300), // + new Example( -5.00, 2.60, -2.0000000), // + new Example( -0.00, 2.60, -0.0000000), // + new Example( 0.00, 2.60, 0.0000000), // + new Example( 5.00, 2.60, 1.0000000), // + new Example( 5.00e+300, 2.60, 1.9230769e+300), // + new Example( inf, 2.60, nan), // + new Example( nan, 2.60, nan), // + new Example( -inf, 2.50, nan), // + new Example(-5.00e+300, 2.50, -2.0000000e+300), // + new Example( -5.00, 2.50, -2.0000000), // + new Example( -0.00, 2.50, -0.0000000), // + new Example( 0.00, 2.50, 0.0000000), // + new Example( 5.00, 2.50, 2.0000000), // + new Example( 5.00e+300, 2.50, 2.0000000e+300), // + new Example( inf, 2.50, nan), // + new Example( nan, 2.50, nan), // + new Example( -inf, 2.40, nan), // + new Example(-5.00e+300, 2.40, -2.0833333e+300), // + new Example( -5.00, 2.40, -3.0000000), // + new Example( -0.00, 2.40, -0.0000000), // + new Example( 0.00, 2.40, 0.0000000), // + new Example( 5.00, 2.40, 2.0000000), // + new Example( 5.00e+300, 2.40, 2.0833333e+300), // + new Example( inf, 2.40, nan), // + new Example( nan, 2.40, nan), // + new Example( -inf, 0.00), // ZeroDivisionError + new Example(-5.00e+300, 0.00), // ZeroDivisionError + new Example( -5.00, 0.00), // ZeroDivisionError + new Example( -0.00, 0.00), // ZeroDivisionError + new Example( 0.00, 0.00), // ZeroDivisionError + new Example( 5.00, 0.00), // ZeroDivisionError + new Example( 5.00e+300, 0.00), // ZeroDivisionError + new Example( inf, 0.00), // ZeroDivisionError + new Example( nan, 0.00), // ZeroDivisionError + new Example( -inf, -0.00), // ZeroDivisionError + new Example(-5.00e+300, -0.00), // ZeroDivisionError + new Example( -5.00, -0.00), // ZeroDivisionError + new Example( -0.00, -0.00), // 
ZeroDivisionError + new Example( 0.00, -0.00), // ZeroDivisionError + new Example( 5.00, -0.00), // ZeroDivisionError + new Example( 5.00e+300, -0.00), // ZeroDivisionError + new Example( inf, -0.00), // ZeroDivisionError + new Example( nan, -0.00), // ZeroDivisionError + new Example( -inf, -2.40, nan), // + new Example(-5.00e+300, -2.40, 2.0833333e+300), // + new Example( -5.00, -2.40, 2.0000000), // + new Example( -0.00, -2.40, 0.0000000), // + new Example( 0.00, -2.40, -0.0000000), // + new Example( 5.00, -2.40, -3.0000000), // + new Example( 5.00e+300, -2.40, -2.0833333e+300), // + new Example( inf, -2.40, nan), // + new Example( nan, -2.40, nan), // + new Example( -inf, -2.50, nan), // + new Example(-5.00e+300, -2.50, 2.0000000e+300), // + new Example( -5.00, -2.50, 2.0000000), // + new Example( -0.00, -2.50, 0.0000000), // + new Example( 0.00, -2.50, -0.0000000), // + new Example( 5.00, -2.50, -2.0000000), // + new Example( 5.00e+300, -2.50, -2.0000000e+300), // + new Example( inf, -2.50, nan), // + new Example( nan, -2.50, nan), // + new Example( -inf, -2.60, nan), // + new Example(-5.00e+300, -2.60, 1.9230769e+300), // + new Example( -5.00, -2.60, 1.0000000), // + new Example( -0.00, -2.60, 0.0000000), // + new Example( 0.00, -2.60, -0.0000000), // + new Example( 5.00, -2.60, -2.0000000), // + new Example( 5.00e+300, -2.60, -1.9230769e+300), // + new Example( inf, -2.60, nan), // + new Example( nan, -2.60, nan), // + new Example( -inf, -3.00, nan), // + new Example(-5.00e+300, -3.00, 1.6666667e+300), // + new Example( -5.00, -3.00, 1.0000000), // + new Example( -0.00, -3.00, 0.0000000), // + new Example( 0.00, -3.00, -0.0000000), // + new Example( 5.00, -3.00, -2.0000000), // + new Example( 5.00e+300, -3.00, -1.6666667e+300), // + new Example( inf, -3.00, nan), // + new Example( nan, -3.00, nan), // + new Example( -inf, -3.00e+300, nan), // + new Example(-5.00e+300, -3.00e+300, 1.0000000), // + new Example( -5.00, -3.00e+300, 0.0000000), // + new Example( 
-0.00, -3.00e+300, 0.0000000), // + new Example( 0.00, -3.00e+300, -0.0000000), // + new Example( 5.00, -3.00e+300, -1.0000000), // + new Example( 5.00e+300, -3.00e+300, -2.0000000), // + new Example( inf, -3.00e+300, nan), // + new Example( nan, -3.00e+300, nan), // + new Example( -inf, -inf, nan), // + new Example(-5.00e+300, -inf, 0.0000000), // + new Example( -5.00, -inf, 0.0000000), // + new Example( -0.00, -inf, 0.0000000), // + new Example( 0.00, -inf, -0.0000000), // + new Example( 5.00, -inf, -1.0000000), // + new Example( 5.00e+300, -inf, -1.0000000), // + new Example( inf, -inf, nan), // + new Example( nan, -inf, nan), // + // @formatter:on + }; + + private static final Example[] MODULO = new Example[] { + // insert output of float_division.py + // @formatter:off + // Reference material to test x%y + new Example( -inf, nan, nan), // + new Example(-5.00e+300, nan, nan), // + new Example( -5.00, nan, nan), // + new Example( -0.00, nan, nan), // + new Example( 0.00, nan, nan), // + new Example( 5.00, nan, nan), // + new Example( 5.00e+300, nan, nan), // + new Example( inf, nan, nan), // + new Example( nan, nan, nan), // + new Example( -inf, inf, nan), // + new Example(-5.00e+300, inf, inf), // + new Example( -5.00, inf, inf), // + new Example( -0.00, inf, 0.0000000), // + new Example( 0.00, inf, 0.0000000), // + new Example( 5.00, inf, 5.0000000), // + new Example( 5.00e+300, inf, 5.0000000e+300), // + new Example( inf, inf, nan), // + new Example( nan, inf, nan), // + new Example( -inf, 3.00e+300, nan), // + new Example(-5.00e+300, 3.00e+300, 1.0000000e+300), // + new Example( -5.00, 3.00e+300, 3.0000000e+300), // + new Example( -0.00, 3.00e+300, 0.0000000), // + new Example( 0.00, 3.00e+300, 0.0000000), // + new Example( 5.00, 3.00e+300, 5.0000000), // + new Example( 5.00e+300, 3.00e+300, 2.0000000e+300), // + new Example( inf, 3.00e+300, nan), // + new Example( nan, 3.00e+300, nan), // + new Example( -inf, 3.00, nan), // + new Example(-5.00e+300, 3.00, 
0.0000000), // + new Example( -5.00, 3.00, 1.0000000), // + new Example( -0.00, 3.00, 0.0000000), // + new Example( 0.00, 3.00, 0.0000000), // + new Example( 5.00, 3.00, 2.0000000), // + new Example( 5.00e+300, 3.00, 0.0000000), // + new Example( inf, 3.00, nan), // + new Example( nan, 3.00, nan), // + new Example( -inf, 2.60, nan), // + new Example(-5.00e+300, 2.60, 0.30303273), // + new Example( -5.00, 2.60, 0.20000000), // + new Example( -0.00, 2.60, 0.0000000), // + new Example( 0.00, 2.60, 0.0000000), // + new Example( 5.00, 2.60, 2.4000000), // + new Example( 5.00e+300, 2.60, 2.2969673), // + new Example( inf, 2.60, nan), // + new Example( nan, 2.60, nan), // + new Example( -inf, 2.50, nan), // + new Example(-5.00e+300, 2.50, 0.0000000), // + new Example( -5.00, 2.50, 0.0000000), // + new Example( -0.00, 2.50, 0.0000000), // + new Example( 0.00, 2.50, 0.0000000), // + new Example( 5.00, 2.50, 0.0000000), // + new Example( 5.00e+300, 2.50, 0.0000000), // + new Example( inf, 2.50, nan), // + new Example( nan, 2.50, nan), // + new Example( -inf, 2.40, nan), // + new Example(-5.00e+300, 2.40, 1.3339075), // + new Example( -5.00, 2.40, 2.2000000), // + new Example( -0.00, 2.40, 0.0000000), // + new Example( 0.00, 2.40, 0.0000000), // + new Example( 5.00, 2.40, 0.20000000), // + new Example( 5.00e+300, 2.40, 1.0660925), // + new Example( inf, 2.40, nan), // + new Example( nan, 2.40, nan), // + new Example( -inf, 0.00), // ZeroDivisionError + new Example(-5.00e+300, 0.00), // ZeroDivisionError + new Example( -5.00, 0.00), // ZeroDivisionError + new Example( -0.00, 0.00), // ZeroDivisionError + new Example( 0.00, 0.00), // ZeroDivisionError + new Example( 5.00, 0.00), // ZeroDivisionError + new Example( 5.00e+300, 0.00), // ZeroDivisionError + new Example( inf, 0.00), // ZeroDivisionError + new Example( nan, 0.00), // ZeroDivisionError + new Example( -inf, -0.00), // ZeroDivisionError + new Example(-5.00e+300, -0.00), // ZeroDivisionError + new Example( -5.00, 
-0.00), // ZeroDivisionError + new Example( -0.00, -0.00), // ZeroDivisionError + new Example( 0.00, -0.00), // ZeroDivisionError + new Example( 5.00, -0.00), // ZeroDivisionError + new Example( 5.00e+300, -0.00), // ZeroDivisionError + new Example( inf, -0.00), // ZeroDivisionError + new Example( nan, -0.00), // ZeroDivisionError + new Example( -inf, -2.40, nan), // + new Example(-5.00e+300, -2.40, -1.0660925), // + new Example( -5.00, -2.40, -0.20000000), // + new Example( -0.00, -2.40, -0.0000000), // + new Example( 0.00, -2.40, -0.0000000), // + new Example( 5.00, -2.40, -2.2000000), // + new Example( 5.00e+300, -2.40, -1.3339075), // + new Example( inf, -2.40, nan), // + new Example( nan, -2.40, nan), // + new Example( -inf, -2.50, nan), // + new Example(-5.00e+300, -2.50, -0.0000000), // + new Example( -5.00, -2.50, -0.0000000), // + new Example( -0.00, -2.50, -0.0000000), // + new Example( 0.00, -2.50, -0.0000000), // + new Example( 5.00, -2.50, -0.0000000), // + new Example( 5.00e+300, -2.50, -0.0000000), // + new Example( inf, -2.50, nan), // + new Example( nan, -2.50, nan), // + new Example( -inf, -2.60, nan), // + new Example(-5.00e+300, -2.60, -2.2969673), // + new Example( -5.00, -2.60, -2.4000000), // + new Example( -0.00, -2.60, -0.0000000), // + new Example( 0.00, -2.60, -0.0000000), // + new Example( 5.00, -2.60, -0.20000000), // + new Example( 5.00e+300, -2.60, -0.30303273), // + new Example( inf, -2.60, nan), // + new Example( nan, -2.60, nan), // + new Example( -inf, -3.00, nan), // + new Example(-5.00e+300, -3.00, -0.0000000), // + new Example( -5.00, -3.00, -2.0000000), // + new Example( -0.00, -3.00, -0.0000000), // + new Example( 0.00, -3.00, -0.0000000), // + new Example( 5.00, -3.00, -1.0000000), // + new Example( 5.00e+300, -3.00, -0.0000000), // + new Example( inf, -3.00, nan), // + new Example( nan, -3.00, nan), // + new Example( -inf, -3.00e+300, nan), // + new Example(-5.00e+300, -3.00e+300, -2.0000000e+300), // + new Example( -5.00, 
-3.00e+300, -5.0000000), // + new Example( -0.00, -3.00e+300, -0.0000000), // + new Example( 0.00, -3.00e+300, -0.0000000), // + new Example( 5.00, -3.00e+300, -3.0000000e+300), // + new Example( 5.00e+300, -3.00e+300, -1.0000000e+300), // + new Example( inf, -3.00e+300, nan), // + new Example( nan, -3.00e+300, nan), // + new Example( -inf, -inf, nan), // + new Example(-5.00e+300, -inf, -5.0000000e+300), // + new Example( -5.00, -inf, -5.0000000), // + new Example( -0.00, -inf, -0.0000000), // + new Example( 0.00, -inf, -0.0000000), // + new Example( 5.00, -inf, -inf), // + new Example( 5.00e+300, -inf, -inf), // + new Example( inf, -inf, nan), // + new Example( nan, -inf, nan), // + // @formatter:on + }; +} diff --git a/core/src/test/java/org/python/core/FormatTest.java b/core/src/test/java/org/python/core/FormatTest.java new file mode 100644 index 000000000..f2e3bebfe --- /dev/null +++ b/core/src/test/java/org/python/core/FormatTest.java @@ -0,0 +1,285 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.params.provider.Arguments.arguments; +import static org.python.core.PyObjectUtil.NO_CONVERSION; + +import java.math.BigInteger; +import java.util.stream.Stream; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.python.core.PyObjectUtil.NoConversion; +import org.python.core.stringlib.InternalFormat; +import org.python.core.stringlib.InternalFormat.FormatError; +import org.python.core.stringlib.InternalFormat.Spec; +import org.python.core.stringlib.TextFormatter;; + +/** + * Tests of formatting of built-in types to string using their {@code __format__} method (on which the built-in function {@code format()} relies). + * 

+ * At present, we only have tests for {@code int.__format__}. + */ +class FormatTest extends UnitTestSupport { + + /** Base of tests that format integers. */ + abstract static class AbstractIntFormatTest { + + /** + * The values corresponding to the expected results in the + * stream of examples provided by {@link #intExamples()}. A + * fragment of Python for generating this array is:

+         * ival = [42, -42, 226, 128013, 2**31-1, -2**31,
+         *            False, True, 2**36, -2**36, 7**42]
+         *
+         * def java_value(v):
+         *     if isinstance(v, bool):
+         *         return repr(v).lower()
+         *     elif 2**31 > v >= -2**31:
+         *         return repr(v)
+         *     else:
+         *         return f'new BigInteger("{v!r:s}")'
+         *
+         * print('static final Object[] VALUES = {',
+         *       ', '.join(java_value(v) for v in ival), '};')
+         * print()
+         * 
+ */ + static final Object[] VALUES = {42, -42, 226, 128013, + 2147483647, -2147483648, false, true, + new BigInteger("68719476736"), + new BigInteger("-68719476736"), + new BigInteger("311973482284542371301330321821976049")}; + + /** + * Provide a stream of examples as parameter sets to the tests. + * In each example, one format has been used to format all the + * {@link #VALUES}. A fragment of Python for generating these + * calls is:
+         * ifmt = ["d", "o", "x", "X", "b",
+         *        ",d", "#o", "#X", "+d", " d",
+         *        "5", "^6", "~<5", "0=+6" ]
+         *
+         * def java_values_in_fmt(values, fmt):
+         *     args = '", "'.join(format(v, fmt) for v in values)
+         *     return f'intExample("{fmt:s}", "{args}"), //'
+         *
+         * for f in ifmt:
+         *     print(java_values_in_fmt(ival, f))
+         * 
+ * + * @return the examples for search tests. + */ + static Stream intExamples() { + return Stream.of( // + intExample("d", "42", "-42", "226", "128013", + "2147483647", "-2147483648", "0", "1", + "68719476736", "-68719476736", + "311973482284542371301330321821976049"), // + intExample("o", "52", "-52", "342", "372015", + "17777777777", "-20000000000", "0", "1", + "1000000000000", "-1000000000000", + "1701257274030155626774437006214073142761"), // + intExample("x", "2a", "-2a", "e2", "1f40d", + "7fffffff", "-80000000", "0", "1", + "1000000000", "-1000000000", + "3c157af0306dcb7f23e06460ecc5f1"), // + intExample("X", "2A", "-2A", "E2", "1F40D", + "7FFFFFFF", "-80000000", "0", "1", + "1000000000", "-1000000000", + "3C157AF0306DCB7F23E06460ECC5F1"), // + intExample("b", "101010", "-101010", "11100010", + "11111010000001101", + "1111111111111111111111111111111", + "-10000000000000000000000000000000", "0", + "1", + "1000000000000000000000000000000000000", + "-1000000000000000000000000000000000000", + "1111000001010101111010111100000011000001101101110010110111111100100011111000000110010001100000111011001100010111110001"), // + intExample(",d", "42", "-42", "226", "128,013", + "2,147,483,647", "-2,147,483,648", "0", "1", + "68,719,476,736", "-68,719,476,736", + "311,973,482,284,542,371,301,330,321,821,976,049"), // + intExample("#o", "0o52", "-0o52", "0o342", + "0o372015", "0o17777777777", + "-0o20000000000", "0o0", "0o1", + "0o1000000000000", "-0o1000000000000", + "0o1701257274030155626774437006214073142761"), // + intExample("#X", "0X2A", "-0X2A", "0XE2", "0X1F40D", + "0X7FFFFFFF", "-0X80000000", "0X0", "0X1", + "0X1000000000", "-0X1000000000", + "0X3C157AF0306DCB7F23E06460ECC5F1"), // + intExample("+d", "+42", "-42", "+226", "+128013", + "+2147483647", "-2147483648", "+0", "+1", + "+68719476736", "-68719476736", + "+311973482284542371301330321821976049"), // + intExample(" d", " 42", "-42", " 226", " 128013", + " 2147483647", "-2147483648", " 0", " 1", + " 
68719476736", "-68719476736", + " 311973482284542371301330321821976049"), // + intExample("5", " 42", " -42", " 226", "128013", + "2147483647", "-2147483648", " 0", + " 1", "68719476736", "-68719476736", + "311973482284542371301330321821976049"), // + intExample("^6", " 42 ", " -42 ", " 226 ", + "128013", "2147483647", "-2147483648", + " 0 ", " 1 ", "68719476736", + "-68719476736", + "311973482284542371301330321821976049"), // + intExample("~<5", "42~~~", "-42~~", "226~~", + "128013", "2147483647", "-2147483648", + "0~~~~", "1~~~~", "68719476736", + "-68719476736", + "311973482284542371301330321821976049"), // + intExample("0=+6", "+00042", "-00042", "+00226", + "+128013", "+2147483647", "-2147483648", + "+00000", "+00001", "+68719476736", + "-68719476736", + "+311973482284542371301330321821976049"), // + + // char formats (hand-crafted) -------------------- + intExample("c", new Object[] {42, 226, 128013}, // + "*", "â", "🐍")); + } + + /** + * Construct a set of test arguments for a single format type + * and a reference result for each value in {@link #VALUES}, + * provided by the caller. We convert reference results to + * {@code PyUnicode} to ensure we get Python comparison + * semantics. + * + * @param format to apply + * @param expected results to expect + * @return example data for a test + */ + private static Arguments intExample(String format, + String... expected) { + return intExample(format, VALUES, expected); + } + + /** + * Construct a set of test arguments for a single format type + * and a reference result for each value in {@code values}, + * provided by the caller. We convert reference results to + * {@code PyUnicode} to ensure we get Python comparison + * semantics. + * + * @param format to apply + * @param values to apply {@code format} to + * @param expected results to expect + * @return example data for a test + */ + private static Arguments intExample(String format, + Object[] values, String... 
expected) { + assert expected.length == values.length; + PyUnicode[] uExpected = new PyUnicode[expected.length]; + for (int i = 0; i < expected.length; i++) { + uExpected[i] = newPyUnicode(expected[i]); + } + return arguments(format, values, uExpected); + } + } + + /** + * Test formatting an integer + */ + @Nested + @DisplayName("int.__format__") + class IntFormatTest extends AbstractIntFormatTest { + + @DisplayName("int.__format__(int, String)") + @ParameterizedTest(name = "int.__format__(x, \"{0}\")") + @MethodSource("intExamples") + void intFormat(String format, Object[] values, + PyUnicode[] expected) { + for (int i = 0; i < values.length; i++) { + Object r = PyLong.__format__(values[i], format); + assertEquals(expected[i], r); + } + } + + @DisplayName("int.__format__: unknown specifier") + @ParameterizedTest(name = "int.__format__(x, \"{0}\")") + @ValueSource(strings = {"z", "#10z"}) + void intFormatUnknown(String format) { + assertRaises(ValueError.class, + () -> PyLong.__format__(0, format), + "Unknown format code 'z' for object of type 'int'"); + } + + @DisplayName("int.__format__: precision not allowed") + @ParameterizedTest(name = "int.__format__(x, \"{0}\")") + @ValueSource(strings = {"123.456x", ".5d", "0=+6.1"}) + void intFormatWithPrecision(String format) { + assertRaises(ValueError.class, + () -> PyLong.__format__(0, format), + "Precision not allowed in integer format specifier"); + } + + @DisplayName("int.__format__: grouping not allowed") + @ParameterizedTest(name = "int.__format__(x, \"{0}\")") + @ValueSource(strings = {"10,n", ",n"}) + void intFormatNWithGrouping(String format) { + assertRaises(ValueError.class, + () -> PyLong.__format__(0, format), + // CPython: "Cannot specify ',' with 'n'"); + // But we prefer: + "Grouping (,) not allowed with integer format specifier 'n'"); + } + + @DisplayName("int.__format__: sign not allowed") + @ParameterizedTest(name = "int.__format__(x, \"{0}\")") + @ValueSource(strings = {"+c", "-c", "+10c"}) + void 
intFormatCWithSign(String format) { + assertRaises(ValueError.class, + () -> PyLong.__format__(0, format), + "Sign not allowed with integer format specifier 'c'"); + } + + @DisplayName("int.__format__: alternate form not allowed") + @ParameterizedTest(name = "int.__format__(x, \"{0}\")") + @ValueSource(strings = {"#c", "#10c"}) + void intFormatCWithAltForm(String format) { + assertRaises(ValueError.class, + () -> PyLong.__format__(0, format), + "Alternate form (#) not allowed with integer format specifier 'c'"); + } + } + + public void strFormat() { + String v = "abc"; + TextFormatter f = newTextFormatter(""); + assertEquals("abc", f.format(v).pad().getResult()); + + String v2 = "abcdef"; + f = newTextFormatter(".3"); + assertEquals("abc", f.format(v2).pad().getResult()); + + f = newTextFormatter("6"); + assertEquals("abc ", f.format(v).pad().getResult()); + } + + private static TextFormatter newTextFormatter(String fmt) { + Spec spec = InternalFormat.fromText(fmt); + return new TextFormatter(spec) { + @Override + public TextFormatter format(Object o) + throws FormatError, NoConversion { + if (o instanceof String) { + return format((String)o); + } else if (o instanceof PyUnicode) { + return format(PyUnicode.asString(o)); + } else { + throw NO_CONVERSION; + } + } + }; + } +} diff --git a/core/src/test/java/org/python/core/IntegerDivisionTest.java b/core/src/test/java/org/python/core/IntegerDivisionTest.java new file mode 100644 index 000000000..a54bdbe3a --- /dev/null +++ b/core/src/test/java/org/python/core/IntegerDivisionTest.java @@ -0,0 +1,203 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.python.core.PyLongMethods.divide; +import static org.python.core.PyLongMethods.modulo; +import static org.python.core.PyLongMethods.divmod; + +import java.math.BigInteger; +import java.util.LinkedList; +import java.util.List; + +import org.junit.jupiter.api.Test; + +/** + * These are unit tests of the methods that underlie integer division + * and modulus: {@code x//y} and {@code x%y}. The Java and Python have + * differing semantics for integer division and remainder, so it is not + * just a case of wrapping up the Java implementation, but of carefully + * adjusting the arguments and result according to quadrant in which + * {@code (x,y)} lies. + *

+ * Furthermore, we have made an attempt to optimise these + * implementations by arcane logic. All things considered, a careful + * test is called for. + */ +class IntegerDivisionTest extends UnitTestSupport { + + /** + * We use these values as the dividend. The list includes zero and + * {@code Integer.MIN_VALUE}, both of which have alternate paths in + * the implementation. {@code -Integer.MIN_VALUE / -1} is important + * because Java division silently overflows. + */ + private static final int[] XVALUES = argValues(8191, true); + /** + * We use these values as the divisor. The list does not include + * zero, since you can't divide by that. + */ + private static final int[] YVALUES = argValues(8933, false); + + /** + * 2**31 aka {@code -Integer.MIN_VALUE / -1}, which Java can't + * represent, but is a reasonable request in Python. + */ + private static BigInteger MINUS_INT_MIN = + BigInteger.valueOf(-(long)Integer.MIN_VALUE); + + /** + * Reference implementation of integer division. + * + * @param x dividend + * @param y divisor + * @return quotient + */ + private static int refDivide(int x, int y) { + // Python division rounds towards negative infinity + double dq = (double)x / (double)y; + long lq = Math.round(Math.floor(dq)); + int q = (int)lq; + if (q != lq) + throw new IllegalArgumentException(String + .format("Can't return an int from Q(%d,%d)", x, y)); + return q; + } + + /** + * Reference implementation of integer division returning + * {@code Object} and correctly handling dividend of + * {@link Integer#MIN_VALUE}. + * + * @param x dividend + * @param y divisor + * @return quotient + */ + private static Object refDivideObject(int x, int y) { + if (x == Integer.MIN_VALUE && y == -1) + // Java division overflows on these values (only) + return MINUS_INT_MIN; + else + return refDivide(x, y); + } + + /** + * Compare {@code x//y} computed by class under test and reference + * means. 
+ * + * @param x dividend + * @param y divisor + */ + private static void singleDivision(int x, int y) { + // The result is sometimes an Object not an Integer + Object ref = refDivideObject(x, y); + Object q = divide(x, y); + assertEquals(ref, q, () -> String.format("q(%d,%d)", x, y)); + } + + /** + * Reference implementation of integer remainder on division. + * + * @param x dividend + * @param y divisor + * @return remainder + */ + private static int refRemainder(int x, int y) { + if (x == Integer.MIN_VALUE && y == -1) + // Java division overflows on these values (only) + return 0; + int q = refDivide(x, y); + return x - q * y; + } + + /** + * Compare {@code x%y} computed by class under test and reference + * means. + * + * @param x dividend + * @param y divisor + */ + private static void singleModulo(int x, int y) { + int ref = refRemainder(x, y); + int r = PyLong.asInt(modulo(x, y)); + assertEquals(ref, r, () -> String.format("r(%d,%d)", x, y)); + } + + /** + * Compare {@code divmod(x,y)} computed by class under test and + * reference means. + * + * @param x dividend + * @param y divisor + */ + private static void singleDivMod(int x, int y) { + Object ref_q = refDivideObject(x, y); + int ref_r = refRemainder(x, y); + + PyTuple qr = divmod(x, y); + Object q = qr.get(0); + int r = PyLong.asInt(qr.get(1)); + + assertEquals(ref_q, q, () -> String.format("q(%d,%d)", x, y)); + assertEquals(ref_r, r, () -> String.format("r(%d,%d)", x, y)); + } + + /** + * Generate values across the range of integers, positive, + * (optionally) zero and negative, with some at or near the + * extremes. A good choice for the parameter {@code P} is a large + * prime less than a million. + * + * @param P some values will be small multiples of this. 
+ * @param withZero include zero in the range + * @return an array of the values + */ + private static int[] argValues(final int P, boolean withZero) { + final int N = 5, M = 3; + List<Integer> values = new LinkedList<>(); + // Some values either side of zero + for (int x = -N; x <= N; x++) { + if (x != 0 || withZero) { values.add(x); } + } + // A few values at each extreme + for (int i = 0; i < M; i++) { + values.add(Integer.MIN_VALUE + i); + values.add(Integer.MAX_VALUE - i); + } + // Some large-ish values from the middle of the range + for (int i = 2; i <= N; i++) { + values.add(P * i); + values.add(-P * i); + } + + // Copy to a primitive array (each element is unboxed) + final int L = values.size(); + int[] v = new int[L]; + for (int i = 0; i < L; i++) { v[i] = values.get(i); } + return v; + } + + /** Test division in 4 quadrants. */ + @Test + void testDivide() { + for (int y : YVALUES) { + for (int x : XVALUES) { singleDivision(x, y); } + } + } + + /** Test remainder on division in 4 quadrants. */ + @Test + void testModulo() { + for (int y : YVALUES) { + for (int x : XVALUES) { singleModulo(x, y); } + } + } + + /** Test division and modulus combined in 4 quadrants. 
*/ + @Test + void testDivMod() { + for (int y : YVALUES) { + for (int x : XVALUES) { singleDivMod(x, y); } + } + } +} diff --git a/core/src/test/java/org/python/core/LookupTest.java b/core/src/test/java/org/python/core/LookupTest.java new file mode 100644 index 000000000..b858dc4dd --- /dev/null +++ b/core/src/test/java/org/python/core/LookupTest.java @@ -0,0 +1,434 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.math.BigInteger; +import java.util.LinkedList; +import java.util.List; +import java.util.function.Function; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Test actions that depend on looking up keys in dictionaries, + * including those embedded in {@code type} objects (which have + * special characteristics). A particular concern is to verify the + * interchangeability of acceptable implementations of Python types + * when used as key. For example, data entered with a key that is an + * Integer should be retrievable by one that is a BigInteger with + * the same value. + */ +class LookupTest extends UnitTestSupport { + + /** + * A {@code KeyTuple} holds a single Python value, realised in each + * of the accepted implementations of some Python type. + */ + private abstract static class KeyTuple { + + final Object py; + + KeyTuple(Object py) { this.py = py; } + } + + private static final String A = "a"; + private static final String B = "b123"; + private static final String C = "Python"; + private static final String D = "QZthon"; // Same hash as C + + /** + * A {@code StrKeyTuple} holds a single Python {@code str} value, + * realised in each of the accepted implementations {@code String} + * and {@link PyUnicode}. 
+ */ + private static class StrKeyTuple extends KeyTuple { + + final String s; + + StrKeyTuple(String s) { + super(newPyUnicode(s)); + this.s = s; + } + } + + /** The {@code str} keys used in the tests, filled by {@link #fillStrKeyTuples()}. */ + private static final List<StrKeyTuple> strKeyTuples = new LinkedList<StrKeyTuple>(); + + /** Fill {@link #strKeyTuples} once, before any test runs. */ + @BeforeAll + static void fillStrKeyTuples() { + strKeyTuples.add(new StrKeyTuple(A)); + strKeyTuples.add(new StrKeyTuple(B)); + strKeyTuples.add(new StrKeyTuple(C)); + strKeyTuples.add(new StrKeyTuple(D)); + } + + /** + * An {@code IntKeyTuple} holds a single Python {@code int} value, + * realised in each of the accepted implementations {@code Integer}, + * {@code BigInteger}, and {@link PyLong}. + * + */ + private static class IntKeyTuple extends KeyTuple { + + final Integer i; + final BigInteger b; + + IntKeyTuple(int i) { + super(newPyLong(i)); + this.i = i; + this.b = BigInteger.valueOf(i); + } + } + + /** The {@code int} keys used in the tests, filled by {@link #fillIntKeyTuples()}. */ + private static final List<IntKeyTuple> intKeyTuples = new LinkedList<IntKeyTuple>(); + + /** Fill {@link #intKeyTuples} once, before any test runs. */ + @BeforeAll + static void fillIntKeyTuples() { + intKeyTuples.add(new IntKeyTuple(4)); + intKeyTuples.add(new IntKeyTuple(-5)); + intKeyTuples.add(new IntKeyTuple(Integer.MAX_VALUE)); + intKeyTuples.add(new IntKeyTuple(Integer.MIN_VALUE)); + intKeyTuples.add(new IntKeyTuple(Integer.MIN_VALUE + 1)); + } + + @Nested + @DisplayName("Object hashes equal each other and Java hashCode()") + class HashesEqual { + + /** + * Verify that {@code str.__hash__} produces the same value as Java + * {@code hashCode()} in each accepted implementation. + * + * @throws TypeError if {@code v} is an unhashable type + * @throws Throwable on errors within {@code __hash__} + */ + @Test + void where_str() throws TypeError, Throwable { + assertEqualHashes(strKeyTuples, k -> k.s, k -> k.py); + } + + /** + * Verify that {@code int.__hash__} produces the same value as Java + * {@code hashCode()} in each accepted implementation. 
+ * + * @throws TypeError if {@code v} is an unhashable type + * @throws Throwable on errors within {@code __hash__} + */ + @Test + void where_int() throws TypeError, Throwable { + assertEqualHashes(intKeyTuples, k -> k.i, k -> k.b, k -> k.py); + } + } + + /** + * Iterate a list of key tuples, which are tuples of Python values, + * and assert that each member of the tuple has a hash that is equal + * to the Java {@code hashCode} of the first. {@code keyImpl} + * specifies which implementations by a sequence of functions that + * may be given as an argument like {@code k -> k.py} + * + * @param <KT> one of the {@code *KeyTuple} classes containing + * different realisations of the same value + * @param keyTuples keys to validate + * @param keyImpl specifies which implementation to test + * @throws TypeError if we find an unhashable type + * @throws Throwable on errors within {@code __hash__} + */ + @SafeVarargs + private static <KT extends KeyTuple> void assertEqualHashes(List<KT> keyTuples, Function<KT, Object>... keyImpl) + throws TypeError, Throwable { + + for (KT keyTuple : keyTuples) { + // Get the Java hash of the first key type + Object value = keyImpl[0].apply(keyTuple); + int expected = value.hashCode(); + + // Each Python hash should be equal to it + for (Function<KT, Object> ki : keyImpl) { + // Compare Python hash obtained via abstract object API + value = ki.apply(keyTuple); + Object hash = Abstract.hash(value); + assertEquals(expected, hash); + } + } + } + + @Nested + @DisplayName("Keys in a dict match every accepted implementation") + class DictKeysMatch { + + /** + * Test that for a {@code dict} (a {@link PyDict}) {@code str} keys + * that are equal act as equal keys. Here we put data in with a + * {@code String} key. 
+ */ + @Test + void when_str_key_is_String() { + + PyDict dict = Py.dict(); + + // Insert counter value by the String key + insertSequentialInts(dict, strKeyTuples, k -> k.s); + + // Retrieve the same value by the various keys + assertSequentialInts(dict, strKeyTuples, k -> k.s); + assertSequentialInts(dict, strKeyTuples, k -> k.py); + } + + /** + * Test that for a {@code dict} (a {@link PyDict}) {@code str} keys + * that are equal act as equal keys. Here we put data in with a + * {@code PyUnicode} key. + */ + @Test + void when_str_key_is_PyUnicode() { + + PyDict dict = Py.dict(); + + // Insert counter value by the PyUnicode key + insertSequentialInts(dict, strKeyTuples, k -> k.py); + + // Retrieve the same value by the various keys + assertSequentialInts(dict, strKeyTuples, k -> k.s); + assertSequentialInts(dict, strKeyTuples, k -> k.py); + } + + /** + * Test that for a {@code dict} (a {@link PyDict}) {@code int} keys + * that are equal act as equal keys. Here we put data in with an + * {@code Integer} key. + */ + @Test + void when_int_key_is_Integer() { + + PyDict dict = Py.dict(); + + // Insert counter value by the Integer key (not the PyLong one, + // which when_int_key_is_PyLong already covers) + insertSequentialInts(dict, intKeyTuples, k -> k.i); + + // Retrieve the same value by the various keys + assertSequentialInts(dict, intKeyTuples, k -> k.py); + assertSequentialInts(dict, intKeyTuples, k -> k.i); + assertSequentialInts(dict, intKeyTuples, k -> k.b); + } + + /** + * Test that for a {@code dict} (a {@link PyDict}) {@code int} keys + * that are equal act as equal keys. Here we put data in with a + * {@code BigInteger} key. 
+ */ + @Test + void when_int_key_is_BigInteger() { + + PyDict dict = Py.dict(); + + // Insert counter value by the BigInteger key + insertSequentialInts(dict, intKeyTuples, k -> k.b); + + // Retrieve the same value by the various keys + assertSequentialInts(dict, intKeyTuples, k -> k.b); + assertSequentialInts(dict, intKeyTuples, k -> k.i); + assertSequentialInts(dict, intKeyTuples, k -> k.py); + } + + /** + * Test that for a {@code dict} (a {@link PyDict}) {@code int} keys + * that are equal act as equal keys. Here we put data in with a + * {@code PyLong} key. + */ + @Test + void when_int_key_is_PyLong() { + + PyDict dict = Py.dict(); + + // Insert counter value by the PyLong key + insertSequentialInts(dict, intKeyTuples, k -> k.py); + + // Retrieve the same value by the various keys + assertSequentialInts(dict, intKeyTuples, k -> k.i); + assertSequentialInts(dict, intKeyTuples, k -> k.b); + assertSequentialInts(dict, intKeyTuples, k -> k.py); + } + + /** + * Test that for a {@code dict} (a {@link PyDict}) {@code int} keys + * that are equal to {@code bool} values act as equal keys. Here we + * put data in with a {@code Boolean} key. + */ + @Test + void when_int_key_is_Boolean() { + + PyDict dict = Py.dict(); + + // Insert counter value by the Boolean key + int counter = 100; + dict.put(Boolean.FALSE, counter++); + dict.put(Boolean.TRUE, counter++); + + // Now try to retrieve as if the values were int 0 and 1 + List<IntKeyTuple> boolKeyTuples = List.of(new IntKeyTuple(0), new IntKeyTuple(1)); + + // Retrieve the same value by the various keys + assertSequentialInts(dict, boolKeyTuples, k -> k.i); + assertSequentialInts(dict, boolKeyTuples, k -> k.b); + assertSequentialInts(dict, boolKeyTuples, k -> k.py); + } + + /** + * Insert data into a {@link PyDict} in the Python {@code int} + * sequence {@code 100, 101, 102, ...}. We use a specified + * acceptable implementation of the key type under test. 
+ * {@code keyImpl} specifies which implementation by a function that + * may be given as an argument like {@code k -> k.py} + * + * @param one of the {@code *KeyTuple} classes containing + * different realisations of the same value + * @param dict to insert to + * @param keyTuples keys to insert with + * @param keyImpl specifies which implementation to use + */ + private void insertSequentialInts(PyDict dict, List keyTuples, + Function keyImpl) { + + int counter = 100; + + for (KT keyTuple : keyTuples) { + // Insert the value by the specified key type + Object key = keyImpl.apply(keyTuple); + dict.put(key, counter++); + } + } + + /** + * Retrieve data from a {@link PyDict}, asserting that it consists + * of a Python {@code int} sequence {@code 100, 101, 102, ...} . We + * use keys of the same value (from the same list of key tuples) as + * was used to make each entry, but in a specified acceptable + * implementation of a the key type under test. {@code keyImpl} + * specifies which implementation by a function that may be given as + * an argument like {@code k -> k.py} + * + * @param one of the {@code *KeyTuple} classes containing + * different realisations of the same value + * @param dict to retrieve from + * @param keyTuples keys to retrieve with + * @param keyImpl specifies which implementation to test + */ + private void assertSequentialInts(PyDict dict, List keyTuples, + Function keyImpl) { + + int counter = 100; + + for (KT keyTuple : keyTuples) { + // Retrieve the value by the specified key type + Object key = keyImpl.apply(keyTuple); + Object value = dict.get(key); + + // The result should be a Python int + assertNotNull(value, () -> String.format("key %s %s not matched in dict", + key.getClass().getSimpleName(), key)); + assertPythonType(PyLong.TYPE, value); + + // And the value should equal the counter + assertEquals(counter++, value); + } + } + } + + @Nested + @DisplayName("Expected attributes found") + class AttrLookupString { + + /** + * Test that 
{@code PyType.lookup}, given the name as a {@code String} or + * as a {@link PyUnicode}, retrieves some well-known + * attributes. + */ + @Test + void type_lookup() { + checkTypeLookup(PyBaseObject.TYPE, "__getattribute__"); + checkTypeLookup(PyLong.TYPE, "__neg__"); + checkTypeLookup(PyUnicode.TYPE, "__add__"); + checkTypeLookup(PyTuple.TYPE, "__repr__"); + } + + /** + * Check {@code PyType.lookup} succeeds for {@code String} and + * {@link PyUnicode}. + * + * @param type to address + * @param name to look up + */ + private void checkTypeLookup(PyType type, String name) { + Object u, s; + // Lookup signals no match with null + assertNotNull(s = type.lookup(name)); + assertNotNull(u = type.lookup(newPyUnicode(name))); + assertEquals(s, u); + } + + /** + * Test that {@code Abstract.lookupAttr}, given the name as a + * {@code String} or as a {@link PyUnicode}, retrieves some + * well-known attributes. + * + * @throws Throwable on failure + */ + @Test + void abstract_lookupAttr() throws Throwable { + checkLookupAttr(PyBaseObject.TYPE, "__getattribute__"); + checkLookupAttr(PyLong.TYPE, "__neg__"); + checkLookupAttr(PyUnicode.TYPE, "__add__"); + checkLookupAttr(PyTuple.TYPE, "__repr__"); + } + + /** + * Check {@code Abstract.lookupAttr} succeeds for {@code String} and + * {@link PyUnicode}. + * + * @param obj to address + * @param name to look up + * @throws Throwable on errors + */ + private void checkLookupAttr(Object obj, String name) throws Throwable { + Object u, s; + // lookupAttr signals no match with null + assertNotNull(s = Abstract.lookupAttr(obj, name)); + assertNotNull(u = Abstract.lookupAttr(obj, newPyUnicode(name))); + assertEquals(s, u); + } + + /** + * Test that {@code Abstract.getAttr}, given the name as a + * {@code String} or as a {@link PyUnicode}, retrieves some + * well-known attributes. 
+ * + * @throws Throwable on failure + */ + @Test + void abstract_getAttr() throws Throwable { + checkGetAttr(PyBaseObject.TYPE, "__getattribute__"); + checkGetAttr(PyLong.TYPE, "__neg__"); + checkGetAttr(PyUnicode.TYPE, "__add__"); + checkGetAttr(PyTuple.TYPE, "__repr__"); + } + + /** + * Check {@code Abstract.getAttr} succeeds for {@code String} and + * {@link PyUnicode}. + * + * @param obj to address + * @param name to look up + * @throws Throwable on failure + */ + private void checkGetAttr(Object obj, String name) throws Throwable { + // GetAttr signals no match with an exception + Object s = Abstract.getAttr(obj, name); + Object u = Abstract.getAttr(obj, newPyUnicode(name)); + assertEquals(s, u); + } + } +} diff --git a/core/src/test/java/org/python/core/ModuleExposerMethodTest.java b/core/src/test/java/org/python/core/ModuleExposerMethodTest.java new file mode 100644 index 000000000..db73c9fbc --- /dev/null +++ b/core/src/test/java/org/python/core/ModuleExposerMethodTest.java @@ -0,0 +1,686 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.invoke.MethodHandles; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import org.python.core.Exposed.PositionalOnly; +import org.python.core.Exposed.PythonMethod; +import org.python.core.Exposed.PythonStaticMethod; +import org.python.base.MethodKind; + +/** + * Test that functions exposed by a Python module defined in + * Java, using the scheme of annotations defined in {@link Exposed}, + * result in {@link PyJavaFunction} objects with characteristics + * that correspond to the definition. + * 

+ * The first test in each case is to examine the fields in the + * parser that attaches to the {@link ModuleDef.MethodDef}. Then we + * call the function using the {@code __call__} special method, and + * using our "Java call" signatures. + *

+ * There is a nested test suite for each signature pattern. + */ +@DisplayName("A method exposed by a module") +class ModuleExposerMethodTest { + + /** + * Nested test classes implement these as standard. A base class + * here is just a way to describe the tests once that we repeat in + * each nested case. + */ + abstract static class Standard { + + // Working variables for the tests + /** The module we create. */ + final PyModule module; + /** The function to examine or call. */ + PyJavaFunction func; + /** The parser in the function we examine. */ + ArgParser ap; + /** The expected result of calling the function */ + Object[] exp; + + Standard() { + this.module = new ExampleModule(); + this.module.exec(); + } + + /** + * A parser attached to the function object should have field values + * that correctly reflect the signature and annotations in the + * defining class. + */ + abstract void has_expected_fields(); + + /** + * Call the function using the {@code __call__} special method with + * arguments correct for the function's specification. The function + * should obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports__call__() throws Throwable; + + /** + * Call the function using the {@code __call__} special method with + * arguments correct for the function's specification, and + * explicitly zero or more keywords. The function should obtain the + * correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_keywords() throws Throwable; + + /** + * Call the function using the {@code __call__} special method and + * an unexpected keyword: where none is expected, for a positional + * argument, or simply an unacceptable name. The function should + * throw {@link TypeError}. 
+ * + * @throws Throwable unexpectedly + */ + abstract void raises_TypeError_on_unexpected_keyword() throws Throwable; + + /** + * Call the function using the Java call interface with arguments + * correct for the function's specification. The function should + * obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_java_call() throws Throwable; + + /** + * Check that the fields of the parser match expectations for a + * method with no collector parameters and a certain number of + * positional-only parameters. + * + * @param kind static or instance + * @param name of method + * @param count of parameters + * @param posonlycount count of positional-only parameters + */ + void no_collector(MethodKind kind, String name, int count, int posonlycount) { + assertEquals(name, ap.name); + assertEquals(kind, ap.methodKind); + assertEquals(count, ap.argnames.length); + assertEquals(count, ap.argcount); + assertEquals(posonlycount, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(count, ap.regargcount); + assertEquals(-1, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + /** + * Check that the fields of the parser match expectations for a + * static method with no collector parameters and a certain number + * of positional-only parameters. + * + * @param name of method + * @param count of parameters + * @param posonly count of positional-only parameters + */ + void no_collector_static(String name, int count, int posonly) { + no_collector(MethodKind.STATIC, name, count, posonly); + } + + /** + * Check that the fields of the parser match expectations for a + * instance method with no collector parameters and a certain number + * of positional-only parameters. 
+ * + * @param name of method + * @param count of parameters + * @param posonly count of positional-only parameters + */ + void no_collector_instance(String name, int count, int posonly) { + no_collector(MethodKind.INSTANCE, name, count, posonly); + } + + /** + * Check the result of a call against {@link #exp}. The reference + * result is the same throughout a given sub-class test. + * + * @param result of call + */ + void check_result(PyTuple result) { assertArrayEquals(exp, result.value); } + } + + /** + * A Python module definition that exhibits a range of method + * signatures explored in the tests. + */ + static class ExampleModule extends JavaModule { + + static final ModuleDef DEF = new ModuleDef("example", MethodHandles.lookup()); + + ExampleModule() { super(DEF); } + + /** + * See {@link StaticNoParams}: no parameters are allowed. + */ + @PythonStaticMethod + static void f0() {} + + /** + * See {@link NoParams}: no parameters are allowed. + */ + @PythonMethod + void m0() {} + + /** + * See {@link StaticOneParam}: the parameter is positional-only as a + * result of the default exposure. + * + * @param a positional arg + * @return the arg (tuple) + */ + @PythonStaticMethod + static PyTuple f1(double a) { return Py.tuple(a); } + + /** + * See {@link OneParam}: the parameter is positional-only as a + * result of the default exposure. + * + * @param a positional arg + * @return the arg (tuple) + */ + @PythonMethod + PyTuple m1(double a) { return Py.tuple(this, a); } + + /** + * See {@link StaticDefaultPositionalParams}: the parameters are + * positional-only as a result of the default exposure. + * + * @param a positional arg + * @param b positional arg + * @param c positional arg + * @return the args + */ + @PythonStaticMethod + static PyTuple f3(int a, String b, Object c) { return Py.tuple(a, b, c); } + + /** + * See {@link DefaultPositionalParams}: the parameters are + * positional-only as a result of the default exposure.
+ * + * @param a positional arg + * @param b positional arg + * @param c positional arg + * @return the args + */ + @PythonMethod + PyTuple m3(int a, String b, Object c) { return Py.tuple(this, a, b, c); } + + /** + * See {@link StaticPositionalOrKeywordParams}: the parameters are + * positional-or-keyword but none are positional-only. + * + * @param a positional-or-keyword arg + * @param b positional-or-keyword arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonStaticMethod(positionalOnly = false) + static PyTuple f3pk(int a, String b, Object c) { return Py.tuple(a, b, c); } + + /** + * See {@link PositionalOrKeywordParams}: the parameters are + * positional-or-keyword but none are positional-only. + * + * @param a positional-or-keyword arg + * @param b positional-or-keyword arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonMethod(positionalOnly = false) + PyTuple m3pk(int a, String b, Object c) { return Py.tuple(this, a, b, c); } + + /** + * See {@link StaticSomePositionalOnlyParams}: two parameters are + * positional-only as a result of an annotation. + * + * @param a positional arg + * @param b positional arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonStaticMethod + static PyTuple f3p2(int a, @PositionalOnly String b, Object c) { return Py.tuple(a, b, c); } + + /** + * See {@link SomePositionalOnlyParams}: two parameters are + * positional-only as a result of an annotation. + * + * @param a positional arg + * @param b positional arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonMethod + PyTuple m3p2(int a, @PositionalOnly String b, Object c) { return Py.tuple(this, a, b, c); } + } + + /** {@link ExampleModule#m0()} accepts no arguments.
*/ + @Nested + @DisplayName("with no parameters") + class NoParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.m0 + func = (PyJavaFunction)Abstract.getAttr(module, "m0"); + ap = func.argParser; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m0", 0, 0); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call func() + Object[] args = {}; + + // The method is declared void (which means return None) + Object r = func.__call__(args, null); + assertEquals(Py.None, r); + } + + /** Keywords must be empty. */ + @Override + @Test + void supports_keywords() throws Throwable { + // We call func() + Object[] args = {}; + String[] names = {}; + + // The method is declared void (which means return None) + Object r = func.__call__(args, names); + assertEquals(Py.None, r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call func(c=3) + Object[] args = {3}; + String[] names = {"c"}; // Nothing expected + + assertThrows(TypeError.class, () -> func.__call__(args, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call func() + // The method is declared void (which means return None) + Object r = func.call(); + assertEquals(Py.None, r); + } + } + + /** {@link ExampleModule#f0()} accepts no arguments. */ + @Nested + @DisplayName("static, with no parameters") + class StaticNoParams extends NoParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.f0 + func = (PyJavaFunction)Abstract.getAttr(module, "f0"); + ap = func.argParser; + } + + @Override + @Test + void has_expected_fields() { no_collector_static("f0", 0, 0); } + } + + /** + * {@link ExampleModule#m1(double)} accepts one argument that + * must be given by position. 
+ */ + @Nested + @DisplayName("with one positional-only parameter") + class OneParam extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.m1 + func = (PyJavaFunction)Abstract.getAttr(module, "m1"); + ap = func.argParser; + exp = new Object[] {module, 42.0}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m1", 1, 1); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call func(42.0) + Object[] args = {42.0}; + // The method reports its arguments as a tuple + PyTuple r = (PyTuple)func.__call__(args, null); + check_result(r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call func(42.0) + Object[] args = {42.0}; + String[] names = {}; + // The method reports its arguments as a tuple + PyTuple r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call func(42.0, a=5) + Object[] args = {42.0, 5}; + String[] names = {"a"}; + + assertThrows(TypeError.class, () -> func.__call__(args, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call func(42.0) + PyTuple r = (PyTuple)func.call(42.0); + check_result(r); + } + } + + /** + * {@link ExampleModule#f1(double)} accepts one argument that + * must be given by position. + */ + @Nested + @DisplayName("static, with one positional-only parameter") + class StaticOneParam extends OneParam { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.f1 + func = (PyJavaFunction)Abstract.getAttr(module, "f1"); + ap = func.argParser; + exp = new Object[] {42.0}; + } + + @Override + @Test + void has_expected_fields() { no_collector_static("f1", 1, 1); } + } + + /** + * {@link ExampleModule#m3(int, String, Object)} accepts 3 arguments + * that must be given by position. 
+ */ + @Nested + @DisplayName("with positional-only parameters by default") + class DefaultPositionalParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.m3 + func = (PyJavaFunction)Abstract.getAttr(module, "m3"); + ap = func.argParser; + exp = new Object[] {module, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3", 3, 3); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call func(1, '2', 3) + Object[] args = {1, "2", 3}; + // The method reports its arguments as a tuple + PyTuple r = (PyTuple)func.__call__(args, null); + check_result(r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call func(1, '2', 3) + Object[] args = {1, "2", 3}; + String[] names = {}; + // The method reports its arguments as a tuple + PyTuple r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call func(1, '2', c=3) + Object[] args = {1, "2", 3}; + String[] names = {"c"}; + + assertThrows(TypeError.class, () -> func.__call__(args, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call func(1, '2', 3) + PyTuple r = (PyTuple)func.call(1, "2", 3); + check_result(r); + } + } + + /** + * {@link ExampleModule#f3(int, String, Object)} accepts 3 arguments + * that must be given by position. 
+ */ + @Nested + @DisplayName("static, with positional-only parameters by default") + class StaticDefaultPositionalParams extends DefaultPositionalParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.f3 + func = (PyJavaFunction)Abstract.getAttr(module, "f3"); + ap = func.argParser; + exp = new Object[] {1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_static("f3", 3, 3); } + } + + /** + * {@link ExampleModule#m3pk(int, String, Object)} accepts 3 + * arguments that may be given by position or keyword. + */ + @Nested + @DisplayName("with positional-or-keyword parameters") + class PositionalOrKeywordParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.m3pk + func = (PyJavaFunction)Abstract.getAttr(module, "m3pk"); + ap = func.argParser; + exp = new Object[] {module, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3pk", 3, 0); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call func(1, '2', 3) + Object[] args = {1, "2", 3}; + String[] names = {}; + PyTuple r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + /** Supply second and third arguments by keyword. */ + @Override + @Test + void supports_keywords() throws Throwable { + // We call func(1, c=3, b='2') + Object[] args = {1, 3, "2"}; + String[] names = {"c", "b"}; + PyTuple r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + /** Get the wrong keyword. 
*/ + @Override + @Test + void raises_TypeError_on_unexpected_keyword() throws Throwable { + // We call func(1, c=3, b='2', x=4) + Object[] args = {1, 3, "2", 4}; + String[] names = {"c", "b", /* unknown */"x"}; + assertThrows(TypeError.class, () -> func.__call__(args, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + PyTuple r = (PyTuple)func.call(1, "2", 3); + check_result(r); + } + } + + /** + * {@link ExampleModule#f3pk(int, String, Object)} accepts 3 + * arguments that may be given by position or keyword. + */ + @Nested + @DisplayName("static, with positional-or-keyword parameters") + class StaticPositionalOrKeywordParams extends PositionalOrKeywordParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.f3pk + func = (PyJavaFunction)Abstract.getAttr(module, "f3pk"); + ap = func.argParser; + exp = new Object[] {1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_static("f3pk", 3, 0); } + + } + + /** + * {@link ExampleModule#m3p2(int, String, Object)} accepts 3 + * arguments, two of which may be given by position only, and the + * last by either position or keyword. + */ + @Nested + @DisplayName("with two positional-only parameters") + class SomePositionalOnlyParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.m3p2 + func = (PyJavaFunction)Abstract.getAttr(module, "m3p2"); + ap = func.argParser; + exp = new Object[] {module, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3p2", 3, 2); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call func(1, '2', 3) + Object[] args = {1, "2", 3}; + String[] names = {}; + + // The method just parrots its arguments as a tuple + PyTuple r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + /** Supply third argument by keyword. 
*/ + @Override + @Test + void supports_keywords() throws Throwable { + // We call func(1, '2', c=3) + Object[] args = {1, "2", 3}; + String[] names = {"c"}; + + // The method reports its arguments as a tuple + PyTuple r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() throws Throwable { + // We call func(1, c=3, b='2') + Object[] args = {1, 3, "2"}; + String[] names = {"c", /* positional */"b"}; + assertThrows(TypeError.class, () -> func.__call__(args, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // The method reports its arguments as a tuple + PyTuple r = (PyTuple)func.call(1, "2", 3); + check_result(r); + } + } + + /** + * {@link ExampleModule#f3p2(int, String, Object)} accepts 3 + * arguments, two of which may be given by position only, and the + * last by either position or keyword. + */ + @Nested + @DisplayName("static, with two positional-only parameters") + class StaticSomePositionalOnlyParams extends SomePositionalOnlyParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // func = module.f3p2 + func = (PyJavaFunction)Abstract.getAttr(module, "f3p2"); + ap = func.argParser; + exp = new Object[] {1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_static("f3p2", 3, 2); } + } +} diff --git a/core/src/test/java/org/python/core/ModuleExposerTest.java b/core/src/test/java/org/python/core/ModuleExposerTest.java new file mode 100644 index 000000000..dc1713965 --- /dev/null +++ b/core/src/test/java/org/python/core/ModuleExposerTest.java @@ -0,0 +1,220 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import org.python.core.Exposed.PositionalOnly; +import org.python.core.Exposed.PythonMethod; +import org.python.core.Exposed.PythonStaticMethod; +import org.python.core.ModuleDef.MethodDef; + +/** + * Test that a Python module defined in Java, using the scheme of + * annotations defined in {@link Exposed}, can be processed correctly by + * a {@link Exposer} to a {@link ModuleDef}. This tests a large part of + * the exposure mechanism. + *

+ * The class used in the test {@link FakeModule} is not actually a + * {@link PyModule}, but we go through the actions of the + * {@link ModuleExposer} so we can examine the intermediate results. + */ +@DisplayName("For a module exposed from a Java definition") +class ModuleExposerTest extends UnitTestSupport { + + /** + * This class is not actually a Python module definition, but is + * annotated as if it were. We will test whether the + * {@link MethodDef}s are created as expected. We'll also act on it + * to produce a dictionary as if it were a real module. + */ + static class FakeModule { + + static final Lookup LOOKUP = MethodHandles.lookup(); + + // Signature: () + @PythonStaticMethod + static void f0() {} + + // Signature: ($module, /) + @PythonMethod + void m0() {} + + // Signature: (a, /) + @PythonStaticMethod + static PyTuple f1(double a) {return Py.tuple(a);} + + // Signature: ($module, a, /) + @PythonMethod + @SuppressWarnings("static-method") + PyTuple m1(double a) {return Py.tuple(a);} + + // Signature: (a, b, c, /) + @PythonStaticMethod + static PyTuple f3(int a, String b, Object c) { + return Py.tuple(a, b, c); + } + + // Signature: ($module, a, b, c, /) + @PythonMethod + @SuppressWarnings("static-method") + PyTuple m3(int a, String b, Object c) { + return Py.tuple(a, b, c); + } + + // Signature: (a, b, c) + @PythonStaticMethod(positionalOnly = false) + static PyTuple f3pk(int a, String b, Object c) { + return Py.tuple(a, b, c); + } + + // Signature: ($module, /, a, b, c) + @PythonMethod(positionalOnly = false) + @SuppressWarnings("static-method") + PyTuple m3pk(int a, String b, Object c) { + return Py.tuple(a, b, c); + } + + // Signature: (a, b, /, c) + @PythonStaticMethod + static PyTuple f3p2(int a, @PositionalOnly String b, Object c) { + return Py.tuple(a, b, c); + } + + // Signature: ($module, a, b, /, c) + @PythonMethod + @SuppressWarnings("static-method") + PyTuple m3p2(int a, @PositionalOnly String b, Object c) { + return Py.tuple(a, b, c);
+ } + } + + @Nested + @DisplayName("calling the Exposer") + class TestExposer { + + @Test + @DisplayName("produces a ModuleExposer") + void getExposer() { + ModuleExposer exposer = + Exposer.exposeModule(FakeModule.class); + assertNotNull(exposer); + } + + @Test + @DisplayName("finds the expected methods") + void getMethodDefs() { + ModuleExposer exposer = + Exposer.exposeModule(FakeModule.class); + MethodDef[] mdArray = + exposer.getMethodDefs(FakeModule.LOOKUP); + checkMethodDefArray(mdArray); + } + } + + @Nested + @DisplayName("constructing a ModuleDef") + class TestDefinition { + + @Test + @DisplayName("produces a MethodDef array") + void createMethodDef() { + ModuleDef def = new ModuleDef("example", FakeModule.LOOKUP); + checkMethodDefArray(def.getMethods()); + } + } + + @Nested + @DisplayName("a module instance") + class TestInstance { + + @Test + @DisplayName("has expected method signatures") + void hasMethods() { + /* + * As FakeModule is not a PyModule, we must work a bit + * harder to take care of things normally automatic. Make a + * ModuleDef to hold the MethodDefs from the Exposer. + */ + ModuleDef def = new ModuleDef("example", FakeModule.LOOKUP); + // An instance of the "module" to bind in PyJavaMethods + FakeModule fake = new FakeModule(); + // A map to stand in for the module dictionary to hold them + Map dict = new HashMap<>(); + // Which we now fill ... 
+ for (MethodDef md : def.getMethods()) { + ArgParser ap = md.argParser; + MethodHandle mh = md.handle; + PyJavaFunction m = + PyJavaFunction.fromParser(ap, mh, fake, def.name); + dict.put(md.argParser.name, m); + } + // And here we check what's in it + checkMethodSignatures(dict); + } + } + + private static void checkMethodDefArray(MethodDef[] defs) { + assertNotNull(defs); + + Map mds = new TreeMap<>(); + for (MethodDef def : defs) { mds.put(def.argParser.name, def); } + + Set expected = new TreeSet<>(); + expected.addAll(List.of( // + "f0", "f1", "f3", "f3pk", "f3p2", // + "m0", "m1", "m3", "m3pk", "m3p2")); + + assertEquals(expected, mds.keySet(), "contains expected names"); + } + + private static void + checkMethodSignatures(Map dict) { + assertNotNull(dict); + + checkSignature(dict, "f0()"); + checkSignature(dict, "m0($module, /)"); + checkSignature(dict, "f1(a, /)"); + checkSignature(dict, "m1($module, a, /)"); + checkSignature(dict, "f3(a, b, c, /)"); + checkSignature(dict, "m3($module, a, b, c, /)"); + checkSignature(dict, "f3pk(a, b, c)"); + checkSignature(dict, "m3pk($module, /, a, b, c)"); + checkSignature(dict, "f3p2(a, b, /, c)"); + checkSignature(dict, "m3p2($module, a, b, /, c)"); + } + + /** + * Check that a method with the expected signature is in the + * dictionary. 
+ * + * @param dict dictionary + * @param spec signature + */ + private static void checkSignature(Map dict, + String spec) { + int k = spec.indexOf('('); + assertTrue(k > 0); + String name = spec.substring(0, k); + String expect = spec.substring(k); + PyJavaFunction pjf = (PyJavaFunction)dict.get(name); + assertEquals(expect, pjf.argParser.textSignature()); + } + +} diff --git a/core/src/test/java/org/python/core/OperationsFormationTest.java b/core/src/test/java/org/python/core/OperationsFormationTest.java new file mode 100644 index 000000000..c24a1d54e --- /dev/null +++ b/core/src/test/java/org/python/core/OperationsFormationTest.java @@ -0,0 +1,424 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.fail; + +import java.lang.invoke.MethodHandles; +import java.math.BigInteger; +import java.util.HashMap; +import java.util.Map; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.python.base.InterpreterError; +import org.python.core.Operations.Clash; +import org.python.core.PyType.Spec; + +/** + * An {@link Operations} object contains information about a Java + * class considered as a Python object. There are several patterns + * to explore. + *

+ * These tests can't work unless parts of {@link PyType} formation + * also work, so there is a bit of overlap. In fact many of the + * {@code Operations} objects involved are {@code PyType}s. We do + * not test that the {@code PyType}s encountered are fully-working + * as a Python {@code type}. + */ +@DisplayName("The Operations object of") +class OperationsFormationTest { + + /** + * A built-in Python type something like:

+     * class A:
+     *     pass
+     * 
+ */ + static class A implements CraftedPyObject { + static PyType TYPE = PyType.fromSpec(new Spec("A", MethodHandles.lookup())); + private PyType type; + + A(PyType type) { this.type = type; } + + A() { this(TYPE); } + + @Override + public PyType getType() { return type; } + + static class Derived extends A implements DerivedPyObject, DictPyObject { + protected Map __dict__; + + Derived(PyType type) { + super(type); + this.__dict__ = new HashMap<>(); + } + + @Override + public Map getDict() { return __dict__; } + } + + } + + /** + * A built-in Python type something like:
+     * class B(A):
+     *     pass
+     * 
+ */ + static class B { + static PyType TYPE = PyType.fromSpec(new Spec("B", MethodHandles.lookup()) // + .base(A.TYPE)); + } + + /** + * Built-in Python {@code class C} that has adopted implementations. + */ + static class C implements CraftedPyObject { + static PyType TYPE = PyType.fromSpec(new Spec("C", MethodHandles.lookup()) + .adopt(C1.class, C2.class).flagNot(PyType.Flag.BASETYPE)); + + @Override + public PyType getType() { return TYPE; } + } + + /** An adopted implementation of Python class {@code C}. */ + static class C1 {} + + /** An adopted implementation of Python class {@code C}. */ + static class C2 {} + + /** Built-in Python {@code class BadC} identical to C. */ + static class BadC implements CraftedPyObject { + static PyType TYPE = PyType.fromSpec(new Spec("BadC", MethodHandles.lookup()) + .adopt(BadC2.class).flagNot(PyType.Flag.BASETYPE)); + + @Override + public PyType getType() { return TYPE; } + } + + /** An adopted implementation of Python class {@code BadC}. */ + static class BadC2 {} + + /** + * A pure Java class (a found class in the tests) + * {@code org.python.core.OperationsFormationTest.J}. + */ + static class J {} + + /** + * A Java class simulating one generated when we extend a found Java + * class {@link J} in Python. There is no {@link PyType} + * corresponding directly to this class (unless "found"). + */ + static class JDerived extends J implements DerivedPyObject, DictPyObject { + + /** The Python type of this instance. */ + private PyType type; + protected Map __dict__; + + JDerived(PyType type) { + this.type = type; + this.__dict__ = new HashMap<>(); + } + + @Override + public Map getDict() { return __dict__; } + + @Override + public PyType getType() { return type; } + } + + /** + * Certain nested test classes implement these as standard. A base + * class here is just a way to describe the tests once that reappear + * in each nested case.
+ */ + abstract static class Base { + + // Working variables for the tests + Operations ops; + String repr; + Class javaClass; + + void setup(Class javaClass, String repr) throws Throwable { + this.javaClass = javaClass; + this.repr = repr; + this.ops = Operations.fromClass(javaClass); + } + + /** + * The {@link Operations} object finds the expected {@link PyType}, + * given the target class definition. + * + * @throws Throwable unexpectedly + */ + @Test + void finds_expected_type() throws Throwable { fail("Not yet implemented"); } + + /** + * The toString (repr) describes the Operations. + * + * @throws Throwable unexpectedly + */ + @Test + void has_expected_toString() throws Throwable { assertEquals(repr, ops.toString()); } + + } + + @Nested + @DisplayName("a built-in type") + class BuiltInTest extends Base { + @BeforeEach + void setup() throws Throwable { setup(A.class, ""); } + + @Override + @Test + void finds_expected_type() { + A a = new A(); + PyType t = ops.type(a); + assertSame(A.TYPE, t); + } + } + + @Nested + @DisplayName("a built-in type adopting classes") + class BuiltInAdoptiveTest extends Base { + @BeforeEach + void setup() throws Throwable { setup(C.class, ""); } + + @Override + @Test + void finds_expected_type() { + /* + * The Operations object of the canonical implementation is the type + * itself. (Python must touch C before C2.) + */ + assertSame(C.TYPE, ops); + C c = new C(); + assertSame(C.TYPE, ops.type(c)); + /* + * An instance of the adopted implementation has the adopting type. + */ + C2 c2 = new C2(); + Operations ops2 = Operations.of(c2); + assertNotSame(C.TYPE, ops2); + assertSame(C.TYPE, ops2.type(c2)); + } + + @Override + @Test + void has_expected_toString() throws Throwable { + super.has_expected_toString(); + // Only seen in debugging: + Operations ops2 = Operations.fromClass(C2.class); + assertEquals("C2 as ", ops2.toString()); + } + + /** + * This is an un-feature. 
The test is like + * {@link #finds_expected_type()} but the adopted class gets handled + * as a Python object before its adopting class can create its + * {@link PyType}. This causes an unintended binding that prevents + * {@link BadC} initialising correctly. The problem seems + * unavoidable, and the requirement is to detect it. + */ + @Test + void is_sensitive_to_order_of_use() { + /* + * An instance of the adopted implementation fails to have + * the adopting type if it is treated as a Python object before that + * type. + */ + BadC2 c2 = new BadC2(); // ok + Operations ops2 = Operations.of(c2); + // That created a PyType but not for BadC + assertNotSame(BadC.class, ops2.type(c2).definingClass); + /* + * The Operations object of the canonical implementation is the type + * itself. BadC will try to adopt BadC2 and this is detected as a + * clash. + */ + try { + Operations.fromClass(BadC.class); + fail("Exception not raised when " + "adoped class is exposed prematurely"); + } catch (ExceptionInInitializerError e) { + Throwable cause = e.getCause(); + if (!(cause instanceof InterpreterError)) + throw e; + cause = cause.getCause(); + if (!(cause instanceof Clash)) + throw e; + } + } + } + + @Nested + @DisplayName("a Python sub-class of a built-in type") + class PythonSubBuiltInTest extends Base { + + @BeforeEach + void setup() throws Throwable { + // toString is seen only in debugging + setup(A.Derived.class, "Derived"); + } + + /** + * Simulating a Python sub-class of a built-in Python type something + * like:
+         * class MyA(A):
+         *     pass
+         * 
{@code MyA} must be a Java sub-class of {@code A} in order + * that methods defined in {@code A} in Java be applicable to + * instances of {@code MyA}. + */ + @Override + @Test + void finds_expected_type() { + // Define a new type + // XXX cheating by short-cutting type.__new__ + Spec specMyA = new Spec("MyA", A.Derived.class).base(A.TYPE); + PyType typeMyA = PyType.fromSpec(specMyA); + + // Define an object of that type + // XXX again, cheating by short-cutting type.__call__ + Object obj = new A.Derived(typeMyA); + Operations ops = Operations.of(obj); + + // This ops is not the type of an A or a MyA + assertNotSame(A.TYPE, ops); + assertNotSame(typeMyA, ops); + + // However, the type of an instance is MyA + assertSame(typeMyA, ops.type(obj)); + } + } + + @Nested + @DisplayName("a found Java class") + class FoundTest extends Base { + @BeforeEach + void setup() throws Throwable { setup(J.class, ""); } + + @Override + @Test + void finds_expected_type() { + /* + * The Operations object is the type itself. Even if J has + * initialised, Operations.Registry.computeValue will not find it in + * opsMap. + */ + PyType type = ops.uniqueType(); + assertSame(type, ops); + + J obj = new J(); + assertSame(type, ops.type(obj)); + + // We probably expect the simple name "J". Probably. 
+ String expectedName = J.class.getSimpleName(); + assertEquals(expectedName, type.getName()); + } + } + + @Nested + @DisplayName("a Python sub-class of a found Java class") + class PythonSubFoundTest extends Base { + @BeforeEach + void setup() throws Throwable { + // toString is seen only in debugging + setup(JDerived.class, "Derived"); + } + + @Override + @Test + void finds_expected_type() throws Throwable { + // Define a new type + // XXX cheating by short-cutting type.__new__ + PyType JTYPE = PyType.fromClass(J.class); + Spec specMyJ = new Spec("MyJ", JDerived.class).base(JTYPE); + PyType typeMyJ = PyType.fromSpec(specMyJ); + + // XXX cheating by short-cutting type.__call__ + Object obj = new JDerived(typeMyJ); + Operations ops = Operations.of(obj); + + // This ops is not the type of a J or a MyJ + assertNotSame(JTYPE, ops); + assertNotSame(typeMyJ, ops); + + // However, the type of an instance is MyJ + assertSame(typeMyJ, ops.type(obj)); + } + } + + @Nested + @DisplayName("the adopted Integer class") + class IntegerTest extends Base { + // Python must get PyType ready before touching any objects. + PyType OBJECT = PyType.OBJECT_TYPE; + + @BeforeEach + void setup() throws Throwable { setup(Integer.class, "Integer as "); } + + @Override + @Test + void finds_expected_type() { + /* + * The Operations object of non-canonical implementation differs + * from the type itself, but that type is found for instances. + */ + assertNotSame(PyLong.TYPE, ops); + assertSame(PyLong.TYPE, ops.type(42)); + assertSame(PyLong.TYPE, ops.type(Integer.MIN_VALUE)); + } + } + + @Nested + @DisplayName("the adopted BigInteger class") + class BigIntegerTest extends Base { + // Python must get PyType ready before touching any objects.
+ PyType OBJECT = PyType.OBJECT_TYPE; + + @BeforeEach + void setup() throws Throwable { setup(BigInteger.class, "BigInteger as "); } + + @Override + @Test + void finds_expected_type() { + /* + * The Operations object of a non-canonical implementation differs + * from the type itself, but that type is found for instances. + */ + assertNotSame(PyLong.TYPE, ops); + assertSame(PyLong.TYPE, ops.type(BigInteger.valueOf(42L))); + assertSame(PyLong.TYPE, ops.type(BigInteger.TEN)); + } + } + + @Nested + @DisplayName("the adopted Boolean class") + class BooleanTest extends Base { + // Python must get PyType ready before touching any objects. + PyType OBJECT = PyType.OBJECT_TYPE; + + @BeforeEach + void setup() throws Throwable { setup(Boolean.class, ""); } + + @Override + @Test + void finds_expected_type() { + /* + * The Operations object of the canonical implementation is the type + * itself. + */ + assertSame(PyBool.TYPE, ops); + assertSame(PyBool.TYPE, ops.type(true)); + assertSame(PyBool.TYPE, ops.type(Boolean.FALSE)); + } + } +} diff --git a/core/src/test/java/org/python/core/PyListTest.java b/core/src/test/java/org/python/core/PyListTest.java new file mode 100644 index 000000000..9f3d1277c --- /dev/null +++ b/core/src/test/java/org/python/core/PyListTest.java @@ -0,0 +1,230 @@ +// Copyright (c)2023 Jython Developers. +// Licensed to PSF under a contributor agreement. 
+package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Test selected methods of {@link PyList} on a variety of argument + * types. We can mostly use Java List as a reference, except that, + * where we need it, the definition of equality must be Python's. + */ +@DisplayName("In PyList") +class PyListTest extends UnitTestSupport { + + /** Base of tests that add, insert and remove elements. */ + abstract static class AbstractInsertionTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that have "insertion" character, that is {@code insert}, + * {@code remove}, assignment to a zero-length slice, etc. + * + * @return the examples for insertion tests. + */ + static Stream insertionExamples() { + return Stream.of(// + insertionExample(List.of(1, 2, 3, 4, 5), 9, 1), // + insertionExample(Collections.emptyList(), 9, 0) // + ); + } + + /** + * Construct an insertion (etc.) problem and reference result. This + * uses Java {@code ArrayList} for the reference answer. The result + * is an + * {@code arguments(without, with, front, back, needle, index)} + * where {@code without} is (a mutable copy of) the corresponding + * list argument, {@code with} is the same list with {@code needle} + * inserted at {@code index}, and {@code front} and {@code back} have it added + * first and last respectively. The arguments can be used for + * testing several types of insert and remove. 
+ * + * @param without list without the needle + * @param needle to insert + * @param index location to insert + * @return example data for a test + */ + private static Arguments insertionExample(List without, Object needle, int index) { + ArrayList with = new ArrayList<>(without); + with.add(index, needle); + ArrayList front = new ArrayList<>(without); + front.add(0, needle); + ArrayList back = new ArrayList<>(without); + back.add(needle); + without = new ArrayList<>(without); + return arguments(without, with, front, back, needle, index); + } + } + + /** + * Tests of several things that amount to insertion of one element. + * Each test builds a {@code PyList} from {@code without}, inserts + * {@code needle} at {@code index} and expects a list equal to + * {@code with}. The {@code front} and {@code back} arguments are + * supplied by the example source but not used by these tests. + */ + @Nested + @SuppressWarnings("unused") + class InsertionTest extends AbstractInsertionTest { + + @DisplayName("Java add(i, v)") + @ParameterizedTest(name = "{0}.add({5}, {4})") + @MethodSource("insertionExamples") + void java_add(List without, List with, List front, + List back, Object needle, int index) { + PyList list = new PyList(without); + list.add(index, needle); + assertEquals(with, list); + } + + @DisplayName("Python insert") + @ParameterizedTest(name = "{0}.insert({5}, {4})") + @MethodSource("insertionExamples") + void insert(List without, List with, List front, List back, + Object needle, int index) throws Throwable { + PyList list = new PyList(without); + list.list_insert(index, needle); + assertEquals(with, list); + } + + @DisplayName("Python slice-insert") + @ParameterizedTest(name = "list[{5}:{5}] = [{4}]") + @MethodSource("insertionExamples") + void setitem(List without, List with, List front, List back, + Object needle, int index) throws Throwable { + PyList list = new PyList(without); + PyList rhs = new PyList(List.of(needle)); + PySlice slice = new PySlice(index, index); + list.__setitem__(slice, rhs); + assertEquals(with, list); + } + } + + /** Base of tests that sort lists. 
*/ + abstract static class AbstractSortTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that sort the list in various ways. + * + * @return the examples for sort tests. + */ + static Stream sortExamples() { + return Stream.of(// + sortExample(List.of(1, 2, 3, 4, 5, 6, 7, 8), 100023), // + sortExample(List.of("a", "b", "c", "d", "e"), 420042), // + sortExample(List.of(1, 2, 3, 4, 5, 6, 7, 8), 100023), // + sortExample(Stream.iterate(0, i -> i < 1000, i -> i + 1) + .collect(Collectors.toList()), 555555), // + sortExample(List.of(1, 2, 3, 4, 5, 6, 7, 8), 100024, Function.identity()), // + sortExample(List.of("python", "anaconda", "boa", "coral snake", "bushmaster"), + 420042, s -> ((String)s).indexOf('a')), // + sortExample(Collections.emptyList(), 1) // + ); + } + + /** + * Construct a sort problem and reference result, with no key + * function. + * + * @param sorted list before randomisation + * @param seed for randomisation + * @return example data for a test + */ + private static Arguments sortExample(List sorted, long seed) { + return sortExample(sorted, seed, null); + } + + /** + * Construct a sort problem and reference result, optionally with a + * key function. The result is an + * {@code arguments(mudString, sorted, muddled, keyString, cmp)} + * where {@code muddled} is a shuffled copy of {@code sorted}, + * {@code mudString} is a shortened string form of it, and + * {@code keyString} is {@code "null"} or {@code "key=f"}. + * + * @param sorted list before randomisation + * @param seed for randomisation + * @param cmp key function to pass to the sort, or {@code null} + * @return example data for a test + */ + private static Arguments sortExample(List sorted, long seed, + Function cmp) { + ArrayList muddled = new ArrayList<>(sorted); + randomise(muddled, new Random(seed)); + String mudString = shortString(muddled, 5); + return arguments(mudString, sorted, muddled, cmp == null ? 
"null" : "key=f", cmp); + } + + /** + * A {@code toString} that shows at most {@code n} elements, ending + * with an ellipsis when the list is longer. + * + * @param a the list to return as a string + * @param n maximum number of list elements to show + * @return string representation of {@code a} + */ + private static String shortString(ArrayList a, int n) { + if (a.size() <= n) { + return a.toString(); + } else { + String mudString = a.subList(0, n).toString(); + return mudString.substring(0, mudString.length() - 1) + ", ... ]"; + } + } + } + + /** + * Randomise the order of elements in a list, in place, by a + * Fisher-Yates shuffle. + * + * @param m to randomise + * @param r random generator + */ + private static void randomise(List m, Random r) { + for (int i = m.size() - 1; i > 0; --i) { + int j = r.nextInt(i + 1); + if (j != i) { + // Swap [i] and [j] + Object temp = m.get(j); + m.set(j, m.get(i)); + m.set(i, temp); + } + } + } + + /** + * Tests of {@code list.sort} with key functions and reverse + * comparison. These are simplistic: tests of change detection and + * concurrency are needed, and could be added here where the CPython + * regression tests don't do so, or extra fidelity is required, + * which is often easier under a Java debugger. 
+ */ + @Nested + @SuppressWarnings("unused") + class SortTest extends AbstractSortTest { + + @DisplayName("Normal sort") + @ParameterizedTest(name = "{0}.sort({3})") + @MethodSource("sortExamples") + void normalSort(String mudString, List sorted, List muddled, + String keyString, Function key) throws Throwable { + PyList list = new PyList(muddled); + list.sort(key, false); + assertEquals(sorted, list); + } + + @DisplayName("Reverse sort") + @ParameterizedTest(name = "{0}.sort({3}, reverse=true)") + @MethodSource("sortExamples") + void reverseSort(String mudString, List sorted, List muddled, + String keyString, Function key) throws Throwable { + PyList list = new PyList(muddled); + sorted = new ArrayList<>(sorted); + Collections.reverse(sorted); + list.sort(key, true); + assertEquals(sorted, list); + } + } +} diff --git a/core/src/test/java/org/python/core/PyUnicodeTest.java b/core/src/test/java/org/python/core/PyUnicodeTest.java new file mode 100644 index 000000000..e103e715f --- /dev/null +++ b/core/src/test/java/org/python/core/PyUnicodeTest.java @@ -0,0 +1,1996 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.lang.invoke.MethodHandles; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.util.LinkedList; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.python.base.MissingFeature; 
+import org.python.core.PyType.Spec; + +/** + * Test selected methods of {@link PyUnicode} on a variety of + * argument types. + */ +@DisplayName("In PyUnicode") +class PyUnicodeTest extends UnitTestSupport { + + /** Base of tests that find strings in others. */ + abstract static class AbstractFindTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that have "search" character, that is {@code find}, + * {@code index}, {@code partition}, {@code count}, etc. + * + * @return the examples for search tests. + */ + static Stream findExamples() { + return Stream.of(// + findExample("pandemic", "pan"), // + findExample("pandemic", "mic"), // + findExample("abracadabra", "bra"), // + findExample("abracadabra", "a"), // + findExample("Bananaman", "ana"), // + findExample(GREEK, "λόγος"), // + findExample(GREEK, " "), // + findExample("画蛇添足 添足 添足", " 添"), // + /* + * The following contain non-BMP characters 🐍=U+1F40D and + * 🦓=U+1F993, each of which Python must consider to be a single + * character, but in the Java String realisation each is two chars. + */ + // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + findExample("One 🐍, a 🦓, two 🐍🐍.", "🐍", new int[] {4, 16, 17}), + findExample("Left 🐍🦓🐍🦓: right.", "🐍🦓:", new int[] {7})); + } + + /** + * Construct a search problem and reference result. This uses Java + * {@code String.indexOf} for the reference answer, so it will work + * correctly only for BMP strings. Where any SMP characters are + * involved, call + * {@link #findExample(String, String, int[])}. + * + * @param self to search + * @param needle to search for + * @return example data for a test + */ + private static Arguments findExample(String self, String needle) { + int[] indices = findIndices(self, needle); + return findExample(self, needle, indices); + } + + /** + * Construct a search problem and reference result, where the needle + * occurs at a list of indices. 
+ * + * @param self to search + * @param needle to search for + * @param indices at which {@code needle} is found (code points) + * @return example data for a test + */ + private static Arguments findExample(String self, String needle, int[] indices) { + return arguments(self, needle, indices); + } + } + + /** Tests of {@code str.find} operating on the whole string. */ + @Nested + @DisplayName("find (whole string)") + class FindTest extends AbstractFindTest { + + @DisplayName("find(String, String, null, null)") + @ParameterizedTest(name = "\"{0}\".find(\"{1}\")") + @MethodSource("findExamples") + void S_find_S(String s, String needle, int[] indices) { + int r = PyUnicode.find(s, needle, null, null); + if (indices.length == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[0] + assertEquals(indices[0], r); + } + } + + @DisplayName("find(String, PyUnicode, null, null)") + @ParameterizedTest(name = "\"{0}\".find(\"{1}\")") + @MethodSource("findExamples") + void S_find_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + int r = PyUnicode.find(s, uNeedle, null, null); + if (indices.length == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[0] + assertEquals(indices[0], r); + } + } + + @DisplayName("find(PyUnicode, String, null, null)") + @ParameterizedTest(name = "\"{0}\".find(\"{1}\")") + @MethodSource("findExamples") + void U_find_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + int r = u.find(needle, null, null); + if (indices.length == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[0] + assertEquals(indices[0], r); + } + } + + @DisplayName("find(PyUnicode, PyUnicode, null, null)") + @ParameterizedTest(name = "\"{0}\".find(\"{1}\")") + @MethodSource("findExamples") + void 
U_find_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + int r = u.find(uNeedle, null, null); + if (indices.length == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[0] + assertEquals(indices[0], r); + } + } + } + + /** + * Tests of {@code str.partition}. Each result must be a 3-tuple of + * {@code str}: {@code (s, "", "")} when the needle is absent, + * otherwise the text before, the needle, and the text after the + * occurrence at {@code indices[0]}. + */ + @Nested + @DisplayName("partition") + class PartitionTest extends AbstractFindTest { + + @DisplayName("partition(String, String)") + @ParameterizedTest(name = "\"{0}\".partition(\"{1}\")") + @MethodSource("findExamples") + void S_partition_S(String s, String needle, int[] indices) { + PyTuple r = PyUnicode.partition(s, needle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + if (indices.length == 0) { + // There should be no match + assertEquals(Py.tuple(s, "", ""), r); + } else { + // Match at indices[0] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[0], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + + @DisplayName("partition(String, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".partition(\"{1}\")") + @MethodSource("findExamples") + void S_partition_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyTuple r = PyUnicode.partition(s, uNeedle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + if (indices.length == 0) { + // There should be no match + assertEquals(Py.tuple(s, "", ""), r); + } else { + // Match at indices[0] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[0], m = n 
+ needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + + @DisplayName("partition(PyUnicode, String)") + @ParameterizedTest(name = "\"{0}\".partition(\"{1}\")") + @MethodSource("findExamples") + void U_partition_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyTuple r = u.partition(needle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + if (indices.length == 0) { + // There should be no match + assertEquals(Py.tuple(s, "", ""), r); + } else { + // Match at indices[0] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[0], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + + @DisplayName("partition(PyUnicode, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".partition(\"{1}\")") + @MethodSource("findExamples") + void U_partition_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyTuple r = u.partition(uNeedle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + if (indices.length == 0) { + // There should be no match + assertEquals(Py.tuple(s, "", ""), r); + } else { + // Match at indices[0] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[0], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + } + + /** Tests of {@code str.count} operating on the whole string. 
*/ + @Nested + @DisplayName("count (whole string)") + class CountTest extends AbstractFindTest { + + @DisplayName("count(String, String, null, null)") + @ParameterizedTest(name = "\"{0}\".count(\"{1}\")") + @MethodSource("findExamples") + void S_count_S(String s, String needle, int[] indices) { + int r = PyUnicode.count(s, needle, null, null); + assertEquals(indices.length, r); + } + + @DisplayName("count(String, PyUnicode, null, null)") + @ParameterizedTest(name = "\"{0}\".count(\"{1}\")") + @MethodSource("findExamples") + void S_count_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + int r = PyUnicode.count(s, uNeedle, null, null); + assertEquals(indices.length, r); + } + + @DisplayName("count(PyUnicode, String, null, null)") + @ParameterizedTest(name = "\"{0}\".count(\"{1}\")") + @MethodSource("findExamples") + void U_count_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + int r = u.count(needle, null, null); + assertEquals(indices.length, r); + } + + @DisplayName("count(PyUnicode, PyUnicode, null, null)") + @ParameterizedTest(name = "\"{0}\".count(\"{1}\")") + @MethodSource("findExamples") + void U_count_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + int r = u.count(uNeedle, null, null); + assertEquals(indices.length, r); + } + } + + /** + * Tests of {@code str.split} on an explicit separator. The + * parameterised tests compare the result, segment by segment, with + * the array computed by {@code expectedSplit}. + 
*/ + @Nested + @DisplayName("split on string") + class SplitOnStringTest extends AbstractFindTest { + + @DisplayName("split(String, String)") + @ParameterizedTest(name = "\"{0}\".split(\"{1}\")") + @MethodSource("findExamples") + void S_split_S(String s, String needle, int[] indices) { + PyList r = PyUnicode.split(s, needle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("split(String, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".split(\"{1}\")") + @MethodSource("findExamples") + void S_split_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyList r = PyUnicode.split(s, uNeedle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("split(PyUnicode, String)") + @ParameterizedTest(name = "\"{0}\".split(\"{1}\")") + @MethodSource("findExamples") + void U_split_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyList r = u.split(needle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("split(PyUnicode, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".split(\"{1}\")") + @MethodSource("findExamples") + void U_split_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyList r = u.split(uNeedle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + 
int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("split(FOX, \"o\", 2)") + void U_split_S_maxsplit() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.split("o", 2); + PyUnicode[] segments = + toPyUnicodeArray("The quick br", "wn f", FOX.substring(FOX.indexOf("x"))); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("split(FOX, \"o\", 0)") + void U_split_S_maxsplit0() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.split("o", 0); + PyUnicode[] segments = toPyUnicodeArray(FOX); + assertEquals(1, r.size(), "number of segments"); + assertEquals(segments[0], r.get(0)); + } + } + + /** + * Base of tests that find strings in others (in reverse search). + */ + abstract static class AbstractReverseFindTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that have "search" character but scan in reverse, that is + * {@code rfind}, {@code rindex}, {@code rpartition}, etc.. + * + * @return the examples for search tests. + */ + static Stream rfindExamples() { + return Stream.of(// + rfindExample("pandemic", "pan"), // + rfindExample("pandemic", "mic"), // + rfindExample("abracadabra", "bra"), // + rfindExample("Bananaman", "ana"), // + rfindExample(GREEK, "λόγος"), // + rfindExample(GREEK, " "), // + rfindExample("画蛇添足 添足 添足", " 添"), // + /* + * The following contain non-BMP characters 🐍=U+1F40D and + * 🦓=U+1F993, each of which Python must consider to be a single + * character, but in the Java String realisation each is two chars. + */ + // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + rfindExample("One 🐍, a 🦓, two 🐍🐍.", "🐍", new int[] {4, 16, 17}), + rfindExample("Left 🐍🦓🐍🦓: right.", "🐍🦓:", new int[] {7})); + } + + /** + * Construct a search problem and reference result. 
This uses Java + * {@code String.indexOf} for the reference answer, so it will work + * correctly only for BMP strings. Where any SMP characters are + * involved, call + * {@link #rfindExample(String, String, int[])}. + * + * @param self to search + * @param needle to search for + * @return example data for a test + */ + private static Arguments rfindExample(String self, String needle) { + int[] indices = rfindIndices(self, needle); + return rfindExample(self, needle, indices); + } + + /** + * Construct a search problem and reference result, where the needle + * occurs at a list of indices. + * + * @param self to search + * @param needle to search for + * @param indices at which {@code needle} is found (code points), + * in ascending order + * @return example data for a test + */ + private static Arguments rfindExample(String self, String needle, int[] indices) { + return arguments(self, needle, indices); + } + } + + /** Tests of {@code str.rfind} operating on the whole string. 
*/ + @Nested + @DisplayName("rfind (whole string)") + class ReverseFindTest extends AbstractReverseFindTest { + + @DisplayName("rfind(String, String, null, null)") + @ParameterizedTest(name = "\"{0}\".rfind(\"{1}\")") + @MethodSource("rfindExamples") + void S_rfind_S(String s, String needle, int[] indices) { + int r = PyUnicode.rfind(s, needle, null, null); + int M = indices.length; + if (M == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[M-1] + assertEquals(indices[M - 1], r); + } + } + + @DisplayName("rfind(String, PyUnicode, null, null)") + @ParameterizedTest(name = "\"{0}\".rfind(\"{1}\")") + @MethodSource("rfindExamples") + void S_rfind_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + int r = PyUnicode.rfind(s, uNeedle, null, null); + int M = indices.length; + if (M == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[M-1] + assertEquals(indices[M - 1], r); + } + } + + @DisplayName("rfind(PyUnicode, String, null, null)") + @ParameterizedTest(name = "\"{0}\".rfind(\"{1}\")") + @MethodSource("rfindExamples") + void U_rfind_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + int r = u.rfind(needle, null, null); + int M = indices.length; + if (M == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at indices[M-1] + assertEquals(indices[M - 1], r); + } + } + + @DisplayName("rfind(PyUnicode, PyUnicode, null, null)") + @ParameterizedTest(name = "\"{0}\".rfind(\"{1}\")") + @MethodSource("rfindExamples") + void U_rfind_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + int r = u.rfind(uNeedle, null, null); + int M = indices.length; + if (M == 0) { + // There should be no match + assertEquals(-1, r); + } else { + // Match at 
indices[M-1] + assertEquals(indices[M - 1], r); + } + } + } + + /** Tests of {@code str.rpartition}. */ + @Nested + @DisplayName("rpartition") + class ReversePartitionTest extends AbstractReverseFindTest { + + @DisplayName("rpartition(String, String)") + @ParameterizedTest(name = "\"{0}\".rpartition(\"{1}\")") + @MethodSource("rfindExamples") + void S_rpartition_S(String s, String needle, int[] indices) { + PyTuple r = PyUnicode.rpartition(s, needle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + int M = indices.length; + if (M == 0) { + // No match: rpartition gives two empty strings, then s + assertEquals(Py.tuple("", "", s), r); + } else { + // Match at indices[M-1] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[M - 1], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + + @DisplayName("rpartition(String, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".rpartition(\"{1}\")") + @MethodSource("rfindExamples") + void S_rpartition_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyTuple r = PyUnicode.rpartition(s, uNeedle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + int M = indices.length; + if (M == 0) { + // No match: rpartition gives two empty strings, then s + assertEquals(Py.tuple("", "", s), r); + } else { + // Match at indices[M-1] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[M - 1], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + + @DisplayName("rpartition(PyUnicode, String)") + @ParameterizedTest(name = "\"{0}\".rpartition(\"{1}\")") + @MethodSource("rfindExamples") + void 
U_rpartition_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyTuple r = u.rpartition(needle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + int M = indices.length; + if (M == 0) { + // No match: rpartition gives two empty strings, then s + assertEquals(Py.tuple("", "", s), r); + } else { + // Match at indices[M-1] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[M - 1], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + + @DisplayName("rpartition(PyUnicode, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".rpartition(\"{1}\")") + @MethodSource("rfindExamples") + void U_rpartition_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyTuple r = u.rpartition(uNeedle); + assertPythonType(PyTuple.TYPE, r); + assertEquals(3, r.size()); + for (int i = 0; i < 3; i++) { assertPythonType(PyUnicode.TYPE, r.get(i)); } + int M = indices.length; + if (M == 0) { + // No match: rpartition gives two empty strings, then s + assertEquals(Py.tuple("", "", s), r); + } else { + // Match at indices[M-1] + int[] charIndices = toCharIndices(s, indices); + // Work in char indices (so doubtful with surrogates) + int n = charIndices[M - 1], m = n + needle.length(); + assertEquals(Py.tuple(s.substring(0, n), needle, s.substring(m)), r); + } + } + } + + /** Tests of {@code str.rsplit} on an explicit separator. 
*/ + @Nested + @DisplayName("rsplit on string") + class ReverseSplitOnStringTest extends AbstractReverseFindTest { + + @DisplayName("rsplit(String, String)") + @ParameterizedTest(name = "\"{0}\".rsplit(\"{1}\")") + @MethodSource("rfindExamples") + void S_rsplit_S(String s, String needle, int[] indices) { + PyList r = PyUnicode.rsplit(s, needle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("rsplit(String, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".rsplit(\"{1}\")") + @MethodSource("rfindExamples") + void S_rsplit_U(String s, String needle, int[] indices) { + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyList r = PyUnicode.rsplit(s, uNeedle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("rsplit(PyUnicode, String)") + @ParameterizedTest(name = "\"{0}\".rsplit(\"{1}\")") + @MethodSource("rfindExamples") + void U_rsplit_S(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyList r = u.rsplit(needle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("rsplit(PyUnicode, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".rsplit(\"{1}\")") + @MethodSource("rfindExamples") + void U_rsplit_U(String s, String needle, int[] indices) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyList r = u.rsplit(uNeedle, -1); + PyUnicode[] segments = expectedSplit(s, needle, indices); + assertEquals(segments.length, 
r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("rsplit(FOX, \"o\", 2)") + void U_rsplit_S_maxsplit() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.rsplit("o", 2); + PyUnicode[] segments = + toPyUnicodeArray(FOX.substring(0, FOX.indexOf("over")), "ver the lazy d", "g."); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("rsplit(FOX, \"o\", 0)") + void U_rsplit_S_maxsplit0() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.rsplit("o", 0); + PyUnicode[] segments = toPyUnicodeArray(FOX); + assertEquals(1, r.size(), "number of segments"); + assertEquals(segments[0], r.get(0)); + } + } + + /** Base of tests that exercise string replacement. */ + abstract static class AbstractReplaceTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that search and substitute, that is {@code replace}. + * + * @return the examples for replace tests. + */ + static Stream replaceExamples() { + return Stream.of(// + replaceExample("pandemic", "pan", "ping"), // + replaceExample("pandemic", "ic", "onium"), // + replaceExample("pandemic", "", "-*-"), // + replaceExample("abracadabra", "bra", "x"), // + replaceExample("bananarama", "anar", " dr"), // + replaceExample("Σωκρατικὸς λόγος", "ὸς", "ὸι"), // + replaceExample("Σωκρατικὸς λόγος", "ς", "σ"), // + replaceExample("画蛇添足 添足 添足", " 添", "**"), // + /* + * The following contain non-BMP characters 🐍=U+1F40D and + * 🦓=U+1F993, each of which Python must consider to be a single + * character, but in the Java String realisation each is two chars. + */ + // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + replaceExample("One 🐍, a 🦓, two 🐍🐍.", "🐍", new int[] {4, 16, 17}, "🦓"), + replaceExample("Swap 🐍🦓.", "🐍🦓", new int[] {5}, "(🦓🐍)")); + } + + /** + * Construct a search problem and reference result. 
This uses Java + * {@code String.indexOf} for the reference answer, so it will work + * correctly only for BMP strings. Where any SMP characters are + * involved, call + * {@link #replaceExample(String, String, int[], String)}. + * + * @param self to search + * @param needle to search for + * @param pin to replace needle + * @return example data for a test + */ + private static Arguments replaceExample(String self, String needle, String pin) { + int[] indices = findIndices(self, needle); + return replaceExample(self, needle, indices, pin); + } + + /** + * Construct a search problem and reference result, where the needle + * occurs at a list of indices. + * + * @param self to search + * @param needle to search for + * @param indices at which {@code needle}is found (code points) + * @param pin to replace needle (if tested) + * @return example data for a test + */ + private static Arguments replaceExample(String self, String needle, int[] indices, + String pin) { + return arguments(self, needle, indices, pin); + } + + /** + * Return a list of strings equal to {@code s} with {@code 0} to + * {@code M} replacements of the needle by the pin, guided by an + * array of {@code M} char indices for the needle. Element zero of + * the returned value is {@code s}. We return this as + * {@link PyUnicode} to ensure that {@code assertEquals} uses + * {@link PyUnicode#equals(Object)} for comparison during tests. + * + * @param s in which to effect the replacements. 
+ * @param needle to replace + * @param cpIndices array of {@code M} character indices + * @param pin replacement string + * @return {@code M+1} strings + */ + static PyUnicode[] replaceResults(String s, String needle, int[] cpIndices, String pin) { + int[] charIndices = toCharIndices(s, cpIndices); + final int M = charIndices.length, N = needle.length(), P = pin.length(); + // Make a list of s with 0..M replacements at the indices + List results = new LinkedList<>(); + StringBuilder r = new StringBuilder(s); + results.add(s); + for (int m = 0; m < M; m++) { + /* + * r contains s with m replacements, and its value has already been + * emitted to results. We shall compute the result of m+1 + * replacements. We start by trimming r at the (m+1)th needle. + */ + r.setLength(charIndices[m] + m * (P - N)); + // Now append the pin and the rest of s after the needle + r.append(pin).append(s.substring(charIndices[m] + N)); + results.add(r.toString()); + } + return toPyUnicodeArray(results.toArray(new String[M + 1])); + } + } + + @Nested + @DisplayName("replace") + class ReplaceTest extends AbstractReplaceTest { + + @DisplayName("replace(String, String, String)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void S_replace_SS(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = PyUnicode.replace(s, needle, pin, count); + assertEquals(e[count < 0 ? 
M : count], r); + } + } + + @DisplayName("replace(String, PyUnicode, String)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void S_replace_US(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = PyUnicode.replace(s, uNeedle, pin, count); + assertEquals(e[count < 0 ? M : count], r); + } + } + + @DisplayName("replace(String, String, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void S_replace_SU(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode uPin = new PyUnicode(pin.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = PyUnicode.replace(s, needle, uPin, count); + assertEquals(e[count < 0 ? M : count], r); + } + } + + @DisplayName("replace(String, PyUnicode, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void S_replace_UU(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyUnicode uPin = new PyUnicode(pin.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = PyUnicode.replace(s, uNeedle, uPin, count); + assertEquals(e[count < 0 ? 
M : count], r); + } + } + + @DisplayName("replace(PyUnicode, String, String)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void U_replace_SS(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = u.replace(needle, pin, count); + assertEquals(e[count < 0 ? M : count], r); + } + } + + @DisplayName("replace(PyUnicode, PyUnicode, String)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void U_replace_US(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = u.replace(uNeedle, pin, count); + assertEquals(e[count < 0 ? M : count], r); + } + } + + @DisplayName("replace(PyUnicode, String, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void U_replace_SU(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uPin = new PyUnicode(pin.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = u.replace(needle, uPin, count); + assertEquals(e[count < 0 ? 
M : count], r); + } + } + + @DisplayName("replace(PyUnicode, PyUnicode, PyUnicode)") + @ParameterizedTest(name = "\"{0}\".replace(\"{1}\", \"{3}\")") + @MethodSource("replaceExamples") + void U_replace_UU(String s, String needle, int[] indices, String pin) { + PyUnicode[] e = replaceResults(s, needle, indices, pin); + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyUnicode uNeedle = new PyUnicode(needle.codePoints().toArray()); + PyUnicode uPin = new PyUnicode(pin.codePoints().toArray()); + final int M = indices.length; + for (int count = -1; count <= M; count++) { + Object r = u.replace(uNeedle, uPin, count); + assertEquals(e[count < 0 ? M : count], r); + } + } + + @Test + @DisplayName("''.replace('', '-')") + void emptyReplace() { + // We have ''.replace('', '-', 0) == '' + Object r = PyUnicode.replace("", "", "-", 0); + assertEquals(newPyUnicode(""), r); + + // But ''.replace('', '-') == '-' + r = PyUnicode.replace("", "", "-", -1); + assertEquals(newPyUnicode("-"), r); + } + + // Cases where simulation by Java String is too hard. 
+ // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + + @Test + void surrogatePairNotSplit_SS() { + // No high surrogate (D800-DBFF) accidental replacement + String s = "🐍🐍", needle = "\ud83d", pin = "#"; + // Assert that Java gets the non-Pythonic answer + assert s.replace(needle, pin).equals("#\udc0d#\udc0d"); + + // Check on result must use PyUnicode.equals + PyUnicode su = newPyUnicode(s); + + // Python does not match paired high surrogates as isolated + Object r = PyUnicode.replace(s, needle, pin, -1); + assertEquals(su, r); + + // No low surrogate (DC00-DFFF) accidental replacement + needle = "\udc0d"; + // Assert that Java gets the non-Pythonic answer + assert s.replace(needle, pin).equals("\ud83d#\ud83d#"); + + // Python does not match paired low surrogates as isolated + r = PyUnicode.replace(s, needle, pin, -1); + assertEquals(su, r); + } + + @Test + void surrogatePairNotSplit_US() { + // No high surrogate (D800-DBFF) accidental replacement + String s = "🐍🐍", needle = "\ud83d", pin = "#"; + PyUnicode uNeedle = newPyUnicode(needle); + // Assert that Java gets the non-Pythonic answer + assert s.replace(needle, pin).equals("#\udc0d#\udc0d"); + + // Check on result must use PyUnicode.equals + PyUnicode su = newPyUnicode(s); + + // Python does not match paired high surrogates as isolated + Object r = PyUnicode.replace(s, uNeedle, pin, -1); + assertEquals(su, r); + + // No low surrogate (DC00-DFFF) accidental replacement + needle = "\udc0d"; + uNeedle = newPyUnicode(needle); + // Assert that Java gets the non-Pythonic answer + assert s.replace(needle, pin).equals("\ud83d#\ud83d#"); + + // Python does not match paired low surrogates as isolated + r = PyUnicode.replace(s, uNeedle, pin, -1); + assertEquals(su, r); + } + + @Test + @DisplayName("🐍 is not dissected as \\ud83d\\udc0d") + void supplementaryCharacterNotSplit_SS() { + // No high surrogate (D800-DBFF) accidental replacement + String s = "🐍🐍", needle = "\ud83d", pin = "#"; + // Assert that Java gets the non-Pythonic answer 
+ assert s.replace(needle, pin).equals("#\udc0d#\udc0d"); + + // PyUnicode stores a surrogate pair as one character + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + assert u.equals(s); + Object r = u.replace(needle, pin, -1); + assertEquals(u, r); + + // No low surrogate (DC00-DFFF) accidental replacement + needle = "\udc0d"; + // Assert that Java gets the non-Pythonic answer + assert s.replace(needle, pin).equals("\ud83d#\ud83d#"); + + // PyUnicode stores a surrogate pair as one character + r = u.replace(needle, pin, -1); + assertEquals(u, r); + } + + @Test + @DisplayName("a 🦓 is not produced by String \\ud83e\\udd93") + void S_noSpontaneousZebras() { + // Deleting "-" risks surrogate pair formation + String s = "\ud83e-\udd93\ud83e-\udd93", needle = "-"; + // Java String: nothing, bang, zebras + assert s.contains("🦓") == false; + assert s.replace(needle, "").equals("🦓🦓"); + + // Python lone surrogates remain aloof even when adjacent + PyUnicode e = new PyUnicode(0xd83e, 0xdd93, 0xd83e, 0xdd93); + Object r = PyUnicode.replace(s, needle, "", -1); + assertEquals(e, r); + } + + @Test + @DisplayName("a 🦓 is not produced by PyUnicode \\ud83e\\udd93") + void U_noSpontaneousZebras_SS() { + // No accidental surrogate pair formation + String s = "\ud83e-\udd93\ud83e-\udd93", needle = "-"; + // Java String: nothing, bang, zebras + assert s.contains("🦓") == false; + assert s.replace(needle, "").equals("🦓🦓"); + + // Python lone surrogates remain aloof even when adjacent + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + assert u.equals(s); + PyUnicode e = new PyUnicode(0xd83e, 0xdd93, 0xd83e, 0xdd93); + Object r = u.replace(needle, "", -1); + assertEquals(e, r); + } + } + + /** Base of tests that find and split on spaces. 
*/ + abstract static class AbstractSplitAtSpaceTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * methods that search for runs of spaces, that is {@code split} and + * {@code rsplit} with no sub-string given. + * + * @return the examples for search tests. + */ + static Stream splitExamples() { + return Stream.of(// + splitExample("cunning"), // + splitExample("the quick brown fox"), // + splitExample("\fthe\u000bquick\nbrown\u0085fox"), // + splitExample("\f the \u000b quick\n\r" + " brown \u0085 fox\r\n"), // + splitExample(""), // + splitExample("\f\u000b\n\u0085"), // + splitExample(GREEK), // + splitExample("画蛇添足 添足 添足"), // + /* + * The following contain non-BMP characters 🐍=U+1F40D and + * 🦓=U+1F993, each of which Python must consider to be a single + * character, but in the Java String realisation each is two chars. + */ + // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + splitExample("One 🐍, a 🦓, two 🐍🐍."), // + splitExample("Left 🐍🦓🐍🦓: right.") // + ); + } + + /** + * Construct a search problem and reference result that is segments + * that are separated in {@code s} by runs of space characters. + * + * @param self to search + * @return example data for a test + */ + private static Arguments splitExample(String self) { + return arguments(self, split(self)); + } + + /** + * Return an array of segments that are separated in {@code s} by + * runs of space characters. This uses Java {@code char} tests and + * will work correctly for BMP strings, but would be unreliable + * where any SMP space characters are involved. We return these as + * {@link PyUnicode} to ensure that {@code assertEquals} uses + * {@link PyUnicode#equals(Object)} for comparison during tests. 
+ * + * @param s string in question + * @return the segments of {@code s} + */ + private static PyUnicode[] split(String s) { + LinkedList segment = new LinkedList<>(); + int p = 0, start = 0, N = s.length(); + boolean text = false; + while (true) { + if (text) { + if (p >= N) { + segment.add(s.substring(start, p)); + break; + } else if (isPythonSpace(s.charAt(p))) { + segment.add(s.substring(start, p)); + text = false; + } + } else { + if (p >= N) { + break; + } else if (!isPythonSpace(s.charAt(p))) { + start = p; + text = true; + } + } + p++; + } + return toPyUnicodeArray(segment.toArray(new String[segment.size()])); + } + } + + /** Tests of {@code str.split} splitting at runs of spaces. */ + @Nested + @DisplayName("split at spaces") + class SplitAtSpaceTest extends AbstractSplitAtSpaceTest { + + @DisplayName("split(String)") + @ParameterizedTest(name = "\"{0}\".split()") + @MethodSource("splitExamples") + void S_split_S(String s, PyUnicode[] segments) { + PyList r = PyUnicode.split(s, null, -1); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("split(PyUnicode)") + @ParameterizedTest(name = "\"{0}\".split()") + @MethodSource("splitExamples") + void U_split_S(String s, PyUnicode[] segments) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyList r = u.split(null, -1); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("FOX.split(maxsplit=3)") + void U_split_maxsplit() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.split(null, 3); + PyUnicode[] segments = + toPyUnicodeArray("The", "quick", "brown", "fox jumps over the lazy dog."); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("split(FOX, 0)") + void 
U_split_S_maxsplit0() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.split(null, 0); + PyUnicode[] segments = toPyUnicodeArray(FOX); + assertEquals(1, r.size(), "number of segments"); + assertEquals(segments[0], r.get(0)); + } + } + + /** Tests of {@code str.rsplit} splitting at runs of spaces. */ + @Nested + @DisplayName("rsplit at spaces") + class ReverseSplitAtSpaceTest extends AbstractSplitAtSpaceTest { + + @DisplayName("rsplit(String)") + @ParameterizedTest(name = "\"{0}\".rsplit()") + @MethodSource("splitExamples") + void S_split_S(String s, PyUnicode[] segments) { + PyList r = PyUnicode.rsplit(s, null, -1); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @DisplayName("rsplit(PyUnicode)") + @ParameterizedTest(name = "\"{0}\".rsplit()") + @MethodSource("splitExamples") + void U_split_S(String s, PyUnicode[] segments) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyList r = u.rsplit(null, -1); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("FOX.rsplit(maxsplit=3)") + void U_split_maxsplit() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.rsplit(null, 3); + PyUnicode[] segments = + toPyUnicodeArray("The quick brown fox jumps over", "the", "lazy", "dog."); + assertEquals(segments.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(segments[i++], ri); } + } + + @Test + @DisplayName("rsplit(FOX, 0)") + void U_rsplit_S_maxsplit0() { + PyUnicode u = newPyUnicode(FOX); + PyList r = u.rsplit(null, 0); + PyUnicode[] segments = toPyUnicodeArray(FOX); + assertEquals(1, r.size(), "number of segments"); + assertEquals(segments[0], r.get(0)); + } + } + + /** Base of tests that find and split line breaks. 
*/ + abstract static class AbstractSplitlinesTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * {@code splitlines}. + * + * @return the examples for search tests. + */ + static Stream splitExamples() { + return Stream.of(// + splitExample("word", "word"), // + splitExample("Line1\nLine2", "Line1\n", "Line2"), // + splitExample("Line1\rLine2", "Line1\r", "Line2"), // + splitExample("Line1\r\nLine2", "Line1\r\n", "Line2"), // + splitExample("Line1\n\rLine2", "Line1\n", "\r", "Line2"), // + splitExample("\nLine1\nLine2\n", "\n", "Line1\n", "Line2\n"), // + splitExample(NEWLINES, NEWLINES_SPLIT), // + splitExample("画蛇\u2029画蛇\u2028添足\u2029", "画蛇\u2029", "画蛇\u2028", "添足\u2029"), // + /* + * The following contain non-BMP characters 🐍=U+1F40D and + * 🦓=U+1F993, each of which Python must consider to be a single + * character, but in the Java String realisation each is two chars. + */ + // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + splitExample("One 🐍\na 🦓,\ftwo 🐍🐍.", "One 🐍\n", "a 🦓,\f", "two 🐍🐍."), // + splitExample("Left 🐍🦓\r🐍🦓: right.\r", "Left 🐍🦓\r", "🐍🦓: right.\r") // + ); + } + + /** + * Construct a line-split problem and reference result that includes + * the end-of-line characters. + * + * @param self to split + * @param lines of the split (with ends kept) + * @return example data for a test + */ + private static Arguments splitExample(String self, String... lines) { + return arguments(self, lines); + } + + /** + * Return a line with trailing end-of-line characters optionally + * removed. We return this as {@link PyUnicode} to ensure that + * {@code assertEquals} uses {@link PyUnicode#equals(Object)} for + * comparison during tests. + * + * @param line string in question + * @param keepend do not remove trailing end-of-lines + * @return the {@code line} as {@code PyUnicode} + */ + static PyUnicode refLine(String line, boolean keepend) { + if (!keepend) { + int n = line.length(); + if (line.endsWith("\r\n")) { + // Special case CR-LF. 
+ line = line.substring(0, n - 2); + } else if (n > 0) { + // Use Java definition. (any tweaks needed?) + char c = line.charAt(n - 1); + if (isPythonLineSeparator(c)) { line = line.substring(0, n - 1); } + } + } + return newPyUnicode(line); + } + + /** + * Names of line separators followed by the separators themselves. + * The exceptions are CR-LF and LF-CR sequences: the first is one + * separator and the second is two (creating a blank line). + */ + private static final String NEWLINES = + "LF\nVT\u000bFF\fCR\rFS\u001cGS\u001dRS\u001eNEL\u0085" + + "LSEP\u2028PSEP\u2029CR-LF\r\nLF-CR\n\rEND"; + private static String[] NEWLINES_SPLIT = {"LF\n", "VT\u000b", "FF\f", "CR\r", "FS\u001c", + "GS\u001d", "RS\u001e", "NEL\u0085", "LSEP\u2028", "PSEP\u2029", "CR-LF\r\n", + "LF-CR\n", "\r", "END"}; + + } + + /** Tests of {@code str.splitlines} splitting at line breaks. */ + @Nested + @DisplayName("split at line boundaries") + class SplitlinesTest extends AbstractSplitlinesTest { + + @DisplayName("splitlines(String)") + @ParameterizedTest(name = "\"{0}\".splitlines()") + @MethodSource("splitExamples") + void S_split_S(String s, String[] lines) { splitlinesTest(s, lines, false); } + + @DisplayName("splitlines(PyUnicode)") + @ParameterizedTest(name = "\"{0}\".splitlines()") + @MethodSource("splitExamples") + void U_splitlines(String s, String[] lines) { splitlinesUnicodeTest(s, lines, false); } + + @DisplayName("splitlines(String) keepends=True") + @ParameterizedTest(name = "\"{0}\".splitlines(True)") + @MethodSource("splitExamples") + void S_splitlines_keepends(String s, String[] lines) { splitlinesTest(s, lines, true); } + + @DisplayName("splitlines(PyUnicode) keepends=True") + @ParameterizedTest(name = "\"{0}\".splitlines(True)") + @MethodSource("splitExamples") + void U_splitlines_keepends(String s, String[] lines) { + splitlinesUnicodeTest(s, lines, true); + } + + /** Call and check {@code str.splitlines} for String */ + private void splitlinesTest(String s, String[] 
lines, boolean keepends) { + PyList r = PyUnicode.splitlines(s, keepends); + splitlinesCheck(lines, keepends, r); + } + + /** Call and check {@code str.splitlines} for PyUnicode */ + private void splitlinesUnicodeTest(String s, String[] lines, boolean keepends) { + PyUnicode u = new PyUnicode(s.codePoints().toArray()); + PyList r = u.splitlines(keepends); + splitlinesCheck(lines, keepends, r); + } + + /** Check the result of {@code str.splitlines} */ + private void splitlinesCheck(String[] lines, boolean keepends, PyList r) { + assertEquals(lines.length, r.size(), "number of segments"); + int i = 0; + for (Object ri : r) { assertEquals(refLine(lines[i++], keepends), ri); } + } + } + + /** Tests of predicate functions. */ + abstract static class PredicateTest { + @Test + void testIsascii() { fail("Not yet implemented"); } + } + + /** + * Base of tests of {@code str.join}, here so it can be declared + * static, and the test class itself nested. + */ + static class AbstractJoinTest { + /** + * Provide a stream of examples as parameter sets to the tests of + * {@code join}. Each example is a joiner, a list of parts and the + * reference result computed by {@code joinExample}. + * + * @return the examples for join tests. + */ + protected static Stream joinExamples() { + return Stream.of(// + joinExample("-", List.of()), // + joinExample("-", List.of("a", "bb", "ccc")), // + joinExample("123", List.of("a", "bb", "ccc")), // + joinExample("", List.of()), // + joinExample("", List.of("a", "bb", "ccc")), // + // 🐍=\ud83d\udc0d, 🦓=\ud83e\udd93 + joinExample("🐍", List.of("🦓", "Zebra")), + joinExample("-🐍-", List.of("🦓🦓", "(🦓)", "Zebras")), + // Avoid making a zebra + joinExample("\ud83e", List.of("Z", "\udd93")), // + joinExample("\udd93", List.of("\ud83e", "Z")) // + ); + } + + /** + * Construct a join problem and reference result. 
+ * + * @param self joiner + * @param parts to be joined + * @return example data for a test + */ + private static Arguments joinExample(String self, List parts) { + PyUnicode e = refJoin(self, parts); + return arguments(self, parts, e); + } + + /** + * Compute a reference join using integer arrays of code points as + * the intermediary representation. We return {@link PyUnicode} so + * that comparisons have Python semantics. + * + * @param self joiner + * @param parts to be joined + * @return reference join + */ + private static PyUnicode refJoin(String self, List parts) { + + int[] s = self.codePoints().toArray(); + int P = parts.size(); + if (P == 0) { return newPyUnicode(""); } + + // Make an array of arrays of each part as code points + int[][] cpParts = new int[P][]; + // Also count the number of code points in the result + int size = (P - 1) * s.length, p = 0; + for (String part : parts) { + int[] cpPart = part.codePoints().toArray(); + size += cpPart.length; + cpParts[p++] = cpPart; + } + + // So the expected result is: + int[] e = new int[size]; + for (int i = 0, j = 0; i < P; i++) { + if (i > 0) { + // Copy the joiner into e + System.arraycopy(s, 0, e, j, s.length); + j += s.length; + } + // Copy the part into e + int[] part = cpParts[i]; + System.arraycopy(part, 0, e, j, part.length); + j += part.length; + } + + return new PyUnicode(e); + } + + protected static PyTuple tupleOfString(List parts) { return new PyTuple(parts); } + + protected static PyTuple tupleOfPyUnicode(List parts) { + List u = new LinkedList<>(); + for (String p : parts) { u.add(newPyUnicode(p)); } + return new PyTuple(u); + } + + protected static PyList listOfString(List parts) { return new PyList(parts); } + + protected static PyList listOfPyUnicode(List parts) { + PyList list = new PyList(); + for (String p : parts) { list.add(newPyUnicode(p)); } + return list; + } + + /** + * A Python type that is not iterable but defines + * {@code __getitem__}. 
We should find this to be an acceptable + * argument to {@code str.join()}. + */ + protected static class MySequence extends AbstractPyObject { + static PyType TYPE = PyType.fromSpec(new Spec("MySequence", MethodHandles.lookup())); + final String value; + + protected MySequence(String value) { + super(TYPE); + this.value = value; + } + + @SuppressWarnings("unused") + Object __getitem__(Object index) { + int i = PyLong.asSize(index); + if (i < value.length()) + return value.substring(i, i + 1); + else + throw new IndexError(""); + } + } + + /** + * A Python type that is an iterator, defining {@code __iter__} and + * {@code __next__}. We should find this to be an acceptable + * argument to {@code str.join()}. + */ + protected static class MyIterator extends AbstractPyObject { + static PyType TYPE = PyType.fromSpec(new Spec("MyIterator", MethodHandles.lookup())); + final String value; + int i = 0; + + protected MyIterator(String value) { + super(TYPE); + this.value = value; + } + + @SuppressWarnings("unused") + Object __iter__() { return this; } + + @SuppressWarnings("unused") + Object __next__() { + if (i < value.length()) + return value.substring(i++, i); + else + // throw new StopIteration(); + throw new MissingFeature("StopIteration"); + } + } + } + + /** + * Test that join works on a range of {@code str} implementations, + * values and iterables. 
+ */ + @Nested + @DisplayName("join") + class JoinTest extends AbstractJoinTest { + @DisplayName("join(String, [String])") + @ParameterizedTest(name = "\"{0}\".join({1})") + @MethodSource("joinExamples") + void S_join_list_S(String s, List parts, PyUnicode expected) throws Throwable { + Object r = PyUnicode.join(s, listOfString(parts)); + assertEquals(expected, r); + } + + @DisplayName("join(PyUnicode, [PyUnicode])") + @ParameterizedTest(name = "\"{0}\".join({1})") + @MethodSource("joinExamples") + void U_join_list_U(String s, List parts, PyUnicode expected) throws Throwable { + PyUnicode u = newPyUnicode(s); + Object r = u.join(listOfPyUnicode(parts)); + assertEquals(expected, r); + } + + @DisplayName("join(String, (String,))") + @ParameterizedTest(name = "\"{0}\".join({1})") + @MethodSource("joinExamples") + void S_join_tuple_S(String s, List parts, PyUnicode expected) throws Throwable { + Object r = PyUnicode.join(s, tupleOfString(parts)); + assertEquals(expected, r); + } + + @DisplayName("join(PyUnicode, (PyUnicode,))") + @ParameterizedTest(name = "\"{0}\".join({1})") + @MethodSource("joinExamples") + void U_join_tuple_U(String s, List parts, PyUnicode expected) throws Throwable { + PyUnicode u = newPyUnicode(s); + Object r = u.join(tupleOfPyUnicode(parts)); + assertEquals(expected, r); + } + + @Disabled("until PySequence.fastList accepts more types") + @Test + @DisplayName("'-+'.join('hello') [String, String]") + void S_join_str_S() throws Throwable { + String s = "-+"; + String parts = "hello"; + PyUnicode expected = newPyUnicode("h-+e-+l-+l-+o"); + Object r = PyUnicode.join(s, parts); + assertEquals(expected, r); + } + + @Disabled("until PySequence.fastList accepts more types") + @Test + @DisplayName("'-+'.join('hello') [String, PyUnicode]") + void S_join_str_U() throws Throwable { + String s = "-+"; + PyUnicode parts = newPyUnicode("hello"); + PyUnicode expected = newPyUnicode("h-+e-+l-+l-+o"); + Object r = PyUnicode.join(s, parts); + assertEquals(expected, 
r); + } + + @Disabled("until PySequence.fastList accepts more types") + @Test + @DisplayName("'-+'.join('hello') [PyUnicode, String]") + void U_join_str_S() throws Throwable { + PyUnicode u = newPyUnicode("-+"); + String parts = "hello"; + PyUnicode expected = newPyUnicode("h-+e-+l-+l-+o"); + Object r = u.join(parts); + assertEquals(expected, r); + } + + @Disabled("until PySequence.fastList accepts more types") + @Test + @DisplayName("'-+'.join('hello') [PyUnicode, PyUnicode]") + void U_join_str_U() throws Throwable { + PyUnicode u = newPyUnicode("-+"); + PyUnicode parts = newPyUnicode("hello"); + PyUnicode expected = newPyUnicode("h-+e-+l-+l-+o"); + Object r = u.join(parts); + assertEquals(expected, r); + } + + @Disabled("until PySequence.fastList accepts more types") + @Test + @DisplayName("', '.join(MySequence('hello'))") + void U_join_sequence() throws Throwable { + PyUnicode u = newPyUnicode(", "); + MySequence seq = new MySequence("hello"); + PyUnicode expected = newPyUnicode("h, e, l, l, o"); + Object r = u.join(seq); + assertEquals(expected, r); + } + + @Disabled("until PySequence.fastList accepts more types") + @Test + @DisplayName("', '.join(MyIterator('hello'))") + void U_join_iterator() throws Throwable { + PyUnicode u = newPyUnicode(", "); + MyIterator seq = new MyIterator("hello"); + PyUnicode expected = newPyUnicode("h, e, l, l, o"); + Object r = u.join(seq); + assertEquals(expected, r); + } + } + + /** Base of test classes that strip characters. */ + static class AbstractStripTest { + static final String SPACES = " \t\u0085 "; + static final String CHAFF = "ABBA"; + static final String CHARS = "ABC"; + static final PyUnicode UCHARS = newPyUnicode(CHARS); + } + + /** + * Test that {@code lstrip} works on a range of {@code str} + * implementations, values and iterables. 
+ */ + @Nested + @DisplayName("lstrip") + class LStripTest extends AbstractStripTest { + + @ParameterizedTest(name = "\"{0}\".lstrip()") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("lstrip(String) (spaces)") + void S_lstrip(String text) throws Throwable { + String s = SPACES + text; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.lstrip(s, null); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".lstrip()") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("lstrip(PyUnicode) (spaces)") + void U_lstrip(String text) throws Throwable { + String s = SPACES + text; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.lstrip(null); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".lstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("lstrip(String, String)") + void S_lstrip_S(String text) throws Throwable { + String s = CHAFF + text; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.lstrip(s, CHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".lstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("lstrip(PyUnicode, String)") + void U_lstrip_S(String text) throws Throwable { + String s = CHAFF + text; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.lstrip(CHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".lstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("lstrip(String, PyUnicode)") + void S_lstrip_U(String text) throws Throwable { + String s = CHAFF + text; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.lstrip(s, UCHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".lstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("lstrip(PyUnicode, PyUnicode)") + void U_lstrip_U(String text) 
throws Throwable { + String s = CHAFF + text; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.lstrip(UCHARS); + assertEquals(expected, r); + } + } + + /** + * Test that {@code rstrip} works on a range of {@code str} + * implementations, values and iterables. + */ + @Nested + @DisplayName("rstrip") + class RStripTest extends AbstractStripTest { + + @ParameterizedTest(name = "\"{0}\".rstrip()") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("rstrip(String) (spaces)") + void S_rstrip(String text) throws Throwable { + String s = text + SPACES; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.rstrip(s, null); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".rstrip()") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("rstrip(PyUnicode) (spaces)") + void U_rstrip(String text) throws Throwable { + String s = text + SPACES; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.rstrip(null); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".rstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("rstrip(String, String)") + void S_rstrip_S(String text) throws Throwable { + String s = text + CHAFF; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.rstrip(s, CHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".rstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("rstrip(PyUnicode, String)") + void U_rstrip_S(String text) throws Throwable { + String s = text + CHAFF; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.rstrip(CHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".rstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("rstrip(String, PyUnicode)") + void S_rstrip_U(String text) throws Throwable { + String s = text + CHAFF; + 
PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.rstrip(s, UCHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".rstrip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("rstrip(PyUnicode, PyUnicode)") + void U_rstrip_U(String text) throws Throwable { + String s = text + CHAFF; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.rstrip(UCHARS); + assertEquals(expected, r); + } + } + + /** + * Test that {@code strip} works on a range of {@code str} + * implementations, values and iterables. + */ + @Nested + @DisplayName("strip") + class StripTest extends AbstractStripTest { + + @ParameterizedTest(name = "\"{0}\".strip()") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("strip(String) (spaces)") + void S_strip(String text) throws Throwable { + String s = SPACES + text + SPACES; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.strip(s, null); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".strip()") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("strip(PyUnicode) (spaces)") + void U_strip(String text) throws Throwable { + String s = SPACES + text + SPACES; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.strip(null); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".strip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("strip(String, String)") + void S_strip_S(String text) throws Throwable { + String s = CHAFF + text + CHAFF; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.strip(s, CHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".strip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("strip(PyUnicode, String)") + void U_strip_S(String text) throws Throwable { + String s = CHAFF + text + CHAFF; + PyUnicode u = newPyUnicode(s); + PyUnicode 
expected = newPyUnicode(text); + Object r = u.strip(CHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".strip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("strip(String, PyUnicode)") + void S_strip_U(String text) throws Throwable { + String s = CHAFF + text + CHAFF; + PyUnicode expected = newPyUnicode(text); + Object r = PyUnicode.strip(s, UCHARS); + assertEquals(expected, r); + } + + @ParameterizedTest(name = "\"{0}\".strip(chars)") + @ValueSource(strings = {"hello", "", "a b"}) + @DisplayName("strip(PyUnicode, PyUnicode)") + void U_strip_U(String text) throws Throwable { + String s = CHAFF + text + CHAFF; + PyUnicode u = newPyUnicode(s); + PyUnicode expected = newPyUnicode(text); + Object r = u.strip(UCHARS); + assertEquals(expected, r); + } + } + + /** + * This isn't a test of our implementation at all. It simply + * validates our understanding of how Java treats lone surrogates + * and SMP characters. + */ + @SuppressWarnings("static-method") + @Test + @DisplayName("Java UTF-16 behaves as expected") + void javaUTF16Expectations() { + + // This is a valid string in Java and Python ------------------ + + String snake = "🐍"; + + // "🐍" (one character in Python) is two chars to Java. + assertEquals(2, snake.length()); + char head = snake.charAt(0); + char tail = snake.charAt(1); + assert Character.isHighSurrogate(head); + assert Character.isLowSurrogate(tail); + + // But it is still one code point, even to Java. 
+ assertEquals(1, snake.codePointCount(0, snake.length())); + assertEquals(1L, snake.codePoints().count()); + + // We can encode "🐍" in UTF-16 + Charset u16 = Charset.forName("UTF-16LE"); + ByteBuffer snakeBB = u16.encode(snake); + assertEquals(4, snakeBB.remaining()); + + // A round-trip is successful + CharBuffer snakeCB = u16.decode(snakeBB); + assertEquals(snake, snakeCB.toString()); + + // Java (and Python) will tolerate lone surrogates ------------ + + String loners = new String(new char[] {head, head, tail, tail, head}); + + // But in Java, surrogates may make one character + assertEquals("\ud83d🐍\udc0d\ud83d", loners); + + // Encoding lone surrogates in UTF-16 does not throw + ByteBuffer lonersBB = u16.encode(loners); + assertEquals(10, lonersBB.remaining()); + + // But the round-trip loses information + CharBuffer lonersCB = u16.decode(lonersBB); + assertNotEquals(loners, lonersCB.toString()); + + // A lone surrogate becomes a UNICODE REPLACEMENT + final char REPLACEMENT = '\ufffd'; + assertEquals(REPLACEMENT, lonersCB.get()); + assertEquals(head, lonersCB.get()); + assertEquals(tail, lonersCB.get()); + assertEquals(REPLACEMENT, lonersCB.get()); + assertEquals(REPLACEMENT, lonersCB.get()); + } + + // Support code -------------------------------------------------- + + /** + * Return a list of char indices on {@code s} at which the given + * {@code needle} may be found. Occurrences found are + * non-overlapping. This uses Java {@code String.indexOf} and will + * work correctly for BMP strings, but is unreliable where any SMP + * characters are involved. 
+ * + * @param s string in question + * @param needle to search for + * @return char indices at which {@code needle} may be found + */ + static int[] findIndices(String s, String needle) { + LinkedList<Integer> charIndices = new LinkedList<>(); + int n = Math.max(1, needle.length()), p = 0; + while (p <= s.length() && (p = s.indexOf(needle, p)) >= 0) { + charIndices.add(p); + p += n; + } + int[] a = new int[charIndices.size()]; + for (int i = 0; i < a.length; i++) { a[i] = charIndices.pop(); } + return a; + } + + /** + * Return a list of char indices on {@code s} at which the given + * {@code needle} may be found, working backwards from the end. + * Occurrences found are non-overlapping. Although generated by a + * reverse scan, the return array is in ascending order. An empty + * {@code needle} is reported at every char index from 0 to + * {@code s.length()} inclusive, matching {@code findIndices}. This + * uses Java {@code String.lastIndexOf} and will work correctly for + * BMP strings, but is unreliable where any SMP characters are + * involved. + * + * @param s string in question + * @param needle to search for + * @return char indices at which {@code needle} may be found + */ + static int[] rfindIndices(String s, String needle) { + LinkedList<Integer> charIndices = new LinkedList<>(); + /* Step back by at least one char so an empty needle cannot stall the scan. */ + int n = Math.max(1, needle.length()), p = s.length() - needle.length(); + while ((p = s.lastIndexOf(needle, p)) >= 0) { + charIndices.push(p); + p -= n; + } + int[] a = new int[charIndices.size()]; + for (int i = 0; i < a.length; i++) { a[i] = charIndices.pop(); } + return a; + } + + /** + * Return a list of char indices on {@code s} equivalent to the code + * point indices supplied. Indices in the supplied array must be in + * ascending order. 
+ * + * @param s string in question + * @param cpIndices code point indices to convert + * @return equivalent char indices on s + */ + static int[] toCharIndices(String s, int[] cpIndices) { + final int M = cpIndices.length; + int[] charIndices = new int[M]; + int cpi = 0, p = 0, m = 0; + for (int cpIndex : cpIndices) { + // Advance p to char index of next cp index + while (cpi < cpIndex) { + int cp = s.codePointAt(p); + p += Character.isBmpCodePoint(cp) ? 1 : 2; + cpi++; + } + charIndices[m++] = p; + } + return charIndices; + } + + /** + * Convert an argument list or array of String to an array of + * {@link PyUnicode}. + * + * @param strings to convert + * @return as {@code PyUnicode} + */ + private static PyUnicode[] toPyUnicodeArray(String... strings) { + int n = strings.length; + PyUnicode[] r = new PyUnicode[n]; + for (int i = 0; i < n; i++) { r[i] = newPyUnicode(strings[i]); } + return r; + } + + /** + * Take the {@code M} occurrences of the {@code needle} in + * {@code s}, which are enumerated by ascending code point index in + * {@code cpIndex}, and return an array of {@code M+1} segments + * between the needles. We return {@link PyUnicode} so that + * comparisons have Python semantics. 
+ * + * @param s the reference string + * @param needle identified (we use the length only) + * @param cpIndices code point indices in {@code s} + * @return the array of segments + */ + private static PyUnicode[] expectedSplit(String s, String needle, int[] cpIndices) { + int p = 0, i = 0, N = needle.length(); + int[] indices = toCharIndices(s, cpIndices); + // One more segment than there are splits + String[] segments = new String[indices.length + 1]; + for (int q : indices) { + // needle at q ends the current segment + segments[i++] = s.substring(p, q); + // next segment is after the needle + p = q + N; + } + // And the last segment is from after the last needle + segments[i] = s.substring(p); + return toPyUnicodeArray(segments); + } + + /** Simple English string for ad hoc tests. */ + static final String FOX = "The quick brown fox jumps over the lazy dog."; + + /** Non-ascii precomposed polytonic Greek characters. */ + static final String GREEK = "Ἐν ἀρχῇ ἦν ὁ λόγος, " // + + "καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, " // + + "καὶ θεὸς ἦν ὁ λόγος."; + + /** + * Define what characters are to be treated as a space according to + * Python 3. + */ + private static boolean isPythonSpace(char ch) { + // Use the Java built-in methods as far as possible + return Character.isWhitespace(ch) // ASCII spaces and some + // remaining Unicode spaces + || Character.isSpaceChar(ch) + // NEXT LINE (not a space in Java or Unicode) + || ch == 0x0085; + } + + /** + * Define what characters are to be treated as a line separator + * according to Python 3. 
+ */ + private static boolean isPythonLineSeparator(char c) { + return c == '\n' || c == '\r' || c == 0xb || c == '\f' || c == 0x1c || c == 0x1d + || c == 0x1e || c == 0x85 || c == 0x2028 || c == 0x2029; + } +} diff --git a/core/src/test/java/org/python/core/SlotWrapperTestBase.java b/core/src/test/java/org/python/core/SlotWrapperTestBase.java new file mode 100644 index 000000000..fa5ddbcf4 --- /dev/null +++ b/core/src/test/java/org/python/core/SlotWrapperTestBase.java @@ -0,0 +1,593 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.python.core.UnitTestSupport.assertPythonType; + +import java.lang.invoke.MethodHandle; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.python.core.Slot.Signature; + +/** + * + * Support for invoking slot wrapper descriptors in several forms, + * and for checking the results, as used in the nested tests of + * {@link UnarySlotWrapperTest}, {@link BinarySlotWrapperTest}, and + * others. As a base class for a test it creates and holds the + * descriptor object under test and certain related information, and + * defines test methods that we repeat in each nested case. + */ +class SlotWrapperTestBase { + + /** Empty array of {@code Object}. */ + static final Object[] NOARGS = new Object[0]; + /** Empty array of {@code String}. */ + static final String[] NOKEYWORDS = new String[0]; + + // Working variables for the tests + /** Name of the special method under test. */ + String name; + /** Unbound slot wrapper to examine or call. */ + PyWrapperDescr descr; + /** The slot corresponding to the descriptor name. */ + Slot slot; + /** The type on which to invoke the special method. 
*/ + PyType type; + + /** + * The slot wrapper should have field values that correctly reflect + * the signature and defining class. + */ + void has_expected_fields() { + assertEquals(name, descr.name); + assertTrue(type.isSubTypeOf(descr.objclass), "target is sub-type of defining class"); + // more ... + } + + /** + * Helper to set up each test. + * + * @param type under test + * @param name of the special method + * @param signature required of the slot method + * @throws IllegalArgumentException if not a slot method name of the + * required signature + * @throws AttributeError if method not found + * @throws Throwable other errors + */ + void setup(PyType type, String name, Signature signature) throws AttributeError, Throwable { + this.name = name; + this.type = type; + + this.slot = Slot.forMethodName(name); + if (slot == null) + throw new IllegalArgumentException(String.format("'%s' does not name a slot", name)); + else if (slot.signature != signature) + throw new IllegalArgumentException(String.format("'%s' is not %s", name, signature)); + + descr = (PyWrapperDescr)type.lookup(name); + if (descr == null) + throw Abstract.noAttributeOnType(type, name); + } + + /** + * Check a return value that is expected to be a Python {@code int}. + * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + static void checkInt(Object exp, Object r) throws Throwable { + assertPythonType(PyLong.TYPE, r); + BigInteger e = PyLong.asBigInteger(exp); + BigInteger res = PyLong.asBigInteger(r); + assertEquals(e, res); + } + + /** + * Check a return value that is expected to be a Python + * {@code bool}. 
+ * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + static void checkBool(Object exp, Object r) throws Throwable { + assertPythonType(PyBool.TYPE, r); + BigInteger e = PyLong.asBigInteger(exp); + BigInteger res = PyLong.asBigInteger(r); + assertEquals(e, res); + } + + /** + * Check a return value that is expected to be a Python {@code str}. + * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + static void checkStr(Object exp, Object r) throws Throwable { + assertPythonType(PyUnicode.TYPE, r); + assertEquals(exp.toString(), r.toString()); + } + + /** + * Check a return value that is expected to be a Python + * {@code float}. + * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + static void checkFloat(Object exp, Object r) throws Throwable { + assertPythonType(PyFloat.TYPE, r); + double e = PyFloat.asDouble(exp); + double res = PyFloat.asDouble(r); + assertEquals(e, res); + } + + /** + * A class that implements the tests for one combination of slot + * wrapper and type. The class specialises its type to the Java + * return type {@code R} of the special method under test, and a + * Java super-type {@code S} of the {@code self} argument. For a + * Python type with just one implementation, {@code S} may be that + * implementation type. For a Python type with multiple + * implementations, {@code S} must be the common super-type, which + * is usually {@code Object}. + * + * @param the return type of the special method under test + * @param the common Java super-type of implementations + */ + abstract class BaseTest { + /** + * Check the result of a call, potentially failing the test. Quite + * often this simply calls one of the base tests + * {@link #checkInt(Object, Object)}, etc.. 
+ * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + abstract void check(R exp, Object r) throws Throwable; + + /** + * The slot wrapper should have field values that correctly reflect + * the signature and defining class. + */ + @Test + void has_expected_fields() { + // Implement using the enclosing instance + SlotWrapperTestBase.this.has_expected_fields(); + } + + /** + * Call the slot wrapper using the {@code __call__} special method, + * unbound, with arguments correct for the slot's specification. The + * called method should obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_call() throws Throwable; + + /** + * Call the slot wrapper using the {@code __call__} special method, + * bound, with arguments correct for the slot's specification. The + * called method should obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_bound_call() throws Throwable; + + /** + * Call the slot wrapper using the Java call interface with + * arguments correct for the slot's specification. The function + * should obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_java_call() throws Throwable; + + /** + * Call the wrapped operation through the {@link Operations} object + * for the implementation type, using invokeExact and arguments + * correct for the slot's specification. The function should obtain + * the correct result (and not throw). (Unlike CPython, the + * "wrapper" is not a wrapper on the slot, on the implementing + * method for which op_NAME becomes the cache.) + * + * @throws Throwable unexpectedly + */ + abstract void fills_op_slot() throws Throwable; + } + + /** + * A class that implements the tests for one combination of a + * {@link Signature#UNARY} slot wrapper and type, extending + * {@link BaseTest}. 
+ */ + abstract class UnaryTest extends BaseTest { + + /** + * A list of arguments to which the special method under test will + * be applied. + */ + private List cases; + + /** + * Compute the expected result of a call + * + * @param x argument to the call under test + * @return expected return from call under test + */ + abstract R expected(S x); + + /** + * Check the result of a call, potentially failing the test. Quite + * often this simply calls one of the base tests + * {@link #checkInt(Object, Object)}, etc.. + * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + @Override + abstract void check(R exp, Object r) throws Throwable; + + /** + * Helper to set up each test specifying the slot signature. + * + * @param type under test + * @param name of the special method + * @param signature required of the slot method + * @param cases list of values to use as self + * @throws IllegalArgumentException if not a slot method name of the + * required signature + * @throws AttributeError if method not found + * @throws Throwable other errors + */ + void setup(PyType type, String name, Signature signature, List cases) + throws IllegalArgumentException, AttributeError, Throwable { + SlotWrapperTestBase.this.setup(type, name, signature); + this.cases = cases; + } + + /** + * Helper to set up each test. 
+ * + * @param type under test + * @param name of the special method + * @param signature required of the slot method + * @param cases list of values to use as self + * @throws IllegalArgumentException if not a unary slot method name + * @throws AttributeError if method not found + * @throws Throwable other errors + */ + void setup(PyType type, String name, List cases) + throws IllegalArgumentException, AttributeError, Throwable { + setup(type, name, Signature.UNARY, cases); + } + + @Override + @Test + void supports_call() throws Throwable { + for (S x : cases) { + R exp = expected(x); + check(exp, makeCall(x)); + } + } + + @Override + @Test + void supports_bound_call() throws Throwable { + for (S x : cases) { + R exp = expected(x); + check(exp, makeBoundCall(x)); + } + } + + @Override + @Test + void supports_java_call() throws Throwable { + for (S x : cases) { + R exp = expected(x); + check(exp, makeJavaCall(x)); + } + } + + @Override + @Test + void fills_op_slot() throws Throwable { + for (S x : cases) { + R exp = expected(x); + check(exp, makeHandleCall(x)); + } + } + + List getCases() { return Collections.unmodifiableList(cases); } + + /** + * Make a single invocation of {@link #descr} with {@code null} + * keywords argument. + * + * @param x argument on which to invoke (it's unary) + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeCall(Object x) throws Throwable { + return descr.__call__(new Object[] {x}, null); + } + + /** + * Make a single invocation of {@link #descr} with empty keywords + * argument. + * + * @param x argument on which to invoke (it's unary) + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeCallKW(Object x) throws Throwable { + return descr.__call__(new Object[] {x}, NOKEYWORDS); + } + + /** + * Make a single invocation of {@link #descr} having bound it to the + * argument. 
+ * + * @param x argument on which to invoke (it's unary) + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeBoundCall(Object x) throws Throwable { + PyMethodWrapper meth = (PyMethodWrapper)descr.__get__(x, null); + return meth.__call__(NOARGS, null); + } + + /** + * Make a single invocation of {@link #descr} having bound it to the + * argument, passing an empty keywords argument. + * + * @param x argument on which to invoke (it's unary) + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeBoundCallKW(Object x) throws Throwable { + PyMethodWrapper meth = (PyMethodWrapper)descr.__get__(x, null); + return meth.__call__(NOARGS, NOKEYWORDS); + } + + /** + * Make a single invocation of {@link #descr} as a Java call. + * + * @param x argument on which to invoke (it's unary) + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeJavaCall(Object x) throws Throwable { return descr.call(x); } + + /** + * Make a single invocation of the slot as a method handle. + * + * @param x argument on which to invoke (it's unary) + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeHandleCall(Object x) throws Throwable { + Operations ops = Operations.of(x); + MethodHandle mh = slot.getSlot(ops); + return mh.invokeExact(x); + } + } + + /** + * A class that implements the tests for one combination of a + * {@link Signature#LEN} slot wrapper and type, extending + * {@link UnaryTest}. 
+ */ + abstract class LenTest extends UnaryTest { + @Override + void setup(PyType type, String name, List cases) + throws IllegalArgumentException, AttributeError, Throwable { + setup(type, name, Signature.LEN, cases); + } + + @Override + Object makeHandleCall(Object x) throws Throwable { + Operations ops = Operations.of(x); + MethodHandle mh = slot.getSlot(ops); + return (int)mh.invokeExact(x); + } + } + + /** + * A class that implements the tests for one combination of a + * {@link Signature#BINARY} slot wrapper and type, extending + * {@link BaseTest}. + */ + abstract class BinaryTest extends BaseTest { + + /** Holds a pair of arguments for a binary call. */ + class Args { + S s; + Object o; + + Args(S s, Object o) { + this.s = s; + this.o = o; + } + + } + + /** + * A list of arguments to which the special method under test will + * be applied. + */ + private List cases; + + /** + * Compute the expected result of a call + * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return expected return from call under test + */ + abstract R expected(S s, Object o); + + /** + * Check the result of a call, potentially failing the test. Quite + * often this simply calls one of the base tests + * {@link #checkInt(Object, Object)}, etc.. + * + * @param exp value expected + * @param r return value to test + * @throws Throwable unexpectedly + */ + @Override + abstract void check(R exp, Object r) throws Throwable; + + /** + * Helper to set up each test. The schedule of tests will be all + * pairs of values that may be formed from the two lists. 
+ * + * @param type under test + * @param name of the special method + * @param sList list of values to use as self + * @param oList list of values to use as other argument + * @throws AttributeError if method not found + * @throws Throwable other errors + */ + void setup(PyType type, String name, List sList, List oList) + throws AttributeError, Throwable { + SlotWrapperTestBase.this.setup(type, name, Signature.BINARY); + cases = new ArrayList<>(); + for (S s : sList) { + for (Object o : oList) { + addCase(s, o); + } + } + } + + @Override + @Test + void supports_call() throws Throwable { + for (Args args : cases) { + R exp = expected(args.s, args.o); + check(exp, makeCall(args.s, args.o)); + } + } + + @Override + @Test + void supports_bound_call() throws Throwable { + for (Args args : cases) { + R exp = expected(args.s, args.o); + check(exp, makeBoundCall(args.s, args.o)); + } + } + + @Override + @Test + void supports_java_call() throws Throwable { + for (Args args : cases) { + R exp = expected(args.s, args.o); + check(exp, makeJavaCall(args.s, args.o)); + } + } + + @Override + @Test + void fills_op_slot() throws Throwable { + for (Args args : cases) { + R exp = expected(args.s, args.o); + check(exp, makeHandleCall(args.s, args.o)); + } + } + + List getCases() { return Collections.unmodifiableList(cases); } + + void addCase(S s, Object o) { cases.add(new Args(s, o)); } + + /** + * Invoke {@link #descr} once, with {@code null} keywords argument. + * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeCall(Object s, Object o) throws Throwable { + return descr.__call__(new Object[] {s, o}, null); + } + + /** + * Invoke {@link #descr} once, with empty keywords argument. 
+ * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeCallKW(Object s, Object o) throws Throwable { + return descr.__call__(new Object[] {s, o}, NOKEYWORDS); + } + + /** + * Make a single invocation of {@link #descr} having bound it to the + * self argument, passing {@code null} as the keywords argument. + * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeBoundCall(Object s, Object o) throws Throwable { + PyMethodWrapper meth = (PyMethodWrapper)descr.__get__(s, null); + return meth.__call__(new Object[] {o}, null); + } + + /** + * Make a single invocation of {@link #descr} having bound it to the + * self argument, passing an empty keywords argument. + * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeBoundCallKW(Object s, Object o) throws Throwable { + PyMethodWrapper meth = (PyMethodWrapper)descr.__get__(s, null); + return meth.__call__(new Object[] {o}, NOKEYWORDS); + } + + /** + * Make a single invocation of {@link #descr} as a Java call. + * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeJavaCall(Object s, Object o) throws Throwable { return descr.call(s, o); } + + /** + * Make a single invocation of the slot as a method handle. 
+ * + * @param s self argument on which to invoke + * @param o other argument on which to invoke + * @return result of call + * @throws Throwable unexpectedly + */ + Object makeHandleCall(Object s, Object o) throws Throwable { + Operations ops = Operations.of(s); + MethodHandle mh = slot.getSlot(ops); + return mh.invokeExact(s, o); + } + } +} diff --git a/core/src/test/java/org/python/core/TypeExposerGetSetTest.java b/core/src/test/java/org/python/core/TypeExposerGetSetTest.java new file mode 100644 index 000000000..b5c4506ad --- /dev/null +++ b/core/src/test/java/org/python/core/TypeExposerGetSetTest.java @@ -0,0 +1,1391 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Array; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.function.Function; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.python.core.Exposed.Deleter; +import org.python.core.Exposed.DocString; +import org.python.core.Exposed.FrozenArray; +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.Setter; + +/** + * Test that get-set attributes exposed by a Python type + * defined in Java, that is, using methods annotated with + * {@link Exposed.Getter}, {@link Exposed.Setter} and + * {@link Exposed.Deleter}, result in data descriptors with + * characteristics that correspond to the definitions. + *

+ * This gets a bit complicated, but if it works, should cover + * anything we want to do in real life. The test object is a Python + * type {@code ObjectWithGetSets} defined by the inner Java class + * {@link ObjectWithGetSets}. This definition adopts a second Java + * class {@link AdoptedWithGetSets}, so that instances of either + * Java type are accepted as Python objects of type + * {@code ObjectWithGetSets}. + *

+ * For simplicity, in the test both implementations get most of + * their definition by inheritance from a common base class + * {@link BaseGetSets}. Note that implementations of get-set methods + * operating on the state of an {@code AdoptedWithGetSets}, have to + * reside in the defining class {@code ObjectWithGetSets} or the + * base. + *

+ * There is a nested test suite for each pattern of characteristics. + * For test purposes, we mostly mimic the behaviour of identified + * types of member attribute. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Patterns of get-set behaviour
| | get | set | delete | get after delete |
| readonly | yes | AttributeError | AttributeError | n/a |
| settable | yes | yes | TypeError | n/a |
| optional | yes | yes | removes | AttributeError |
| not optional | yes | yes | sets default | gets default |
+ */ +@DisplayName("For an attribute exposed by a type") +class TypeExposerGetSetTest extends UnitTestSupport { + + static final String[] GUMBYS = {"Prof L.R.Gumby", "Prof Enid Gumby", "D.P.Gumby"}; + static final String[] TWITS = {"Vivian Smith-Smythe-Smith", "Simon Zinc-Trumpet-Harris", + "Nigel Incubator-Jones", "Gervaise Brook-Hampster", "Oliver St. John-Mollusc"}; + + /** + * Java base class of a Python type definition. We use this class to + * prepare two classes that jointly define the get-set attributes of + * a type {@code ObjectWithGetSets}. + *

+ * As well as giving us less to type, using a base allows us to show + * that some of the get-set attribute definitions explored in the + * tests can be Java-inherited. + */ + private static abstract class BaseGetSets { + + /** The actual Python type */ + protected PyType type; + + /** Primitive double attribute (not optional). */ + double x; + + @DocString("My test x") + @Getter + Object x() { return x; } + + @Setter + void x(Object v) throws TypeError, Throwable { x = PyFloat.asDouble(v); } + + @Deleter("x") + void _x() { x = Double.NaN; } + + /** + * Optional {@code String} attribute that can be properly deleted + * without popping up as {@code None}. + */ + String s; + + @Getter + Object s() { return errorIfNull(s, (o) -> Abstract.noAttributeError(this, "s")); } + + @Setter + void s(Object v) { s = PyUnicode.asString(v); } + + @Deleter("s") + void _s() { + errorIfNull(s, (o) -> Abstract.noAttributeError(this, "s")); + s = null; + } + + /** + * String with change of name. Deletion leads to a distinctive + * value. + */ + String t; + + @Getter("text") + Object t() { return t; } + + @Setter("text") + void t(Object v) { t = PyUnicode.asString(v); } + + @Deleter("text") + void _t() { t = ""; } + + /** + * Read-only double attribute. {@code DocString} after + * {@code Getter} + */ + final double x2; + + @Getter + @DocString("Another x") + Object x2() { return x2; } + + /** Read-only {@code String} attribute. */ + String t2; + + @Getter("text2") + Object t2() { return t2; } + + /** + * Strongly-typed primitive ({@code double}) array internally, but + * {@code tuple} to Python. 
+ */ + @FrozenArray + double[] doubleArray; + + @Getter + Object doubles() { + PyTuple.Builder tb = new PyTuple.Builder(doubleArray.length); + for (double d : doubleArray) { tb.append(d); } + return tb.take(); + } + + @Setter + void doubles(Object v) throws Throwable { doubleArray = doubleFromTuple(v); } + + @Deleter("doubles") + void _doubles() { doubleArray = new double[0]; } + + /** + * Strongly-typed {@code String} array internally, but {@code tuple} + * to Python. Deleting it substitutes an empty array (not {@code None}). + */ + @FrozenArray + String[] nameArray; + + @Getter + Object names() { return new PyTuple(nameArray); } + + @Setter + void names(Object v) { nameArray = fromTuple(v, String[].class); } + + @Deleter("names") + void _names() { nameArray = new String[0]; } + + /** + * Replace {@link #nameArray} with a copy of the given array. + * + * @param v new value + */ + void setNameArray(String[] v) { nameArray = Arrays.copyOf(v, v.length); } + + /** + * Replace {@link #doubleArray} with a copy of the given array. + * + * @param v new value + */ + void setDoubleArray(double[] v) { doubleArray = Arrays.copyOf(v, v.length); } + + /** + * {@code Object} get-set attribute, acting as a non-optional + * member. That is, {@code null} represents deleted and appears as + * {@code None} externally. + */ + Object obj; + + @Getter + Object obj() { return defaultIfNull(obj, Py.None); } + + @Setter + void obj(Object v) { obj = v; } + + @Deleter("obj") + void _obj() { obj = null; } + + /* + * Attribute with tuple value will be implemented at Java level + * through this abstract interface. + */ + /** @return notional field {@code tup}. */ + abstract PyTuple getTup(); + + /** + * Strongly-typed {@code PyTuple} attribute with default value + * {@code None}. The attribute is defined through a pair of abstract + * methods {@link #getTup()} and {@link #setTup(PyTuple)}. 
This + * allows us to have quite different implementations in the two + * subclasses while defining the get-set methods in the base. 
+ */ + @Getter + Object tup() { return defaultIfNull(getTup(), Py.None); } + + /* + * Notice the strongly typed argument to the Setter. This makes + * checks in the method body unnecessary. PyGetSetDescr.__set__ will + * check the supplied value and report a mismatch in terms of Python + * types. + * + */ + @Setter + void tup(PyTuple v) { setTup(v); } + + @Deleter("tup") + void _tup() { setTup(null); } + + /** + * Assign or delete notional field {@code tup}. + * + * @param tup new value ({@code null} for delete) + */ + abstract void setTup(PyTuple tup); + + BaseGetSets(PyType type, double value) { + this.type = type; + x2 = x = value; + doubleArray = new double[] {1, x, x * x, x * x * x}; + nameArray = TWITS.clone(); + } + + /** + * Return a default value if {@code v} is {@code null}. + * + * @param type of {@code v} + * @param v to return if not {@code null} + * @param defaultValue to return if {@code v} is {@code null} + * @return {@code v} or {@code defaultValue} + */ + static T defaultIfNull(T v, T defaultValue) { return v != null ? v : defaultValue; } + + /** + * Throw an exception if {@code v} is {@code null}. + * + * @param type of {@code v} + * @param type of exception to throw + * @param v to return if not {@code null} + * @param exc supplier of exception to throw + * @return {@code v} + * @throws E if {@code v} is {@code null} + */ + static T errorIfNull(T v, Function exc) throws E { + if (v != null) { return v; } + throw exc.apply(v); + } + } + + /** + * A Python type definition that exhibits a range of get-set + * attribute definitions explored in the tests. + */ + private static class ObjectWithGetSets extends BaseGetSets { + + static PyType TYPE = + PyType.fromSpec(new PyType.Spec("ObjectWithGetSets", MethodHandles.lookup()) + .adopt(AdoptedWithGetSets.class)); + + /** Primitive integer attribute (not optional). 
*/ + int i; + + @Getter + Integer i() { return i; } + + @Getter + static BigInteger i(AdoptedWithGetSets self) { return self.i; } + + @Setter + void i(Object v) { i = PyLong.asInt(v); } + + @Setter + static void i(AdoptedWithGetSets self, Object v) { self.i = PyLong.asBigInteger(v); } + + /** Read-only access. */ + int i2; + + @Getter + Object i2() { return i2; } + + @Getter + static BigInteger i2(AdoptedWithGetSets self) { return self.i2; } + + /** + * Strongly-typed {@code PyTuple} attribute with default value + * {@code None}. + */ + PyTuple tup; + + ObjectWithGetSets(PyType type, double value) { + super(type, value); + i2 = i = BigInteger.valueOf(Math.round(value)).intValueExact(); + obj = i; + t2 = t = s = String.format("%d", i); + tup = new PyTuple(i, x, t); + } + + ObjectWithGetSets(double value) { this(TYPE, value); } + + @Override + PyTuple getTup() { return tup; } + + @Override + void setTup(PyTuple tup) { this.tup = tup; } + } + + /** + * A class that represents an adopted implementation of the + * Python class {@code ObjectWithGetSets} defined above. Attribute + * access methods implemented in the common base class + * {@link BaseGetSets} also apply to this class. This class and the + * canonical class {@link ObjectWithGetSets} implement certain + * attributes each in their own way. The attribute access methods + * for this class are implemented as {@code static} methods in the + * canonical {@code ObjectWithGetSets}. + */ + private static class AdoptedWithGetSets extends BaseGetSets { + // The tests create one of these as the second object (p) + + /** Integer attribute (not optional), held as a BigInteger. */ + BigInteger i; + + /** Read-only access. */ + BigInteger i2; + + /** + * Backing store of the {@code tup} attribute, held as an array; + * {@code null} when the attribute has been deleted. 
+ */ + Object[] aTuple; + + AdoptedWithGetSets(double value) { + super(ObjectWithGetSets.TYPE, value); + i2 = i = BigInteger.valueOf(Math.round(value)); + obj = i; + t2 = t = s = String.format("%d", i); + aTuple = new Object[] {i, x, t}; + } + + @Override + PyTuple getTup() { return aTuple == null ? null : PyTuple.from(aTuple); } + + @Override + void setTup(PyTuple tup) { this.aTuple = tup == null ? null : tup.toArray(); } + } + + /** + * Copy {@code tuple} elements to a new {@code T[]}, raising a + * {@link TypeError} if the argument is not a {@code tuple} or any + * element cannot be assigned to a variable of type {@code T}. + */ + private static T[] fromTuple(Object tuple, Class arrayType) + throws TypeError { + // Loosely based on java.util.Arrays.copyOf + if (tuple instanceof PyTuple) { + PyTuple t = (PyTuple)tuple; + int n = t.size(); + @SuppressWarnings("unchecked") + T[] copy = (T[])Array.newInstance(arrayType.getComponentType(), n); + try { + System.arraycopy(t.value, 0, copy, 0, n); + } catch (ArrayStoreException ase) { + PyType dstType = PyType.of(arrayType.getComponentType()); + throw new TypeError("tuple of %s expected", dstType); + } + return copy; + } else { + throw new TypeError("tuple expected"); + } + } + + /** + * Copy tuple elements to a new {@code double[]}, converting them + * with {@code PyFloat.asDouble}. + * + * @throws Throwable if not a {@code tuple} or a conversion fails + */ + private static double[] doubleFromTuple(Object tuple) throws Throwable { + if (tuple instanceof PyTuple) { + PyTuple t = (PyTuple)tuple; + int n = t.size(); + Object[] value = t.value; + double[] copy = new double[n]; + for (int i = 0; i < n; i++) { copy[i] = PyFloat.asDouble(value[i]); } + return copy; + } else { + throw new TypeError("tuple expected"); + } + } + + /** + * Create a {@code tuple} in which the elements are equal to the + * values in a given array. 
+ * + * @param a values + * @return tuple of those values + */ + private static PyTuple tupleFrom(double[] a) { + int n = a.length; + Double[] oa = new Double[n]; + for (int i = 0; i < n; i++) { oa[i] = a[i]; } + return new PyTuple(oa); + } + + /** + * Certain nested test classes implement these as standard. A base + * class here is just a way to describe the tests once that reappear + * in each nested case. + */ + abstract static class Base { + + // Working variables for the tests + /** Name of the attribute. */ + String name; + /** Documentation string. */ + String doc; + /** The value set by delete. */ + Object deleted; + /** Unbound descriptor by type access to examine or call. */ + PyGetSetDescr gsd; + /** The object on which to attempt access. */ + ObjectWithGetSets o; + /** + * An object of the adopted implementation on which to attempt the + * same kind of access (in case we are getting instances mixed up). + */ + AdoptedWithGetSets p; + + void setup(String name, String doc, Object deleted, double oValue, double pValue) + throws Throwable { + this.name = name; + this.doc = doc; + this.deleted = deleted; + try { + this.gsd = (PyGetSetDescr)ObjectWithGetSets.TYPE.lookup(name); + this.o = new ObjectWithGetSets(oValue); + this.p = new AdoptedWithGetSets(pValue); + } catch (ExceptionInInitializerError eie) { + // Errors detected by the Exposer get wrapped so: + Throwable t = eie.getCause(); + throw t == null ? eie : t; + } + } + + void setup(String name, String doc, double oValue, double pValue) throws Throwable { + setup(name, doc, null, oValue, pValue); + } + + void setup(String name, double oValue, double pValue) throws Throwable { + setup(name, null, null, oValue, pValue); + } + + /** + * The attribute is a get-set descriptor that correctly reflects the + * annotations in the defining class. 
+ * + * @throws Throwable unexpectedly + */ + @Test + void descr_has_expected_fields() throws Throwable { + assertEquals(name, gsd.name); + assertEquals(doc, gsd.doc); + String s = String.format("", name); + assertEquals(s, gsd.toString()); + assertEquals(s, Abstract.repr(gsd)); + } + + /** + * The string (repr) describes the type and attribute. + * + * @throws Throwable unexpectedly + */ + void checkToString() throws Throwable { + String s = String.format("", name); + assertEquals(s, gsd.toString()); + assertEquals(s, Abstract.repr(gsd)); + } + + /** + * The get-set descriptor may be used to read the field in an + * instance of the object. + * + * @throws Throwable unexpectedly + */ + abstract void descr_get_works() throws Throwable; + + /** + * {@link Abstract#getAttr(Object, String)} may be used to read the + * field in an instance of the object. + * + * @throws Throwable unexpectedly + */ + abstract void abstract_getAttr_works() throws Throwable; + } + + /** + * Add tests of setting values to the base tests. + */ + abstract static class BaseSettable extends Base { + + /** + * The get-set descriptor may be used to set the field in an + * instance of the object. + * + * @throws Throwable unexpectedly + */ + abstract void descr_set_works() throws Throwable; + + /** + * {@link Abstract#setAttr(Object, String, Object)} may be used to + * set the field in an instance of the object. + * + * @throws Throwable unexpectedly + */ + abstract void abstract_setAttr_works() throws Throwable; + + /** + * The get-set attribute raises {@link TypeError} when supplied a + * value of unacceptable type. + */ + abstract void set_detects_TypeError(); + } + + /** + * Base test of settable attribute that may not be deleted. + */ + abstract static class BaseSettableIndelible extends BaseSettable { + + /** + * Attempting to delete the get-set attribute, where it has a setter + * but no deleter, from an instance of the object, through the + * get-set descriptor, raises {@link TypeError}. 
+ */ + @Test + void rejects_descr_delete() { + assertThrows(TypeError.class, () -> gsd.__delete__(o)); + assertThrows(TypeError.class, () -> gsd.__set__(o, null)); + assertThrows(TypeError.class, () -> gsd.__delete__(p)); + assertThrows(TypeError.class, () -> gsd.__set__(p, null)); + } + + /** + * Attempting to delete the get-set attribute, where it has a setter + * but no deleter, from an instance of the object, through + * {@link Abstract#delAttr(Object, String)}, raises + * {@link TypeError}. + */ + @Test + void rejects_abstract_delAttr() { + assertThrows(TypeError.class, () -> Abstract.delAttr(o, name)); + assertThrows(TypeError.class, () -> Abstract.setAttr(o, name, null)); + assertThrows(TypeError.class, () -> Abstract.delAttr(p, name)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, null)); + } + } + + /** + * Base test of an optional attribute. Instances will raise + * {@link AttributeError} on access after deletion. + */ + abstract static class BaseOptionalReference extends BaseSettable { + + /** + * The get-set descriptor may be used to delete a field from an + * instance of the object, causing it to disappear externally. + * + * @throws Throwable unexpectedly + */ + @Test + void descr_delete_removes() throws Throwable { + gsd.__delete__(o); + gsd.__delete__(p); + // After deletion, ... + // ... __get__ raises AttributeError + assertThrows(AttributeError.class, () -> gsd.__get__(o, null)); + assertThrows(AttributeError.class, () -> gsd.__get__(p, null)); + // ... __delete__ raises AttributeError + assertThrows(AttributeError.class, () -> gsd.__delete__(o)); + assertThrows(AttributeError.class, () -> gsd.__delete__(p)); + } + + /** + * {@link Abstract#delAttr(Object, String)} to delete a field from + * an instance of the object, causing it to disappear externally. 
+ * + * @throws Throwable unexpectedly + */ + @Test + void abstract_delAttr_removes() throws Throwable { + Abstract.delAttr(o, name); + Abstract.delAttr(p, name); + // After deletion, ... + // ... getAttr and delAttr raise AttributeError + assertThrows(AttributeError.class, () -> Abstract.getAttr(o, name)); + assertThrows(AttributeError.class, () -> Abstract.getAttr(p, name)); + assertThrows(AttributeError.class, () -> Abstract.delAttr(o, name)); + assertThrows(AttributeError.class, () -> Abstract.delAttr(p, name)); + } + } + + /** + * Base test of settable attribute where deletion sets a particular + * value. + */ + abstract static class BaseSettableDefault extends BaseSettable { + + /** + * The get-set descriptor may be used to delete a field from an + * instance of the object, meaning whatever the {@code Deleter} + * chooses. For test purposes, we set a distinctive {@code deleted} + * value. + * + * @throws Throwable unexpectedly + */ + @Test + void descr_delete_sets_deleted() throws Throwable { + gsd.__delete__(o); + assertEquals(deleted, gsd.__get__(o, null)); + // __delete__ is idempotent + gsd.__delete__(o); + assertEquals(deleted, gsd.__get__(o, null)); + // And again for the adopted implementation + gsd.__delete__(p); + assertEquals(deleted, gsd.__get__(p, null)); + gsd.__delete__(p); + assertEquals(deleted, gsd.__get__(p, null)); + } + + /** + * {@link Abstract#delAttr(Object, String)} may be used to delete a + * field from an instance of the object, meaning whatever the + * {@code Deleter} chooses. For test purposes, we expect to read back + * the distinctive {@code deleted} value (for example {@code None} + * where {@code null} internally represents deleted). 
+ * + * @throws Throwable unexpectedly + */ + @Test + void abstract_delAttr_sets_deleted() throws Throwable { + Abstract.delAttr(o, name); + assertEquals(deleted, Abstract.getAttr(o, name)); + // delAttr is idempotent + Abstract.delAttr(o, name); + assertEquals(deleted, Abstract.getAttr(o, name)); + // And again for the adopted implementation + Abstract.delAttr(p, name); + assertEquals(deleted, Abstract.getAttr(p, name)); + Abstract.delAttr(p, name); + assertEquals(deleted, Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as an int") + class TestInt extends BaseSettableIndelible { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("i", 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(42, gsd.__get__(o, null)); + assertPythonEquals(-1, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42, Abstract.getAttr(o, name)); + assertPythonEquals(-1, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + gsd.__set__(o, 43); + gsd.__set__(p, BigInteger.valueOf(44)); + assertEquals(43, o.i); + assertPythonEquals(44, p.i); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, 43); + Abstract.setAttr(p, name, BigInteger.valueOf(44)); + assertEquals(43, o.i); + assertPythonEquals(44, p.i); + } + + @Override + @Test + void set_detects_TypeError() { + // Things that are not a Python int + assertThrows(TypeError.class, () -> gsd.__set__(o, "Gumby")); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 1.0)); + assertThrows(TypeError.class, () -> gsd.__set__(o, Py.None)); + assertThrows(TypeError.class, () -> gsd.__set__(p, Py.None)); + } + } + + @Nested + @DisplayName("implemented as a double") + class TestDouble extends BaseSettable { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("x", "My test x", 
42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(42.0, gsd.__get__(o, null)); + assertEquals(-1.0, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42.0, Abstract.getAttr(o, name)); + assertEquals(-1.0, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + gsd.__set__(o, 1.125); + gsd.__set__(p, BigInteger.valueOf(111_222_333_444L)); + assertEquals(1.125, o.x); + assertEquals(111222333444.0, p.x); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, 1.125); + Abstract.setAttr(p, name, BigInteger.valueOf(111_222_333_444L)); + assertEquals(1.125, o.x); + assertEquals(111222333444.0, p.x); + } + + @Override + @Test + void set_detects_TypeError() { + // Things that are not a Python float + assertThrows(TypeError.class, () -> gsd.__set__(o, "Gumby")); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, "42")); + assertThrows(TypeError.class, () -> gsd.__set__(o, Py.None)); + assertThrows(TypeError.class, () -> gsd.__set__(p, Py.None)); + } + + /** + * The get-set descriptor may be used to delete a field from an + * instance of the object, meaning in this case, set it to + * {@code NaN}. + * + * @throws Throwable unexpectedly + */ + @Test + void descr_delete_sets_NaN() throws Throwable { + gsd.__delete__(o); + assertEquals(Double.NaN, gsd.__get__(o, null)); + // __delete__ is idempotent + gsd.__delete__(o); + assertEquals(Double.NaN, gsd.__get__(o, null)); + + // And again for the adopted implementation + gsd.__delete__(p); + assertEquals(Double.NaN, gsd.__get__(p, null)); + gsd.__delete__(p); + assertEquals(Double.NaN, gsd.__get__(p, null)); + } + + /** + * {@link Abstract#delAttr(Object, String)} to delete a field from + * an instance of the object, meaning in this case, set it to + * {@code NaN}. 
+ * + * @throws Throwable unexpectedly + */ + @Test + void abstract_delAttr_sets_NaN() throws Throwable { + Abstract.delAttr(o, name); + assertEquals(Double.NaN, Abstract.getAttr(o, name)); + // delAttr is idempotent + Abstract.delAttr(o, name); + assertEquals(Double.NaN, Abstract.getAttr(o, name)); + // And again for the adopted implementation + Abstract.delAttr(p, name); + assertEquals(Double.NaN, Abstract.getAttr(p, name)); + Abstract.delAttr(p, name); + assertEquals(Double.NaN, Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as a String") + class TestString extends BaseSettableDefault { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("text", null, "", 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals("42", gsd.__get__(o, null)); + assertEquals("-1", gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals("42", Abstract.getAttr(o, name)); + assertEquals("-1", Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + gsd.__set__(o, "D.P."); + gsd.__set__(p, newPyUnicode("Gumby")); + assertEquals("D.P.", o.t); + assertEquals("Gumby", p.t); + // __set__ works after delete + gsd.__delete__(o); + assertEquals(deleted, o.t); + gsd.__set__(o, "Palin"); + assertEquals("Palin", o.t); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, "D.P."); + Abstract.setAttr(p, name, "Gumby"); + assertEquals("D.P.", o.t); + assertEquals("Gumby", p.t); + // setAttr works after delete + Abstract.delAttr(o, name); + assertEquals(deleted, o.t); + Abstract.setAttr(o, name, "Palin"); + assertEquals("Palin", o.t); + // And again for the adopted implementation + Abstract.delAttr(p, name); + assertEquals(deleted, p.t); + Abstract.setAttr(p, name, "Palin"); + assertEquals("Palin", p.t); + } + + @Override + @Test + void set_detects_TypeError() { + // 
Things that are not a Python str + assertThrows(TypeError.class, () -> gsd.__set__(o, 1)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 10.0)); + assertThrows(TypeError.class, () -> gsd.__set__(o, new Object())); + assertThrows(TypeError.class, () -> gsd.__set__(p, new Object())); + } + } + + @Nested + @DisplayName("implemented as an optional String") + class TestOptionalString extends BaseOptionalReference { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("s", 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals("42", gsd.__get__(o, null)); + assertEquals("-1", gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals("42", Abstract.getAttr(o, name)); + assertEquals("-1", Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + gsd.__set__(o, "D.P."); + gsd.__set__(p, "Gumby"); + assertEquals("D.P.", o.s); + assertEquals("Gumby", p.s); + // __set__ works after delete + gsd.__delete__(o); + assertNull(o.s); + gsd.__set__(o, "Palin"); + assertEquals("Palin", o.s); + // And again for the adopted implementation + gsd.__delete__(p); + assertNull(p.s); + gsd.__set__(p, "Palin"); + assertEquals("Palin", p.s); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, "D.P."); + Abstract.setAttr(p, name, newPyUnicode("Gumby")); + assertEquals("D.P.", o.s); + assertEquals("Gumby", p.s); + // setAttr works after delete + Abstract.delAttr(o, name); + assertNull(o.s); + Abstract.setAttr(o, name, "Palin"); + assertEquals("Palin", o.s); + // And again for the adopted implementation + Abstract.delAttr(p, name); + assertNull(p.s); + Abstract.setAttr(p, name, "Palin"); + assertEquals("Palin", p.s); + } + + @Override + @Test + void set_detects_TypeError() { + // Things that are not a Python str + assertThrows(TypeError.class, () -> gsd.__set__(o, 1)); + 
assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 10.0)); + assertThrows(TypeError.class, () -> gsd.__set__(o, new Object())); + } + } + + @Nested + @DisplayName("implemented as an Object") + class TestObject extends BaseSettableDefault { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("obj", null, Py.None, 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(42, gsd.__get__(o, null)); + assertPythonEquals(-1, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42, Abstract.getAttr(o, name)); + assertPythonEquals(-1, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + final Object dp = "D.P.", gumby = newPyUnicode("Gumby"); + gsd.__set__(o, dp); + gsd.__set__(p, gumby); + // Should get the same object + assertSame(dp, o.obj); + assertSame(gumby, p.obj); + // __set__ works after delete + gsd.__delete__(o); + assertNull(o.obj); + final Object nonPython = new HashMap(); + gsd.__set__(o, nonPython); + assertSame(nonPython, o.obj); + // And again for the adopted implementation + gsd.__delete__(p); + assertNull(p.obj); + gsd.__set__(p, nonPython); + assertSame(nonPython, p.obj); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + final Object dp = "D.P.", gumby = newPyUnicode("Gumby"); + Abstract.setAttr(o, name, dp); + Abstract.setAttr(p, name, gumby); + // Should get the same object + assertSame(dp, o.obj); + assertSame(gumby, p.obj); + // setAttr works after delete + Abstract.delAttr(o, name); + assertNull(o.obj); + final Object palin = "Palin"; + Abstract.setAttr(o, name, palin); + assertSame(palin, o.obj); + } + + @Override + @Test + void set_detects_TypeError() { + // Everything is a Python object (no TypeError) + final float[] everything = {1, 2, 3}; + assertDoesNotThrow( + () -> { gsd.__set__(o, everything); Abstract.setAttr(p, name, System.err); }); + 
assertSame(everything, o.obj); + assertSame(System.err, p.obj); + } + } + + @Nested + @DisplayName("implemented as a PyTuple") + class TestTuple extends BaseSettableDefault { + + PyTuple oRef, pRef; + + @BeforeEach + void setup() throws AttributeError, Throwable { + setup("tup", null, Py.None, 42, -1); + oRef = new PyTuple(42, 42.0, "42"); + pRef = new PyTuple(-1, -1.0, "-1"); + } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(oRef, gsd.__get__(o, null)); + assertEquals(pRef, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(oRef, Abstract.getAttr(o, name)); + assertEquals(pRef, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + final Object tup2 = new PyTuple(2, 3, 4); + gsd.__set__(o, tup2); + assertEquals(tup2, o.tup); + // __set__ works after delete + final Object[] tup3array = new Object[] {3, 4, 5}; + final Object tup3 = PyTuple.from(tup3array); + gsd.__delete__(o); + assertNull(o.tup); + gsd.__set__(o, tup3); + assertEquals(tup3, o.tup); + // And again for the adopted implementation + gsd.__delete__(p); + assertNull(p.aTuple); + gsd.__set__(p, tup3); + assertArrayEquals(tup3array, p.aTuple); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + final Object gumby = PyTuple.from(List.of("D", "P", "Gumby")); + Abstract.setAttr(o, name, gumby); + // Should get the same object + assertSame(gumby, o.tup); + // setAttr works after delete + final Object[] tup3array = new Object[] {3, 4, 5}; + final Object tup3 = PyTuple.from(tup3array); + Abstract.delAttr(o, name); + assertNull(o.tup); + Abstract.setAttr(o, name, tup3); + assertSame(tup3, o.tup); + // And again for the adopted implementation + Abstract.delAttr(p, name); + assertNull(p.aTuple); + Abstract.setAttr(p, name, tup3); + assertArrayEquals(tup3array, p.aTuple); + } + + @Override + @Test + void set_detects_TypeError() { + // Things that 
are not a Python tuple + assertThrows(TypeError.class, () -> gsd.__set__(o, 1)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, "")); + assertThrows(TypeError.class, () -> gsd.__set__(o, new Object())); + assertThrows(TypeError.class, () -> gsd.__set__(p, new Object())); + } + } + + @Nested + @DisplayName("providing a double array (as a tuple)") + class TestDoubleArray extends BaseSettable { + + PyTuple oval, pval, ival, rval; + double[] ref; + + @BeforeEach + void setup() throws AttributeError, Throwable { + setup("doubles", 42, -1); + oval = Py.tuple(1., 42., 1764., 74088.); + pval = Py.tuple(1., -1., 1., -1.); + ival = Py.tuple(3, 14, 15, 926); + ref = new double[] {3.0, 14.0, 15.0, 926.0}; + rval = tupleFrom(ref); + } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(oval, gsd.__get__(o, null)); + p.setDoubleArray(ref); + assertEquals(rval, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(oval, Abstract.getAttr(o, name)); + p.setDoubleArray(ref); + assertEquals(rval, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + gsd.__set__(o, ival); + gsd.__set__(p, pval); + assertArrayEquals(ref, o.doubleArray); + // __set__ works after delete + gsd.__delete__(o); + assertEquals(0, o.doubleArray.length); + gsd.__set__(o, ival); + assertArrayEquals(ref, o.doubleArray); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + // Set through the abstract API and check the backing Java array + Abstract.setAttr(o, name, ival); + assertArrayEquals(ref, o.doubleArray); + // setAttr works after delete + Abstract.delAttr(o, name); + assertEquals(0, o.doubleArray.length); + Abstract.setAttr(o, name, ival); + assertArrayEquals(ref, o.doubleArray); + } + + @Override + @Test + void set_detects_TypeError() { + // Things that are not a Python tuple + assertThrows(TypeError.class, () -> gsd.__set__(o, 2.0)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 
Py.None)); + assertThrows(TypeError.class, () -> gsd.__set__(o, new double[] {1, 2, 3})); + } + } + + @Nested + @DisplayName("providing a string array (as a tuple)") + class TestStringArray extends BaseSettable { + + PyTuple twits, gumbys, rval, sval; + + @BeforeEach + void setup() throws AttributeError, Throwable { + setup("names", 42, -1); + twits = PyTuple.from(TWITS); + gumbys = PyTuple.from(GUMBYS); + } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(twits, gsd.__get__(o, null)); + p.setNameArray(GUMBYS); + assertEquals(gumbys, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(twits, Abstract.getAttr(o, name)); + p.setNameArray(GUMBYS); + assertEquals(gumbys, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + gsd.__set__(o, gumbys); + assertArrayEquals(GUMBYS, o.nameArray); + // __set__ works after delete + gsd.__delete__(o); + assertEquals(0, o.nameArray.length); + gsd.__set__(o, twits); + assertArrayEquals(TWITS, o.nameArray); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + // Set through the abstract API and check the backing Java array + Abstract.setAttr(o, name, gumbys); + assertArrayEquals(GUMBYS, o.nameArray); + // setAttr works after delete + Abstract.delAttr(o, name); + assertEquals(0, o.nameArray.length); + Abstract.setAttr(o, name, twits); + assertArrayEquals(TWITS, o.nameArray); + } + + @Override + @Test + void set_detects_TypeError() { + // Things that are not a Python tuple + assertThrows(TypeError.class, () -> gsd.__set__(o, "")); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, Py.None)); + assertThrows(TypeError.class, () -> gsd.__set__(o, new String[] {})); + } + } + + /** + * Base test of read-only attribute tests. 
+ */ + abstract static class BaseReadonly extends Base { + + /** + * Raises {@link AttributeError} when the get-set descriptor is + * asked to set the field in an instance of the object, even if the + * type is correct. + */ + @Test + void rejects_descr_set() { + assertThrows(AttributeError.class, () -> gsd.__set__(o, 1234)); + assertThrows(AttributeError.class, () -> gsd.__set__(p, 1.0)); + assertThrows(AttributeError.class, () -> gsd.__set__(o, "Gumby")); + assertThrows(AttributeError.class, () -> gsd.__set__(p, Py.None)); + } + + /** + * Raises {@link AttributeError} when + * {@link Abstract#setAttr(Object, String, Object)} tries to set the + * field in an instance of the object, even if the type is correct. + */ + @Test + void rejects_abstract_setAttr() { + assertThrows(AttributeError.class, () -> Abstract.setAttr(o, name, 1234)); + assertThrows(AttributeError.class, () -> Abstract.setAttr(p, name, 1.0)); + assertThrows(AttributeError.class, () -> Abstract.setAttr(o, name, "Gumby")); + assertThrows(AttributeError.class, () -> Abstract.setAttr(p, name, Py.None)); + } + + /** + * Attempting to delete a get-set attribute, where it has no setter + * or deleter (is read-only), from an instance of the object, + * through the get-set descriptor, raises {@link AttributeError}. + */ + @Test + void rejects_descr_delete() { + assertThrows(AttributeError.class, () -> gsd.__delete__(o)); + assertThrows(AttributeError.class, () -> gsd.__set__(o, null)); + } + + /** + * Attempting to delete a get-set attribute, where it has no setter + * or deleter (is read-only), from an instance of the object, + * through {@link Abstract#delAttr(Object, String)}, raises + * {@link AttributeError}. 
+ */ + @Test + void rejects_abstract_delAttr() { + assertThrows(AttributeError.class, () -> Abstract.delAttr(o, name)); + } + } + + @Nested + @DisplayName("implemented as a read-only int") + class TestIntRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("i2", 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(42, gsd.__get__(o, null)); + assertPythonEquals(-1, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42, Abstract.getAttr(o, name)); + assertPythonEquals(-1, Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as a final double") + class TestDoubleRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("x2", "Another x", 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals(42.0, gsd.__get__(o, null)); + assertEquals(-1.0, gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42.0, Abstract.getAttr(o, name)); + assertEquals(-1.0, Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as a read-only String") + class TestStringRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("text2", 42, -1); } + + @Override + @Test + void descr_get_works() throws Throwable { + assertEquals("42", gsd.__get__(o, null)); + assertEquals("-1", gsd.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals("42", Abstract.getAttr(o, name)); + assertEquals("-1", Abstract.getAttr(p, name)); + } + } +} diff --git a/core/src/test/java/org/python/core/TypeExposerMemberTest.java b/core/src/test/java/org/python/core/TypeExposerMemberTest.java new file mode 100644 index 000000000..d03e22faf --- /dev/null +++ 
b/core/src/test/java/org/python/core/TypeExposerMemberTest.java @@ -0,0 +1,817 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.invoke.MethodHandles; +import java.math.BigInteger; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import org.python.core.Exposed.DocString; +import org.python.core.Exposed.Member; + +/** + * Test that members exposed by a Python type defined in + * Java, using the annotation defined in {@link Exposed.Member} on + * fields implementing them, results in data descriptors with + * characteristics that correspond to the definition. + *

+ * There is a nested test suite for each pattern of characteristics. + */ +@DisplayName("For a member exposed by a type") +class TypeExposerMemberTest extends UnitTestSupport { + + /** + * Java base class of a Python type definition showing that some of + * the member definitions explored in the tests can be + * Java-inherited. + */ + private static class BaseMembers { + @Member + int i; + + /** String with change of name. */ + @Member("text") + String t; + } + + /** + * A Python type definition that exhibits a range of member + * definitions explored in the tests. + */ + private static class ObjectWithMembers extends BaseMembers { + + static PyType TYPE = + PyType.fromSpec(new PyType.Spec("ObjectWithMembers", MethodHandles.lookup()) + .adopt(DerivedWithMembers.class)); + + @Member + @DocString("My test x") + double x; + + /** String can be properly deleted without popping up as None */ + @Member(optional = true) + String s; + + /** {@code Object} member (not optional) */ + @Member + Object obj; + + /** Read-only access. */ + @Member(readonly = true) + int i2; + + /** Read-only access since final. */ + @Member + final double x2; + + /** Read-only access given first. */ + @Member(readonly = true, value = "text2") + String t2; + + /** {@code PyTuple} member. */ + @Member + PyTuple tup; + + /** {@code PyUnicode} member: not practical to allow set. */ + @Member(readonly = true) + PyUnicode strhex; + + /** + * Give all the members values based on a single "seed" + * + * @param value starting value for all the members + */ + ObjectWithMembers(double value) { + x2 = x = value; + i2 = i = Math.round((float)value); + t2 = t = s = String.format("%d", i); + obj = i; + tup = new PyTuple(i, x, t); + strhex = newPyUnicode(Integer.toString(i, 16)); + } + } + + /** + * A class that extends the above, with the same Python type. We + * want to check that what we're doing to reflect on the parent + * produces descriptors we can apply to a sub-class. 
+ */ + private static class DerivedWithMembers extends ObjectWithMembers { + DerivedWithMembers(double value) { super(value); } + } + + /** + * Certain nested test classes implement these as standard. A base + * class here is just a way to describe the tests once that reappear + * in each nested case. + */ + abstract static class Base { + + // Working variables for the tests + /** Name of the attribute. */ + String name; + /** Documentation string. */ + String doc; + /** Unbound descriptor by type access to examine or call. */ + PyMemberDescr md; + /** The object on which to attempt access. */ + ObjectWithMembers o; + /** + * Another object on which to attempt access (in case we are getting + * instances mixed up). + */ + ObjectWithMembers p; + + void setup(String name, String doc, double oValue, double pValue) throws Throwable { + this.name = name; + this.doc = doc; + try { + this.md = (PyMemberDescr)ObjectWithMembers.TYPE.lookup(name); + this.o = new ObjectWithMembers(oValue); + this.p = new ObjectWithMembers(pValue); + } catch (ExceptionInInitializerError eie) { + // Errors detected by the Exposer get wrapped so: + Throwable t = eie.getCause(); + throw t == null ? eie : t; + } + } + + void setup(String name, double oValue, double pValue) throws Throwable { + setup(name, null, oValue, pValue); + } + + /** + * The attribute is a member descriptor that correctly reflects the + * annotations in the defining class. + * + * @throws Throwable unexpectedly + */ + @Test + void descr_has_expected_fields() throws Throwable { + assertEquals(name, md.name); + assertEquals(doc, md.doc); + String s = String.format("", name); + assertEquals(s, md.toString()); + assertEquals(s, Abstract.repr(md)); + } + + /** + * The string (repr) describes the type and attribute. 
+ * + * @throws Throwable unexpectedly + */ + void checkToString() throws Throwable { + String s = String.format("", name); + assertEquals(s, md.toString()); + assertEquals(s, Abstract.repr(md)); + } + + /** + * The member descriptor may be used to read the field in an + * instance of the object. + */ + abstract void descr_get_works(); + + /** + * {@link Abstract#getAttr(Object, String)} may be used to read the + * field in an instance of the object. + * + * @throws Throwable unexpectedly + */ + abstract void abstract_getAttr_works() throws Throwable; + } + + /** + * Add tests of setting values to the base tests. + */ + abstract static class BaseSettable extends Base { + + /** + * The member descriptor may be used to set the field in an instance + * of the object. + * + * @throws Throwable unexpectedly + */ + abstract void descr_set_works() throws Throwable; + + /** + * {@link Abstract#setAttr(Object, String, Object)} may be used to + * set the field in an instance of the object. + * + * @throws Throwable unexpectedly + */ + abstract void abstract_setAttr_works() throws Throwable; + + /** + * The member raises {@link TypeError} when supplied a value of + * unacceptable type. + * + * @throws Throwable unexpectedly + */ + abstract void set_detects_TypeError() throws Throwable; + } + + /** + * Base test of settable attribute with primitive implementation. + */ + abstract static class BaseSettablePrimitive extends BaseSettable { + + /** + * Attempting to delete the member implemented by a primitive raises + * {@link TypeError}. + */ + @Test + void rejects_descr_delete() { + assertThrows(TypeError.class, () -> md.__delete__(o)); + assertThrows(TypeError.class, () -> md.__set__(o, null)); + } + + /** + * Attempting to delete the member implemented by a primitive raises + * {@link TypeError}. 
+ */ + @Test + void rejects_abstract_delAttr() { + assertThrows(TypeError.class, () -> Abstract.delAttr(o, name)); + assertThrows(TypeError.class, () -> Abstract.setAttr(o, name, null)); + } + } + + @Nested + @DisplayName("implemented as an int") + class TestInt extends BaseSettablePrimitive { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("i", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals(42, md.__get__(o, null)); + assertEquals(-1, md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42, Abstract.getAttr(o, name)); + assertEquals(-1, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + md.__set__(o, 43); + md.__set__(p, BigInteger.valueOf(44)); + assertEquals(43, o.i); + assertEquals(44, p.i); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, 43); + Abstract.setAttr(p, name, BigInteger.valueOf(44)); + assertEquals(43, o.i); + assertEquals(44, p.i); + } + + @Override + @Test + void set_detects_TypeError() throws Throwable { + // Things that are not a Python int + assertThrows(TypeError.class, () -> md.__set__(o, "Gumby")); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 1.0)); + assertThrows(TypeError.class, () -> md.__set__(o, Py.None)); + } + + } + + @Nested + @DisplayName("implemented as a double") + class TestDouble extends BaseSettablePrimitive { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("x", "My test x", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals(42.0, md.__get__(o, null)); + assertEquals(-1.0, md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42.0, Abstract.getAttr(o, name)); + assertEquals(-1.0, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + 
md.__set__(o, 1.125); + md.__set__(p, BigInteger.valueOf(111_222_333_444L)); + assertEquals(1.125, o.x); + assertEquals(111222333444.0, p.x); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, 1.125); + Abstract.setAttr(p, name, BigInteger.valueOf(111_222_333_444L)); + assertEquals(1.125, o.x); + assertEquals(111222333444.0, p.x); + } + + @Override + @Test + void set_detects_TypeError() throws Throwable { + // Things that are not a Python float + assertThrows(TypeError.class, () -> md.__set__(o, "Gumby")); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, "42")); + assertThrows(TypeError.class, () -> md.__set__(o, Py.None)); + } + + } + + /** + * Base test of settable attribute with object reference + * implementation. + */ + abstract static class BaseSettableReference extends BaseSettable { + + /** + * The member descriptor may be used to delete a field from an + * instance of the object, meaning set it to {@code null} + * internally, appearing as {@code None} externally. + * + * @throws Throwable unexpectedly + */ + @Test + void descr_delete_sets_None() throws Throwable { + md.__delete__(o); + assertEquals(Py.None, md.__get__(o, null)); + // __delete__ is idempotent + md.__delete__(o); + assertEquals(Py.None, md.__get__(o, null)); + } + + /** + * {@link Abstract#delAttr(Object, String)} to delete a field from + * an instance of the object, meaning set it to {@code null} + * internally, appearing as {@code None} externally. 
+ * + * @throws Throwable unexpectedly + */ + @Test + void abstract_delAttr_sets_None() throws Throwable { + Abstract.delAttr(o, name); + assertEquals(Py.None, Abstract.getAttr(o, name)); + // delAttr is idempotent + Abstract.delAttr(o, name); + assertEquals(Py.None, Abstract.getAttr(o, name)); + } + } + + @Nested + @DisplayName("implemented as a String") + class TestString extends BaseSettableReference { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("text", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals("42", md.__get__(o, null)); + assertEquals("-1", md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals("42", Abstract.getAttr(o, name)); + assertEquals("-1", Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + md.__set__(o, "D.P."); + md.__set__(p, newPyUnicode("Gumby")); + assertEquals("D.P.", o.t); + assertEquals("Gumby", p.t); + // __set__ works after delete + md.__delete__(o); + assertNull(o.t); + md.__set__(o, "Palin"); + assertEquals("Palin", o.t); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, name, "D.P."); + Abstract.setAttr(p, name, "Gumby"); + assertEquals("D.P.", o.t); + assertEquals("Gumby", p.t); + // setAttr works after delete + Abstract.delAttr(o, name); + assertNull(o.t); + Abstract.setAttr(o, name, "Palin"); + assertEquals("Palin", o.t); + } + + @Override + @Test + void set_detects_TypeError() throws Throwable { + // Things that are not a Python str + assertThrows(TypeError.class, () -> md.__set__(o, 1)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 10.0)); + assertThrows(TypeError.class, () -> md.__set__(o, new Object())); + } + } + + /** + * Base test of an optional attribute, necessarily with object + * reference implementation. 
+ */ + abstract static class BaseOptionalReference extends BaseSettable { + + /** + * The member descriptor may be used to delete a field from an + * instance of the object, causing it to disappear externally. + * + * @throws Throwable unexpectedly + */ + @Test + void descr_delete_removes() throws Throwable { + md.__delete__(o); + // After deletion, ... + // ... __get__ raises AttributeError + assertThrows(AttributeError.class, () -> md.__get__(o, null)); + // ... __delete__ raises AttributeError + assertThrows(AttributeError.class, () -> md.__delete__(o)); + } + + /** + * {@link Abstract#delAttr(Object, String)} to delete a field from + * an instance of the object, causing it to disappear externally. + * + * @throws Throwable unexpectedly + */ + @Test + void abstract_delAttr_removes() throws Throwable { + Abstract.delAttr(o, name); + // After deletion, ... + // ... getAttr and delAttr raise AttributeError + assertThrows(AttributeError.class, () -> Abstract.getAttr(o, name)); + assertThrows(AttributeError.class, () -> Abstract.delAttr(o, name)); + } + + } + + @Nested + @DisplayName("implemented as an optional String") + class TestOptionalString extends BaseOptionalReference { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("s", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals("42", md.__get__(o, null)); + assertEquals("-1", md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals("42", Abstract.getAttr(o, name)); + assertEquals("-1", Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + md.__set__(o, "D.P."); + md.__set__(p, "Gumby"); + assertEquals("D.P.", o.s); + assertEquals("Gumby", p.s); + // __set__ works after delete + md.__delete__(o); + assertNull(o.s); + md.__set__(o, "Palin"); + assertEquals("Palin", o.s); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + Abstract.setAttr(o, 
name, "D.P."); + Abstract.setAttr(p, name, newPyUnicode("Gumby")); + assertEquals("D.P.", o.s); + assertEquals("Gumby", p.s); + // setAttr works after delete + Abstract.delAttr(o, name); + assertNull(o.s); + Abstract.setAttr(o, name, "Palin"); + assertEquals("Palin", o.s); + } + + @Override + @Test + void set_detects_TypeError() throws Throwable { + // Things that are not a Python str + assertThrows(TypeError.class, () -> md.__set__(o, 1)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, 10.0)); + assertThrows(TypeError.class, () -> md.__set__(o, new Object())); + } + } + + @Nested + @DisplayName("implemented as an Object") + class TestObject extends BaseSettableReference { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("obj", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals(42, md.__get__(o, null)); + assertEquals(-1, md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42, Abstract.getAttr(o, name)); + assertEquals(-1, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + final Object dp = "D.P.", gumby = newPyUnicode("Gumby"); + md.__set__(o, dp); + md.__set__(p, gumby); + // Should get the same object + assertSame(dp, o.obj); + assertSame(gumby, p.obj); + // __set__ works after delete + md.__delete__(o); + assertNull(o.obj); + final Object palin = "Palin"; + md.__set__(o, palin); + assertSame(palin, o.obj); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + final Object dp = "D.P.", gumby = newPyUnicode("Gumby"); + Abstract.setAttr(o, name, dp); + Abstract.setAttr(p, name, gumby); + // Should get the same object + assertSame(dp, o.obj); + assertSame(gumby, p.obj); + // setAttr works after delete + Abstract.delAttr(o, name); + assertNull(o.obj); + final Object palin = "Palin"; + Abstract.setAttr(o, name, palin); + assertSame(palin, o.obj); + } + + @Override + 
@Test + void set_detects_TypeError() throws Throwable { + // Everything is a Python object (no TypeError) + final float[] everything = {1, 2, 3}; + assertDoesNotThrow( + () -> { md.__set__(o, everything); Abstract.setAttr(p, name, System.err); }); + assertSame(everything, o.obj); + assertSame(System.err, p.obj); + } + } + + @Nested + @DisplayName("implemented as a PyTuple") + class TestTuple extends BaseSettableReference { + + PyTuple oRef, pRef; + + @BeforeEach + void setup() throws AttributeError, Throwable { + setup("tup", 42, -1); + oRef = new PyTuple(42, 42.0, "42"); + pRef = new PyTuple(-1, -1.0, "-1"); + } + + @Override + @Test + void descr_get_works() { + assertEquals(oRef, md.__get__(o, null)); + assertEquals(pRef, md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(oRef, Abstract.getAttr(o, name)); + assertEquals(pRef, Abstract.getAttr(p, name)); + } + + @Override + @Test + void descr_set_works() throws Throwable { + final Object tup2 = new PyTuple(2, 3, 4); + md.__set__(o, tup2); + assertEquals(tup2, o.tup); + // __set__ works after delete + md.__delete__(o); + assertNull(o.tup); + final Object tup3 = new PyTuple(3, 4, 5); + md.__set__(o, tup3); + assertEquals(tup3, o.tup); + } + + @Override + @Test + void abstract_setAttr_works() throws Throwable { + final Object gumby = PyTuple.from(List.of("D", "P", "Gumby")); + Abstract.setAttr(o, name, gumby); + // Should get the same object + assertSame(gumby, o.tup); + // setAttr works after delete + Abstract.delAttr(o, name); + assertNull(o.tup); + final Object empty = PyTuple.EMPTY; + Abstract.setAttr(o, name, empty); + assertSame(empty, o.tup); + } + + @Override + @Test + void set_detects_TypeError() throws Throwable { + // Things that are not a Python tuple + assertThrows(TypeError.class, () -> md.__set__(o, 1)); + assertThrows(TypeError.class, () -> Abstract.setAttr(p, name, "")); + assertThrows(TypeError.class, () -> md.__set__(o, new Object())); 
+ } + } + + /** + * Base test of read-only attribute tests. + */ + abstract static class BaseReadonly extends Base { + + /** + * Raises {@link AttributeError} when the member descriptor is asked + * to set the field in an instance of the object, even if the type + * is correct. + */ + @Test + void rejects_descr_set() { + assertThrows(AttributeError.class, () -> md.__set__(o, 1234)); + assertThrows(AttributeError.class, () -> md.__set__(p, 1.0)); + assertThrows(AttributeError.class, () -> md.__set__(o, "Gumby")); + assertThrows(AttributeError.class, () -> md.__set__(p, Py.None)); + } + + /** + * Raises {@link AttributeError} when + * {@link Abstract#setAttr(Object, String, Object)} tries to set the + * field in an instance of the object, even if the type is correct. + */ + @Test + void rejects_abstract_setAttr() { + assertThrows(AttributeError.class, () -> Abstract.setAttr(o, name, 1234)); + assertThrows(AttributeError.class, () -> Abstract.setAttr(p, name, 1.0)); + assertThrows(AttributeError.class, () -> Abstract.setAttr(o, name, "Gumby")); + assertThrows(AttributeError.class, () -> Abstract.setAttr(p, name, Py.None)); + } + + /** + * Raises {@link AttributeError} when the member descriptor is asked + * to delete the field in an instance of the object. + */ + @Test + void rejects_descr_delete() { + assertThrows(AttributeError.class, () -> md.__delete__(o)); + assertThrows(AttributeError.class, () -> md.__set__(o, null)); + } + + /** + * Raises {@link AttributeError} when + * {@link Abstract#delAttr(Object, String)} tries to delete the + * field from an instance of the object. 
+ */ + @Test + void rejects_abstract_delAttr() { + assertThrows(AttributeError.class, () -> Abstract.delAttr(o, name)); + } + } + + @Nested + @DisplayName("implemented as a read-only int") + class TestIntRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("i2", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals(42, md.__get__(o, null)); + assertEquals(-1, md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42, Abstract.getAttr(o, name)); + assertEquals(-1, Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as a final double") + class TestDoubleRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("x2", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals(42.0, md.__get__(o, null)); + assertEquals(-1.0, md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(42.0, Abstract.getAttr(o, name)); + assertEquals(-1.0, Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as a read-only String") + class TestStringRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("text2", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals("42", md.__get__(o, null)); + assertEquals("-1", md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals("42", Abstract.getAttr(o, name)); + assertEquals("-1", Abstract.getAttr(p, name)); + } + } + + @Nested + @DisplayName("implemented as a PyUnicode (read-only)") + class TestPyUnicodeRO extends BaseReadonly { + + @BeforeEach + void setup() throws AttributeError, Throwable { setup("strhex", 42, -1); } + + @Override + @Test + void descr_get_works() { + assertEquals(newPyUnicode("2a"), md.__get__(o, null)); + 
assertEquals(newPyUnicode("-1"), md.__get__(p, null)); + } + + @Override + @Test + void abstract_getAttr_works() throws Throwable { + assertEquals(newPyUnicode("2a"), Abstract.getAttr(o, name)); + assertEquals(newPyUnicode("-1"), Abstract.getAttr(p, name)); + } + } +} diff --git a/core/src/test/java/org/python/core/TypeExposerMethodTest.java b/core/src/test/java/org/python/core/TypeExposerMethodTest.java new file mode 100644 index 000000000..d06c6cbcf --- /dev/null +++ b/core/src/test/java/org/python/core/TypeExposerMethodTest.java @@ -0,0 +1,1027 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.lang.invoke.MethodHandles; +import java.util.Arrays; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.python.base.MethodKind; +import org.python.core.Exposed.Default; +import org.python.core.Exposed.PositionalOnly; +import org.python.core.Exposed.PythonMethod; +import org.python.core.PyType.Spec; + +/** + * Test that methods exposed by a Python type defined in + * Java, using the scheme of annotations defined in {@link Exposed}, + * result in method descriptors with characteristics that correspond + * to their definitions. + *

+ * The first test in each case is to examine the fields in the + * parser that attaches to the {@link ModuleDef.MethodDef}. Then we + * call the function using the {@code __call__} special method, and + * using our "fast call" signatures. + *

+ * There is a nested test suite for each signature pattern. + */ +@DisplayName("A method exposed by a type") +class TypeExposerMethodTest { + + /** + * Certain nested test classes implement these as standard. A base + * class here is just a way to describe the tests once that reappear + * in each nested case. + */ + abstract static class Standard { + + // Working variables for the tests + /** Unbound descriptor by type access to examine or call. */ + PyMethodDescr descr; + /** The object on which to invoke the method. */ + Object obj; + /** The function to examine or call (bound to {@code obj}). */ + PyJavaFunction func; + /** The parser we examine. */ + ArgParser ap; + /** The expected result of calling the method. */ + Object[] exp; + + /** + * A parser attached to the method descriptor should have field + * values that correctly reflect the signature and annotations in + * the defining class. + */ + abstract void has_expected_fields(); + + /** + * Call the function using the {@code __call__} special method with + * arguments correct for the method's specification. The method + * should obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports__call__() throws Throwable; + + /** + * Call the method using the {@code __call__} special method with + * arguments correct for the method's specification, and explicitly + * zero or more keywords. The method should obtain the correct + * result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_keywords() throws Throwable; + + /** + * Call the method using the {@code __call__} special method and an + * unexpected keyword: where none is expected, for a positional + * argument, or simply an unacceptable name. The method should throw + * {@link TypeError}. 
+ * + * @throws Throwable unexpectedly + */ + abstract void raises_TypeError_on_unexpected_keyword() throws Throwable; + + /** + * Call the function using the Java call interface with arguments + * correct for the function's specification. The function should + * obtain the correct result (and not throw). + * + * @throws Throwable unexpectedly + */ + abstract void supports_java_call() throws Throwable; + + /** + * Check that the fields of the parser match expectations for a + * method with no collector parameters and a certain number of + * positional-only parameters. + * + * @param kind static or instance + * @param name of method + * @param count of parameters + * @param posonlycount count of positional-only parameters + */ + void no_collector(MethodKind kind, String name, int count, int posonlycount) { + assertEquals(name, ap.name); + assertEquals(kind, ap.methodKind); + assertEquals(count, ap.argnames.length); + assertEquals(count, ap.argcount); + assertEquals(posonlycount, ap.posonlyargcount); + assertEquals(0, ap.kwonlyargcount); + assertEquals(count, ap.regargcount); + assertEquals(-1, ap.varArgsIndex); + assertEquals(-1, ap.varKeywordsIndex); + } + + /** + * Check that the fields of the parser match expectations for a + * static method with no collector parameters and a certain number + * of positional-only parameters. + * + * @param name of method + * @param count of parameters + * @param posonly count of positional-only parameters + */ + void no_collector_static(String name, int count, int posonly) { + no_collector(MethodKind.STATIC, name, count, posonly); + } + + /** + * Check that the fields of the parser match expectations for an + * instance method with no collector parameters and a certain number + * of positional-only parameters. 
+ * + * @param name of method + * @param count of parameters + * @param posonly count of positional-only parameters + */ + void no_collector_instance(String name, int count, int posonly) { + no_collector(MethodKind.INSTANCE, name, count, posonly); + } + + /** + * Helper to set up each test. + * + * @param name of the method + * @param o to use as the self argument + * @throws AttributeError if method not found + * @throws Throwable other errors + */ + void setup(String name, Object o) throws AttributeError, Throwable { + descr = (PyMethodDescr)PyType.of(o).lookup(name); + ap = descr.argParser; + obj = o; + func = (PyJavaFunction)Abstract.getAttr(obj, name); + } + + /** + * Check the result of a call against {@link #exp}. The reference + * result is the same throughout a given sub-class test. + * + * @param result of call + */ + void check_result(PyTuple result) { assertArrayEquals(exp, result.value); } + + } + + /** + * A Python type definition that exhibits a range of method + * signatures explored in the tests. Methods named {@code m*()} are + * instance methods to Python, declared to Java as either instance + * methods ({@code this} is {@code self}) or as static methods + * ({@code self} is the first parameter). + */ + static class SimpleObject { + + static PyType TYPE = PyType.fromSpec(new Spec("Simple", MethodHandles.lookup())); + + /** + * See {@link NoParams}: no parameters are allowed (after + * {@code self}). + */ + @PythonMethod + void m0() {} + + /** + * See {@link OnePos}: a single positional parameter + * + * @param a positional arg + * @return the arg (tuple) + */ + @PythonMethod + PyTuple m1(double a) { return Py.tuple(this, a); } + + /** + * See {@link PositionalByDefault}: the parameters are + * positional-only as a result of the default exposure. Use static + * style, arbitrarily. 
+ * + * @param self target + * @param a positional arg + * @param b positional arg + * @param c positional arg + * @return the args + */ + @PythonMethod + static PyTuple m3(SimpleObject self, int a, String b, Object c) { + return Py.tuple(self, a, b, c); + } + + /** + * See {@link PositionalWithDefaults}: the parameters are + * positional-only as a result of the default exposure. Use static + * style, arbitrarily. + * + * @param self target + * @param a positional arg + * @param b positional arg = 2 + * @param c positional arg = 3 + * @return the args + */ + @PythonMethod + static PyTuple m3pd(SimpleObject self, int a, @Default("2") String b, + @Default("3") Object c) { + return Py.tuple(self, a, b, c); + } + + /** + * See {@link PositionalOrKeywordParams}: the parameters are + * positional-or-keyword but none is positional-only. + * + * @param a positional-or-keyword arg + * @param b positional-or-keyword arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonMethod(positionalOnly = false) + PyTuple m3pk(int a, String b, Object c) { return Py.tuple(this, a, b, c); } + + /** + * See {@link SomePositionalOnlyParams}: two parameters are + * positional-only as a result of an annotation. + * + * @param a positional arg + * @param b positional arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonMethod + PyTuple m3p2(int a, @PositionalOnly String b, Object c) { return Py.tuple(this, a, b, c); } + } + + /** + * A Python type definition that exhibits a range of method + * signatures explored in the tests, and has an adopted + * implementation {@link ExampleObject2}. Methods named {@code m*()} + * are instance methods to Python, declared to Java as either + * instance methods ({@code this} is {@code self}) or as static + * methods ({@code self} is the first parameter). 
+ */ + static class ExampleObject { + + static PyType TYPE = PyType.fromSpec( // + new Spec("Example", MethodHandles.lookup()) // + .adopt(ExampleObject2.class)); + + /** + * See {@link NoParams}: no parameters are allowed (after + * {@code self}). + */ + @PythonMethod(primary = false) + void m0() {} + + @SuppressWarnings("unused") + @PythonMethod + static void m0(ExampleObject2 self) {} + + /** + * See {@link OnePos}: a single positional parameter + * + * @param a positional arg + * @return the args + */ + @PythonMethod + PyTuple m1(double a) { return Py.tuple(this, a); } + + @PythonMethod(primary = false) + static PyTuple m1(ExampleObject2 self, double a) { return Py.tuple(self, a); } + + /** + * See {@link PositionalByDefault}: the parameters are + * positional-only as a result of the default exposure. + * + * @param a positional arg + * @param b positional arg + * @param c positional arg + * @return the args + */ + @PythonMethod + PyTuple m3(int a, String b, Object c) { return Py.tuple(this, a, b, c); } + + @PythonMethod(primary = false) + static PyTuple m3(ExampleObject2 self, int a, String b, Object c) { + return Py.tuple(self, a, b, c); + } + + /** + * See {@link PositionalWithDefaults}: the parameters are + * positional-only as a result of the default exposure. Use instance + * style (the secondary definition is static). + * + * @param a positional arg + * @param b positional arg = 2 + * @param c positional arg = 3 + * @return the args + */ + @PythonMethod + PyTuple m3pd(int a, @Default("2") String b, @Default("3") Object c) { + return Py.tuple(this, a, b, c); + } + + /** + * Secondary definition does not repeat annotations. 
+ * + * @param self target + * @param a positional arg + * @param b positional arg = 2 + * @param c positional arg = 3 + * @return the args + */ + @PythonMethod(primary = false) + static PyTuple m3pd(ExampleObject2 self, int a, String b, Object c) { + return Py.tuple(self, a, b, c); + } + + /** + * See {@link PositionalOrKeywordParams}: the parameters are + * positional-or-keyword but none are positional-only. + * + * @param a positional-or-keyword arg + * @param b positional-or-keyword arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonMethod(positionalOnly = false) + PyTuple m3pk(int a, String b, Object c) { return Py.tuple(this, a, b, c); } + + @PythonMethod(primary = false) + static PyTuple m3pk(ExampleObject2 self, int a, String b, Object c) { + return Py.tuple(self, a, b, c); + } + + /** + * See {@link SomePositionalOnlyParams}: two parameters are + * positional-only as a result of an annotation. + * + * @param a positional arg + * @param b positional arg + * @param c positional-or-keyword arg + * @return the args + */ + @PythonMethod + PyTuple m3p2(int a, @PositionalOnly String b, Object c) { return Py.tuple(this, a, b, c); } + + @PythonMethod(primary = false) + static PyTuple m3p2(ExampleObject2 self, int a, String b, Object c) { + return Py.tuple(self, a, b, c); + } + } + + /** + * Class cited as an "adopted implementation" of + * {@link ExampleObject} + */ + static class ExampleObject2 { + // Ensure canonical counterpart is initialised + @SuppressWarnings("unused") + private static PyType CANONICAL = ExampleObject.TYPE; + } + + /** {@link SimpleObject#m0()} accepts no arguments. 
*/ + @Nested + @DisplayName("with no parameters") + class NoParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Simple.m0 + setup("m0", new SimpleObject()); + // The method is declared void (which means return None) + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m0", 0, 0); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call type(obj).m0(obj) + Object[] args = {obj}; + Object r = descr.__call__(args, null); + assertEquals(Py.None, r); + + // We call obj.m0() + args = new Object[0]; + r = func.__call__(args, null); + assertEquals(Py.None, r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call type(obj).m0(obj) + Object[] args = {obj}; + String[] names = {}; + Object r = descr.__call__(args, names); + assertEquals(Py.None, r); + + // We call obj.m0() + args = new Object[0]; + r = func.__call__(args, names); + assertEquals(Py.None, r); + } + + /** To set anything by keyword is a {@code TypeError}. */ + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call type(obj).m0(obj, c=3) + Object[] args = {obj, 3}; + String[] names = {"c"}; + assertThrows(TypeError.class, () -> descr.__call__(args, names)); + + // We call obj.m0(c=3) + Object[] args2 = Arrays.copyOfRange(args, 1, args.length); + assertThrows(TypeError.class, () -> func.__call__(args2, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call type(obj).m0(obj) + Object r = descr.call(obj); + assertEquals(Py.None, r); + + // We call obj.m0() + r = func.call(); + assertEquals(Py.None, r); + } + } + + /** + * {@link NoParams} with {@link ExampleObject} as the + * implementation. 
+ */ + @Nested + @DisplayName("with no parameters" + " (canonical)") + class NoParams1 extends NoParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m0 + setup("m0", new ExampleObject()); + } + } + + /** + * {@link NoParams} with {@link ExampleObject2} as the + * implementation. + */ + @Nested + @DisplayName("with no parameters" + " (adopted)") + class NoParams2 extends NoParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m0 + setup("m0", new ExampleObject2()); + } + } + + /** + * {@link SimpleObject#m1(double)} accepts 1 argument that + * must be given by position. + */ + @Nested + @DisplayName("with a single positional-only parameter by default") + class OnePos extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Simple.m1 + setup("m1", new SimpleObject()); + exp = new Object[] {obj, 42.0}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m1", 1, 1); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call type(obj).m1(obj, 42.0) + Object[] args = {obj, 42.0}; + PyTuple r = (PyTuple)descr.__call__(args, null); + check_result(r); + + // We call obj.m1(42.0) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, null); + check_result(r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call type(obj).m1(obj, 42.0) + Object[] args = {obj, 42.0}; + String[] names = {}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + + // We call obj.m1(42.0) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call type(obj).m1(obj, a=42.0) + Object[] args = {obj, 42.0}; + String[] names = {"a"}; + assertThrows(TypeError.class, () -> 
descr.__call__(args, names)); + + // We call obj.m1(a=42.0) + Object[] args2 = Arrays.copyOfRange(args, 1, args.length); + assertThrows(TypeError.class, () -> func.__call__(args2, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call type(obj).m1(obj, 42.0) + PyTuple r = (PyTuple)descr.call(obj, 42.0); + check_result(r); + + // We call obj.m1(42.0) + r = (PyTuple)func.call(42.0); + check_result(r); + } + } + + /** + * {@link OnePos} with {@link ExampleObject} as the implementation. + */ + @Nested + @DisplayName("with a single positional-only parameter by default" + " (canonical)") + class OnePos1 extends OnePos { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m1 + setup("m1", new ExampleObject()); + exp = new Object[] {obj, 42.0}; + } + } + + /** + * {@link OnePos} with {@link ExampleObject2} as the implementation. + */ + @Nested + @DisplayName("with a single positional-only parameter by default" + " (adopted)") + class OnePos2 extends OnePos { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m1 + setup("m1", new ExampleObject2()); + exp = new Object[] {obj, 42.0}; + } + } + + /** + * {@link SimpleObject#m3(SimpleObject, int, String, Object)} + * accepts 3 arguments that must be given by position. 
+ */ + @Nested + @DisplayName("with positional-only parameters by default") + class PositionalByDefault extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Simple.m3 + setup("m3", new SimpleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3", 3, 3); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call type(obj).m3(obj, 1, '2', 3) + Object[] args = {obj, 1, "2", 3}; + PyTuple r = (PyTuple)descr.__call__(args, null); + check_result(r); + + // We call obj.m3(1, '2', 3) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, null); + check_result(r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call type(obj).m3(obj, 1, '2', 3) + Object[] args = {obj, 1, "2", 3}; + String[] names = {}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + + // We call obj.m3(1, '2', 3) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call type(obj).m3(obj, 1, '2', c=3) + Object[] args = {obj, 1, "2", 3}; + String[] names = {"c"}; + assertThrows(TypeError.class, () -> descr.__call__(args, names)); + + // We call obj.m3(1, '2', c=3) + Object[] args2 = Arrays.copyOfRange(args, 1, args.length); + assertThrows(TypeError.class, () -> func.__call__(args2, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call type(obj).m3(obj, 1, '2', 3) + PyTuple r = (PyTuple)descr.call(obj, 1, "2", 3); + check_result(r); + + // We call obj.m3(1, '2', 3) + r = (PyTuple)func.call(1, "2", 3); + check_result(r); + } + } + + /** + * {@link PositionalByDefault} with {@link ExampleObject} as the + * implementation. 
+ */ + @Nested + @DisplayName("with positional-only parameters by default" + " (canonical)") + class PositionalByDefault1 extends PositionalByDefault { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3 + setup("m3", new ExampleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link PositionalByDefault} with {@link ExampleObject2} as the + * implementation. + */ + @Nested + @DisplayName("with positional-only parameters by default" + " (adopted)") + class PositionalByDefault2 extends PositionalByDefault { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3 + setup("m3", new ExampleObject2()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link SimpleObject#m3pd(SimpleObject, int, String, Object)} + * accepts 3 arguments that must be given by position but two + * have defaults. + */ + @Nested + @DisplayName("with positional-only parameters and default values") + class PositionalWithDefaults extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Simple.m3pd + setup("m3pd", new SimpleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3pd", 3, 3); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call type(obj).m3pd(obj, 1) + Object[] args = {obj, 1}; + PyTuple r = (PyTuple)descr.__call__(args, null); + check_result(r); + + // We call obj.m3pd(1) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, null); + check_result(r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call type(obj).m3pd(obj, 1) + Object[] args = {obj, 1}; + String[] names = {}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + + // We call obj.m3pd(1) + args = Arrays.copyOfRange(args, 1, args.length); + r = 
(PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() { + // We call type(obj).m3pd(obj, 1, c=3) + Object[] args = {obj, 1, 3}; + String[] names = {"c"}; + assertThrows(TypeError.class, () -> descr.__call__(args, names)); + + // We call obj.m3pd(1, c=3) + Object[] args2 = Arrays.copyOfRange(args, 1, args.length); + assertThrows(TypeError.class, () -> func.__call__(args2, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call type(obj).m3pd(obj, 1) + PyTuple r = (PyTuple)descr.call(obj, 1); + check_result(r); + + // We call obj.m3pd(1) + r = (PyTuple)func.call(1); + check_result(r); + } + } + + /** + * {@link PositionalWithDefaults} with {@link ExampleObject} as the + * implementation. + */ + @Nested + @DisplayName("with positional-only parameters and default values" + " (canonical)") + class PositionalWithDefaults1 extends PositionalWithDefaults { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3pd + setup("m3pd", new ExampleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link PositionalWithDefaults} with {@link ExampleObject2} as the + * implementation. + */ + @Nested + @DisplayName("with positional-only parameters and default values" + " (adopted)") + class PositionalWithDefaults2 extends PositionalWithDefaults { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3pd + setup("m3pd", new ExampleObject2()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link SimpleObject#m3pk(int, String, Object)} accepts 3 + * arguments that may be given by position or keyword. 
+ */ + @Nested + @DisplayName("with positional-or-keyword parameters") + class PositionalOrKeywordParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Simple.m3pk + setup("m3pk", new SimpleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3pk", 3, 0); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call type(obj).m3pk(obj, 1, '2', 3) + Object[] args = {obj, 1, "2", 3}; + String[] names = {}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + + // We call obj.m3pk(1, '2', 3) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void supports_keywords() throws Throwable { + // We call type(obj).m3pk(obj, 1, c=3, b='2') + Object[] args = {obj, 1, 3, "2"}; + String[] names = {"c", "b"}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + + // We call obj.m3pk(1, c=3, b='2') + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() throws Throwable { + // We call type(obj).m3pk(obj, 1, c=3, b='2', x=4) + Object[] args = {obj, 1, 3, "2", 4}; + String[] names = {"c", "b", /* unknown */"x"}; + assertThrows(TypeError.class, () -> descr.__call__(args, names)); + + // We call obj.m3pk(1, c=3, b='2', x=4) + Object[] args2 = Arrays.copyOfRange(args, 1, args.length); + assertThrows(TypeError.class, () -> func.__call__(args2, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call type(obj).m3pk(obj, 1, '2', 3) + PyTuple r = (PyTuple)descr.call(obj, 1, "2", 3); + check_result(r); + + // We call obj.m3pk(1, '2', 3) + r = (PyTuple)func.call(1, "2", 3); + check_result(r); + } + } + + /** + * {@link PositionalOrKeywordParams} with 
{@link ExampleObject} as + * the implementation. + */ + @Nested + @DisplayName("with positional-or-keyword parameters" + " (canonical)") + class PositionalOrKeywordParams1 extends PositionalOrKeywordParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3pk + setup("m3pk", new ExampleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link PositionalOrKeywordParams} with {@link ExampleObject2} as + * the implementation. + */ + @Nested + @DisplayName("with positional-or-keyword parameters" + " (adopted)") + class PositionalOrKeywordParams2 extends PositionalOrKeywordParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3pk + setup("m3pk", new ExampleObject2()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link SimpleObject#m3p2(int, String, Object)} accepts 3 + * arguments, two of which may be given by position only, and the + * last by either position or keyword. + */ + @Nested + @DisplayName("with two positional-only parameters") + class SomePositionalOnlyParams extends Standard { + + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Simple.m3p2 + setup("m3p2", new SimpleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + + @Override + @Test + void has_expected_fields() { no_collector_instance("m3p2", 3, 2); } + + @Override + @Test + void supports__call__() throws Throwable { + // We call type(obj).m3p2(obj, 1, '2', 3) + Object[] args = {obj, 1, "2", 3}; + String[] names = {}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + } + + /** To set {@code c} by keyword is ok. 
*/ + @Override + @Test + void supports_keywords() throws Throwable { + // We call type(obj).m3p2(obj, 1, '2', c=3) + Object[] args = {obj, 1, "2", 3}; + String[] names = {"c"}; + PyTuple r = (PyTuple)descr.__call__(args, names); + check_result(r); + + // We call obj.m3p2(1, '2', c=3) + args = Arrays.copyOfRange(args, 1, args.length); + r = (PyTuple)func.__call__(args, names); + check_result(r); + } + + @Override + @Test + void raises_TypeError_on_unexpected_keyword() throws Throwable { + // We call type(obj).m3p2(obj, 1, c=3, b='2') + Object[] args = {obj, 1, 3, "2"}; + String[] names = {"c", /* positional */"b"}; + assertThrows(TypeError.class, () -> descr.__call__(args, names)); + + // We call obj.m3p2(1, c=3, b='2') + Object[] args2 = Arrays.copyOfRange(args, 1, args.length); + assertThrows(TypeError.class, () -> func.__call__(args2, names)); + } + + @Override + @Test + void supports_java_call() throws Throwable { + // We call type(obj).m3p2(obj, 1, '2', 3) + PyTuple r = (PyTuple)descr.call(obj, 1, "2", 3); + check_result(r); + + // We call obj.m3p2(1, '2', 3) + r = (PyTuple)func.call(1, "2", 3); + check_result(r); + } + } + + /** + * {@link SomePositionalOnlyParams} with {@link ExampleObject} as + * the implementation. + */ + @Nested + @DisplayName("with two positional-only parameters" + " (canonical)") + class SomePositionalOnlyParams1 extends SomePositionalOnlyParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3p2 + setup("m3p2", new ExampleObject()); + exp = new Object[] {obj, 1, "2", 3}; + } + } + + /** + * {@link SomePositionalOnlyParams} with {@link ExampleObject2} as + * the implementation. 
+ */ + @Nested + @DisplayName("with two positional-only parameters" + " (adopted)") + class SomePositionalOnlyParams2 extends SomePositionalOnlyParams { + + @Override + @BeforeEach + void setup() throws AttributeError, Throwable { + // descr = Example.m3p2 + setup("m3p2", new ExampleObject2()); + exp = new Object[] {obj, 1, "2", 3}; + } + } +} diff --git a/core/src/test/java/org/python/core/TypeExposerTest.java b/core/src/test/java/org/python/core/TypeExposerTest.java new file mode 100644 index 000000000..60444f2e8 --- /dev/null +++ b/core/src/test/java/org/python/core/TypeExposerTest.java @@ -0,0 +1,425 @@ +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.util.Map; +import java.util.TreeMap; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.python.core.Exposed.Deleter; +import org.python.core.Exposed.DocString; +import org.python.core.Exposed.Getter; +import org.python.core.Exposed.KeywordCollector; +import org.python.core.Exposed.Member; +import org.python.core.Exposed.PositionalCollector; +import org.python.core.Exposed.PositionalOnly; +import org.python.core.Exposed.PythonMethod; +import org.python.core.Exposed.PythonStaticMethod; +import org.python.core.Exposed.Setter; +import org.python.core.Exposer.CallableSpec; +import org.python.core.TypeExposer.GetSetSpec; +import org.python.core.TypeExposer.MemberSpec; + +/** + * Test that the annotations defined in {@link Exposed}, and + * intended for exposing attributes of a type defined in Java, are + * 
processed correctly by a {@link Exposer} to a {@link TypeExposer} + * containing appropriate attribute specifications. This tests a + * large part of the exposure mechanism, without activating the + * wider Python type system. + */ +@DisplayName("The Fake built-in type ...") +class TypeExposerTest { + + /** + * This class is not actually a Python type definition, but is + * annotated as if it were. We will test whether the type dictionary + * is filled as expected. + * + * Methods named {@code m*()} are instance methods to Python, + * declared to Java as instance methods ({@code this} is + * {@code self}). + * + * Methods named {@code f*()} are static methods to Python (no + * {@code self}), declared to Java as static methods. + */ + static class Fake { + + static final Lookup LOOKUP = MethodHandles.lookup(); + + // Instance methods ------------------------------------------- + + // Signature: (/) + @PythonStaticMethod + static void f0() {} + + // Signature: ($self, /) + @PythonMethod + void m0() {} + + // Signature: (a, b, c /) + @PythonStaticMethod + static PyTuple f3(int a, String b, Object c) { return Py.tuple(a, b, c); } + + // Signature: ($self, a, b, c /) + @SuppressWarnings("static-method") + @PythonMethod + PyTuple m3(int a, String b, Object c) { return Py.tuple(a, b, c); } + + // Signature: (/, a, b, c) + @PythonStaticMethod(positionalOnly = false) + static PyTuple f3pk(int a, String b, Object c) { return Py.tuple(a, b, c); } + + // Signature: ($self, /, a, b, c) + @SuppressWarnings("static-method") + @PythonMethod(positionalOnly = false) + PyTuple m3pk(int a, String b, Object c) { return Py.tuple(a, b, c); } + + // Signature: (a, b, /, c) + @PythonStaticMethod + static PyTuple f3p2(int a, @PositionalOnly String b, Object c) { return Py.tuple(a, b, c); } + + // Signature: ($self, a, b, /, c) + @SuppressWarnings("static-method") + @PythonMethod + PyTuple m3p2(int a, @PositionalOnly String b, Object c) { return Py.tuple(a, b, c); } + + // Signature: (a, b, 
/, *c) + @PythonStaticMethod + static PyTuple f2v(int a, String b, @PositionalCollector PyTuple c) { + return Py.tuple(a, b, c); + } + + // Signature: ($self, a, b, /, *c) + @SuppressWarnings("static-method") + @PythonMethod + PyTuple m2v(int a, String b, @PositionalCollector PyTuple c) { return Py.tuple(a, b, c); } + + // Signature: (a, b, /, *c, **d) + @PythonStaticMethod + static PyTuple f2pvk(int a, String b, @PositionalCollector PyTuple c, + @KeywordCollector PyDict d) { + return Py.tuple(a, b, c, d); + } + + // Signature: ($self, a, b, /, *c, **d) + @SuppressWarnings("static-method") + @PythonMethod + PyTuple m2pvk(int a, String b, @PositionalCollector PyTuple c, @KeywordCollector PyDict d) { + return Py.tuple(a, b, c, d); + } + + // Instance members ------------------------------------------- + + // Plain int + @Member + int i; + + // Plain float (with doc string) + @Member + @DocString("Doc string for x") + double x; + + // String with change of name. + @Member("text") + String t; + + // String can be properly deleted without popping up as None + @Member(optional = true) + String s; + + // Arbitrary object + @Member + Object obj; + + // Read-only by annotation + @Member(readonly = true) + int i2; + + // Read-only by final. + @Member + final double x2 = 1.0; + + // Read-only by annotation given before name change + @Member(readonly = true, value = "text2") + String t2; + + // String again (?) 
+ @Member(readonly = true) + PyUnicode strhex2; + + // Instance attributes ---------------------------------------- + + // Read-only (but counts the updates made to thing) + int count = 0; + // Writable, but cannot delete + String foo; + // Writable, and has delete operation + double thingValue; + + @Getter + Object count() { return count; } + + @Getter + Object foo() { return foo; } + + @Setter + void foo(Object v) throws TypeError, Throwable { + try { + foo = (String)v; + } catch (ClassCastException cce) { + foo = ""; + } + } + + @Getter + Object thing() { return thingValue; } + + @Setter("thing") + void thing(Object v) throws TypeError, Throwable { + try { + thingValue = (Double)v; + } catch (ClassCastException cce) { + thingValue = Double.NaN; + } + count += 1; + } + + @Deleter("thing") + void deleteThing() throws TypeError, Throwable { + thingValue = Double.NaN; + count = 0; + } + } + + /** + * We collect the method specifications here during set-up for + * examination in tests. + */ + static Map methods = new TreeMap<>(); + /** + * We collect the member specifications here during set-up for + * examination in tests. + */ + static Map members = new TreeMap<>(); + /** + * We collect the get-set attribute specifications here during + * set-up for examination in tests. + */ + static Map getsets = new TreeMap<>(); + + /** + * Set-up method filling {@link #methods}, {@link #members} and + * {@link #getsets}. + */ + @BeforeAll + static void createExposer() { + // type=null in order not to wake the type system + TypeExposer exposer = Exposer.exposeType(null, Fake.class, null); + + // Populate the dictionaries used in the tests. 
+ for (Exposer.Spec s : exposer.specs.values()) { + if (s instanceof CallableSpec) { + CallableSpec ms = (CallableSpec)s; + methods.put(ms.name, ms); + } else if (s instanceof MemberSpec) { + MemberSpec ms = (MemberSpec)s; + members.put(ms.name, ms); + } else if (s instanceof GetSetSpec) { + GetSetSpec gs = (GetSetSpec)s; + getsets.put(gs.name, gs); + } + } + } + + /** + * Check that a method, member or get-set exists for a given name. + * + * @param dict of members + * @param name of member + * @return the spec (for further checks) + */ + private static S find(Map dict, String name) { + S spec = dict.get(name); + assertNotNull(spec, () -> name + " not found"); + return spec; + } + + // ---------------------------------------------------------------- + @Test + @DisplayName("has the expected number of methods.") + @SuppressWarnings("static-method") + void numberOfMethods() { assertEquals(12, methods.size(), "number of methods"); } + + /** + * Check that a method with the expected signature is in the method + * table. 
+ * + * @param sig signature + */ + @ParameterizedTest(name = "{0}") + @DisplayName("has a method with signature ...") + @ValueSource(strings = { // + "f0()", // + "m0($self, /)", // + "f3(a, b, c, /)", // + "m3($self, a, b, c, /)", // + "f3pk(a, b, c)", // + "m3pk($self, /, a, b, c)", // + "f3p2(a, b, /, c)", // + "m3p2($self, a, b, /, c)", // + "f2v(a, b, /, *c)", // + "m2v($self, a, b, /, *c)", // + "f2pvk(a, b, /, *c, **d)", // + "m2pvk($self, a, b, /, *c, **d)", // + }) + @SuppressWarnings("static-method") + void checkSignature(String sig) { + int k = sig.indexOf('('); + assert k > 0; + String name = sig.substring(0, k); + String expect = sig.substring(k); + CallableSpec ms = find(methods, name); + ArgParser ap = ms.getParser(); + assertEquals(expect, ap.textSignature()); + } + + // ---------------------------------------------------------------- + @Test + @DisplayName("has the expected number of members.") + @SuppressWarnings("static-method") + void numberOfMembers() { assertEquals(9, members.size(), "number of members"); } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a writable member ...") + @ValueSource(strings = { // + "i", // + "x", // + "text", // name for t + "s", // + "obj", // + }) + @SuppressWarnings("static-method") + void checkWritableMember(String name) { + MemberSpec ms = find(members, name); + assertFalse(ms.readonly, () -> name + " readonly"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a readonly member ...") + @ValueSource(strings = { // + "i2", // + "x2", // + "text2", // + "strhex2", // + }) + @SuppressWarnings("static-method") + void checkReadonlyMember(String name) { + MemberSpec ms = find(members, name); + assertTrue(ms.readonly, () -> name + " readonly"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has an optional member ...") + @ValueSource(strings = { // + "s", // + }) + @SuppressWarnings("static-method") + void checkOptionalMember(String name) { + MemberSpec ms = find(members, name); + 
assertTrue(ms.optional, () -> name + " optional"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a non-optional member ...") + @ValueSource(strings = { // + "i", // + "x", // + "text", // name for t + "obj", // + "i2", // + "x2", // + "text2", // + "strhex2", // + }) + @SuppressWarnings("static-method") + void checkMandatoryMember(String name) { + MemberSpec ms = find(members, name); + assertFalse(ms.optional, () -> name + " optional"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a documented member ...") + @ValueSource(strings = { // + "x", // + }) + @SuppressWarnings("static-method") + void checkDocMember(String name) { + MemberSpec ms = find(members, name); + assertEquals("Doc string for " + name, ms.doc); + } + + // ---------------------------------------------------------------- + @Test + @DisplayName("has the expected number of get-set attributes.") + @SuppressWarnings("static-method") + void numberOfGetSets() { assertEquals(3, getsets.size(), "number of get-set attributes"); } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a readonly get-set ...") + @ValueSource(strings = { // + "count", // + }) + @SuppressWarnings("static-method") + void checkReadonlyGetSet(String name) { + GetSetSpec gs = find(getsets, name); + assertTrue(gs.readonly(), () -> name + " readonly"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a writable get-set ...") + @ValueSource(strings = { // + "foo", // + "thing", // + }) + @SuppressWarnings("static-method") + void checkWritableGetSet(String name) { + GetSetSpec gs = find(getsets, name); + assertFalse(gs.readonly(), () -> name + " readonly"); + // There must be a setter for each implementation + assertEquals(gs.getters.size(), gs.setters.size(), () -> name + " setter size mismatch"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has a non-optional get-set ...") + @ValueSource(strings = { // + "foo", // no deleter is defined for foo + }) + @SuppressWarnings("static-method") + void 
checkMandatoryGetSet(String name) { + GetSetSpec gs = find(getsets, name); + assertFalse(gs.optional(), () -> name + " optional"); + } + + @ParameterizedTest(name = "{0}") + @DisplayName("has an optional get-set ...") + @ValueSource(strings = { // + "thing", // + }) + @SuppressWarnings("static-method") + void checkOptionalGetSet(String name) { + GetSetSpec gs = find(getsets, name); + assertTrue(gs.optional(), () -> name + " optional"); + // There must be a deleter for each implementation + assertEquals(gs.getters.size(), gs.deleters.size(), () -> name + " deleter size mismatch"); + } +} diff --git a/core/src/test/java/org/python/core/UnarySlotWrapperTest.java b/core/src/test/java/org/python/core/UnarySlotWrapperTest.java new file mode 100644 index 000000000..f70538125 --- /dev/null +++ b/core/src/test/java/org/python/core/UnarySlotWrapperTest.java @@ -0,0 +1,246 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import java.math.BigInteger; +import java.util.List; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Test the {@link PyWrapperDescr}s for unary special functions on a + * variety of types. The particular operations are not the focus: we + * are testing the mechanisms for creating and calling slot + * wrappers. + */ +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class UnarySlotWrapperTest extends UnitTestSupport { + + @Nested + @DisplayName("The slot wrapper '__neg__'") + class Slot__neg__ extends SlotWrapperTestBase { + + final String NAME = "__neg__"; + + @Nested + @DisplayName("of 'int' objects") + class OfInt extends UnaryTest { + + @Override + Object expected(Object x) { + // Test material is 32 bit. Maybe BigInteger instead? 
+ return Integer.valueOf(-toInt(x)); + } + + @Override + void check(Object exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + // x is Integer, BigInteger, PyLong, Boolean + Integer ix = 42; + super.setup(PyLong.TYPE, NAME, + List.of(ix, BigInteger.valueOf(ix), newPyLong(ix), false, true)); + } + + /** + * As {@link #supports_call()} but with empty keyword array. NOTE(review): the body makes the same {@code makeBoundCallKW} call as {@link #supports_bound_call_with_keywords()}; confirm whether an unbound keyword call was intended here. + */ + @Test + void supports_call_with_keywords() throws Throwable { + for (Object x : getCases()) { + Object exp = expected(x); + checkInt(exp, makeBoundCallKW(x)); + } + } + + /** + * As {@link #supports_bound_call()} but with empty keyword array. + */ + @Test + void supports_bound_call_with_keywords() throws Throwable { + for (Object x : getCases()) { + Object exp = expected(x); + checkInt(exp, makeBoundCallKW(x)); + } + } + } + + @Nested + @DisplayName("of 'bool' objects") + class OfBool extends UnaryTest { + + @Override + Object expected(Boolean x) { return x ? 
-1 : 0; } + + @Override + void check(Object exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + super.setup(PyBool.TYPE, NAME, List.of(false, true)); + } + } + + @Nested + @DisplayName("of 'float' objects") + class OfFloat extends UnaryTest { + + private double exp; + + @Override + Object expected(Object x) { return exp; } + + @Override + void check(Object exp, Object r) throws Throwable { + checkFloat(exp, r); + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + // Invoke for Double, PyFloat + double dx = 42.0; + exp = -dx; + super.setup(PyFloat.TYPE, NAME, + List.of(dx, newPyFloat(dx))); + } + } + } + + @Nested + @DisplayName("The slot wrapper '__repr__'") + class Slot__repr__ extends SlotWrapperTestBase { + + final String NAME = "__repr__"; + + @Nested + @DisplayName("of 'int' objects") + class OfInt extends UnaryTest { + + @Override + String expected(Object x) { return Integer.toString(toInt(x)); } + + @Override + void check(String exp, Object r) throws Throwable { checkStr(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + // x is Integer, BigInteger, PyLong but not Boolean + Integer ix = 42; + super.setup(PyLong.TYPE, NAME, List.of(ix, BigInteger.valueOf(ix), newPyLong(ix))); + } + } + + @Nested + @DisplayName("of 'bool' objects") + class OfBool extends UnaryTest { + + @Override + String expected(Boolean x) { return x ? 
"True" : "False"; } + + @Override + void check(String exp, Object r) throws Throwable { checkStr(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + super.setup(PyBool.TYPE, NAME, List.of(false, true)); + } + } + + @Nested + @DisplayName("of 'float' objects") + class OfFloat extends UnaryTest { + + private String exp; + + @Override + String expected(Object x) { return exp; } + + @Override + void check(String exp, Object r) throws Throwable { + checkStr(exp, r); + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + // Invoke for Double, PyFloat + double dx = 42.0; + exp = "42.0"; + super.setup(PyFloat.TYPE, NAME, + List.of(dx, newPyFloat(dx))); + } + } + } + + @Nested + @DisplayName("The slot wrapper '__hash__'") + class Slot__hash__ extends SlotWrapperTestBase { + + final String NAME = "__hash__"; + + @Nested + @DisplayName("of 'int' objects") + class OfInt extends LenTest { + + @Override + Integer expected(Object x) { return toInt(x); } + + @Override + void check(Integer exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + // x is Integer, BigInteger, PyLong, Boolean + Integer ix = 42; + super.setup(PyLong.TYPE, NAME, + List.of(ix, BigInteger.valueOf(ix), newPyLong(ix), false, true)); + } + } + + @Nested + @DisplayName("of 'bool' objects") + class OfBool extends LenTest { + + @Override + Integer expected(Boolean x) { return x ? 
1 : 0; } + + @Override + void check(Integer exp, Object r) throws Throwable { checkInt(exp, r); } + + @BeforeEach + void setup() throws AttributeError, Throwable { + super.setup(PyBool.TYPE, NAME, List.of(false, true)); + } + } + + // XXX Disabled until float.__hash__ implemented + // @Nested + @DisplayName("of 'float' objects") + class OfFloat extends LenTest { + + private Integer exp; + + @Override + Integer expected(Object x) { return exp; } + + @Override + void check(Integer exp, Object r) throws Throwable { + checkInt(exp, r); + } + + @BeforeEach + void setup() throws AttributeError, Throwable { + // Invoke for Double, PyFloat + double dx = 42.0; + exp = 42; // since equal in Python + super.setup(PyFloat.TYPE, NAME, + List.of(dx, newPyFloat(dx))); + } + } + } +} diff --git a/core/src/test/java/org/python/core/UnitTestSupport.java b/core/src/test/java/org/python/core/UnitTestSupport.java new file mode 100644 index 000000000..fcea52f0d --- /dev/null +++ b/core/src/test/java/org/python/core/UnitTestSupport.java @@ -0,0 +1,309 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.math.BigInteger; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.function.Supplier; + +import org.junit.jupiter.api.function.Executable; +import org.python.base.InterpreterError; + +/** + * A base class for unit tests that defines some common convenience + * functions for which the need recurs. A unit test that extends + * this base will initialise the type system before running. + */ +public class UnitTestSupport { + + /** The {@link PyType} {@code object}. 
*/ + /* + * This is needed to initialise the type system in a controlled way. + * Java static initialisation of PyType brings into being the + * critical built-in types in a carefully chosen order. If we use a + * Python type out of the blue (e.g. call a PyLong static method), + * initialising that class to use it causes the type system to + * initialise, but the type that caused it will complete its + * initialisation last. This subverts the careful ordering of the + * Python types in PyType. + */ + static PyType OBJECT = PyType.OBJECT_TYPE; + + /** + * Convert test value to Java {@code int} (avoiding + * {@link PyLong#asInt(Object)}). + * + * @param v to convert + * @return converted value + * @throws ArithmeticError if out of range + * @throws IllegalArgumentException if wrong type + */ + static int toInt(Object v) throws ArithmeticError, IllegalArgumentException { + if (v instanceof Integer) + return ((Integer)v).intValue(); + else if (v instanceof BigInteger) + return ((BigInteger)v).intValueExact(); + else if (v instanceof PyLong) + return ((PyLong)v).value.intValueExact(); // exact, as for BigInteger: never truncate silently + else if (v instanceof Boolean) + return (Boolean)v ? 1 : 0; + + throw new IllegalArgumentException(String.format("cannot convert '%s' to int", v)); + } + + /** + * Convert test value to Java {@code String} (avoiding + * {@code __str__} for {@code PyUnicode} and non-crafted types). + * + * @param v to convert + * @return converted value + */ + static String toString(Object v) { + if (v instanceof String) + return (String)v; + else if (v instanceof PyUnicode) + return ((PyUnicode)v).toString(); + else + return v.toString(); + } + + /** + * Force creation of an actual {@link PyLong} + * + * @param value to assign + * @return from this value. + */ + static PyLong newPyLong(BigInteger value) { return new PyLong(PyLong.TYPE, value); } + + /** + * Force creation of an actual {@link PyLong} from Object + * + * @param value to assign + * @return from this value. 
+ */ + static PyLong newPyLong(Object value) { + BigInteger vv = BigInteger.ZERO; + try { + vv = PyLong.asBigInteger(value); + } catch (Throwable e) { + e.printStackTrace(); + fail("Failed to create a PyLong"); + } + return newPyLong(vv); + } + + /** + * Convert test value to double (avoiding + * {@link PyFloat#asDouble(Object)}). + * + * @param v to convert + * @return converted value + * @throws IllegalArgumentException if wrong type + */ + static double toDouble(Object v) { + if (v instanceof Double) + return ((Double)v).doubleValue(); + else if (v instanceof PyFloat) + return ((PyFloat)v).value; + else if (v instanceof Integer) + return ((Integer)v).intValue(); + else if (v instanceof BigInteger) + return ((BigInteger)v).doubleValue(); + else if (v instanceof PyLong) + return ((PyLong)v).value.doubleValue(); + else if (v instanceof Boolean) + return (Boolean)v ? 1. : 0.; + + throw new IllegalArgumentException(String.format("cannot convert '%s' to double", v)); + } + + /** + * Force creation of an actual {@link PyFloat} + * + * @param value to wrap + * @return from this value. + */ + static PyFloat newPyFloat(double value) { return new PyFloat(PyFloat.TYPE, value); } + + /** + * Force creation of an actual {@link PyFloat} from Object + * + * @param value to wrap (the test fails, reporting the cause, if it cannot be converted) + * @return from this value. + */ + static PyFloat newPyFloat(Object value) { + double vv = 0.0; + try { + vv = toDouble(value); + } catch (Throwable e) { + fail("Failed to create a PyFloat", e); + } + return newPyFloat(vv); + } + + /** + * Force creation of an actual {@link PyUnicode} from a + * {@code String} to be treated as in the usual Java encoding. + * Surrogate pairs will be interpreted as their characters, unless + * lone. + * + * @param value to wrap + * @return from this value. 
+ */ + static PyUnicode newPyUnicode(String value) { return new PyUnicode(PyUnicode.TYPE, value); } + + /** + * Force creation of an actual {@link PyUnicode} from an array of + * code points, which could include surrogates, even in pairs. + * + * @param value the code points + * @return from this value. + */ + static PyUnicode newPyUnicode(int[] value) { + return new PyUnicode(PyUnicode.TYPE, value); + } + + /** + * The object {@code o} is equal to the expected value according to + * Python (e.g. {@code True == 1} and strings may be equal even if + * one is {@code String} and the other {@link PyUnicode}). An + * unchecked exception may be thrown if the comparison goes badly + * enough. + * + * @param expected value + * @param o to test + */ + public static void assertPythonEquals(Object expected, Object o) { + if (pythonEquals(expected, o)) { + return; + } else { + // This saves making a message ourselves + assertEquals(expected, o); + } + } + + /** + * As {@link #assertPythonEquals(Object, Object)} but with a message + * supplied by the caller. + * + * @param expected value + * @param o to test + * @param messageSupplier supplies the message seen in failures + */ + public static void assertPythonEquals(Object expected, Object o, + Supplier messageSupplier) { + if (pythonEquals(expected, o)) { + return; + } else { + fail(messageSupplier); + } + } + + /** + * Test whether the object {@code o} is equal to the expected value + * according to Python (e.g. {@code True == 1} and strings may be + * equal even if one is a {@link PyUnicode}. An unchecked exception + * may be thrown if the comparison goes badly enough. 
+ * + * @param x value + * @param o to test + */ + private static boolean pythonEquals(Object x, Object o) { + try { + return Abstract.richCompareBool(x, o, Comparison.EQ); + } catch (RuntimeException | Error e) { + // Let unchecked exception fly + throw e; + } catch (Throwable t) { + // Wrap checked exception + throw new InterpreterError(t); + } + } + + /** + * The Python type of {@code o} is exactly the one expected. + * + * @param expected type + * @param o to test + */ + public static void assertPythonType(PyType expected, Object o) { + assertTrue(expected.checkExact(o), + () -> String.format("Java %s not a Python '%s'", + o.getClass().getSimpleName(), expected.name)); + } + + /** + * Assertion for test that a result is a string beginning a certain + * way. + * + * @param expected prefix + * @param actual result to match + */ + static void assertStartsWith(String expected, String actual) { + assertTrue(actual.startsWith(expected), + "should start with " + expected); + } + + /** + * Invoke an action expected to raise a Python exception and check + * the message. The return value may be the subject of further + * assertions. + * + * @param type of exception + * @param expected type of exception + * @param action to invoke + * @param expectedMessage expected message text + * @return what was thrown + */ + static T assertRaises(Class expected, + Executable action, String expectedMessage) { + T t = assertThrows(expected, action); + assertEquals(expectedMessage, t.getMessage()); + return t; + } + + /** + * Find the (Gradle) build directory by ascending the file structure + * from the path to this class as a resource. Several files we need + * in tests are to be found at a well-defined location relative to + * the build directory. + * + * This may be used from classes build by the IDE, as long as a + * Gradle build has been run too. 
* + * + * @return the build directory + */ + public static Path buildDirectory() { + // Start at the resources for this class + Class c = UnitTestSupport.class; + try { + URI rsc = c.getResource("").toURI(); + Path path = Path.of(rsc); + // Navigate up by the length of the package name + String pkg = c.getPackage().getName(); + int k = -1; + do { path = path.getParent(); } while ((k = pkg.indexOf('.', k + 1)) >= 0); + + // path is now the folder that contains project classes + // System.err.println(" ... contains classes"); + + // Continue up until path/build exists + while ((path = path.getParent()) != null) { + Path buildPath = path.resolve("build"); + if (buildPath.toFile().isDirectory()) { return buildPath; } + } + + // We reached the root: maybe we did a "clean" + throw new InterpreterError("build directory not found from %s", rsc.toString()); + } catch (URISyntaxException e) { + throw new InterpreterError(e); + } + } +} diff --git a/core/src/test/java/org/python/core/stringlib/FormatParsingTest.java b/core/src/test/java/org/python/core/stringlib/FormatParsingTest.java new file mode 100644 index 000000000..1df281bc3 --- /dev/null +++ b/core/src/test/java/org/python/core/stringlib/FormatParsingTest.java @@ -0,0 +1,342 @@ +// Copyright (c)2021 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.core.stringlib; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import org.python.core.PyType; +import org.python.core.stringlib.InternalFormat.Spec; + +/** + * Tests of {@code stringlib} support for formatting. 
These facilities + * lie behind the {@code __format__} methods of built-in types, and + * methods exposed by Python module {@code _string}, for example. + */ +class FormatParsingTest { + + /** + * Test constructing a specification {@link Spec} from a format + * string. Note that a parsed specification is not a Python type so + * it reports errors usiung Java-only exceptions that clients may + * convert to {@code ValueError}. + */ + @Nested + @DisplayName("An InternalFormat.Spec correctly") + class InternalFormatSpecTest { + + @Test + @DisplayName("interprets ''") + public void fromEmpty() { + InternalFormat.Spec spec = InternalFormat.fromText(""); + assertEquals(Spec.NONE, spec.type); + } + + @Test + @DisplayName("interprets 'x'") + public void fromPlain() { + InternalFormat.Spec spec = InternalFormat.fromText("x"); + assertFalse(Spec.specified(spec.align)); + assertFalse(Spec.specified(spec.fill)); + assertFalse(Spec.specified(spec.width)); + assertFalse(Spec.specified(spec.precision)); + assertEquals('x', spec.type); + } + + @Test + @DisplayName("interprets ' type of exception + * @param expected type of exception + * @param spec to parse + * @param expectedMessage expected message text + * @return what was thrown + */ + private T assertParseError( + Class expected, String spec, + String expectedMessage) { + T t = assertThrows(expected, + () -> InternalFormat.fromText(spec)); + assertEquals(expectedMessage, t.getMessage()); + return t; + } + } + + /** + * Test that the correct "chunks" are parsed from a format string, + * and presented as a {@link MarkupIterator} + * ({@code formatteriterator} in Python). We make this test using + * the Java API here, because it is statically-typed, and Python + * iteration need not be working. In Python it produces + * {@code tuple}s. 
+ */ + @Nested + @DisplayName("A stringlib.MarkupIterator") + class MarkupIteratorTest { + + @Test + @DisplayName("is a Python formatteriterator") + void hasPythonType() { + MarkupIterator it = new MarkupIterator("abc"); + PyType type = it.getType(); + assertEquals("formatteriterator", type.getName()); + } + + @Test + @DisplayName("parses the literal 'abc'") + void parsesLiteral() { + MarkupIterator it = new MarkupIterator("abc"); + assertEquals("abc", it.nextChunk().literalText); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses 'First, thou shalt count to {0}'") + void parsesTerminalIndex() { + MarkupIterator it = new MarkupIterator( + "First, thou shalt count to {0}"); + MarkupIterator.Chunk chunk = it.nextChunk(); + assertEquals("First, thou shalt count to ", + chunk.literalText); + assertEquals("0", chunk.fieldName); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses 'Weight in tons {0.weight!r:s}'") + void parsesTerminalAttribute() { + MarkupIterator it = + new MarkupIterator("Weight in tons {0.weight!r:s}"); + MarkupIterator.Chunk chunk = it.nextChunk(); + assertEquals("Weight in tons ", chunk.literalText); + assertEquals("0.weight", chunk.fieldName); + assertEquals("r", chunk.conversion); + assertEquals("s", chunk.formatSpec); + } + + @Test + @DisplayName("parses '{{' and '}}'") + void parsesEscapedBraces() { + MarkupIterator.Chunk chunk = + (new MarkupIterator("{{")).nextChunk(); + assertEquals("{", chunk.literalText); + + chunk = (new MarkupIterator("}}")).nextChunk(); + assertEquals("}", chunk.literalText); + + chunk = (new MarkupIterator("{{}}")).nextChunk(); + assertEquals("{}", chunk.literalText); + } + + @Test + @DisplayName("parses '{0:.{1}}'") + void parsesNestedReference() { + MarkupIterator.Chunk chunk = + (new MarkupIterator("{0:.{1}}")).nextChunk(); + assertEquals("0", chunk.fieldName); + assertEquals(".{1}", chunk.formatSpec); + assertTrue(chunk.formatSpecNeedsExpanding); + } + + @Test + @DisplayName("rejects 
conversions '{!}' and '{!rrrr}'") + void rejectsBadConversion() { + assertMarkupError("{!}", + "end of format while looking for conversion specifier"); + assertMarkupError("{!rrrr}", + "expected ':' after conversion specifier"); + } + + @Test + @DisplayName("rejects unbalanced '{' and '}'") + void rejectsUnbalencedBraces() { + assertMarkupError("{", + "Single '{' encountered in format string"); + assertMarkupError("}", + "Single '}' encountered in format string"); + } + + private void assertMarkupError(String markup, String expected) { + MarkupIterator it = new MarkupIterator(markup); + Exception e = assertThrows(IllegalArgumentException.class, + () -> it.nextChunk()); + assertEquals(expected, e.getMessage()); + } + } + + /** + * Test that field names as they might appear in format statements + * are correctly broken up by a {@link FieldNameIterator} + * ({@code fieldnameiterator} in Python). + * + * We make this test using the Java API here, because it is + * statically-typed, and Python iteration need not be working. In + * Python it produces a {@code tuple} containing a first element and + * iterator of succeeding elements, each described by a tuple. 
+ */ + @Nested + @DisplayName("A stringlib.FieldNameIterator") + class FieldNameIteratorTest { + + @Test + @DisplayName("is a Python fieldnameiterator") + void hasPythonType() { + FieldNameIterator it = new FieldNameIterator("abc"); + PyType type = it.getType(); + assertEquals("fieldnameiterator", type.getName()); + } + + @Test + @DisplayName("parses a simple name") + void simpleName() { + FieldNameIterator it = new FieldNameIterator("abc"); + assertEquals("abc", it.head()); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses a simple number") + void simpleNumber() { + FieldNameIterator it = new FieldNameIterator("3"); + assertEquals(3, it.head()); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses 'abc[0]'") + void nameIndex() { + FieldNameIterator it = new FieldNameIterator("abc[0]"); + assertEquals("abc", it.head()); + FieldNameIterator.Chunk chunk = it.nextChunk(); + assertEquals(0, chunk.value); + assertFalse(chunk.is_attr); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses 'abc.def'") + void nameDotName() { + FieldNameIterator it = new FieldNameIterator("abc.def"); + assertEquals("abc", it.head()); + FieldNameIterator.Chunk chunk = it.nextChunk(); + assertEquals("def", chunk.value); + assertTrue(chunk.is_attr); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses 'a[2].b'") + void nameIndexDotName() { + FieldNameIterator it = new FieldNameIterator("a[2].b"); + FieldNameIterator.Chunk chunk; + assertEquals("a", it.head()); + chunk = it.nextChunk(); + assertEquals(2, chunk.value); + assertFalse(chunk.is_attr); + chunk = it.nextChunk(); + assertEquals("b", chunk.value); + assertTrue(chunk.is_attr); + assertNull(it.nextChunk()); + } + + @Test + @DisplayName("parses '1.a[2].b[3]'") + void numberDotNameIndexDotNameIndex() { + FieldNameIterator it = new FieldNameIterator("1.a[2].b[3]"); + FieldNameIterator.Chunk chunk; + assertEquals(1, it.head()); + chunk = it.nextChunk(); + assertEquals("a", 
chunk.value); + chunk = it.nextChunk(); + assertEquals(2, chunk.value); + assertFalse(chunk.is_attr); + chunk = it.nextChunk(); + assertEquals("b", chunk.value); + assertTrue(chunk.is_attr); + chunk = it.nextChunk(); + assertEquals(3, chunk.value); + assertFalse(chunk.is_attr); + assertNull(it.nextChunk()); + } + } +} diff --git a/core/src/test/java/org/python/modules/marshalTest.java b/core/src/test/java/org/python/modules/marshalTest.java new file mode 100644 index 000000000..7d3218620 --- /dev/null +++ b/core/src/test/java/org/python/modules/marshalTest.java @@ -0,0 +1,572 @@ +// Copyright (c)2022 Jython Developers. +// Licensed to PSF under a contributor agreement. +package org.python.modules; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.math.BigInteger; +import java.util.List; +import java.util.stream.Stream; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import org.python.core.Py; +import org.python.core.PyBytes; +import org.python.core.PyDict; +import org.python.core.PyList; +import org.python.core.PySequence; +import org.python.core.PyTuple; +import org.python.core.PyType; +import org.python.core.StopIteration; +import org.python.core.UnitTestSupport; +import org.python.modules.marshal.BytesReader; +import org.python.modules.marshal.BytesWriter; +import org.python.modules.marshal.Reader; +import org.python.modules.marshal.StreamReader; +import org.python.modules.marshal.StreamWriter; +import 
org.python.modules.marshal.Writer; +import org.python.core.stringlib.ByteArrayBuilder; + +/** + * Test reading (and to some extent writing) objects using the + * marshal module. We are interested for now only in reading code + * objects and their reference results. + * + * We test the Java API only, consistent with our interest in + * reading code for execution. + */ +@DisplayName("Read and write objects with marshal") +class marshalTest extends UnitTestSupport { + + /** + * Base of tests that read or write elementary values where a + * reference is available serialised by CPython. + */ + abstract static class AbstractElementTest { + + /** + * Test cases for serialising 16-bit ints. + * + * @return the examples + */ + static Stream int16() { + return Stream.of( // + intArguments(0, bytes(0x00, 0x00)), // + intArguments(1, bytes(0x01, 0x00)), // + intArguments(-42, bytes(0xd6, 0xff)), + intArguments(Short.MAX_VALUE, bytes(0xff, 0x7f))); + } + + /** + * Test cases for serialising 32-bit ints. + * + * @return the examples + */ + static Stream int32() { + return Stream.of( // + intArguments(0, bytes(0x00, 0x00, 0x00, 0x00)), + intArguments(1, bytes(0x01, 0x00, 0x00, 0x00)), + intArguments(-42, bytes(0xd6, 0xff, 0xff, 0xff)), + intArguments(Integer.MAX_VALUE, bytes(0xff, 0xff, 0xff, 0x7f))); + } + + /** + * Test cases for serialising 64-bit ints. + * + * @return the examples + */ + static Stream int64() { + return Stream.of( // + longArguments(0, bytes(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)), + longArguments(1, bytes(0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)), + longArguments(-42, bytes(0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff)), + longArguments(7450580596923828125L, + bytes(0x9d, 0x07, 0x10, 0xfa, 0x93, 0xc7, 0x65, 0x67)), + longArguments(Long.MAX_VALUE, + bytes(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f))); + } + + /** + * Test cases for serialising {@code BigInteger}s. 
+ * + * @return the examples + */ + static Stream bigint() { + return Stream.of( // + arguments(new BigInteger("17557851463681"), // + bytes(0x03, 0x00, 0x00, 0x00, 0x01, 0x60, 0xff, 0x02, 0xe0, 0x3f)), + arguments(new BigInteger("35184372088832"), // + bytes(0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00)), + arguments(new BigInteger("-2232232135326160725639168"), // + bytes(0xfa, 0xff, 0xff, 0xff, 0x00, 0x00, 0xfd, 0x20, 0xa7, 0x39, 0x4b, + 0x5f, 0x18, 0x0b, 0x3b, 0x00))); + } + + /** + * Wrap a {@code byte}, {@code short} or {@code int} expected value + * and its marshalled form as a arguments for a test. + * + * @param expected result + * @param bytes containing value to decode + * @return arguments for the test + */ + private static Arguments intArguments(int expected, byte[] bytes) { + return arguments(expected, bytes); + } + + /** + * Wrap a {@code long} expected value and its marshalled form as a + * arguments for a test. + * + * @param expected result + * @param bytes containing value to decode + * @return arguments for the test + */ + private static Arguments longArguments(long expected, byte[] bytes) { + assert bytes.length == 8; + return arguments(expected, bytes); + } + } + + /** + * Tests reading from a {@code ByteBuffer}, which is also how we + * shall address objects with the Python buffer protocol + * ({@link PyBytes} etc.), and native {@code byte[]}. 
+ */ + @Nested + @DisplayName("Read elementary values from bytes") + class ReadBytesElementary extends AbstractElementTest { + + @DisplayName("r.readShort()") + @ParameterizedTest(name = "r.readShort() = {0}") + @MethodSource("int16") + void int16read(Integer expected, byte[] b) { + Reader r = new BytesReader(b); + assertEquals(expected, r.readShort()); + } + + @DisplayName("r.readInt()") + @ParameterizedTest(name = "r.readInt() = {0}") + @MethodSource("int32") + void int32read(Integer expected, byte[] b) { + Reader r = new BytesReader(b); + assertEquals(expected, r.readInt()); + } + + @DisplayName("r.readLong()") + @ParameterizedTest(name = "r.readInt() = {0}") + @MethodSource("int64") + void int64read(Long expected, byte[] b) { + Reader r = new BytesReader(b); + assertEquals(expected, r.readLong()); + } + + @DisplayName("r.readBigInteger()") + @ParameterizedTest(name = "r.readBigInteger() = {0}") + @MethodSource("bigint") + void bigintread(BigInteger expected, byte[] b) { + Reader r = new BytesReader(b); + assertEquals(expected, r.readBigInteger()); + } + } + + /** + * Tests reading elementary values from an {@code InputStream}, + * which is also how we shall address file-like objects in Python, + * and native Java input streams. 
+ */ + @Nested + @DisplayName("Read elementary values from a stream") + class ReadStreamElementary extends AbstractElementTest { + + @DisplayName("r.readShort()") + @ParameterizedTest(name = "r.readShort() = {0}") + @MethodSource("int16") + void int16read(Integer expected, byte[] b) { + Reader r = new StreamReader(new ByteArrayInputStream(b)); + assertEquals(expected, r.readShort()); + } + + @DisplayName("r.readInt()") + @ParameterizedTest(name = "r.readInt() = {0}") + @MethodSource("int32") + void int32read(Integer expected, byte[] b) { + Reader r = new StreamReader(new ByteArrayInputStream(b)); + assertEquals(expected, r.readInt()); + } + + @DisplayName("r.readLong()") + @ParameterizedTest(name = "r.readInt() = {0}") + @MethodSource("int64") + void int64read(Long expected, byte[] b) { + Reader r = new StreamReader(new ByteArrayInputStream(b)); + assertEquals(expected, r.readLong()); + } + + @DisplayName("r.readBigInteger()") + @ParameterizedTest(name = "r.readBigInteger() = {0}") + @MethodSource("bigint") + void bigintread(BigInteger expected, byte[] b) { + Reader r = new StreamReader(new ByteArrayInputStream(b)); + assertEquals(expected, r.readBigInteger()); + } + } + + /** + * Tests writing to a {@code ByteArrayBuilder}, which is how we + * create a {@link PyBytes} serialising an object. In the test, we + * recover a native {@code byte[]} to compare with the expected + * bytes. 
+ */ + @Nested + @DisplayName("Write elementary values to bytes") + class WriteBytesElementary extends AbstractElementTest { + + @DisplayName("w.writeShort()") + @ParameterizedTest(name = "w.writeShort({0})") + @MethodSource("int16") + void int16write(Integer v, byte[] expected) { + ByteArrayBuilder b = new ByteArrayBuilder(2); + Writer w = new BytesWriter(b, 4); + w.writeShort(v); + assertArrayEquals(expected, b.take()); + } + + @DisplayName("w.writeInt()") + @ParameterizedTest(name = "w.writeInt({0})") + @MethodSource("int32") + void int32write(Integer v, byte[] expected) { + ByteArrayBuilder b = new ByteArrayBuilder(4); + Writer w = new BytesWriter(b, 4); + w.writeInt(v); + assertArrayEquals(expected, b.take()); + } + + @DisplayName("w.writeLong()") + @ParameterizedTest(name = "w.writeInt({0})") + @MethodSource("int64") + void int64write(Long v, byte[] expected) { + ByteArrayBuilder b = new ByteArrayBuilder(8); + Writer w = new BytesWriter(b, 4); + w.writeLong(v); + assertArrayEquals(expected, b.take()); + } + + @DisplayName("w.writeBigInteger()") + @ParameterizedTest(name = "w.writeBigInteger({0})") + @MethodSource("bigint") + void bigintwrite(BigInteger v, byte[] expected) { + ByteArrayBuilder b = new ByteArrayBuilder(); + Writer w = new BytesWriter(b, 4); + w.writeBigInteger(v); + assertArrayEquals(expected, b.take()); + } + } + + /** + * Tests writing elementary values to an {@code OutputStream}, which + * is also how we shall address file-like objects in Python, and + * native Java input streams. In the test, we write to a + * {@link ByteArrayOutputStream} and recover a native {@code byte[]} + * to compare with the expected bytes. 
+ */ + @Nested + @DisplayName("Write elementary values to a stream") + class WriteStreamElementary extends AbstractElementTest { + + @DisplayName("w.writeShort()") + @ParameterizedTest(name = "w.writeShort({0})") + @MethodSource("int16") + void int16write(Integer v, byte[] expected) { + ByteArrayOutputStream b = new ByteArrayOutputStream(); + Writer w = new StreamWriter(b, 4); + w.writeShort(v); + assertArrayEquals(expected, b.toByteArray()); + } + + @DisplayName("w.writeInt()") + @ParameterizedTest(name = "w.writeInt({0})") + @MethodSource("int32") + void int32write(Integer v, byte[] expected) { + ByteArrayOutputStream b = new ByteArrayOutputStream(); + Writer w = new StreamWriter(b, 4); + w.writeInt(v); + assertArrayEquals(expected, b.toByteArray()); + } + + @DisplayName("w.writeLong()") + @ParameterizedTest(name = "w.writeInt({0})") + @MethodSource("int64") + void int64write(Long v, byte[] expected) { + ByteArrayOutputStream b = new ByteArrayOutputStream(); + Writer w = new StreamWriter(b, 4); + w.writeLong(v); + assertArrayEquals(expected, b.toByteArray()); + } + + @DisplayName("w.writeBigInteger()") + @ParameterizedTest(name = "w.writeBigInteger({0})") + @MethodSource("bigint") + void bigintwrite(BigInteger v, byte[] expected) { + ByteArrayOutputStream b = new ByteArrayOutputStream(); + Writer w = new StreamWriter(b, 4); + w.writeBigInteger(v); + assertArrayEquals(expected, b.toByteArray()); + } + } + + /** Base of tests that read objects serialised by CPython. */ + abstract static class AbstractLoadTest { + + /** + * Provide a stream of examples as parameter sets to the tests. In + * each example, the expression is given (as documentation only) + * that was originally evaluated by CPython, and the serialisation + * of the result as bytes. The final argument is an equivalent + * expression within this implementation of Python. Deserialising + * the bytes should be equal to this argument. + *

+ * The examples were generated programmatically from a list of the + * expressions using the script at + * {@code ~/build-tools/python/tool/marshal_test.py}. + * + * @return the examples for object loading tests. + */ + static Stream objectLoadExamples() { + return Stream.of( // + loadExample("None", // tc='N' + bytes(0x4e), Py.None), + loadExample("False", // tc='F' + bytes(0x46), false), + loadExample("True", // tc='T' + bytes(0x54), true), + loadExample("0", // tc='i' + bytes(0xe9, 0x00, 0x00, 0x00, 0x00), 0), + loadExample("1", // tc='i' + bytes(0xe9, 0x01, 0x00, 0x00, 0x00), 1), + loadExample("-42", // tc='i' + bytes(0xe9, 0xd6, 0xff, 0xff, 0xff), -42), + loadExample("2**31-1", // tc='i' + bytes(0xe9, 0xff, 0xff, 0xff, 0x7f), 2147483647), + loadExample("2047**4", // tc='l' + bytes(0xec, 0x03, 0x00, 0x00, 0x00, 0x01, 0x60, 0xff, 0x02, 0xe0, 0x3f), + new BigInteger("17557851463681")), + loadExample("2**45", // tc='l' + bytes(0xec, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00), + new BigInteger("35184372088832")), + loadExample("-42**15", // tc='l' + bytes(0xec, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x00, 0xfd, 0x20, 0xa7, 0x39, + 0x4b, 0x5f, 0x18, 0x0b, 0x3b, 0x00), + new BigInteger("-2232232135326160725639168")), + loadExample("0.", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 0x0.0p+0), + loadExample("1.", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f), + 0x1.0000000000000p+0), + loadExample("-42.", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0xc0), + -0x1.5000000000000p+5), + loadExample("1e42", // tc='g' + bytes(0xe7, 0x61, 0xa0, 0xe0, 0xc4, 0x78, 0xf5, 0xa6, 0x48), + 0x1.6f578c4e0a061p+139), + loadExample("1.8e300", // tc='g' + bytes(0xe7, 0xa6, 0x36, 0xcd, 0xe0, 0x9c, 0x80, 0x45, 0x7e), + 0x1.5809ce0cd36a6p+997), + loadExample("1.12e-308", // tc='g' + bytes(0xe7, 0xd7, 0xb2, 0x64, 0x01, 0xbd, 0x0d, 0x08, 0x00), + 0x0.80dbd0164b2d7p-1022), + 
loadExample("float.fromhex('0x1.fffffffffffffp1023')", // tc='g' + bytes(0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0x7f), + 0x1.fffffffffffffp+1023), + loadExample("float.fromhex('-0x1.p-1022')", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x80), + -0x1.0000000000000p-1022), + loadExample("float('inf')", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f), + Double.POSITIVE_INFINITY), + loadExample("float('-inf')", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0xff), + Double.NEGATIVE_INFINITY), + loadExample("float('nan')", // tc='g' + bytes(0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f), + Double.NaN), + loadExample("'hello'", // tc='Z' + bytes(0xda, 0x05, 0x68, 0x65, 0x6c, 0x6c, 0x6f), "hello"), + loadExample("'sæll'", // tc='t' + bytes(0x74, 0x05, 0x00, 0x00, 0x00, 0x73, 0xc3, 0xa6, 0x6c, 0x6c), + "sæll"), + loadExample("'🐍'", // tc='t' + bytes(0x74, 0x04, 0x00, 0x00, 0x00, 0xf0, 0x9f, 0x90, 0x8d), "🐍"), + loadExample("()", // tc=')' + bytes(0xa9, 0x00), Py.tuple()), + loadExample("(sa,sa,sa)", // tc=')' + bytes(0xa9, 0x03, 0xda, 0x05, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x72, 0x01, + 0x00, 0x00, 0x00, 0x72, 0x01, 0x00, 0x00, 0x00), + Py.tuple("hello", "hello", "hello")), + loadExample("(sb,sb,t,t)", // tc=')' + bytes(0xa9, 0x04, 0xf5, 0x05, 0x00, 0x00, 0x00, 0x73, 0xc3, 0xa6, 0x6c, + 0x6c, 0x72, 0x01, 0x00, 0x00, 0x00, 0xa9, 0x03, 0xe9, 0x01, + 0x00, 0x00, 0x00, 0xe9, 0x02, 0x00, 0x00, 0x00, 0xe9, 0x03, + 0x00, 0x00, 0x00, 0x72, 0x02, 0x00, 0x00, 0x00), + Py.tuple("sæll", "sæll", Py.tuple(1, 2, 3), Py.tuple(1, 2, 3))), + loadExample("[]", // tc='[' + bytes(0xdb, 0x00, 0x00, 0x00, 0x00), new PyList(List.of())), + loadExample("[sa]", // tc='[' + bytes(0xdb, 0x01, 0x00, 0x00, 0x00, 0xda, 0x05, 0x68, 0x65, 0x6c, 0x6c, + 0x6f), + new PyList(List.of("hello"))), + loadExample("[sa, 2, t]", // tc='[' + bytes(0xdb, 0x03, 0x00, 0x00, 0x00, 0xda, 0x05, 0x68, 0x65, 0x6c, 0x6c, + 0x6f, 0xe9, 0x02, 0x00, 
0x00, 0x00, 0xa9, 0x03, 0xe9, 0x01, + 0x00, 0x00, 0x00, 0x72, 0x02, 0x00, 0x00, 0x00, 0xe9, 0x03, + 0x00, 0x00, 0x00), + new PyList(List.of("hello", 2, Py.tuple(1, 2, 3)))), + loadExample("{}", // tc='{' + bytes(0xfb, 0x30), PyDict.fromKeyValuePairs()), + loadExample("{sa:sb}", // tc='{' + bytes(0xfb, 0xda, 0x05, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0xf5, 0x05, 0x00, + 0x00, 0x00, 0x73, 0xc3, 0xa6, 0x6c, 0x6c, 0x30), + PyDict.fromKeyValuePairs(Py.tuple("hello", "sæll"))), + loadExample("dict(python=su)", // tc='{' + bytes(0xfb, 0xda, 0x06, 0x70, 0x79, 0x74, 0x68, 0x6f, 0x6e, 0xf5, 0x04, + 0x00, 0x00, 0x00, 0xf0, 0x9f, 0x90, 0x8d, 0x30), + PyDict.fromKeyValuePairs(Py.tuple("python", "🐍"))), + loadExample("{sa:1, sb:2, su:t}", // tc='{' + bytes(0xfb, 0xda, 0x05, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0xe9, 0x01, 0x00, + 0x00, 0x00, 0xf5, 0x05, 0x00, 0x00, 0x00, 0x73, 0xc3, 0xa6, + 0x6c, 0x6c, 0xe9, 0x02, 0x00, 0x00, 0x00, 0xf5, 0x04, 0x00, + 0x00, 0x00, 0xf0, 0x9f, 0x90, 0x8d, 0xa9, 0x03, 0x72, 0x02, + 0x00, 0x00, 0x00, 0x72, 0x04, 0x00, 0x00, 0x00, 0xe9, 0x03, + 0x00, 0x00, 0x00, 0x30), + PyDict.fromKeyValuePairs(Py.tuple("hello", 1), Py.tuple("sæll", 2), + Py.tuple("🐍", Py.tuple(1, 2, 3)))), + + // Hand-generated examples + + loadExample("StopIteration", // tc='S' + bytes('S'), StopIteration.TYPE)); + + } + + /** + * A list referring to itself: {@code listself = [1, listself, 3]}, + * used for tesing the marshalling of a self-referential list. + */ + static final PyList LISTSELF = listself(); + + /** @return {@code listself = [1, listself, 3]} */ + private static PyList listself() { + PyList list = new PyList(List.of(1, 2, 3)); + list.set(1, list); + return list; + } + + /** The result of marshalling {@code (listself,4)}. 
*/ + static final byte[] LISTSELF_BYTES = bytes( // + 0xa9, 0x02, 0xdb, 0x03, 0x00, 0x00, 0x00, 0xe9, 0x01, 0x00, 0x00, 0x00, 0x72, 0x01, + 0x00, 0x00, 0x00, 0xe9, 0x03, 0x00, 0x00, 0x00, 0xe9, 0x04, 0x00, 0x00, 0x00); + + /** + * Construct a set of test arguments for a single test of load, and + * a reference result provided by the caller. + * + * @param expression to identify the test + * @param bytes to deserialise + * @param expected results to expect + */ + private static Arguments loadExample(String name, byte[] bytes, Object expected) { + return arguments(name, bytes, expected); + } + } + + /** + * Tests reading a complete object from a {@code PyBytes}, using the + * Python buffer protocol. + */ + @Nested + @DisplayName("Read object from bytes-like") + class MarshalLoadBytesTest extends AbstractLoadTest { + + @DisplayName("loads(b)") + @ParameterizedTest(name = "loads(b) = {0}") + @MethodSource("objectLoadExamples") + void loadsTest(String name, byte[] bytes, Object expected) { + Object r = marshal.loads(new PyBytes(bytes)); + assertPythonType(PyType.of(expected), r); + assertPythonEquals(expected, r); + } + + @DisplayName("loads((listself,4))") + @Test + void loadsListSelf() throws Throwable { + Object r = marshal.loads(new PyBytes(LISTSELF_BYTES)); + assertPythonType(PyTuple.TYPE, r); + // We can't simply compare values, r with expected + // assertPythonEquals(expected, r); + PyList list = (PyList)PySequence.getItem(r, 0); + // Item 1 of this list should be the list itself + PyList list1 = (PyList)PySequence.getItem(list, 1); + assertSame(list, list1); + assertPythonEquals(4, PySequence.getItem(r, 1)); + } + } + + /** + * Tests reading a complete object from a {@code byte[]}, wrapping + * it as a stream. 
+ */ + @Nested + @DisplayName("Read object from a stream") + class MarshalLoadStreamTest extends AbstractLoadTest { + + @DisplayName("load(f)") + @ParameterizedTest(name = "load(f) = {0}") + @MethodSource("objectLoadExamples") + void loadsTest(String name, byte[] b, Object expected) { + Object r = marshal.load(new ByteArrayInputStream(b)); + assertPythonType(PyType.of(expected), r); + assertPythonEquals(expected, r); + } + + @DisplayName("loads((listself,4))") + @Test + void loadsListSelf() throws Throwable { + Object r = marshal.load(new ByteArrayInputStream(LISTSELF_BYTES)); + assertPythonType(PyTuple.TYPE, r); + // We can't simply compare values, r with expected + // assertPythonEquals(expected, r); + PyList list = (PyList)PySequence.getItem(r, 0); + // Item 1 of this list should be the list itself + PyList list1 = (PyList)PySequence.getItem(list, 1); + assertSame(list, list1); + assertPythonEquals(4, PySequence.getItem(r, 1)); + } + } + + // Support methods ------------------------------------------------ + + /** + * Copy values to a new {@code byte[]} casting each to a + * {@code byte}. + * + * @param v to convert to {@code byte} + * @return the byte array of cast values + */ + private static byte[] bytes(int... 
v) { + byte[] b = new byte[v.length]; + for (int i = 0; i < b.length; i++) { b[i] = (byte)v[i]; } + return b; + } +} diff --git a/core/src/test/pythonExample/binary_op.py b/core/src/test/pythonExample/binary_op.py new file mode 100644 index 000000000..7c5f4be81 --- /dev/null +++ b/core/src/test/pythonExample/binary_op.py @@ -0,0 +1,8 @@ +# binary_op.py + +a = 7 +b = 6 + +sum = a + b +diff = a - b +prod = a * b diff --git a/core/src/test/pythonExample/bool_left_arith.py b/core/src/test/pythonExample/bool_left_arith.py new file mode 100644 index 000000000..ed8c97194 --- /dev/null +++ b/core/src/test/pythonExample/bool_left_arith.py @@ -0,0 +1,19 @@ +# bool_left_arith.py +# binary operations invoked as bool op number + +t = True +f = False + +# Note bool is a sub-class of int +u = 42 +a = t + u +b = t * u +c = f * u +d = -f + +# Note bool is *not* a sub-class of float +u = 42. +a1 = t + u +b1 = t * u +c1 = f * u + diff --git a/core/src/test/pythonExample/bool_right_arith.py b/core/src/test/pythonExample/bool_right_arith.py new file mode 100644 index 000000000..1d3ae52f2 --- /dev/null +++ b/core/src/test/pythonExample/bool_right_arith.py @@ -0,0 +1,19 @@ +# bool_right_arith.py +# binary operations invoked as number op bool + +t = True +f = False + +# Note bool is a sub-class of int +u = 42 +a = u + t +b = u * t +c = u * f +d = -t + +# Note bool is *not* a sub-class of float +u = 42. +a1 = u + t +b1 = u * t +c1 = u * f + diff --git a/core/src/test/pythonExample/builtins_module.py b/core/src/test/pythonExample/builtins_module.py new file mode 100644 index 000000000..043601492 --- /dev/null +++ b/core/src/test/pythonExample/builtins_module.py @@ -0,0 +1,28 @@ +# builtins_module.py +# +# The focus of this test is the way the interpreter resolves names +# in the builtins dictionary (after local and global namespaces). +# This happens in opcodes LOAD_NAME and LOAD_GLOBAL. 
+ +# Access sample objects from the builtins module implicitly +# Opcode is LOAD_NAME + +int_name = int.__name__ +max_name = max.__name__ + +# Call functions to prove we can +# Opcode is LOAD_NAME +ai = abs(-42) +af = abs(-41.9) + + +# Sometimes __builtins__ is not the builtins module. Find it with: +bi = max.__self__ + +# Check explicit attribute access to the (real) builtins module +bi_int_name = bi.int.__name__ +bi_max_name = bi.max.__name__ + + +# Not marshallable +del bi diff --git a/core/src/test/pythonExample/call_method_builtin.py b/core/src/test/pythonExample/call_method_builtin.py new file mode 100644 index 000000000..8c068803f --- /dev/null +++ b/core/src/test/pythonExample/call_method_builtin.py @@ -0,0 +1,30 @@ +# call_method_builtin.py + +# Call methods on some built-in types to exercise CALL_METHOD + +a = "abracadabra" + +asc = a.isascii() +A = a.upper() + +# Signature: strip(self, chars=None, /) +cad = a.strip("bar") +wood = " \twood \x85\r\n".strip() + +# Signature: replace(self, old, new, count=-1, /) +sox = a.replace("bra", "sock") +sock = a.replace("bra", "sock", 1) + +# Signature: split(self, /, sep=None, maxsplit=-1) +split1 = a.split('br', 1) +split = a.split('bra') +split0 = a.split() +split1k = a.split('br', maxsplit=1) +split2k = a.split(maxsplit=4, sep='a') + +# Force use of CALL_FUNCTION_EX 0 +sock_ex = a.replace(*("bra", "sock", 1)) + +# Force use of CALL_FUNCTION_EX 1 +split1k_ex = a.split('br', **{'maxsplit':1}) + diff --git a/core/src/test/pythonExample/comparison.py b/core/src/test/pythonExample/comparison.py new file mode 100644 index 000000000..7a3eabfde --- /dev/null +++ b/core/src/test/pythonExample/comparison.py @@ -0,0 +1,55 @@ +# comparison.py + +# Tests of the order comparisons + +a = 2 +b = 4 + +lt = a < b +le = a <= b +eq = a == b +ne = a != b +ge = a >= b +gt = a > b + + +a = 4 +b = 2 + +lt1 = a < b +le1 = a <= b +eq1 = a == b +ne1 = a != b +ge1 = a >= b +gt1 = a > b + + +a = 2 +b = 2 + +lt2 = a < b +le2 = a <= b +eq2 = a 
== b +ne2 = a != b +ge2 = a >= b +gt2 = a > b + +# Tests of 'in' + +t = ("cow", 2, "pig", None, 42.0) +f0 = 1 in t +f1 = "c" in t +t1x = "c" not in t +f2 = 42.1 in t +f3 = (2,) in t +f4 = "c" in t[2] + + +t0 = 2 in t +t1 = "pig" in t +f1x = "pig" not in t +t2 = None in t +t3 = 42 in t +t4 = "p" in t[2] + + diff --git a/core/src/test/pythonExample/list_dot_product.py b/core/src/test/pythonExample/list_dot_product.py new file mode 100644 index 000000000..de4a5ba7d --- /dev/null +++ b/core/src/test/pythonExample/list_dot_product.py @@ -0,0 +1,17 @@ +# list_dot_product.py + +# Multiply-add of float vectors (without for loops) +# Also, multiplication as repetition. + +n = 2 + +a = [1.2, 3.4, 5.6, 7.8] * (3 * n) +b = (4 * n) * [1.2, 4.5, 7.8] +n = 12 * n # lists are this long + +i = 0 +sum = 0.0 + +while i < n: + sum = sum + a[i] * b[i] + i = i + 1 diff --git a/core/src/test/pythonExample/list_index.py b/core/src/test/pythonExample/list_index.py new file mode 100644 index 000000000..aae9502c4 --- /dev/null +++ b/core/src/test/pythonExample/list_index.py @@ -0,0 +1,12 @@ +# list_index.py + +# Just enough to exercise indexed access opcodes and methods + +c = 22.0 + +d = [20, "hello", c] +a = d[0] +b = d[1] +d[2] = a + c +c = d[2] + diff --git a/core/src/test/pythonExample/load_store_name.py b/core/src/test/pythonExample/load_store_name.py new file mode 100644 index 000000000..e65dba32f --- /dev/null +++ b/core/src/test/pythonExample/load_store_name.py @@ -0,0 +1,10 @@ +# load_store_name.py + +a = 1 +β = 2 # non-ascii + +c = β +β = 4 +a = c + +ਛਲ = β \ No newline at end of file diff --git a/core/src/test/pythonExample/multi_if.py b/core/src/test/pythonExample/multi_if.py new file mode 100644 index 000000000..fef08d5d9 --- /dev/null +++ b/core/src/test/pythonExample/multi_if.py @@ -0,0 +1,38 @@ +# multi_if.py + +a = False +b = False + +if a and b: + r = 2 +elif a or b: + r = 1 +else: + r = 0 + +a = False +b = True +if a and b: + r1 = 2 +elif a or b: + r1 = 1 +else: + r1 = 0 
+ +a = True +b = False +if a and b: + r2 = 2 +elif a or b: + r2 = 1 +else: + r2 = 0 + +a = True +b = True +if a and b: + r3 = 2 +elif a or b: + r3 = 1 +else: + r3 = 0 diff --git a/core/src/test/pythonExample/simple_if.py b/core/src/test/pythonExample/simple_if.py new file mode 100644 index 000000000..8cd9f1d49 --- /dev/null +++ b/core/src/test/pythonExample/simple_if.py @@ -0,0 +1,32 @@ +# simple_if.py + +b = False +if b: + r0 = 1 +else: + r0 = 0 + +b = True +if b: + r1 = 1 +else: + r1 = 0 + +b = 0 +if b: + r2 = 1 +else: + r2 = 0 + +b = 1 +r3 = 1 if b else 0 + +b = "" +r4 = 1 if b else 0 + +b = "something" +r5 = 1 if b else 0 + +b = None +r6 = 1 if b else 0 + diff --git a/core/src/test/pythonExample/simple_loop.py b/core/src/test/pythonExample/simple_loop.py new file mode 100644 index 000000000..71f4f639f --- /dev/null +++ b/core/src/test/pythonExample/simple_loop.py @@ -0,0 +1,10 @@ +# simple_loop.py + +n = 6 + +# ? n, sum +sum = 0 +while n > 0: + sum = sum + n + n = n - 1 + diff --git a/core/src/test/pythonExample/tuple_dot_product.py b/core/src/test/pythonExample/tuple_dot_product.py new file mode 100644 index 000000000..b2f995226 --- /dev/null +++ b/core/src/test/pythonExample/tuple_dot_product.py @@ -0,0 +1,25 @@ +# tuple_dot_product.py + +# Multiply-add of int and float vectors (without for loops) + +a = (2, 3, 4) +b = (3, 4, 6) +n = 3 + +# ? sum + +sum = a[0] * b[0] +i = 1 +while i < n: + sum = sum + a[i] * b[i] + i = i + 1 + +a= (1., 2., 3., 4.) +b = (4., 3., 4., 5.) 
+n = 4 + +sum2 = a[0] * b[0] +i = 1 +while i < n: + sum2 = sum2 + a[i] * b[i] + i = i + 1 diff --git a/core/src/test/pythonExample/tuple_index.py b/core/src/test/pythonExample/tuple_index.py new file mode 100644 index 000000000..e407ba0af --- /dev/null +++ b/core/src/test/pythonExample/tuple_index.py @@ -0,0 +1,9 @@ +# tuple_index.py + +# Just enough to exercise indexed access opcodes and methods + +c = 22.0 + +d = (20, "hello", c) +b = d[1] +c = d[2] + d[0] diff --git a/core/src/test/pythonExample/unary_op.py b/core/src/test/pythonExample/unary_op.py new file mode 100644 index 000000000..4bf3fbe69 --- /dev/null +++ b/core/src/test/pythonExample/unary_op.py @@ -0,0 +1,8 @@ +# unary_op.py + +a = 6. +b = -7 + +a = -a +b = -b +c = ~b diff --git a/extlibs/antlr-complete-3.5.2.jar b/extlibs/antlr-complete-3.5.2.jar deleted file mode 100644 index 260de7634..000000000 Binary files a/extlibs/antlr-complete-3.5.2.jar and /dev/null differ diff --git a/extlibs/antlr-runtime-3.5.2.jar b/extlibs/antlr-runtime-3.5.2.jar deleted file mode 100644 index d48e3e867..000000000 Binary files a/extlibs/antlr-runtime-3.5.2.jar and /dev/null differ diff --git a/extlibs/asm-7.1.jar b/extlibs/asm-7.1.jar deleted file mode 100644 index 355eb08db..000000000 Binary files a/extlibs/asm-7.1.jar and /dev/null differ diff --git a/extlibs/asm-commons-7.1.jar b/extlibs/asm-commons-7.1.jar deleted file mode 100644 index 31ffb987d..000000000 Binary files a/extlibs/asm-commons-7.1.jar and /dev/null differ diff --git a/extlibs/asm-util-7.1.jar b/extlibs/asm-util-7.1.jar deleted file mode 100644 index 5e61f2c10..000000000 Binary files a/extlibs/asm-util-7.1.jar and /dev/null differ diff --git a/extlibs/bcpkix-jdk15on-1.62.jar b/extlibs/bcpkix-jdk15on-1.62.jar deleted file mode 100644 index d63da57bd..000000000 Binary files a/extlibs/bcpkix-jdk15on-1.62.jar and /dev/null differ diff --git a/extlibs/bcprov-jdk15on-1.62.jar b/extlibs/bcprov-jdk15on-1.62.jar deleted file mode 100644 index c52b16e4e..000000000 
Binary files a/extlibs/bcprov-jdk15on-1.62.jar and /dev/null differ diff --git a/extlibs/commons-compress-1.19.jar b/extlibs/commons-compress-1.19.jar deleted file mode 100644 index 5c9f52a29..000000000 Binary files a/extlibs/commons-compress-1.19.jar and /dev/null differ diff --git a/extlibs/cpptasks/cpptasks.jar b/extlibs/cpptasks/cpptasks.jar deleted file mode 100644 index 1febc9cbe..000000000 Binary files a/extlibs/cpptasks/cpptasks.jar and /dev/null differ diff --git a/extlibs/failureaccess-1.0.1.jar b/extlibs/failureaccess-1.0.1.jar deleted file mode 100644 index 9b56dc751..000000000 Binary files a/extlibs/failureaccess-1.0.1.jar and /dev/null differ diff --git a/extlibs/guava-28.0-android.jar b/extlibs/guava-28.0-android.jar deleted file mode 100644 index 516fc5fa1..000000000 Binary files a/extlibs/guava-28.0-android.jar and /dev/null differ diff --git a/extlibs/icu4j-59_1.jar b/extlibs/icu4j-59_1.jar deleted file mode 100644 index 3dc69c8a1..000000000 Binary files a/extlibs/icu4j-59_1.jar and /dev/null differ diff --git a/extlibs/jarjar-1.7.2.jar b/extlibs/jarjar-1.7.2.jar deleted file mode 100644 index 305260ea8..000000000 Binary files a/extlibs/jarjar-1.7.2.jar and /dev/null differ diff --git a/extlibs/java-sizeof-0.0.5.jar b/extlibs/java-sizeof-0.0.5.jar deleted file mode 100644 index 23c555cc1..000000000 Binary files a/extlibs/java-sizeof-0.0.5.jar and /dev/null differ diff --git a/extlibs/jffi-1.2.20.jar b/extlibs/jffi-1.2.20.jar deleted file mode 100644 index 011e3afa8..000000000 Binary files a/extlibs/jffi-1.2.20.jar and /dev/null differ diff --git a/extlibs/jffi-Darwin.jar b/extlibs/jffi-Darwin.jar deleted file mode 100644 index 009191ac8..000000000 Binary files a/extlibs/jffi-Darwin.jar and /dev/null differ diff --git a/extlibs/jffi-aarch64-Linux.jar b/extlibs/jffi-aarch64-Linux.jar deleted file mode 100644 index 1d4438e3f..000000000 Binary files a/extlibs/jffi-aarch64-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-arm-Linux.jar 
b/extlibs/jffi-arm-Linux.jar deleted file mode 100644 index f9a048b77..000000000 Binary files a/extlibs/jffi-arm-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-i386-FreeBSD.jar b/extlibs/jffi-i386-FreeBSD.jar deleted file mode 100644 index 8235a3484..000000000 Binary files a/extlibs/jffi-i386-FreeBSD.jar and /dev/null differ diff --git a/extlibs/jffi-i386-Linux.jar b/extlibs/jffi-i386-Linux.jar deleted file mode 100644 index 7e23b400c..000000000 Binary files a/extlibs/jffi-i386-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-i386-OpenBSD.jar b/extlibs/jffi-i386-OpenBSD.jar deleted file mode 100644 index 8235a3484..000000000 Binary files a/extlibs/jffi-i386-OpenBSD.jar and /dev/null differ diff --git a/extlibs/jffi-i386-SunOS.jar b/extlibs/jffi-i386-SunOS.jar deleted file mode 100644 index 68bcacc39..000000000 Binary files a/extlibs/jffi-i386-SunOS.jar and /dev/null differ diff --git a/extlibs/jffi-i386-Windows.jar b/extlibs/jffi-i386-Windows.jar deleted file mode 100644 index 90a6baecf..000000000 Binary files a/extlibs/jffi-i386-Windows.jar and /dev/null differ diff --git a/extlibs/jffi-ppc-AIX.jar b/extlibs/jffi-ppc-AIX.jar deleted file mode 100644 index eed0ab732..000000000 Binary files a/extlibs/jffi-ppc-AIX.jar and /dev/null differ diff --git a/extlibs/jffi-ppc-Linux.jar b/extlibs/jffi-ppc-Linux.jar deleted file mode 100644 index 8235a3484..000000000 Binary files a/extlibs/jffi-ppc-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-ppc64-Linux.jar b/extlibs/jffi-ppc64-Linux.jar deleted file mode 100644 index cfae4bbf6..000000000 Binary files a/extlibs/jffi-ppc64-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-ppc64le-Linux.jar b/extlibs/jffi-ppc64le-Linux.jar deleted file mode 100644 index 7931dd1c2..000000000 Binary files a/extlibs/jffi-ppc64le-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-s390x-Linux.jar b/extlibs/jffi-s390x-Linux.jar deleted file mode 100644 index 8235a3484..000000000 Binary files 
a/extlibs/jffi-s390x-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-sparc-SunOS.jar b/extlibs/jffi-sparc-SunOS.jar deleted file mode 100644 index 8235a3484..000000000 Binary files a/extlibs/jffi-sparc-SunOS.jar and /dev/null differ diff --git a/extlibs/jffi-sparcv9-SunOS.jar b/extlibs/jffi-sparcv9-SunOS.jar deleted file mode 100644 index 306788c08..000000000 Binary files a/extlibs/jffi-sparcv9-SunOS.jar and /dev/null differ diff --git a/extlibs/jffi-x86_64-FreeBSD.jar b/extlibs/jffi-x86_64-FreeBSD.jar deleted file mode 100644 index 32824a7e2..000000000 Binary files a/extlibs/jffi-x86_64-FreeBSD.jar and /dev/null differ diff --git a/extlibs/jffi-x86_64-Linux.jar b/extlibs/jffi-x86_64-Linux.jar deleted file mode 100644 index 99deb014d..000000000 Binary files a/extlibs/jffi-x86_64-Linux.jar and /dev/null differ diff --git a/extlibs/jffi-x86_64-OpenBSD.jar b/extlibs/jffi-x86_64-OpenBSD.jar deleted file mode 100644 index 3e14e03e4..000000000 Binary files a/extlibs/jffi-x86_64-OpenBSD.jar and /dev/null differ diff --git a/extlibs/jffi-x86_64-SunOS.jar b/extlibs/jffi-x86_64-SunOS.jar deleted file mode 100644 index b8240422d..000000000 Binary files a/extlibs/jffi-x86_64-SunOS.jar and /dev/null differ diff --git a/extlibs/jffi-x86_64-Windows.jar b/extlibs/jffi-x86_64-Windows.jar deleted file mode 100644 index 667e48db8..000000000 Binary files a/extlibs/jffi-x86_64-Windows.jar and /dev/null differ diff --git a/extlibs/jline-2.14.5.jar b/extlibs/jline-2.14.5.jar deleted file mode 100644 index 761acd41a..000000000 Binary files a/extlibs/jline-2.14.5.jar and /dev/null differ diff --git a/extlibs/jnr-constants-0.9.12.jar b/extlibs/jnr-constants-0.9.12.jar deleted file mode 100644 index d894741f4..000000000 Binary files a/extlibs/jnr-constants-0.9.12.jar and /dev/null differ diff --git a/extlibs/jnr-ffi-2.1.10.jar b/extlibs/jnr-ffi-2.1.10.jar deleted file mode 100644 index d3382b5c4..000000000 Binary files a/extlibs/jnr-ffi-2.1.10.jar and /dev/null differ diff --git 
a/extlibs/jnr-netdb-1.1.6.jar b/extlibs/jnr-netdb-1.1.6.jar deleted file mode 100644 index 0f49b70d6..000000000 Binary files a/extlibs/jnr-netdb-1.1.6.jar and /dev/null differ diff --git a/extlibs/jnr-posix-3.0.50.jar b/extlibs/jnr-posix-3.0.50.jar deleted file mode 100644 index 1288fce57..000000000 Binary files a/extlibs/jnr-posix-3.0.50.jar and /dev/null differ diff --git a/extlibs/junit-4.10.jar b/extlibs/junit-4.10.jar deleted file mode 100644 index bf5c0b9c6..000000000 Binary files a/extlibs/junit-4.10.jar and /dev/null differ diff --git a/extlibs/mockrunner-0.4.1/jar/commons-logging-1.0.4.jar b/extlibs/mockrunner-0.4.1/jar/commons-logging-1.0.4.jar deleted file mode 100644 index b73a80fab..000000000 Binary files a/extlibs/mockrunner-0.4.1/jar/commons-logging-1.0.4.jar and /dev/null differ diff --git a/extlibs/mockrunner-0.4.1/jar/j2ee1.3/servlet.jar b/extlibs/mockrunner-0.4.1/jar/j2ee1.3/servlet.jar deleted file mode 100644 index 74329dc83..000000000 Binary files a/extlibs/mockrunner-0.4.1/jar/j2ee1.3/servlet.jar and /dev/null differ diff --git a/extlibs/mockrunner-0.4.1/jar/jakarta-oro-2.0.8.jar b/extlibs/mockrunner-0.4.1/jar/jakarta-oro-2.0.8.jar deleted file mode 100644 index 23488d260..000000000 Binary files a/extlibs/mockrunner-0.4.1/jar/jakarta-oro-2.0.8.jar and /dev/null differ diff --git a/extlibs/mockrunner-0.4.1/jar/jdom.jar b/extlibs/mockrunner-0.4.1/jar/jdom.jar deleted file mode 100644 index 97c85f564..000000000 Binary files a/extlibs/mockrunner-0.4.1/jar/jdom.jar and /dev/null differ diff --git a/extlibs/mockrunner-0.4.1/jar/nekohtml.jar b/extlibs/mockrunner-0.4.1/jar/nekohtml.jar deleted file mode 100644 index 0be02315f..000000000 Binary files a/extlibs/mockrunner-0.4.1/jar/nekohtml.jar and /dev/null differ diff --git a/extlibs/mockrunner-0.4.1/jar/xml-apis-2.11.0.jar b/extlibs/mockrunner-0.4.1/jar/xml-apis-2.11.0.jar deleted file mode 100644 index 46733464f..000000000 Binary files a/extlibs/mockrunner-0.4.1/jar/xml-apis-2.11.0.jar and 
/dev/null differ diff --git a/extlibs/mockrunner-0.4.1/lib/jdk1.5/j2ee1.3/dependencies.txt b/extlibs/mockrunner-0.4.1/lib/jdk1.5/j2ee1.3/dependencies.txt deleted file mode 100644 index cfa9a27a7..000000000 --- a/extlibs/mockrunner-0.4.1/lib/jdk1.5/j2ee1.3/dependencies.txt +++ /dev/null @@ -1,135 +0,0 @@ -This file lists all the jar files provided by Mockrunner and -the required third party libraries for each jar. Please note -that this file is created automatically by analyzing the -compile time dependencies of all classes in the jar. This -is done recursively, i.e. the dependencies of the third-party -jars are recognized as well. If you add all dependend jars -for a specified mockrunner-xyz.jar to your classpath, you -are on the safe side. However, not all listed dependencies -are necessary at runtime in all cases. Especially with the -"all-in-one"-file mockrunner.jar you don't have to add everything -to the classpath. E.g. if you're only using EJB and JMS, you don't have -to add the web related jar files, because the necessary factories and modules -are created when they are used and lazy initialized respectively. -Please note that the Struts test framework only needs CGLib, if custom action -mappings are used. The jasper related jar files are only necessary if -the JasperJspFactory is used. If you only need one technology it's recommended -to use the corresponding jar file instead of the "all-in-one" mockrunner.jar. -E.g. if you only want to use the JDBC test framework, you can use -mockrunner-jdbc.jar. Please note that each mockrunner-xyz.jar file contains a -jarversion.txt which lists the Mockrunner version and the supported JDK and -J2EE version. 
- -Created: 06/26/2008 05:59 PM - -Jar file name: mockrunner-tag.jar - -Depends on: - -commons-beanutils-1.7.0.jar -commons-logging-1.0.4.jar -jakarta-oro-2.0.8.jar -jdom.jar -junit.jar -nekohtml.jar -servlet.jar -xercesImpl.jar -xml-apis.jar - - -Jar file name: mockrunner-jms.jar - -Depends on: - -commons-logging-1.0.4.jar -jakarta-oro-2.0.8.jar -jboss-j2ee.jar -junit.jar - - -Jar file name: mockrunner-servlet.jar - -Depends on: - -commons-logging-1.0.4.jar -jakarta-oro-2.0.8.jar -jdom.jar -junit.jar -nekohtml.jar -servlet.jar -xercesImpl.jar -xml-apis.jar - - -Jar file name: mockrunner.jar - -Depends on: - -cglib-nodep-2.2.jar -commons-beanutils-1.7.0.jar -commons-digester-1.8.jar -commons-logging-1.0.4.jar -commons-validator-1.3.1.jar -jakarta-oro-2.0.8.jar -jboss-j2ee.jar -jdom.jar -junit.jar -mockejb.jar -nekohtml.jar -servlet.jar -struts.jar -xercesImpl.jar -xml-apis.jar - - -Jar file name: mockrunner-jca.jar - -Depends on: - -commons-logging-1.0.4.jar -jakarta-oro-2.0.8.jar -jboss-j2ee.jar -junit.jar - - -Jar file name: mockrunner-jdbc.jar - -Depends on: - -commons-logging-1.0.4.jar -jakarta-oro-2.0.8.jar -jdom.jar -junit.jar -xml-apis.jar - - -Jar file name: mockrunner-ejb.jar - -Depends on: - -cglib-nodep-2.2.jar -commons-beanutils-1.7.0.jar -commons-logging-1.0.4.jar -jakarta-oro-2.0.8.jar -jboss-j2ee.jar -junit.jar -mockejb.jar - - -Jar file name: mockrunner-struts.jar - -Depends on: - -cglib-nodep-2.2.jar -commons-beanutils-1.7.0.jar -commons-digester-1.8.jar -commons-logging-1.0.4.jar -commons-validator-1.3.1.jar -jakarta-oro-2.0.8.jar -jdom.jar -junit.jar -nekohtml.jar -servlet.jar -struts.jar -xercesImpl.jar -xml-apis.jar diff --git a/extlibs/mockrunner-0.4.1/lib/jdk1.5/j2ee1.3/mockrunner-servlet.jar b/extlibs/mockrunner-0.4.1/lib/jdk1.5/j2ee1.3/mockrunner-servlet.jar deleted file mode 100644 index a2b3bf9e8..000000000 Binary files a/extlibs/mockrunner-0.4.1/lib/jdk1.5/j2ee1.3/mockrunner-servlet.jar and /dev/null differ diff --git 
a/extlibs/mockrunner-0.4.1/readme.txt b/extlibs/mockrunner-0.4.1/readme.txt deleted file mode 100644 index 7afac7152..000000000 --- a/extlibs/mockrunner-0.4.1/readme.txt +++ /dev/null @@ -1,9 +0,0 @@ -This contains the minimal set of jars from mockrunner-0.4.1 to run the modjy -tests against j2ee1.3 with jdk1.5. - -These are run from the main Jython directory as part of "ant test", -or from tests/modjy with "ant". In the latter case, JYTHON_HOME must -be set to the project/dist folder and MOCKRUNNER_HOME to this folder. - -xml-apis-*.jar is added to mockrunner because it is needed for these tests, -but not for Jython generally. diff --git a/extlibs/mysql-connector-java-5.1.42-bin.jar b/extlibs/mysql-connector-java-5.1.42-bin.jar deleted file mode 100644 index 4c6df38c1..000000000 Binary files a/extlibs/mysql-connector-java-5.1.42-bin.jar and /dev/null differ diff --git a/extlibs/netty-buffer-4.1.45.Final.jar b/extlibs/netty-buffer-4.1.45.Final.jar deleted file mode 100644 index 45d9ff28a..000000000 Binary files a/extlibs/netty-buffer-4.1.45.Final.jar and /dev/null differ diff --git a/extlibs/netty-codec-4.1.45.Final.jar b/extlibs/netty-codec-4.1.45.Final.jar deleted file mode 100644 index e8378e766..000000000 Binary files a/extlibs/netty-codec-4.1.45.Final.jar and /dev/null differ diff --git a/extlibs/netty-common-4.1.45.Final.jar b/extlibs/netty-common-4.1.45.Final.jar deleted file mode 100644 index 038f1f72c..000000000 Binary files a/extlibs/netty-common-4.1.45.Final.jar and /dev/null differ diff --git a/extlibs/netty-handler-4.1.45.Final.jar b/extlibs/netty-handler-4.1.45.Final.jar deleted file mode 100644 index ef3d012e7..000000000 Binary files a/extlibs/netty-handler-4.1.45.Final.jar and /dev/null differ diff --git a/extlibs/netty-resolver-4.1.45.Final.jar b/extlibs/netty-resolver-4.1.45.Final.jar deleted file mode 100644 index defcf0483..000000000 Binary files a/extlibs/netty-resolver-4.1.45.Final.jar and /dev/null differ diff --git 
a/extlibs/netty-transport-4.1.45.Final.jar b/extlibs/netty-transport-4.1.45.Final.jar deleted file mode 100644 index 2a16b1ced..000000000 Binary files a/extlibs/netty-transport-4.1.45.Final.jar and /dev/null differ diff --git a/extlibs/postgresql-42.1.1.jre7.jar b/extlibs/postgresql-42.1.1.jre7.jar deleted file mode 100644 index 99b60a338..000000000 Binary files a/extlibs/postgresql-42.1.1.jre7.jar and /dev/null differ diff --git a/extlibs/profile.jar b/extlibs/profile.jar deleted file mode 100644 index 4c0af29f1..000000000 Binary files a/extlibs/profile.jar and /dev/null differ diff --git a/extlibs/profile.properties b/extlibs/profile.properties deleted file mode 100644 index 0acd965a9..000000000 --- a/extlibs/profile.properties +++ /dev/null @@ -1,69 +0,0 @@ -# -# Is the profiler on or off when the app starts? -# (on | off) -# default = on -# -profiler=on -# -# Can the profiler be controlled remotely ? -# (on | off) -# default = off -# -remote=off -# -# TCP listen port for remote control -# default =15599 -# -port=15599 -# -# -#ClassLoaderFilter.1=com.mentorgen.tools.profile.instrument.clfilter.StandardClassLoaderFilter -# -# What is the maximum depth for thread dumps -# (-1 means no limit) -# default = -1 -# (you may also use 'compact') -# -thread-depth=compact -# -# When compacting thread dumps, what in the minimum total time you want -# to show -# default = 10 (ms) -# -thread.compact.threshold.ms=1 -# -# What is the maximum number of methods to show in the method dump -# (-1 means no limit) -# default = -1 -# (you may also use 'compact') -# -max-method-count=10 -# -# defaults to 10 -# -method.compact.threshold.ms=1 -# -# What is the default file name for the profile information -# default=./profile.txt -# -file=profile.txt -# -# What packages are excluded from the display -# (comma separated) -# Note: com.mentorgen.tools.profile is always excluded -# -include=python -# -# Track Object Allocation (very expensive) -# values: on, off -# default = off -# 
-#track.object.alloc=on -# -output=text -#output-method-signatures=yes -#profiler-class=net.sourceforge.jiprof.timeline.TimeLineProfiler -#clock-resolution=ms - -output-summary-only=yes -accept-class-loaders=org.python.core.BytecodeLoader diff --git a/extlibs/servlet-api-2.5.jar b/extlibs/servlet-api-2.5.jar deleted file mode 100644 index fb5249346..000000000 Binary files a/extlibs/servlet-api-2.5.jar and /dev/null differ diff --git a/extlibs/xercesImpl-2.12.0.jar b/extlibs/xercesImpl-2.12.0.jar deleted file mode 100644 index b69d01dac..000000000 Binary files a/extlibs/xercesImpl-2.12.0.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 1948b9074..e708b1c02 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 290541c73..070cb702f 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-4.10.3-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew old mode 100644 new mode 100755 index cccdd3d51..4f906e0c8 --- a/gradlew +++ b/gradlew @@ -1,5 +1,21 @@ #!/usr/bin/env sh +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + ############################################################################## ## ## Gradle start up script for UN*X @@ -28,7 +44,7 @@ APP_NAME="Gradle" APP_BASE_NAME=`basename "$0"` # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD="maximum" @@ -66,6 +82,7 @@ esac CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + # Determine the Java command to use to start the JVM. if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then @@ -109,10 +126,11 @@ if $darwin; then GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" fi -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then APP_HOME=`cygpath --path --mixed "$APP_HOME"` CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` # We build the pattern for arguments to be converted via cygpath @@ -138,19 +156,19 @@ if $cygwin ; then else eval `echo args$i`="\"$arg\"" fi - i=$((i+1)) + i=`expr $i + 1` done case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" 
"$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; esac fi @@ -159,14 +177,9 @@ save () { for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done echo " " } -APP_ARGS=$(save "$@") +APP_ARGS=`save "$@"` # Collect all arguments for the java command, following the shell quoting and substitution rules eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" -# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong -if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then - cd "$(dirname "$0")" -fi - exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat old mode 100644 new mode 100755 index e95643d6a..ac1b06f93 --- a/gradlew.bat +++ b/gradlew.bat @@ -1,3 +1,19 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + @if "%DEBUG%" == "" @echo off @rem ########################################################################## @rem @@ -13,15 +29,18 @@ if "%DIRNAME%" == "" set DIRNAME=. set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" @rem Find java.exe if defined JAVA_HOME goto findJavaFromJavaHome set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init +if "%ERRORLEVEL%" == "0" goto execute echo. echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. @@ -35,7 +54,7 @@ goto fail set JAVA_HOME=%JAVA_HOME:"=% set JAVA_EXE=%JAVA_HOME%/bin/java.exe -if exist "%JAVA_EXE%" goto init +if exist "%JAVA_EXE%" goto execute echo. echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% @@ -45,28 +64,14 @@ echo location of your Java installation. goto fail -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. 
-set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - :execute @rem Setup the command line set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* :end @rem End local scope for the variables with windows NT shell diff --git a/settings.gradle b/settings.gradle index 69e01717d..c3b832e91 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,5 +1,33 @@ /* - * Gradle settings for Jython. See also build.gradle. + * Settings for the top-level project build of Jython. */ -rootProject.name = 'jython-slim' +rootProject.name = 'jython' + +/* + * Sub-projects each generate a Java module or documentation. + * Tasks from each may run in any (dependency) order Gradle chooses. + */ +include 'core' // core API and objects of the interpreter runtime +//include 'extensions' // extension modules implemented in Java +//include 'interpreter' // interpreter(s) and support for embedding Jython +//include 'compiler' // Python compiler(s) +//include 'stdlib' // standard library +//include 'artefact' // published Jython artefact (slim jar) +//include 'command' // the Jython command (an application) +//include 'installer' // installer (an application) + +// Each sub-project contains a build file named after it. +rootProject.children.each { + println it + it.buildFileName = it.name + '.gradle' +} + +/* + * Tools used to generate code but not deployed as part of Jython + * (e.g. plug-ins we use) enter the main build as dependencies. + * These are built as needed but built completely before use. 
+ */ +// https://docs.gradle.org/current/userguide/composite_builds.html +includeBuild 'build-tools' + diff --git a/src/org/python/core/PyBaseString.java b/src/org/python/core/PyBaseString.java deleted file mode 100644 index adf323fe2..000000000 --- a/src/org/python/core/PyBaseString.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.python.core; - -import org.python.expose.ExposedType; - -/** - * base class for jython strings. - */ -@ExposedType(name = "basestring", base = PyObject.class, doc = BuiltinDocs.basestring_doc) -public abstract class PyBaseString extends PySequence implements CharSequence { - - public static final PyType TYPE = PyType.fromClass(PyBaseString.class); - - protected PyBaseString(PyType type) { - super(type); - } - - @Override - public char charAt(int index) { - return toString().charAt(index); - } - - @Override - public int length() { - return toString().length(); - } - - @Override - public CharSequence subSequence(int start, int end) { - return toString().subSequence(start, end); - } -} diff --git a/src/org/python/core/PyFloat.java b/src/org/python/core/PyFloat.java deleted file mode 100644 index 6ff4f9d18..000000000 --- a/src/org/python/core/PyFloat.java +++ /dev/null @@ -1,1116 +0,0 @@ -// Copyright (c) Corporation for National Research Initiatives -// Copyright (c) Jython Developers -package org.python.core; - -import java.io.Serializable; -import java.math.BigDecimal; - -import org.python.core.stringlib.FloatFormatter; -import org.python.core.stringlib.InternalFormat; -import org.python.core.stringlib.InternalFormat.Formatter; -import org.python.core.stringlib.InternalFormat.Spec; -import org.python.expose.ExposedClassMethod; -import org.python.expose.ExposedGet; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import org.python.expose.ExposedType; -import org.python.expose.MethodType; -import org.python.modules.math; - -/** - * A builtin python float. 
- */ -@Untraversable -@ExposedType(name = "float", doc = BuiltinDocs.float_doc) -public class PyFloat extends PyObject { - - public static final PyType TYPE = PyType.fromClass(PyFloat.class); - - /** Format specification used by repr(). */ - static final Spec SPEC_REPR = InternalFormat.fromText(" >r"); - /** Format specification used by str(). */ - static final Spec SPEC_STR = Spec.NUMERIC; - /** Constant float(0). */ - static final PyFloat ZERO = new PyFloat(0.0); - /** Constant float(1). */ - static final PyFloat ONE = new PyFloat(1.0); - /** Constant float("nan"). */ - static final PyFloat NAN = new PyFloat(Double.NaN); - - private final double value; - - public double getValue() { - return value; - } - - public PyFloat(PyType subtype, double v) { - super(subtype); - value = v; - } - - public PyFloat(double v) { - this(TYPE, v); - } - - public PyFloat(float v) { - this((double)v); - } - - @ExposedNew - public static PyObject float_new(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { - ArgParser ap = new ArgParser("float", args, keywords, new String[] {"x"}, 0); - PyObject x = ap.getPyObject(0, null); - if (x == null) { - if (new_.for_type == subtype) { - return ZERO; - } else { - return new PyFloatDerived(subtype, 0.0); - } - } else { - PyFloat floatObject = null; - try { - floatObject = x.__float__(); - } catch (PyException e) { - if (e.match(Py.AttributeError)) { - // Translate AttributeError to TypeError - // XXX: We are using the same message as CPython, even if - // it is not strictly correct (instances of types - // that implement the __float__ method are also - // valid arguments) - throw Py.TypeError("float() argument must be a string or a number"); - } - throw e; - } - if (new_.for_type == subtype) { - return floatObject; - } else { - return new PyFloatDerived(subtype, floatObject.getValue()); - } - } - } - - @ExposedGet(name = "real", doc = BuiltinDocs.float_real_doc) - public PyObject getReal() { - return 
float___float__(); - } - - @ExposedGet(name = "imag", doc = BuiltinDocs.float_imag_doc) - public PyObject getImag() { - return ZERO; - } - - @ExposedClassMethod(doc = BuiltinDocs.float_fromhex_doc) - public static PyObject float_fromhex(PyType type, PyObject o) { - // XXX: I'm sure this could be shortened/simplified, but Double.parseDouble() takes - // non-hex strings and requires the form 0xNUMBERpNUMBER for hex input which - // causes extra complexity here. - - String message = "invalid hexadecimal floating-point string"; - boolean negative = false; - - PyString s = o.__str__(); - String value = s.getString().trim().toLowerCase(); - - if (value.length() == 0) { - throw Py.ValueError(message); - } else if (value.equals("nan") || value.equals("-nan") || value.equals("+nan")) { - return NAN; - } else if (value.equals("inf") || value.equals("infinity") || value.equals("+inf") - || value.equals("+infinity")) { - return new PyFloat(Double.POSITIVE_INFINITY); - } else if (value.equals("-inf") || value.equals("-infinity")) { - return new PyFloat(Double.NEGATIVE_INFINITY); - } - - // Strip and record + or - - if (value.charAt(0) == '-') { - value = value.substring(1); - negative = true; - } else if (value.charAt(0) == '+') { - value = value.substring(1); - } - if (value.length() == 0) { - throw Py.ValueError(message); - } - - // Append 0x if not present. - if (!value.startsWith("0x") && !value.startsWith("0X")) { - value = "0x" + value; - } - - // reattach - if needed. - if (negative) { - value = "-" + value; - } - - // Append p if not present. 
- if (value.indexOf('p') == -1) { - value = value + "p0"; - } - - try { - double d = Double.parseDouble(value); - if (Double.isInfinite(d)) { - throw Py.OverflowError("hexadecimal value too large to represent as a float"); - } - return new PyFloat(d); - } catch (NumberFormatException n) { - throw Py.ValueError(message); - } - } - - // @ExposedClassMethod(doc = BuiltinDocs.float_hex_doc) - // public static PyObject float_hex(PyType type, double value) { - // return new PyString(Double.toHexString(value)); - // } - - private String pyHexString(Double f) { - // Simply rewrite Java hex repr to expected Python values; not - // the most efficient, but we don't expect this to be a hot - // spot in our code either - String java_hex = Double.toHexString(getValue()); - if (java_hex.equals("Infinity")) { - return "inf"; - } else if (java_hex.equals("-Infinity")) { - return "-inf"; - } else if (java_hex.equals("NaN")) { - return "nan"; - } else if (java_hex.equals("0x0.0p0")) { - return "0x0.0p+0"; - } else if (java_hex.equals("-0x0.0p0")) { - return "-0x0.0p+0"; - } - - // replace hex rep of MpE to conform with Python such that - // 1. M is padded to 16 digits (ignoring a leading -) - // 2. Mp+E if E>=0 - // example: result of 42.0.hex() is translated from - // 0x1.5p5 to 0x1.5000000000000p+5 - int len = java_hex.length(); - boolean start_exponent = false; - StringBuilder py_hex = new StringBuilder(len + 1); - int padding = f > 0 ? 17 : 18; - for (int i = 0; i < len; i++) { - char c = java_hex.charAt(i); - if (c == 'p') { - for (int pad = i; pad < padding; pad++) { - py_hex.append('0'); - } - start_exponent = true; - } else if (start_exponent) { - if (c != '-') { - py_hex.append('+'); - } - start_exponent = false; - } - py_hex.append(c); - } - return py_hex.toString(); - } - - @ExposedMethod(doc = BuiltinDocs.float_hex_doc) - public PyObject float_hex() { - return new PyString(pyHexString(getValue())); - } - - /** - * Determine if this float is not infinity, nor NaN. 
- */ - public boolean isFinite() { - return !Double.isInfinite(getValue()) && !Double.isNaN(getValue()); - } - - @Override - public String toString() { - return __str__().toString(); - } - - @Override - public PyString __str__() { - return float___str__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___str___doc) - final PyString float___str__() { - return Py.newString(formatDouble(SPEC_STR)); - } - - @Override - public PyString __repr__() { - return float___repr__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___repr___doc) - final PyString float___repr__() { - return Py.newString(formatDouble(SPEC_REPR)); - } - - /** - * Format this float according to the specification passed in. Supports __str__ and - * __repr__. - * - * @param spec parsed format specification string - * @return formatted value - */ - private String formatDouble(Spec spec) { - FloatFormatter f = new FloatFormatter(spec); - return f.format(value).getResult(); - } - - @Override - public int hashCode() { - return float___hash__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___hash___doc) - final int float___hash__() { - double value = getValue(); - if (Double.isInfinite(value)) { - return value < 0 ? 
-271828 : 314159; - } else if (Double.isNaN(value)) { - return 0; - } - - double intPart = Math.floor(value); - double fractPart = value - intPart; - - if (fractPart == 0) { - if (intPart <= Integer.MAX_VALUE && intPart >= Integer.MIN_VALUE) { - return (int)value; - } else { - return __long__().hashCode(); - } - } else { - long v = Double.doubleToLongBits(getValue()); - return (int)v ^ (int)(v >> 32); - } - } - - @Override - public boolean __nonzero__() { - return float___nonzero__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___nonzero___doc) - final boolean float___nonzero__() { - return getValue() != 0; - } - - @Override - public Object __tojava__(Class c) { - if (c == Double.TYPE || c == Number.class || c == Double.class || c == Object.class - || c == Serializable.class) { - return Double.valueOf(getValue()); - } else if (c == Float.TYPE || c == Float.class) { - return Float.valueOf((float) getValue()); - } - return super.__tojava__(c); - } - - @Override - public PyObject __eq__(PyObject other) { - // preclude _cmp_unsafe's this == other shortcut because NaN != anything, even - // itself - if (Double.isNaN(getValue())) { - return Py.False; - } - return null; - } - - @Override - public PyObject __ne__(PyObject other) { - if (Double.isNaN(getValue())) { - return Py.True; - } - return null; - } - - @Override - public PyObject __gt__(PyObject other) { - // NaN > anything is always false. - if (Double.isNaN(getValue())) { - return Py.False; - } - return null; - } - - @Override - public PyObject __ge__(PyObject other) { - // NaN >= anything is always false. - if (Double.isNaN(getValue())) { - return Py.False; - } - return null; - } - - @Override - public PyObject __lt__(PyObject other) { - // NaN < anything is always false. - if (Double.isNaN(getValue())) { - return Py.False; - } - return null; - } - - @Override - public PyObject __le__(PyObject other) { - // NaN >= anything is always false. 
- if (Double.isNaN(getValue())) { - return Py.False; - } - return null; - } - - @Override - public int __cmp__(PyObject other) { - return float___cmp__(other); - } - - // XXX: needs __doc__ - @ExposedMethod(type = MethodType.CMP) - final int float___cmp__(PyObject other) { - double i = getValue(); - double j; - - if (other instanceof PyFloat) { - j = ((PyFloat)other).getValue(); - } else if (!isFinite()) { - // we're infinity: our magnitude exceeds any finite - // integer, so it doesn't matter which int we compare i - // with. If NaN, similarly. - if (other instanceof PyInteger || other instanceof PyLong) { - j = 0.0; - } else { - return -2; - } - } else if (other instanceof PyInteger) { - j = ((PyInteger)other).getValue(); - } else if (other instanceof PyLong) { - BigDecimal v = new BigDecimal(getValue()); - BigDecimal w = new BigDecimal(((PyLong)other).getValue()); - return v.compareTo(w); - } else { - return -2; - } - - if (i < j) { - return -1; - } else if (i > j) { - return 1; - } else if (i == j) { - return 0; - } else { - // at least one side is NaN - return Double.isNaN(i) ? (Double.isNaN(j) ? 1 : -1) : 1; - } - } - - @Override - public Object __coerce_ex__(PyObject other) { - return float___coerce_ex__(other); - } - - @ExposedMethod(doc = BuiltinDocs.float___coerce___doc) - final PyObject float___coerce__(PyObject other) { - return adaptToCoerceTuple(float___coerce_ex__(other)); - } - - /** - * Coercion logic for float. Implemented as a final method to avoid invocation of virtual - * methods from the exposed coerce. 
- */ - final Object float___coerce_ex__(PyObject other) { - if (other instanceof PyFloat) { - return other; - } else if (other instanceof PyInteger) { - return new PyFloat((double)((PyInteger)other).getValue()); - } else if (other instanceof PyLong) { - return new PyFloat(((PyLong)other).doubleValue()); - } else { - return Py.None; - } - } - - private static boolean canCoerce(PyObject other) { - return other instanceof PyFloat || other instanceof PyInteger || other instanceof PyLong; - } - - private static double coerce(PyObject other) { - if (other instanceof PyFloat) { - return ((PyFloat)other).getValue(); - } else if (other instanceof PyInteger) { - return ((PyInteger)other).getValue(); - } else if (other instanceof PyLong) { - return ((PyLong)other).doubleValue(); - } else { - throw Py.TypeError("xxx"); - } - } - - @Override - public PyObject __add__(PyObject right) { - return float___add__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___add___doc) - final PyObject float___add__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - return new PyFloat(getValue() + rightv); - } - - @Override - public PyObject __radd__(PyObject left) { - return float___radd__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___radd___doc) - final PyObject float___radd__(PyObject left) { - return __add__(left); - } - - @Override - public PyObject __sub__(PyObject right) { - return float___sub__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___sub___doc) - final PyObject float___sub__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - return new PyFloat(getValue() - rightv); - } - - @Override - public PyObject __rsub__(PyObject left) { - return float___rsub__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rsub___doc) - final PyObject float___rsub__(PyObject 
left) { - if (!canCoerce(left)) { - return null; - } - double leftv = coerce(left); - return new PyFloat(leftv - getValue()); - } - - @Override - public PyObject __mul__(PyObject right) { - return float___mul__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___mul___doc) - final PyObject float___mul__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - return new PyFloat(getValue() * rightv); - } - - @Override - public PyObject __rmul__(PyObject left) { - return float___rmul__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rmul___doc) - final PyObject float___rmul__(PyObject left) { - return __mul__(left); - } - - @Override - public PyObject __div__(PyObject right) { - return float___div__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___div___doc) - final PyObject float___div__(PyObject right) { - if (!canCoerce(right)) { - return null; - } else if (Options.division_warning >= 2) { - Py.warning(Py.DeprecationWarning, "classic float division"); - } - - double rightv = coerce(right); - if (rightv == 0) { - throw Py.ZeroDivisionError("float division"); - } - return new PyFloat(getValue() / rightv); - } - - @Override - public PyObject __rdiv__(PyObject left) { - return float___rdiv__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rdiv___doc) - final PyObject float___rdiv__(PyObject left) { - if (!canCoerce(left)) { - return null; - } else if (Options.division_warning >= 2) { - Py.warning(Py.DeprecationWarning, "classic float division"); - } - - double leftv = coerce(left); - if (getValue() == 0) { - throw Py.ZeroDivisionError("float division"); - } - return new PyFloat(leftv / getValue()); - } - - @Override - public PyObject __floordiv__(PyObject right) { - return float___floordiv__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___floordiv___doc) - final 
PyObject float___floordiv__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - if (rightv == 0) { - throw Py.ZeroDivisionError("float division"); - } - return new PyFloat(Math.floor(getValue() / rightv)); - } - - @Override - public PyObject __rfloordiv__(PyObject left) { - return float___rfloordiv__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rfloordiv___doc) - final PyObject float___rfloordiv__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - double leftv = coerce(left); - if (getValue() == 0) { - throw Py.ZeroDivisionError("float division"); - } - return new PyFloat(Math.floor(leftv / getValue())); - } - - @Override - public PyObject __truediv__(PyObject right) { - return float___truediv__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___truediv___doc) - final PyObject float___truediv__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - if (rightv == 0) { - throw Py.ZeroDivisionError("float division"); - } - return new PyFloat(getValue() / rightv); - } - - @Override - public PyObject __rtruediv__(PyObject left) { - return float___rtruediv__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rtruediv___doc) - final PyObject float___rtruediv__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - double leftv = coerce(left); - if (getValue() == 0) { - throw Py.ZeroDivisionError("float division"); - } - return new PyFloat(leftv / getValue()); - } - - /** - * Python % operator: y = n*x + z. The modulo operator always yields a result with the same sign - * as its second operand (or zero). 
(Compare java.Math.IEEEremainder) - * - * @param x dividend - * @param y divisor - * @return x % y - */ - private static double modulo(double x, double y) { - if (y == 0.0) { - throw Py.ZeroDivisionError("float modulo"); - } else { - double z = x % y; - if (z == 0.0) { - // Has to be same sign as y (even when zero). - return Math.copySign(z, y); - } else if ((z > 0.0) == (y > 0.0)) { - // z has same sign as y, as it must. - return z; - } else { - // Note abs(z) < abs(y) and opposite sign. - return z + y; - } - } - } - - @Override - public PyObject __mod__(PyObject right) { - return float___mod__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___mod___doc) - final PyObject float___mod__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - return new PyFloat(modulo(getValue(), rightv)); - } - - @Override - public PyObject __rmod__(PyObject left) { - return float___rmod__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rmod___doc) - final PyObject float___rmod__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - double leftv = coerce(left); - return new PyFloat(modulo(leftv, getValue())); - } - - @Override - public PyObject __divmod__(PyObject right) { - return float___divmod__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___divmod___doc) - final PyObject float___divmod__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - double rightv = coerce(right); - - if (rightv == 0) { - throw Py.ZeroDivisionError("float division"); - } - double z = Math.floor(getValue() / rightv); - - return new PyTuple(new PyFloat(z), new PyFloat(getValue() - z * rightv)); - } - - @Override - public PyObject __rdivmod__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - double leftv = coerce(left); - - if (getValue() == 0) { - throw Py.ZeroDivisionError("float division"); - } - double z = Math.floor(leftv / 
getValue()); - - return new PyTuple(new PyFloat(z), new PyFloat(leftv - z * getValue())); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rdivmod___doc) - final PyObject float___rdivmod__(PyObject left) { - return __rdivmod__(left); - } - - @Override - public PyObject __pow__(PyObject right, PyObject modulo) { - return float___pow__(right, modulo); - } - - @ExposedMethod(type = MethodType.BINARY, defaults = "null", // - doc = BuiltinDocs.float___pow___doc) - final PyObject float___pow__(PyObject right, PyObject modulo) { - if (!canCoerce(right)) { - return null; - } - - modulo = (modulo == Py.None) ? null : modulo; - if (modulo != null) { - throw Py.TypeError("pow() 3rd argument not allowed unless all arguments are integers"); - } else { - return _pow(getValue(), coerce(right)); - } - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.float___rpow___doc) - final PyObject float___rpow__(PyObject left) { - return __rpow__(left); - } - - @Override - public PyObject __rpow__(PyObject left) { - if (!canCoerce(left)) { - return null; - } else { - return _pow(coerce(left), getValue()); - } - } - - private static PyFloat _pow(double v, double w) { - /* - * This code was translated from the CPython implementation at v2.7.8 by progressively - * removing cases that could be delegated to Java. Jython differs from CPython in that where - * C pow() overflows, Java pow() returns inf (observed on Windows). This is not subject to - * regression tests, so we take it as an allowable platform dependency. All other - * differences in Java Math.pow() are trapped below and Python behaviour is enforced. 
- */ - if (w == 0) { - // v**0 is 1, even 0**0 - return ONE; - - } else if (Double.isNaN(v)) { - // nan**w = nan, unless w == 0 - return NAN; - - } else if (Double.isNaN(w)) { - // v**nan = nan, unless v == 1; 1**nan = 1 - if (v == 1.0) { - return ONE; - } else { - return NAN; - } - - } else if (Double.isInfinite(w)) { - /* - * In Java Math pow(1,inf) = pow(-1,inf) = pow(1,-inf) = pow(-1,-inf) = nan, but in - * Python they are all 1. - */ - if (v == 1.0 || v == -1.0) { - return ONE; - } - - } else if (v == 0.0) { - // 0**w is an error if w is negative. - if (w < 0.0) { - throw Py.ZeroDivisionError("0.0 cannot be raised to a negative power"); - } - - } else if (!Double.isInfinite(v) && v < 0.0) { - if (w != Math.floor(w)) { - throw Py.ValueError("negative number cannot be raised to a fractional power"); - } - - } - - // In all cases not caught above we can entrust the calculation to Java - return new PyFloat(Math.pow(v, w)); - - } - - @Override - public PyObject __neg__() { - return float___neg__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___neg___doc) - final PyObject float___neg__() { - return new PyFloat(-getValue()); - } - - @Override - public PyObject __pos__() { - return float___pos__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___pos___doc) - final PyObject float___pos__() { - return float___float__(); - } - - @Override - public PyObject __invert__() { - throw Py.TypeError("bad operand type for unary ~"); - } - - @Override - public PyObject __abs__() { - return float___abs__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___abs___doc) - final PyObject float___abs__() { - return new PyFloat(Math.abs(getValue())); - } - - @Override - public PyObject __int__() { - return float___int__(); - } - - /** Smallest value that cannot be represented as an int */ - private static double INT_LONG_BOUNDARY = -(double)Integer.MIN_VALUE; // 2^31 - - @ExposedMethod(doc = BuiltinDocs.float___int___doc) - final PyObject float___int__() { - double v = getValue(); - 
if (v < INT_LONG_BOUNDARY && v > -(INT_LONG_BOUNDARY + 1.0)) { - // v will fit into an int (when rounded towards zero). - return new PyInteger((int)v); - } else { - return __long__(); - } - } - - @Override - public PyObject __long__() { - return float___long__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___long___doc) - final PyObject float___long__() { - return new PyLong(getValue()); - } - - @Override - public PyFloat __float__() { - return float___float__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___float___doc) - final PyFloat float___float__() { - return getType() == TYPE ? this : Py.newFloat(getValue()); - } - - @Override - public PyObject __trunc__() { - return float___trunc__(); - } - - @ExposedMethod(doc = BuiltinDocs.float___trunc___doc) - final PyObject float___trunc__() { - if (Double.isNaN(value)) { - throw Py.ValueError("cannot convert float NaN to integer"); - } - if (Double.isInfinite(value)) { - throw Py.OverflowError("cannot convert float infinity to integer"); - } - if (value < Integer.MAX_VALUE) { - return new PyInteger((int)value); - } else if (value < Long.MAX_VALUE) { - return new PyLong((long)value); - } - BigDecimal d = new BigDecimal(value); - return new PyLong(d.toBigInteger()); - } - - @Override - public PyObject conjugate() { - return float_conjugate(); - } - - @ExposedMethod(doc = BuiltinDocs.float_conjugate_doc) - final PyObject float_conjugate() { - return this; - } - - public boolean is_integer() { - return float_is_integer(); - } - - @ExposedMethod(doc = BuiltinDocs.float_is_integer_doc) - final boolean float_is_integer() { - if (Double.isInfinite(value)) { - return false; - } - return Math.floor(value) == value; - } - - @Override - public PyComplex __complex__() { - return new PyComplex(getValue(), 0.); - } - - @ExposedMethod(doc = BuiltinDocs.float___getnewargs___doc) - final PyTuple float___getnewargs__() { - return new PyTuple(new PyObject[] {new PyFloat(getValue())}); - } - - @Override - public PyTuple 
__getnewargs__() { - return float___getnewargs__(); - } - - @Override - public PyObject __format__(PyObject formatSpec) { - return float___format__(formatSpec); - } - - @ExposedMethod(doc = BuiltinDocs.float___format___doc) - final PyObject float___format__(PyObject formatSpec) { - - // Parse the specification - Spec spec = InternalFormat.fromText(formatSpec, "__format__"); - - // Get a formatter for the specification - FloatFormatter f = prepareFormatter(spec); - - if (f != null) { - // Bytes mode if formatSpec argument is not unicode. - f.setBytes(!(formatSpec instanceof PyUnicode)); - // Convert as per specification. - f.format(value); - // Return a result that has the same type (str or unicode) as the formatSpec argument. - return f.pad().getPyResult(); - - } else { - // The type code was not recognised in prepareFormatter - throw Formatter.unknownFormat(spec.type, "float"); - } - } - - /** - * Common code for PyFloat, {@link PyInteger} and {@link PyLong} to prepare a - * {@link FloatFormatter} from a parsed specification. The object returned has format method - * {@link FloatFormatter#format(double)}. - * - * @param spec a parsed PEP-3101 format specification. - * @return a formatter ready to use, or null if the type is not a floating point format type. - * @throws PyException {@code ValueError} if the specification is faulty. - */ - @SuppressWarnings("fallthrough") - static FloatFormatter prepareFormatter(Spec spec) { - - // Slight differences between format types - switch (spec.type) { - - case 'n': - if (spec.grouping) { - throw Formatter.notAllowed("Grouping", "float", spec.type); - } - // Fall through - - case Spec.NONE: - case 'e': - case 'f': - case 'g': - case 'E': - case 'F': - case 'G': - case '%': - // Check for disallowed parts of the specification - if (spec.alternate) { - throw FloatFormatter.alternateFormNotAllowed("float"); - } - // spec may be incomplete. The defaults are those commonly used for numeric formats. 
- spec = spec.withDefaults(Spec.NUMERIC); - return new FloatFormatter(spec); - - default: - return null; - } - } - - @ExposedMethod(doc = BuiltinDocs.float_as_integer_ratio_doc) - public PyTuple as_integer_ratio() { - if (Double.isInfinite(value)) { - throw Py.OverflowError("Cannot pass infinity to float.as_integer_ratio."); - } - if (Double.isNaN(value)) { - throw Py.ValueError("Cannot pass NaN to float.as_integer_ratio."); - } - PyTuple frexp = math.frexp(value); - double float_part = ((Double)frexp.get(0)).doubleValue(); - int exponent = ((Integer)frexp.get(1)).intValue(); - for (int i = 0; i < 300 && float_part != Math.floor(float_part); i++) { - float_part *= 2.0; - exponent--; - } - /* - * self == float_part * 2**exponent exactly and float_part is integral. If FLT_RADIX != 2, - * the 300 steps may leave a tiny fractional part to be truncated by PyLong_FromDouble(). - */ - - PyLong numerator = new PyLong(float_part); - PyLong denominator = new PyLong(1); - PyLong py_exponent = new PyLong(Math.abs(exponent)); - py_exponent = (PyLong)denominator.__lshift__(py_exponent); - if (exponent > 0) { - numerator = new PyLong(numerator.getValue().multiply(py_exponent.getValue())); - } else { - denominator = py_exponent; - } - return new PyTuple(numerator, denominator); - } - - @Override - public double asDouble() { - return getValue(); - } - - @Override - public boolean isNumberType() { - return true; - } - - // standard singleton issues apply here to __getformat__/__setformat__, - // but this is what Python demands - public enum Format { - - UNKNOWN("unknown"), BE("IEEE, big-endian"), LE("IEEE, little-endian"); - - private final String format; - - Format(String format) { - this.format = format; - } - - public String format() { - return format; - } - } - - // subset of IEEE-754, the JVM is big-endian - public static volatile Format double_format = Format.BE; - public static volatile Format float_format = Format.BE; - - @ExposedClassMethod(doc = 
BuiltinDocs.float___getformat___doc) - public static String float___getformat__(PyType type, String typestr) { - if ("double".equals(typestr)) { - return double_format.format(); - } else if ("float".equals(typestr)) { - return float_format.format(); - } else { - throw Py.ValueError("__getformat__() argument 1 must be 'double' or 'float'"); - } - } - - @ExposedClassMethod(doc = BuiltinDocs.float___setformat___doc) - public static void float___setformat__(PyType type, String typestr, String format) { - Format new_format = null; - if (!"double".equals(typestr) && !"float".equals(typestr)) { - throw Py.ValueError("__setformat__() argument 1 must be 'double' or 'float'"); - } - if (Format.LE.format().equals(format)) { - throw Py.ValueError(String.format("can only set %s format to 'unknown' or the " - + "detected platform value", typestr)); - } else if (Format.BE.format().equals(format)) { - new_format = Format.BE; - } else if (Format.UNKNOWN.format().equals(format)) { - new_format = Format.UNKNOWN; - } else { - throw Py.ValueError("__setformat__() argument 2 must be 'unknown', " - + "'IEEE, little-endian' or 'IEEE, big-endian'"); - } - if (new_format != null) { - if ("double".equals(typestr)) { - double_format = new_format; - } else { - float_format = new_format; - } - } - } -} diff --git a/src/org/python/core/PyList.java b/src/org/python/core/PyList.java deleted file mode 100644 index 89f0bf727..000000000 --- a/src/org/python/core/PyList.java +++ /dev/null @@ -1,1315 +0,0 @@ -// Copyright (c) Corporation for National Research Initiatives -package org.python.core; - -import java.util.ArrayList; -import java.util.Arrays; - -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import org.python.expose.ExposedType; -import org.python.expose.MethodType; -import org.python.util.Generic; - -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.ConcurrentModificationException; -import 
java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; - -import java.lang.reflect.Array; -import java.util.Map; - -@ExposedType(name = "list", base = PyObject.class, doc = BuiltinDocs.list_doc) -public class PyList extends PySequenceList { - - public static final PyType TYPE = PyType.fromClass(PyList.class); - { - // Ensure list is not Hashable - TYPE.object___setattr__("__hash__", Py.None); - } - - private final List list; - public volatile int gListAllocatedStatus = -1; - - public PyList() { - this(TYPE); - } - - public PyList(PyType type) { - super(type); - list = Generic.list(); - } - - private PyList(List list, boolean convert) { - super(TYPE); - if (!convert) { - this.list = (List) list; - } else { - this.list = Generic.list(); - for (Object o : list) { - add(o); - } - } - } - - public PyList(PyType type, PyObject[] elements) { - super(type); - list = new ArrayList(Arrays.asList(elements)); - } - - public PyList(PyType type, Collection c) { - super(type); - list = new ArrayList(c.size()); - for (Object o : c) { - add(o); - } - } - - public PyList(PyObject[] elements) { - this(TYPE, elements); - } - - public PyList(Collection c) { - this(TYPE, c); - } - - public PyList(PyObject o) { - this(TYPE); - for (PyObject item : o.asIterable()) { - list.add(item); - } - } - - public static PyList fromList(List list) { - return new PyList(list, false); - } - - List getList() { - return Collections.unmodifiableList(list); - } - - private static List listify(Iterator iter) { - List list = Generic.list(); - while (iter.hasNext()) { - list.add(iter.next()); - } - return list; - } - - public PyList(Iterator iter) { - this(TYPE, listify(iter)); - } - - // refactor and put in Py presumably; - // presumably we can consume an arbitrary iterable too! 
- private static void addCollection(List list, Collection seq) { - Map seen = new HashMap<>(); - for (Object item : seq) { - long id = Py.java_obj_id(item); - PyObject seen_obj = seen.get(id); - if (seen_obj != null) { - seen_obj = Py.java2py(item); - seen.put(id, seen_obj); - } - list.add(seen_obj); - } - } - - @SuppressWarnings("unchecked") - @ExposedNew - @ExposedMethod(doc = BuiltinDocs.list___init___doc) - final void list___init__(PyObject[] args, String[] kwds) { - ArgParser ap = new ArgParser("list", args, kwds, new String[]{"sequence"}, 0); - PyObject seq = ap.getPyObject(0, null); - clear(); - if (seq == null) { - return; - } - - /* PyListDerived should be iterated over and not plain copied for cases where someone subclasses list - and overrides __iter__ - */ - if (seq instanceof PyListDerived) { - for (PyObject item : seq.asIterable()) { - append(item); - } - } else if (seq instanceof PyList) { - list.addAll(((PyList) seq).list); // don't convert - } else if (seq instanceof PyTuple) { - list.addAll(((PyTuple) seq).getList()); - } else if (seq.getClass().isAssignableFrom(Collection.class)) { - System.err.println("Adding from collection"); - addCollection(list, (Collection) seq); - } else { - for (PyObject item : seq.asIterable()) { - append(item); - } - } - } - - @Override - public int __len__() { - return list___len__(); - } - - @ExposedMethod(doc = BuiltinDocs.list___len___doc) - final synchronized int list___len__() { - return size(); - } - - @Override - protected void del(int i) { - remove(i); - } - - @Override - protected void delRange(int start, int stop) { - remove(start, stop); - } - - @SuppressWarnings("unchecked") - @Override - protected void setslice(int start, int stop, int step, PyObject value) { - if (stop < start) { - stop = start; - } - if (value instanceof PyList) { - if (value == this) { // copy - value = new PyList((PySequence) value); - } - setslicePyList(start, stop, step, (PyList) value); - } else if (value instanceof PySequence) { - 
setsliceIterator(start, stop, step, value.asIterable().iterator()); - } else if (value instanceof List) { - setsliceList(start, stop, step, (List)value); - } else { - Object valueList = value.__tojava__(List.class); - if (valueList != null && valueList != Py.NoConversion) { - setsliceList(start, stop, step, (List)valueList); - } else { - value = new PyList(value); - setsliceIterator(start, stop, step, value.asIterable().iterator()); - } - } - } - - final private void setsliceList(int start, int stop, int step, List value) { - if (step == 1) { - list.subList(start, stop).clear(); - int n = value.size(); - for (int i=0, j=start; i iter = value.listIterator(); - for (int j = start; iter.hasNext(); j += step) { - PyObject item = Py.java2py(iter.next()); - if (j >= size) { - list.add(item); - } else { - list.set(j, item); - } - } - } - } - - final private void setsliceIterator(int start, int stop, int step, Iterator iter) { - if (step == 1) { - List insertion = new ArrayList(); - if (iter != null) { - while (iter.hasNext()) { - insertion.add(iter.next()); - } - } - list.subList(start, stop).clear(); - list.addAll(start, insertion); - } else { - int size = list.size(); - for (int j = start; iter.hasNext(); j += step) { - PyObject item = iter.next(); - if (j >= size) { - list.add(item); - } else { - list.set(j, item); - } - } - } - } - - final private void setslicePyList(int start, int stop, int step, PyList other) { - if (step == 1) { - list.subList(start, stop).clear(); - list.addAll(start, other.list); - } else { - int size = list.size(); - Iterator iter = other.list.listIterator(); - for (int j = start; iter.hasNext(); j += step) { - PyObject item = iter.next(); - if (j >= size) { - list.add(item); - } else { - list.set(j, item); - } - } - } - } - - @Override - protected synchronized PyObject repeat(int count) { - if (count < 0) { - count = 0; - } - int size = size(); - int newSize = size * count; - if (count != 0 && newSize / count != size) { - throw 
Py.MemoryError(""); - } - - PyObject[] elements = list.toArray(new PyObject[size]); - PyObject[] newList = new PyObject[newSize]; - for (int i = 0; i < count; i++) { - System.arraycopy(elements, 0, newList, i * size, size); - } - return new PyList(newList); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___ne___doc) - final synchronized PyObject list___ne__(PyObject o) { - return seq___ne__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___eq___doc) - final synchronized PyObject list___eq__(PyObject o) { - return seq___eq__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___lt___doc) - final synchronized PyObject list___lt__(PyObject o) { - return seq___lt__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___le___doc) - final synchronized PyObject list___le__(PyObject o) { - return seq___le__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___gt___doc) - final synchronized PyObject list___gt__(PyObject o) { - return seq___gt__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___ge___doc) - final synchronized PyObject list___ge__(PyObject o) { - return seq___ge__(o); - } - - @Override - public PyObject __imul__(PyObject o) { - return list___imul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___imul___doc) - final synchronized PyObject list___imul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - int count = o.asIndex(Py.OverflowError); - - int size = size(); - if (size == 0 || count == 1) { - return this; - } - - if (count < 1) { - clear(); - return this; - } - - if (size > Integer.MAX_VALUE / count) { - throw Py.MemoryError(""); - } - - int newSize = size * count; - if (list instanceof ArrayList) { - ((ArrayList) list).ensureCapacity(newSize); - } - List oldList = new ArrayList(list); - for (int i = 1; i < count; i++) { - list.addAll(oldList); - } - gListAllocatedStatus 
= list.size(); // now omit? - return this; - } - - @Override - public PyObject __mul__(PyObject o) { - return list___mul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___mul___doc) - final synchronized PyObject list___mul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - return repeat(o.asIndex(Py.OverflowError)); - } - - @Override - public PyObject __rmul__(PyObject o) { - return list___rmul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___rmul___doc) - final synchronized PyObject list___rmul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - return repeat(o.asIndex(Py.OverflowError)); - } - - @Override - public PyObject __add__(PyObject o) { - return list___add__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___add___doc) - final synchronized PyObject list___add__(PyObject o) { - PyList sum = null; - if (o instanceof PySequenceList && !(o instanceof PyTuple)) { - if (o instanceof PyList) { - List oList = ((PyList) o).list; - ArrayList newList = new ArrayList<>(list.size() + oList.size()); - newList.addAll(list); - newList.addAll(oList); - sum = fromList(newList); - } - } else if (!(o instanceof PySequenceList)) { - // also support adding java lists (but not PyTuple!) - Object oList = o.__tojava__(List.class); - if (oList != Py.NoConversion && oList != null) { - @SuppressWarnings("unchecked") - List otherList = (List) oList; - sum = new PyList(); - sum.list_extend(this); - for (Object ob: otherList) { - sum.add(ob); - } - } - } - return sum; - } - - @Override - public PyObject __radd__(PyObject o) { - return list___radd__(o); - } - - //XXX: needs __doc__ - @SuppressWarnings("unchecked") - @ExposedMethod(type = MethodType.BINARY) - final synchronized PyObject list___radd__(PyObject o) { - // Support adding java.util.List, but prevent adding PyTuple. - // 'o' should never be a PyNewList since __add__ is defined. 
- PyList sum = null; - if (o instanceof PySequence) { - return null; - } - Object oList = o.__tojava__(List.class); - if (oList != Py.NoConversion && oList != null) { - sum = new PyList(); - sum.addAll((List) oList); - sum.extend(this); - } - return sum; - } - - @ExposedMethod(doc = BuiltinDocs.list___contains___doc) - final synchronized boolean list___contains__(PyObject o) { - return object___contains__(o); - } - - @ExposedMethod(doc = BuiltinDocs.list___delitem___doc) - final synchronized void list___delitem__(PyObject index) { - seq___delitem__(index); - } - - @ExposedMethod(doc = BuiltinDocs.list___setitem___doc) - final synchronized void list___setitem__(PyObject o, PyObject def) { - seq___setitem__(o, def); - } - - @ExposedMethod(doc = BuiltinDocs.list___getitem___doc) - final synchronized PyObject list___getitem__(PyObject o) { - PyObject ret = seq___finditem__(o); - if (ret == null) { - throw Py.IndexError("index out of range: " + o); - } - return ret; - } - - @Override - public PyObject __iter__() { - return list___iter__(); - } - - @ExposedMethod(doc = BuiltinDocs.list___iter___doc) - final PyObject list___iter__() { - return new PyListIterator(this); - } - - //@Override - public PyIterator __reversed__() { - return list___reversed__(); - } - - @ExposedMethod(doc = BuiltinDocs.list___reversed___doc) - final synchronized PyIterator list___reversed__() { - return new PyReversedIterator(this); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.list___getslice___doc) - final synchronized PyObject list___getslice__(PyObject start, PyObject stop, PyObject step) { - return seq___getslice__(start, stop, step); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.list___setslice___doc) - final synchronized void list___setslice__(PyObject start, PyObject stop, PyObject step, PyObject value) { - seq___setslice__(start, stop, step, value); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.list___delslice___doc) - final synchronized void 
list___delslice__(PyObject start, PyObject stop, PyObject step) { - seq___delslice__(start, stop, step); - } - - @Override - protected String unsupportedopMessage(String op, PyObject o2) { - if (op.equals("+")) { - return "can only concatenate list (not \"{2}\") to list"; - } - return super.unsupportedopMessage(op, o2); - } - - public String toString() { - return list_toString(); - } - - //XXX: needs __doc__ - @ExposedMethod(names = "__repr__") - final synchronized String list_toString() { - ThreadState ts = Py.getThreadState(); - if (!ts.enterRepr(this)) { - return "[...]"; - } - StringBuilder buf = new StringBuilder("["); - int length = size(); - int i = 0; - for (PyObject item : list) { - buf.append(item.__repr__().toString()); - if (i < length - 1) { - buf.append(", "); - } - i++; - } - buf.append("]"); - ts.exitRepr(this); - return buf.toString(); - } - - /** - * Add a single element to the end of list. - * - * @param o - * the element to add. - */ - public void append(PyObject o) { - list_append(o); - } - - @ExposedMethod(doc = BuiltinDocs.list_append_doc) - final synchronized void list_append(PyObject o) { - pyadd(o); - gListAllocatedStatus = list.size(); - } - - /** - * Return the number elements in the list that equals the argument. - * - * @param o - * the argument to test for. Testing is done with the == operator. - */ - public int count(PyObject o) { - return list_count(o); - } - - @ExposedMethod(doc = BuiltinDocs.list_count_doc) - final synchronized int list_count(PyObject o) { - int count = 0; - for (PyObject item : list) { - if (item.equals(o)) { - count++; - } - } - return count; - } - - /** - * return smallest index where an element in the list equals the argument. - * - * @param o - * the argument to test for. Testing is done with the == operator. 
- */ - public int index(PyObject o) { - return index(o, 0); - } - - public int index(PyObject o, int start) { - return list_index(o, start, size()); - } - - public int index(PyObject o, int start, int stop) { - return list_index(o, start, stop); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.list_index_doc) - final synchronized int list_index(PyObject o, PyObject start, PyObject stop) { - int startInt = start == null ? 0 : PySlice.calculateSliceIndex(start); - int stopInt = stop == null ? size() : PySlice.calculateSliceIndex(stop); - return list_index(o, startInt, stopInt); - } - - final synchronized int list_index(PyObject o, int start, int stop) { - return _index(o, "list.index(x): x not in list", start, stop); - } - - final synchronized int list_index(PyObject o, int start) { - return _index(o, "list.index(x): x not in list", start, size()); - } - - final synchronized int list_index(PyObject o) { - return _index(o, "list.index(x): x not in list", 0, size()); - } - - private int _index(PyObject o, String message, int start, int stop) { - // Follow Python 2.3+ behavior - int validStop = boundToSequence(stop); - int validStart = boundToSequence(start); - int i = validStart; - if (validStart <= validStop) { - try { - for (PyObject item : list.subList(validStart, validStop)) { - if (item.equals(o)) { - return i; - } - i++; - } - } catch (ConcurrentModificationException ex) { - throw Py.ValueError(message); - } - } - throw Py.ValueError(message); - } - - /** - * Insert the argument element into the list at the specified index.
- * Same as s[index:index] = [o] if index >= 0. - * - * @param index - * the position where the element will be inserted. - * @param o - * the element to insert. - */ - public void insert(int index, PyObject o) { - list_insert(index, o); - } - - @ExposedMethod(doc = BuiltinDocs.list_insert_doc) - final synchronized void list_insert(int index, PyObject o) { - if (index < 0) { - index = Math.max(0, size() + index); - } - if (index > size()) { - index = size(); - } - pyadd(index, o); - gListAllocatedStatus = list.size(); - } - - /** - * Remove the first occurence of the argument from the list. The elements arecompared with the - * == operator.
- * Same as del s[s.index(x)] - * - * @param o - * the element to search for and remove. - */ - public void remove(PyObject o) { - list_remove(o); - } - - @ExposedMethod(doc = BuiltinDocs.list_remove_doc) - final synchronized void list_remove(PyObject o) { - del(_index(o, "list.remove(x): x not in list", 0, size())); - gListAllocatedStatus = list.size(); - } - - /** - * Reverses the items of s in place. The reverse() methods modify the list in place for economy - * of space when reversing a large list. It doesn't return the reversed list to remind you of - * this side effect. - */ - public void reverse() { - list_reverse(); - } - - @ExposedMethod(doc = BuiltinDocs.list_reverse_doc) - final synchronized void list_reverse() { - Collections.reverse(list); - gListAllocatedStatus = list.size(); - } - - /** - * Removes and return the last element in the list. - */ - public PyObject pop() { - return pop(-1); - } - - /** - * Removes and return the n indexed element in the list. - * - * @param n - * the index of the element to remove and return. - */ - public PyObject pop(int n) { - return list_pop(n); - } - - @ExposedMethod(defaults = "-1", doc = BuiltinDocs.list_pop_doc) - final synchronized PyObject list_pop(int n) { - int length = size(); - if (length == 0) { - throw Py.IndexError("pop from empty list"); - } - if (n < 0) { - n += length; - } - if (n < 0 || n >= length) { - throw Py.IndexError("pop index out of range"); - } - PyObject v = list.remove(n); - return v; - } - - /** - * Append the elements in the argument sequence to the end of the list.
- * Same as s[len(s):len(s)] = o. - * - * @param o - * the sequence of items to append to the list. - */ - public void extend(PyObject o) { - list_extend(o); - } - - @ExposedMethod(doc = BuiltinDocs.list_extend_doc) - final synchronized void list_extend(PyObject o) { - if (o instanceof PyList) { - list.addAll(((PyList) o).list); - } else { - for (PyObject item : o.asIterable()) { - list.add(item); - } - } - gListAllocatedStatus = list.size(); - } - - @Override - public PyObject __iadd__(PyObject o) { - return list___iadd__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.list___iadd___doc) - final synchronized PyObject list___iadd__(PyObject o) { - PyType oType = o.getType(); - if (oType == TYPE || oType == PyTuple.TYPE || this == o) { - extend(fastSequence(o, "argument must be iterable")); - return this; - } - - PyObject it; - try { - it = o.__iter__(); - } catch (PyException pye) { - if (!pye.match(Py.TypeError)) { - throw pye; - } - return null; - } - extend(it); - return this; - } - - /** - * Sort the items of the list in place. The compare argument is a function of two arguments - * (list items) which should return -1, 0 or 1 depending on whether the first argument is - * considered smaller than, equal to, or larger than the second argument. Note that this slows - * the sorting process down considerably; e.g. to sort a list in reverse order it is much faster - * to use calls to the methods sort() and reverse() than to use the built-in function sort() - * with a comparison function that reverses the ordering of the elements. - * - * @param compare - * the comparison function. - */ - /** - * Sort the items of the list in place. Items is compared with the normal relative comparison - * operators. 
- */ - @ExposedMethod(doc = BuiltinDocs.list_sort_doc) - final synchronized void list_sort(PyObject[] args, String[] kwds) { - ArgParser ap = new ArgParser("list", args, kwds, new String[]{"cmp", "key", "reverse"}, 0); - PyObject cmp = ap.getPyObject(0, Py.None); - PyObject key = ap.getPyObject(1, Py.None); - PyObject reverse = ap.getPyObject(2, Py.False); - sort(cmp, key, reverse); - } - - public void sort(PyObject cmp, PyObject key, PyObject reverse) { - boolean bReverse = reverse.__nonzero__(); - if (key == Py.None || key == null) { - if (cmp == Py.None || cmp == null) { - sort(bReverse); - } else { - sort(cmp, bReverse); - } - } else { - sort(cmp, key, bReverse); - } - } - - // a bunch of optimized paths for sort to avoid unnecessary work, such as DSU or checking compare functions for null - - public void sort() { - sort(false); - } - - private synchronized void sort(boolean reverse) { - gListAllocatedStatus = -1; - if (reverse) { - Collections.reverse(list); // maintain stability of sort by reversing first - } - final PyObjectDefaultComparator comparator = new PyObjectDefaultComparator(this); - Collections.sort(list, comparator); - if (comparator.raisedException()) { - throw comparator.getRaisedException(); - } - if (reverse) { - Collections.reverse(list); // maintain stability of sort by reversing first - } - gListAllocatedStatus = list.size(); - } - - private static class PyObjectDefaultComparator implements Comparator { - - private final PyList list; - private PyException comparatorException; - - PyObjectDefaultComparator(PyList list) { - this.list = list; - } - - public PyException getRaisedException() { - return comparatorException; - } - - public boolean raisedException() { - return comparatorException != null; - } - - @Override - public int compare(PyObject o1, PyObject o2) { - // PEP 207 specifies that sort should only depend on "less-than" (Issue #1767) - int result = 0; // If exception is raised return objects are equal - try { - if 
(o1._lt(o2).__nonzero__()) { - result = -1; - } else if (o2._lt(o1).__nonzero__()) { - result = 1; - } - } catch (PyException pye) { - // #2399 Stash the exception so we can rethrow it later, and allow the sort to continue - comparatorException = pye; - } - if (this.list.gListAllocatedStatus >= 0) { - throw Py.ValueError("list modified during sort"); - } - return result; - } - - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } - if (o instanceof PyObjectDefaultComparator) { - return true; - } - return false; - } - } - - public void sort(PyObject compare) { - sort(compare, false); - } - - private synchronized void sort(PyObject compare, boolean reverse) { - gListAllocatedStatus = -1; - if (reverse) { - Collections.reverse(list); // maintain stability of sort by reversing first - } - final PyObjectComparator comparator = new PyObjectComparator(this, compare); - Collections.sort(list, comparator); - if (comparator.raisedException()) { - throw comparator.getRaisedException(); - } - if (reverse) { - Collections.reverse(list); - } - gListAllocatedStatus = list.size(); - } - - private static class PyObjectComparator implements Comparator { - - private final PyList list; - private final PyObject cmp; - private PyException comparatorException; - - PyObjectComparator(PyList list, PyObject cmp) { - this.list = list; - this.cmp = cmp; - } - - public PyException getRaisedException() { - return comparatorException; - } - - public boolean raisedException() { - return comparatorException != null; - } - - @Override - public int compare(PyObject o1, PyObject o2) { - int result = 0; // If exception is raised return objects are equal - try { - result = cmp.__call__(o1, o2).asInt(); - } catch (PyException pye) { - // #2399 Stash the exception so we can rethrow it later, and allow the sort to continue - comparatorException = pye; - } - if (this.list.gListAllocatedStatus >= 0) { - throw Py.ValueError("list modified during sort"); - } - return result; - 
} - - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } - - if (o instanceof PyObjectComparator) { - return cmp.equals(((PyObjectComparator) o).cmp); - } - return false; - } - } - - private static class KV { - - private final PyObject key; - private final PyObject value; - - KV(PyObject key, PyObject value) { - this.key = key; - this.value = value; - } - } - - private static class KVComparator implements Comparator { - - private final PyList list; - private final PyObject cmp; - - KVComparator(PyList list, PyObject cmp) { - this.list = list; - this.cmp = cmp; - } - - public int compare(KV o1, KV o2) { - int result; - if (cmp != null && cmp != Py.None) { - result = cmp.__call__(o1.key, o2.key).asInt(); - } else { - // PEP 207 specifies that sort should only depend on "less-than" (Issue #1767) - if (o1.key._lt(o2.key).__nonzero__()) { - result = -1; - } else if (o2.key._lt(o1.key).__nonzero__()) { - result = 1; - } else { - result = 0; - } - } - if (this.list.gListAllocatedStatus >= 0) { - throw Py.ValueError("list modified during sort"); - } - return result; - } - - public boolean equals(Object o) { - if (o == this) { - return true; - } - - if (o instanceof KVComparator) { - return cmp.equals(((KVComparator) o).cmp); - } - return false; - } - } - - private synchronized void sort(PyObject cmp, PyObject key, boolean reverse) { - gListAllocatedStatus = -1; - - int size = list.size(); - final ArrayList decorated = new ArrayList(size); - for (PyObject value : list) { - decorated.add(new KV(key.__call__(value), value)); - } - list.clear(); - KVComparator c = new KVComparator(this, cmp); - if (reverse) { - Collections.reverse(decorated); // maintain stability of sort by reversing first - } - Collections.sort(decorated, c); - if (reverse) { - Collections.reverse(decorated); - } - if (list instanceof ArrayList) { - ((ArrayList) list).ensureCapacity(size); - } - for (KV kv : decorated) { - list.add(kv.value); - } - gListAllocatedStatus = 
list.size(); - } - - public int hashCode() { - return list___hash__(); - } - - @ExposedMethod(doc = BuiltinDocs.list___hash___doc) - final synchronized int list___hash__() { - throw Py.TypeError(String.format("unhashable type: '%.200s'", getType().fastGetName())); - } - - @Override - public PyTuple __getnewargs__() { - return new PyTuple(new PyTuple(getArray())); - } - - @Override - public void add(int index, Object element) { - pyadd(index, Py.java2py(element)); - } - - @Override - public boolean add(Object o) { - pyadd(Py.java2py(o)); - return true; - } - - @Override - public synchronized boolean addAll(int index, Collection c) { - PyList elements = new PyList(c); - return list.addAll(index, elements.list); - } - - @Override - public boolean addAll(Collection c) { - return addAll(0, c); - } - - @Override - public synchronized void clear() { - list.clear(); - } - - @Override - public synchronized boolean contains(Object o) { - return list.contains(Py.java2py(o)); - } - - @Override - public synchronized boolean containsAll(Collection c) { - if (c instanceof PyList) { - return list.containsAll(((PyList) c).list); - } else if (c instanceof PyTuple) { - return list.containsAll(((PyTuple) c).getList()); - } else { - return list.containsAll(new PyList(c)); - } - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - - if (other instanceof PyObject) { - synchronized (this) { - return _eq((PyObject)other).__nonzero__(); - } - } - if (other instanceof List) { - synchronized (this) { - return list.equals(other); - } - } - return false; - } - - @Override - public synchronized Object get(int index) { - return list.get(index).__tojava__(Object.class); - } - - @Override - public synchronized PyObject[] getArray() { - return list.toArray(Py.EmptyObjects); - } - - @Override - public synchronized int indexOf(Object o) { - return list.indexOf(Py.java2py(o)); - } - - @Override - public synchronized boolean isEmpty() { - return 
list.isEmpty(); - } - - @Override - public Iterator iterator() { - return new Iterator() { - - private final Iterator iter = list.iterator(); - - public boolean hasNext() { - return iter.hasNext(); - } - - public Object next() { - return iter.next().__tojava__(Object.class); - } - - public void remove() { - iter.remove(); - } - }; - } - - @Override - public synchronized int lastIndexOf(Object o) { - return list.lastIndexOf(Py.java2py(o)); - } - - @Override - public ListIterator listIterator() { - return listIterator(0); - } - - @Override - public ListIterator listIterator(final int index) { - return new ListIterator() { - - private final ListIterator iter = list.listIterator(index); - - public boolean hasNext() { - return iter.hasNext(); - } - - public Object next() { - return iter.next().__tojava__(Object.class); - } - - public boolean hasPrevious() { - return iter.hasPrevious(); - } - - public Object previous() { - return iter.previous().__tojava__(Object.class); - } - - public int nextIndex() { - return iter.nextIndex(); - } - - public int previousIndex() { - return iter.previousIndex(); - } - - public void remove() { - iter.remove(); - } - - public void set(Object o) { - iter.set(Py.java2py(o)); - } - - public void add(Object o) { - iter.add(Py.java2py(o)); - } - }; - } - - @Override - public synchronized void pyadd(int index, PyObject element) { - list.add(index, element); - } - - @Override - public synchronized boolean pyadd(PyObject o) { - list.add(o); - return true; - } - - @Override - public synchronized PyObject pyget(int index) { - return list.get(index); - } - - public synchronized void pyset(int index, PyObject element) { - list.set(index, element); - } - - @Override - public synchronized Object remove(int index) { - return list.remove(index); - } - - @Override - public synchronized void remove(int start, int stop) { - list.subList(start, stop).clear(); - } - - @Override - public synchronized boolean removeAll(Collection c) { - if (c instanceof 
PySequenceList) { - return list.removeAll(c); - } else { - return list.removeAll(new PyList(c)); - } - } - - @Override - public synchronized boolean retainAll(Collection c) { - if (c instanceof PySequenceList) { - return list.retainAll(c); - } else { - return list.retainAll(new PyList(c)); - } - } - - @Override - public synchronized Object set(int index, Object element) { - return list.set(index, Py.java2py(element)).__tojava__(Object.class); - } - - @Override - public synchronized int size() { - return list.size(); - } - - @Override - public synchronized List subList(int fromIndex, int toIndex) { - return fromList(list.subList(fromIndex, toIndex)); - } - - @Override - public synchronized Object[] toArray() { - Object copy[] = list.toArray(); - for (int i = 0; i < copy.length; i++) { - copy[i] = ((PyObject) copy[i]).__tojava__(Object.class); - } - return copy; - } - - @Override - public synchronized Object[] toArray(Object[] a) { - int size = size(); - Class type = a.getClass().getComponentType(); - if (a.length < size) { - a = (Object[])Array.newInstance(type, size); - } - for (int i = 0; i < size; i++) { - a[i] = list.get(i).__tojava__(type); - } - if (a.length > size) { - for (int i = size; i < a.length; i++) { - a[i] = null; - } - } - return a; - } - - protected PyObject getslice(int start, int stop, int step) { - if (step > 0 && stop < start) { - stop = start; - } - int n = sliceLength(start, stop, step); - List newList; - if (step == 1) { - newList = new ArrayList(list.subList(start, stop)); - } else { - newList = new ArrayList(n); - for (int i = start, j = 0; j < n; i += step, j++) { - newList.add(list.get(i)); - } - } - return fromList(newList); - } - - @Override - public synchronized boolean remove(Object o) { - return list.remove(Py.java2py(o)); - } - - - /* Traverseproc implementation */ - @Override - public int traverse(Visitproc visit, Object arg) { - if (list != null) { - int retVal; - for (PyObject ob: list) { - if (ob != null) { - retVal = 
visit.visit(ob, arg); - if (retVal != 0) { - return retVal; - } - } - } - } - return 0; - } - - @Override - public boolean refersDirectlyTo(PyObject ob) { - return list == null ? false : list.contains(ob); - } -} diff --git a/src/org/python/core/PyLong.java b/src/org/python/core/PyLong.java deleted file mode 100644 index 8b403ec93..000000000 --- a/src/org/python/core/PyLong.java +++ /dev/null @@ -1,1141 +0,0 @@ -// Copyright (c) Corporation for National Research Initiatives -// Copyright (c) Jython Developers - -package org.python.core; - -import java.io.Serializable; -import java.math.BigDecimal; -import java.math.BigInteger; - -import org.python.core.stringlib.FloatFormatter; -import org.python.core.stringlib.IntegerFormatter; -import org.python.core.stringlib.InternalFormat; -import org.python.core.stringlib.InternalFormat.Formatter; -import org.python.core.stringlib.InternalFormat.Spec; -import org.python.expose.ExposedGet; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import org.python.expose.ExposedType; -import org.python.expose.MethodType; - -/** - * A builtin python long. This is implemented as a java.math.BigInteger. - */ -@Untraversable -@ExposedType(name = "long", doc = BuiltinDocs.long_doc) -public class PyLong extends PyObject { - - public static final PyType TYPE = PyType.fromClass(PyLong.class); - - public static final BigInteger MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE); - public static final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE); - public static final BigInteger MAX_ULONG = BigInteger.valueOf(1).shiftLeft(64) - .subtract(BigInteger.valueOf(1)); - - /** @deprecated Use MIN_INT instead. */ - @Deprecated - public static final BigInteger minLong = MIN_LONG; - /** @deprecated Use MAX_INT instead. */ - @Deprecated - public static final BigInteger maxLong = MAX_LONG; - /** @deprecated Use MAX_ULONG instead. 
*/ - @Deprecated - public static final BigInteger maxULong = MAX_ULONG; - - private final BigInteger value; - - public BigInteger getValue() { - return value; - } - - public PyLong(PyType subType, BigInteger v) { - super(subType); - value = v; - } - - public PyLong(BigInteger v) { - this(TYPE, v); - } - - public PyLong(double v) { - this(toBigInteger(v)); - } - - public PyLong(long v) { - this(BigInteger.valueOf(v)); - } - - public PyLong(String s) { - this(new BigInteger(s)); - } - - @ExposedNew - public static PyObject long___new__(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { - if (new_.for_type != subtype) { - return longSubtypeNew(new_, init, subtype, args, keywords); - } - - ArgParser ap = new ArgParser("long", args, keywords, new String[] {"x", "base"}, 0); - PyObject x = ap.getPyObject(0, null); - if (x != null && x.getJavaProxy() instanceof BigInteger) { - return new PyLong((BigInteger)x.getJavaProxy()); - } - int base = ap.getInt(1, -909); - - if (x == null) { - return new PyLong(0); - } - if (base == -909) { - return asPyLong(x); - } - if (!(x instanceof PyString)) { - throw Py.TypeError("long: can't convert non-string with explicit base"); - } - return ((PyString)x).atol(base); - } - - /** - * @return convert to a long. - * @throws TypeError and AttributeError. - */ - private static PyObject asPyLong(PyObject x) { - try { - return x.__long__(); - } catch (PyException pye) { - if (!pye.match(Py.AttributeError)) { - throw pye; - } - try { - PyObject integral = x.invoke("__trunc__"); - return convertIntegralToLong(integral); - } catch (PyException pye2) { - if (!pye2.match(Py.AttributeError)) { - throw pye2; - } - throw Py.TypeError(String.format( - "long() argument must be a string or a number, not '%.200s'", x.getType() - .fastGetName())); - } - } - } - - /** - * @return convert to an int. - * @throws TypeError and AttributeError. 
- */ - private static PyObject convertIntegralToLong(PyObject integral) { - if (!(integral instanceof PyInteger) && !(integral instanceof PyLong)) { - PyObject i = integral.invoke("__int__"); - if (!(i instanceof PyInteger) && !(i instanceof PyLong)) { - throw Py.TypeError(String.format("__trunc__ returned non-Integral (type %.200s)", - integral.getType().fastGetName())); - } - return i; - } - return integral; - } - - /** - * Wimpy, slow approach to new calls for subtypes of long. - * - * First creates a regular long from whatever arguments we got, then allocates a subtype - * instance and initializes it from the regular long. The regular long is then thrown away. - */ - private static PyObject longSubtypeNew(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { - PyObject tmp = long___new__(new_, init, TYPE, args, keywords); - if (tmp instanceof PyInteger) { - int intValue = ((PyInteger)tmp).getValue(); - return new PyLongDerived(subtype, BigInteger.valueOf(intValue)); - } else { - return new PyLongDerived(subtype, ((PyLong)tmp).getValue()); - } - } - - /** - * Convert a double to BigInteger, raising an OverflowError if infinite. 
- */ - private static BigInteger toBigInteger(double value) { - if (Double.isInfinite(value)) { - throw Py.OverflowError("cannot convert float infinity to long"); - } - if (Double.isNaN(value)) { - throw Py.ValueError("cannot convert float NaN to integer"); - } - return new BigDecimal(value).toBigInteger(); - } - - @ExposedGet(name = "real", doc = BuiltinDocs.long_real_doc) - public PyObject getReal() { - return long___long__(); - } - - @ExposedGet(name = "imag", doc = BuiltinDocs.long_imag_doc) - public PyObject getImag() { - return Py.newLong(0); - } - - @ExposedGet(name = "numerator", doc = BuiltinDocs.long_numerator_doc) - public PyObject getNumerator() { - return long___long__(); - } - - @ExposedGet(name = "denominator", doc = BuiltinDocs.long_denominator_doc) - public PyObject getDenominator() { - return Py.newLong(1); - } - - @Override - public String toString() { - return long_toString(); - } - - @ExposedMethod(names = "__repr__", doc = BuiltinDocs.long___repr___doc) - final String long_toString() { - return getValue().toString() + "L"; - } - - @Override - public int hashCode() { - return long___hash__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___hash___doc) - final int long___hash__() { - return getValue().hashCode(); - } - - @Override - public boolean __nonzero__() { - return long___nonzero__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___nonzero___doc) - public boolean long___nonzero__() { - return !getValue().equals(BigInteger.ZERO); - } - - public double doubleValue() { - double v = getValue().doubleValue(); - if (Double.isInfinite(v)) { - throw Py.OverflowError("long int too large to convert to float"); - } - return v; - } - - private static final double scaledDoubleValue(BigInteger val, int[] exp) { - double x = 0; - int signum = val.signum(); - byte[] digits; - - if (signum >= 0) { - digits = val.toByteArray(); - } else { - digits = val.negate().toByteArray(); - } - - int count = 8; - int i = 0; - - if (digits[0] == 0) { - i++; - count++; 
- } - count = count <= digits.length ? count : digits.length; - - while (i < count) { - x = x * 256 + (digits[i] & 0xff); - i++; - } - exp[0] = digits.length - i; - return signum * x; - } - - public double scaledDoubleValue(int[] exp) { - return scaledDoubleValue(getValue(), exp); - } - - public long getLong(long min, long max) { - return getLong(min, max, "long int too large to convert"); - } - - public long getLong(long min, long max, String overflowMsg) { - if (getValue().compareTo(MAX_LONG) <= 0 && getValue().compareTo(MIN_LONG) >= 0) { - long v = getValue().longValue(); - if (v >= min && v <= max) { - return v; - } - } - throw Py.OverflowError(overflowMsg); - } - - @Override - public long asLong(int index) { - return asLong(); - } - - @Override - public int asInt(int index) { - return (int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE, - "long int too large to convert to int"); - } - - @Override - public int asInt() { - return (int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE, - "long int too large to convert to int"); - } - - @Override - public long asLong() { - return getLong(Long.MIN_VALUE, Long.MAX_VALUE, "long too big to convert"); - } - - @Override - public Object __tojava__(Class c) { - try { - if (c == Boolean.TYPE || c == Boolean.class) { - return Boolean.valueOf(!getValue().equals(BigInteger.ZERO)); - } - if (c == Byte.TYPE || c == Byte.class) { - return Byte.valueOf((byte)getLong(Byte.MIN_VALUE, Byte.MAX_VALUE)); - } - if (c == Short.TYPE || c == Short.class) { - return Short.valueOf((short)getLong(Short.MIN_VALUE, Short.MAX_VALUE)); - } - if (c == Integer.TYPE || c == Integer.class) { - return Integer.valueOf((int)getLong(Integer.MIN_VALUE, Integer.MAX_VALUE)); - } - if (c == Long.TYPE || c == Long.class) { - return Long.valueOf(getLong(Long.MIN_VALUE, Long.MAX_VALUE)); - } - if (c == Float.TYPE || c == Double.TYPE || c == Float.class || c == Double.class) { - return __float__().__tojava__(c); - } - if (c == BigInteger.class || c == Number.class || 
c == Object.class - || c == Serializable.class) { - return getValue(); - } - } catch (PyException e) { - return Py.NoConversion; - } - return super.__tojava__(c); - } - - @Override - public int __cmp__(PyObject other) { - return long___cmp__(other); - } - - @ExposedMethod(type = MethodType.CMP, doc = BuiltinDocs.long___cmp___doc) - final int long___cmp__(PyObject other) { - if (!canCoerce(other)) { - return -2; - } - return getValue().compareTo(coerce(other)); - } - - @Override - public Object __coerce_ex__(PyObject other) { - return long___coerce_ex__(other); - } - - @ExposedMethod(doc = BuiltinDocs.long___coerce___doc) - final PyObject long___coerce__(PyObject other) { - return adaptToCoerceTuple(long___coerce_ex__(other)); - } - - /** - * Coercion logic for long. Implemented as a final method to avoid invocation of virtual methods - * from the exposed coerce. - */ - final Object long___coerce_ex__(PyObject other) { - if (other instanceof PyLong) { - return other; - } else if (other instanceof PyInteger) { - return Py.newLong(((PyInteger)other).getValue()); - } else { - return Py.None; - } - } - - private static final boolean canCoerce(PyObject other) { - return other instanceof PyLong || other instanceof PyInteger; - } - - private static final BigInteger coerce(PyObject other) { - if (other instanceof PyLong) { - return ((PyLong)other).getValue(); - } else if (other instanceof PyInteger) { - return BigInteger.valueOf(((PyInteger)other).getValue()); - } else { - throw Py.TypeError("xxx"); - } - } - - @Override - public PyObject __add__(PyObject right) { - return long___add__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___add___doc) - final PyObject long___add__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return Py.newLong(getValue().add(coerce(right))); - } - - @Override - public PyObject __radd__(PyObject left) { - return long___radd__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = 
BuiltinDocs.long___radd___doc) - final PyObject long___radd__(PyObject left) { - return __add__(left); - } - - @Override - public PyObject __sub__(PyObject right) { - return long___sub__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___sub___doc) - final PyObject long___sub__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return Py.newLong(getValue().subtract(coerce(right))); - } - - @Override - public PyObject __rsub__(PyObject left) { - return long___rsub__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rsub___doc) - final PyObject long___rsub__(PyObject left) { - return Py.newLong(coerce(left).subtract(getValue())); - } - - @Override - public PyObject __mul__(PyObject right) { - return long___mul__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___mul___doc) - final PyObject long___mul__(PyObject right) { - if (right instanceof PySequence) { - return ((PySequence)right).repeat(coerceInt(this)); - } - - if (!canCoerce(right)) { - return null; - } - return Py.newLong(getValue().multiply(coerce(right))); - } - - @Override - public PyObject __rmul__(PyObject left) { - return long___rmul__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rmul___doc) - final PyObject long___rmul__(PyObject left) { - if (left instanceof PySequence) { - return ((PySequence)left).repeat(coerceInt(this)); - } - if (!canCoerce(left)) { - return null; - } - return Py.newLong(coerce(left).multiply(getValue())); - } - - // Getting signs correct for integer division - // This convention makes sense when you consider it in tandem with modulo - private BigInteger divide(BigInteger x, BigInteger y) { - BigInteger zero = BigInteger.valueOf(0); - if (y.equals(zero)) { - throw Py.ZeroDivisionError("long division or modulo"); - } - - if (y.compareTo(zero) < 0) { - if (x.compareTo(zero) > 0) { - return 
(x.subtract(y).subtract(BigInteger.valueOf(1))).divide(y); - } - } else { - if (x.compareTo(zero) < 0) { - return (x.subtract(y).add(BigInteger.valueOf(1))).divide(y); - } - } - return x.divide(y); - } - - @Override - public PyObject __div__(PyObject right) { - return long___div__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___div___doc) - final PyObject long___div__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - if (Options.division_warning > 0) { - Py.warning(Py.DeprecationWarning, "classic long division"); - } - return Py.newLong(divide(getValue(), coerce(right))); - } - - @Override - public PyObject __rdiv__(PyObject left) { - return long___rdiv__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rdiv___doc) - final PyObject long___rdiv__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - if (Options.division_warning > 0) { - Py.warning(Py.DeprecationWarning, "classic long division"); - } - return Py.newLong(divide(coerce(left), getValue())); - } - - @Override - public PyObject __floordiv__(PyObject right) { - return long___floordiv__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___floordiv___doc) - final PyObject long___floordiv__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return Py.newLong(divide(getValue(), coerce(right))); - } - - @Override - public PyObject __rfloordiv__(PyObject left) { - return long___rfloordiv__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rfloordiv___doc) - final PyObject long___rfloordiv__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - return Py.newLong(divide(coerce(left), getValue())); - } - - private static final PyFloat true_divide(BigInteger a, BigInteger b) { - int[] ae = new int[1]; - int[] be = new int[1]; - double ad, bd; - - ad = scaledDoubleValue(a, ae); - bd = scaledDoubleValue(b, be); - - if (bd == 0) { - throw 
Py.ZeroDivisionError("long division or modulo"); - } - - ad /= bd; - int aexp = ae[0] - be[0]; - - if (aexp > Integer.MAX_VALUE / 8) { - throw Py.OverflowError("long/long too large for a float"); - } else if (aexp < -(Integer.MAX_VALUE / 8)) { - return PyFloat.ZERO; - } - - ad = ad * Math.pow(2.0, aexp * 8); - - if (Double.isInfinite(ad)) { - throw Py.OverflowError("long/long too large for a float"); - } - - return new PyFloat(ad); - } - - @Override - public PyObject __truediv__(PyObject right) { - return long___truediv__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___truediv___doc) - final PyObject long___truediv__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return true_divide(this.getValue(), coerce(right)); - } - - @Override - public PyObject __rtruediv__(PyObject left) { - return long___rtruediv__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rtruediv___doc) - final PyObject long___rtruediv__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - return true_divide(coerce(left), this.getValue()); - } - - private BigInteger modulo(BigInteger x, BigInteger y, BigInteger xdivy) { - return x.subtract(xdivy.multiply(y)); - } - - @Override - public PyObject __mod__(PyObject right) { - return long___mod__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___mod___doc) - final PyObject long___mod__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - BigInteger rightv = coerce(right); - return Py.newLong(modulo(getValue(), rightv, divide(getValue(), rightv))); - } - - @Override - public PyObject __rmod__(PyObject left) { - return long___rmod__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rmod___doc) - final PyObject long___rmod__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - BigInteger leftv = coerce(left); - return Py.newLong(modulo(leftv, getValue(), divide(leftv, 
getValue()))); - } - - @Override - public PyObject __divmod__(PyObject right) { - return long___divmod__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___divmod___doc) - final PyObject long___divmod__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - BigInteger rightv = coerce(right); - - BigInteger xdivy = divide(getValue(), rightv); - return new PyTuple(Py.newLong(xdivy), Py.newLong(modulo(getValue(), rightv, xdivy))); - } - - @Override - public PyObject __rdivmod__(PyObject left) { - return long___rdivmod__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rdivmod___doc) - final PyObject long___rdivmod__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - BigInteger leftv = coerce(left); - - BigInteger xdivy = divide(leftv, getValue()); - return new PyTuple(Py.newLong(xdivy), Py.newLong(modulo(leftv, getValue(), xdivy))); - } - - @Override - public PyObject __pow__(PyObject right, PyObject modulo) { - return long___pow__(right, modulo); - } - - @ExposedMethod(type = MethodType.BINARY, defaults = {"null"}, - doc = BuiltinDocs.long___pow___doc) - final PyObject long___pow__(PyObject right, PyObject modulo) { - if (!canCoerce(right)) { - return null; - } - - modulo = (modulo == Py.None) ? 
null : modulo; - if (modulo != null && !canCoerce(modulo)) { - return null; - } - - return _pow(getValue(), coerce(right), modulo, this, right); - } - - @Override - public PyObject __rpow__(PyObject left) { - return long___rpow__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rpow___doc) - final PyObject long___rpow__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - - return _pow(coerce(left), getValue(), null, left, this); - } - - public static PyObject _pow(BigInteger value, BigInteger y, PyObject modulo, PyObject left, - PyObject right) { - if (y.compareTo(BigInteger.ZERO) < 0) { - if (value.compareTo(BigInteger.ZERO) != 0) { - return left.__float__().__pow__(right, modulo); - } else { - throw Py.ZeroDivisionError("zero to a negative power"); - } - } - if (modulo == null) { - return Py.newLong(value.pow(y.intValue())); - } else { - // This whole thing can be trivially rewritten after bugs - // in modPow are fixed by SUN - - BigInteger z = coerce(modulo); - // Clear up some special cases right away - if (z.equals(BigInteger.ZERO)) { - throw Py.ValueError("pow(x, y, z) with z == 0"); - } - if (z.abs().equals(BigInteger.ONE)) { - return Py.newLong(0); - } - - if (z.compareTo(BigInteger.valueOf(0)) <= 0) { - // Handle negative modulo specially - // if (z.compareTo(BigInteger.valueOf(0)) == 0) { - // throw Py.ValueError("pow(x, y, z) with z == 0"); - // } - y = value.modPow(y, z.negate()); - if (y.compareTo(BigInteger.valueOf(0)) > 0) { - return Py.newLong(z.add(y)); - } else { - return Py.newLong(y); - } - // return __pow__(right).__mod__(modulo); - } else { - // XXX: 1.1 no longer supported so review this. 
- // This is buggy in SUN's jdk1.1.5 - // Extra __mod__ improves things slightly - return Py.newLong(value.modPow(y, z)); - // return __pow__(right).__mod__(modulo); - } - } - } - - private static final int coerceInt(PyObject other) { - if (other instanceof PyLong) { - return ((PyLong)other).asInt(); - } else if (other instanceof PyInteger) { - return ((PyInteger)other).getValue(); - } else { - throw Py.TypeError("xxx"); - } - } - - @Override - public PyObject __lshift__(PyObject right) { - return long___lshift__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___lshift___doc) - final PyObject long___lshift__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - int rightv = coerceInt(right); - if (rightv < 0) { - throw Py.ValueError("negative shift count"); - } - return Py.newLong(getValue().shiftLeft(rightv)); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rlshift___doc) - final PyObject long___rlshift__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - if (getValue().intValue() < 0) { - throw Py.ValueError("negative shift count"); - } - return Py.newLong(coerce(left).shiftLeft(coerceInt(this))); - } - - @Override - public PyObject __rshift__(PyObject right) { - return long___rshift__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rshift___doc) - final PyObject long___rshift__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - int rightv = coerceInt(right); - if (rightv < 0) { - throw Py.ValueError("negative shift count"); - } - return Py.newLong(getValue().shiftRight(rightv)); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rrshift___doc) - final PyObject long___rrshift__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - if (getValue().intValue() < 0) { - throw Py.ValueError("negative shift count"); - } - return Py.newLong(coerce(left).shiftRight(coerceInt(this))); - } - - @Override - public 
PyObject __and__(PyObject right) { - return long___and__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___and___doc) - final PyObject long___and__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return Py.newLong(getValue().and(coerce(right))); - } - - @Override - public PyObject __rand__(PyObject left) { - return long___rand__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rand___doc) - final PyObject long___rand__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - return Py.newLong(coerce(left).and(getValue())); - } - - @Override - public PyObject __xor__(PyObject right) { - return long___xor__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___xor___doc) - final PyObject long___xor__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return Py.newLong(getValue().xor(coerce(right))); - } - - @Override - public PyObject __rxor__(PyObject left) { - return long___rxor__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___rxor___doc) - final PyObject long___rxor__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - return Py.newLong(coerce(left).xor(getValue())); - } - - @Override - public PyObject __or__(PyObject right) { - return long___or__(right); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___or___doc) - final PyObject long___or__(PyObject right) { - if (!canCoerce(right)) { - return null; - } - return Py.newLong(getValue().or(coerce(right))); - } - - @Override - public PyObject __ror__(PyObject left) { - return long___ror__(left); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.long___ror___doc) - final PyObject long___ror__(PyObject left) { - if (!canCoerce(left)) { - return null; - } - return Py.newLong(coerce(left).or(getValue())); - } - - @Override - public PyObject __neg__() { - return long___neg__(); - } - - @ExposedMethod(doc = 
BuiltinDocs.long___neg___doc) - final PyObject long___neg__() { - return Py.newLong(getValue().negate()); - } - - @Override - public PyObject __pos__() { - return long___pos__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___pos___doc) - final PyObject long___pos__() { - return long___long__(); - } - - @Override - public PyObject __abs__() { - return long___abs__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___abs___doc) - final PyObject long___abs__() { - if (getValue().signum() == -1) { - return long___neg__(); - } - return long___long__(); - } - - @Override - public PyObject __invert__() { - return long___invert__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___invert___doc) - final PyObject long___invert__() { - return Py.newLong(getValue().not()); - } - - @Override - public PyObject __int__() { - return long___int__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___int___doc) - final PyObject long___int__() { - if (getValue().compareTo(PyInteger.MAX_INT) <= 0 - && getValue().compareTo(PyInteger.MIN_INT) >= 0) { - return Py.newInteger(getValue().intValue()); - } - return long___long__(); - } - - @Override - public PyObject __long__() { - return long___long__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___long___doc) - final PyObject long___long__() { - return getType() == TYPE ? 
this : Py.newLong(getValue()); - } - - @Override - public PyFloat __float__() { - return long___float__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___float___doc) - final PyFloat long___float__() { - return new PyFloat(doubleValue()); - } - - @Override - public PyComplex __complex__() { - return long___complex__(); - } - - final PyComplex long___complex__() { - return new PyComplex(doubleValue(), 0.); - } - - @Override - public PyObject __trunc__() { - return long___trunc__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___trunc___doc) - final PyObject long___trunc__() { - return this; - } - - @Override - public PyObject conjugate() { - return long_conjugate(); - } - - @ExposedMethod(doc = BuiltinDocs.long_conjugate_doc) - final PyObject long_conjugate() { - return this; - } - - @Override - public PyString __oct__() { - return long___oct__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___oct___doc) - final PyString long___oct__() { - // Use the prepared format specifier for octal. - return formatImpl(IntegerFormatter.OCT); - } - - @Override - public PyString __hex__() { - return long___hex__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___hex___doc) - final PyString long___hex__() { - // Use the prepared format specifier for hexadecimal. - return formatImpl(IntegerFormatter.HEX); - } - - /** - * Common code used by the number-base conversion method __oct__ and __hex__. - * - * @param spec prepared format-specifier. - * @return converted value of this object - */ - private PyString formatImpl(Spec spec) { - // Traditional formatter (%-format) because #o means "-0123" not "-0o123". 
- IntegerFormatter f = new IntegerFormatter.Traditional(spec); - f.format(value).append('L'); - return new PyString(f.getResult()); - } - - @ExposedMethod(doc = BuiltinDocs.long___str___doc) - public PyString long___str__() { - return Py.newString(getValue().toString()); - } - - @Override - public PyString __str__() { - return long___str__(); - } - - @Override - public PyUnicode __unicode__() { - return new PyUnicode(getValue().toString()); - } - - @ExposedMethod(doc = BuiltinDocs.long___getnewargs___doc) - final PyTuple long___getnewargs__() { - return new PyTuple(new PyLong(this.getValue())); - } - - @Override - public PyTuple __getnewargs__() { - return long___getnewargs__(); - } - - @Override - public PyObject __index__() { - return long___index__(); - } - - @ExposedMethod(doc = BuiltinDocs.long___index___doc) - final PyObject long___index__() { - return this; - } - - @Override - public int bit_length() { - return long_bit_length(); - } - - @ExposedMethod(doc = BuiltinDocs.long_bit_length_doc) - final int long_bit_length() { - BigInteger v = value; - if (v.compareTo(BigInteger.ZERO) == -1) { - v = v.negate(); - } - return v.bitLength(); - } - - @Override - public PyObject __format__(PyObject formatSpec) { - return long___format__(formatSpec); - } - - @ExposedMethod(doc = BuiltinDocs.long___format___doc) - final PyObject long___format__(PyObject formatSpec) { - - // Parse the specification - Spec spec = InternalFormat.fromText(formatSpec, "__format__"); - InternalFormat.Formatter f; - - // Try to make an integer formatter from the specification - IntegerFormatter fi = PyInteger.prepareFormatter(spec); - if (fi != null) { - // Bytes mode if formatSpec argument is not unicode. - fi.setBytes(!(formatSpec instanceof PyUnicode)); - // Convert as per specification. 
- fi.format(value); - f = fi; - - } else { - // Try to make a float formatter from the specification - FloatFormatter ff = PyFloat.prepareFormatter(spec); - if (ff != null) { - // Bytes mode if formatSpec argument is not unicode. - ff.setBytes(!(formatSpec instanceof PyUnicode)); - // Convert as per specification. - ff.format(value.doubleValue()); - f = ff; - - } else { - // The type code was not recognised in either prepareFormatter - throw Formatter.unknownFormat(spec.type, "integer"); - } - } - - // Return a result that has the same type (str or unicode) as the formatSpec argument. - return f.pad().getPyResult(); - } - - @Override - public boolean isIndex() { - return true; - } - - @Override - public int asIndex(PyObject err) { - boolean tooLow = getValue().compareTo(PyInteger.MIN_INT) < 0; - boolean tooHigh = getValue().compareTo(PyInteger.MAX_INT) > 0; - if (tooLow || tooHigh) { - if (err != null) { - throw new PyException(err, "cannot fit 'long' into an index-sized integer"); - } - return tooLow ? Integer.MIN_VALUE : Integer.MAX_VALUE; - } - return (int)getValue().longValue(); - } - - @Override - public boolean isMappingType() { - return false; - } - - @Override - public boolean isNumberType() { - return true; - } - - @Override - public boolean isSequenceType() { - return false; - } -} diff --git a/src/org/python/core/PySequence.java b/src/org/python/core/PySequence.java deleted file mode 100644 index b829d7165..000000000 --- a/src/org/python/core/PySequence.java +++ /dev/null @@ -1,541 +0,0 @@ -// Copyright (c) Corporation for National Research Initiatives -package org.python.core; - -/** - * The abstract superclass of PyObjects that implements a Sequence. Minimize the work in creating - * such objects. - * - * Method names are designed to make it possible for subclasses of PySequence to implement - * java.util.List. - * - * Subclasses must also implement get, getslice, and repeat methods. 
- * - * Subclasses that are mutable should also implement: set, setslice, del, and delRange. - */ -public abstract class PySequence extends PyObject { - - /** - * A delegate that handles index checking and manipulation for get, set and del operations on - * this sequence in the form of a "pluggable behaviour". Because different types of sequence - * exhibit subtly different behaviour, there is scope for subclasses to customise the behaviour - * with their own extension of SequenceIndexDelegate. - */ - protected SequenceIndexDelegate delegator; - - /** - * Construct a PySequence for the given sub-type with the default index behaviour. - * - * @param type actual (Python) type of sub-class - */ - protected PySequence(PyType type) { - super(type); - delegator = new DefaultIndexDelegate(); - } - - /** - * Construct a PySequence for the given sub-type with custom index behaviour. In practice, - * restrictions on the construction of inner classes will mean null has to be passed and the - * actual delegator assigned later. - * - * @param type actual (Python) type of sub-class - * @param behaviour specific index behaviour (or null) - */ - protected PySequence(PyType type, SequenceIndexDelegate behaviour) { - super(type); - delegator = behaviour; - } - - // These methods must be defined for any sequence - /** - * Returns the element of the sequence at the given index. This is an extension point called by - * PySequence in its implementation of {@link #__getitem__} It is guaranteed by PySequence that - * when it calls pyget(int) the index is within the bounds of the array. Any other - * clients must make the same guarantee. - * - * @param index index of element to return. - * @return the element at the given position in the list. - */ - protected abstract PyObject pyget(int index); - - /** - * Returns a range of elements from the sequence. - * - * @param start the position of the first element. - * @param stop one more than the position of the last element. 
- * @param step the step size. - * @return a sequence corresponding the the given range of elements. - */ - protected abstract PyObject getslice(int start, int stop, int step); - - /** - * Returns a (concrete subclass of) PySequence that repeats the given sequence, as in the - * implementation of __mul__ for strings. - * - * @param count the number of times to repeat the sequence. - * @return this sequence repeated count times. - */ - protected abstract PyObject repeat(int count); - - // These methods only apply to mutable sequences - /** - * Sets the indexed element of the sequence to the given value. This is an extension point - * called by PySequence in its implementation of {@link #__setitem__} It is guaranteed by - * PySequence that when it calls pyset(int) the index is within the bounds of the array. Any - * other clients must make the same guarantee. - * - * @param index index of the element to set. - * @param value the value to set this element to. - */ - protected void pyset(int index, PyObject value) { - throw Py.TypeError("can't assign to immutable object"); - } - - /** - * Sets the given range of elements according to Python slice assignment semantics. If the step - * size is one, it is a simple slice and the operation is equivalent to deleting that slice, - * then inserting the value at that position, regarding the value as a sequence (if possible) or - * as a single element if it is not a sequence. If the step size is not one, but - * start==stop, it is equivalent to insertion at that point. If the step size is - * not one, and start!=stop, the slice defines a certain number of elements to be - * replaced, and the value must be a sequence of exactly that many elements (or convertible to - * such a sequence). - * - * @param start the position of the first element. - * @param stop one more than the position of the last element. - * @param step the step size. 
- * @param value an object consistent with the slice assignment - */ - protected void setslice(int start, int stop, int step, PyObject value) { - throw Py.TypeError(String.format("'%s' object does not support item assignment", getType() - .fastGetName())); - } - - /** - * Deletes an element from the sequence (and closes up the gap). - * - * @param index index of the element to delete. - */ - protected void del(int index) { - delslice(index, index, 1, 1); // Raises TypeError (for immutable types). - } - - /** - * Deletes a contiguous sub-sequence (and closes up the gap). - * - * @param start the position of the first element. - * @param stop one more than the position of the last element. - */ - protected void delRange(int start, int stop) { - delslice(start, stop, 1, Math.abs(stop - start)); // Raises TypeError (for immutable types). - } - - /** - * Deletes a simple or extended slice and closes up the gap(s). The slice parameters - * [start:stop:step] mean what they would in Python, after application of - * the "end-relative" rules for negative numbers and None. The count n - * is as supplied by {@link PySlice#indicesEx(int)}. This method is unsafe in that slice - * parameters are assumed correct. - * - * @param start the position of the first element. - * @param stop beyond the position of the last element (not necessarily just beyond). - * @param step from one element to the next (positive or negative) - * @param n number of elements to delete - */ - protected void delslice(int start, int stop, int step, int n) { - // Raises TypeError (for immutable types). 
- throw Py.TypeError(String.format("'%s' object does not support item deletion", getType() - .fastGetName())); - } - - @Override - public boolean __nonzero__() { - return seq___nonzero__(); - } - - final boolean seq___nonzero__() { - return __len__() != 0; - } - - @Override - public PyObject __iter__() { - return seq___iter__(); - } - - final PyObject seq___iter__() { - return new PySequenceIter(this); - } - - @Override - public PyObject __eq__(PyObject o) { - return seq___eq__(o); - } - - final PyObject seq___eq__(PyObject o) { - if (!isSubType(o) || o.getType() == PyObject.TYPE) { - return null; - } - int tl = __len__(); - int ol = o.__len__(); - if (tl != ol) { - return Py.False; - } - int i = cmp(this, tl, o, ol); - return i < 0 ? Py.True : Py.False; - } - - @Override - public PyObject __ne__(PyObject o) { - return seq___ne__(o); - } - - final PyObject seq___ne__(PyObject o) { - if (!isSubType(o) || o.getType() == PyObject.TYPE) { - return null; - } - int tl = __len__(); - int ol = o.__len__(); - if (tl != ol) { - return Py.True; - } - int i = cmp(this, tl, o, ol); - return i < 0 ? Py.False : Py.True; - } - - @Override - public PyObject __lt__(PyObject o) { - return seq___lt__(o); - } - - final PyObject seq___lt__(PyObject o) { - if (!isSubType(o) || o.getType() == PyObject.TYPE) { - return null; - } - int i = cmp(this, -1, o, -1); - if (i < 0) { - return i == -1 ? Py.True : Py.False; - } - return __finditem__(i)._lt(o.__finditem__(i)); - } - - @Override - public PyObject __le__(PyObject o) { - return seq___le__(o); - } - - final PyObject seq___le__(PyObject o) { - if (!isSubType(o) || o.getType() == PyObject.TYPE) { - return null; - } - int i = cmp(this, -1, o, -1); - if (i < 0) { - return i == -1 || i == -2 ? 
Py.True : Py.False; - } - return __finditem__(i)._le(o.__finditem__(i)); - } - - @Override - public PyObject __gt__(PyObject o) { - return seq___gt__(o); - } - - final PyObject seq___gt__(PyObject o) { - if (!isSubType(o) || o.getType() == PyObject.TYPE) { - return null; - } - int i = cmp(this, -1, o, -1); - if (i < 0) { - return i == -3 ? Py.True : Py.False; - } - return __finditem__(i)._gt(o.__finditem__(i)); - } - - @Override - public PyObject __ge__(PyObject o) { - return seq___ge__(o); - } - - final PyObject seq___ge__(PyObject o) { - if (!isSubType(o) || o.getType() == PyObject.TYPE) { - return null; - } - int i = cmp(this, -1, o, -1); - if (i < 0) { - return i == -3 || i == -2 ? Py.True : Py.False; - } - return __finditem__(i)._ge(o.__finditem__(i)); - } - - /** - * isSubType tailored for PySequence binops. - * - * @param other PyObject - * @return true if subclass of other - */ - protected boolean isSubType(PyObject other) { - PyType type = getType(); - PyType otherType = other.getType(); - return type == otherType || type.isSubType(otherType); - } - - /** - * Compare the specified object/length pairs. - * - * @return value ≥ 0 is the index where the sequences differs. -1: reached the end of o1 - * without a difference -2: reached the end of both sequences without a difference -3: - * reached the end of o2 without a difference - */ - protected static int cmp(PyObject o1, int ol1, PyObject o2, int ol2) { - if (ol1 < 0) { - ol1 = o1.__len__(); - } - if (ol2 < 0) { - ol2 = o2.__len__(); - } - for (int i = 0; i < ol1 && i < ol2; i++) { - if (!o1.__getitem__(i).equals(o2.__getitem__(i))) { - return i; - } - } - if (ol1 == ol2) { - return -2; - } - return ol1 < ol2 ? -1 : -3; - } - - /** - * Return a copy of a sequence where the __len__() method is telling the truth. 
- */ - protected static PySequence fastSequence(PyObject seq, String msg) { - if (seq instanceof PySequence) { - return (PySequence)seq; - } - PyList list = new PyList(); - PyObject iter = Py.iter(seq, msg); - for (PyObject item = null; (item = iter.__iternext__()) != null;) { - list.append(item); - } - return list; - } - - /** - * Make step a long in case adding the start, stop and step together overflows an int. - */ - protected static final int sliceLength(int start, int stop, long step) { - int ret; - if (step > 0) { - ret = (int)((stop - start + step - 1) / step); - } else { - ret = (int)((stop - start + step + 1) / step); - } - if (ret < 0) { - return 0; - } - return ret; - } - - /** - * Adjusts index such that it's ≥0 and ≤ __len__. If index - * starts off negative, it's treated as an index from the end of the sequence going back to the - * start. - */ - protected int boundToSequence(int index) { - int length = __len__(); - if (index < 0) { - index += length; - if (index < 0) { - index = 0; - } - } else if (index > length) { - index = length; - } - return index; - } - - @Override - public PyObject __finditem__(int index) { - return seq___finditem__(index); - } - - final PyObject seq___finditem__(int index) { - return delegator.checkIdxAndFindItem(index); - } - - @Override - public PyObject __finditem__(PyObject index) { - return seq___finditem__(index); - } - - final PyObject seq___finditem__(PyObject index) { - return delegator.checkIdxAndFindItem(index); - } - - @Override - public PyObject __getitem__(PyObject index) { - return seq___getitem__(index); - } - - final PyObject seq___getitem__(PyObject index) { - return delegator.checkIdxAndGetItem(index); - } - - @Override - public boolean isMappingType() throws PyIgnoreMethodTag { - return false; - } - - @Override - public boolean isNumberType() throws PyIgnoreMethodTag { - return false; - } - - @Override - public PyObject __getslice__(PyObject start, PyObject stop, PyObject step) { - return 
seq___getslice__(start, stop, step); - } - - final PyObject seq___getslice__(PyObject start, PyObject stop, PyObject step) { - return delegator.getSlice(new PySlice(start, stop, step)); - } - - @Override - public void __setslice__(PyObject start, PyObject stop, PyObject step, PyObject value) { - seq___setslice__(start, stop, step, value); - } - - final void seq___setslice__(PyObject start, PyObject stop, PyObject step, PyObject value) { - if (value == null) { - value = step; - step = null; - } - delegator.checkIdxAndSetSlice(new PySlice(start, stop, step), value); - } - - @Override - public void __delslice__(PyObject start, PyObject stop, PyObject step) { - seq___delslice__(start, stop, step); - } - - final void seq___delslice__(PyObject start, PyObject stop, PyObject step) { - delegator.checkIdxAndDelItem(new PySlice(start, stop, step)); - } - - @Override - public void __setitem__(int index, PyObject value) { - delegator.checkIdxAndSetItem(index, value); - } - - @Override - public void __setitem__(PyObject index, PyObject value) { - seq___setitem__(index, value); - } - - final void seq___setitem__(PyObject index, PyObject value) { - delegator.checkIdxAndSetItem(index, value); - } - - @Override - public void __delitem__(PyObject index) { - seq___delitem__(index); - } - - final void seq___delitem__(PyObject index) { - delegator.checkIdxAndDelItem(index); - } - - @Override - public synchronized Object __tojava__(Class c) throws PyIgnoreMethodTag { - if (c.isArray()) { - Class component = c.getComponentType(); - try { - int n = __len__(); - PyArray array = new PyArray(component, n); - for (int i = 0; i < n; i++) { - PyObject o = pyget(i); - array.set(i, o); - } - return array.getArray(); - } catch (Throwable t) { - // ok - } - } - return super.__tojava__(c); - } - - /** - * Return sequence-specific error messages suitable for substitution. - * - * {0} is the op name. {1} is the left operand type. {2} is the right operand type. 
- */ - @Override - protected String unsupportedopMessage(String op, PyObject o2) { - if (op.equals("*")) { - return "can''t multiply sequence by non-int of type ''{2}''"; - } - return null; - } - - /** - * Return sequence-specific error messages suitable for substitution. - * - * {0} is the op name. {1} is the left operand type. {2} is the right operand type. - */ - @Override - protected String runsupportedopMessage(String op, PyObject o2) { - if (op.equals("*")) { - return "can''t multiply sequence by non-int of type ''{1}''"; - } - return null; - } - - @Override - public boolean isSequenceType() { - return true; - } - - /** - * Class defining the default behaviour of sequences with respect to slice assignment, etc., - * which is the one correct for list. - */ - protected class DefaultIndexDelegate extends SequenceIndexDelegate { - - @Override - public String getTypeName() { - return getType().fastGetName(); - } - - @Override - public void setItem(int idx, PyObject value) { - pyset(idx, value); - } - - @Override - public void setSlice(int start, int stop, int step, PyObject value) { - setslice(start, stop, step, value); - } - - @Override - public int len() { - return __len__(); - } - - @Override - public void delItem(int idx) { - del(idx); - } - - @Override - public void delItems(int start, int stop) { - delRange(start, stop); - } - - @Override - public PyObject getItem(int idx) { - return pyget(idx); - } - - @Override - public PyObject getSlice(int start, int stop, int step) { - return getslice(start, stop, step); - } - }; -} diff --git a/src/org/python/core/PySlice.java b/src/org/python/core/PySlice.java deleted file mode 100644 index 3973f9cb0..000000000 --- a/src/org/python/core/PySlice.java +++ /dev/null @@ -1,267 +0,0 @@ -//Copyright (c) Corporation for National Research Initiatives -package org.python.core; - -import org.python.expose.ExposedGet; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import 
org.python.expose.ExposedType; - -/** - * The Python slice object. - */ -@ExposedType(name = "slice", isBaseType = false, doc = BuiltinDocs.slice_doc) -public class PySlice extends PyObject implements Traverseproc { - - public static final PyType TYPE = PyType.fromClass(PySlice.class); - - @ExposedGet(doc = BuiltinDocs.slice_start_doc) - public PyObject start = Py.None; - - @ExposedGet(doc = BuiltinDocs.slice_stop_doc) - public PyObject stop = Py.None; - - @ExposedGet(doc = BuiltinDocs.slice_step_doc) - public PyObject step = Py.None; - - public PySlice() { - super(TYPE); - } - - public PySlice(PyObject start, PyObject stop, PyObject step) { - super(TYPE); - if (start != null) { - this.start = start; - } - if (stop != null) { - this.stop = stop; - } - if (step != null) { - this.step = step; - } - } - - @ExposedNew - static PyObject slice_new(PyNewWrapper new_, boolean init, PyType subtype, PyObject[] args, - String[] keywords) { - if (args.length == 0) { - throw Py.TypeError("slice expected at least 1 arguments, got " + args.length); - } else if (args.length > 3) { - throw Py.TypeError("slice expected at most 3 arguments, got " + args.length); - } - ArgParser ap = new ArgParser("slice", args, keywords, "start", "stop", "step"); - PySlice slice = new PySlice(); - if (args.length == 1) { - slice.stop = ap.getPyObject(0); - } else if (args.length == 2) { - slice.start = ap.getPyObject(0); - slice.stop = ap.getPyObject(1); - } else if (args.length == 3) { - slice.start = ap.getPyObject(0); - slice.stop = ap.getPyObject(1); - slice.step = ap.getPyObject(2); - } - return slice; - } - - @Override - public int hashCode() { - return slice___hash__(); - } - - @ExposedMethod(doc = BuiltinDocs.slice___hash___doc) - final int slice___hash__() { - throw Py.TypeError(String.format("unhashable type: '%.200s'", getType().fastGetName())); - } - - @Override - public PyObject __eq__(PyObject o) { - if (getType() != o.getType() && !(getType().isSubType(o.getType()))) { - return null; - 
} - if (this == o) { - return Py.True; - } - PySlice oSlice = (PySlice)o; - return Py.newBoolean(eq(getStart(), oSlice.getStart()) && eq(getStop(), oSlice.getStop()) - && eq(getStep(), oSlice.getStep())); - } - - private static final boolean eq(PyObject o1, PyObject o2) { - return o1._cmp(o2) == 0; - } - - @Override - public PyObject __ne__(PyObject o) { - return __eq__(o).__not__(); - } - - public PyObject indices(PyObject len) { - return slice_indices(len); - } - - @ExposedMethod(doc = BuiltinDocs.slice_indices_doc) - final PyObject slice_indices(PyObject len) { - int[] indices = indicesEx(len.asIndex(Py.OverflowError)); - return new PyTuple(Py.newInteger(indices[0]), Py.newInteger(indices[1]), - Py.newInteger(indices[2])); - } - - /** - * Calculates the actual indices of a slice with this slice's start, stop, step and - * slicelength values for a sequence of length len. - * - * @return an array with the start at index 0, stop at index 1, step at index 2 and - * slicelength at index 3 - */ - public int[] indicesEx(int length) { - /* The corresponding C code (PySlice_GetIndicesEx) states: - * "this is harder to get right than you might think" - * As a consequence, I have chosen to copy the code and translate to Java. - * Note *rstart, etc., become result_start - the usual changes we need - * when going from pointers to corresponding Java. - */ - - int defstart, defstop; - int result_start, result_stop, result_step, result_slicelength; - - if (step == Py.None) { - result_step = 1; - } else { - result_step = calculateSliceIndex(step); - if (result_step == 0) { - throw Py.ValueError("slice step cannot be zero"); - } - } - - defstart = result_step < 0 ? length - 1 : 0; - defstop = result_step < 0 ? -1 : length; - - if (start == Py.None) { - result_start = defstart; - } else { - result_start = calculateSliceIndex(start); - if (result_start < 0) result_start += length; - if (result_start < 0) result_start = (result_step < 0) ? 
-1 : 0; - if (result_start >= length) { - result_start = (result_step < 0) ? length - 1 : length; - } - } - - if (stop == Py.None) { - result_stop = defstop; - } else { - result_stop = calculateSliceIndex(stop); - if (result_stop < 0) result_stop += length; - if (result_stop < 0) result_stop = (result_step < 0) ? -1 : 0; - if (result_stop >= length) { - result_stop = (result_step < 0) ? length - 1 : length; - } - } - - if ((result_step < 0 && result_stop >= result_start) - || (result_step > 0 && result_start >= result_stop)) { - result_slicelength = 0; - } else if (result_step < 0) { - result_slicelength = (result_stop - result_start + 1) / (result_step) + 1; - } else { - result_slicelength = (result_stop - result_start - 1) / (result_step) + 1; - } - - return new int[]{result_start, result_stop, result_step, result_slicelength}; - } - - - /** - * Calculate indices for the deprecated __get/set/delslice__ methods. - * - * @param obj the object being sliced - * @param start the slice operation's start - * @param stop the slice operation's stop - * @return an array with start at index 0 and stop at index 1 - */ - public static PyObject[] indices2(PyObject obj, PyObject start, PyObject stop) { - PyObject[] indices = new PyObject[2]; - int istart = (start == null || start == Py.None) ? 0 : calculateSliceIndex(start); - int istop = (stop == null || stop == Py.None) - ? 
PySystemState.maxint : calculateSliceIndex(stop); - if (istart < 0 || istop < 0) { - try { - int len = obj.__len__(); - if (istart < 0) { - istart += len; - } - if (istop < 0) { - istop += len; - } - } catch (PyException pye) { - if (!pye.match(Py.TypeError)) { - throw pye; - } - } - } - indices[0] = Py.newInteger(istart); - indices[1] = Py.newInteger(istop); - return indices; - } - - public static int calculateSliceIndex(PyObject v) { - if (v.isIndex()) { - return v.asIndex(); - } - throw Py.TypeError("slice indices must be integers or None or have an __index__ method"); - } - - @Override - public String toString() { - return slice_toString(); - } - - @ExposedMethod(names = "__repr__", doc = BuiltinDocs.slice___repr___doc) - final String slice_toString() { - return String.format("slice(%s, %s, %s)", getStart(), getStop(), getStep()); - } - - public final PyObject getStart() { - return start; - } - - public final PyObject getStop() { - return stop; - } - - public final PyObject getStep() { - return step; - } - - @ExposedMethod - final PyObject slice___reduce__() { - return new PyTuple(getType(), new PyTuple(start, stop, step)); - } - - @ExposedMethod(defaults = "Py.None") - final PyObject slice___reduce_ex__(PyObject protocol) { - return new PyTuple(getType(), new PyTuple(start, stop, step)); - } - - - /* Traverseproc implementation */ - @Override - public int traverse(Visitproc visit, Object arg) { - //start, stop, step cannot be null - int retVal = visit.visit(start, arg); - if (retVal != 0) { - return retVal; - } - retVal = visit.visit(stop, arg); - if (retVal != 0) { - return retVal; - } - return visit.visit(step, arg); - } - - @Override - public boolean refersDirectlyTo(PyObject ob) { - //start, stop, step cannot be null - return ob == start || ob == stop || ob == step; - } -} diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java deleted file mode 100644 index 193d03932..000000000 --- a/src/org/python/core/PyString.java +++ 
/dev/null @@ -1,4901 +0,0 @@ -// Copyright (c) Corporation for National Research Initiatives -package org.python.core; - -import java.lang.ref.Reference; -import java.lang.ref.SoftReference; -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.python.core.buffer.BaseBuffer; -import org.python.core.buffer.SimpleStringBuffer; -import org.python.core.stringlib.FieldNameIterator; -import org.python.core.stringlib.FloatFormatter; -import org.python.core.stringlib.IntegerFormatter; -import org.python.core.stringlib.InternalFormat; -import org.python.core.stringlib.InternalFormat.Formatter; -import org.python.core.stringlib.InternalFormat.Spec; -import org.python.core.stringlib.MarkupIterator; -import org.python.core.stringlib.TextFormatter; -import org.python.core.util.StringUtil; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import org.python.expose.ExposedType; -import org.python.expose.MethodType; - -/** - * A builtin python string. - */ -@Untraversable -@ExposedType(name = "str", base = PyBaseString.class, doc = BuiltinDocs.str_doc) -public class PyString extends PyBaseString implements BufferProtocol { - - public static final PyType TYPE = PyType.fromClass(PyString.class); - protected String string; // cannot make final because of Python intern support - protected transient boolean interned = false; - /** Supports the buffer API, see {@link #getBuffer(int)}. 
*/ - private Reference export; - - public String getString() { - return string; - } - - // for PyJavaClass.init() - public PyString() { - this("", true); - } - - protected PyString(PyType subType, String string, boolean isBytes) { - super(subType); - if (string == null) { - throw new IllegalArgumentException("Cannot create PyString from null"); - } else if (!isBytes && !isBytes(string)) { - throw new IllegalArgumentException("Cannot create PyString with non-byte value"); - } - this.string = string; - } - - /** - * Fundamental constructor for PyString objects when the client provides a Java - * String, necessitating that we range check the characters. - * - * @param subType the actual type being constructed - * @param string a Java String to be wrapped - */ - public PyString(PyType subType, String string) { - this(subType, string, false); - } - - public PyString(String string) { - this(TYPE, string); - } - - public PyString(char c) { - this(TYPE, String.valueOf(c)); - } - - PyString(StringBuilder buffer) { - this(TYPE, buffer.toString()); - } - - PyString(PyBuffer buffer) { - this(TYPE, buffer.toString()); - } - - /** - * Local-use constructor in which the client is allowed to guarantee that the - * String argument contains only characters in the byte range. We do not then - * range-check the characters. - * - * @param string a Java String to be wrapped (not null) - * @param isBytes true if the client guarantees we are dealing with bytes - */ - private PyString(String string, boolean isBytes) { - super(TYPE); - if (isBytes || isBytes(string)) { - this.string = string; - } else { - throw new IllegalArgumentException("Cannot create PyString with non-byte value"); - } - } - - /** - * Determine whether a string consists entirely of characters in the range 0 to 255. Only such - * characters are allowed in the PyString (str) type, when it is not a - * {@link PyUnicode}. 
- * - * @return true if and only if every character has a code less than 256 - */ - private static boolean isBytes(String s) { - int k = s.length(); - if (k == 0) { - return true; - } else { - // Bitwise-or the character codes together in order to test once. - char c = 0; - // Blocks of 8 to reduce loop tests - while (k > 8) { - c |= s.charAt(--k); - c |= s.charAt(--k); - c |= s.charAt(--k); - c |= s.charAt(--k); - c |= s.charAt(--k); - c |= s.charAt(--k); - c |= s.charAt(--k); - c |= s.charAt(--k); - } - // Now the rest - while (k > 0) { - c |= s.charAt(--k); - } - // We require there to be no bits set from 0x100 upwards - return c < 0x100; - } - } - - /** - * Creates a PyString from an already interned String. Just means it won't be reinterned if used - * in a place that requires interned Strings. - */ - public static PyString fromInterned(String interned) { - PyString str = new PyString(TYPE, interned); - str.interned = true; - return str; - } - - /** - * Determine whether the string consists entirely of basic-plane characters. For a - * {@link PyString}, of course, it is always true, but this is useful in cases - * where either a PyString or a {@link PyUnicode} is acceptable. - * - * @return true - */ - public boolean isBasicPlane() { - return true; - } - - @ExposedNew - static PyObject str_new(PyNewWrapper new_, boolean init, PyType subtype, PyObject[] args, - String[] keywords) { - ArgParser ap = new ArgParser("str", args, keywords, new String[] {"object"}, 0); - PyObject S = ap.getPyObject(0, null); - // Get the textual representation of the object into str/bytes form - String str; - if (S == null) { - str = ""; - } else { - // Let the object tell us its representation: this may be str or unicode. - S = S.__str__(); - if (S instanceof PyUnicode) { - // Encoding will raise UnicodeEncodeError if not 7-bit clean. - str = codecs.encode((PyUnicode) S, null, null); - } else { - // Must be str/bytes, and should be 8-bit clean already. 
- str = S.toString(); - } - } - if (new_.for_type == subtype) { - return new PyString(str); - } else { - return new PyStringDerived(subtype, str); - } - } - - public int[] toCodePoints() { - int n = getString().length(); - int[] codePoints = new int[n]; - for (int i = 0; i < n; i++) { - codePoints[i] = getString().charAt(i); - } - return codePoints; - } - - /** - * Return a read-only buffer view of the contents of the string, treating it as a sequence of - * unsigned bytes. The caller specifies its requirements and navigational capabilities in the - * flags argument (see the constants in interface {@link PyBUF} for an - * explanation). The method may return the same PyBuffer object to more than one consumer. - * - * @param flags consumer requirements - * @return the requested buffer - */ - @Override - public synchronized PyBuffer getBuffer(int flags) { - // If we have already exported a buffer it may still be available for re-use - BaseBuffer pybuf = getExistingBuffer(flags); - if (pybuf == null) { - /* - * No existing export we can re-use. Return a buffer, but specialised to defer - * construction of the buf object, and cache a soft reference to it. - */ - pybuf = new SimpleStringBuffer(flags, this, getString()); - export = new SoftReference(pybuf); - } - return pybuf; - } - - /** - * Helper for {@link #getBuffer(int)} that tries to re-use an existing exported buffer, or - * returns null if can't. - */ - private BaseBuffer getExistingBuffer(int flags) { - BaseBuffer pybuf = null; - if (export != null) { - // A buffer was exported at some time. - pybuf = export.get(); - if (pybuf != null) { - /* - * And this buffer still exists. Even in the case where the buffer has been released - * by all its consumers, it remains safe to re-acquire it because the target String - * has not changed. - */ - pybuf = pybuf.getBufferAgain(flags); - } - } - return pybuf; - } - - /** - * Return a substring of this object as a Java String. 
- * - * @param start the beginning index, inclusive. - * @param end the ending index, exclusive. - * @return the specified substring. - */ - public String substring(int start, int end) { - return getString().substring(start, end); - } - - @Override - public PyString __str__() { - return str___str__(); - } - - @ExposedMethod(doc = BuiltinDocs.str___str___doc) - final PyString str___str__() { - if (getClass() == PyString.class) { - return this; - } - return new PyString(getString(), true); - } - - @Override - public PyUnicode __unicode__() { - return new PyUnicode(this); // Decodes with default codec. - } - - @Override - public int __len__() { - return str___len__(); - } - - @ExposedMethod(doc = BuiltinDocs.str___len___doc) - final int str___len__() { - return getString().length(); - } - - @Override - public String toString() { - return getString(); - } - - public String internedString() { - if (interned) { - return getString(); - } else { - string = getString().intern(); - interned = true; - return getString(); - } - } - - @Override - public PyString __repr__() { - return str___repr__(); - } - - @ExposedMethod(doc = BuiltinDocs.str___repr___doc) - final PyString str___repr__() { - return new PyString(encode_UnicodeEscape(getString(), true)); - } - - private static char[] hexdigit = "0123456789abcdef".toCharArray(); - - public static String encode_UnicodeEscape(String str, boolean use_quotes) { - char quote = use_quotes ? '?' : 0; - return encode_UnicodeEscape(str, quote); - } - - /** - * The inner logic of the string __repr__ producing an ASCII representation of the target - * string, optionally in quotations. The caller can determine whether the returned string will - * be wrapped in quotation marks, and whether Python rules are used to choose them through - * quote. - * - * @param str - * @param quoteChar '"' or '\'' use that, '?' 
= let Python choose, 0 or anything = no quotes - * @return encoded string (possibly the same string if unchanged) - */ - static String encode_UnicodeEscape(String str, char quote) { - - // Choose whether to quote and the actual quote character - boolean use_quotes; - switch (quote) { - case '?': - use_quotes = true; - // Python rules - quote = str.indexOf('\'') >= 0 && str.indexOf('"') == -1 ? '"' : '\''; - break; - case '"': - case '\'': - use_quotes = true; - break; - default: - use_quotes = false; - break; - } - - // Allocate a buffer for the result (25% bigger and room for quotes) - int size = str.length(); - StringBuilder v = new StringBuilder(size + (size >> 2) + 2); - - if (use_quotes) { - v.append(quote); - } - - // Now chunter through the original string a character at a time - for (int i = 0; size-- > 0;) { - int ch = str.charAt(i++); - // Escape quotes and backslash - if ((use_quotes && ch == quote) || ch == '\\') { - v.append('\\'); - v.append((char) ch); - continue; - } - /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ - else if (size > 0 && ch >= 0xD800 && ch < 0xDC00) { - char ch2 = str.charAt(i++); - size--; - if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { - int ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; - v.append('\\'); - v.append('U'); - v.append(hexdigit[(ucs >> 28) & 0xf]); - v.append(hexdigit[(ucs >> 24) & 0xf]); - v.append(hexdigit[(ucs >> 20) & 0xf]); - v.append(hexdigit[(ucs >> 16) & 0xf]); - v.append(hexdigit[(ucs >> 12) & 0xf]); - v.append(hexdigit[(ucs >> 8) & 0xf]); - v.append(hexdigit[(ucs >> 4) & 0xf]); - v.append(hexdigit[ucs & 0xf]); - continue; - } - /* Fall through: isolated surrogates are copied as-is */ - i--; - size++; - } - /* Map 16-bit characters to '\\uxxxx' */ - if (ch >= 256) { - v.append('\\'); - v.append('u'); - v.append(hexdigit[(ch >> 12) & 0xf]); - v.append(hexdigit[(ch >> 8) & 0xf]); - v.append(hexdigit[(ch >> 4) & 0xf]); - v.append(hexdigit[ch & 15]); - } - /* Map special whitespace to '\t', 
\n', '\r' */ - else if (ch == '\t') { - v.append("\\t"); - } else if (ch == '\n') { - v.append("\\n"); - } else if (ch == '\r') { - v.append("\\r"); - } else if (ch < ' ' || ch >= 127) { - /* Map non-printable US ASCII to '\xNN' */ - v.append('\\'); - v.append('x'); - v.append(hexdigit[(ch >> 4) & 0xf]); - v.append(hexdigit[ch & 0xf]); - } else {/* Copy everything else as-is */ - v.append((char) ch); - } - } - - if (use_quotes) { - v.append(quote); - } - - // Return the original string if we didn't quote or escape anything - return v.length() > size ? v.toString() : str; - } - - private static ucnhashAPI pucnHash = null; - - public static String decode_UnicodeEscape(String str, int start, int end, String errors, - boolean unicode) { - StringBuilder v = new StringBuilder(end - start); - for (int s = start; s < end;) { - char ch = str.charAt(s); - /* Non-escape characters are interpreted as Unicode ordinals */ - if (ch != '\\') { - v.append(ch); - s++; - continue; - } - int loopStart = s; - /* \ - Escapes */ - s++; - if (s == end) { - s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", // - str, loopStart, s + 1, "\\ at end of string"); - continue; - } - ch = str.charAt(s++); - switch (ch) { - /* \x escapes */ - case '\n': - break; - case '\\': - v.append('\\'); - break; - case '\'': - v.append('\''); - break; - case '\"': - v.append('\"'); - break; - case 'b': - v.append('\b'); - break; - case 'f': - v.append('\014'); - break; /* FF */ - case 't': - v.append('\t'); - break; - case 'n': - v.append('\n'); - break; - case 'r': - v.append('\r'); - break; - case 'v': - v.append('\013'); - break; /* VT */ - case 'a': - v.append('\007'); - break; /* BEL, not classic C */ - /* \OOO (octal) escapes */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - int x = Character.digit(ch, 8); - for (int j = 0; j < 2 && s < end; j++, s++) { - ch = str.charAt(s); - if (ch < '0' || ch > '7') { - break; - } - x = (x << 3) + 
Character.digit(ch, 8); - } - v.append((char) x); - break; - case 'x': - s = hexescape(v, errors, 2, s, str, end, "truncated \\xXX"); - break; - case 'u': - if (!unicode) { - v.append('\\'); - v.append('u'); - break; - } - s = hexescape(v, errors, 4, s, str, end, "truncated \\uXXXX"); - break; - case 'U': - if (!unicode) { - v.append('\\'); - v.append('U'); - break; - } - s = hexescape(v, errors, 8, s, str, end, "truncated \\UXXXXXXXX"); - break; - case 'N': - if (!unicode) { - v.append('\\'); - v.append('N'); - break; - } - /* - * Ok, we need to deal with Unicode Character Names now, make sure we've - * imported the hash table data... - */ - if (pucnHash == null) { - PyObject mod = imp.importName("ucnhash", true); - mod = mod.__call__(); - pucnHash = (ucnhashAPI) mod.__tojava__(Object.class); - if (pucnHash.getCchMax() < 0) { - throw Py.UnicodeError("Unicode names not loaded"); - } - } - if (str.charAt(s) == '{') { - int startName = s + 1; - int endBrace = startName; - /* - * look for either the closing brace, or we exceed the maximum length of the - * unicode character names - */ - int maxLen = pucnHash.getCchMax(); - while (endBrace < end && str.charAt(endBrace) != '}' - && (endBrace - startName) <= maxLen) { - endBrace++; - } - if (endBrace != end && str.charAt(endBrace) == '}') { - int value = pucnHash.getValue(str, startName, endBrace); - if (storeUnicodeCharacter(value, v)) { - s = endBrace + 1; - } else { - s = codecs.insertReplacementAndGetResume( // - v, errors, "unicodeescape", // - str, loopStart, endBrace + 1, "illegal Unicode character"); - } - } else { - s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", // - str, loopStart, endBrace, "malformed \\N character escape"); - } - break; - } else { - s = codecs.insertReplacementAndGetResume(v, errors, "unicodeescape", // - str, loopStart, s + 1, "malformed \\N character escape"); - } - break; - default: - v.append('\\'); - v.append(str.charAt(s - 1)); - break; - } - } - return 
v.toString(); - } - - private static int hexescape(StringBuilder partialDecode, String errors, int digits, - int hexDigitStart, String str, int size, String errorMessage) { - if (hexDigitStart + digits > size) { - return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", str, - hexDigitStart - 2, size, errorMessage); - } - int i = 0; - int x = 0; - for (; i < digits; ++i) { - char c = str.charAt(hexDigitStart + i); - int d = Character.digit(c, 16); - if (d == -1) { - return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", - str, hexDigitStart - 2, hexDigitStart + i + 1, errorMessage); - } - x = (x << 4) & ~0xF; - if (c >= '0' && c <= '9') { - x += c - '0'; - } else if (c >= 'a' && c <= 'f') { - x += 10 + c - 'a'; - } else { - x += 10 + c - 'A'; - } - } - if (storeUnicodeCharacter(x, partialDecode)) { - return hexDigitStart + i; - } else { - return codecs.insertReplacementAndGetResume(partialDecode, errors, "unicodeescape", str, - hexDigitStart - 2, hexDigitStart + i + 1, "illegal Unicode character"); - } - } - - /* pass in an int since this can be a UCS-4 character */ - private static boolean storeUnicodeCharacter(int value, StringBuilder partialDecode) { - if (value < 0 || (value >= 0xD800 && value <= 0xDFFF)) { - return false; - } else if (value <= PySystemState.maxunicode) { - partialDecode.appendCodePoint(value); - return true; - } - return false; - } - - @ExposedMethod(doc = BuiltinDocs.str___getitem___doc) - final PyObject str___getitem__(PyObject index) { - PyObject ret = seq___finditem__(index); - if (ret == null) { - throw Py.IndexError("string index out of range"); - } - return ret; - } - - // XXX: need doc - @ExposedMethod(defaults = "null") - final PyObject str___getslice__(PyObject start, PyObject stop, PyObject step) { - return seq___getslice__(start, stop, step); - } - - @Override - public int __cmp__(PyObject other) { - return str___cmp__(other); - } - - @ExposedMethod(type = MethodType.CMP) - 
final int str___cmp__(PyObject other) { - if (!(other instanceof PyString)) { - return -2; - } - - int c = getString().compareTo(((PyString) other).getString()); - return c < 0 ? -1 : c > 0 ? 1 : 0; - } - - @Override - public PyObject __eq__(PyObject other) { - return str___eq__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___eq___doc) - final PyObject str___eq__(PyObject other) { - String s = coerce(other); - if (s == null) { - return null; - } - return getString().equals(s) ? Py.True : Py.False; - } - - @Override - public PyObject __ne__(PyObject other) { - return str___ne__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___ne___doc) - final PyObject str___ne__(PyObject other) { - String s = coerce(other); - if (s == null) { - return null; - } - return getString().equals(s) ? Py.False : Py.True; - } - - @Override - public PyObject __lt__(PyObject other) { - return str___lt__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___lt___doc) - final PyObject str___lt__(PyObject other) { - String s = coerce(other); - if (s == null) { - return null; - } - return getString().compareTo(s) < 0 ? Py.True : Py.False; - } - - @Override - public PyObject __le__(PyObject other) { - return str___le__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___le___doc) - final PyObject str___le__(PyObject other) { - String s = coerce(other); - if (s == null) { - return null; - } - return getString().compareTo(s) <= 0 ? Py.True : Py.False; - } - - @Override - public PyObject __gt__(PyObject other) { - return str___gt__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___gt___doc) - final PyObject str___gt__(PyObject other) { - String s = coerce(other); - if (s == null) { - return null; - } - return getString().compareTo(s) > 0 ? 
Py.True : Py.False; - } - - @Override - public PyObject __ge__(PyObject other) { - return str___ge__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___ge___doc) - final PyObject str___ge__(PyObject other) { - String s = coerce(other); - if (s == null) { - return null; - } - return getString().compareTo(s) >= 0 ? Py.True : Py.False; - } - - /** Interpret the object as a Java String representing bytes or return null. */ - private static String coerce(PyObject o) { - if (o instanceof PyString && !(o instanceof PyUnicode)) { - return o.toString(); - } - return null; - } - - @Override - public int hashCode() { - return str___hash__(); - } - - @ExposedMethod(doc = BuiltinDocs.str___hash___doc) - final int str___hash__() { - return getString().hashCode(); - } - - /** - * @return a byte array with one byte for each char in this object's underlying String. Each - * byte contains the low-order bits of its corresponding char. - */ - public byte[] toBytes() { - return StringUtil.toBytes(getString()); - } - - @Override - public Object __tojava__(Class c) { - if (c.isAssignableFrom(String.class)) { - /* - * If c is a CharSequence we assume the caller is prepared to get maybe not an actual - * String. In that case we avoid conversion so the caller can do special stuff with the - * returned PyString or PyUnicode or whatever. (If c is Object.class, the caller usually - * expects to get actually a String) - */ - return c == CharSequence.class ? 
this : getString(); - } - - if (c == Character.TYPE || c == Character.class) { - if (getString().length() == 1) { - return getString().charAt(0); - } - } - - if (c.isArray()) { - if (c.getComponentType() == Byte.TYPE) { - return toBytes(); - } - if (c.getComponentType() == Character.TYPE) { - return getString().toCharArray(); - } - } - - if (c.isAssignableFrom(Collection.class)) { - List list = new ArrayList(); - for (int i = 0; i < __len__(); i++) { - list.add(pyget(i).__tojava__(String.class)); - } - return list; - } - - if (c.isInstance(this)) { - return this; - } - - return Py.NoConversion; - } - - @Override - protected PyObject pyget(int i) { - // Method is overridden in PyUnicode, so definitely a PyString - return Py.makeCharacter(string.charAt(i)); - } - - public int getInt(int i) { - return string.charAt(i); - } - - @Override - protected PyObject getslice(int start, int stop, int step) { - if (step > 0 && stop < start) { - stop = start; - } - if (step == 1) { - return fromSubstring(start, stop); - } else { - int n = sliceLength(start, stop, step); - char new_chars[] = new char[n]; - int j = 0; - for (int i = start; j < n; i += step) { - new_chars[j++] = getString().charAt(i); - } - - return createInstance(new String(new_chars), true); - } - } - - /** - * Create an instance of the same type as this object, from the Java String given as argument. - * This is to be overridden in a subclass to return its own type. - * - * @param str to wrap - * @return instance wrapping {@code str} - */ - public PyString createInstance(String str) { - return new PyString(str); - } - - /** - * Create an instance of the same type as this object, from the Java String given as argument. - * This is to be overridden in a subclass to return its own type. - * - * @param str Java string representing the characters (as Java UTF-16). - * @param isBasic is ignored in PyString (effectively true). 
- * @return instance wrapping {@code str} - */ - protected PyString createInstance(String str, boolean isBasic) { - // ignore isBasic, doesn't apply to PyString, just PyUnicode - return new PyString(str); - } - - /** - * Return a Java String that is the Jython-internal equivalent of the byte-like - * argument (a str or any object that supports a one-dimensional byte buffer). If - * the argument is not acceptable (this includes a unicode argument) return null. - * - * @param obj to coerce to a String - * @return coerced value or null if it can't be - */ - private static String asU16BytesOrNull(PyObject obj) { - if (obj instanceof PyString) { - if (obj instanceof PyUnicode) { - return null; - } - // str but not unicode object: go directly to the String - return ((PyString) obj).getString(); - } else if (obj instanceof BufferProtocol) { - // Other object with buffer API: briefly access the buffer - try (PyBuffer buf = ((BufferProtocol) obj).getBuffer(PyBUF.FULL_RO)) { - return buf.toString(); - } - } else { - return null; - } - } - - /** - * Return a String equivalent to the argument. This is a helper function to those methods that - * accept any byte array type (any object that supports a one-dimensional byte buffer), but - * not a unicode. - * - * @param obj to coerce to a String - * @return coerced value - * @throws PyException {@code TypeError} if the coercion fails (including unicode) - */ - protected static String asU16BytesOrError(PyObject obj) throws PyException { - String ret = asU16BytesOrNull(obj); - if (ret != null) { - return ret; - } else { - throw Py.TypeError("expected str, bytearray or other buffer compatible object"); - } - } - - /** - * Return a String equivalent to the argument according to the calling conventions of methods - * that accept as a byte string anything bearing the buffer interface, or accept - * PyNone, but not a unicode. (Or the argument may be omitted, - * showing up here as null.) 
These include the strip and split methods - * of str, where a null indicates that the criterion is whitespace, and - * str.translate. - * - * @param obj to coerce to a String or null - * @param name of method - * @return coerced value or null - * @throws PyException if the coercion fails (including unicode) - */ - private static String asU16BytesNullOrError(PyObject obj, String name) throws PyException { - if (obj == null || obj == Py.None) { - return null; - } else { - String ret = asU16BytesOrNull(obj); - if (ret != null) { - return ret; - } else if (name == null) { - // A nameless method is the client - throw Py.TypeError("expected None, str or buffer compatible object"); - } else { - // Tuned for .strip and its relations, which supply their name - throw Py.TypeError(name + " arg must be None, str or buffer compatible object"); - } - } - } - - @Override - public boolean __contains__(PyObject o) { - return str___contains__(o); - } - - @ExposedMethod(doc = BuiltinDocs.str___contains___doc) - final boolean str___contains__(PyObject o) { - String other = asU16BytesOrNull(o); - if (other != null) { - return getString().indexOf(other) >= 0; - } else if (o instanceof PyUnicode) { - return decode().__contains__(o); - } else { - throw Py.TypeError("'in ' requires string as left operand, not " - + (o == null ? Py.None : o).getType().fastGetName()); - } - } - - @Override - protected PyObject repeat(int count) { - if (count < 0) { - count = 0; - } - int s = getString().length(); - if ((long) s * count > Integer.MAX_VALUE) { - // Since Strings store their data in an array, we can't make one - // longer than Integer.MAX_VALUE. Without this check we get - // NegativeArraySize exceptions when we create the array on the - // line with a wrapped int. 
- throw Py.OverflowError("max str len is " + Integer.MAX_VALUE); - } - char new_chars[] = new char[s * count]; - for (int i = 0; i < count; i++) { - getString().getChars(0, s, new_chars, i * s); - } - return createInstance(new String(new_chars)); - } - - @Override - public PyObject __mul__(PyObject o) { - return str___mul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___mul___doc) - final PyObject str___mul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - return repeat(o.asIndex(Py.OverflowError)); - } - - @Override - public PyObject __rmul__(PyObject o) { - return str___rmul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___rmul___doc) - final PyObject str___rmul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - return repeat(o.asIndex(Py.OverflowError)); - } - - /** - * {@inheritDoc} For a str addition means concatenation and returns a - * str ({@link PyString}) result, except when a {@link PyUnicode} argument is - * given, when a PyUnicode results. - */ - @Override - public PyObject __add__(PyObject other) { - return str___add__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.str___add___doc) - final PyObject str___add__(PyObject other) { - // Expect other to be some kind of byte-like object. - String otherStr = asU16BytesOrNull(other); - if (otherStr != null) { - // Yes it is: concatenate as strings, which are guaranteed byte-like. 
- return new PyString(getString().concat(otherStr), true); - } else if (other instanceof PyUnicode) { - // Escalate the problem to PyUnicode - return decode().__add__(other); - } else { - // Allow PyObject._basic_add to pick up the pieces or raise informative error - return null; - } - } - - @ExposedMethod(doc = BuiltinDocs.str___getnewargs___doc) - final PyTuple str___getnewargs__() { - return new PyTuple(new PyString(this.getString())); - } - - @Override - public PyTuple __getnewargs__() { - return str___getnewargs__(); - } - - @Override - public PyObject __mod__(PyObject other) { - return str___mod__(other); - } - - @ExposedMethod(doc = BuiltinDocs.str___mod___doc) - public PyObject str___mod__(PyObject other) { - StringFormatter fmt = new StringFormatter(getString(), false); - return fmt.format(other); - } - - @Override - public PyObject __int__() { - try { - return Py.newInteger(atoi(10)); - } catch (PyException e) { - if (e.match(Py.OverflowError)) { - return atol(10); - } - throw e; - } - } - - @Override - public PyObject __long__() { - return atol(10); - } - - @Override - public PyFloat __float__() { - return new PyFloat(atof()); - } - - @Override - public PyObject __pos__() { - throw Py.TypeError("bad operand type for unary +"); - } - - @Override - public PyObject __neg__() { - throw Py.TypeError("bad operand type for unary -"); - } - - @Override - public PyObject __invert__() { - throw Py.TypeError("bad operand type for unary ~"); - } - - @Override - public PyComplex __complex__() { - return atocx(); - } - - // Add in methods from string module - public String lower() { - return str_lower(); - } - - @ExposedMethod(doc = BuiltinDocs.str_lower_doc) - final String str_lower() { - String s = getString(); - int n = s.length(); - if (n == 1) { - // Special-case single byte string - char c = s.charAt(0); - return _isupper(c) ? String.valueOf((char) (c ^ SWAP_CASE)) : s; - } else { - // Copy chars to buffer, converting to lower-case. 
- char[] buf = new char[n]; - for (int i = 0; i < n; i++) { - char c = s.charAt(i); - buf[i] = _isupper(c) ? (char) (c ^ SWAP_CASE) : c; - } - return new String(buf); - } - } - - public String upper() { - return str_upper(); - } - - @ExposedMethod(doc = BuiltinDocs.str_upper_doc) - final String str_upper() { - String s = getString(); - int n = s.length(); - if (n == 1) { - // Special-case single byte string - char c = s.charAt(0); - return _islower(c) ? String.valueOf((char) (c ^ SWAP_CASE)) : s; - } else { - // Copy chars to buffer, converting to upper-case. - char[] buf = new char[n]; - for (int i = 0; i < n; i++) { - char c = s.charAt(i); - buf[i] = _islower(c) ? (char) (c ^ SWAP_CASE) : c; - } - return new String(buf); - } - } - - public String title() { - return str_title(); - } - - @ExposedMethod(doc = BuiltinDocs.str_title_doc) - final String str_title() { - char[] chars = getString().toCharArray(); - int n = chars.length; - boolean previous_is_cased = false; - for (int i = 0; i < n; i++) { - char ch = chars[i]; - if (_isalpha(ch)) { - if (previous_is_cased) { - // Should be lower case - if (_isupper(ch)) { - chars[i] = (char) (ch ^ SWAP_CASE); - } - } else { - // Should be upper case - if (_islower(ch)) { - chars[i] = (char) (ch ^ SWAP_CASE); - } - } - // And this was a letter - previous_is_cased = true; - } else { - // This was not a letter - previous_is_cased = false; - } - } - return new String(chars); - } - - public String swapcase() { - return str_swapcase(); - } - - @ExposedMethod(doc = BuiltinDocs.str_swapcase_doc) - final String str_swapcase() { - String s = getString(); - int n = s.length(); - if (n == 1) { - // Special-case single byte string - char c = s.charAt(0); - return _isalpha(c) ? String.valueOf((char) (c ^ SWAP_CASE)) : s; - } else { - // Copy chars to buffer, converting lower to upper case, upper to lower case. - char[] buf = new char[n]; - for (int i = 0; i < n; i++) { - char c = s.charAt(i); - buf[i] = _isalpha(c) ? 
(char) (c ^ SWAP_CASE) : c; - } - return new String(buf); - } - } - - // Bit to twiddle (XOR) for lowercase letter to uppercase and vice-versa. - private static final int SWAP_CASE = 0x20; - - /** - * Equivalent of Python str.strip() with no argument, meaning strip whitespace. Any - * whitespace byte/character will be discarded from either end of this str. - * - * @return a new String, stripped of the whitespace characters/bytes - */ - public String strip() { - return _strip(); - } - - /** - * Equivalent of Python str.strip(). - * - * @param stripChars characters to strip from either end of this str/bytes, or null - * @return a new String, stripped of the specified characters/bytes - */ - public String strip(String stripChars) { - return _strip(stripChars); - } - - /** - * Equivalent of Python str.strip(). Any byte/character matching one of those in - * stripChars will be discarded from either end of this str. If - * stripChars == null, whitespace will be stripped. If stripChars is a - * PyUnicode, the result will also be a PyUnicode. - * - * @param stripChars characters to strip from either end of this str/bytes, or null - * @return a new PyString (or {@link PyUnicode}), stripped of the specified - * characters/bytes - */ - public PyObject strip(PyObject stripChars) { - return str_strip(stripChars); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_strip_doc) - final PyObject str_strip(PyObject chars) { - if (chars instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_strip(chars); - } else { - // It ought to be None, null, some kind of bytes with the buffer API. - String stripChars = asU16BytesNullOrError(chars, "strip"); - // Strip specified characters or whitespace if stripChars == null - return new PyString(_strip(stripChars), true); - } - } - - /** - * Implementation of Python str.strip() common to exposed and Java API, when - * stripping whitespace. 
Any whitespace byte/character will be discarded from either end of this - * str. - *

- * Implementation note: although a str contains only bytes, this method is also - * called by {@link PyUnicode#unicode_strip(PyObject)} when this is a basic-plane string. - * - * @return a new String, stripped of the whitespace characters/bytes - */ - protected final String _strip() { - // Rightmost non-whitespace - int right = _findRight(); - if (right < 0) { - // They're all whitespace - return ""; - } else { - // Leftmost non-whitespace character: right known not to be a whitespace - int left = _findLeft(right); - return getString().substring(left, right + 1); - } - } - - /** - * Implementation of Python str.strip() common to exposed and Java API. Any - * byte/character matching one of those in stripChars will be discarded from either - * end of this str. If stripChars == null, whitespace will be - * stripped. - *

- * Implementation note: although a str contains only bytes, this method is also - * called by {@link PyUnicode#unicode_strip(PyObject)} when both arguments are basic-plane - * strings. - * - * @param stripChars characters to strip or null - * @return a new String, stripped of the specified characters/bytes - */ - protected final String _strip(String stripChars) { - if (stripChars == null) { - // Divert to the whitespace version - return _strip(); - } else { - // Rightmost non-matching character - int right = _findRight(stripChars); - if (right < 0) { - // They all match - return ""; - } else { - // Leftmost non-matching character: right is known not to match - int left = _findLeft(stripChars, right); - return getString().substring(left, right + 1); - } - } - } - - /** - * Helper for strip, lstrip implementation, when stripping whitespace. - * - * @param right rightmost extent of string search - * @return index of leftmost non-whitespace character or right if they all are. - */ - protected int _findLeft(int right) { - String s = getString(); - for (int left = 0; left < right; left++) { - if (!BaseBytes.isspace((byte) s.charAt(left))) { - return left; - } - } - return right; - } - - /** - * Helper for strip, lstrip implementation, when stripping specified - * characters. - * - * @param stripChars specifies set of characters to strip - * @param right rightmost extent of string search - * @return index of leftmost character not in stripChars or right if - * they all are. - */ - private int _findLeft(String stripChars, int right) { - String s = getString(); - for (int left = 0; left < right; left++) { - if (stripChars.indexOf(s.charAt(left)) < 0) { - return left; - } - } - return right; - } - - /** - * Helper for strip, rstrip implementation, when stripping whitespace. - * - * @return index of rightmost non-whitespace character or -1 if they all are. 
- */ - protected int _findRight() { - String s = getString(); - for (int right = s.length(); --right >= 0;) { - if (!BaseBytes.isspace((byte) s.charAt(right))) { - return right; - } - } - return -1; - } - - /** - * Helper for strip, rstrip implementation, when stripping specified - * characters. - * - * @param stripChars specifies set of characters to strip - * @return index of rightmost character not in stripChars or -1 if they all are. - */ - private int _findRight(String stripChars) { - String s = getString(); - for (int right = s.length(); --right >= 0;) { - if (stripChars.indexOf(s.charAt(right)) < 0) { - return right; - } - } - return -1; - } - - /** - * Equivalent of Python str.lstrip() with no argument, meaning strip whitespace. - * Any whitespace byte/character will be discarded from the left of this str. - * - * @return a new String, stripped of the whitespace characters/bytes - */ - public String lstrip() { - return _lstrip(); - } - - /** - * Equivalent of Python str.lstrip(). - * - * @param stripChars characters to strip from the left end of this str/bytes, or null - * @return a new String, stripped of the specified characters/bytes - */ - public String lstrip(String stripChars) { - return _lstrip(stripChars); - } - - /** - * Equivalent of Python str.lstrip(). Any byte/character matching one of those in - * stripChars will be discarded from the left end of this str. If - * stripChars == null, whitespace will be stripped. If stripChars is a - * PyUnicode, the result will also be a PyUnicode. 
- * - * @param stripChars characters to strip from the left end of this str/bytes, or null - * @return a new PyString (or {@link PyUnicode}), stripped of the specified - * characters/bytes - */ - public PyObject lstrip(PyObject stripChars) { - return str_lstrip(stripChars); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_lstrip_doc) - final PyObject str_lstrip(PyObject chars) { - if (chars instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_lstrip(chars); - } else { - // It ought to be None, null, some kind of bytes with the buffer API. - String stripChars = asU16BytesNullOrError(chars, "lstrip"); - // Strip specified characters or whitespace if stripChars == null - return new PyString(_lstrip(stripChars), true); - } - } - - /** - * Implementation of Python str.lstrip() common to exposed and Java API, when - * stripping whitespace. Any whitespace byte/character will be discarded from the left end of - * this str. - *

- * Implementation note: although a str contains only bytes, this method is also called by - * {@link PyUnicode#unicode_lstrip(PyObject)} when this is a basic-plane string. - * - * @return a new String, stripped of the whitespace characters/bytes - */ - protected final String _lstrip() { - String s = getString(); - // Leftmost non-whitespace character: cannot exceed length - int left = _findLeft(s.length()); - return s.substring(left); - } - - /** - * Implementation of Python str.lstrip() common to exposed and Java API. Any - * byte/character matching one of those in stripChars will be discarded from the - * left end of this str. If stripChars == null, whitespace will be - * stripped. - *

- * Implementation note: although a str contains only bytes, this method is also - * called by {@link PyUnicode#unicode_lstrip(PyObject)} when both arguments are basic-plane - * strings. - * - * @param stripChars characters to strip or null - * @return a new String, stripped of the specified characters/bytes - */ - protected final String _lstrip(String stripChars) { - if (stripChars == null) { - // Divert to the whitespace version - return _lstrip(); - } else { - String s = getString(); - // Leftmost matching character: cannot exceed length - int left = _findLeft(stripChars, s.length()); - return s.substring(left); - } - } - - /** - * Equivalent of Python str.rstrip() with no argument, meaning strip whitespace. - * Any whitespace byte/character will be discarded from the right end of this str. - * - * @return a new String, stripped of the whitespace characters/bytes - */ - public String rstrip() { - return _rstrip(); - } - - /** - * Equivalent of Python str.rstrip(). - * - * @param stripChars characters to strip from either end of this str/bytes, or null - * @return a new String, stripped of the specified characters/bytes - */ - public String rstrip(String stripChars) { - return _rstrip(stripChars); - } - - /** - * Equivalent of Python str.rstrip(). Any byte/character matching one of those in - * stripChars will be discarded from the right end of this str. If - * stripChars == null, whitespace will be stripped. If stripChars is a - * PyUnicode, the result will also be a PyUnicode. 
- * - * @param stripChars characters to strip from the right end of this str/bytes, or null - * @return a new PyString (or {@link PyUnicode}), stripped of the specified - * characters/bytes - */ - public PyObject rstrip(PyObject stripChars) { - return str_rstrip(stripChars); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_rstrip_doc) - final PyObject str_rstrip(PyObject chars) { - if (chars instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_rstrip(chars); - } else { - // It ought to be None, null, some kind of bytes with the buffer API. - String stripChars = asU16BytesNullOrError(chars, "rstrip"); - // Strip specified characters or whitespace if stripChars == null - return new PyString(_rstrip(stripChars), true); - } - } - - /** - * Implementation of Python str.rstrip() common to exposed and Java API, when - * stripping whitespace. Any whitespace byte/character will be discarded from the right end of - * this str. - *

- * Implementation note: although a str contains only bytes, this method is also - * called by {@link PyUnicode#unicode_rstrip(PyObject)} when this is a basic-plane string. - * - * @return a new String, stripped of the whitespace characters/bytes - */ - protected final String _rstrip() { - // Rightmost non-whitespace - int right = _findRight(); - if (right < 0) { - // They're all whitespace - return ""; - } else { - // Substring up to and including this rightmost non-whitespace - return getString().substring(0, right + 1); - } - } - - /** - * Implementation of Python str.rstrip() common to exposed and Java API. Any - * byte/character matching one of those in stripChars will be discarded from the - * right end of this str. If stripChars == null, whitespace will be - * stripped. - *

- * Implementation note: although a str contains only bytes, this method is also - * called by {@link PyUnicode#unicode_strip(PyObject)} when both arguments are basic-plane - * strings. - * - * @param stripChars characters to strip or null - * @return a new String, stripped of the specified characters/bytes - */ - protected final String _rstrip(String stripChars) { - if (stripChars == null) { - // Divert to the whitespace version - return _rstrip(); - } else { - // Rightmost non-matching character - int right = _findRight(stripChars); - // Substring up to and including this rightmost non-matching character (or "") - return getString().substring(0, right + 1); - } - } - - /** - * Equivalent to Python str.split(), splitting on runs of whitespace. - * - * @return list(str) result - */ - public PyList split() { - return _split(null, -1); - } - - /** - * Equivalent to Python str.split(), splitting on a specified string. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @return list(str) result - */ - public PyList split(String sep) { - return _split(sep, -1); - } - - /** - * Equivalent to Python str.split(), splitting on a specified string. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @param maxsplit maximum number of splits to make (there may be maxsplit+1 - * parts). - * @return list(str) result - */ - public PyList split(String sep, int maxsplit) { - return _split(sep, maxsplit); - } - - /** - * Equivalent to Python str.split() returning a {@link PyList} of - * PyStrings (or PyUnicodes). The str will be split at - * each occurrence of sep. If sep == null, whitespace will be used as - * the criterion. If sep has zero length, a Python ValueError is - * raised. 
- * - * @param sep string to use as separator (or null if to split on whitespace) - * @return list(str) result - */ - public PyList split(PyObject sep) { - return str_split(sep, -1); - } - - /** - * As {@link #split(PyObject)} but if maxsplit >=0 and there are more feasible - * splits than maxsplit, the last element of the list contains the rest of the - * string. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @param maxsplit maximum number of splits to make (there may be maxsplit+1 - * parts). - * @return list(str) result - */ - public PyList split(PyObject sep, int maxsplit) { - return str_split(sep, maxsplit); - } - - @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.str_split_doc) - final PyList str_split(PyObject sepObj, int maxsplit) { - if (sepObj instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_split(sepObj, maxsplit); - } else { - // It ought to be None, null, some kind of bytes with the buffer API. - String sep = asU16BytesNullOrError(sepObj, "split"); - // Split on specified string or whitespace if sep == null - return _split(sep, maxsplit); - } - } - - /** - * Implementation of Python str.split() common to exposed and Java API returning a - * {@link PyList} of PyStrings. The str will be split at each - * occurrence of sep. If sep == null, whitespace will be used as the - * criterion. If sep has zero length, a Python ValueError is raised. - * If maxsplit >=0 and there are more feasible splits than maxsplit - * the last element of the list contains the what is left over after the last split. - *

- * Implementation note: although a str contains only bytes, this method is also called by - * {@link PyUnicode#unicode_split(PyObject, int)}. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @param maxsplit maximum number of splits to make (there may be maxsplit+1 - * parts). - * @return list(str) result - */ - protected final PyList _split(String sep, int maxsplit) { - if (sep == null) { - // Split on runs of whitespace - return splitfields(maxsplit); - } else if (sep.length() == 0) { - throw Py.ValueError("empty separator"); - } else { - // Split on specified (non-empty) string - return splitfields(sep, maxsplit); - } - } - - /** - * Helper function for .split, in str and (when overridden) in - * unicode, splitting on white space and returning a list of the separated parts. - * If there are more than maxsplit feasible splits the last element of the list is - * the remainder of the original (this) string. - * - * @param maxsplit limit on the number of splits (if >=0) - * @return PyList of split sections - */ - protected PyList splitfields(int maxsplit) { - /* - * Result built here is a list of split parts, exactly as required for s.split(None, - * maxsplit). If there are to be n splits, there will be n+1 elements in L. 
- */ - PyList list = new PyList(); - - String s = getString(); - int length = s.length(), start = 0, splits = 0, index; - - if (maxsplit < 0) { - // Make all possible splits: there can't be more than: - maxsplit = length; - } - - // start is always the first character not consumed into a piece on the list - while (start < length) { - - // Find the next occurrence of non-whitespace - while (start < length) { - if (!BaseBytes.isspace((byte) s.charAt(start))) { - // Break leaving start pointing at non-whitespace - break; - } - start++; - } - - if (start >= length) { - // Only found whitespace so there is no next segment - break; - - } else if (splits >= maxsplit) { - // The next segment is the last and contains all characters up to the end - index = length; - - } else { - // The next segment runs up to the next next whitespace or end - for (index = start; index < length; index++) { - if (BaseBytes.isspace((byte) s.charAt(index))) { - // Break leaving index pointing at whitespace - break; - } - } - } - - // Make a piece from start up to index - list.append(fromSubstring(start, index)); - splits++; - - // Start next segment search at that point - start = index; - } - - return list; - } - - /** - * Helper function for .split and .replace, in str and - * unicode, returning a list of the separated parts. If there are more than - * maxsplit occurrences of sep the last element of the list is the - * remainder of the original (this) string. If sep is the zero-length string, the - * split is between each character (as needed by .replace). The split sections will - * be {@link PyUnicode} if this object is a PyUnicode. 
- * - * @param sep at occurrences of which this string should be split - * @param maxsplit limit on the number of splits (if >=0) - * @return PyList of split sections - */ - private PyList splitfields(String sep, int maxsplit) { - /* - * Result built here is a list of split parts, exactly as required for s.split(sep), or to - * produce the result of s.replace(sep, r) by a subsequent call r.join(L). If there are to - * be n splits, there will be n+1 elements in L. - */ - PyList list = new PyList(); - - String s = getString(); - int length = s.length(); - int sepLength = sep.length(); - - if (maxsplit < 0) { - // Make all possible splits: there can't be more than: - maxsplit = length + 1; - } - - if (maxsplit == 0) { - // Degenerate case - list.append(this); - - } else if (sepLength == 0) { - /* - * The separator is "". This cannot happen with s.split(""), as that's an error, but it - * is used by s.replace("", A) and means that the result should be A interleaved between - * the characters of s, before the first, and after the last, the number always limited - * by maxsplit. - */ - - // There will be m+1 parts, where m = maxsplit or length+1 whichever is smaller. - int m = (maxsplit > length) ? 
length + 1 : maxsplit; - - // Put an empty string first to make one split before the first character - list.append(createInstance("")); // PyString or PyUnicode as this class - int index; - - // Add m-1 pieces one character long - for (index = 0; index < m - 1; index++) { - list.append(fromSubstring(index, index + 1)); - } - - // And add the last piece, so there are m+1 splits (m+1 pieces) - list.append(fromSubstring(index, length)); - - } else { - // Index of first character not yet in a piece on the list - int start = 0; - - // Add at most maxsplit pieces - for (int splits = 0; splits < maxsplit; splits++) { - - // Find the next occurrence of sep - int index = s.indexOf(sep, start); - - if (index < 0) { - // No more occurrences of sep: we're done - break; - - } else { - // Make a piece from start up to where we found sep - list.append(fromSubstring(start, index)); - // New start (of next piece) is just after sep - start = index + sepLength; - } - } - - // Last piece is the rest of the string (even if start==length) - list.append(fromSubstring(start, length)); - } - - return list; - } - - /** - * Equivalent to Python str.rsplit(), splitting on runs of whitespace. - * - * @return list(str) result - */ - public PyList rsplit() { - return _rsplit(null, -1); - } - - /** - * Equivalent to Python str.rsplit(), splitting on a specified string. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @return list(str) result - */ - public PyList rsplit(String sep) { - return _rsplit(sep, -1); - } - - /** - * Equivalent to Python str.rsplit(), splitting on a specified string. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @param maxsplit maximum number of splits to make (there may be maxsplit+1 - * parts). 
- * @return list(str) result - */ - public PyList rsplit(String sep, int maxsplit) { - return _rsplit(sep, maxsplit); - } - - /** - * Equivalent to Python str.rsplit() returning a {@link PyList} of - * PyStrings (or PyUnicodes). The str will be split at - * each occurrence of sep, working from the right. If sep == null, - * whitespace will be used as the criterion. If sep has zero length, a Python - * ValueError is raised. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @return list(str) result - */ - public PyList rsplit(PyObject sep) { - return str_rsplit(sep, -1); - } - - /** - * As {@link #rsplit(PyObject)} but if maxsplit >=0 and there are more feasible - * splits than maxsplit the last element of the list contains the rest of the - * string. - * - * @param sep string to use as separator (or null if to split on whitespace) - * @param maxsplit maximum number of splits to make (there may be maxsplit+1 - * parts). - * @return list(str) result - */ - public PyList rsplit(PyObject sep, int maxsplit) { - return str_rsplit(sep, maxsplit); - } - - @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.str_split_doc) - final PyList str_rsplit(PyObject sepObj, int maxsplit) { - if (sepObj instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_rsplit(sepObj, maxsplit); - } else { - // It ought to be None, null, some kind of bytes with the buffer API. - String sep = asU16BytesNullOrError(sepObj, "rsplit"); - // Split on specified string or whitespace if sep == null - return _rsplit(sep, maxsplit); - } - } - - /** - * Implementation of Python str.rsplit() common to exposed and Java API returning a - * {@link PyList} of PyStrings. The str will be split at each - * occurrence of sep, working from the right. If sep == null, - * whitespace will be used as the criterion. If sep has zero length, a Python - * ValueError is raised. 
If maxsplit >=0 and there are more - * feasible splits than maxsplit the first element of the list contains the what is - * left over after the last split. - *

- * Implementation note: although a str contains only bytes, this method is also called by - * {@link PyUnicode#unicode_rsplit(PyObject, int)} . - * - * @param sep string to use as separator (or null if to split on whitespace) - * @param maxsplit maximum number of splits to make (there may be maxsplit+1 - * parts). - * @return list(str) result - */ - protected final PyList _rsplit(String sep, int maxsplit) { - if (sep == null) { - // Split on runs of whitespace - return rsplitfields(maxsplit); - } else if (sep.length() == 0) { - throw Py.ValueError("empty separator"); - } else { - // Split on specified (non-empty) string - return rsplitfields(sep, maxsplit); - } - } - - /** - * Helper function for .rsplit, in str and (when overridden) in - * unicode, splitting on white space and returning a list of the separated parts. - * If there are more than maxsplit feasible splits the first element of the list is - * the remainder of the original (this) string. - * - * @param maxsplit limit on the number of splits (if >=0) - * @return PyList of split sections - */ - protected PyList rsplitfields(int maxsplit) { - /* - * Result built here (in reverse) is a list of split parts, exactly as required for - * s.rsplit(None, maxsplit). If there are to be n splits, there will be n+1 elements. 
- */ - PyList list = new PyList(); - - String s = getString(); - int length = s.length(), end = length - 1, splits = 0, index; - - if (maxsplit < 0) { - // Make all possible splits: there can't be more than: - maxsplit = length; - } - - // end is always the rightmost character not consumed into a piece on the list - while (end >= 0) { - - // Find the next occurrence of non-whitespace (working leftwards) - while (end >= 0) { - if (!BaseBytes.isspace((byte) s.charAt(end))) { - // Break leaving end pointing at non-whitespace - break; - } - --end; - } - - if (end < 0) { - // Only found whitespace so there is no next segment - break; - - } else if (splits >= maxsplit) { - // The next segment is the last and contains all characters back to the beginning - index = -1; - - } else { - // The next segment runs back to the next next whitespace or beginning - for (index = end; index >= 0; --index) { - if (BaseBytes.isspace((byte) s.charAt(index))) { - // Break leaving index pointing at whitespace - break; - } - } - } - - // Make a piece from index+1 start up to end+1 - list.append(fromSubstring(index + 1, end + 1)); - splits++; - - // Start next segment search at that point - end = index; - } - - list.reverse(); - return list; - } - - /** - * Helper function for .rsplit, in str and unicode, - * returning a list of the separated parts, in the reverse order of their occurrence in - * this string. If there are more than maxsplit occurrences of sep the - * first element of the list is the left end of the original (this) string. The split sections - * will be {@link PyUnicode} if this object is a PyUnicode. - * - * @param sep at occurrences of which this string should be split - * @param maxsplit limit on the number of splits (if >=0) - * @return PyList of split sections - */ - private PyList rsplitfields(String sep, int maxsplit) { - /* - * Result built here (in reverse) is a list of split parts, exactly as required for - * s.rsplit(sep, maxsplit). 
If there are to be n splits, there will be n+1 elements. - */ - PyList list = new PyList(); - - String s = getString(); - int length = s.length(); - int sepLength = sep.length(); - - if (maxsplit < 0) { - // Make all possible splits: there can't be more than: - maxsplit = length + 1; - } - - if (maxsplit == 0) { - // Degenerate case - list.append(this); - - } else if (sepLength == 0) { - // Empty separator is not allowed - throw Py.ValueError("empty separator"); - - } else { - // Index of first character of the last piece already on the list - int end = length; - - // Add at most maxsplit pieces - for (int splits = 0; splits < maxsplit; splits++) { - - // Find the next occurrence of sep (working leftwards) - int index = s.lastIndexOf(sep, end - sepLength); - - if (index < 0) { - // No more occurrences of sep: we're done - break; - - } else { - // Make a piece from where we found sep up to end - list.append(fromSubstring(index + sepLength, end)); - // New end (of next piece) is where we found sep - end = index; - } - } - - // Last piece is the rest of the string (even if end==0) - list.append(fromSubstring(0, end)); - } - - list.reverse(); - return list; - } - - /** - * Equivalent to Python str.partition(), splits the PyString at the - * first occurrence of sepObj returning a {@link PyTuple} containing the part - * before the separator, the separator itself, and the part after the separator. - * - * @param sepObj str, unicode or object implementing {@link BufferProtocol} - * @return tuple of parts - */ - public PyTuple partition(PyObject sepObj) { - return str_partition(sepObj); - } - - @ExposedMethod(doc = BuiltinDocs.str_partition_doc) - final PyTuple str_partition(PyObject sepObj) { - - if (sepObj instanceof PyUnicode) { - // Deal with Unicode separately - return unicodePartition(sepObj); - - } else { - // It ought to be some kind of bytes with the buffer API. 
- String sep = asU16BytesOrError(sepObj); - - if (sep.length() == 0) { - throw Py.ValueError("empty separator"); - } - - int index = getString().indexOf(sep); - if (index != -1) { - return new PyTuple(fromSubstring(0, index), sepObj, - fromSubstring(index + sep.length(), getString().length())); - } else { - return new PyTuple(this, Py.EmptyString, Py.EmptyString); - } - } - } - - final PyTuple unicodePartition(PyObject sepObj) { - PyUnicode strObj = __unicode__(); - String str = strObj.getString(); - - // Will throw a TypeError if not a basestring - String sep = sepObj.asString(); - sepObj = sepObj.__unicode__(); - - if (sep.length() == 0) { - throw Py.ValueError("empty separator"); - } - - int index = str.indexOf(sep); - if (index != -1) { - return new PyTuple(strObj.fromSubstring(0, index), sepObj, - strObj.fromSubstring(index + sep.length(), str.length())); - } else { - PyUnicode emptyUnicode = Py.newUnicode(""); - return new PyTuple(this, emptyUnicode, emptyUnicode); - } - } - - /** - * Equivalent to Python str.rpartition(), splits the PyString at the - * last occurrence of sepObj returning a {@link PyTuple} containing the part before - * the separator, the separator itself, and the part after the separator. - * - * @param sepObj str, unicode or object implementing {@link BufferProtocol} - * @return tuple of parts - */ - public PyTuple rpartition(PyObject sepObj) { - return str_rpartition(sepObj); - } - - @ExposedMethod(doc = BuiltinDocs.str_rpartition_doc) - final PyTuple str_rpartition(PyObject sepObj) { - - if (sepObj instanceof PyUnicode) { - // Deal with Unicode separately - return unicodeRpartition(sepObj); - - } else { - // It ought to be some kind of bytes with the buffer API. 
- String sep = asU16BytesOrError(sepObj); - - if (sep.length() == 0) { - throw Py.ValueError("empty separator"); - } - - int index = getString().lastIndexOf(sep); - if (index != -1) { - return new PyTuple(fromSubstring(0, index), sepObj, - fromSubstring(index + sep.length(), getString().length())); - } else { - return new PyTuple(Py.EmptyString, Py.EmptyString, this); - } - } - } - - final PyTuple unicodeRpartition(PyObject sepObj) { - PyUnicode strObj = __unicode__(); - String str = strObj.getString(); - - // Will throw a TypeError if not a basestring - String sep = sepObj.asString(); - sepObj = sepObj.__unicode__(); - - if (sep.length() == 0) { - throw Py.ValueError("empty separator"); - } - - int index = str.lastIndexOf(sep); - if (index != -1) { - return new PyTuple(strObj.fromSubstring(0, index), sepObj, - strObj.fromSubstring(index + sep.length(), str.length())); - } else { - PyUnicode emptyUnicode = Py.newUnicode(""); - return new PyTuple(emptyUnicode, emptyUnicode, this); - } - } - - public PyList splitlines() { - return str_splitlines(false); - } - - public PyList splitlines(boolean keepends) { - return str_splitlines(keepends); - } - - @ExposedMethod(defaults = "false", doc = BuiltinDocs.str_splitlines_doc) - final PyList str_splitlines(boolean keepends) { - PyList list = new PyList(); - - char[] chars = getString().toCharArray(); - int n = chars.length; - - int j = 0; - for (int i = 0; i < n;) { - /* Find a line and append it */ - while (i < n && chars[i] != '\n' && chars[i] != '\r' - && Character.getType(chars[i]) != Character.LINE_SEPARATOR) { - i++; - } - - /* Skip the line break reading CRLF as one line break */ - int eol = i; - if (i < n) { - if (chars[i] == '\r' && i + 1 < n && chars[i + 1] == '\n') { - i += 2; - } else { - i++; - } - if (keepends) { - eol = i; - } - } - list.append(fromSubstring(j, eol)); - j = i; - } - if (j < n) { - list.append(fromSubstring(j, n)); - } - return list; - } - - /** - * Return a new object of the same type as this 
one equal to the slice - * [begin:end]. (Python end-relative indexes etc. are not supported.) Subclasses ( - * {@link PyUnicode#fromSubstring(int, int)}) override this to return their own type.) - * - * @param begin first included character. - * @param end first excluded character. - * @return new object. - */ - protected PyString fromSubstring(int begin, int end) { - // Method is overridden in PyUnicode, so definitely a PyString - return new PyString(getString().substring(begin, end), true); - } - - /** - * Return the lowest index in the string where substring sub is found. Raises - * ValueError if the substring is not found. - * - * @param sub substring to find. - * @return index of sub in this object. - * @throws PyException {@code ValueError} if not found. - */ - public int index(PyObject sub) { - return str_index(sub, null, null); - } - - /** - * Return the lowest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:]. Raises - * ValueError if the substring is not found. - * - * @param sub substring to find. - * @param start start of slice. - * @return index of sub in this object. - * @throws PyException {@code ValueError} if not found. - */ - public int index(PyObject sub, PyObject start) throws PyException { - return str_index(sub, start, null); - } - - /** - * Return the lowest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:end]. Arguments - * start and end are interpreted as in slice notation, with null or - * {@link Py#None} representing "missing". Raises ValueError if the substring is - * not found. - * - * @param sub substring to find. - * @param start start of slice. - * @param end end of slice. - * @return index of sub in this object. - * @throws PyException {@code ValueError} if not found. 
- */ - public int index(PyObject sub, PyObject start, PyObject end) throws PyException { - return checkIndex(str_index(sub, start, end)); - } - - /** Equivalent to {@link #index(PyObject)} specialized to String. */ - public int index(String sub) { - return index(sub, null, null); - } - - /** Equivalent to {@link #index(PyObject, PyObject)} specialized to String. */ - public int index(String sub, PyObject start) { - return index(sub, start, null); - } - - /** - * Equivalent to {@link #index(PyObject, PyObject, PyObject)} specialized to String - * . - */ - public int index(String sub, PyObject start, PyObject end) { - return checkIndex(_find(sub, start, end)); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_index_doc) - final int str_index(PyObject subObj, PyObject start, PyObject end) { - return checkIndex(str_find(subObj, start, end)); - } - - /** - * Return the highest index in the string where substring sub is found. Raises - * ValueError if the substring is not found. - * - * @param sub substring to find. - * @return index of sub in this object. - * @throws PyException {@code ValueError} if not found. - */ - public int rindex(PyObject sub) { - return str_rindex(sub, null, null); - } - - /** - * Return the highest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:]. Raises - * ValueError if the substring is not found. - * - * @param sub substring to find. - * @param start start of slice. - * @return index of sub in this object. - * @throws PyException {@code ValueError} if not found. - */ - public int rindex(PyObject sub, PyObject start) throws PyException { - return str_rindex(sub, start, null); - } - - /** - * Return the highest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:end]. Arguments - * start and end are interpreted as in slice notation, with null or - * {@link Py#None} representing "missing". 
Raises ValueError if the substring is - * not found. - * - * @param sub substring to find. - * @param start start of slice. - * @param end end of slice. - * @return index of sub in this object. - * @throws PyException {@code ValueError} if not found. - */ - public int rindex(PyObject sub, PyObject start, PyObject end) throws PyException { - return checkIndex(str_rindex(sub, start, end)); - } - - /** Equivalent to {@link #rindex(PyObject)} specialized to String. */ - public int rindex(String sub) { - return rindex(sub, null, null); - } - - /** Equivalent to {@link #rindex(PyObject, PyObject)} specialized to String. */ - public int rindex(String sub, PyObject start) { - return rindex(sub, start, null); - } - - /** - * Equivalent to {@link #rindex(PyObject, PyObject, PyObject)} specialized to - * String. - */ - public int rindex(String sub, PyObject start, PyObject end) { - return checkIndex(_rfind(sub, start, end)); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_rindex_doc) - final int str_rindex(PyObject subObj, PyObject start, PyObject end) { - return checkIndex(str_rfind(subObj, start, end)); - } - - /** - * A little helper for converting str.find to str.index that will raise - * ValueError("substring not found") if the argument is negative, otherwise passes - * the argument through. - * - * @param index to check - * @return index if non-negative - * @throws PyException {@code ValueError} if not found - */ - protected final int checkIndex(int index) throws PyException { - if (index >= 0) { - return index; - } else { - throw Py.ValueError("substring not found"); - } - } - - /** - * Return the number of non-overlapping occurrences of substring sub. - * - * @param sub substring to find. - * @return count of occurrences. - */ - public int count(PyObject sub) { - return count(sub, null, null); - } - - /** - * Return the number of non-overlapping occurrences of substring sub in the range - * [start:]. - * - * @param sub substring to find. 
- * @param start start of slice. - * @return count of occurrences. - */ - public int count(PyObject sub, PyObject start) { - return count(sub, start, null); - } - - /** - * Return the number of non-overlapping occurrences of substring sub in the range - * [start:end]. Optional arguments start and end are - * interpreted as in slice notation. - * - * @param sub substring to find. - * @param start start of slice. - * @param end end of slice. - * @return count of occurrences. - */ - public int count(PyObject sub, PyObject start, PyObject end) { - return str_count(sub, start, end); - } - - /** Equivalent to {@link #count(PyObject)} specialized to String. */ - public int count(String sub) { - return count(sub, null, null); - } - - /** Equivalent to {@link #count(PyObject, PyObject)} specialized to String. */ - public int count(String sub, PyObject start) { - return count(sub, start, null); - } - - /** - * Equivalent to {@link #count(PyObject, PyObject, PyObject)} specialized to String - * . - */ - public int count(String sub, PyObject start, PyObject end) { - return _count(sub, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_count_doc) - final int str_count(PyObject subObj, PyObject start, PyObject end) { - if (subObj instanceof PyUnicode) { - // Promote the problem to a Unicode one - return asUnicode(start, end).unicode_count(subObj, null, null); - } else { - // It ought to be some kind of bytes with the buffer API. - String sub = asU16BytesOrError(subObj); - return _count(sub, start, end); - } - } - - /** - * Helper common to the Python and Java API returning the number of occurrences of a substring. - * It accepts slice-like arguments, which may be None or end-relative (negative). - * This method also supports {@link PyUnicode#unicode_count(PyObject, PyObject, PyObject)}. - * - * @param sub substring to find. - * @param startObj start of slice. - * @param endObj end of slice. 
- * @return count of occurrences - */ - protected final int _count(String sub, PyObject startObj, PyObject endObj) { - - // Interpret the slice indices as concrete values - int[] indices = translateIndices(startObj, endObj); - int subLen = sub.length(); - - if (subLen == 0) { - // Special case counting the occurrences of an empty string. - int start = indices[2], end = indices[3], n = __len__(); - if (end < 0 || end < start || start > n) { - // Slice is reversed or does not overlap the string. - return 0; - } else { - // Count of '' is one more than number of characters in overlap. - return Math.min(end, n) - Math.max(start, 0) + 1; - } - - } else { - - // Skip down this string finding occurrences of sub - int start = indices[0], end = indices[1]; - int limit = end - subLen, count = 0; - - while (start <= limit) { - int index = getString().indexOf(sub, start); - if (index >= 0 && index <= limit) { - // Found at index. - count += 1; - // Next search begins after this instance, at: - start = index + subLen; - } else { - // not found, or found too far right (index>limit) - break; - } - } - return count; - } - } - - /** - * Return the lowest index in the string where substring sub is found. - * - * @param sub substring to find. - * @return index of sub in this object or -1 if not found. - */ - public int find(PyObject sub) { - return find(sub, null, null); - } - - /** - * Return the lowest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:]. - * - * @param sub substring to find. - * @param start start of slice. - * @return index of sub in this object or -1 if not found. - */ - public int find(PyObject sub, PyObject start) { - return find(sub, start, null); - } - - /** - * Return the lowest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:end]. Arguments - * start and end are interpreted as in slice notation, with null or - * {@link Py#None} representing "missing". 
- * - * @param sub substring to find. - * @param start start of slice. - * @param end end of slice. - * @return index of sub in this object or -1 if not found. - */ - public int find(PyObject sub, PyObject start, PyObject end) { - return str_find(sub, start, end); - } - - /** Equivalent to {@link #find(PyObject)} specialized to String. */ - public int find(String sub) { - return find(sub, null, null); - } - - /** Equivalent to {@link #find(PyObject, PyObject)} specialized to String. */ - public int find(String sub, PyObject start) { - return find(sub, start, null); - } - - /** - * Equivalent to {@link #find(PyObject, PyObject, PyObject)} specialized to String. - */ - public int find(String sub, PyObject start, PyObject end) { - return _find(sub, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_find_doc) - final int str_find(PyObject subObj, PyObject start, PyObject end) { - if (subObj instanceof PyUnicode) { - // Promote the problem to a Unicode one - // XXX Questionable: return is a Unicode character index not byte index - return ((PyUnicode) decode()).unicode_find(subObj, start, end); - } else { - // It ought to be a bytes-like object. - String sub = asU16BytesOrError(subObj); - return _find(sub, start, end); - } - } - - /** - * Helper common to the Python and Java API returning the index of the substring or -1 for not - * found. It accepts slice-like arguments, which may be None or end-relative - * (negative). This method also supports - * {@link PyUnicode#unicode_find(PyObject, PyObject, PyObject)}. - * - * @param sub substring to find. - * @param startObj start of slice. - * @param endObj end of slice. - * @return index of sub in this object or -1 if not found. 
- */ - protected final int _find(String sub, PyObject startObj, PyObject endObj) { - // Interpret the slice indices as concrete values - int[] indices = translateIndices(startObj, endObj); - int subLen = sub.length(); - - if (subLen == 0) { - // Special case: an empty string may be found anywhere, ... - int start = indices[2], end = indices[3]; - if (end < 0 || end < start || start > __len__()) { - // ... except ln a reverse slice or beyond the end of the string, - return -1; - } else { - // ... and will be reported at the start of the overlap. - return indices[0]; - } - - } else { - // General case: search for first match then check against slice. - int start = indices[0], end = indices[1]; - int found = getString().indexOf(sub, start); - if (found >= 0 && found + subLen <= end) { - return found; - } else { - return -1; - } - } - } - - /** - * Return the highest index in the string where substring sub is found. - * - * @param sub substring to find. - * @return index of sub in this object or -1 if not found. - */ - public int rfind(PyObject sub) { - return rfind(sub, null, null); - } - - /** - * Return the highest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:]. - * - * @param sub substring to find. - * @param start start of slice. - * @return index of sub in this object or -1 if not found. - */ - public int rfind(PyObject sub, PyObject start) { - return rfind(sub, start, null); - } - - /** - * Return the highest index in the string where substring sub is found, such that - * sub is contained in the slice s[start:end]. Arguments - * start and end are interpreted as in slice notation, with null or - * {@link Py#None} representing "missing". - * - * @param sub substring to find. - * @param start start of slice. - * @param end end of slice. - * @return index of sub in this object or -1 if not found. 
- */ - public int rfind(PyObject sub, PyObject start, PyObject end) { - return str_rfind(sub, start, end); - } - - /** Equivalent to {@link #find(PyObject)} specialized to String. */ - public int rfind(String sub) { - return rfind(sub, null, null); - } - - /** Equivalent to {@link #find(PyObject, PyObject)} specialized to String. */ - public int rfind(String sub, PyObject start) { - return rfind(sub, start, null); - } - - /** - * Equivalent to {@link #find(PyObject, PyObject, PyObject)} specialized to String. - */ - public int rfind(String sub, PyObject start, PyObject end) { - return _rfind(sub, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_rfind_doc) - final int str_rfind(PyObject subObj, PyObject start, PyObject end) { - if (subObj instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_rfind(subObj, start, end); - } else { - // It ought to be some kind of bytes with the buffer API. - String sub = asU16BytesOrError(subObj); - return _rfind(sub, start, end); - } - } - - /** - * Helper common to the Python and Java API returning the last index of the substring or -1 for - * not found. It accepts slice-like arguments, which may be None or end-relative - * (negative). This method also supports - * {@link PyUnicode#unicode_rfind(PyObject, PyObject, PyObject)}. - * - * @param sub substring to find. - * @param startObj start of slice. - * @param endObj end of slice. - * @return index of sub in this object or -1 if not found. - */ - protected final int _rfind(String sub, PyObject startObj, PyObject endObj) { - // Interpret the slice indices as concrete values - int[] indices = translateIndices(startObj, endObj); - int subLen = sub.length(); - - if (subLen == 0) { - // Special case: an empty string may be found anywhere, ... - int start = indices[2], end = indices[3]; - if (end < 0 || end < start || start > __len__()) { - // ... 
except ln a reverse slice or beyond the end of the string, - return -1; - } else { - // ... and will be reported at the end of the overlap. - return indices[1]; - } - - } else { - // General case: search for first match then check against slice. - int start = indices[0], end = indices[1]; - int found = getString().lastIndexOf(sub, end - subLen); - if (found >= start) { - return found; - } else { - return -1; - } - } - } - - /** - * Convert this PyString to a floating-point value according to Python rules. - * - * @return the value - */ - public double atof() { - double x = 0.0; - Matcher m = getFloatPattern().matcher(getString()); - boolean valid = m.matches(); - - if (valid) { - // Might be a valid float: trimmed of white space in group 1. - String number = m.group(1); - try { - char lastChar = number.charAt(number.length() - 1); - if (Character.isLetter(lastChar)) { - // It's something like "nan", "-Inf" or "+nifty" - x = atofSpecials(m.group(1)); - } else { - // A numeric part was present, try to convert the whole - x = Double.parseDouble(m.group(1)); - } - } catch (NumberFormatException e) { - valid = false; - } - } - - // At this point, valid will have been cleared if there was a problem. - if (valid) { - return x; - } else { - String fmt = "invalid literal for float: %s"; - throw Py.ValueError(String.format(fmt, getString().trim())); - } - - } - - /** - * Regular expression for an unsigned Python float, accepting also any sequence of the letters - * that belong to "NaN" or "Infinity" in whatever case. This is used within the regular - * expression patterns that define a priori acceptable strings in the float and complex - * constructors. The expression contributes no capture groups. 
- */ - private static final String UF_RE = - "(?:(?:(?:\\d+\\.?|\\.\\d)\\d*(?:[eE][+-]?\\d+)?)|[infatyINFATY]+)"; - - /** - * Return the (lazily) compiled regular expression that matches all valid a Python float() - * arguments, in which Group 1 captures the number, stripped of white space. Various invalid - * non-numerics are provisionally accepted (e.g. "+inanity" or "-faint"). - */ - private static synchronized Pattern getFloatPattern() { - if (floatPattern == null) { - floatPattern = Pattern.compile("\\s*([+-]?" + UF_RE + ")\\s*"); - } - return floatPattern; - } - - /** Access only through {@link #getFloatPattern()}. */ - private static Pattern floatPattern = null; - - /** - * Return the (lazily) compiled regular expression for a Python complex number. This is used - * within the regular expression patterns that define a priori acceptable strings in the complex - * constructors. The expression contributes five named capture groups a, b, x, y and j. x and y - * are the two floats encountered, and if j is present, one of them is the imaginary part. a and - * b are the optional parentheses. They must either both be present or both omitted. - */ - private static synchronized Pattern getComplexPattern() { - if (complexPattern == null) { - complexPattern = Pattern.compile("\\s*(?\\(\\s*)?" // Parenthesis - + "(?[+-]?" + UF_RE + "?)" // - + "(?[+-]" + UF_RE + "?)?(?[jJ])?" // + - + "\\s*(?\\)\\s*)?"); // Parenthesis - } - return complexPattern; - } - - /** Access only through {@link #getComplexPattern()} */ - private static Pattern complexPattern = null; - - /** - * Conversion for non-numeric floats, accepting signed or unsigned "inf" and "nan", in any case. 
- * - * @param s to convert - * @return non-numeric result (if valid) - * @throws NumberFormatException if not a valid non-numeric indicator - */ - private static double atofSpecials(String s) throws NumberFormatException { - switch (s.toLowerCase()) { - case "nan": - case "+nan": - case "-nan": - return Double.NaN; - case "inf": - case "+inf": - case "infinity": - case "+infinity": - return Double.POSITIVE_INFINITY; - case "-inf": - case "-infinity": - return Double.NEGATIVE_INFINITY; - default: - throw new NumberFormatException(); - } - } - - /** - * Convert this PyString to a complex value according to Python rules. - * - * @return the value - */ - private PyComplex atocx() { - double x = 0.0, y = 0.0; - Matcher m = getComplexPattern().matcher(getString()); - boolean valid = m.matches(); - - if (valid) { - // Passes a priori, but we have some checks to make. Brackets: both or neither. - if ((m.group("a") != null) != (m.group("b") != null)) { - valid = false; - - } else { - try { - // Pick up the two numbers [+-]? [+-] j? - String xs = m.group("x"), ys = m.group("y"); - - if (m.group("j") != null) { - // There is a 'j', so there is an imaginary part. - if (ys != null) { - // There were two numbers, so the second is the imaginary part. - y = toComplexPart(ys); - // And the first is the real part - x = toComplexPart(xs); - } else if (xs != null) { - // There was only one number (and a 'j')so it is the imaginary part. - y = toComplexPart(xs); - // x = 0.0; - } else { - // There were no numbers, just the 'j'. (Impossible return?) - y = 1.0; - // x = 0.0; - } - - } else { - // There is no 'j' so can only be one number, the real part. - x = Double.parseDouble(xs); - if (ys != null) { - // Something like "123 +" or "123 + 456" but no 'j'. - throw new NumberFormatException(); - } - } - - } catch (NumberFormatException e) { - valid = false; - } - } - } - - // At this point, valid will have been cleared if there was a problem. 
- if (valid) { - return new PyComplex(x, y); - } else { - String fmt = "complex() arg is a malformed string: %s"; - throw Py.ValueError(String.format(fmt, getString().trim())); - } - - } - - /** - * Helper for interpreting each part (real and imaginary) of a complex number expressed as a - * string in {@link #atocx(String)}. It deals with numbers, inf, nan and their variants, and - * with the "implied one" in +j or 10-j. - * - * @param s to interpret - * @return value of s - * @throws NumberFormatException if the number is invalid - */ - private static double toComplexPart(String s) throws NumberFormatException { - if (s.length() == 0) { - // Empty string (occurs only as 'j') - return 1.0; - } else { - char lastChar = s.charAt(s.length() - 1); - if (Character.isLetter(lastChar)) { - // Possibly a sign, then letters that ought to be "nan" or "inf[inity]" - return atofSpecials(s); - } else if (lastChar == '+') { - // Occurs only as "+j" - return 1.0; - } else if (lastChar == '-') { - // Occurs only as "-j" - return -1.0; - } else { - // Possibly a sign then an unsigned float - return Double.parseDouble(s); - } - } - } - - private BigInteger asciiToBigInteger(int base, boolean isLong) { - String str = getString(); - - int b = 0; - int e = str.length(); - - while (b < e && Character.isWhitespace(str.charAt(b))) { - b++; - } - - while (e > b && Character.isWhitespace(str.charAt(e - 1))) { - e--; - } - - char sign = 0; - if (b < e) { - sign = str.charAt(b); - if (sign == '-' || sign == '+') { - b++; - while (b < e && Character.isWhitespace(str.charAt(b))) { - b++; - } - } - - if (base == 16) { - if (str.charAt(b) == '0') { - if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'X') { - b += 2; - } - } - } else if (base == 0) { - if (str.charAt(b) == '0') { - if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'X') { - base = 16; - b += 2; - } else if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'O') { - base = 8; - b += 2; - } else if (b < e 
- 1 && Character.toUpperCase(str.charAt(b + 1)) == 'B') { - base = 2; - b += 2; - } else { - base = 8; - } - } - } else if (base == 8) { - if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'O') { - b += 2; - } - } else if (base == 2) { - if (b < e - 1 && Character.toUpperCase(str.charAt(b + 1)) == 'B') { - b += 2; - } - } - } - - if (base == 0) { - base = 10; - } - - // if the base >= 22, then an 'l' or 'L' is a digit! - if (isLong && base < 22 && e > b - && (str.charAt(e - 1) == 'L' || str.charAt(e - 1) == 'l')) { - e--; - } - - String s = str; - if (b > 0 || e < str.length()) { - s = str.substring(b, e); - } - - BigInteger bi; - if (sign == '-') { - bi = new BigInteger("-" + s, base); - } else { - bi = new BigInteger(s, base); - } - return bi; - } - - public int atoi() { - return atoi(10); - } - - public int atoi(int base) { - if ((base != 0 && base < 2) || (base > 36)) { - throw Py.ValueError("invalid base for atoi()"); - } - - try { - BigInteger bi = asciiToBigInteger(base, false); - if (bi.compareTo(PyInteger.MAX_INT) > 0 || bi.compareTo(PyInteger.MIN_INT) < 0) { - throw Py.OverflowError("long int too large to convert to int"); - } - return bi.intValue(); - } catch (NumberFormatException exc) { - throw Py.ValueError( - "invalid literal for int() with base " + base + ": '" + getString() + "'"); - } catch (StringIndexOutOfBoundsException exc) { - throw Py.ValueError( - "invalid literal for int() with base " + base + ": '" + getString() + "'"); - } - } - - public PyLong atol() { - return atol(10); - } - - public PyLong atol(int base) { - if ((base != 0 && base < 2) || (base > 36)) { - throw Py.ValueError("invalid base for long literal:" + base); - } - - try { - BigInteger bi = asciiToBigInteger(base, true); - return new PyLong(bi); - } catch (NumberFormatException exc) { - if (this instanceof PyUnicode) { - // TODO: here's a basic issue: do we use the BigInteger constructor - // above, or add an equivalent to CPython's PyUnicode_EncodeDecimal; - // we 
should note that the current error string does not quite match - // CPython regardless of the codec, that's going to require some more work - throw Py.UnicodeEncodeError("decimal", "codec can't encode character", 0, 0, - "invalid decimal Unicode string"); - } else { - throw Py.ValueError( - "invalid literal for long() with base " + base + ": '" + getString() + "'"); - } - } catch (StringIndexOutOfBoundsException exc) { - throw Py.ValueError( - "invalid literal for long() with base " + base + ": '" + getString() + "'"); - } - } - - private static String padding(int n, char pad) { - char[] chars = new char[n]; - for (int i = 0; i < n; i++) { - chars[i] = pad; - } - return new String(chars); - } - - private static char parse_fillchar(String function, String fillchar) { - if (fillchar == null) { - return ' '; - } - if (fillchar.length() != 1) { - throw Py.TypeError(function + "() argument 2 must be char, not str"); - } - return fillchar.charAt(0); - } - - public String ljust(int width) { - return str_ljust(width, null); - } - - public String ljust(int width, String padding) { - return str_ljust(width, padding); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_ljust_doc) - final String str_ljust(int width, String fillchar) { - char pad = parse_fillchar("ljust", fillchar); - int n = width - getString().length(); - if (n <= 0) { - return getString(); - } - return getString() + padding(n, pad); - } - - public String rjust(int width) { - return str_rjust(width, null); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_rjust_doc) - final String str_rjust(int width, String fillchar) { - char pad = parse_fillchar("rjust", fillchar); - int n = width - getString().length(); - if (n <= 0) { - return getString(); - } - return padding(n, pad) + getString(); - } - - public String center(int width) { - return str_center(width, null); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.str_center_doc) - final String str_center(int width, String 
fillchar) { - char pad = parse_fillchar("center", fillchar); - int n = width - getString().length(); - if (n <= 0) { - return getString(); - } - int half = n / 2; - if (n % 2 > 0 && width % 2 > 0) { - half += 1; - } - - return padding(half, pad) + getString() + padding(n - half, pad); - } - - public String zfill(int width) { - return str_zfill(width); - } - - @ExposedMethod(doc = BuiltinDocs.str_zfill_doc) - final String str_zfill(int width) { - String s = getString(); - int n = s.length(); - if (n >= width) { - return s; - } - char[] chars = new char[width]; - int nzeros = width - n; - int i = 0; - int sStart = 0; - if (n > 0) { - char start = s.charAt(0); - if (start == '+' || start == '-') { - chars[0] = start; - i += 1; - nzeros++; - sStart = 1; - } - } - for (; i < nzeros; i++) { - chars[i] = '0'; - } - s.getChars(sStart, s.length(), chars, i); - return new String(chars); - } - - public String expandtabs() { - return str_expandtabs(8); - } - - public String expandtabs(int tabsize) { - return str_expandtabs(tabsize); - } - - @ExposedMethod(defaults = "8", doc = BuiltinDocs.str_expandtabs_doc) - final String str_expandtabs(int tabsize) { - String s = getString(); - StringBuilder buf = new StringBuilder((int) (s.length() * 1.5)); - char[] chars = s.toCharArray(); - int n = chars.length; - int position = 0; - - for (int i = 0; i < n; i++) { - char c = chars[i]; - if (c == '\t') { - int spaces = tabsize - position % tabsize; - position += spaces; - while (spaces-- > 0) { - buf.append(' '); - } - continue; - } - if (c == '\n' || c == '\r') { - position = -1; - } - buf.append(c); - position++; - } - return buf.toString(); - } - - public String capitalize() { - return str_capitalize(); - } - - @ExposedMethod(doc = BuiltinDocs.str_capitalize_doc) - final String str_capitalize() { - String s = getString(); - int n = s.length(); - if (n == 0) { - return s; - } else { - char[] buf = new char[n]; - // At least one byte: if lower convert to upper case. 
- char c = s.charAt(0); - buf[0] = _islower(c) ? (char) (c ^ SWAP_CASE) : c; - // Copy the rest, converting to lower case. - for (int i = 1; i < n; i++) { - c = s.charAt(i); - buf[i] = _isupper(c) ? (char) (c ^ SWAP_CASE) : c; - } - return new String(buf); - } - } - - /** - * Equivalent to Python str.replace(old, new), returning a copy of the string with all - * occurrences of substring old replaced by new. If either argument is a {@link PyUnicode} (or - * this object is), the result will be a PyUnicode. - * - * @param oldPiece to replace where found. - * @param newPiece replacement text. - * @return PyString (or PyUnicode if any string is one), this string after replacements. - */ - public PyString replace(PyObject oldPiece, PyObject newPiece) { - return str_replace(oldPiece, newPiece, -1); - } - - /** - * Equivalent to Python str.replace(old, new[, count]), returning a copy of the string with all - * occurrences of substring old replaced by new. If argument count is nonnegative, - * only the first count occurrences are replaced. If either argument is a - * {@link PyUnicode} (or this object is), the result will be a PyUnicode. - * - * @param oldPiece to replace where found. - * @param newPiece replacement text. - * @param count maximum number of replacements to make, or -1 meaning all of them. - * @return PyString (or PyUnicode if any string is one), this string after replacements. - */ - public PyString replace(PyObject oldPiece, PyObject newPiece, int count) { - return str_replace(oldPiece, newPiece, count); - } - - @ExposedMethod(defaults = "-1", doc = BuiltinDocs.str_replace_doc) - final PyString str_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) { - if (oldPieceObj instanceof PyUnicode || newPieceObj instanceof PyUnicode) { - // Promote the problem to a Unicode one - return ((PyUnicode) decode()).unicode_replace(oldPieceObj, newPieceObj, count); - } else { - // Neither is a PyUnicode: both ought to be some kind of bytes with the buffer API. 
- String oldPiece = asU16BytesOrError(oldPieceObj); - String newPiece = asU16BytesOrError(newPieceObj); - return _replace(oldPiece, newPiece, count); - } - } - - /** - * Helper common to the Python and Java API for str.replace, returning a new string - * equal to this string with ocurrences of oldPiece replaced by - * newPiece, up to a maximum of count occurrences, or all of them. - * This method also supports {@link PyUnicode#unicode_replace(PyObject, PyObject, int)}, in - * which context it returns a PyUnicode - * - * @param oldPiece to replace where found. - * @param newPiece replacement text. - * @param count maximum number of replacements to make, or -1 meaning all of them. - * @return PyString (or PyUnicode if this string is one), this string after replacements. - */ - protected final PyString _replace(String oldPiece, String newPiece, int count) { - - String s = getString(); - int len = s.length(); - int oldLen = oldPiece.length(); - int newLen = newPiece.length(); - - if (len == 0) { - if (count < 0 && oldLen == 0) { - return createInstance(newPiece, true); - } - return createInstance(s, true); - - } else if (oldLen == 0 && newLen != 0 && count != 0) { - /* - * old="" and new != "", interleave new piece with each char in original, taking into - * account count - */ - StringBuilder buffer = new StringBuilder(); - int i = 0; - buffer.append(newPiece); - for (; i < len && (count < 0 || i < count - 1); i++) { - buffer.append(s.charAt(i)).append(newPiece); - } - buffer.append(s.substring(i)); - return createInstance(buffer.toString(), true); - - } else { - if (count < 0) { - count = (oldLen == 0) ? 
len + 1 : len; - } - return createInstance(newPiece).join(splitfields(oldPiece, count)); - } - } - - public PyString join(PyObject seq) { - return str_join(seq); - } - - @ExposedMethod(doc = BuiltinDocs.str_join_doc) - final PyString str_join(PyObject obj) { - PySequence seq = fastSequence(obj, ""); - int seqLen = seq.__len__(); - if (seqLen == 0) { - return Py.EmptyString; - } - - PyObject item; - if (seqLen == 1) { - item = seq.pyget(0); - if (item.getType() == PyString.TYPE || item.getType() == PyUnicode.TYPE) { - return (PyString) item; - } - } - - // There are at least two things to join, or else we have a subclass of the - // builtin types in the sequence. Do a pre-pass to figure out the total amount of - // space we'll need, see whether any argument is absurd, and defer to the Unicode - // join if appropriate - int i = 0; - long size = 0; - int sepLen = getString().length(); - for (; i < seqLen; i++) { - item = seq.pyget(i); - if (!(item instanceof PyString)) { - throw Py.TypeError(String.format("sequence item %d: expected string, %.80s found", - i, item.getType().fastGetName())); - } - if (item instanceof PyUnicode) { - // Defer to Unicode join. 
CAUTION: There's no gurantee that the original - // sequence can be iterated over again, so we must pass seq here - return unicodeJoin(seq); - } - - if (i != 0) { - size += sepLen; - } - size += ((PyString) item).getString().length(); - if (size > Integer.MAX_VALUE) { - throw Py.OverflowError("join() result is too long for a Python string"); - } - } - - // Catenate everything - StringBuilder buf = new StringBuilder((int) size); - for (i = 0; i < seqLen; i++) { - item = seq.pyget(i); - if (i != 0) { - buf.append(getString()); - } - buf.append(((PyString) item).getString()); - } - return new PyString(buf.toString(), true); // Guaranteed to be byte-like - } - - final PyUnicode unicodeJoin(PyObject obj) { - PySequence seq = fastSequence(obj, ""); - // A codec may be invoked to convert str objects to Unicode, and so it's possible - // to call back into Python code during PyUnicode_FromObject(), and so it's - // possible for a sick codec to change the size of fseq (if seq is a list). - // Therefore we have to keep refetching the size -- can't assume seqlen is - // invariant. 
- int seqLen = seq.__len__(); - // If empty sequence, return u"" - if (seqLen == 0) { - return new PyUnicode(); - } - - // If singleton sequence with an exact Unicode, return that - PyObject item; - if (seqLen == 1) { - item = seq.pyget(0); - if (item.getType() == PyUnicode.TYPE) { - return (PyUnicode) item; - } - } - - String sep = null; - if (seqLen > 1) { - if (this instanceof PyUnicode) { - sep = getString(); - } else { - sep = ((PyUnicode) decode()).getString(); - // In case decode()'s codec mutated seq - seqLen = seq.__len__(); - } - } - - // At least two items to join, or one that isn't exact Unicode - long size = 0; - int sepLen = getString().length(); - StringBuilder buf = new StringBuilder(); - String itemString; - for (int i = 0; i < seqLen; i++) { - item = seq.pyget(i); - // Convert item to Unicode - if (!(item instanceof PyString)) { - throw Py.TypeError(String.format( - "sequence item %d: expected string or Unicode," + " %.80s found", i, - item.getType().fastGetName())); - } - if (!(item instanceof PyUnicode)) { - item = ((PyString) item).decode(); - // In case decode()'s codec mutated seq - seqLen = seq.__len__(); - } - itemString = ((PyUnicode) item).getString(); - - if (i != 0) { - size += sepLen; - buf.append(sep); - } - size += itemString.length(); - if (size > Integer.MAX_VALUE) { - throw Py.OverflowError("join() result is too long for a Python string"); - } - buf.append(itemString); - } - return new PyUnicode(buf.toString()); - } - - /** - * Equivalent to the Python str.startswith method testing whether a string starts - * with a specified prefix. prefix can also be a tuple of prefixes to look for. - * - * @param prefix string to check for (or a PyTuple of them). - * @return true if this string slice starts with a specified prefix, otherwise - * false. 
- */ - public boolean startswith(PyObject prefix) { - return startswith(prefix, null, null); - } - - /** - * Equivalent to the Python str.startswith method, testing whether a string starts - * with a specified prefix, where a sub-range is specified by [start:]. - * start is interpreted as in slice notation, with null or {@link Py#None} - * representing "missing". prefix can also be a tuple of prefixes to look for. - * - * @param prefix string to check for (or a PyTuple of them). - * @param start start of slice. - * @return true if this string slice starts with a specified prefix, otherwise - * false. - */ - public boolean startswith(PyObject prefix, PyObject start) { - return startswith(prefix, start, null); - } - - /** - * Equivalent to the Python str.startswith method, testing whether a string starts - * with a specified prefix, where a sub-range is specified by [start:end]. - * Arguments start and end are interpreted as in slice notation, with - * null or {@link Py#None} representing "missing". prefix can also be a tuple of - * prefixes to look for. - * - * @param prefix string to check for (or a PyTuple of them). - * @param start start of slice. - * @param end end of slice. - * @return true if this string slice starts with a specified prefix, otherwise - * false. - */ - public boolean startswith(PyObject prefix, PyObject start, PyObject end) { - return str_startswith(prefix, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_startswith_doc) - final boolean str_startswith(PyObject prefix, PyObject startObj, PyObject endObj) { - - int[] indices = translateIndices(startObj, endObj); - int start = indices[0]; - int sliceLen = indices[1] - start; - - if (!(prefix instanceof PyTuple)) { - if (prefix instanceof PyUnicode) { - // Promote to a unicode problem on the decoded slice - return asUnicode(startObj, endObj).unicode_startswith(prefix, null, null); - } else { - // It ought to be a bytes-like object. 
- String s = asU16BytesOrError(prefix); - return sliceLen >= s.length() && getString().startsWith(s, start); - } - - } else { - // It's a tuple so we have to iterate through the members. - PyObject[] prefixes = ((PyTuple) prefix).getArray(); - String string = getString(); - - // Test with only the bytes prefixes first and save the unicode ones - int unicodeCount = 0; - for (PyObject o : prefixes) { - if (o instanceof PyUnicode) { - // Pack the unicode prefixes to the start of the array without trying them - prefixes[unicodeCount++] = o; - } else { - // It ought to be a bytes-like object. - String s = asU16BytesOrError(o); - if (sliceLen >= s.length() && string.startsWith(s, start)) { - return true; - } - } - } - - if (unicodeCount == 0) { - // Only bytes prefixes given and nothing matched - return false; - } else { - // There were unicode prefixes: test the decoded slice for them. - PyTuple t = new PyTuple(Arrays.copyOf(prefixes, unicodeCount)); - return asUnicode(startObj, endObj).unicode_startswith(t, null, null); - } - } - } - - /** - * Equivalent to the Python str.endswith method, testing whether a string ends with - * a specified suffix. suffix can also be a tuple of suffixes to look for. - * - * @param suffix string to check for (or a PyTuple of them). - * @return true if this string slice ends with a specified suffix, otherwise - * false. - */ - public boolean endswith(PyObject suffix) { - return endswith(suffix, null, null); - } - - /** - * Equivalent to the Python str.endswith method, testing whether a string ends with - * a specified suffix, where a sub-range is specified by [start:]. - * start is interpreted as in slice notation, with null or {@link Py#None} - * representing "missing". suffix can also be a tuple of suffixes to look for. - * - * @param suffix string to check for (or a PyTuple of them). - * @param start start of slice. - * @return true if this string slice ends with a specified suffix, otherwise - * false. 
- */ - public boolean endswith(PyObject suffix, PyObject start) { - return endswith(suffix, start, null); - } - - /** - * Equivalent to the Python str.endswith method, testing whether a string ends with - * a specified suffix, where a sub-range is specified by [start:end]. Arguments - * start and end are interpreted as in slice notation, with null or - * {@link Py#None} representing "missing". suffix can also be a tuple of suffixes - * to look for. - * - * @param suffix string to check for (or a PyTuple of them). - * @param start start of slice. - * @param end end of slice. - * @return true if this string slice ends with a specified suffix, otherwise - * false. - */ - public boolean endswith(PyObject suffix, PyObject start, PyObject end) { - return str_endswith(suffix, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_endswith_doc) - final boolean str_endswith(PyObject suffix, PyObject startObj, PyObject endObj) { - - int[] indices = translateIndices(startObj, endObj); - - if (!(suffix instanceof PyTuple)) { - if (suffix instanceof PyUnicode) { - // Promote to a unicode problem on the decoded slice - return asUnicode(startObj, endObj).unicode_endswith(suffix, null, null); - } else { - // It ought to be a bytes-like object. - String s = asU16BytesOrError(suffix); - return getString().substring(indices[0], indices[1]).endsWith(s); - } - - } else { - // It's a tuple so we have to iterate through the members. - PyObject[] suffixes = ((PyTuple) suffix).getArray(); - String string = getString().substring(indices[0], indices[1]); - - // Test with only the bytes suffixes first and save the unicode ones - int unicodeCount = 0; - for (PyObject o : suffixes) { - if (o instanceof PyUnicode) { - // Pack the unicode suffixes to the start of the array without trying them - suffixes[unicodeCount++] = o; - } else { - // It ought to be a bytes-like object. 
- String s = asU16BytesOrError(o); - if (string.endsWith(s)) { - return true; - } - } - } - - if (unicodeCount == 0) { - // Only bytes suffixes given and nothing matched - return false; - } else { - // There were unicode suffixes: test the decoded slice for them. - PyTuple t = new PyTuple(Arrays.copyOf(suffixes, unicodeCount)); - return asUnicode(startObj, endObj).unicode_endswith(t, null, null); - } - } - } - - /** - * Many of the string methods deal with slices specified using Python slice semantics: - * endpoints, which are PyObjects, may be null or None - * (meaning default to one end or the other) or may be negative (meaning "from the end"). - * Meanwhile, the implementation methods need integer indices, both within the array, and - * 0<=start<=end<=N the length of the array. - *

- * This method first translates the Python slice startObj and endObj - * according to the slice semantics for null and negative values, and stores these in elements 2 - * and 3 of the result. Then, since the end points of the range may lie outside this sequence's - * bounds (in either direction) it reduces them to the nearest points satisfying - * 0<=start<=end<=N, and stores these in elements [0] and [1] of the - * result. - * - * @param startObj Python start of slice - * @param endObj Python end of slice - * @return a 4 element array of two range-safe indices, and two original indices. - */ - protected int[] translateIndices(PyObject startObj, PyObject endObj) { - int start, end; - int n = __len__(); - int[] result = new int[4]; - - // Decode the start using slice semantics - if (startObj == null || startObj == Py.None) { - start = 0; - // result[2] = 0 already - } else { - // Convert to int but limit to Integer.MIN_VALUE <= start <= Integer.MAX_VALUE - start = startObj.asIndex(null); - if (start < 0) { - // Negative value means "from the end" - start = n + start; - } - result[2] = start; - } - - // Decode the end using slice semantics - if (endObj == null || endObj == Py.None) { - result[1] = result[3] = end = n; - } else { - // Convert to int but limit to Integer.MIN_VALUE <= end <= Integer.MAX_VALUE - end = endObj.asIndex(null); - if (end < 0) { - // Negative value means "from the end" - result[3] = end = end + n; - // Ensure end is safe for String.substring(start,end). - if (end < 0) { - end = 0; - // result[1] = 0 already - } else { - result[1] = end; - } - } else { - result[3] = end; - // Ensure end is safe for String.substring(start,end). - if (end > n) { - result[1] = end = n; - } else { - result[1] = end; - } - } - } - - // Ensure start is safe for String.substring(start,end). 
- if (start < 0) { - start = 0; - // result[0] = 0 already - } else if (start > end) { - result[0] = start = end; - } else { - result[0] = start; - } - - return result; - } - - /** - * Equivalent to Python str.translate returning a copy of this string where the - * characters have been mapped through the translation table. table - * must be equivalent to a string of length 256 (if it is not null). - * - * @param table of character (byte) translations (or null) - * @return transformed byte string - */ - public String translate(PyObject table) { - return translate(table, null); - } - - /** - * Equivalent to Python str.translate returning a copy of this string where all - * characters (bytes) occurring in the argument deletechars are removed (if it is - * not null), and the remaining characters have been mapped through the translation - * table. table must be equivalent to a string of length 256 (if it is - * not null). - * - * @param table of character (byte) translations (or null) - * @param deletechars set of characters to remove (or null) - * @return transformed byte string - */ - public String translate(PyObject table, PyObject deletechars) { - return str_translate(table, deletechars); - } - - /** - * Equivalent to {@link #translate(PyObject)} specialized to String. - */ - public String translate(String table) { - return _translate(table, null); - } - - /** - * Equivalent to {@link #translate(PyObject, PyObject)} specialized to String. 
- */ - public String translate(String table, String deletechars) { - return _translate(table, deletechars); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.str_translate_doc) - final String str_translate(PyObject tableObj, PyObject deletecharsObj) { - // Accept anythiong withthe buffer API or null - String table = asU16BytesNullOrError(tableObj, null); - String deletechars = asU16BytesNullOrError(deletecharsObj, null); - return _translate(table, deletechars); - } - - /** - * Helper common to the Python and Java API implementing str.translate returning a - * copy of this string where all characters (bytes) occurring in the argument - * deletechars are removed (if it is not null), and the remaining - * characters have been mapped through the translation table, which must be - * equivalent to a string of length 256 (if it is not null). - * - * @param table of character (byte) translations (or null) - * @param deletechars set of characters to remove (or null) - * @return transformed byte string - */ - private final String _translate(String table, String deletechars) { - - if (table != null && table.length() != 256) { - throw Py.ValueError("translation table must be 256 characters long"); - } - - StringBuilder buf = new StringBuilder(getString().length()); - - for (int i = 0; i < getString().length(); i++) { - char c = getString().charAt(i); - if (deletechars != null && deletechars.indexOf(c) >= 0) { - continue; - } - if (table == null) { - buf.append(c); - } else { - try { - buf.append(table.charAt(c)); - } catch (IndexOutOfBoundsException e) { - throw Py.TypeError("translate() only works for 8-bit character strings"); - } - } - } - return buf.toString(); - } - - public boolean islower() { - return str_islower(); - } - - @ExposedMethod(doc = BuiltinDocs.str_islower_doc) - final boolean str_islower() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. 
- return _islower(s.charAt(0)); - } - - boolean cased = false; - for (int i = 0; i < n; i++) { - char ch = s.charAt(i); - if (_isupper(ch)) { - return false; - } else if (!cased && _islower(ch)) { - cased = true; - } - } - return cased; - } - - private boolean _islower(char ch) { - if (ch < 256) { - return BaseBytes.islower((byte) ch); - } else { - // This is an internal error. Really, the test should be unnecessary. - throw new java.lang.IllegalArgumentException("non-byte character in PyString"); - } - } - - public boolean isupper() { - return str_isupper(); - } - - @ExposedMethod(doc = BuiltinDocs.str_isupper_doc) - final boolean str_isupper() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. - return _isupper(s.charAt(0)); - } - - boolean cased = false; - for (int i = 0; i < n; i++) { - char ch = s.charAt(i); - if (_islower(ch)) { - return false; - } else if (!cased && _isupper(ch)) { - cased = true; - } - } - return cased; - } - - private boolean _isupper(char ch) { - if (ch < 256) { - return BaseBytes.isupper((byte) ch); - } else { - // This is an internal error. Really, the test should be unnecessary. - throw new java.lang.IllegalArgumentException("non-byte character in PyString"); - } - } - - public boolean isalpha() { - return str_isalpha(); - } - - @ExposedMethod(doc = BuiltinDocs.str_isalpha_doc) - final boolean str_isalpha() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. - return _isalpha(s.charAt(0)); - } - - for (int i = 0; i < n; i++) { - if (!_isalpha(s.charAt(i))) { - return false; - } - } - return n > 0; - } - - private boolean _isalpha(char ch) { - if (ch < 256) { - return BaseBytes.isalpha((byte) ch); - } else { - // This is an internal error. Really, the test should be unnecessary. 
- throw new java.lang.IllegalArgumentException("non-byte character in PyString"); - } - } - - public boolean isalnum() { - return str_isalnum(); - } - - @ExposedMethod(doc = BuiltinDocs.str_isalnum_doc) - final boolean str_isalnum() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. - return _isalnum(s.charAt(0)); - } - - for (int i = 0; i < n; i++) { - if (!_isalnum(s.charAt(i))) { - return false; - } - } - return n > 0; - } - - private boolean _isalnum(char ch) { - // This is now entirely compatible with CPython, as long as only bytes are stored. - if (ch < 256) { - return BaseBytes.isalnum((byte) ch); - } else { - // This is an internal error. Really, the test should be unnecessary. - throw new java.lang.IllegalArgumentException("non-byte character in PyString"); - } - } - - public boolean isdecimal() { - return str_isdecimal(); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc) - final boolean str_isdecimal() { // XXX this ought not to exist in str (in Python 2) - return str_isdigit(); - } - - private boolean _isdecimal(char ch) { - // See the comment in _isalnum. Here it is even worse. - return Character.getType(ch) == Character.DECIMAL_DIGIT_NUMBER; - } - - public boolean isdigit() { - return str_isdigit(); - } - - @ExposedMethod(doc = BuiltinDocs.str_isdigit_doc) - final boolean str_isdigit() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. - return _isdigit(s.charAt(0)); - } - - for (int i = 0; i < n; i++) { - if (!_isdigit(s.charAt(i))) { - return false; - } - } - return n > 0; - } - - private boolean _isdigit(char ch) { - if (ch < 256) { - return BaseBytes.isdigit((byte) ch); - } else { - // This is an internal error. Really, the test should be unnecessary. 
- throw new java.lang.IllegalArgumentException("non-byte character in PyString"); - } - } - - public boolean isnumeric() { - return str_isnumeric(); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc) - final boolean str_isnumeric() { // XXX this ought not to exist in str (in Python 2) - return str_isdigit(); - } - - public boolean istitle() { - return str_istitle(); - } - - @ExposedMethod(doc = BuiltinDocs.str_istitle_doc) - final boolean str_istitle() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. - return _isupper(s.charAt(0)); - } - - boolean cased = false; - boolean previous_is_cased = false; - for (int i = 0; i < n; i++) { - char ch = s.charAt(i); - if (_isupper(ch)) { - if (previous_is_cased) { - return false; - } - previous_is_cased = true; - cased = true; - } else if (_islower(ch)) { - if (!previous_is_cased) { - return false; - } - previous_is_cased = true; - cased = true; - } else { - previous_is_cased = false; - } - } - return cased; - } - - public boolean isspace() { - return str_isspace(); - } - - @ExposedMethod(doc = BuiltinDocs.str_isspace_doc) - final boolean str_isspace() { - String s = getString(); - int n = s.length(); - - if (n == 1) { - // Special case single character strings. - return _isspace(s.charAt(0)); - } - - for (int i = 0; i < n; i++) { - if (!_isspace(s.charAt(i))) { - return false; - } - } - return n > 0; - } - - private boolean _isspace(char ch) { - if (ch < 256) { - return BaseBytes.isspace((byte) ch); - } else { - // This is an internal error. Really, the test should be unnecessary. 
- throw new java.lang.IllegalArgumentException("non-byte character in PyString"); - } - } - - public boolean isunicode() { - return str_isunicode(); - } - - @ExposedMethod(doc = "isunicode is deprecated.") - final boolean str_isunicode() { - Py.warning(Py.DeprecationWarning, "isunicode is deprecated."); - int n = getString().length(); - for (int i = 0; i < n; i++) { - char ch = getString().charAt(i); - if (ch > 255) { - return true; - } - } - return false; - } - - public String encode() { - return encode(null, null); - } - - public String encode(String encoding) { - return encode(encoding, null); - } - - public String encode(String encoding, String errors) { - return codecs.encode(this, encoding, errors); - } - - @ExposedMethod(doc = BuiltinDocs.str_encode_doc) - final String str_encode(PyObject[] args, String[] keywords) { - ArgParser ap = new ArgParser("encode", args, keywords, "encoding", "errors"); - String encoding = ap.getString(0, null); - String errors = ap.getString(1, null); - return encode(encoding, errors); - } - - public PyObject decode() { - return decode(null, null); - } - - public PyObject decode(String encoding) { - return decode(encoding, null); - } - - public PyObject decode(String encoding, String errors) { - return codecs.decode(this, encoding, errors); - } - - @ExposedMethod(doc = BuiltinDocs.str_decode_doc) - final PyObject str_decode(PyObject[] args, String[] keywords) { - ArgParser ap = new ArgParser("decode", args, keywords, "encoding", "errors"); - String encoding = ap.getString(0, null); - String errors = ap.getString(1, null); - return decode(encoding, errors); - } - - @ExposedMethod(doc = BuiltinDocs.str__formatter_parser_doc) - final PyObject str__formatter_parser() { - return new MarkupIterator(this); - } - - @ExposedMethod(doc = BuiltinDocs.str__formatter_field_name_split_doc) - final PyObject str__formatter_field_name_split() { - FieldNameIterator iterator = new FieldNameIterator(this); - return new PyTuple(iterator.pyHead(), 
iterator); - } - - @ExposedMethod(doc = BuiltinDocs.str_format_doc) - final PyObject str_format(PyObject[] args, String[] keywords) { - try { - return new PyString(buildFormattedString(args, keywords, null, null)); - } catch (IllegalArgumentException e) { - throw Py.ValueError(e.getMessage()); - } - } - - /** - * Implements PEP-3101 {}-formatting methods str.format() and - * unicode.format(). When called with enclosingIterator == null, this - * method takes this object as its formatting string. The method is also called (calls itself) - * to deal with nested formatting specifications. In that case, enclosingIterator - * is a {@link MarkupIterator} on this object and value is a substring of this - * object needing recursive translation. - * - * @param args to be interpolated into the string - * @param keywords for the trailing args - * @param enclosingIterator when used nested, null if subject is this PyString - * @param value the format string when enclosingIterator is not null - * @return the formatted string based on the arguments - */ - protected String buildFormattedString(PyObject[] args, String[] keywords, - MarkupIterator enclosingIterator, String value) { - - MarkupIterator it; - if (enclosingIterator == null) { - // Top-level call acts on this object. - it = new MarkupIterator(this); - } else { - // Nested call acts on the substring and some state from existing iterator. - it = new MarkupIterator(enclosingIterator, value); - } - - // Result will be formed here - StringBuilder result = new StringBuilder(); - - while (true) { - MarkupIterator.Chunk chunk = it.nextChunk(); - if (chunk == null) { - break; - } - // A Chunk encapsulates a literal part ... - result.append(chunk.literalText); - // ... and the parsed form of the replacement field that followed it (if any) - if (chunk.fieldName != null) { - // The grammar of the replacement field is: - // "{" [field_name] ["!" 
conversion] [":" format_spec] "}" - - // Get the object referred to by the field name (which may be omitted). - PyObject fieldObj = getFieldObject(chunk.fieldName, it.isBytes(), args, keywords); - if (fieldObj == null) { - continue; - } - - // The conversion specifier is s = __str__ or r = __repr__. - if ("r".equals(chunk.conversion)) { - fieldObj = fieldObj.__repr__(); - } else if ("s".equals(chunk.conversion)) { - fieldObj = fieldObj.__str__(); - } else if (chunk.conversion != null) { - throw Py.ValueError("Unknown conversion specifier " + chunk.conversion); - } - - // Check for "{}".format(u"abc") - if (fieldObj instanceof PyUnicode && !(this instanceof PyUnicode)) { - // Down-convert to PyString, at the risk of raising UnicodeEncodingError - fieldObj = ((PyUnicode) fieldObj).__str__(); - } - - // The format_spec may be simple, or contained nested replacement fields. - String formatSpec = chunk.formatSpec; - if (chunk.formatSpecNeedsExpanding) { - if (enclosingIterator != null) { - // PEP 3101 says only 2 levels - throw Py.ValueError("Max string recursion exceeded"); - } - // Recursively interpolate further args into chunk.formatSpec - formatSpec = buildFormattedString(args, keywords, it, formatSpec); - } - renderField(fieldObj, formatSpec, result); - } - } - return result.toString(); - } - - /** - * Return the object referenced by a given field name, interpreted in the context of the given - * argument list, containing positional and keyword arguments. - * - * @param fieldName to interpret. - * @param bytes true if the field name is from a PyString, false for PyUnicode. - * @param args argument list (positional then keyword arguments). - * @param keywords naming the keyword arguments. - * @return the object designated or null. 
- */ - private PyObject getFieldObject(String fieldName, boolean bytes, PyObject[] args, - String[] keywords) { - FieldNameIterator iterator = new FieldNameIterator(fieldName, bytes); - PyObject head = iterator.pyHead(); - PyObject obj = null; - int positionalCount = args.length - keywords.length; - - if (head.isIndex()) { - // The field name begins with an integer argument index (not a [n]-type index). - int index = head.asIndex(); - if (index >= positionalCount) { - throw Py.IndexError("tuple index out of range"); - } - obj = args[index]; - - } else { - // The field name begins with keyword. - for (int i = 0; i < keywords.length; i++) { - if (keywords[i].equals(head.asString())) { - obj = args[positionalCount + i]; - break; - } - } - // And if we don't find it, that's an error - if (obj == null) { - throw Py.KeyError(head); - } - } - - // Now deal with the iterated sub-fields - while (obj != null) { - FieldNameIterator.Chunk chunk = iterator.nextChunk(); - if (chunk == null) { - // End of iterator - break; - } - Object key = chunk.value; - if (chunk.is_attr) { - // key must be a String - obj = obj.__getattr__((String) key); - } else { - if (key instanceof Integer) { - // Can this happen? - obj = obj.__getitem__(((Integer) key).intValue()); - } else { - obj = obj.__getitem__(new PyString(key.toString())); - } - } - } - - return obj; - } - - /** - * Append to a formatting result, the presentation of one object, according to a given format - * specification and the object's __format__ method. - * - * @param fieldObj to format. - * @param formatSpec specification to apply. - * @param result to which the result will be appended. - */ - private void renderField(PyObject fieldObj, String formatSpec, StringBuilder result) { - PyString formatSpecStr = formatSpec == null ? 
Py.EmptyString : new PyString(formatSpec); - result.append(fieldObj.__format__(formatSpecStr).asString()); - } - - @Override - public PyObject __format__(PyObject formatSpec) { - return str___format__(formatSpec); - } - - @ExposedMethod(doc = BuiltinDocs.str___format___doc) - final PyObject str___format__(PyObject formatSpec) { - - // Parse the specification - Spec spec = InternalFormat.fromText(formatSpec, "__format__"); - - // Get a formatter for the specification - TextFormatter f = prepareFormatter(spec); - if (f == null) { - // The type code was not recognised - throw Formatter.unknownFormat(spec.type, "string"); - } - - // Bytes mode if neither this nor formatSpec argument is Unicode. - boolean unicode = this instanceof PyUnicode || formatSpec instanceof PyUnicode; - f.setBytes(!unicode); - - // Convert as per specification. - f.format(getString()); - - // Return a result that has the same type (str or unicode) as the formatSpec argument. - return f.pad().getPyResult(); - } - - /** - * Common code for {@link PyString} and {@link PyUnicode} to prepare a {@link TextFormatter} - * from a parsed specification. The object returned has format method - * {@link TextFormatter#format(String)} that treats its argument as UTF-16 encoded unicode (not - * just chars). That method will format its argument ( str or - * unicode) according to the PEP 3101 formatting specification supplied here. This - * would be used during text.__format__(".5s") or - * "{:.5s}".format(text) where text is this Python string. - * - * @param spec a parsed PEP-3101 format specification. - * @return a formatter ready to use, or null if the type is not a string format type. - * @throws PyException {@code ValueError} if the specification is faulty. 
- */ - @SuppressWarnings("fallthrough") - static TextFormatter prepareFormatter(Spec spec) throws PyException { - // Slight differences between format types - switch (spec.type) { - - case Spec.NONE: - case 's': - // Check for disallowed parts of the specification - if (spec.grouping) { - throw Formatter.notAllowed("Grouping", "string", spec.type); - } else if (Spec.specified(spec.sign)) { - throw Formatter.signNotAllowed("string", '\0'); - } else if (spec.alternate) { - throw Formatter.alternateFormNotAllowed("string"); - } else if (spec.align == '=') { - throw Formatter.alignmentNotAllowed('=', "string"); - } - // spec may be incomplete. The defaults are those commonly used for string formats. - spec = spec.withDefaults(Spec.STRING); - // Get a formatter for the specification - return new TextFormatter(spec); - - default: - // The type code was not recognised - return null; - } - } - - @Override - public String asString(int index) throws PyObject.ConversionException { - return getString(); - } - - @Override - public String asString() { - return getString(); - } - - @Override - public int asInt() { - // We have to override asInt/Long/Double because we override __int/long/float__, - // but generally don't want implicit atoi conversions for the base types. 
blah - asNumberCheck("__int__", "an integer"); - return super.asInt(); - } - - @Override - public long asLong() { - asNumberCheck("__long__", "an integer"); - return super.asLong(); - } - - @Override - public double asDouble() { - asNumberCheck("__float__", "a float"); - return super.asDouble(); - } - - private void asNumberCheck(String methodName, String description) { - PyType type = getType(); - if (type == PyString.TYPE || type == PyUnicode.TYPE || type.lookup(methodName) == null) { - throw Py.TypeError(description + " is required"); - } - } - - @Override - public String asName(int index) throws PyObject.ConversionException { - return internedString(); - } - - @Override - protected String unsupportedopMessage(String op, PyObject o2) { - if (op.equals("+")) { - return "cannot concatenate ''{1}'' and ''{2}'' objects"; - } - return super.unsupportedopMessage(op, o2); - } - - @Override - public char charAt(int index) { - return string.charAt(index); - } - - @Override - public int length() { - return string.length(); - } - - @Override - public CharSequence subSequence(int start, int end) { - return string.subSequence(start, end); - } - - /** - * Decode this str object to a unicode, like - * __unicode__() but without the possibility it will be overridden. - * - * @return this as a unicode using the default encoding. - */ - private PyUnicode asUnicode() { - return new PyUnicode(this); - } - - /** - * Decode a slice of this str object to a unicode, using Python slice - * semantics and the default encoding. This supports the many library methods that accept - * slicing as part of the API, in the case where the calculation must be promoted due to a - * unicode argument. - * - * @param startObj start index (or null or None) - * @param endObj end index (or null or None) - * @return the slice as a unicode using the default encoding. 
- */ - private PyUnicode asUnicode(PyObject startObj, PyObject endObj) { - if (startObj == null && endObj == null) { - return asUnicode(); - } else { - int[] indices = translateIndices(startObj, endObj); - return new PyUnicode(fromSubstring(indices[0], indices[1])); - } - } -} - - -/** - * Interpreter for %-format strings. (Note visible across the core package.) - */ -final class StringFormatter { - - /** Index into {@link #format} being interpreted. */ - int index; - /** Format being interpreted. */ - String format; - /** Where the output is built. */ - StringBuilder buffer; - /** - * Index into args of argument currently being worked, or special values indicating -1: a single - * item that has not yet been used, -2: a single item that has already been used, -3: a mapping. - */ - int argIndex; - /** Arguments supplied to {@link #format(PyObject)} method. */ - PyObject args; - /** Indicate a PyUnicode result is expected. */ - boolean needUnicode; - - final char pop() { - try { - return format.charAt(index++); - } catch (StringIndexOutOfBoundsException e) { - throw Py.ValueError("incomplete format"); - } - } - - final char peek() { - return format.charAt(index); - } - - final void push() { - index--; - } - - /** - * Initialise the interpreter with the given format string, ready for {@link #format(PyObject)}. - * - * @param format string to interpret - */ - public StringFormatter(String format) { - this(format, false); - } - - /** - * Initialise the interpreter with the given format string, ready for {@link #format(PyObject)}. - * - * @param format string to interpret - * @param unicodeCoercion to indicate a PyUnicode result is expected - */ - public StringFormatter(String format, boolean unicodeCoercion) { - index = 0; - this.format = format; - this.needUnicode = unicodeCoercion; - buffer = new StringBuilder(format.length() + 100); - } - - /** - * Read the next object from the argument list, taking special values of argIndex - * into account. 
- */ - PyObject getarg() { - PyObject ret = null; - switch (argIndex) { - case -3: // special index indicating a mapping - return args; - case -2: // special index indicating a single item that has already been used - break; - case -1: // special index indicating a single item that has not yet been used - argIndex = -2; - return args; - default: - ret = args.__finditem__(argIndex++); - break; - } - if (ret == null) { - throw Py.TypeError("not enough arguments for format string"); - } - return ret; - } - - /** - * Parse a number from the format, except if the next thing is "*", read it from the argument - * list. - */ - int getNumber() { - char c = pop(); - if (c == '*') { - PyObject o = getarg(); - if (o instanceof PyInteger) { - return ((PyInteger) o).getValue(); - } - throw Py.TypeError("* wants int"); - } else { - if (Character.isDigit(c)) { - int numStart = index - 1; - while (Character.isDigit(c = pop())) {} - index -= 1; - Integer i = Integer.valueOf(format.substring(numStart, index)); - return i.intValue(); - } - index -= 1; - return 0; - } - } - - /** - * Return the argument as either a {@link PyInteger} or a {@link PyLong} according to its - * __int__ method, or its __long__ method. If the argument has neither - * method, or both raise an exception, we return the argument itself. The caller must check the - * return type. - * - * @param arg to convert - * @return PyInteger or PyLong if possible - */ - private PyObject asNumber(PyObject arg) { - if (arg instanceof PyInteger || arg instanceof PyLong) { - // arg is already acceptable - return arg; - - } else { - // use __int__ or __long__to get an int (or long) - if (arg.getClass() == PyFloat.class) { - // A common case where it is safe to return arg.__int__() - return arg.__int__(); - - } else { - /* - * In general, we can't simply call arg.__int__() because PyString implements it - * without exposing it to python (str has no __int__). 
This would make str - * acceptacle to integer format specifiers, which is forbidden by CPython tests - * (test_format.py). PyString implements __int__ perhaps only to help the int - * constructor. Maybe that was a bad idea? - */ - try { - // Result is the result of arg.__int__() if that works - return arg.__getattr__("__int__").__call__(); - } catch (PyException e) { - // Swallow the exception - } - - // Try again with arg.__long__() - try { - // Result is the result of arg.__long__() if that works - return arg.__getattr__("__long__").__call__(); - } catch (PyException e) { - // No __long__ defined (at Python level) - return arg; - } - } - } - } - - /** - * Return the argument as a {@link PyFloat} according to its __float__ method. If - * the argument has no such method, or it raises an exception, we return the argument itself. - * The caller must check the return type. - * - * @param arg to convert - * @return PyFloat if possible - */ - private PyObject asFloat(PyObject arg) { - - if (arg instanceof PyFloat) { - // arg is already acceptable - return arg; - - } else { - // use __float__ to get a float. - if (arg.getClass() == PyFloat.class) { - // A common case where it is safe to return arg.__float__() - return arg.__float__(); - - } else { - /* - * In general, we can't simply call arg.__float__() because PyString implements it - * without exposing it to python (str has no __float__). This would make str - * acceptacle to float format specifiers, which is forbidden by CPython tests - * (test_format.py). PyString implements __float__ perhaps only to help the float - * constructor. Maybe that was a bad idea? 
- */ - try { - // Result is the result of arg.__float__() if that works - return arg.__getattr__("__float__").__call__(); - } catch (PyException e) { - // No __float__ defined (at Python level) - return arg; - } - } - } - } - - /** - * Return the argument as either a {@link PyString} or a {@link PyUnicode}, and set the - * {@link #needUnicode} member accordingly. If we already know we are building a Unicode string - * (needUnicode==true), then any argument that is not already a - * PyUnicode will be converted by calling its __unicode__ method. - * Conversely, if we are not yet building a Unicode string (needUnicode==false ), - * then a PyString will pass unchanged, a PyUnicode will switch us to Unicode mode - * (needUnicode=true), and any other type will be converted by calling its - * __str__ method, which will return a PyString, or possibly a - * PyUnicode, which will switch us to Unicode mode. - * - * @param arg to convert - * @return PyString or PyUnicode equivalent - */ - private PyString asText(PyObject arg) { - - if (arg instanceof PyUnicode) { - // arg is already acceptable. - needUnicode = true; - return (PyUnicode) arg; - - } else if (needUnicode) { - // The string being built is unicode, so we need that version of the arg. - return arg.__unicode__(); - - } else if (arg instanceof PyString) { - // The string being built is not unicode, so arg is already acceptable. - return (PyString) arg; - - } else { - // The string being built is not unicode, so use __str__ to get a PyString. - PyString s = arg.__str__(); - // But __str__ might return PyUnicode, and we have to notice that. - if (s instanceof PyUnicode) { - needUnicode = true; - } - return s; - } - } - - /** - * Main service of this class: format one or more arguments with the format string supplied at - * construction. 
- * - * @param args tuple or map containing objects, or a single object, to convert - * @return result of formatting - */ - @SuppressWarnings("fallthrough") - public PyString format(PyObject args) { - PyObject dict = null; - this.args = args; - - if (args instanceof PyTuple) { - // We will simply work through the tuple elements - argIndex = 0; - } else { - // Not a tuple, but possibly still some kind of container: use special argIndex values. - argIndex = -1; - if (args instanceof AbstractDict || (!(args instanceof PySequence) && - // See issue 2511: __getitem__ should be looked up directly in the dict, rather - // than going through another __getattr__ call. We achieve this by using - // object___findattr__ instead of generic __findattr__. - args.object___findattr__("__getitem__".intern()) != null)) { - dict = args; - argIndex = -3; - } - } - - while (index < format.length()) { - - // Read one character from the format string - char c = pop(); - if (c != '%') { - buffer.append(c); - continue; - } - - // It's a %, so the beginning of a conversion specifier. Parse it. - - // Attributes to be parsed from the next format specifier - boolean altFlag = false; - char sign = Spec.NONE; - char fill = ' '; - char align = '>'; - int width = Spec.UNSPECIFIED; - int precision = Spec.UNSPECIFIED; - - // A conversion specifier contains the following components, in this order: - // + The '%' character, which marks the start of the specifier. - // + Mapping key (optional), consisting of a parenthesised sequence of characters. - // + Conversion flags (optional), which affect the result of some conversion types. - // + Minimum field width (optional), or an '*' (asterisk). - // + Precision (optional), given as a '.' (dot) followed by the precision or '*'. - // + Length modifier (optional). - // + Conversion type. - - c = pop(); - if (c == '(') { - // Mapping key, consisting of a parenthesised sequence of characters. 
- if (dict == null) { - throw Py.TypeError("format requires a mapping"); - } - // Scan along until a matching close parenthesis is found - int parens = 1; - int keyStart = index; - while (parens > 0) { - c = pop(); - if (c == ')') { - parens--; - } else if (c == '(') { - parens++; - } - } - // Last c=pop() is the closing ')' while indexKey is just after the opening '(' - String tmp = format.substring(keyStart, index - 1); - // Look it up using this extent as the (right type of) key. - this.args = dict.__getitem__(needUnicode ? new PyUnicode(tmp) : new PyString(tmp)); - } else { - // Not a mapping key: next clause will re-read c. - push(); - } - - // Conversion flags (optional) that affect the result of some conversion types. - while (true) { - switch (c = pop()) { - case '-': - align = '<'; - continue; - case '+': - sign = '+'; - continue; - case ' ': - if (!Spec.specified(sign)) { - // Blank sign only wins if '+' not specified. - sign = ' '; - } - continue; - case '#': - altFlag = true; - continue; - case '0': - fill = '0'; - continue; - } - break; - } - // Push back c as next clause will re-read c. - push(); - - /* - * Minimum field width (optional). If specified as an '*' (asterisk), the actual width - * is read from the next element of the tuple in values, and the object to convert comes - * after the minimum field width and optional precision. A custom getNumber() takes care - * of the '*' case. - */ - width = getNumber(); - if (width < 0) { - width = -width; - align = '<'; - } - - /* - * Precision (optional), given as a '.' (dot) followed by the precision. If specified as - * '*' (an asterisk), the actual precision is read from the next element of the tuple in - * values, and the value to convert comes after the precision. A custom getNumber() - * takes care of the '*' case. - */ - c = pop(); - if (c == '.') { - precision = getNumber(); - if (precision < -1) { - precision = 0; - } - c = pop(); - } - - // Length modifier (optional). (Compatibility feature?) 
It has no effect. - if (c == 'h' || c == 'l' || c == 'L') { - c = pop(); - } - - /* - * As a function of the conversion type (currently in c) override some of the formatting - * flags we read from the format specification. - */ - switch (c) { - case 's': - case 'r': - case 'c': - case '%': - // These have string-like results: fill, if needed, is always blank. - fill = ' '; - break; - - default: - if (fill == '0' && align == '>') { - // Zero-fill comes after the sign in right-justification. - align = '='; - } else { - // If left-justifying, the fill is always blank. - fill = ' '; - } - } - - /* - * Encode as an InternalFormat.Spec. The values in the constructor always have specified - * values, except for sign, width and precision. - */ - Spec spec = new Spec(fill, align, sign, altFlag, width, false, precision, c); - - /* - * Process argument according to format specification decoded from the string. It is - * important we don't read the argument from the list until this point because of the - * possibility that width and precision were specified via the argument list. - */ - - // Depending on the type of conversion, we use one of these formatters: - FloatFormatter ff; - IntegerFormatter fi; - TextFormatter ft; - Formatter f; // = ff, fi or ft, whichever we actually use. - - switch (spec.type) { - - case 's': // String: converts any object using __str__(), __unicode__() ... - case 'r': // ... or repr(). - PyObject arg = getarg(); - - // Get hold of the actual object to display (may set needUnicode) - PyString argAsString = asText(spec.type == 's' ? arg : arg.__repr__()); - // Format the str/unicode form of the argument using this Spec. - f = ft = new TextFormatter(buffer, spec); - ft.setBytes(!needUnicode); - ft.format(argAsString.getString()); - break; - - case 'd': // All integer formats (+case for X). - case 'o': - case 'x': - case 'X': - case 'c': // Single character (accepts integer or single character string). - case 'u': // Obsolete type identical to 'd'. 
- case 'i': // Compatibility with scanf(). - - // Format the argument using this Spec. - f = fi = new IntegerFormatter.Traditional(buffer, spec); - // If not producing PyUnicode, disallow codes >255. - fi.setBytes(!needUnicode); - - arg = getarg(); - - if (arg instanceof PyString && spec.type == 'c') { - if (arg.__len__() != 1) { - throw Py.TypeError("%c requires int or char"); - } else { - if (!needUnicode && arg instanceof PyUnicode) { - // Change of mind forced by encountering unicode object. - needUnicode = true; - fi.setBytes(false); - } - fi.format(((PyString) arg).getString().codePointAt(0)); - } - - } else { - // Note various types accepted here as long as they have an __int__ method. - PyObject argAsNumber = asNumber(arg); - - // We have to check what we got back. - if (argAsNumber instanceof PyInteger) { - fi.format(((PyInteger) argAsNumber).getValue()); - } else if (argAsNumber instanceof PyLong) { - fi.format(((PyLong) argAsNumber).getValue()); - } else { - // It couldn't be converted, raise the error here - throw Py.TypeError( - "%" + spec.type + " format: a number is required, not " - + arg.getType().fastGetName()); - } - } - - break; - - case 'e': // All floating point formats (+case). - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - - // Format using this Spec the double form of the argument. - f = ff = new FloatFormatter(buffer, spec); - ff.setBytes(!needUnicode); - - // Note various types accepted here as long as they have a __float__ method. - arg = getarg(); - PyObject argAsFloat = asFloat(arg); - - // We have to check what we got back.. - if (argAsFloat instanceof PyFloat) { - ff.format(((PyFloat) argAsFloat).getValue()); - } else { - // It couldn't be converted, raise the error here - throw Py.TypeError( - "float argument required, not " + arg.getType().fastGetName()); - } - - break; - - case '%': // Percent symbol, but surprisingly, padded. - - // We use an integer formatter. 
- f = fi = new IntegerFormatter.Traditional(buffer, spec); - fi.setBytes(!needUnicode); - fi.format('%'); - break; - - default: - throw Py.ValueError("unsupported format character '" - + codecs.encode(Py.newUnicode(spec.type), null, "replace") + "' (0x" - + Integer.toHexString(spec.type) + ") at index " + (index - 1)); - } - - // Pad the result as specified (in-place, in the buffer). - f.pad(); - } - - /* - * All fields in the format string have been used to convert arguments (or used the argument - * as a width, etc.). This had better not leave any arguments unused. Note argIndex is an - * index into args or has a special value. If args is a 'proper' index, It should now be out - * of range; if a special value, it would be wrong if it were -1, indicating a single item - * that has not yet been used. - */ - if (argIndex == -1 || (argIndex >= 0 && args.__finditem__(argIndex) != null)) { - throw Py.TypeError("not all arguments converted during string formatting"); - } - - // Return the final buffer contents as a str or unicode as appropriate. - return needUnicode ? new PyUnicode(buffer) : new PyString(buffer); - } - -} diff --git a/src/org/python/core/PyTuple.java b/src/org/python/core/PyTuple.java deleted file mode 100644 index 346e54d5b..000000000 --- a/src/org/python/core/PyTuple.java +++ /dev/null @@ -1,653 +0,0 @@ -// Copyright (c) Corporation for National Research Initiatives -package org.python.core; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; - -import java.lang.reflect.Array; - -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import org.python.expose.ExposedType; -import org.python.expose.MethodType; - -/** - * A builtin python tuple. 
- */ -@ExposedType(name = "tuple", base = PyObject.class, doc = BuiltinDocs.tuple_doc) -public class PyTuple extends PySequenceList { - - public static final PyType TYPE = PyType.fromClass(PyTuple.class); - - private final PyObject[] array; - - private volatile List cachedList = null; - - public PyTuple() { - this(TYPE, Py.EmptyObjects); - } - - public PyTuple(PyObject... elements) { - this(TYPE, elements); - } - - public PyTuple(PyType subtype, PyObject[] elements) { - super(subtype); - if (elements == null) { - array = new PyObject[0]; - } else { - array = new PyObject[elements.length]; - System.arraycopy(elements, 0, array, 0, elements.length); - } - } - - public PyTuple(PyObject[] elements, boolean copy) { - this(TYPE, elements, copy); - } - - public PyTuple(PyType subtype, PyObject[] elements, boolean copy) { - super(subtype); - - if (copy) { - array = new PyObject[elements.length]; - System.arraycopy(elements, 0, array, 0, elements.length); - } else { - array = elements; - } - } - - private static PyTuple fromArrayNoCopy(PyObject[] elements) { - return new PyTuple(elements, false); - } - - List getList() { - if (cachedList == null) { - cachedList = Arrays.asList(array); - } - return cachedList; - } - - @ExposedNew - final static PyObject tuple_new(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { - ArgParser ap = new ArgParser("tuple", args, keywords, new String[] {"sequence"}, 0); - PyObject S = ap.getPyObject(0, null); - if (new_.for_type == subtype) { - if (S == null) { - return Py.EmptyTuple; - } - if (S.getType() == PyTuple.TYPE) { - return S; - } - if (S instanceof PyTupleDerived) { - return new PyTuple(((PyTuple) S).getArray()); - } - return fromArrayNoCopy(Py.make_array(S)); - } else { - if (S == null) { - return new PyTupleDerived(subtype, Py.EmptyObjects); - } - return new PyTupleDerived(subtype, Py.make_array(S)); - } - } - - /** - * Return a new PyTuple from an iterable. 
- * - * Raises a TypeError if the object is not iterable. - * - * @param iterable an iterable PyObject - * @return a PyTuple containing each item in the iterable - */ - public static PyTuple fromIterable(PyObject iterable) { - return fromArrayNoCopy(Py.make_array(iterable)); - } - - protected PyObject getslice(int start, int stop, int step) { - if (step > 0 && stop < start) { - stop = start; - } - int n = sliceLength(start, stop, step); - PyObject[] newArray = new PyObject[n]; - - if (step == 1) { - System.arraycopy(array, start, newArray, 0, stop - start); - return fromArrayNoCopy(newArray); - } - for (int i = start, j = 0; j < n; i += step, j++) { - newArray[j] = array[i]; - } - return fromArrayNoCopy(newArray); - } - - protected PyObject repeat(int count) { - if (count < 0) { - count = 0; - } - int size = size(); - if (size == 0 || count == 1) { - if (getType() == TYPE) { - // Since tuples are immutable, we can return a shared copy in this case - return this; - } - if (size == 0) { - return Py.EmptyTuple; - } - } - - int newSize = size * count; - if (newSize / size != count) { - throw Py.MemoryError(""); - } - - PyObject[] newArray = new PyObject[newSize]; - for (int i = 0; i < count; i++) { - System.arraycopy(array, 0, newArray, i * size, size); - } - return fromArrayNoCopy(newArray); - } - - @Override - public int __len__() { - return tuple___len__(); - } - - @ExposedMethod(doc = BuiltinDocs.tuple___len___doc) - final int tuple___len__() { - return size(); - } - - @ExposedMethod(doc = BuiltinDocs.tuple___contains___doc) - final boolean tuple___contains__(PyObject o) { - return super.__contains__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___ne___doc) - final PyObject tuple___ne__(PyObject o) { - return super.__ne__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___eq___doc) - final PyObject tuple___eq__(PyObject o) { - return super.__eq__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = 
BuiltinDocs.tuple___gt___doc) - final PyObject tuple___gt__(PyObject o) { - return super.__gt__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___ge___doc) - final PyObject tuple___ge__(PyObject o) { - return super.__ge__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___lt___doc) - final PyObject tuple___lt__(PyObject o) { - return super.__lt__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___le___doc) - final PyObject tuple___le__(PyObject o) { - return super.__le__(o); - } - - @Override - public PyObject __add__(PyObject generic_other) { - return tuple___add__(generic_other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___add___doc) - final PyObject tuple___add__(PyObject generic_other) { - PyTuple sum = null; - if (generic_other instanceof PyTuple) { - PyTuple other = (PyTuple) generic_other; - PyObject[] newArray = new PyObject[array.length + other.array.length]; - System.arraycopy(array, 0, newArray, 0, array.length); - System.arraycopy(other.array, 0, newArray, array.length, other.array.length); - sum = fromArrayNoCopy(newArray); - } - return sum; - } - - @Override - public PyObject __mul__(PyObject o) { - return tuple___mul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___mul___doc) - final PyObject tuple___mul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - return repeat(o.asIndex(Py.OverflowError)); - } - - @Override - public PyObject __rmul__(PyObject o) { - return tuple___rmul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.tuple___rmul___doc) - final PyObject tuple___rmul__(PyObject o) { - if (!o.isIndex()) { - return null; - } - return repeat(o.asIndex(Py.OverflowError)); - } - - @Override - public PyObject __iter__() { - return tuple___iter__(); - } - - @ExposedMethod(doc = BuiltinDocs.tuple___iter___doc) - public PyObject tuple___iter__() { - return new 
PyTupleIterator(this); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.tuple___getslice___doc) - final PyObject tuple___getslice__(PyObject s_start, PyObject s_stop, PyObject s_step) { - return seq___getslice__(s_start, s_stop, s_step); - } - - @ExposedMethod(doc = BuiltinDocs.tuple___getitem___doc) - final PyObject tuple___getitem__(PyObject index) { - PyObject ret = seq___finditem__(index); - if (ret == null) { - throw Py.IndexError("index out of range: " + index); - } - return ret; - } - - @ExposedMethod(doc = BuiltinDocs.tuple___getnewargs___doc) - final PyTuple tuple___getnewargs__() { - return new PyTuple(new PyTuple(getArray())); - } - - @Override - public PyTuple __getnewargs__() { - return tuple___getnewargs__(); - } - - @Override - public int hashCode() { - return tuple___hash__(); - } - - @ExposedMethod(doc = BuiltinDocs.tuple___hash___doc) - final int tuple___hash__() { - // strengthened hash to avoid common collisions. from CPython - // tupleobject.tuplehash. See http://bugs.python.org/issue942952 - int y; - int len = size(); - int mult = 1000003; - int x = 0x345678; - while (--len >= 0) { - y = array[len].hashCode(); - x = (x ^ y) * mult; - mult += 82520 + len + len; - } - return x + 97531; - } - - private String subobjRepr(PyObject o) { - if (o == null) { - return "null"; - } - return o.__repr__().toString(); - } - - @Override - public String toString() { - return tuple___repr__(); - } - - @ExposedMethod(doc = BuiltinDocs.tuple___repr___doc) - final String tuple___repr__() { - StringBuilder buf = new StringBuilder("("); - for (int i = 0; i < array.length - 1; i++) { - buf.append(subobjRepr(array[i])); - buf.append(", "); - } - if (array.length > 0) { - buf.append(subobjRepr(array[array.length - 1])); - } - if (array.length == 1) { - buf.append(","); - } - buf.append(")"); - return buf.toString(); - } - - public List subList(int fromIndex, int toIndex) { - if (fromIndex < 0 || toIndex > size()) { - throw new IndexOutOfBoundsException(); - 
} else if (fromIndex > toIndex) { - throw new IllegalArgumentException(); - } - PyObject elements[] = new PyObject[toIndex - fromIndex]; - for (int i = 0, j = fromIndex; i < elements.length; i++, j++) { - elements[i] = array[j]; - } - return new PyTuple(elements); - } - - public Iterator iterator() { - return new Iterator() { - - private final Iterator iter = getList().iterator(); - - public void remove() { - throw new UnsupportedOperationException(); - } - - public boolean hasNext() { - return iter.hasNext(); - } - - public Object next() { - return iter.next().__tojava__(Object.class); - } - }; - } - - public boolean add(Object o) { - throw new UnsupportedOperationException(); - } - - public boolean remove(Object o) { - throw new UnsupportedOperationException(); - } - - public boolean addAll(Collection coll) { - throw new UnsupportedOperationException(); - } - - public boolean removeAll(Collection coll) { - throw new UnsupportedOperationException(); - } - - public boolean retainAll(Collection coll) { - throw new UnsupportedOperationException(); - } - - public void clear() { - throw new UnsupportedOperationException(); - } - - public Object set(int index, Object element) { - throw new UnsupportedOperationException(); - } - - public void add(int index, Object element) { - throw new UnsupportedOperationException(); - } - - public Object remove(int index) { - throw new UnsupportedOperationException(); - } - - public boolean addAll(int index, Collection c) { - throw new UnsupportedOperationException(); - } - - public ListIterator listIterator() { - return listIterator(0); - } - - public ListIterator listIterator(final int index) { - return new ListIterator() { - - private final ListIterator iter = getList().listIterator(index); - - public boolean hasNext() { - return iter.hasNext(); - } - - public Object next() { - return iter.next().__tojava__(Object.class); - } - - public boolean hasPrevious() { - return iter.hasPrevious(); - } - - public Object previous() { - return 
iter.previous().__tojava__(Object.class); - } - - public int nextIndex() { - return iter.nextIndex(); - } - - public int previousIndex() { - return iter.previousIndex(); - } - - public void remove() { - throw new UnsupportedOperationException(); - } - - public void set(Object o) { - throw new UnsupportedOperationException(); - } - - public void add(Object o) { - throw new UnsupportedOperationException(); - } - }; - } - - protected String unsupportedopMessage(String op, PyObject o2) { - if (op.equals("+")) { - return "can only concatenate tuple (not \"{2}\") to tuple"; - } - return super.unsupportedopMessage(op, o2); - } - - public void pyset(int index, PyObject value) { - throw Py.TypeError("'tuple' object does not support item assignment"); - } - - @Override - public boolean contains(Object o) { - return getList().contains(Py.java2py(o)); - } - - @Override - public boolean containsAll(Collection c) { - if (c instanceof PyList) { - return getList().containsAll(((PyList)c).getList()); - } else if (c instanceof PyTuple) { - return getList().containsAll(((PyTuple)c).getList()); - } else { - return getList().containsAll(new PyList(c)); - } - } - - public int count(PyObject value) { - return tuple_count(value); - } - - @ExposedMethod(doc = BuiltinDocs.tuple_count_doc) - final int tuple_count(PyObject value) { - int count = 0; - for (PyObject item : array) { - if (item.equals(value)) { - count++; - } - } - return count; - } - - public int index(PyObject value) { - return index(value, 0); - } - - public int index(PyObject value, int start) { - return index(value, start, size()); - } - - public int index(PyObject value, int start, int stop) { - return tuple_index(value, start, stop); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.tuple_index_doc) - final int tuple_index(PyObject value, PyObject start, PyObject stop) { - int startInt = start == null ? 0 : PySlice.calculateSliceIndex(start); - int stopInt = stop == null ? 
size() : PySlice.calculateSliceIndex(stop); - return tuple_index(value, startInt, stopInt); - } - - final int tuple_index(PyObject value, int start, int stop) { - int validStart = boundToSequence(start); - int validStop = boundToSequence(stop); - for (int i = validStart; i < validStop; i++) { - if (array[i].equals(value)) { - return i; - } - } - throw Py.ValueError("tuple.index(x): x not in list"); - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - - if (other instanceof PyObject) { - return _eq((PyObject)other).__nonzero__(); - } - if (other instanceof List) { - return other.equals(this); - } - return false; - } - - @Override - public Object get(int index) { - return array[index].__tojava__(Object.class); - } - - @Override - public PyObject[] getArray() { - return array; - } - - @Override - public int indexOf(Object o) { - return getList().indexOf(Py.java2py(o)); - } - - @Override - public boolean isEmpty() { - return array.length == 0; - } - - @Override - public int lastIndexOf(Object o) { - return getList().lastIndexOf(Py.java2py(o)); - } - - @Override - public void pyadd(int index, PyObject element) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean pyadd(PyObject o) { - throw new UnsupportedOperationException(); - } - - @Override - public PyObject pyget(int index) { - return array[index]; - } - - @Override - public void remove(int start, int stop) { - throw new UnsupportedOperationException(); - } - - @Override - public int size() { - return array.length; - } - - @Override - public Object[] toArray() { - Object[] converted = new Object[array.length]; - for (int i = 0; i < array.length; i++) { - converted[i] = array[i].__tojava__(Object.class); - } - return converted; - } - - @Override - public Object[] toArray(Object[] converted) { - Class type = converted.getClass().getComponentType(); - if (converted.length < array.length) { - converted = (Object[])Array.newInstance(type, 
array.length); - } - for (int i = 0; i < array.length; i++) { - converted[i] = type.cast(array[i].__tojava__(type)); - } - if (array.length < converted.length) { - for (int i = array.length; i < converted.length; i++) { - converted[i] = null; - } - } - return converted; - } - - - /* Traverseproc implementation */ - @Override - public int traverse(Visitproc visit, Object arg) { - int retVal; - for (PyObject ob: array) { - if (ob != null) { - retVal = visit.visit(ob, arg); - if (retVal != 0) { - return retVal; - } - } - } - if (cachedList != null) { - for (PyObject ob: cachedList) { - if (ob != null) { - retVal = visit.visit(ob, arg); - if (retVal != 0) { - return retVal; - } - } - } - } - return 0; - } - - @Override - public boolean refersDirectlyTo(PyObject ob) { - if (ob == null) { - return false; - } - for (PyObject obj: array) { - if (obj == ob) { - return true; - } - } - if (cachedList != null) { - for (PyObject obj: cachedList) { - if (obj == ob) { - return true; - } - } - } - return false; - } -} diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java deleted file mode 100644 index b649e2989..000000000 --- a/src/org/python/core/PyUnicode.java +++ /dev/null @@ -1,2411 +0,0 @@ -package org.python.core; - -import java.io.Serializable; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; - -import org.python.core.stringlib.FieldNameIterator; -import org.python.core.stringlib.MarkupIterator; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedNew; -import org.python.expose.ExposedType; -import org.python.expose.MethodType; -import org.python.modules._codecs; -import org.python.util.Generic; - -import com.google.common.base.CharMatcher; - -/** - * a builtin python unicode string. 
- */ -@Untraversable -@ExposedType(name = "unicode", base = PyBaseString.class, doc = BuiltinDocs.unicode_doc) -public class PyUnicode extends PyString implements Iterable { - - /** - * Nearly every significant method comes in two versions: one applicable when the string - * contains only basic plane characters, and one that is correct when supplementary characters - * are also present. Set this constant true to treat all strings as containing - * supplementary characters, so that these versions will be exercised in tests. - */ - private static final boolean DEBUG_NON_BMP_METHODS = false; - - public static final PyType TYPE = PyType.fromClass(PyUnicode.class); - - // for PyJavaClass.init() - public PyUnicode() { - this(TYPE, "", true); - } - - /** - * Construct a PyUnicode interpreting the Java String argument as UTF-16. - * - * @param string UTF-16 string encoding the characters (as Java). - */ - public PyUnicode(String string) { - this(TYPE, string, false); - } - - /** - * Construct a PyUnicode interpreting the Java String argument as UTF-16. If it is known that - * the string contains no supplementary characters, argument isBasic may be set true by the - * caller. If it is false, the PyUnicode will scan the string to find out. - * - * @param string UTF-16 string encoding the characters (as Java). - * @param isBasic true if it is known that only BMP characters are present. - */ - public PyUnicode(String string, boolean isBasic) { - this(TYPE, string, isBasic); - } - - public PyUnicode(PyType subtype, String string) { - this(subtype, string, false); - } - - public PyUnicode(PyString pystring) { - this(TYPE, pystring); - } - - public PyUnicode(PyType subtype, PyString pystring) { - this(subtype, // - pystring instanceof PyUnicode ? 
pystring.string : pystring.decode().toString(), // - pystring.isBasicPlane()); - } - - public PyUnicode(char c) { - this(TYPE, String.valueOf(c), true); - } - - public PyUnicode(int codepoint) { - this(TYPE, new String(new int[] {codepoint}, 0, 1)); - } - - public PyUnicode(int[] codepoints) { - this(new String(codepoints, 0, codepoints.length)); - } - - PyUnicode(StringBuilder buffer) { - this(TYPE, buffer.toString()); - } - - private static StringBuilder fromCodePoints(Iterator iter) { - StringBuilder buffer = new StringBuilder(); - while (iter.hasNext()) { - buffer.appendCodePoint(iter.next()); - } - return buffer; - } - - public PyUnicode(Iterator iter) { - this(fromCodePoints(iter)); - } - - public PyUnicode(Collection ucs4) { - this(ucs4.iterator()); - } - - /** - * Fundamental all-features constructor on which the others depend. If it is known that the - * string contains no supplementary characters, argument isBasic may be set true by the caller. - * If it is false, the PyUnicode will scan the string to find out. - * - * @param subtype actual type to create. - * @param string UTF-16 string encoding the characters (as Java). - * @param isBasic true if it is known that only BMP characters are present. - */ - private PyUnicode(PyType subtype, String string, boolean isBasic) { - super(subtype, ""); - this.string = string; - translator = isBasic ? BASIC : this.chooseIndexTranslator(); - } - - @Override - public int[] toCodePoints() { - int n = getCodePointCount(); - int[] codePoints = new int[n]; - int i = 0; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext(); i++) { - codePoints[i] = iter.next(); - } - return codePoints; - } - - /** - * {@code PyUnicode} implements the interface {@link BufferProtocol} technically by inheritance from {@link PyString}, - * but does not provide a buffer (in CPython). We therefore arrange that all calls to {@code getBuffer} - * raise an error. 
- * - * @return always throws a {@code ClassCastException} - */ - @Override - public synchronized PyBuffer getBuffer(int flags) throws ClassCastException { - throw new ClassCastException("'unicode' does not support the buffer protocol"); - } - - // ------------------------------------------------------------------------------------------ - // Index translation for Unicode beyond the BMP - // ------------------------------------------------------------------------------------------ - - /** - * Index translation between code point index (as seen by Python) and UTF-16 index (as used in - * the Java String. - */ - private interface IndexTranslator extends Serializable { - - /** Number of supplementary characters (hence point code length may be found). */ - public int suppCount(); - - /** Translate a UTF-16 code unit index to its equivalent code point index. */ - public int codePointIndex(int utf16Index); - - /** Translate a code point index to its equivalent UTF-16 code unit index. */ - public int utf16Index(int codePointIndex); - } - - /** - * The instance of index translation in use in this string. It will be set to either - * {@link #BASIC} or an instance of {@link PyUnicode.Supplementary}. - */ - private final IndexTranslator translator; - - /** - * A singleton provides the translation service (which is a pass-through) for all BMP strings. - */ - static final IndexTranslator BASIC = new IndexTranslator() { - - @Override - public int suppCount() { - return 0; - } - - @Override - public int codePointIndex(int u) { - return u; - } - - @Override - public int utf16Index(int i) { - return i; - } - }; - - /** - * A class of index translation that uses the cumulative count so far of supplementary - * characters, tabulated in blocks of a standard size. The count is then used as an offset - * between the code point index and the corresponding point in the UTF-16 representation. 
- */ - private final class Supplementary implements IndexTranslator { - - /** Tabulates cumulative count so far of supplementary characters, by blocks of size M. */ - final int[] count; - - /** Configure the block size M, as this power of 2. */ - static final int LOG2M = 4; - /** The block size used for indexing (power of 2). */ - static final int M = 1 << LOG2M; - /** A mask used to separate the block number and offset in the block. */ - static final int MASK = M - 1; - - /** - * The constructor works on a count array prepared by - * {@link PyUnicode#getSupplementaryCounts(String)}. - */ - Supplementary(int[] count) { - this.count = count; - } - - @Override - public int codePointIndex(int u) { - /* - * Let the desired result be j such that utf16Index(j) = u. As we have only a forward - * index of the string, we have to conduct a search. In principle, we bound j by a pair - * of values (j1,j2) such that j1<=j> LOG2M) + 1; - // The count of supplementary characters before the start of block k2 is: - int c2 = count[k2 - 1]; - /* - * Since the count array is non-decreasing, and j < k2*M, we have u-j <= count[k2-1]. - * That is, j >= k1*M, where: - */ - int k1 = Math.max(0, u - c2) >> LOG2M; - // The count of supplementary characters before the start of block k1 is: - int c1 = (k1 == 0) ? 0 : count[k1 - 1]; - - /* - * Now, j (to be found) is in an unknown block k, where k1<=k u) { - // k*M+c > u therefore j is not in block k but to its left. - k2 = k; - c2 = c; - } else { - // k*M+c <= u therefore j must be in block k, or to its right. - k1 = k; - c1 = c; - } - } - } - - /* - * At this point, j is known to be in block k1 (and k2=k1+1). c1 is the number of - * supplementary characters to the left of code point index k1*M and c2 is the number of - * supplementary characters to the left of code point index (k1+1)*M. We have to search - * this block sequentially. 
The current position in the UTF-16 is: - */ - int p = (k1 << LOG2M) + c1; - while (p < u) { - if (Character.isHighSurrogate(string.charAt(p++))) { - // c1 tracks the number of supplementary characters to the left of p - c1 += 1; - if (c1 == c2) { - // We have found all supplementary characters in the block. - break; - } - // Skip the trailing surrogate. - p++; - } - } - // c1 is the number of supplementary characters to the left of u, so the result j is: - return u - c1; - } - - @Override - public int utf16Index(int i) { - // The code point index i lies in the k-th block where: - int k = i >> LOG2M; - // The offset for the code point index k*M is exactly - int d = (k == 0) ? 0 : count[k - 1]; - // The offset for the code point index (k+1)*M is exactly - int e = count[k]; - if (d == e) { - /* - * The offset for the code point index (k+1)*M is the same, and since this is a - * non-decreasing function of k, it is also the value for i. - */ - return i + d; - } else { - /* - * The offset for the code point index (k+1)*M is different (higher). We must scan - * along until we have found all the supplementary characters that precede i, - * starting the scan at code point index k*M. - */ - for (int q = i & ~MASK; q < i; q++) { - if (Character.isHighSurrogate(string.charAt(q + d))) { - d += 1; - if (d == e) { - /* - * We have found all the supplementary characters in this block, so we - * must have found all those to the left of i. - */ - break; - } - } - } - - // d counts all the supplementary characters to the left of i. - return i + d; - } - } - - @Override - public int suppCount() { - // The last element of the count array is the total number of supplementary characters. - return count[count.length - 1]; - } - } - - /** - * Generate the table that is used by the class {@link Supplementary} to accelerate access to - * the the implementation string. 
The method returns null if the string passed - * contains no surrogate pairs, in which case we'll use {@link #BASIC} as the translator. This - * method is sensitive to {@link #DEBUG_NON_BMP_METHODS} which if true will prevent it returning - * null, hance we will always use a {@link Supplementary} {@link #translator}. - * - * @param string to index - * @return the index (counts) or null if basic plane - */ - private static int[] getSupplementaryCounts(final String string) { - - final int n = string.length(); - int p; // Index of the current UTF-16 code unit. - - /* - * We scan to the first surrogate code unit, in a simple loop. If we hit the end before we - * find one, no count array will be necessary and we'll use BASIC. If we find a surrogate it - * may be half a supplementary character, or a lone surrogate: we'll find out later. - */ - for (p = 0; p < n; p++) { - if (Character.isSurrogate(string.charAt(p))) { - break; - } - } - - if (p == n && !DEBUG_NON_BMP_METHODS) { - // There are no supplementary characters so the 1:1 translator is fine. - return null; - - } else { - /* - * We have to do this properly, using a scheme in which code point indexes are - * efficiently translatable to UTF-16 indexes through a table called here count[]. In - * this array, count[k] contains the total number of supplementary characters up to the - * end of the k.th block, that is, to the left of code point (k+1)M. We have to fill - * this array by scanning the string. - */ - int q = p; // The current code point index (q = p+s). - int k = q >> Supplementary.LOG2M; // The block number k = q/M. - - /* - * When addressing with a code point index q<=L (the length in code points) we will - * index the count array with k = q/M. We have q<=L<=n, therefore q/M <= n/M, the - * maximum valid k is 1 + n/M. A q>=L should raise IndexOutOfBoundsException, but it - * doesn't matter whether that's from indexing this array, or the string later. 
- */ - int[] count = new int[1 + (n >> Supplementary.LOG2M)]; - - /* - * To get the generation of count[] going efficiently, we need to advance the next whole - * block. The next loop will complete processing of the block containing the first - * supplementary character. Note that in all these loops, if we exit because p reaches a - * limit, the count for the last partial block is known from p-q and we take care of - * that right at the end of this method. The limit of these loops is n-1, so if we spot - * a lead surrogate, the we may access the low-surrogate confident that p+1p we find a lead surrogate without a trailing one - * following, or a trailing surrogate directly. It should not be called on the final code unit, - * when p==string.length()-1, since it may check the next code unit as well. - * - * @param string of UTF-16 code units - * @param p index into that string - * @return 2 if a surrogate pair stands at p, 1 if not - * @throws PyException {@code ValueError} if a lone surrogate stands at p. - */ - private static int calcAdvance(String string, int p) throws PyException { - - // Catch supplementary characters and lone surrogate code units. - char c = string.charAt(p); - - if (c >= Character.MIN_SURROGATE) { - if (c < Character.MIN_LOW_SURROGATE) { - // This is a lead surrogate. - if (Character.isLowSurrogate(string.charAt(p + 1))) { - // Required trailing surrogate follows, so step over both. - return 2; - } else { - // Required trailing surrogate missing. - throw unpairedSurrogate(p, c); - } - - } else if (c <= Character.MAX_SURROGATE) { - // This is a lone trailing surrogate - throw unpairedSurrogate(p, c); - - } // else this is a private use or special character in 0xE000 to 0xFFFF. - - } - return 1; - } - - /** - * Return a ready-to-throw exception indicating an unpaired surrogate. 
- * - * @param p index within that sequence of the problematic code unit - * @param c the code unit - * @return an exception - */ - private static PyException unpairedSurrogate(int p, int c) { - String fmt = "unpaired surrogate %#4x at code unit %d"; - String msg = String.format(fmt, c, p); - return Py.ValueError(msg); - } - - /** - * Choose an {@link IndexTranslator} implementation for efficient working, according to the - * contents of the {@link PyString#string}. - * - * @return chosen IndexTranslator - */ - private IndexTranslator chooseIndexTranslator() { - int[] count = getSupplementaryCounts(string); - if (DEBUG_NON_BMP_METHODS) { - return new Supplementary(count); - } else { - return count == null ? BASIC : new Supplementary(count); - } - } - - /** - * {@inheritDoc} - *

- * In the PyUnicode version, the arguments are code point indices, such as are - * received from the Python caller, while the first two elements of the returned array have been - * translated to UTF-16 indices in the implementation string. - */ - @Override - protected int[] translateIndices(PyObject start, PyObject end) { - int[] indices = super.translateIndices(start, end); - indices[0] = translator.utf16Index(indices[0]); - indices[1] = translator.utf16Index(indices[1]); - // indices[2] and [3] remain Unicode indices (and may be out of bounds) relative to len() - return indices; - } - - // ------------------------------------------------------------------------------------------ - - /** - * {@inheritDoc} The indices are code point indices, not UTF-16 (char) indices. For - * example: - * - *

-     * PyUnicode u = new PyUnicode("..\ud800\udc02\ud800\udc03...");
-     * // (Python) u = u'..\U00010002\U00010003...'
-     *
-     * String s = u.substring(2, 4);  // = "\ud800\udc02\ud800\udc03" (Java)
-     * 
- */ - @Override - public String substring(int start, int end) { - return super.substring(translator.utf16Index(start), translator.utf16Index(end)); - } - - /** - * Creates a PyUnicode from an already interned String. Just means it won't be reinterned if - * used in a place that requires interned Strings. - */ - public static PyUnicode fromInterned(String interned) { - PyUnicode uni = new PyUnicode(TYPE, interned); - uni.interned = true; - return uni; - } - - /** - * {@inheritDoc} - * - * @return true if the string consists only of BMP characters - */ - @Override - public boolean isBasicPlane() { - return translator == BASIC; - } - - public int getCodePointCount() { - return string.length() - translator.suppCount(); - } - - public static String checkEncoding(String s) { - if (s == null || CharMatcher.ascii().matchesAllOf(s)) { - return s; - } - return codecs.PyUnicode_EncodeASCII(s, s.length(), null); - } - - @ExposedNew - final static PyObject unicode_new(PyNewWrapper new_, boolean init, PyType subtype, - PyObject[] args, String[] keywords) { - ArgParser ap = new ArgParser("unicode", args, keywords, - new String[] {"string", "encoding", "errors"}, 0); - PyObject S = ap.getPyObject(0, null); - String encoding = checkEncoding(ap.getString(1, null)); - String errors = checkEncoding(ap.getString(2, null)); - if (new_.for_type == subtype) { - if (S == null) { - return new PyUnicode(""); - } - if (S instanceof PyUnicode) { - return new PyUnicode(((PyUnicode) S).getString()); - } - if (S instanceof PyString) { - if (S.getType() != PyString.TYPE && encoding == null && errors == null) { - return S.__unicode__(); - } - PyObject decoded = codecs.decode((PyString) S, encoding, errors); - if (decoded instanceof PyUnicode) { - return new PyUnicode((PyUnicode) decoded); - } else { - throw Py.TypeError("decoder did not return an unicode object (type=" - + decoded.getType().fastGetName() + ")"); - } - } - return S.__unicode__(); - } else { - if (S == null) { - return new 
PyUnicodeDerived(subtype, Py.EmptyString); - } - if (S instanceof PyUnicode) { - return new PyUnicodeDerived(subtype, (PyUnicode) S); - } else { - return new PyUnicodeDerived(subtype, S.__str__()); - } - } - } - - @Override - public PyString createInstance(String str) { - return new PyUnicode(str); - } - - /** - * @param string UTF-16 string encoding the characters (as Java). - * @param isBasic true if it is known that only BMP characters are present. - */ - @Override - protected PyString createInstance(String string, boolean isBasic) { - return new PyUnicode(string, isBasic); - } - - @Override - public PyObject __mod__(PyObject other) { - return unicode___mod__(other); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___mod___doc) - final PyObject unicode___mod__(PyObject other) { - StringFormatter fmt = new StringFormatter(getString(), true); - return fmt.format(other); - } - - @Override - public PyUnicode __unicode__() { - return this; - } - - @Override - public PyString __str__() { - return unicode___str__(); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___str___doc) - final PyString unicode___str__() { - return new PyString(encode()); - } - - @Override - public int __len__() { - return unicode___len__(); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___len___doc) - final int unicode___len__() { - return getCodePointCount(); - } - - @Override - public PyString __repr__() { - return unicode___repr__(); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___repr___doc) - final PyString unicode___repr__() { - return new PyString("u" + encode_UnicodeEscape(getString(), true)); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___getitem___doc) - final PyObject unicode___getitem__(PyObject index) { - return str___getitem__(index); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode___getslice__(PyObject start, PyObject stop, PyObject step) { - return seq___getslice__(start, stop, step); - } - - @Override - 
protected PyObject getslice(int start, int stop, int step) { - if (isBasicPlane()) { - return super.getslice(start, stop, step); - } - if (step > 0 && stop < start) { - stop = start; - } - - StringBuilder buffer = new StringBuilder(sliceLength(start, stop, step)); - for (Iterator iter = newSubsequenceIterator(start, stop, step); iter.hasNext();) { - buffer.appendCodePoint(iter.next()); - } - return createInstance(buffer.toString()); - } - - @ExposedMethod(type = MethodType.CMP) - final int unicode___cmp__(PyObject other) { - // XXX needs proper coercion like __eq__, then UCS-32 code point order :( - return str___cmp__(other); - } - - @Override - public PyObject __eq__(PyObject other) { - return unicode___eq__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___eq___doc) - final PyObject unicode___eq__(PyObject other) { - try { - String s = coerceForComparison(other); - if (s == null) { - return null; - } - return getString().equals(s) ? Py.True : Py.False; - } catch (PyException e) { - // Decoding failed: treat as unequal - return Py.False; - } - } - - @Override - public PyObject __ne__(PyObject other) { - return unicode___ne__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___ne___doc) - final PyObject unicode___ne__(PyObject other) { - try { - String s = coerceForComparison(other); - if (s == null) { - return null; - } - return getString().equals(s) ? Py.False : Py.True; - } catch (PyException e) { - // Decoding failed: treat as unequal - return Py.True; - } - } - - @Override - public PyObject __lt__(PyObject other) { - return unicode___lt__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___lt___doc) - final PyObject unicode___lt__(PyObject other) { - String s = coerceForComparison(other); - if (s == null) { - return null; - } - return getString().compareTo(s) < 0 ? 
Py.True : Py.False; - } - - @Override - public PyObject __le__(PyObject other) { - return unicode___le__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___le___doc) - final PyObject unicode___le__(PyObject other) { - String s = coerceForComparison(other); - if (s == null) { - return null; - } - return getString().compareTo(s) <= 0 ? Py.True : Py.False; - } - - @Override - public PyObject __gt__(PyObject other) { - return unicode___gt__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___gt___doc) - final PyObject unicode___gt__(PyObject other) { - String s = coerceForComparison(other); - if (s == null) { - return null; - } - return getString().compareTo(s) > 0 ? Py.True : Py.False; - } - - @Override - public PyObject __ge__(PyObject other) { - return unicode___ge__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___ge___doc) - final PyObject unicode___ge__(PyObject other) { - String s = coerceForComparison(other); - if (s == null) { - return null; - } - return getString().compareTo(s) >= 0 ? Py.True : Py.False; - } - - @ExposedMethod(doc = BuiltinDocs.unicode___hash___doc) - final int unicode___hash__() { - return str___hash__(); - } - - @Override - protected PyObject pyget(int i) { - int codepoint = getString().codePointAt(translator.utf16Index(i)); - return Py.makeCharacter(codepoint, true); - } - - @Override - public int getInt(int i) { - return getString().codePointAt(translator.utf16Index(i)); - } - - /** - * An iterator returning code points from this array, for use when not basic plane. 
- */ - private class SubsequenceIteratorImpl extends SubsequenceIteratorBasic { - - private int k; // UTF-16 index (of current) - - SubsequenceIteratorImpl(int start, int stop, int step) { - super(start, stop, step); - k = translator.utf16Index(current); - } - - SubsequenceIteratorImpl() { - this(0, getCodePointCount(), 1); - } - - @Override - protected int nextCodePoint() { - int U; - int W1 = getString().charAt(k); - if (W1 >= 0xD800 && W1 < 0xDC00) { - int W2 = getString().charAt(k + 1); - U = (((W1 & 0x3FF) << 10) | (W2 & 0x3FF)) + 0x10000; - k += 2; - } else { - U = W1; - k += 1; - } - current += 1; - return U; - } - } - - /** - * An iterator returning code points from this array, for use when basic plane. - */ - private class SubsequenceIteratorBasic implements Iterator { - - protected int current, stop, step; // Character indexes - - SubsequenceIteratorBasic(int start, int stop, int step) { - current = start; - this.stop = stop; - this.step = step; - } - - SubsequenceIteratorBasic() { - this(0, getCodePointCount(), 1); - } - - @Override - public boolean hasNext() { - return current < stop; - } - - @Override - public Integer next() { - int codePoint = nextCodePoint(); - for (int j = 1; j < step && hasNext(); j++) { - nextCodePoint(); - } - return codePoint; - } - - protected int nextCodePoint() { - return getString().charAt(current++); - } - - @Override - public void remove() { - throw new UnsupportedOperationException( - "Not supported on PyUnicode objects (immutable)"); - } - } - - private static class SteppedIterator implements Iterator { - - private final Iterator iter; - private final int step; - private T lookahead = null; - - public SteppedIterator(int step, Iterator iter) { - this.iter = iter; - this.step = step; - lookahead = advance(); - } - - private T advance() { - if (iter.hasNext()) { - T elem = iter.next(); - for (int i = 1; i < step && iter.hasNext(); i++) { - iter.next(); - } - return elem; - } else { - return null; - } - } - - @Override - 
public boolean hasNext() { - return lookahead != null; - } - - @Override - public T next() { - T old = lookahead; - if (iter.hasNext()) { - lookahead = iter.next(); - for (int i = 1; i < step && iter.hasNext(); i++) { - iter.next(); - } - } else { - lookahead = null; - } - return old; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - // XXX: Parameterize SubsequenceIteratorImpl and friends (and make them Iterable) - /** Get an iterator over the code point sequence. */ - public Iterator newSubsequenceIterator() { - if (isBasicPlane()) { - return new SubsequenceIteratorBasic(); - } else { - return new SubsequenceIteratorImpl(); - } - } - - /** Get an iterator over a slice of the code point sequence. */ - public Iterator newSubsequenceIterator(int start, int stop, int step) { - if (isBasicPlane()) { - if (step < 0) { - return new SteppedIterator(step * -1, new ReversedIterator( - new SubsequenceIteratorBasic(stop + 1, start + 1, 1))); - } else { - return new SubsequenceIteratorBasic(start, stop, step); - } - } else { - if (step < 0) { - return new SteppedIterator(step * -1, new ReversedIterator( - new SubsequenceIteratorImpl(stop + 1, start + 1, 1))); - } else { - return new SubsequenceIteratorImpl(start, stop, step); - } - } - } - - /** - * Interpret the object as a Java String representing characters as UTF-16, or - * return null if the type does not admit this conversion. From a - * PyUnicode we return its internal string. A byte argument is decoded with the - * default encoding. - * - * @param o the object to coerce - * @return an equivalent String - */ - private static String coerceToStringOrNull(PyObject o) { - if (o instanceof PyUnicode) { - return ((PyUnicode) o).getString(); - } else if (o instanceof PyString) { - return ((PyString) o).decode().toString(); - } else if (o instanceof BufferProtocol) { - // PyByteArray, PyMemoryView, Py2kBuffer ... 
- // We ought to be able to call codecs.decode on o but see Issue #2164 - try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) { - PyString s = new PyString(buf); - // For any sensible codec, the return is unicode and toString() is getString(). - return s.decode().toString(); - } - } else { - // o is some type not allowed: - return null; - } - } - - /** - * Interpret the object as a Java String for use in comparison. The return - * represents characters as UTF-16. From a PyUnicode we return its internal string. - * A str and buffer argument is decoded with the default encoding. - *

- * This method could be replaced by {@link #coerceToStringOrNull(PyObject)} if we were content - * to allowing a wider range of types to be supported in comparison operations than (C)Python - * unicode.__eq__. - * - * @param o the object to coerce - * @return an equivalent String - */ - private static String coerceForComparison(PyObject o) { - if (o instanceof PyUnicode) { - return ((PyUnicode) o).getString(); - } else if (o instanceof PyString) { - return ((PyString) o).decode().toString(); - } else if (o instanceof Py2kBuffer) { - // We ought to be able to call codecs.decode on o but see Issue #2164 - try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) { - PyString s = new PyString(buf); - // For any sensible codec, the return is unicode and toString() is getString(). - return s.decode().toString(); - } - } else { - // o is some type not allowed: - return null; - } - } - - /** - * Interpret the object as a Java String representing characters as UTF-16, or - * raise an error if the type does not admit this conversion. A byte argument is decoded with - * the default encoding. - * - * @param o the object to coerce - * @return an equivalent String (and never null) - */ - private static String coerceToString(PyObject o) { - String s = coerceToStringOrNull(o); - if (s == null) { - throw errorCoercingToUnicode(o); - } - return s; - } - - /** - * Interpret the object as a Java String representing characters as UTF-16, or - * optionally as null (for a null or None argument if the - * second argument is true). Raise an error if the type does not admit this - * conversion. 
- * - * @param o the object to coerce - * @param allowNullArgument iff true allow a null or none argument - * @return an equivalent String or null - */ - private static String coerceToString(PyObject o, boolean allowNullArgument) { - if (allowNullArgument && (o == null || o == Py.None)) { - return null; - } else { - return coerceToString(o); - } - } - - /** Construct exception "coercing to Unicode: ..." */ - private static PyException errorCoercingToUnicode(PyObject o) { - return Py.TypeError("coercing to Unicode: need string or buffer, " - + (o == null ? Py.None : o).getType().fastGetName() + " found"); - } - - /** - * Interpret the object as a PyUnicode, or return null if the type - * does not admit this conversion. From a PyUnicode we return itself. A byte - * argument is decoded with the default encoding. - * - * @param o the object to coerce - * @return an equivalent PyUnicode (or o itself) - */ - private static PyUnicode coerceToUnicodeOrNull(PyObject o) { - if (o instanceof PyUnicode) { - return (PyUnicode) o; - } else if (o instanceof PyString) { - // For any sensible codec, the return here is unicode. - PyObject u = ((PyString) o).decode(); - return (u instanceof PyUnicode) ? (PyUnicode) u : new PyUnicode(o.toString()); - } else if (o instanceof BufferProtocol) { - // PyByteArray, PyMemoryView, Py2kBuffer ... - // We ought to be able to call codecs.decode on o but see Issue #2164 - try (PyBuffer buf = ((BufferProtocol) o).getBuffer(PyBUF.FULL_RO)) { - PyString s = new PyString(buf); - // For any sensible codec, the return is unicode and toString() is getString(). - PyObject u = s.decode(); - return (u instanceof PyUnicode) ? (PyUnicode) u : new PyUnicode(o.toString()); - } - } else { - // o is some type not allowed: - return null; - } - } - - /** - * Interpret the object as a PyUnicode, or raise a TypeError if the - * type does not admit this conversion. From a PyUnicode we return itself. A byte - * argument is decoded with the default encoding. 
- * - * @param o the object to coerce - * @return an equivalent PyUnicode (or o itself) - */ - private static PyUnicode coerceToUnicode(PyObject o) { - PyUnicode u = coerceToUnicodeOrNull(o); - if (u == null) { - throw errorCoercingToUnicode(o); - } - return u; - } - - @Override - public boolean __contains__(PyObject o) { - return unicode___contains__(o); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___contains___doc) - final boolean unicode___contains__(PyObject o) { - String other = coerceToString(o); - return getString().indexOf(other) >= 0; - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode___mul__(PyObject o) { - return str___mul__(o); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode___rmul__(PyObject o) { - return str___rmul__(o); - } - - @Override - public PyObject __add__(PyObject other) { - return unicode___add__(other); - } - - @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode___add__(PyObject other) { - // Interpret other as a Java String - String s = coerceToStringOrNull(other); - return s == null ? 
null : new PyUnicode(getString().concat(s)); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_lower_doc) - final PyObject unicode_lower() { - return new PyUnicode(getString().toLowerCase()); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_upper_doc) - final PyObject unicode_upper() { - return new PyUnicode(getString().toUpperCase()); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_title_doc) - final PyObject unicode_title() { - StringBuilder buffer = new StringBuilder(getString().length()); - boolean previous_is_cased = false; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int codePoint = iter.next(); - if (previous_is_cased) { - buffer.appendCodePoint(Character.toLowerCase(codePoint)); - } else { - buffer.appendCodePoint(Character.toTitleCase(codePoint)); - } - - if (Character.isLowerCase(codePoint) || Character.isUpperCase(codePoint) - || Character.isTitleCase(codePoint)) { - previous_is_cased = true; - } else { - previous_is_cased = false; - } - } - return new PyUnicode(buffer); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_swapcase_doc) - final PyObject unicode_swapcase() { - StringBuilder buffer = new StringBuilder(getString().length()); - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int codePoint = iter.next(); - if (Character.isUpperCase(codePoint)) { - buffer.appendCodePoint(Character.toLowerCase(codePoint)); - } else if (Character.isLowerCase(codePoint)) { - buffer.appendCodePoint(Character.toUpperCase(codePoint)); - } else { - buffer.appendCodePoint(codePoint); - } - } - return new PyUnicode(buffer); - } - - /** Define what characters are to be treated as a space according to Python 2. 
*/ - private static boolean isPythonSpace(int ch) { - // Use the Java built-in methods as far as possible - return Character.isWhitespace(ch) // catches the ASCII spaces and some others - || Character.isSpaceChar(ch) // catches remaining Unicode spaces - || ch == 0x0085 // NEXT LINE (not a space in Java) - || ch == 0x180e; // MONGOLIAN VOWEL SEPARATOR (not a space in Java 9+ or Python 3) - } - - private static class StripIterator implements Iterator { - - private final Iterator iter; - private int lookahead = -1; - - public StripIterator(PyUnicode sep, Iterator iter) { - this.iter = iter; - if (sep != null) { - Set sepSet = Generic.set(); - for (Iterator sepIter = sep.newSubsequenceIterator(); sepIter.hasNext();) { - sepSet.add(sepIter.next()); - } - while (iter.hasNext()) { - int codePoint = iter.next(); - if (!sepSet.contains(codePoint)) { - lookahead = codePoint; - return; - } - } - } else { - while (iter.hasNext()) { - int codePoint = iter.next(); - if (!isPythonSpace(codePoint)) { - lookahead = codePoint; - return; - } - } - } - } - - @Override - public boolean hasNext() { - return lookahead != -1; - } - - @Override - public Integer next() { - int old = lookahead; - if (iter.hasNext()) { - lookahead = iter.next(); - } else { - lookahead = -1; - } - return old; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - // Compliance requires a bit of inconsistency with other coercions used. - /** - * Helper used in .strip() to "coerce" a method argument into a - * PyUnicode (which it may already be). A null argument or a - * PyNone causes null to be returned. A buffer type is not acceptable - * to (Unicode) .strip(). This is the difference from - * {@link #coerceToUnicode(PyObject, boolean)}. 
- * - * @param o the object to coerce - * @param name of method - * @return an equivalent PyUnicode (or o itself, or null) - */ - private static PyUnicode coerceStripSepToUnicode(PyObject o, String name) { - if (o == null) { - return null; - } else if (o instanceof PyUnicode) { - return (PyUnicode) o; - } else if (o instanceof PyString) { - PyObject u = ((PyString) o).decode(); - return (u instanceof PyUnicode) ? (PyUnicode) u : new PyUnicode(u.toString()); - } else if (o == Py.None) { - return null; - } else { - throw Py.TypeError(name + " arg must be None, unicode or str"); - } - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_strip_doc) - final PyObject unicode_strip(PyObject sepObj) { - - PyUnicode sep = coerceStripSepToUnicode(sepObj, "strip"); - - if (isBasicPlane()) { - // this contains only basic plane characters - if (sep == null) { - // And we're stripping whitespace, so use the PyString implementation - return new PyUnicode(_strip()); - } else if (sep.isBasicPlane()) { - // And the strip characters are basic plane too, so use the PyString implementation - return new PyUnicode(_strip(sep.getString())); - } - } - - // Not basic plane: have to do real Unicode - return new PyUnicode(new ReversedIterator(new StripIterator(sep, - new ReversedIterator<>(new StripIterator(sep, newSubsequenceIterator()))))); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_lstrip_doc) - final PyObject unicode_lstrip(PyObject sepObj) { - - PyUnicode sep = coerceStripSepToUnicode(sepObj, "lstrip"); - - if (isBasicPlane()) { - // this contains only basic plane characters - if (sep == null) { - // And we're stripping whitespace, so use the PyString implementation - return new PyUnicode(_lstrip()); - } else if (sep.isBasicPlane()) { - // And the strip characters are basic plane too, so use the PyString implementation - return new PyUnicode(_lstrip(sep.getString())); - } - } - - // Not basic plane: have to do real Unicode - return new PyUnicode(new 
StripIterator(sep, newSubsequenceIterator())); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_rstrip_doc) - final PyObject unicode_rstrip(PyObject sepObj) { - - PyUnicode sep = coerceStripSepToUnicode(sepObj, "rstrip"); - - if (isBasicPlane()) { - // this contains only basic plane characters - if (sep == null) { - // And we're stripping whitespace, so use the PyString implementation - return new PyUnicode(_rstrip()); - } else if (sep.isBasicPlane()) { - // And the strip characters are basic plane too, so use the PyString implementation - return new PyUnicode(_rstrip(sep.getString())); - } - } - - // Not basic plane: have to do real Unicode - return new PyUnicode(new ReversedIterator( - new StripIterator(sep, new ReversedIterator<>(newSubsequenceIterator())))); - } - - /** {@inheritDoc} */ - @Override - protected int _findLeft(int right) { - String s = getString(); - for (int left = 0; left < right; left++) { - if (!isPythonSpace(s.charAt(left))) { - return left; - } - } - return right; - } - - /** {@inheritDoc} */ - @Override - protected int _findRight() { - String s = getString(); - for (int right = s.length(); --right >= 0;) { - if (!isPythonSpace(s.charAt(right))) { - return right; - } - } - return -1; - } - - @Override - public PyTuple partition(PyObject sep) { - return unicode_partition(sep); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_partition_doc) - final PyTuple unicode_partition(PyObject sep) { - return unicodePartition(coerceToUnicode(sep)); - } - - private abstract class SplitIterator implements Iterator { - - protected final int maxsplit; - protected final Iterator iter = newSubsequenceIterator(); - protected final LinkedList lookahead = new LinkedList(); - protected int numSplits = 0; - protected boolean completeSeparator = false; - - SplitIterator(int maxsplit) { - this.maxsplit = maxsplit; - } - - @Override - public boolean hasNext() { - return lookahead.peek() != null - || (iter.hasNext() && (maxsplit == -1 || numSplits <= 
maxsplit)); - } - - protected void addLookahead(StringBuilder buffer) { - for (int codepoint : lookahead) { - buffer.appendCodePoint(codepoint); - } - lookahead.clear(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - public boolean getEndsWithSeparator() { - return completeSeparator && !hasNext(); - } - } - - private class WhitespaceSplitIterator extends SplitIterator { - - WhitespaceSplitIterator(int maxsplit) { - super(maxsplit); - } - - @Override - public PyUnicode next() { - StringBuilder buffer = new StringBuilder(); - - addLookahead(buffer); - if (numSplits == maxsplit) { - while (iter.hasNext()) { - buffer.appendCodePoint(iter.next()); - } - return new PyUnicode(buffer); - } - - boolean inSeparator = false; - boolean atBeginning = numSplits == 0; - - while (iter.hasNext()) { - int codepoint = iter.next(); - if (isPythonSpace(codepoint)) { - completeSeparator = true; - if (!atBeginning) { - inSeparator = true; - } - } else if (!inSeparator) { - completeSeparator = false; - buffer.appendCodePoint(codepoint); - } else { - completeSeparator = false; - lookahead.add(codepoint); - break; - } - atBeginning = false; - } - numSplits++; - return new PyUnicode(buffer); - } - } - - private static class PeekIterator implements Iterator { - - private T lookahead = null; - private final Iterator iter; - - public PeekIterator(Iterator iter) { - this.iter = iter; - next(); - } - - public T peek() { - return lookahead; - } - - @Override - public boolean hasNext() { - return lookahead != null; - } - - @Override - public T next() { - T peeked = lookahead; - lookahead = iter.hasNext() ? 
iter.next() : null; - return peeked; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - private static class ReversedIterator implements Iterator { - - private final List reversed = Generic.list(); - private final Iterator iter; - - ReversedIterator(Iterator iter) { - while (iter.hasNext()) { - reversed.add(iter.next()); - } - Collections.reverse(reversed); - this.iter = reversed.iterator(); - } - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public T next() { - return iter.next(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - private class LineSplitIterator implements Iterator { - - private final PeekIterator iter = new PeekIterator<>(newSubsequenceIterator()); - private final boolean keepends; - - LineSplitIterator(boolean keepends) { - this.keepends = keepends; - } - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public PyObject next() { - StringBuilder buffer = new StringBuilder(); - while (iter.hasNext()) { - int codepoint = iter.next(); - if (codepoint == '\r' && iter.peek() != null && iter.peek() == '\n') { - if (keepends) { - buffer.appendCodePoint(codepoint); - buffer.appendCodePoint(iter.next()); - } else { - iter.next(); - } - break; - } else if (codepoint == '\n' || codepoint == '\r' - || Character.getType(codepoint) == Character.LINE_SEPARATOR) { - if (keepends) { - buffer.appendCodePoint(codepoint); - } - break; - } else { - buffer.appendCodePoint(codepoint); - } - } - return new PyUnicode(buffer); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - private class SepSplitIterator extends SplitIterator { - - private final PyUnicode sep; - - SepSplitIterator(PyUnicode sep, int maxsplit) { - super(maxsplit); - this.sep = sep; - } - - @Override - public PyUnicode next() { - StringBuilder buffer = new StringBuilder(); - - 
addLookahead(buffer); - if (numSplits == maxsplit) { - while (iter.hasNext()) { - buffer.appendCodePoint(iter.next()); - } - return new PyUnicode(buffer); - } - - boolean inSeparator = true; - while (iter.hasNext()) { - // TODO: should cache the first codepoint - inSeparator = true; - for (Iterator sepIter = sep.newSubsequenceIterator(); sepIter.hasNext();) { - int codepoint = iter.next(); - if (codepoint != sepIter.next()) { - addLookahead(buffer); - buffer.appendCodePoint(codepoint); - inSeparator = false; - break; - } else { - lookahead.add(codepoint); - } - } - - if (inSeparator) { - lookahead.clear(); - break; - } - } - - numSplits++; - completeSeparator = inSeparator; - return new PyUnicode(buffer); - } - } - - private SplitIterator newSplitIterator(PyUnicode sep, int maxsplit) { - if (sep == null) { - return new WhitespaceSplitIterator(maxsplit); - } else if (sep.getCodePointCount() == 0) { - throw Py.ValueError("empty separator"); - } else { - return new SepSplitIterator(sep, maxsplit); - } - } - - @Override - public PyTuple rpartition(PyObject sep) { - return unicode_rpartition(sep); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_rpartition_doc) - final PyTuple unicode_rpartition(PyObject sep) { - return unicodeRpartition(coerceToUnicode(sep)); - } - - @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_split_doc) - final PyList unicode_split(PyObject sepObj, int maxsplit) { - String sep = coerceToString(sepObj, true); - if (sep != null) { - return _split(sep, maxsplit); - } else { - return _split(null, maxsplit); - } - } - - /** - * {@inheritDoc} The split sections will be {@link PyUnicode} and use the Python - * unicode definition of "space". - */ - @Override - protected PyList splitfields(int maxsplit) { - /* - * Result built here is a list of split parts, exactly as required for s.split(None, - * maxsplit). If there are to be n splits, there will be n+1 elements in L. 
- */ - PyList list = new PyList(); - - String s = getString(); - int length = s.length(), start = 0, splits = 0, index; - - if (maxsplit < 0) { - // Make all possible splits: there can't be more than: - maxsplit = length; - } - - // start is always the first character not consumed into a piece on the list - while (start < length) { - - // Find the next occurrence of non-whitespace - while (start < length) { - if (!isPythonSpace(s.charAt(start))) { - // Break leaving start pointing at non-whitespace - break; - } - start++; - } - - if (start >= length) { - // Only found whitespace so there is no next segment - break; - - } else if (splits >= maxsplit) { - // The next segment is the last and contains all characters up to the end - index = length; - - } else { - // The next segment runs up to the next next whitespace or end - for (index = start; index < length; index++) { - if (isPythonSpace(s.charAt(index))) { - // Break leaving index pointing at whitespace - break; - } - } - } - - // Make a piece from start up to index - list.append(fromSubstring(start, index)); - splits++; - - // Start next segment search at that point - start = index; - } - - return list; - } - - @ExposedMethod(defaults = {"null", "-1"}, doc = BuiltinDocs.unicode_rsplit_doc) - final PyList unicode_rsplit(PyObject sepObj, int maxsplit) { - String sep = coerceToString(sepObj, true); - if (sep != null) { - return _rsplit(sep, maxsplit); - } else { - return _rsplit(null, maxsplit); - } - } - - /** - * {@inheritDoc} The split sections will be {@link PyUnicode} and use the Python - * unicode definition of "space". - */ - @Override - protected PyList rsplitfields(int maxsplit) { - /* - * Result built here (in reverse) is a list of split parts, exactly as required for - * s.rsplit(None, maxsplit). If there are to be n splits, there will be n+1 elements. 
- */ - PyList list = new PyList(); - - String s = getString(); - int length = s.length(), end = length - 1, splits = 0, index; - - if (maxsplit < 0) { - // Make all possible splits: there can't be more than: - maxsplit = length; - } - - // end is always the rightmost character not consumed into a piece on the list - while (end >= 0) { - - // Find the next occurrence of non-whitespace (working leftwards) - while (end >= 0) { - if (!isPythonSpace(s.charAt(end))) { - // Break leaving end pointing at non-whitespace - break; - } - --end; - } - - if (end < 0) { - // Only found whitespace so there is no next segment - break; - - } else if (splits >= maxsplit) { - // The next segment is the last and contains all characters back to the beginning - index = -1; - - } else { - // The next segment runs back to the next next whitespace or beginning - for (index = end; index >= 0; --index) { - if (isPythonSpace(s.charAt(index))) { - // Break leaving index pointing at whitespace - break; - } - } - } - - // Make a piece from index+1 start up to end+1 - list.append(fromSubstring(index + 1, end + 1)); - splits++; - - // Start next segment search at that point - end = index; - } - - list.reverse(); - return list; - } - - @ExposedMethod(defaults = "false", doc = BuiltinDocs.unicode___getslice___doc) - final PyList unicode_splitlines(boolean keepends) { - return new PyList(new LineSplitIterator(keepends)); - } - - @Override - protected PyString fromSubstring(int begin, int end) { - assert (isBasicPlane()); // can only be used on a codepath from str_ equivalents - return new PyUnicode(getString().substring(begin, end), true); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_index_doc) - final int unicode_index(PyObject subObj, PyObject start, PyObject end) { - final String sub = coerceToString(subObj); - // Now use the mechanics of the PyString on the UTF-16. 
- return checkIndex(_find(sub, start, end)); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_index_doc) - final int unicode_rindex(PyObject subObj, PyObject start, PyObject end) { - final String sub = coerceToString(subObj); - // Now use the mechanics of the PyString on the UTF-16. - return checkIndex(_rfind(sub, start, end)); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_count_doc) - final int unicode_count(PyObject subObj, PyObject start, PyObject end) { - final PyUnicode sub = coerceToUnicode(subObj); - if (isBasicPlane()) { - return _count(sub.getString(), start, end); - } - int[] indices = super.translateIndices(start, end); // do not convert to utf-16 indices. - int count = 0; - for (Iterator mainIter = - newSubsequenceIterator(indices[0], indices[1], 1); mainIter.hasNext();) { - int matched = sub.getCodePointCount(); - for (Iterator subIter = sub.newSubsequenceIterator(); mainIter.hasNext() - && subIter.hasNext();) { - if (mainIter.next() != subIter.next()) { - break; - } - matched--; - } - if (matched == 0) { - count++; - } - } - return count; - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_find_doc) - final int unicode_find(PyObject subObj, PyObject start, PyObject end) { - int found = _find(coerceToString(subObj), start, end); - return found < 0 ? -1 : translator.codePointIndex(found); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_rfind_doc) - final int unicode_rfind(PyObject subObj, PyObject start, PyObject end) { - int found = _rfind(coerceToString(subObj), start, end); - return found < 0 ? 
-1 : translator.codePointIndex(found); - } - - private static String padding(int n, int pad) { - StringBuilder buffer = new StringBuilder(n); - for (int i = 0; i < n; i++) { - buffer.appendCodePoint(pad); - } - return buffer.toString(); - } - - private static int parse_fillchar(String function, String fillchar) { - if (fillchar == null) { - return ' '; - } - if (fillchar.codePointCount(0, fillchar.length()) != 1) { - throw Py.TypeError(function + "() argument 2 must be char, not str"); - } - return fillchar.codePointAt(0); - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode_ljust(int width, String padding) { - int n = width - getCodePointCount(); - if (n <= 0) { - return new PyUnicode(getString()); - } else { - return new PyUnicode(getString() + padding(n, parse_fillchar("ljust", padding))); - } - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode_rjust(int width, String padding) { - int n = width - getCodePointCount(); - if (n <= 0) { - return new PyUnicode(getString()); - } else { - return new PyUnicode(padding(n, parse_fillchar("ljust", padding)) + getString()); - } - } - - @ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode_center(int width, String padding) { - int n = width - getCodePointCount(); - if (n <= 0) { - return new PyUnicode(getString()); - } - int half = n / 2; - if (n % 2 > 0 && width % 2 > 0) { - half += 1; - } - int pad = parse_fillchar("center", padding); - return new PyUnicode(padding(half, pad) + getString() + padding(n - half, pad)); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_zfill_doc) - final PyObject unicode_zfill(int width) { - int n = getCodePointCount(); - if (n >= width) { - return new PyUnicode(getString()); - } - if (isBasicPlane()) { - return new PyUnicode(str_zfill(width)); - } - StringBuilder buffer = new StringBuilder(width); - int nzeros = width - n; - 
boolean first = true; - boolean leadingSign = false; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int codePoint = iter.next(); - if (first) { - first = false; - if (codePoint == '+' || codePoint == '-') { - buffer.appendCodePoint(codePoint); - leadingSign = true; - } - for (int i = 0; i < nzeros; i++) { - buffer.appendCodePoint('0'); - } - if (!leadingSign) { - buffer.appendCodePoint(codePoint); - } - } else { - buffer.appendCodePoint(codePoint); - } - } - if (first) { - for (int i = 0; i < nzeros; i++) { - buffer.appendCodePoint('0'); - } - } - return new PyUnicode(buffer); - } - - @ExposedMethod(defaults = "8", doc = BuiltinDocs.unicode___getslice___doc) - final PyObject unicode_expandtabs(int tabsize) { - return new PyUnicode(str_expandtabs(tabsize)); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_capitalize_doc) - final PyObject unicode_capitalize() { - if (getString().length() == 0) { - return this; - } - StringBuilder buffer = new StringBuilder(getString().length()); - boolean first = true; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - if (first) { - buffer.appendCodePoint(Character.toUpperCase(iter.next())); - first = false; - } else { - buffer.appendCodePoint(Character.toLowerCase(iter.next())); - } - } - return new PyUnicode(buffer); - } - - @ExposedMethod(defaults = "-1", doc = BuiltinDocs.unicode_replace_doc) - final PyString unicode_replace(PyObject oldPieceObj, PyObject newPieceObj, int count) { - - // Convert other argument types to PyUnicode (or error) - PyUnicode newPiece = coerceToUnicode(newPieceObj); - PyUnicode oldPiece = coerceToUnicode(oldPieceObj); - - if (isBasicPlane() && newPiece.isBasicPlane() && oldPiece.isBasicPlane()) { - // Use the mechanics of PyString, since all is basic plane - return _replace(oldPiece.getString(), newPiece.getString(), count); - - } else { - // A Unicode-specific implementation is needed working in code points - StringBuilder buffer = new StringBuilder(); - - if 
(oldPiece.getCodePointCount() == 0) { - Iterator iter = newSubsequenceIterator(); - for (int i = 1; (count == -1 || i < count) && iter.hasNext(); i++) { - if (i == 1) { - buffer.append(newPiece.getString()); - } - buffer.appendCodePoint(iter.next()); - buffer.append(newPiece.getString()); - } - while (iter.hasNext()) { - buffer.appendCodePoint(iter.next()); - } - return new PyUnicode(buffer); - - } else { - SplitIterator iter = newSplitIterator(oldPiece, count); - int numSplits = 0; - while (iter.hasNext()) { - buffer.append(((PyUnicode) iter.next()).getString()); - if (iter.hasNext()) { - buffer.append(newPiece.getString()); - } - numSplits++; - } - if (iter.getEndsWithSeparator() && (count == -1 || numSplits <= count)) { - buffer.append(newPiece.getString()); - } - return new PyUnicode(buffer); - } - } - } - - // end utf-16 aware - @Override - public PyString join(PyObject seq) { - return unicode_join(seq); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_join_doc) - final PyUnicode unicode_join(PyObject seq) { - return unicodeJoin(seq); - } - - /** - * Equivalent to the Python unicode.startswith method, testing whether a string - * starts with a specified prefix, where a sub-range is specified by [start:end]. - * Arguments start and end are interpreted as in slice notation, with - * null or {@link Py#None} representing "missing". prefix can also be a tuple of - * prefixes to look for. - * - * @param prefix string to check for (or a PyTuple of them). - * @param start start of slice. - * @param end end of slice. - * @return true if this string slice starts with a specified prefix, otherwise - * false. 
- */ - @Override - public boolean startswith(PyObject prefix, PyObject start, PyObject end) { - return unicode_startswith(prefix, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_startswith_doc) - final boolean unicode_startswith(PyObject prefix, PyObject startObj, PyObject endObj) { - int[] indices = translateIndices(startObj, endObj); - int start = indices[0]; - int sliceLen = indices[1] - start; - - if (!(prefix instanceof PyTuple)) { - // It ought to be PyUnicode or some kind of bytes with the buffer API to decode. - String s = coerceToString(prefix); - return sliceLen >= s.length() && getString().startsWith(s, start); - - } else { - // Loop will return true if this slice starts with any prefix in the tuple - for (PyObject prefixObj : ((PyTuple) prefix).getArray()) { - // It ought to be PyUnicode or some kind of bytes with the buffer API. - String s = coerceToString(prefixObj); - if (sliceLen >= s.length() && getString().startsWith(s, start)) { - return true; - } - } - // None matched - return false; - } - } - - /** - * Equivalent to the Python unicode.endswith method, testing whether a string ends - * with a specified suffix, where a sub-range is specified by [start:end]. - * Arguments start and end are interpreted as in slice notation, with - * null or {@link Py#None} representing "missing". suffix can also be a tuple of - * suffixes to look for. - * - * @param suffix string to check for (or a PyTuple of them). - * @param start start of slice. - * @param end end of slice. - * @return true if this string slice ends with a specified suffix, otherwise - * false. 
- */ - @Override - public boolean endswith(PyObject suffix, PyObject start, PyObject end) { - return unicode_endswith(suffix, start, end); - } - - @ExposedMethod(defaults = {"null", "null"}, doc = BuiltinDocs.unicode_endswith_doc) - final boolean unicode_endswith(PyObject suffix, PyObject startObj, PyObject endObj) { - int[] indices = translateIndices(startObj, endObj); - String substr = getString().substring(indices[0], indices[1]); - - if (!(suffix instanceof PyTuple)) { - // It ought to be PyUnicode or some kind of bytes with the buffer API. - String s = coerceToString(suffix); - return substr.endsWith(s); - - } else { - // Loop will return true if this slice ends with any suffix in the tuple - for (PyObject suffixObj : ((PyTuple) suffix).getArray()) { - // It ought to be PyUnicode or some kind of bytes with the buffer API. - String s = coerceToString(suffixObj); - if (substr.endsWith(s)) { - return true; - } - } - // None matched - return false; - } - } - - @ExposedMethod(doc = BuiltinDocs.unicode_translate_doc) - final PyObject unicode_translate(PyObject table) { - return _codecs.translateCharmap(this, "ignore", table); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_islower_doc) - final boolean unicode_islower() { - boolean cased = false; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int codepoint = iter.next(); - if (Character.isUpperCase(codepoint) || Character.isTitleCase(codepoint)) { - return false; - } else if (!cased && Character.isLowerCase(codepoint)) { - cased = true; - } - } - return cased; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isupper_doc) - final boolean unicode_isupper() { - boolean cased = false; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int codepoint = iter.next(); - if (Character.isLowerCase(codepoint) || Character.isTitleCase(codepoint)) { - return false; - } else if (!cased && Character.isUpperCase(codepoint)) { - cased = true; - } - } - return cased; - } - - @ExposedMethod(doc 
= BuiltinDocs.unicode_isalpha_doc) - final boolean unicode_isalpha() { - if (getCodePointCount() == 0) { - return false; - } - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - if (!Character.isLetter(iter.next())) { - return false; - } - } - return true; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isalnum_doc) - final boolean unicode_isalnum() { - if (getCodePointCount() == 0) { - return false; - } - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int codePoint = iter.next(); - if (!(Character.isLetterOrDigit(codePoint) || // - Character.getType(codePoint) == Character.LETTER_NUMBER)) { - return false; - } - } - return true; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isdecimal_doc) - final boolean unicode_isdecimal() { - if (getCodePointCount() == 0) { - return false; - } - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - if (Character.getType(iter.next()) != Character.DECIMAL_DIGIT_NUMBER) { - return false; - } - } - return true; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isdigit_doc) - final boolean unicode_isdigit() { - if (getCodePointCount() == 0) { - return false; - } - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - if (!Character.isDigit(iter.next())) { - return false; - } - } - return true; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isnumeric_doc) - final boolean unicode_isnumeric() { - if (getCodePointCount() == 0) { - return false; - } - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - int type = Character.getType(iter.next()); - if (type != Character.DECIMAL_DIGIT_NUMBER && type != Character.LETTER_NUMBER - && type != Character.OTHER_NUMBER) { - return false; - } - } - return true; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_istitle_doc) - final boolean unicode_istitle() { - if (getCodePointCount() == 0) { - return false; - } - boolean cased = false; - boolean previous_is_cased = false; - for (Iterator iter = 
newSubsequenceIterator(); iter.hasNext();) { - int codePoint = iter.next(); - if (Character.isUpperCase(codePoint) || Character.isTitleCase(codePoint)) { - if (previous_is_cased) { - return false; - } - previous_is_cased = true; - cased = true; - } else if (Character.isLowerCase(codePoint)) { - if (!previous_is_cased) { - return false; - } - previous_is_cased = true; - cased = true; - } else { - previous_is_cased = false; - } - } - return cased; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_isspace_doc) - final boolean unicode_isspace() { - if (getCodePointCount() == 0) { - return false; - } - for (Iterator iter = newSubsequenceIterator(); iter.hasNext();) { - if (!isPythonSpace(iter.next())) { - return false; - } - } - return true; - } - - // end utf-16 aware - @ExposedMethod(doc = "isunicode is deprecated.") - final boolean unicode_isunicode() { - Py.warning(Py.DeprecationWarning, "isunicode is deprecated."); - return true; - } - - @ExposedMethod(doc = BuiltinDocs.unicode_encode_doc) - final String unicode_encode(PyObject[] args, String[] keywords) { - return str_encode(args, keywords); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_decode_doc) - final PyObject unicode_decode(PyObject[] args, String[] keywords) { - return str_decode(args, keywords); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___getnewargs___doc) - final PyTuple unicode___getnewargs__() { - return new PyTuple(new PyUnicode(this.getString())); - } - - @Override - public PyObject __format__(PyObject formatSpec) { - return unicode___format__(formatSpec); - } - - @ExposedMethod(doc = BuiltinDocs.unicode___format___doc) - final PyObject unicode___format__(PyObject formatSpec) { - // Re-use the str implementation, which adapts itself to unicode. 
- return str___format__(formatSpec); - } - - @ExposedMethod(doc = BuiltinDocs.unicode__formatter_parser_doc) - final PyObject unicode__formatter_parser() { - return new MarkupIterator(this); - } - - @ExposedMethod(doc = BuiltinDocs.unicode__formatter_field_name_split_doc) - final PyObject unicode__formatter_field_name_split() { - FieldNameIterator iterator = new FieldNameIterator(this); - return new PyTuple(iterator.pyHead(), iterator); - } - - @ExposedMethod(doc = BuiltinDocs.unicode_format_doc) - final PyObject unicode_format(PyObject[] args, String[] keywords) { - try { - return new PyUnicode(buildFormattedString(args, keywords, null, null)); - } catch (IllegalArgumentException e) { - throw Py.ValueError(e.getMessage()); - } - } - - @Override - public Iterator iterator() { - return newSubsequenceIterator(); - } - - @Override - public PyComplex __complex__() { - return new PyString(encodeDecimal()).__complex__(); - } - - @Override - public int atoi(int base) { - return new PyString(encodeDecimal()).atoi(base); - } - - @Override - public PyLong atol(int base) { - return new PyString(encodeDecimal()).atol(base); - } - - @Override - public double atof() { - return new PyString(encodeDecimal()).atof(); - } - - /** - * Encode unicode into a valid decimal String. Throws a UnicodeEncodeError on invalid - * characters. 
- * - * @return a valid decimal as an encoded String - */ - private String encodeDecimal() { - if (isBasicPlane()) { - return encodeDecimalBasic(); - } - - int digit; - StringBuilder sb = new StringBuilder(); - int i = 0; - for (Iterator iter = newSubsequenceIterator(); iter.hasNext(); i++) { - int codePoint = iter.next(); - if (isPythonSpace(codePoint)) { - sb.append(' '); - continue; - } - digit = Character.digit(codePoint, 10); - if (digit >= 0) { - sb.append(digit); - continue; - } - if (0 < codePoint && codePoint < 256) { - sb.appendCodePoint(codePoint); - continue; - } - // All other characters are considered unencodable - codecs.encoding_error("strict", "decimal", getString(), i, i + 1, - "invalid decimal Unicode string"); - } - return sb.toString(); - } - - /** - * Encode unicode in the basic plane into a valid decimal String. Throws a UnicodeEncodeError on - * invalid characters. - * - * @return a valid decimal as an encoded String - */ - private String encodeDecimalBasic() { - int digit; - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < getString().length(); i++) { - char ch = getString().charAt(i); - if (isPythonSpace(ch)) { - sb.append(' '); - continue; - } - digit = Character.digit(ch, 10); - if (digit >= 0) { - sb.append(digit); - continue; - } - if (0 < ch && ch < 256) { - sb.append(ch); - continue; - } - // All other characters are considered unencodable - codecs.encoding_error("strict", "decimal", getString(), i, i + 1, - "invalid decimal Unicode string"); - } - return sb.toString(); - } -} diff --git a/src/org/python/core/stringlib/FieldNameIterator.java b/src/org/python/core/stringlib/FieldNameIterator.java deleted file mode 100644 index f5f154c0b..000000000 --- a/src/org/python/core/stringlib/FieldNameIterator.java +++ /dev/null @@ -1,248 +0,0 @@ -package org.python.core.stringlib; - -import org.python.core.Py; -import org.python.core.PyBoolean; -import org.python.core.PyInteger; -import org.python.core.PyObject; -import 
org.python.core.PyString; -import org.python.core.PyTuple; -import org.python.core.PyType; -import org.python.core.PyUnicode; -import org.python.core.Traverseproc; -import org.python.core.Visitproc; -import org.python.expose.ExposedMethod; -import org.python.expose.ExposedType; -import org.python.modules.gc; - -/** - * This class is an implementation of the iterator object returned by - * str._formatter_field_name_split() and - * unicode._formatter_field_name_split(). The function - * _formatter_field_name_split() returns a pair (tuple) consisting of a head element - * and an instance of this iterator. The constructor of this class effectively implements that - * function, since as well as "being" the iterator, the object has an extra method {@link #head()} - * to return the required first member of the pair. - */ - -@ExposedType(name = "fieldnameiterator", base = PyObject.class, isBaseType = false) -public class FieldNameIterator extends PyObject implements Traverseproc { - - public static final PyType TYPE = PyType.fromClass(FieldNameIterator.class); - - /** The UTF-16 string from which elements are being returned. */ - private final String markup; - /** True if originally given a PyString (so must return PyString not PyUnicode). */ - private final boolean bytes; - /** How far along that string we are. */ - private int index; - private Object head; - - /** - * Create an iterator for the parts of this field name (and extract the head name field, which - * may be an empty string). According to the Python Standard Library documentation, a - * replacement field name has the structure: - * - *

-     * field_name        ::=  arg_name ("." attribute_name | "[" element_index "]")*
-     * arg_name          ::=  [identifier | integer]
-     * attribute_name    ::=  identifier
-     * element_index     ::=  integer | index_string
-     * 
- * - * The object is used from PyUnicode and from PyString, and we have to signal which it is, so - * that returned values may match in type. - * - * @param fieldName the field name as UTF-16 - * @param bytes true if elements returned should be PyString, else PyUnicode - */ - public FieldNameIterator(String fieldName, boolean bytes) { - this.markup = fieldName; - this.bytes = bytes; - this.index = nextDotOrBracket(fieldName); - String headStr = fieldName.substring(0, index); - try { - this.head = Integer.parseInt(headStr); - } catch (NumberFormatException e) { - this.head = headStr; - } - } - - /** - * Create an iterator for the parts of this field name (and extract the head name field, which - * may be an empty string). - * - * @param fieldNameObject - */ - public FieldNameIterator(PyString fieldNameObject) { - // Extract UTF-16 string but remember whether PyString or PyUnicode shouyld result. - this(fieldNameObject.getString(), !(fieldNameObject instanceof PyUnicode)); - } - - @Override - public PyObject __iter__() { - return fieldnameiterator___iter__(); - } - - @ExposedMethod - final PyObject fieldnameiterator___iter__() { - return this; - } - - @Override - public PyObject __iternext__() { - return fieldnameiterator___iternext__(); - } - - @ExposedMethod - final PyObject fieldnameiterator___iternext__() { - Chunk chunk = nextChunk(); - if (chunk == null) { - return null; - } - return new PyTuple(Py.newBoolean(chunk.is_attr), wrap(chunk.value)); - } - - /** - * Convenience method to wrap a value as a PyInteger, if it is an Integer, or as - * PyString or PyUnicode according to the type of the original field - * name string. These objects are being used as field specifiers in navigating arguments to a - * format statement. - * - * @param value to wrap as a PyObject. 
- * @return PyObject equivalent field specifier - */ - private PyObject wrap(Object value) { - if (value instanceof Integer) { - return Py.newInteger(((Integer)value).intValue()); - } else { - // It can only be a String (but if not, at least we see it). - String s = value.toString(); - if (s.length() == 0) { - // This is frequent so avoid the constructor - return bytes ? Py.EmptyString : Py.EmptyUnicode; - } else { - return bytes ? Py.newString(s) : Py.newUnicode(s); - } - } - } - - private int nextDotOrBracket(String markup) { - int dotPos = markup.indexOf('.', index); - if (dotPos < 0) { - dotPos = markup.length(); - } - int bracketPos = markup.indexOf('[', index); - if (bracketPos < 0) { - bracketPos = markup.length(); - } - return Math.min(dotPos, bracketPos); - } - - /** @return the isolated head object from the field name. */ - public Object head() { - return head; - } - - /** - * Return the head object from the field name, as PyInteger, PyString - * or PyUnicode. - * - * @return the isolated head object from the field name. - */ - public PyObject pyHead() { - return wrap(head()); - } - - /** - * If originally given a PyString, the iterator must return PyString not PyUnicode. - * - * @return true if originally given a PyString - */ - public final boolean isBytes() { - return bytes; - } - - /** - * Return the next "chunk" of the field name (or return null if ended). A chunk is a 2-tuple - * describing: - *
    - *
  1. whether the chunk is an attribute name,
  2. - *
  3. the name or number (as a String or Integer) for accessing the value.
  4. - *
- * - * @return next element of the field name - */ - public Chunk nextChunk() { - if (index == markup.length()) { - return null; - } - Chunk chunk = new Chunk(); - if (markup.charAt(index) == '[') { - parseItemChunk(chunk); - } else if (markup.charAt(index) == '.') { - parseAttrChunk(chunk); - } else { - throw new IllegalArgumentException( - "Only '.' or '[' may follow ']' in format field specifier"); - } - return chunk; - } - - private void parseItemChunk(Chunk chunk) { - chunk.is_attr = false; - int endBracket = markup.indexOf(']', index + 1); - if (endBracket < 0) { - throw new IllegalArgumentException("Missing ']' in format string"); - } - String itemValue = markup.substring(index + 1, endBracket); - if (itemValue.length() == 0) { - throw new IllegalArgumentException("Empty attribute in format string"); - } - try { - chunk.value = Integer.parseInt(itemValue); - } catch (NumberFormatException e) { - chunk.value = itemValue; - } - index = endBracket + 1; - } - - private void parseAttrChunk(Chunk chunk) { - index++; // skip dot - chunk.is_attr = true; - int pos = nextDotOrBracket(markup); - if (pos == index) { - throw new IllegalArgumentException("Empty attribute in format string"); - } - chunk.value = markup.substring(index, pos); - index = pos; - } - - public static class Chunk { - - public boolean is_attr; - /** Integer or String. 
*/ - public Object value; - } - - - /* Traverseproc implementation */ - @Override - public int traverse(Visitproc visit, Object arg) { - if (head == null || !gc.canLinkToPyObject(head.getClass(), true)) { - return 0; - } - return gc.traverseByReflection(head, visit, arg); - } - - @Override - public boolean refersDirectlyTo(PyObject ob) - throws UnsupportedOperationException { - if (ob != null && ob == head) { - return true; - } - if (!gc.canLinkToPyObject(head.getClass(), true)) { - return false; - } - throw new UnsupportedOperationException(); - } -} diff --git a/src/org/python/core/stringlib/TextFormatter.java b/src/org/python/core/stringlib/TextFormatter.java deleted file mode 100644 index 1997ba0aa..000000000 --- a/src/org/python/core/stringlib/TextFormatter.java +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) Jython Developers -package org.python.core.stringlib; - -import org.python.core.stringlib.InternalFormat.Formatter; -import org.python.core.stringlib.InternalFormat.Spec; - -/** - * A class that provides the implementation of str and unicode formatting. - * In a limited way, it acts like a StringBuilder to which text, formatted according to the format - * specifier supplied at construction. These are ephemeral objects that are not, on their own, - * thread safe. - */ -public class TextFormatter extends InternalFormat.Formatter { - - /** - * Construct the formatter from a client-supplied buffer, to which the result will be appended, - * and a specification. Sets {@link #mark} to the end of the buffer. - * - * @param result destination buffer - * @param spec parsed conversion specification - */ - public TextFormatter(StringBuilder result, Spec spec) { - super(result, spec); - } - - /** - * Construct the formatter from a specification, allocating a buffer internally for the result. 
- * - * @param spec parsed conversion specification - */ - public TextFormatter(Spec spec) { - this(new StringBuilder(), spec); - } - - /* - * Re-implement the text appends so they return the right type. - */ - @Override - public TextFormatter append(char c) { - super.append(c); - return this; - } - - @Override - public TextFormatter append(CharSequence csq) { - super.append(csq); - return this; - } - - @Override - public TextFormatter append(CharSequence csq, int start, int end) // - throws IndexOutOfBoundsException { - super.append(csq, start, end); - return this; - } - - /** - * Format the given String into the result buffer. Largely, this is a - * matter of copying the value of the argument, but a subtlety arises when the string contains - * supplementary (non-BMP) Unicode characters, which are represented as surrogate pairs. The - * precision specified in the format relates to a count of Unicode characters (code points), not - * Java chars. The method deals with this correctly, essentially by not counting - * the high-surrogates in the allowance. The final value of {@link #lenWhole} counts the UTF-16 - * units added. - * - * @param value to format - * @return this TextFormatter object - */ - public TextFormatter format(String value) { - - // Scratch all instance variables and start = result.length(). - setStart(); - - int p = spec.precision, n = value.length(); - - if (Spec.specified(p) && p < n) { - // Estimate the space for the converted result (preempt multiple re-allocation) - int space = Math.max(spec.width, p); - result.ensureCapacity(result.length() + space + (bytes ? 0 : space / 4)); - /* - * A precision p was specified less than the length: we may have to truncate. Note we - * compared p with the UTF-16 length, even though it is the code point length that - * matters. But the code point length cannot be greater than n. - */ - int count = 0; - while (count < p) { - // count is the number of UTF-16 chars. 
- char c = value.charAt(count++); - result.append(c); - // A high-surrogate will always be followed by a low, so doesn't count. - if (Character.isHighSurrogate(c) && p < n) { - // Accomplish "not counting" by bumping the limit p, within the array bounds. - p += 1; - } - } - // Record the UTF-16 count as the length in buffer - lenWhole = count; - - } else { - // We definitely don't need to truncate. Append the whole string. - lenWhole = n; - result.append(value); - } - - return this; - } - - // Variant to deal with supplementary characters: other formatters don't produce them. - @Override - public TextFormatter pad() { - // We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0. - int n = spec.width - result.codePointCount(mark, result.length()); - if (n > 0) { - pad(mark, n); - } - return this; - } - -} diff --git a/tests/java/org/python/core/StringFormatTest.java b/tests/java/org/python/core/StringFormatTest.java deleted file mode 100644 index 448f824a3..000000000 --- a/tests/java/org/python/core/StringFormatTest.java +++ /dev/null @@ -1,348 +0,0 @@ -package org.python.core; - -import java.math.BigInteger; - -import junit.framework.TestCase; - -import org.python.core.stringlib.FieldNameIterator; -import org.python.core.stringlib.IntegerFormatter; -import org.python.core.stringlib.InternalFormat; -import org.python.core.stringlib.MarkupIterator; -import org.python.core.stringlib.TextFormatter; -import org.python.core.stringlib.InternalFormat.Spec; -import org.python.util.PythonInterpreter; - -/** - * Tests for internal bits and pieces of string.format implementation. - */ -public class StringFormatTest extends TestCase { - - /** Exception-raising seems to need the interpreter to be initialised **/ - PythonInterpreter interp = new PythonInterpreter(); - - /** Switches mode in tests that have a shared implementation for bytes and Unicode modes. 
*/ - private boolean useBytes = true; - - public void testInternalFormatSpec() { - InternalFormat.Spec spec; - spec = InternalFormat.fromText("x"); - assertFalse(Spec.specified(spec.align)); - assertFalse(Spec.specified(spec.fill)); - assertFalse(Spec.specified(spec.width)); - assertFalse(Spec.specified(spec.precision)); - assertEquals('x', spec.type); - - spec = InternalFormat.fromText("testFormatIntOrLong which tested PyInteger.formatIntOrLong - * . - */ - public void testPrepareFormatter() { - int v = 123; - IntegerFormatter f; - f = PyInteger.prepareFormatter(InternalFormat.fromText("d")); - assertEquals("123", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("o")); - assertEquals("173", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("x")); - assertEquals("7b", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("X")); - assertEquals("7B", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("b")); - assertEquals("1111011", f.format(v).pad().getResult()); - - int v2 = 1234567890; - f = PyInteger.prepareFormatter(InternalFormat.fromText(",d")); - assertEquals("1,234,567,890", f.format(v2).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("#o")); - assertEquals("0o173", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("#X")); - assertEquals("0X7B", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("c")); - assertEquals("{", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("+d")); - assertEquals("+123", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText(" d")); - assertEquals(" 123", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("5")); - assertEquals(" 123", 
f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("^6")); - assertEquals(" 123 ", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("~<5")); - assertEquals("123~~", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("0=+6")); - assertEquals("+00123", f.format(v).pad().getResult()); - - assertValueError("0=+6.1", "Precision not allowed in integer format specifier"); - assertValueError("+c", "Sign not allowed with integer format specifier 'c'"); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("c")); - f.setBytes(true); - assertOverflowError(256, f, "%c arg not in range(0x100)"); - assertOverflowError(-1, f, "%c arg not in range(0x100)"); - assertOverflowError(0x110000, f, "%c arg not in range(0x100)"); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("c")); - assertOverflowError(0x110000, f, "%c arg not in range(0x110000)"); - assertOverflowError(-1, f, "%c arg not in range(0x110000)"); - } - - /** - * Test the IntegerFormatter returned by {@link PyInteger#prepareFormat}. This is based on the - * original testFormatIntOrLong which tested PyInteger.formatIntOrLong - * . 
- */ - public void testPrepareFormatterLong() { - BigInteger v = BigInteger.valueOf(123); - IntegerFormatter f; - f = PyInteger.prepareFormatter(InternalFormat.fromText("d")); - assertEquals("123", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("o")); - assertEquals("173", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("x")); - assertEquals("7b", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("X")); - assertEquals("7B", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("b")); - assertEquals("1111011", f.format(v).pad().getResult()); - - BigInteger v2 = BigInteger.valueOf(1234567890); - f = PyInteger.prepareFormatter(InternalFormat.fromText(",d")); - assertEquals("1,234,567,890", f.format(v2).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("#o")); - assertEquals("0o173", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText("#X")); - assertEquals("0X7B", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("c")); - assertEquals("{", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("+d")); - assertEquals("+123", f.format(v).pad().getResult()); - f = PyInteger.prepareFormatter(InternalFormat.fromText(" d")); - assertEquals(" 123", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("5")); - assertEquals(" 123", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("^6")); - assertEquals(" 123 ", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("~<5")); - assertEquals("123~~", f.format(v).pad().getResult()); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("0=+6")); - assertEquals("+00123", f.format(v).pad().getResult()); - - f = 
PyInteger.prepareFormatter(InternalFormat.fromText("c")); - f.setBytes(true); - assertOverflowError(BigInteger.valueOf(256), f, "%c arg not in range(0x100)"); - assertOverflowError(BigInteger.valueOf(-1), f, "%c arg not in range(0x100)"); - assertOverflowError(BigInteger.valueOf(0x110000), f, "%c arg not in range(0x100)"); - - f = PyInteger.prepareFormatter(InternalFormat.fromText("c")); - assertOverflowError(BigInteger.valueOf(0x110000), f, "%c arg not in range(0x110000)"); - assertOverflowError(BigInteger.valueOf(-1), f, "%c arg not in range(0x110000)"); - } - - private void assertValueError(String formatSpec, String expected) { - try { - IntegerFormatter f = PyInteger.prepareFormatter(InternalFormat.fromText(formatSpec)); - // f.format(123).pad().getResult(); - fail("ValueError not thrown, expected: " + expected); - } catch (PyException pye) { - assertEquals(expected, pye.value.toString()); - } - } - - private void assertOverflowError(int v, IntegerFormatter f, String expected) { - // Test with Java int for PyInteger - try { - f.format(v).pad().getResult(); - fail("OverflowError not thrown, expected: " + expected); - } catch (PyException pye) { - assertEquals(expected, pye.value.toString()); - } - } - - private void assertOverflowError(BigInteger v, IntegerFormatter f, String expected) { - // Test with BigInteger for PyLong - try { - f.format(v).pad().getResult(); - fail("OverflowError not thrown, expected: " + expected); - } catch (PyException pye) { - assertEquals(expected, pye.value.toString()); - } - } - - public void testFormatString() { - String v = "abc"; - TextFormatter f; - f = PyString.prepareFormatter(InternalFormat.fromText("")); - assertEquals("abc", f.format(v).pad().getResult()); - - String v2 = "abcdef"; - f = PyString.prepareFormatter(InternalFormat.fromText(".3")); - assertEquals("abc", f.format(v2).pad().getResult()); - - f = PyString.prepareFormatter(InternalFormat.fromText("6")); - assertEquals("abc ", f.format(v).pad().getResult()); - } - - 
public void implTestMarkupIterator() { - MarkupIterator iterator = newMarkupIterator("abc"); - assertEquals("abc", iterator.nextChunk().literalText); - assertNull(iterator.nextChunk()); - - iterator = newMarkupIterator("First, thou shalt count to {0}"); - MarkupIterator.Chunk chunk = iterator.nextChunk(); - assertEquals("First, thou shalt count to ", chunk.literalText); - assertEquals("0", chunk.fieldName); - assertNull(iterator.nextChunk()); - - iterator = newMarkupIterator("Weight in tons {0.weight!r:s}"); - chunk = iterator.nextChunk(); - assertEquals("Weight in tons ", chunk.literalText); - assertEquals("0.weight", chunk.fieldName); - assertEquals("r", chunk.conversion); - assertEquals("s", chunk.formatSpec); - - chunk = newMarkupIterator("{{").nextChunk(); - assertEquals("{", chunk.literalText); - - chunk = newMarkupIterator("}}").nextChunk(); - assertEquals("}", chunk.literalText); - - chunk = newMarkupIterator("{{}}").nextChunk(); - assertEquals("{}", chunk.literalText); - - chunk = newMarkupIterator("{0:.{1}}").nextChunk(); - assertEquals("0", chunk.fieldName); - assertEquals(".{1}", chunk.formatSpec); - assertTrue(chunk.formatSpecNeedsExpanding); - - assertMarkupError("{!}", "end of format while looking for conversion specifier"); - assertMarkupError("{!rrrr}", "expected ':' after conversion specifier"); - assertMarkupError("{", "Single '{' encountered in format string"); - assertMarkupError("}", "Single '}' encountered in format string"); - } - - public void testMarkupIteratorBytes() { - useBytes = true; - implTestMarkupIterator(); - } - - public void testMarkupIteratorUnicode() { - useBytes = false; - implTestMarkupIterator(); - } - - private MarkupIterator newMarkupIterator(String markup) { - PyString markupObject = useBytes ? 
Py.newString(markup) : Py.newUnicode(markup); - return new MarkupIterator(markupObject); - } - - private void assertMarkupError(String markup, String expected) { - MarkupIterator iterator = newMarkupIterator(markup); - String error = null; - try { - iterator.nextChunk(); - } catch (IllegalArgumentException e) { - error = e.getMessage(); - } - assertEquals(expected, error); - } - - public void implTestFieldNameIterator() { - FieldNameIterator it = newFieldNameIterator("abc"); - assertEquals("abc", it.head()); - assertNull(it.nextChunk()); - - it = newFieldNameIterator("3"); - assertEquals(3, it.head()); - assertNull(it.nextChunk()); - - it = newFieldNameIterator("abc[0]"); - assertEquals("abc", it.head()); - FieldNameIterator.Chunk chunk = it.nextChunk(); - assertEquals(0, chunk.value); - assertFalse(chunk.is_attr); - assertNull(it.nextChunk()); - - it = newFieldNameIterator("abc.def"); - assertEquals("abc", it.head()); - chunk = it.nextChunk(); - assertEquals("def", chunk.value); - assertTrue(chunk.is_attr); - assertNull(it.nextChunk()); - } - - public void testFieldNameIteratorBytes() { - useBytes = true; - implTestFieldNameIterator(); - } - - public void testFieldNameIteratorUnicode() { - useBytes = false; - implTestFieldNameIterator(); - } - - private FieldNameIterator newFieldNameIterator(String field) { - PyString fieldObject = useBytes ? Py.newString(field) : Py.newUnicode(field); - return new FieldNameIterator(fieldObject); - } -}