diff --git a/.gitignore b/.gitignore index 42d68b04..c0634a4b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ poetry.lock test.py cat.docx cat.jpeg -template.docx \ No newline at end of file +template.docx +*.docx \ No newline at end of file diff --git a/docs/dev/analysis/features/numbering.rst b/docs/dev/analysis/features/numbering.rst index 837cf0e9..a89d9ef9 100644 --- a/docs/dev/analysis/features/numbering.rst +++ b/docs/dev/analysis/features/numbering.rst @@ -2,54 +2,743 @@ Numbering Part ============== -... having to do with numbering sequences for ordered lists, etc. ... +Here are some notes that can be used by developers as API functions +for creating bulleted or numbered lists +(including multi-level lists) are developed. +Overview +-------- -Schema excerpt --------------- +The numbering part is documented in section 17.9 of ISO-29500-1. +The file **numbering.xml** contains the built-in and user-defined +list styles. Unlike paragraph, table, and character styles, list +styles need not have a string-based identifier. -.. highlight:: xml +At the root of **numbering.xml** is the element ```` +whose children are of two main types: ```` and +````, of XML types ``CT_AbstractNum`` and ``CT_Num``, +respectively. Each is identified by a unique +attribute (``w:numId`` and ``w:abstractNumId``, respectively) +of type ``CT_DecimalNumber``. -:: +A paragraph contained in **document.xml** +is recognized by the user agent as numbered +if its ```` element contains a ```` +element which references the ``numId`` attribute +of the corresponding numbering style. + +.. code-block:: xml + + + + + + + + + + Level one + + + +The numbering style defined by a ```` element +is limited to : + +- A reference to an abstract numbering style; and +- (optionally) level-specific overrides of the abstract style. 
+ +The standard explains the two-level scheme for defining numbering +styles as follows (terminology is introduced):[#first]_ + +*Abstract numbering definitions* + define the appearance and behavior of a specific set of numbered paragraphs in + a document. Because this construct is abstract, they are not be directly referenced by document content, but + rather they shall be inherited by a +*numbering definition instance,* + which itself is referenced by document content. + +A numbering definition, therefore, will usually be of simple form such as +the following, with only a **numId** attribute and an element referring +to an abstract numbering definition: + +.. code-block:: xml + + + + + +The abstract numbering definition contains the substantive styling information +for lists, namely the level-specific display of the bullet or numeral +and its placement. For example, + +.. code-block:: xml + + + + + + + + + + + + + + + + + +Low- to High-level support +-------------------------- + +The structure of the ``docx`` module can be +profitably understood by identifying the +levels of representation that it operates +at, and the subsystem that acts upon +that representation. + +We arrange them in order of increasing abstraction +from a raw WordProcessingML file. + +1. Packaged OPC file **[python-opc]** +2. Serialized XML **[lxml]** +3. Deserialized XML **[xmlchemy]** +4. Module-specific primitives **[python-docx]** +5. High-level representations **[user]** + +Support for a given operation can be considered +as a gradient (not Boolean) quality: the fewer +levels of representation a user must traverse +to accomplish an operation, the more +*supported* the operation is. + +Issue #122 on the master branch GitHub page +discusses difficulties with creating or styling +complex lists with the ``docx`` module. +Basically, there is no high-level support +for functions such as +creating multi-level lists, nested lists, +and restarting numbering. 
+The maintainer has commented that: + +- The standard itself involves a highly cumbersome + method for achieving these kinds of effects + (this is indeed true), making it difficult + to decide on the best way to expose the + various functions to users; and +- It is, however, possible to achieve the desired + effects with significant effort: work + on the raw XML representation, given + knowledge of the standard. + +The issue is also discussed on StackOverflow +at https://stackoverflow.com/questions/23446268 + +Currently, +looking to the hierarchy above, +access to most parts of the structure relating +to list styles and numbering definitions is only through +a deserialized XML ("Level 3") representation. +Full API support would imply access to a "Level 5" +representation. +Decisions about simplifying the implementation of +numbered lists w/r/t the ISO standard are most +likely to come up when developing "Level 5" +support. +However, it should be uncontroversial to adhere +to the ISO standard very closely when developing +"Level 4" support. This means that the level of +*support* for advanced list operations can +be increased through some straightforward +**xmlchemy**-based declarations, +and high-level design questions need not come +into play. + +This is done through two main programming tasks: + +1. Declare relevant XML types following the ``wml.xsd`` schema + (ISO-29500-1 Appendix A.1). +2. Instruct ``xmlchemy`` to recognize a relevant tag + as an instance of the appropriate type. + +If this is accomplished, various low-level methods will +be exposed which abstract the necessary XML manipulations, +allowing for improved access to desired functions for +a user familiar with the semantics of the ISO standard. 
+ +Pull Request #XX is concerned with updating the module +to declare the following types and expose them to +``xmlchemy``-based methods: + ++-------------------+ +|XML Type | ++===================+ +|CT_NumPicBullet | ++-------------------+ +|CT_AbstractNum | ++-------------------+ +|CT_LongHexNumber | ++-------------------+ +|ST_LongHexNumber | ++-------------------+ +|CT_MultiLevelType | ++-------------------+ +|ST_MultiLevelType | ++-------------------+ +|CT_Lvl | ++-------------------+ +|CT_NumFmt | ++-------------------+ +|ST_NumberFormat | ++-------------------+ +|CT_LevelSuffix | ++-------------------+ +|ST_LevelSuffix | ++-------------------+ +|CT_LevelText | ++-------------------+ +|CT_LvlLegacy | ++-------------------+ +|CT_Num | ++-------------------+ + +Having defined these types and trained the parser to +associate them with elements in the namespace +(this is done through calls to ``register_element_cls`` +in ``oxml.__init__``), it is possible to implement +solutions to the documented issues noted above in +a disciplined way. + +Making use of low-level support +------------------------------- +Once the types listed above are defined and the **xmlchemy** +submodule methods can be used, it becomes a little less +painful to implement a solution to the StackOverflow +question referred to above. + + +.. code-block:: python + + + #!/usr/bin/python + + from docx import Document + from docx import oxml + + + d = Document() + + + """ + 1. Create an abstract numbering definition for a multi-level numbering style. + """ + numXML = d.part.numbering_part.numbering_definitions._numbering + nextAbstractId = max([ J.abstractNumId for J in numXML.abstractNum_lst ] ) + 1 + l = numXML.add_abstractNum() + l.abstractNumId = nextAbstractId + m = l.add_multiLevelType() + m.val = 'multilevel' + + + """ + 2. Define numbering formats for each (zero-indexed) + level. N.B. The formatting text is one-indexed. + The user agent will accept up to nine levels. 
+ """ + formats = {0: "decimal", 1: "upperLetter" } + textFmts = {0: '%1.', 1: '%2.' } + for i in range(2): + lvl = l.add_lvl() + lvl.ilvl = i + n = lvl.add_numFmt() + n.val = formats[i] + lt = lvl.add_lvlText() + lt.val = textFmts[i] + + """ + 3. Link the abstract numbering definition to a numbering definition. + """ + n = numXML.add_num(nextAbstractId) + + """ + 4. Define a function to set the (0-indexed) numbering level of a paragraph. + """ + def set_ilvl(p,ilvl): + pr = p._element._add_pPr() + np = pr.get_or_add_numPr() + il = np.get_or_add_ilvl() + il.val = ilvl + ni = np.get_or_add_numId() + ni.val = n.numId + return(p) + + """ + 5. Create some content + """ + for x in [1,2,3]: + p = d.add_paragraph() + set_ilvl(p,0) + p.add_run("Question %i" % x) + for y in [1,2,3,4]: + p2 = d.add_paragraph() + set_ilvl(p2,1) + p2.add_run("Choice %i" % y) + + + d.save('test.docx') + +Higher level constructs +----------------------- +The following higher level API's have been added +to provide a simpler way to insert numbered lists. + +.. code-block:: python + + d = Document() + d.configure_styles_for_numbered_lists() + + d.add_paragraph("Sample numbered list", style="List Number") + d.add_paragraph("Sample numbered list", style="List Number") + d.add_paragraph("Sample indented list", style="List Number 2") + d.add_paragraph("Sample triple indented list", style="List Number 3") + d.add_paragraph("Sample indented list", style="List Number 2") + d.add_paragraph("Sample numbered list", style="List Number") + + run = d.add_paragraph().add_run("This breaks up the lists") + run.bold = True + + p = d.add_paragraph("This is a new list", style="List Number") + p.restart_numbering() + d.add_paragraph("With updated numbers", style="List Number 2") + + d.save("output.docx") + +Produces the following output (represented here as markdown): + +.. code-block:: markdown + + 1. Sample numbered list + 2. 
Sample numbered list + 2.1 Sample indented list + 2.1.1 Sample triple indented list + 2.2 Sample indented list + 3. Sample numbered list + **This breaks up the lists** + 1. This is a new list + 1.1 With updated numbers + +Element Semantics +----------------- + +This section contains excerpts from ISO-29500-1 describing +how the user agent should handle +````, ````, ````, and their +descendants (section references are to parts of ISO-29500-1). + +**numPr** (§17.3.1.19) + This element specifies that the current paragraph uses numbering information that is defined by a particular + numbering definition instance. + The presence of this element specifies that the paragraph inherits the properties specified by the numbering + definition in the ``num`` element (§17.9.15) at the level specified by the level specified in the ``lvl`` element (§17.9.6) + and shall have an associated number positioned before the beginning of the text flow in this paragraph. When + this element appears as part of the paragraph formatting for a paragraph style, then any numbering level + defined using the ``ilvl`` element shall be ignored, and the ``pStyle`` element (§17.9.23) on the associated abstract + numbering definition shall be used instead. +``ilvl`` (§17.9.3) + This element specifies the numbering level of the numbering definition instance which shall be applied to the + parent paragraph. Its ``val`` attribute is a zero-based index. +``numId`` (§17.9.18) + This element specifies the numbering definition instance which shall be used for the given parent numbered + paragraph in the WordprocessingML document. +``numberingChange`` + Removed. Previously defined in ECMA-376:2006. +``ins`` (§17.13.5.19) + This element specifies that the numbering information defined by the parent element shall be treated as + numbering information which was recorded as an insertion using revisions. 
+**num** (§17.9.15) + This element specifies a unique instance of numbering information that can be referenced by zero or more + paragraphs within the parent WordprocessingML document. + This instance requires the referencing of a base abstract numbering definition through the ``abstractNumId`` child + element (§17.9.2). This element also can be used to specify a set of optional overrides applied to zero or more + levels from the abstract numbering definition inherited by this instance through the optional ``lvlOverride`` + child elements (§17.9.8). +``abstractNumId`` (§17.9.2) + This element specifies the abstract numbering definition information whose properties shall be inherited by the + parent numbering definition instance. +``lvlOverride`` (§17.9.8) + This element specifies an optional override which shall be applied in place of zero or more levels from the + abstract numbering definition for a given numbering definition instance. Each instance of this element is used to + override the appearance and behavior of a given numbering level definition within the given abstract numbering + definition. +**abstractNum** (§17.9.1) + This element specifies a set of properties which shall dictate the appearance and behavior of a set of numbered + paragraphs in a WordprocessingML document. These properties are collectively called an *abstract numbering + definition*, and are the basis for all numbering information in a WordprocessingML document. + Although an abstract numbering definition contains a complete set of numbering, it shall not be directly + referenced by content (hence the use of abstract). Instead, these properties shall be inherited by a numbering + definition instance using the ``num`` element (§17.9.15), which can then itself be referenced by content. +``nsid`` (§17.9.14) + This element associates a unique hexadecimal ID to the parent abstract numbering definition. 
This number shall + be identical for two abstract numbering definitions that are based from the same initial numbering definition --- if + a document is repurposed and the underlying numbering definition is changed, it shall maintain its original ``nsid``. + If this element is omitted, then the list shall have no nsid and one can be added by a producer arbitrarily. +``multiLevelType`` (§17.9.12) + This element specifies the type of numbering defined by a given abstract numbering type. This information shall + only be used by a consumer to determine user interface behaviors for this numbering definition, and shall not + be used to limit the behavior of the list (i.e. a list with multiple levels marked as ``singleLevel`` shall not be + prevented from using levels 2 through 9). + If this element is omitted, then the list shall be assumed to be of any numbering type desired by the consumer. +``tmpl`` (§17.9.29) + This element specifies a unique hexadecimal code which can be used to determine a location within application + user interface in which this abstract numbering definition shall be displayed. + If this element is omitted, then this abstract numbering definition can be displayed in any location chosen by the + consumer. +``name`` (§17.9.13) + This element specifies the name of a given abstract numbering definition. This name can be surfaced in order to + provide a user friendly alias for a given numbering definition, but shall not influence the behavior of the list - + two identical definitions with different name elements shall behave identically. + If this element is omitted, then this abstract numbering definition shall have no name. +``styleLink`` (§17.9.27) + This element specifies that the parent abstract numbering definition is the base numbering definition for the + specified numbering style referenced in its ``val`` attribute. 
+ If this element is omitted, or it references a style which does not exist, then this numbering definition shall not + be the underlying properties for a numbering style. +``numStyleLink`` (§17.9.21) + This element specifies an abstract numbering that does not contain the actual numbering properties for its + numbering type, but rather serves as a reference to a numbering style stored in the document, which shall be + applied when this abstract numbering definition is referenced, and itself points at the actual underlying abstract + numbering definition to be used. + The numbering style that is to be applied when this abstract numbering definition is referenced is identified by + the string contained in ``numStyleLink``'s ``val`` attribute. +**lvl** (§17.9.6) + This element specifies the appearance and behavior of a numbering level within a given abstract numbering + definition. A numbering level contains a set of properties for the display of the numbering for a given numbering + level within an abstract numbering definition. + A numbering level definition is identical to a numbering level override definition, except for the fact that it is + defined as part of an abstract numbering definition using the ``abstractNum`` element (§17.9.1) rather than as part + of a numbering definition instance using the ``num`` element (§17.9.15). +``start`` (§17.9.25) + This element specifies the starting value for the numbering used by the parent numbering level within a given + numbering level definition. This value is used when this level initially starts in a document, as well as whenever it + is restarted via the properties set in the ``lvlRestart`` element (§17.9.10). + If this element is omitted, then the starting value shall be zero (0). +``numFmt`` (§17.9.17) + This element specifies the number format that shall be used to display all numbering at this level in the + numbering definition. 
This information is used to replace the level text string %x , where x is a particular one- + based level index, with the appropriate value unless the ``numFmt`` value is bullet , in which case the literal text + of the level text string is used. This value shall be calculated by counting the number of paragraphs at this level + since the last restart using the numbering system defined in the val attribute. + When a document has a custom number format specified by the format attribute, it shall use the referenced + number format. If the referenced number format cannot be resolved as a number format the consumer shall + use the number format specified by the value of the val attribute. If the corresponding value of the val attribute + is custom , the result is implementation-defined. + If this element is omitted, the level shall be assumed to be of level type ``decimal``. +``lvlRestart`` (§17.9.10) + This element specifies a one-based index which determines when a numbering level should restart to its ``start`` + value (§17.9.25). A numbering level restarts when an instance of the specified numbering level, which shall be + higher (earlier than this level) or any earlier level is used in the given document's contents. [Example: If this + value is 2, then both level two and level one reset this value. end example] + If this element is omitted, the numbering level shall restart each time the previous numbering level or any + earlier level is used. If the specified level is higher than the current level, then this element shall be ignored. As + well, a value of 0 shall specify that this level shall never restart. +``pStyle`` (§17.9.23) + This element specifies the name of a paragraph style which shall automatically apply to this numbering level when + applied to the contents of the document. 
When a paragraph style is defined to include a numbering definition, + any numbering level defined by the ``numPr`` element (§17.3.1.19) shall be ignored, and instead this element shall + specify the numbering level associated with that paragraph style. + If this element references a style which does not exist, or is not a paragraph style, then it can be ignored. +``isLgl`` (§17.9.4) + This element specifies whether or not all levels displayed for a given numbering level's text shall be displayed + using the decimal number format, regardless of the actual number format of that level in the list. [Note: This + numbering style is often referred to as the legal numbering style. end note] + If this element is present, then all numbering levels present in the ``lvlText`` element (§17.9.11) shall be converted + to their decimal equivalents when they are displayed in this level in the numbering format. If this element is + omitted, then each level is displayed using the ``numFmt`` (§17.9.17) of that level. +``suff`` (§17.9.28) + This element specifies the content which shall be added between a given numbering level's text and the text of + every numbered paragraph which references that numbering level. + If this element is omitted, then its value shall be assumed to be tab. +``lvlText`` (§17.9.11) + This element specifies the textual content which shall be displayed when displaying a paragraph with the given + numbering level. + All text in this element's val attribute shall be taken as literal text to be repeated in each instance of this + numbering level, except for any use of the percent symbol (%) followed by a number, which shall be used to + indicate the one-based index of the number to be used at this level. Any number of a level higher than this level + shall be ignored. 
+ When the % syntax is used, the number shall be incremented for each subsequent paragraph of that level + (sequential or not), until the restart level is seen between two subsequent paragraphs of this level. +``lvlPicBulletId`` (§17.9.9) + This element specifies a picture which shall be used as a numbering symbol for a given numbering level by + referring to a picture numbering symbol definition's ``numPicBullet`` element (§17.9.20). This reference is made + through this element's ``val`` attribute. + The picture shall be added to the numbering level by replacing each character in the ``lvlText`` with one instance + of this image. +``legacy`` + Not in the current standard. +``lvlJc`` (§17.9.7) + This element specifies the type of justification used on a numbering level's text within a given numbering level. + This justification is applied relative to the text margin of the parent numbered paragraph in the document. + If omitted, the paragraph shall have left justification relative to the text margin in left-to-right paragraphs, and + right justification relative to the text margin in right-to-left paragraphs. + A numbering level's text is the numeral, symbol, character, graphic, etc. used to create a numbered paragraph as + defined by the ``lvlText`` element (§17.9.11). +``pPr`` (§17.9.22) + This element specifies the paragraph properties which shall be applied as part of a given numbering level within + the parent numbering definition. These paragraph properties are applied to any numbered paragraph that + references the given numbering definition and numbering level. + Paragraph properties specified on the numbered paragraph itself override the paragraph properties specified by + ``pPr`` elements within a numbering ``lvl`` element (§17.9.5, §17.9.6). +``rPr`` (§17.9.24) + This element specifies the run properties which shall be applied to the numbering level's text specified in the + ``lvlText`` element (§17.9.11) when it is applied to paragraphs in this document. 
+ These run properties are applied to all numbering level text used by a given abstract numbering definition and + numbering level. It should be noted that run properties specified on a numbered paragraph itself, or on text + runs within a numbered paragraph, are separate from the run properties specified by ``rPr`` elements within a + numbering level, as the latter affects only the numbering text itself, not the remainder of runs in the numbered + paragraph. + + +Applicable Schema Definitions +----------------------------- + +This section contains excerpts from the schema **wmd.xsd** +which will be necessary to develop basic support for +parsing **numbering.xml** files and enabling **xmlchemy** +functionality for numbering definitions. + +Once a type is appropriately defined in the source +and the parser is given instructions on which tags +to associate it with, then low-level **xmlchemy** +methods can be used to manipulate the XML directly +or write API functions. + +Schemata are given in the remainder of this +section for the +unimplemented (as of version 0.8.7) types which are necessary to +implement suport for numbering styles. + + +**** ``CT_Numbering`` + +.. code-block:: xml - - - + + + - +**** ``CT_NumPicBullet`` + +.. code-block:: xml + + + + + + + + + +**** ``CT_AbstractNum`` + +.. code-block:: xml + + - - + + + + + + + - + + + +``CT_LongHexNumber`` + +.. code-block:: xml + + + + + + + + + + + +**** ``CT_MultiLevelType`` + +.. code-block:: xml + + + - + + + + + + + + +**** ``CT_Lvl`` + +.. code-block:: xml + + - - + + + + + + + + + + + + + + - - - - - - - +**** ``CT_NumFmt`` + +.. code-block:: xml + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**** ``CT_LevelSuffix`` + +.. code-block:: xml + + + - - + + + + + + + +**** ``CT_LevelText`` + +.. code-block:: xml + + + + + + +**** ``CT_LvlLegacy`` + +.. code-block:: xml + + + + + + + + +**** ``CT_Num`` + +.. 
code-block:: xml + + + + + + + + + + +.. [#first] ISO/IEC 29500-1:2012(E) at 684. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3b1fc3b8..51c7b702 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "skelmis-docx" -version = "1.1.2" +version = "1.2.1" description = "Create, read, and update Microsoft Word .docx files." authors = ["Skelmis "] license = "MIT" diff --git a/src/docx/__init__.py b/src/docx/__init__.py index cce13f70..f54dae23 100644 --- a/src/docx/__init__.py +++ b/src/docx/__init__.py @@ -13,7 +13,7 @@ if TYPE_CHECKING: from docx.opc.part import Part -__version__ = "1.2.0" +__version__ = "1.2.1" __all__ = ["Document"] diff --git a/src/docx/document.py b/src/docx/document.py index 12006406..8897d4ec 100644 --- a/src/docx/document.py +++ b/src/docx/document.py @@ -8,9 +8,11 @@ from pathlib import Path from typing import IO, TYPE_CHECKING, Iterator, List +import docx from docx.blkcntnr import BlockItemContainer from docx.enum.section import WD_SECTION from docx.enum.text import WD_BREAK +from docx.oxml import simpletypes from docx.section import Section, Sections from docx.shared import ElementProxy, Emu @@ -38,6 +40,55 @@ def __init__(self, element: CT_Document, part: DocumentPart): self._part = part self.__body = None + def configure_styles_for_numbered_lists(self): + """Configures the underlying document such that you + can include multiple numbered lists with correct numbers. + + If you wish to change the appearance of the resultant styles + then you should override this method with your own styling choices + as these are shipped 'as is' and are generally good enough. 
+ """ + STYP = docx.enum.style.WD_STYLE_TYPE + num_xml = self.part.numbering_part.element + next_abstract_id = max([J.abstractNumId for J in num_xml.abstractNum_lst]) + 1 + l = num_xml._new_abstractNum() + l.abstractNumId = next_abstract_id + l.add_multiLevelType().val = "multilevel" + + formats = { + 0: "decimal", + 1: "decimal", + 2: "decimal", + } + text_fmts = { + 0: "%1.", + 1: "%1.%2.", + 2: "%1.%2.%3.", + } + starts = {0: 1, 1: 1, 2: 1} + restarts = {0: False, 1: False, 2: 1} + hosts = {0: "List Number", 1: "List Number 2", 2: "List Number 3"} + + num_xml.abstractNum_lst[-1].addnext(l) + nNum = num_xml.add_num(next_abstract_id) + + for i in range(3): + lvl = l.add_lvl() + lvl.ilvl = i + lvl.add_start().val = starts[i] + lvl.add_numFmt().val = formats[i] + if restarts[i]: + lvl.add_lvlRestart().val = restarts[i] + lvl.add_lvlText().val = text_fmts[i] + lvl.add_suff().val = "tab" + p_pr = lvl.add_pPr() + p_pr.ind_left = simpletypes.Twips(i * 720) + ho = self.styles.get_by_id( + self.styles.get_style_id(hosts[i], STYP.PARAGRAPH), STYP.PARAGRAPH + ).element.pPr.numPr + ho.get_or_add_ilvl().val = i + ho.get_or_add_numId().val = nNum.numId + def add_heading(self, text: str = "", level: int = 1): """Return a heading paragraph newly added to the end of the document. 
diff --git a/src/docx/oxml/__init__.py b/src/docx/oxml/__init__.py index bf32932f..93517719 100644 --- a/src/docx/oxml/__init__.py +++ b/src/docx/oxml/__init__.py @@ -22,7 +22,7 @@ CT_ShapeProperties, CT_Transform2D, ) -from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String +from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String, CT_LongHexNumber from docx.oxml.text.hyperlink import CT_Hyperlink from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak from docx.oxml.text.run import ( @@ -93,7 +93,20 @@ register_element_cls("w:body", CT_Body) register_element_cls("w:document", CT_Document) -from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr # noqa +from .numbering import ( + CT_Num, + CT_Numbering, + CT_NumLvl, + CT_NumPr, + CT_LevelSuffix, + CT_NumFmt, + CT_MultiLevelType, + CT_LvlLegacy, + CT_LevelText, + CT_NumPicBullet, + CT_Lvl, + CT_AbstractNum, +) register_element_cls("w:abstractNumId", CT_DecimalNumber) register_element_cls("w:ilvl", CT_DecimalNumber) @@ -103,6 +116,22 @@ register_element_cls("w:numPr", CT_NumPr) register_element_cls("w:numbering", CT_Numbering) register_element_cls("w:startOverride", CT_DecimalNumber) +register_element_cls("w:suff", CT_LevelSuffix) +register_element_cls("w:numFmt", CT_NumFmt) +register_element_cls("w:multiLevelType", CT_MultiLevelType) +register_element_cls("w:legacy", CT_LvlLegacy) +register_element_cls("w:lvlText", CT_LevelText) +register_element_cls("w:numPicBullet", CT_NumPicBullet) +register_element_cls("w:lvl", CT_Lvl) +register_element_cls("w:abstractNum", CT_AbstractNum) +register_element_cls("w:nsid", CT_LongHexNumber) +register_element_cls("w:tmpl", CT_LongHexNumber) +register_element_cls("w:start", CT_DecimalNumber) +register_element_cls("w:styleLink", CT_String) +register_element_cls("w:numStyleLink", CT_String) +register_element_cls("w:lvlRestart", CT_DecimalNumber) +register_element_cls("w:lvlPicBulletId", CT_DecimalNumber) +register_element_cls("w:isLgl", CT_OnOff) 
# ---------------------------------------------------------------------------
# src/docx/oxml/numbering.py -- numbering-part element classes
# ---------------------------------------------------------------------------

from docx.oxml.parser import OxmlElement
from docx.oxml.shared import CT_DecimalNumber
from docx.oxml.simpletypes import (
    ST_DecimalNumber,
    ST_LevelSuffix,
    ST_NumberFormat,
    ST_String,
    ST_MultiLevelType,
    ST_TwipsMeasure,
    ST_SignedTwipsMeasure,
    ST_OnOff,
    ST_LongHexNumber,
)
from docx.oxml.xmlchemy import (
    BaseOxmlElement,
    OneAndOnlyOne,
    RequiredAttribute,
    ZeroOrMore,
    ZeroOrOne,
    OptionalAttribute,
    Choice,
)


class CT_LevelText(BaseOxmlElement):
    """``<w:lvlText>`` element, holding the text pattern displayed for a list level.

    The pattern is stored in the ``w:val`` attribute.
    """

    val = OptionalAttribute("w:val", ST_String)
    null = OptionalAttribute("w:null", ST_OnOff)

    @classmethod
    def new(cls, val):
        """Return a new ``<w:lvlText>`` element with its ``val`` attribute set to
        *val*."""
        lvlText = OxmlElement("w:lvlText")
        lvlText.val = val
        return lvlText


class CT_LevelSuffix(BaseOxmlElement):
    """``<w:suff>`` element, which specifies the form of the space between a list
    number and the list paragraph."""

    val = RequiredAttribute("w:val", ST_LevelSuffix)

    @classmethod
    def new(cls, val):
        """Return a new ``<w:suff>`` element with its ``val`` attribute set to
        *val*."""
        suff = OxmlElement("w:suff")
        suff.val = val
        return suff


class CT_NumFmt(BaseOxmlElement):
    """``<w:numFmt>`` element, which specifies the formatting of the numeral in a
    numbered list."""

    val = RequiredAttribute("w:val", ST_NumberFormat)
    # -- maps the `w:format` attribute; named `fmt` on the Python side --
    fmt = OptionalAttribute("w:format", ST_String)

    @classmethod
    def new(cls, val):
        """Return a new ``<w:numFmt>`` element with its ``val`` attribute set to
        *val*."""
        numFmt = OxmlElement("w:numFmt")
        numFmt.val = val
        return numFmt


class CT_LvlLegacy(BaseOxmlElement):
    """``<w:legacy>`` element.

    Implemented here in case the module eventually supports parsing of documents in
    the target legacy format.
    """

    legacy = OptionalAttribute("w:legacy", ST_OnOff)
    legacySpace = OptionalAttribute("w:legacySpace", ST_TwipsMeasure)
    legacyIndent = OptionalAttribute("w:legacyIndent", ST_SignedTwipsMeasure)


class CT_MultiLevelType(BaseOxmlElement):
    """``<w:multiLevelType>`` element, which indicates whether a numbering style is
    single-level, multi-level, or hybrid."""

    val = RequiredAttribute("w:val", ST_MultiLevelType)

    @classmethod
    def new(cls, val):
        """Return a new ``<w:multiLevelType>`` element with its ``val`` attribute set
        to *val*."""
        multiLevelType = OxmlElement("w:multiLevelType")
        multiLevelType.val = val
        return multiLevelType


class CT_AbstractNum(BaseOxmlElement):
    """``<w:abstractNum>`` element, which collects all of the level-specific style
    information for a particular style."""

    # -- Child tags in the order they are declared below. Passing the remainder of
    # -- the sequence as `successors` makes each newly added child land before any
    # -- later-sequenced sibling instead of simply being appended, so the element
    # -- stays ordered no matter which `add_*()` a caller invokes first.
    _tag_seq = (
        "w:nsid",
        "w:multiLevelType",
        "w:tmpl",
        "w:name",
        "w:styleLink",
        "w:numStyleLink",
        "w:lvl",
    )
    # NOTE(review): every child except `w:lvl` looks like an at-most-once element;
    # `ZeroOrMore` is retained because callers use the public `add_*()` methods.
    nsid = ZeroOrMore("w:nsid", successors=_tag_seq[1:])
    multiLevelType = ZeroOrMore("w:multiLevelType", successors=_tag_seq[2:])
    tmpl = ZeroOrMore("w:tmpl", successors=_tag_seq[3:])
    name = ZeroOrMore("w:name", successors=_tag_seq[4:])
    styleLink = ZeroOrMore("w:styleLink", successors=_tag_seq[5:])
    numStyleLink = ZeroOrMore("w:numStyleLink", successors=_tag_seq[6:])
    lvl = ZeroOrMore("w:lvl", successors=_tag_seq[7:])
    abstractNumId = RequiredAttribute("w:abstractNumId", ST_DecimalNumber)
    del _tag_seq

    @classmethod
    def new(cls, abstractNumId):
        """Return a new ``<w:abstractNum>`` element with its ``abstractNumId``
        attribute set to *abstractNumId*."""
        abstractNum = OxmlElement("w:abstractNum")
        abstractNum.abstractNumId = abstractNumId
        return abstractNum


class CT_Lvl(BaseOxmlElement):
    """``<w:lvl>`` element, which contains all of the actual, level-specific
    formatting for a list style."""

    # -- Child tags in declaration order; see `successors` use below. Without it a
    # -- caller that adds `w:lvlText` and then `w:suff` would leave them in that
    # -- (call) order rather than in the declared sequence.
    _tag_seq = (
        "w:start",
        "w:numFmt",
        "w:lvlRestart",
        "w:pStyle",
        "w:isLgl",
        "w:suff",
        "w:lvlText",
        "w:lvlPicBulletId",
        "w:legacy",
        "w:lvlJc",
        "w:pPr",
        "w:rPr",
    )
    start = ZeroOrMore("w:start", successors=_tag_seq[1:])
    numFmt = ZeroOrMore("w:numFmt", successors=_tag_seq[2:])
    lvlRestart = ZeroOrMore("w:lvlRestart", successors=_tag_seq[3:])
    pStyle = ZeroOrMore("w:pStyle", successors=_tag_seq[4:])
    isLgl = ZeroOrMore("w:isLgl", successors=_tag_seq[5:])
    suff = ZeroOrMore("w:suff", successors=_tag_seq[6:])
    lvlText = ZeroOrMore("w:lvlText", successors=_tag_seq[7:])
    lvlPicBulletId = ZeroOrMore("w:lvlPicBulletId", successors=_tag_seq[8:])
    legacy = ZeroOrMore("w:legacy", successors=_tag_seq[9:])
    lvlJc = ZeroOrMore("w:lvlJc", successors=_tag_seq[10:])
    pPr = ZeroOrMore("w:pPr", successors=_tag_seq[11:])
    rPr = ZeroOrMore("w:rPr", successors=_tag_seq[12:])
    ilvl = RequiredAttribute("w:ilvl", ST_DecimalNumber)
    tplc = OptionalAttribute("w:tplc", ST_LongHexNumber)
    tentative = OptionalAttribute("w:tentative", ST_OnOff)
    del _tag_seq

    @classmethod
    def new(cls, ilvl):
        """Return a new ``<w:lvl>`` element with its ``ilvl`` attribute set to
        *ilvl*."""
        lvl = OxmlElement("w:lvl")
        lvl.ilvl = ilvl
        return lvl


class CT_NumPicBullet(BaseOxmlElement):
    """``<w:numPicBullet>`` element, for specifying a picture or drawing as the
    bullet symbol in a bulleted list."""

    # NOTE(review): these `Choice` declarations are not wrapped in a choice-group
    # declaration -- confirm the element metaclass gives bare `Choice` members
    # accessors before relying on `.pict` / `.drawing`.
    pict = Choice("w:pict")
    drawing = Choice("w:drawing")
    numPicBulletId = RequiredAttribute("w:numPicBulletId", ST_DecimalNumber)

    @classmethod
    def new(cls, Id):
        """Return a new ``<w:numPicBullet>`` element with its ``numPicBulletId``
        attribute set to *Id*."""
        numPicBullet = OxmlElement("w:numPicBullet")
        numPicBullet.numPicBulletId = Id
        return numPicBullet


# -- Members added to the existing `CT_Numbering` class ------------------------
# The `class CT_Numbering` statement and its other members are only partly visible
# in this excerpt, so just the two additions are reproduced here; both belong
# inside that class body, `abstractNum` next to the existing `num` declaration.

# -- `successors` keeps a new `w:abstractNum` ahead of the `w:num` elements rather
# -- than appending it after them.
abstractNum = ZeroOrMore(
    "w:abstractNum", successors=("w:num", "w:numIdMacAtCleanup")
)


def abstractNum_having_abstractNumId(self, abstractNumId):
    """Return the ``<w:abstractNum>`` child element having its ``abstractNumId``
    attribute matching *abstractNumId*.

    Raises |KeyError| when no such child is present.
    """
    xpath = './w:abstractNum[@w:abstractNumId="%d"]' % abstractNumId
    try:
        return self.xpath(xpath)[0]
    except IndexError:
        raise KeyError("no <w:abstractNum> element with abstractNumId %d" % abstractNumId)


# ---------------------------------------------------------------------------
# src/docx/oxml/shared.py -- element class shared by several tags
# ---------------------------------------------------------------------------

from docx.oxml.ns import qn
from docx.oxml.parser import OxmlElement
from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String, ST_LongHexNumber
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute


class CT_LongHexNumber(BaseOxmlElement):
    """Element whose required ``w:val`` attribute holds a hexadecimal number.

    The attribute is read and written through `ST_LongHexNumber`.
    """

    val = RequiredAttribute("w:val", ST_LongHexNumber)


# ---------------------------------------------------------------------------
# src/docx/oxml/simpletypes.py -- simple types used by the numbering elements
# ---------------------------------------------------------------------------


class ST_LongHexNumber(XsdUnsignedInt):
    """Unsigned integer stored in XML as hexadecimal digits."""

    @classmethod
    def convert_from_xml(cls, str_value):
        """Return the `int` value of hexadecimal string `str_value`."""
        return int(str_value, 16)

    @classmethod
    def convert_to_xml(cls, value):
        """Return `value` as eight zero-padded hexadecimal digits.

        Alphabetic hex digits are kept uppercase just for consistency.
        """
        # -- expecting eight hexadecimal digits --
        return "%08X" % value


class ST_MultiLevelType(XsdStringEnumeration):
    """Valid values for the ``w:val`` attribute of ``<w:multiLevelType>``."""

    SINGLE = "singleLevel"
    MULTI = "multilevel"
    HYBRID = "hybridMultilevel"

    _members = (SINGLE, MULTI, HYBRID)


class ST_LevelSuffix(XsdStringEnumeration):
    """Valid values for the ``w:val`` attribute of ``<w:suff>``."""

    TAB = "tab"
    SPACE = "space"
    NOTHING = "nothing"

    _members = (TAB, SPACE, NOTHING)


class ST_NumberFormat(XsdStringEnumeration):
    """Valid values for the ``w:val`` attribute of ``<w:numFmt>``."""

    DECIMAL = "decimal"
    UPPERROMAN = "upperRoman"
    LOWERROMAN = "lowerRoman"
    UPPERLETTER = "upperLetter"
    LOWERLETTER = "lowerLetter"
    ORDINAL = "ordinal"
    CARDINALTEXT = "cardinalText"
    ORDINALTEXT = "ordinalText"
    HEX = "hex"
    CHICAGO = "chicago"
    IDEOGRAPHDIGITAL = "ideographDigital"
    JAPANESECOUNTING = "japaneseCounting"
    AIUEO = "aiueo"
    IROHA = "iroha"
    DECIMALFULLWIDTH = "decimalFullWidth"
    DECIMALHALFWIDTH = "decimalHalfWidth"
    JAPANESELEGAL = "japaneseLegal"
    JAPANESEDIGITALTENTHOUSAND = "japaneseDigitalTenThousand"
    DECIMALENCLOSEDCIRCLE = "decimalEnclosedCircle"
    DECIMALFULLWIDTH2 = "decimalFullWidth2"
    AIUEOFULLWIDTH = "aiueoFullWidth"
    IROHAFULLWIDTH = "irohaFullWidth"
    DECIMALZERO = "decimalZero"
    BULLET = "bullet"
    GANADA = "ganada"
    CHOSUNG = "chosung"
    DECIMALENCLOSEDFULLSTOP = "decimalEnclosedFullstop"
    DECIMALENCLOSEDPAREN = "decimalEnclosedParen"
    DECIMALENCLOSEDCIRCLECHINESE = "decimalEnclosedCircleChinese"
    IDEOGRAPHENCLOSEDCIRCLE = "ideographEnclosedCircle"
    IDEOGRAPHTRADITIONAL = "ideographTraditional"
    IDEOGRAPHZODIAC = "ideographZodiac"
    IDEOGRAPHZODIACTRADITIONAL = "ideographZodiacTraditional"
    TAIWANESECOUNTING = "taiwaneseCounting"
    IDEOGRAPHLEGALTRADITIONAL = "ideographLegalTraditional"
    TAIWANESECOUNTINGTHOUSAND = "taiwaneseCountingThousand"
    TAIWANESEDIGITAL = "taiwaneseDigital"
    CHINESECOUNTING = "chineseCounting"
    CHINESELEGALSIMPLIFIED = "chineseLegalSimplified"
    CHINESECOUNTINGTHOUSAND = "chineseCountingThousand"
    KOREANDIGITAL = "koreanDigital"
    KOREANCOUNTING = "koreanCounting"
    KOREANLEGAL = "koreanLegal"
    KOREANDIGITAL2 = "koreanDigital2"
    VIETNAMESECOUNTING = "vietnameseCounting"
    RUSSIANLOWER = "russianLower"
    RUSSIANUPPER = "russianUpper"
    NONE = "none"
    NUMBERINdASH = "numberInDash"
    # -- correctly-cased alias; the mis-cased name above is kept for callers --
    NUMBERINDASH = NUMBERINdASH
    HEBREW1 = "hebrew1"
    HEBREW2 = "hebrew2"
    ARABICALPHA = "arabicAlpha"
    ARABICABJAD = "arabicAbjad"
    HINDIVOWELS = "hindiVowels"
    HINDICONSONANTS = "hindiConsonants"
    HINDINUMBERS = "hindiNumbers"
    HINDICOUNTING = "hindiCounting"
    THAILETTERS = "thaiLetters"
    THAINUMBERS = "thaiNumbers"
    THAICOUNTING = "thaiCounting"
    BAHTTEXT = "bahtText"
    DOLLARTEXT = "dollarText"
    CUSTOM = "custom"

    _members = (
        DECIMAL,
        UPPERROMAN,
        LOWERROMAN,
        UPPERLETTER,
        LOWERLETTER,
        ORDINAL,
        CARDINALTEXT,
        ORDINALTEXT,
        HEX,
        CHICAGO,
        IDEOGRAPHDIGITAL,
        JAPANESECOUNTING,
        AIUEO,
        IROHA,
        DECIMALFULLWIDTH,
        DECIMALHALFWIDTH,
        JAPANESELEGAL,
        JAPANESEDIGITALTENTHOUSAND,
        DECIMALENCLOSEDCIRCLE,
        DECIMALFULLWIDTH2,
        AIUEOFULLWIDTH,
        IROHAFULLWIDTH,
        DECIMALZERO,
        BULLET,
        GANADA,
        CHOSUNG,
        DECIMALENCLOSEDFULLSTOP,
        DECIMALENCLOSEDPAREN,
        DECIMALENCLOSEDCIRCLECHINESE,
        IDEOGRAPHENCLOSEDCIRCLE,
        IDEOGRAPHTRADITIONAL,
        IDEOGRAPHZODIAC,
        IDEOGRAPHZODIACTRADITIONAL,
        TAIWANESECOUNTING,
        IDEOGRAPHLEGALTRADITIONAL,
        TAIWANESECOUNTINGTHOUSAND,
        TAIWANESEDIGITAL,
        CHINESECOUNTING,
        CHINESELEGALSIMPLIFIED,
        CHINESECOUNTINGTHOUSAND,
        KOREANDIGITAL,
        KOREANCOUNTING,
        KOREANLEGAL,
        KOREANDIGITAL2,
        VIETNAMESECOUNTING,
        RUSSIANLOWER,
        RUSSIANUPPER,
        NONE,
        NUMBERINdASH,
        HEBREW1,
        HEBREW2,
        ARABICALPHA,
        ARABICABJAD,
        HINDIVOWELS,
        HINDICONSONANTS,
        HINDINUMBERS,
        HINDICOUNTING,
        THAILETTERS,
        THAINUMBERS,
        THAICOUNTING,
        BAHTTEXT,
        DOLLARTEXT,
        CUSTOM,
    )
def restart_numbering(self):
    """Restart list numbering at 1, beginning with this paragraph.

    A new numbering instance is added to the numbering part via `add_num()`. It
    references the abstract numbering definition this paragraph currently resolves
    to and is given a start-override of 1 for the paragraph's list level. The
    paragraph's own ``w:numPr`` is then pointed at that new instance.

    Numbering set directly on the paragraph takes precedence over numbering found
    on its paragraph style, and the paragraph's current list level is kept (level 0
    when none is specified).

    Raises |ValueError| if you call this on a paragraph which does not contain a
    numbered list, i.e. when neither the paragraph nor its style names a numbering
    instance, or when the named instance cannot be found in the numbering part.
    Neither the paragraph nor its style is modified in that case.
    """
    not_numbered = (
        "Are you sure this paragraph contains a numbered list? It doesn't appear so."
    )

    def first_val(num_prs, child_name):
        """Return `val` of the first `child_name` child present in `num_prs`."""
        for num_pr in num_prs:
            child = None if num_pr is None else getattr(num_pr, child_name)
            if child is not None:
                return child.val
        return None

    try:
        # -- Plain getters only: the `get_or_add_*()` accessors would insert empty
        # -- `w:pPr`/`w:numPr` children into the style just by looking.
        candidates = (self._p.pPr, self.style.element.pPr)
        num_prs = [p_pr.numPr for p_pr in candidates if p_pr is not None]

        num_id = first_val(num_prs, "numId")
        if num_id is None:
            raise ValueError(not_numbered)
        level = first_val(num_prs, "ilvl") or 0

        numbering = self.part.numbering_part.element
        # -- `num_having_numId()` signals an unknown id with KeyError --
        abstract_num_id = numbering.num_having_numId(num_id).abstractNumId.val
    except (AttributeError, KeyError) as e:
        raise ValueError(not_numbered) from e

    # -- new numbering instance on the same abstract definition, restarted at 1 --
    num = numbering.add_num(abstract_num_id)
    num.add_lvlOverride(ilvl=level).add_startOverride(1)

    # -- point this paragraph at the new instance, keeping its list level --
    num_pr = self._p.get_or_add_pPr().get_or_add_numPr()
    num_pr.get_or_add_ilvl().val = level
    num_pr.get_or_add_numId().val = int(num.numId)