Add read-only access to footnotes and endnotes, plus iteration over the
child elements of a run.

Summary of changes
  * docx/__init__.py, docx/parts/notes.py: new NotesPart, registered for
    the WML endnotes and footnotes content types.
  * docx/api.py: Document.endnotes_part and Document.footnotes_part, each
    None when looking up the relationship raises KeyError.
  * docx/oxml/parts/notes.py, docx/oxml/__init__.py: custom element
    classes for w:endnotes, w:endnote, w:footnotes, w:footnote,
    w:endnoteReference and w:footnoteReference.
  * docx/oxml/text.py, docx/text.py: CT_Tab, endnote/footnote reference
    accessors on runs, and Run.get_elements().
  * docx/oxml/parts/styles.py, docx/parts/styles.py: id/type/name on
    CT_Style, StylesPart.get_style() and iteration over _Styles.
  * notes_tests/: nose tests and two .docx fixtures.

Review notes
  * The copy of this patch under review had lost its line structure and
    the inline tag names in docstring context lines. Blank context lines
    and the names ``<w:ins>``, ``<w:del>``, ``<w:br>``, ``<w:t>`` and
    ``<w:r>`` were filled back in; confirm them against the target tree
    before applying.
  * The "index" lines are carried over unchanged, so the new-side blob
    ids no longer correspond to the edited content.
  * The two binary fixtures are listed only; their content is not part of
    this text patch.
  * NoteReference.__init__ accepts *note_type* but does not store it.
  * Run.get_elements() raises ValueError for any child element whose
    class is not a key of Run._elements_map.

diff --git a/docx/__init__.py b/docx/__init__.py
index b6147da04..595755e5f 100644
--- a/docx/__init__.py
+++ b/docx/__init__.py
@@ -14,6 +14,7 @@
 from docx.parts.image import ImagePart
 from docx.parts.numbering import NumberingPart
 from docx.parts.styles import StylesPart
+from docx.parts.notes import NotesPart
 
 
 def part_class_selector(content_type, reltype):
@@ -26,5 +27,7 @@ def part_class_selector(content_type, reltype):
 PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart
 PartFactory.part_type_for[CT.WML_NUMBERING] = NumberingPart
 PartFactory.part_type_for[CT.WML_STYLES] = StylesPart
+PartFactory.part_type_for[CT.WML_ENDNOTES] = NotesPart
+PartFactory.part_type_for[CT.WML_FOOTNOTES] = NotesPart
 
 del CT, DocumentPart, PartFactory, part_class_selector
diff --git a/docx/api.py b/docx/api.py
index 88e97c115..ae0b76d1f 100644
--- a/docx/api.py
+++ b/docx/api.py
@@ -15,6 +15,7 @@
 from docx.package import Package
 from docx.parts.numbering import NumberingPart
 from docx.parts.styles import StylesPart
+from docx.parts.notes import NotesPart
 from docx.shared import lazyproperty
 
 
@@ -139,7 +140,33 @@ def paragraphs(self):
         marks such as ``<w:ins>`` or ``<w:del>`` do not appear in this list.
         """
         return self._document_part.paragraphs
 
+    @lazyproperty
+    def endnotes_part(self):
+        """
+        The part related to the main document part by the endnotes
+        relationship type, or None if that lookup raises KeyError.
+        """
+        return self._notes_part(RT.ENDNOTES)
+
+    @lazyproperty
+    def footnotes_part(self):
+        """
+        The part related to the main document part by the footnotes
+        relationship type, or None if that lookup raises KeyError.
+        """
+        return self._notes_part(RT.FOOTNOTES)
+
+    def _notes_part(self, rel_type):
+        """
+        Return the part related to the main document part by *rel_type*,
+        or None if ``part_related_by()`` raises KeyError.
+        """
+        try:
+            return self._document_part.part_related_by(rel_type)
+        except KeyError:
+            return None
+
     def save(self, path_or_stream):
         """
         Save this document to *path_or_stream*, which can be either a path to
diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py
index 820eeec1d..ece0e859b 100644
--- a/docx/oxml/__init__.py
+++ b/docx/oxml/__init__.py
@@ -45,6 +45,14 @@
 register_custom_element_class('w:style', CT_Style)
 register_custom_element_class('w:styles', CT_Styles)
 
+from docx.oxml.parts.notes import CT_Endnotes, CT_Footnotes, CT_Note, CT_EndnoteReference, CT_FootnoteReference
+register_custom_element_class('w:endnotes', CT_Endnotes)
+register_custom_element_class('w:endnote', CT_Note)
+register_custom_element_class('w:footnotes', CT_Footnotes)
+register_custom_element_class('w:footnote', CT_Note)
+register_custom_element_class('w:endnoteReference', CT_EndnoteReference)
+register_custom_element_class('w:footnoteReference', CT_FootnoteReference)
+
 from docx.oxml.table import CT_Row, CT_Tbl, CT_TblGrid, CT_TblPr, CT_Tc
 register_custom_element_class('w:tbl', CT_Tbl)
 register_custom_element_class('w:tblGrid', CT_TblGrid)
@@ -54,10 +62,11 @@
 register_custom_element_class('w:tr', CT_Row)
 
 from docx.oxml.text import (
-    CT_Br, CT_P, CT_PPr, CT_R, CT_RPr, CT_Text, CT_Underline
+    CT_Tab, CT_Br, CT_P, CT_PPr, CT_R, CT_RPr, CT_Text, CT_Underline
 )
 register_custom_element_class('w:b', CT_OnOff)
 register_custom_element_class('w:bCs', CT_OnOff)
+register_custom_element_class('w:tab', CT_Tab)
 register_custom_element_class('w:br', CT_Br)
 register_custom_element_class('w:caps', CT_OnOff)
 register_custom_element_class('w:cs', CT_OnOff)
diff --git a/docx/oxml/parts/notes.py b/docx/oxml/parts/notes.py
new file mode 100644
index 000000000..cedc4b9a7
--- /dev/null
+++ b/docx/oxml/parts/notes.py
@@ -0,0 +1,86 @@
+"""
+Custom element classes related to the footnotes and endnotes parts.
+"""
+
+from docx.oxml.shared import OxmlBaseElement, qn
+
+
+class CT_Notes(OxmlBaseElement):
+    """
+    Base class for the ``<w:endnotes>`` and ``<w:footnotes>`` elements.
+    Subclasses set *_notes_tag* to the qualified tag of their note children.
+    """
+    _notes_tag = None
+
+    @property
+    def notes_lst(self):
+        """
+        List of the direct children matching *_notes_tag*, in document order.
+        """
+        return self.findall(self._notes_tag)
+
+
+class CT_Endnotes(CT_Notes):
+    """
+    ``<w:endnotes>`` element, container of ``<w:endnote>`` elements.
+    """
+    _notes_tag = qn('w:endnote')
+
+
+class CT_Footnotes(CT_Notes):
+    """
+    ``<w:footnotes>`` element, container of ``<w:footnote>`` elements.
+    """
+    _notes_tag = qn('w:footnote')
+
+
+class CT_Note(OxmlBaseElement):
+    """
+    ``<w:endnote>`` or ``<w:footnote>`` element, a single note.
+    """
+    @property
+    def type(self):
+        """
+        Value of the ``w:type`` attribute, or None if it is not present.
+        """
+        return self.attrib.get(qn('w:type'))
+
+    @property
+    def id(self):
+        """
+        Value of the ``w:id`` attribute converted to int. Raises TypeError
+        if the attribute is not present.
+        """
+        return int(self.attrib.get(qn('w:id')))
+
+    @property
+    def p_lst(self):
+        """
+        List of the ``<w:p>`` elements that are direct children of this note.
+        """
+        return self.findall(qn('w:p'))
+
+
+class CT_NoteReference(OxmlBaseElement):
+    """
+    Base class for ``<w:endnoteReference>`` and ``<w:footnoteReference>``.
+    """
+    @property
+    def id(self):
+        """
+        Value of the ``w:id`` attribute converted to int. Raises TypeError
+        if the attribute is not present.
+        """
+        return int(self.attrib.get(qn('w:id')))
+
+
+class CT_EndnoteReference(CT_NoteReference):
+    """
+    ``<w:endnoteReference>`` element.
+    """
+
+
+class CT_FootnoteReference(CT_NoteReference):
+    """
+    ``<w:footnoteReference>`` element.
+    """
diff --git a/docx/oxml/parts/styles.py b/docx/oxml/parts/styles.py
index 58fb6ecfe..9af88dec6 100644
--- a/docx/oxml/parts/styles.py
+++ b/docx/oxml/parts/styles.py
@@ -14,6 +14,20 @@ class CT_Style(OxmlBaseElement):
     @property
     def pPr(self):
         return self.find(qn('w:pPr'))
+
+    @property
+    def id(self):
+        return self.attrib.get(qn('w:styleId'))
+
+    @property
+    def type(self):
+        return self.attrib.get(qn('w:type'))
+
+    @property
+    def name(self):
+        el = self.find(qn('w:name'))
+        if el is not None:
+            return el.attrib.get(qn('w:val'))
 
 
 class CT_Styles(OxmlBaseElement):
diff --git a/docx/oxml/text.py b/docx/oxml/text.py
index 37b1edefe..785f25964 100644
--- a/docx/oxml/text.py
+++ b/docx/oxml/text.py
@@ -12,6 +12,14 @@
 )
 
 
+class CT_Tab(OxmlBaseElement):
+    """``<w:tab>`` element."""
+
+    @classmethod
+    def new(cls):
+        return OxmlElement('w:tab')
+
+
 class CT_Br(OxmlBaseElement):
     """
     ``<w:br>`` element, indicating a line, page, or column break in a run.
@@ -294,6 +302,14 @@ def t_lst(self):
         Sequence of <w:t> elements in this paragraph.
         """
         return self.findall(qn('w:t'))
+
+    @property
+    def endnote_refs(self):
+        return self.findall(qn('w:endnoteReference'))
+
+    @property
+    def footnote_refs(self):
+        return self.findall(qn('w:footnoteReference'))
 
     @property
     def underline(self):
diff --git a/docx/parts/notes.py b/docx/parts/notes.py
new file mode 100644
index 000000000..70318de3f
--- /dev/null
+++ b/docx/parts/notes.py
@@ -0,0 +1,74 @@
+"""
+|NotesPart| and closely related objects.
+"""
+
+from ..opc.package import Part
+from ..oxml.shared import oxml_fromstring
+from ..shared import lazyproperty
+from ..text import Paragraph
+
+
+class NotesPart(Part):
+    """
+    Package part holding footnotes or endnotes; the same class is registered
+    for both content types.
+    """
+    def __init__(self, partname, content_type, endnotes_elm, package):
+        super(NotesPart, self).__init__(
+            partname, content_type, package=package
+        )
+        # despite its name, *endnotes_elm* is whatever root element was
+        # parsed from the part blob
+        self._element = endnotes_elm
+
+    @classmethod
+    def load(cls, partname, content_type, blob, package):
+        """
+        Provides PartFactory interface for loading a notes part from a WML
+        package.
+        """
+        notes_elm = oxml_fromstring(blob)
+        return cls(partname, content_type, notes_elm, package)
+
+    @classmethod
+    def new(cls):
+        """
+        Not implemented; always raises NotImplementedError.
+        """
+        raise NotImplementedError
+
+    def get_note(self, note_id):
+        """
+        Return the |Note| whose id is *note_id*. The id-to-note mapping is
+        built on first call and reused afterward. Raises KeyError if no note
+        has that id.
+        """
+        if not hasattr(self, '_notes_map'):
+            self._notes_map = dict((n.id, n) for n in self.notes)
+        return self._notes_map[note_id]
+
+    @property
+    def notes(self):
+        """
+        List of |Note| proxies, newly created on each access, one for each
+        note element in this part.
+        """
+        return [Note(n) for n in self._element.notes_lst]
+
+
+class Note(object):
+    """
+    Proxy for a single footnote or endnote element.
+    """
+    def __init__(self, el):
+        self._element = el
+        self.id = el.id
+        self.type = el.type
+
+    @property
+    def paragraphs(self):
+        """
+        List of |Paragraph| proxies, one for each paragraph that is a direct
+        child of this note.
+        """
+        return [Paragraph(p) for p in self._element.p_lst]
diff --git a/docx/parts/styles.py b/docx/parts/styles.py
index b63317b9e..e68269cc1 100644
--- a/docx/parts/styles.py
+++ b/docx/parts/styles.py
@@ -48,6 +48,9 @@ def styles(self):
         proxies) for this styles part.
         """
         return _Styles(self._element)
+
+    def get_style(self, style_id):
+        return self._element.style_having_styleId(style_id)
 
 
 class _Styles(object):
@@ -61,3 +64,6 @@ def __init__(self, styles_elm):
 
     def __len__(self):
         return len(self._styles_elm.style_lst)
+
+    def __iter__(self):
+        return iter(self._styles_elm.style_lst)
diff --git a/docx/text.py b/docx/text.py
index daf08c000..f8f76cb71 100644
--- a/docx/text.py
+++ b/docx/text.py
@@ -6,6 +6,8 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+from docx.oxml.text import CT_RPr, CT_Text, CT_Br, CT_Tab
+from docx.oxml.parts.notes import CT_EndnoteReference, CT_FootnoteReference
 from docx.enum.text import WD_BREAK
 
 
@@ -103,6 +105,43 @@ def text(self):
         return text
 
 
+class Text(object):
+    """
+    Proxy object wrapping ``<w:t>`` element.
+    """
+    def __init__(self, t_elm):
+        super(Text, self).__init__()
+        self._t = t_elm
+
+
+class NoteReference(object):
+    """Base proxy for a footnote or endnote reference element in a run."""
+    def __init__(self, el, note_type=None):
+        self._element = el
+
+    @property
+    def id(self):
+        return self._element.id
+
+
+class EndnoteReference(NoteReference):
+    """Proxy object wrapping ``<w:endnoteReference>`` element."""
+
+
+class FootnoteReference(NoteReference):
+    """Proxy object wrapping ``<w:footnoteReference>`` element."""
+
+
+class RunElement(object):
+    """Base proxy wrapping a single child element of a run."""
+    def __init__(self, el):
+        self._element = el
+
+
+class LineBreak(RunElement): pass
+class Tab(RunElement): pass
+
+
 class Run(object):
     """
     Proxy object wrapping ``<w:r>`` element. Several of the properties on Run
@@ -111,9 +150,35 @@ class Run(object):
     not specified directly on the run and its effective value is taken from
     the style hierarchy.
     """
+
+    _elements_map = {
+        CT_RPr: None,
+        CT_Text: Text,
+        CT_Br: LineBreak,
+        CT_Tab: Tab,
+        CT_EndnoteReference: EndnoteReference,
+        CT_FootnoteReference: FootnoteReference,
+    }
+
     def __init__(self, r):
         super(Run, self).__init__()
         self._r = r
+
+    def get_elements(self):
+        """
+        Generate a proxy for each child of this run, in document order,
+        skipping children mapped to None in *_elements_map*. Raises
+        ValueError for a child whose class has no entry in that map.
+        """
+        elements_map = self._elements_map
+        for el in list(self._r):
+            element_type = type(el)
+            if element_type not in elements_map:
+                msg = "No mapping for element type %s" % element_type
+                raise ValueError(msg)
+            wrapper = elements_map[element_type]
+            if wrapper:
+                yield wrapper(el)
 
     def add_break(self, break_type=WD_BREAK.LINE):
         """
@@ -328,6 +393,14 @@ def text(self):
         for t in self._r.t_lst:
             text += t.text
         return text
+
+    @property
+    def endnote_references(self):
+        return [EndnoteReference(el, 'endnote') for el in self._r.endnote_refs]
+
+    @property
+    def footnote_references(self):
+        return [FootnoteReference(el, 'footnote') for el in self._r.footnote_refs]
 
     @property
     def underline(self):
@@ -357,11 +430,3 @@ def web_hidden(self):
         """
         return 'webHidden'
 
-
-class Text(object):
-    """
-    Proxy object wrapping ``<w:t>`` element.
-    """
-    def __init__(self, t_elm):
-        super(Text, self).__init__()
-        self._t = t_elm
diff --git a/notes_tests/__init__.py b/notes_tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/notes_tests/data/notes.docx b/notes_tests/data/notes.docx
new file mode 100644
index 000000000..99fd000f2
Binary files /dev/null and b/notes_tests/data/notes.docx differ
diff --git a/notes_tests/data/run.docx b/notes_tests/data/run.docx
new file mode 100644
index 000000000..e2dca75d1
Binary files /dev/null and b/notes_tests/data/run.docx differ
diff --git a/notes_tests/test_docx.py b/notes_tests/test_docx.py
new file mode 100644
index 000000000..4ffc82c1e
--- /dev/null
+++ b/notes_tests/test_docx.py
@@ -0,0 +1,121 @@
+import os
+import logging
+
+from nose.tools import *
+
+from docx import api
+from docx.oxml.shared import qn
+from docx.text import EndnoteReference, FootnoteReference
+from docx.parts.notes import NotesPart, Note
+from docx.oxml.parts.notes import CT_EndnoteReference, CT_FootnoteReference
+
+
+logger = logging.getLogger('docx_converter.tests.docx')
+
+DOC = api.Document(os.path.join(
+    os.path.dirname(__file__),
+    'data/notes.docx'
+))
+
+
+def test_parts():
+    assert_equals(type(DOC.endnotes_part), NotesPart)
+    assert_equals(type(DOC.footnotes_part), NotesPart)
+
+
+def test_notes():
+    part = DOC.endnotes_part
+    assert_equals(type(part.notes), list)
+    assert_equals(len(part.notes), 5)
+
+
+def test_footnotes():
+    part = DOC.footnotes_part
+    assert_equals(type(part.notes), list)
+    assert_equals(len(part.notes), 3)
+
+
+def test_get_endnote():
+    note = DOC.endnotes_part.get_note(0)
+    assert_equals(type(note), Note)
+
+
+def test_get_footnote():
+    note = DOC.footnotes_part.get_note(0)
+    assert_equals(type(note), Note)
+
+
+def test_endnote():
+    note = DOC.endnotes_part.get_note(3)
+    assert_equals(note.id, 3)
+    assert_is_none(note.type)
+    note_paragraphs = note.paragraphs
+    assert_equals(type(note_paragraphs), list)
+    assert_equals(len(note_paragraphs), 2)
+
+
+def test_footnote():
+    note = DOC.footnotes_part.get_note(2)
+    assert_equals(note.id, 2)
+    assert_is_none(note.type)
+    note_paragraphs = note.paragraphs
+    assert_equals(type(note_paragraphs), list)
+    assert_equals(len(note_paragraphs), 1)
+
+
+def test_style_attributes():
+    style = DOC.styles_part.get_style('style1')
+    assert_equals(style.id, 'style1')
+    assert_equals(style.type, 'paragraph')
+    assert_equals(style.name, 'Heading 1')
+
+
+def test_style_iterator():
+    assert_equals(
+        [(s.id, s.type, s.name) for s in DOC.styles_part.styles],
+        [
+            ('style0', 'paragraph', 'Normal'),
+            ('style1', 'paragraph', 'Heading 1'),
+            ('style15', 'character', 'Endnote anchor'),
+            ('style16', 'character', 'Footnote anchor'),
+            ('style17', 'character', 'Endnote Characters'),
+            ('style18', 'character', 'Footnote Characters'),
+            ('style19', 'paragraph', 'Heading'),
+            ('style20', 'paragraph', 'Text body'),
+            ('style21', 'paragraph', 'List'),
+            ('style22', 'paragraph', 'Caption'),
+            ('style23', 'paragraph', 'Index'),
+            ('style24', 'paragraph', 'Endnote'),
+            ('style25', 'paragraph', 'Footnote')
+        ]
+    )
+
+
+def test_endnoterefs():
+    run = DOC.paragraphs[2].runs[1]
+    assert_equals(run.text, '')
+    _endnoteref = run._r[1]
+    assert_equals(_endnoteref.tag, qn('w:endnoteReference'))
+    assert_equals(type(_endnoteref), CT_EndnoteReference)
+    assert_equals(_endnoteref.id, 2)
+    endnoterefs = run.endnote_references
+    assert_true(endnoterefs)
+    assert_equals(len(endnoterefs), 1)
+    assert_equals(type(endnoterefs[0]), EndnoteReference)
+    assert_equals(endnoterefs[0].id, _endnoteref.id)
+
+
+def test_footnoterefs():
+    run = DOC.paragraphs[4].runs[1]
+    assert_equals(run.text, '')
+    _footnoteref = run._r[1]
+    assert_equals(_footnoteref.tag, qn('w:footnoteReference'))
+    assert_equals(type(_footnoteref), CT_FootnoteReference)
+    assert_equals(_footnoteref.id, 2)
+    footnoterefs = run.footnote_references
+    assert_true(footnoterefs)
+    assert_equals(len(footnoterefs), 1)
+    assert_equals(type(footnoterefs[0]), FootnoteReference)
+    assert_equals(footnoterefs[0].id, _footnoteref.id)
+
+
diff --git a/notes_tests/test_docx_run.py b/notes_tests/test_docx_run.py
new file mode 100644
index 000000000..cf7e2ec26
--- /dev/null
+++ b/notes_tests/test_docx_run.py
@@ -0,0 +1,35 @@
+import os
+import logging
+
+from types import GeneratorType
+from itertools import chain
+
+from nose.tools import *
+
+from docx import api
+from docx import text
+
+
+logger = logging.getLogger('docx_converter.tests.docx_run')
+
+DOC = api.Document(os.path.join(
+    os.path.dirname(__file__),
+    'data/run.docx'
+))
+
+
+def test_get_elements_type():
+    result = DOC.paragraphs[0].runs[0].get_elements()
+    assert_equals(type(result), GeneratorType)
+    assert_equals([type(e) for e in result], [text.Text])
+
+
+def test_element_classes():
+    result = set()
+    for p in DOC.paragraphs:
+        for r in p.runs:
+            for el in r.get_elements():
+                result.add(type(el))
+    expected = set([text.Text, text.LineBreak, text.EndnoteReference, text.FootnoteReference, text.Tab])
+    assert_equals(result, expected)
+