Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add read only processing of footnotes from ludoo/python-docx #1

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions docx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from docx.parts.image import ImagePart
from docx.parts.numbering import NumberingPart
from docx.parts.styles import StylesPart
from docx.parts.notes import NotesPart


def part_class_selector(content_type, reltype):
Expand All @@ -26,5 +27,7 @@ def part_class_selector(content_type, reltype):
# Associate WML content types with the part class registered for each.
# NOTE(review): presumably PartFactory consults this mapping when loading a
# package part -- confirm against PartFactory.
PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart
PartFactory.part_type_for[CT.WML_NUMBERING] = NumberingPart
PartFactory.part_type_for[CT.WML_STYLES] = StylesPart
# endnotes and footnotes content types are both registered to NotesPart
PartFactory.part_type_for[CT.WML_ENDNOTES] = NotesPart
PartFactory.part_type_for[CT.WML_FOOTNOTES] = NotesPart

del CT, DocumentPart, PartFactory, part_class_selector
17 changes: 16 additions & 1 deletion docx/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from docx.package import Package
from docx.parts.numbering import NumberingPart
from docx.parts.styles import StylesPart
from docx.parts.notes import NotesPart
from docx.shared import lazyproperty


Expand Down Expand Up @@ -139,7 +140,21 @@ def paragraphs(self):
marks such as ``<w:ins>`` or ``<w:del>`` do not appear in this list.
"""
return self._document_part.paragraphs


@lazyproperty
def endnotes_part(self):
    """
    Part related to the document part by ``RT.ENDNOTES``, as returned by
    ``_notes_part()``.
    """
    # NOTE(review): presumably evaluated once and cached by lazyproperty
    # -- confirm against docx.shared.
    rel_type = RT.ENDNOTES
    return self._notes_part(rel_type)

@lazyproperty
def footnotes_part(self):
    """
    Part related to the document part by ``RT.FOOTNOTES``, as returned by
    ``_notes_part()``.
    """
    # NOTE(review): presumably evaluated once and cached by lazyproperty
    # -- confirm against docx.shared.
    rel_type = RT.FOOTNOTES
    return self._notes_part(rel_type)

def _notes_part(self, rel_type):
    """
    Return the part the document part is related to by *rel_type*, or
    None when that lookup raises KeyError.
    """
    try:
        part = self._document_part.part_related_by(rel_type)
    except KeyError:
        # lookup failed; report "no part" rather than propagate the error
        part = None
    return part

def save(self, path_or_stream):
"""
Save this document to *path_or_stream*, which can be either a path to
Expand Down
11 changes: 10 additions & 1 deletion docx/oxml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@
register_custom_element_class('w:style', CT_Style)
register_custom_element_class('w:styles', CT_Styles)

from docx.oxml.parts.notes import (
    CT_Endnotes, CT_Footnotes, CT_Note, CT_EndnoteReference,
    CT_FootnoteReference
)
# the container elements get distinct classes; individual endnote and
# footnote elements share CT_Note
register_custom_element_class('w:endnotes', CT_Endnotes)
register_custom_element_class('w:endnote', CT_Note)
register_custom_element_class('w:footnotes', CT_Footnotes)
register_custom_element_class('w:footnote', CT_Note)
# reference elements
register_custom_element_class('w:endnoteReference', CT_EndnoteReference)
register_custom_element_class('w:footnoteReference', CT_FootnoteReference)

from docx.oxml.table import CT_Row, CT_Tbl, CT_TblGrid, CT_TblPr, CT_Tc
register_custom_element_class('w:tbl', CT_Tbl)
register_custom_element_class('w:tblGrid', CT_TblGrid)
Expand All @@ -54,10 +62,11 @@
register_custom_element_class('w:tr', CT_Row)

from docx.oxml.text import (
CT_Br, CT_P, CT_PPr, CT_R, CT_RPr, CT_Text, CT_Underline
CT_Tab, CT_Br, CT_P, CT_PPr, CT_R, CT_RPr, CT_Text, CT_Underline
)
register_custom_element_class('w:b', CT_OnOff)
register_custom_element_class('w:bCs', CT_OnOff)
register_custom_element_class('w:tab', CT_Tab)
register_custom_element_class('w:br', CT_Br)
register_custom_element_class('w:caps', CT_OnOff)
register_custom_element_class('w:cs', CT_OnOff)
Expand Down
48 changes: 48 additions & 0 deletions docx/oxml/parts/notes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from docx.oxml.shared import OxmlBaseElement, qn


class CT_Notes(OxmlBaseElement):
    """
    Base class for notes container elements. A subclass sets ``_notes_tag``
    to the qualified tag name of the note child elements it contains.
    """
    # placeholder; each concrete subclass assigns its own tag
    _notes_tag = None

    @property
    def notes_lst(self):
        """
        Child elements matching ``_notes_tag``, as found by ``findall()``.
        """
        tag = self._notes_tag
        return self.findall(tag)


class CT_Endnotes(CT_Notes):
    """
    Notes container whose note children are ``<w:endnote>`` elements.
    """
    _notes_tag = qn('w:endnote')


class CT_Footnotes(CT_Notes):
    """
    Notes container whose note children are ``<w:footnote>`` elements.
    """
    _notes_tag = qn('w:footnote')


class CT_Note(OxmlBaseElement):
    """
    Element class for a single note, exposing its ``w:type`` and ``w:id``
    attributes and its ``<w:p>`` children.
    """
    @property
    def type(self):
        """
        Value of the ``w:type`` attribute as read by ``attrib.get()``.
        """
        type_key = qn('w:type')
        return self.attrib.get(type_key)

    @property
    def id(self):
        """
        Value of the ``w:id`` attribute converted with ``int()``.
        """
        id_str = self.attrib.get(qn('w:id'))
        return int(id_str)

    @property
    def p_lst(self):
        """
        ``<w:p>`` child elements, as found by ``findall()``.
        """
        p_tag = qn('w:p')
        return self.findall(p_tag)


class CT_NoteReference(OxmlBaseElement):
    """
    Base class for note reference elements, exposing the ``w:id``
    attribute.
    """
    @property
    def id(self):
        """
        Value of the ``w:id`` attribute converted with ``int()``.
        """
        id_str = self.attrib.get(qn('w:id'))
        return int(id_str)


class CT_EndnoteReference(CT_NoteReference):
    """
    Note reference element class for endnotes; adds nothing to its base.
    """


class CT_FootnoteReference(CT_NoteReference):
    """
    Note reference element class for footnotes; adds nothing to its base.
    """
14 changes: 14 additions & 0 deletions docx/oxml/parts/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@ class CT_Style(OxmlBaseElement):
@property
def pPr(self):
    """
    Result of ``find()`` for a ``<w:pPr>`` child element.
    """
    pPr_tag = qn('w:pPr')
    return self.find(pPr_tag)

@property
def id(self):
    """
    Value of the ``w:styleId`` attribute as read by ``attrib.get()``.
    """
    style_id_key = qn('w:styleId')
    return self.attrib.get(style_id_key)

@property
def type(self):
    """
    Value of the ``w:type`` attribute as read by ``attrib.get()``.
    """
    type_key = qn('w:type')
    return self.attrib.get(type_key)

@property
def name(self):
    """
    Value of the ``w:val`` attribute of the ``<w:name>`` child element, or
    None when ``find()`` locates no such child.
    """
    name_elm = self.find(qn('w:name'))
    if name_elm is None:
        return None
    return name_elm.attrib.get(qn('w:val'))


class CT_Styles(OxmlBaseElement):
Expand Down
16 changes: 16 additions & 0 deletions docx/oxml/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
)


class CT_Tab(OxmlBaseElement):
    """
    Element class for ``<w:tab>``.
    """
    @classmethod
    def new(cls):
        """
        Return the element produced by ``OxmlElement('w:tab')``.
        """
        tab = OxmlElement('w:tab')
        return tab



class CT_Br(OxmlBaseElement):
"""
``<w:br>`` element, indicating a line, page, or column break in a run.
Expand Down Expand Up @@ -294,6 +302,14 @@ def t_lst(self):
Sequence of <w:t> elements in this paragraph.
"""
return self.findall(qn('w:t'))

@property
def endnote_refs(self):
    """
    ``<w:endnoteReference>`` child elements, as found by ``findall()``.
    """
    ref_tag = qn('w:endnoteReference')
    return self.findall(ref_tag)

@property
def footnote_refs(self):
    """
    ``<w:footnoteReference>`` child elements, as found by ``findall()``.
    """
    ref_tag = qn('w:footnoteReference')
    return self.findall(ref_tag)

@property
def underline(self):
Expand Down
47 changes: 47 additions & 0 deletions docx/parts/notes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from ..opc.package import Part
from ..oxml.shared import oxml_fromstring
from ..shared import lazyproperty
from ..text import Paragraph


class NotesPart(Part):
    """
    Package part wrapping a notes root element and giving read access to
    the notes it contains, as a list or by note id.
    """
    def __init__(self, partname, content_type, endnotes_elm, package):
        """
        *endnotes_elm* is stored as the root element of this part. Its name
        is historical; it is whatever element ``load()`` parsed from the
        part blob.
        """
        super(NotesPart, self).__init__(
            partname, content_type, package=package
        )
        self._element = endnotes_elm

    @classmethod
    def load(cls, partname, content_type, blob, package):
        """
        Provides PartFactory interface for loading a notes part from a WML
        package. *blob* is parsed with ``oxml_fromstring()`` and the
        resulting element becomes the root element of the new part.
        """
        notes_elm = oxml_fromstring(blob)
        return cls(partname, content_type, notes_elm, package)

    @classmethod
    def new(cls):
        """
        Not supported; always raises NotImplementedError.
        """
        raise NotImplementedError

    def get_note(self, note_id):
        """
        Return the |Note| whose id is *note_id*. Raises KeyError if no note
        has that id. The id-to-note mapping is built on first call and
        reused afterwards.
        """
        if not hasattr(self, '_notes_map'):
            self._notes_map = dict((n.id, n) for n in self.notes)
        return self._notes_map[note_id]

    @property
    def notes(self):
        """
        New list of |Note| proxies, one for each element in ``notes_lst``
        of the root element. A fresh list is built on each access.
        """
        return [Note(n) for n in self._element.notes_lst]


class Note(object):
    """
    Proxy for a single note element. ``id`` and ``type`` are copied from
    the element when the proxy is created.
    """
    def __init__(self, el):
        self._element = el
        # snapshot of the element's values at construction time
        self.id, self.type = el.id, el.type

    @property
    def paragraphs(self):
        """
        New list of |Paragraph| proxies, one for each item in ``p_lst`` of
        the wrapped element.
        """
        p_elms = self._element.p_lst
        return [Paragraph(p_elm) for p_elm in p_elms]
6 changes: 6 additions & 0 deletions docx/parts/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def styles(self):
proxies) for this styles part.
"""
return _Styles(self._element)

def get_style(self, style_id):
    """
    Return the result of ``style_having_styleId(style_id)`` called on the
    root element of this part.
    """
    styles_elm = self._element
    return styles_elm.style_having_styleId(style_id)


class _Styles(object):
Expand All @@ -61,3 +64,6 @@ def __init__(self, styles_elm):

def __len__(self):
    """
    Number of items in ``style_lst`` of the wrapped styles element.
    """
    style_elms = self._styles_elm.style_lst
    return len(style_elms)

def __iter__(self):
    """
    Iterator over the items in ``style_lst`` of the wrapped styles
    element.
    """
    style_elms = self._styles_elm.style_lst
    return iter(style_elms)
81 changes: 73 additions & 8 deletions docx/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from __future__ import absolute_import, print_function, unicode_literals

from docx.oxml.text import CT_RPr, CT_Text, CT_Br, CT_Tab
from docx.oxml.parts.notes import CT_EndnoteReference, CT_FootnoteReference
from docx.enum.text import WD_BREAK


Expand Down Expand Up @@ -103,6 +105,43 @@ def text(self):
return text


class Text(object):
    """
    Proxy object wrapping ``<w:t>`` element.
    """
    def __init__(self, t_elm):
        """
        Keep a reference to the wrapped element *t_elm*.
        """
        super(Text, self).__init__()
        self._t = t_elm


class NoteReference(object):
    """
    Proxy wrapping a note reference element found in a run.
    """
    def __init__(self, el, note_type=None):
        """
        *el* is the wrapped reference element. *note_type* is an optional
        label for the kind of note referred to; it is kept on the
        ``note_type`` attribute and defaults to None.
        """
        self._element = el
        # Retain the caller-supplied label; it was previously accepted but
        # silently dropped.
        self.note_type = note_type

    @property
    def id(self):
        """
        The ``id`` value of the wrapped reference element.
        """
        return self._element.id


class EndnoteReference(NoteReference):
    """
    Note reference proxy for endnotes; adds nothing to its base class.
    """


class FootnoteReference(NoteReference):
    """
    Note reference proxy for footnotes; adds nothing to its base class.
    """


class RunElement(object):
    """
    Minimal proxy that only holds a reference to the element it wraps.
    """
    def __init__(self, el):
        """
        Keep a reference to the wrapped element *el*.
        """
        self._element = el


class LineBreak(RunElement):
    """
    Run element proxy for a break; adds nothing to its base class.
    """
class Tab(RunElement):
    """
    Run element proxy for a tab; adds nothing to its base class.
    """


class Run(object):
"""
Proxy object wrapping ``<w:r>`` element. Several of the properties on Run
Expand All @@ -111,9 +150,35 @@ class Run(object):
not specified directly on the run and its effective value is taken from
the style hierarchy.
"""

# Maps the element class of a run child to the proxy class used to wrap it.
# A value of None marks a known child type that gets no proxy.
_elements_map = {
    CT_RPr: None,
    CT_Text: Text,
    CT_Br: LineBreak,
    CT_Tab: Tab,
    CT_EndnoteReference: EndnoteReference,
    CT_FootnoteReference: FootnoteReference,
}

def __init__(self, r):
    """
    Keep a reference to the wrapped ``<w:r>`` element *r*.
    """
    super(Run, self).__init__()
    self._r = r

def get_elements(self):
    """
    Generate a proxy object for each child element of this run, in the
    order the children are iterated.

    The proxy class is looked up in ``_elements_map`` by the exact type of
    the child element. A child whose type maps to a false value (e.g.
    None) is known but produces no proxy and is skipped. Raises
    ValueError when a child's type has no entry in the map. Because this
    is a generator, the error surfaces only when iteration reaches the
    offending child.
    """
    elements_map = self._elements_map

    # Iterate the element directly rather than calling getchildren(),
    # which is deprecated in lxml and removed from the standard library's
    # ElementTree.
    for child in self._r:
        element_type = type(child)

        if element_type not in elements_map:
            raise ValueError(
                "No mapping for element type %s" % element_type
            )

        wrapper = elements_map[element_type]
        if not wrapper:
            continue

        yield wrapper(child)

def add_break(self, break_type=WD_BREAK.LINE):
"""
Expand Down Expand Up @@ -328,6 +393,14 @@ def text(self):
for t in self._r.t_lst:
text += t.text
return text

@property
def endnote_references(self):
    """
    New list of |EndnoteReference| proxies, one for each item in
    ``endnote_refs`` of the wrapped run element.
    """
    ref_elms = self._r.endnote_refs
    return [EndnoteReference(ref_elm, 'endnote') for ref_elm in ref_elms]

@property
def footnote_references(self):
    """
    New list of |FootnoteReference| proxies, one for each item in
    ``footnote_refs`` of the wrapped run element.
    """
    ref_elms = self._r.footnote_refs
    return [FootnoteReference(ref_elm, 'footnote') for ref_elm in ref_elms]

@property
def underline(self):
Expand Down Expand Up @@ -357,11 +430,3 @@ def web_hidden(self):
"""
return 'webHidden'


class Text(object):
    """
    Proxy object wrapping ``<w:t>`` element.
    """
    def __init__(self, t_elm):
        """
        Keep a reference to the wrapped element *t_elm*.
        """
        super(Text, self).__init__()
        self._t = t_elm
Empty file added notes_tests/__init__.py
Empty file.
Binary file added notes_tests/data/notes.docx
Binary file not shown.
Binary file added notes_tests/data/run.docx
Binary file not shown.
Loading