Skip to content

Commit

Permalink
Update tei files and code
Browse files: browse the repository at this point in the history
  • Loading branch information
john-papani committed Nov 23, 2023
1 parent fe550ec commit bef88a9
Show file tree
Hide file tree
Showing 8 changed files with 1,963 additions and 2,143 deletions.
18 changes: 7 additions & 11 deletions akn_to_tei/create_tei_from_akn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from saxonche import PySaxonProcessor

def transform_xml2tei_with_xslt(xml_filename, xslt_filename, result_filename):
log_file = open('./no_xml_tei_files.txt', 'a', encoding="utf8")
log_file = open('./no_xml_tei_files.txt', 'a', encoding="utf-8")

with PySaxonProcessor(license=False) as proc:
try:
Expand All @@ -11,24 +11,20 @@ def transform_xml2tei_with_xslt(xml_filename, xslt_filename, result_filename):
executable = xsltproc.compile_stylesheet(stylesheet_file=xslt_filename)
output = executable.transform_to_string(xdm_node=document)

with open(result_filename, "w",encoding="utf-8") as file:
file.write(output)
file = open(result_filename, "w", encoding="utf-8")
file.write(output)
file.close()
except Exception as e:
log_file.write(f'{filename}: {str(e)}\n')

main_datapath = "C:/Users/johnp/Documents/ECE_NTUA/diploma/diploma_github/xml_akn_files/"
datapath_2022_23 = "C:/Users/johnp/Documents/ECE_NTUA/diploma/diploma_github/xml_akn_files/xml_akn_files_2023/"
main_filenames = sorted([f for f in os.listdir(main_datapath) if not f.startswith('.')])
filenames_2022_23 = sorted([f for f in os.listdir(datapath_2022_23) if not f.startswith('.')])
# Combine the lists if needed
filenames = filenames_2022_23
datapath = "C:/Users/johnp/Documents/ECE_NTUA/diploma/diploma_github/xml_akn_files/"
filenames = sorted([f for f in os.listdir(datapath) if not f.startswith('.')])
print("NUMBER OF ALL FILES IN SYSTEM =",len(filenames))
xslt_filename = "./schema_dir/akn2tei.xsl"
for counter, filename in enumerate(filenames):
if (counter % 100 == 0):
print("File "+str(counter)+' from ' +
str(len(filenames)) + ' '+filename)
new_filename = filename.rsplit(".", 1)[0]
result_filename = f"../xml_tei_files/{new_filename}_tei.xml" if filename in main_filenames else f"../xml_tei_files/xml_tei_files_2023/{new_filename}_tei.xml"
datapath = main_datapath if filename in main_filenames else datapath_2022_23
result_filename = f"../xml_tei_files/{new_filename}_tei.xml"
transform_xml2tei_with_xslt(datapath+filename, xslt_filename, result_filename)
350 changes: 166 additions & 184 deletions xml_tei_files/01021994.doc_tei.xml

Large diffs are not rendered by default.

576 changes: 278 additions & 298 deletions xml_tei_files/01031994.doc_tei.xml

Large diffs are not rendered by default.

389 changes: 185 additions & 204 deletions xml_tei_files/01041994.doc_tei.xml

Large diffs are not rendered by default.

447 changes: 208 additions & 239 deletions xml_tei_files/01071994.doc_tei.xml

Large diffs are not rendered by default.

443 changes: 213 additions & 230 deletions xml_tei_files/01091994.doc_tei.xml

Large diffs are not rendered by default.

1,533 changes: 747 additions & 786 deletions xml_tei_files/01121993.doc_tei.xml

Large diffs are not rendered by default.

350 changes: 159 additions & 191 deletions xml_tei_files/es20180608000648.docx_tei.xml

Large diffs are not rendered by default.

0 comments on commit bef88a9

Please sign in to comment.