Skip to content

Commit

Permalink
Move pdf parsing logic to lib
Browse files Browse the repository at this point in the history
  • Loading branch information
frankete333 committed Feb 13, 2025
1 parent a0ff0de commit 2cb0d5a
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 39 deletions.
40 changes: 2 additions & 38 deletions app/controllers/academic_histories_controller.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
require 'pdf-reader'

class AcademicHistoriesController < ApplicationController
AcademicEntry = Struct.new(:name, :credits, :number_of_failures, :date_of_completion, :grade) do
def approved?
grade != '***'
end
end

def index
redirect_to new_academic_history_path
end
Expand All @@ -20,7 +14,7 @@ def create
@successful_entries = []

if file && file.content_type == 'application/pdf'
@academic_entries = academic_entries(file)
@academic_entries = AcademicHistory::PdfProcessor.process(file)
@academic_entries.each do |entry|
save_academic_entry(entry) if entry.approved?
end
Expand All @@ -46,37 +40,7 @@ def save_academic_entry(entry)
end

def save_subject(subject)
current_student.force_add(subject)
current_student.force_add_subject(subject)
@successful_entries << subject
end

def academic_entries(file)
reader = PDF::Reader.new(file.path)
academic_entries = []

reader.pages.each do |page|
page.text.split("\n").each do |line|
line.match(subject_regex) do |match|
academic_entries << AcademicEntry.new(match[1], match[2], match[3], match[4], match[5])
end
end
end

academic_entries
end

# SubjectName Credits NumberOfFailures DateOfCompletion Grade
def subject_regex
/\s*(.*?)\s+(\d+)\s+(\d+)\s+(#{date_regex.source})\s+(#{grade_regex.source})/
end

# The date can be either a date in the format DD/MM/YYYY or **********
def date_regex
/\*{10}|\d\d\/\d\d\/\d\d\d\d/
end

# The grade can be either a number from 0 to 12, S/N or ***
def grade_regex
/\d+|S\/N|\*{3}/
end
end
2 changes: 1 addition & 1 deletion app/models/base_student.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def add(approvable)
end
end

def force_add(subject)
def force_add_subject(subject)
ids << subject.exam.id if subject.exam
ids << subject.course.id if subject.course
save!
Expand Down
42 changes: 42 additions & 0 deletions lib/academic_history/pdf_processor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
require 'pdf-reader'

module AcademicHistory
  # Extracts academic-history rows from the text of a PDF file.
  #
  # Because of `extend self`, every public method below can be called on the
  # module itself, e.g. AcademicHistory::PdfProcessor.process(file).
  module PdfProcessor
    extend self

    # One row parsed out of the PDF text. Every field holds the raw String
    # captured by the row pattern; no numeric or date conversion is done.
    AcademicEntry = Struct.new(:name, :credits, :number_of_failures, :date_of_completion, :grade) do
      # An entry counts as approved unless its last column is the '***' placeholder.
      def approved?
        grade != '***'
      end
    end

    # The date is either DD/MM/YYYY or the ten-asterisk placeholder **********.
    DATE_REGEX = /\*{10}|\d\d\/\d\d\/\d\d\d\d/.freeze

    # The concept is one of Aceptable, Bueno, Muy Bueno, Excelente, S/C or ***.
    CONCEPT_REGEX = /Aceptable|Bueno|Muy Bueno|Excelente|S\/C|\*{3}/.freeze

    # SubjectName Credits NumberOfFailures DateOfCompletion Concept
    # Built once at load time instead of being re-interpolated for every line.
    SUBJECT_REGEX =
      /\s*(.*?)\s+(\d+)\s+(\d+)\s+(#{DATE_REGEX.source})\s+(#{CONCEPT_REGEX.source})/.freeze

    # Reads the PDF found at file.path and returns an Array with one
    # AcademicEntry per text line that matches the row pattern, in page and
    # line order. Lines that do not match are ignored.
    #
    # file - any object responding to #path (the location of the PDF on disk).
    def process(file)
      PDF::Reader.new(file.path).pages.flat_map do |page|
        page.text.split("\n").map { |line| parse_line(line) }.compact
      end
    end

    # Pattern for a full row:
    # SubjectName Credits NumberOfFailures DateOfCompletion Concept
    def subject_regex
      SUBJECT_REGEX
    end

    # Pattern for the date column (DD/MM/YYYY or **********).
    def date_regex
      DATE_REGEX
    end

    # Pattern for the concept column
    # (Aceptable, Bueno, Muy Bueno, Excelente, S/C or ***).
    def concept_regex
      CONCEPT_REGEX
    end

    private

    # Builds an AcademicEntry from a single text line, or returns nil when the
    # line is not a subject row. Only the first match in the line is used.
    def parse_line(line)
      row = SUBJECT_REGEX.match(line)
      AcademicEntry.new(*row.captures) if row
    end
  end
end

0 comments on commit 2cb0d5a

Please sign in to comment.