Commit: evaluation results
mrinalgrover committed Jun 27, 2022
1 parent 987ac58 commit 809310d
Showing 9 changed files with 289 additions and 5 deletions.
23 changes: 23 additions & 0 deletions code/NER_Model.py
@@ -350,12 +350,35 @@ def predict(sentence, correct_spellings=True):

    output_str = crf.predict([feat])
    print(f" IOB Tagging : {output_str}")
    # Rule-based postprocessing passes layered on top of the CRF tags
    output_str = NER_Model_postprocessing.tag_color(output_str)
    output_str = NER_Model_postprocessing.tag_price(output_str)
    output_str = NER_Model_postprocessing.tag_shape(output_str)
    output_str = NER_Model_postprocessing.tag_size(output_str)

    print(f"Postprocessed output: {output_str}")
    result = []
    for token, tag in zip(tokens, output_str[0]):
        result.append((token, tag))
    return result

def evaluate_results(token_list):
    # Variant of predict() that takes an already-tokenized sentence and skips
    # spell correction; used by the new evaluation.py driver.
    feat = sentence2features(token_list)

    # print(f"Extracted features : {feat}")

    output_str = crf.predict([feat])
    # print(f" IOB Tagging : {output_str}")
    result = []
    for token, tag in zip(token_list, output_str[0]):
        result.append((token, tag))

    result = NER_Model_postprocessing.tag_color(result)
    result = NER_Model_postprocessing.tag_price(result)
    result = NER_Model_postprocessing.tag_shape(result)
    result = NER_Model_postprocessing.tag_size(result)

    # print(f"Postprocessed output: {output_str}")
    return result

if __name__ == "__main__":
    main()
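
A minimal usage sketch for the new evaluate_results helper (the token list and the tags shown are illustrative, not project output; as in evaluation.py below, NER_Model.main() must run first so the crf model is initialized):

import NER_Model

NER_Model.main()
tokens = ["large", "red", "rug"]                 # hypothetical pre-tokenized sentence
token_tags = NER_Model.evaluate_results(tokens)
# e.g. [("large", "I-SIZE"), ("red", "I-COLOR"), ("rug", "O")]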
2 changes: 1 addition & 1 deletion code/NER_Model_postprocessing.py
@@ -11,7 +11,7 @@
color_list.remove("jet")
color_list.remove("quartz")

def tag_color(sent, color_list):
def tag_color(sent):
"""
Uses a color list to identify colors in the input data.
Colors can be multiple words long; scans with an appropriate window size for each color.
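
The body of tag_color is collapsed in this diff; below is a minimal sketch of the windowed scan its docstring describes, assuming sent is a list of (token, tag) pairs as passed by evaluate_results (the function name and internals are illustrative, not the committed implementation):

def tag_color_sketch(sent):
    tokens = [tok.lower() for tok, _ in sent]
    for color in color_list:                     # module-level list, per this hunk
        words = color.lower().split()
        n = len(words)                           # window as wide as the color name
        for i in range(len(tokens) - n + 1):
            if tokens[i:i + n] == words:         # multi-word colors match whole spans
                for j in range(i, i + n):
                    sent[j] = (sent[j][0], "I-COLOR")
    return sent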
2 changes: 1 addition & 1 deletion code/Negation_Analyser.py
@@ -324,7 +324,7 @@ def get_spacy_tokens(transformed_sent, entity_indices):
    string_sent = " ".join([item for sublist in transformed_sent for item in sublist])
    doc = nlp(string_sent)

-    spacy_special = ["'", '"', ":", ";", ",", "?", "!", ".", "n't", "'m", " "]
+    spacy_special = [":", ";", ",", "?", "!", ".", "n't", "'m", " "]  # "'" and '"' removed

    for ent_idx in entity_indices:
        substring = " ".join([item for item in transformed_sent[ent_idx]])
36 changes: 36 additions & 0 deletions code/evaluation.ipynb
@@ -0,0 +1,36 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c18d1940-9f34-4d23-bad0-feec22fc11b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2848639-6a7a-4861-b482-6b52ce873b51",
   "metadata": {},
   "outputs": [],
   "source": [
    "actual = pd.read_csv('data/spacy_tokenized')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "573",
   "language": "python",
   "name": "573"
  },
  "language_info": {
   "name": ""
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
226 changes: 226 additions & 0 deletions code/evaluation.py
@@ -0,0 +1,226 @@
#!/usr/bin/env python
# coding: utf-8

from __future__ import division, print_function, unicode_literals
import numpy as np
import spacy
import pandas as pd
import NER_Model, Negation_Analyser


import sys
from collections import defaultdict

def split_tag(chunk_tag):
    """
    split chunk tag into IOBES prefix and chunk_type
    e.g.
    B-PER -> (B, PER)
    O -> (O, None)
    """
    if chunk_tag == 'O':
        return ('O', None)
    if "N-" in chunk_tag:
        # Negated entities carry an extra segment, e.g. I-N-COLOR -> (I, N-COLOR),
        # matching the I- -> I-N- rewrite applied to the gold tags below.
        return chunk_tag[0], "N-" + chunk_tag.split("-")[2]
    else:
        return chunk_tag.split('-', maxsplit=1)

def is_chunk_end(prev_tag, tag):
    """
    check if the previous chunk ended between the previous and current word
    e.g.
    (B-PER, I-PER) -> False
    (B-LOC, O) -> True
    Note: in case of contradicting tags, e.g. (B-PER, I-LOC)
    this is considered as (B-PER, B-LOC)
    """
    prefix1, chunk_type1 = split_tag(prev_tag)
    prefix2, chunk_type2 = split_tag(tag)

    if prefix1 == 'O':
        return False
    if prefix2 == 'O':
        return prefix1 != 'O'

    return chunk_type1 != chunk_type2

def is_chunk_start(prev_tag, tag):
    """
    check if a new chunk started between the previous and current word
    """
    prefix1, chunk_type1 = split_tag(prev_tag)
    prefix2, chunk_type2 = split_tag(tag)

    if prefix2 == 'O':
        return False
    if prefix1 == 'O':
        return prefix2 != 'O'

    return chunk_type1 != chunk_type2
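
# Illustrative boundary checks with this project's IO tags (hand-verified
# against the two helpers above):
#   is_chunk_end('I-COLOR', 'O')           -> True   (chunk closed by O)
#   is_chunk_end('I-COLOR', 'I-COLOR')     -> False  (same type continues)
#   is_chunk_start('O', 'I-SIZE')          -> True   (new chunk opens)
#   is_chunk_start('I-COLOR', 'I-N-COLOR') -> True   (negated type differs)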

def calc_metrics(tp, p, t, percent=True):
    """
    compute overall precision, recall and FB1 (default values are 0.0)
    if percent is True, return 100 * original decimal value
    """
    precision = tp / p if p else 0
    recall = tp / t if t else 0
    fb1 = 2 * precision * recall / (precision + recall) if precision + recall else 0
    if percent:
        return 100 * precision, 100 * recall, 100 * fb1
    else:
        return precision, recall, fb1
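
# Worked example (numbers invented): calc_metrics(8, 10, 12) treats 8 chunks as
# correct out of 10 predicted and 12 true, giving precision 80.00, recall 66.67
# and FB1 72.73; with percent=False it returns the raw decimals
# (0.8, ~0.6667, ~0.7273).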


def count_chunks(true_seqs, pred_seqs):
    """
    true_seqs: a list of true tags
    pred_seqs: a list of predicted tags
    return:
    correct_chunks: a dict (counter),
        key = chunk types,
        value = number of correctly identified chunks per type
    true_chunks: a dict, number of true chunks per type
    pred_chunks: a dict, number of identified chunks per type
    correct_counts, true_counts, pred_counts: similar to above, but for tags
    """
    correct_chunks = defaultdict(int)
    true_chunks = defaultdict(int)
    pred_chunks = defaultdict(int)

    correct_counts = defaultdict(int)
    true_counts = defaultdict(int)
    pred_counts = defaultdict(int)

    prev_true_tag, prev_pred_tag = 'O', 'O'
    correct_chunk = None

    for true_tag, pred_tag in zip(true_seqs, pred_seqs):
        if true_tag == pred_tag:
            correct_counts[true_tag] += 1
        true_counts[true_tag] += 1
        pred_counts[pred_tag] += 1

        _, true_type = split_tag(true_tag)
        _, pred_type = split_tag(pred_tag)

        if correct_chunk is not None:
            true_end = is_chunk_end(prev_true_tag, true_tag)
            pred_end = is_chunk_end(prev_pred_tag, pred_tag)

            if pred_end and true_end:
                correct_chunks[correct_chunk] += 1
                correct_chunk = None
            elif pred_end != true_end or true_type != pred_type:
                correct_chunk = None

        true_start = is_chunk_start(prev_true_tag, true_tag)
        pred_start = is_chunk_start(prev_pred_tag, pred_tag)

        if true_start and pred_start and true_type == pred_type:
            correct_chunk = true_type
        if true_start:
            true_chunks[true_type] += 1
        if pred_start:
            pred_chunks[pred_type] += 1

        prev_true_tag, prev_pred_tag = true_tag, pred_tag

    # flush a chunk that runs to the end of the sequence
    if correct_chunk is not None:
        correct_chunks[correct_chunk] += 1

    return (correct_chunks, true_chunks, pred_chunks,
            correct_counts, true_counts, pred_counts)
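
# Illustrative trace: for true_seqs = ['I-COLOR', 'I-COLOR', 'O'] and
# pred_seqs = ['I-COLOR', 'O', 'O'], the predicted COLOR chunk ends one token
# early, so true_chunks == {'COLOR': 1}, pred_chunks == {'COLOR': 1}, and
# correct_chunks stays empty.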

def get_result(correct_chunks, true_chunks, pred_chunks,
               correct_counts, true_counts, pred_counts, verbose=True):
    """
    if verbose, print overall performance, as well as performance per chunk type;
    otherwise, simply return overall prec, rec, f1 scores
    """
    # sum counts
    sum_correct_chunks = sum(correct_chunks.values())
    sum_true_chunks = sum(true_chunks.values())
    sum_pred_chunks = sum(pred_chunks.values())

    sum_correct_counts = sum(correct_counts.values())
    sum_true_counts = sum(true_counts.values())

    nonO_correct_counts = sum(v for k, v in correct_counts.items() if k != 'O')
    nonO_true_counts = sum(v for k, v in true_counts.items() if k != 'O')

    chunk_types = sorted(list(set(list(true_chunks) + list(pred_chunks))))

    # compute overall precision, recall and FB1 (default values are 0.0)
    prec, rec, f1 = calc_metrics(sum_correct_chunks, sum_pred_chunks, sum_true_chunks)
    res = (prec, rec, f1)
    if not verbose:
        return res

    # print overall performance, and performance per chunk type

    print("processed %i tokens with %i phrases; " % (sum_true_counts, sum_true_chunks), end='')
    print("found: %i phrases; correct: %i.\n" % (sum_pred_chunks, sum_correct_chunks), end='')

    print("accuracy: %6.2f%%; (non-O)" % (100*nonO_correct_counts/nonO_true_counts))
    print("accuracy: %6.2f%%; " % (100*sum_correct_counts/sum_true_counts), end='')
    print("precision: %6.2f%%; recall: %6.2f%%; FB1: %6.2f" % (prec, rec, f1))

    # for each chunk type, compute precision, recall and FB1 (default values are 0.0)
    for t in chunk_types:
        prec, rec, f1 = calc_metrics(correct_chunks[t], pred_chunks[t], true_chunks[t])
        print("%17s: " % t, end='')
        print("precision: %6.2f%%; recall: %6.2f%%; FB1: %6.2f" %
              (prec, rec, f1), end='')
        print(" %d" % pred_chunks[t])

    return res
    # you can generate LaTeX output for tables like in
    # http://cnts.uia.ac.be/conll2003/ner/example.tex
    # but I'm not implementing this

def evaluate(true_seqs, pred_seqs, verbose=True):
    (correct_chunks, true_chunks, pred_chunks,
     correct_counts, true_counts, pred_counts) = count_chunks(true_seqs, pred_seqs)
    result = get_result(correct_chunks, true_chunks, pred_chunks,
                        correct_counts, true_counts, pred_counts, verbose=verbose)
    return result
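
# Minimal usage sketch (tag sequences invented for illustration):
#   true = ['I-COLOR', 'I-COLOR', 'O', 'I-SIZE']
#   pred = ['I-COLOR', 'I-COLOR', 'O', 'O']
#   evaluate(true, pred, verbose=False)  # -> (100.0, 50.0, ~66.7): the single
#   predicted chunk is correct, but only one of the two true chunks was found.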

validation_df = pd.read_csv('data/spacy_tokenized.csv')
# Normalize gold B- prefixes to I-, since the pipeline's predictions use IO-style tags.
validation_df = validation_df.replace('B-', 'I-', regex=True)
sentences = []
sent = []
tok_tag_act = []
tokens = []
actual_tags = []
predicted_tags = []
NER_Model.main()
for index, row in validation_df.iterrows():
    if pd.isna(row['Tokens']):
        # A blank row marks a sentence boundary: score the buffered sentence.
        print(' '.join(sent))
        print(f"Actual : {tok_tag_act}")
        predicted_ner = NER_Model.evaluate_results(sent)
        pred_tok_tag = Negation_Analyser.predict(predicted_ner, "tags")
        for tag in pred_tok_tag['token-tags']:
            predicted_tags.append(tag[1])
        print(f"Predicted : {pred_tok_tag}")
        sent = []
        tok_tag_act = []

    else:
        sent.append(row['Tokens'])
        if row['is_negative'] is True:
            # Negated entities gain an N- segment, e.g. I-COLOR -> I-N-COLOR.
            tok_tag_act.append((row['Tokens'], row['Tags'].replace('I-', 'I-N-')))
            actual_tags.append(row['Tags'].replace('I-', 'I-N-'))
        else:
            actual_tags.append(row['Tags'])
            tok_tag_act.append((row['Tokens'], row['Tags']))


evaluate(actual_tags, predicted_tags)
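
The flush-on-blank-row logic above implies that data/spacy_tokenized.csv stores one token per row in Tokens / Tags / is_negative columns, with an empty row between sentences. A hypothetical fragment (invented for illustration, not taken from the project data):

Tokens,Tags,is_negative
large,I-SIZE,False
red,I-COLOR,False
rug,O,False
,,
not,O,False
blue,I-COLOR,True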
2 changes: 1 addition & 1 deletion code/intermediate/Semi-CRF-Val.ipynb
@@ -1644,7 +1644,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
2 changes: 1 addition & 1 deletion code/intermediate/bert-negation-final.ipynb
@@ -5443,7 +5443,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
1 change: 0 additions & 1 deletion env.yaml
@@ -26,4 +26,3 @@ dependencies:
  - graphviz
  - pip:
    - contextualSpellCheck
-    - spacy-model-en_core_web_sm
Binary file added resources/Screenshot 2022-06-26 at 20.08.04.png