diff --git a/lexicon/test_clex_importer.csv b/lexicon/test_clex_importer.csv new file mode 100644 index 0000000..0c046de --- /dev/null +++ b/lexicon/test_clex_importer.csv @@ -0,0 +1,60 @@ +lex_id,word_tag,word_form,logical_symbol,third_arg,tag_form_hash,word_def,synsets,tagsets +1,adv,fast,fast,NULL,67e9b1c5cbd53045919deda792be49b18b41a09b3bd328f9cc406bb27d951f62,NULL,NULL,NULL +2,adv_comp,faster,fast,NULL,38a31bf0527ff6fd23c6be74bfba58c46dbad709ce90b6d09b9a26f103a326b5,NULL,NULL,NULL +3,adv_sup,fastest,fast,NULL,55fee0f355e343b2c6a4d63b72a8ea8bcaa1a71698ada04e01533a8dc98fb4ee,NULL,NULL,NULL +4,adv,quickly,quickly,NULL,b0a248290b9aa18bfbbbfd5367dc0cc0dc82a9e90dd83b88cce59361b8d67e8a,NULL,NULL,NULL +5,adj_itr,large,large,NULL,bbe9bafa7a2a6e250fdf482a7c46217d7c63ccee917b3ae48324b61659c7e32d,NULL,NULL,NULL +6,adj_itr_comp,larger,large,NULL,bf10c6415fdedfef6bb41e276ee11b5411b9735e04279a725fd1e10f73efd5a3,NULL,NULL,NULL +7,adj_itr_sup,largest,large,NULL,272e32e00264453eae65c42e85ebd4e63d2050652adf2a83e6251f58d44c1f80,NULL,NULL,NULL +8,adj_itr,expensive,expensive,NULL,0a071608bacedfe582720a7eac91a086c2fd4b8310886756596e55a19831f1b2,NULL,NULL,NULL +9,adj_tr,valid-for,'valid-for',for,3185ec15fd8fee48b0ce5f3cf0702e7342b8a79e924b9560672d46f85b57cf0d,NULL,NULL,NULL +10,adj_tr,fond-of,'fond-of',of,bea4050ddc4a98586180049a5701670bfda43dfb33a9e9d439e09ea6658d8b49,NULL,NULL,NULL +11,adj_tr_comp,fonder-of,'fond-of',of,f9af59d37b5027585c5c367ff09c9ae6c9a9c2a572f0c2c6e31cb7a477803f70,NULL,NULL,NULL +12,adj_tr_sup,fondest-of,'fond-of',of,0593075d289bc5204293471f2179dcb02b688666ab3f0609dc0707038bda6c19,NULL,NULL,NULL +13,adj_tr,pessimistic-about,'pessimistic-about',about,ad8d6c20f19057bc423efd798ab1b7969e8d16b84175a5e1c8d6327f301c3555,NULL,NULL,NULL +14,noun_sg,woman,woman,fem,81a4335048050be30cbe511f20aa06edd43edeace4e509c9dbe295d91f3d7c67,NULL,NULL,NULL +15,noun_pl,women,woman,fem,d94e7754776de6a607bd1401e97542788b8a5bcdd770a6743029ba3cb9281e9b,NULL,NULL,NULL +16,noun_sg,credit-card,'credit-card',neutr,3e750200b57ae141249f9a284aa994912f051b9cdaaabce0eab54d8f709786b4,NULL,NULL,NULL +17,noun_pl,credit-cards,'credit-card',neutr,ba1d673f62b614589363b57193e41fff0a55c16496de3390cd4bdebe05b66a5f,NULL,NULL,NULL +18,noun_sg,month,month,neutr,7dd815d8b271ce4a800d0709cc47e4542ae83c20580fad16f0ce0ac47ab825cb,NULL,NULL,NULL +19,noun_pl,months,month,neutr,6e7ab17fe3f242d10f360197f40646b443db6079d730e9d746c96824a2606336,NULL,NULL,NULL +20,noun_mass,water,water,neutr,ab4b87413dcf986e3987394af66d8cd5b721d6e2e3414f0bf79b7180c496cffc,NULL,NULL,NULL +21,noun_mass,fear,fear,neutr,1f47f35dd555019a4069f9ccbd73b6ca9aba50e65cba190c6c2234a0ec959550,NULL,NULL,NULL +22,noun_mass,money,money,neutr,54eb36da1c2712a561a7f3d97ae4711be26e689a610a5b3bcc6cf8246b4bd328,NULL,NULL,NULL +23,mn_sg,kg,kg,NULL,f0f05c2c9d5fbbca1a49caaf36e02cabd3e8008b6ba2e06dd4f3ad5af3fc6778,NULL,NULL,NULL +24,mn_pl,kg,kg,NULL,4062f60546af1df473dca3885c5678bceba6f846a393bde85c9f062dd7b3e4e9,NULL,NULL,NULL +25,mn_sg,m,m,NULL,355a5ac057c3579401933c1edab57acd83c4fff5af52b235fe6a6487740e7266,NULL,NULL,NULL +26,mn_pl,m,m,NULL,bd00a278eb02fcda5e6953929b33fb7b722616288a41ffec7e161932b00a4f28,NULL,NULL,NULL +27,mn_sg,°C,'°C',NULL,4dbd3783cd9aaba7e2c7b91cd8b7528fe090214cf231c438a2bbce6c469eae54,NULL,NULL,NULL +28,mn_pl,°C,'°C',NULL,0b159623ff61b9c0c6dcbade8aecbcf5a3ba43a5c78a7353af9d4fc9a89d387b,NULL,NULL,NULL +29,pn_sg,John,'John',masc,cdb8203c69d244b96d0efc358b21bb7ecdfedaa377fc2d22174d202d514b60bd,NULL,NULL,NULL +30,pn_sg,Nokia,'Nokia',neutr,c362d3b6c8f1563be3031c9a27e83307499ef757668783df75d1c2ade5a8dcdb,NULL,NULL,NULL +31,pndef_sg,Nile,'Nile',neutr,b79f2d1e7257a89546b59cb6384c316c6a67b9090e0eb79e4945ab5203c5fcca,NULL,NULL,NULL +32,pndef_pl,United-Nations,'United-Nations',neutr,0ba13f88729a6598583fd0d63c2c946bbefb8e7084b49d1a6add9c13cd87615c,NULL,NULL,NULL +33,pn_sg,Mona-Lisa,'Mona-Lisa',neutr,ab1d4ceef2dea8b6524e093ed881a9b373c7399fdda1238c6d6a50e45cb2e5c1,NULL,NULL,NULL +34,pndef_sg,Mona-Lisa,'Mona-Lisa',neutr,553a63ea05f5be1335cdbaf4776e9fefe7b739709bde3afbc52d13f90d7f0747,NULL,NULL,NULL +35,iv_finsg,waits,wait,NULL,ac7502fa73fb93115da3fdc332c095b93db172fb3c9da5342cb1d4efea1dbfa4,NULL,NULL,NULL +36,iv_infpl,wait,wait,NULL,efb99022f10de88cfa917fd5f2d22db31a264c47c446e39b745ecaf2104386d5,NULL,NULL,NULL +37,iv_finsg,goes-away,'go-away',NULL,43b09b0fdca3918fdeffbdd00f9b9b1e36680a19e18a27b9ef837250aea760ff,NULL,NULL,NULL +38,iv_infpl,go-away,'go-away',NULL,ecc9360327520149b50d67f39339a5340963d1f856c8e2379694e5758b18b90f,NULL,NULL,NULL +39,iv_finsg,walks,walk,NULL,f02be7a15dcd7cca79dc9b1c141991d479120352658c50030c7268da9372e6ff,NULL,NULL,NULL +40,iv_infpl,walk,walk,NULL,31e757a6e9e9b7a3e8b7760ed46a8d8fac08fa4989bd1e2564178a466592fb0b,NULL,NULL,NULL +41,tv_finsg,knows,know,NULL,ee65aeba7add7b5c2bec1c8453b46d0686c74980f5b66e971d8e0fdf4be2f339,NULL,NULL,NULL +42,tv_infpl,know,know,NULL,422c3c3b73090cafb154a7af0c97a4a8b6526d0c081ecfd3c4852555ca497274,NULL,NULL,NULL +43,tv_pp,known,know,NULL,77ca0e15adaec7eb9f1e2b265d8c4aaea36eb3233610413ec08cbd2cc9e75602,NULL,NULL,NULL +44,tv_finsg,likes,like,NULL,f7e5589db0c1cd8731249783f142502b70a9c4c81f2438a1a68d9a14466d7638,NULL,NULL,NULL +45,tv_infpl,like,like,NULL,5662a9d8c3e47a4030ee3ce04f64b96d20f97050b43889b9c99a3d75bdcf2e87,NULL,NULL,NULL +46,tv_pp,liked,like,NULL,f4758877070e53c2c6289823abd379e18f73118cf9427ba32004de974786bd71,NULL,NULL,NULL +47,tv_finsg,relates-to,'relate-to',NULL,ee6ebaf5ffc016337f2f65c9331f36a1fbe639b1f78ae0447d006d28d8056028,NULL,NULL,NULL +48,tv_infpl,relate-to,'relate-to',NULL,192eda123579168e6bd639ed1df16c5698e593c45a353c7dac8e4ba71a910a72,NULL,NULL,NULL +49,tv_pp,related-to,'relate-to',NULL,90837f23fb3398bbf7acd13a4734d872ad7561a56638f7d02aef58af5f950656,NULL,NULL,NULL +50,dv_finsg,shows,show,'',42f5f444d7d957ad84c5a633757577db59591b988579f53bd1f350baddaf9f03,NULL,NULL,NULL +51,dv_infpl,show,show,'',d14f9f05e5a0b45180e21ff9b1e2fbe467d23276236aa52fb37eb73fd340da91,NULL,NULL,NULL +52,dv_pp,shown,show,'',7cf576b300bbb026e4a7b5b4a8d792dc0f6e834354a4efe471977f2d71517a9a,NULL,NULL,NULL +53,dv_finsg,forgives,forgive,'',4b9730c9ceecf1d5f810a4826903bb059c3e92371075ace397c16ed7d738418d,NULL,NULL,NULL +54,dv_infpl,forgive,forgive,'',9b948431ccbbc7fe72acc6521239aa45db87345868a085fef14be2952cf73343,NULL,NULL,NULL +55,dv_pp,forgiven,forgive,'',87c13947298aa7164ca11f17ffb7cf6e0725519849e9cd3b369c99e69db9f417,NULL,NULL,NULL +56,dv_finsg,succeeds,succeed,as,1b1a64cc34090572aa43370164020fb77fe5e26ea27e6841fb5da311c70db49e,NULL,NULL,NULL +57,dv_infpl,succeed,succeed,as,94cd50411086a27d99e36cea494fca14e31bfcdde6608e91b7d21b8abbfcba24,NULL,NULL,NULL +58,dv_pp,succeeded,succeed,as,8ee745975fad537905042b710e2f602f6c6bbe6c72f123b3596ce0b962f2b23f,NULL,NULL,NULL +59,prep,WordForm,LogicalSymbol,NULL,588d4fcff28ceb79f1ec34ddd75c384a00af846f472434f114b740185f9312f5,NULL,NULL,NULL diff --git a/ling508/app/mysql_repository.py b/ling508/app/mysql_repository.py index 51a4fec..f3a060d 100644 --- a/ling508/app/mysql_repository.py +++ b/ling508/app/mysql_repository.py @@ -29,15 +29,14 @@ def save_entry(self, entry: Dict[str, Any]) -> None: conn = self._connect() cursor = conn.cursor() try: - cursor.execute( - "INSERT INTO lexicon (word_tag, word_form, logical_symbol, third_arg, tag_form_hash) VALUES (%s, %s, %s, %s, %s)", - (entry['word_tag'], entry['word_form'], entry['logical_symbol'], entry['third_arg'], entry['tag_form_hash']) - ) - conn.commit() + cursor.execute("INSERT INTO lexicon (word_tag, word_form, tag_form_hash, logical_symbol, third_arg) VALUES (%s, %s, %s, %s, %s)", (entry['word_tag'], entry['word_form'], entry['tag_form_hash'], entry['logical_symbol'], entry['third_arg'])) lex_id = cursor.lastrowid + print(f"Inserted entry with lex_id: {lex_id}") - if 'stix_obj_id' in entry: + if 'stix_obj_id' in entry and lex_id: self.link_entry_with_stix(lex_id, entry['stix_obj_id']) + + conn.commit() except mysql.connector.Error as err: print(f"Error: {err}") conn.rollback() diff --git a/ling508/app/repository.py b/ling508/app/repository.py index f32bfe0..274022c 100644 --- a/ling508/app/repository.py +++ b/ling508/app/repository.py @@ -4,21 +4,51 @@ class AbstractRepository(ABC): @abstractmethod - def load_entries(self) -> List[Any]: + def load_entries(self) -> List[Dict[str, Any]]: + """Load all entries from the database.""" pass @abstractmethod - def save_entry(self, entry: Any) -> None: + def save_entry(self, entry: Dict[str, Any]) -> None: + """Save a new entry to the database.""" pass @abstractmethod - def find_entry_by_id(self, entry_id: int) -> Optional[Any]: + def get_last_insert_id(self) -> int: + """Get the ID of the last inserted entry.""" pass @abstractmethod - def find_stix_object_by_id(self, obj_id: str) -> Optional[Any]: + def link_entry_with_stix(self, lex_id: int, stix_uuid: str) -> None: + """Link an entry with a STIX object.""" pass @abstractmethod - def save_stix_object(self, stix_object: Dict) -> None: - pass \ No newline at end of file + def find_entry_by_id(self, tag_form_hash: str) -> Optional[Dict[str, Any]]: + """Find an entry by its hash.""" + pass + + @abstractmethod + def link_existing_entry(self, tag_form_hash: str) -> None: + """Link an existing entry if needed.""" + pass + + @abstractmethod + def find_stix_object_by_id(self, obj_id: str) -> Optional[Dict[str, Any]]: + """Find a STIX object by its ID.""" + pass + + @abstractmethod + def save_stix_object(self, stix_object: Dict[str, Any]) -> None: + """Save a STIX object to the database.""" + pass + + @abstractmethod + def _map_row_to_entry(self, row: Tuple) -> Dict[str, Any]: + """Map a database row to a dictionary entry.""" + pass + + @abstractmethod + def _map_row_to_stix_object(self, row: Tuple) -> Dict[str, Any]: + """Map a database row to a STIX object dictionary.""" + pass diff --git a/tests/test_clex_importer.py b/tests/test_clex_importer.py new file mode 100644 index 0000000..77226d3 --- /dev/null +++ b/tests/test_clex_importer.py @@ -0,0 +1,45 @@ +# tests/test_clex_importer.py +import pytest +import mysql.connector + +# Database connection configuration +connection_params = { + 'user': 'your_username', + 'password': 'your_password', + 'host': 'localhost', + 'database': 'stixd_corpus' +} + +def get_lexicon_entry(lex_id): + conn = mysql.connector.connect(**connection_params) + cursor = conn.cursor() + query = "SELECT lex_id, word_tag, word_form, logical_symbol, third_arg, tag_form_hash FROM lexicon WHERE lex_id = %s" + cursor.execute(query, (lex_id,)) + result = cursor.fetchone() + conn.close() + return result + +@pytest.mark.parametrize("lex_id, word_tag, word_form, logical_symbol, third_arg, tag_form_hash", [ + (1, 'adv', 'fast', 'fast', None, '67e9b1c5cbd53045919deda792be49b18b41a09b3bd328f9cc406bb27d951f62'), + (19, 'noun_pl', 'months', 'month', 'neutr', '6e7ab17fe3f242d10f360197f40646b443db6079d730e9d746c96824a2606336'), + (39, 'iv_finsg', 'walks', 'walk', None, 'f02be7a15dcd7cca79dc9b1c141991d479120352658c50030c7268da9372e6ff'), + (58, 'dv_pp', 'succeeded', 'succeed', 'as', '8ee745975fad537905042b710e2f602f6c6bbe6c72f123b3596ce0b962f2b23f') +]) +def test_lexicon_entry(lex_id, word_tag, word_form, logical_symbol, third_arg, tag_form_hash): + entry = get_lexicon_entry(lex_id) + assert entry is not None, f"Entry with lex_id {lex_id} not found" + + db_lex_id, db_word_tag, db_word_form, db_logical_symbol, db_third_arg, db_tag_form_hash = entry + + assert db_lex_id == lex_id + assert db_word_tag == word_tag + assert db_word_form == word_form + assert db_logical_symbol == logical_symbol + + # Adjusted comparison for third_arg + if third_arg is None: + assert db_third_arg is None or db_third_arg == 'NULL' + else: + assert db_third_arg == third_arg + + assert db_tag_form_hash == tag_form_hash \ No newline at end of file diff --git a/tests/text_gen_clex_uuid.py b/tests/test_gen_clex_uuid.py similarity index 100% rename from tests/text_gen_clex_uuid.py rename to tests/test_gen_clex_uuid.py