import nltk when needed
mgraffg committed Sep 11, 2024
1 parent 368bc46 commit 1e59805
Showing 6 changed files with 84 additions and 69 deletions.
11 changes: 5 additions & 6 deletions .github/workflows/pip.yaml
@@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
- name: Set up Python
@@ -23,18 +23,17 @@ jobs:
activate-environment: test
auto-update-conda: true
python-version: ${{ matrix.python-version }}
channels: conda-forge, ingeotec
channels: conda-forge
allow-softlinks: true
channel-priority: flexible
show-channel-urls: true
- name: Install dependencies
run: |
conda install --yes pip
pip install twine
pip install jieba
conda install --yes numpy scipy scikit-learn nltk nose microtc
conda install --yes numpy scipy scikit-learn microtc
python setup.py sdist
python setup.py bdist_wheel
- name: Pip
if: ${{ runner.os == 'Linux' }}
env:
@@ -44,6 +43,6 @@ jobs:
- name: Wheel
if: ${{ runner.os != 'Linux' }}
env:
TWINE: ${{ secrets.TWINE }}
run: |
twine upload --skip-existing -u mgraffg -p $TWINE dist/*;
11 changes: 5 additions & 6 deletions .github/workflows/test.yaml
@@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
- name: Set up Python
@@ -23,7 +23,7 @@ jobs:
activate-environment: test
auto-update-conda: true
python-version: ${{ matrix.python-version }}
channels: conda-forge, ingeotec
channels: conda-forge
allow-softlinks: true
channel-priority: flexible
show-channel-urls: true
@@ -33,19 +33,18 @@ jobs:
pip install coverage
pip install coveralls
pip install jieba
conda install --yes numpy scipy scikit-learn nltk nose microtc
python setup.py build_ext --inplace
conda install --yes numpy scipy scikit-learn nltk microtc pytest
- name: Tests on Linux
if: ${{ runner.os == 'Linux' }}
run: |
which python
python --version
which coverage
nosetests --verbose --with-coverage --cover-package=b4msa b4msa/tests
coverage run -m pytest b4msa/tests
- name: Tests on macOS and Windows
if: ${{ runner.os != 'Linux' }}
run: |
nosetests --verbose b4msa/tests
python -m pytest b4msa/tests
- name: coveralls
if: ${{ runner.os == 'Linux' }}
env:
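The test runner moves from nose to pytest, with coverage collected only on Linux. A minimal sketch of reproducing the "coverage run -m pytest b4msa/tests" step programmatically, assuming pytest and coverage are installed (not part of the commit):

# sketch: programmatic equivalent of "coverage run -m pytest b4msa/tests"
import coverage
import pytest

cov = coverage.Coverage(source=["b4msa"])   # measure only the b4msa package
cov.start()
exit_code = pytest.main(["b4msa/tests", "--verbose"])
cov.stop()
cov.save()
cov.report()
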
4 changes: 3 additions & 1 deletion b4msa/__init__.py
@@ -17,4 +17,6 @@
The starting point is :py:class:`b4msa.textmodel.TextModel`
"""
__version__ = "2.2.3"
__version__ = "2.2.4"

from b4msa.textmodel import TextModel
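With this re-export, TextModel is available directly from the package top level. A minimal usage sketch, assuming the usual fit/transform interface; the toy corpus is purely illustrative:

from b4msa import TextModel   # previously: from b4msa.textmodel import TextModel

docs = ["buenos días", "excelente servicio", "terrible experiencia"]  # illustrative corpus
tm = TextModel(lang="spanish").fit(docs)
X = tm.transform(docs)        # sparse document-term representation
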
34 changes: 24 additions & 10 deletions b4msa/lang_dependency.py
@@ -49,6 +49,8 @@ def get_lang(l):


class LangDependencyError(Exception):
"""Lang Dependency Error"""

def __init__(self, message):
self.message = message

@@ -74,19 +76,31 @@ def __init__(self, lang="spanish"):
Initializes the parameters for specific language
"""

from nltk.stem.snowball import SnowballStemmer

self.languages = ["spanish", "english", "italian", "german", "arabic"]
self.lang = lang

if self.lang not in SnowballStemmer.languages and self.lang != 'chinese':
raise LangDependencyError("Language not supported for stemming: " + lang)
if self.lang == "english":
from nltk.stem.porter import PorterStemmer
self.stemmer = PorterStemmer()
elif self.lang == 'chinese':
self.stemmer = None
else:
self.stemmer = SnowballStemmer(self.lang)


@property
def stemmer(self):
"""stemmer"""
from nltk.stem.snowball import SnowballStemmer

try:
return self._stemmer
except AttributeError:
if self.lang not in SnowballStemmer.languages and self.lang != 'chinese':
_ = f"Language not supported for stemming: {self.lang}"
raise LangDependencyError(_)
if self.lang == "english":
from nltk.stem.porter import PorterStemmer

self.stemmer = PorterStemmer()
elif self.lang == 'chinese':
self.stemmer = None
else:
self.stemmer = SnowballStemmer(self.lang)

@property
def lang(self):
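This is the core of the commit: nltk is imported lazily, the first time the stemmer property is read, instead of at LangDependency construction; consistently, nltk is not listed in the new pyproject dependencies below. A usage sketch of the intended behavior, assuming the stemmer setter (in the truncated part of the diff) caches the value in self._stemmer:

from b4msa.lang_dependency import LangDependency

ld = LangDependency(lang="english")   # no nltk import happens here anymore
stemmer = ld.stemmer                  # first access triggers the nltk import
print(stemmer.stem("running"))        # Porter stemmer output, e.g. "run"
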
46 changes: 46 additions & 0 deletions pyproject.toml
@@ -0,0 +1,46 @@
[project]
name = "b4msa"
description = "Baselines for Multilingual Sentiment Analysis"
readme = "README.rst"
dependencies = [
"numpy",
"scikit-learn>=1.3.0",
"microtc"
]
dynamic = ["version"]
classifiers = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Natural Language :: English",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Artificial Intelligence"
]

authors = [
{name = "Eric S. Tellez"},
{name = "Sabino Miranda-Jiménez"},
{name = "Mario Graff"},
{name = "Daniela Moctezuma"},
{name = "Ranyart R. Suárez"},
{name = "Oscar S. Siordia"}
]

maintainers = [
{name = "Mario Graff", email = "mgraffg@ieee.org"}
]

[tool.setuptools.dynamic]
version = {attr = 'b4msa.__version__'}

[project.urls]
Homepage = "https://b4msa.readthedocs.io"
Repository = "https://github.com/INGEOTEC/b4msa"
Issues = "https://github.com/INGEOTEC/b4msa/issues"

[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
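The dynamic version entry lets setuptools read the release number straight from b4msa.__init__ at build time. A quick sanity-check sketch for a built and installed package (not part of the commit):

from importlib.metadata import version
import b4msa

# both should report the version bumped in __init__.py above
assert version("b4msa") == "2.2.4"
assert b4msa.__version__ == "2.2.4"
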
47 changes: 1 addition & 46 deletions setup.py
@@ -12,50 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup
import b4msa


with open('README.rst') as fpt:
long_desc = fpt.read()


setup(
name="b4msa",
description="""Baselines for Multilingual Sentiment Analysis""",
long_description=long_desc,
version=b4msa.__version__,
classifiers=[
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Natural Language :: English",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX :: Linux",
'Programming Language :: Python :: 3',
"Topic :: Scientific/Engineering :: Artificial Intelligence"],
# url='https://github.com/mgraffg/EvoDAG',
# author="Mario Graff",
# author_email="mgraffg@ieee.org",
# cmdclass={"build_ext": build_ext, "clean": Clean},
# ext_modules=ext_modules,
packages=['b4msa', 'b4msa/tests', 'b4msa/tools', 'b4msa/resources'],
include_package_data=True,
zip_safe=False,
package_data={'b4msa/resources': ['arabic.stopwords',
'spanish.neg.stopwords',
'spanish.stopwords',
'english.stopwords',
'english.neg.stopwords',
'emoticons.json',
'italian.stopwords',
'italian.neg.stopwords'],
'b4msa/tests': ['text.json']},
install_requires=['microtc'],
scripts=['b4msa/tools/b4msa-train',
'b4msa/tools/b4msa-test',
'b4msa/tools/b4msa-params',
'b4msa/tools/b4msa-perf',
'b4msa/tools/b4msa-kfolds',
'b4msa/tools/b4msa-textModel']
)
setup()
