
Commit d4a714a

update source
1 parent cca2b56 commit d4a714a

2 files changed: +28 -7 lines changed


sctokenizer/source.py (+20 -6)
@@ -7,6 +7,7 @@
 from sctokenizer.php_tokenizer import PhpTokenizer
 
 import os
+import enum
 
 LANG_MAP = {
     'cc': 'cpp',
@@ -18,14 +19,22 @@ def check_language(lang):
         return LANG_MAP[lang]
     return lang
 
+class SourceState(enum.Enum):
+    INIT = 0
+    UNTOKENIZED = 1
+    TOKENIZED = 2
+
 class Source():
     def __init__(self, source_str, lang=None, name=None):
+        self.__state = SourceState.INIT
+
         self.source_str = source_str
         if lang is None:
             self.lang = self.detect_language(self.source_str)
         else:
             self.lang = check_language(lang)
         self.name = name
+        self.tokens = None
 
     @classmethod
     def from_file(cls, filepath, lang=None, name=None):
@@ -42,7 +51,6 @@ def from_file(cls, filepath, lang=None, name=None):
             name = filepath
         return Source(source_str, lang, name)
 
-
     @classmethod
     def from_str(cls, source_str, lang=None, name=None):
         """
@@ -63,24 +71,30 @@ def get_source_str(self):
         return self.source_str
 
     def tokenize(self):
+        if self.__state == SourceState.TOKENIZED:
+            return self.tokens
+
         if self.lang == 'c':
             c_tokenizer = CTokenizer()
-            return c_tokenizer.tokenize(self.source_str)
+            self.tokens = c_tokenizer.tokenize(self.source_str)
         elif self.lang == 'cpp':
             cpp_tokenizer = CppTokenizer()
-            return cpp_tokenizer.tokenize(self.source_str)
+            self.tokens = cpp_tokenizer.tokenize(self.source_str)
         elif self.lang == 'java':
             java_tokenizer = JavaTokenizer()
-            return java_tokenizer.tokenize(self.source_str)
+            self.tokens = java_tokenizer.tokenize(self.source_str)
        elif self.lang == 'python':
            python_tokenizer = PythonTokenizer()
-            return python_tokenizer.tokenize(self.source_str)
+            self.tokens = python_tokenizer.tokenize(self.source_str)
        elif self.lang == 'php':
            php_tokenizer = PhpTokenizer()
-            return php_tokenizer.tokenize(self.source_str)
+            self.tokens = php_tokenizer.tokenize(self.source_str)
        else:
            raise ValueError("Upsupported language")
 
+        self.__state = SourceState.TOKENIZED
+        return self.tokens
+
     @classmethod
     def detect_language(cls, source_str):
         """

setup.py (+8 -1)
@@ -3,12 +3,19 @@
 with open("README.md", "r") as fh:
     long_description = fh.read()
 
+PROJECT_URLS = {
+    'Bug Tracker': 'https://github.com/ngocjr7/sctokenizer/issues',
+    'Documentation': 'https://github.com/ngocjr7/sctokenizer/blob/master/README.md',
+    'Source Code': 'https://github.com/ngocjr7/sctokenizer'
+}
+
 setup(name='sctokenizer',
       description='A Source Code Tokenizer',
       author='Ngoc Bui',
       long_description=long_description,
       long_description_content_type="text/markdown",
+      project_urls=PROJECT_URLS,
       author_email='ngocjr7@gmail.com',
-      version='0.0.2',
+      version='0.0.5',
       packages=find_packages(),
       python_requires='>=3.6')
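The setup.py change adds project URLs to the package metadata and bumps the release to 0.0.5. A hedged way to check that metadata after installing the release (a sketch using the standard-library importlib.metadata, available on Python 3.8+; not part of this commit):

    from importlib import metadata

    # Version string declared in this commit's setup.py.
    print(metadata.version('sctokenizer'))        # expected: 0.0.5

    # Each project_urls entry is exposed as a 'Project-URL' metadata field.
    meta = metadata.metadata('sctokenizer')
    for url in meta.get_all('Project-URL') or []:
        print(url)  # e.g. 'Bug Tracker, https://github.com/ngocjr7/sctokenizer/issues'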
