7
7
from sctokenizer .php_tokenizer import PhpTokenizer
8
8
9
9
import os
10
+ import enum
10
11
11
12
LANG_MAP = {
12
13
'cc' : 'cpp' ,
@@ -18,14 +19,22 @@ def check_language(lang):
18
19
return LANG_MAP [lang ]
19
20
return lang
20
21
22
class SourceState(enum.Enum):
    """
    Lifecycle marker for a ``Source`` object.

    ``Source.__init__`` starts every instance in ``INIT`` and
    ``Source.tokenize`` switches it to ``TOKENIZED`` once tokens have
    been stored on the instance.
    """

    # Freshly constructed; nothing has been tokenized yet.
    INIT = 0
    # NOTE(review): not assigned anywhere in the visible code; presumably
    # reserved for a source whose cached tokens are no longer valid.
    UNTOKENIZED = 1
    # tokenize() has run and its result is cached on the instance.
    TOKENIZED = 2
26
+
21
27
class Source ():
22
28
def __init__(self, source_str, lang=None, name=None):
    """
    Build a Source from raw source text.

    :param source_str: the source code text to wrap
    :param lang: language identifier; when None the language is
        obtained from detect_language(), otherwise the given value
        is passed through check_language()
    :param name: optional label stored on the instance as-is
    """
    # Nothing has been tokenized yet; tokenize() consults this flag
    # to decide whether a cached result can be returned.
    self.__state = SourceState.INIT

    self.source_str = source_str
    self.lang = (
        self.detect_language(self.source_str)
        if lang is None
        else check_language(lang)
    )
    self.name = name
    # Filled in by the first successful tokenize() call.
    self.tokens = None
29
38
30
39
@classmethod
31
40
def from_file (cls , filepath , lang = None , name = None ):
@@ -42,7 +51,6 @@ def from_file(cls, filepath, lang=None, name=None):
42
51
name = filepath
43
52
return Source (source_str , lang , name )
44
53
45
-
46
54
@classmethod
47
55
def from_str (cls , source_str , lang = None , name = None ):
48
56
"""
@@ -63,24 +71,30 @@ def get_source_str(self):
63
71
return self .source_str
64
72
65
73
def tokenize(self):
    """
    Tokenize the source string with the tokenizer matching ``self.lang``.

    The result is cached on the instance: the first successful call
    stores the tokens in ``self.tokens`` and marks the source as
    tokenized; every later call returns that stored value without
    running the tokenizer again.

    :return: whatever the selected tokenizer's ``tokenize`` returns for
        ``self.source_str``
    :raises ValueError: if ``self.lang`` is not one of 'c', 'cpp',
        'java', 'python' or 'php'
    """
    if self.__state == SourceState.TOKENIZED:
        return self.tokens

    # One entry per supported language; replaces a chain of identical
    # "construct tokenizer, call tokenize" branches.
    tokenizer_classes = {
        'c': CTokenizer,
        'cpp': CppTokenizer,
        'java': JavaTokenizer,
        'python': PythonTokenizer,
        'php': PhpTokenizer,
    }
    tokenizer_cls = tokenizer_classes.get(self.lang)
    if tokenizer_cls is None:
        raise ValueError("Unsupported language")

    self.tokens = tokenizer_cls().tokenize(self.source_str)
    # Only flip the state after tokenizing succeeded, so a failed
    # attempt is retried on the next call instead of returning None.
    self.__state = SourceState.TOKENIZED
    return self.tokens
97
+
84
98
@classmethod
85
99
def detect_language (cls , source_str ):
86
100
"""
0 commit comments