| Class | CodeRay::Scanners::Java |
| In: | lib/coderay/scanners/java.rb |
| Parent: | Scanner |
| KEYWORDS | = | %w[ assert break case catch continue default do else finally for if instanceof import new package return switch throw try typeof while debugger export ] | java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html | |
| RESERVED | = | %w[ const goto ] | ||
| CONSTANTS | = | %w[ false null true ] | ||
| MAGIC_VARIABLES | = | %w[ this super ] | ||
| TYPES | = | %w[ boolean byte char class double enum float int interface long short void ] << '[]' | ||
| DIRECTIVES | = | %w[ abstract extends final implements native private protected public static strictfp synchronized throws transient volatile ] | ||
| IDENT_KIND | = | WordList.new(:ident). add(KEYWORDS, :keyword). add(RESERVED, :reserved). add(CONSTANTS, :pre_constant). add(MAGIC_VARIABLES, :local_variable). add(TYPES, :type). add(BuiltinTypes::List, :pre_type). add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception). add(DIRECTIVES, :directive) | ||
| ESCAPE | = | / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x | ||
| UNICODE_ESCAPE | = | / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x | ||
| STRING_CONTENT_PATTERN | = | { "'" => /[^\\']+/, '"' => /[^\\"]+/, '/' => /[^\\\/]+/, } | ||
| IDENT | = | /[a-zA-Z_][A-Za-z_0-9]*/ |
# File lib/coderay/scanners/java.rb, line 48
# Tokenizes the remaining input as Java source. Appends [text, kind] pairs to
# +tokens+ (plus [:open, :string] / [:close, ...] markers around string
# literals) until eos? is true, then returns +tokens+.
# +options+ is accepted but never read inside this method.
48: def scan_tokens tokens, options
49:
# Two scanner states are handled below: :initial (ordinary code) and :string
# (inside a quoted literal whose opening quote is kept in string_delimiter).
50: state = :initial
51: string_delimiter = nil
# import_clause:      set when the word 'import' is scanned, cleared by ';'.
# class_name_follows: set by 'class' / 'interface', cleared by the next
#                     identifier (which is tagged :class) or by '{'.
# last_token_dot:     whether the last token that went through the common
#                     tail at the bottom of the loop was exactly '.'.
52: import_clause = class_name_follows = last_token_dot = false
53:
54: until eos?
55:
# Reset per iteration; branches that do not assign +match+ rely on the
# `match ||= matched` fallback in the common tail.
56: kind = nil
57: match = nil
58:
59: case state
60:
61: when :initial
62:
# Whitespace, or a backslash immediately followed by a newline.
# Pushed directly and `next`ed, so last_token_dot is left untouched.
63: if match = scan(/ \s+ | \\\n /x)
64: tokens << [match, :space]
65: next
66:
# A `//` comment (a backslash may escape the following character, including a
# newline), or a `/* ... */` comment; an unterminated `/*` runs to the end of
# the input. Also bypasses the common tail via `next`.
67: elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
68: tokens << [match, :comment]
69: next
70:
# While inside an import clause, a whole dotted identifier chain becomes a
# single :include token.
71: elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
72: kind = :include
73:
# An identifier or the literal '[]'. The IDENT_KIND lookup is overridden to
# :ident right after a '.', and to :class right after 'class' / 'interface'.
# Only when neither override applies can the word arm the two flags.
74: elsif match = scan(/ #{IDENT} | \[\] /ox)
75: kind = IDENT_KIND[match]
76: if last_token_dot
77: kind = :ident
78: elsif class_name_follows
79: kind = :class
80: class_name_follows = false
81: else
82: import_clause = true if match == 'import'
83: class_name_follows = true if match == 'class' || match == 'interface'
84: end
85:
# Operators and punctuation. A '.' followed by a digit is deliberately not
# matched here so the number branch below can take it as part of a float.
86: elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
87: kind = :operator
88:
# ';' is an operator that also ends an import clause.
89: elsif scan(/;/)
90: import_clause = false
91: kind = :operator
92:
# '{' is an operator that also cancels a pending class name.
93: elsif scan(/\{/)
94: class_name_follows = false
95: kind = :operator
96:
# Numeric literals, tried in order: hex, octal, float, then plain integer
# (with an optional l/L suffix).
97: elsif check(/[\d.]/)
98: if scan(/0[xX][0-9A-Fa-f]+/)
99: kind = :hex
100: elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
101: kind = :oct
102: elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
103: kind = :float
104: elsif scan(/\d+[lL]?/)
105: kind = :integer
106: end
107:
# Opening quote: push the group-open marker, switch to :string and remember
# which quote opened the literal. The quote itself is appended by the common
# tail as a :delimiter token.
108: elsif match = scan(/["']/)
109: tokens << [:open, :string]
110: state = :string
111: string_delimiter = match
112: kind = :delimiter
113:
114: elsif scan(/ @ #{IDENT} /ox)
115: kind = :annotation
116:
# Nothing matched: consume one character with getch and tag it :error.
117: else
118: getch
119: kind = :error
120:
121: end
122:
123: when :string
# Plain content: everything up to the next backslash or the current delimiter.
124: if scan(STRING_CONTENT_PATTERN[string_delimiter])
125: kind = :content
# Closing quote: emit the delimiter and the group-close marker, return to
# :initial, and bypass the common tail via `next`.
126: elsif match = scan(/["'\/]/)
127: tokens << [match, :delimiter]
128: tokens << [:close, state]
129: string_delimiter = nil
130: state = :initial
131: next
# A recognized escape sequence (ESCAPE or UNICODE_ESCAPE after a backslash).
# The `state == :string` test is always true inside this `when :string` arm.
# NOTE(review): in single-quoted literals only \\ and \' are tagged :char;
# every other recognized escape is tagged :content. Confirm this is the
# intended treatment for Java char literals.
132: elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
133: if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
134: kind = :content
135: else
136: kind = :char
137: end
# Any other backslash + character pair (newline included, because of /m).
138: elsif scan(/\\./m)
139: kind = :content
# A lone backslash or an end-of-line position: close the string group, tag
# what was matched as :error and fall back to :initial. The [:close, ...]
# marker is pushed before the error token is appended by the common tail.
140: elsif scan(/ \\ | $ /x)
141: tokens << [:close, state]
142: kind = :error
143: state = :initial
144: else
145: raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
146: end
147:
148: else
149: raise_inspect 'Unknown state', tokens
150:
151: end
152:
# Common tail for every branch that did not `next`: recover the matched text
# when the branch called scan without assigning it, sanity-check, update the
# "previous token was a dot" flag, and append the token.
153: match ||= matched
154: if $CODERAY_DEBUG and not kind
155: raise_inspect 'Error token %p in line %d' %
156: [[match, kind], line], tokens
157: end
158: raise_inspect 'Empty token', tokens unless match
159:
160: last_token_dot = match == '.'
161:
162: tokens << [match, kind]
163:
164: end
165:
# Input ended while still inside a string literal: close the open group.
166: if state == :string
167: tokens << [:close, state]
168: end
169:
170: tokens
171: end