-
Notifications
You must be signed in to change notification settings - Fork 765
Expand file tree
/
Copy pathPythonSyntaxTokenizer.cpp
More file actions
87 lines (74 loc) · 2.71 KB
/
PythonSyntaxTokenizer.cpp
File metadata and controls
87 lines (74 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// Copyright 1998-2016 Epic Games, Inc. All Rights Reserved.
#include "PythonSyntaxTokenizer.h"
#include "PythonEditorPrivatePCH.h"
#include "BreakIterator.h"
/**
 * Factory for the tokenizer.
 *
 * @param InRules	The syntax rules the new tokenizer will match against; moved into the new instance.
 * @return A shared reference to the newly allocated tokenizer.
 */
TSharedRef< FPythonSyntaxTokenizer > FPythonSyntaxTokenizer::Create(TArray<FRule> InRules)
{
	// Allocate first, then hand the raw pointer over to the shared-reference machinery.
	FPythonSyntaxTokenizer* const NewTokenizer = new FPythonSyntaxTokenizer(MoveTemp(InRules));
	return MakeShareable(NewTokenizer);
}
/** Destructor; performs no work beyond destroying the members. */
FPythonSyntaxTokenizer::~FPythonSyntaxTokenizer() = default;
/**
 * Tokenizes Input and appends the resulting lines to OutTokenizedLines.
 *
 * When UE_ENABLE_ICU is set, Input is split into line ranges and each range is tokenized by
 * TokenizeLineRanges. Otherwise the whole of Input is reported as one line containing a single
 * literal token.
 *
 * @param OutTokenizedLines	Receives the tokenized lines; entries are appended, the array is not reset here.
 * @param Input				The text to tokenize.
 */
void FPythonSyntaxTokenizer::Process(TArray<FTokenizedLine>& OutTokenizedLines, const FString& Input)
{
#if UE_ENABLE_ICU
	TArray<FTextRange> LineRanges;
	FTextRange::CalculateLineRangesFromString(Input, LineRanges);
	TokenizeLineRanges(Input, LineRanges, OutTokenizedLines);
#else
	// Fallback path: emit the entire input as one untokenized literal line.
	FTokenizedLine FakeTokenizedLine;
	FakeTokenizedLine.Range = FTextRange(0, Input.Len());
	// Pass ESyntaxType::None explicitly, matching how TokenizeLineRanges builds its literal tokens.
	FakeTokenizedLine.Tokens.Emplace(ETokenType::Literal, FakeTokenizedLine.Range, ESyntaxType::None);
	// The local is not used again, so move it instead of copying its token array.
	OutTokenizedLines.Add(MoveTemp(FakeTokenizedLine));
#endif
}
/**
 * Constructs a tokenizer from a set of syntax rules.
 *
 * @param InRules	Rules to match against; taken by value and moved into the Rules member.
 */
FPythonSyntaxTokenizer::FPythonSyntaxTokenizer(TArray<FRule> InRules)
: Rules(MoveTemp(InRules))
{
}
/**
 * Splits each supplied line range of Input into syntax and literal tokens.
 *
 * One FTokenizedLine is appended to OutTokenizedLines for every entry in LineRanges. Within a line,
 * the rules are tried in order at the current offset; the first rule whose MatchText is found there
 * produces a Syntax token tagged with that rule's SyntaxType. When no rule matches, the text up to the
 * next word-break candidate (clamped to the end of the line) is emitted as a Literal token.
 *
 * Rules with an empty MatchText are ignored, and a rule is only considered when its MatchText fits
 * inside what is left of the current line, so every emitted token lies within its line and the scan
 * always advances.
 *
 * @param Input					The full string being tokenized; the line ranges index into it.
 * @param LineRanges			The ranges of Input to tokenize.
 * @param OutTokenizedLines		Receives one tokenized line per entry in LineRanges (appended, not reset).
 */
void FPythonSyntaxTokenizer::TokenizeLineRanges(const FString& Input, const TArray<FTextRange>& LineRanges, TArray<FTokenizedLine>& OutTokenizedLines)
{
	TSharedRef<IBreakIterator> WBI = FBreakIterator::CreateWordBreakIterator();
	WBI->SetString(Input);

	// Tokenize line ranges
	for (const FTextRange& LineRange : LineRanges)
	{
		FTokenizedLine TokenizedLine;
		TokenizedLine.Range = LineRange;

		if (TokenizedLine.Range.IsEmpty())
		{
			// An empty line still gets a single (empty) literal token.
			TokenizedLine.Tokens.Emplace(ETokenType::Literal, TokenizedLine.Range, ESyntaxType::None);
		}
		else
		{
			int32 CurrentOffset = LineRange.BeginIndex;
			while (CurrentOffset < LineRange.EndIndex)
			{
				const int32 RemainingInLine = LineRange.EndIndex - CurrentOffset;

				// First check for a match against any syntax token rules
				bool bHasMatchedSyntax = false;
				for (const FRule& Rule : Rules)
				{
					const int32 MatchLen = Rule.MatchText.Len();

					// A zero-length rule would compare equal without consuming anything and stall the scan;
					// a rule longer than the rest of the line cannot produce a token that stays inside it.
					if (MatchLen <= 0 || MatchLen > RemainingInLine)
					{
						continue;
					}

					if (FCString::Strncmp(&Input[CurrentOffset], *Rule.MatchText, MatchLen) == 0)
					{
						// The length guard above ensures SyntaxTokenEnd <= LineRange.EndIndex.
						const int32 SyntaxTokenEnd = CurrentOffset + MatchLen;
						TokenizedLine.Tokens.Emplace(ETokenType::Syntax, FTextRange(CurrentOffset, SyntaxTokenEnd), Rule.SyntaxType);

						bHasMatchedSyntax = true;
						CurrentOffset = SyntaxTokenEnd;
						break;
					}
				}

				if (bHasMatchedSyntax)
				{
					continue;
				}

				// If none matched, consume the character(s) as text
				const int32 NextWordBoundary = WBI->MoveToCandidateAfter(CurrentOffset);
				const int32 TextTokenEnd = (NextWordBoundary == INDEX_NONE) ? LineRange.EndIndex : FMath::Min(NextWordBoundary, LineRange.EndIndex);
				TokenizedLine.Tokens.Emplace(ETokenType::Literal, FTextRange(CurrentOffset, TextTokenEnd), ESyntaxType::None);
				CurrentOffset = TextTokenEnd;
			}
		}

		// The line is rebuilt from scratch on the next iteration, so move it rather than copy its tokens.
		OutTokenizedLines.Add(MoveTemp(TokenizedLine));
	}
}